1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #if defined(TARGET_PPC64)
51 #include "hw/ppc/spapr_cpu_core.h"
52 #endif
53 #include "elf.h"
54 #include "sysemu/kvm_int.h"
56 //#define DEBUG_KVM
58 #ifdef DEBUG_KVM
59 #define DPRINTF(fmt, ...) \
60 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
61 #else
62 #define DPRINTF(fmt, ...) \
63 do { } while (0)
64 #endif
66 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
68 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
69 KVM_CAP_LAST_INFO
72 static int cap_interrupt_unset = false;
73 static int cap_interrupt_level = false;
74 static int cap_segstate;
75 static int cap_booke_sregs;
76 static int cap_ppc_smt;
77 static int cap_ppc_rma;
78 static int cap_spapr_tce;
79 static int cap_spapr_tce_64;
80 static int cap_spapr_multitce;
81 static int cap_spapr_vfio;
82 static int cap_hior;
83 static int cap_one_reg;
84 static int cap_epr;
85 static int cap_ppc_watchdog;
86 static int cap_papr;
87 static int cap_htab_fd;
88 static int cap_fixup_hcalls;
89 static int cap_htm; /* Hardware transactional memory support */
90 static int cap_mmu_radix;
91 static int cap_mmu_hash_v3;
92 static int cap_resize_hpt;
94 static uint32_t debug_inst_opcode;
96 /* XXX We have a race condition where we actually have a level triggered
97 * interrupt, but the infrastructure can't expose that yet, so the guest
98 * takes but ignores it, goes to sleep and never gets notified that there's
99 * still an interrupt pending.
101 * As a quick workaround, let's just wake up again 20 ms after we injected
102 * an interrupt. That way we can assure that we're always reinjecting
103 * interrupts in case the guest swallowed them.
105 static QEMUTimer *idle_timer;
107 static void kvm_kick_cpu(void *opaque)
109 PowerPCCPU *cpu = opaque;
111 qemu_cpu_kick(CPU(cpu));
114 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
115 * should only be used for fallback tests - generally we should use
116 * explicit capabilities for the features we want, rather than
117 * assuming what is/isn't available depending on the KVM variant. */
118 static bool kvmppc_is_pr(KVMState *ks)
120 /* Assume KVM-PR if the GET_PVINFO capability is available */
121 return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
124 static int kvm_ppc_register_host_cpu_type(void);
126 int kvm_arch_init(MachineState *ms, KVMState *s)
128 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
129 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
130 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
131 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
132 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
133 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
134 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
135 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
136 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
137 cap_spapr_vfio = false;
138 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
139 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
140 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
141 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
142 /* Note: we don't set cap_papr here, because this capability is
143 * only activated after this by kvmppc_set_papr() */
144 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
145 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
146 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
147 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
148 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
149 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
151 if (!cap_interrupt_level) {
152 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
153 "VM to stall at times!\n");
156 kvm_ppc_register_host_cpu_type();
158 return 0;
161 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
163 return 0;
166 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
168 CPUPPCState *cenv = &cpu->env;
169 CPUState *cs = CPU(cpu);
170 struct kvm_sregs sregs;
171 int ret;
173 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
174 /* What we're really trying to say is "if we're on BookE, we use
175 the native PVR for now". This is the only sane way to check
176 it though, so we potentially confuse users into thinking they can run
177 BookE guests on BookS. Let's hope nobody dares enough :) */
178 return 0;
179 } else {
180 if (!cap_segstate) {
181 fprintf(stderr, "kvm error: missing PVR setting capability\n");
182 return -ENOSYS;
186 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
187 if (ret) {
188 return ret;
191 sregs.pvr = cenv->spr[SPR_PVR];
192 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
195 /* Set up a shared TLB array with KVM */
196 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
198 CPUPPCState *env = &cpu->env;
199 CPUState *cs = CPU(cpu);
200 struct kvm_book3e_206_tlb_params params = {};
201 struct kvm_config_tlb cfg = {};
202 unsigned int entries = 0;
203 int ret, i;
205 if (!kvm_enabled() ||
206 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
207 return 0;
210 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
212 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
213 params.tlb_sizes[i] = booke206_tlb_size(env, i);
214 params.tlb_ways[i] = booke206_tlb_ways(env, i);
215 entries += params.tlb_sizes[i];
218 assert(entries == env->nb_tlb);
219 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
221 env->tlb_dirty = true;
223 cfg.array = (uintptr_t)env->tlb.tlbm;
224 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
225 cfg.params = (uintptr_t)&params;
226 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
228 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
229 if (ret < 0) {
230 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
231 __func__, strerror(-ret));
232 return ret;
235 env->kvm_sw_tlb = true;
236 return 0;
240 #if defined(TARGET_PPC64)
241 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
242 struct kvm_ppc_smmu_info *info)
244 CPUPPCState *env = &cpu->env;
245 CPUState *cs = CPU(cpu);
247 memset(info, 0, sizeof(*info));
249 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
250 * we need to "guess" what the supported page sizes are.
252 * For that to work we make a few assumptions:
254 * - Check whether we are running "PR" KVM which only supports 4K
255 * and 16M pages, but supports them regardless of the backing
256 * store characteristics. We also don't support 1T segments.
258 * This is safe as if HV KVM ever supports that capability or PR
259 * KVM grows support for more page/segment sizes, those versions
260 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
261 * will not hit this fallback
263 * - Else we are running HV KVM. This means we only support page
264 * sizes that fit in the backing store. Additionally we only
265 * advertise 64K pages if the processor is ARCH 2.06 and we assume
266 * P7 encodings for the SLB and hash table. Here too, we assume
267 * support for any newer processor will mean a kernel that
268 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
269 * this fallback.
271 if (kvmppc_is_pr(cs->kvm_state)) {
272 /* No flags */
273 info->flags = 0;
274 info->slb_size = 64;
276 /* Standard 4k base page size segment */
277 info->sps[0].page_shift = 12;
278 info->sps[0].slb_enc = 0;
279 info->sps[0].enc[0].page_shift = 12;
280 info->sps[0].enc[0].pte_enc = 0;
282 /* Standard 16M large page size segment */
283 info->sps[1].page_shift = 24;
284 info->sps[1].slb_enc = SLB_VSID_L;
285 info->sps[1].enc[0].page_shift = 24;
286 info->sps[1].enc[0].pte_enc = 0;
287 } else {
288 int i = 0;
290 /* HV KVM has backing store size restrictions */
291 info->flags = KVM_PPC_PAGE_SIZES_REAL;
293 if (env->mmu_model & POWERPC_MMU_1TSEG) {
294 info->flags |= KVM_PPC_1T_SEGMENTS;
297 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
298 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
299 info->slb_size = 32;
300 } else {
301 info->slb_size = 64;
304 /* Standard 4k base page size segment */
305 info->sps[i].page_shift = 12;
306 info->sps[i].slb_enc = 0;
307 info->sps[i].enc[0].page_shift = 12;
308 info->sps[i].enc[0].pte_enc = 0;
309 i++;
311 /* 64K on MMU 2.06 and later */
312 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
313 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
314 info->sps[i].page_shift = 16;
315 info->sps[i].slb_enc = 0x110;
316 info->sps[i].enc[0].page_shift = 16;
317 info->sps[i].enc[0].pte_enc = 1;
318 i++;
321 /* Standard 16M large page size segment */
322 info->sps[i].page_shift = 24;
323 info->sps[i].slb_enc = SLB_VSID_L;
324 info->sps[i].enc[0].page_shift = 24;
325 info->sps[i].enc[0].pte_enc = 0;
329 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
331 CPUState *cs = CPU(cpu);
332 int ret;
334 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
335 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
336 if (ret == 0) {
337 return;
341 kvm_get_fallback_smmu_info(cpu, info);
344 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
346 KVMState *s = KVM_STATE(current_machine->accelerator);
347 struct ppc_radix_page_info *radix_page_info;
348 struct kvm_ppc_rmmu_info rmmu_info;
349 int i;
351 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
352 return NULL;
354 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
355 return NULL;
357 radix_page_info = g_malloc0(sizeof(*radix_page_info));
358 radix_page_info->count = 0;
359 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
360 if (rmmu_info.ap_encodings[i]) {
361 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
362 radix_page_info->count++;
365 return radix_page_info;
368 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
369 bool radix, bool gtse,
370 uint64_t proc_tbl)
372 CPUState *cs = CPU(cpu);
373 int ret;
374 uint64_t flags = 0;
375 struct kvm_ppc_mmuv3_cfg cfg = {
376 .process_table = proc_tbl,
379 if (radix) {
380 flags |= KVM_PPC_MMUV3_RADIX;
382 if (gtse) {
383 flags |= KVM_PPC_MMUV3_GTSE;
385 cfg.flags = flags;
386 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
387 switch (ret) {
388 case 0:
389 return H_SUCCESS;
390 case -EINVAL:
391 return H_PARAMETER;
392 case -ENODEV:
393 return H_NOT_AVAILABLE;
394 default:
395 return H_HARDWARE;
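/* With HV KVM (KVM_PPC_PAGE_SIZES_REAL) a page size is only usable when it
 * fits within the page size backing guest RAM, e.g. 16M pages (shift 24)
 * require 16M huge pages on the host; PR KVM has no such restriction. */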
399 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
401 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
402 return true;
405 return (1ul << shift) <= rampgsize;
408 static long max_cpu_page_size;
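/* Build env->sps as the intersection of the segment/page-size combinations
 * reported by KVM (or the fallback above) with what the backing memory's
 * page size allows, pruning 1T segment and 64K page support accordingly. */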
410 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
412 static struct kvm_ppc_smmu_info smmu_info;
413 static bool has_smmu_info;
414 CPUPPCState *env = &cpu->env;
415 int iq, ik, jq, jk;
416 bool has_64k_pages = false;
418 /* We only handle page sizes for 64-bit server guests for now */
419 if (!(env->mmu_model & POWERPC_MMU_64)) {
420 return;
423 /* Collect MMU info from kernel if not already */
424 if (!has_smmu_info) {
425 kvm_get_smmu_info(cpu, &smmu_info);
426 has_smmu_info = true;
429 if (!max_cpu_page_size) {
430 max_cpu_page_size = qemu_getrampagesize();
433 /* Convert to QEMU form */
434 memset(&env->sps, 0, sizeof(env->sps));
436 /* If we have HV KVM, we need to forbid CI large pages if our
437 * host page size is smaller than 64K.
439 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
440 env->ci_large_pages = getpagesize() >= 0x10000;
444 * XXX This loop should be an entry wide AND of the capabilities that
445 * the selected CPU has with the capabilities that KVM supports.
447 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
448 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
449 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
451 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
452 ksps->page_shift)) {
453 continue;
455 qsps->page_shift = ksps->page_shift;
456 qsps->slb_enc = ksps->slb_enc;
457 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
458 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
459 ksps->enc[jk].page_shift)) {
460 continue;
462 if (ksps->enc[jk].page_shift == 16) {
463 has_64k_pages = true;
465 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
466 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
467 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
468 break;
471 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
472 break;
475 env->slb_nr = smmu_info.slb_size;
476 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
477 env->mmu_model &= ~POWERPC_MMU_1TSEG;
479 if (!has_64k_pages) {
480 env->mmu_model &= ~POWERPC_MMU_64K;
484 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
486 Object *mem_obj = object_resolve_path(obj_path, NULL);
487 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
488 long pagesize;
490 if (mempath) {
491 pagesize = qemu_mempath_getpagesize(mempath);
492 g_free(mempath);
493 } else {
494 pagesize = getpagesize();
497 return pagesize >= max_cpu_page_size;
500 #else /* defined (TARGET_PPC64) */
502 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
506 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
508 return true;
511 #endif /* !defined (TARGET_PPC64) */
513 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
515 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
518 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
519 * book3s supports only 1 watchpoint, so array size
520 * of 4 is sufficient for now.
522 #define MAX_HW_BKPTS 4
524 static struct HWBreakpoint {
525 target_ulong addr;
526 int type;
527 } hw_debug_points[MAX_HW_BKPTS];
529 static CPUWatchpoint hw_watchpoint;
531 /* By default no hardware breakpoints or watchpoints are supported */
532 static int max_hw_breakpoint;
533 static int max_hw_watchpoint;
534 static int nb_hw_breakpoint;
535 static int nb_hw_watchpoint;
537 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
539 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
540 max_hw_breakpoint = 2;
541 max_hw_watchpoint = 2;
544 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
545 fprintf(stderr, "Error initializing h/w breakpoints\n");
546 return;
550 int kvm_arch_init_vcpu(CPUState *cs)
552 PowerPCCPU *cpu = POWERPC_CPU(cs);
553 CPUPPCState *cenv = &cpu->env;
554 int ret;
556 /* Gather server mmu info from KVM and update the CPU state */
557 kvm_fixup_page_sizes(cpu);
559 /* Synchronize sregs with kvm */
560 ret = kvm_arch_sync_sregs(cpu);
561 if (ret) {
562 if (ret == -EINVAL) {
563 error_report("Register sync failed... If you're using kvm-hv.ko,"
564 " only \"-cpu host\" is possible");
566 return ret;
569 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
571 switch (cenv->mmu_model) {
572 case POWERPC_MMU_BOOKE206:
573 /* This target supports access to KVM's guest TLB */
574 ret = kvm_booke206_tlb_init(cpu);
575 break;
576 case POWERPC_MMU_2_07:
577 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
578 /* KVM-HV has transactional memory on POWER8 also without the
579 * KVM_CAP_PPC_HTM extension, so enable it here instead as
580 * long as it's available to userspace on the host. */
581 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
582 cap_htm = true;
585 break;
586 default:
587 break;
590 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
591 kvmppc_hw_debug_points_init(cenv);
593 return ret;
596 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
598 CPUPPCState *env = &cpu->env;
599 CPUState *cs = CPU(cpu);
600 struct kvm_dirty_tlb dirty_tlb;
601 unsigned char *bitmap;
602 int ret;
604 if (!env->kvm_sw_tlb) {
605 return;
608 bitmap = g_malloc((env->nb_tlb + 7) / 8);
609 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
611 dirty_tlb.bitmap = (uintptr_t)bitmap;
612 dirty_tlb.num_dirty = env->nb_tlb;
614 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
615 if (ret) {
616 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
617 __func__, strerror(-ret));
620 g_free(bitmap);
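/* Read a single SPR through the ONE_REG interface. The register width is
 * encoded in the id (KVM_REG_SIZE_MASK), so the value is copied back into
 * env->spr[] at the matching 32- or 64-bit width. */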
623 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
625 PowerPCCPU *cpu = POWERPC_CPU(cs);
626 CPUPPCState *env = &cpu->env;
627 union {
628 uint32_t u32;
629 uint64_t u64;
630 } val;
631 struct kvm_one_reg reg = {
632 .id = id,
633 .addr = (uintptr_t) &val,
635 int ret;
637 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
638 if (ret != 0) {
639 trace_kvm_failed_spr_get(spr, strerror(errno));
640 } else {
641 switch (id & KVM_REG_SIZE_MASK) {
642 case KVM_REG_SIZE_U32:
643 env->spr[spr] = val.u32;
644 break;
646 case KVM_REG_SIZE_U64:
647 env->spr[spr] = val.u64;
648 break;
650 default:
651 /* Don't handle this size yet */
652 abort();
657 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
659 PowerPCCPU *cpu = POWERPC_CPU(cs);
660 CPUPPCState *env = &cpu->env;
661 union {
662 uint32_t u32;
663 uint64_t u64;
664 } val;
665 struct kvm_one_reg reg = {
666 .id = id,
667 .addr = (uintptr_t) &val,
669 int ret;
671 switch (id & KVM_REG_SIZE_MASK) {
672 case KVM_REG_SIZE_U32:
673 val.u32 = env->spr[spr];
674 break;
676 case KVM_REG_SIZE_U64:
677 val.u64 = env->spr[spr];
678 break;
680 default:
681 /* Don't handle this size yet */
682 abort();
685 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
686 if (ret != 0) {
687 trace_kvm_failed_spr_set(spr, strerror(errno));
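/* Push the FP/VSX and Altivec state to KVM. Each FPR/VSR is staged in a
 * two-doubleword buffer whose ordering depends on host endianness (see the
 * HOST_WORDS_BIGENDIAN blocks); KVM reads 64 bits for an FPR id and the
 * full 128 bits for a VSR id. */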
691 static int kvm_put_fp(CPUState *cs)
693 PowerPCCPU *cpu = POWERPC_CPU(cs);
694 CPUPPCState *env = &cpu->env;
695 struct kvm_one_reg reg;
696 int i;
697 int ret;
699 if (env->insns_flags & PPC_FLOAT) {
700 uint64_t fpscr = env->fpscr;
701 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
703 reg.id = KVM_REG_PPC_FPSCR;
704 reg.addr = (uintptr_t)&fpscr;
705 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
706 if (ret < 0) {
707 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
708 return ret;
711 for (i = 0; i < 32; i++) {
712 uint64_t vsr[2];
714 #ifdef HOST_WORDS_BIGENDIAN
715 vsr[0] = float64_val(env->fpr[i]);
716 vsr[1] = env->vsr[i];
717 #else
718 vsr[0] = env->vsr[i];
719 vsr[1] = float64_val(env->fpr[i]);
720 #endif
721 reg.addr = (uintptr_t) &vsr;
722 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
724 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
725 if (ret < 0) {
726 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
727 i, strerror(errno));
728 return ret;
733 if (env->insns_flags & PPC_ALTIVEC) {
734 reg.id = KVM_REG_PPC_VSCR;
735 reg.addr = (uintptr_t)&env->vscr;
736 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
737 if (ret < 0) {
738 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
739 return ret;
742 for (i = 0; i < 32; i++) {
743 reg.id = KVM_REG_PPC_VR(i);
744 reg.addr = (uintptr_t)&env->avr[i];
745 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
746 if (ret < 0) {
747 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
748 return ret;
753 return 0;
756 static int kvm_get_fp(CPUState *cs)
758 PowerPCCPU *cpu = POWERPC_CPU(cs);
759 CPUPPCState *env = &cpu->env;
760 struct kvm_one_reg reg;
761 int i;
762 int ret;
764 if (env->insns_flags & PPC_FLOAT) {
765 uint64_t fpscr;
766 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
768 reg.id = KVM_REG_PPC_FPSCR;
769 reg.addr = (uintptr_t)&fpscr;
770 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
771 if (ret < 0) {
772 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
773 return ret;
774 } else {
775 env->fpscr = fpscr;
778 for (i = 0; i < 32; i++) {
779 uint64_t vsr[2];
781 reg.addr = (uintptr_t) &vsr;
782 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
784 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
785 if (ret < 0) {
786 DPRINTF("Unable to get %s%d from KVM: %s\n",
787 vsx ? "VSR" : "FPR", i, strerror(errno));
788 return ret;
789 } else {
790 #ifdef HOST_WORDS_BIGENDIAN
791 env->fpr[i] = vsr[0];
792 if (vsx) {
793 env->vsr[i] = vsr[1];
795 #else
796 env->fpr[i] = vsr[1];
797 if (vsx) {
798 env->vsr[i] = vsr[0];
800 #endif
805 if (env->insns_flags & PPC_ALTIVEC) {
806 reg.id = KVM_REG_PPC_VSCR;
807 reg.addr = (uintptr_t)&env->vscr;
808 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
809 if (ret < 0) {
810 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
811 return ret;
814 for (i = 0; i < 32; i++) {
815 reg.id = KVM_REG_PPC_VR(i);
816 reg.addr = (uintptr_t)&env->avr[i];
817 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
818 if (ret < 0) {
819 DPRINTF("Unable to get VR%d from KVM: %s\n",
820 i, strerror(errno));
821 return ret;
826 return 0;
829 #if defined(TARGET_PPC64)
830 static int kvm_get_vpa(CPUState *cs)
832 PowerPCCPU *cpu = POWERPC_CPU(cs);
833 CPUPPCState *env = &cpu->env;
834 struct kvm_one_reg reg;
835 int ret;
837 reg.id = KVM_REG_PPC_VPA_ADDR;
838 reg.addr = (uintptr_t)&env->vpa_addr;
839 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
840 if (ret < 0) {
841 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
842 return ret;
845 assert((uintptr_t)&env->slb_shadow_size
846 == ((uintptr_t)&env->slb_shadow_addr + 8));
847 reg.id = KVM_REG_PPC_VPA_SLB;
848 reg.addr = (uintptr_t)&env->slb_shadow_addr;
849 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
850 if (ret < 0) {
851 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
852 strerror(errno));
853 return ret;
856 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
857 reg.id = KVM_REG_PPC_VPA_DTL;
858 reg.addr = (uintptr_t)&env->dtl_addr;
859 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
860 if (ret < 0) {
861 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
862 strerror(errno));
863 return ret;
866 return 0;
869 static int kvm_put_vpa(CPUState *cs)
871 PowerPCCPU *cpu = POWERPC_CPU(cs);
872 CPUPPCState *env = &cpu->env;
873 struct kvm_one_reg reg;
874 int ret;
876 /* SLB shadow or DTL can't be registered unless a master VPA is
877 * registered. That means when restoring state, if a VPA *is*
878 * registered, we need to set that up first. If not, we need to
879 * deregister the others before deregistering the master VPA */
880 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
882 if (env->vpa_addr) {
883 reg.id = KVM_REG_PPC_VPA_ADDR;
884 reg.addr = (uintptr_t)&env->vpa_addr;
885 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
886 if (ret < 0) {
887 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
888 return ret;
892 assert((uintptr_t)&env->slb_shadow_size
893 == ((uintptr_t)&env->slb_shadow_addr + 8));
894 reg.id = KVM_REG_PPC_VPA_SLB;
895 reg.addr = (uintptr_t)&env->slb_shadow_addr;
896 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
897 if (ret < 0) {
898 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
899 return ret;
902 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
903 reg.id = KVM_REG_PPC_VPA_DTL;
904 reg.addr = (uintptr_t)&env->dtl_addr;
905 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
906 if (ret < 0) {
907 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
908 strerror(errno));
909 return ret;
912 if (!env->vpa_addr) {
913 reg.id = KVM_REG_PPC_VPA_ADDR;
914 reg.addr = (uintptr_t)&env->vpa_addr;
915 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
916 if (ret < 0) {
917 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
918 return ret;
922 return 0;
924 #endif /* TARGET_PPC64 */
926 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
928 CPUPPCState *env = &cpu->env;
929 struct kvm_sregs sregs;
930 int i;
932 sregs.pvr = env->spr[SPR_PVR];
934 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
936 /* Sync SLB */
937 #ifdef TARGET_PPC64
938 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
939 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
940 if (env->slb[i].esid & SLB_ESID_V) {
941 sregs.u.s.ppc64.slb[i].slbe |= i;
943 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
945 #endif
947 /* Sync SRs */
948 for (i = 0; i < 16; i++) {
949 sregs.u.s.ppc32.sr[i] = env->sr[i];
952 /* Sync BATs */
953 for (i = 0; i < 8; i++) {
954 /* Beware. We have to swap upper and lower bits here */
955 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
956 | env->DBAT[1][i];
957 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
958 | env->IBAT[1][i];
961 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
964 int kvm_arch_put_registers(CPUState *cs, int level)
966 PowerPCCPU *cpu = POWERPC_CPU(cs);
967 CPUPPCState *env = &cpu->env;
968 struct kvm_regs regs;
969 int ret;
970 int i;
972 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
973 if (ret < 0) {
974 return ret;
977 regs.ctr = env->ctr;
978 regs.lr = env->lr;
979 regs.xer = cpu_read_xer(env);
980 regs.msr = env->msr;
981 regs.pc = env->nip;
983 regs.srr0 = env->spr[SPR_SRR0];
984 regs.srr1 = env->spr[SPR_SRR1];
986 regs.sprg0 = env->spr[SPR_SPRG0];
987 regs.sprg1 = env->spr[SPR_SPRG1];
988 regs.sprg2 = env->spr[SPR_SPRG2];
989 regs.sprg3 = env->spr[SPR_SPRG3];
990 regs.sprg4 = env->spr[SPR_SPRG4];
991 regs.sprg5 = env->spr[SPR_SPRG5];
992 regs.sprg6 = env->spr[SPR_SPRG6];
993 regs.sprg7 = env->spr[SPR_SPRG7];
995 regs.pid = env->spr[SPR_BOOKE_PID];
997 for (i = 0;i < 32; i++)
998 regs.gpr[i] = env->gpr[i];
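/* env->crf[] holds the eight 4-bit CR fields with crf[0] (CR0) as the most
 * significant nibble; pack them back into the single 32-bit CR word that
 * KVM_SET_REGS expects. */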
1000 regs.cr = 0;
1001 for (i = 0; i < 8; i++) {
1002 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1005 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1006 if (ret < 0)
1007 return ret;
1009 kvm_put_fp(cs);
1011 if (env->tlb_dirty) {
1012 kvm_sw_tlb_put(cpu);
1013 env->tlb_dirty = false;
1016 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1017 ret = kvmppc_put_books_sregs(cpu);
1018 if (ret < 0) {
1019 return ret;
1023 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1024 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1027 if (cap_one_reg) {
1028 int i;
1030 /* We deliberately ignore errors here: for kernels which have
1031 * the ONE_REG calls but don't support the specific
1032 * registers, there's a reasonable chance things will still
1033 * work, at least until we try to migrate. */
1034 for (i = 0; i < 1024; i++) {
1035 uint64_t id = env->spr_cb[i].one_reg_id;
1037 if (id != 0) {
1038 kvm_put_one_spr(cs, id, i);
1042 #ifdef TARGET_PPC64
1043 if (msr_ts) {
1044 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1045 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1047 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1048 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1050 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1051 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1052 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1053 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1054 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1055 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1056 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1057 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1059 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1062 if (cap_papr) {
1063 if (kvm_put_vpa(cs) < 0) {
1064 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1068 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1069 #endif /* TARGET_PPC64 */
1072 return ret;
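/* Recompute one of the guest's BookE exception vectors from its IVORn SPR
 * plus IVPR after the sregs have been fetched from the kernel. */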
1075 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1077 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1080 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1082 CPUPPCState *env = &cpu->env;
1083 struct kvm_sregs sregs;
1084 int ret;
1086 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1087 if (ret < 0) {
1088 return ret;
1091 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1092 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1093 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1094 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1095 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1096 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1097 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1098 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1099 env->spr[SPR_DECR] = sregs.u.e.dec;
1100 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1101 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1102 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1105 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1106 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1107 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1108 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1109 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1110 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1113 if (sregs.u.e.features & KVM_SREGS_E_64) {
1114 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1117 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1118 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1121 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1122 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1123 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1124 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1125 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1126 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1127 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1128 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1129 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1130 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1131 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1132 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1133 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1134 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1135 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1136 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1137 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1138 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1139 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1140 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1141 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1142 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1143 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1144 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1145 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1146 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1147 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1148 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1149 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1150 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1151 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1152 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1153 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1155 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1156 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1157 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1158 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1159 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1160 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1161 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1164 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1165 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1166 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1169 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1170 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1171 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1172 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1173 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1177 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1178 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1179 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1180 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1181 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1182 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1183 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1184 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1185 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1186 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1187 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1190 if (sregs.u.e.features & KVM_SREGS_EXP) {
1191 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1194 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1195 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1196 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1199 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1200 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1201 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1202 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1204 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1205 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1206 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1210 return 0;
1213 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1215 CPUPPCState *env = &cpu->env;
1216 struct kvm_sregs sregs;
1217 int ret;
1218 int i;
1220 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1221 if (ret < 0) {
1222 return ret;
1225 if (!cpu->vhyp) {
1226 ppc_store_sdr1(env, sregs.u.s.sdr1);
1229 /* Sync SLB */
1230 #ifdef TARGET_PPC64
1232 * The packed SLB array we get from KVM_GET_SREGS only contains
1233 * information about valid entries. So we flush our internal copy
1234 * to get rid of stale ones, then put all valid SLB entries back
1235 * in.
1237 memset(env->slb, 0, sizeof(env->slb));
1238 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1239 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1240 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1242 * Only restore valid entries
1244 if (rb & SLB_ESID_V) {
1245 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1248 #endif
1250 /* Sync SRs */
1251 for (i = 0; i < 16; i++) {
1252 env->sr[i] = sregs.u.s.ppc32.sr[i];
1255 /* Sync BATs */
1256 for (i = 0; i < 8; i++) {
1257 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1258 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1259 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1260 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1263 return 0;
1266 int kvm_arch_get_registers(CPUState *cs)
1268 PowerPCCPU *cpu = POWERPC_CPU(cs);
1269 CPUPPCState *env = &cpu->env;
1270 struct kvm_regs regs;
1271 uint32_t cr;
1272 int i, ret;
1274 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1275 if (ret < 0)
1276 return ret;
1278 cr = regs.cr;
1279 for (i = 7; i >= 0; i--) {
1280 env->crf[i] = cr & 15;
1281 cr >>= 4;
1284 env->ctr = regs.ctr;
1285 env->lr = regs.lr;
1286 cpu_write_xer(env, regs.xer);
1287 env->msr = regs.msr;
1288 env->nip = regs.pc;
1290 env->spr[SPR_SRR0] = regs.srr0;
1291 env->spr[SPR_SRR1] = regs.srr1;
1293 env->spr[SPR_SPRG0] = regs.sprg0;
1294 env->spr[SPR_SPRG1] = regs.sprg1;
1295 env->spr[SPR_SPRG2] = regs.sprg2;
1296 env->spr[SPR_SPRG3] = regs.sprg3;
1297 env->spr[SPR_SPRG4] = regs.sprg4;
1298 env->spr[SPR_SPRG5] = regs.sprg5;
1299 env->spr[SPR_SPRG6] = regs.sprg6;
1300 env->spr[SPR_SPRG7] = regs.sprg7;
1302 env->spr[SPR_BOOKE_PID] = regs.pid;
1304 for (i = 0;i < 32; i++)
1305 env->gpr[i] = regs.gpr[i];
1307 kvm_get_fp(cs);
1309 if (cap_booke_sregs) {
1310 ret = kvmppc_get_booke_sregs(cpu);
1311 if (ret < 0) {
1312 return ret;
1316 if (cap_segstate) {
1317 ret = kvmppc_get_books_sregs(cpu);
1318 if (ret < 0) {
1319 return ret;
1323 if (cap_hior) {
1324 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1327 if (cap_one_reg) {
1328 int i;
1330 /* We deliberately ignore errors here: for kernels which have
1331 * the ONE_REG calls but don't support the specific
1332 * registers, there's a reasonable chance things will still
1333 * work, at least until we try to migrate. */
1334 for (i = 0; i < 1024; i++) {
1335 uint64_t id = env->spr_cb[i].one_reg_id;
1337 if (id != 0) {
1338 kvm_get_one_spr(cs, id, i);
1342 #ifdef TARGET_PPC64
1343 if (msr_ts) {
1344 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1345 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1347 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1348 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1350 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1351 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1352 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1353 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1354 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1355 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1356 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1357 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1359 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1362 if (cap_papr) {
1363 if (kvm_get_vpa(cs) < 0) {
1364 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1368 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1369 #endif
1372 return 0;
1375 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1377 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1379 if (irq != PPC_INTERRUPT_EXT) {
1380 return 0;
1383 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1384 return 0;
1387 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1389 return 0;
1392 #if defined(TARGET_PPCEMB)
1393 #define PPC_INPUT_INT PPC40x_INPUT_INT
1394 #elif defined(TARGET_PPC64)
1395 #define PPC_INPUT_INT PPC970_INPUT_INT
1396 #else
1397 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1398 #endif
1400 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1402 PowerPCCPU *cpu = POWERPC_CPU(cs);
1403 CPUPPCState *env = &cpu->env;
1404 int r;
1405 unsigned irq;
1407 qemu_mutex_lock_iothread();
1409 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1410 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1411 if (!cap_interrupt_level &&
1412 run->ready_for_interrupt_injection &&
1413 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1414 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1416 /* For now KVM disregards the 'irq' argument. However, in the
1417 * future KVM could cache it in-kernel to avoid a heavyweight exit
1418 * when reading the UIC.
1420 irq = KVM_INTERRUPT_SET;
1422 DPRINTF("injected interrupt %d\n", irq);
1423 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1424 if (r < 0) {
1425 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1428 /* Always wake up soon in case the interrupt was level based */
1429 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1430 (NANOSECONDS_PER_SECOND / 50));
1433 /* We don't know if there are more interrupts pending after this. However,
1434 * the guest will return to userspace in the course of handling this one
1435 * anyways, so we will get a chance to deliver the rest. */
1437 qemu_mutex_unlock_iothread();
1440 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1442 return MEMTXATTRS_UNSPECIFIED;
1445 int kvm_arch_process_async_events(CPUState *cs)
1447 return cs->halted;
1450 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1452 CPUState *cs = CPU(cpu);
1453 CPUPPCState *env = &cpu->env;
1455 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1456 cs->halted = 1;
1457 cs->exception_index = EXCP_HLT;
1460 return 0;
1463 /* map dcr access to existing qemu dcr emulation */
1464 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1466 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1467 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1469 return 0;
1472 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1474 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1475 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1477 return 0;
1480 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1482 /* Mixed endian case is not handled */
1483 uint32_t sc = debug_inst_opcode;
1485 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1486 sizeof(sc), 0) ||
1487 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1488 return -EINVAL;
1491 return 0;
1494 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1496 uint32_t sc;
1498 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1499 sc != debug_inst_opcode ||
1500 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1501 sizeof(sc), 1)) {
1502 return -EINVAL;
1505 return 0;
1508 static int find_hw_breakpoint(target_ulong addr, int type)
1510 int n;
1512 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1513 <= ARRAY_SIZE(hw_debug_points));
1515 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1516 if (hw_debug_points[n].addr == addr &&
1517 hw_debug_points[n].type == type) {
1518 return n;
1522 return -1;
1525 static int find_hw_watchpoint(target_ulong addr, int *flag)
1527 int n;
1529 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1530 if (n >= 0) {
1531 *flag = BP_MEM_ACCESS;
1532 return n;
1535 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1536 if (n >= 0) {
1537 *flag = BP_MEM_WRITE;
1538 return n;
1541 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1542 if (n >= 0) {
1543 *flag = BP_MEM_READ;
1544 return n;
1547 return -1;
1550 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1551 target_ulong len, int type)
1553 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1554 return -ENOBUFS;
1557 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1558 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1560 switch (type) {
1561 case GDB_BREAKPOINT_HW:
1562 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1563 return -ENOBUFS;
1566 if (find_hw_breakpoint(addr, type) >= 0) {
1567 return -EEXIST;
1570 nb_hw_breakpoint++;
1571 break;
1573 case GDB_WATCHPOINT_WRITE:
1574 case GDB_WATCHPOINT_READ:
1575 case GDB_WATCHPOINT_ACCESS:
1576 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1577 return -ENOBUFS;
1580 if (find_hw_breakpoint(addr, type) >= 0) {
1581 return -EEXIST;
1584 nb_hw_watchpoint++;
1585 break;
1587 default:
1588 return -ENOSYS;
1591 return 0;
1594 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1595 target_ulong len, int type)
1597 int n;
1599 n = find_hw_breakpoint(addr, type);
1600 if (n < 0) {
1601 return -ENOENT;
1604 switch (type) {
1605 case GDB_BREAKPOINT_HW:
1606 nb_hw_breakpoint--;
1607 break;
1609 case GDB_WATCHPOINT_WRITE:
1610 case GDB_WATCHPOINT_READ:
1611 case GDB_WATCHPOINT_ACCESS:
1612 nb_hw_watchpoint--;
1613 break;
1615 default:
1616 return -ENOSYS;
1618 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1620 return 0;
1623 void kvm_arch_remove_all_hw_breakpoints(void)
1625 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1628 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1630 int n;
1632 /* Software Breakpoint updates */
1633 if (kvm_sw_breakpoints_active(cs)) {
1634 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1637 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1638 <= ARRAY_SIZE(hw_debug_points));
1639 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1641 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1642 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1643 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1644 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1645 switch (hw_debug_points[n].type) {
1646 case GDB_BREAKPOINT_HW:
1647 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1648 break;
1649 case GDB_WATCHPOINT_WRITE:
1650 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1651 break;
1652 case GDB_WATCHPOINT_READ:
1653 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1654 break;
1655 case GDB_WATCHPOINT_ACCESS:
1656 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1657 KVMPPC_DEBUG_WATCH_READ;
1658 break;
1659 default:
1660 cpu_abort(cs, "Unsupported breakpoint type\n");
1662 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1667 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1669 CPUState *cs = CPU(cpu);
1670 CPUPPCState *env = &cpu->env;
1671 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1672 int handle = 0;
1673 int n;
1674 int flag = 0;
1676 if (cs->singlestep_enabled) {
1677 handle = 1;
1678 } else if (arch_info->status) {
1679 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1680 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1681 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1682 if (n >= 0) {
1683 handle = 1;
1685 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1686 KVMPPC_DEBUG_WATCH_WRITE)) {
1687 n = find_hw_watchpoint(arch_info->address, &flag);
1688 if (n >= 0) {
1689 handle = 1;
1690 cs->watchpoint_hit = &hw_watchpoint;
1691 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1692 hw_watchpoint.flags = flag;
1696 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1697 handle = 1;
1698 } else {
1699 /* QEMU is not able to handle the debug exception, so inject a
1700 * program exception into the guest;
1701 * Yes, a program exception, NOT a debug exception !!
1702 * When QEMU is using the debug resources then the debug exception must
1703 * always be set. To achieve this we set MSR_DE and also set
1704 * MSRP_DEP so the guest cannot change MSR_DE.
1705 * When emulating debug resources for the guest we want the guest
1706 * to control MSR_DE (enable/disable the debug interrupt on need).
1707 * Supporting both configurations is NOT possible.
1708 * So the result is that we cannot share debug resources
1709 * between QEMU and Guest on BOOKE architecture.
1710 * In the current design QEMU gets the priority over guest,
1711 * this means that if QEMU is using debug resources then guest
1712 * cannot use them;
1713 * For software breakpoint QEMU uses a privileged instruction;
1714 * So there is no reason we could be here because the guest
1715 * raised a debug exception; the only possibility is that the guest
1716 * executed a privileged / illegal instruction and that's why we are
1717 * injecting a program interrupt.
1720 cpu_synchronize_state(cs);
1721 /* env->nip is PC, so increment this by 4 to use
1722 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1724 env->nip += 4;
1725 cs->exception_index = POWERPC_EXCP_PROGRAM;
1726 env->error_code = POWERPC_EXCP_INVAL;
1727 ppc_cpu_do_interrupt(cs);
1730 return handle;
1733 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1735 PowerPCCPU *cpu = POWERPC_CPU(cs);
1736 CPUPPCState *env = &cpu->env;
1737 int ret;
1739 qemu_mutex_lock_iothread();
1741 switch (run->exit_reason) {
1742 case KVM_EXIT_DCR:
1743 if (run->dcr.is_write) {
1744 DPRINTF("handle dcr write\n");
1745 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1746 } else {
1747 DPRINTF("handle dcr read\n");
1748 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1750 break;
1751 case KVM_EXIT_HLT:
1752 DPRINTF("handle halt\n");
1753 ret = kvmppc_handle_halt(cpu);
1754 break;
1755 #if defined(TARGET_PPC64)
1756 case KVM_EXIT_PAPR_HCALL:
1757 DPRINTF("handle PAPR hypercall\n");
1758 run->papr_hcall.ret = spapr_hypercall(cpu,
1759 run->papr_hcall.nr,
1760 run->papr_hcall.args);
1761 ret = 0;
1762 break;
1763 #endif
1764 case KVM_EXIT_EPR:
1765 DPRINTF("handle epr\n");
1766 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1767 ret = 0;
1768 break;
1769 case KVM_EXIT_WATCHDOG:
1770 DPRINTF("handle watchdog expiry\n");
1771 watchdog_perform_action();
1772 ret = 0;
1773 break;
1775 case KVM_EXIT_DEBUG:
1776 DPRINTF("handle debug exception\n");
1777 if (kvm_handle_debug(cpu, run)) {
1778 ret = EXCP_DEBUG;
1779 break;
1781 /* re-enter, this exception was guest-internal */
1782 ret = 0;
1783 break;
1785 default:
1786 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1787 ret = -1;
1788 break;
1791 qemu_mutex_unlock_iothread();
1792 return ret;
1795 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1797 CPUState *cs = CPU(cpu);
1798 uint32_t bits = tsr_bits;
1799 struct kvm_one_reg reg = {
1800 .id = KVM_REG_PPC_OR_TSR,
1801 .addr = (uintptr_t) &bits,
1804 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1807 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1810 CPUState *cs = CPU(cpu);
1811 uint32_t bits = tsr_bits;
1812 struct kvm_one_reg reg = {
1813 .id = KVM_REG_PPC_CLEAR_TSR,
1814 .addr = (uintptr_t) &bits,
1817 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1820 int kvmppc_set_tcr(PowerPCCPU *cpu)
1822 CPUState *cs = CPU(cpu);
1823 CPUPPCState *env = &cpu->env;
1824 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1826 struct kvm_one_reg reg = {
1827 .id = KVM_REG_PPC_TCR,
1828 .addr = (uintptr_t) &tcr,
1831 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1834 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1836 CPUState *cs = CPU(cpu);
1837 int ret;
1839 if (!kvm_enabled()) {
1840 return -1;
1843 if (!cap_ppc_watchdog) {
1844 printf("warning: KVM does not support watchdog\n");
1845 return -1;
1848 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1849 if (ret < 0) {
1850 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1851 __func__, strerror(-ret));
1852 return ret;
1855 return ret;
1858 static int read_cpuinfo(const char *field, char *value, int len)
1860 FILE *f;
1861 int ret = -1;
1862 int field_len = strlen(field);
1863 char line[512];
1865 f = fopen("/proc/cpuinfo", "r");
1866 if (!f) {
1867 return -1;
1870 do {
1871 if (!fgets(line, sizeof(line), f)) {
1872 break;
1874 if (!strncmp(line, field, field_len)) {
1875 pstrcpy(value, len, line);
1876 ret = 0;
1877 break;
1879 } while(*line);
1881 fclose(f);
1883 return ret;
1886 uint32_t kvmppc_get_tbfreq(void)
1888 char line[512];
1889 char *ns;
1890 uint32_t retval = NANOSECONDS_PER_SECOND;
1892 if (read_cpuinfo("timebase", line, sizeof(line))) {
1893 return retval;
1896 if (!(ns = strchr(line, ':'))) {
1897 return retval;
1900 ns++;
1902 return atoi(ns);
1905 bool kvmppc_get_host_serial(char **value)
1907 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1908 NULL);
1911 bool kvmppc_get_host_model(char **value)
1913 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1916 /* Try to find a device tree node for a CPU with clock-frequency property */
1917 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1919 struct dirent *dirp;
1920 DIR *dp;
1922 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1923 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1924 return -1;
1927 buf[0] = '\0';
1928 while ((dirp = readdir(dp)) != NULL) {
1929 FILE *f;
1930 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1931 dirp->d_name);
1932 f = fopen(buf, "r");
1933 if (f) {
1934 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1935 fclose(f);
1936 break;
1938 buf[0] = '\0';
1940 closedir(dp);
1941 if (buf[0] == '\0') {
1942 printf("Unknown host!\n");
1943 return -1;
1946 return 0;
1949 static uint64_t kvmppc_read_int_dt(const char *filename)
1951 union {
1952 uint32_t v32;
1953 uint64_t v64;
1954 } u;
1955 FILE *f;
1956 int len;
1958 f = fopen(filename, "rb");
1959 if (!f) {
1960 return -1;
1963 len = fread(&u, 1, sizeof(u), f);
1964 fclose(f);
1965 switch (len) {
1966 case 4:
1967 /* property is a 32-bit quantity */
1968 return be32_to_cpu(u.v32);
1969 case 8:
1970 return be64_to_cpu(u.v64);
1973 return 0;
1976 /* Read a CPU node property from the host device tree that's a single
1977 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1978 * (can't find or open the property, or doesn't understand the
1979 * format) */
1980 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1982 char buf[PATH_MAX], *tmp;
1983 uint64_t val;
1985 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1986 return -1;
1989 tmp = g_strdup_printf("%s/%s", buf, propname);
1990 val = kvmppc_read_int_dt(tmp);
1991 g_free(tmp);
1993 return val;
1996 uint64_t kvmppc_get_clockfreq(void)
1998 return kvmppc_read_int_cpu_dt("clock-frequency");
2001 uint32_t kvmppc_get_vmx(void)
2003 return kvmppc_read_int_cpu_dt("ibm,vmx");
2006 uint32_t kvmppc_get_dfp(void)
2008 return kvmppc_read_int_cpu_dt("ibm,dfp");
2011 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2013 PowerPCCPU *cpu = ppc_env_get_cpu(env);
2014 CPUState *cs = CPU(cpu);
2016 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2017 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2018 return 0;
2021 return 1;
2024 int kvmppc_get_hasidle(CPUPPCState *env)
2026 struct kvm_ppc_pvinfo pvinfo;
2028 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2029 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2030 return 1;
2033 return 0;
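/* Copy the kernel's recommended hypercall instruction sequence into buf and
 * return 0. If KVM_PPC_GET_PVINFO is unavailable, fall back to a sequence
 * that fails every hypercall regardless of guest endianness and return 1. */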
2036 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2038 uint32_t *hc = (uint32_t*)buf;
2039 struct kvm_ppc_pvinfo pvinfo;
2041 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2042 memcpy(buf, pvinfo.hcall, buf_len);
2043 return 0;
2047 * Fallback to always fail hypercalls regardless of endianness:
2049 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2050 * li r3, -1
2051 * b .+8 (becomes nop in wrong endian)
2052 * bswap32(li r3, -1)
2055 hc[0] = cpu_to_be32(0x08000048);
2056 hc[1] = cpu_to_be32(0x3860ffff);
2057 hc[2] = cpu_to_be32(0x48000008);
2058 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2060 return 1;
2063 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2065 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2068 void kvmppc_enable_logical_ci_hcalls(void)
2071 * FIXME: it would be nice if we could detect the cases where
2072 * we're using a device which requires the in kernel
2073 * implementation of these hcalls, but the kernel lacks them and
2074 * produce a warning.
2076 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2077 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2080 void kvmppc_enable_set_mode_hcall(void)
2082 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2085 void kvmppc_enable_clear_ref_mod_hcalls(void)
2087 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2088 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2091 void kvmppc_set_papr(PowerPCCPU *cpu)
2093 CPUState *cs = CPU(cpu);
2094 int ret;
2096 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2097 if (ret) {
2098 error_report("This vCPU type or KVM version does not support PAPR");
2099 exit(1);
2102 /* Update the capability flag so we sync the right information
2103 * with kvm */
2104 cap_papr = 1;
2107 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2109 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2112 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2114 CPUState *cs = CPU(cpu);
2115 int ret;
2117 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2118 if (ret && mpic_proxy) {
2119 error_report("This KVM version does not support EPR");
2120 exit(1);
2124 int kvmppc_smt_threads(void)
2126 return cap_ppc_smt ? cap_ppc_smt : 1;
2129 #ifdef TARGET_PPC64
2130 off_t kvmppc_alloc_rma(void **rma)
2132 off_t size;
2133 int fd;
2134 struct kvm_allocate_rma ret;
2136 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2137 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2138 * not necessary on this hardware
2139 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2141 * FIXME: We should allow the user to force contiguous RMA
2142 * allocation in the cap_ppc_rma==1 case.
2144 if (cap_ppc_rma < 2) {
2145 return 0;
2148 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2149 if (fd < 0) {
2150 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2151 strerror(errno));
2152 return -1;
2155 size = MIN(ret.rma_size, 256ul << 20);
2157 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2158 if (*rma == MAP_FAILED) {
2159 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2160 return -1;
2163 return size;
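/* Clamp the requested RMA size to what the hash MMU setup can cover with
 * the largest host page size that backs guest RAM. The 2^(page_shift +
 * hash_shift - 7) bound below presumably reflects one page per 128-byte
 * HPTE group in the hash table. */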
2166 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2168 struct kvm_ppc_smmu_info info;
2169 long rampagesize, best_page_shift;
2170 int i;
2172 if (cap_ppc_rma >= 2) {
2173 return current_size;
2176 /* Find the largest hardware supported page size that's less than
2177 * or equal to the (logical) backing page size of guest RAM */
2178 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2179 rampagesize = qemu_getrampagesize();
2180 best_page_shift = 0;
2182 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2183 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2185 if (!sps->page_shift) {
2186 continue;
2189 if ((sps->page_shift > best_page_shift)
2190 && ((1UL << sps->page_shift) <= rampagesize)) {
2191 best_page_shift = sps->page_shift;
2195 return MIN(current_size,
2196 1ULL << (best_page_shift + hash_shift - 7));
2198 #endif
2200 bool kvmppc_spapr_use_multitce(void)
2202 return cap_spapr_multitce;
2205 int kvmppc_spapr_enable_inkernel_multitce(void)
2207 int ret;
2209 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2210 H_PUT_TCE_INDIRECT, 1);
2211 if (!ret) {
2212 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2213 H_STUFF_TCE, 1);
2216 return ret;
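/* Create an in-kernel TCE (IOMMU translation) table and mmap() it so QEMU
 * and KVM share the entries. The 64-bit ioctl is preferred since it can
 * express a bus offset and page shift; the old ioctl only handles 32-bit
 * window sizes starting at offset 0. Returns the mapping and the fd via
 * *pfd, or NULL so the caller falls back to a userspace-managed table. */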
2219 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2220 uint64_t bus_offset, uint32_t nb_table,
2221 int *pfd, bool need_vfio)
2223 long len;
2224 int fd;
2225 void *table;
2227 /* Must set fd to -1 so we don't try to munmap when called for
2228 * destroying the table, which the upper layers -will- do
2230 *pfd = -1;
2231 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2232 return NULL;
2235 if (cap_spapr_tce_64) {
2236 struct kvm_create_spapr_tce_64 args = {
2237 .liobn = liobn,
2238 .page_shift = page_shift,
2239 .offset = bus_offset >> page_shift,
2240 .size = nb_table,
2241 .flags = 0
2243 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2244 if (fd < 0) {
2245 fprintf(stderr,
2246 "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2247 liobn);
2248 return NULL;
2250 } else if (cap_spapr_tce) {
2251 uint64_t window_size = (uint64_t) nb_table << page_shift;
2252 struct kvm_create_spapr_tce args = {
2253 .liobn = liobn,
2254 .window_size = window_size,
2256 if ((window_size != args.window_size) || bus_offset) {
2257 return NULL;
2259 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2260 if (fd < 0) {
2261 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2262 liobn);
2263 return NULL;
2265 } else {
2266 return NULL;
2269 len = nb_table * sizeof(uint64_t);
2270 /* FIXME: round this up to page size */
2272 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2273 if (table == MAP_FAILED) {
2274 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2275 liobn);
2276 close(fd);
2277 return NULL;
2280 *pfd = fd;
2281 return table;
2284 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2286 long len;
2288 if (fd < 0) {
2289 return -1;
2292 len = nb_table * sizeof(uint64_t);
2293 if ((munmap(table, len) < 0) ||
2294 (close(fd) < 0)) {
2295 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2296 strerror(errno));
2297 /* Leak the table */
2300 return 0;
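/* Ask the kernel to (re)allocate the guest hash page table. Return value
 * convention: > 0 is the log2 size of a kernel-managed HPT, 0 means QEMU
 * must allocate the HPT itself, < 0 is an error. */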
2303 int kvmppc_reset_htab(int shift_hint)
2305 uint32_t shift = shift_hint;
2307 if (!kvm_enabled()) {
2308 /* Full emulation, tell caller to allocate htab itself */
2309 return 0;
2311 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2312 int ret;
2313 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2314 if (ret == -ENOTTY) {
2315 /* At least some versions of PR KVM advertise the
2316 * capability, but don't implement the ioctl(). Oops.
2317 * Return 0 so that we allocate the htab in qemu, as is
2318 * correct for PR. */
2319 return 0;
2320 } else if (ret < 0) {
2321 return ret;
2323 return shift;
2326 /* We have a kernel that predates the htab reset calls. For PR
2327 * KVM, we need to allocate the htab ourselves; an HV KVM of
2328 * this era has already allocated a fixed 16MB hash table. */
2329 if (kvmppc_is_pr(kvm_state)) {
2330 /* PR - tell caller to allocate htab */
2331 return 0;
2332 } else {
2333 /* HV - assume 16MB kernel allocated htab */
2334 return 24;
2338 static inline uint32_t mfpvr(void)
2340 uint32_t pvr;
2342 asm ("mfpvr %0"
2343 : "=r"(pvr));
2344 return pvr;
2347 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2349 if (on) {
2350 *word |= flags;
2351 } else {
2352 *word &= ~flags;
2356 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2358 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2359 uint32_t vmx = kvmppc_get_vmx();
2360 uint32_t dfp = kvmppc_get_dfp();
2361 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2362 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2364 /* Now fix up the class with information we can query from the host */
2365 pcc->pvr = mfpvr();
2367 if (vmx != -1) {
2368 /* Only override when we know what the host supports */
2369 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2370 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2372 if (dfp != -1) {
2373 /* Only override when we know what the host supports */
2374 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2377 if (dcache_size != -1) {
2378 pcc->l1_dcache_size = dcache_size;
2381 if (icache_size != -1) {
2382 pcc->l1_icache_size = icache_size;
2385 #if defined(TARGET_PPC64)
2386 pcc->radix_page_info = kvm_get_radix_page_info();
2388 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2390 * POWER9 DD1 has some bugs which make it not really ISA 3.00
2391 * compliant. More importantly, advertising ISA 3.00
2392 * architected mode may prevent guests from activating
2393 * necessary DD1 workarounds.
2395 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2396 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2398 #endif /* defined(TARGET_PPC64) */
2401 bool kvmppc_has_cap_epr(void)
2403 return cap_epr;
2406 bool kvmppc_has_cap_htab_fd(void)
2408 return cap_htab_fd;
2411 bool kvmppc_has_cap_fixup_hcalls(void)
2413 return cap_fixup_hcalls;
2416 bool kvmppc_has_cap_htm(void)
2418 return cap_htm;
2421 bool kvmppc_has_cap_mmu_radix(void)
2423 return cap_mmu_radix;
2426 bool kvmppc_has_cap_mmu_hash_v3(void)
2428 return cap_mmu_hash_v3;
2431 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2433 uint32_t host_pvr = mfpvr();
2434 PowerPCCPUClass *pvr_pcc;
2436 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2437 if (pvr_pcc == NULL) {
2438 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2441 return pvr_pcc;
2444 static int kvm_ppc_register_host_cpu_type(void)
2446 TypeInfo type_info = {
2447 .name = TYPE_HOST_POWERPC_CPU,
2448 .class_init = kvmppc_host_cpu_class_init,
2450 PowerPCCPUClass *pvr_pcc;
2451 ObjectClass *oc;
2452 DeviceClass *dc;
2453 int i;
2455 pvr_pcc = kvm_ppc_get_host_cpu_class();
2456 if (pvr_pcc == NULL) {
2457 return -1;
2459 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2460 type_register(&type_info);
2462 oc = object_class_by_name(type_info.name);
2463 g_assert(oc);
2465 #if defined(TARGET_PPC64)
2466 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2467 type_info.parent = TYPE_SPAPR_CPU_CORE;
2468 type_info.instance_size = sizeof(sPAPRCPUCore);
2469 type_info.instance_init = NULL;
2470 type_info.class_init = spapr_cpu_core_class_init;
2471 type_info.class_data = (void *) "host";
2472 type_register(&type_info);
2473 g_free((void *)type_info.name);
2474 #endif
2477 * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2478 * we want "POWER8" to be a "family" alias that points to the current
2479 * host CPU type, too)
2481 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2482 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2483 if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2484 char *suffix;
2486 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2487 suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2488 if (suffix) {
2489 *suffix = 0;
2491 ppc_cpu_aliases[i].oc = oc;
2492 break;
2496 return 0;
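/* Bind an RTAS service name to the token the guest will use, so KVM can
 * handle that RTAS call in the kernel. Returns -ENOENT when the kernel has
 * no in-kernel RTAS support. */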
2499 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2501 struct kvm_rtas_token_args args = {
2502 .token = token,
2505 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2506 return -ENOENT;
2509 strncpy(args.name, function, sizeof(args.name));
2511 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2514 int kvmppc_get_htab_fd(bool write)
2516 struct kvm_get_htab_fd s = {
2517 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2518 .start_index = 0,
2521 if (!cap_htab_fd) {
2522 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2523 return -1;
2526 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
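/* The HTAB fd streams the hash table as a sequence of kvm_get_htab_header
 * records: each header gives a starting index, n_valid HPTEs (which follow
 * the header, HASH_PTE_SIZE_64 bytes each) and n_invalid entries that are
 * implicitly empty. A read() of 0 bytes means the whole table has been
 * transferred. kvmppc_save_htab() forwards that stream into the migration
 * file until it is complete or the time budget runs out. */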
2529 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2531 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2532 uint8_t buf[bufsize];
2533 ssize_t rc;
2535 do {
2536 rc = read(fd, buf, bufsize);
2537 if (rc < 0) {
2538 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2539 strerror(errno));
2540 return rc;
2541 } else if (rc) {
2542 uint8_t *buffer = buf;
2543 ssize_t n = rc;
2544 while (n) {
2545 struct kvm_get_htab_header *head =
2546 (struct kvm_get_htab_header *) buffer;
2547 size_t chunksize = sizeof(*head) +
2548 HASH_PTE_SIZE_64 * head->n_valid;
2550 qemu_put_be32(f, head->index);
2551 qemu_put_be16(f, head->n_valid);
2552 qemu_put_be16(f, head->n_invalid);
2553 qemu_put_buffer(f, (void *)(head + 1),
2554 HASH_PTE_SIZE_64 * head->n_valid);
2556 buffer += chunksize;
2557 n -= chunksize;
2560 } while ((rc != 0)
2561 && ((max_ns < 0)
2562 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2564 return (rc == 0) ? 1 : 0;
2567 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2568 uint16_t n_valid, uint16_t n_invalid)
2570 struct kvm_get_htab_header *buf;
2571 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2572 ssize_t rc;
2574 buf = alloca(chunksize);
2575 buf->index = index;
2576 buf->n_valid = n_valid;
2577 buf->n_invalid = n_invalid;
2579 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2581 rc = write(fd, buf, chunksize);
2582 if (rc < 0) {
2583 fprintf(stderr, "Error writing KVM hash table: %s\n",
2584 strerror(errno));
2585 return rc;
2587 if (rc != chunksize) {
2588 /* We should never get a short write on a single chunk */
2589 fprintf(stderr, "Short write, restoring KVM hash table\n");
2590 return -1;
2592 return 0;
2595 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2597 return true;
2600 void kvm_arch_init_irq_routing(KVMState *s)
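/* Direct HPT access for the cases where QEMU needs individual entries: the
 * two helpers below open a transient HTAB fd, then read n HPTEs starting at
 * ptex, or write a single HPTE, using the same header-plus-entries record
 * format as above. */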
2604 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2606 struct kvm_get_htab_fd ghf = {
2607 .flags = 0,
2608 .start_index = ptex,
2610 int fd, rc;
2611 int i;
2613 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2614 if (fd < 0) {
2615 hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2618 i = 0;
2619 while (i < n) {
2620 struct kvm_get_htab_header *hdr;
2621 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2622 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2624 rc = read(fd, buf, sizeof(buf));
2625 if (rc < 0) {
2626 hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2629 hdr = (struct kvm_get_htab_header *)buf;
2630 while ((i < n) && ((char *)hdr < (buf + rc))) {
2631 int invalid = hdr->n_invalid;
2633 if (hdr->index != (ptex + i)) {
2634 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2635 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2638 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2639 i += hdr->n_valid;
2641 if ((n - i) < invalid) {
2642 invalid = n - i;
2644 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2645 i += hdr->n_invalid;
2647 hdr = (struct kvm_get_htab_header *)
2648 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2652 close(fd);
2655 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2657 int fd, rc;
2658 struct kvm_get_htab_fd ghf;
2659 struct {
2660 struct kvm_get_htab_header hdr;
2661 uint64_t pte0;
2662 uint64_t pte1;
2663 } buf;
2665 ghf.flags = 0;
2666 ghf.start_index = 0; /* Ignored */
2667 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2668 if (fd < 0) {
2669 hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2672 buf.hdr.n_valid = 1;
2673 buf.hdr.n_invalid = 0;
2674 buf.hdr.index = ptex;
2675 buf.pte0 = cpu_to_be64(pte0);
2676 buf.pte1 = cpu_to_be64(pte1);
2678 rc = write(fd, &buf, sizeof(buf));
2679 if (rc != sizeof(buf)) {
2680 hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2682 close(fd);
2685 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2686 uint64_t address, uint32_t data, PCIDevice *dev)
2688 return 0;
2691 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2692 int vector, PCIDevice *dev)
2694 return 0;
2697 int kvm_arch_release_virq_post(int virq)
2699 return 0;
2702 int kvm_arch_msi_data_to_gsi(uint32_t data)
2704 return data & 0xffff;
2707 int kvmppc_enable_hwrng(void)
2709 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2710 return -1;
2713 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
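/* HPT resizing is usable either with explicit kernel support
 * (cap_resize_hpt) or under PR KVM, where QEMU owns the hash table anyway;
 * otherwise report an error to the caller. */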
2716 void kvmppc_check_papr_resize_hpt(Error **errp)
2718 if (!kvm_enabled()) {
2719 return; /* No KVM, we're good */
2722 if (cap_resize_hpt) {
2723 return; /* Kernel has explicit support, we're good */
2726 /* Otherwise fallback on looking for PR KVM */
2727 if (kvmppc_is_pr(kvm_state)) {
2728 return;
2731 error_setg(errp,
2732 "Hash page table resizing not available with this KVM version");
2735 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2737 CPUState *cs = CPU(cpu);
2738 struct kvm_ppc_resize_hpt rhpt = {
2739 .flags = flags,
2740 .shift = shift,
2743 if (!cap_resize_hpt) {
2744 return -ENOSYS;
2747 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2750 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2752 CPUState *cs = CPU(cpu);
2753 struct kvm_ppc_resize_hpt rhpt = {
2754 .flags = flags,
2755 .shift = shift,
2758 if (!cap_resize_hpt) {
2759 return -ENOSYS;
2762 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
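/* After the hash table has been moved, point every vCPU's SDR1 at the new
 * table and push the updated book3s sregs back into KVM; as noted below,
 * this only matters for PR KVM, where QEMU manages the HPT. */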
2765 static void kvmppc_pivot_hpt_cpu(CPUState *cs, run_on_cpu_data arg)
2767 target_ulong sdr1 = arg.target_ptr;
2768 PowerPCCPU *cpu = POWERPC_CPU(cs);
2769 CPUPPCState *env = &cpu->env;
2771 /* This is just for the benefit of PR KVM */
2772 cpu_synchronize_state(cs);
2773 env->spr[SPR_SDR1] = sdr1;
2774 if (kvmppc_put_books_sregs(cpu) < 0) {
2775 error_report("Unable to update SDR1 in KVM");
2776 exit(1);
2780 void kvmppc_update_sdr1(target_ulong sdr1)
2782 CPUState *cs;
2784 CPU_FOREACH(cs) {
2785 run_on_cpu(cs, kvmppc_pivot_hpt_cpu, RUN_ON_CPU_TARGET_PTR(sdr1));