target/ppc/kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qapi/error.h"
26 #include "qemu/error-report.h"
27 #include "cpu.h"
28 #include "cpu-models.h"
29 #include "qemu/timer.h"
30 #include "sysemu/sysemu.h"
31 #include "sysemu/hw_accel.h"
32 #include "kvm_ppc.h"
33 #include "sysemu/cpus.h"
34 #include "sysemu/device_tree.h"
35 #include "mmu-hash64.h"
37 #include "hw/sysbus.h"
38 #include "hw/ppc/spapr.h"
39 #include "hw/ppc/spapr_vio.h"
40 #include "hw/ppc/spapr_cpu_core.h"
41 #include "hw/ppc/ppc.h"
42 #include "sysemu/watchdog.h"
43 #include "trace.h"
44 #include "exec/gdbstub.h"
45 #include "exec/memattrs.h"
46 #include "exec/ram_addr.h"
47 #include "sysemu/hostmem.h"
48 #include "qemu/cutils.h"
49 #include "qemu/mmap-alloc.h"
50 #include "elf.h"
51 #include "sysemu/kvm_int.h"
53 //#define DEBUG_KVM
55 #ifdef DEBUG_KVM
56 #define DPRINTF(fmt, ...) \
57 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
58 #else
59 #define DPRINTF(fmt, ...) \
60 do { } while (0)
61 #endif
63 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
65 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
66 KVM_CAP_LAST_INFO
69 static int cap_interrupt_unset = false;
70 static int cap_interrupt_level = false;
71 static int cap_segstate;
72 static int cap_booke_sregs;
73 static int cap_ppc_smt;
74 static int cap_ppc_smt_possible;
75 static int cap_spapr_tce;
76 static int cap_spapr_tce_64;
77 static int cap_spapr_multitce;
78 static int cap_spapr_vfio;
79 static int cap_hior;
80 static int cap_one_reg;
81 static int cap_epr;
82 static int cap_ppc_watchdog;
83 static int cap_papr;
84 static int cap_htab_fd;
85 static int cap_fixup_hcalls;
86 static int cap_htm; /* Hardware transactional memory support */
87 static int cap_mmu_radix;
88 static int cap_mmu_hash_v3;
89 static int cap_resize_hpt;
90 static int cap_ppc_pvr_compat;
91 static int cap_ppc_safe_cache;
92 static int cap_ppc_safe_bounds_check;
93 static int cap_ppc_safe_indirect_branch;
95 static uint32_t debug_inst_opcode;
97 /* XXX We have a race condition where we actually have a level triggered
98 * interrupt, but the infrastructure can't expose that yet, so the guest
99 * takes but ignores it, goes to sleep and never gets notified that there's
100 * still an interrupt pending.
102 * As a quick workaround, let's just wake up again 20 ms after we injected
103 * an interrupt. That way we can assure that we're always reinjecting
104 * interrupts in case the guest swallowed them.
106 static QEMUTimer *idle_timer;
108 static void kvm_kick_cpu(void *opaque)
110 PowerPCCPU *cpu = opaque;
112 qemu_cpu_kick(CPU(cpu));
115 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
116 * should only be used for fallback tests - generally we should use
117 * explicit capabilities for the features we want, rather than
118 * assuming what is/isn't available depending on the KVM variant. */
119 static bool kvmppc_is_pr(KVMState *ks)
121 /* Assume KVM-PR if the GET_PVINFO capability is available */
122 return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
125 static int kvm_ppc_register_host_cpu_type(MachineState *ms);
126 static void kvmppc_get_cpu_characteristics(KVMState *s);
128 int kvm_arch_init(MachineState *ms, KVMState *s)
130 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
131 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
132 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
133 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
134 cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
135 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
136 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
137 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
138 cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
139 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
140 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
141 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
142 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
143 /* Note: we don't set cap_papr here, because this capability is
144 * only activated after this by kvmppc_set_papr() */
145 cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
146 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
147 cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
148 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
149 cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
150 cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
151 cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
152 kvmppc_get_cpu_characteristics(s);
154 * Note: setting it to false because there is no such capability
155 * in KVM at this moment.
157 * TODO: call kvm_vm_check_extension() with the right capability
158 * after the kernel starts implementing it.*/
159 cap_ppc_pvr_compat = false;
161 if (!cap_interrupt_level) {
162 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
163 "VM to stall at times!\n");
166 kvm_ppc_register_host_cpu_type(ms);
168 return 0;
171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
173 return 0;
176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
178 CPUPPCState *cenv = &cpu->env;
179 CPUState *cs = CPU(cpu);
180 struct kvm_sregs sregs;
181 int ret;
183 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
184 /* What we're really trying to say is "if we're on BookE, we use
185 the native PVR for now". This is the only sane way to check
186 it though, so we may mislead users into thinking they can run
187 BookE guests on BookS. Let's hope nobody dares enough :) */
188 return 0;
189 } else {
190 if (!cap_segstate) {
191 fprintf(stderr, "kvm error: missing PVR setting capability\n");
192 return -ENOSYS;
196 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
197 if (ret) {
198 return ret;
201 sregs.pvr = cenv->spr[SPR_PVR];
202 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
205 /* Set up a shared TLB array with KVM */
206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
208 CPUPPCState *env = &cpu->env;
209 CPUState *cs = CPU(cpu);
210 struct kvm_book3e_206_tlb_params params = {};
211 struct kvm_config_tlb cfg = {};
212 unsigned int entries = 0;
213 int ret, i;
215 if (!kvm_enabled() ||
216 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
217 return 0;
220 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
222 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
223 params.tlb_sizes[i] = booke206_tlb_size(env, i);
224 params.tlb_ways[i] = booke206_tlb_ways(env, i);
225 entries += params.tlb_sizes[i];
228 assert(entries == env->nb_tlb);
229 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
231 env->tlb_dirty = true;
233 cfg.array = (uintptr_t)env->tlb.tlbm;
234 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
235 cfg.params = (uintptr_t)&params;
236 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
238 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
239 if (ret < 0) {
240 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
241 __func__, strerror(-ret));
242 return ret;
245 env->kvm_sw_tlb = true;
246 return 0;
250 #if defined(TARGET_PPC64)
251 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
252 struct kvm_ppc_smmu_info *info)
254 CPUPPCState *env = &cpu->env;
255 CPUState *cs = CPU(cpu);
257 memset(info, 0, sizeof(*info));
259 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
260 * we need to "guess" what the supported page sizes are.
262 * For that to work we make a few assumptions:
264 * - Check whether we are running "PR" KVM which only supports 4K
265 * and 16M pages, but supports them regardless of the backing
266 * store characteristics. We also don't support 1T segments.
268 * This is safe as if HV KVM ever supports that capability or PR
269 * KVM grows support for more page/segment sizes, those versions
270 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
271 * will not hit this fallback
273 * - Else we are running HV KVM. This means we only support page
274 * sizes that fit in the backing store. Additionally we only
275 * advertise 64K pages if the processor is ARCH 2.06 and we assume
276 * P7 encodings for the SLB and hash table. Here too, we assume
277 * support for any newer processor will mean a kernel that
278 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
279 * this fallback.
281 if (kvmppc_is_pr(cs->kvm_state)) {
282 /* No flags */
283 info->flags = 0;
284 info->slb_size = 64;
286 /* Standard 4k base page size segment */
287 info->sps[0].page_shift = 12;
288 info->sps[0].slb_enc = 0;
289 info->sps[0].enc[0].page_shift = 12;
290 info->sps[0].enc[0].pte_enc = 0;
292 /* Standard 16M large page size segment */
293 info->sps[1].page_shift = 24;
294 info->sps[1].slb_enc = SLB_VSID_L;
295 info->sps[1].enc[0].page_shift = 24;
296 info->sps[1].enc[0].pte_enc = 0;
297 } else {
298 int i = 0;
300 /* HV KVM has backing store size restrictions */
301 info->flags = KVM_PPC_PAGE_SIZES_REAL;
303 if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)) {
304 info->flags |= KVM_PPC_1T_SEGMENTS;
307 if (env->mmu_model == POWERPC_MMU_2_06 ||
308 env->mmu_model == POWERPC_MMU_2_07) {
309 info->slb_size = 32;
310 } else {
311 info->slb_size = 64;
314 /* Standard 4k base page size segment */
315 info->sps[i].page_shift = 12;
316 info->sps[i].slb_enc = 0;
317 info->sps[i].enc[0].page_shift = 12;
318 info->sps[i].enc[0].pte_enc = 0;
319 i++;
321 /* 64K on MMU 2.06 and later */
322 if (env->mmu_model == POWERPC_MMU_2_06 ||
323 env->mmu_model == POWERPC_MMU_2_07) {
324 info->sps[i].page_shift = 16;
325 info->sps[i].slb_enc = 0x110;
326 info->sps[i].enc[0].page_shift = 16;
327 info->sps[i].enc[0].pte_enc = 1;
328 i++;
331 /* Standard 16M large page size segment */
332 info->sps[i].page_shift = 24;
333 info->sps[i].slb_enc = SLB_VSID_L;
334 info->sps[i].enc[0].page_shift = 24;
335 info->sps[i].enc[0].pte_enc = 0;
339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
341 CPUState *cs = CPU(cpu);
342 int ret;
344 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
345 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
346 if (ret == 0) {
347 return;
351 kvm_get_fallback_smmu_info(cpu, info);
354 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
356 KVMState *s = KVM_STATE(current_machine->accelerator);
357 struct ppc_radix_page_info *radix_page_info;
358 struct kvm_ppc_rmmu_info rmmu_info;
359 int i;
361 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
362 return NULL;
364 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
365 return NULL;
367 radix_page_info = g_malloc0(sizeof(*radix_page_info));
368 radix_page_info->count = 0;
369 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
370 if (rmmu_info.ap_encodings[i]) {
371 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
372 radix_page_info->count++;
375 return radix_page_info;
378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
379 bool radix, bool gtse,
380 uint64_t proc_tbl)
382 CPUState *cs = CPU(cpu);
383 int ret;
384 uint64_t flags = 0;
385 struct kvm_ppc_mmuv3_cfg cfg = {
386 .process_table = proc_tbl,
389 if (radix) {
390 flags |= KVM_PPC_MMUV3_RADIX;
392 if (gtse) {
393 flags |= KVM_PPC_MMUV3_GTSE;
395 cfg.flags = flags;
396 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
397 switch (ret) {
398 case 0:
399 return H_SUCCESS;
400 case -EINVAL:
401 return H_PARAMETER;
402 case -ENODEV:
403 return H_NOT_AVAILABLE;
404 default:
405 return H_HARDWARE;
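/* Helper for the page-size fixup below: when KVM reports
 * KVM_PPC_PAGE_SIZES_REAL (HV KVM), a segment/page size is only usable
 * if it fits within the backing page size of guest RAM; otherwise any
 * size is acceptable. */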
409 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
411 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
412 return true;
415 return (1ul << shift) <= rampgsize;
418 static long max_cpu_page_size;
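/* Reconcile the CPU's advertised hash page-size encodings with what the
 * host kernel reports via KVM_PPC_GET_SMMU_INFO (or the fallback above),
 * dropping any encoding that does not fit the host RAM page size. */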
420 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
422 static struct kvm_ppc_smmu_info smmu_info;
423 static bool has_smmu_info;
424 CPUPPCState *env = &cpu->env;
425 int iq, ik, jq, jk;
427 /* We only handle page sizes for 64-bit server guests for now */
428 if (!(env->mmu_model & POWERPC_MMU_64)) {
429 return;
432 /* Collect MMU info from kernel if not already */
433 if (!has_smmu_info) {
434 kvm_get_smmu_info(cpu, &smmu_info);
435 has_smmu_info = true;
438 if (!max_cpu_page_size) {
439 max_cpu_page_size = qemu_getrampagesize();
442 /* Convert to QEMU form */
443 memset(cpu->hash64_opts->sps, 0, sizeof(*cpu->hash64_opts->sps));
445 /* If we have HV KVM, we need to forbid CI large pages if our
446 * host page size is smaller than 64K.
448 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
449 if (getpagesize() >= 0x10000) {
450 cpu->hash64_opts->flags |= PPC_HASH64_CI_LARGEPAGE;
451 } else {
452 cpu->hash64_opts->flags &= ~PPC_HASH64_CI_LARGEPAGE;
457 * XXX This loop should be an entry wide AND of the capabilities that
458 * the selected CPU has with the capabilities that KVM supports.
460 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
461 PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
462 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
464 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
465 ksps->page_shift)) {
466 continue;
468 qsps->page_shift = ksps->page_shift;
469 qsps->slb_enc = ksps->slb_enc;
470 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
471 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
472 ksps->enc[jk].page_shift)) {
473 continue;
475 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
476 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
477 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
478 break;
481 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
482 break;
485 cpu->hash64_opts->slb_size = smmu_info.slb_size;
486 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
487 cpu->hash64_opts->flags &= ~PPC_HASH64_1TSEG;
491 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
493 Object *mem_obj = object_resolve_path(obj_path, NULL);
494 long pagesize = host_memory_backend_pagesize(MEMORY_BACKEND(mem_obj));
496 return pagesize >= max_cpu_page_size;
499 #else /* defined (TARGET_PPC64) */
501 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
505 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
507 return true;
510 #endif /* !defined (TARGET_PPC64) */
512 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
514 return POWERPC_CPU(cpu)->vcpu_id;
517 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
518 * book3s supports only 1 watchpoint, so array size
519 * of 4 is sufficient for now.
521 #define MAX_HW_BKPTS 4
523 static struct HWBreakpoint {
524 target_ulong addr;
525 int type;
526 } hw_debug_points[MAX_HW_BKPTS];
528 static CPUWatchpoint hw_watchpoint;
530 /* By default, no breakpoints or watchpoints are supported */
531 static int max_hw_breakpoint;
532 static int max_hw_watchpoint;
533 static int nb_hw_breakpoint;
534 static int nb_hw_watchpoint;
536 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
538 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
539 max_hw_breakpoint = 2;
540 max_hw_watchpoint = 2;
543 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
544 fprintf(stderr, "Error initializing h/w breakpoints\n");
545 return;
549 int kvm_arch_init_vcpu(CPUState *cs)
551 PowerPCCPU *cpu = POWERPC_CPU(cs);
552 CPUPPCState *cenv = &cpu->env;
553 int ret;
555 /* Gather server mmu info from KVM and update the CPU state */
556 kvm_fixup_page_sizes(cpu);
558 /* Synchronize sregs with kvm */
559 ret = kvm_arch_sync_sregs(cpu);
560 if (ret) {
561 if (ret == -EINVAL) {
562 error_report("Register sync failed... If you're using kvm-hv.ko,"
563 " only \"-cpu host\" is possible");
565 return ret;
568 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
570 switch (cenv->mmu_model) {
571 case POWERPC_MMU_BOOKE206:
572 /* This target supports access to KVM's guest TLB */
573 ret = kvm_booke206_tlb_init(cpu);
574 break;
575 case POWERPC_MMU_2_07:
576 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
577 /* KVM-HV has transactional memory on POWER8 also without the
578 * KVM_CAP_PPC_HTM extension, so enable it here instead as
579 * long as it's available to userspace on the host. */
580 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
581 cap_htm = true;
584 break;
585 default:
586 break;
589 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
590 kvmppc_hw_debug_points_init(cenv);
592 return ret;
595 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
597 CPUPPCState *env = &cpu->env;
598 CPUState *cs = CPU(cpu);
599 struct kvm_dirty_tlb dirty_tlb;
600 unsigned char *bitmap;
601 int ret;
603 if (!env->kvm_sw_tlb) {
604 return;
607 bitmap = g_malloc((env->nb_tlb + 7) / 8);
608 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
610 dirty_tlb.bitmap = (uintptr_t)bitmap;
611 dirty_tlb.num_dirty = env->nb_tlb;
613 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
614 if (ret) {
615 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
616 __func__, strerror(-ret));
619 g_free(bitmap);
622 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
624 PowerPCCPU *cpu = POWERPC_CPU(cs);
625 CPUPPCState *env = &cpu->env;
626 union {
627 uint32_t u32;
628 uint64_t u64;
629 } val;
630 struct kvm_one_reg reg = {
631 .id = id,
632 .addr = (uintptr_t) &val,
634 int ret;
636 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
637 if (ret != 0) {
638 trace_kvm_failed_spr_get(spr, strerror(errno));
639 } else {
640 switch (id & KVM_REG_SIZE_MASK) {
641 case KVM_REG_SIZE_U32:
642 env->spr[spr] = val.u32;
643 break;
645 case KVM_REG_SIZE_U64:
646 env->spr[spr] = val.u64;
647 break;
649 default:
650 /* Don't handle this size yet */
651 abort();
656 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
658 PowerPCCPU *cpu = POWERPC_CPU(cs);
659 CPUPPCState *env = &cpu->env;
660 union {
661 uint32_t u32;
662 uint64_t u64;
663 } val;
664 struct kvm_one_reg reg = {
665 .id = id,
666 .addr = (uintptr_t) &val,
668 int ret;
670 switch (id & KVM_REG_SIZE_MASK) {
671 case KVM_REG_SIZE_U32:
672 val.u32 = env->spr[spr];
673 break;
675 case KVM_REG_SIZE_U64:
676 val.u64 = env->spr[spr];
677 break;
679 default:
680 /* Don't handle this size yet */
681 abort();
684 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
685 if (ret != 0) {
686 trace_kvm_failed_spr_set(spr, strerror(errno));
690 static int kvm_put_fp(CPUState *cs)
692 PowerPCCPU *cpu = POWERPC_CPU(cs);
693 CPUPPCState *env = &cpu->env;
694 struct kvm_one_reg reg;
695 int i;
696 int ret;
698 if (env->insns_flags & PPC_FLOAT) {
699 uint64_t fpscr = env->fpscr;
700 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
702 reg.id = KVM_REG_PPC_FPSCR;
703 reg.addr = (uintptr_t)&fpscr;
704 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
705 if (ret < 0) {
706 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
707 return ret;
710 for (i = 0; i < 32; i++) {
711 uint64_t vsr[2];
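/* The 128-bit VSR is passed to KVM as two 64-bit doublewords in host
 * byte order; the FP value occupies the more significant half, hence
 * the endian-dependent ordering below. */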
713 #ifdef HOST_WORDS_BIGENDIAN
714 vsr[0] = float64_val(env->fpr[i]);
715 vsr[1] = env->vsr[i];
716 #else
717 vsr[0] = env->vsr[i];
718 vsr[1] = float64_val(env->fpr[i]);
719 #endif
720 reg.addr = (uintptr_t) &vsr;
721 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
723 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
724 if (ret < 0) {
725 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
726 i, strerror(errno));
727 return ret;
732 if (env->insns_flags & PPC_ALTIVEC) {
733 reg.id = KVM_REG_PPC_VSCR;
734 reg.addr = (uintptr_t)&env->vscr;
735 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
736 if (ret < 0) {
737 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
738 return ret;
741 for (i = 0; i < 32; i++) {
742 reg.id = KVM_REG_PPC_VR(i);
743 reg.addr = (uintptr_t)&env->avr[i];
744 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
745 if (ret < 0) {
746 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
747 return ret;
752 return 0;
755 static int kvm_get_fp(CPUState *cs)
757 PowerPCCPU *cpu = POWERPC_CPU(cs);
758 CPUPPCState *env = &cpu->env;
759 struct kvm_one_reg reg;
760 int i;
761 int ret;
763 if (env->insns_flags & PPC_FLOAT) {
764 uint64_t fpscr;
765 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
767 reg.id = KVM_REG_PPC_FPSCR;
768 reg.addr = (uintptr_t)&fpscr;
769 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
770 if (ret < 0) {
771 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
772 return ret;
773 } else {
774 env->fpscr = fpscr;
777 for (i = 0; i < 32; i++) {
778 uint64_t vsr[2];
780 reg.addr = (uintptr_t) &vsr;
781 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
783 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
784 if (ret < 0) {
785 DPRINTF("Unable to get %s%d from KVM: %s\n",
786 vsx ? "VSR" : "FPR", i, strerror(errno));
787 return ret;
788 } else {
789 #ifdef HOST_WORDS_BIGENDIAN
790 env->fpr[i] = vsr[0];
791 if (vsx) {
792 env->vsr[i] = vsr[1];
794 #else
795 env->fpr[i] = vsr[1];
796 if (vsx) {
797 env->vsr[i] = vsr[0];
799 #endif
804 if (env->insns_flags & PPC_ALTIVEC) {
805 reg.id = KVM_REG_PPC_VSCR;
806 reg.addr = (uintptr_t)&env->vscr;
807 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
808 if (ret < 0) {
809 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
810 return ret;
813 for (i = 0; i < 32; i++) {
814 reg.id = KVM_REG_PPC_VR(i);
815 reg.addr = (uintptr_t)&env->avr[i];
816 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
817 if (ret < 0) {
818 DPRINTF("Unable to get VR%d from KVM: %s\n",
819 i, strerror(errno));
820 return ret;
825 return 0;
828 #if defined(TARGET_PPC64)
829 static int kvm_get_vpa(CPUState *cs)
831 PowerPCCPU *cpu = POWERPC_CPU(cs);
832 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
833 struct kvm_one_reg reg;
834 int ret;
836 reg.id = KVM_REG_PPC_VPA_ADDR;
837 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
838 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
839 if (ret < 0) {
840 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
841 return ret;
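/* KVM transfers the SLB shadow and DTL each as a single (addr, size)
 * pair, so the size field must sit immediately after the address field
 * in sPAPRCPUState; the asserts below verify that layout. */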
844 assert((uintptr_t)&spapr_cpu->slb_shadow_size
845 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
846 reg.id = KVM_REG_PPC_VPA_SLB;
847 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
848 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
849 if (ret < 0) {
850 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
851 strerror(errno));
852 return ret;
855 assert((uintptr_t)&spapr_cpu->dtl_size
856 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
857 reg.id = KVM_REG_PPC_VPA_DTL;
858 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
859 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
860 if (ret < 0) {
861 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
862 strerror(errno));
863 return ret;
866 return 0;
869 static int kvm_put_vpa(CPUState *cs)
871 PowerPCCPU *cpu = POWERPC_CPU(cs);
872 sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
873 struct kvm_one_reg reg;
874 int ret;
876 /* SLB shadow or DTL can't be registered unless a master VPA is
877 * registered. That means when restoring state, if a VPA *is*
878 * registered, we need to set that up first. If not, we need to
879 * deregister the others before deregistering the master VPA */
880 assert(spapr_cpu->vpa_addr
881 || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
883 if (spapr_cpu->vpa_addr) {
884 reg.id = KVM_REG_PPC_VPA_ADDR;
885 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
886 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
887 if (ret < 0) {
888 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
889 return ret;
893 assert((uintptr_t)&spapr_cpu->slb_shadow_size
894 == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
895 reg.id = KVM_REG_PPC_VPA_SLB;
896 reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
897 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
898 if (ret < 0) {
899 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
900 return ret;
903 assert((uintptr_t)&spapr_cpu->dtl_size
904 == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
905 reg.id = KVM_REG_PPC_VPA_DTL;
906 reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
907 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
908 if (ret < 0) {
909 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
910 strerror(errno));
911 return ret;
914 if (!spapr_cpu->vpa_addr) {
915 reg.id = KVM_REG_PPC_VPA_ADDR;
916 reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
917 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
918 if (ret < 0) {
919 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
920 return ret;
924 return 0;
926 #endif /* TARGET_PPC64 */
928 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
930 CPUPPCState *env = &cpu->env;
931 struct kvm_sregs sregs;
932 int i;
934 sregs.pvr = env->spr[SPR_PVR];
936 if (cpu->vhyp) {
937 PPCVirtualHypervisorClass *vhc =
938 PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
939 sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
940 } else {
941 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
944 /* Sync SLB */
945 #ifdef TARGET_PPC64
946 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
947 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
948 if (env->slb[i].esid & SLB_ESID_V) {
949 sregs.u.s.ppc64.slb[i].slbe |= i;
951 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
953 #endif
955 /* Sync SRs */
956 for (i = 0; i < 16; i++) {
957 sregs.u.s.ppc32.sr[i] = env->sr[i];
960 /* Sync BATs */
961 for (i = 0; i < 8; i++) {
962 /* Beware. We have to swap upper and lower bits here */
963 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
964 | env->DBAT[1][i];
965 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
966 | env->IBAT[1][i];
969 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
972 int kvm_arch_put_registers(CPUState *cs, int level)
974 PowerPCCPU *cpu = POWERPC_CPU(cs);
975 CPUPPCState *env = &cpu->env;
976 struct kvm_regs regs;
977 int ret;
978 int i;
980 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
981 if (ret < 0) {
982 return ret;
985 regs.ctr = env->ctr;
986 regs.lr = env->lr;
987 regs.xer = cpu_read_xer(env);
988 regs.msr = env->msr;
989 regs.pc = env->nip;
991 regs.srr0 = env->spr[SPR_SRR0];
992 regs.srr1 = env->spr[SPR_SRR1];
994 regs.sprg0 = env->spr[SPR_SPRG0];
995 regs.sprg1 = env->spr[SPR_SPRG1];
996 regs.sprg2 = env->spr[SPR_SPRG2];
997 regs.sprg3 = env->spr[SPR_SPRG3];
998 regs.sprg4 = env->spr[SPR_SPRG4];
999 regs.sprg5 = env->spr[SPR_SPRG5];
1000 regs.sprg6 = env->spr[SPR_SPRG6];
1001 regs.sprg7 = env->spr[SPR_SPRG7];
1003 regs.pid = env->spr[SPR_BOOKE_PID];
1005 for (i = 0;i < 32; i++)
1006 regs.gpr[i] = env->gpr[i];
1008 regs.cr = 0;
1009 for (i = 0; i < 8; i++) {
1010 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1013 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1014 if (ret < 0)
1015 return ret;
1017 kvm_put_fp(cs);
1019 if (env->tlb_dirty) {
1020 kvm_sw_tlb_put(cpu);
1021 env->tlb_dirty = false;
1024 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1025 ret = kvmppc_put_books_sregs(cpu);
1026 if (ret < 0) {
1027 return ret;
1031 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1032 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1035 if (cap_one_reg) {
1036 int i;
1038 /* We deliberately ignore errors here, for kernels which have
1039 * the ONE_REG calls, but don't support the specific
1040 * registers, there's a reasonable chance things will still
1041 * work, at least until we try to migrate. */
1042 for (i = 0; i < 1024; i++) {
1043 uint64_t id = env->spr_cb[i].one_reg_id;
1045 if (id != 0) {
1046 kvm_put_one_spr(cs, id, i);
1050 #ifdef TARGET_PPC64
1051 if (msr_ts) {
1052 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1053 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1055 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1056 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1058 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1059 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1060 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1061 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1062 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1063 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1064 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1065 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1066 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1067 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1070 if (cap_papr) {
1071 if (kvm_put_vpa(cs) < 0) {
1072 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1076 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1077 #endif /* TARGET_PPC64 */
1080 return ret;
1083 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1085 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1088 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1090 CPUPPCState *env = &cpu->env;
1091 struct kvm_sregs sregs;
1092 int ret;
1094 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1095 if (ret < 0) {
1096 return ret;
1099 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1100 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1101 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1102 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1103 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1104 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1105 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1106 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1107 env->spr[SPR_DECR] = sregs.u.e.dec;
1108 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1109 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1110 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1113 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1114 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1115 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1116 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1117 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1118 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1121 if (sregs.u.e.features & KVM_SREGS_E_64) {
1122 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1125 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1126 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1129 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1130 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1131 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1132 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1133 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1134 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1135 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1136 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1137 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1138 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1139 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1140 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1141 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1142 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1143 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1144 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1145 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1146 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1147 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1148 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1149 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1150 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1151 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1152 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1153 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1154 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1155 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1156 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1157 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1158 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1159 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1160 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1161 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1163 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1164 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1165 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1166 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1167 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1168 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1169 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1172 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1173 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1174 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1177 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1178 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1179 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1180 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1181 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1185 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1186 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1187 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1188 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1189 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1190 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1191 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1192 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1193 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1194 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1195 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1198 if (sregs.u.e.features & KVM_SREGS_EXP) {
1199 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1202 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1203 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1204 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1207 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1208 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1209 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1210 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1212 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1213 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1214 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1218 return 0;
1221 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1223 CPUPPCState *env = &cpu->env;
1224 struct kvm_sregs sregs;
1225 int ret;
1226 int i;
1228 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1229 if (ret < 0) {
1230 return ret;
1233 if (!cpu->vhyp) {
1234 ppc_store_sdr1(env, sregs.u.s.sdr1);
1237 /* Sync SLB */
1238 #ifdef TARGET_PPC64
1240 * The packed SLB array we get from KVM_GET_SREGS only contains
1241 * information about valid entries. So we flush our internal copy
1242 * to get rid of stale ones, then put all valid SLB entries back
1243 * in.
1245 memset(env->slb, 0, sizeof(env->slb));
1246 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1247 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1248 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1250 * Only restore valid entries
1252 if (rb & SLB_ESID_V) {
1253 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1256 #endif
1258 /* Sync SRs */
1259 for (i = 0; i < 16; i++) {
1260 env->sr[i] = sregs.u.s.ppc32.sr[i];
1263 /* Sync BATs */
1264 for (i = 0; i < 8; i++) {
1265 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1266 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1267 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1268 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1271 return 0;
1274 int kvm_arch_get_registers(CPUState *cs)
1276 PowerPCCPU *cpu = POWERPC_CPU(cs);
1277 CPUPPCState *env = &cpu->env;
1278 struct kvm_regs regs;
1279 uint32_t cr;
1280 int i, ret;
1282 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1283 if (ret < 0)
1284 return ret;
1286 cr = regs.cr;
1287 for (i = 7; i >= 0; i--) {
1288 env->crf[i] = cr & 15;
1289 cr >>= 4;
1292 env->ctr = regs.ctr;
1293 env->lr = regs.lr;
1294 cpu_write_xer(env, regs.xer);
1295 env->msr = regs.msr;
1296 env->nip = regs.pc;
1298 env->spr[SPR_SRR0] = regs.srr0;
1299 env->spr[SPR_SRR1] = regs.srr1;
1301 env->spr[SPR_SPRG0] = regs.sprg0;
1302 env->spr[SPR_SPRG1] = regs.sprg1;
1303 env->spr[SPR_SPRG2] = regs.sprg2;
1304 env->spr[SPR_SPRG3] = regs.sprg3;
1305 env->spr[SPR_SPRG4] = regs.sprg4;
1306 env->spr[SPR_SPRG5] = regs.sprg5;
1307 env->spr[SPR_SPRG6] = regs.sprg6;
1308 env->spr[SPR_SPRG7] = regs.sprg7;
1310 env->spr[SPR_BOOKE_PID] = regs.pid;
1312 for (i = 0;i < 32; i++)
1313 env->gpr[i] = regs.gpr[i];
1315 kvm_get_fp(cs);
1317 if (cap_booke_sregs) {
1318 ret = kvmppc_get_booke_sregs(cpu);
1319 if (ret < 0) {
1320 return ret;
1324 if (cap_segstate) {
1325 ret = kvmppc_get_books_sregs(cpu);
1326 if (ret < 0) {
1327 return ret;
1331 if (cap_hior) {
1332 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1335 if (cap_one_reg) {
1336 int i;
1338 /* We deliberately ignore errors here, for kernels which have
1339 * the ONE_REG calls, but don't support the specific
1340 * registers, there's a reasonable chance things will still
1341 * work, at least until we try to migrate. */
1342 for (i = 0; i < 1024; i++) {
1343 uint64_t id = env->spr_cb[i].one_reg_id;
1345 if (id != 0) {
1346 kvm_get_one_spr(cs, id, i);
1350 #ifdef TARGET_PPC64
1351 if (msr_ts) {
1352 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1353 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1355 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1356 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1358 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1359 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1360 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1361 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1362 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1363 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1364 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1365 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1366 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1367 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1370 if (cap_papr) {
1371 if (kvm_get_vpa(cs) < 0) {
1372 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1376 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1377 #endif
1380 return 0;
1383 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1385 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1387 if (irq != PPC_INTERRUPT_EXT) {
1388 return 0;
1391 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1392 return 0;
1395 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1397 return 0;
1400 #if defined(TARGET_PPCEMB)
1401 #define PPC_INPUT_INT PPC40x_INPUT_INT
1402 #elif defined(TARGET_PPC64)
1403 #define PPC_INPUT_INT PPC970_INPUT_INT
1404 #else
1405 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1406 #endif
1408 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1410 PowerPCCPU *cpu = POWERPC_CPU(cs);
1411 CPUPPCState *env = &cpu->env;
1412 int r;
1413 unsigned irq;
1415 qemu_mutex_lock_iothread();
1417 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1418 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1419 if (!cap_interrupt_level &&
1420 run->ready_for_interrupt_injection &&
1421 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1422 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1424 /* For now KVM disregards the 'irq' argument. However, in the
1425 * future KVM could cache it in-kernel to avoid a heavyweight exit
1426 * when reading the UIC.
1428 irq = KVM_INTERRUPT_SET;
1430 DPRINTF("injected interrupt %d\n", irq);
1431 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1432 if (r < 0) {
1433 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1436 /* Always wake up soon in case the interrupt was level based */
1437 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1438 (NANOSECONDS_PER_SECOND / 50));
1441 /* We don't know if there are more interrupts pending after this. However,
1442 * the guest will return to userspace in the course of handling this one
1443 * anyways, so we will get a chance to deliver the rest. */
1445 qemu_mutex_unlock_iothread();
1448 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1450 return MEMTXATTRS_UNSPECIFIED;
1453 int kvm_arch_process_async_events(CPUState *cs)
1455 return cs->halted;
1458 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1460 CPUState *cs = CPU(cpu);
1461 CPUPPCState *env = &cpu->env;
1463 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1464 cs->halted = 1;
1465 cs->exception_index = EXCP_HLT;
1468 return 0;
1471 /* map dcr access to existing qemu dcr emulation */
1472 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1474 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1475 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1477 return 0;
1480 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1482 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1483 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1485 return 0;
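/* Software breakpoints: save the original instruction in bp->saved_insn
 * and patch in the trap opcode that KVM reported via
 * KVM_REG_PPC_DEBUG_INST. */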
1488 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1490 /* Mixed endian case is not handled */
1491 uint32_t sc = debug_inst_opcode;
1493 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1494 sizeof(sc), 0) ||
1495 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1496 return -EINVAL;
1499 return 0;
1502 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1504 uint32_t sc;
1506 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1507 sc != debug_inst_opcode ||
1508 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1509 sizeof(sc), 1)) {
1510 return -EINVAL;
1513 return 0;
1516 static int find_hw_breakpoint(target_ulong addr, int type)
1518 int n;
1520 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1521 <= ARRAY_SIZE(hw_debug_points));
1523 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1524 if (hw_debug_points[n].addr == addr &&
1525 hw_debug_points[n].type == type) {
1526 return n;
1530 return -1;
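/* Map a watchpoint address back to its slot in hw_debug_points[],
 * checking the ACCESS, WRITE and READ types in turn and reporting the
 * matching BP_MEM_* flag through *flag. */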
1533 static int find_hw_watchpoint(target_ulong addr, int *flag)
1535 int n;
1537 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1538 if (n >= 0) {
1539 *flag = BP_MEM_ACCESS;
1540 return n;
1543 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1544 if (n >= 0) {
1545 *flag = BP_MEM_WRITE;
1546 return n;
1549 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1550 if (n >= 0) {
1551 *flag = BP_MEM_READ;
1552 return n;
1555 return -1;
1558 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1559 target_ulong len, int type)
1561 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1562 return -ENOBUFS;
1565 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1566 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1568 switch (type) {
1569 case GDB_BREAKPOINT_HW:
1570 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1571 return -ENOBUFS;
1574 if (find_hw_breakpoint(addr, type) >= 0) {
1575 return -EEXIST;
1578 nb_hw_breakpoint++;
1579 break;
1581 case GDB_WATCHPOINT_WRITE:
1582 case GDB_WATCHPOINT_READ:
1583 case GDB_WATCHPOINT_ACCESS:
1584 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1585 return -ENOBUFS;
1588 if (find_hw_breakpoint(addr, type) >= 0) {
1589 return -EEXIST;
1592 nb_hw_watchpoint++;
1593 break;
1595 default:
1596 return -ENOSYS;
1599 return 0;
1602 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1603 target_ulong len, int type)
1605 int n;
1607 n = find_hw_breakpoint(addr, type);
1608 if (n < 0) {
1609 return -ENOENT;
1612 switch (type) {
1613 case GDB_BREAKPOINT_HW:
1614 nb_hw_breakpoint--;
1615 break;
1617 case GDB_WATCHPOINT_WRITE:
1618 case GDB_WATCHPOINT_READ:
1619 case GDB_WATCHPOINT_ACCESS:
1620 nb_hw_watchpoint--;
1621 break;
1623 default:
1624 return -ENOSYS;
1626 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1628 return 0;
1631 void kvm_arch_remove_all_hw_breakpoints(void)
1633 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1636 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1638 int n;
1640 /* Software Breakpoint updates */
1641 if (kvm_sw_breakpoints_active(cs)) {
1642 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1645 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1646 <= ARRAY_SIZE(hw_debug_points));
1647 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1649 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1650 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1651 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1652 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1653 switch (hw_debug_points[n].type) {
1654 case GDB_BREAKPOINT_HW:
1655 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1656 break;
1657 case GDB_WATCHPOINT_WRITE:
1658 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1659 break;
1660 case GDB_WATCHPOINT_READ:
1661 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1662 break;
1663 case GDB_WATCHPOINT_ACCESS:
1664 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1665 KVMPPC_DEBUG_WATCH_READ;
1666 break;
1667 default:
1668 cpu_abort(cs, "Unsupported breakpoint type\n");
1670 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1675 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1677 CPUState *cs = CPU(cpu);
1678 CPUPPCState *env = &cpu->env;
1679 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1680 int handle = 0;
1681 int n;
1682 int flag = 0;
1684 if (cs->singlestep_enabled) {
1685 handle = 1;
1686 } else if (arch_info->status) {
1687 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1688 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1689 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1690 if (n >= 0) {
1691 handle = 1;
1693 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1694 KVMPPC_DEBUG_WATCH_WRITE)) {
1695 n = find_hw_watchpoint(arch_info->address, &flag);
1696 if (n >= 0) {
1697 handle = 1;
1698 cs->watchpoint_hit = &hw_watchpoint;
1699 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1700 hw_watchpoint.flags = flag;
1704 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1705 handle = 1;
1706 } else {
1707 /* QEMU is not able to handle the debug exception, so inject a
1708 * program exception into the guest.
1709 * Yes, a program exception, NOT a debug exception!
1710 * When QEMU is using debug resources then the debug exception must
1711 * always be set. To achieve this we set MSR_DE and also set
1712 * MSRP_DEP so the guest cannot change MSR_DE.
1713 * When emulating debug resources for the guest we want the guest
1714 * to control MSR_DE (enable/disable the debug interrupt as needed).
1715 * Supporting both configurations is NOT possible.
1716 * So the result is that we cannot share debug resources
1717 * between QEMU and Guest on BOOKE architecture.
1718 * In the current design QEMU gets the priority over guest,
1719 * this means that if QEMU is using debug resources then guest
1720 * cannot use them;
1721 * For software breakpoint QEMU uses a privileged instruction;
1722 * So there cannot be any reason that we are here for guest
1723 * set debug exception, only possibility is guest executed a
1724 * privileged / illegal instruction and that's why we are
1725 * injecting a program interrupt.
1728 cpu_synchronize_state(cs);
1729 /* env->nip is PC, so increment this by 4 to use
1730 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1732 env->nip += 4;
1733 cs->exception_index = POWERPC_EXCP_PROGRAM;
1734 env->error_code = POWERPC_EXCP_INVAL;
1735 ppc_cpu_do_interrupt(cs);
1738 return handle;
1741 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1743 PowerPCCPU *cpu = POWERPC_CPU(cs);
1744 CPUPPCState *env = &cpu->env;
1745 int ret;
1747 qemu_mutex_lock_iothread();
1749 switch (run->exit_reason) {
1750 case KVM_EXIT_DCR:
1751 if (run->dcr.is_write) {
1752 DPRINTF("handle dcr write\n");
1753 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1754 } else {
1755 DPRINTF("handle dcr read\n");
1756 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1758 break;
1759 case KVM_EXIT_HLT:
1760 DPRINTF("handle halt\n");
1761 ret = kvmppc_handle_halt(cpu);
1762 break;
1763 #if defined(TARGET_PPC64)
1764 case KVM_EXIT_PAPR_HCALL:
1765 DPRINTF("handle PAPR hypercall\n");
1766 run->papr_hcall.ret = spapr_hypercall(cpu,
1767 run->papr_hcall.nr,
1768 run->papr_hcall.args);
1769 ret = 0;
1770 break;
1771 #endif
1772 case KVM_EXIT_EPR:
1773 DPRINTF("handle epr\n");
1774 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1775 ret = 0;
1776 break;
1777 case KVM_EXIT_WATCHDOG:
1778 DPRINTF("handle watchdog expiry\n");
1779 watchdog_perform_action();
1780 ret = 0;
1781 break;
1783 case KVM_EXIT_DEBUG:
1784 DPRINTF("handle debug exception\n");
1785 if (kvm_handle_debug(cpu, run)) {
1786 ret = EXCP_DEBUG;
1787 break;
1789 /* re-enter, this exception was guest-internal */
1790 ret = 0;
1791 break;
1793 default:
1794 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1795 ret = -1;
1796 break;
1799 qemu_mutex_unlock_iothread();
1800 return ret;
1803 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1805 CPUState *cs = CPU(cpu);
1806 uint32_t bits = tsr_bits;
1807 struct kvm_one_reg reg = {
1808 .id = KVM_REG_PPC_OR_TSR,
1809 .addr = (uintptr_t) &bits,
1812 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1815 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1818 CPUState *cs = CPU(cpu);
1819 uint32_t bits = tsr_bits;
1820 struct kvm_one_reg reg = {
1821 .id = KVM_REG_PPC_CLEAR_TSR,
1822 .addr = (uintptr_t) &bits,
1825 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1828 int kvmppc_set_tcr(PowerPCCPU *cpu)
1830 CPUState *cs = CPU(cpu);
1831 CPUPPCState *env = &cpu->env;
1832 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1834 struct kvm_one_reg reg = {
1835 .id = KVM_REG_PPC_TCR,
1836 .addr = (uintptr_t) &tcr,
1839 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1842 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1844 CPUState *cs = CPU(cpu);
1845 int ret;
1847 if (!kvm_enabled()) {
1848 return -1;
1851 if (!cap_ppc_watchdog) {
1852 printf("warning: KVM does not support watchdog\n");
1853 return -1;
1856 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1857 if (ret < 0) {
1858 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1859 __func__, strerror(-ret));
1860 return ret;
1863 return ret;
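/* Scan /proc/cpuinfo for a line starting with 'field' and copy the whole
 * matching line into 'value' (at most 'len' bytes); returns 0 on success,
 * -1 if the field is missing or the file cannot be opened. */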
1866 static int read_cpuinfo(const char *field, char *value, int len)
1868 FILE *f;
1869 int ret = -1;
1870 int field_len = strlen(field);
1871 char line[512];
1873 f = fopen("/proc/cpuinfo", "r");
1874 if (!f) {
1875 return -1;
1878 do {
1879 if (!fgets(line, sizeof(line), f)) {
1880 break;
1882 if (!strncmp(line, field, field_len)) {
1883 pstrcpy(value, len, line);
1884 ret = 0;
1885 break;
1887 } while(*line);
1889 fclose(f);
1891 return ret;
1894 uint32_t kvmppc_get_tbfreq(void)
1896 char line[512];
1897 char *ns;
1898 uint32_t retval = NANOSECONDS_PER_SECOND;
1900 if (read_cpuinfo("timebase", line, sizeof(line))) {
1901 return retval;
1904 if (!(ns = strchr(line, ':'))) {
1905 return retval;
1908 ns++;
1910 return atoi(ns);
1913 bool kvmppc_get_host_serial(char **value)
1915 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1916 NULL);
1919 bool kvmppc_get_host_model(char **value)
1921 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1924 /* Try to find a device tree node for a CPU with clock-frequency property */
1925 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1927 struct dirent *dirp;
1928 DIR *dp;
1930 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1931 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1932 return -1;
1935 buf[0] = '\0';
1936 while ((dirp = readdir(dp)) != NULL) {
1937 FILE *f;
1938 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1939 dirp->d_name);
1940 f = fopen(buf, "r");
1941 if (f) {
1942 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1943 fclose(f);
1944 break;
1946 buf[0] = '\0';
1948 closedir(dp);
1949 if (buf[0] == '\0') {
1950 printf("Unknown host!\n");
1951 return -1;
1954 return 0;
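/* Read a device-tree property file and interpret it as one big-endian
 * integer: a 4-byte file yields a 32-bit value, an 8-byte file a 64-bit
 * value, anything else 0. */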
1957 static uint64_t kvmppc_read_int_dt(const char *filename)
1959 union {
1960 uint32_t v32;
1961 uint64_t v64;
1962 } u;
1963 FILE *f;
1964 int len;
1966 f = fopen(filename, "rb");
1967 if (!f) {
1968 return -1;
1971 len = fread(&u, 1, sizeof(u), f);
1972 fclose(f);
1973 switch (len) {
1974 case 4:
1975 /* property is a 32-bit quantity */
1976 return be32_to_cpu(u.v32);
1977 case 8:
1978 return be64_to_cpu(u.v64);
1981 return 0;
1984 /* Read a CPU node property from the host device tree that's a single
1985 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1986 * (can't find or open the property, or doesn't understand the
1987 * format) */
1988 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1990 char buf[PATH_MAX], *tmp;
1991 uint64_t val;
1993 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1994 return -1;
1997 tmp = g_strdup_printf("%s/%s", buf, propname);
1998 val = kvmppc_read_int_dt(tmp);
1999 g_free(tmp);
2001 return val;
2004 uint64_t kvmppc_get_clockfreq(void)
2006 return kvmppc_read_int_cpu_dt("clock-frequency");
2009 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2011 PowerPCCPU *cpu = ppc_env_get_cpu(env);
2012 CPUState *cs = CPU(cpu);
2014 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2015 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2016 return 0;
2019 return 1;
2022 int kvmppc_get_hasidle(CPUPPCState *env)
2024 struct kvm_ppc_pvinfo pvinfo;
2026 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2027 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2028 return 1;
2031 return 0;
2034 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2036 uint32_t *hc = (uint32_t*)buf;
2037 struct kvm_ppc_pvinfo pvinfo;
2039 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2040 memcpy(buf, pvinfo.hcall, buf_len);
2041 return 0;
2045 * Fallback to always fail hypercalls regardless of endianness:
2047 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2048 * li r3, -1
2049 * b .+8 (becomes nop in wrong endian)
2050 * bswap32(li r3, -1)
2053 hc[0] = cpu_to_be32(0x08000048);
2054 hc[1] = cpu_to_be32(0x3860ffff);
2055 hc[2] = cpu_to_be32(0x48000008);
2056 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2058 return 1;
2061 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2063 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2066 void kvmppc_enable_logical_ci_hcalls(void)
2069 * FIXME: it would be nice if we could detect the cases where
2070 * we're using a device which requires the in kernel
2071 * implementation of these hcalls, but the kernel lacks them and
2072 * produce a warning.
2074 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2075 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2078 void kvmppc_enable_set_mode_hcall(void)
2080 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2083 void kvmppc_enable_clear_ref_mod_hcalls(void)
2085 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2086 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2089 void kvmppc_set_papr(PowerPCCPU *cpu)
2091 CPUState *cs = CPU(cpu);
2092 int ret;
2094 if (!kvm_enabled()) {
2095 return;
2098 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2099 if (ret) {
2100 error_report("This vCPU type or KVM version does not support PAPR");
2101 exit(1);
2104 /* Update the capability flag so we sync the right information
2105 * with kvm */
2106 cap_papr = 1;
2109 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2111 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2114 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2116 CPUState *cs = CPU(cpu);
2117 int ret;
2119 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2120 if (ret && mpic_proxy) {
2121 error_report("This KVM version does not support EPR");
2122 exit(1);
2126 int kvmppc_smt_threads(void)
2128 return cap_ppc_smt ? cap_ppc_smt : 1;
2131 int kvmppc_set_smt_threads(int smt)
2133 int ret;
2135 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2136 if (!ret) {
2137 cap_ppc_smt = smt;
2139 return ret;
2142 void kvmppc_hint_smt_possible(Error **errp)
2144 int i;
2145 GString *g;
2146 char *s;
2148 assert(kvm_enabled());
2149 if (cap_ppc_smt_possible) {
2150 g = g_string_new("Available VSMT modes:");
2151 for (i = 63; i >= 0; i--) {
2152 if ((1UL << i) & cap_ppc_smt_possible) {
2153 g_string_append_printf(g, " %lu", (1UL << i));
2156 s = g_string_free(g, false);
2157 error_append_hint(errp, "%s.\n", s);
2158 g_free(s);
2159 } else {
2160 error_append_hint(errp,
2161 "This KVM seems to be too old to support VSMT.\n");
2166 #ifdef TARGET_PPC64
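/*
 * Note on the formula below: 2^hash_shift bytes of hash table contain
 * 2^(hash_shift - 7) PTEGs (each group is 128 bytes), so the cap of
 * 1 << (best_page_shift + hash_shift - 7) presumably limits the RMA to
 * one backing page per PTEG.  For example, a 16MB HPT (hash_shift = 24)
 * with 64K backing pages (page shift 16) would cap the RMA at 8GB.
 * This reading of the formula is an editorial interpretation, not taken
 * from the original comments.
 */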
2167 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2169 struct kvm_ppc_smmu_info info;
2170 long rampagesize, best_page_shift;
2171 int i;
2173 /* Find the largest hardware supported page size that's less than
2174 * or equal to the (logical) backing page size of guest RAM */
2175 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2176 rampagesize = qemu_getrampagesize();
2177 best_page_shift = 0;
2179 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2180 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2182 if (!sps->page_shift) {
2183 continue;
2186 if ((sps->page_shift > best_page_shift)
2187 && ((1UL << sps->page_shift) <= rampagesize)) {
2188 best_page_shift = sps->page_shift;
2192 return MIN(current_size,
2193 1ULL << (best_page_shift + hash_shift - 7));
2195 #endif
2197 bool kvmppc_spapr_use_multitce(void)
2199 return cap_spapr_multitce;
2202 int kvmppc_spapr_enable_inkernel_multitce(void)
2204 int ret;
2206 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2207 H_PUT_TCE_INDIRECT, 1);
2208 if (!ret) {
2209 ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2210 H_STUFF_TCE, 1);
2213 return ret;
2216 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2217 uint64_t bus_offset, uint32_t nb_table,
2218 int *pfd, bool need_vfio)
2220 long len;
2221 int fd;
2222 void *table;
2224 /* Must set fd to -1 so we don't try to munmap when called for
2225 * destroying the table, which the upper layers -will- do
2227 *pfd = -1;
2228 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2229 return NULL;
2232 if (cap_spapr_tce_64) {
2233 struct kvm_create_spapr_tce_64 args = {
2234 .liobn = liobn,
2235 .page_shift = page_shift,
2236 .offset = bus_offset >> page_shift,
2237 .size = nb_table,
2238 .flags = 0
2240 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2241 if (fd < 0) {
2242 fprintf(stderr,
2243 "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2244 liobn);
2245 return NULL;
2247 } else if (cap_spapr_tce) {
2248 uint64_t window_size = (uint64_t) nb_table << page_shift;
2249 struct kvm_create_spapr_tce args = {
2250 .liobn = liobn,
2251 .window_size = window_size,
2253 if ((window_size != args.window_size) || bus_offset) {
2254 return NULL;
2256 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2257 if (fd < 0) {
2258 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2259 liobn);
2260 return NULL;
2262 } else {
2263 return NULL;
2266 len = nb_table * sizeof(uint64_t);
2267 /* FIXME: round this up to page size */
2269 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2270 if (table == MAP_FAILED) {
2271 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2272 liobn);
2273 close(fd);
2274 return NULL;
2277 *pfd = fd;
2278 return table;
2281 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2283 long len;
2285 if (fd < 0) {
2286 return -1;
2289 len = nb_table * sizeof(uint64_t);
2290 if ((munmap(table, len) < 0) ||
2291 (close(fd) < 0)) {
2292 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2293 strerror(errno));
2294 /* Leak the table */
2297 return 0;
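/*
 * Illustrative pairing of kvmppc_create_spapr_tce() and
 * kvmppc_remove_spapr_tce() above; the liobn, page shift and table size
 * used here are hypothetical values.
 */
static void spapr_tce_example(void)
{
    int fd;
    uint32_t nb_table = 0x10000;        /* number of TCE entries */
    void *tbl = kvmppc_create_spapr_tce(0x80000000, 12, 0, nb_table,
                                        &fd, false);

    if (tbl) {
        /* ... DMA window is programmed through the mmap()ed table ... */
        kvmppc_remove_spapr_tce(tbl, fd, nb_table);
    }
}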
2300 int kvmppc_reset_htab(int shift_hint)
2302 uint32_t shift = shift_hint;
2304 if (!kvm_enabled()) {
2305 /* Full emulation, tell caller to allocate htab itself */
2306 return 0;
2308 if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2309 int ret;
2310 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2311 if (ret == -ENOTTY) {
2312 /* At least some versions of PR KVM advertise the
2313 * capability, but don't implement the ioctl(). Oops.
2314 * Return 0 so that we allocate the htab in qemu, as is
2315 * correct for PR. */
2316 return 0;
2317 } else if (ret < 0) {
2318 return ret;
2320 return shift;
2323 /* We have a kernel that predates the htab reset calls.  For PR
2324 * KVM, we need to allocate the htab ourselves; an HV KVM of this
2325 * era will already have allocated a 16MB fixed-size hash table. */
2326 if (kvmppc_is_pr(kvm_state)) {
2327 /* PR - tell caller to allocate htab */
2328 return 0;
2329 } else {
2330 /* HV - assume 16MB kernel allocated htab */
2331 return 24;
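/*
 * To summarise the contract of kvmppc_reset_htab() above: a return of 0
 * tells the caller to allocate the hash table in QEMU itself, a positive
 * return is the log2 size (shift) of a hash table the kernel has already
 * allocated or resized, and a negative return is an error code.
 */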
2335 static inline uint32_t mfpvr(void)
2337 uint32_t pvr;
2339 asm ("mfpvr %0"
2340 : "=r"(pvr));
2341 return pvr;
2344 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2346 if (on) {
2347 *word |= flags;
2348 } else {
2349 *word &= ~flags;
2353 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2355 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2356 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2357 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2359 /* Now fix up the class with information we can query from the host */
2360 pcc->pvr = mfpvr();
2362 alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2363 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2364 alter_insns(&pcc->insns_flags2, PPC2_VSX,
2365 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2366 alter_insns(&pcc->insns_flags2, PPC2_DFP,
2367 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2369 if (dcache_size != -1) {
2370 pcc->l1_dcache_size = dcache_size;
2373 if (icache_size != -1) {
2374 pcc->l1_icache_size = icache_size;
2377 #if defined(TARGET_PPC64)
2378 pcc->radix_page_info = kvm_get_radix_page_info();
2380 if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2382 * POWER9 DD1 has some bugs which make it not really ISA 3.00
2383 * compliant. More importantly, advertising ISA 3.00
2384 * architected mode may prevent guests from activating
2385 * necessary DD1 workarounds.
2387 pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2388 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2390 #endif /* defined(TARGET_PPC64) */
2393 bool kvmppc_has_cap_epr(void)
2395 return cap_epr;
2398 bool kvmppc_has_cap_fixup_hcalls(void)
2400 return cap_fixup_hcalls;
2403 bool kvmppc_has_cap_htm(void)
2405 return cap_htm;
2408 bool kvmppc_has_cap_mmu_radix(void)
2410 return cap_mmu_radix;
2413 bool kvmppc_has_cap_mmu_hash_v3(void)
2415 return cap_mmu_hash_v3;
2418 static bool kvmppc_power8_host(void)
2420 bool ret = false;
2421 #ifdef TARGET_PPC64
2423 uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();
2424 ret = (base_pvr == CPU_POWERPC_POWER8E_BASE) ||
2425 (base_pvr == CPU_POWERPC_POWER8NVL_BASE) ||
2426 (base_pvr == CPU_POWERPC_POWER8_BASE);
2428 #endif /* TARGET_PPC64 */
2429 return ret;
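/*
 * Editorial note on the parse_cap_ppc_safe_*() helpers below: they map
 * the raw KVM_PPC_GET_CPU_CHAR character/behaviour bits onto the values
 * used by the sPAPR security capabilities.  For the cache and bounds
 * check cases, 2 appears to mean the CPU is not affected, 1 that a
 * software workaround exists, and 0 that no mitigation was detected;
 * the indirect branch case returns the specific SPAPR_CAP_FIXED_* value
 * instead.  This is an interpretation of the code, not original text.
 */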
2432 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2434 bool l1d_thread_priv_req = !kvmppc_power8_host();
2436 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2437 return 2;
2438 } else if ((!l1d_thread_priv_req ||
2439 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2440 (c.character & c.character_mask
2441 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2442 return 1;
2445 return 0;
2448 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2450 if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2451 return 2;
2452 } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2453 return 1;
2456 return 0;
2459 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2461 if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2462 return SPAPR_CAP_FIXED_CCD;
2463 } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2464 return SPAPR_CAP_FIXED_IBS;
2467 return 0;
2470 static void kvmppc_get_cpu_characteristics(KVMState *s)
2472 struct kvm_ppc_cpu_char c;
2473 int ret;
2475 /* Assume broken */
2476 cap_ppc_safe_cache = 0;
2477 cap_ppc_safe_bounds_check = 0;
2478 cap_ppc_safe_indirect_branch = 0;
2480 ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2481 if (!ret) {
2482 return;
2484 ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2485 if (ret < 0) {
2486 return;
2489 cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2490 cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2491 cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
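/*
 * The values cached above are exposed through the kvmppc_get_cap_safe_*()
 * accessors below; presumably the machine-level security capabilities use
 * them to decide what to advertise to the guest (an assumption, since the
 * consumers live outside this file).
 */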
2494 int kvmppc_get_cap_safe_cache(void)
2496 return cap_ppc_safe_cache;
2499 int kvmppc_get_cap_safe_bounds_check(void)
2501 return cap_ppc_safe_bounds_check;
2504 int kvmppc_get_cap_safe_indirect_branch(void)
2506 return cap_ppc_safe_indirect_branch;
2509 bool kvmppc_has_cap_spapr_vfio(void)
2511 return cap_spapr_vfio;
2514 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2516 uint32_t host_pvr = mfpvr();
2517 PowerPCCPUClass *pvr_pcc;
2519 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2520 if (pvr_pcc == NULL) {
2521 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2524 return pvr_pcc;
2527 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2529 TypeInfo type_info = {
2530 .name = TYPE_HOST_POWERPC_CPU,
2531 .class_init = kvmppc_host_cpu_class_init,
2533 MachineClass *mc = MACHINE_GET_CLASS(ms);
2534 PowerPCCPUClass *pvr_pcc;
2535 ObjectClass *oc;
2536 DeviceClass *dc;
2537 int i;
2539 pvr_pcc = kvm_ppc_get_host_cpu_class();
2540 if (pvr_pcc == NULL) {
2541 return -1;
2543 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2544 type_register(&type_info);
2545 if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2546 /* override TCG default cpu type with 'host' cpu model */
2547 mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2550 oc = object_class_by_name(type_info.name);
2551 g_assert(oc);
2554 * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2555 * we want "POWER8" to be a "family" alias that points to the current
2556 * host CPU type, too)
2558 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2559 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2560 if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2561 char *suffix;
2563 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2564 suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2565 if (suffix) {
2566 *suffix = 0;
2568 break;
2572 return 0;
2575 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2577 struct kvm_rtas_token_args args = {
2578 .token = token,
2581 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2582 return -ENOENT;
2585 strncpy(args.name, function, sizeof(args.name));
2587 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
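/*
 * Hypothetical registration sketch: hand an RTAS call off to the kernel
 * when supported, otherwise keep handling it in QEMU.  The token value
 * and the choice of "ibm,set-xive" are illustrative only.
 */
static void register_rtas_token_example(void)
{
    if (kvmppc_define_rtas_kernel_token(0x2001, "ibm,set-xive") < 0) {
        /* KVM lacks KVM_CAP_PPC_RTAS; QEMU must service the call itself */
    }
}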
2590 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2592 struct kvm_get_htab_fd s = {
2593 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2594 .start_index = index,
2596 int ret;
2598 if (!cap_htab_fd) {
2599 error_setg(errp, "KVM version doesn't support %s the HPT",
2600 write ? "writing" : "reading");
2601 return -ENOTSUP;
2604 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2605 if (ret < 0) {
2606 error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2607 write ? "writing" : "reading", write ? "to" : "from",
2608 strerror(errno));
2609 return -errno;
2612 return ret;
2615 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2617 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2618 uint8_t buf[bufsize];
2619 ssize_t rc;
2621 do {
2622 rc = read(fd, buf, bufsize);
2623 if (rc < 0) {
2624 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2625 strerror(errno));
2626 return rc;
2627 } else if (rc) {
2628 uint8_t *buffer = buf;
2629 ssize_t n = rc;
2630 while (n) {
2631 struct kvm_get_htab_header *head =
2632 (struct kvm_get_htab_header *) buffer;
2633 size_t chunksize = sizeof(*head) +
2634 HASH_PTE_SIZE_64 * head->n_valid;
2636 qemu_put_be32(f, head->index);
2637 qemu_put_be16(f, head->n_valid);
2638 qemu_put_be16(f, head->n_invalid);
2639 qemu_put_buffer(f, (void *)(head + 1),
2640 HASH_PTE_SIZE_64 * head->n_valid);
2642 buffer += chunksize;
2643 n -= chunksize;
2646 } while ((rc != 0)
2647 && ((max_ns < 0)
2648 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2650 return (rc == 0) ? 1 : 0;
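/*
 * For reference, the chunk layout produced above and consumed by
 * kvmppc_load_htab_chunk() below is, per chunk: a 32-bit big-endian HPTE
 * index, a 16-bit count of valid entries, a 16-bit count of invalid
 * entries, followed by n_valid hash PTEs of HASH_PTE_SIZE_64 bytes each.
 */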
2653 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2654 uint16_t n_valid, uint16_t n_invalid)
2656 struct kvm_get_htab_header *buf;
2657 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2658 ssize_t rc;
2660 buf = alloca(chunksize);
2661 buf->index = index;
2662 buf->n_valid = n_valid;
2663 buf->n_invalid = n_invalid;
2665 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2667 rc = write(fd, buf, chunksize);
2668 if (rc < 0) {
2669 fprintf(stderr, "Error writing KVM hash table: %s\n",
2670 strerror(errno));
2671 return rc;
2673 if (rc != chunksize) {
2674 /* We should never get a short write on a single chunk */
2675 fprintf(stderr, "Short write, restoring KVM hash table\n");
2676 return -1;
2678 return 0;
2681 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2683 return true;
2686 void kvm_arch_init_irq_routing(KVMState *s)
2690 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2692 int fd, rc;
2693 int i;
2695 fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2697 i = 0;
2698 while (i < n) {
2699 struct kvm_get_htab_header *hdr;
2700 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2701 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2703 rc = read(fd, buf, sizeof(buf));
2704 if (rc < 0) {
2705 hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2708 hdr = (struct kvm_get_htab_header *)buf;
2709 while ((i < n) && ((char *)hdr < (buf + rc))) {
2710 int invalid = hdr->n_invalid, valid = hdr->n_valid;
2712 if (hdr->index != (ptex + i)) {
2713 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2714 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2717 if (n - i < valid) {
2718 valid = n - i;
2720 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2721 i += valid;
2723 if ((n - i) < invalid) {
2724 invalid = n - i;
2726 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2727 i += invalid;
2729 hdr = (struct kvm_get_htab_header *)
2730 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2734 close(fd);
2737 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2739 int fd, rc;
2740 struct {
2741 struct kvm_get_htab_header hdr;
2742 uint64_t pte0;
2743 uint64_t pte1;
2744 } buf;
2746 fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2748 buf.hdr.n_valid = 1;
2749 buf.hdr.n_invalid = 0;
2750 buf.hdr.index = ptex;
2751 buf.pte0 = cpu_to_be64(pte0);
2752 buf.pte1 = cpu_to_be64(pte1);
2754 rc = write(fd, &buf, sizeof(buf));
2755 if (rc != sizeof(buf)) {
2756 hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2758 close(fd);
2761 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2762 uint64_t address, uint32_t data, PCIDevice *dev)
2764 return 0;
2767 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2768 int vector, PCIDevice *dev)
2770 return 0;
2773 int kvm_arch_release_virq_post(int virq)
2775 return 0;
2778 int kvm_arch_msi_data_to_gsi(uint32_t data)
2780 return data & 0xffff;
2783 int kvmppc_enable_hwrng(void)
2785 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2786 return -1;
2789 return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2792 void kvmppc_check_papr_resize_hpt(Error **errp)
2794 if (!kvm_enabled()) {
2795 return; /* No KVM, we're good */
2798 if (cap_resize_hpt) {
2799 return; /* Kernel has explicit support, we're good */
2802 /* Otherwise fallback on looking for PR KVM */
2803 if (kvmppc_is_pr(kvm_state)) {
2804 return;
2807 error_setg(errp,
2808 "Hash page table resizing not available with this KVM version");
2811 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2813 CPUState *cs = CPU(cpu);
2814 struct kvm_ppc_resize_hpt rhpt = {
2815 .flags = flags,
2816 .shift = shift,
2819 if (!cap_resize_hpt) {
2820 return -ENOSYS;
2823 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2826 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2828 CPUState *cs = CPU(cpu);
2829 struct kvm_ppc_resize_hpt rhpt = {
2830 .flags = flags,
2831 .shift = shift,
2834 if (!cap_resize_hpt) {
2835 return -ENOSYS;
2838 return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
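/*
 * The two helpers above mirror the guest-visible H_RESIZE_HPT_PREPARE /
 * H_RESIZE_HPT_COMMIT two-phase protocol: the caller is expected to issue
 * the prepare step (typically retrying while the kernel reports that the
 * new hash table is still being built) and only then commit the switch.
 * The retry behaviour described here is an assumption about the callers,
 * which live outside this file.
 */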
2842 * This is a helper function to detect a post migration scenario
2843 * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2844 * the guest kernel can't handle a PVR value other than the actual host
2845 * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2847 * If we don't have cap_ppc_pvr_compat and we're not running in PR
2848 * (so, we're HV), return true. The workaround itself is done in
2849 * cpu_post_load.
2851 * The order here is important: we'll only check for KVM PR as a
2852 * fallback if the guest kernel can't handle the situation itself.
2853 * We want to avoid querying the running KVM type at the QEMU level
2854 * as much as possible.
2856 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2858 CPUState *cs = CPU(cpu);
2860 if (!kvm_enabled()) {
2861 return false;
2864 if (cap_ppc_pvr_compat) {
2865 return false;
2868 return !kvmppc_is_pr(cs->kvm_state);