target/ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include "qemu/osdep.h"
  18 #include <dirent.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/vfs.h>
  21
  22 #include <linux/kvm.h>
  23
  24 #include "qemu-common.h"
  25 #include "qapi/error.h"
  26 #include "qemu/error-report.h"
  27 #include "cpu.h"
  28 #include "cpu-models.h"
  29 #include "qemu/timer.h"
  30 #include "sysemu/sysemu.h"
  31 #include "sysemu/hw_accel.h"
  32 #include "kvm_ppc.h"
  33 #include "sysemu/cpus.h"
  34 #include "sysemu/device_tree.h"
  35 #include "mmu-hash64.h"
  36
  37 #include "hw/sysbus.h"
  38 #include "hw/ppc/spapr.h"
  39 #include "hw/ppc/spapr_vio.h"
  40 #include "hw/ppc/spapr_cpu_core.h"
  41 #include "hw/ppc/ppc.h"
  42 #include "sysemu/watchdog.h"
  43 #include "trace.h"
  44 #include "exec/gdbstub.h"
  45 #include "exec/memattrs.h"
  46 #include "exec/ram_addr.h"
  47 #include "sysemu/hostmem.h"
  48 #include "qemu/cutils.h"
  49 #include "qemu/mmap-alloc.h"
  50 #if defined(TARGET_PPC64)
  51 #include "hw/ppc/spapr_cpu_core.h"
  52 #endif
  53 #include "elf.h"
  54 #include "sysemu/kvm_int.h"
  55
  56 //#define DEBUG_KVM
  57
  58 #ifdef DEBUG_KVM
  59 #define DPRINTF(fmt, ...) \
  60     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  61 #else
  62 #define DPRINTF(fmt, ...) \
  63     do { } while (0)
  64 #endif
  65
  66 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  67
  68 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  69     KVM_CAP_LAST_INFO
  70 };
  71
  72 static int cap_interrupt_unset = false;
  73 static int cap_interrupt_level = false;
  74 static int cap_segstate;
  75 static int cap_booke_sregs;
  76 static int cap_ppc_smt;
  77 static int cap_ppc_smt_possible;
  78 static int cap_ppc_rma;
  79 static int cap_spapr_tce;
  80 static int cap_spapr_tce_64;
  81 static int cap_spapr_multitce;
  82 static int cap_spapr_vfio;
  83 static int cap_hior;
  84 static int cap_one_reg;
  85 static int cap_epr;
  86 static int cap_ppc_watchdog;
  87 static int cap_papr;
  88 static int cap_htab_fd;
  89 static int cap_fixup_hcalls;
  90 static int cap_htm;             /* Hardware transactional memory support */
  91 static int cap_mmu_radix;
  92 static int cap_mmu_hash_v3;
  93 static int cap_resize_hpt;
  94 static int cap_ppc_pvr_compat;
  95
  96 static uint32_t debug_inst_opcode;
  97
/* XXX We have a race condition: the interrupt is really level triggered,
 *     but the infrastructure can't expose that yet. The guest takes the
 *     interrupt but ignores it, goes to sleep, and is never notified that
 *     the interrupt is still pending.
 *
 *     As a quick workaround, we wake up again 20 ms after injecting an
 *     interrupt. That way we ensure that interrupts are always reinjected
 *     in case the guest swallowed them.
 */
 107 static QEMUTimer *idle_timer;
 108
 109 static void kvm_kick_cpu(void *opaque)
 110 {
 111     PowerPCCPU *cpu = opaque;
 112
 113     qemu_cpu_kick(CPU(cpu));
 114 }
 115
 116 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 117  * should only be used for fallback tests - generally we should use
 118  * explicit capabilities for the features we want, rather than
 119  * assuming what is/isn't available depending on the KVM variant. */
 120 static bool kvmppc_is_pr(KVMState *ks)
 121 {
 122     /* Assume KVM-PR if the GET_PVINFO capability is available */
 123     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 124 }
 125
 126 static int kvm_ppc_register_host_cpu_type(void);
 127
 128 int kvm_arch_init(MachineState *ms, KVMState *s)
 129 {
 130     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 131     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 132     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 133     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 134     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
 135     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 136     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 137     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
 138     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 139     cap_spapr_vfio = false;
 140     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 141     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 142     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 143     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 144     /* Note: we don't set cap_papr here, because this capability is
 145      * only activated after this by kvmppc_set_papr() */
 146     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 147     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 148     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
 149     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 150     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
 151     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
 152     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
 153     /*
 154      * Note: setting it to false because there is not such capability
 155      * in KVM at this moment.
 156      *
 157      * TODO: call kvm_vm_check_extension() with the right capability
 158      * after the kernel starts implementing it.*/
 159     cap_ppc_pvr_compat = false;
 160
 161     if (!cap_interrupt_level) {
 162         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 163                         "VM to stall at times!\n");
 164     }
 165
 166     kvm_ppc_register_host_cpu_type();
 167
 168     return 0;
 169 }
 170
 171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 172 {
 173     return 0;
 174 }
 175
 176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 177 {
 178     CPUPPCState *cenv = &cpu->env;
 179     CPUState *cs = CPU(cpu);
 180     struct kvm_sregs sregs;
 181     int ret;
 182
 183     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 184         /* What we're really trying to say is "if we're on BookE, we use
 185            the native PVR for now". This is the only sane way to check
 186            it though, so we potentially confuse users that they can run
 187            BookE guests on BookS. Let's hope nobody dares enough :) */
 188         return 0;
 189     } else {
 190         if (!cap_segstate) {
 191             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 192             return -ENOSYS;
 193         }
 194     }
 195
 196     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 197     if (ret) {
 198         return ret;
 199     }
 200
 201     sregs.pvr = cenv->spr[SPR_PVR];
 202     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 203 }
 204
 205 /* Set up a shared TLB array with KVM */
 206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 207 {
 208     CPUPPCState *env = &cpu->env;
 209     CPUState *cs = CPU(cpu);
 210     struct kvm_book3e_206_tlb_params params = {};
 211     struct kvm_config_tlb cfg = {};
 212     unsigned int entries = 0;
 213     int ret, i;
 214
 215     if (!kvm_enabled() ||
 216         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 217         return 0;
 218     }
 219
 220     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 221
 222     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 223         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 224         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 225         entries += params.tlb_sizes[i];
 226     }
 227
 228     assert(entries == env->nb_tlb);
 229     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 230
 231     env->tlb_dirty = true;
 232
 233     cfg.array = (uintptr_t)env->tlb.tlbm;
 234     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 235     cfg.params = (uintptr_t)&params;
 236     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 237
 238     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 239     if (ret < 0) {
 240         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 241                 __func__, strerror(-ret));
 242         return ret;
 243     }
 244
 245     env->kvm_sw_tlb = true;
 246     return 0;
 247 }
 248
 249
 250 #if defined(TARGET_PPC64)
 251 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 252                                        struct kvm_ppc_smmu_info *info)
 253 {
 254     CPUPPCState *env = &cpu->env;
 255     CPUState *cs = CPU(cpu);
 256
 257     memset(info, 0, sizeof(*info));
 258
 259     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 260      * need to "guess" what the supported page sizes are.
 261      *
 262      * For that to work we make a few assumptions:
 263      *
 264      * - Check whether we are running "PR" KVM which only supports 4K
 265      *   and 16M pages, but supports them regardless of the backing
 266      *   store characteritics. We also don't support 1T segments.
 267      *
 268      *   This is safe as if HV KVM ever supports that capability or PR
 269      *   KVM grows supports for more page/segment sizes, those versions
 270      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 271      *   will not hit this fallback
 272      *
 273      * - Else we are running HV KVM. This means we only support page
 274      *   sizes that fit in the backing store. Additionally we only
 275      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 276      *   P7 encodings for the SLB and hash table. Here too, we assume
 277      *   support for any newer processor will mean a kernel that
 278      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 279      *   this fallback.
 280      */
 281     if (kvmppc_is_pr(cs->kvm_state)) {
 282         /* No flags */
 283         info->flags = 0;
 284         info->slb_size = 64;
 285
 286         /* Standard 4k base page size segment */
 287         info->sps[0].page_shift = 12;
 288         info->sps[0].slb_enc = 0;
 289         info->sps[0].enc[0].page_shift = 12;
 290         info->sps[0].enc[0].pte_enc = 0;
 291
 292         /* Standard 16M large page size segment */
 293         info->sps[1].page_shift = 24;
 294         info->sps[1].slb_enc = SLB_VSID_L;
 295         info->sps[1].enc[0].page_shift = 24;
 296         info->sps[1].enc[0].pte_enc = 0;
 297     } else {
 298         int i = 0;
 299
 300         /* HV KVM has backing store size restrictions */
 301         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 302
 303         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 304             info->flags |= KVM_PPC_1T_SEGMENTS;
 305         }
 306
 307         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
 308            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
 309             info->slb_size = 32;
 310         } else {
 311             info->slb_size = 64;
 312         }
 313
 314         /* Standard 4k base page size segment */
 315         info->sps[i].page_shift = 12;
 316         info->sps[i].slb_enc = 0;
 317         info->sps[i].enc[0].page_shift = 12;
 318         info->sps[i].enc[0].pte_enc = 0;
 319         i++;
 320
 321         /* 64K on MMU 2.06 and later */
 322         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
 323             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
 324             info->sps[i].page_shift = 16;
 325             info->sps[i].slb_enc = 0x110;
 326             info->sps[i].enc[0].page_shift = 16;
 327             info->sps[i].enc[0].pte_enc = 1;
 328             i++;
 329         }
 330
 331         /* Standard 16M large page size segment */
 332         info->sps[i].page_shift = 24;
 333         info->sps[i].slb_enc = SLB_VSID_L;
 334         info->sps[i].enc[0].page_shift = 24;
 335         info->sps[i].enc[0].pte_enc = 0;
 336     }
 337 }
 338
 339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 340 {
 341     CPUState *cs = CPU(cpu);
 342     int ret;
 343
 344     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 345         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 346         if (ret == 0) {
 347             return;
 348         }
 349     }
 350
 351     kvm_get_fallback_smmu_info(cpu, info);
 352 }
 353
 354 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
 355 {
 356     KVMState *s = KVM_STATE(current_machine->accelerator);
 357     struct ppc_radix_page_info *radix_page_info;
 358     struct kvm_ppc_rmmu_info rmmu_info;
 359     int i;
 360
 361     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
 362         return NULL;
 363     }
 364     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
 365         return NULL;
 366     }
 367     radix_page_info = g_malloc0(sizeof(*radix_page_info));
 368     radix_page_info->count = 0;
 369     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
 370         if (rmmu_info.ap_encodings[i]) {
 371             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
 372             radix_page_info->count++;
 373         }
 374     }
 375     return radix_page_info;
 376 }
 377
 378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
 379                                      bool radix, bool gtse,
 380                                      uint64_t proc_tbl)
 381 {
 382     CPUState *cs = CPU(cpu);
 383     int ret;
 384     uint64_t flags = 0;
 385     struct kvm_ppc_mmuv3_cfg cfg = {
 386         .process_table = proc_tbl,
 387     };
 388
 389     if (radix) {
 390         flags |= KVM_PPC_MMUV3_RADIX;
 391     }
 392     if (gtse) {
 393         flags |= KVM_PPC_MMUV3_GTSE;
 394     }
 395     cfg.flags = flags;
 396     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
 397     switch (ret) {
 398     case 0:
 399         return H_SUCCESS;
 400     case -EINVAL:
 401         return H_PARAMETER;
 402     case -ENODEV:
 403         return H_NOT_AVAILABLE;
 404     default:
 405         return H_HARDWARE;
 406     }
 407 }
 408
 409 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 410 {
 411     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 412         return true;
 413     }
 414
 415     return (1ul << shift) <= rampgsize;
 416 }
 417
 418 static long max_cpu_page_size;
 419
 420 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 421 {
 422     static struct kvm_ppc_smmu_info smmu_info;
 423     static bool has_smmu_info;
 424     CPUPPCState *env = &cpu->env;
 425     int iq, ik, jq, jk;
 426     bool has_64k_pages = false;
 427
 428     /* We only handle page sizes for 64-bit server guests for now */
 429     if (!(env->mmu_model & POWERPC_MMU_64)) {
 430         return;
 431     }
 432
 433     /* Collect MMU info from kernel if not already */
 434     if (!has_smmu_info) {
 435         kvm_get_smmu_info(cpu, &smmu_info);
 436         has_smmu_info = true;
 437     }
 438
 439     if (!max_cpu_page_size) {
 440         max_cpu_page_size = qemu_getrampagesize();
 441     }
 442
 443     /* Convert to QEMU form */
 444     memset(&env->sps, 0, sizeof(env->sps));
 445
 446     /* If we have HV KVM, we need to forbid CI large pages if our
 447      * host page size is smaller than 64K.
 448      */
 449     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 450         env->ci_large_pages = getpagesize() >= 0x10000;
 451     }
 452
 453     /*
 454      * XXX This loop should be an entry wide AND of the capabilities that
 455      *     the selected CPU has with the capabilities that KVM supports.
 456      */
 457     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 458         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 459         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 460
 461         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
 462                                  ksps->page_shift)) {
 463             continue;
 464         }
 465         qsps->page_shift = ksps->page_shift;
 466         qsps->slb_enc = ksps->slb_enc;
 467         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 468             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
 469                                      ksps->enc[jk].page_shift)) {
 470                 continue;
 471             }
 472             if (ksps->enc[jk].page_shift == 16) {
 473                 has_64k_pages = true;
 474             }
 475             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 476             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 477             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 478                 break;
 479             }
 480         }
 481         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 482             break;
 483         }
 484     }
 485     env->slb_nr = smmu_info.slb_size;
 486     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 487         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 488     }
 489     if (!has_64k_pages) {
 490         env->mmu_model &= ~POWERPC_MMU_64K;
 491     }
 492 }
 493
 494 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 495 {
 496     Object *mem_obj = object_resolve_path(obj_path, NULL);
 497     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
 498     long pagesize;
 499
 500     if (mempath) {
 501         pagesize = qemu_mempath_getpagesize(mempath);
 502         g_free(mempath);
 503     } else {
 504         pagesize = getpagesize();
 505     }
 506
 507     return pagesize >= max_cpu_page_size;
 508 }
 509
 510 #else /* defined (TARGET_PPC64) */
 511
 512 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 513 {
 514 }
 515
 516 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 517 {
 518     return true;
 519 }
 520
 521 #endif /* !defined (TARGET_PPC64) */
 522
 523 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 524 {
 525     return POWERPC_CPU(cpu)->vcpu_id;
 526 }
 527
/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 * book3s supports only 1 watchpoint, so an array size
 * of 4 is sufficient for now.
 */
 532 #define MAX_HW_BKPTS 4
 533
 534 static struct HWBreakpoint {
 535     target_ulong addr;
 536     int type;
 537 } hw_debug_points[MAX_HW_BKPTS];
 538
 539 static CPUWatchpoint hw_watchpoint;
 540
/* By default, no h/w breakpoints or watchpoints are supported */
 542 static int max_hw_breakpoint;
 543 static int max_hw_watchpoint;
 544 static int nb_hw_breakpoint;
 545 static int nb_hw_watchpoint;
 546
 547 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 548 {
 549     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 550         max_hw_breakpoint = 2;
 551         max_hw_watchpoint = 2;
 552     }
 553
 554     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 555         fprintf(stderr, "Error initializing h/w breakpoints\n");
 556         return;
 557     }
 558 }
 559
 560 int kvm_arch_init_vcpu(CPUState *cs)
 561 {
 562     PowerPCCPU *cpu = POWERPC_CPU(cs);
 563     CPUPPCState *cenv = &cpu->env;
 564     int ret;
 565
 566     /* Gather server mmu info from KVM and update the CPU state */
 567     kvm_fixup_page_sizes(cpu);
 568
 569     /* Synchronize sregs with kvm */
 570     ret = kvm_arch_sync_sregs(cpu);
 571     if (ret) {
 572         if (ret == -EINVAL) {
 573             error_report("Register sync failed... If you're using kvm-hv.ko,"
 574                          " only \"-cpu host\" is possible");
 575         }
 576         return ret;
 577     }
 578
 579     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 580
 581     switch (cenv->mmu_model) {
 582     case POWERPC_MMU_BOOKE206:
 583         /* This target supports access to KVM's guest TLB */
 584         ret = kvm_booke206_tlb_init(cpu);
 585         break;
 586     case POWERPC_MMU_2_07:
 587         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 588             /* KVM-HV has transactional memory on POWER8 also without the
 589              * KVM_CAP_PPC_HTM extension, so enable it here instead as
 590              * long as it's availble to userspace on the host. */
 591             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
 592                 cap_htm = true;
 593             }
 594         }
 595         break;
 596     default:
 597         break;
 598     }
 599
 600     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 601     kvmppc_hw_debug_points_init(cenv);
 602
 603     return ret;
 604 }
 605
 606 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 607 {
 608     CPUPPCState *env = &cpu->env;
 609     CPUState *cs = CPU(cpu);
 610     struct kvm_dirty_tlb dirty_tlb;
 611     unsigned char *bitmap;
 612     int ret;
 613
 614     if (!env->kvm_sw_tlb) {
 615         return;
 616     }
 617
 618     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 619     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 620
 621     dirty_tlb.bitmap = (uintptr_t)bitmap;
 622     dirty_tlb.num_dirty = env->nb_tlb;
 623
 624     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 625     if (ret) {
 626         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 627                 __func__, strerror(-ret));
 628     }
 629
 630     g_free(bitmap);
 631 }
 632
 633 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 634 {
 635     PowerPCCPU *cpu = POWERPC_CPU(cs);
 636     CPUPPCState *env = &cpu->env;
 637     union {
 638         uint32_t u32;
 639         uint64_t u64;
 640     } val;
 641     struct kvm_one_reg reg = {
 642         .id = id,
 643         .addr = (uintptr_t) &val,
 644     };
 645     int ret;
 646
 647     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 648     if (ret != 0) {
 649         trace_kvm_failed_spr_get(spr, strerror(errno));
 650     } else {
 651         switch (id & KVM_REG_SIZE_MASK) {
 652         case KVM_REG_SIZE_U32:
 653             env->spr[spr] = val.u32;
 654             break;
 655
 656         case KVM_REG_SIZE_U64:
 657             env->spr[spr] = val.u64;
 658             break;
 659
 660         default:
 661             /* Don't handle this size yet */
 662             abort();
 663         }
 664     }
 665 }
 666
 667 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 668 {
 669     PowerPCCPU *cpu = POWERPC_CPU(cs);
 670     CPUPPCState *env = &cpu->env;
 671     union {
 672         uint32_t u32;
 673         uint64_t u64;
 674     } val;
 675     struct kvm_one_reg reg = {
 676         .id = id,
 677         .addr = (uintptr_t) &val,
 678     };
 679     int ret;
 680
 681     switch (id & KVM_REG_SIZE_MASK) {
 682     case KVM_REG_SIZE_U32:
 683         val.u32 = env->spr[spr];
 684         break;
 685
 686     case KVM_REG_SIZE_U64:
 687         val.u64 = env->spr[spr];
 688         break;
 689
 690     default:
 691         /* Don't handle this size yet */
 692         abort();
 693     }
 694
 695     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 696     if (ret != 0) {
 697         trace_kvm_failed_spr_set(spr, strerror(errno));
 698     }
 699 }
 700
 701 static int kvm_put_fp(CPUState *cs)
 702 {
 703     PowerPCCPU *cpu = POWERPC_CPU(cs);
 704     CPUPPCState *env = &cpu->env;
 705     struct kvm_one_reg reg;
 706     int i;
 707     int ret;
 708
 709     if (env->insns_flags & PPC_FLOAT) {
 710         uint64_t fpscr = env->fpscr;
 711         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 712
 713         reg.id = KVM_REG_PPC_FPSCR;
 714         reg.addr = (uintptr_t)&fpscr;
 715         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 716         if (ret < 0) {
 717             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 718             return ret;
 719         }
 720
 721         for (i = 0; i < 32; i++) {
 722             uint64_t vsr[2];
 723
 724 #ifdef HOST_WORDS_BIGENDIAN
 725             vsr[0] = float64_val(env->fpr[i]);
 726             vsr[1] = env->vsr[i];
 727 #else
 728             vsr[0] = env->vsr[i];
 729             vsr[1] = float64_val(env->fpr[i]);
 730 #endif
 731             reg.addr = (uintptr_t) &vsr;
 732             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 733
 734             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 735             if (ret < 0) {
 736                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 737                         i, strerror(errno));
 738                 return ret;
 739             }
 740         }
 741     }
 742
 743     if (env->insns_flags & PPC_ALTIVEC) {
 744         reg.id = KVM_REG_PPC_VSCR;
 745         reg.addr = (uintptr_t)&env->vscr;
 746         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 747         if (ret < 0) {
 748             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 749             return ret;
 750         }
 751
 752         for (i = 0; i < 32; i++) {
 753             reg.id = KVM_REG_PPC_VR(i);
 754             reg.addr = (uintptr_t)&env->avr[i];
 755             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 756             if (ret < 0) {
 757                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 758                 return ret;
 759             }
 760         }
 761     }
 762
 763     return 0;
 764 }
 765
 766 static int kvm_get_fp(CPUState *cs)
 767 {
 768     PowerPCCPU *cpu = POWERPC_CPU(cs);
 769     CPUPPCState *env = &cpu->env;
 770     struct kvm_one_reg reg;
 771     int i;
 772     int ret;
 773
 774     if (env->insns_flags & PPC_FLOAT) {
 775         uint64_t fpscr;
 776         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 777
 778         reg.id = KVM_REG_PPC_FPSCR;
 779         reg.addr = (uintptr_t)&fpscr;
 780         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 781         if (ret < 0) {
 782             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 783             return ret;
 784         } else {
 785             env->fpscr = fpscr;
 786         }
 787
 788         for (i = 0; i < 32; i++) {
 789             uint64_t vsr[2];
 790
 791             reg.addr = (uintptr_t) &vsr;
 792             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 793
 794             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 795             if (ret < 0) {
 796                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 797                         vsx ? "VSR" : "FPR", i, strerror(errno));
 798                 return ret;
 799             } else {
 800 #ifdef HOST_WORDS_BIGENDIAN
 801                 env->fpr[i] = vsr[0];
 802                 if (vsx) {
 803                     env->vsr[i] = vsr[1];
 804                 }
 805 #else
 806                 env->fpr[i] = vsr[1];
 807                 if (vsx) {
 808                     env->vsr[i] = vsr[0];
 809                 }
 810 #endif
 811             }
 812         }
 813     }
 814
 815     if (env->insns_flags & PPC_ALTIVEC) {
 816         reg.id = KVM_REG_PPC_VSCR;
 817         reg.addr = (uintptr_t)&env->vscr;
 818         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 819         if (ret < 0) {
 820             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 821             return ret;
 822         }
 823
 824         for (i = 0; i < 32; i++) {
 825             reg.id = KVM_REG_PPC_VR(i);
 826             reg.addr = (uintptr_t)&env->avr[i];
 827             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 828             if (ret < 0) {
 829                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 830                         i, strerror(errno));
 831                 return ret;
 832             }
 833         }
 834     }
 835
 836     return 0;
 837 }
 838
 839 #if defined(TARGET_PPC64)
 840 static int kvm_get_vpa(CPUState *cs)
 841 {
 842     PowerPCCPU *cpu = POWERPC_CPU(cs);
 843     CPUPPCState *env = &cpu->env;
 844     struct kvm_one_reg reg;
 845     int ret;
 846
 847     reg.id = KVM_REG_PPC_VPA_ADDR;
 848     reg.addr = (uintptr_t)&env->vpa_addr;
 849     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 850     if (ret < 0) {
 851         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 852         return ret;
 853     }
 854
 855     assert((uintptr_t)&env->slb_shadow_size
 856            == ((uintptr_t)&env->slb_shadow_addr + 8));
 857     reg.id = KVM_REG_PPC_VPA_SLB;
 858     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 859     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 860     if (ret < 0) {
 861         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 862                 strerror(errno));
 863         return ret;
 864     }
 865
 866     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 867     reg.id = KVM_REG_PPC_VPA_DTL;
 868     reg.addr = (uintptr_t)&env->dtl_addr;
 869     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 870     if (ret < 0) {
 871         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 872                 strerror(errno));
 873         return ret;
 874     }
 875
 876     return 0;
 877 }
 878
 879 static int kvm_put_vpa(CPUState *cs)
 880 {
 881     PowerPCCPU *cpu = POWERPC_CPU(cs);
 882     CPUPPCState *env = &cpu->env;
 883     struct kvm_one_reg reg;
 884     int ret;
 885
 886     /* SLB shadow or DTL can't be registered unless a master VPA is
 887      * registered.  That means when restoring state, if a VPA *is*
 888      * registered, we need to set that up first.  If not, we need to
 889      * deregister the others before deregistering the master VPA */
 890     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 891
 892     if (env->vpa_addr) {
 893         reg.id = KVM_REG_PPC_VPA_ADDR;
 894         reg.addr = (uintptr_t)&env->vpa_addr;
 895         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 896         if (ret < 0) {
 897             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 898             return ret;
 899         }
 900     }
 901
 902     assert((uintptr_t)&env->slb_shadow_size
 903            == ((uintptr_t)&env->slb_shadow_addr + 8));
 904     reg.id = KVM_REG_PPC_VPA_SLB;
 905     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 906     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 907     if (ret < 0) {
 908         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 909         return ret;
 910     }
 911
 912     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 913     reg.id = KVM_REG_PPC_VPA_DTL;
 914     reg.addr = (uintptr_t)&env->dtl_addr;
 915     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 916     if (ret < 0) {
 917         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 918                 strerror(errno));
 919         return ret;
 920     }
 921
 922     if (!env->vpa_addr) {
 923         reg.id = KVM_REG_PPC_VPA_ADDR;
 924         reg.addr = (uintptr_t)&env->vpa_addr;
 925         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 926         if (ret < 0) {
 927             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 928             return ret;
 929         }
 930     }
 931
 932     return 0;
 933 }
 934 #endif /* TARGET_PPC64 */
 935
 936 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 937 {
 938     CPUPPCState *env = &cpu->env;
 939     struct kvm_sregs sregs;
 940     int i;
 941
 942     sregs.pvr = env->spr[SPR_PVR];
 943
 944     if (cpu->vhyp) {
 945         PPCVirtualHypervisorClass *vhc =
 946             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
 947         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
 948     } else {
 949         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 950     }
 951
 952     /* Sync SLB */
 953 #ifdef TARGET_PPC64
 954     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 955         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 956         if (env->slb[i].esid & SLB_ESID_V) {
 957             sregs.u.s.ppc64.slb[i].slbe |= i;
 958         }
 959         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 960     }
 961 #endif
 962
 963     /* Sync SRs */
 964     for (i = 0; i < 16; i++) {
 965         sregs.u.s.ppc32.sr[i] = env->sr[i];
 966     }
 967
 968     /* Sync BATs */
 969     for (i = 0; i < 8; i++) {
 970         /* Beware. We have to swap upper and lower bits here */
 971         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 972             | env->DBAT[1][i];
 973         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 974             | env->IBAT[1][i];
 975     }
 976
 977     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 978 }
 979
 980 int kvm_arch_put_registers(CPUState *cs, int level)
 981 {
 982     PowerPCCPU *cpu = POWERPC_CPU(cs);
 983     CPUPPCState *env = &cpu->env;
 984     struct kvm_regs regs;
 985     int ret;
 986     int i;
 987
 988     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 989     if (ret < 0) {
 990         return ret;
 991     }
 992
 993     regs.ctr = env->ctr;
 994     regs.lr  = env->lr;
 995     regs.xer = cpu_read_xer(env);
 996     regs.msr = env->msr;
 997     regs.pc = env->nip;
 998
 999     regs.srr0 = env->spr[SPR_SRR0];
1000     regs.srr1 = env->spr[SPR_SRR1];
1001
1002     regs.sprg0 = env->spr[SPR_SPRG0];
1003     regs.sprg1 = env->spr[SPR_SPRG1];
1004     regs.sprg2 = env->spr[SPR_SPRG2];
1005     regs.sprg3 = env->spr[SPR_SPRG3];
1006     regs.sprg4 = env->spr[SPR_SPRG4];
1007     regs.sprg5 = env->spr[SPR_SPRG5];
1008     regs.sprg6 = env->spr[SPR_SPRG6];
1009     regs.sprg7 = env->spr[SPR_SPRG7];
1010
1011     regs.pid = env->spr[SPR_BOOKE_PID];
1012
1013     for (i = 0;i < 32; i++)
1014         regs.gpr[i] = env->gpr[i];
1015
1016     regs.cr = 0;
1017     for (i = 0; i < 8; i++) {
1018         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1019     }
1020
1021     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1022     if (ret < 0)
1023         return ret;
1024
1025     kvm_put_fp(cs);
1026
1027     if (env->tlb_dirty) {
1028         kvm_sw_tlb_put(cpu);
1029         env->tlb_dirty = false;
1030     }
1031
1032     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1033         ret = kvmppc_put_books_sregs(cpu);
1034         if (ret < 0) {
1035             return ret;
1036         }
1037     }
1038
1039     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1040         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1041     }
1042
1043     if (cap_one_reg) {
1044         int i;
1045
1046         /* We deliberately ignore errors here, for kernels which have
1047          * the ONE_REG calls, but don't support the specific
1048          * registers, there's a reasonable chance things will still
1049          * work, at least until we try to migrate. */
1050         for (i = 0; i < 1024; i++) {
1051             uint64_t id = env->spr_cb[i].one_reg_id;
1052
1053             if (id != 0) {
1054                 kvm_put_one_spr(cs, id, i);
1055             }
1056         }
1057
1058 #ifdef TARGET_PPC64
1059         if (msr_ts) {
1060             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1061                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1062             }
1063             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1064                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1065             }
1066             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1067             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1068             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1069             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1070             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1071             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1072             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1073             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1074             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1075             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1076         }
1077
1078         if (cap_papr) {
1079             if (kvm_put_vpa(cs) < 0) {
1080                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1081             }
1082         }
1083
1084         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1085 #endif /* TARGET_PPC64 */
1086     }
1087
1088     return ret;
1089 }
1090
1091 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1092 {
1093      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1094 }
1095
1096 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1097 {
1098     CPUPPCState *env = &cpu->env;
1099     struct kvm_sregs sregs;
1100     int ret;
1101
1102     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1103     if (ret < 0) {
1104         return ret;
1105     }
1106
1107     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1108         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1109         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1110         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1111         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1112         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1113         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1114         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1115         env->spr[SPR_DECR] = sregs.u.e.dec;
1116         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1117         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1118         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1119     }
1120
1121     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1122         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1123         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1124         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1125         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1126         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1127     }
1128
1129     if (sregs.u.e.features & KVM_SREGS_E_64) {
1130         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1131     }
1132
1133     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1134         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1135     }
1136
1137     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1138         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1139         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1140         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1141         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1142         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1143         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1144         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1145         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1146         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1147         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1148         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1149         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1150         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1151         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1152         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1153         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1154         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1155         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1156         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1157         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1158         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1159         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1160         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1161         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1162         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1163         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1164         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1165         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1166         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1167         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1168         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1169         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1170
1171         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1172             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1173             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1174             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1175             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1176             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1177             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1178         }
1179
1180         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1181             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1182             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1183         }
1184
1185         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1186             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1187             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1188             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1189             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1190         }
1191     }
1192
1193     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1194         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1195         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1196         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1197         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1198         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1199         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1200         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1201         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1202         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1203         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1204     }
1205
1206     if (sregs.u.e.features & KVM_SREGS_EXP) {
1207         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1208     }
1209
1210     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1211         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1212         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1213     }
1214
1215     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1216         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1217         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1218         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1219
1220         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1221             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1222             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1223         }
1224     }
1225
1226     return 0;
1227 }
1228
1229 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1230 {
1231     CPUPPCState *env = &cpu->env;
1232     struct kvm_sregs sregs;
1233     int ret;
1234     int i;
1235
1236     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1237     if (ret < 0) {
1238         return ret;
1239     }
1240
1241     if (!cpu->vhyp) {
1242         ppc_store_sdr1(env, sregs.u.s.sdr1);
1243     }
1244
1245     /* Sync SLB */
1246 #ifdef TARGET_PPC64
1247     /*
1248      * The packed SLB array we get from KVM_GET_SREGS only contains
1249      * information about valid entries. So we flush our internal copy
1250      * to get rid of stale ones, then put all valid SLB entries back
1251      * in.
1252      */
1253     memset(env->slb, 0, sizeof(env->slb));
1254     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1255         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1256         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1257         /*
1258          * Only restore valid entries
1259          */
1260         if (rb & SLB_ESID_V) {
1261             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1262         }
1263     }
1264 #endif
1265
1266     /* Sync SRs */
1267     for (i = 0; i < 16; i++) {
1268         env->sr[i] = sregs.u.s.ppc32.sr[i];
1269     }
1270
1271     /* Sync BATs */
1272     for (i = 0; i < 8; i++) {
1273         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1274         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1275         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1276         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1277     }
1278
1279     return 0;
1280 }
1281
1282 int kvm_arch_get_registers(CPUState *cs)
1283 {
1284     PowerPCCPU *cpu = POWERPC_CPU(cs);
1285     CPUPPCState *env = &cpu->env;
1286     struct kvm_regs regs;
1287     uint32_t cr;
1288     int i, ret;
1289
1290     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1291     if (ret < 0)
1292         return ret;
1293
1294     cr = regs.cr;
1295     for (i = 7; i >= 0; i--) {
1296         env->crf[i] = cr & 15;
1297         cr >>= 4;
1298     }
1299
1300     env->ctr = regs.ctr;
1301     env->lr = regs.lr;
1302     cpu_write_xer(env, regs.xer);
1303     env->msr = regs.msr;
1304     env->nip = regs.pc;
1305
1306     env->spr[SPR_SRR0] = regs.srr0;
1307     env->spr[SPR_SRR1] = regs.srr1;
1308
1309     env->spr[SPR_SPRG0] = regs.sprg0;
1310     env->spr[SPR_SPRG1] = regs.sprg1;
1311     env->spr[SPR_SPRG2] = regs.sprg2;
1312     env->spr[SPR_SPRG3] = regs.sprg3;
1313     env->spr[SPR_SPRG4] = regs.sprg4;
1314     env->spr[SPR_SPRG5] = regs.sprg5;
1315     env->spr[SPR_SPRG6] = regs.sprg6;
1316     env->spr[SPR_SPRG7] = regs.sprg7;
1317
1318     env->spr[SPR_BOOKE_PID] = regs.pid;
1319
1320     for (i = 0;i < 32; i++)
1321         env->gpr[i] = regs.gpr[i];
1322
1323     kvm_get_fp(cs);
1324
1325     if (cap_booke_sregs) {
1326         ret = kvmppc_get_booke_sregs(cpu);
1327         if (ret < 0) {
1328             return ret;
1329         }
1330     }
1331
1332     if (cap_segstate) {
1333         ret = kvmppc_get_books_sregs(cpu);
1334         if (ret < 0) {
1335             return ret;
1336         }
1337     }
1338
1339     if (cap_hior) {
1340         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1341     }
1342
1343     if (cap_one_reg) {
1344         int i;
1345
1346         /* We deliberately ignore errors here, for kernels which have
1347          * the ONE_REG calls, but don't support the specific
1348          * registers, there's a reasonable chance things will still
1349          * work, at least until we try to migrate. */
1350         for (i = 0; i < 1024; i++) {
1351             uint64_t id = env->spr_cb[i].one_reg_id;
1352
1353             if (id != 0) {
1354                 kvm_get_one_spr(cs, id, i);
1355             }
1356         }
1357
1358 #ifdef TARGET_PPC64
1359         if (msr_ts) {
1360             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1361                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1362             }
1363             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1364                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1365             }
1366             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1367             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1368             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1369             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1370             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1371             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1372             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1373             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1374             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1375             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1376         }
1377
1378         if (cap_papr) {
1379             if (kvm_get_vpa(cs) < 0) {
1380                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1381             }
1382         }
1383
1384         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1385 #endif
1386     }
1387
1388     return 0;
1389 }
1390
1391 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1392 {
1393     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1394
1395     if (irq != PPC_INTERRUPT_EXT) {
1396         return 0;
1397     }
1398
1399     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1400         return 0;
1401     }
1402
1403     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1404
1405     return 0;
1406 }
1407
/*
 * PPC_INPUT_INT names the input-pin constant that is tested against
 * env->irq_input_state before an interrupt is injected; which constant is
 * used depends on the target being built.
 */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1415
1416 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1417 {
1418     PowerPCCPU *cpu = POWERPC_CPU(cs);
1419     CPUPPCState *env = &cpu->env;
1420     int r;
1421     unsigned irq;
1422
1423     qemu_mutex_lock_iothread();
1424
1425     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1426      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1427     if (!cap_interrupt_level &&
1428         run->ready_for_interrupt_injection &&
1429         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1430         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1431     {
1432         /* For now KVM disregards the 'irq' argument. However, in the
1433          * future KVM could cache it in-kernel to avoid a heavyweight exit
1434          * when reading the UIC.
1435          */
1436         irq = KVM_INTERRUPT_SET;
1437
1438         DPRINTF("injected interrupt %d\n", irq);
1439         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1440         if (r < 0) {
1441             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1442         }
1443
1444         /* Always wake up soon in case the interrupt was level based */
1445         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1446                        (NANOSECONDS_PER_SECOND / 50));
1447     }
1448
1449     /* We don't know if there are more interrupts pending after this. However,
1450      * the guest will return to userspace in the course of handling this one
1451      * anyways, so we will get a chance to deliver the rest. */
1452
1453     qemu_mutex_unlock_iothread();
1454 }
1455
1456 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1457 {
1458     return MEMTXATTRS_UNSPECIFIED;
1459 }
1460
1461 int kvm_arch_process_async_events(CPUState *cs)
1462 {
1463     return cs->halted;
1464 }
1465
1466 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1467 {
1468     CPUState *cs = CPU(cpu);
1469     CPUPPCState *env = &cpu->env;
1470
1471     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1472         cs->halted = 1;
1473         cs->exception_index = EXCP_HLT;
1474     }
1475
1476     return 0;
1477 }
1478
1479 /* map dcr access to existing qemu dcr emulation */
1480 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1481 {
1482     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1483         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1484
1485     return 0;
1486 }
1487
1488 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1489 {
1490     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1491         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1492
1493     return 0;
1494 }
1495
1496 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1497 {
1498     /* Mixed endian case is not handled */
1499     uint32_t sc = debug_inst_opcode;
1500
1501     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1502                             sizeof(sc), 0) ||
1503         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1504         return -EINVAL;
1505     }
1506
1507     return 0;
1508 }
1509
1510 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1511 {
1512     uint32_t sc;
1513
1514     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1515         sc != debug_inst_opcode ||
1516         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1517                             sizeof(sc), 1)) {
1518         return -EINVAL;
1519     }
1520
1521     return 0;
1522 }
1523
1524 static int find_hw_breakpoint(target_ulong addr, int type)
1525 {
1526     int n;
1527
1528     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1529            <= ARRAY_SIZE(hw_debug_points));
1530
1531     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1532         if (hw_debug_points[n].addr == addr &&
1533              hw_debug_points[n].type == type) {
1534             return n;
1535         }
1536     }
1537
1538     return -1;
1539 }
1540
1541 static int find_hw_watchpoint(target_ulong addr, int *flag)
1542 {
1543     int n;
1544
1545     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1546     if (n >= 0) {
1547         *flag = BP_MEM_ACCESS;
1548         return n;
1549     }
1550
1551     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1552     if (n >= 0) {
1553         *flag = BP_MEM_WRITE;
1554         return n;
1555     }
1556
1557     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1558     if (n >= 0) {
1559         *flag = BP_MEM_READ;
1560         return n;
1561     }
1562
1563     return -1;
1564 }
1565
1566 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1567                                   target_ulong len, int type)
1568 {
1569     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1570         return -ENOBUFS;
1571     }
1572
1573     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1574     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1575
1576     switch (type) {
1577     case GDB_BREAKPOINT_HW:
1578         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1579             return -ENOBUFS;
1580         }
1581
1582         if (find_hw_breakpoint(addr, type) >= 0) {
1583             return -EEXIST;
1584         }
1585
1586         nb_hw_breakpoint++;
1587         break;
1588
1589     case GDB_WATCHPOINT_WRITE:
1590     case GDB_WATCHPOINT_READ:
1591     case GDB_WATCHPOINT_ACCESS:
1592         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1593             return -ENOBUFS;
1594         }
1595
1596         if (find_hw_breakpoint(addr, type) >= 0) {
1597             return -EEXIST;
1598         }
1599
1600         nb_hw_watchpoint++;
1601         break;
1602
1603     default:
1604         return -ENOSYS;
1605     }
1606
1607     return 0;
1608 }
1609
1610 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1611                                   target_ulong len, int type)
1612 {
1613     int n;
1614
1615     n = find_hw_breakpoint(addr, type);
1616     if (n < 0) {
1617         return -ENOENT;
1618     }
1619
1620     switch (type) {
1621     case GDB_BREAKPOINT_HW:
1622         nb_hw_breakpoint--;
1623         break;
1624
1625     case GDB_WATCHPOINT_WRITE:
1626     case GDB_WATCHPOINT_READ:
1627     case GDB_WATCHPOINT_ACCESS:
1628         nb_hw_watchpoint--;
1629         break;
1630
1631     default:
1632         return -ENOSYS;
1633     }
1634     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1635
1636     return 0;
1637 }
1638
1639 void kvm_arch_remove_all_hw_breakpoints(void)
1640 {
1641     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1642 }
1643
1644 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1645 {
1646     int n;
1647
1648     /* Software Breakpoint updates */
1649     if (kvm_sw_breakpoints_active(cs)) {
1650         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1651     }
1652
1653     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1654            <= ARRAY_SIZE(hw_debug_points));
1655     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1656
1657     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1658         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1659         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1660         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1661             switch (hw_debug_points[n].type) {
1662             case GDB_BREAKPOINT_HW:
1663                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1664                 break;
1665             case GDB_WATCHPOINT_WRITE:
1666                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1667                 break;
1668             case GDB_WATCHPOINT_READ:
1669                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1670                 break;
1671             case GDB_WATCHPOINT_ACCESS:
1672                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1673                                         KVMPPC_DEBUG_WATCH_READ;
1674                 break;
1675             default:
1676                 cpu_abort(cs, "Unsupported breakpoint type\n");
1677             }
1678             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1679         }
1680     }
1681 }
1682
1683 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1684 {
1685     CPUState *cs = CPU(cpu);
1686     CPUPPCState *env = &cpu->env;
1687     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1688     int handle = 0;
1689     int n;
1690     int flag = 0;
1691
1692     if (cs->singlestep_enabled) {
1693         handle = 1;
1694     } else if (arch_info->status) {
1695         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1696             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1697                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1698                 if (n >= 0) {
1699                     handle = 1;
1700                 }
1701             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1702                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1703                 n = find_hw_watchpoint(arch_info->address,  &flag);
1704                 if (n >= 0) {
1705                     handle = 1;
1706                     cs->watchpoint_hit = &hw_watchpoint;
1707                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1708                     hw_watchpoint.flags = flag;
1709                 }
1710             }
1711         }
1712     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1713         handle = 1;
1714     } else {
1715         /* QEMU is not able to handle debug exception, so inject
1716          * program exception to guest;
1717          * Yes program exception NOT debug exception !!
1718          * When QEMU is using debug resources then debug exception must
1719          * be always set. To achieve this we set MSR_DE and also set
1720          * MSRP_DEP so guest cannot change MSR_DE.
1721          * When emulating debug resource for guest we want guest
1722          * to control MSR_DE (enable/disable debug interrupt on need).
1723          * Supporting both configurations are NOT possible.
1724          * So the result is that we cannot share debug resources
1725          * between QEMU and Guest on BOOKE architecture.
1726          * In the current design QEMU gets the priority over guest,
1727          * this means that if QEMU is using debug resources then guest
1728          * cannot use them;
1729          * For software breakpoint QEMU uses a privileged instruction;
1730          * So there cannot be any reason that we are here for guest
1731          * set debug exception, only possibility is guest executed a
1732          * privileged / illegal instruction and that's why we are
1733          * injecting a program interrupt.
1734          */
1735
1736         cpu_synchronize_state(cs);
1737         /* env->nip is PC, so increment this by 4 to use
1738          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1739          */
1740         env->nip += 4;
1741         cs->exception_index = POWERPC_EXCP_PROGRAM;
1742         env->error_code = POWERPC_EXCP_INVAL;
1743         ppc_cpu_do_interrupt(cs);
1744     }
1745
1746     return handle;
1747 }
1748
1749 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1750 {
1751     PowerPCCPU *cpu = POWERPC_CPU(cs);
1752     CPUPPCState *env = &cpu->env;
1753     int ret;
1754
1755     qemu_mutex_lock_iothread();
1756
1757     switch (run->exit_reason) {
1758     case KVM_EXIT_DCR:
1759         if (run->dcr.is_write) {
1760             DPRINTF("handle dcr write\n");
1761             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1762         } else {
1763             DPRINTF("handle dcr read\n");
1764             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1765         }
1766         break;
1767     case KVM_EXIT_HLT:
1768         DPRINTF("handle halt\n");
1769         ret = kvmppc_handle_halt(cpu);
1770         break;
1771 #if defined(TARGET_PPC64)
1772     case KVM_EXIT_PAPR_HCALL:
1773         DPRINTF("handle PAPR hypercall\n");
1774         run->papr_hcall.ret = spapr_hypercall(cpu,
1775                                               run->papr_hcall.nr,
1776                                               run->papr_hcall.args);
1777         ret = 0;
1778         break;
1779 #endif
1780     case KVM_EXIT_EPR:
1781         DPRINTF("handle epr\n");
1782         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1783         ret = 0;
1784         break;
1785     case KVM_EXIT_WATCHDOG:
1786         DPRINTF("handle watchdog expiry\n");
1787         watchdog_perform_action();
1788         ret = 0;
1789         break;
1790
1791     case KVM_EXIT_DEBUG:
1792         DPRINTF("handle debug exception\n");
1793         if (kvm_handle_debug(cpu, run)) {
1794             ret = EXCP_DEBUG;
1795             break;
1796         }
1797         /* re-enter, this exception was guest-internal */
1798         ret = 0;
1799         break;
1800
1801     default:
1802         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1803         ret = -1;
1804         break;
1805     }
1806
1807     qemu_mutex_unlock_iothread();
1808     return ret;
1809 }
1810
1811 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1812 {
1813     CPUState *cs = CPU(cpu);
1814     uint32_t bits = tsr_bits;
1815     struct kvm_one_reg reg = {
1816         .id = KVM_REG_PPC_OR_TSR,
1817         .addr = (uintptr_t) &bits,
1818     };
1819
1820     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1821 }
1822
1823 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1824 {
1825
1826     CPUState *cs = CPU(cpu);
1827     uint32_t bits = tsr_bits;
1828     struct kvm_one_reg reg = {
1829         .id = KVM_REG_PPC_CLEAR_TSR,
1830         .addr = (uintptr_t) &bits,
1831     };
1832
1833     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1834 }
1835
1836 int kvmppc_set_tcr(PowerPCCPU *cpu)
1837 {
1838     CPUState *cs = CPU(cpu);
1839     CPUPPCState *env = &cpu->env;
1840     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1841
1842     struct kvm_one_reg reg = {
1843         .id = KVM_REG_PPC_TCR,
1844         .addr = (uintptr_t) &tcr,
1845     };
1846
1847     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1848 }
1849
1850 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1851 {
1852     CPUState *cs = CPU(cpu);
1853     int ret;
1854
1855     if (!kvm_enabled()) {
1856         return -1;
1857     }
1858
1859     if (!cap_ppc_watchdog) {
1860         printf("warning: KVM does not support watchdog");
1861         return -1;
1862     }
1863
1864     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1865     if (ret < 0) {
1866         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1867                 __func__, strerror(-ret));
1868         return ret;
1869     }
1870
1871     return ret;
1872 }
1873
/*
 * Scan /proc/cpuinfo for the first line that starts with @field and copy
 * that whole line into @value (at most @len bytes, via pstrcpy()).
 *
 * Returns 0 when a matching line was found, -1 otherwise (including when
 * the file cannot be opened).
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int status = -1;
    FILE *cpuinfo = fopen("/proc/cpuinfo", "r");

    if (cpuinfo == NULL) {
        return -1;
    }

    while (fgets(line, sizeof(line), cpuinfo) != NULL) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            status = 0;
            break;
        }
        if (line[0] == '\0') {
            /* A line that reads back empty ends the scan */
            break;
        }
    }

    fclose(cpuinfo);

    return status;
}
1901
1902 uint32_t kvmppc_get_tbfreq(void)
1903 {
1904     char line[512];
1905     char *ns;
1906     uint32_t retval = NANOSECONDS_PER_SECOND;
1907
1908     if (read_cpuinfo("timebase", line, sizeof(line))) {
1909         return retval;
1910     }
1911
1912     if (!(ns = strchr(line, ':'))) {
1913         return retval;
1914     }
1915
1916     ns++;
1917
1918     return atoi(ns);
1919 }
1920
/*
 * Load the contents of /proc/device-tree/system-id into a newly allocated
 * buffer stored in *@value.
 *
 * Returns the result of g_file_get_contents().
 */
bool kvmppc_get_host_serial(char **value)
{
    static const char system_id_path[] = "/proc/device-tree/system-id";

    return g_file_get_contents(system_id_path, value, NULL, NULL);
}
1926
/*
 * Load the contents of /proc/device-tree/model into a newly allocated
 * buffer stored in *@value.
 *
 * Returns the result of g_file_get_contents().
 */
bool kvmppc_get_host_model(char **value)
{
    static const char model_path[] = "/proc/device-tree/model";

    return g_file_get_contents(model_path, value, NULL, NULL);
}
1931
1932 /* Try to find a device tree node for a CPU with clock-frequency property */
1933 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1934 {
1935     struct dirent *dirp;
1936     DIR *dp;
1937
1938     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1939         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1940         return -1;
1941     }
1942
1943     buf[0] = '\0';
1944     while ((dirp = readdir(dp)) != NULL) {
1945         FILE *f;
1946         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1947                  dirp->d_name);
1948         f = fopen(buf, "r");
1949         if (f) {
1950             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1951             fclose(f);
1952             break;
1953         }
1954         buf[0] = '\0';
1955     }
1956     closedir(dp);
1957     if (buf[0] == '\0') {
1958         printf("Unknown host!\n");
1959         return -1;
1960     }
1961
1962     return 0;
1963 }
1964
/*
 * Read a big-endian integer stored in the file @filename.
 *
 * A file of exactly 4 bytes is decoded as 32-bit, one of exactly 8 bytes
 * as 64-bit.  Returns (uint64_t)-1 when the file cannot be opened and 0
 * when the number of bytes read is neither 4 nor 8.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    int nread;
    FILE *prop = fopen(filename, "rb");

    if (prop == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), prop);
    fclose(prop);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        /* property is a 64-bit quantity */
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1991
1992 /* Read a CPU node property from the host device tree that's a single
1993  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1994  * (can't find or open the property, or doesn't understand the
1995  * format) */
1996 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1997 {
1998     char buf[PATH_MAX], *tmp;
1999     uint64_t val;
2000
2001     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
2002         return -1;
2003     }
2004
2005     tmp = g_strdup_printf("%s/%s", buf, propname);
2006     val = kvmppc_read_int_dt(tmp);
2007     g_free(tmp);
2008
2009     return val;
2010 }
2011
/*
 * Return the host CPU node's "clock-frequency" device tree property, as
 * read by kvmppc_read_int_cpu_dt().
 */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
2016
/*
 * Return the host CPU node's "ibm,vmx" device tree property, truncated
 * to 32 bits (so a failed lookup reads back as (uint32_t)-1).
 */
uint32_t kvmppc_get_vmx(void)
{
    uint32_t vmx = kvmppc_read_int_cpu_dt("ibm,vmx");

    return vmx;
}
2021
/*
 * Return the host CPU node's "ibm,dfp" device tree property, truncated
 * to 32 bits (so a failed lookup reads back as (uint32_t)-1).
 */
uint32_t kvmppc_get_dfp(void)
{
    uint32_t dfp = kvmppc_read_int_cpu_dt("ibm,dfp");

    return dfp;
}
2026
2027 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2028  {
2029      PowerPCCPU *cpu = ppc_env_get_cpu(env);
2030      CPUState *cs = CPU(cpu);
2031
2032     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2033         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2034         return 0;
2035     }
2036
2037     return 1;
2038 }
2039
2040 int kvmppc_get_hasidle(CPUPPCState *env)
2041 {
2042     struct kvm_ppc_pvinfo pvinfo;
2043
2044     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2045         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2046         return 1;
2047     }
2048
2049     return 0;
2050 }
2051
2052 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2053 {
2054     uint32_t *hc = (uint32_t*)buf;
2055     struct kvm_ppc_pvinfo pvinfo;
2056
2057     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2058         memcpy(buf, pvinfo.hcall, buf_len);
2059         return 0;
2060     }
2061
2062     /*
2063      * Fallback to always fail hypercalls regardless of endianness:
2064      *
2065      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2066      *     li r3, -1
2067      *     b .+8       (becomes nop in wrong endian)
2068      *     bswap32(li r3, -1)
2069      */
2070
2071     hc[0] = cpu_to_be32(0x08000048);
2072     hc[1] = cpu_to_be32(0x3860ffff);
2073     hc[2] = cpu_to_be32(0x48000008);
2074     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2075
2076     return 1;
2077 }
2078
2079 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2080 {
2081     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2082 }
2083
2084 void kvmppc_enable_logical_ci_hcalls(void)
2085 {
2086     /*
2087      * FIXME: it would be nice if we could detect the cases where
2088      * we're using a device which requires the in kernel
2089      * implementation of these hcalls, but the kernel lacks them and
2090      * produce a warning.
2091      */
2092     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2093     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2094 }
2095
2096 void kvmppc_enable_set_mode_hcall(void)
2097 {
2098     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2099 }
2100
2101 void kvmppc_enable_clear_ref_mod_hcalls(void)
2102 {
2103     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2104     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2105 }
2106
2107 void kvmppc_set_papr(PowerPCCPU *cpu)
2108 {
2109     CPUState *cs = CPU(cpu);
2110     int ret;
2111
2112     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2113     if (ret) {
2114         error_report("This vCPU type or KVM version does not support PAPR");
2115         exit(1);
2116     }
2117
2118     /* Update the capability flag so we sync the right information
2119      * with kvm */
2120     cap_papr = 1;
2121 }
2122
2123 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2124 {
2125     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2126 }
2127
2128 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2129 {
2130     CPUState *cs = CPU(cpu);
2131     int ret;
2132
2133     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2134     if (ret && mpic_proxy) {
2135         error_report("This KVM version does not support EPR");
2136         exit(1);
2137     }
2138 }
2139
2140 int kvmppc_smt_threads(void)
2141 {
2142     return cap_ppc_smt ? cap_ppc_smt : 1;
2143 }
2144
2145 int kvmppc_set_smt_threads(int smt)
2146 {
2147     int ret;
2148
2149     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2150     if (!ret) {
2151         cap_ppc_smt = smt;
2152     }
2153     return ret;
2154 }
2155
2156 void kvmppc_hint_smt_possible(Error **errp)
2157 {
2158     int i;
2159     GString *g;
2160     char *s;
2161
2162     assert(kvm_enabled());
2163     if (cap_ppc_smt_possible) {
2164         g = g_string_new("Available VSMT modes:");
2165         for (i = 63; i >= 0; i--) {
2166             if ((1UL << i) & cap_ppc_smt_possible) {
2167                 g_string_append_printf(g, " %lu", (1UL << i));
2168             }
2169         }
2170         s = g_string_free(g, false);
2171         error_append_hint(errp, "%s.\n", s);
2172         g_free(s);
2173     } else {
2174         error_append_hint(errp,
2175                           "This KVM seems to be too old to support VSMT.\n");
2176     }
2177 }
2178
2179
2180 #ifdef TARGET_PPC64
2181 off_t kvmppc_alloc_rma(void **rma)
2182 {
2183     off_t size;
2184     int fd;
2185     struct kvm_allocate_rma ret;
2186
2187     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2188      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2189      *                      not necessary on this hardware
2190      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2191      *
2192      * FIXME: We should allow the user to force contiguous RMA
2193      * allocation in the cap_ppc_rma==1 case.
2194      */
2195     if (cap_ppc_rma < 2) {
2196         return 0;
2197     }
2198
2199     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2200     if (fd < 0) {
2201         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2202                 strerror(errno));
2203         return -1;
2204     }
2205
2206     size = MIN(ret.rma_size, 256ul << 20);
2207
2208     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2209     if (*rma == MAP_FAILED) {
2210         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2211         return -1;
2212     };
2213
2214     return size;
2215 }
2216
2217 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2218 {
2219     struct kvm_ppc_smmu_info info;
2220     long rampagesize, best_page_shift;
2221     int i;
2222
2223     if (cap_ppc_rma >= 2) {
2224         return current_size;
2225     }
2226
2227     /* Find the largest hardware supported page size that's less than
2228      * or equal to the (logical) backing page size of guest RAM */
2229     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2230     rampagesize = qemu_getrampagesize();
2231     best_page_shift = 0;
2232
2233     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2234         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2235
2236         if (!sps->page_shift) {
2237             continue;
2238         }
2239
2240         if ((sps->page_shift > best_page_shift)
2241             && ((1UL << sps->page_shift) <= rampagesize)) {
2242             best_page_shift = sps->page_shift;
2243         }
2244     }
2245
2246     return MIN(current_size,
2247                1ULL << (best_page_shift + hash_shift - 7));
2248 }
2249 #endif
2250
2251 bool kvmppc_spapr_use_multitce(void)
2252 {
2253     return cap_spapr_multitce;
2254 }
2255
2256 int kvmppc_spapr_enable_inkernel_multitce(void)
2257 {
2258     int ret;
2259
2260     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2261                             H_PUT_TCE_INDIRECT, 1);
2262     if (!ret) {
2263         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2264                                 H_STUFF_TCE, 1);
2265     }
2266
2267     return ret;
2268 }
2269
2270 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2271                               uint64_t bus_offset, uint32_t nb_table,
2272                               int *pfd, bool need_vfio)
2273 {
2274     long len;
2275     int fd;
2276     void *table;
2277
2278     /* Must set fd to -1 so we don't try to munmap when called for
2279      * destroying the table, which the upper layers -will- do
2280      */
2281     *pfd = -1;
2282     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2283         return NULL;
2284     }
2285
2286     if (cap_spapr_tce_64) {
2287         struct kvm_create_spapr_tce_64 args = {
2288             .liobn = liobn,
2289             .page_shift = page_shift,
2290             .offset = bus_offset >> page_shift,
2291             .size = nb_table,
2292             .flags = 0
2293         };
2294         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2295         if (fd < 0) {
2296             fprintf(stderr,
2297                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2298                     liobn);
2299             return NULL;
2300         }
2301     } else if (cap_spapr_tce) {
2302         uint64_t window_size = (uint64_t) nb_table << page_shift;
2303         struct kvm_create_spapr_tce args = {
2304             .liobn = liobn,
2305             .window_size = window_size,
2306         };
2307         if ((window_size != args.window_size) || bus_offset) {
2308             return NULL;
2309         }
2310         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2311         if (fd < 0) {
2312             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2313                     liobn);
2314             return NULL;
2315         }
2316     } else {
2317         return NULL;
2318     }
2319
2320     len = nb_table * sizeof(uint64_t);
2321     /* FIXME: round this up to page size */
2322
2323     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2324     if (table == MAP_FAILED) {
2325         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2326                 liobn);
2327         close(fd);
2328         return NULL;
2329     }
2330
2331     *pfd = fd;
2332     return table;
2333 }
2334
/*
 * Unmap a TCE table of @nb_table 64-bit entries previously mapped at
 * @table and close its @fd.
 *
 * Returns -1 when @fd is negative (nothing to tear down), 0 otherwise.
 * A failing munmap()/close() is only reported, not propagated.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        /* Terminate the line so later output does not run into it */
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2353
2354 int kvmppc_reset_htab(int shift_hint)
2355 {
2356     uint32_t shift = shift_hint;
2357
2358     if (!kvm_enabled()) {
2359         /* Full emulation, tell caller to allocate htab itself */
2360         return 0;
2361     }
2362     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2363         int ret;
2364         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2365         if (ret == -ENOTTY) {
2366             /* At least some versions of PR KVM advertise the
2367              * capability, but don't implement the ioctl().  Oops.
2368              * Return 0 so that we allocate the htab in qemu, as is
2369              * correct for PR. */
2370             return 0;
2371         } else if (ret < 0) {
2372             return ret;
2373         }
2374         return shift;
2375     }
2376
2377     /* We have a kernel that predates the htab reset calls.  For PR
2378      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2379      * this era, it has allocated a 16MB fixed size hash table already. */
2380     if (kvmppc_is_pr(kvm_state)) {
2381         /* PR - tell caller to allocate htab */
2382         return 0;
2383     } else {
2384         /* HV - assume 16MB kernel allocated htab */
2385         return 24;
2386     }
2387 }
2388
2389 static inline uint32_t mfpvr(void)
2390 {
2391     uint32_t pvr;
2392
2393     asm ("mfpvr %0"
2394          : "=r"(pvr));
2395     return pvr;
2396 }
2397
/*
 * Set (when @on is true) or clear (when @on is false) the bits of @flags
 * in *@word, leaving all other bits untouched.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    uint64_t without_flags = *word & ~flags;

    *word = on ? (without_flags | flags) : without_flags;
}
2406
2407 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2408 {
2409     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2410     uint32_t vmx = kvmppc_get_vmx();
2411     uint32_t dfp = kvmppc_get_dfp();
2412     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2413     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2414
2415     /* Now fix up the class with information we can query from the host */
2416     pcc->pvr = mfpvr();
2417
2418     if (vmx != -1) {
2419         /* Only override when we know what the host supports */
2420         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2421         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2422     }
2423     if (dfp != -1) {
2424         /* Only override when we know what the host supports */
2425         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2426     }
2427
2428     if (dcache_size != -1) {
2429         pcc->l1_dcache_size = dcache_size;
2430     }
2431
2432     if (icache_size != -1) {
2433         pcc->l1_icache_size = icache_size;
2434     }
2435
2436 #if defined(TARGET_PPC64)
2437     pcc->radix_page_info = kvm_get_radix_page_info();
2438
2439     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2440         /*
2441          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2442          * compliant.  More importantly, advertising ISA 3.00
2443          * architected mode may prevent guests from activating
2444          * necessary DD1 workarounds.
2445          */
2446         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2447                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2448     }
2449 #endif /* defined(TARGET_PPC64) */
2450 }
2451
2452 bool kvmppc_has_cap_epr(void)
2453 {
2454     return cap_epr;
2455 }
2456
2457 bool kvmppc_has_cap_fixup_hcalls(void)
2458 {
2459     return cap_fixup_hcalls;
2460 }
2461
2462 bool kvmppc_has_cap_htm(void)
2463 {
2464     return cap_htm;
2465 }
2466
2467 bool kvmppc_has_cap_mmu_radix(void)
2468 {
2469     return cap_mmu_radix;
2470 }
2471
2472 bool kvmppc_has_cap_mmu_hash_v3(void)
2473 {
2474     return cap_mmu_hash_v3;
2475 }
2476
2477 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2478 {
2479     uint32_t host_pvr = mfpvr();
2480     PowerPCCPUClass *pvr_pcc;
2481
2482     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2483     if (pvr_pcc == NULL) {
2484         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2485     }
2486
2487     return pvr_pcc;
2488 }
2489
2490 static int kvm_ppc_register_host_cpu_type(void)
2491 {
2492     TypeInfo type_info = {
2493         .name = TYPE_HOST_POWERPC_CPU,
2494         .class_init = kvmppc_host_cpu_class_init,
2495     };
2496     PowerPCCPUClass *pvr_pcc;
2497     ObjectClass *oc;
2498     DeviceClass *dc;
2499     int i;
2500
2501     pvr_pcc = kvm_ppc_get_host_cpu_class();
2502     if (pvr_pcc == NULL) {
2503         return -1;
2504     }
2505     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2506     type_register(&type_info);
2507
2508     oc = object_class_by_name(type_info.name);
2509     g_assert(oc);
2510
2511 #if defined(TARGET_PPC64)
2512     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2513     type_info.parent = TYPE_SPAPR_CPU_CORE,
2514     type_info.instance_size = sizeof(sPAPRCPUCore);
2515     type_info.instance_init = NULL;
2516     type_info.class_init = spapr_cpu_core_class_init;
2517     type_info.class_data = (void *) "host";
2518     type_register(&type_info);
2519     g_free((void *)type_info.name);
2520 #endif
2521
2522     /*
2523      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2524      * we want "POWER8" to be a "family" alias that points to the current
2525      * host CPU type, too)
2526      */
2527     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2528     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2529         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2530             char *suffix;
2531
2532             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2533             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2534             if (suffix) {
2535                 *suffix = 0;
2536             }
2537             break;
2538         }
2539     }
2540
2541     return 0;
2542 }
2543
2544 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2545 {
2546     struct kvm_rtas_token_args args = {
2547         .token = token,
2548     };
2549
2550     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2551         return -ENOENT;
2552     }
2553
2554     strncpy(args.name, function, sizeof(args.name));
2555
2556     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2557 }
2558
2559 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2560 {
2561     struct kvm_get_htab_fd s = {
2562         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2563         .start_index = index,
2564     };
2565     int ret;
2566
2567     if (!cap_htab_fd) {
2568         error_setg(errp, "KVM version doesn't support %s the HPT",
2569                    write ? "writing" : "reading");
2570         return -ENOTSUP;
2571     }
2572
2573     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2574     if (ret < 0) {
2575         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2576                    write ? "writing" : "reading", write ? "to" : "from",
2577                    strerror(errno));
2578         return -errno;
2579     }
2580
2581     return ret;
2582 }
2583
2584 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2585 {
2586     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2587     uint8_t buf[bufsize];
2588     ssize_t rc;
2589
2590     do {
2591         rc = read(fd, buf, bufsize);
2592         if (rc < 0) {
2593             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2594                     strerror(errno));
2595             return rc;
2596         } else if (rc) {
2597             uint8_t *buffer = buf;
2598             ssize_t n = rc;
2599             while (n) {
2600                 struct kvm_get_htab_header *head =
2601                     (struct kvm_get_htab_header *) buffer;
2602                 size_t chunksize = sizeof(*head) +
2603                      HASH_PTE_SIZE_64 * head->n_valid;
2604
2605                 qemu_put_be32(f, head->index);
2606                 qemu_put_be16(f, head->n_valid);
2607                 qemu_put_be16(f, head->n_invalid);
2608                 qemu_put_buffer(f, (void *)(head + 1),
2609                                 HASH_PTE_SIZE_64 * head->n_valid);
2610
2611                 buffer += chunksize;
2612                 n -= chunksize;
2613             }
2614         }
2615     } while ((rc != 0)
2616              && ((max_ns < 0)
2617                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2618
2619     return (rc == 0) ? 1 : 0;
2620 }
2621
2622 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2623                            uint16_t n_valid, uint16_t n_invalid)
2624 {
2625     struct kvm_get_htab_header *buf;
2626     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2627     ssize_t rc;
2628
2629     buf = alloca(chunksize);
2630     buf->index = index;
2631     buf->n_valid = n_valid;
2632     buf->n_invalid = n_invalid;
2633
2634     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2635
2636     rc = write(fd, buf, chunksize);
2637     if (rc < 0) {
2638         fprintf(stderr, "Error writing KVM hash table: %s\n",
2639                 strerror(errno));
2640         return rc;
2641     }
2642     if (rc != chunksize) {
2643         /* We should never get a short write on a single chunk */
2644         fprintf(stderr, "Short write, restoring KVM hash table\n");
2645         return -1;
2646     }
2647     return 0;
2648 }
2649
2650 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2651 {
2652     return true;
2653 }
2654
2655 void kvm_arch_init_irq_routing(KVMState *s)
2656 {
2657 }
2658
2659 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2660 {
2661     int fd, rc;
2662     int i;
2663
2664     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2665
2666     i = 0;
2667     while (i < n) {
2668         struct kvm_get_htab_header *hdr;
2669         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2670         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2671
2672         rc = read(fd, buf, sizeof(buf));
2673         if (rc < 0) {
2674             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2675         }
2676
2677         hdr = (struct kvm_get_htab_header *)buf;
2678         while ((i < n) && ((char *)hdr < (buf + rc))) {
2679             int invalid = hdr->n_invalid;
2680
2681             if (hdr->index != (ptex + i)) {
2682                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2683                          " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2684             }
2685
2686             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2687             i += hdr->n_valid;
2688
2689             if ((n - i) < invalid) {
2690                 invalid = n - i;
2691             }
2692             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2693             i += hdr->n_invalid;
2694
2695             hdr = (struct kvm_get_htab_header *)
2696                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2697         }
2698     }
2699
2700     close(fd);
2701 }
2702
2703 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2704 {
2705     int fd, rc;
2706     struct {
2707         struct kvm_get_htab_header hdr;
2708         uint64_t pte0;
2709         uint64_t pte1;
2710     } buf;
2711
2712     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2713
2714     buf.hdr.n_valid = 1;
2715     buf.hdr.n_invalid = 0;
2716     buf.hdr.index = ptex;
2717     buf.pte0 = cpu_to_be64(pte0);
2718     buf.pte1 = cpu_to_be64(pte1);
2719
2720     rc = write(fd, &buf, sizeof(buf));
2721     if (rc != sizeof(buf)) {
2722         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2723     }
2724     close(fd);
2725 }
2726
2727 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2728                              uint64_t address, uint32_t data, PCIDevice *dev)
2729 {
2730     return 0;
2731 }
2732
2733 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2734                                 int vector, PCIDevice *dev)
2735 {
2736     return 0;
2737 }
2738
/* PowerPC implementation of this arch hook: does nothing, returns 0. */
int kvm_arch_release_virq_post(int virq)
{
    (void) virq;

    return 0;
}
2743
/* Return the low 16 bits of the MSI @data word. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t low16_mask = 0xffff;

    return data & low16_mask;
}
2748
2749 int kvmppc_enable_hwrng(void)
2750 {
2751     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2752         return -1;
2753     }
2754
2755     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2756 }
2757
2758 void kvmppc_check_papr_resize_hpt(Error **errp)
2759 {
2760     if (!kvm_enabled()) {
2761         return; /* No KVM, we're good */
2762     }
2763
2764     if (cap_resize_hpt) {
2765         return; /* Kernel has explicit support, we're good */
2766     }
2767
2768     /* Otherwise fallback on looking for PR KVM */
2769     if (kvmppc_is_pr(kvm_state)) {
2770         return;
2771     }
2772
2773     error_setg(errp,
2774                "Hash page table resizing not available with this KVM version");
2775 }
2776
2777 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2778 {
2779     CPUState *cs = CPU(cpu);
2780     struct kvm_ppc_resize_hpt rhpt = {
2781         .flags = flags,
2782         .shift = shift,
2783     };
2784
2785     if (!cap_resize_hpt) {
2786         return -ENOSYS;
2787     }
2788
2789     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2790 }
2791
2792 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2793 {
2794     CPUState *cs = CPU(cpu);
2795     struct kvm_ppc_resize_hpt rhpt = {
2796         .flags = flags,
2797         .shift = shift,
2798     };
2799
2800     if (!cap_resize_hpt) {
2801         return -ENOSYS;
2802     }
2803
2804     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2805 }
2806
2807 /*
2808  * This is a helper function to detect a post migration scenario
2809  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2810  * the guest kernel can't handle a PVR value other than the actual host
2811  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2812  *
2813  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2814  * (so, we're HV), return true. The workaround itself is done in
2815  * cpu_post_load.
2816  *
2817  * The order here is important: we'll only check for KVM PR as a
2818  * fallback if the guest kernel can't handle the situation itself.
2819  * We need to avoid as much as possible querying the running KVM type
2820  * in QEMU level.
2821  */
2822 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2823 {
2824     CPUState *cs = CPU(cpu);
2825
2826     if (!kvm_enabled()) {
2827         return false;
2828     }
2829
2830     if (cap_ppc_pvr_compat) {
2831         return false;
2832     }
2833
2834     return !kvmppc_is_pr(cs->kvm_state);
2835 }