target/ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include "qemu/osdep.h"
  18 #include <dirent.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/vfs.h>
  21
  22 #include <linux/kvm.h>
  23
  24 #include "qemu-common.h"
  25 #include "qapi/error.h"
  26 #include "qemu/error-report.h"
  27 #include "cpu.h"
  28 #include "cpu-models.h"
  29 #include "qemu/timer.h"
  30 #include "sysemu/sysemu.h"
  31 #include "sysemu/hw_accel.h"
  32 #include "kvm_ppc.h"
  33 #include "sysemu/cpus.h"
  34 #include "sysemu/device_tree.h"
  35 #include "mmu-hash64.h"
  36
  37 #include "hw/sysbus.h"
  38 #include "hw/ppc/spapr.h"
  39 #include "hw/ppc/spapr_vio.h"
  40 #include "hw/ppc/spapr_cpu_core.h"
  41 #include "hw/ppc/ppc.h"
  42 #include "sysemu/watchdog.h"
  43 #include "trace.h"
  44 #include "exec/gdbstub.h"
  45 #include "exec/memattrs.h"
  46 #include "exec/ram_addr.h"
  47 #include "sysemu/hostmem.h"
  48 #include "qemu/cutils.h"
  49 #include "qemu/mmap-alloc.h"
  50 #if defined(TARGET_PPC64)
  51 #include "hw/ppc/spapr_cpu_core.h"
  52 #endif
  53 #include "elf.h"
  54 #include "sysemu/kvm_int.h"
  55
/* Uncomment to compile in the DPRINTF() debug output defined below. */
//#define DEBUG_KVM

/*
 * DPRINTF(): printf-style debug output to stderr when DEBUG_KVM is
 * defined; otherwise it expands to an empty statement, so its arguments
 * are neither evaluated nor type-checked.
 */
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
  65
/* Path prefix of the host CPU nodes under /proc/device-tree. */
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  67
/*
 * Capabilities required by this target: the list holds only the
 * KVM_CAP_LAST_INFO terminator, i.e. no capability is mandatory.
 */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
  71
/*
 * Cached KVM capability values.  Except where noted they are filled in
 * by kvm_arch_init(); cap_papr is deliberately not set there, and
 * cap_htm may additionally be turned on from kvm_arch_init_vcpu().
 */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_smt_possible;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_tce_64;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
static int cap_htm;             /* Hardware transactional memory support */
static int cap_mmu_radix;
static int cap_mmu_hash_v3;
static int cap_resize_hpt;
static int cap_ppc_pvr_compat;

/* Value of KVM_REG_PPC_DEBUG_INST, read in kvm_arch_init_vcpu() */
static uint32_t debug_inst_opcode;
  97
/* XXX We have a race condition: the interrupt is really level triggered,
 *     but the infrastructure can't expose that yet, so the guest takes the
 *     interrupt but ignores it, goes to sleep and is never notified that
 *     an interrupt is still pending.
 *
 *     As a quick workaround, wake up again 20 ms after injecting an
 *     interrupt. That way we can be sure that interrupts are always
 *     reinjected in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;
 108
 109 static void kvm_kick_cpu(void *opaque)
 110 {
 111     PowerPCCPU *cpu = opaque;
 112
 113     qemu_cpu_kick(CPU(cpu));
 114 }
 115
 116 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 117  * should only be used for fallback tests - generally we should use
 118  * explicit capabilities for the features we want, rather than
 119  * assuming what is/isn't available depending on the KVM variant. */
 120 static bool kvmppc_is_pr(KVMState *ks)
 121 {
 122     /* Assume KVM-PR if the GET_PVINFO capability is available */
 123     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 124 }
 125
/* Forward declaration: called from kvm_arch_init(), defined further down */
static int kvm_ppc_register_host_cpu_type(void);
 127
 128 int kvm_arch_init(MachineState *ms, KVMState *s)
 129 {
 130     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 131     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 132     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 133     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 134     cap_ppc_smt_possible = kvm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
 135     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 136     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 137     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
 138     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 139     cap_spapr_vfio = false;
 140     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 141     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 142     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 143     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 144     /* Note: we don't set cap_papr here, because this capability is
 145      * only activated after this by kvmppc_set_papr() */
 146     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 147     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 148     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
 149     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 150     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
 151     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
 152     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
 153     /*
 154      * Note: setting it to false because there is not such capability
 155      * in KVM at this moment.
 156      *
 157      * TODO: call kvm_vm_check_extension() with the right capability
 158      * after the kernel starts implementing it.*/
 159     cap_ppc_pvr_compat = false;
 160
 161     if (!cap_interrupt_level) {
 162         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 163                         "VM to stall at times!\n");
 164     }
 165
 166     kvm_ppc_register_host_cpu_type();
 167
 168     return 0;
 169 }
 170
 171 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 172 {
 173     return 0;
 174 }
 175
 176 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 177 {
 178     CPUPPCState *cenv = &cpu->env;
 179     CPUState *cs = CPU(cpu);
 180     struct kvm_sregs sregs;
 181     int ret;
 182
 183     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 184         /* What we're really trying to say is "if we're on BookE, we use
 185            the native PVR for now". This is the only sane way to check
 186            it though, so we potentially confuse users that they can run
 187            BookE guests on BookS. Let's hope nobody dares enough :) */
 188         return 0;
 189     } else {
 190         if (!cap_segstate) {
 191             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 192             return -ENOSYS;
 193         }
 194     }
 195
 196     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 197     if (ret) {
 198         return ret;
 199     }
 200
 201     sregs.pvr = cenv->spr[SPR_PVR];
 202     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 203 }
 204
 205 /* Set up a shared TLB array with KVM */
 206 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 207 {
 208     CPUPPCState *env = &cpu->env;
 209     CPUState *cs = CPU(cpu);
 210     struct kvm_book3e_206_tlb_params params = {};
 211     struct kvm_config_tlb cfg = {};
 212     unsigned int entries = 0;
 213     int ret, i;
 214
 215     if (!kvm_enabled() ||
 216         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 217         return 0;
 218     }
 219
 220     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 221
 222     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 223         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 224         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 225         entries += params.tlb_sizes[i];
 226     }
 227
 228     assert(entries == env->nb_tlb);
 229     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 230
 231     env->tlb_dirty = true;
 232
 233     cfg.array = (uintptr_t)env->tlb.tlbm;
 234     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 235     cfg.params = (uintptr_t)&params;
 236     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 237
 238     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 239     if (ret < 0) {
 240         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 241                 __func__, strerror(-ret));
 242         return ret;
 243     }
 244
 245     env->kvm_sw_tlb = true;
 246     return 0;
 247 }
 248
 249
 250 #if defined(TARGET_PPC64)
 251 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 252                                        struct kvm_ppc_smmu_info *info)
 253 {
 254     CPUPPCState *env = &cpu->env;
 255     CPUState *cs = CPU(cpu);
 256
 257     memset(info, 0, sizeof(*info));
 258
 259     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 260      * need to "guess" what the supported page sizes are.
 261      *
 262      * For that to work we make a few assumptions:
 263      *
 264      * - Check whether we are running "PR" KVM which only supports 4K
 265      *   and 16M pages, but supports them regardless of the backing
 266      *   store characteritics. We also don't support 1T segments.
 267      *
 268      *   This is safe as if HV KVM ever supports that capability or PR
 269      *   KVM grows supports for more page/segment sizes, those versions
 270      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 271      *   will not hit this fallback
 272      *
 273      * - Else we are running HV KVM. This means we only support page
 274      *   sizes that fit in the backing store. Additionally we only
 275      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 276      *   P7 encodings for the SLB and hash table. Here too, we assume
 277      *   support for any newer processor will mean a kernel that
 278      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 279      *   this fallback.
 280      */
 281     if (kvmppc_is_pr(cs->kvm_state)) {
 282         /* No flags */
 283         info->flags = 0;
 284         info->slb_size = 64;
 285
 286         /* Standard 4k base page size segment */
 287         info->sps[0].page_shift = 12;
 288         info->sps[0].slb_enc = 0;
 289         info->sps[0].enc[0].page_shift = 12;
 290         info->sps[0].enc[0].pte_enc = 0;
 291
 292         /* Standard 16M large page size segment */
 293         info->sps[1].page_shift = 24;
 294         info->sps[1].slb_enc = SLB_VSID_L;
 295         info->sps[1].enc[0].page_shift = 24;
 296         info->sps[1].enc[0].pte_enc = 0;
 297     } else {
 298         int i = 0;
 299
 300         /* HV KVM has backing store size restrictions */
 301         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 302
 303         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 304             info->flags |= KVM_PPC_1T_SEGMENTS;
 305         }
 306
 307         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
 308            POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
 309             info->slb_size = 32;
 310         } else {
 311             info->slb_size = 64;
 312         }
 313
 314         /* Standard 4k base page size segment */
 315         info->sps[i].page_shift = 12;
 316         info->sps[i].slb_enc = 0;
 317         info->sps[i].enc[0].page_shift = 12;
 318         info->sps[i].enc[0].pte_enc = 0;
 319         i++;
 320
 321         /* 64K on MMU 2.06 and later */
 322         if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
 323             POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
 324             info->sps[i].page_shift = 16;
 325             info->sps[i].slb_enc = 0x110;
 326             info->sps[i].enc[0].page_shift = 16;
 327             info->sps[i].enc[0].pte_enc = 1;
 328             i++;
 329         }
 330
 331         /* Standard 16M large page size segment */
 332         info->sps[i].page_shift = 24;
 333         info->sps[i].slb_enc = SLB_VSID_L;
 334         info->sps[i].enc[0].page_shift = 24;
 335         info->sps[i].enc[0].pte_enc = 0;
 336     }
 337 }
 338
 339 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 340 {
 341     CPUState *cs = CPU(cpu);
 342     int ret;
 343
 344     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 345         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 346         if (ret == 0) {
 347             return;
 348         }
 349     }
 350
 351     kvm_get_fallback_smmu_info(cpu, info);
 352 }
 353
 354 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
 355 {
 356     KVMState *s = KVM_STATE(current_machine->accelerator);
 357     struct ppc_radix_page_info *radix_page_info;
 358     struct kvm_ppc_rmmu_info rmmu_info;
 359     int i;
 360
 361     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
 362         return NULL;
 363     }
 364     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
 365         return NULL;
 366     }
 367     radix_page_info = g_malloc0(sizeof(*radix_page_info));
 368     radix_page_info->count = 0;
 369     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
 370         if (rmmu_info.ap_encodings[i]) {
 371             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
 372             radix_page_info->count++;
 373         }
 374     }
 375     return radix_page_info;
 376 }
 377
 378 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
 379                                      bool radix, bool gtse,
 380                                      uint64_t proc_tbl)
 381 {
 382     CPUState *cs = CPU(cpu);
 383     int ret;
 384     uint64_t flags = 0;
 385     struct kvm_ppc_mmuv3_cfg cfg = {
 386         .process_table = proc_tbl,
 387     };
 388
 389     if (radix) {
 390         flags |= KVM_PPC_MMUV3_RADIX;
 391     }
 392     if (gtse) {
 393         flags |= KVM_PPC_MMUV3_GTSE;
 394     }
 395     cfg.flags = flags;
 396     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
 397     switch (ret) {
 398     case 0:
 399         return H_SUCCESS;
 400     case -EINVAL:
 401         return H_PARAMETER;
 402     case -ENODEV:
 403         return H_NOT_AVAILABLE;
 404     default:
 405         return H_HARDWARE;
 406     }
 407 }
 408
 409 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 410 {
 411     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 412         return true;
 413     }
 414
 415     return (1ul << shift) <= rampgsize;
 416 }
 417
/*
 * Set from qemu_getrampagesize() on the first kvm_fixup_page_sizes() call
 * that gets that far; compared against the backend page size in
 * kvmppc_is_mem_backend_page_size_ok().
 */
static long max_cpu_page_size;
 419
 420 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 421 {
 422     static struct kvm_ppc_smmu_info smmu_info;
 423     static bool has_smmu_info;
 424     CPUPPCState *env = &cpu->env;
 425     int iq, ik, jq, jk;
 426     bool has_64k_pages = false;
 427
 428     /* We only handle page sizes for 64-bit server guests for now */
 429     if (!(env->mmu_model & POWERPC_MMU_64)) {
 430         return;
 431     }
 432
 433     /* Collect MMU info from kernel if not already */
 434     if (!has_smmu_info) {
 435         kvm_get_smmu_info(cpu, &smmu_info);
 436         has_smmu_info = true;
 437     }
 438
 439     if (!max_cpu_page_size) {
 440         max_cpu_page_size = qemu_getrampagesize();
 441     }
 442
 443     /* Convert to QEMU form */
 444     memset(&env->sps, 0, sizeof(env->sps));
 445
 446     /* If we have HV KVM, we need to forbid CI large pages if our
 447      * host page size is smaller than 64K.
 448      */
 449     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 450         env->ci_large_pages = getpagesize() >= 0x10000;
 451     }
 452
 453     /*
 454      * XXX This loop should be an entry wide AND of the capabilities that
 455      *     the selected CPU has with the capabilities that KVM supports.
 456      */
 457     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 458         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 459         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 460
 461         if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
 462                                  ksps->page_shift)) {
 463             continue;
 464         }
 465         qsps->page_shift = ksps->page_shift;
 466         qsps->slb_enc = ksps->slb_enc;
 467         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 468             if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
 469                                      ksps->enc[jk].page_shift)) {
 470                 continue;
 471             }
 472             if (ksps->enc[jk].page_shift == 16) {
 473                 has_64k_pages = true;
 474             }
 475             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 476             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 477             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 478                 break;
 479             }
 480         }
 481         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 482             break;
 483         }
 484     }
 485     env->slb_nr = smmu_info.slb_size;
 486     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 487         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 488     }
 489     if (!has_64k_pages) {
 490         env->mmu_model &= ~POWERPC_MMU_64K;
 491     }
 492 }
 493
 494 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 495 {
 496     Object *mem_obj = object_resolve_path(obj_path, NULL);
 497     char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
 498     long pagesize;
 499
 500     if (mempath) {
 501         pagesize = qemu_mempath_getpagesize(mempath);
 502         g_free(mempath);
 503     } else {
 504         pagesize = getpagesize();
 505     }
 506
 507     return pagesize >= max_cpu_page_size;
 508 }
 509
 510 #else /* defined (TARGET_PPC64) */
 511
 512 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 513 {
 514 }
 515
 516 bool kvmppc_is_mem_backend_page_size_ok(const char *obj_path)
 517 {
 518     return true;
 519 }
 520
 521 #endif /* !defined (TARGET_PPC64) */
 522
 523 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 524 {
 525     return POWERPC_CPU(cpu)->vcpu_id;
 526 }
 527
/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 * book3s supports only 1 watchpoint, so an array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

/* Table of hardware debug points: one address and one type per slot */
static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/* By default no breakpoint or watchpoint is supported; the limits are
 * raised in kvmppc_hw_debug_points_init() */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;
 546
 547 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 548 {
 549     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 550         max_hw_breakpoint = 2;
 551         max_hw_watchpoint = 2;
 552     }
 553
 554     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 555         fprintf(stderr, "Error initializing h/w breakpoints\n");
 556         return;
 557     }
 558 }
 559
 560 int kvm_arch_init_vcpu(CPUState *cs)
 561 {
 562     PowerPCCPU *cpu = POWERPC_CPU(cs);
 563     CPUPPCState *cenv = &cpu->env;
 564     int ret;
 565
 566     /* Gather server mmu info from KVM and update the CPU state */
 567     kvm_fixup_page_sizes(cpu);
 568
 569     /* Synchronize sregs with kvm */
 570     ret = kvm_arch_sync_sregs(cpu);
 571     if (ret) {
 572         if (ret == -EINVAL) {
 573             error_report("Register sync failed... If you're using kvm-hv.ko,"
 574                          " only \"-cpu host\" is possible");
 575         }
 576         return ret;
 577     }
 578
 579     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 580
 581     switch (cenv->mmu_model) {
 582     case POWERPC_MMU_BOOKE206:
 583         /* This target supports access to KVM's guest TLB */
 584         ret = kvm_booke206_tlb_init(cpu);
 585         break;
 586     case POWERPC_MMU_2_07:
 587         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 588             /* KVM-HV has transactional memory on POWER8 also without the
 589              * KVM_CAP_PPC_HTM extension, so enable it here instead as
 590              * long as it's availble to userspace on the host. */
 591             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
 592                 cap_htm = true;
 593             }
 594         }
 595         break;
 596     default:
 597         break;
 598     }
 599
 600     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 601     kvmppc_hw_debug_points_init(cenv);
 602
 603     return ret;
 604 }
 605
 606 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 607 {
 608     CPUPPCState *env = &cpu->env;
 609     CPUState *cs = CPU(cpu);
 610     struct kvm_dirty_tlb dirty_tlb;
 611     unsigned char *bitmap;
 612     int ret;
 613
 614     if (!env->kvm_sw_tlb) {
 615         return;
 616     }
 617
 618     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 619     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 620
 621     dirty_tlb.bitmap = (uintptr_t)bitmap;
 622     dirty_tlb.num_dirty = env->nb_tlb;
 623
 624     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 625     if (ret) {
 626         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 627                 __func__, strerror(-ret));
 628     }
 629
 630     g_free(bitmap);
 631 }
 632
 633 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 634 {
 635     PowerPCCPU *cpu = POWERPC_CPU(cs);
 636     CPUPPCState *env = &cpu->env;
 637     union {
 638         uint32_t u32;
 639         uint64_t u64;
 640     } val;
 641     struct kvm_one_reg reg = {
 642         .id = id,
 643         .addr = (uintptr_t) &val,
 644     };
 645     int ret;
 646
 647     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 648     if (ret != 0) {
 649         trace_kvm_failed_spr_get(spr, strerror(errno));
 650     } else {
 651         switch (id & KVM_REG_SIZE_MASK) {
 652         case KVM_REG_SIZE_U32:
 653             env->spr[spr] = val.u32;
 654             break;
 655
 656         case KVM_REG_SIZE_U64:
 657             env->spr[spr] = val.u64;
 658             break;
 659
 660         default:
 661             /* Don't handle this size yet */
 662             abort();
 663         }
 664     }
 665 }
 666
 667 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 668 {
 669     PowerPCCPU *cpu = POWERPC_CPU(cs);
 670     CPUPPCState *env = &cpu->env;
 671     union {
 672         uint32_t u32;
 673         uint64_t u64;
 674     } val;
 675     struct kvm_one_reg reg = {
 676         .id = id,
 677         .addr = (uintptr_t) &val,
 678     };
 679     int ret;
 680
 681     switch (id & KVM_REG_SIZE_MASK) {
 682     case KVM_REG_SIZE_U32:
 683         val.u32 = env->spr[spr];
 684         break;
 685
 686     case KVM_REG_SIZE_U64:
 687         val.u64 = env->spr[spr];
 688         break;
 689
 690     default:
 691         /* Don't handle this size yet */
 692         abort();
 693     }
 694
 695     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 696     if (ret != 0) {
 697         trace_kvm_failed_spr_set(spr, strerror(errno));
 698     }
 699 }
 700
 701 static int kvm_put_fp(CPUState *cs)
 702 {
 703     PowerPCCPU *cpu = POWERPC_CPU(cs);
 704     CPUPPCState *env = &cpu->env;
 705     struct kvm_one_reg reg;
 706     int i;
 707     int ret;
 708
 709     if (env->insns_flags & PPC_FLOAT) {
 710         uint64_t fpscr = env->fpscr;
 711         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 712
 713         reg.id = KVM_REG_PPC_FPSCR;
 714         reg.addr = (uintptr_t)&fpscr;
 715         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 716         if (ret < 0) {
 717             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 718             return ret;
 719         }
 720
 721         for (i = 0; i < 32; i++) {
 722             uint64_t vsr[2];
 723
 724 #ifdef HOST_WORDS_BIGENDIAN
 725             vsr[0] = float64_val(env->fpr[i]);
 726             vsr[1] = env->vsr[i];
 727 #else
 728             vsr[0] = env->vsr[i];
 729             vsr[1] = float64_val(env->fpr[i]);
 730 #endif
 731             reg.addr = (uintptr_t) &vsr;
 732             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 733
 734             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 735             if (ret < 0) {
 736                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 737                         i, strerror(errno));
 738                 return ret;
 739             }
 740         }
 741     }
 742
 743     if (env->insns_flags & PPC_ALTIVEC) {
 744         reg.id = KVM_REG_PPC_VSCR;
 745         reg.addr = (uintptr_t)&env->vscr;
 746         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 747         if (ret < 0) {
 748             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 749             return ret;
 750         }
 751
 752         for (i = 0; i < 32; i++) {
 753             reg.id = KVM_REG_PPC_VR(i);
 754             reg.addr = (uintptr_t)&env->avr[i];
 755             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 756             if (ret < 0) {
 757                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 758                 return ret;
 759             }
 760         }
 761     }
 762
 763     return 0;
 764 }
 765
 766 static int kvm_get_fp(CPUState *cs)
 767 {
 768     PowerPCCPU *cpu = POWERPC_CPU(cs);
 769     CPUPPCState *env = &cpu->env;
 770     struct kvm_one_reg reg;
 771     int i;
 772     int ret;
 773
 774     if (env->insns_flags & PPC_FLOAT) {
 775         uint64_t fpscr;
 776         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 777
 778         reg.id = KVM_REG_PPC_FPSCR;
 779         reg.addr = (uintptr_t)&fpscr;
 780         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 781         if (ret < 0) {
 782             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 783             return ret;
 784         } else {
 785             env->fpscr = fpscr;
 786         }
 787
 788         for (i = 0; i < 32; i++) {
 789             uint64_t vsr[2];
 790
 791             reg.addr = (uintptr_t) &vsr;
 792             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 793
 794             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 795             if (ret < 0) {
 796                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 797                         vsx ? "VSR" : "FPR", i, strerror(errno));
 798                 return ret;
 799             } else {
 800 #ifdef HOST_WORDS_BIGENDIAN
 801                 env->fpr[i] = vsr[0];
 802                 if (vsx) {
 803                     env->vsr[i] = vsr[1];
 804                 }
 805 #else
 806                 env->fpr[i] = vsr[1];
 807                 if (vsx) {
 808                     env->vsr[i] = vsr[0];
 809                 }
 810 #endif
 811             }
 812         }
 813     }
 814
 815     if (env->insns_flags & PPC_ALTIVEC) {
 816         reg.id = KVM_REG_PPC_VSCR;
 817         reg.addr = (uintptr_t)&env->vscr;
 818         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 819         if (ret < 0) {
 820             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 821             return ret;
 822         }
 823
 824         for (i = 0; i < 32; i++) {
 825             reg.id = KVM_REG_PPC_VR(i);
 826             reg.addr = (uintptr_t)&env->avr[i];
 827             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 828             if (ret < 0) {
 829                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 830                         i, strerror(errno));
 831                 return ret;
 832             }
 833         }
 834     }
 835
 836     return 0;
 837 }
 838
 839 #if defined(TARGET_PPC64)
 840 static int kvm_get_vpa(CPUState *cs)
 841 {
 842     PowerPCCPU *cpu = POWERPC_CPU(cs);
 843     CPUPPCState *env = &cpu->env;
 844     struct kvm_one_reg reg;
 845     int ret;
 846
 847     reg.id = KVM_REG_PPC_VPA_ADDR;
 848     reg.addr = (uintptr_t)&env->vpa_addr;
 849     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 850     if (ret < 0) {
 851         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 852         return ret;
 853     }
 854
 855     assert((uintptr_t)&env->slb_shadow_size
 856            == ((uintptr_t)&env->slb_shadow_addr + 8));
 857     reg.id = KVM_REG_PPC_VPA_SLB;
 858     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 859     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 860     if (ret < 0) {
 861         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 862                 strerror(errno));
 863         return ret;
 864     }
 865
 866     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 867     reg.id = KVM_REG_PPC_VPA_DTL;
 868     reg.addr = (uintptr_t)&env->dtl_addr;
 869     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 870     if (ret < 0) {
 871         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 872                 strerror(errno));
 873         return ret;
 874     }
 875
 876     return 0;
 877 }
 878
 879 static int kvm_put_vpa(CPUState *cs)
 880 {
 881     PowerPCCPU *cpu = POWERPC_CPU(cs);
 882     CPUPPCState *env = &cpu->env;
 883     struct kvm_one_reg reg;
 884     int ret;
 885
 886     /* SLB shadow or DTL can't be registered unless a master VPA is
 887      * registered.  That means when restoring state, if a VPA *is*
 888      * registered, we need to set that up first.  If not, we need to
 889      * deregister the others before deregistering the master VPA */
 890     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 891
 892     if (env->vpa_addr) {
 893         reg.id = KVM_REG_PPC_VPA_ADDR;
 894         reg.addr = (uintptr_t)&env->vpa_addr;
 895         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 896         if (ret < 0) {
 897             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 898             return ret;
 899         }
 900     }
 901
 902     assert((uintptr_t)&env->slb_shadow_size
 903            == ((uintptr_t)&env->slb_shadow_addr + 8));
 904     reg.id = KVM_REG_PPC_VPA_SLB;
 905     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 906     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 907     if (ret < 0) {
 908         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 909         return ret;
 910     }
 911
 912     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 913     reg.id = KVM_REG_PPC_VPA_DTL;
 914     reg.addr = (uintptr_t)&env->dtl_addr;
 915     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 916     if (ret < 0) {
 917         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 918                 strerror(errno));
 919         return ret;
 920     }
 921
 922     if (!env->vpa_addr) {
 923         reg.id = KVM_REG_PPC_VPA_ADDR;
 924         reg.addr = (uintptr_t)&env->vpa_addr;
 925         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 926         if (ret < 0) {
 927             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 928             return ret;
 929         }
 930     }
 931
 932     return 0;
 933 }
 934 #endif /* TARGET_PPC64 */
 935
 936 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 937 {
 938     CPUPPCState *env = &cpu->env;
 939     struct kvm_sregs sregs;
 940     int i;
 941
 942     sregs.pvr = env->spr[SPR_PVR];
 943
 944     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 945
 946     /* Sync SLB */
 947 #ifdef TARGET_PPC64
 948     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 949         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 950         if (env->slb[i].esid & SLB_ESID_V) {
 951             sregs.u.s.ppc64.slb[i].slbe |= i;
 952         }
 953         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 954     }
 955 #endif
 956
 957     /* Sync SRs */
 958     for (i = 0; i < 16; i++) {
 959         sregs.u.s.ppc32.sr[i] = env->sr[i];
 960     }
 961
 962     /* Sync BATs */
 963     for (i = 0; i < 8; i++) {
 964         /* Beware. We have to swap upper and lower bits here */
 965         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 966             | env->DBAT[1][i];
 967         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 968             | env->IBAT[1][i];
 969     }
 970
 971     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 972 }
 973
 974 int kvm_arch_put_registers(CPUState *cs, int level)
 975 {
 976     PowerPCCPU *cpu = POWERPC_CPU(cs);
 977     CPUPPCState *env = &cpu->env;
 978     struct kvm_regs regs;
 979     int ret;
 980     int i;
 981
 982     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 983     if (ret < 0) {
 984         return ret;
 985     }
 986
 987     regs.ctr = env->ctr;
 988     regs.lr  = env->lr;
 989     regs.xer = cpu_read_xer(env);
 990     regs.msr = env->msr;
 991     regs.pc = env->nip;
 992
 993     regs.srr0 = env->spr[SPR_SRR0];
 994     regs.srr1 = env->spr[SPR_SRR1];
 995
 996     regs.sprg0 = env->spr[SPR_SPRG0];
 997     regs.sprg1 = env->spr[SPR_SPRG1];
 998     regs.sprg2 = env->spr[SPR_SPRG2];
 999     regs.sprg3 = env->spr[SPR_SPRG3];
1000     regs.sprg4 = env->spr[SPR_SPRG4];
1001     regs.sprg5 = env->spr[SPR_SPRG5];
1002     regs.sprg6 = env->spr[SPR_SPRG6];
1003     regs.sprg7 = env->spr[SPR_SPRG7];
1004
1005     regs.pid = env->spr[SPR_BOOKE_PID];
1006
1007     for (i = 0;i < 32; i++)
1008         regs.gpr[i] = env->gpr[i];
1009
1010     regs.cr = 0;
1011     for (i = 0; i < 8; i++) {
1012         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1013     }
1014
1015     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1016     if (ret < 0)
1017         return ret;
1018
1019     kvm_put_fp(cs);
1020
1021     if (env->tlb_dirty) {
1022         kvm_sw_tlb_put(cpu);
1023         env->tlb_dirty = false;
1024     }
1025
1026     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1027         ret = kvmppc_put_books_sregs(cpu);
1028         if (ret < 0) {
1029             return ret;
1030         }
1031     }
1032
1033     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1034         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1035     }
1036
1037     if (cap_one_reg) {
1038         int i;
1039
1040         /* We deliberately ignore errors here, for kernels which have
1041          * the ONE_REG calls, but don't support the specific
1042          * registers, there's a reasonable chance things will still
1043          * work, at least until we try to migrate. */
1044         for (i = 0; i < 1024; i++) {
1045             uint64_t id = env->spr_cb[i].one_reg_id;
1046
1047             if (id != 0) {
1048                 kvm_put_one_spr(cs, id, i);
1049             }
1050         }
1051
1052 #ifdef TARGET_PPC64
1053         if (msr_ts) {
1054             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1055                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1056             }
1057             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1058                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1059             }
1060             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1061             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1062             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1063             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1064             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1065             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1066             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1067             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1068             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1069             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1070         }
1071
1072         if (cap_papr) {
1073             if (kvm_put_vpa(cs) < 0) {
1074                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1075             }
1076         }
1077
1078         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1079 #endif /* TARGET_PPC64 */
1080     }
1081
1082     return ret;
1083 }
1084
1085 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1086 {
1087      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1088 }
1089
1090 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1091 {
1092     CPUPPCState *env = &cpu->env;
1093     struct kvm_sregs sregs;
1094     int ret;
1095
1096     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1097     if (ret < 0) {
1098         return ret;
1099     }
1100
1101     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1102         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1103         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1104         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1105         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1106         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1107         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1108         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1109         env->spr[SPR_DECR] = sregs.u.e.dec;
1110         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1111         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1112         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1113     }
1114
1115     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1116         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1117         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1118         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1119         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1120         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1121     }
1122
1123     if (sregs.u.e.features & KVM_SREGS_E_64) {
1124         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1125     }
1126
1127     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1128         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1129     }
1130
1131     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1132         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1133         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1134         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1135         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1136         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1137         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1138         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1139         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1140         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1141         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1142         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1143         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1144         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1145         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1146         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1147         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1148         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1149         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1150         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1151         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1152         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1153         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1154         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1155         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1156         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1157         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1158         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1159         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1160         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1161         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1162         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1163         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1164
1165         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1166             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1167             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1168             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1169             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1170             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1171             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1172         }
1173
1174         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1175             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1176             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1177         }
1178
1179         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1180             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1181             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1182             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1183             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1184         }
1185     }
1186
1187     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1188         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1189         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1190         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1191         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1192         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1193         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1194         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1195         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1196         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1197         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1198     }
1199
1200     if (sregs.u.e.features & KVM_SREGS_EXP) {
1201         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1202     }
1203
1204     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1205         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1206         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1207     }
1208
1209     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1210         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1211         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1212         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1213
1214         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1215             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1216             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1217         }
1218     }
1219
1220     return 0;
1221 }
1222
1223 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1224 {
1225     CPUPPCState *env = &cpu->env;
1226     struct kvm_sregs sregs;
1227     int ret;
1228     int i;
1229
1230     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1231     if (ret < 0) {
1232         return ret;
1233     }
1234
1235     if (!cpu->vhyp) {
1236         ppc_store_sdr1(env, sregs.u.s.sdr1);
1237     }
1238
1239     /* Sync SLB */
1240 #ifdef TARGET_PPC64
1241     /*
1242      * The packed SLB array we get from KVM_GET_SREGS only contains
1243      * information about valid entries. So we flush our internal copy
1244      * to get rid of stale ones, then put all valid SLB entries back
1245      * in.
1246      */
1247     memset(env->slb, 0, sizeof(env->slb));
1248     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1249         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1250         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1251         /*
1252          * Only restore valid entries
1253          */
1254         if (rb & SLB_ESID_V) {
1255             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1256         }
1257     }
1258 #endif
1259
1260     /* Sync SRs */
1261     for (i = 0; i < 16; i++) {
1262         env->sr[i] = sregs.u.s.ppc32.sr[i];
1263     }
1264
1265     /* Sync BATs */
1266     for (i = 0; i < 8; i++) {
1267         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1268         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1269         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1270         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1271     }
1272
1273     return 0;
1274 }
1275
1276 int kvm_arch_get_registers(CPUState *cs)
1277 {
1278     PowerPCCPU *cpu = POWERPC_CPU(cs);
1279     CPUPPCState *env = &cpu->env;
1280     struct kvm_regs regs;
1281     uint32_t cr;
1282     int i, ret;
1283
1284     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1285     if (ret < 0)
1286         return ret;
1287
1288     cr = regs.cr;
1289     for (i = 7; i >= 0; i--) {
1290         env->crf[i] = cr & 15;
1291         cr >>= 4;
1292     }
1293
1294     env->ctr = regs.ctr;
1295     env->lr = regs.lr;
1296     cpu_write_xer(env, regs.xer);
1297     env->msr = regs.msr;
1298     env->nip = regs.pc;
1299
1300     env->spr[SPR_SRR0] = regs.srr0;
1301     env->spr[SPR_SRR1] = regs.srr1;
1302
1303     env->spr[SPR_SPRG0] = regs.sprg0;
1304     env->spr[SPR_SPRG1] = regs.sprg1;
1305     env->spr[SPR_SPRG2] = regs.sprg2;
1306     env->spr[SPR_SPRG3] = regs.sprg3;
1307     env->spr[SPR_SPRG4] = regs.sprg4;
1308     env->spr[SPR_SPRG5] = regs.sprg5;
1309     env->spr[SPR_SPRG6] = regs.sprg6;
1310     env->spr[SPR_SPRG7] = regs.sprg7;
1311
1312     env->spr[SPR_BOOKE_PID] = regs.pid;
1313
1314     for (i = 0;i < 32; i++)
1315         env->gpr[i] = regs.gpr[i];
1316
1317     kvm_get_fp(cs);
1318
1319     if (cap_booke_sregs) {
1320         ret = kvmppc_get_booke_sregs(cpu);
1321         if (ret < 0) {
1322             return ret;
1323         }
1324     }
1325
1326     if (cap_segstate) {
1327         ret = kvmppc_get_books_sregs(cpu);
1328         if (ret < 0) {
1329             return ret;
1330         }
1331     }
1332
1333     if (cap_hior) {
1334         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1335     }
1336
1337     if (cap_one_reg) {
1338         int i;
1339
1340         /* We deliberately ignore errors here, for kernels which have
1341          * the ONE_REG calls, but don't support the specific
1342          * registers, there's a reasonable chance things will still
1343          * work, at least until we try to migrate. */
1344         for (i = 0; i < 1024; i++) {
1345             uint64_t id = env->spr_cb[i].one_reg_id;
1346
1347             if (id != 0) {
1348                 kvm_get_one_spr(cs, id, i);
1349             }
1350         }
1351
1352 #ifdef TARGET_PPC64
1353         if (msr_ts) {
1354             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1355                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1356             }
1357             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1358                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1359             }
1360             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1361             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1362             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1363             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1364             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1365             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1366             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1367             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1368             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1369             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1370         }
1371
1372         if (cap_papr) {
1373             if (kvm_get_vpa(cs) < 0) {
1374                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1375             }
1376         }
1377
1378         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1379 #endif
1380     }
1381
1382     return 0;
1383 }
1384
1385 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1386 {
1387     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1388
1389     if (irq != PPC_INTERRUPT_EXT) {
1390         return 0;
1391     }
1392
1393     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1394         return 0;
1395     }
1396
1397     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1398
1399     return 0;
1400 }
1401
/*
 * PPC_INPUT_INT names the bit of env->irq_input_state that
 * kvm_arch_pre_run() tests before injecting an interrupt.  The constant
 * it maps to depends on the target being built: the 40x one for
 * TARGET_PPCEMB, the 970 one for TARGET_PPC64, the 6xx one otherwise.
 */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1409
1410 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1411 {
1412     PowerPCCPU *cpu = POWERPC_CPU(cs);
1413     CPUPPCState *env = &cpu->env;
1414     int r;
1415     unsigned irq;
1416
1417     qemu_mutex_lock_iothread();
1418
1419     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1420      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1421     if (!cap_interrupt_level &&
1422         run->ready_for_interrupt_injection &&
1423         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1424         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1425     {
1426         /* For now KVM disregards the 'irq' argument. However, in the
1427          * future KVM could cache it in-kernel to avoid a heavyweight exit
1428          * when reading the UIC.
1429          */
1430         irq = KVM_INTERRUPT_SET;
1431
1432         DPRINTF("injected interrupt %d\n", irq);
1433         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1434         if (r < 0) {
1435             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1436         }
1437
1438         /* Always wake up soon in case the interrupt was level based */
1439         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1440                        (NANOSECONDS_PER_SECOND / 50));
1441     }
1442
1443     /* We don't know if there are more interrupts pending after this. However,
1444      * the guest will return to userspace in the course of handling this one
1445      * anyways, so we will get a chance to deliver the rest. */
1446
1447     qemu_mutex_unlock_iothread();
1448 }
1449
1450 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1451 {
1452     return MEMTXATTRS_UNSPECIFIED;
1453 }
1454
1455 int kvm_arch_process_async_events(CPUState *cs)
1456 {
1457     return cs->halted;
1458 }
1459
1460 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1461 {
1462     CPUState *cs = CPU(cpu);
1463     CPUPPCState *env = &cpu->env;
1464
1465     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1466         cs->halted = 1;
1467         cs->exception_index = EXCP_HLT;
1468     }
1469
1470     return 0;
1471 }
1472
1473 /* map dcr access to existing qemu dcr emulation */
1474 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1475 {
1476     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1477         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1478
1479     return 0;
1480 }
1481
1482 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1483 {
1484     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1485         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1486
1487     return 0;
1488 }
1489
1490 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1491 {
1492     /* Mixed endian case is not handled */
1493     uint32_t sc = debug_inst_opcode;
1494
1495     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1496                             sizeof(sc), 0) ||
1497         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1498         return -EINVAL;
1499     }
1500
1501     return 0;
1502 }
1503
1504 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1505 {
1506     uint32_t sc;
1507
1508     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1509         sc != debug_inst_opcode ||
1510         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1511                             sizeof(sc), 1)) {
1512         return -EINVAL;
1513     }
1514
1515     return 0;
1516 }
1517
1518 static int find_hw_breakpoint(target_ulong addr, int type)
1519 {
1520     int n;
1521
1522     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1523            <= ARRAY_SIZE(hw_debug_points));
1524
1525     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1526         if (hw_debug_points[n].addr == addr &&
1527              hw_debug_points[n].type == type) {
1528             return n;
1529         }
1530     }
1531
1532     return -1;
1533 }
1534
1535 static int find_hw_watchpoint(target_ulong addr, int *flag)
1536 {
1537     int n;
1538
1539     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1540     if (n >= 0) {
1541         *flag = BP_MEM_ACCESS;
1542         return n;
1543     }
1544
1545     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1546     if (n >= 0) {
1547         *flag = BP_MEM_WRITE;
1548         return n;
1549     }
1550
1551     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1552     if (n >= 0) {
1553         *flag = BP_MEM_READ;
1554         return n;
1555     }
1556
1557     return -1;
1558 }
1559
1560 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1561                                   target_ulong len, int type)
1562 {
1563     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1564         return -ENOBUFS;
1565     }
1566
1567     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1568     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1569
1570     switch (type) {
1571     case GDB_BREAKPOINT_HW:
1572         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1573             return -ENOBUFS;
1574         }
1575
1576         if (find_hw_breakpoint(addr, type) >= 0) {
1577             return -EEXIST;
1578         }
1579
1580         nb_hw_breakpoint++;
1581         break;
1582
1583     case GDB_WATCHPOINT_WRITE:
1584     case GDB_WATCHPOINT_READ:
1585     case GDB_WATCHPOINT_ACCESS:
1586         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1587             return -ENOBUFS;
1588         }
1589
1590         if (find_hw_breakpoint(addr, type) >= 0) {
1591             return -EEXIST;
1592         }
1593
1594         nb_hw_watchpoint++;
1595         break;
1596
1597     default:
1598         return -ENOSYS;
1599     }
1600
1601     return 0;
1602 }
1603
1604 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1605                                   target_ulong len, int type)
1606 {
1607     int n;
1608
1609     n = find_hw_breakpoint(addr, type);
1610     if (n < 0) {
1611         return -ENOENT;
1612     }
1613
1614     switch (type) {
1615     case GDB_BREAKPOINT_HW:
1616         nb_hw_breakpoint--;
1617         break;
1618
1619     case GDB_WATCHPOINT_WRITE:
1620     case GDB_WATCHPOINT_READ:
1621     case GDB_WATCHPOINT_ACCESS:
1622         nb_hw_watchpoint--;
1623         break;
1624
1625     default:
1626         return -ENOSYS;
1627     }
1628     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1629
1630     return 0;
1631 }
1632
1633 void kvm_arch_remove_all_hw_breakpoints(void)
1634 {
1635     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1636 }
1637
1638 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1639 {
1640     int n;
1641
1642     /* Software Breakpoint updates */
1643     if (kvm_sw_breakpoints_active(cs)) {
1644         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1645     }
1646
1647     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1648            <= ARRAY_SIZE(hw_debug_points));
1649     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1650
1651     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1652         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1653         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1654         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1655             switch (hw_debug_points[n].type) {
1656             case GDB_BREAKPOINT_HW:
1657                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1658                 break;
1659             case GDB_WATCHPOINT_WRITE:
1660                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1661                 break;
1662             case GDB_WATCHPOINT_READ:
1663                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1664                 break;
1665             case GDB_WATCHPOINT_ACCESS:
1666                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1667                                         KVMPPC_DEBUG_WATCH_READ;
1668                 break;
1669             default:
1670                 cpu_abort(cs, "Unsupported breakpoint type\n");
1671             }
1672             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1673         }
1674     }
1675 }
1676
1677 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1678 {
1679     CPUState *cs = CPU(cpu);
1680     CPUPPCState *env = &cpu->env;
1681     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1682     int handle = 0;
1683     int n;
1684     int flag = 0;
1685
1686     if (cs->singlestep_enabled) {
1687         handle = 1;
1688     } else if (arch_info->status) {
1689         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1690             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1691                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1692                 if (n >= 0) {
1693                     handle = 1;
1694                 }
1695             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1696                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1697                 n = find_hw_watchpoint(arch_info->address,  &flag);
1698                 if (n >= 0) {
1699                     handle = 1;
1700                     cs->watchpoint_hit = &hw_watchpoint;
1701                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1702                     hw_watchpoint.flags = flag;
1703                 }
1704             }
1705         }
1706     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1707         handle = 1;
1708     } else {
1709         /* QEMU is not able to handle debug exception, so inject
1710          * program exception to guest;
1711          * Yes program exception NOT debug exception !!
1712          * When QEMU is using debug resources then debug exception must
1713          * be always set. To achieve this we set MSR_DE and also set
1714          * MSRP_DEP so guest cannot change MSR_DE.
1715          * When emulating debug resource for guest we want guest
1716          * to control MSR_DE (enable/disable debug interrupt on need).
1717          * Supporting both configurations are NOT possible.
1718          * So the result is that we cannot share debug resources
1719          * between QEMU and Guest on BOOKE architecture.
1720          * In the current design QEMU gets the priority over guest,
1721          * this means that if QEMU is using debug resources then guest
1722          * cannot use them;
1723          * For software breakpoint QEMU uses a privileged instruction;
1724          * So there cannot be any reason that we are here for guest
1725          * set debug exception, only possibility is guest executed a
1726          * privileged / illegal instruction and that's why we are
1727          * injecting a program interrupt.
1728          */
1729
1730         cpu_synchronize_state(cs);
1731         /* env->nip is PC, so increment this by 4 to use
1732          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1733          */
1734         env->nip += 4;
1735         cs->exception_index = POWERPC_EXCP_PROGRAM;
1736         env->error_code = POWERPC_EXCP_INVAL;
1737         ppc_cpu_do_interrupt(cs);
1738     }
1739
1740     return handle;
1741 }
1742
1743 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1744 {
1745     PowerPCCPU *cpu = POWERPC_CPU(cs);
1746     CPUPPCState *env = &cpu->env;
1747     int ret;
1748
1749     qemu_mutex_lock_iothread();
1750
1751     switch (run->exit_reason) {
1752     case KVM_EXIT_DCR:
1753         if (run->dcr.is_write) {
1754             DPRINTF("handle dcr write\n");
1755             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1756         } else {
1757             DPRINTF("handle dcr read\n");
1758             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1759         }
1760         break;
1761     case KVM_EXIT_HLT:
1762         DPRINTF("handle halt\n");
1763         ret = kvmppc_handle_halt(cpu);
1764         break;
1765 #if defined(TARGET_PPC64)
1766     case KVM_EXIT_PAPR_HCALL:
1767         DPRINTF("handle PAPR hypercall\n");
1768         run->papr_hcall.ret = spapr_hypercall(cpu,
1769                                               run->papr_hcall.nr,
1770                                               run->papr_hcall.args);
1771         ret = 0;
1772         break;
1773 #endif
1774     case KVM_EXIT_EPR:
1775         DPRINTF("handle epr\n");
1776         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1777         ret = 0;
1778         break;
1779     case KVM_EXIT_WATCHDOG:
1780         DPRINTF("handle watchdog expiry\n");
1781         watchdog_perform_action();
1782         ret = 0;
1783         break;
1784
1785     case KVM_EXIT_DEBUG:
1786         DPRINTF("handle debug exception\n");
1787         if (kvm_handle_debug(cpu, run)) {
1788             ret = EXCP_DEBUG;
1789             break;
1790         }
1791         /* re-enter, this exception was guest-internal */
1792         ret = 0;
1793         break;
1794
1795     default:
1796         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1797         ret = -1;
1798         break;
1799     }
1800
1801     qemu_mutex_unlock_iothread();
1802     return ret;
1803 }
1804
1805 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1806 {
1807     CPUState *cs = CPU(cpu);
1808     uint32_t bits = tsr_bits;
1809     struct kvm_one_reg reg = {
1810         .id = KVM_REG_PPC_OR_TSR,
1811         .addr = (uintptr_t) &bits,
1812     };
1813
1814     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1815 }
1816
1817 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1818 {
1819
1820     CPUState *cs = CPU(cpu);
1821     uint32_t bits = tsr_bits;
1822     struct kvm_one_reg reg = {
1823         .id = KVM_REG_PPC_CLEAR_TSR,
1824         .addr = (uintptr_t) &bits,
1825     };
1826
1827     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1828 }
1829
1830 int kvmppc_set_tcr(PowerPCCPU *cpu)
1831 {
1832     CPUState *cs = CPU(cpu);
1833     CPUPPCState *env = &cpu->env;
1834     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1835
1836     struct kvm_one_reg reg = {
1837         .id = KVM_REG_PPC_TCR,
1838         .addr = (uintptr_t) &tcr,
1839     };
1840
1841     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1842 }
1843
1844 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1845 {
1846     CPUState *cs = CPU(cpu);
1847     int ret;
1848
1849     if (!kvm_enabled()) {
1850         return -1;
1851     }
1852
1853     if (!cap_ppc_watchdog) {
1854         printf("warning: KVM does not support watchdog");
1855         return -1;
1856     }
1857
1858     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1859     if (ret < 0) {
1860         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1861                 __func__, strerror(-ret));
1862         return ret;
1863     }
1864
1865     return ret;
1866 }
1867
/*
 * Scan /proc/cpuinfo for the first line that starts with @field and copy
 * that whole line into @value (at most @len bytes, via pstrcpy()).
 *
 * Returns 0 when a matching line was found, -1 otherwise (including when
 * the file cannot be opened).
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int prefix_len = strlen(field);
    int found = -1;
    FILE *cpuinfo = fopen("/proc/cpuinfo", "r");

    if (cpuinfo == NULL) {
        return -1;
    }

    for (;;) {
        if (fgets(line, sizeof(line), cpuinfo) == NULL) {
            break;
        }
        if (strncmp(line, field, prefix_len) == 0) {
            pstrcpy(value, len, line);
            found = 0;
            break;
        }
        /* Stop if the line just read begins with a NUL byte */
        if (*line == '\0') {
            break;
        }
    }

    fclose(cpuinfo);

    return found;
}
1895
1896 uint32_t kvmppc_get_tbfreq(void)
1897 {
1898     char line[512];
1899     char *ns;
1900     uint32_t retval = NANOSECONDS_PER_SECOND;
1901
1902     if (read_cpuinfo("timebase", line, sizeof(line))) {
1903         return retval;
1904     }
1905
1906     if (!(ns = strchr(line, ':'))) {
1907         return retval;
1908     }
1909
1910     ns++;
1911
1912     return atoi(ns);
1913 }
1914
1915 bool kvmppc_get_host_serial(char **value)
1916 {
1917     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1918                                NULL);
1919 }
1920
1921 bool kvmppc_get_host_model(char **value)
1922 {
1923     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1924 }
1925
1926 /* Try to find a device tree node for a CPU with clock-frequency property */
1927 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1928 {
1929     struct dirent *dirp;
1930     DIR *dp;
1931
1932     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1933         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1934         return -1;
1935     }
1936
1937     buf[0] = '\0';
1938     while ((dirp = readdir(dp)) != NULL) {
1939         FILE *f;
1940         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1941                  dirp->d_name);
1942         f = fopen(buf, "r");
1943         if (f) {
1944             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1945             fclose(f);
1946             break;
1947         }
1948         buf[0] = '\0';
1949     }
1950     closedir(dp);
1951     if (buf[0] == '\0') {
1952         printf("Unknown host!\n");
1953         return -1;
1954     }
1955
1956     return 0;
1957 }
1958
/*
 * Read a big-endian integer property from the file @filename.
 *
 * Returns (uint64_t)-1 when the file cannot be opened, the converted
 * value when exactly 4 or 8 bytes were read, and 0 for any other length.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    int nread;
    FILE *prop = fopen(filename, "rb");

    if (prop == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), prop);
    fclose(prop);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1985
1986 /* Read a CPU node property from the host device tree that's a single
1987  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1988  * (can't find or open the property, or doesn't understand the
1989  * format) */
1990 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1991 {
1992     char buf[PATH_MAX], *tmp;
1993     uint64_t val;
1994
1995     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1996         return -1;
1997     }
1998
1999     tmp = g_strdup_printf("%s/%s", buf, propname);
2000     val = kvmppc_read_int_dt(tmp);
2001     g_free(tmp);
2002
2003     return val;
2004 }
2005
/* Return the host CPU node's "clock-frequency" device tree property. */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
2010
/*
 * Return the host CPU node's "ibm,vmx" device tree property, truncated
 * to 32 bits.
 */
uint32_t kvmppc_get_vmx(void)
{
    uint32_t vmx = kvmppc_read_int_cpu_dt("ibm,vmx");

    return vmx;
}
2015
/*
 * Return the host CPU node's "ibm,dfp" device tree property, truncated
 * to 32 bits.
 */
uint32_t kvmppc_get_dfp(void)
{
    uint32_t dfp = kvmppc_read_int_cpu_dt("ibm,dfp");

    return dfp;
}
2020
2021 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2022  {
2023      PowerPCCPU *cpu = ppc_env_get_cpu(env);
2024      CPUState *cs = CPU(cpu);
2025
2026     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2027         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2028         return 0;
2029     }
2030
2031     return 1;
2032 }
2033
2034 int kvmppc_get_hasidle(CPUPPCState *env)
2035 {
2036     struct kvm_ppc_pvinfo pvinfo;
2037
2038     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2039         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2040         return 1;
2041     }
2042
2043     return 0;
2044 }
2045
2046 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2047 {
2048     uint32_t *hc = (uint32_t*)buf;
2049     struct kvm_ppc_pvinfo pvinfo;
2050
2051     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2052         memcpy(buf, pvinfo.hcall, buf_len);
2053         return 0;
2054     }
2055
2056     /*
2057      * Fallback to always fail hypercalls regardless of endianness:
2058      *
2059      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2060      *     li r3, -1
2061      *     b .+8       (becomes nop in wrong endian)
2062      *     bswap32(li r3, -1)
2063      */
2064
2065     hc[0] = cpu_to_be32(0x08000048);
2066     hc[1] = cpu_to_be32(0x3860ffff);
2067     hc[2] = cpu_to_be32(0x48000008);
2068     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2069
2070     return 1;
2071 }
2072
2073 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2074 {
2075     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2076 }
2077
2078 void kvmppc_enable_logical_ci_hcalls(void)
2079 {
2080     /*
2081      * FIXME: it would be nice if we could detect the cases where
2082      * we're using a device which requires the in kernel
2083      * implementation of these hcalls, but the kernel lacks them and
2084      * produce a warning.
2085      */
2086     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2087     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2088 }
2089
2090 void kvmppc_enable_set_mode_hcall(void)
2091 {
2092     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2093 }
2094
2095 void kvmppc_enable_clear_ref_mod_hcalls(void)
2096 {
2097     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2098     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2099 }
2100
2101 void kvmppc_set_papr(PowerPCCPU *cpu)
2102 {
2103     CPUState *cs = CPU(cpu);
2104     int ret;
2105
2106     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2107     if (ret) {
2108         error_report("This vCPU type or KVM version does not support PAPR");
2109         exit(1);
2110     }
2111
2112     /* Update the capability flag so we sync the right information
2113      * with kvm */
2114     cap_papr = 1;
2115 }
2116
2117 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2118 {
2119     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2120 }
2121
2122 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2123 {
2124     CPUState *cs = CPU(cpu);
2125     int ret;
2126
2127     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2128     if (ret && mpic_proxy) {
2129         error_report("This KVM version does not support EPR");
2130         exit(1);
2131     }
2132 }
2133
2134 int kvmppc_smt_threads(void)
2135 {
2136     return cap_ppc_smt ? cap_ppc_smt : 1;
2137 }
2138
2139 int kvmppc_set_smt_threads(int smt)
2140 {
2141     int ret;
2142
2143     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2144     if (!ret) {
2145         cap_ppc_smt = smt;
2146     }
2147     return ret;
2148 }
2149
2150 void kvmppc_hint_smt_possible(Error **errp)
2151 {
2152     int i;
2153     GString *g;
2154     char *s;
2155
2156     assert(kvm_enabled());
2157     if (cap_ppc_smt_possible) {
2158         g = g_string_new("Available VSMT modes:");
2159         for (i = 63; i >= 0; i--) {
2160             if ((1UL << i) & cap_ppc_smt_possible) {
2161                 g_string_append_printf(g, " %lu", (1UL << i));
2162             }
2163         }
2164         s = g_string_free(g, false);
2165         error_append_hint(errp, "%s.\n", s);
2166         g_free(s);
2167     } else {
2168         error_append_hint(errp,
2169                           "This KVM seems to be too old to support VSMT.\n");
2170     }
2171 }
2172
2173
2174 #ifdef TARGET_PPC64
2175 off_t kvmppc_alloc_rma(void **rma)
2176 {
2177     off_t size;
2178     int fd;
2179     struct kvm_allocate_rma ret;
2180
2181     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2182      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2183      *                      not necessary on this hardware
2184      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2185      *
2186      * FIXME: We should allow the user to force contiguous RMA
2187      * allocation in the cap_ppc_rma==1 case.
2188      */
2189     if (cap_ppc_rma < 2) {
2190         return 0;
2191     }
2192
2193     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2194     if (fd < 0) {
2195         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2196                 strerror(errno));
2197         return -1;
2198     }
2199
2200     size = MIN(ret.rma_size, 256ul << 20);
2201
2202     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2203     if (*rma == MAP_FAILED) {
2204         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2205         return -1;
2206     };
2207
2208     return size;
2209 }
2210
2211 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2212 {
2213     struct kvm_ppc_smmu_info info;
2214     long rampagesize, best_page_shift;
2215     int i;
2216
2217     if (cap_ppc_rma >= 2) {
2218         return current_size;
2219     }
2220
2221     /* Find the largest hardware supported page size that's less than
2222      * or equal to the (logical) backing page size of guest RAM */
2223     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2224     rampagesize = qemu_getrampagesize();
2225     best_page_shift = 0;
2226
2227     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2228         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2229
2230         if (!sps->page_shift) {
2231             continue;
2232         }
2233
2234         if ((sps->page_shift > best_page_shift)
2235             && ((1UL << sps->page_shift) <= rampagesize)) {
2236             best_page_shift = sps->page_shift;
2237         }
2238     }
2239
2240     return MIN(current_size,
2241                1ULL << (best_page_shift + hash_shift - 7));
2242 }
2243 #endif
2244
2245 bool kvmppc_spapr_use_multitce(void)
2246 {
2247     return cap_spapr_multitce;
2248 }
2249
2250 int kvmppc_spapr_enable_inkernel_multitce(void)
2251 {
2252     int ret;
2253
2254     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2255                             H_PUT_TCE_INDIRECT, 1);
2256     if (!ret) {
2257         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2258                                 H_STUFF_TCE, 1);
2259     }
2260
2261     return ret;
2262 }
2263
2264 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2265                               uint64_t bus_offset, uint32_t nb_table,
2266                               int *pfd, bool need_vfio)
2267 {
2268     long len;
2269     int fd;
2270     void *table;
2271
2272     /* Must set fd to -1 so we don't try to munmap when called for
2273      * destroying the table, which the upper layers -will- do
2274      */
2275     *pfd = -1;
2276     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2277         return NULL;
2278     }
2279
2280     if (cap_spapr_tce_64) {
2281         struct kvm_create_spapr_tce_64 args = {
2282             .liobn = liobn,
2283             .page_shift = page_shift,
2284             .offset = bus_offset >> page_shift,
2285             .size = nb_table,
2286             .flags = 0
2287         };
2288         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2289         if (fd < 0) {
2290             fprintf(stderr,
2291                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2292                     liobn);
2293             return NULL;
2294         }
2295     } else if (cap_spapr_tce) {
2296         uint64_t window_size = (uint64_t) nb_table << page_shift;
2297         struct kvm_create_spapr_tce args = {
2298             .liobn = liobn,
2299             .window_size = window_size,
2300         };
2301         if ((window_size != args.window_size) || bus_offset) {
2302             return NULL;
2303         }
2304         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2305         if (fd < 0) {
2306             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2307                     liobn);
2308             return NULL;
2309         }
2310     } else {
2311         return NULL;
2312     }
2313
2314     len = nb_table * sizeof(uint64_t);
2315     /* FIXME: round this up to page size */
2316
2317     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2318     if (table == MAP_FAILED) {
2319         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2320                 liobn);
2321         close(fd);
2322         return NULL;
2323     }
2324
2325     *pfd = fd;
2326     return table;
2327 }
2328
/*
 * Unmap a TCE table of @nb_table 64-bit entries at @table and close its
 * descriptor @fd.
 *
 * Returns -1 without doing anything when @fd is negative, 0 otherwise.
 * Failures of munmap() or close() are reported on stderr but do not
 * change the return value.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);

    /*
     * Handle the two steps independently: a failed munmap() must not
     * prevent the descriptor from being closed.
     */
    if (munmap(table, len) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }
    if (close(fd) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
    }

    return 0;
}
2347
2348 int kvmppc_reset_htab(int shift_hint)
2349 {
2350     uint32_t shift = shift_hint;
2351
2352     if (!kvm_enabled()) {
2353         /* Full emulation, tell caller to allocate htab itself */
2354         return 0;
2355     }
2356     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2357         int ret;
2358         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2359         if (ret == -ENOTTY) {
2360             /* At least some versions of PR KVM advertise the
2361              * capability, but don't implement the ioctl().  Oops.
2362              * Return 0 so that we allocate the htab in qemu, as is
2363              * correct for PR. */
2364             return 0;
2365         } else if (ret < 0) {
2366             return ret;
2367         }
2368         return shift;
2369     }
2370
2371     /* We have a kernel that predates the htab reset calls.  For PR
2372      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2373      * this era, it has allocated a 16MB fixed size hash table already. */
2374     if (kvmppc_is_pr(kvm_state)) {
2375         /* PR - tell caller to allocate htab */
2376         return 0;
2377     } else {
2378         /* HV - assume 16MB kernel allocated htab */
2379         return 24;
2380     }
2381 }
2382
2383 static inline uint32_t mfpvr(void)
2384 {
2385     uint32_t pvr;
2386
2387     asm ("mfpvr %0"
2388          : "=r"(pvr));
2389     return pvr;
2390 }
2391
/* Set (@on true) or clear (@on false) the bits of @flags in *@word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2400
2401 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2402 {
2403     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2404     uint32_t vmx = kvmppc_get_vmx();
2405     uint32_t dfp = kvmppc_get_dfp();
2406     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2407     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2408
2409     /* Now fix up the class with information we can query from the host */
2410     pcc->pvr = mfpvr();
2411
2412     if (vmx != -1) {
2413         /* Only override when we know what the host supports */
2414         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2415         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2416     }
2417     if (dfp != -1) {
2418         /* Only override when we know what the host supports */
2419         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2420     }
2421
2422     if (dcache_size != -1) {
2423         pcc->l1_dcache_size = dcache_size;
2424     }
2425
2426     if (icache_size != -1) {
2427         pcc->l1_icache_size = icache_size;
2428     }
2429
2430 #if defined(TARGET_PPC64)
2431     pcc->radix_page_info = kvm_get_radix_page_info();
2432
2433     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2434         /*
2435          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2436          * compliant.  More importantly, advertising ISA 3.00
2437          * architected mode may prevent guests from activating
2438          * necessary DD1 workarounds.
2439          */
2440         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2441                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2442     }
2443 #endif /* defined(TARGET_PPC64) */
2444 }
2445
2446 bool kvmppc_has_cap_epr(void)
2447 {
2448     return cap_epr;
2449 }
2450
2451 bool kvmppc_has_cap_htab_fd(void)
2452 {
2453     return cap_htab_fd;
2454 }
2455
2456 bool kvmppc_has_cap_fixup_hcalls(void)
2457 {
2458     return cap_fixup_hcalls;
2459 }
2460
2461 bool kvmppc_has_cap_htm(void)
2462 {
2463     return cap_htm;
2464 }
2465
2466 bool kvmppc_has_cap_mmu_radix(void)
2467 {
2468     return cap_mmu_radix;
2469 }
2470
2471 bool kvmppc_has_cap_mmu_hash_v3(void)
2472 {
2473     return cap_mmu_hash_v3;
2474 }
2475
2476 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2477 {
2478     uint32_t host_pvr = mfpvr();
2479     PowerPCCPUClass *pvr_pcc;
2480
2481     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2482     if (pvr_pcc == NULL) {
2483         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2484     }
2485
2486     return pvr_pcc;
2487 }
2488
2489 static int kvm_ppc_register_host_cpu_type(void)
2490 {
2491     TypeInfo type_info = {
2492         .name = TYPE_HOST_POWERPC_CPU,
2493         .class_init = kvmppc_host_cpu_class_init,
2494     };
2495     PowerPCCPUClass *pvr_pcc;
2496     ObjectClass *oc;
2497     DeviceClass *dc;
2498     int i;
2499
2500     pvr_pcc = kvm_ppc_get_host_cpu_class();
2501     if (pvr_pcc == NULL) {
2502         return -1;
2503     }
2504     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2505     type_register(&type_info);
2506
2507     oc = object_class_by_name(type_info.name);
2508     g_assert(oc);
2509
2510 #if defined(TARGET_PPC64)
2511     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2512     type_info.parent = TYPE_SPAPR_CPU_CORE,
2513     type_info.instance_size = sizeof(sPAPRCPUCore);
2514     type_info.instance_init = NULL;
2515     type_info.class_init = spapr_cpu_core_class_init;
2516     type_info.class_data = (void *) "host";
2517     type_register(&type_info);
2518     g_free((void *)type_info.name);
2519 #endif
2520
2521     /*
2522      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2523      * we want "POWER8" to be a "family" alias that points to the current
2524      * host CPU type, too)
2525      */
2526     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2527     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2528         if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2529             char *suffix;
2530
2531             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2532             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2533             if (suffix) {
2534                 *suffix = 0;
2535             }
2536             ppc_cpu_aliases[i].oc = oc;
2537             break;
2538         }
2539     }
2540
2541     return 0;
2542 }
2543
2544 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2545 {
2546     struct kvm_rtas_token_args args = {
2547         .token = token,
2548     };
2549
2550     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2551         return -ENOENT;
2552     }
2553
2554     strncpy(args.name, function, sizeof(args.name));
2555
2556     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2557 }
2558
2559 int kvmppc_get_htab_fd(bool write)
2560 {
2561     struct kvm_get_htab_fd s = {
2562         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2563         .start_index = 0,
2564     };
2565
2566     if (!cap_htab_fd) {
2567         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2568         return -1;
2569     }
2570
2571     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2572 }
2573
2574 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2575 {
2576     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2577     uint8_t buf[bufsize];
2578     ssize_t rc;
2579
2580     do {
2581         rc = read(fd, buf, bufsize);
2582         if (rc < 0) {
2583             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2584                     strerror(errno));
2585             return rc;
2586         } else if (rc) {
2587             uint8_t *buffer = buf;
2588             ssize_t n = rc;
2589             while (n) {
2590                 struct kvm_get_htab_header *head =
2591                     (struct kvm_get_htab_header *) buffer;
2592                 size_t chunksize = sizeof(*head) +
2593                      HASH_PTE_SIZE_64 * head->n_valid;
2594
2595                 qemu_put_be32(f, head->index);
2596                 qemu_put_be16(f, head->n_valid);
2597                 qemu_put_be16(f, head->n_invalid);
2598                 qemu_put_buffer(f, (void *)(head + 1),
2599                                 HASH_PTE_SIZE_64 * head->n_valid);
2600
2601                 buffer += chunksize;
2602                 n -= chunksize;
2603             }
2604         }
2605     } while ((rc != 0)
2606              && ((max_ns < 0)
2607                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2608
2609     return (rc == 0) ? 1 : 0;
2610 }
2611
2612 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2613                            uint16_t n_valid, uint16_t n_invalid)
2614 {
2615     struct kvm_get_htab_header *buf;
2616     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2617     ssize_t rc;
2618
2619     buf = alloca(chunksize);
2620     buf->index = index;
2621     buf->n_valid = n_valid;
2622     buf->n_invalid = n_invalid;
2623
2624     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2625
2626     rc = write(fd, buf, chunksize);
2627     if (rc < 0) {
2628         fprintf(stderr, "Error writing KVM hash table: %s\n",
2629                 strerror(errno));
2630         return rc;
2631     }
2632     if (rc != chunksize) {
2633         /* We should never get a short write on a single chunk */
2634         fprintf(stderr, "Short write, restoring KVM hash table\n");
2635         return -1;
2636     }
2637     return 0;
2638 }
2639
2640 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2641 {
2642     return true;
2643 }
2644
2645 void kvm_arch_init_irq_routing(KVMState *s)
2646 {
2647 }
2648
2649 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2650 {
2651     struct kvm_get_htab_fd ghf = {
2652         .flags = 0,
2653         .start_index = ptex,
2654     };
2655     int fd, rc;
2656     int i;
2657
2658     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2659     if (fd < 0) {
2660         hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2661     }
2662
2663     i = 0;
2664     while (i < n) {
2665         struct kvm_get_htab_header *hdr;
2666         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2667         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2668
2669         rc = read(fd, buf, sizeof(buf));
2670         if (rc < 0) {
2671             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2672         }
2673
2674         hdr = (struct kvm_get_htab_header *)buf;
2675         while ((i < n) && ((char *)hdr < (buf + rc))) {
2676             int invalid = hdr->n_invalid;
2677
2678             if (hdr->index != (ptex + i)) {
2679                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2680                          " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2681             }
2682
2683             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2684             i += hdr->n_valid;
2685
2686             if ((n - i) < invalid) {
2687                 invalid = n - i;
2688             }
2689             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2690             i += hdr->n_invalid;
2691
2692             hdr = (struct kvm_get_htab_header *)
2693                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2694         }
2695     }
2696
2697     close(fd);
2698 }
2699
2700 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2701 {
2702     int fd, rc;
2703     struct kvm_get_htab_fd ghf;
2704     struct {
2705         struct kvm_get_htab_header hdr;
2706         uint64_t pte0;
2707         uint64_t pte1;
2708     } buf;
2709
2710     ghf.flags = 0;
2711     ghf.start_index = 0;     /* Ignored */
2712     fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2713     if (fd < 0) {
2714         hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2715     }
2716
2717     buf.hdr.n_valid = 1;
2718     buf.hdr.n_invalid = 0;
2719     buf.hdr.index = ptex;
2720     buf.pte0 = cpu_to_be64(pte0);
2721     buf.pte1 = cpu_to_be64(pte1);
2722
2723     rc = write(fd, &buf, sizeof(buf));
2724     if (rc != sizeof(buf)) {
2725         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2726     }
2727     close(fd);
2728 }
2729
2730 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2731                              uint64_t address, uint32_t data, PCIDevice *dev)
2732 {
2733     return 0;
2734 }
2735
2736 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2737                                 int vector, PCIDevice *dev)
2738 {
2739     return 0;
2740 }
2741
/* No work is done here: ignores @virq and returns 0. */
int kvm_arch_release_virq_post(int virq)
{
    (void) virq;

    return 0;
}
2746
/* Return the low 16 bits of the MSI @data word. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    uint32_t low16 = data & 0xffff;

    return low16;
}
2751
2752 int kvmppc_enable_hwrng(void)
2753 {
2754     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2755         return -1;
2756     }
2757
2758     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2759 }
2760
2761 void kvmppc_check_papr_resize_hpt(Error **errp)
2762 {
2763     if (!kvm_enabled()) {
2764         return; /* No KVM, we're good */
2765     }
2766
2767     if (cap_resize_hpt) {
2768         return; /* Kernel has explicit support, we're good */
2769     }
2770
2771     /* Otherwise fallback on looking for PR KVM */
2772     if (kvmppc_is_pr(kvm_state)) {
2773         return;
2774     }
2775
2776     error_setg(errp,
2777                "Hash page table resizing not available with this KVM version");
2778 }
2779
2780 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2781 {
2782     CPUState *cs = CPU(cpu);
2783     struct kvm_ppc_resize_hpt rhpt = {
2784         .flags = flags,
2785         .shift = shift,
2786     };
2787
2788     if (!cap_resize_hpt) {
2789         return -ENOSYS;
2790     }
2791
2792     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2793 }
2794
2795 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2796 {
2797     CPUState *cs = CPU(cpu);
2798     struct kvm_ppc_resize_hpt rhpt = {
2799         .flags = flags,
2800         .shift = shift,
2801     };
2802
2803     if (!cap_resize_hpt) {
2804         return -ENOSYS;
2805     }
2806
2807     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2808 }
2809
2810 static void kvmppc_pivot_hpt_cpu(CPUState *cs, run_on_cpu_data arg)
2811 {
2812     target_ulong sdr1 = arg.target_ptr;
2813     PowerPCCPU *cpu = POWERPC_CPU(cs);
2814     CPUPPCState *env = &cpu->env;
2815
2816     /* This is just for the benefit of PR KVM */
2817     cpu_synchronize_state(cs);
2818     env->spr[SPR_SDR1] = sdr1;
2819     if (kvmppc_put_books_sregs(cpu) < 0) {
2820         error_report("Unable to update SDR1 in KVM");
2821         exit(1);
2822     }
2823 }
2824
2825 void kvmppc_update_sdr1(target_ulong sdr1)
2826 {
2827     CPUState *cs;
2828
2829     CPU_FOREACH(cs) {
2830         run_on_cpu(cs, kvmppc_pivot_hpt_cpu, RUN_ON_CPU_TARGET_PTR(sdr1));
2831     }
2832 }
2833
2834 /*
2835  * This is a helper function to detect a post migration scenario
2836  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2837  * the guest kernel can't handle a PVR value other than the actual host
2838  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2839  *
2840  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2841  * (so, we're HV), return true. The workaround itself is done in
2842  * cpu_post_load.
2843  *
2844  * The order here is important: we'll only check for KVM PR as a
2845  * fallback if the guest kernel can't handle the situation itself.
2846  * We need to avoid as much as possible querying the running KVM type
2847  * in QEMU level.
2848  */
2849 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2850 {
2851     CPUState *cs = CPU(cpu);
2852
2853     if (!kvm_enabled()) {
2854         return false;
2855     }
2856
2857     if (cap_ppc_pvr_compat) {
2858         return false;
2859     }
2860
2861     return !kvmppc_is_pr(cs->kvm_state);
2862 }