target/ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include "qemu/osdep.h"
  18 #include <dirent.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/vfs.h>
  21
  22 #include <linux/kvm.h>
  23
  24 #include "qemu-common.h"
  25 #include "qapi/error.h"
  26 #include "qemu/error-report.h"
  27 #include "cpu.h"
  28 #include "cpu-models.h"
  29 #include "qemu/timer.h"
  30 #include "sysemu/sysemu.h"
  31 #include "sysemu/hw_accel.h"
  32 #include "kvm_ppc.h"
  33 #include "sysemu/cpus.h"
  34 #include "sysemu/device_tree.h"
  35 #include "mmu-hash64.h"
  36
  37 #include "hw/sysbus.h"
  38 #include "hw/ppc/spapr.h"
  39 #include "hw/ppc/spapr_cpu_core.h"
  40 #include "hw/ppc/ppc.h"
  41 #include "sysemu/watchdog.h"
  42 #include "trace.h"
  43 #include "exec/gdbstub.h"
  44 #include "exec/memattrs.h"
  45 #include "exec/ram_addr.h"
  46 #include "sysemu/hostmem.h"
  47 #include "qemu/cutils.h"
  48 #include "qemu/mmap-alloc.h"
  49 #include "elf.h"
  50 #include "sysemu/kvm_int.h"
  51
  52 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  53
/*
 * Table of KVM capabilities this architecture declares as required.
 * It holds nothing but the KVM_CAP_LAST_INFO entry (presumably the list
 * terminator), so no individual capability is listed here.
 */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
  57
/*
 * Cached results of KVM capability queries.  Unless noted otherwise they
 * are assigned once in kvm_arch_init().
 */
static int cap_interrupt_unset;
static int cap_interrupt_level;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_smt_possible;
static int cap_spapr_tce;
static int cap_spapr_tce_64;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
/* Not assigned in kvm_arch_init(); see the note in that function */
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;
/* May additionally be switched on in kvm_arch_init_vcpu() */
static int cap_htm;             /* Hardware transactional memory support */
static int cap_mmu_radix;
static int cap_mmu_hash_v3;
static int cap_xive;
static int cap_resize_hpt;
/* Forced to false in kvm_arch_init() */
static int cap_ppc_pvr_compat;
/*
 * NOTE(review): the next four are not assigned directly in kvm_arch_init();
 * presumably kvmppc_get_cpu_characteristics() fills them in - confirm.
 */
static int cap_ppc_safe_cache;
static int cap_ppc_safe_bounds_check;
static int cap_ppc_safe_indirect_branch;
static int cap_ppc_count_cache_flush_assist;
static int cap_ppc_nested_kvm_hv;
/* Holds the value returned by kvmppc_get_dec_bits() */
static int cap_large_decr;

/* Read from KVM_REG_PPC_DEBUG_INST in kvm_arch_init_vcpu() */
static uint32_t debug_inst_opcode;
  89
/*
 * XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 *
 *     The timer itself is created in kvm_arch_init_vcpu() with
 *     kvm_kick_cpu() as its callback.
 */
static QEMUTimer *idle_timer;
 101
 102 static void kvm_kick_cpu(void *opaque)
 103 {
 104     PowerPCCPU *cpu = opaque;
 105
 106     qemu_cpu_kick(CPU(cpu));
 107 }
 108
 109 /*
 110  * Check whether we are running with KVM-PR (instead of KVM-HV).  This
 111  * should only be used for fallback tests - generally we should use
 112  * explicit capabilities for the features we want, rather than
 113  * assuming what is/isn't available depending on the KVM variant.
 114  */
 115 static bool kvmppc_is_pr(KVMState *ks)
 116 {
 117     /* Assume KVM-PR if the GET_PVINFO capability is available */
 118     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 119 }
 120
/* Forward declarations of helpers called from kvm_arch_init() below */
static int kvm_ppc_register_host_cpu_type(MachineState *ms);
static void kvmppc_get_cpu_characteristics(KVMState *s);
static int kvmppc_get_dec_bits(void);
 124
 125 int kvm_arch_init(MachineState *ms, KVMState *s)
 126 {
 127     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 128     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 129     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 130     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 131     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
 132     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 133     cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
 134     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 135     cap_spapr_vfio = kvm_vm_check_extension(s, KVM_CAP_SPAPR_TCE_VFIO);
 136     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 137     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 138     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 139     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 140     /*
 141      * Note: we don't set cap_papr here, because this capability is
 142      * only activated after this by kvmppc_set_papr()
 143      */
 144     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 145     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 146     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
 147     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 148     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
 149     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
 150     cap_xive = kvm_vm_check_extension(s, KVM_CAP_PPC_IRQ_XIVE);
 151     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
 152     kvmppc_get_cpu_characteristics(s);
 153     cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
 154     cap_large_decr = kvmppc_get_dec_bits();
 155     /*
 156      * Note: setting it to false because there is not such capability
 157      * in KVM at this moment.
 158      *
 159      * TODO: call kvm_vm_check_extension() with the right capability
 160      * after the kernel starts implementing it.
 161      */
 162     cap_ppc_pvr_compat = false;
 163
 164     if (!cap_interrupt_level) {
 165         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 166                         "VM to stall at times!\n");
 167     }
 168
 169     kvm_ppc_register_host_cpu_type(ms);
 170
 171     return 0;
 172 }
 173
 174 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 175 {
 176     return 0;
 177 }
 178
 179 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 180 {
 181     CPUPPCState *cenv = &cpu->env;
 182     CPUState *cs = CPU(cpu);
 183     struct kvm_sregs sregs;
 184     int ret;
 185
 186     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 187         /*
 188          * What we're really trying to say is "if we're on BookE, we
 189          * use the native PVR for now". This is the only sane way to
 190          * check it though, so we potentially confuse users that they
 191          * can run BookE guests on BookS. Let's hope nobody dares
 192          * enough :)
 193          */
 194         return 0;
 195     } else {
 196         if (!cap_segstate) {
 197             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 198             return -ENOSYS;
 199         }
 200     }
 201
 202     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 203     if (ret) {
 204         return ret;
 205     }
 206
 207     sregs.pvr = cenv->spr[SPR_PVR];
 208     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 209 }
 210
 211 /* Set up a shared TLB array with KVM */
 212 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 213 {
 214     CPUPPCState *env = &cpu->env;
 215     CPUState *cs = CPU(cpu);
 216     struct kvm_book3e_206_tlb_params params = {};
 217     struct kvm_config_tlb cfg = {};
 218     unsigned int entries = 0;
 219     int ret, i;
 220
 221     if (!kvm_enabled() ||
 222         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 223         return 0;
 224     }
 225
 226     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 227
 228     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 229         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 230         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 231         entries += params.tlb_sizes[i];
 232     }
 233
 234     assert(entries == env->nb_tlb);
 235     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 236
 237     env->tlb_dirty = true;
 238
 239     cfg.array = (uintptr_t)env->tlb.tlbm;
 240     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 241     cfg.params = (uintptr_t)&params;
 242     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 243
 244     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 245     if (ret < 0) {
 246         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 247                 __func__, strerror(-ret));
 248         return ret;
 249     }
 250
 251     env->kvm_sw_tlb = true;
 252     return 0;
 253 }
 254
 255
 256 #if defined(TARGET_PPC64)
 257 static void kvm_get_smmu_info(struct kvm_ppc_smmu_info *info, Error **errp)
 258 {
 259     int ret;
 260
 261     assert(kvm_state != NULL);
 262
 263     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 264         error_setg(errp, "KVM doesn't expose the MMU features it supports");
 265         error_append_hint(errp, "Consider switching to a newer KVM\n");
 266         return;
 267     }
 268
 269     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 270     if (ret == 0) {
 271         return;
 272     }
 273
 274     error_setg_errno(errp, -ret,
 275                      "KVM failed to provide the MMU features it supports");
 276 }
 277
 278 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
 279 {
 280     KVMState *s = KVM_STATE(current_machine->accelerator);
 281     struct ppc_radix_page_info *radix_page_info;
 282     struct kvm_ppc_rmmu_info rmmu_info;
 283     int i;
 284
 285     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
 286         return NULL;
 287     }
 288     if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
 289         return NULL;
 290     }
 291     radix_page_info = g_malloc0(sizeof(*radix_page_info));
 292     radix_page_info->count = 0;
 293     for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
 294         if (rmmu_info.ap_encodings[i]) {
 295             radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
 296             radix_page_info->count++;
 297         }
 298     }
 299     return radix_page_info;
 300 }
 301
 302 target_ulong kvmppc_configure_v3_mmu(PowerPCCPU *cpu,
 303                                      bool radix, bool gtse,
 304                                      uint64_t proc_tbl)
 305 {
 306     CPUState *cs = CPU(cpu);
 307     int ret;
 308     uint64_t flags = 0;
 309     struct kvm_ppc_mmuv3_cfg cfg = {
 310         .process_table = proc_tbl,
 311     };
 312
 313     if (radix) {
 314         flags |= KVM_PPC_MMUV3_RADIX;
 315     }
 316     if (gtse) {
 317         flags |= KVM_PPC_MMUV3_GTSE;
 318     }
 319     cfg.flags = flags;
 320     ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_CONFIGURE_V3_MMU, &cfg);
 321     switch (ret) {
 322     case 0:
 323         return H_SUCCESS;
 324     case -EINVAL:
 325         return H_PARAMETER;
 326     case -ENODEV:
 327         return H_NOT_AVAILABLE;
 328     default:
 329         return H_HARDWARE;
 330     }
 331 }
 332
 333 bool kvmppc_hpt_needs_host_contiguous_pages(void)
 334 {
 335     static struct kvm_ppc_smmu_info smmu_info;
 336
 337     if (!kvm_enabled()) {
 338         return false;
 339     }
 340
 341     kvm_get_smmu_info(&smmu_info, &error_fatal);
 342     return !!(smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL);
 343 }
 344
 345 void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
 346 {
 347     struct kvm_ppc_smmu_info smmu_info;
 348     int iq, ik, jq, jk;
 349     Error *local_err = NULL;
 350
 351     /* For now, we only have anything to check on hash64 MMUs */
 352     if (!cpu->hash64_opts || !kvm_enabled()) {
 353         return;
 354     }
 355
 356     kvm_get_smmu_info(&smmu_info, &local_err);
 357     if (local_err) {
 358         error_propagate(errp, local_err);
 359         return;
 360     }
 361
 362     if (ppc_hash64_has(cpu, PPC_HASH64_1TSEG)
 363         && !(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 364         error_setg(errp,
 365                    "KVM does not support 1TiB segments which guest expects");
 366         return;
 367     }
 368
 369     if (smmu_info.slb_size < cpu->hash64_opts->slb_size) {
 370         error_setg(errp, "KVM only supports %u SLB entries, but guest needs %u",
 371                    smmu_info.slb_size, cpu->hash64_opts->slb_size);
 372         return;
 373     }
 374
 375     /*
 376      * Verify that every pagesize supported by the cpu model is
 377      * supported by KVM with the same encodings
 378      */
 379     for (iq = 0; iq < ARRAY_SIZE(cpu->hash64_opts->sps); iq++) {
 380         PPCHash64SegmentPageSizes *qsps = &cpu->hash64_opts->sps[iq];
 381         struct kvm_ppc_one_seg_page_size *ksps;
 382
 383         for (ik = 0; ik < ARRAY_SIZE(smmu_info.sps); ik++) {
 384             if (qsps->page_shift == smmu_info.sps[ik].page_shift) {
 385                 break;
 386             }
 387         }
 388         if (ik >= ARRAY_SIZE(smmu_info.sps)) {
 389             error_setg(errp, "KVM doesn't support for base page shift %u",
 390                        qsps->page_shift);
 391             return;
 392         }
 393
 394         ksps = &smmu_info.sps[ik];
 395         if (ksps->slb_enc != qsps->slb_enc) {
 396             error_setg(errp,
 397 "KVM uses SLB encoding 0x%x for page shift %u, but guest expects 0x%x",
 398                        ksps->slb_enc, ksps->page_shift, qsps->slb_enc);
 399             return;
 400         }
 401
 402         for (jq = 0; jq < ARRAY_SIZE(qsps->enc); jq++) {
 403             for (jk = 0; jk < ARRAY_SIZE(ksps->enc); jk++) {
 404                 if (qsps->enc[jq].page_shift == ksps->enc[jk].page_shift) {
 405                     break;
 406                 }
 407             }
 408
 409             if (jk >= ARRAY_SIZE(ksps->enc)) {
 410                 error_setg(errp, "KVM doesn't support page shift %u/%u",
 411                            qsps->enc[jq].page_shift, qsps->page_shift);
 412                 return;
 413             }
 414             if (qsps->enc[jq].pte_enc != ksps->enc[jk].pte_enc) {
 415                 error_setg(errp,
 416 "KVM uses PTE encoding 0x%x for page shift %u/%u, but guest expects 0x%x",
 417                            ksps->enc[jk].pte_enc, qsps->enc[jq].page_shift,
 418                            qsps->page_shift, qsps->enc[jq].pte_enc);
 419                 return;
 420             }
 421         }
 422     }
 423
 424     if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
 425         /*
 426          * Mostly what guest pagesizes we can use are related to the
 427          * host pages used to map guest RAM, which is handled in the
 428          * platform code. Cache-Inhibited largepages (64k) however are
 429          * used for I/O, so if they're mapped to the host at all it
 430          * will be a normal mapping, not a special hugepage one used
 431          * for RAM.
 432          */
 433         if (getpagesize() < 0x10000) {
 434             error_setg(errp,
 435                        "KVM can't supply 64kiB CI pages, which guest expects");
 436         }
 437     }
 438 }
#endif /* defined(TARGET_PPC64) */
 440
 441 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 442 {
 443     return POWERPC_CPU(cpu)->vcpu_id;
 444 }
 445
/*
 * e500 supports 2 h/w breakpoints and 2 watchpoints.  book3s supports
 * only 1 watchpoint, so an array size of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

/* One hardware debug point: the address it covers and its type */
static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

static CPUWatchpoint hw_watchpoint;

/*
 * By default no breakpoints or watchpoints are supported; the limits
 * are raised in kvmppc_hw_debug_points_init() for BookE.
 */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;
 464
 465 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 466 {
 467     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 468         max_hw_breakpoint = 2;
 469         max_hw_watchpoint = 2;
 470     }
 471
 472     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 473         fprintf(stderr, "Error initializing h/w breakpoints\n");
 474         return;
 475     }
 476 }
 477
 478 int kvm_arch_init_vcpu(CPUState *cs)
 479 {
 480     PowerPCCPU *cpu = POWERPC_CPU(cs);
 481     CPUPPCState *cenv = &cpu->env;
 482     int ret;
 483
 484     /* Synchronize sregs with kvm */
 485     ret = kvm_arch_sync_sregs(cpu);
 486     if (ret) {
 487         if (ret == -EINVAL) {
 488             error_report("Register sync failed... If you're using kvm-hv.ko,"
 489                          " only \"-cpu host\" is possible");
 490         }
 491         return ret;
 492     }
 493
 494     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 495
 496     switch (cenv->mmu_model) {
 497     case POWERPC_MMU_BOOKE206:
 498         /* This target supports access to KVM's guest TLB */
 499         ret = kvm_booke206_tlb_init(cpu);
 500         break;
 501     case POWERPC_MMU_2_07:
 502         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 503             /*
 504              * KVM-HV has transactional memory on POWER8 also without
 505              * the KVM_CAP_PPC_HTM extension, so enable it here
 506              * instead as long as it's availble to userspace on the
 507              * host.
 508              */
 509             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
 510                 cap_htm = true;
 511             }
 512         }
 513         break;
 514     default:
 515         break;
 516     }
 517
 518     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 519     kvmppc_hw_debug_points_init(cenv);
 520
 521     return ret;
 522 }
 523
 524 int kvm_arch_destroy_vcpu(CPUState *cs)
 525 {
 526     return 0;
 527 }
 528
 529 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 530 {
 531     CPUPPCState *env = &cpu->env;
 532     CPUState *cs = CPU(cpu);
 533     struct kvm_dirty_tlb dirty_tlb;
 534     unsigned char *bitmap;
 535     int ret;
 536
 537     if (!env->kvm_sw_tlb) {
 538         return;
 539     }
 540
 541     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 542     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 543
 544     dirty_tlb.bitmap = (uintptr_t)bitmap;
 545     dirty_tlb.num_dirty = env->nb_tlb;
 546
 547     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 548     if (ret) {
 549         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 550                 __func__, strerror(-ret));
 551     }
 552
 553     g_free(bitmap);
 554 }
 555
 556 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 557 {
 558     PowerPCCPU *cpu = POWERPC_CPU(cs);
 559     CPUPPCState *env = &cpu->env;
 560     union {
 561         uint32_t u32;
 562         uint64_t u64;
 563     } val;
 564     struct kvm_one_reg reg = {
 565         .id = id,
 566         .addr = (uintptr_t) &val,
 567     };
 568     int ret;
 569
 570     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 571     if (ret != 0) {
 572         trace_kvm_failed_spr_get(spr, strerror(errno));
 573     } else {
 574         switch (id & KVM_REG_SIZE_MASK) {
 575         case KVM_REG_SIZE_U32:
 576             env->spr[spr] = val.u32;
 577             break;
 578
 579         case KVM_REG_SIZE_U64:
 580             env->spr[spr] = val.u64;
 581             break;
 582
 583         default:
 584             /* Don't handle this size yet */
 585             abort();
 586         }
 587     }
 588 }
 589
 590 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 591 {
 592     PowerPCCPU *cpu = POWERPC_CPU(cs);
 593     CPUPPCState *env = &cpu->env;
 594     union {
 595         uint32_t u32;
 596         uint64_t u64;
 597     } val;
 598     struct kvm_one_reg reg = {
 599         .id = id,
 600         .addr = (uintptr_t) &val,
 601     };
 602     int ret;
 603
 604     switch (id & KVM_REG_SIZE_MASK) {
 605     case KVM_REG_SIZE_U32:
 606         val.u32 = env->spr[spr];
 607         break;
 608
 609     case KVM_REG_SIZE_U64:
 610         val.u64 = env->spr[spr];
 611         break;
 612
 613     default:
 614         /* Don't handle this size yet */
 615         abort();
 616     }
 617
 618     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 619     if (ret != 0) {
 620         trace_kvm_failed_spr_set(spr, strerror(errno));
 621     }
 622 }
 623
 624 static int kvm_put_fp(CPUState *cs)
 625 {
 626     PowerPCCPU *cpu = POWERPC_CPU(cs);
 627     CPUPPCState *env = &cpu->env;
 628     struct kvm_one_reg reg;
 629     int i;
 630     int ret;
 631
 632     if (env->insns_flags & PPC_FLOAT) {
 633         uint64_t fpscr = env->fpscr;
 634         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 635
 636         reg.id = KVM_REG_PPC_FPSCR;
 637         reg.addr = (uintptr_t)&fpscr;
 638         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 639         if (ret < 0) {
 640             trace_kvm_failed_fpscr_set(strerror(errno));
 641             return ret;
 642         }
 643
 644         for (i = 0; i < 32; i++) {
 645             uint64_t vsr[2];
 646             uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
 647             uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
 648
 649 #ifdef HOST_WORDS_BIGENDIAN
 650             vsr[0] = float64_val(*fpr);
 651             vsr[1] = *vsrl;
 652 #else
 653             vsr[0] = *vsrl;
 654             vsr[1] = float64_val(*fpr);
 655 #endif
 656             reg.addr = (uintptr_t) &vsr;
 657             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 658
 659             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 660             if (ret < 0) {
 661                 trace_kvm_failed_fp_set(vsx ? "VSR" : "FPR", i,
 662                                         strerror(errno));
 663                 return ret;
 664             }
 665         }
 666     }
 667
 668     if (env->insns_flags & PPC_ALTIVEC) {
 669         reg.id = KVM_REG_PPC_VSCR;
 670         reg.addr = (uintptr_t)&env->vscr;
 671         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 672         if (ret < 0) {
 673             trace_kvm_failed_vscr_set(strerror(errno));
 674             return ret;
 675         }
 676
 677         for (i = 0; i < 32; i++) {
 678             reg.id = KVM_REG_PPC_VR(i);
 679             reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
 680             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 681             if (ret < 0) {
 682                 trace_kvm_failed_vr_set(i, strerror(errno));
 683                 return ret;
 684             }
 685         }
 686     }
 687
 688     return 0;
 689 }
 690
 691 static int kvm_get_fp(CPUState *cs)
 692 {
 693     PowerPCCPU *cpu = POWERPC_CPU(cs);
 694     CPUPPCState *env = &cpu->env;
 695     struct kvm_one_reg reg;
 696     int i;
 697     int ret;
 698
 699     if (env->insns_flags & PPC_FLOAT) {
 700         uint64_t fpscr;
 701         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 702
 703         reg.id = KVM_REG_PPC_FPSCR;
 704         reg.addr = (uintptr_t)&fpscr;
 705         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 706         if (ret < 0) {
 707             trace_kvm_failed_fpscr_get(strerror(errno));
 708             return ret;
 709         } else {
 710             env->fpscr = fpscr;
 711         }
 712
 713         for (i = 0; i < 32; i++) {
 714             uint64_t vsr[2];
 715             uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
 716             uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
 717
 718             reg.addr = (uintptr_t) &vsr;
 719             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 720
 721             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 722             if (ret < 0) {
 723                 trace_kvm_failed_fp_get(vsx ? "VSR" : "FPR", i,
 724                                         strerror(errno));
 725                 return ret;
 726             } else {
 727 #ifdef HOST_WORDS_BIGENDIAN
 728                 *fpr = vsr[0];
 729                 if (vsx) {
 730                     *vsrl = vsr[1];
 731                 }
 732 #else
 733                 *fpr = vsr[1];
 734                 if (vsx) {
 735                     *vsrl = vsr[0];
 736                 }
 737 #endif
 738             }
 739         }
 740     }
 741
 742     if (env->insns_flags & PPC_ALTIVEC) {
 743         reg.id = KVM_REG_PPC_VSCR;
 744         reg.addr = (uintptr_t)&env->vscr;
 745         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 746         if (ret < 0) {
 747             trace_kvm_failed_vscr_get(strerror(errno));
 748             return ret;
 749         }
 750
 751         for (i = 0; i < 32; i++) {
 752             reg.id = KVM_REG_PPC_VR(i);
 753             reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
 754             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 755             if (ret < 0) {
 756                 trace_kvm_failed_vr_get(i, strerror(errno));
 757                 return ret;
 758             }
 759         }
 760     }
 761
 762     return 0;
 763 }
 764
 765 #if defined(TARGET_PPC64)
 766 static int kvm_get_vpa(CPUState *cs)
 767 {
 768     PowerPCCPU *cpu = POWERPC_CPU(cs);
 769     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
 770     struct kvm_one_reg reg;
 771     int ret;
 772
 773     reg.id = KVM_REG_PPC_VPA_ADDR;
 774     reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 775     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 776     if (ret < 0) {
 777         trace_kvm_failed_vpa_addr_get(strerror(errno));
 778         return ret;
 779     }
 780
 781     assert((uintptr_t)&spapr_cpu->slb_shadow_size
 782            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
 783     reg.id = KVM_REG_PPC_VPA_SLB;
 784     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
 785     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 786     if (ret < 0) {
 787         trace_kvm_failed_slb_get(strerror(errno));
 788         return ret;
 789     }
 790
 791     assert((uintptr_t)&spapr_cpu->dtl_size
 792            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
 793     reg.id = KVM_REG_PPC_VPA_DTL;
 794     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
 795     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 796     if (ret < 0) {
 797         trace_kvm_failed_dtl_get(strerror(errno));
 798         return ret;
 799     }
 800
 801     return 0;
 802 }
 803
 804 static int kvm_put_vpa(CPUState *cs)
 805 {
 806     PowerPCCPU *cpu = POWERPC_CPU(cs);
 807     SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
 808     struct kvm_one_reg reg;
 809     int ret;
 810
 811     /*
 812      * SLB shadow or DTL can't be registered unless a master VPA is
 813      * registered.  That means when restoring state, if a VPA *is*
 814      * registered, we need to set that up first.  If not, we need to
 815      * deregister the others before deregistering the master VPA
 816      */
 817     assert(spapr_cpu->vpa_addr
 818            || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
 819
 820     if (spapr_cpu->vpa_addr) {
 821         reg.id = KVM_REG_PPC_VPA_ADDR;
 822         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 823         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 824         if (ret < 0) {
 825             trace_kvm_failed_vpa_addr_set(strerror(errno));
 826             return ret;
 827         }
 828     }
 829
 830     assert((uintptr_t)&spapr_cpu->slb_shadow_size
 831            == ((uintptr_t)&spapr_cpu->slb_shadow_addr + 8));
 832     reg.id = KVM_REG_PPC_VPA_SLB;
 833     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
 834     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 835     if (ret < 0) {
 836         trace_kvm_failed_slb_set(strerror(errno));
 837         return ret;
 838     }
 839
 840     assert((uintptr_t)&spapr_cpu->dtl_size
 841            == ((uintptr_t)&spapr_cpu->dtl_addr + 8));
 842     reg.id = KVM_REG_PPC_VPA_DTL;
 843     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
 844     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 845     if (ret < 0) {
 846         trace_kvm_failed_dtl_set(strerror(errno));
 847         return ret;
 848     }
 849
 850     if (!spapr_cpu->vpa_addr) {
 851         reg.id = KVM_REG_PPC_VPA_ADDR;
 852         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
 853         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 854         if (ret < 0) {
 855             trace_kvm_failed_null_vpa_addr_set(strerror(errno));
 856             return ret;
 857         }
 858     }
 859
 860     return 0;
 861 }
 862 #endif /* TARGET_PPC64 */
 863
 864 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 865 {
 866     CPUPPCState *env = &cpu->env;
 867     struct kvm_sregs sregs;
 868     int i;
 869
 870     sregs.pvr = env->spr[SPR_PVR];
 871
 872     if (cpu->vhyp) {
 873         PPCVirtualHypervisorClass *vhc =
 874             PPC_VIRTUAL_HYPERVISOR_GET_CLASS(cpu->vhyp);
 875         sregs.u.s.sdr1 = vhc->encode_hpt_for_kvm_pr(cpu->vhyp);
 876     } else {
 877         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 878     }
 879
 880     /* Sync SLB */
 881 #ifdef TARGET_PPC64
 882     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 883         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 884         if (env->slb[i].esid & SLB_ESID_V) {
 885             sregs.u.s.ppc64.slb[i].slbe |= i;
 886         }
 887         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 888     }
 889 #endif
 890
 891     /* Sync SRs */
 892     for (i = 0; i < 16; i++) {
 893         sregs.u.s.ppc32.sr[i] = env->sr[i];
 894     }
 895
 896     /* Sync BATs */
 897     for (i = 0; i < 8; i++) {
 898         /* Beware. We have to swap upper and lower bits here */
 899         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 900             | env->DBAT[1][i];
 901         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 902             | env->IBAT[1][i];
 903     }
 904
 905     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 906 }
 907
 908 int kvm_arch_put_registers(CPUState *cs, int level)
 909 {
 910     PowerPCCPU *cpu = POWERPC_CPU(cs);
 911     CPUPPCState *env = &cpu->env;
 912     struct kvm_regs regs;
 913     int ret;
 914     int i;
 915
 916     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 917     if (ret < 0) {
 918         return ret;
 919     }
 920
 921     regs.ctr = env->ctr;
 922     regs.lr  = env->lr;
 923     regs.xer = cpu_read_xer(env);
 924     regs.msr = env->msr;
 925     regs.pc = env->nip;
 926
 927     regs.srr0 = env->spr[SPR_SRR0];
 928     regs.srr1 = env->spr[SPR_SRR1];
 929
 930     regs.sprg0 = env->spr[SPR_SPRG0];
 931     regs.sprg1 = env->spr[SPR_SPRG1];
 932     regs.sprg2 = env->spr[SPR_SPRG2];
 933     regs.sprg3 = env->spr[SPR_SPRG3];
 934     regs.sprg4 = env->spr[SPR_SPRG4];
 935     regs.sprg5 = env->spr[SPR_SPRG5];
 936     regs.sprg6 = env->spr[SPR_SPRG6];
 937     regs.sprg7 = env->spr[SPR_SPRG7];
 938
 939     regs.pid = env->spr[SPR_BOOKE_PID];
 940
 941     for (i = 0; i < 32; i++) {
 942         regs.gpr[i] = env->gpr[i];
 943     }
 944
 945     regs.cr = 0;
 946     for (i = 0; i < 8; i++) {
 947         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 948     }
 949
 950     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 951     if (ret < 0) {
 952         return ret;
 953     }
 954
 955     kvm_put_fp(cs);
 956
 957     if (env->tlb_dirty) {
 958         kvm_sw_tlb_put(cpu);
 959         env->tlb_dirty = false;
 960     }
 961
 962     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 963         ret = kvmppc_put_books_sregs(cpu);
 964         if (ret < 0) {
 965             return ret;
 966         }
 967     }
 968
 969     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 970         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 971     }
 972
 973     if (cap_one_reg) {
 974         int i;
 975
 976         /*
 977          * We deliberately ignore errors here, for kernels which have
 978          * the ONE_REG calls, but don't support the specific
 979          * registers, there's a reasonable chance things will still
 980          * work, at least until we try to migrate.
 981          */
 982         for (i = 0; i < 1024; i++) {
 983             uint64_t id = env->spr_cb[i].one_reg_id;
 984
 985             if (id != 0) {
 986                 kvm_put_one_spr(cs, id, i);
 987             }
 988         }
 989
 990 #ifdef TARGET_PPC64
 991         if (msr_ts) {
 992             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
 993                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
 994             }
 995             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
 996                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
 997             }
 998             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
 999             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1000             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1001             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1002             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1003             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1004             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1005             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1006             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1007             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1008         }
1009
1010         if (cap_papr) {
1011             if (kvm_put_vpa(cs) < 0) {
1012                 trace_kvm_failed_put_vpa();
1013             }
1014         }
1015
1016         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1017 #endif /* TARGET_PPC64 */
1018     }
1019
1020     return ret;
1021 }
1022
1023 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1024 {
1025      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1026 }
1027
1028 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1029 {
1030     CPUPPCState *env = &cpu->env;
1031     struct kvm_sregs sregs;
1032     int ret;
1033
1034     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1035     if (ret < 0) {
1036         return ret;
1037     }
1038
1039     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1040         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1041         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1042         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1043         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1044         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1045         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1046         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1047         env->spr[SPR_DECR] = sregs.u.e.dec;
1048         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1049         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1050         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1051     }
1052
1053     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1054         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1055         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1056         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1057         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1058         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1059     }
1060
1061     if (sregs.u.e.features & KVM_SREGS_E_64) {
1062         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1063     }
1064
1065     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1066         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1067     }
1068
1069     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1070         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1071         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1072         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1073         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1074         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1075         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1076         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1077         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1078         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1079         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1080         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1081         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1082         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1083         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1084         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1085         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1086         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1087         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1088         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1089         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1090         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1091         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1092         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1093         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1094         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1095         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1096         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1097         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1098         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1099         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1100         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1101         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1102
1103         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1104             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1105             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1106             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1107             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1108             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1109             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1110         }
1111
1112         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1113             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1114             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1115         }
1116
1117         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1118             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1119             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1120             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1121             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1122         }
1123     }
1124
1125     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1126         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1127         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1128         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1129         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1130         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1131         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1132         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1133         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1134         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1135         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1136     }
1137
1138     if (sregs.u.e.features & KVM_SREGS_EXP) {
1139         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1140     }
1141
1142     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1143         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1144         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1145     }
1146
1147     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1148         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1149         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1150         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1151
1152         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1153             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1154             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1155         }
1156     }
1157
1158     return 0;
1159 }
1160
1161 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1162 {
1163     CPUPPCState *env = &cpu->env;
1164     struct kvm_sregs sregs;
1165     int ret;
1166     int i;
1167
1168     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1169     if (ret < 0) {
1170         return ret;
1171     }
1172
1173     if (!cpu->vhyp) {
1174         ppc_store_sdr1(env, sregs.u.s.sdr1);
1175     }
1176
1177     /* Sync SLB */
1178 #ifdef TARGET_PPC64
1179     /*
1180      * The packed SLB array we get from KVM_GET_SREGS only contains
1181      * information about valid entries. So we flush our internal copy
1182      * to get rid of stale ones, then put all valid SLB entries back
1183      * in.
1184      */
1185     memset(env->slb, 0, sizeof(env->slb));
1186     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1187         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1188         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1189         /*
1190          * Only restore valid entries
1191          */
1192         if (rb & SLB_ESID_V) {
1193             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1194         }
1195     }
1196 #endif
1197
1198     /* Sync SRs */
1199     for (i = 0; i < 16; i++) {
1200         env->sr[i] = sregs.u.s.ppc32.sr[i];
1201     }
1202
1203     /* Sync BATs */
1204     for (i = 0; i < 8; i++) {
1205         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1206         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1207         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1208         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1209     }
1210
1211     return 0;
1212 }
1213
1214 int kvm_arch_get_registers(CPUState *cs)
1215 {
1216     PowerPCCPU *cpu = POWERPC_CPU(cs);
1217     CPUPPCState *env = &cpu->env;
1218     struct kvm_regs regs;
1219     uint32_t cr;
1220     int i, ret;
1221
1222     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1223     if (ret < 0) {
1224         return ret;
1225     }
1226
1227     cr = regs.cr;
1228     for (i = 7; i >= 0; i--) {
1229         env->crf[i] = cr & 15;
1230         cr >>= 4;
1231     }
1232
1233     env->ctr = regs.ctr;
1234     env->lr = regs.lr;
1235     cpu_write_xer(env, regs.xer);
1236     env->msr = regs.msr;
1237     env->nip = regs.pc;
1238
1239     env->spr[SPR_SRR0] = regs.srr0;
1240     env->spr[SPR_SRR1] = regs.srr1;
1241
1242     env->spr[SPR_SPRG0] = regs.sprg0;
1243     env->spr[SPR_SPRG1] = regs.sprg1;
1244     env->spr[SPR_SPRG2] = regs.sprg2;
1245     env->spr[SPR_SPRG3] = regs.sprg3;
1246     env->spr[SPR_SPRG4] = regs.sprg4;
1247     env->spr[SPR_SPRG5] = regs.sprg5;
1248     env->spr[SPR_SPRG6] = regs.sprg6;
1249     env->spr[SPR_SPRG7] = regs.sprg7;
1250
1251     env->spr[SPR_BOOKE_PID] = regs.pid;
1252
1253     for (i = 0; i < 32; i++) {
1254         env->gpr[i] = regs.gpr[i];
1255     }
1256
1257     kvm_get_fp(cs);
1258
1259     if (cap_booke_sregs) {
1260         ret = kvmppc_get_booke_sregs(cpu);
1261         if (ret < 0) {
1262             return ret;
1263         }
1264     }
1265
1266     if (cap_segstate) {
1267         ret = kvmppc_get_books_sregs(cpu);
1268         if (ret < 0) {
1269             return ret;
1270         }
1271     }
1272
1273     if (cap_hior) {
1274         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1275     }
1276
1277     if (cap_one_reg) {
1278         int i;
1279
1280         /*
1281          * We deliberately ignore errors here, for kernels which have
1282          * the ONE_REG calls, but don't support the specific
1283          * registers, there's a reasonable chance things will still
1284          * work, at least until we try to migrate.
1285          */
1286         for (i = 0; i < 1024; i++) {
1287             uint64_t id = env->spr_cb[i].one_reg_id;
1288
1289             if (id != 0) {
1290                 kvm_get_one_spr(cs, id, i);
1291             }
1292         }
1293
1294 #ifdef TARGET_PPC64
1295         if (msr_ts) {
1296             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1297                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1298             }
1299             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1300                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1301             }
1302             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1303             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1304             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1305             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1306             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1307             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1308             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1309             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1310             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1311             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1312         }
1313
1314         if (cap_papr) {
1315             if (kvm_get_vpa(cs) < 0) {
1316                 trace_kvm_failed_get_vpa();
1317             }
1318         }
1319
1320         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1321 #endif
1322     }
1323
1324     return 0;
1325 }
1326
1327 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1328 {
1329     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1330
1331     if (irq != PPC_INTERRUPT_EXT) {
1332         return 0;
1333     }
1334
1335     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1336         return 0;
1337     }
1338
1339     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1340
1341     return 0;
1342 }
1343
/*
 * PPC_INPUT_INT is the bit index that kvm_arch_pre_run() tests in
 * env->irq_input_state before injecting an interrupt.  The constant it
 * aliases depends on whether this is a TARGET_PPC64 build.
 */
#if defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1349
1350 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1351 {
1352     PowerPCCPU *cpu = POWERPC_CPU(cs);
1353     CPUPPCState *env = &cpu->env;
1354     int r;
1355     unsigned irq;
1356
1357     qemu_mutex_lock_iothread();
1358
1359     /*
1360      * PowerPC QEMU tracks the various core input pins (interrupt,
1361      * critical interrupt, reset, etc) in PPC-specific
1362      * env->irq_input_state.
1363      */
1364     if (!cap_interrupt_level &&
1365         run->ready_for_interrupt_injection &&
1366         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1367         (env->irq_input_state & (1 << PPC_INPUT_INT)))
1368     {
1369         /*
1370          * For now KVM disregards the 'irq' argument. However, in the
1371          * future KVM could cache it in-kernel to avoid a heavyweight
1372          * exit when reading the UIC.
1373          */
1374         irq = KVM_INTERRUPT_SET;
1375
1376         trace_kvm_injected_interrupt(irq);
1377         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1378         if (r < 0) {
1379             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1380         }
1381
1382         /* Always wake up soon in case the interrupt was level based */
1383         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1384                        (NANOSECONDS_PER_SECOND / 50));
1385     }
1386
1387     /*
1388      * We don't know if there are more interrupts pending after
1389      * this. However, the guest will return to userspace in the course
1390      * of handling this one anyways, so we will get a chance to
1391      * deliver the rest.
1392      */
1393
1394     qemu_mutex_unlock_iothread();
1395 }
1396
1397 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1398 {
1399     return MEMTXATTRS_UNSPECIFIED;
1400 }
1401
1402 int kvm_arch_process_async_events(CPUState *cs)
1403 {
1404     return cs->halted;
1405 }
1406
1407 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1408 {
1409     CPUState *cs = CPU(cpu);
1410     CPUPPCState *env = &cpu->env;
1411
1412     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1413         cs->halted = 1;
1414         cs->exception_index = EXCP_HLT;
1415     }
1416
1417     return 0;
1418 }
1419
1420 /* map dcr access to existing qemu dcr emulation */
1421 static int kvmppc_handle_dcr_read(CPUPPCState *env,
1422                                   uint32_t dcrn, uint32_t *data)
1423 {
1424     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
1425         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1426     }
1427
1428     return 0;
1429 }
1430
1431 static int kvmppc_handle_dcr_write(CPUPPCState *env,
1432                                    uint32_t dcrn, uint32_t data)
1433 {
1434     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
1435         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1436     }
1437
1438     return 0;
1439 }
1440
1441 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1442 {
1443     /* Mixed endian case is not handled */
1444     uint32_t sc = debug_inst_opcode;
1445
1446     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1447                             sizeof(sc), 0) ||
1448         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1449         return -EINVAL;
1450     }
1451
1452     return 0;
1453 }
1454
1455 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1456 {
1457     uint32_t sc;
1458
1459     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1460         sc != debug_inst_opcode ||
1461         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1462                             sizeof(sc), 1)) {
1463         return -EINVAL;
1464     }
1465
1466     return 0;
1467 }
1468
1469 static int find_hw_breakpoint(target_ulong addr, int type)
1470 {
1471     int n;
1472
1473     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1474            <= ARRAY_SIZE(hw_debug_points));
1475
1476     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1477         if (hw_debug_points[n].addr == addr &&
1478              hw_debug_points[n].type == type) {
1479             return n;
1480         }
1481     }
1482
1483     return -1;
1484 }
1485
1486 static int find_hw_watchpoint(target_ulong addr, int *flag)
1487 {
1488     int n;
1489
1490     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1491     if (n >= 0) {
1492         *flag = BP_MEM_ACCESS;
1493         return n;
1494     }
1495
1496     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1497     if (n >= 0) {
1498         *flag = BP_MEM_WRITE;
1499         return n;
1500     }
1501
1502     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1503     if (n >= 0) {
1504         *flag = BP_MEM_READ;
1505         return n;
1506     }
1507
1508     return -1;
1509 }
1510
1511 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1512                                   target_ulong len, int type)
1513 {
1514     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1515         return -ENOBUFS;
1516     }
1517
1518     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1519     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1520
1521     switch (type) {
1522     case GDB_BREAKPOINT_HW:
1523         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1524             return -ENOBUFS;
1525         }
1526
1527         if (find_hw_breakpoint(addr, type) >= 0) {
1528             return -EEXIST;
1529         }
1530
1531         nb_hw_breakpoint++;
1532         break;
1533
1534     case GDB_WATCHPOINT_WRITE:
1535     case GDB_WATCHPOINT_READ:
1536     case GDB_WATCHPOINT_ACCESS:
1537         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1538             return -ENOBUFS;
1539         }
1540
1541         if (find_hw_breakpoint(addr, type) >= 0) {
1542             return -EEXIST;
1543         }
1544
1545         nb_hw_watchpoint++;
1546         break;
1547
1548     default:
1549         return -ENOSYS;
1550     }
1551
1552     return 0;
1553 }
1554
1555 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1556                                   target_ulong len, int type)
1557 {
1558     int n;
1559
1560     n = find_hw_breakpoint(addr, type);
1561     if (n < 0) {
1562         return -ENOENT;
1563     }
1564
1565     switch (type) {
1566     case GDB_BREAKPOINT_HW:
1567         nb_hw_breakpoint--;
1568         break;
1569
1570     case GDB_WATCHPOINT_WRITE:
1571     case GDB_WATCHPOINT_READ:
1572     case GDB_WATCHPOINT_ACCESS:
1573         nb_hw_watchpoint--;
1574         break;
1575
1576     default:
1577         return -ENOSYS;
1578     }
1579     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1580
1581     return 0;
1582 }
1583
1584 void kvm_arch_remove_all_hw_breakpoints(void)
1585 {
1586     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1587 }
1588
1589 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1590 {
1591     int n;
1592
1593     /* Software Breakpoint updates */
1594     if (kvm_sw_breakpoints_active(cs)) {
1595         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1596     }
1597
1598     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1599            <= ARRAY_SIZE(hw_debug_points));
1600     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1601
1602     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1603         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1604         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1605         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1606             switch (hw_debug_points[n].type) {
1607             case GDB_BREAKPOINT_HW:
1608                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1609                 break;
1610             case GDB_WATCHPOINT_WRITE:
1611                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1612                 break;
1613             case GDB_WATCHPOINT_READ:
1614                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1615                 break;
1616             case GDB_WATCHPOINT_ACCESS:
1617                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1618                                         KVMPPC_DEBUG_WATCH_READ;
1619                 break;
1620             default:
1621                 cpu_abort(cs, "Unsupported breakpoint type\n");
1622             }
1623             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1624         }
1625     }
1626 }
1627
1628 static int kvm_handle_hw_breakpoint(CPUState *cs,
1629                                     struct kvm_debug_exit_arch *arch_info)
1630 {
1631     int handle = 0;
1632     int n;
1633     int flag = 0;
1634
1635     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1636         if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1637             n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1638             if (n >= 0) {
1639                 handle = 1;
1640             }
1641         } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1642                                         KVMPPC_DEBUG_WATCH_WRITE)) {
1643             n = find_hw_watchpoint(arch_info->address,  &flag);
1644             if (n >= 0) {
1645                 handle = 1;
1646                 cs->watchpoint_hit = &hw_watchpoint;
1647                 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1648                 hw_watchpoint.flags = flag;
1649             }
1650         }
1651     }
1652     return handle;
1653 }
1654
/* Result used by kvm_handle_debug() for a single-step exit: always 1. */
static int kvm_handle_singlestep(void)
{
    const int handled = 1;

    return handled;
}
1659
/* Result used by kvm_handle_debug() for a software breakpoint: always 1. */
static int kvm_handle_sw_breakpoint(void)
{
    const int handled = 1;

    return handled;
}
1664
1665 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1666 {
1667     CPUState *cs = CPU(cpu);
1668     CPUPPCState *env = &cpu->env;
1669     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1670
1671     if (cs->singlestep_enabled) {
1672         return kvm_handle_singlestep();
1673     }
1674
1675     if (arch_info->status) {
1676         return kvm_handle_hw_breakpoint(cs, arch_info);
1677     }
1678
1679     if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1680         return kvm_handle_sw_breakpoint();
1681     }
1682
1683     /*
1684      * QEMU is not able to handle debug exception, so inject
1685      * program exception to guest;
1686      * Yes program exception NOT debug exception !!
1687      * When QEMU is using debug resources then debug exception must
1688      * be always set. To achieve this we set MSR_DE and also set
1689      * MSRP_DEP so guest cannot change MSR_DE.
1690      * When emulating debug resource for guest we want guest
1691      * to control MSR_DE (enable/disable debug interrupt on need).
1692      * Supporting both configurations are NOT possible.
1693      * So the result is that we cannot share debug resources
1694      * between QEMU and Guest on BOOKE architecture.
1695      * In the current design QEMU gets the priority over guest,
1696      * this means that if QEMU is using debug resources then guest
1697      * cannot use them;
1698      * For software breakpoint QEMU uses a privileged instruction;
1699      * So there cannot be any reason that we are here for guest
1700      * set debug exception, only possibility is guest executed a
1701      * privileged / illegal instruction and that's why we are
1702      * injecting a program interrupt.
1703      */
1704     cpu_synchronize_state(cs);
1705     /*
1706      * env->nip is PC, so increment this by 4 to use
1707      * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1708      */
1709     env->nip += 4;
1710     cs->exception_index = POWERPC_EXCP_PROGRAM;
1711     env->error_code = POWERPC_EXCP_INVAL;
1712     ppc_cpu_do_interrupt(cs);
1713
1714     return 0;
1715 }
1716
1717 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1718 {
1719     PowerPCCPU *cpu = POWERPC_CPU(cs);
1720     CPUPPCState *env = &cpu->env;
1721     int ret;
1722
1723     qemu_mutex_lock_iothread();
1724
1725     switch (run->exit_reason) {
1726     case KVM_EXIT_DCR:
1727         if (run->dcr.is_write) {
1728             trace_kvm_handle_dcr_write();
1729             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1730         } else {
1731             trace_kvm_handle_dcr_read();
1732             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1733         }
1734         break;
1735     case KVM_EXIT_HLT:
1736         trace_kvm_handle_halt();
1737         ret = kvmppc_handle_halt(cpu);
1738         break;
1739 #if defined(TARGET_PPC64)
1740     case KVM_EXIT_PAPR_HCALL:
1741         trace_kvm_handle_papr_hcall();
1742         run->papr_hcall.ret = spapr_hypercall(cpu,
1743                                               run->papr_hcall.nr,
1744                                               run->papr_hcall.args);
1745         ret = 0;
1746         break;
1747 #endif
1748     case KVM_EXIT_EPR:
1749         trace_kvm_handle_epr();
1750         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1751         ret = 0;
1752         break;
1753     case KVM_EXIT_WATCHDOG:
1754         trace_kvm_handle_watchdog_expiry();
1755         watchdog_perform_action();
1756         ret = 0;
1757         break;
1758
1759     case KVM_EXIT_DEBUG:
1760         trace_kvm_handle_debug_exception();
1761         if (kvm_handle_debug(cpu, run)) {
1762             ret = EXCP_DEBUG;
1763             break;
1764         }
1765         /* re-enter, this exception was guest-internal */
1766         ret = 0;
1767         break;
1768
1769     default:
1770         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1771         ret = -1;
1772         break;
1773     }
1774
1775     qemu_mutex_unlock_iothread();
1776     return ret;
1777 }
1778
1779 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1780 {
1781     CPUState *cs = CPU(cpu);
1782     uint32_t bits = tsr_bits;
1783     struct kvm_one_reg reg = {
1784         .id = KVM_REG_PPC_OR_TSR,
1785         .addr = (uintptr_t) &bits,
1786     };
1787
1788     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1789 }
1790
1791 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1792 {
1793
1794     CPUState *cs = CPU(cpu);
1795     uint32_t bits = tsr_bits;
1796     struct kvm_one_reg reg = {
1797         .id = KVM_REG_PPC_CLEAR_TSR,
1798         .addr = (uintptr_t) &bits,
1799     };
1800
1801     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1802 }
1803
1804 int kvmppc_set_tcr(PowerPCCPU *cpu)
1805 {
1806     CPUState *cs = CPU(cpu);
1807     CPUPPCState *env = &cpu->env;
1808     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1809
1810     struct kvm_one_reg reg = {
1811         .id = KVM_REG_PPC_TCR,
1812         .addr = (uintptr_t) &tcr,
1813     };
1814
1815     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1816 }
1817
1818 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1819 {
1820     CPUState *cs = CPU(cpu);
1821     int ret;
1822
1823     if (!kvm_enabled()) {
1824         return -1;
1825     }
1826
1827     if (!cap_ppc_watchdog) {
1828         printf("warning: KVM does not support watchdog");
1829         return -1;
1830     }
1831
1832     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1833     if (ret < 0) {
1834         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1835                 __func__, strerror(-ret));
1836         return ret;
1837     }
1838
1839     return ret;
1840 }
1841
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy
 * the whole line into @value (at most @len bytes, via pstrcpy()).
 *
 * Returns 0 when such a line was found, -1 when the file cannot be
 * opened or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int prefix_len = strlen(field);
    int status = -1;
    FILE *cpuinfo = fopen("/proc/cpuinfo", "r");

    if (!cpuinfo) {
        return -1;
    }

    while (fgets(line, sizeof(line), cpuinfo)) {
        if (strncmp(line, field, prefix_len) == 0) {
            pstrcpy(value, len, line);
            status = 0;
            break;
        }
        /* Stop scanning at a line that reads back as an empty string */
        if (line[0] == '\0') {
            break;
        }
    }

    fclose(cpuinfo);

    return status;
}
1869
1870 uint32_t kvmppc_get_tbfreq(void)
1871 {
1872     char line[512];
1873     char *ns;
1874     uint32_t retval = NANOSECONDS_PER_SECOND;
1875
1876     if (read_cpuinfo("timebase", line, sizeof(line))) {
1877         return retval;
1878     }
1879
1880     ns = strchr(line, ':');
1881     if (!ns) {
1882         return retval;
1883     }
1884
1885     ns++;
1886
1887     return atoi(ns);
1888 }
1889
1890 bool kvmppc_get_host_serial(char **value)
1891 {
1892     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1893                                NULL);
1894 }
1895
1896 bool kvmppc_get_host_model(char **value)
1897 {
1898     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1899 }
1900
1901 /* Try to find a device tree node for a CPU with clock-frequency property */
1902 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1903 {
1904     struct dirent *dirp;
1905     DIR *dp;
1906
1907     dp = opendir(PROC_DEVTREE_CPU);
1908     if (!dp) {
1909         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1910         return -1;
1911     }
1912
1913     buf[0] = '\0';
1914     while ((dirp = readdir(dp)) != NULL) {
1915         FILE *f;
1916         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1917                  dirp->d_name);
1918         f = fopen(buf, "r");
1919         if (f) {
1920             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1921             fclose(f);
1922             break;
1923         }
1924         buf[0] = '\0';
1925     }
1926     closedir(dp);
1927     if (buf[0] == '\0') {
1928         printf("Unknown host!\n");
1929         return -1;
1930     }
1931
1932     return 0;
1933 }
1934
/*
 * Read a big-endian integer property from the file @filename.
 *
 * Returns the value converted to host byte order when the file holds
 * exactly 4 or 8 bytes, (uint64_t)-1 when the file cannot be opened, and
 * 0 for any other length.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    FILE *prop = fopen(filename, "rb");
    int nread;

    if (prop == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), prop);
    fclose(prop);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1961
1962 /*
1963  * Read a CPU node property from the host device tree that's a single
1964  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1965  * (can't find or open the property, or doesn't understand the format)
1966  */
1967 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1968 {
1969     char buf[PATH_MAX], *tmp;
1970     uint64_t val;
1971
1972     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1973         return -1;
1974     }
1975
1976     tmp = g_strdup_printf("%s/%s", buf, propname);
1977     val = kvmppc_read_int_dt(tmp);
1978     g_free(tmp);
1979
1980     return val;
1981 }
1982
/*
 * Return the "clock-frequency" property of the host CPU node, as read by
 * kvmppc_read_int_cpu_dt().
 */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1987
/*
 * Return the "ibm,dec-bits" property of the host CPU node, or 0 when the
 * value read is not positive.
 */
static int kvmppc_get_dec_bits(void)
{
    int nr_bits = kvmppc_read_int_cpu_dt("ibm,dec-bits");

    return nr_bits > 0 ? nr_bits : 0;
}
1997
1998 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1999 {
2000     CPUState *cs = env_cpu(env);
2001
2002     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2003         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2004         return 0;
2005     }
2006
2007     return 1;
2008 }
2009
2010 int kvmppc_get_hasidle(CPUPPCState *env)
2011 {
2012     struct kvm_ppc_pvinfo pvinfo;
2013
2014     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2015         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2016         return 1;
2017     }
2018
2019     return 0;
2020 }
2021
2022 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2023 {
2024     uint32_t *hc = (uint32_t *)buf;
2025     struct kvm_ppc_pvinfo pvinfo;
2026
2027     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2028         memcpy(buf, pvinfo.hcall, buf_len);
2029         return 0;
2030     }
2031
2032     /*
2033      * Fallback to always fail hypercalls regardless of endianness:
2034      *
2035      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2036      *     li r3, -1
2037      *     b .+8       (becomes nop in wrong endian)
2038      *     bswap32(li r3, -1)
2039      */
2040
2041     hc[0] = cpu_to_be32(0x08000048);
2042     hc[1] = cpu_to_be32(0x3860ffff);
2043     hc[2] = cpu_to_be32(0x48000008);
2044     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2045
2046     return 1;
2047 }
2048
2049 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2050 {
2051     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2052 }
2053
2054 void kvmppc_enable_logical_ci_hcalls(void)
2055 {
2056     /*
2057      * FIXME: it would be nice if we could detect the cases where
2058      * we're using a device which requires the in kernel
2059      * implementation of these hcalls, but the kernel lacks them and
2060      * produce a warning.
2061      */
2062     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2063     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2064 }
2065
2066 void kvmppc_enable_set_mode_hcall(void)
2067 {
2068     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2069 }
2070
2071 void kvmppc_enable_clear_ref_mod_hcalls(void)
2072 {
2073     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2074     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2075 }
2076
2077 void kvmppc_enable_h_page_init(void)
2078 {
2079     kvmppc_enable_hcall(kvm_state, H_PAGE_INIT);
2080 }
2081
2082 void kvmppc_set_papr(PowerPCCPU *cpu)
2083 {
2084     CPUState *cs = CPU(cpu);
2085     int ret;
2086
2087     if (!kvm_enabled()) {
2088         return;
2089     }
2090
2091     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2092     if (ret) {
2093         error_report("This vCPU type or KVM version does not support PAPR");
2094         exit(1);
2095     }
2096
2097     /*
2098      * Update the capability flag so we sync the right information
2099      * with kvm
2100      */
2101     cap_papr = 1;
2102 }
2103
2104 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2105 {
2106     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2107 }
2108
2109 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2110 {
2111     CPUState *cs = CPU(cpu);
2112     int ret;
2113
2114     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2115     if (ret && mpic_proxy) {
2116         error_report("This KVM version does not support EPR");
2117         exit(1);
2118     }
2119 }
2120
2121 int kvmppc_smt_threads(void)
2122 {
2123     return cap_ppc_smt ? cap_ppc_smt : 1;
2124 }
2125
2126 int kvmppc_set_smt_threads(int smt)
2127 {
2128     int ret;
2129
2130     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_SMT, 0, smt, 0);
2131     if (!ret) {
2132         cap_ppc_smt = smt;
2133     }
2134     return ret;
2135 }
2136
2137 void kvmppc_hint_smt_possible(Error **errp)
2138 {
2139     int i;
2140     GString *g;
2141     char *s;
2142
2143     assert(kvm_enabled());
2144     if (cap_ppc_smt_possible) {
2145         g = g_string_new("Available VSMT modes:");
2146         for (i = 63; i >= 0; i--) {
2147             if ((1UL << i) & cap_ppc_smt_possible) {
2148                 g_string_append_printf(g, " %lu", (1UL << i));
2149             }
2150         }
2151         s = g_string_free(g, false);
2152         error_append_hint(errp, "%s.\n", s);
2153         g_free(s);
2154     } else {
2155         error_append_hint(errp,
2156                           "This KVM seems to be too old to support VSMT.\n");
2157     }
2158 }
2159
2160
2161 #ifdef TARGET_PPC64
2162 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2163 {
2164     struct kvm_ppc_smmu_info info;
2165     long rampagesize, best_page_shift;
2166     int i;
2167
2168     /*
2169      * Find the largest hardware supported page size that's less than
2170      * or equal to the (logical) backing page size of guest RAM
2171      */
2172     kvm_get_smmu_info(&info, &error_fatal);
2173     rampagesize = qemu_minrampagesize();
2174     best_page_shift = 0;
2175
2176     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2177         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2178
2179         if (!sps->page_shift) {
2180             continue;
2181         }
2182
2183         if ((sps->page_shift > best_page_shift)
2184             && ((1UL << sps->page_shift) <= rampagesize)) {
2185             best_page_shift = sps->page_shift;
2186         }
2187     }
2188
2189     return MIN(current_size,
2190                1ULL << (best_page_shift + hash_shift - 7));
2191 }
2192 #endif
2193
2194 bool kvmppc_spapr_use_multitce(void)
2195 {
2196     return cap_spapr_multitce;
2197 }
2198
2199 int kvmppc_spapr_enable_inkernel_multitce(void)
2200 {
2201     int ret;
2202
2203     ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2204                             H_PUT_TCE_INDIRECT, 1);
2205     if (!ret) {
2206         ret = kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_ENABLE_HCALL, 0,
2207                                 H_STUFF_TCE, 1);
2208     }
2209
2210     return ret;
2211 }
2212
2213 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2214                               uint64_t bus_offset, uint32_t nb_table,
2215                               int *pfd, bool need_vfio)
2216 {
2217     long len;
2218     int fd;
2219     void *table;
2220
2221     /*
2222      * Must set fd to -1 so we don't try to munmap when called for
2223      * destroying the table, which the upper layers -will- do
2224      */
2225     *pfd = -1;
2226     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2227         return NULL;
2228     }
2229
2230     if (cap_spapr_tce_64) {
2231         struct kvm_create_spapr_tce_64 args = {
2232             .liobn = liobn,
2233             .page_shift = page_shift,
2234             .offset = bus_offset >> page_shift,
2235             .size = nb_table,
2236             .flags = 0
2237         };
2238         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2239         if (fd < 0) {
2240             fprintf(stderr,
2241                     "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2242                     liobn);
2243             return NULL;
2244         }
2245     } else if (cap_spapr_tce) {
2246         uint64_t window_size = (uint64_t) nb_table << page_shift;
2247         struct kvm_create_spapr_tce args = {
2248             .liobn = liobn,
2249             .window_size = window_size,
2250         };
2251         if ((window_size != args.window_size) || bus_offset) {
2252             return NULL;
2253         }
2254         fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2255         if (fd < 0) {
2256             fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2257                     liobn);
2258             return NULL;
2259         }
2260     } else {
2261         return NULL;
2262     }
2263
2264     len = nb_table * sizeof(uint64_t);
2265     /* FIXME: round this up to page size */
2266
2267     table = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
2268     if (table == MAP_FAILED) {
2269         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2270                 liobn);
2271         close(fd);
2272         return NULL;
2273     }
2274
2275     *pfd = fd;
2276     return table;
2277 }
2278
/*
 * Unmap a TCE table of @nb_table 64-bit entries at @table and close the
 * file descriptor @fd that backs it.
 *
 * Returns -1 without doing anything when @fd is negative.  Otherwise both
 * the unmap and the close are always attempted, failures are reported on
 * stderr, and 0 is returned.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if (munmap(table, len) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    /* Close the descriptor even if the unmap failed, so it is not leaked */
    if (close(fd) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
    }

    return 0;
}
2297
2298 int kvmppc_reset_htab(int shift_hint)
2299 {
2300     uint32_t shift = shift_hint;
2301
2302     if (!kvm_enabled()) {
2303         /* Full emulation, tell caller to allocate htab itself */
2304         return 0;
2305     }
2306     if (kvm_vm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2307         int ret;
2308         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2309         if (ret == -ENOTTY) {
2310             /*
2311              * At least some versions of PR KVM advertise the
2312              * capability, but don't implement the ioctl().  Oops.
2313              * Return 0 so that we allocate the htab in qemu, as is
2314              * correct for PR.
2315              */
2316             return 0;
2317         } else if (ret < 0) {
2318             return ret;
2319         }
2320         return shift;
2321     }
2322
2323     /*
2324      * We have a kernel that predates the htab reset calls.  For PR
2325      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2326      * this era, it has allocated a 16MB fixed size hash table
2327      * already.
2328      */
2329     if (kvmppc_is_pr(kvm_state)) {
2330         /* PR - tell caller to allocate htab */
2331         return 0;
2332     } else {
2333         /* HV - assume 16MB kernel allocated htab */
2334         return 24;
2335     }
2336 }
2337
2338 static inline uint32_t mfpvr(void)
2339 {
2340     uint32_t pvr;
2341
2342     asm ("mfpvr %0"
2343          : "=r"(pvr));
2344     return pvr;
2345 }
2346
/* Set the bits of @flags in *@word when @on is true, clear them otherwise. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2355
2356 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2357 {
2358     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2359     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2360     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2361
2362     /* Now fix up the class with information we can query from the host */
2363     pcc->pvr = mfpvr();
2364
2365     alter_insns(&pcc->insns_flags, PPC_ALTIVEC,
2366                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_ALTIVEC);
2367     alter_insns(&pcc->insns_flags2, PPC2_VSX,
2368                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_VSX);
2369     alter_insns(&pcc->insns_flags2, PPC2_DFP,
2370                 qemu_getauxval(AT_HWCAP) & PPC_FEATURE_HAS_DFP);
2371
2372     if (dcache_size != -1) {
2373         pcc->l1_dcache_size = dcache_size;
2374     }
2375
2376     if (icache_size != -1) {
2377         pcc->l1_icache_size = icache_size;
2378     }
2379
2380 #if defined(TARGET_PPC64)
2381     pcc->radix_page_info = kvm_get_radix_page_info();
2382
2383     if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
2384         /*
2385          * POWER9 DD1 has some bugs which make it not really ISA 3.00
2386          * compliant.  More importantly, advertising ISA 3.00
2387          * architected mode may prevent guests from activating
2388          * necessary DD1 workarounds.
2389          */
2390         pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
2391                                 | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
2392     }
2393 #endif /* defined(TARGET_PPC64) */
2394 }
2395
2396 bool kvmppc_has_cap_epr(void)
2397 {
2398     return cap_epr;
2399 }
2400
2401 bool kvmppc_has_cap_fixup_hcalls(void)
2402 {
2403     return cap_fixup_hcalls;
2404 }
2405
2406 bool kvmppc_has_cap_htm(void)
2407 {
2408     return cap_htm;
2409 }
2410
2411 bool kvmppc_has_cap_mmu_radix(void)
2412 {
2413     return cap_mmu_radix;
2414 }
2415
2416 bool kvmppc_has_cap_mmu_hash_v3(void)
2417 {
2418     return cap_mmu_hash_v3;
2419 }
2420
/*
 * Report whether the host PVR, masked with CPU_POWERPC_POWER_SERVER_MASK,
 * matches one of the POWER8E, POWER8NVL or POWER8 base values.  Always
 * false when TARGET_PPC64 is not defined.
 */
static bool kvmppc_power8_host(void)
{
    bool is_power8 = false;
#ifdef TARGET_PPC64
    const uint32_t base_pvr = CPU_POWERPC_POWER_SERVER_MASK & mfpvr();

    if (base_pvr == CPU_POWERPC_POWER8E_BASE ||
        base_pvr == CPU_POWERPC_POWER8NVL_BASE ||
        base_pvr == CPU_POWERPC_POWER8_BASE) {
        is_power8 = true;
    }
#endif /* TARGET_PPC64 */
    return is_power8;
}
2434
2435 static int parse_cap_ppc_safe_cache(struct kvm_ppc_cpu_char c)
2436 {
2437     bool l1d_thread_priv_req = !kvmppc_power8_host();
2438
2439     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_L1D_FLUSH_PR) {
2440         return 2;
2441     } else if ((!l1d_thread_priv_req ||
2442                 c.character & c.character_mask & H_CPU_CHAR_L1D_THREAD_PRIV) &&
2443                (c.character & c.character_mask
2444                 & (H_CPU_CHAR_L1D_FLUSH_ORI30 | H_CPU_CHAR_L1D_FLUSH_TRIG2))) {
2445         return 1;
2446     }
2447
2448     return 0;
2449 }
2450
2451 static int parse_cap_ppc_safe_bounds_check(struct kvm_ppc_cpu_char c)
2452 {
2453     if (~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR) {
2454         return 2;
2455     } else if (c.character & c.character_mask & H_CPU_CHAR_SPEC_BAR_ORI31) {
2456         return 1;
2457     }
2458
2459     return 0;
2460 }
2461
2462 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
2463 {
2464     if ((~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) &&
2465         (~c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) &&
2466         (~c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED)) {
2467         return SPAPR_CAP_FIXED_NA;
2468     } else if (c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) {
2469         return SPAPR_CAP_WORKAROUND;
2470     } else if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
2471         return  SPAPR_CAP_FIXED_CCD;
2472     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
2473         return SPAPR_CAP_FIXED_IBS;
2474     }
2475
2476     return 0;
2477 }
2478
2479 static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c)
2480 {
2481     if (c.character & c.character_mask & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) {
2482         return 1;
2483     }
2484     return 0;
2485 }
2486
2487 bool kvmppc_has_cap_xive(void)
2488 {
2489     return cap_xive;
2490 }
2491
2492 static void kvmppc_get_cpu_characteristics(KVMState *s)
2493 {
2494     struct kvm_ppc_cpu_char c;
2495     int ret;
2496
2497     /* Assume broken */
2498     cap_ppc_safe_cache = 0;
2499     cap_ppc_safe_bounds_check = 0;
2500     cap_ppc_safe_indirect_branch = 0;
2501
2502     ret = kvm_vm_check_extension(s, KVM_CAP_PPC_GET_CPU_CHAR);
2503     if (!ret) {
2504         return;
2505     }
2506     ret = kvm_vm_ioctl(s, KVM_PPC_GET_CPU_CHAR, &c);
2507     if (ret < 0) {
2508         return;
2509     }
2510
2511     cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
2512     cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
2513     cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
2514     cap_ppc_count_cache_flush_assist =
2515         parse_cap_ppc_count_cache_flush_assist(c);
2516 }
2517
2518 int kvmppc_get_cap_safe_cache(void)
2519 {
2520     return cap_ppc_safe_cache;
2521 }
2522
2523 int kvmppc_get_cap_safe_bounds_check(void)
2524 {
2525     return cap_ppc_safe_bounds_check;
2526 }
2527
2528 int kvmppc_get_cap_safe_indirect_branch(void)
2529 {
2530     return cap_ppc_safe_indirect_branch;
2531 }
2532
2533 int kvmppc_get_cap_count_cache_flush_assist(void)
2534 {
2535     return cap_ppc_count_cache_flush_assist;
2536 }
2537
2538 bool kvmppc_has_cap_nested_kvm_hv(void)
2539 {
2540     return !!cap_ppc_nested_kvm_hv;
2541 }
2542
2543 int kvmppc_set_cap_nested_kvm_hv(int enable)
2544 {
2545     return kvm_vm_enable_cap(kvm_state, KVM_CAP_PPC_NESTED_HV, 0, enable);
2546 }
2547
2548 bool kvmppc_has_cap_spapr_vfio(void)
2549 {
2550     return cap_spapr_vfio;
2551 }
2552
2553 int kvmppc_get_cap_large_decr(void)
2554 {
2555     return cap_large_decr;
2556 }
2557
2558 int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
2559 {
2560     CPUState *cs = CPU(cpu);
2561     uint64_t lpcr;
2562
2563     kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2564     /* Do we need to modify the LPCR? */
2565     if (!!(lpcr & LPCR_LD) != !!enable) {
2566         if (enable) {
2567             lpcr |= LPCR_LD;
2568         } else {
2569             lpcr &= ~LPCR_LD;
2570         }
2571         kvm_set_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2572         kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
2573
2574         if (!!(lpcr & LPCR_LD) != !!enable) {
2575             return -1;
2576         }
2577     }
2578
2579     return 0;
2580 }
2581
2582 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2583 {
2584     uint32_t host_pvr = mfpvr();
2585     PowerPCCPUClass *pvr_pcc;
2586
2587     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2588     if (pvr_pcc == NULL) {
2589         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2590     }
2591
2592     return pvr_pcc;
2593 }
2594
2595 static int kvm_ppc_register_host_cpu_type(MachineState *ms)
2596 {
2597     TypeInfo type_info = {
2598         .name = TYPE_HOST_POWERPC_CPU,
2599         .class_init = kvmppc_host_cpu_class_init,
2600     };
2601     MachineClass *mc = MACHINE_GET_CLASS(ms);
2602     PowerPCCPUClass *pvr_pcc;
2603     ObjectClass *oc;
2604     DeviceClass *dc;
2605     int i;
2606
2607     pvr_pcc = kvm_ppc_get_host_cpu_class();
2608     if (pvr_pcc == NULL) {
2609         return -1;
2610     }
2611     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2612     type_register(&type_info);
2613     if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
2614         /* override TCG default cpu type with 'host' cpu model */
2615         mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
2616     }
2617
2618     oc = object_class_by_name(type_info.name);
2619     g_assert(oc);
2620
2621     /*
2622      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2623      * we want "POWER8" to be a "family" alias that points to the current
2624      * host CPU type, too)
2625      */
2626     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2627     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2628         if (strcasecmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2629             char *suffix;
2630
2631             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2632             suffix = strstr(ppc_cpu_aliases[i].model, POWERPC_CPU_TYPE_SUFFIX);
2633             if (suffix) {
2634                 *suffix = 0;
2635             }
2636             break;
2637         }
2638     }
2639
2640     return 0;
2641 }
2642
2643 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2644 {
2645     struct kvm_rtas_token_args args = {
2646         .token = token,
2647     };
2648
2649     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2650         return -ENOENT;
2651     }
2652
2653     strncpy(args.name, function, sizeof(args.name) - 1);
2654
2655     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2656 }
2657
2658 int kvmppc_get_htab_fd(bool write, uint64_t index, Error **errp)
2659 {
2660     struct kvm_get_htab_fd s = {
2661         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2662         .start_index = index,
2663     };
2664     int ret;
2665
2666     if (!cap_htab_fd) {
2667         error_setg(errp, "KVM version doesn't support %s the HPT",
2668                    write ? "writing" : "reading");
2669         return -ENOTSUP;
2670     }
2671
2672     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2673     if (ret < 0) {
2674         error_setg(errp, "Unable to open fd for %s HPT %s KVM: %s",
2675                    write ? "writing" : "reading", write ? "to" : "from",
2676                    strerror(errno));
2677         return -errno;
2678     }
2679
2680     return ret;
2681 }
2682
2683 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2684 {
2685     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2686     uint8_t buf[bufsize];
2687     ssize_t rc;
2688
2689     do {
2690         rc = read(fd, buf, bufsize);
2691         if (rc < 0) {
2692             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2693                     strerror(errno));
2694             return rc;
2695         } else if (rc) {
2696             uint8_t *buffer = buf;
2697             ssize_t n = rc;
2698             while (n) {
2699                 struct kvm_get_htab_header *head =
2700                     (struct kvm_get_htab_header *) buffer;
2701                 size_t chunksize = sizeof(*head) +
2702                      HASH_PTE_SIZE_64 * head->n_valid;
2703
2704                 qemu_put_be32(f, head->index);
2705                 qemu_put_be16(f, head->n_valid);
2706                 qemu_put_be16(f, head->n_invalid);
2707                 qemu_put_buffer(f, (void *)(head + 1),
2708                                 HASH_PTE_SIZE_64 * head->n_valid);
2709
2710                 buffer += chunksize;
2711                 n -= chunksize;
2712             }
2713         }
2714     } while ((rc != 0)
2715              && ((max_ns < 0) ||
2716                  ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2717
2718     return (rc == 0) ? 1 : 0;
2719 }
2720
2721 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2722                            uint16_t n_valid, uint16_t n_invalid)
2723 {
2724     struct kvm_get_htab_header *buf;
2725     size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
2726     ssize_t rc;
2727
2728     buf = alloca(chunksize);
2729     buf->index = index;
2730     buf->n_valid = n_valid;
2731     buf->n_invalid = n_invalid;
2732
2733     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
2734
2735     rc = write(fd, buf, chunksize);
2736     if (rc < 0) {
2737         fprintf(stderr, "Error writing KVM hash table: %s\n",
2738                 strerror(errno));
2739         return rc;
2740     }
2741     if (rc != chunksize) {
2742         /* We should never get a short write on a single chunk */
2743         fprintf(stderr, "Short write, restoring KVM hash table\n");
2744         return -1;
2745     }
2746     return 0;
2747 }
2748
/* Always returns true; @cpu is not consulted. */
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}
2753
/* Intentionally empty: nothing is done with @s here. */
void kvm_arch_init_irq_routing(KVMState *s)
{
}
2757
2758 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2759 {
2760     int fd, rc;
2761     int i;
2762
2763     fd = kvmppc_get_htab_fd(false, ptex, &error_abort);
2764
2765     i = 0;
2766     while (i < n) {
2767         struct kvm_get_htab_header *hdr;
2768         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2769         char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2770
2771         rc = read(fd, buf, sizeof(buf));
2772         if (rc < 0) {
2773             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2774         }
2775
2776         hdr = (struct kvm_get_htab_header *)buf;
2777         while ((i < n) && ((char *)hdr < (buf + rc))) {
2778             int invalid = hdr->n_invalid, valid = hdr->n_valid;
2779
2780             if (hdr->index != (ptex + i)) {
2781                 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2782                          " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2783             }
2784
2785             if (n - i < valid) {
2786                 valid = n - i;
2787             }
2788             memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * valid);
2789             i += valid;
2790
2791             if ((n - i) < invalid) {
2792                 invalid = n - i;
2793             }
2794             memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2795             i += invalid;
2796
2797             hdr = (struct kvm_get_htab_header *)
2798                 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2799         }
2800     }
2801
2802     close(fd);
2803 }
2804
2805 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2806 {
2807     int fd, rc;
2808     struct {
2809         struct kvm_get_htab_header hdr;
2810         uint64_t pte0;
2811         uint64_t pte1;
2812     } buf;
2813
2814     fd = kvmppc_get_htab_fd(true, 0 /* Ignored */, &error_abort);
2815
2816     buf.hdr.n_valid = 1;
2817     buf.hdr.n_invalid = 0;
2818     buf.hdr.index = ptex;
2819     buf.pte0 = cpu_to_be64(pte0);
2820     buf.pte1 = cpu_to_be64(pte1);
2821
2822     rc = write(fd, &buf, sizeof(buf));
2823     if (rc != sizeof(buf)) {
2824         hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2825     }
2826     close(fd);
2827 }
2828
/* No fixup is performed here: the arguments are ignored and 0 is returned. */
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}
2834
/* Nothing is done here: the arguments are ignored and 0 is returned. */
int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}
2840
/* Nothing is done here: @virq is ignored and 0 is returned. */
int kvm_arch_release_virq_post(int virq)
{
    return 0;
}
2845
/* Return the low 16 bits of the MSI @data word. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t low16 = data & 0xffff;

    return low16;
}
2850
2851 int kvmppc_enable_hwrng(void)
2852 {
2853     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2854         return -1;
2855     }
2856
2857     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2858 }
2859
2860 void kvmppc_check_papr_resize_hpt(Error **errp)
2861 {
2862     if (!kvm_enabled()) {
2863         return; /* No KVM, we're good */
2864     }
2865
2866     if (cap_resize_hpt) {
2867         return; /* Kernel has explicit support, we're good */
2868     }
2869
2870     /* Otherwise fallback on looking for PR KVM */
2871     if (kvmppc_is_pr(kvm_state)) {
2872         return;
2873     }
2874
2875     error_setg(errp,
2876                "Hash page table resizing not available with this KVM version");
2877 }
2878
2879 int kvmppc_resize_hpt_prepare(PowerPCCPU *cpu, target_ulong flags, int shift)
2880 {
2881     CPUState *cs = CPU(cpu);
2882     struct kvm_ppc_resize_hpt rhpt = {
2883         .flags = flags,
2884         .shift = shift,
2885     };
2886
2887     if (!cap_resize_hpt) {
2888         return -ENOSYS;
2889     }
2890
2891     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
2892 }
2893
2894 int kvmppc_resize_hpt_commit(PowerPCCPU *cpu, target_ulong flags, int shift)
2895 {
2896     CPUState *cs = CPU(cpu);
2897     struct kvm_ppc_resize_hpt rhpt = {
2898         .flags = flags,
2899         .shift = shift,
2900     };
2901
2902     if (!cap_resize_hpt) {
2903         return -ENOSYS;
2904     }
2905
2906     return kvm_vm_ioctl(cs->kvm_state, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
2907 }
2908
2909 /*
2910  * This is a helper function to detect a post migration scenario
2911  * in which a guest, running as KVM-HV, freezes in cpu_post_load because
2912  * the guest kernel can't handle a PVR value other than the actual host
2913  * PVR in KVM_SET_SREGS, even if pvr_match() returns true.
2914  *
2915  * If we don't have cap_ppc_pvr_compat and we're not running in PR
2916  * (so, we're HV), return true. The workaround itself is done in
2917  * cpu_post_load.
2918  *
2919  * The order here is important: we'll only check for KVM PR as a
2920  * fallback if the guest kernel can't handle the situation itself.
2921  * We need to avoid as much as possible querying the running KVM type
2922  * in QEMU level.
2923  */
2924 bool kvmppc_pvr_workaround_required(PowerPCCPU *cpu)
2925 {
2926     CPUState *cs = CPU(cpu);
2927
2928     if (!kvm_enabled()) {
2929         return false;
2930     }
2931
2932     if (cap_ppc_pvr_compat) {
2933         return false;
2934     }
2935
2936     return !kvmppc_is_pr(cs->kvm_state);
2937 }
2938
2939 void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
2940 {
2941     CPUState *cs = CPU(cpu);
2942
2943     if (kvm_enabled()) {
2944         kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
2945     }
2946 }
2947
2948 void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset)
2949 {
2950     CPUState *cs = CPU(cpu);
2951
2952     if (kvm_enabled()) {
2953         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &tb_offset);
2954     }
2955 }