spapr: Add ibm,processor-radix-AP-encodings to the device tree
target/ppc/kvm.c (blob 9dc2f7fa5578f0f0541f011e28296c5fe1b182a2)
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "cpu-models.h"
28 #include "qemu/timer.h"
29 #include "sysemu/sysemu.h"
30 #include "sysemu/hw_accel.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "exec/ram_addr.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #include "qemu/mmap-alloc.h"
49 #if defined(TARGET_PPC64)
50 #include "hw/ppc/spapr_cpu_core.h"
51 #endif
52 #include "elf.h"
53 #include "sysemu/kvm_int.h"
55 //#define DEBUG_KVM
57 #ifdef DEBUG_KVM
58 #define DPRINTF(fmt, ...) \
59 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
60 #else
61 #define DPRINTF(fmt, ...) \
62 do { } while (0)
63 #endif
65 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
67 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
68 KVM_CAP_LAST_INFO
71 static int cap_interrupt_unset = false;
72 static int cap_interrupt_level = false;
73 static int cap_segstate;
74 static int cap_booke_sregs;
75 static int cap_ppc_smt;
76 static int cap_ppc_rma;
77 static int cap_spapr_tce;
78 static int cap_spapr_tce_64;
79 static int cap_spapr_multitce;
80 static int cap_spapr_vfio;
81 static int cap_hior;
82 static int cap_one_reg;
83 static int cap_epr;
84 static int cap_ppc_watchdog;
85 static int cap_papr;
86 static int cap_htab_fd;
87 static int cap_fixup_hcalls;
88 static int cap_htm; /* Hardware transactional memory support */
90 static uint32_t debug_inst_opcode;
92 /* XXX We have a race condition where we actually have a level triggered
93 * interrupt, but the infrastructure can't expose that yet, so the guest
94 * takes but ignores it, goes to sleep and never gets notified that there's
95 * still an interrupt pending.
97 * As a quick workaround, let's just wake up again 20 ms after we injected
98 * an interrupt. That way we can ensure that we're always reinjecting
99 * interrupts in case the guest swallowed them.
101 static QEMUTimer *idle_timer;
103 static void kvm_kick_cpu(void *opaque)
105 PowerPCCPU *cpu = opaque;
107 qemu_cpu_kick(CPU(cpu));
110 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
111 * should only be used for fallback tests - generally we should use
112 * explicit capabilities for the features we want, rather than
113 * assuming what is/isn't available depending on the KVM variant. */
114 static bool kvmppc_is_pr(KVMState *ks)
116 /* Assume KVM-PR if the GET_PVINFO capability is available */
117 return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
120 static int kvm_ppc_register_host_cpu_type(void);
122 int kvm_arch_init(MachineState *ms, KVMState *s)
124 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
125 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
126 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
127 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
128 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
129 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
130 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
131 cap_spapr_tce_64 = kvm_check_extension(s, KVM_CAP_SPAPR_TCE_64);
132 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
133 cap_spapr_vfio = false;
134 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
135 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
136 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
137 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
138 /* Note: we don't set cap_papr here, because this capability is
139 * only activated later by kvmppc_set_papr() */
140 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
141 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
142 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
144 if (!cap_interrupt_level) {
145 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
146 "VM to stall at times!\n");
149 kvm_ppc_register_host_cpu_type();
151 return 0;
154 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
156 return 0;
159 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
161 CPUPPCState *cenv = &cpu->env;
162 CPUState *cs = CPU(cpu);
163 struct kvm_sregs sregs;
164 int ret;
166 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
167 /* What we're really trying to say is "if we're on BookE, we use
168 the native PVR for now". This is the only sane way to check
169 it, though it may mislead users into thinking they can run
170 BookE guests on BookS. Let's hope nobody dares enough :) */
171 return 0;
172 } else {
173 if (!cap_segstate) {
174 fprintf(stderr, "kvm error: missing PVR setting capability\n");
175 return -ENOSYS;
179 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
180 if (ret) {
181 return ret;
184 sregs.pvr = cenv->spr[SPR_PVR];
185 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
188 /* Set up a shared TLB array with KVM */
189 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
191 CPUPPCState *env = &cpu->env;
192 CPUState *cs = CPU(cpu);
193 struct kvm_book3e_206_tlb_params params = {};
194 struct kvm_config_tlb cfg = {};
195 unsigned int entries = 0;
196 int ret, i;
198 if (!kvm_enabled() ||
199 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
200 return 0;
203 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
205 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
206 params.tlb_sizes[i] = booke206_tlb_size(env, i);
207 params.tlb_ways[i] = booke206_tlb_ways(env, i);
208 entries += params.tlb_sizes[i];
211 assert(entries == env->nb_tlb);
212 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
214 env->tlb_dirty = true;
216 cfg.array = (uintptr_t)env->tlb.tlbm;
217 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
218 cfg.params = (uintptr_t)&params;
219 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
221 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
222 if (ret < 0) {
223 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
224 __func__, strerror(-ret));
225 return ret;
228 env->kvm_sw_tlb = true;
229 return 0;
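/*
 * Note on the shared TLB: KVM_CAP_SW_TLB hands the kernel a pointer to
 * QEMU's own ppcmas_tlb_t array (cfg.array above), so both sides operate
 * on the same in-memory copy of the guest TLB.  Whenever QEMU modifies
 * entries locally it sets env->tlb_dirty, and kvm_sw_tlb_put() below then
 * tells KVM to re-read every entry via KVM_DIRTY_TLB.
 */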
233 #if defined(TARGET_PPC64)
234 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
235 struct kvm_ppc_smmu_info *info)
237 CPUPPCState *env = &cpu->env;
238 CPUState *cs = CPU(cpu);
240 memset(info, 0, sizeof(*info));
242 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
243 * need to "guess" what the supported page sizes are.
245 * For that to work we make a few assumptions:
247 * - Check whether we are running "PR" KVM which only supports 4K
248 * and 16M pages, but supports them regardless of the backing
249 * store characteristics. We also don't support 1T segments.
251 * This is safe because if HV KVM ever supports that capability or PR
252 * KVM grows support for more page/segment sizes, those versions
253 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
254 * will not hit this fallback
256 * - Else we are running HV KVM. This means we only support page
257 * sizes that fit in the backing store. Additionally we only
258 * advertise 64K pages if the processor is ARCH 2.06 and we assume
259 * P7 encodings for the SLB and hash table. Here too, we assume
260 * support for any newer processor will mean a kernel that
261 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
262 * this fallback.
264 if (kvmppc_is_pr(cs->kvm_state)) {
265 /* No flags */
266 info->flags = 0;
267 info->slb_size = 64;
269 /* Standard 4k base page size segment */
270 info->sps[0].page_shift = 12;
271 info->sps[0].slb_enc = 0;
272 info->sps[0].enc[0].page_shift = 12;
273 info->sps[0].enc[0].pte_enc = 0;
275 /* Standard 16M large page size segment */
276 info->sps[1].page_shift = 24;
277 info->sps[1].slb_enc = SLB_VSID_L;
278 info->sps[1].enc[0].page_shift = 24;
279 info->sps[1].enc[0].pte_enc = 0;
280 } else {
281 int i = 0;
283 /* HV KVM has backing store size restrictions */
284 info->flags = KVM_PPC_PAGE_SIZES_REAL;
286 if (env->mmu_model & POWERPC_MMU_1TSEG) {
287 info->flags |= KVM_PPC_1T_SEGMENTS;
290 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
291 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
292 info->slb_size = 32;
293 } else {
294 info->slb_size = 64;
297 /* Standard 4k base page size segment */
298 info->sps[i].page_shift = 12;
299 info->sps[i].slb_enc = 0;
300 info->sps[i].enc[0].page_shift = 12;
301 info->sps[i].enc[0].pte_enc = 0;
302 i++;
304 /* 64K on MMU 2.06 and later */
305 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
306 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
307 info->sps[i].page_shift = 16;
308 info->sps[i].slb_enc = 0x110;
309 info->sps[i].enc[0].page_shift = 16;
310 info->sps[i].enc[0].pte_enc = 1;
311 i++;
314 /* Standard 16M large page size segment */
315 info->sps[i].page_shift = 24;
316 info->sps[i].slb_enc = SLB_VSID_L;
317 info->sps[i].enc[0].page_shift = 24;
318 info->sps[i].enc[0].pte_enc = 0;
322 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
324 CPUState *cs = CPU(cpu);
325 int ret;
327 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
328 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
329 if (ret == 0) {
330 return;
334 kvm_get_fallback_smmu_info(cpu, info);
337 struct ppc_radix_page_info *kvm_get_radix_page_info(void)
339 KVMState *s = KVM_STATE(current_machine->accelerator);
340 struct ppc_radix_page_info *radix_page_info;
341 struct kvm_ppc_rmmu_info rmmu_info;
342 int i;
344 if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
345 return NULL;
347 if (kvm_vm_ioctl(s, KVM_PPC_GET_RMMU_INFO, &rmmu_info)) {
348 return NULL;
350 radix_page_info = g_malloc0(sizeof(*radix_page_info));
351 radix_page_info->count = 0;
352 for (i = 0; i < PPC_PAGE_SIZES_MAX_SZ; i++) {
353 if (rmmu_info.ap_encodings[i]) {
354 radix_page_info->entries[i] = rmmu_info.ap_encodings[i];
355 radix_page_info->count++;
358 return radix_page_info;
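/*
 * Each non-zero word in ap_encodings[] describes one radix page size.
 * Going by the KVM API documentation for KVM_PPC_GET_RMMU_INFO (worth
 * re-checking against the host kernel headers), the low bits hold the
 * log2 of the page size and the top three bits hold the AP field value
 * used in radix PTEs, so a consumer could decode an entry roughly as:
 *
 *     uint32_t enc   = radix_page_info->entries[i];
 *     uint32_t shift = enc & 0x3f;     // log2 of the page size
 *     uint32_t ap    = enc >> 29;      // AP encoding for the PTE
 *
 * This is the raw format the sPAPR code can then expose through the
 * ibm,processor-radix-AP-encodings device tree property.
 */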
361 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
363 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
364 return true;
367 return (1ul << shift) <= rampgsize;
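/*
 * Example: with KVM_PPC_PAGE_SIZES_REAL set and 64 KiB backing pages
 * (rampgsize == 0x10000), a 16 MiB page size (shift == 24) is rejected
 * while 4 KiB and 64 KiB page sizes are accepted.
 */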
370 static long max_cpu_page_size;
372 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
374 static struct kvm_ppc_smmu_info smmu_info;
375 static bool has_smmu_info;
376 CPUPPCState *env = &cpu->env;
377 int iq, ik, jq, jk;
378 bool has_64k_pages = false;
380 /* We only handle page sizes for 64-bit server guests for now */
381 if (!(env->mmu_model & POWERPC_MMU_64)) {
382 return;
385 /* Collect MMU info from kernel if not already */
386 if (!has_smmu_info) {
387 kvm_get_smmu_info(cpu, &smmu_info);
388 has_smmu_info = true;
391 if (!max_cpu_page_size) {
392 max_cpu_page_size = qemu_getrampagesize();
395 /* Convert to QEMU form */
396 memset(&env->sps, 0, sizeof(env->sps));
398 /* If we have HV KVM, we need to forbid CI large pages if our
399 * host page size is smaller than 64K.
401 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
402 env->ci_large_pages = getpagesize() >= 0x10000;
406 * XXX This loop should be an entry wide AND of the capabilities that
407 * the selected CPU has with the capabilities that KVM supports.
409 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
410 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
411 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
413 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
414 ksps->page_shift)) {
415 continue;
417 qsps->page_shift = ksps->page_shift;
418 qsps->slb_enc = ksps->slb_enc;
419 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
420 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
421 ksps->enc[jk].page_shift)) {
422 continue;
424 if (ksps->enc[jk].page_shift == 16) {
425 has_64k_pages = true;
427 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
428 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
429 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
430 break;
433 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
434 break;
437 env->slb_nr = smmu_info.slb_size;
438 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
439 env->mmu_model &= ~POWERPC_MMU_1TSEG;
441 if (!has_64k_pages) {
442 env->mmu_model &= ~POWERPC_MMU_64K;
446 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
448 Object *mem_obj = object_resolve_path(obj_path, NULL);
449 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
450 long pagesize;
452 if (mempath) {
453 pagesize = qemu_mempath_getpagesize(mempath);
454 } else {
455 pagesize = getpagesize();
458 return pagesize >= max_cpu_page_size;
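/*
 * A backend with an explicit mem-path (e.g. hugetlbfs) can use larger
 * pages than anonymous memory, so compare the backend's actual page size
 * against max_cpu_page_size, the guest RAM page size cached by
 * kvm_fixup_page_sizes() above.
 */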
461 #else /* defined (TARGET_PPC64) */
463 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
467 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
469 return true;
472 #endif /* !defined (TARGET_PPC64) */
474 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
476 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
479 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
480 * book3s supports only 1 watchpoint, so an array size
481 * of 4 is sufficient for now.
483 #define MAX_HW_BKPTS 4
485 static struct HWBreakpoint {
486 target_ulong addr;
487 int type;
488 } hw_debug_points[MAX_HW_BKPTS];
490 static CPUWatchpoint hw_watchpoint;
492 /* By default no breakpoints or watchpoints are supported */
493 static int max_hw_breakpoint;
494 static int max_hw_watchpoint;
495 static int nb_hw_breakpoint;
496 static int nb_hw_watchpoint;
498 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
500 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
501 max_hw_breakpoint = 2;
502 max_hw_watchpoint = 2;
505 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
506 fprintf(stderr, "Error initializing h/w breakpoints\n");
507 return;
511 int kvm_arch_init_vcpu(CPUState *cs)
513 PowerPCCPU *cpu = POWERPC_CPU(cs);
514 CPUPPCState *cenv = &cpu->env;
515 int ret;
517 /* Gather server mmu info from KVM and update the CPU state */
518 kvm_fixup_page_sizes(cpu);
520 /* Synchronize sregs with kvm */
521 ret = kvm_arch_sync_sregs(cpu);
522 if (ret) {
523 if (ret == -EINVAL) {
524 error_report("Register sync failed... If you're using kvm-hv.ko,"
525 " only \"-cpu host\" is possible");
527 return ret;
530 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
532 switch (cenv->mmu_model) {
533 case POWERPC_MMU_BOOKE206:
534 /* This target supports access to KVM's guest TLB */
535 ret = kvm_booke206_tlb_init(cpu);
536 break;
537 case POWERPC_MMU_2_07:
538 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
539 /* KVM-HV has transactional memory on POWER8 also without the
540 * KVM_CAP_PPC_HTM extension, so enable it here instead as
541 * long as it's available to userspace on the host. */
542 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
543 cap_htm = true;
546 break;
547 default:
548 break;
551 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
552 kvmppc_hw_debug_points_init(cenv);
554 return ret;
557 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
559 CPUPPCState *env = &cpu->env;
560 CPUState *cs = CPU(cpu);
561 struct kvm_dirty_tlb dirty_tlb;
562 unsigned char *bitmap;
563 int ret;
565 if (!env->kvm_sw_tlb) {
566 return;
569 bitmap = g_malloc((env->nb_tlb + 7) / 8);
570 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
572 dirty_tlb.bitmap = (uintptr_t)bitmap;
573 dirty_tlb.num_dirty = env->nb_tlb;
575 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
576 if (ret) {
577 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
578 __func__, strerror(-ret));
581 g_free(bitmap);
584 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
586 PowerPCCPU *cpu = POWERPC_CPU(cs);
587 CPUPPCState *env = &cpu->env;
588 union {
589 uint32_t u32;
590 uint64_t u64;
591 } val;
592 struct kvm_one_reg reg = {
593 .id = id,
594 .addr = (uintptr_t) &val,
596 int ret;
598 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
599 if (ret != 0) {
600 trace_kvm_failed_spr_get(spr, strerror(errno));
601 } else {
602 switch (id & KVM_REG_SIZE_MASK) {
603 case KVM_REG_SIZE_U32:
604 env->spr[spr] = val.u32;
605 break;
607 case KVM_REG_SIZE_U64:
608 env->spr[spr] = val.u64;
609 break;
611 default:
612 /* Don't handle this size yet */
613 abort();
618 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
620 PowerPCCPU *cpu = POWERPC_CPU(cs);
621 CPUPPCState *env = &cpu->env;
622 union {
623 uint32_t u32;
624 uint64_t u64;
625 } val;
626 struct kvm_one_reg reg = {
627 .id = id,
628 .addr = (uintptr_t) &val,
630 int ret;
632 switch (id & KVM_REG_SIZE_MASK) {
633 case KVM_REG_SIZE_U32:
634 val.u32 = env->spr[spr];
635 break;
637 case KVM_REG_SIZE_U64:
638 val.u64 = env->spr[spr];
639 break;
641 default:
642 /* Don't handle this size yet */
643 abort();
646 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
647 if (ret != 0) {
648 trace_kvm_failed_spr_set(spr, strerror(errno));
652 static int kvm_put_fp(CPUState *cs)
654 PowerPCCPU *cpu = POWERPC_CPU(cs);
655 CPUPPCState *env = &cpu->env;
656 struct kvm_one_reg reg;
657 int i;
658 int ret;
660 if (env->insns_flags & PPC_FLOAT) {
661 uint64_t fpscr = env->fpscr;
662 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
664 reg.id = KVM_REG_PPC_FPSCR;
665 reg.addr = (uintptr_t)&fpscr;
666 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
667 if (ret < 0) {
668 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
669 return ret;
672 for (i = 0; i < 32; i++) {
673 uint64_t vsr[2];
675 #ifdef HOST_WORDS_BIGENDIAN
676 vsr[0] = float64_val(env->fpr[i]);
677 vsr[1] = env->vsr[i];
678 #else
679 vsr[0] = env->vsr[i];
680 vsr[1] = float64_val(env->fpr[i]);
681 #endif
682 reg.addr = (uintptr_t) &vsr;
683 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
685 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
686 if (ret < 0) {
687 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
688 i, strerror(errno));
689 return ret;
694 if (env->insns_flags & PPC_ALTIVEC) {
695 reg.id = KVM_REG_PPC_VSCR;
696 reg.addr = (uintptr_t)&env->vscr;
697 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
698 if (ret < 0) {
699 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
700 return ret;
703 for (i = 0; i < 32; i++) {
704 reg.id = KVM_REG_PPC_VR(i);
705 reg.addr = (uintptr_t)&env->avr[i];
706 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
707 if (ret < 0) {
708 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
709 return ret;
714 return 0;
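/*
 * Layout note for the VSR transfers above: each VSR is passed to KVM as
 * two 64-bit doublewords.  The FPR contents are architecturally
 * doubleword 0 of the corresponding VSR, so the vsr[0]/vsr[1] ordering
 * is flipped on little-endian hosts to keep the FPR in the half of the
 * 128-bit image that the kernel expects.  kvm_get_fp() below applies the
 * inverse mapping.
 */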
717 static int kvm_get_fp(CPUState *cs)
719 PowerPCCPU *cpu = POWERPC_CPU(cs);
720 CPUPPCState *env = &cpu->env;
721 struct kvm_one_reg reg;
722 int i;
723 int ret;
725 if (env->insns_flags & PPC_FLOAT) {
726 uint64_t fpscr;
727 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
729 reg.id = KVM_REG_PPC_FPSCR;
730 reg.addr = (uintptr_t)&fpscr;
731 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
732 if (ret < 0) {
733 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
734 return ret;
735 } else {
736 env->fpscr = fpscr;
739 for (i = 0; i < 32; i++) {
740 uint64_t vsr[2];
742 reg.addr = (uintptr_t) &vsr;
743 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
745 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
746 if (ret < 0) {
747 DPRINTF("Unable to get %s%d from KVM: %s\n",
748 vsx ? "VSR" : "FPR", i, strerror(errno));
749 return ret;
750 } else {
751 #ifdef HOST_WORDS_BIGENDIAN
752 env->fpr[i] = vsr[0];
753 if (vsx) {
754 env->vsr[i] = vsr[1];
756 #else
757 env->fpr[i] = vsr[1];
758 if (vsx) {
759 env->vsr[i] = vsr[0];
761 #endif
766 if (env->insns_flags & PPC_ALTIVEC) {
767 reg.id = KVM_REG_PPC_VSCR;
768 reg.addr = (uintptr_t)&env->vscr;
769 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
770 if (ret < 0) {
771 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
772 return ret;
775 for (i = 0; i < 32; i++) {
776 reg.id = KVM_REG_PPC_VR(i);
777 reg.addr = (uintptr_t)&env->avr[i];
778 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
779 if (ret < 0) {
780 DPRINTF("Unable to get VR%d from KVM: %s\n",
781 i, strerror(errno));
782 return ret;
787 return 0;
790 #if defined(TARGET_PPC64)
791 static int kvm_get_vpa(CPUState *cs)
793 PowerPCCPU *cpu = POWERPC_CPU(cs);
794 CPUPPCState *env = &cpu->env;
795 struct kvm_one_reg reg;
796 int ret;
798 reg.id = KVM_REG_PPC_VPA_ADDR;
799 reg.addr = (uintptr_t)&env->vpa_addr;
800 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
801 if (ret < 0) {
802 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
803 return ret;
806 assert((uintptr_t)&env->slb_shadow_size
807 == ((uintptr_t)&env->slb_shadow_addr + 8));
808 reg.id = KVM_REG_PPC_VPA_SLB;
809 reg.addr = (uintptr_t)&env->slb_shadow_addr;
810 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
811 if (ret < 0) {
812 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
813 strerror(errno));
814 return ret;
817 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
818 reg.id = KVM_REG_PPC_VPA_DTL;
819 reg.addr = (uintptr_t)&env->dtl_addr;
820 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
821 if (ret < 0) {
822 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
823 strerror(errno));
824 return ret;
827 return 0;
830 static int kvm_put_vpa(CPUState *cs)
832 PowerPCCPU *cpu = POWERPC_CPU(cs);
833 CPUPPCState *env = &cpu->env;
834 struct kvm_one_reg reg;
835 int ret;
837 /* SLB shadow or DTL can't be registered unless a master VPA is
838 * registered. That means when restoring state, if a VPA *is*
839 * registered, we need to set that up first. If not, we need to
840 * deregister the others before deregistering the master VPA */
841 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
843 if (env->vpa_addr) {
844 reg.id = KVM_REG_PPC_VPA_ADDR;
845 reg.addr = (uintptr_t)&env->vpa_addr;
846 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
847 if (ret < 0) {
848 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
849 return ret;
853 assert((uintptr_t)&env->slb_shadow_size
854 == ((uintptr_t)&env->slb_shadow_addr + 8));
855 reg.id = KVM_REG_PPC_VPA_SLB;
856 reg.addr = (uintptr_t)&env->slb_shadow_addr;
857 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
858 if (ret < 0) {
859 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
860 return ret;
863 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
864 reg.id = KVM_REG_PPC_VPA_DTL;
865 reg.addr = (uintptr_t)&env->dtl_addr;
866 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
867 if (ret < 0) {
868 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
869 strerror(errno));
870 return ret;
873 if (!env->vpa_addr) {
874 reg.id = KVM_REG_PPC_VPA_ADDR;
875 reg.addr = (uintptr_t)&env->vpa_addr;
876 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
877 if (ret < 0) {
878 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
879 return ret;
883 return 0;
885 #endif /* TARGET_PPC64 */
887 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
889 CPUPPCState *env = &cpu->env;
890 struct kvm_sregs sregs;
891 int i;
893 sregs.pvr = env->spr[SPR_PVR];
895 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
897 /* Sync SLB */
898 #ifdef TARGET_PPC64
899 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
900 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
901 if (env->slb[i].esid & SLB_ESID_V) {
902 sregs.u.s.ppc64.slb[i].slbe |= i;
904 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
906 #endif
908 /* Sync SRs */
909 for (i = 0; i < 16; i++) {
910 sregs.u.s.ppc32.sr[i] = env->sr[i];
913 /* Sync BATs */
914 for (i = 0; i < 8; i++) {
915 /* Beware. We have to swap upper and lower bits here */
916 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
917 | env->DBAT[1][i];
918 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
919 | env->IBAT[1][i];
922 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
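/*
 * The BAT packing above follows the sregs layout: each 64-bit dbat/ibat
 * entry carries the upper BAT register (BATU, kept in env->DBAT[0][i] /
 * env->IBAT[0][i]) in its high 32 bits and the lower BAT register (BATL,
 * kept in [1][i]) in its low 32 bits, e.g.
 *
 *     sregs.u.s.ppc32.dbat[0] == ((uint64_t)env->DBAT[0][0] << 32)
 *                                | env->DBAT[1][0];
 *
 * kvmppc_get_books_sregs() splits the values back out the same way.
 */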
925 int kvm_arch_put_registers(CPUState *cs, int level)
927 PowerPCCPU *cpu = POWERPC_CPU(cs);
928 CPUPPCState *env = &cpu->env;
929 struct kvm_regs regs;
930 int ret;
931 int i;
933 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
934 if (ret < 0) {
935 return ret;
938 regs.ctr = env->ctr;
939 regs.lr = env->lr;
940 regs.xer = cpu_read_xer(env);
941 regs.msr = env->msr;
942 regs.pc = env->nip;
944 regs.srr0 = env->spr[SPR_SRR0];
945 regs.srr1 = env->spr[SPR_SRR1];
947 regs.sprg0 = env->spr[SPR_SPRG0];
948 regs.sprg1 = env->spr[SPR_SPRG1];
949 regs.sprg2 = env->spr[SPR_SPRG2];
950 regs.sprg3 = env->spr[SPR_SPRG3];
951 regs.sprg4 = env->spr[SPR_SPRG4];
952 regs.sprg5 = env->spr[SPR_SPRG5];
953 regs.sprg6 = env->spr[SPR_SPRG6];
954 regs.sprg7 = env->spr[SPR_SPRG7];
956 regs.pid = env->spr[SPR_BOOKE_PID];
958 for (i = 0;i < 32; i++)
959 regs.gpr[i] = env->gpr[i];
961 regs.cr = 0;
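/* Pack the eight 4-bit CR fields into one 32-bit word: crf[0] (CR field 0)
 * lands in the most significant nibble (bits 28..31) and crf[7] in the
 * least significant one.  kvm_arch_get_registers() performs the inverse
 * unpacking. */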
962 for (i = 0; i < 8; i++) {
963 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
966 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
967 if (ret < 0)
968 return ret;
970 kvm_put_fp(cs);
972 if (env->tlb_dirty) {
973 kvm_sw_tlb_put(cpu);
974 env->tlb_dirty = false;
977 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
978 ret = kvmppc_put_books_sregs(cpu);
979 if (ret < 0) {
980 return ret;
984 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
985 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
988 if (cap_one_reg) {
989 int i;
991 /* We deliberately ignore errors here, for kernels which have
992 * the ONE_REG calls, but don't support the specific
993 * registers, there's a reasonable chance things will still
994 * work, at least until we try to migrate. */
995 for (i = 0; i < 1024; i++) {
996 uint64_t id = env->spr_cb[i].one_reg_id;
998 if (id != 0) {
999 kvm_put_one_spr(cs, id, i);
1003 #ifdef TARGET_PPC64
1004 if (msr_ts) {
1005 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1006 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1008 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1009 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1011 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1012 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1013 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1014 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1015 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1016 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1017 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1018 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1019 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1020 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1023 if (cap_papr) {
1024 if (kvm_put_vpa(cs) < 0) {
1025 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1029 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1030 #endif /* TARGET_PPC64 */
1033 return ret;
1036 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1038 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1041 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1043 CPUPPCState *env = &cpu->env;
1044 struct kvm_sregs sregs;
1045 int ret;
1047 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1048 if (ret < 0) {
1049 return ret;
1052 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1053 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1054 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1055 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1056 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1057 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1058 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1059 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1060 env->spr[SPR_DECR] = sregs.u.e.dec;
1061 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1062 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1063 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1066 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1067 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1068 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1069 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1070 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1071 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1074 if (sregs.u.e.features & KVM_SREGS_E_64) {
1075 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1078 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1079 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1082 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1083 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1084 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1085 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1086 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1087 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1088 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1089 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1090 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1091 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1092 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1093 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1094 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1095 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1096 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1097 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1098 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1099 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1100 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1101 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1102 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1103 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1104 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1105 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1106 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1107 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1108 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1109 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1110 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1111 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1112 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1113 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1114 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1116 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1117 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1118 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1119 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1120 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1121 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1122 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1125 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1126 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1127 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1130 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1131 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1132 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1133 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1134 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1138 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1139 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1140 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1141 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1142 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1143 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1144 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1145 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1146 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1147 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1148 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1151 if (sregs.u.e.features & KVM_SREGS_EXP) {
1152 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1155 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1156 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1157 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1160 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1161 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1162 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1163 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1165 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1166 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1167 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1171 return 0;
1174 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1176 CPUPPCState *env = &cpu->env;
1177 struct kvm_sregs sregs;
1178 int ret;
1179 int i;
1181 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1182 if (ret < 0) {
1183 return ret;
1186 if (!cpu->vhyp) {
1187 ppc_store_sdr1(env, sregs.u.s.sdr1);
1190 /* Sync SLB */
1191 #ifdef TARGET_PPC64
1193 * The packed SLB array we get from KVM_GET_SREGS only contains
1194 * information about valid entries. So we flush our internal copy
1195 * to get rid of stale ones, then put all valid SLB entries back
1196 * in.
1198 memset(env->slb, 0, sizeof(env->slb));
1199 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1200 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1201 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1203 * Only restore valid entries
1205 if (rb & SLB_ESID_V) {
1206 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1209 #endif
1211 /* Sync SRs */
1212 for (i = 0; i < 16; i++) {
1213 env->sr[i] = sregs.u.s.ppc32.sr[i];
1216 /* Sync BATs */
1217 for (i = 0; i < 8; i++) {
1218 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1219 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1220 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1221 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1224 return 0;
1227 int kvm_arch_get_registers(CPUState *cs)
1229 PowerPCCPU *cpu = POWERPC_CPU(cs);
1230 CPUPPCState *env = &cpu->env;
1231 struct kvm_regs regs;
1232 uint32_t cr;
1233 int i, ret;
1235 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1236 if (ret < 0)
1237 return ret;
1239 cr = regs.cr;
1240 for (i = 7; i >= 0; i--) {
1241 env->crf[i] = cr & 15;
1242 cr >>= 4;
1245 env->ctr = regs.ctr;
1246 env->lr = regs.lr;
1247 cpu_write_xer(env, regs.xer);
1248 env->msr = regs.msr;
1249 env->nip = regs.pc;
1251 env->spr[SPR_SRR0] = regs.srr0;
1252 env->spr[SPR_SRR1] = regs.srr1;
1254 env->spr[SPR_SPRG0] = regs.sprg0;
1255 env->spr[SPR_SPRG1] = regs.sprg1;
1256 env->spr[SPR_SPRG2] = regs.sprg2;
1257 env->spr[SPR_SPRG3] = regs.sprg3;
1258 env->spr[SPR_SPRG4] = regs.sprg4;
1259 env->spr[SPR_SPRG5] = regs.sprg5;
1260 env->spr[SPR_SPRG6] = regs.sprg6;
1261 env->spr[SPR_SPRG7] = regs.sprg7;
1263 env->spr[SPR_BOOKE_PID] = regs.pid;
1265 for (i = 0;i < 32; i++)
1266 env->gpr[i] = regs.gpr[i];
1268 kvm_get_fp(cs);
1270 if (cap_booke_sregs) {
1271 ret = kvmppc_get_booke_sregs(cpu);
1272 if (ret < 0) {
1273 return ret;
1277 if (cap_segstate) {
1278 ret = kvmppc_get_books_sregs(cpu);
1279 if (ret < 0) {
1280 return ret;
1284 if (cap_hior) {
1285 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1288 if (cap_one_reg) {
1289 int i;
1291 /* We deliberately ignore errors here, for kernels which have
1292 * the ONE_REG calls, but don't support the specific
1293 * registers, there's a reasonable chance things will still
1294 * work, at least until we try to migrate. */
1295 for (i = 0; i < 1024; i++) {
1296 uint64_t id = env->spr_cb[i].one_reg_id;
1298 if (id != 0) {
1299 kvm_get_one_spr(cs, id, i);
1303 #ifdef TARGET_PPC64
1304 if (msr_ts) {
1305 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1306 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1308 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1309 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1311 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1312 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1313 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1314 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1315 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1316 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1317 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1318 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1319 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1320 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1323 if (cap_papr) {
1324 if (kvm_get_vpa(cs) < 0) {
1325 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1329 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1330 #endif
1333 return 0;
1336 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1338 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1340 if (irq != PPC_INTERRUPT_EXT) {
1341 return 0;
1344 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1345 return 0;
1348 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1350 return 0;
1353 #if defined(TARGET_PPCEMB)
1354 #define PPC_INPUT_INT PPC40x_INPUT_INT
1355 #elif defined(TARGET_PPC64)
1356 #define PPC_INPUT_INT PPC970_INPUT_INT
1357 #else
1358 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1359 #endif
1361 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1363 PowerPCCPU *cpu = POWERPC_CPU(cs);
1364 CPUPPCState *env = &cpu->env;
1365 int r;
1366 unsigned irq;
1368 qemu_mutex_lock_iothread();
1370 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1371 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1372 if (!cap_interrupt_level &&
1373 run->ready_for_interrupt_injection &&
1374 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1375 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1377 /* For now KVM disregards the 'irq' argument. However, in the
1378 * future KVM could cache it in-kernel to avoid a heavyweight exit
1379 * when reading the UIC.
1381 irq = KVM_INTERRUPT_SET;
1383 DPRINTF("injected interrupt %d\n", irq);
1384 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1385 if (r < 0) {
1386 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1389 /* Always wake up soon in case the interrupt was level based */
1390 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1391 (NANOSECONDS_PER_SECOND / 50));
1394 /* We don't know if there are more interrupts pending after this. However,
1395 * the guest will return to userspace in the course of handling this one
1396 * anyway, so we will get a chance to deliver the rest. */
1398 qemu_mutex_unlock_iothread();
1401 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1403 return MEMTXATTRS_UNSPECIFIED;
1406 int kvm_arch_process_async_events(CPUState *cs)
1408 return cs->halted;
1411 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1413 CPUState *cs = CPU(cpu);
1414 CPUPPCState *env = &cpu->env;
1416 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1417 cs->halted = 1;
1418 cs->exception_index = EXCP_HLT;
1421 return 0;
1424 /* map dcr access to existing qemu dcr emulation */
1425 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1427 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1428 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1430 return 0;
1433 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1435 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1436 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1438 return 0;
1441 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1443 /* Mixed endian case is not handled */
1444 uint32_t sc = debug_inst_opcode;
1446 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1447 sizeof(sc), 0) ||
1448 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1449 return -EINVAL;
1452 return 0;
1455 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1457 uint32_t sc;
1459 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1460 sc != debug_inst_opcode ||
1461 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1462 sizeof(sc), 1)) {
1463 return -EINVAL;
1466 return 0;
1469 static int find_hw_breakpoint(target_ulong addr, int type)
1471 int n;
1473 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1474 <= ARRAY_SIZE(hw_debug_points));
1476 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1477 if (hw_debug_points[n].addr == addr &&
1478 hw_debug_points[n].type == type) {
1479 return n;
1483 return -1;
1486 static int find_hw_watchpoint(target_ulong addr, int *flag)
1488 int n;
1490 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1491 if (n >= 0) {
1492 *flag = BP_MEM_ACCESS;
1493 return n;
1496 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1497 if (n >= 0) {
1498 *flag = BP_MEM_WRITE;
1499 return n;
1502 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1503 if (n >= 0) {
1504 *flag = BP_MEM_READ;
1505 return n;
1508 return -1;
1511 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1512 target_ulong len, int type)
1514 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1515 return -ENOBUFS;
1518 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1519 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1521 switch (type) {
1522 case GDB_BREAKPOINT_HW:
1523 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1524 return -ENOBUFS;
1527 if (find_hw_breakpoint(addr, type) >= 0) {
1528 return -EEXIST;
1531 nb_hw_breakpoint++;
1532 break;
1534 case GDB_WATCHPOINT_WRITE:
1535 case GDB_WATCHPOINT_READ:
1536 case GDB_WATCHPOINT_ACCESS:
1537 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1538 return -ENOBUFS;
1541 if (find_hw_breakpoint(addr, type) >= 0) {
1542 return -EEXIST;
1545 nb_hw_watchpoint++;
1546 break;
1548 default:
1549 return -ENOSYS;
1552 return 0;
1555 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1556 target_ulong len, int type)
1558 int n;
1560 n = find_hw_breakpoint(addr, type);
1561 if (n < 0) {
1562 return -ENOENT;
1565 switch (type) {
1566 case GDB_BREAKPOINT_HW:
1567 nb_hw_breakpoint--;
1568 break;
1570 case GDB_WATCHPOINT_WRITE:
1571 case GDB_WATCHPOINT_READ:
1572 case GDB_WATCHPOINT_ACCESS:
1573 nb_hw_watchpoint--;
1574 break;
1576 default:
1577 return -ENOSYS;
1579 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1581 return 0;
1584 void kvm_arch_remove_all_hw_breakpoints(void)
1586 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1589 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1591 int n;
1593 /* Software Breakpoint updates */
1594 if (kvm_sw_breakpoints_active(cs)) {
1595 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1598 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1599 <= ARRAY_SIZE(hw_debug_points));
1600 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1602 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1603 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1604 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1605 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1606 switch (hw_debug_points[n].type) {
1607 case GDB_BREAKPOINT_HW:
1608 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1609 break;
1610 case GDB_WATCHPOINT_WRITE:
1611 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1612 break;
1613 case GDB_WATCHPOINT_READ:
1614 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1615 break;
1616 case GDB_WATCHPOINT_ACCESS:
1617 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1618 KVMPPC_DEBUG_WATCH_READ;
1619 break;
1620 default:
1621 cpu_abort(cs, "Unsupported breakpoint type\n");
1623 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1628 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1630 CPUState *cs = CPU(cpu);
1631 CPUPPCState *env = &cpu->env;
1632 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1633 int handle = 0;
1634 int n;
1635 int flag = 0;
1637 if (cs->singlestep_enabled) {
1638 handle = 1;
1639 } else if (arch_info->status) {
1640 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1641 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1642 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1643 if (n >= 0) {
1644 handle = 1;
1646 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1647 KVMPPC_DEBUG_WATCH_WRITE)) {
1648 n = find_hw_watchpoint(arch_info->address, &flag);
1649 if (n >= 0) {
1650 handle = 1;
1651 cs->watchpoint_hit = &hw_watchpoint;
1652 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1653 hw_watchpoint.flags = flag;
1657 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1658 handle = 1;
1659 } else {
1660 /* QEMU is not able to handle this debug exception, so inject
1661 * a program exception into the guest;
1662 * yes, a program exception, NOT a debug exception !!
1663 * When QEMU is using the debug resources then a debug exception
1664 * must always be raised. To achieve this we set MSR_DE and also
1665 * set MSRP_DEP so the guest cannot change MSR_DE.
1666 * When emulating debug resources for the guest, we instead want
1667 * the guest to control MSR_DE (enable/disable the debug interrupt
1668 * as needed). Supporting both configurations at once is NOT
1669 * possible, so we cannot share debug resources between QEMU and
1670 * the guest on the BOOKE architecture.
1671 * In the current design QEMU gets priority over the guest:
1672 * if QEMU is using the debug resources then the guest cannot
1673 * use them.
1674 * For software breakpoints QEMU uses a privileged instruction,
1675 * so there is no way we can be here because the guest raised a
1676 * debug exception; the only possibility is that the guest
1677 * executed a privileged / illegal instruction, which is why we
1678 * inject a program interrupt.
1681 cpu_synchronize_state(cs);
1682 /* env->nip is PC, so increment this by 4 to use
1683 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1685 env->nip += 4;
1686 cs->exception_index = POWERPC_EXCP_PROGRAM;
1687 env->error_code = POWERPC_EXCP_INVAL;
1688 ppc_cpu_do_interrupt(cs);
1691 return handle;
1694 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1696 PowerPCCPU *cpu = POWERPC_CPU(cs);
1697 CPUPPCState *env = &cpu->env;
1698 int ret;
1700 qemu_mutex_lock_iothread();
1702 switch (run->exit_reason) {
1703 case KVM_EXIT_DCR:
1704 if (run->dcr.is_write) {
1705 DPRINTF("handle dcr write\n");
1706 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1707 } else {
1708 DPRINTF("handle dcr read\n");
1709 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1711 break;
1712 case KVM_EXIT_HLT:
1713 DPRINTF("handle halt\n");
1714 ret = kvmppc_handle_halt(cpu);
1715 break;
1716 #if defined(TARGET_PPC64)
1717 case KVM_EXIT_PAPR_HCALL:
1718 DPRINTF("handle PAPR hypercall\n");
1719 run->papr_hcall.ret = spapr_hypercall(cpu,
1720 run->papr_hcall.nr,
1721 run->papr_hcall.args);
1722 ret = 0;
1723 break;
1724 #endif
1725 case KVM_EXIT_EPR:
1726 DPRINTF("handle epr\n");
1727 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1728 ret = 0;
1729 break;
1730 case KVM_EXIT_WATCHDOG:
1731 DPRINTF("handle watchdog expiry\n");
1732 watchdog_perform_action();
1733 ret = 0;
1734 break;
1736 case KVM_EXIT_DEBUG:
1737 DPRINTF("handle debug exception\n");
1738 if (kvm_handle_debug(cpu, run)) {
1739 ret = EXCP_DEBUG;
1740 break;
1742 /* re-enter, this exception was guest-internal */
1743 ret = 0;
1744 break;
1746 default:
1747 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1748 ret = -1;
1749 break;
1752 qemu_mutex_unlock_iothread();
1753 return ret;
1756 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1758 CPUState *cs = CPU(cpu);
1759 uint32_t bits = tsr_bits;
1760 struct kvm_one_reg reg = {
1761 .id = KVM_REG_PPC_OR_TSR,
1762 .addr = (uintptr_t) &bits,
1765 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1768 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1771 CPUState *cs = CPU(cpu);
1772 uint32_t bits = tsr_bits;
1773 struct kvm_one_reg reg = {
1774 .id = KVM_REG_PPC_CLEAR_TSR,
1775 .addr = (uintptr_t) &bits,
1778 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1781 int kvmppc_set_tcr(PowerPCCPU *cpu)
1783 CPUState *cs = CPU(cpu);
1784 CPUPPCState *env = &cpu->env;
1785 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1787 struct kvm_one_reg reg = {
1788 .id = KVM_REG_PPC_TCR,
1789 .addr = (uintptr_t) &tcr,
1792 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1795 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1797 CPUState *cs = CPU(cpu);
1798 int ret;
1800 if (!kvm_enabled()) {
1801 return -1;
1804 if (!cap_ppc_watchdog) {
1805 printf("warning: KVM does not support watchdog\n");
1806 return -1;
1809 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1810 if (ret < 0) {
1811 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1812 __func__, strerror(-ret));
1813 return ret;
1816 return ret;
1819 static int read_cpuinfo(const char *field, char *value, int len)
1821 FILE *f;
1822 int ret = -1;
1823 int field_len = strlen(field);
1824 char line[512];
1826 f = fopen("/proc/cpuinfo", "r");
1827 if (!f) {
1828 return -1;
1831 do {
1832 if (!fgets(line, sizeof(line), f)) {
1833 break;
1835 if (!strncmp(line, field, field_len)) {
1836 pstrcpy(value, len, line);
1837 ret = 0;
1838 break;
1840 } while(*line);
1842 fclose(f);
1844 return ret;
1847 uint32_t kvmppc_get_tbfreq(void)
1849 char line[512];
1850 char *ns;
1851 uint32_t retval = NANOSECONDS_PER_SECOND;
1853 if (read_cpuinfo("timebase", line, sizeof(line))) {
1854 return retval;
1857 if (!(ns = strchr(line, ':'))) {
1858 return retval;
1861 ns++;
1863 return atoi(ns);
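/*
 * The timebase frequency comes from a /proc/cpuinfo line that on POWER
 * hosts typically looks like "timebase : 512000000"; everything after
 * the colon is handed to atoi(), falling back to 1 GHz if the field is
 * missing or malformed.
 */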
1866 bool kvmppc_get_host_serial(char **value)
1868 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1869 NULL);
1872 bool kvmppc_get_host_model(char **value)
1874 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1877 /* Try to find a device tree node for a CPU with clock-frequency property */
1878 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1880 struct dirent *dirp;
1881 DIR *dp;
1883 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1884 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1885 return -1;
1888 buf[0] = '\0';
1889 while ((dirp = readdir(dp)) != NULL) {
1890 FILE *f;
1891 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1892 dirp->d_name);
1893 f = fopen(buf, "r");
1894 if (f) {
1895 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1896 fclose(f);
1897 break;
1899 buf[0] = '\0';
1901 closedir(dp);
1902 if (buf[0] == '\0') {
1903 printf("Unknown host!\n");
1904 return -1;
1907 return 0;
1910 static uint64_t kvmppc_read_int_dt(const char *filename)
1912 union {
1913 uint32_t v32;
1914 uint64_t v64;
1915 } u;
1916 FILE *f;
1917 int len;
1919 f = fopen(filename, "rb");
1920 if (!f) {
1921 return -1;
1924 len = fread(&u, 1, sizeof(u), f);
1925 fclose(f);
1926 switch (len) {
1927 case 4:
1928 /* property is a 32-bit quantity */
1929 return be32_to_cpu(u.v32);
1930 case 8:
1931 return be64_to_cpu(u.v64);
1934 return 0;
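/*
 * Device tree properties are stored big-endian, hence the be32_to_cpu /
 * be64_to_cpu conversions above: a 4-byte property is a single u32 cell,
 * an 8-byte property a u64, and any other length is treated as unreadable.
 */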
1937 /* Read a CPU node property from the host device tree that's a single
1938 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1939 * (can't find or open the property, or doesn't understand the
1940 * format) */
1941 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1943 char buf[PATH_MAX], *tmp;
1944 uint64_t val;
1946 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1947 return -1;
1950 tmp = g_strdup_printf("%s/%s", buf, propname);
1951 val = kvmppc_read_int_dt(tmp);
1952 g_free(tmp);
1954 return val;
1957 uint64_t kvmppc_get_clockfreq(void)
1959 return kvmppc_read_int_cpu_dt("clock-frequency");
1962 uint32_t kvmppc_get_vmx(void)
1964 return kvmppc_read_int_cpu_dt("ibm,vmx");
1967 uint32_t kvmppc_get_dfp(void)
1969 return kvmppc_read_int_cpu_dt("ibm,dfp");
1972 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1974 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1975 CPUState *cs = CPU(cpu);
1977 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1978 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1979 return 0;
1982 return 1;
1985 int kvmppc_get_hasidle(CPUPPCState *env)
1987 struct kvm_ppc_pvinfo pvinfo;
1989 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1990 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1991 return 1;
1994 return 0;
1997 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1999 uint32_t *hc = (uint32_t*)buf;
2000 struct kvm_ppc_pvinfo pvinfo;
2002 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2003 memcpy(buf, pvinfo.hcall, buf_len);
2004 return 0;
2008 * Fallback to always fail hypercalls regardless of endianness:
2010 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2011 * li r3, -1
2012 * b .+8 (becomes nop in wrong endian)
2013 * bswap32(li r3, -1)
2016 hc[0] = cpu_to_be32(0x08000048);
2017 hc[1] = cpu_to_be32(0x3860ffff);
2018 hc[2] = cpu_to_be32(0x48000008);
2019 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2021 return 1;
2024 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2026 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2029 void kvmppc_enable_logical_ci_hcalls(void)
2032 * FIXME: it would be nice if we could detect the cases where
2033 * we're using a device which requires the in kernel
2034 * implementation of these hcalls, but the kernel lacks them and
2035 * produce a warning.
2037 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2038 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2041 void kvmppc_enable_set_mode_hcall(void)
2043 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2046 void kvmppc_enable_clear_ref_mod_hcalls(void)
2048 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2049 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2052 void kvmppc_set_papr(PowerPCCPU *cpu)
2054 CPUState *cs = CPU(cpu);
2055 int ret;
2057 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2058 if (ret) {
2059 error_report("This vCPU type or KVM version does not support PAPR");
2060 exit(1);
2063 /* Update the capability flag so we sync the right information
2064 * with kvm */
2065 cap_papr = 1;
2068 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2070 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2073 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2075 CPUState *cs = CPU(cpu);
2076 int ret;
2078 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2079 if (ret && mpic_proxy) {
2080 error_report("This KVM version does not support EPR");
2081 exit(1);
2085 int kvmppc_smt_threads(void)
2087 return cap_ppc_smt ? cap_ppc_smt : 1;
2090 #ifdef TARGET_PPC64
2091 off_t kvmppc_alloc_rma(void **rma)
2093 off_t size;
2094 int fd;
2095 struct kvm_allocate_rma ret;
2097 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2098 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2099 * not necessary on this hardware
2100 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2102 * FIXME: We should allow the user to force contiguous RMA
2103 * allocation in the cap_ppc_rma==1 case.
2105 if (cap_ppc_rma < 2) {
2106 return 0;
2109 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2110 if (fd < 0) {
2111 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2112 strerror(errno));
2113 return -1;
2116 size = MIN(ret.rma_size, 256ul << 20);
2118 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2119 if (*rma == MAP_FAILED) {
2120 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2121 return -1;
2124 return size;
2127 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2129 struct kvm_ppc_smmu_info info;
2130 long rampagesize, best_page_shift;
2131 int i;
2133 if (cap_ppc_rma >= 2) {
2134 return current_size;
2137 /* Find the largest hardware supported page size that's less than
2138 * or equal to the (logical) backing page size of guest RAM */
2139 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2140 rampagesize = qemu_getrampagesize();
2141 best_page_shift = 0;
2143 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2144 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2146 if (!sps->page_shift) {
2147 continue;
2150 if ((sps->page_shift > best_page_shift)
2151 && ((1UL << sps->page_shift) <= rampagesize)) {
2152 best_page_shift = sps->page_shift;
2156 return MIN(current_size,
2157 1ULL << (best_page_shift + hash_shift - 7));
2159 #endif
2161 bool kvmppc_spapr_use_multitce(void)
2163 return cap_spapr_multitce;
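/* Create an in-kernel TCE table for the given LIOBN, preferring the
 * 64-bit window capable KVM_CREATE_SPAPR_TCE_64 ioctl and falling back
 * to the legacy KVM_CREATE_SPAPR_TCE (32-bit window at bus offset 0
 * only). The table is mmap()ed into QEMU; its fd is returned in *pfd. */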
2166 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t page_shift,
2167 uint64_t bus_offset, uint32_t nb_table,
2168 int *pfd, bool need_vfio)
2170 long len;
2171 int fd;
2172 void *table;
2174 /* Must set fd to -1 so we don't try to munmap when called to
2175 * destroy the table, which the upper layers -will- do
2177 *pfd = -1;
2178 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2179 return NULL;
2182 if (cap_spapr_tce_64) {
2183 struct kvm_create_spapr_tce_64 args = {
2184 .liobn = liobn,
2185 .page_shift = page_shift,
2186 .offset = bus_offset >> page_shift,
2187 .size = nb_table,
2188 .flags = 0
2190 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE_64, &args);
2191 if (fd < 0) {
2192 fprintf(stderr,
2193 "KVM: Failed to create TCE64 table for liobn 0x%x\n",
2194 liobn);
2195 return NULL;
2197 } else if (cap_spapr_tce) {
2198 uint64_t window_size = (uint64_t) nb_table << page_shift;
2199 struct kvm_create_spapr_tce args = {
2200 .liobn = liobn,
2201 .window_size = window_size,
2203 if ((window_size != args.window_size) || bus_offset) {
2204 return NULL;
2206 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2207 if (fd < 0) {
2208 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2209 liobn);
2210 return NULL;
2212 } else {
2213 return NULL;
2216 len = nb_table * sizeof(uint64_t);
2217 /* FIXME: round this up to page size */
2219 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2220 if (table == MAP_FAILED) {
2221 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2222 liobn);
2223 close(fd);
2224 return NULL;
2227 *pfd = fd;
2228 return table;
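/* Undo kvmppc_create_spapr_tce(): unmap the table and close its fd. */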
2231 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2233 long len;
2235 if (fd < 0) {
2236 return -1;
2239 len = nb_table * sizeof(uint64_t);
2240 if ((munmap(table, len) < 0) ||
2241 (close(fd) < 0)) {
2242 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
2243 strerror(errno));
2244 /* Leak the table */
2247 return 0;
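/* Ask the kernel to allocate or reset the guest hash page table.
 * Returns the size shift actually used, 0 if QEMU itself should
 * allocate the htab, or a negative errno on failure. */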
2250 int kvmppc_reset_htab(int shift_hint)
2252 uint32_t shift = shift_hint;
2254 if (!kvm_enabled()) {
2255 /* Full emulation, tell caller to allocate htab itself */
2256 return 0;
2258 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2259 int ret;
2260 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2261 if (ret == -ENOTTY) {
2262 /* At least some versions of PR KVM advertise the
2263 * capability, but don't implement the ioctl(). Oops.
2264 * Return 0 so that we allocate the htab in qemu, as is
2265 * correct for PR. */
2266 return 0;
2267 } else if (ret < 0) {
2268 return ret;
2270 return shift;
2273 /* We have a kernel that predates the htab reset calls. For PR
2274 * KVM, we need to allocate the htab ourselves; an HV KVM of
2275 * this era has already allocated a fixed 16MB hash table. */
2276 if (kvmppc_is_pr(kvm_state)) {
2277 /* PR - tell caller to allocate htab */
2278 return 0;
2279 } else {
2280 /* HV - assume 16MB kernel allocated htab */
2281 return 24;
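/* Read the host's Processor Version Register. */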
2285 static inline uint32_t mfpvr(void)
2287 uint32_t pvr;
2289 asm ("mfpvr %0"
2290 : "=r"(pvr));
2291 return pvr;
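/* Set or clear the given feature bits in an instruction-flags word. */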
2294 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2296 if (on) {
2297 *word |= flags;
2298 } else {
2299 *word &= ~flags;
2303 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2305 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2306 uint32_t vmx = kvmppc_get_vmx();
2307 uint32_t dfp = kvmppc_get_dfp();
2308 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2309 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2311 /* Now fix up the class with information we can query from the host */
2312 pcc->pvr = mfpvr();
2314 if (vmx != -1) {
2315 /* Only override when we know what the host supports */
2316 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2317 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2319 if (dfp != -1) {
2320 /* Only override when we know what the host supports */
2321 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2324 if (dcache_size != -1) {
2325 pcc->l1_dcache_size = dcache_size;
2328 if (icache_size != -1) {
2329 pcc->l1_icache_size = icache_size;
2332 #if defined(TARGET_PPC64)
2333 pcc->radix_page_info = kvm_get_radix_page_info();
2334 #endif /* defined(TARGET_PPC64) */
2337 bool kvmppc_has_cap_epr(void)
2339 return cap_epr;
2342 bool kvmppc_has_cap_htab_fd(void)
2344 return cap_htab_fd;
2347 bool kvmppc_has_cap_fixup_hcalls(void)
2349 return cap_fixup_hcalls;
2352 bool kvmppc_has_cap_htm(void)
2354 return cap_htm;
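/* Walk up the class hierarchy until we find an abstract class, which is
 * the "family" class for the given PVR-specific CPU class. */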
2357 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2359 ObjectClass *oc = OBJECT_CLASS(pcc);
2361 while (oc && !object_class_is_abstract(oc)) {
2362 oc = object_class_get_parent(oc);
2364 assert(oc);
2366 return POWERPC_CPU_CLASS(oc);
2369 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2371 uint32_t host_pvr = mfpvr();
2372 PowerPCCPUClass *pvr_pcc;
2374 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2375 if (pvr_pcc == NULL) {
2376 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2379 return pvr_pcc;
2382 static int kvm_ppc_register_host_cpu_type(void)
2384 TypeInfo type_info = {
2385 .name = TYPE_HOST_POWERPC_CPU,
2386 .class_init = kvmppc_host_cpu_class_init,
2388 PowerPCCPUClass *pvr_pcc;
2389 DeviceClass *dc;
2390 int i;
2392 pvr_pcc = kvm_ppc_get_host_cpu_class();
2393 if (pvr_pcc == NULL) {
2394 return -1;
2396 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2397 type_register(&type_info);
2399 #if defined(TARGET_PPC64)
2400 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2401 type_info.parent = TYPE_SPAPR_CPU_CORE;
2402 type_info.instance_size = sizeof(sPAPRCPUCore);
2403 type_info.instance_init = NULL;
2404 type_info.class_init = spapr_cpu_core_class_init;
2405 type_info.class_data = (void *) "host";
2406 type_register(&type_info);
2407 g_free((void *)type_info.name);
2408 #endif
2411 * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2412 * we want "POWER8" to be a "family" alias that points to the current
2413 * host CPU type, too)
2415 dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2416 for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2417 if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2418 ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
2419 char *suffix;
2421 ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2422 suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2423 if (suffix) {
2424 *suffix = 0;
2426 ppc_cpu_aliases[i].oc = oc;
2427 break;
2431 return 0;
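/* Bind an RTAS function name to a token so the call can be handled in
 * the kernel (KVM_PPC_RTAS_DEFINE_TOKEN). Returns -ENOENT when KVM has
 * no RTAS support. */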
2434 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2436 struct kvm_rtas_token_args args = {
2437 .token = token,
2440 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2441 return -ENOENT;
2444 strncpy(args.name, function, sizeof(args.name));
2446 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
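/* Get a file descriptor for streaming the guest hash table out of (or,
 * with 'write' set, back into) the kernel via KVM_PPC_GET_HTAB_FD. */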
2449 int kvmppc_get_htab_fd(bool write)
2451 struct kvm_get_htab_fd s = {
2452 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2453 .start_index = 0,
2456 if (!cap_htab_fd) {
2457 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2458 return -1;
2461 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
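/* Stream hash table chunks from the HTAB fd into the migration stream.
 * Each chunk is a kvm_get_htab_header followed by n_valid HPTEs; the
 * header fields are forwarded as big-endian values followed by the raw
 * HPTE data. Returns 1 once the fd reports no more data, or 0 when the
 * max_ns time budget runs out first. */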
2464 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2466 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2467 uint8_t buf[bufsize];
2468 ssize_t rc;
2470 do {
2471 rc = read(fd, buf, bufsize);
2472 if (rc < 0) {
2473 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2474 strerror(errno));
2475 return rc;
2476 } else if (rc) {
2477 uint8_t *buffer = buf;
2478 ssize_t n = rc;
2479 while (n) {
2480 struct kvm_get_htab_header *head =
2481 (struct kvm_get_htab_header *) buffer;
2482 size_t chunksize = sizeof(*head) +
2483 HASH_PTE_SIZE_64 * head->n_valid;
2485 qemu_put_be32(f, head->index);
2486 qemu_put_be16(f, head->n_valid);
2487 qemu_put_be16(f, head->n_invalid);
2488 qemu_put_buffer(f, (void *)(head + 1),
2489 HASH_PTE_SIZE_64 * head->n_valid);
2491 buffer += chunksize;
2492 n -= chunksize;
2495 } while ((rc != 0)
2496 && ((max_ns < 0)
2497 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2499 return (rc == 0) ? 1 : 0;
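/* Rebuild one chunk (header plus n_valid HPTEs taken from the migration
 * stream) and push it to the HTAB fd in a single write(). */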
2502 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2503 uint16_t n_valid, uint16_t n_invalid)
2505 struct kvm_get_htab_header *buf;
2506 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2507 ssize_t rc;
2509 buf = alloca(chunksize);
2510 buf->index = index;
2511 buf->n_valid = n_valid;
2512 buf->n_invalid = n_invalid;
2514 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2516 rc = write(fd, buf, chunksize);
2517 if (rc < 0) {
2518 fprintf(stderr, "Error writing KVM hash table: %s\n",
2519 strerror(errno));
2520 return rc;
2522 if (rc != chunksize) {
2523 /* We should never get a short write on a single chunk */
2524 fprintf(stderr, "Short write, restoring KVM hash table\n");
2525 return -1;
2527 return 0;
2530 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2532 return true;
2535 void kvm_arch_init_irq_routing(KVMState *s)
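/* Read n HPTEs starting at ptex through a temporary HTAB fd, copying
 * valid entries into hptes[] and zero-filling ranges the kernel reports
 * as invalid. */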
2539 void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
2541 struct kvm_get_htab_fd ghf = {
2542 .flags = 0,
2543 .start_index = ptex,
2545 int fd, rc;
2546 int i;
2548 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2549 if (fd < 0) {
2550 hw_error("kvmppc_read_hptes: Unable to open HPT fd");
2553 i = 0;
2554 while (i < n) {
2555 struct kvm_get_htab_header *hdr;
2556 int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
2557 char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
2559 rc = read(fd, buf, sizeof(buf));
2560 if (rc < 0) {
2561 hw_error("kvmppc_read_hptes: Unable to read HPTEs");
2564 hdr = (struct kvm_get_htab_header *)buf;
2565 while ((i < n) && ((char *)hdr < (buf + rc))) {
2566 int invalid = hdr->n_invalid;
2568 if (hdr->index != (ptex + i)) {
2569 hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
2570 " != (%"HWADDR_PRIu" + %d", hdr->index, ptex, i);
2573 memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
2574 i += hdr->n_valid;
2576 if ((n - i) < invalid) {
2577 invalid = n - i;
2579 memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
2580 i += invalid;
2582 hdr = (struct kvm_get_htab_header *)
2583 ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
2587 close(fd);
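/* Update a single HPTE by writing a one-entry chunk (header with
 * n_valid = 1 followed by the two big-endian PTE doublewords) to the
 * HTAB fd. */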
2590 void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
2592 int fd, rc;
2593 struct kvm_get_htab_fd ghf;
2594 struct {
2595 struct kvm_get_htab_header hdr;
2596 uint64_t pte0;
2597 uint64_t pte1;
2598 } buf;
2600 ghf.flags = 0;
2601 ghf.start_index = 0; /* Ignored */
2602 fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2603 if (fd < 0) {
2604 hw_error("kvmppc_write_hpte: Unable to open HPT fd");
2607 buf.hdr.n_valid = 1;
2608 buf.hdr.n_invalid = 0;
2609 buf.hdr.index = ptex;
2610 buf.pte0 = cpu_to_be64(pte0);
2611 buf.pte1 = cpu_to_be64(pte1);
2613 rc = write(fd, &buf, sizeof(buf));
2614 if (rc != sizeof(buf)) {
2615 hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
2617 close(fd);
2620 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2621 uint64_t address, uint32_t data, PCIDevice *dev)
2623 return 0;
2626 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2627 int vector, PCIDevice *dev)
2629 return 0;
2632 int kvm_arch_release_virq_post(int virq)
2634 return 0;
2637 int kvm_arch_msi_data_to_gsi(uint32_t data)
2639 return data & 0xffff;
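/* Enable the in-kernel H_RANDOM implementation, provided the host kernel
 * exposes a hardware RNG (KVM_CAP_PPC_HWRNG). */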
2642 int kvmppc_enable_hwrng(void)
2644 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2645 return -1;
2648 return kvmppc_enable_hcall(kvm_state, H_RANDOM);