target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include "qemu/osdep.h"
  18 #include <dirent.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/vfs.h>
  21
  22 #include <linux/kvm.h>
  23
  24 #include "qemu-common.h"
  25 #include "qemu/error-report.h"
  26 #include "cpu.h"
  27 #include "qemu/timer.h"
  28 #include "sysemu/sysemu.h"
  29 #include "sysemu/kvm.h"
  30 #include "sysemu/numa.h"
  31 #include "kvm_ppc.h"
  32 #include "sysemu/cpus.h"
  33 #include "sysemu/device_tree.h"
  34 #include "mmu-hash64.h"
  35
  36 #include "hw/sysbus.h"
  37 #include "hw/ppc/spapr.h"
  38 #include "hw/ppc/spapr_vio.h"
  39 #include "hw/ppc/ppc.h"
  40 #include "sysemu/watchdog.h"
  41 #include "trace.h"
  42 #include "exec/gdbstub.h"
  43 #include "exec/memattrs.h"
  44 #include "sysemu/hostmem.h"
  45 #include "qemu/cutils.h"
  46 #if defined(TARGET_PPC64)
  47 #include "hw/ppc/spapr_cpu_core.h"
  48 #endif
  49
  50 //#define DEBUG_KVM
  51
  52 #ifdef DEBUG_KVM
  53 #define DPRINTF(fmt, ...) \
  54     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  55 #else
  56 #define DPRINTF(fmt, ...) \
  57     do { } while (0)
  58 #endif
  59
  60 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  61
  62 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  63     KVM_CAP_LAST_INFO
  64 };
  65
  66 static int cap_interrupt_unset = false;
  67 static int cap_interrupt_level = false;
  68 static int cap_segstate;
  69 static int cap_booke_sregs;
  70 static int cap_ppc_smt;
  71 static int cap_ppc_rma;
  72 static int cap_spapr_tce;
  73 static int cap_spapr_multitce;
  74 static int cap_spapr_vfio;
  75 static int cap_hior;
  76 static int cap_one_reg;
  77 static int cap_epr;
  78 static int cap_ppc_watchdog;
  79 static int cap_papr;
  80 static int cap_htab_fd;
  81 static int cap_fixup_hcalls;
  82
  83 static uint32_t debug_inst_opcode;
  84
  85 /* XXX We have a race condition where we actually have a level triggered
  86  *     interrupt, but the infrastructure can't expose that yet, so the guest
  87  *     takes but ignores it, goes to sleep and never gets notified that there's
  88  *     still an interrupt pending.
  89  *
  90  *     As a quick workaround, let's just wake up again 20 ms after we injected
  91  *     an interrupt. That way we can assure that we're always reinjecting
  92  *     interrupts in case the guest swallowed them.
  93  */
  94 static QEMUTimer *idle_timer;
  95
  96 static void kvm_kick_cpu(void *opaque)
  97 {
  98     PowerPCCPU *cpu = opaque;
  99
 100     qemu_cpu_kick(CPU(cpu));
 101 }
 102
 103 static int kvm_ppc_register_host_cpu_type(void);
 104
 105 int kvm_arch_init(MachineState *ms, KVMState *s)
 106 {
 107     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 108     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 109     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 110     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 111     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 112     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 113     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 114     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 115     cap_spapr_vfio = false;
 116     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 117     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 118     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 119     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 120     /* Note: we don't set cap_papr here, because this capability is
 121      * only activated after this by kvmppc_set_papr() */
 122     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 123     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 124
 125     if (!cap_interrupt_level) {
 126         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 127                         "VM to stall at times!\n");
 128     }
 129
 130     kvm_ppc_register_host_cpu_type();
 131
 132     return 0;
 133 }
 134
 135 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 136 {
 137     CPUPPCState *cenv = &cpu->env;
 138     CPUState *cs = CPU(cpu);
 139     struct kvm_sregs sregs;
 140     int ret;
 141
 142     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 143         /* What we're really trying to say is "if we're on BookE, we use
 144            the native PVR for now". This is the only sane way to check
 145            it though, so we potentially confuse users that they can run
 146            BookE guests on BookS. Let's hope nobody dares enough :) */
 147         return 0;
 148     } else {
 149         if (!cap_segstate) {
 150             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 151             return -ENOSYS;
 152         }
 153     }
 154
 155     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 156     if (ret) {
 157         return ret;
 158     }
 159
 160     sregs.pvr = cenv->spr[SPR_PVR];
 161     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 162 }
 163
 164 /* Set up a shared TLB array with KVM */
 165 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 166 {
 167     CPUPPCState *env = &cpu->env;
 168     CPUState *cs = CPU(cpu);
 169     struct kvm_book3e_206_tlb_params params = {};
 170     struct kvm_config_tlb cfg = {};
 171     unsigned int entries = 0;
 172     int ret, i;
 173
 174     if (!kvm_enabled() ||
 175         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 176         return 0;
 177     }
 178
 179     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 180
 181     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 182         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 183         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 184         entries += params.tlb_sizes[i];
 185     }
 186
 187     assert(entries == env->nb_tlb);
 188     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 189
 190     env->tlb_dirty = true;
 191
 192     cfg.array = (uintptr_t)env->tlb.tlbm;
 193     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 194     cfg.params = (uintptr_t)&params;
 195     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 196
 197     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 198     if (ret < 0) {
 199         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 200                 __func__, strerror(-ret));
 201         return ret;
 202     }
 203
 204     env->kvm_sw_tlb = true;
 205     return 0;
 206 }
 207
 208
 209 #if defined(TARGET_PPC64)
 210 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 211                                        struct kvm_ppc_smmu_info *info)
 212 {
 213     CPUPPCState *env = &cpu->env;
 214     CPUState *cs = CPU(cpu);
 215
 216     memset(info, 0, sizeof(*info));
 217
 218     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 219      * need to "guess" what the supported page sizes are.
 220      *
 221      * For that to work we make a few assumptions:
 222      *
 223      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 224      *   KVM which only supports 4K and 16M pages, but supports them
 225      *   regardless of the backing store characteritics. We also don't
 226      *   support 1T segments.
 227      *
 228      *   This is safe as if HV KVM ever supports that capability or PR
 229      *   KVM grows supports for more page/segment sizes, those versions
 230      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 231      *   will not hit this fallback
 232      *
 233      * - Else we are running HV KVM. This means we only support page
 234      *   sizes that fit in the backing store. Additionally we only
 235      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 236      *   P7 encodings for the SLB and hash table. Here too, we assume
 237      *   support for any newer processor will mean a kernel that
 238      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 239      *   this fallback.
 240      */
 241     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 242         /* No flags */
 243         info->flags = 0;
 244         info->slb_size = 64;
 245
 246         /* Standard 4k base page size segment */
 247         info->sps[0].page_shift = 12;
 248         info->sps[0].slb_enc = 0;
 249         info->sps[0].enc[0].page_shift = 12;
 250         info->sps[0].enc[0].pte_enc = 0;
 251
 252         /* Standard 16M large page size segment */
 253         info->sps[1].page_shift = 24;
 254         info->sps[1].slb_enc = SLB_VSID_L;
 255         info->sps[1].enc[0].page_shift = 24;
 256         info->sps[1].enc[0].pte_enc = 0;
 257     } else {
 258         int i = 0;
 259
 260         /* HV KVM has backing store size restrictions */
 261         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 262
 263         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 264             info->flags |= KVM_PPC_1T_SEGMENTS;
 265         }
 266
 267         if (env->mmu_model == POWERPC_MMU_2_06 ||
 268             env->mmu_model == POWERPC_MMU_2_07) {
 269             info->slb_size = 32;
 270         } else {
 271             info->slb_size = 64;
 272         }
 273
 274         /* Standard 4k base page size segment */
 275         info->sps[i].page_shift = 12;
 276         info->sps[i].slb_enc = 0;
 277         info->sps[i].enc[0].page_shift = 12;
 278         info->sps[i].enc[0].pte_enc = 0;
 279         i++;
 280
 281         /* 64K on MMU 2.06 and later */
 282         if (env->mmu_model == POWERPC_MMU_2_06 ||
 283             env->mmu_model == POWERPC_MMU_2_07) {
 284             info->sps[i].page_shift = 16;
 285             info->sps[i].slb_enc = 0x110;
 286             info->sps[i].enc[0].page_shift = 16;
 287             info->sps[i].enc[0].pte_enc = 1;
 288             i++;
 289         }
 290
 291         /* Standard 16M large page size segment */
 292         info->sps[i].page_shift = 24;
 293         info->sps[i].slb_enc = SLB_VSID_L;
 294         info->sps[i].enc[0].page_shift = 24;
 295         info->sps[i].enc[0].pte_enc = 0;
 296     }
 297 }
 298
 299 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 300 {
 301     CPUState *cs = CPU(cpu);
 302     int ret;
 303
 304     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 305         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 306         if (ret == 0) {
 307             return;
 308         }
 309     }
 310
 311     kvm_get_fallback_smmu_info(cpu, info);
 312 }
 313
 314 static long gethugepagesize(const char *mem_path)
 315 {
 316     struct statfs fs;
 317     int ret;
 318
 319     do {
 320         ret = statfs(mem_path, &fs);
 321     } while (ret != 0 && errno == EINTR);
 322
 323     if (ret != 0) {
 324         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 325                 strerror(errno));
 326         exit(1);
 327     }
 328
 329 #define HUGETLBFS_MAGIC       0x958458f6
 330
 331     if (fs.f_type != HUGETLBFS_MAGIC) {
 332         /* Explicit mempath, but it's ordinary pages */
 333         return getpagesize();
 334     }
 335
 336     /* It's hugepage, return the huge page size */
 337     return fs.f_bsize;
 338 }
 339
 340 /*
 341  * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
 342  * may or may not name the same files / on the same filesystem now as
 343  * when we actually open and map them.  Iterate over the file
 344  * descriptors instead, and use qemu_fd_getpagesize().
 345  */
 346 static int find_max_supported_pagesize(Object *obj, void *opaque)
 347 {
 348     char *mem_path;
 349     long *hpsize_min = opaque;
 350
 351     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
 352         mem_path = object_property_get_str(obj, "mem-path", NULL);
 353         if (mem_path) {
 354             long hpsize = gethugepagesize(mem_path);
 355             if (hpsize < *hpsize_min) {
 356                 *hpsize_min = hpsize;
 357             }
 358         } else {
 359             *hpsize_min = getpagesize();
 360         }
 361     }
 362
 363     return 0;
 364 }
 365
 366 static long getrampagesize(void)
 367 {
 368     long hpsize = LONG_MAX;
 369     Object *memdev_root;
 370
 371     if (mem_path) {
 372         return gethugepagesize(mem_path);
 373     }
 374
 375     /* it's possible we have memory-backend objects with
 376      * hugepage-backed RAM. these may get mapped into system
 377      * address space via -numa parameters or memory hotplug
 378      * hooks. we want to take these into account, but we
 379      * also want to make sure these supported hugepage
 380      * sizes are applicable across the entire range of memory
 381      * we may boot from, so we take the min across all
 382      * backends, and assume normal pages in cases where a
 383      * backend isn't backed by hugepages.
 384      */
 385     memdev_root = object_resolve_path("/objects", NULL);
 386     if (!memdev_root) {
 387         return getpagesize();
 388     }
 389
 390     object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
 391
 392     if (hpsize == LONG_MAX || hpsize == getpagesize()) {
 393         return getpagesize();
 394     }
 395
 396     /* If NUMA is disabled or the NUMA nodes are not backed with a
 397      * memory-backend, then there is at least one node using "normal"
 398      * RAM. And since normal RAM has not been configured with "-mem-path"
 399      * (what we've checked earlier here already), we can not use huge pages!
 400      */
 401     if (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL) {
 402         static bool warned;
 403         if (!warned) {
 404             error_report("Huge page support disabled (n/a for main memory).");
 405             warned = true;
 406         }
 407         return getpagesize();
 408     }
 409
 410     return hpsize;
 411 }
 412
 413 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 414 {
 415     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 416         return true;
 417     }
 418
 419     return (1ul << shift) <= rampgsize;
 420 }
 421
 422 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 423 {
 424     static struct kvm_ppc_smmu_info smmu_info;
 425     static bool has_smmu_info;
 426     CPUPPCState *env = &cpu->env;
 427     long rampagesize;
 428     int iq, ik, jq, jk;
 429
 430     /* We only handle page sizes for 64-bit server guests for now */
 431     if (!(env->mmu_model & POWERPC_MMU_64)) {
 432         return;
 433     }
 434
 435     /* Collect MMU info from kernel if not already */
 436     if (!has_smmu_info) {
 437         kvm_get_smmu_info(cpu, &smmu_info);
 438         has_smmu_info = true;
 439     }
 440
 441     rampagesize = getrampagesize();
 442
 443     /* Convert to QEMU form */
 444     memset(&env->sps, 0, sizeof(env->sps));
 445
 446     /* If we have HV KVM, we need to forbid CI large pages if our
 447      * host page size is smaller than 64K.
 448      */
 449     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 450         env->ci_large_pages = getpagesize() >= 0x10000;
 451     }
 452
 453     /*
 454      * XXX This loop should be an entry wide AND of the capabilities that
 455      *     the selected CPU has with the capabilities that KVM supports.
 456      */
 457     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 458         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 459         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 460
 461         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 462                                  ksps->page_shift)) {
 463             continue;
 464         }
 465         qsps->page_shift = ksps->page_shift;
 466         qsps->slb_enc = ksps->slb_enc;
 467         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 468             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 469                                      ksps->enc[jk].page_shift)) {
 470                 continue;
 471             }
 472             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 473             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 474             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 475                 break;
 476             }
 477         }
 478         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 479             break;
 480         }
 481     }
 482     env->slb_nr = smmu_info.slb_size;
 483     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 484         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 485     }
 486 }
 487 #else /* defined (TARGET_PPC64) */
 488
 489 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 490 {
 491 }
 492
 493 #endif /* !defined (TARGET_PPC64) */
 494
 495 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 496 {
 497     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 498 }
 499
 500 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
 501  * book3s supports only 1 watchpoint, so array size
 502  * of 4 is sufficient for now.
 503  */
 504 #define MAX_HW_BKPTS 4
 505
 506 static struct HWBreakpoint {
 507     target_ulong addr;
 508     int type;
 509 } hw_debug_points[MAX_HW_BKPTS];
 510
 511 static CPUWatchpoint hw_watchpoint;
 512
 513 /* Default there is no breakpoint and watchpoint supported */
 514 static int max_hw_breakpoint;
 515 static int max_hw_watchpoint;
 516 static int nb_hw_breakpoint;
 517 static int nb_hw_watchpoint;
 518
 519 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 520 {
 521     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 522         max_hw_breakpoint = 2;
 523         max_hw_watchpoint = 2;
 524     }
 525
 526     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 527         fprintf(stderr, "Error initializing h/w breakpoints\n");
 528         return;
 529     }
 530 }
 531
 532 int kvm_arch_init_vcpu(CPUState *cs)
 533 {
 534     PowerPCCPU *cpu = POWERPC_CPU(cs);
 535     CPUPPCState *cenv = &cpu->env;
 536     int ret;
 537
 538     /* Gather server mmu info from KVM and update the CPU state */
 539     kvm_fixup_page_sizes(cpu);
 540
 541     /* Synchronize sregs with kvm */
 542     ret = kvm_arch_sync_sregs(cpu);
 543     if (ret) {
 544         if (ret == -EINVAL) {
 545             error_report("Register sync failed... If you're using kvm-hv.ko,"
 546                          " only \"-cpu host\" is possible");
 547         }
 548         return ret;
 549     }
 550
 551     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 552
 553     /* Some targets support access to KVM's guest TLB. */
 554     switch (cenv->mmu_model) {
 555     case POWERPC_MMU_BOOKE206:
 556         ret = kvm_booke206_tlb_init(cpu);
 557         break;
 558     default:
 559         break;
 560     }
 561
 562     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 563     kvmppc_hw_debug_points_init(cenv);
 564
 565     return ret;
 566 }
 567
 568 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 569 {
 570     CPUPPCState *env = &cpu->env;
 571     CPUState *cs = CPU(cpu);
 572     struct kvm_dirty_tlb dirty_tlb;
 573     unsigned char *bitmap;
 574     int ret;
 575
 576     if (!env->kvm_sw_tlb) {
 577         return;
 578     }
 579
 580     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 581     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 582
 583     dirty_tlb.bitmap = (uintptr_t)bitmap;
 584     dirty_tlb.num_dirty = env->nb_tlb;
 585
 586     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 587     if (ret) {
 588         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 589                 __func__, strerror(-ret));
 590     }
 591
 592     g_free(bitmap);
 593 }
 594
 595 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 596 {
 597     PowerPCCPU *cpu = POWERPC_CPU(cs);
 598     CPUPPCState *env = &cpu->env;
 599     union {
 600         uint32_t u32;
 601         uint64_t u64;
 602     } val;
 603     struct kvm_one_reg reg = {
 604         .id = id,
 605         .addr = (uintptr_t) &val,
 606     };
 607     int ret;
 608
 609     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 610     if (ret != 0) {
 611         trace_kvm_failed_spr_get(spr, strerror(errno));
 612     } else {
 613         switch (id & KVM_REG_SIZE_MASK) {
 614         case KVM_REG_SIZE_U32:
 615             env->spr[spr] = val.u32;
 616             break;
 617
 618         case KVM_REG_SIZE_U64:
 619             env->spr[spr] = val.u64;
 620             break;
 621
 622         default:
 623             /* Don't handle this size yet */
 624             abort();
 625         }
 626     }
 627 }
 628
 629 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 630 {
 631     PowerPCCPU *cpu = POWERPC_CPU(cs);
 632     CPUPPCState *env = &cpu->env;
 633     union {
 634         uint32_t u32;
 635         uint64_t u64;
 636     } val;
 637     struct kvm_one_reg reg = {
 638         .id = id,
 639         .addr = (uintptr_t) &val,
 640     };
 641     int ret;
 642
 643     switch (id & KVM_REG_SIZE_MASK) {
 644     case KVM_REG_SIZE_U32:
 645         val.u32 = env->spr[spr];
 646         break;
 647
 648     case KVM_REG_SIZE_U64:
 649         val.u64 = env->spr[spr];
 650         break;
 651
 652     default:
 653         /* Don't handle this size yet */
 654         abort();
 655     }
 656
 657     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 658     if (ret != 0) {
 659         trace_kvm_failed_spr_set(spr, strerror(errno));
 660     }
 661 }
 662
 663 static int kvm_put_fp(CPUState *cs)
 664 {
 665     PowerPCCPU *cpu = POWERPC_CPU(cs);
 666     CPUPPCState *env = &cpu->env;
 667     struct kvm_one_reg reg;
 668     int i;
 669     int ret;
 670
 671     if (env->insns_flags & PPC_FLOAT) {
 672         uint64_t fpscr = env->fpscr;
 673         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 674
 675         reg.id = KVM_REG_PPC_FPSCR;
 676         reg.addr = (uintptr_t)&fpscr;
 677         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 678         if (ret < 0) {
 679             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 680             return ret;
 681         }
 682
 683         for (i = 0; i < 32; i++) {
 684             uint64_t vsr[2];
 685
 686 #ifdef HOST_WORDS_BIGENDIAN
 687             vsr[0] = float64_val(env->fpr[i]);
 688             vsr[1] = env->vsr[i];
 689 #else
 690             vsr[0] = env->vsr[i];
 691             vsr[1] = float64_val(env->fpr[i]);
 692 #endif
 693             reg.addr = (uintptr_t) &vsr;
 694             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 695
 696             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 697             if (ret < 0) {
 698                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 699                         i, strerror(errno));
 700                 return ret;
 701             }
 702         }
 703     }
 704
 705     if (env->insns_flags & PPC_ALTIVEC) {
 706         reg.id = KVM_REG_PPC_VSCR;
 707         reg.addr = (uintptr_t)&env->vscr;
 708         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 709         if (ret < 0) {
 710             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 711             return ret;
 712         }
 713
 714         for (i = 0; i < 32; i++) {
 715             reg.id = KVM_REG_PPC_VR(i);
 716             reg.addr = (uintptr_t)&env->avr[i];
 717             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 718             if (ret < 0) {
 719                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 720                 return ret;
 721             }
 722         }
 723     }
 724
 725     return 0;
 726 }
 727
 728 static int kvm_get_fp(CPUState *cs)
 729 {
 730     PowerPCCPU *cpu = POWERPC_CPU(cs);
 731     CPUPPCState *env = &cpu->env;
 732     struct kvm_one_reg reg;
 733     int i;
 734     int ret;
 735
 736     if (env->insns_flags & PPC_FLOAT) {
 737         uint64_t fpscr;
 738         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 739
 740         reg.id = KVM_REG_PPC_FPSCR;
 741         reg.addr = (uintptr_t)&fpscr;
 742         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 743         if (ret < 0) {
 744             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 745             return ret;
 746         } else {
 747             env->fpscr = fpscr;
 748         }
 749
 750         for (i = 0; i < 32; i++) {
 751             uint64_t vsr[2];
 752
 753             reg.addr = (uintptr_t) &vsr;
 754             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 755
 756             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 757             if (ret < 0) {
 758                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 759                         vsx ? "VSR" : "FPR", i, strerror(errno));
 760                 return ret;
 761             } else {
 762 #ifdef HOST_WORDS_BIGENDIAN
 763                 env->fpr[i] = vsr[0];
 764                 if (vsx) {
 765                     env->vsr[i] = vsr[1];
 766                 }
 767 #else
 768                 env->fpr[i] = vsr[1];
 769                 if (vsx) {
 770                     env->vsr[i] = vsr[0];
 771                 }
 772 #endif
 773             }
 774         }
 775     }
 776
 777     if (env->insns_flags & PPC_ALTIVEC) {
 778         reg.id = KVM_REG_PPC_VSCR;
 779         reg.addr = (uintptr_t)&env->vscr;
 780         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 781         if (ret < 0) {
 782             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 783             return ret;
 784         }
 785
 786         for (i = 0; i < 32; i++) {
 787             reg.id = KVM_REG_PPC_VR(i);
 788             reg.addr = (uintptr_t)&env->avr[i];
 789             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 790             if (ret < 0) {
 791                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 792                         i, strerror(errno));
 793                 return ret;
 794             }
 795         }
 796     }
 797
 798     return 0;
 799 }
 800
 801 #if defined(TARGET_PPC64)
 802 static int kvm_get_vpa(CPUState *cs)
 803 {
 804     PowerPCCPU *cpu = POWERPC_CPU(cs);
 805     CPUPPCState *env = &cpu->env;
 806     struct kvm_one_reg reg;
 807     int ret;
 808
 809     reg.id = KVM_REG_PPC_VPA_ADDR;
 810     reg.addr = (uintptr_t)&env->vpa_addr;
 811     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 812     if (ret < 0) {
 813         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 814         return ret;
 815     }
 816
 817     assert((uintptr_t)&env->slb_shadow_size
 818            == ((uintptr_t)&env->slb_shadow_addr + 8));
 819     reg.id = KVM_REG_PPC_VPA_SLB;
 820     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 821     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 822     if (ret < 0) {
 823         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 824                 strerror(errno));
 825         return ret;
 826     }
 827
 828     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 829     reg.id = KVM_REG_PPC_VPA_DTL;
 830     reg.addr = (uintptr_t)&env->dtl_addr;
 831     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 832     if (ret < 0) {
 833         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 834                 strerror(errno));
 835         return ret;
 836     }
 837
 838     return 0;
 839 }
 840
 841 static int kvm_put_vpa(CPUState *cs)
 842 {
 843     PowerPCCPU *cpu = POWERPC_CPU(cs);
 844     CPUPPCState *env = &cpu->env;
 845     struct kvm_one_reg reg;
 846     int ret;
 847
 848     /* SLB shadow or DTL can't be registered unless a master VPA is
 849      * registered.  That means when restoring state, if a VPA *is*
 850      * registered, we need to set that up first.  If not, we need to
 851      * deregister the others before deregistering the master VPA */
 852     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 853
 854     if (env->vpa_addr) {
 855         reg.id = KVM_REG_PPC_VPA_ADDR;
 856         reg.addr = (uintptr_t)&env->vpa_addr;
 857         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 858         if (ret < 0) {
 859             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 860             return ret;
 861         }
 862     }
 863
 864     assert((uintptr_t)&env->slb_shadow_size
 865            == ((uintptr_t)&env->slb_shadow_addr + 8));
 866     reg.id = KVM_REG_PPC_VPA_SLB;
 867     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 868     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 869     if (ret < 0) {
 870         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 871         return ret;
 872     }
 873
 874     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 875     reg.id = KVM_REG_PPC_VPA_DTL;
 876     reg.addr = (uintptr_t)&env->dtl_addr;
 877     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 878     if (ret < 0) {
 879         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 880                 strerror(errno));
 881         return ret;
 882     }
 883
 884     if (!env->vpa_addr) {
 885         reg.id = KVM_REG_PPC_VPA_ADDR;
 886         reg.addr = (uintptr_t)&env->vpa_addr;
 887         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 888         if (ret < 0) {
 889             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 890             return ret;
 891         }
 892     }
 893
 894     return 0;
 895 }
 896 #endif /* TARGET_PPC64 */
 897
 898 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 899 {
 900     CPUPPCState *env = &cpu->env;
 901     struct kvm_sregs sregs;
 902     int i;
 903
 904     sregs.pvr = env->spr[SPR_PVR];
 905
 906     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 907
 908     /* Sync SLB */
 909 #ifdef TARGET_PPC64
 910     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 911         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 912         if (env->slb[i].esid & SLB_ESID_V) {
 913             sregs.u.s.ppc64.slb[i].slbe |= i;
 914         }
 915         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 916     }
 917 #endif
 918
 919     /* Sync SRs */
 920     for (i = 0; i < 16; i++) {
 921         sregs.u.s.ppc32.sr[i] = env->sr[i];
 922     }
 923
 924     /* Sync BATs */
 925     for (i = 0; i < 8; i++) {
 926         /* Beware. We have to swap upper and lower bits here */
 927         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 928             | env->DBAT[1][i];
 929         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 930             | env->IBAT[1][i];
 931     }
 932
 933     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 934 }
 935
 936 int kvm_arch_put_registers(CPUState *cs, int level)
 937 {
 938     PowerPCCPU *cpu = POWERPC_CPU(cs);
 939     CPUPPCState *env = &cpu->env;
 940     struct kvm_regs regs;
 941     int ret;
 942     int i;
 943
 944     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 945     if (ret < 0) {
 946         return ret;
 947     }
 948
 949     regs.ctr = env->ctr;
 950     regs.lr  = env->lr;
 951     regs.xer = cpu_read_xer(env);
 952     regs.msr = env->msr;
 953     regs.pc = env->nip;
 954
 955     regs.srr0 = env->spr[SPR_SRR0];
 956     regs.srr1 = env->spr[SPR_SRR1];
 957
 958     regs.sprg0 = env->spr[SPR_SPRG0];
 959     regs.sprg1 = env->spr[SPR_SPRG1];
 960     regs.sprg2 = env->spr[SPR_SPRG2];
 961     regs.sprg3 = env->spr[SPR_SPRG3];
 962     regs.sprg4 = env->spr[SPR_SPRG4];
 963     regs.sprg5 = env->spr[SPR_SPRG5];
 964     regs.sprg6 = env->spr[SPR_SPRG6];
 965     regs.sprg7 = env->spr[SPR_SPRG7];
 966
 967     regs.pid = env->spr[SPR_BOOKE_PID];
 968
 969     for (i = 0;i < 32; i++)
 970         regs.gpr[i] = env->gpr[i];
 971
 972     regs.cr = 0;
 973     for (i = 0; i < 8; i++) {
 974         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 975     }
 976
 977     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 978     if (ret < 0)
 979         return ret;
 980
 981     kvm_put_fp(cs);
 982
 983     if (env->tlb_dirty) {
 984         kvm_sw_tlb_put(cpu);
 985         env->tlb_dirty = false;
 986     }
 987
 988     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 989         ret = kvmppc_put_books_sregs(cpu);
 990         if (ret < 0) {
 991             return ret;
 992         }
 993     }
 994
 995     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 996         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 997     }
 998
 999     if (cap_one_reg) {
1000         int i;
1001
1002         /* We deliberately ignore errors here, for kernels which have
1003          * the ONE_REG calls, but don't support the specific
1004          * registers, there's a reasonable chance things will still
1005          * work, at least until we try to migrate. */
1006         for (i = 0; i < 1024; i++) {
1007             uint64_t id = env->spr_cb[i].one_reg_id;
1008
1009             if (id != 0) {
1010                 kvm_put_one_spr(cs, id, i);
1011             }
1012         }
1013
1014 #ifdef TARGET_PPC64
1015         if (msr_ts) {
1016             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1017                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1018             }
1019             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1020                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1021             }
1022             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1023             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1024             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1025             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1026             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1027             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1028             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1029             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1030             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1031             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1032         }
1033
1034         if (cap_papr) {
1035             if (kvm_put_vpa(cs) < 0) {
1036                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1037             }
1038         }
1039
1040         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1041 #endif /* TARGET_PPC64 */
1042     }
1043
1044     return ret;
1045 }
1046
1047 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1048 {
1049      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1050 }
1051
1052 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1053 {
1054     CPUPPCState *env = &cpu->env;
1055     struct kvm_sregs sregs;
1056     int ret;
1057
1058     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1059     if (ret < 0) {
1060         return ret;
1061     }
1062
1063     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1064         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1065         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1066         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1067         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1068         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1069         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1070         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1071         env->spr[SPR_DECR] = sregs.u.e.dec;
1072         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1073         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1074         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1075     }
1076
1077     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1078         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1079         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1080         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1081         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1082         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1083     }
1084
1085     if (sregs.u.e.features & KVM_SREGS_E_64) {
1086         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1087     }
1088
1089     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1090         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1091     }
1092
1093     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1094         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1095         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1096         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1097         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1098         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1099         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1100         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1101         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1102         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1103         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1104         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1105         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1106         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1107         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1108         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1109         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1110         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1111         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1112         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1113         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1114         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1115         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1116         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1117         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1118         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1119         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1120         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1121         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1122         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1123         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1124         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1125         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1126
1127         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1128             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1129             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1130             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1131             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1132             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1133             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1134         }
1135
1136         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1137             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1138             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1139         }
1140
1141         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1142             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1143             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1144             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1145             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1146         }
1147     }
1148
1149     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1150         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1151         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1152         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1153         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1154         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1155         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1156         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1157         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1158         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1159         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1160     }
1161
1162     if (sregs.u.e.features & KVM_SREGS_EXP) {
1163         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1164     }
1165
1166     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1167         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1168         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1169     }
1170
1171     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1172         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1173         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1174         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1175
1176         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1177             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1178             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1179         }
1180     }
1181
1182     return 0;
1183 }
1184
1185 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1186 {
1187     CPUPPCState *env = &cpu->env;
1188     struct kvm_sregs sregs;
1189     int ret;
1190     int i;
1191
1192     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1193     if (ret < 0) {
1194         return ret;
1195     }
1196
1197     if (!env->external_htab) {
1198         ppc_store_sdr1(env, sregs.u.s.sdr1);
1199     }
1200
1201     /* Sync SLB */
1202 #ifdef TARGET_PPC64
1203     /*
1204      * The packed SLB array we get from KVM_GET_SREGS only contains
1205      * information about valid entries. So we flush our internal copy
1206      * to get rid of stale ones, then put all valid SLB entries back
1207      * in.
1208      */
1209     memset(env->slb, 0, sizeof(env->slb));
1210     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1211         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1212         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1213         /*
1214          * Only restore valid entries
1215          */
1216         if (rb & SLB_ESID_V) {
1217             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1218         }
1219     }
1220 #endif
1221
1222     /* Sync SRs */
1223     for (i = 0; i < 16; i++) {
1224         env->sr[i] = sregs.u.s.ppc32.sr[i];
1225     }
1226
1227     /* Sync BATs */
1228     for (i = 0; i < 8; i++) {
1229         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1230         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1231         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1232         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1233     }
1234
1235     return 0;
1236 }
1237
1238 int kvm_arch_get_registers(CPUState *cs)
1239 {
1240     PowerPCCPU *cpu = POWERPC_CPU(cs);
1241     CPUPPCState *env = &cpu->env;
1242     struct kvm_regs regs;
1243     uint32_t cr;
1244     int i, ret;
1245
1246     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1247     if (ret < 0)
1248         return ret;
1249
1250     cr = regs.cr;
1251     for (i = 7; i >= 0; i--) {
1252         env->crf[i] = cr & 15;
1253         cr >>= 4;
1254     }
1255
1256     env->ctr = regs.ctr;
1257     env->lr = regs.lr;
1258     cpu_write_xer(env, regs.xer);
1259     env->msr = regs.msr;
1260     env->nip = regs.pc;
1261
1262     env->spr[SPR_SRR0] = regs.srr0;
1263     env->spr[SPR_SRR1] = regs.srr1;
1264
1265     env->spr[SPR_SPRG0] = regs.sprg0;
1266     env->spr[SPR_SPRG1] = regs.sprg1;
1267     env->spr[SPR_SPRG2] = regs.sprg2;
1268     env->spr[SPR_SPRG3] = regs.sprg3;
1269     env->spr[SPR_SPRG4] = regs.sprg4;
1270     env->spr[SPR_SPRG5] = regs.sprg5;
1271     env->spr[SPR_SPRG6] = regs.sprg6;
1272     env->spr[SPR_SPRG7] = regs.sprg7;
1273
1274     env->spr[SPR_BOOKE_PID] = regs.pid;
1275
1276     for (i = 0;i < 32; i++)
1277         env->gpr[i] = regs.gpr[i];
1278
1279     kvm_get_fp(cs);
1280
1281     if (cap_booke_sregs) {
1282         ret = kvmppc_get_booke_sregs(cpu);
1283         if (ret < 0) {
1284             return ret;
1285         }
1286     }
1287
1288     if (cap_segstate) {
1289         ret = kvmppc_get_books_sregs(cpu);
1290         if (ret < 0) {
1291             return ret;
1292         }
1293     }
1294
1295     if (cap_hior) {
1296         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1297     }
1298
1299     if (cap_one_reg) {
1300         int i;
1301
1302         /* We deliberately ignore errors here, for kernels which have
1303          * the ONE_REG calls, but don't support the specific
1304          * registers, there's a reasonable chance things will still
1305          * work, at least until we try to migrate. */
1306         for (i = 0; i < 1024; i++) {
1307             uint64_t id = env->spr_cb[i].one_reg_id;
1308
1309             if (id != 0) {
1310                 kvm_get_one_spr(cs, id, i);
1311             }
1312         }
1313
1314 #ifdef TARGET_PPC64
1315         if (msr_ts) {
1316             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1317                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1318             }
1319             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1320                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1321             }
1322             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1323             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1324             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1325             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1326             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1327             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1328             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1329             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1330             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1331             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1332         }
1333
1334         if (cap_papr) {
1335             if (kvm_get_vpa(cs) < 0) {
1336                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1337             }
1338         }
1339
1340         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1341 #endif
1342     }
1343
1344     return 0;
1345 }
1346
1347 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1348 {
1349     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1350
1351     if (irq != PPC_INTERRUPT_EXT) {
1352         return 0;
1353     }
1354
1355     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1356         return 0;
1357     }
1358
1359     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1360
1361     return 0;
1362 }
1363
1364 #if defined(TARGET_PPCEMB)
1365 #define PPC_INPUT_INT PPC40x_INPUT_INT
1366 #elif defined(TARGET_PPC64)
1367 #define PPC_INPUT_INT PPC970_INPUT_INT
1368 #else
1369 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1370 #endif
1371
1372 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1373 {
1374     PowerPCCPU *cpu = POWERPC_CPU(cs);
1375     CPUPPCState *env = &cpu->env;
1376     int r;
1377     unsigned irq;
1378
1379     qemu_mutex_lock_iothread();
1380
1381     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1382      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1383     if (!cap_interrupt_level &&
1384         run->ready_for_interrupt_injection &&
1385         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1386         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1387     {
1388         /* For now KVM disregards the 'irq' argument. However, in the
1389          * future KVM could cache it in-kernel to avoid a heavyweight exit
1390          * when reading the UIC.
1391          */
1392         irq = KVM_INTERRUPT_SET;
1393
1394         DPRINTF("injected interrupt %d\n", irq);
1395         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1396         if (r < 0) {
1397             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1398         }
1399
1400         /* Always wake up soon in case the interrupt was level based */
1401         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1402                        (NANOSECONDS_PER_SECOND / 50));
1403     }
1404
1405     /* We don't know if there are more interrupts pending after this. However,
1406      * the guest will return to userspace in the course of handling this one
1407      * anyways, so we will get a chance to deliver the rest. */
1408
1409     qemu_mutex_unlock_iothread();
1410 }
1411
1412 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1413 {
1414     return MEMTXATTRS_UNSPECIFIED;
1415 }
1416
1417 int kvm_arch_process_async_events(CPUState *cs)
1418 {
1419     return cs->halted;
1420 }
1421
1422 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1423 {
1424     CPUState *cs = CPU(cpu);
1425     CPUPPCState *env = &cpu->env;
1426
1427     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1428         cs->halted = 1;
1429         cs->exception_index = EXCP_HLT;
1430     }
1431
1432     return 0;
1433 }
1434
1435 /* map dcr access to existing qemu dcr emulation */
1436 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1437 {
1438     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1439         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1440
1441     return 0;
1442 }
1443
1444 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1445 {
1446     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1447         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1448
1449     return 0;
1450 }
1451
1452 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1453 {
1454     /* Mixed endian case is not handled */
1455     uint32_t sc = debug_inst_opcode;
1456
1457     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1458                             sizeof(sc), 0) ||
1459         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1460         return -EINVAL;
1461     }
1462
1463     return 0;
1464 }
1465
1466 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1467 {
1468     uint32_t sc;
1469
1470     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1471         sc != debug_inst_opcode ||
1472         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1473                             sizeof(sc), 1)) {
1474         return -EINVAL;
1475     }
1476
1477     return 0;
1478 }
1479
1480 static int find_hw_breakpoint(target_ulong addr, int type)
1481 {
1482     int n;
1483
1484     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1485            <= ARRAY_SIZE(hw_debug_points));
1486
1487     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1488         if (hw_debug_points[n].addr == addr &&
1489              hw_debug_points[n].type == type) {
1490             return n;
1491         }
1492     }
1493
1494     return -1;
1495 }
1496
1497 static int find_hw_watchpoint(target_ulong addr, int *flag)
1498 {
1499     int n;
1500
1501     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1502     if (n >= 0) {
1503         *flag = BP_MEM_ACCESS;
1504         return n;
1505     }
1506
1507     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1508     if (n >= 0) {
1509         *flag = BP_MEM_WRITE;
1510         return n;
1511     }
1512
1513     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1514     if (n >= 0) {
1515         *flag = BP_MEM_READ;
1516         return n;
1517     }
1518
1519     return -1;
1520 }
1521
1522 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1523                                   target_ulong len, int type)
1524 {
1525     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1526         return -ENOBUFS;
1527     }
1528
1529     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1530     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1531
1532     switch (type) {
1533     case GDB_BREAKPOINT_HW:
1534         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1535             return -ENOBUFS;
1536         }
1537
1538         if (find_hw_breakpoint(addr, type) >= 0) {
1539             return -EEXIST;
1540         }
1541
1542         nb_hw_breakpoint++;
1543         break;
1544
1545     case GDB_WATCHPOINT_WRITE:
1546     case GDB_WATCHPOINT_READ:
1547     case GDB_WATCHPOINT_ACCESS:
1548         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1549             return -ENOBUFS;
1550         }
1551
1552         if (find_hw_breakpoint(addr, type) >= 0) {
1553             return -EEXIST;
1554         }
1555
1556         nb_hw_watchpoint++;
1557         break;
1558
1559     default:
1560         return -ENOSYS;
1561     }
1562
1563     return 0;
1564 }
1565
1566 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1567                                   target_ulong len, int type)
1568 {
1569     int n;
1570
1571     n = find_hw_breakpoint(addr, type);
1572     if (n < 0) {
1573         return -ENOENT;
1574     }
1575
1576     switch (type) {
1577     case GDB_BREAKPOINT_HW:
1578         nb_hw_breakpoint--;
1579         break;
1580
1581     case GDB_WATCHPOINT_WRITE:
1582     case GDB_WATCHPOINT_READ:
1583     case GDB_WATCHPOINT_ACCESS:
1584         nb_hw_watchpoint--;
1585         break;
1586
1587     default:
1588         return -ENOSYS;
1589     }
1590     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1591
1592     return 0;
1593 }
1594
1595 void kvm_arch_remove_all_hw_breakpoints(void)
1596 {
1597     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1598 }
1599
1600 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1601 {
1602     int n;
1603
1604     /* Software Breakpoint updates */
1605     if (kvm_sw_breakpoints_active(cs)) {
1606         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1607     }
1608
1609     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1610            <= ARRAY_SIZE(hw_debug_points));
1611     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1612
1613     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1614         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1615         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1616         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1617             switch (hw_debug_points[n].type) {
1618             case GDB_BREAKPOINT_HW:
1619                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1620                 break;
1621             case GDB_WATCHPOINT_WRITE:
1622                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1623                 break;
1624             case GDB_WATCHPOINT_READ:
1625                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1626                 break;
1627             case GDB_WATCHPOINT_ACCESS:
1628                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1629                                         KVMPPC_DEBUG_WATCH_READ;
1630                 break;
1631             default:
1632                 cpu_abort(cs, "Unsupported breakpoint type\n");
1633             }
1634             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1635         }
1636     }
1637 }
1638
1639 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1640 {
1641     CPUState *cs = CPU(cpu);
1642     CPUPPCState *env = &cpu->env;
1643     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1644     int handle = 0;
1645     int n;
1646     int flag = 0;
1647
1648     if (cs->singlestep_enabled) {
1649         handle = 1;
1650     } else if (arch_info->status) {
1651         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1652             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1653                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1654                 if (n >= 0) {
1655                     handle = 1;
1656                 }
1657             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1658                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1659                 n = find_hw_watchpoint(arch_info->address,  &flag);
1660                 if (n >= 0) {
1661                     handle = 1;
1662                     cs->watchpoint_hit = &hw_watchpoint;
1663                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1664                     hw_watchpoint.flags = flag;
1665                 }
1666             }
1667         }
1668     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1669         handle = 1;
1670     } else {
1671         /* QEMU is not able to handle debug exception, so inject
1672          * program exception to guest;
1673          * Yes program exception NOT debug exception !!
1674          * When QEMU is using debug resources then debug exception must
1675          * be always set. To achieve this we set MSR_DE and also set
1676          * MSRP_DEP so guest cannot change MSR_DE.
1677          * When emulating debug resource for guest we want guest
1678          * to control MSR_DE (enable/disable debug interrupt on need).
1679          * Supporting both configurations are NOT possible.
1680          * So the result is that we cannot share debug resources
1681          * between QEMU and Guest on BOOKE architecture.
1682          * In the current design QEMU gets the priority over guest,
1683          * this means that if QEMU is using debug resources then guest
1684          * cannot use them;
1685          * For software breakpoint QEMU uses a privileged instruction;
1686          * So there cannot be any reason that we are here for guest
1687          * set debug exception, only possibility is guest executed a
1688          * privileged / illegal instruction and that's why we are
1689          * injecting a program interrupt.
1690          */
1691
1692         cpu_synchronize_state(cs);
1693         /* env->nip is PC, so increment this by 4 to use
1694          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1695          */
1696         env->nip += 4;
1697         cs->exception_index = POWERPC_EXCP_PROGRAM;
1698         env->error_code = POWERPC_EXCP_INVAL;
1699         ppc_cpu_do_interrupt(cs);
1700     }
1701
1702     return handle;
1703 }
1704
1705 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1706 {
1707     PowerPCCPU *cpu = POWERPC_CPU(cs);
1708     CPUPPCState *env = &cpu->env;
1709     int ret;
1710
1711     qemu_mutex_lock_iothread();
1712
1713     switch (run->exit_reason) {
1714     case KVM_EXIT_DCR:
1715         if (run->dcr.is_write) {
1716             DPRINTF("handle dcr write\n");
1717             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1718         } else {
1719             DPRINTF("handle dcr read\n");
1720             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1721         }
1722         break;
1723     case KVM_EXIT_HLT:
1724         DPRINTF("handle halt\n");
1725         ret = kvmppc_handle_halt(cpu);
1726         break;
1727 #if defined(TARGET_PPC64)
1728     case KVM_EXIT_PAPR_HCALL:
1729         DPRINTF("handle PAPR hypercall\n");
1730         run->papr_hcall.ret = spapr_hypercall(cpu,
1731                                               run->papr_hcall.nr,
1732                                               run->papr_hcall.args);
1733         ret = 0;
1734         break;
1735 #endif
1736     case KVM_EXIT_EPR:
1737         DPRINTF("handle epr\n");
1738         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1739         ret = 0;
1740         break;
1741     case KVM_EXIT_WATCHDOG:
1742         DPRINTF("handle watchdog expiry\n");
1743         watchdog_perform_action();
1744         ret = 0;
1745         break;
1746
1747     case KVM_EXIT_DEBUG:
1748         DPRINTF("handle debug exception\n");
1749         if (kvm_handle_debug(cpu, run)) {
1750             ret = EXCP_DEBUG;
1751             break;
1752         }
1753         /* re-enter, this exception was guest-internal */
1754         ret = 0;
1755         break;
1756
1757     default:
1758         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1759         ret = -1;
1760         break;
1761     }
1762
1763     qemu_mutex_unlock_iothread();
1764     return ret;
1765 }
1766
1767 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1768 {
1769     CPUState *cs = CPU(cpu);
1770     uint32_t bits = tsr_bits;
1771     struct kvm_one_reg reg = {
1772         .id = KVM_REG_PPC_OR_TSR,
1773         .addr = (uintptr_t) &bits,
1774     };
1775
1776     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1777 }
1778
1779 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1780 {
1781
1782     CPUState *cs = CPU(cpu);
1783     uint32_t bits = tsr_bits;
1784     struct kvm_one_reg reg = {
1785         .id = KVM_REG_PPC_CLEAR_TSR,
1786         .addr = (uintptr_t) &bits,
1787     };
1788
1789     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1790 }
1791
1792 int kvmppc_set_tcr(PowerPCCPU *cpu)
1793 {
1794     CPUState *cs = CPU(cpu);
1795     CPUPPCState *env = &cpu->env;
1796     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1797
1798     struct kvm_one_reg reg = {
1799         .id = KVM_REG_PPC_TCR,
1800         .addr = (uintptr_t) &tcr,
1801     };
1802
1803     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1804 }
1805
1806 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1807 {
1808     CPUState *cs = CPU(cpu);
1809     int ret;
1810
1811     if (!kvm_enabled()) {
1812         return -1;
1813     }
1814
1815     if (!cap_ppc_watchdog) {
1816         printf("warning: KVM does not support watchdog");
1817         return -1;
1818     }
1819
1820     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1821     if (ret < 0) {
1822         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1823                 __func__, strerror(-ret));
1824         return ret;
1825     }
1826
1827     return ret;
1828 }
1829
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy
 * the whole line into @value (at most @len bytes, via pstrcpy()).
 *
 * Returns 0 when such a line was found, -1 if the file cannot be opened
 * or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char buf[512];
    int prefix_len = strlen(field);
    int status = -1;
    FILE *fp = fopen("/proc/cpuinfo", "r");

    if (!fp) {
        return -1;
    }

    while (fgets(buf, sizeof(buf), fp)) {
        if (strncmp(buf, field, prefix_len) == 0) {
            pstrcpy(value, len, buf);
            status = 0;
            break;
        }
        /* Stop scanning if an empty string was read */
        if (buf[0] == '\0') {
            break;
        }
    }

    fclose(fp);

    return status;
}
1857
1858 uint32_t kvmppc_get_tbfreq(void)
1859 {
1860     char line[512];
1861     char *ns;
1862     uint32_t retval = NANOSECONDS_PER_SECOND;
1863
1864     if (read_cpuinfo("timebase", line, sizeof(line))) {
1865         return retval;
1866     }
1867
1868     if (!(ns = strchr(line, ':'))) {
1869         return retval;
1870     }
1871
1872     ns++;
1873
1874     return atoi(ns);
1875 }
1876
/*
 * Load the contents of /proc/device-tree/system-id into *value using
 * g_file_get_contents(); neither the length nor a GError is requested.
 *
 * Returns the g_file_get_contents() result.
 */
bool kvmppc_get_host_serial(char **value)
{
    const char *path = "/proc/device-tree/system-id";

    return g_file_get_contents(path, value, NULL, NULL);
}
1882
/*
 * Load the contents of /proc/device-tree/model into *value using
 * g_file_get_contents(); neither the length nor a GError is requested.
 *
 * Returns the g_file_get_contents() result.
 */
bool kvmppc_get_host_model(char **value)
{
    const char *path = "/proc/device-tree/model";

    return g_file_get_contents(path, value, NULL, NULL);
}
1887
1888 /* Try to find a device tree node for a CPU with clock-frequency property */
1889 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1890 {
1891     struct dirent *dirp;
1892     DIR *dp;
1893
1894     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1895         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1896         return -1;
1897     }
1898
1899     buf[0] = '\0';
1900     while ((dirp = readdir(dp)) != NULL) {
1901         FILE *f;
1902         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1903                  dirp->d_name);
1904         f = fopen(buf, "r");
1905         if (f) {
1906             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1907             fclose(f);
1908             break;
1909         }
1910         buf[0] = '\0';
1911     }
1912     closedir(dp);
1913     if (buf[0] == '\0') {
1914         printf("Unknown host!\n");
1915         return -1;
1916     }
1917
1918     return 0;
1919 }
1920
/*
 * Read a big-endian integer from @filename.
 *
 * Up to 8 bytes are read; a 4-byte result is decoded with
 * be32_to_cpu(), an 8-byte result with be64_to_cpu().
 *
 * Returns the decoded value, (uint64_t)-1 when the file cannot be
 * opened, or 0 when the number of bytes read is neither 4 nor 8.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    int nread;
    FILE *prop = fopen(filename, "rb");

    if (prop == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), prop);
    fclose(prop);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        /* property is a 64-bit quantity */
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1947
1948 /* Read a CPU node property from the host device tree that's a single
1949  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1950  * (can't find or open the property, or doesn't understand the
1951  * format) */
1952 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1953 {
1954     char buf[PATH_MAX], *tmp;
1955     uint64_t val;
1956
1957     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1958         return -1;
1959     }
1960
1961     tmp = g_strdup_printf("%s/%s", buf, propname);
1962     val = kvmppc_read_int_dt(tmp);
1963     g_free(tmp);
1964
1965     return val;
1966 }
1967
/*
 * Return the "clock-frequency" property of the host CPU device tree
 * node, as read by kvmppc_read_int_cpu_dt().
 */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1972
/*
 * Return the "ibm,vmx" property of the host CPU device tree node, as
 * read by kvmppc_read_int_cpu_dt() and narrowed to 32 bits.
 */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return prop;
}
1977
/*
 * Return the "ibm,dfp" property of the host CPU device tree node, as
 * read by kvmppc_read_int_cpu_dt() and narrowed to 32 bits.
 */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return prop;
}
1982
1983 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1984  {
1985      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1986      CPUState *cs = CPU(cpu);
1987
1988     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1989         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1990         return 0;
1991     }
1992
1993     return 1;
1994 }
1995
1996 int kvmppc_get_hasidle(CPUPPCState *env)
1997 {
1998     struct kvm_ppc_pvinfo pvinfo;
1999
2000     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2001         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2002         return 1;
2003     }
2004
2005     return 0;
2006 }
2007
2008 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2009 {
2010     uint32_t *hc = (uint32_t*)buf;
2011     struct kvm_ppc_pvinfo pvinfo;
2012
2013     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2014         memcpy(buf, pvinfo.hcall, buf_len);
2015         return 0;
2016     }
2017
2018     /*
2019      * Fallback to always fail hypercalls regardless of endianness:
2020      *
2021      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2022      *     li r3, -1
2023      *     b .+8       (becomes nop in wrong endian)
2024      *     bswap32(li r3, -1)
2025      */
2026
2027     hc[0] = cpu_to_be32(0x08000048);
2028     hc[1] = cpu_to_be32(0x3860ffff);
2029     hc[2] = cpu_to_be32(0x48000008);
2030     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2031
2032     return 1;
2033 }
2034
2035 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2036 {
2037     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2038 }
2039
2040 void kvmppc_enable_logical_ci_hcalls(void)
2041 {
2042     /*
2043      * FIXME: it would be nice if we could detect the cases where
2044      * we're using a device which requires the in kernel
2045      * implementation of these hcalls, but the kernel lacks them and
2046      * produce a warning.
2047      */
2048     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2049     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2050 }
2051
2052 void kvmppc_enable_set_mode_hcall(void)
2053 {
2054     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2055 }
2056
2057 void kvmppc_set_papr(PowerPCCPU *cpu)
2058 {
2059     CPUState *cs = CPU(cpu);
2060     int ret;
2061
2062     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2063     if (ret) {
2064         error_report("This vCPU type or KVM version does not support PAPR");
2065         exit(1);
2066     }
2067
2068     /* Update the capability flag so we sync the right information
2069      * with kvm */
2070     cap_papr = 1;
2071 }
2072
2073 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2074 {
2075     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2076 }
2077
2078 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2079 {
2080     CPUState *cs = CPU(cpu);
2081     int ret;
2082
2083     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2084     if (ret && mpic_proxy) {
2085         error_report("This KVM version does not support EPR");
2086         exit(1);
2087     }
2088 }
2089
2090 int kvmppc_smt_threads(void)
2091 {
2092     return cap_ppc_smt ? cap_ppc_smt : 1;
2093 }
2094
2095 #ifdef TARGET_PPC64
2096 off_t kvmppc_alloc_rma(void **rma)
2097 {
2098     off_t size;
2099     int fd;
2100     struct kvm_allocate_rma ret;
2101
2102     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2103      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2104      *                      not necessary on this hardware
2105      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2106      *
2107      * FIXME: We should allow the user to force contiguous RMA
2108      * allocation in the cap_ppc_rma==1 case.
2109      */
2110     if (cap_ppc_rma < 2) {
2111         return 0;
2112     }
2113
2114     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2115     if (fd < 0) {
2116         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2117                 strerror(errno));
2118         return -1;
2119     }
2120
2121     size = MIN(ret.rma_size, 256ul << 20);
2122
2123     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2124     if (*rma == MAP_FAILED) {
2125         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2126         return -1;
2127     };
2128
2129     return size;
2130 }
2131
2132 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2133 {
2134     struct kvm_ppc_smmu_info info;
2135     long rampagesize, best_page_shift;
2136     int i;
2137
2138     if (cap_ppc_rma >= 2) {
2139         return current_size;
2140     }
2141
2142     /* Find the largest hardware supported page size that's less than
2143      * or equal to the (logical) backing page size of guest RAM */
2144     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2145     rampagesize = getrampagesize();
2146     best_page_shift = 0;
2147
2148     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2149         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2150
2151         if (!sps->page_shift) {
2152             continue;
2153         }
2154
2155         if ((sps->page_shift > best_page_shift)
2156             && ((1UL << sps->page_shift) <= rampagesize)) {
2157             best_page_shift = sps->page_shift;
2158         }
2159     }
2160
2161     return MIN(current_size,
2162                1ULL << (best_page_shift + hash_shift - 7));
2163 }
2164 #endif
2165
2166 bool kvmppc_spapr_use_multitce(void)
2167 {
2168     return cap_spapr_multitce;
2169 }
2170
2171 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2172                               bool need_vfio)
2173 {
2174     struct kvm_create_spapr_tce args = {
2175         .liobn = liobn,
2176         .window_size = window_size,
2177     };
2178     long len;
2179     int fd;
2180     void *table;
2181
2182     /* Must set fd to -1 so we don't try to munmap when called for
2183      * destroying the table, which the upper layers -will- do
2184      */
2185     *pfd = -1;
2186     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2187         return NULL;
2188     }
2189
2190     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2191     if (fd < 0) {
2192         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2193                 liobn);
2194         return NULL;
2195     }
2196
2197     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2198     /* FIXME: round this up to page size */
2199
2200     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2201     if (table == MAP_FAILED) {
2202         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2203                 liobn);
2204         close(fd);
2205         return NULL;
2206     }
2207
2208     *pfd = fd;
2209     return table;
2210 }
2211
/*
 * Unmap a TCE table and close its file descriptor.
 *
 * @table: start of the mapping
 * @fd: descriptor of the table; negative means there is nothing to do
 * @nb_table: number of 64-bit entries in the mapping
 *
 * Returns -1 when @fd is negative and 0 otherwise.  A munmap() or
 * close() failure is only reported on stderr; note that close() is not
 * attempted when munmap() fails.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        /* Newline-terminated so the next message starts on its own line */
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2230
2231 int kvmppc_reset_htab(int shift_hint)
2232 {
2233     uint32_t shift = shift_hint;
2234
2235     if (!kvm_enabled()) {
2236         /* Full emulation, tell caller to allocate htab itself */
2237         return 0;
2238     }
2239     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2240         int ret;
2241         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2242         if (ret == -ENOTTY) {
2243             /* At least some versions of PR KVM advertise the
2244              * capability, but don't implement the ioctl().  Oops.
2245              * Return 0 so that we allocate the htab in qemu, as is
2246              * correct for PR. */
2247             return 0;
2248         } else if (ret < 0) {
2249             return ret;
2250         }
2251         return shift;
2252     }
2253
2254     /* We have a kernel that predates the htab reset calls.  For PR
2255      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2256      * this era, it has allocated a 16MB fixed size hash table
2257      * already.  Kernels of this era have the GET_PVINFO capability
2258      * only on PR, so we use this hack to determine the right
2259      * answer */
2260     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2261         /* PR - tell caller to allocate htab */
2262         return 0;
2263     } else {
2264         /* HV - assume 16MB kernel allocated htab */
2265         return 24;
2266     }
2267 }
2268
2269 static inline uint32_t mfpvr(void)
2270 {
2271     uint32_t pvr;
2272
2273     asm ("mfpvr %0"
2274          : "=r"(pvr));
2275     return pvr;
2276 }
2277
/*
 * Set (@on true) or clear (@on false) the bits of @flags in *@word.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    uint64_t current = *word;

    *word = on ? (current | flags) : (current & ~flags);
}
2286
2287 static void kvmppc_host_cpu_initfn(Object *obj)
2288 {
2289     assert(kvm_enabled());
2290 }
2291
2292 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2293 {
2294     DeviceClass *dc = DEVICE_CLASS(oc);
2295     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2296     uint32_t vmx = kvmppc_get_vmx();
2297     uint32_t dfp = kvmppc_get_dfp();
2298     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2299     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2300
2301     /* Now fix up the class with information we can query from the host */
2302     pcc->pvr = mfpvr();
2303
2304     if (vmx != -1) {
2305         /* Only override when we know what the host supports */
2306         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2307         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2308     }
2309     if (dfp != -1) {
2310         /* Only override when we know what the host supports */
2311         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2312     }
2313
2314     if (dcache_size != -1) {
2315         pcc->l1_dcache_size = dcache_size;
2316     }
2317
2318     if (icache_size != -1) {
2319         pcc->l1_icache_size = icache_size;
2320     }
2321
2322     /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2323     dc->cannot_destroy_with_object_finalize_yet = true;
2324 }
2325
2326 bool kvmppc_has_cap_epr(void)
2327 {
2328     return cap_epr;
2329 }
2330
2331 bool kvmppc_has_cap_htab_fd(void)
2332 {
2333     return cap_htab_fd;
2334 }
2335
2336 bool kvmppc_has_cap_fixup_hcalls(void)
2337 {
2338     return cap_fixup_hcalls;
2339 }
2340
2341 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2342 {
2343     ObjectClass *oc = OBJECT_CLASS(pcc);
2344
2345     while (oc && !object_class_is_abstract(oc)) {
2346         oc = object_class_get_parent(oc);
2347     }
2348     assert(oc);
2349
2350     return POWERPC_CPU_CLASS(oc);
2351 }
2352
2353 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2354 {
2355     uint32_t host_pvr = mfpvr();
2356     PowerPCCPUClass *pvr_pcc;
2357
2358     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2359     if (pvr_pcc == NULL) {
2360         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2361     }
2362
2363     return pvr_pcc;
2364 }
2365
2366 #if defined(TARGET_PPC64)
2367 static void spapr_cpu_core_host_initfn(Object *obj)
2368 {
2369     sPAPRCPUCore *core = SPAPR_CPU_CORE(obj);
2370     char *name = g_strdup_printf("%s-" TYPE_POWERPC_CPU, "host");
2371     ObjectClass *oc = object_class_by_name(name);
2372
2373     g_assert(oc);
2374     g_free((void *)name);
2375     core->cpu_class = oc;
2376 }
2377 #endif
2378
2379 static int kvm_ppc_register_host_cpu_type(void)
2380 {
2381     TypeInfo type_info = {
2382         .name = TYPE_HOST_POWERPC_CPU,
2383         .instance_init = kvmppc_host_cpu_initfn,
2384         .class_init = kvmppc_host_cpu_class_init,
2385     };
2386     PowerPCCPUClass *pvr_pcc;
2387     DeviceClass *dc;
2388
2389     pvr_pcc = kvm_ppc_get_host_cpu_class();
2390     if (pvr_pcc == NULL) {
2391         return -1;
2392     }
2393     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2394     type_register(&type_info);
2395
2396 #if defined(TARGET_PPC64)
2397     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2398     type_info.parent = TYPE_SPAPR_CPU_CORE,
2399     type_info.instance_size = sizeof(sPAPRCPUCore),
2400     type_info.instance_init = spapr_cpu_core_host_initfn,
2401     type_info.class_init = NULL;
2402     type_register(&type_info);
2403     g_free((void *)type_info.name);
2404     type_info.instance_size = 0;
2405     type_info.instance_init = NULL;
2406 #endif
2407
2408     /* Register generic family CPU class for a family */
2409     pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2410     dc = DEVICE_CLASS(pvr_pcc);
2411     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2412     type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2413     type_register(&type_info);
2414
2415     return 0;
2416 }
2417
2418 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2419 {
2420     struct kvm_rtas_token_args args = {
2421         .token = token,
2422     };
2423
2424     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2425         return -ENOENT;
2426     }
2427
2428     strncpy(args.name, function, sizeof(args.name));
2429
2430     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2431 }
2432
2433 int kvmppc_get_htab_fd(bool write)
2434 {
2435     struct kvm_get_htab_fd s = {
2436         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2437         .start_index = 0,
2438     };
2439
2440     if (!cap_htab_fd) {
2441         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2442         return -1;
2443     }
2444
2445     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2446 }
2447
2448 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2449 {
2450     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2451     uint8_t buf[bufsize];
2452     ssize_t rc;
2453
2454     do {
2455         rc = read(fd, buf, bufsize);
2456         if (rc < 0) {
2457             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2458                     strerror(errno));
2459             return rc;
2460         } else if (rc) {
2461             uint8_t *buffer = buf;
2462             ssize_t n = rc;
2463             while (n) {
2464                 struct kvm_get_htab_header *head =
2465                     (struct kvm_get_htab_header *) buffer;
2466                 size_t chunksize = sizeof(*head) +
2467                      HASH_PTE_SIZE_64 * head->n_valid;
2468
2469                 qemu_put_be32(f, head->index);
2470                 qemu_put_be16(f, head->n_valid);
2471                 qemu_put_be16(f, head->n_invalid);
2472                 qemu_put_buffer(f, (void *)(head + 1),
2473                                 HASH_PTE_SIZE_64 * head->n_valid);
2474
2475                 buffer += chunksize;
2476                 n -= chunksize;
2477             }
2478         }
2479     } while ((rc != 0)
2480              && ((max_ns < 0)
2481                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2482
2483     return (rc == 0) ? 1 : 0;
2484 }
2485
2486 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2487                            uint16_t n_valid, uint16_t n_invalid)
2488 {
2489     struct kvm_get_htab_header *buf;
2490     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2491     ssize_t rc;
2492
2493     buf = alloca(chunksize);
2494     buf->index = index;
2495     buf->n_valid = n_valid;
2496     buf->n_invalid = n_invalid;
2497
2498     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2499
2500     rc = write(fd, buf, chunksize);
2501     if (rc < 0) {
2502         fprintf(stderr, "Error writing KVM hash table: %s\n",
2503                 strerror(errno));
2504         return rc;
2505     }
2506     if (rc != chunksize) {
2507         /* We should never get a short write on a single chunk */
2508         fprintf(stderr, "Short write, restoring KVM hash table\n");
2509         return -1;
2510     }
2511     return 0;
2512 }
2513
2514 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2515 {
2516     return true;
2517 }
2518
2519 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2520 {
2521     return 1;
2522 }
2523
/*
 * Always returns 1; neither argument is consulted.
 */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}
2528
2529 void kvm_arch_init_irq_routing(KVMState *s)
2530 {
2531 }
2532
/*
 * Buffer layout used when exchanging one group of HPTEs with a KVM
 * HTAB file descriptor: a struct kvm_get_htab_header followed directly
 * by the HPTE words.
 */
struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * Room for HPTES_PER_GROUP * 2 words plus one extra array element
     * (a whole target_ulong, not a single byte), which the original
     * note says is required for read.
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};
2540
2541 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2542 {
2543     int htab_fd;
2544     struct kvm_get_htab_fd ghf;
2545     struct kvm_get_htab_buf  *hpte_buf;
2546
2547     ghf.flags = 0;
2548     ghf.start_index = pte_index;
2549     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2550     if (htab_fd < 0) {
2551         goto error_out;
2552     }
2553
2554     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2555     /*
2556      * Read the hpte group
2557      */
2558     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2559         goto out_close;
2560     }
2561
2562     close(htab_fd);
2563     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2564
2565 out_close:
2566     g_free(hpte_buf);
2567     close(htab_fd);
2568 error_out:
2569     return 0;
2570 }
2571
2572 void kvmppc_hash64_free_pteg(uint64_t token)
2573 {
2574     struct kvm_get_htab_buf *htab_buf;
2575
2576     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2577                             hpte);
2578     g_free(htab_buf);
2579     return;
2580 }
2581
2582 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2583                              target_ulong pte0, target_ulong pte1)
2584 {
2585     int htab_fd;
2586     struct kvm_get_htab_fd ghf;
2587     struct kvm_get_htab_buf hpte_buf;
2588
2589     ghf.flags = 0;
2590     ghf.start_index = 0;     /* Ignored */
2591     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2592     if (htab_fd < 0) {
2593         goto error_out;
2594     }
2595
2596     hpte_buf.header.n_valid = 1;
2597     hpte_buf.header.n_invalid = 0;
2598     hpte_buf.header.index = pte_index;
2599     hpte_buf.hpte[0] = pte0;
2600     hpte_buf.hpte[1] = pte1;
2601     /*
2602      * Write the hpte entry.
2603      * CAUTION: write() has the warn_unused_result attribute. Hence we
2604      * need to check the return value, even though we do nothing.
2605      */
2606     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2607         goto out_close;
2608     }
2609
2610 out_close:
2611     close(htab_fd);
2612     return;
2613
2614 error_out:
2615     return;
2616 }
2617
2618 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2619                              uint64_t address, uint32_t data, PCIDevice *dev)
2620 {
2621     return 0;
2622 }
2623
/*
 * Returns the low 16 bits of @data.
 */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t low16_mask = 0xffff;

    return data & low16_mask;
}
2628
2629 int kvmppc_enable_hwrng(void)
2630 {
2631     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2632         return -1;
2633     }
2634
2635     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2636 }