target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include "qemu/osdep.h"
  18 #include <dirent.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/vfs.h>
  21
  22 #include <linux/kvm.h>
  23
  24 #include "qemu-common.h"
  25 #include "qemu/error-report.h"
  26 #include "cpu.h"
  27 #include "qemu/timer.h"
  28 #include "sysemu/sysemu.h"
  29 #include "sysemu/kvm.h"
  30 #include "sysemu/numa.h"
  31 #include "kvm_ppc.h"
  32 #include "sysemu/cpus.h"
  33 #include "sysemu/device_tree.h"
  34 #include "mmu-hash64.h"
  35
  36 #include "hw/sysbus.h"
  37 #include "hw/ppc/spapr.h"
  38 #include "hw/ppc/spapr_vio.h"
  39 #include "hw/ppc/ppc.h"
  40 #include "sysemu/watchdog.h"
  41 #include "trace.h"
  42 #include "exec/gdbstub.h"
  43 #include "exec/memattrs.h"
  44 #include "sysemu/hostmem.h"
  45 #include "qemu/cutils.h"
  46 #if defined(TARGET_PPC64)
  47 #include "hw/ppc/spapr_cpu_core.h"
  48 #endif
  49
  50 //#define DEBUG_KVM
  51
  52 #ifdef DEBUG_KVM
  53 #define DPRINTF(fmt, ...) \
  54     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  55 #else
  56 #define DPRINTF(fmt, ...) \
  57     do { } while (0)
  58 #endif
  59
  60 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  61
  62 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  63     KVM_CAP_LAST_INFO
  64 };
  65
  66 static int cap_interrupt_unset = false;
  67 static int cap_interrupt_level = false;
  68 static int cap_segstate;
  69 static int cap_booke_sregs;
  70 static int cap_ppc_smt;
  71 static int cap_ppc_rma;
  72 static int cap_spapr_tce;
  73 static int cap_spapr_multitce;
  74 static int cap_spapr_vfio;
  75 static int cap_hior;
  76 static int cap_one_reg;
  77 static int cap_epr;
  78 static int cap_ppc_watchdog;
  79 static int cap_papr;
  80 static int cap_htab_fd;
  81 static int cap_fixup_hcalls;
  82
  83 static uint32_t debug_inst_opcode;
  84
  85 /* XXX We have a race condition where we actually have a level triggered
  86  *     interrupt, but the infrastructure can't expose that yet, so the guest
  87  *     takes but ignores it, goes to sleep and never gets notified that there's
  88  *     still an interrupt pending.
  89  *
  90  *     As a quick workaround, let's just wake up again 20 ms after we injected
  91  *     an interrupt. That way we can assure that we're always reinjecting
  92  *     interrupts in case the guest swallowed them.
  93  */
  94 static QEMUTimer *idle_timer;
  95
  96 static void kvm_kick_cpu(void *opaque)
  97 {
  98     PowerPCCPU *cpu = opaque;
  99
 100     qemu_cpu_kick(CPU(cpu));
 101 }
 102
 103 static int kvm_ppc_register_host_cpu_type(void);
 104
 105 int kvm_arch_init(MachineState *ms, KVMState *s)
 106 {
 107     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 108     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 109     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 110     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 111     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 112     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 113     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 114     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 115     cap_spapr_vfio = false;
 116     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 117     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 118     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 119     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 120     /* Note: we don't set cap_papr here, because this capability is
 121      * only activated after this by kvmppc_set_papr() */
 122     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 123     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 124
 125     if (!cap_interrupt_level) {
 126         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 127                         "VM to stall at times!\n");
 128     }
 129
 130     kvm_ppc_register_host_cpu_type();
 131
 132     return 0;
 133 }
 134
 135 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 136 {
 137     CPUPPCState *cenv = &cpu->env;
 138     CPUState *cs = CPU(cpu);
 139     struct kvm_sregs sregs;
 140     int ret;
 141
 142     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 143         /* What we're really trying to say is "if we're on BookE, we use
 144            the native PVR for now". This is the only sane way to check
 145            it though, so we potentially confuse users that they can run
 146            BookE guests on BookS. Let's hope nobody dares enough :) */
 147         return 0;
 148     } else {
 149         if (!cap_segstate) {
 150             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 151             return -ENOSYS;
 152         }
 153     }
 154
 155     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 156     if (ret) {
 157         return ret;
 158     }
 159
 160     sregs.pvr = cenv->spr[SPR_PVR];
 161     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 162 }
 163
 164 /* Set up a shared TLB array with KVM */
 165 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 166 {
 167     CPUPPCState *env = &cpu->env;
 168     CPUState *cs = CPU(cpu);
 169     struct kvm_book3e_206_tlb_params params = {};
 170     struct kvm_config_tlb cfg = {};
 171     unsigned int entries = 0;
 172     int ret, i;
 173
 174     if (!kvm_enabled() ||
 175         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 176         return 0;
 177     }
 178
 179     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 180
 181     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 182         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 183         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 184         entries += params.tlb_sizes[i];
 185     }
 186
 187     assert(entries == env->nb_tlb);
 188     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 189
 190     env->tlb_dirty = true;
 191
 192     cfg.array = (uintptr_t)env->tlb.tlbm;
 193     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 194     cfg.params = (uintptr_t)&params;
 195     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 196
 197     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 198     if (ret < 0) {
 199         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 200                 __func__, strerror(-ret));
 201         return ret;
 202     }
 203
 204     env->kvm_sw_tlb = true;
 205     return 0;
 206 }
 207
 208
 209 #if defined(TARGET_PPC64)
 210 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 211                                        struct kvm_ppc_smmu_info *info)
 212 {
 213     CPUPPCState *env = &cpu->env;
 214     CPUState *cs = CPU(cpu);
 215
 216     memset(info, 0, sizeof(*info));
 217
 218     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 219      * need to "guess" what the supported page sizes are.
 220      *
 221      * For that to work we make a few assumptions:
 222      *
 223      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 224      *   KVM which only supports 4K and 16M pages, but supports them
 225      *   regardless of the backing store characteritics. We also don't
 226      *   support 1T segments.
 227      *
 228      *   This is safe as if HV KVM ever supports that capability or PR
 229      *   KVM grows supports for more page/segment sizes, those versions
 230      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 231      *   will not hit this fallback
 232      *
 233      * - Else we are running HV KVM. This means we only support page
 234      *   sizes that fit in the backing store. Additionally we only
 235      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 236      *   P7 encodings for the SLB and hash table. Here too, we assume
 237      *   support for any newer processor will mean a kernel that
 238      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 239      *   this fallback.
 240      */
 241     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 242         /* No flags */
 243         info->flags = 0;
 244         info->slb_size = 64;
 245
 246         /* Standard 4k base page size segment */
 247         info->sps[0].page_shift = 12;
 248         info->sps[0].slb_enc = 0;
 249         info->sps[0].enc[0].page_shift = 12;
 250         info->sps[0].enc[0].pte_enc = 0;
 251
 252         /* Standard 16M large page size segment */
 253         info->sps[1].page_shift = 24;
 254         info->sps[1].slb_enc = SLB_VSID_L;
 255         info->sps[1].enc[0].page_shift = 24;
 256         info->sps[1].enc[0].pte_enc = 0;
 257     } else {
 258         int i = 0;
 259
 260         /* HV KVM has backing store size restrictions */
 261         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 262
 263         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 264             info->flags |= KVM_PPC_1T_SEGMENTS;
 265         }
 266
 267         if (env->mmu_model == POWERPC_MMU_2_06 ||
 268             env->mmu_model == POWERPC_MMU_2_07) {
 269             info->slb_size = 32;
 270         } else {
 271             info->slb_size = 64;
 272         }
 273
 274         /* Standard 4k base page size segment */
 275         info->sps[i].page_shift = 12;
 276         info->sps[i].slb_enc = 0;
 277         info->sps[i].enc[0].page_shift = 12;
 278         info->sps[i].enc[0].pte_enc = 0;
 279         i++;
 280
 281         /* 64K on MMU 2.06 and later */
 282         if (env->mmu_model == POWERPC_MMU_2_06 ||
 283             env->mmu_model == POWERPC_MMU_2_07) {
 284             info->sps[i].page_shift = 16;
 285             info->sps[i].slb_enc = 0x110;
 286             info->sps[i].enc[0].page_shift = 16;
 287             info->sps[i].enc[0].pte_enc = 1;
 288             i++;
 289         }
 290
 291         /* Standard 16M large page size segment */
 292         info->sps[i].page_shift = 24;
 293         info->sps[i].slb_enc = SLB_VSID_L;
 294         info->sps[i].enc[0].page_shift = 24;
 295         info->sps[i].enc[0].pte_enc = 0;
 296     }
 297 }
 298
 299 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 300 {
 301     CPUState *cs = CPU(cpu);
 302     int ret;
 303
 304     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 305         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 306         if (ret == 0) {
 307             return;
 308         }
 309     }
 310
 311     kvm_get_fallback_smmu_info(cpu, info);
 312 }
 313
 314 static long gethugepagesize(const char *mem_path)
 315 {
 316     struct statfs fs;
 317     int ret;
 318
 319     do {
 320         ret = statfs(mem_path, &fs);
 321     } while (ret != 0 && errno == EINTR);
 322
 323     if (ret != 0) {
 324         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 325                 strerror(errno));
 326         exit(1);
 327     }
 328
 329 #define HUGETLBFS_MAGIC       0x958458f6
 330
 331     if (fs.f_type != HUGETLBFS_MAGIC) {
 332         /* Explicit mempath, but it's ordinary pages */
 333         return getpagesize();
 334     }
 335
 336     /* It's hugepage, return the huge page size */
 337     return fs.f_bsize;
 338 }
 339
 340 /*
 341  * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
 342  * may or may not name the same files / on the same filesystem now as
 343  * when we actually open and map them.  Iterate over the file
 344  * descriptors instead, and use qemu_fd_getpagesize().
 345  */
 346 static int find_max_supported_pagesize(Object *obj, void *opaque)
 347 {
 348     char *mem_path;
 349     long *hpsize_min = opaque;
 350
 351     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
 352         mem_path = object_property_get_str(obj, "mem-path", NULL);
 353         if (mem_path) {
 354             long hpsize = gethugepagesize(mem_path);
 355             if (hpsize < *hpsize_min) {
 356                 *hpsize_min = hpsize;
 357             }
 358         } else {
 359             *hpsize_min = getpagesize();
 360         }
 361     }
 362
 363     return 0;
 364 }
 365
 366 static long getrampagesize(void)
 367 {
 368     long hpsize = LONG_MAX;
 369     long mainrampagesize;
 370     Object *memdev_root;
 371
 372     if (mem_path) {
 373         mainrampagesize = gethugepagesize(mem_path);
 374     } else {
 375         mainrampagesize = getpagesize();
 376     }
 377
 378     /* it's possible we have memory-backend objects with
 379      * hugepage-backed RAM. these may get mapped into system
 380      * address space via -numa parameters or memory hotplug
 381      * hooks. we want to take these into account, but we
 382      * also want to make sure these supported hugepage
 383      * sizes are applicable across the entire range of memory
 384      * we may boot from, so we take the min across all
 385      * backends, and assume normal pages in cases where a
 386      * backend isn't backed by hugepages.
 387      */
 388     memdev_root = object_resolve_path("/objects", NULL);
 389     if (memdev_root) {
 390         object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
 391     }
 392     if (hpsize == LONG_MAX) {
 393         /* No additional memory regions found ==> Report main RAM page size */
 394         return mainrampagesize;
 395     }
 396
 397     /* If NUMA is disabled or the NUMA nodes are not backed with a
 398      * memory-backend, then there is at least one node using "normal" RAM,
 399      * so if its page size is smaller we have got to report that size instead.
 400      */
 401     if (hpsize > mainrampagesize &&
 402         (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
 403         static bool warned;
 404         if (!warned) {
 405             error_report("Huge page support disabled (n/a for main memory).");
 406             warned = true;
 407         }
 408         return mainrampagesize;
 409     }
 410
 411     return hpsize;
 412 }
 413
 414 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 415 {
 416     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 417         return true;
 418     }
 419
 420     return (1ul << shift) <= rampgsize;
 421 }
 422
 423 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 424 {
 425     static struct kvm_ppc_smmu_info smmu_info;
 426     static bool has_smmu_info;
 427     CPUPPCState *env = &cpu->env;
 428     long rampagesize;
 429     int iq, ik, jq, jk;
 430
 431     /* We only handle page sizes for 64-bit server guests for now */
 432     if (!(env->mmu_model & POWERPC_MMU_64)) {
 433         return;
 434     }
 435
 436     /* Collect MMU info from kernel if not already */
 437     if (!has_smmu_info) {
 438         kvm_get_smmu_info(cpu, &smmu_info);
 439         has_smmu_info = true;
 440     }
 441
 442     rampagesize = getrampagesize();
 443
 444     /* Convert to QEMU form */
 445     memset(&env->sps, 0, sizeof(env->sps));
 446
 447     /* If we have HV KVM, we need to forbid CI large pages if our
 448      * host page size is smaller than 64K.
 449      */
 450     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 451         env->ci_large_pages = getpagesize() >= 0x10000;
 452     }
 453
 454     /*
 455      * XXX This loop should be an entry wide AND of the capabilities that
 456      *     the selected CPU has with the capabilities that KVM supports.
 457      */
 458     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 459         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 460         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 461
 462         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 463                                  ksps->page_shift)) {
 464             continue;
 465         }
 466         qsps->page_shift = ksps->page_shift;
 467         qsps->slb_enc = ksps->slb_enc;
 468         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 469             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 470                                      ksps->enc[jk].page_shift)) {
 471                 continue;
 472             }
 473             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 474             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 475             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 476                 break;
 477             }
 478         }
 479         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 480             break;
 481         }
 482     }
 483     env->slb_nr = smmu_info.slb_size;
 484     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 485         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 486     }
 487 }
 488 #else /* defined (TARGET_PPC64) */
 489
 490 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 491 {
 492 }
 493
 494 #endif /* !defined (TARGET_PPC64) */
 495
 496 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 497 {
 498     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 499 }
 500
 501 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
 502  * book3s supports only 1 watchpoint, so array size
 503  * of 4 is sufficient for now.
 504  */
 505 #define MAX_HW_BKPTS 4
 506
 507 static struct HWBreakpoint {
 508     target_ulong addr;
 509     int type;
 510 } hw_debug_points[MAX_HW_BKPTS];
 511
 512 static CPUWatchpoint hw_watchpoint;
 513
 514 /* Default there is no breakpoint and watchpoint supported */
 515 static int max_hw_breakpoint;
 516 static int max_hw_watchpoint;
 517 static int nb_hw_breakpoint;
 518 static int nb_hw_watchpoint;
 519
 520 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 521 {
 522     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 523         max_hw_breakpoint = 2;
 524         max_hw_watchpoint = 2;
 525     }
 526
 527     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 528         fprintf(stderr, "Error initializing h/w breakpoints\n");
 529         return;
 530     }
 531 }
 532
 533 int kvm_arch_init_vcpu(CPUState *cs)
 534 {
 535     PowerPCCPU *cpu = POWERPC_CPU(cs);
 536     CPUPPCState *cenv = &cpu->env;
 537     int ret;
 538
 539     /* Gather server mmu info from KVM and update the CPU state */
 540     kvm_fixup_page_sizes(cpu);
 541
 542     /* Synchronize sregs with kvm */
 543     ret = kvm_arch_sync_sregs(cpu);
 544     if (ret) {
 545         if (ret == -EINVAL) {
 546             error_report("Register sync failed... If you're using kvm-hv.ko,"
 547                          " only \"-cpu host\" is possible");
 548         }
 549         return ret;
 550     }
 551
 552     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 553
 554     /* Some targets support access to KVM's guest TLB. */
 555     switch (cenv->mmu_model) {
 556     case POWERPC_MMU_BOOKE206:
 557         ret = kvm_booke206_tlb_init(cpu);
 558         break;
 559     default:
 560         break;
 561     }
 562
 563     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 564     kvmppc_hw_debug_points_init(cenv);
 565
 566     return ret;
 567 }
 568
 569 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 570 {
 571     CPUPPCState *env = &cpu->env;
 572     CPUState *cs = CPU(cpu);
 573     struct kvm_dirty_tlb dirty_tlb;
 574     unsigned char *bitmap;
 575     int ret;
 576
 577     if (!env->kvm_sw_tlb) {
 578         return;
 579     }
 580
 581     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 582     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 583
 584     dirty_tlb.bitmap = (uintptr_t)bitmap;
 585     dirty_tlb.num_dirty = env->nb_tlb;
 586
 587     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 588     if (ret) {
 589         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 590                 __func__, strerror(-ret));
 591     }
 592
 593     g_free(bitmap);
 594 }
 595
 596 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 597 {
 598     PowerPCCPU *cpu = POWERPC_CPU(cs);
 599     CPUPPCState *env = &cpu->env;
 600     union {
 601         uint32_t u32;
 602         uint64_t u64;
 603     } val;
 604     struct kvm_one_reg reg = {
 605         .id = id,
 606         .addr = (uintptr_t) &val,
 607     };
 608     int ret;
 609
 610     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 611     if (ret != 0) {
 612         trace_kvm_failed_spr_get(spr, strerror(errno));
 613     } else {
 614         switch (id & KVM_REG_SIZE_MASK) {
 615         case KVM_REG_SIZE_U32:
 616             env->spr[spr] = val.u32;
 617             break;
 618
 619         case KVM_REG_SIZE_U64:
 620             env->spr[spr] = val.u64;
 621             break;
 622
 623         default:
 624             /* Don't handle this size yet */
 625             abort();
 626         }
 627     }
 628 }
 629
 630 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 631 {
 632     PowerPCCPU *cpu = POWERPC_CPU(cs);
 633     CPUPPCState *env = &cpu->env;
 634     union {
 635         uint32_t u32;
 636         uint64_t u64;
 637     } val;
 638     struct kvm_one_reg reg = {
 639         .id = id,
 640         .addr = (uintptr_t) &val,
 641     };
 642     int ret;
 643
 644     switch (id & KVM_REG_SIZE_MASK) {
 645     case KVM_REG_SIZE_U32:
 646         val.u32 = env->spr[spr];
 647         break;
 648
 649     case KVM_REG_SIZE_U64:
 650         val.u64 = env->spr[spr];
 651         break;
 652
 653     default:
 654         /* Don't handle this size yet */
 655         abort();
 656     }
 657
 658     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 659     if (ret != 0) {
 660         trace_kvm_failed_spr_set(spr, strerror(errno));
 661     }
 662 }
 663
 664 static int kvm_put_fp(CPUState *cs)
 665 {
 666     PowerPCCPU *cpu = POWERPC_CPU(cs);
 667     CPUPPCState *env = &cpu->env;
 668     struct kvm_one_reg reg;
 669     int i;
 670     int ret;
 671
 672     if (env->insns_flags & PPC_FLOAT) {
 673         uint64_t fpscr = env->fpscr;
 674         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 675
 676         reg.id = KVM_REG_PPC_FPSCR;
 677         reg.addr = (uintptr_t)&fpscr;
 678         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 679         if (ret < 0) {
 680             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 681             return ret;
 682         }
 683
 684         for (i = 0; i < 32; i++) {
 685             uint64_t vsr[2];
 686
 687 #ifdef HOST_WORDS_BIGENDIAN
 688             vsr[0] = float64_val(env->fpr[i]);
 689             vsr[1] = env->vsr[i];
 690 #else
 691             vsr[0] = env->vsr[i];
 692             vsr[1] = float64_val(env->fpr[i]);
 693 #endif
 694             reg.addr = (uintptr_t) &vsr;
 695             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 696
 697             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 698             if (ret < 0) {
 699                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 700                         i, strerror(errno));
 701                 return ret;
 702             }
 703         }
 704     }
 705
 706     if (env->insns_flags & PPC_ALTIVEC) {
 707         reg.id = KVM_REG_PPC_VSCR;
 708         reg.addr = (uintptr_t)&env->vscr;
 709         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 710         if (ret < 0) {
 711             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 712             return ret;
 713         }
 714
 715         for (i = 0; i < 32; i++) {
 716             reg.id = KVM_REG_PPC_VR(i);
 717             reg.addr = (uintptr_t)&env->avr[i];
 718             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 719             if (ret < 0) {
 720                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 721                 return ret;
 722             }
 723         }
 724     }
 725
 726     return 0;
 727 }
 728
 729 static int kvm_get_fp(CPUState *cs)
 730 {
 731     PowerPCCPU *cpu = POWERPC_CPU(cs);
 732     CPUPPCState *env = &cpu->env;
 733     struct kvm_one_reg reg;
 734     int i;
 735     int ret;
 736
 737     if (env->insns_flags & PPC_FLOAT) {
 738         uint64_t fpscr;
 739         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 740
 741         reg.id = KVM_REG_PPC_FPSCR;
 742         reg.addr = (uintptr_t)&fpscr;
 743         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 744         if (ret < 0) {
 745             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 746             return ret;
 747         } else {
 748             env->fpscr = fpscr;
 749         }
 750
 751         for (i = 0; i < 32; i++) {
 752             uint64_t vsr[2];
 753
 754             reg.addr = (uintptr_t) &vsr;
 755             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 756
 757             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 758             if (ret < 0) {
 759                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 760                         vsx ? "VSR" : "FPR", i, strerror(errno));
 761                 return ret;
 762             } else {
 763 #ifdef HOST_WORDS_BIGENDIAN
 764                 env->fpr[i] = vsr[0];
 765                 if (vsx) {
 766                     env->vsr[i] = vsr[1];
 767                 }
 768 #else
 769                 env->fpr[i] = vsr[1];
 770                 if (vsx) {
 771                     env->vsr[i] = vsr[0];
 772                 }
 773 #endif
 774             }
 775         }
 776     }
 777
 778     if (env->insns_flags & PPC_ALTIVEC) {
 779         reg.id = KVM_REG_PPC_VSCR;
 780         reg.addr = (uintptr_t)&env->vscr;
 781         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 782         if (ret < 0) {
 783             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 784             return ret;
 785         }
 786
 787         for (i = 0; i < 32; i++) {
 788             reg.id = KVM_REG_PPC_VR(i);
 789             reg.addr = (uintptr_t)&env->avr[i];
 790             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 791             if (ret < 0) {
 792                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 793                         i, strerror(errno));
 794                 return ret;
 795             }
 796         }
 797     }
 798
 799     return 0;
 800 }
 801
 802 #if defined(TARGET_PPC64)
 803 static int kvm_get_vpa(CPUState *cs)
 804 {
 805     PowerPCCPU *cpu = POWERPC_CPU(cs);
 806     CPUPPCState *env = &cpu->env;
 807     struct kvm_one_reg reg;
 808     int ret;
 809
 810     reg.id = KVM_REG_PPC_VPA_ADDR;
 811     reg.addr = (uintptr_t)&env->vpa_addr;
 812     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 813     if (ret < 0) {
 814         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 815         return ret;
 816     }
 817
 818     assert((uintptr_t)&env->slb_shadow_size
 819            == ((uintptr_t)&env->slb_shadow_addr + 8));
 820     reg.id = KVM_REG_PPC_VPA_SLB;
 821     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 822     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 823     if (ret < 0) {
 824         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 825                 strerror(errno));
 826         return ret;
 827     }
 828
 829     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 830     reg.id = KVM_REG_PPC_VPA_DTL;
 831     reg.addr = (uintptr_t)&env->dtl_addr;
 832     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 833     if (ret < 0) {
 834         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 835                 strerror(errno));
 836         return ret;
 837     }
 838
 839     return 0;
 840 }
 841
 842 static int kvm_put_vpa(CPUState *cs)
 843 {
 844     PowerPCCPU *cpu = POWERPC_CPU(cs);
 845     CPUPPCState *env = &cpu->env;
 846     struct kvm_one_reg reg;
 847     int ret;
 848
 849     /* SLB shadow or DTL can't be registered unless a master VPA is
 850      * registered.  That means when restoring state, if a VPA *is*
 851      * registered, we need to set that up first.  If not, we need to
 852      * deregister the others before deregistering the master VPA */
 853     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 854
 855     if (env->vpa_addr) {
 856         reg.id = KVM_REG_PPC_VPA_ADDR;
 857         reg.addr = (uintptr_t)&env->vpa_addr;
 858         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 859         if (ret < 0) {
 860             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 861             return ret;
 862         }
 863     }
 864
 865     assert((uintptr_t)&env->slb_shadow_size
 866            == ((uintptr_t)&env->slb_shadow_addr + 8));
 867     reg.id = KVM_REG_PPC_VPA_SLB;
 868     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 869     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 870     if (ret < 0) {
 871         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 872         return ret;
 873     }
 874
 875     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 876     reg.id = KVM_REG_PPC_VPA_DTL;
 877     reg.addr = (uintptr_t)&env->dtl_addr;
 878     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 879     if (ret < 0) {
 880         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 881                 strerror(errno));
 882         return ret;
 883     }
 884
 885     if (!env->vpa_addr) {
 886         reg.id = KVM_REG_PPC_VPA_ADDR;
 887         reg.addr = (uintptr_t)&env->vpa_addr;
 888         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 889         if (ret < 0) {
 890             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 891             return ret;
 892         }
 893     }
 894
 895     return 0;
 896 }
 897 #endif /* TARGET_PPC64 */
 898
 899 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 900 {
 901     CPUPPCState *env = &cpu->env;
 902     struct kvm_sregs sregs;
 903     int i;
 904
 905     sregs.pvr = env->spr[SPR_PVR];
 906
 907     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 908
 909     /* Sync SLB */
 910 #ifdef TARGET_PPC64
 911     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 912         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 913         if (env->slb[i].esid & SLB_ESID_V) {
 914             sregs.u.s.ppc64.slb[i].slbe |= i;
 915         }
 916         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 917     }
 918 #endif
 919
 920     /* Sync SRs */
 921     for (i = 0; i < 16; i++) {
 922         sregs.u.s.ppc32.sr[i] = env->sr[i];
 923     }
 924
 925     /* Sync BATs */
 926     for (i = 0; i < 8; i++) {
 927         /* Beware. We have to swap upper and lower bits here */
 928         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 929             | env->DBAT[1][i];
 930         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 931             | env->IBAT[1][i];
 932     }
 933
 934     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 935 }
 936
 937 int kvm_arch_put_registers(CPUState *cs, int level)
 938 {
 939     PowerPCCPU *cpu = POWERPC_CPU(cs);
 940     CPUPPCState *env = &cpu->env;
 941     struct kvm_regs regs;
 942     int ret;
 943     int i;
 944
 945     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 946     if (ret < 0) {
 947         return ret;
 948     }
 949
 950     regs.ctr = env->ctr;
 951     regs.lr  = env->lr;
 952     regs.xer = cpu_read_xer(env);
 953     regs.msr = env->msr;
 954     regs.pc = env->nip;
 955
 956     regs.srr0 = env->spr[SPR_SRR0];
 957     regs.srr1 = env->spr[SPR_SRR1];
 958
 959     regs.sprg0 = env->spr[SPR_SPRG0];
 960     regs.sprg1 = env->spr[SPR_SPRG1];
 961     regs.sprg2 = env->spr[SPR_SPRG2];
 962     regs.sprg3 = env->spr[SPR_SPRG3];
 963     regs.sprg4 = env->spr[SPR_SPRG4];
 964     regs.sprg5 = env->spr[SPR_SPRG5];
 965     regs.sprg6 = env->spr[SPR_SPRG6];
 966     regs.sprg7 = env->spr[SPR_SPRG7];
 967
 968     regs.pid = env->spr[SPR_BOOKE_PID];
 969
 970     for (i = 0;i < 32; i++)
 971         regs.gpr[i] = env->gpr[i];
 972
 973     regs.cr = 0;
 974     for (i = 0; i < 8; i++) {
 975         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 976     }
 977
 978     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 979     if (ret < 0)
 980         return ret;
 981
 982     kvm_put_fp(cs);
 983
 984     if (env->tlb_dirty) {
 985         kvm_sw_tlb_put(cpu);
 986         env->tlb_dirty = false;
 987     }
 988
 989     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 990         ret = kvmppc_put_books_sregs(cpu);
 991         if (ret < 0) {
 992             return ret;
 993         }
 994     }
 995
 996     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 997         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 998     }
 999
1000     if (cap_one_reg) {
1001         int i;
1002
1003         /* We deliberately ignore errors here, for kernels which have
1004          * the ONE_REG calls, but don't support the specific
1005          * registers, there's a reasonable chance things will still
1006          * work, at least until we try to migrate. */
1007         for (i = 0; i < 1024; i++) {
1008             uint64_t id = env->spr_cb[i].one_reg_id;
1009
1010             if (id != 0) {
1011                 kvm_put_one_spr(cs, id, i);
1012             }
1013         }
1014
1015 #ifdef TARGET_PPC64
1016         if (msr_ts) {
1017             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1018                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1019             }
1020             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1021                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1022             }
1023             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1024             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1025             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1026             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1027             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1028             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1029             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1030             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1031             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1032             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1033         }
1034
1035         if (cap_papr) {
1036             if (kvm_put_vpa(cs) < 0) {
1037                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1038             }
1039         }
1040
1041         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1042 #endif /* TARGET_PPC64 */
1043     }
1044
1045     return ret;
1046 }
1047
1048 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1049 {
1050      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1051 }
1052
1053 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1054 {
1055     CPUPPCState *env = &cpu->env;
1056     struct kvm_sregs sregs;
1057     int ret;
1058
1059     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1060     if (ret < 0) {
1061         return ret;
1062     }
1063
1064     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1065         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1066         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1067         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1068         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1069         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1070         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1071         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1072         env->spr[SPR_DECR] = sregs.u.e.dec;
1073         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1074         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1075         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1076     }
1077
1078     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1079         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1080         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1081         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1082         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1083         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1084     }
1085
1086     if (sregs.u.e.features & KVM_SREGS_E_64) {
1087         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1088     }
1089
1090     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1091         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1092     }
1093
1094     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1095         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1096         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1097         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1098         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1099         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1100         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1101         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1102         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1103         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1104         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1105         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1106         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1107         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1108         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1109         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1110         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1111         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1112         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1113         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1114         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1115         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1116         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1117         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1118         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1119         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1120         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1121         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1122         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1123         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1124         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1125         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1126         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1127
1128         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1129             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1130             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1131             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1132             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1133             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1134             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1135         }
1136
1137         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1138             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1139             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1140         }
1141
1142         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1143             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1144             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1145             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1146             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1147         }
1148     }
1149
1150     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1151         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1152         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1153         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1154         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1155         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1156         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1157         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1158         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1159         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1160         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1161     }
1162
1163     if (sregs.u.e.features & KVM_SREGS_EXP) {
1164         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1165     }
1166
1167     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1168         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1169         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1170     }
1171
1172     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1173         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1174         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1175         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1176
1177         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1178             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1179             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1180         }
1181     }
1182
1183     return 0;
1184 }
1185
1186 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1187 {
1188     CPUPPCState *env = &cpu->env;
1189     struct kvm_sregs sregs;
1190     int ret;
1191     int i;
1192
1193     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1194     if (ret < 0) {
1195         return ret;
1196     }
1197
1198     if (!env->external_htab) {
1199         ppc_store_sdr1(env, sregs.u.s.sdr1);
1200     }
1201
1202     /* Sync SLB */
1203 #ifdef TARGET_PPC64
1204     /*
1205      * The packed SLB array we get from KVM_GET_SREGS only contains
1206      * information about valid entries. So we flush our internal copy
1207      * to get rid of stale ones, then put all valid SLB entries back
1208      * in.
1209      */
1210     memset(env->slb, 0, sizeof(env->slb));
1211     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1212         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1213         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1214         /*
1215          * Only restore valid entries
1216          */
1217         if (rb & SLB_ESID_V) {
1218             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1219         }
1220     }
1221 #endif
1222
1223     /* Sync SRs */
1224     for (i = 0; i < 16; i++) {
1225         env->sr[i] = sregs.u.s.ppc32.sr[i];
1226     }
1227
1228     /* Sync BATs */
1229     for (i = 0; i < 8; i++) {
1230         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1231         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1232         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1233         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1234     }
1235
1236     return 0;
1237 }
1238
1239 int kvm_arch_get_registers(CPUState *cs)
1240 {
1241     PowerPCCPU *cpu = POWERPC_CPU(cs);
1242     CPUPPCState *env = &cpu->env;
1243     struct kvm_regs regs;
1244     uint32_t cr;
1245     int i, ret;
1246
1247     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1248     if (ret < 0)
1249         return ret;
1250
1251     cr = regs.cr;
1252     for (i = 7; i >= 0; i--) {
1253         env->crf[i] = cr & 15;
1254         cr >>= 4;
1255     }
1256
1257     env->ctr = regs.ctr;
1258     env->lr = regs.lr;
1259     cpu_write_xer(env, regs.xer);
1260     env->msr = regs.msr;
1261     env->nip = regs.pc;
1262
1263     env->spr[SPR_SRR0] = regs.srr0;
1264     env->spr[SPR_SRR1] = regs.srr1;
1265
1266     env->spr[SPR_SPRG0] = regs.sprg0;
1267     env->spr[SPR_SPRG1] = regs.sprg1;
1268     env->spr[SPR_SPRG2] = regs.sprg2;
1269     env->spr[SPR_SPRG3] = regs.sprg3;
1270     env->spr[SPR_SPRG4] = regs.sprg4;
1271     env->spr[SPR_SPRG5] = regs.sprg5;
1272     env->spr[SPR_SPRG6] = regs.sprg6;
1273     env->spr[SPR_SPRG7] = regs.sprg7;
1274
1275     env->spr[SPR_BOOKE_PID] = regs.pid;
1276
1277     for (i = 0;i < 32; i++)
1278         env->gpr[i] = regs.gpr[i];
1279
1280     kvm_get_fp(cs);
1281
1282     if (cap_booke_sregs) {
1283         ret = kvmppc_get_booke_sregs(cpu);
1284         if (ret < 0) {
1285             return ret;
1286         }
1287     }
1288
1289     if (cap_segstate) {
1290         ret = kvmppc_get_books_sregs(cpu);
1291         if (ret < 0) {
1292             return ret;
1293         }
1294     }
1295
1296     if (cap_hior) {
1297         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1298     }
1299
1300     if (cap_one_reg) {
1301         int i;
1302
1303         /* We deliberately ignore errors here, for kernels which have
1304          * the ONE_REG calls, but don't support the specific
1305          * registers, there's a reasonable chance things will still
1306          * work, at least until we try to migrate. */
1307         for (i = 0; i < 1024; i++) {
1308             uint64_t id = env->spr_cb[i].one_reg_id;
1309
1310             if (id != 0) {
1311                 kvm_get_one_spr(cs, id, i);
1312             }
1313         }
1314
1315 #ifdef TARGET_PPC64
1316         if (msr_ts) {
1317             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1318                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1319             }
1320             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1321                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1322             }
1323             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1324             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1325             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1326             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1327             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1328             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1329             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1330             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1331             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1332             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1333         }
1334
1335         if (cap_papr) {
1336             if (kvm_get_vpa(cs) < 0) {
1337                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1338             }
1339         }
1340
1341         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1342 #endif
1343     }
1344
1345     return 0;
1346 }
1347
1348 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1349 {
1350     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1351
1352     if (irq != PPC_INTERRUPT_EXT) {
1353         return 0;
1354     }
1355
1356     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1357         return 0;
1358     }
1359
1360     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1361
1362     return 0;
1363 }
1364
1365 #if defined(TARGET_PPCEMB)
1366 #define PPC_INPUT_INT PPC40x_INPUT_INT
1367 #elif defined(TARGET_PPC64)
1368 #define PPC_INPUT_INT PPC970_INPUT_INT
1369 #else
1370 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1371 #endif
1372
1373 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1374 {
1375     PowerPCCPU *cpu = POWERPC_CPU(cs);
1376     CPUPPCState *env = &cpu->env;
1377     int r;
1378     unsigned irq;
1379
1380     qemu_mutex_lock_iothread();
1381
1382     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1383      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1384     if (!cap_interrupt_level &&
1385         run->ready_for_interrupt_injection &&
1386         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1387         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1388     {
1389         /* For now KVM disregards the 'irq' argument. However, in the
1390          * future KVM could cache it in-kernel to avoid a heavyweight exit
1391          * when reading the UIC.
1392          */
1393         irq = KVM_INTERRUPT_SET;
1394
1395         DPRINTF("injected interrupt %d\n", irq);
1396         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1397         if (r < 0) {
1398             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1399         }
1400
1401         /* Always wake up soon in case the interrupt was level based */
1402         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1403                        (NANOSECONDS_PER_SECOND / 50));
1404     }
1405
1406     /* We don't know if there are more interrupts pending after this. However,
1407      * the guest will return to userspace in the course of handling this one
1408      * anyways, so we will get a chance to deliver the rest. */
1409
1410     qemu_mutex_unlock_iothread();
1411 }
1412
1413 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1414 {
1415     return MEMTXATTRS_UNSPECIFIED;
1416 }
1417
1418 int kvm_arch_process_async_events(CPUState *cs)
1419 {
1420     return cs->halted;
1421 }
1422
1423 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1424 {
1425     CPUState *cs = CPU(cpu);
1426     CPUPPCState *env = &cpu->env;
1427
1428     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1429         cs->halted = 1;
1430         cs->exception_index = EXCP_HLT;
1431     }
1432
1433     return 0;
1434 }
1435
1436 /* map dcr access to existing qemu dcr emulation */
1437 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1438 {
1439     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1440         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1441
1442     return 0;
1443 }
1444
1445 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1446 {
1447     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1448         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1449
1450     return 0;
1451 }
1452
1453 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1454 {
1455     /* Mixed endian case is not handled */
1456     uint32_t sc = debug_inst_opcode;
1457
1458     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1459                             sizeof(sc), 0) ||
1460         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1461         return -EINVAL;
1462     }
1463
1464     return 0;
1465 }
1466
1467 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1468 {
1469     uint32_t sc;
1470
1471     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1472         sc != debug_inst_opcode ||
1473         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1474                             sizeof(sc), 1)) {
1475         return -EINVAL;
1476     }
1477
1478     return 0;
1479 }
1480
1481 static int find_hw_breakpoint(target_ulong addr, int type)
1482 {
1483     int n;
1484
1485     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1486            <= ARRAY_SIZE(hw_debug_points));
1487
1488     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1489         if (hw_debug_points[n].addr == addr &&
1490              hw_debug_points[n].type == type) {
1491             return n;
1492         }
1493     }
1494
1495     return -1;
1496 }
1497
1498 static int find_hw_watchpoint(target_ulong addr, int *flag)
1499 {
1500     int n;
1501
1502     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1503     if (n >= 0) {
1504         *flag = BP_MEM_ACCESS;
1505         return n;
1506     }
1507
1508     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1509     if (n >= 0) {
1510         *flag = BP_MEM_WRITE;
1511         return n;
1512     }
1513
1514     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1515     if (n >= 0) {
1516         *flag = BP_MEM_READ;
1517         return n;
1518     }
1519
1520     return -1;
1521 }
1522
1523 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1524                                   target_ulong len, int type)
1525 {
1526     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1527         return -ENOBUFS;
1528     }
1529
1530     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1531     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1532
1533     switch (type) {
1534     case GDB_BREAKPOINT_HW:
1535         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1536             return -ENOBUFS;
1537         }
1538
1539         if (find_hw_breakpoint(addr, type) >= 0) {
1540             return -EEXIST;
1541         }
1542
1543         nb_hw_breakpoint++;
1544         break;
1545
1546     case GDB_WATCHPOINT_WRITE:
1547     case GDB_WATCHPOINT_READ:
1548     case GDB_WATCHPOINT_ACCESS:
1549         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1550             return -ENOBUFS;
1551         }
1552
1553         if (find_hw_breakpoint(addr, type) >= 0) {
1554             return -EEXIST;
1555         }
1556
1557         nb_hw_watchpoint++;
1558         break;
1559
1560     default:
1561         return -ENOSYS;
1562     }
1563
1564     return 0;
1565 }
1566
1567 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1568                                   target_ulong len, int type)
1569 {
1570     int n;
1571
1572     n = find_hw_breakpoint(addr, type);
1573     if (n < 0) {
1574         return -ENOENT;
1575     }
1576
1577     switch (type) {
1578     case GDB_BREAKPOINT_HW:
1579         nb_hw_breakpoint--;
1580         break;
1581
1582     case GDB_WATCHPOINT_WRITE:
1583     case GDB_WATCHPOINT_READ:
1584     case GDB_WATCHPOINT_ACCESS:
1585         nb_hw_watchpoint--;
1586         break;
1587
1588     default:
1589         return -ENOSYS;
1590     }
1591     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1592
1593     return 0;
1594 }
1595
1596 void kvm_arch_remove_all_hw_breakpoints(void)
1597 {
1598     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1599 }
1600
1601 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1602 {
1603     int n;
1604
1605     /* Software Breakpoint updates */
1606     if (kvm_sw_breakpoints_active(cs)) {
1607         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1608     }
1609
1610     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1611            <= ARRAY_SIZE(hw_debug_points));
1612     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1613
1614     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1615         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1616         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1617         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1618             switch (hw_debug_points[n].type) {
1619             case GDB_BREAKPOINT_HW:
1620                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1621                 break;
1622             case GDB_WATCHPOINT_WRITE:
1623                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1624                 break;
1625             case GDB_WATCHPOINT_READ:
1626                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1627                 break;
1628             case GDB_WATCHPOINT_ACCESS:
1629                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1630                                         KVMPPC_DEBUG_WATCH_READ;
1631                 break;
1632             default:
1633                 cpu_abort(cs, "Unsupported breakpoint type\n");
1634             }
1635             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1636         }
1637     }
1638 }
1639
1640 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1641 {
1642     CPUState *cs = CPU(cpu);
1643     CPUPPCState *env = &cpu->env;
1644     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1645     int handle = 0;
1646     int n;
1647     int flag = 0;
1648
1649     if (cs->singlestep_enabled) {
1650         handle = 1;
1651     } else if (arch_info->status) {
1652         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1653             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1654                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1655                 if (n >= 0) {
1656                     handle = 1;
1657                 }
1658             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1659                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1660                 n = find_hw_watchpoint(arch_info->address,  &flag);
1661                 if (n >= 0) {
1662                     handle = 1;
1663                     cs->watchpoint_hit = &hw_watchpoint;
1664                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1665                     hw_watchpoint.flags = flag;
1666                 }
1667             }
1668         }
1669     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1670         handle = 1;
1671     } else {
1672         /* QEMU is not able to handle debug exception, so inject
1673          * program exception to guest;
1674          * Yes program exception NOT debug exception !!
1675          * When QEMU is using debug resources then debug exception must
1676          * be always set. To achieve this we set MSR_DE and also set
1677          * MSRP_DEP so guest cannot change MSR_DE.
1678          * When emulating debug resource for guest we want guest
1679          * to control MSR_DE (enable/disable debug interrupt on need).
1680          * Supporting both configurations are NOT possible.
1681          * So the result is that we cannot share debug resources
1682          * between QEMU and Guest on BOOKE architecture.
1683          * In the current design QEMU gets the priority over guest,
1684          * this means that if QEMU is using debug resources then guest
1685          * cannot use them;
1686          * For software breakpoint QEMU uses a privileged instruction;
1687          * So there cannot be any reason that we are here for guest
1688          * set debug exception, only possibility is guest executed a
1689          * privileged / illegal instruction and that's why we are
1690          * injecting a program interrupt.
1691          */
1692
1693         cpu_synchronize_state(cs);
1694         /* env->nip is PC, so increment this by 4 to use
1695          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1696          */
1697         env->nip += 4;
1698         cs->exception_index = POWERPC_EXCP_PROGRAM;
1699         env->error_code = POWERPC_EXCP_INVAL;
1700         ppc_cpu_do_interrupt(cs);
1701     }
1702
1703     return handle;
1704 }
1705
1706 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1707 {
1708     PowerPCCPU *cpu = POWERPC_CPU(cs);
1709     CPUPPCState *env = &cpu->env;
1710     int ret;
1711
1712     qemu_mutex_lock_iothread();
1713
1714     switch (run->exit_reason) {
1715     case KVM_EXIT_DCR:
1716         if (run->dcr.is_write) {
1717             DPRINTF("handle dcr write\n");
1718             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1719         } else {
1720             DPRINTF("handle dcr read\n");
1721             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1722         }
1723         break;
1724     case KVM_EXIT_HLT:
1725         DPRINTF("handle halt\n");
1726         ret = kvmppc_handle_halt(cpu);
1727         break;
1728 #if defined(TARGET_PPC64)
1729     case KVM_EXIT_PAPR_HCALL:
1730         DPRINTF("handle PAPR hypercall\n");
1731         run->papr_hcall.ret = spapr_hypercall(cpu,
1732                                               run->papr_hcall.nr,
1733                                               run->papr_hcall.args);
1734         ret = 0;
1735         break;
1736 #endif
1737     case KVM_EXIT_EPR:
1738         DPRINTF("handle epr\n");
1739         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1740         ret = 0;
1741         break;
1742     case KVM_EXIT_WATCHDOG:
1743         DPRINTF("handle watchdog expiry\n");
1744         watchdog_perform_action();
1745         ret = 0;
1746         break;
1747
1748     case KVM_EXIT_DEBUG:
1749         DPRINTF("handle debug exception\n");
1750         if (kvm_handle_debug(cpu, run)) {
1751             ret = EXCP_DEBUG;
1752             break;
1753         }
1754         /* re-enter, this exception was guest-internal */
1755         ret = 0;
1756         break;
1757
1758     default:
1759         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1760         ret = -1;
1761         break;
1762     }
1763
1764     qemu_mutex_unlock_iothread();
1765     return ret;
1766 }
1767
1768 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1769 {
1770     CPUState *cs = CPU(cpu);
1771     uint32_t bits = tsr_bits;
1772     struct kvm_one_reg reg = {
1773         .id = KVM_REG_PPC_OR_TSR,
1774         .addr = (uintptr_t) &bits,
1775     };
1776
1777     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1778 }
1779
1780 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1781 {
1782
1783     CPUState *cs = CPU(cpu);
1784     uint32_t bits = tsr_bits;
1785     struct kvm_one_reg reg = {
1786         .id = KVM_REG_PPC_CLEAR_TSR,
1787         .addr = (uintptr_t) &bits,
1788     };
1789
1790     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1791 }
1792
1793 int kvmppc_set_tcr(PowerPCCPU *cpu)
1794 {
1795     CPUState *cs = CPU(cpu);
1796     CPUPPCState *env = &cpu->env;
1797     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1798
1799     struct kvm_one_reg reg = {
1800         .id = KVM_REG_PPC_TCR,
1801         .addr = (uintptr_t) &tcr,
1802     };
1803
1804     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1805 }
1806
1807 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1808 {
1809     CPUState *cs = CPU(cpu);
1810     int ret;
1811
1812     if (!kvm_enabled()) {
1813         return -1;
1814     }
1815
1816     if (!cap_ppc_watchdog) {
1817         printf("warning: KVM does not support watchdog");
1818         return -1;
1819     }
1820
1821     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1822     if (ret < 0) {
1823         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1824                 __func__, strerror(-ret));
1825         return ret;
1826     }
1827
1828     return ret;
1829 }
1830
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy
 * it into @value, a buffer of @len bytes, using pstrcpy().
 *
 * Returns 0 when such a line was found, -1 when the file cannot be
 * opened or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    const int prefix_len = strlen(field);
    char line[512];
    int found = -1;
    FILE *f;

    f = fopen("/proc/cpuinfo", "r");
    if (f == NULL) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, field, prefix_len) == 0) {
            pstrcpy(value, len, line);
            found = 0;
            break;
        }
        /* Stop on a line that starts with a NUL byte */
        if (*line == '\0') {
            break;
        }
    }

    fclose(f);

    return found;
}
1858
1859 uint32_t kvmppc_get_tbfreq(void)
1860 {
1861     char line[512];
1862     char *ns;
1863     uint32_t retval = NANOSECONDS_PER_SECOND;
1864
1865     if (read_cpuinfo("timebase", line, sizeof(line))) {
1866         return retval;
1867     }
1868
1869     if (!(ns = strchr(line, ':'))) {
1870         return retval;
1871     }
1872
1873     ns++;
1874
1875     return atoi(ns);
1876 }
1877
/*
 * Load the contents of /proc/device-tree/system-id into *value using
 * g_file_get_contents(); returns that call's success indication.
 */
bool kvmppc_get_host_serial(char **value)
{
    static const char system_id_path[] = "/proc/device-tree/system-id";

    return g_file_get_contents(system_id_path, value, NULL, NULL);
}
1883
/*
 * Load the contents of /proc/device-tree/model into *value using
 * g_file_get_contents(); returns that call's success indication.
 */
bool kvmppc_get_host_model(char **value)
{
    static const char model_path[] = "/proc/device-tree/model";

    return g_file_get_contents(model_path, value, NULL, NULL);
}
1888
1889 /* Try to find a device tree node for a CPU with clock-frequency property */
1890 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1891 {
1892     struct dirent *dirp;
1893     DIR *dp;
1894
1895     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1896         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1897         return -1;
1898     }
1899
1900     buf[0] = '\0';
1901     while ((dirp = readdir(dp)) != NULL) {
1902         FILE *f;
1903         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1904                  dirp->d_name);
1905         f = fopen(buf, "r");
1906         if (f) {
1907             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1908             fclose(f);
1909             break;
1910         }
1911         buf[0] = '\0';
1912     }
1913     closedir(dp);
1914     if (buf[0] == '\0') {
1915         printf("Unknown host!\n");
1916         return -1;
1917     }
1918
1919     return 0;
1920 }
1921
/*
 * Read a big-endian integer from @filename.
 *
 * Up to 8 bytes are read: exactly 4 bytes are decoded as a 32-bit value,
 * exactly 8 bytes as a 64-bit value.  Returns (uint64_t)-1 when the file
 * cannot be opened and 0 for any other length.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    int nread;
    FILE *prop = fopen(filename, "rb");

    if (prop == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), prop);
    fclose(prop);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1948
1949 /* Read a CPU node property from the host device tree that's a single
1950  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1951  * (can't find or open the property, or doesn't understand the
1952  * format) */
1953 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1954 {
1955     char buf[PATH_MAX], *tmp;
1956     uint64_t val;
1957
1958     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1959         return -1;
1960     }
1961
1962     tmp = g_strdup_printf("%s/%s", buf, propname);
1963     val = kvmppc_read_int_dt(tmp);
1964     g_free(tmp);
1965
1966     return val;
1967 }
1968
/* Return the host CPU node's "clock-frequency" property as read by
 * kvmppc_read_int_cpu_dt() */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1973
/* Return the host CPU node's "ibm,vmx" property, truncated to 32 bits
 * (so a failed lookup of (uint64_t)-1 comes back as (uint32_t)-1) */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return (uint32_t)prop;
}
1978
/* Return the host CPU node's "ibm,dfp" property, truncated to 32 bits
 * (so a failed lookup of (uint64_t)-1 comes back as (uint32_t)-1) */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return (uint32_t)prop;
}
1983
1984 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1985  {
1986      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1987      CPUState *cs = CPU(cpu);
1988
1989     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1990         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1991         return 0;
1992     }
1993
1994     return 1;
1995 }
1996
1997 int kvmppc_get_hasidle(CPUPPCState *env)
1998 {
1999     struct kvm_ppc_pvinfo pvinfo;
2000
2001     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2002         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2003         return 1;
2004     }
2005
2006     return 0;
2007 }
2008
2009 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2010 {
2011     uint32_t *hc = (uint32_t*)buf;
2012     struct kvm_ppc_pvinfo pvinfo;
2013
2014     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2015         memcpy(buf, pvinfo.hcall, buf_len);
2016         return 0;
2017     }
2018
2019     /*
2020      * Fallback to always fail hypercalls regardless of endianness:
2021      *
2022      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2023      *     li r3, -1
2024      *     b .+8       (becomes nop in wrong endian)
2025      *     bswap32(li r3, -1)
2026      */
2027
2028     hc[0] = cpu_to_be32(0x08000048);
2029     hc[1] = cpu_to_be32(0x3860ffff);
2030     hc[2] = cpu_to_be32(0x48000008);
2031     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2032
2033     return 1;
2034 }
2035
2036 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2037 {
2038     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2039 }
2040
2041 void kvmppc_enable_logical_ci_hcalls(void)
2042 {
2043     /*
2044      * FIXME: it would be nice if we could detect the cases where
2045      * we're using a device which requires the in kernel
2046      * implementation of these hcalls, but the kernel lacks them and
2047      * produce a warning.
2048      */
2049     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2050     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2051 }
2052
2053 void kvmppc_enable_set_mode_hcall(void)
2054 {
2055     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2056 }
2057
2058 void kvmppc_set_papr(PowerPCCPU *cpu)
2059 {
2060     CPUState *cs = CPU(cpu);
2061     int ret;
2062
2063     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2064     if (ret) {
2065         error_report("This vCPU type or KVM version does not support PAPR");
2066         exit(1);
2067     }
2068
2069     /* Update the capability flag so we sync the right information
2070      * with kvm */
2071     cap_papr = 1;
2072 }
2073
2074 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2075 {
2076     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2077 }
2078
2079 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2080 {
2081     CPUState *cs = CPU(cpu);
2082     int ret;
2083
2084     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2085     if (ret && mpic_proxy) {
2086         error_report("This KVM version does not support EPR");
2087         exit(1);
2088     }
2089 }
2090
2091 int kvmppc_smt_threads(void)
2092 {
2093     return cap_ppc_smt ? cap_ppc_smt : 1;
2094 }
2095
2096 #ifdef TARGET_PPC64
2097 off_t kvmppc_alloc_rma(void **rma)
2098 {
2099     off_t size;
2100     int fd;
2101     struct kvm_allocate_rma ret;
2102
2103     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2104      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2105      *                      not necessary on this hardware
2106      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2107      *
2108      * FIXME: We should allow the user to force contiguous RMA
2109      * allocation in the cap_ppc_rma==1 case.
2110      */
2111     if (cap_ppc_rma < 2) {
2112         return 0;
2113     }
2114
2115     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2116     if (fd < 0) {
2117         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2118                 strerror(errno));
2119         return -1;
2120     }
2121
2122     size = MIN(ret.rma_size, 256ul << 20);
2123
2124     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2125     if (*rma == MAP_FAILED) {
2126         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2127         return -1;
2128     };
2129
2130     return size;
2131 }
2132
2133 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2134 {
2135     struct kvm_ppc_smmu_info info;
2136     long rampagesize, best_page_shift;
2137     int i;
2138
2139     if (cap_ppc_rma >= 2) {
2140         return current_size;
2141     }
2142
2143     /* Find the largest hardware supported page size that's less than
2144      * or equal to the (logical) backing page size of guest RAM */
2145     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2146     rampagesize = getrampagesize();
2147     best_page_shift = 0;
2148
2149     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2150         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2151
2152         if (!sps->page_shift) {
2153             continue;
2154         }
2155
2156         if ((sps->page_shift > best_page_shift)
2157             && ((1UL << sps->page_shift) <= rampagesize)) {
2158             best_page_shift = sps->page_shift;
2159         }
2160     }
2161
2162     return MIN(current_size,
2163                1ULL << (best_page_shift + hash_shift - 7));
2164 }
2165 #endif
2166
2167 bool kvmppc_spapr_use_multitce(void)
2168 {
2169     return cap_spapr_multitce;
2170 }
2171
2172 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2173                               bool need_vfio)
2174 {
2175     struct kvm_create_spapr_tce args = {
2176         .liobn = liobn,
2177         .window_size = window_size,
2178     };
2179     long len;
2180     int fd;
2181     void *table;
2182
2183     /* Must set fd to -1 so we don't try to munmap when called for
2184      * destroying the table, which the upper layers -will- do
2185      */
2186     *pfd = -1;
2187     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2188         return NULL;
2189     }
2190
2191     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2192     if (fd < 0) {
2193         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2194                 liobn);
2195         return NULL;
2196     }
2197
2198     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2199     /* FIXME: round this up to page size */
2200
2201     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2202     if (table == MAP_FAILED) {
2203         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2204                 liobn);
2205         close(fd);
2206         return NULL;
2207     }
2208
2209     *pfd = fd;
2210     return table;
2211 }
2212
/*
 * Unmap a TCE table of @nb_table 64-bit entries and close its @fd.
 *
 * Returns -1 without doing anything when @fd is negative.  Otherwise
 * returns 0, even if munmap() or close() fails (the failure is only
 * reported on stderr; close() is not attempted after a failed munmap()).
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        /* Terminate the message so it doesn't run into later output */
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2231
2232 int kvmppc_reset_htab(int shift_hint)
2233 {
2234     uint32_t shift = shift_hint;
2235
2236     if (!kvm_enabled()) {
2237         /* Full emulation, tell caller to allocate htab itself */
2238         return 0;
2239     }
2240     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2241         int ret;
2242         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2243         if (ret == -ENOTTY) {
2244             /* At least some versions of PR KVM advertise the
2245              * capability, but don't implement the ioctl().  Oops.
2246              * Return 0 so that we allocate the htab in qemu, as is
2247              * correct for PR. */
2248             return 0;
2249         } else if (ret < 0) {
2250             return ret;
2251         }
2252         return shift;
2253     }
2254
2255     /* We have a kernel that predates the htab reset calls.  For PR
2256      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2257      * this era, it has allocated a 16MB fixed size hash table
2258      * already.  Kernels of this era have the GET_PVINFO capability
2259      * only on PR, so we use this hack to determine the right
2260      * answer */
2261     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2262         /* PR - tell caller to allocate htab */
2263         return 0;
2264     } else {
2265         /* HV - assume 16MB kernel allocated htab */
2266         return 24;
2267     }
2268 }
2269
2270 static inline uint32_t mfpvr(void)
2271 {
2272     uint32_t pvr;
2273
2274     asm ("mfpvr %0"
2275          : "=r"(pvr));
2276     return pvr;
2277 }
2278
/* Set (@on == true) or clear (@on == false) the bits of @flags in *@word */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2287
2288 static void kvmppc_host_cpu_initfn(Object *obj)
2289 {
2290     assert(kvm_enabled());
2291 }
2292
2293 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2294 {
2295     DeviceClass *dc = DEVICE_CLASS(oc);
2296     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2297     uint32_t vmx = kvmppc_get_vmx();
2298     uint32_t dfp = kvmppc_get_dfp();
2299     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2300     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2301
2302     /* Now fix up the class with information we can query from the host */
2303     pcc->pvr = mfpvr();
2304
2305     if (vmx != -1) {
2306         /* Only override when we know what the host supports */
2307         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2308         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2309     }
2310     if (dfp != -1) {
2311         /* Only override when we know what the host supports */
2312         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2313     }
2314
2315     if (dcache_size != -1) {
2316         pcc->l1_dcache_size = dcache_size;
2317     }
2318
2319     if (icache_size != -1) {
2320         pcc->l1_icache_size = icache_size;
2321     }
2322
2323     /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2324     dc->cannot_destroy_with_object_finalize_yet = true;
2325 }
2326
2327 bool kvmppc_has_cap_epr(void)
2328 {
2329     return cap_epr;
2330 }
2331
2332 bool kvmppc_has_cap_htab_fd(void)
2333 {
2334     return cap_htab_fd;
2335 }
2336
2337 bool kvmppc_has_cap_fixup_hcalls(void)
2338 {
2339     return cap_fixup_hcalls;
2340 }
2341
2342 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2343 {
2344     ObjectClass *oc = OBJECT_CLASS(pcc);
2345
2346     while (oc && !object_class_is_abstract(oc)) {
2347         oc = object_class_get_parent(oc);
2348     }
2349     assert(oc);
2350
2351     return POWERPC_CPU_CLASS(oc);
2352 }
2353
2354 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2355 {
2356     uint32_t host_pvr = mfpvr();
2357     PowerPCCPUClass *pvr_pcc;
2358
2359     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2360     if (pvr_pcc == NULL) {
2361         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2362     }
2363
2364     return pvr_pcc;
2365 }
2366
2367 #if defined(TARGET_PPC64)
2368 static void spapr_cpu_core_host_initfn(Object *obj)
2369 {
2370     sPAPRCPUCore *core = SPAPR_CPU_CORE(obj);
2371     char *name = g_strdup_printf("%s-" TYPE_POWERPC_CPU, "host");
2372     ObjectClass *oc = object_class_by_name(name);
2373
2374     g_assert(oc);
2375     g_free((void *)name);
2376     core->cpu_class = oc;
2377 }
2378 #endif
2379
2380 static int kvm_ppc_register_host_cpu_type(void)
2381 {
2382     TypeInfo type_info = {
2383         .name = TYPE_HOST_POWERPC_CPU,
2384         .instance_init = kvmppc_host_cpu_initfn,
2385         .class_init = kvmppc_host_cpu_class_init,
2386     };
2387     PowerPCCPUClass *pvr_pcc;
2388     DeviceClass *dc;
2389
2390     pvr_pcc = kvm_ppc_get_host_cpu_class();
2391     if (pvr_pcc == NULL) {
2392         return -1;
2393     }
2394     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2395     type_register(&type_info);
2396
2397 #if defined(TARGET_PPC64)
2398     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2399     type_info.parent = TYPE_SPAPR_CPU_CORE,
2400     type_info.instance_size = sizeof(sPAPRCPUCore),
2401     type_info.instance_init = spapr_cpu_core_host_initfn,
2402     type_info.class_init = NULL;
2403     type_register(&type_info);
2404     g_free((void *)type_info.name);
2405     type_info.instance_size = 0;
2406     type_info.instance_init = NULL;
2407 #endif
2408
2409     /* Register generic family CPU class for a family */
2410     pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2411     dc = DEVICE_CLASS(pvr_pcc);
2412     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2413     type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2414     type_register(&type_info);
2415
2416     return 0;
2417 }
2418
2419 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2420 {
2421     struct kvm_rtas_token_args args = {
2422         .token = token,
2423     };
2424
2425     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2426         return -ENOENT;
2427     }
2428
2429     strncpy(args.name, function, sizeof(args.name));
2430
2431     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2432 }
2433
2434 int kvmppc_get_htab_fd(bool write)
2435 {
2436     struct kvm_get_htab_fd s = {
2437         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2438         .start_index = 0,
2439     };
2440
2441     if (!cap_htab_fd) {
2442         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2443         return -1;
2444     }
2445
2446     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2447 }
2448
2449 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2450 {
2451     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2452     uint8_t buf[bufsize];
2453     ssize_t rc;
2454
2455     do {
2456         rc = read(fd, buf, bufsize);
2457         if (rc < 0) {
2458             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2459                     strerror(errno));
2460             return rc;
2461         } else if (rc) {
2462             uint8_t *buffer = buf;
2463             ssize_t n = rc;
2464             while (n) {
2465                 struct kvm_get_htab_header *head =
2466                     (struct kvm_get_htab_header *) buffer;
2467                 size_t chunksize = sizeof(*head) +
2468                      HASH_PTE_SIZE_64 * head->n_valid;
2469
2470                 qemu_put_be32(f, head->index);
2471                 qemu_put_be16(f, head->n_valid);
2472                 qemu_put_be16(f, head->n_invalid);
2473                 qemu_put_buffer(f, (void *)(head + 1),
2474                                 HASH_PTE_SIZE_64 * head->n_valid);
2475
2476                 buffer += chunksize;
2477                 n -= chunksize;
2478             }
2479         }
2480     } while ((rc != 0)
2481              && ((max_ns < 0)
2482                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2483
2484     return (rc == 0) ? 1 : 0;
2485 }
2486
2487 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2488                            uint16_t n_valid, uint16_t n_invalid)
2489 {
2490     struct kvm_get_htab_header *buf;
2491     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2492     ssize_t rc;
2493
2494     buf = alloca(chunksize);
2495     buf->index = index;
2496     buf->n_valid = n_valid;
2497     buf->n_invalid = n_invalid;
2498
2499     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2500
2501     rc = write(fd, buf, chunksize);
2502     if (rc < 0) {
2503         fprintf(stderr, "Error writing KVM hash table: %s\n",
2504                 strerror(errno));
2505         return rc;
2506     }
2507     if (rc != chunksize) {
2508         /* We should never get a short write on a single chunk */
2509         fprintf(stderr, "Short write, restoring KVM hash table\n");
2510         return -1;
2511     }
2512     return 0;
2513 }
2514
2515 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2516 {
2517     return true;
2518 }
2519
2520 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2521 {
2522     return 1;
2523 }
2524
/* Arch hook: unconditionally returns 1; neither argument is used */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}
2529
2530 void kvm_arch_init_irq_routing(KVMState *s)
2531 {
2532 }
2533
2534 struct kvm_get_htab_buf {
2535     struct kvm_get_htab_header header;
2536     /*
2537      * We require one extra byte for read
2538      */
2539     target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2540 };
2541
2542 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2543 {
2544     int htab_fd;
2545     struct kvm_get_htab_fd ghf;
2546     struct kvm_get_htab_buf  *hpte_buf;
2547
2548     ghf.flags = 0;
2549     ghf.start_index = pte_index;
2550     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2551     if (htab_fd < 0) {
2552         goto error_out;
2553     }
2554
2555     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2556     /*
2557      * Read the hpte group
2558      */
2559     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2560         goto out_close;
2561     }
2562
2563     close(htab_fd);
2564     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2565
2566 out_close:
2567     g_free(hpte_buf);
2568     close(htab_fd);
2569 error_out:
2570     return 0;
2571 }
2572
2573 void kvmppc_hash64_free_pteg(uint64_t token)
2574 {
2575     struct kvm_get_htab_buf *htab_buf;
2576
2577     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2578                             hpte);
2579     g_free(htab_buf);
2580     return;
2581 }
2582
2583 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2584                              target_ulong pte0, target_ulong pte1)
2585 {
2586     int htab_fd;
2587     struct kvm_get_htab_fd ghf;
2588     struct kvm_get_htab_buf hpte_buf;
2589
2590     ghf.flags = 0;
2591     ghf.start_index = 0;     /* Ignored */
2592     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2593     if (htab_fd < 0) {
2594         goto error_out;
2595     }
2596
2597     hpte_buf.header.n_valid = 1;
2598     hpte_buf.header.n_invalid = 0;
2599     hpte_buf.header.index = pte_index;
2600     hpte_buf.hpte[0] = pte0;
2601     hpte_buf.hpte[1] = pte1;
2602     /*
2603      * Write the hpte entry.
2604      * CAUTION: write() has the warn_unused_result attribute. Hence we
2605      * need to check the return value, even though we do nothing.
2606      */
2607     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2608         goto out_close;
2609     }
2610
2611 out_close:
2612     close(htab_fd);
2613     return;
2614
2615 error_out:
2616     return;
2617 }
2618
2619 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2620                              uint64_t address, uint32_t data, PCIDevice *dev)
2621 {
2622     return 0;
2623 }
2624
2625 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2626                                 int vector, PCIDevice *dev)
2627 {
2628     return 0;
2629 }
2630
/* Arch hook: does nothing and unconditionally returns 0 */
int kvm_arch_release_virq_post(int virq)
{
    return 0;
}
2635
/* Return the low 16 bits of @data */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t low16_mask = 0xffff;

    return data & low16_mask;
}
2640
2641 int kvmppc_enable_hwrng(void)
2642 {
2643     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2644         return -1;
2645     }
2646
2647     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2648 }