Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging
[qemu/ar7.git] / target-ppc / kvm.c
bloba18d4d5654b7ee81183d34b1b7579dd6f63faec3
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
30 #include "sysemu/numa.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "sysemu/hostmem.h"
46 #include "qemu/cutils.h"
47 #if defined(TARGET_PPC64)
48 #include "hw/ppc/spapr_cpu_core.h"
49 #endif
51 //#define DEBUG_KVM
53 #ifdef DEBUG_KVM
54 #define DPRINTF(fmt, ...) \
55 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
56 #else
57 #define DPRINTF(fmt, ...) \
58 do { } while (0)
59 #endif
61 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
63 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
64 KVM_CAP_LAST_INFO
67 static int cap_interrupt_unset = false;
68 static int cap_interrupt_level = false;
69 static int cap_segstate;
70 static int cap_booke_sregs;
71 static int cap_ppc_smt;
72 static int cap_ppc_rma;
73 static int cap_spapr_tce;
74 static int cap_spapr_multitce;
75 static int cap_spapr_vfio;
76 static int cap_hior;
77 static int cap_one_reg;
78 static int cap_epr;
79 static int cap_ppc_watchdog;
80 static int cap_papr;
81 static int cap_htab_fd;
82 static int cap_fixup_hcalls;
84 static uint32_t debug_inst_opcode;
86 /* XXX We have a race condition where we actually have a level triggered
87 * interrupt, but the infrastructure can't expose that yet, so the guest
88 * takes but ignores it, goes to sleep and never gets notified that there's
89 * still an interrupt pending.
91 * As a quick workaround, let's just wake up again 20 ms after we injected
92 * an interrupt. That way we can assure that we're always reinjecting
93 * interrupts in case the guest swallowed them.
95 static QEMUTimer *idle_timer;
97 static void kvm_kick_cpu(void *opaque)
99 PowerPCCPU *cpu = opaque;
101 qemu_cpu_kick(CPU(cpu));
104 static int kvm_ppc_register_host_cpu_type(void);
106 int kvm_arch_init(MachineState *ms, KVMState *s)
108 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
109 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
110 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
111 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
112 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
113 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
114 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
115 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
116 cap_spapr_vfio = false;
117 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
118 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
119 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
120 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
121 /* Note: we don't set cap_papr here, because this capability is
122 * only activated after this by kvmppc_set_papr() */
123 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
124 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
126 if (!cap_interrupt_level) {
127 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
128 "VM to stall at times!\n");
131 kvm_ppc_register_host_cpu_type();
133 return 0;
136 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
138 CPUPPCState *cenv = &cpu->env;
139 CPUState *cs = CPU(cpu);
140 struct kvm_sregs sregs;
141 int ret;
143 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
144 /* What we're really trying to say is "if we're on BookE, we use
145 the native PVR for now". This is the only sane way to check
146 it though, so we potentially confuse users that they can run
147 BookE guests on BookS. Let's hope nobody dares enough :) */
148 return 0;
149 } else {
150 if (!cap_segstate) {
151 fprintf(stderr, "kvm error: missing PVR setting capability\n");
152 return -ENOSYS;
156 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
157 if (ret) {
158 return ret;
161 sregs.pvr = cenv->spr[SPR_PVR];
162 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
165 /* Set up a shared TLB array with KVM */
166 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
168 CPUPPCState *env = &cpu->env;
169 CPUState *cs = CPU(cpu);
170 struct kvm_book3e_206_tlb_params params = {};
171 struct kvm_config_tlb cfg = {};
172 unsigned int entries = 0;
173 int ret, i;
175 if (!kvm_enabled() ||
176 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
177 return 0;
180 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
182 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
183 params.tlb_sizes[i] = booke206_tlb_size(env, i);
184 params.tlb_ways[i] = booke206_tlb_ways(env, i);
185 entries += params.tlb_sizes[i];
188 assert(entries == env->nb_tlb);
189 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
191 env->tlb_dirty = true;
193 cfg.array = (uintptr_t)env->tlb.tlbm;
194 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
195 cfg.params = (uintptr_t)&params;
196 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
198 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
199 if (ret < 0) {
200 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
201 __func__, strerror(-ret));
202 return ret;
205 env->kvm_sw_tlb = true;
206 return 0;
210 #if defined(TARGET_PPC64)
211 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
212 struct kvm_ppc_smmu_info *info)
214 CPUPPCState *env = &cpu->env;
215 CPUState *cs = CPU(cpu);
217 memset(info, 0, sizeof(*info));
219 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
220 * need to "guess" what the supported page sizes are.
222 * For that to work we make a few assumptions:
224 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
225 * KVM which only supports 4K and 16M pages, but supports them
226 * regardless of the backing store characteritics. We also don't
227 * support 1T segments.
229 * This is safe as if HV KVM ever supports that capability or PR
230 * KVM grows supports for more page/segment sizes, those versions
231 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
232 * will not hit this fallback
234 * - Else we are running HV KVM. This means we only support page
235 * sizes that fit in the backing store. Additionally we only
236 * advertize 64K pages if the processor is ARCH 2.06 and we assume
237 * P7 encodings for the SLB and hash table. Here too, we assume
238 * support for any newer processor will mean a kernel that
239 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
240 * this fallback.
242 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
243 /* No flags */
244 info->flags = 0;
245 info->slb_size = 64;
247 /* Standard 4k base page size segment */
248 info->sps[0].page_shift = 12;
249 info->sps[0].slb_enc = 0;
250 info->sps[0].enc[0].page_shift = 12;
251 info->sps[0].enc[0].pte_enc = 0;
253 /* Standard 16M large page size segment */
254 info->sps[1].page_shift = 24;
255 info->sps[1].slb_enc = SLB_VSID_L;
256 info->sps[1].enc[0].page_shift = 24;
257 info->sps[1].enc[0].pte_enc = 0;
258 } else {
259 int i = 0;
261 /* HV KVM has backing store size restrictions */
262 info->flags = KVM_PPC_PAGE_SIZES_REAL;
264 if (env->mmu_model & POWERPC_MMU_1TSEG) {
265 info->flags |= KVM_PPC_1T_SEGMENTS;
268 if (env->mmu_model == POWERPC_MMU_2_06 ||
269 env->mmu_model == POWERPC_MMU_2_07) {
270 info->slb_size = 32;
271 } else {
272 info->slb_size = 64;
275 /* Standard 4k base page size segment */
276 info->sps[i].page_shift = 12;
277 info->sps[i].slb_enc = 0;
278 info->sps[i].enc[0].page_shift = 12;
279 info->sps[i].enc[0].pte_enc = 0;
280 i++;
282 /* 64K on MMU 2.06 and later */
283 if (env->mmu_model == POWERPC_MMU_2_06 ||
284 env->mmu_model == POWERPC_MMU_2_07) {
285 info->sps[i].page_shift = 16;
286 info->sps[i].slb_enc = 0x110;
287 info->sps[i].enc[0].page_shift = 16;
288 info->sps[i].enc[0].pte_enc = 1;
289 i++;
292 /* Standard 16M large page size segment */
293 info->sps[i].page_shift = 24;
294 info->sps[i].slb_enc = SLB_VSID_L;
295 info->sps[i].enc[0].page_shift = 24;
296 info->sps[i].enc[0].pte_enc = 0;
300 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
302 CPUState *cs = CPU(cpu);
303 int ret;
305 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
306 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
307 if (ret == 0) {
308 return;
312 kvm_get_fallback_smmu_info(cpu, info);
/* Return the page size backing mem_path: the hugepage size when the path
 * lives on hugetlbfs, the normal host page size otherwise.
 * Exits the process if the filesystem cannot be statfs()ed. */
static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int ret;

    /* statfs can be interrupted; retry on EINTR */
    do {
        ret = statfs(mem_path, &fs);
    } while (ret != 0 && errno == EINTR);

    if (ret != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type != HUGETLBFS_MAGIC) {
        /* Explicit mempath, but it's ordinary pages */
        return getpagesize();
    }

    /* It's hugepage, return the huge page size */
    return fs.f_bsize;
}
342 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
343 * may or may not name the same files / on the same filesystem now as
344 * when we actually open and map them. Iterate over the file
345 * descriptors instead, and use qemu_fd_getpagesize().
347 static int find_max_supported_pagesize(Object *obj, void *opaque)
349 char *mem_path;
350 long *hpsize_min = opaque;
352 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
353 mem_path = object_property_get_str(obj, "mem-path", NULL);
354 if (mem_path) {
355 long hpsize = gethugepagesize(mem_path);
356 if (hpsize < *hpsize_min) {
357 *hpsize_min = hpsize;
359 } else {
360 *hpsize_min = getpagesize();
364 return 0;
367 static long getrampagesize(void)
369 long hpsize = LONG_MAX;
370 long mainrampagesize;
371 Object *memdev_root;
373 if (mem_path) {
374 mainrampagesize = gethugepagesize(mem_path);
375 } else {
376 mainrampagesize = getpagesize();
379 /* it's possible we have memory-backend objects with
380 * hugepage-backed RAM. these may get mapped into system
381 * address space via -numa parameters or memory hotplug
382 * hooks. we want to take these into account, but we
383 * also want to make sure these supported hugepage
384 * sizes are applicable across the entire range of memory
385 * we may boot from, so we take the min across all
386 * backends, and assume normal pages in cases where a
387 * backend isn't backed by hugepages.
389 memdev_root = object_resolve_path("/objects", NULL);
390 if (memdev_root) {
391 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
393 if (hpsize == LONG_MAX) {
394 /* No additional memory regions found ==> Report main RAM page size */
395 return mainrampagesize;
398 /* If NUMA is disabled or the NUMA nodes are not backed with a
399 * memory-backend, then there is at least one node using "normal" RAM,
400 * so if its page size is smaller we have got to report that size instead.
402 if (hpsize > mainrampagesize &&
403 (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
404 static bool warned;
405 if (!warned) {
406 error_report("Huge page support disabled (n/a for main memory).");
407 warned = true;
409 return mainrampagesize;
412 return hpsize;
415 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
417 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
418 return true;
421 return (1ul << shift) <= rampgsize;
424 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
426 static struct kvm_ppc_smmu_info smmu_info;
427 static bool has_smmu_info;
428 CPUPPCState *env = &cpu->env;
429 long rampagesize;
430 int iq, ik, jq, jk;
431 bool has_64k_pages = false;
433 /* We only handle page sizes for 64-bit server guests for now */
434 if (!(env->mmu_model & POWERPC_MMU_64)) {
435 return;
438 /* Collect MMU info from kernel if not already */
439 if (!has_smmu_info) {
440 kvm_get_smmu_info(cpu, &smmu_info);
441 has_smmu_info = true;
444 rampagesize = getrampagesize();
446 /* Convert to QEMU form */
447 memset(&env->sps, 0, sizeof(env->sps));
449 /* If we have HV KVM, we need to forbid CI large pages if our
450 * host page size is smaller than 64K.
452 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
453 env->ci_large_pages = getpagesize() >= 0x10000;
457 * XXX This loop should be an entry wide AND of the capabilities that
458 * the selected CPU has with the capabilities that KVM supports.
460 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
461 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
462 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
464 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
465 ksps->page_shift)) {
466 continue;
468 qsps->page_shift = ksps->page_shift;
469 qsps->slb_enc = ksps->slb_enc;
470 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
471 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
472 ksps->enc[jk].page_shift)) {
473 continue;
475 if (ksps->enc[jk].page_shift == 16) {
476 has_64k_pages = true;
478 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
479 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
480 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
481 break;
484 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
485 break;
488 env->slb_nr = smmu_info.slb_size;
489 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
490 env->mmu_model &= ~POWERPC_MMU_1TSEG;
492 if (!has_64k_pages) {
493 env->mmu_model &= ~POWERPC_MMU_64K;
496 #else /* defined (TARGET_PPC64) */
498 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
502 #endif /* !defined (TARGET_PPC64) */
504 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
506 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
509 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
510 * book3s supports only 1 watchpoint, so array size
511 * of 4 is sufficient for now.
513 #define MAX_HW_BKPTS 4
515 static struct HWBreakpoint {
516 target_ulong addr;
517 int type;
518 } hw_debug_points[MAX_HW_BKPTS];
520 static CPUWatchpoint hw_watchpoint;
522 /* Default there is no breakpoint and watchpoint supported */
523 static int max_hw_breakpoint;
524 static int max_hw_watchpoint;
525 static int nb_hw_breakpoint;
526 static int nb_hw_watchpoint;
528 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
530 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
531 max_hw_breakpoint = 2;
532 max_hw_watchpoint = 2;
535 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
536 fprintf(stderr, "Error initializing h/w breakpoints\n");
537 return;
541 int kvm_arch_init_vcpu(CPUState *cs)
543 PowerPCCPU *cpu = POWERPC_CPU(cs);
544 CPUPPCState *cenv = &cpu->env;
545 int ret;
547 /* Gather server mmu info from KVM and update the CPU state */
548 kvm_fixup_page_sizes(cpu);
550 /* Synchronize sregs with kvm */
551 ret = kvm_arch_sync_sregs(cpu);
552 if (ret) {
553 if (ret == -EINVAL) {
554 error_report("Register sync failed... If you're using kvm-hv.ko,"
555 " only \"-cpu host\" is possible");
557 return ret;
560 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
562 /* Some targets support access to KVM's guest TLB. */
563 switch (cenv->mmu_model) {
564 case POWERPC_MMU_BOOKE206:
565 ret = kvm_booke206_tlb_init(cpu);
566 break;
567 default:
568 break;
571 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
572 kvmppc_hw_debug_points_init(cenv);
574 return ret;
577 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
579 CPUPPCState *env = &cpu->env;
580 CPUState *cs = CPU(cpu);
581 struct kvm_dirty_tlb dirty_tlb;
582 unsigned char *bitmap;
583 int ret;
585 if (!env->kvm_sw_tlb) {
586 return;
589 bitmap = g_malloc((env->nb_tlb + 7) / 8);
590 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
592 dirty_tlb.bitmap = (uintptr_t)bitmap;
593 dirty_tlb.num_dirty = env->nb_tlb;
595 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
596 if (ret) {
597 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
598 __func__, strerror(-ret));
601 g_free(bitmap);
604 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
606 PowerPCCPU *cpu = POWERPC_CPU(cs);
607 CPUPPCState *env = &cpu->env;
608 union {
609 uint32_t u32;
610 uint64_t u64;
611 } val;
612 struct kvm_one_reg reg = {
613 .id = id,
614 .addr = (uintptr_t) &val,
616 int ret;
618 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
619 if (ret != 0) {
620 trace_kvm_failed_spr_get(spr, strerror(errno));
621 } else {
622 switch (id & KVM_REG_SIZE_MASK) {
623 case KVM_REG_SIZE_U32:
624 env->spr[spr] = val.u32;
625 break;
627 case KVM_REG_SIZE_U64:
628 env->spr[spr] = val.u64;
629 break;
631 default:
632 /* Don't handle this size yet */
633 abort();
638 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
640 PowerPCCPU *cpu = POWERPC_CPU(cs);
641 CPUPPCState *env = &cpu->env;
642 union {
643 uint32_t u32;
644 uint64_t u64;
645 } val;
646 struct kvm_one_reg reg = {
647 .id = id,
648 .addr = (uintptr_t) &val,
650 int ret;
652 switch (id & KVM_REG_SIZE_MASK) {
653 case KVM_REG_SIZE_U32:
654 val.u32 = env->spr[spr];
655 break;
657 case KVM_REG_SIZE_U64:
658 val.u64 = env->spr[spr];
659 break;
661 default:
662 /* Don't handle this size yet */
663 abort();
666 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
667 if (ret != 0) {
668 trace_kvm_failed_spr_set(spr, strerror(errno));
672 static int kvm_put_fp(CPUState *cs)
674 PowerPCCPU *cpu = POWERPC_CPU(cs);
675 CPUPPCState *env = &cpu->env;
676 struct kvm_one_reg reg;
677 int i;
678 int ret;
680 if (env->insns_flags & PPC_FLOAT) {
681 uint64_t fpscr = env->fpscr;
682 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
684 reg.id = KVM_REG_PPC_FPSCR;
685 reg.addr = (uintptr_t)&fpscr;
686 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
687 if (ret < 0) {
688 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
689 return ret;
692 for (i = 0; i < 32; i++) {
693 uint64_t vsr[2];
695 #ifdef HOST_WORDS_BIGENDIAN
696 vsr[0] = float64_val(env->fpr[i]);
697 vsr[1] = env->vsr[i];
698 #else
699 vsr[0] = env->vsr[i];
700 vsr[1] = float64_val(env->fpr[i]);
701 #endif
702 reg.addr = (uintptr_t) &vsr;
703 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
705 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
706 if (ret < 0) {
707 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
708 i, strerror(errno));
709 return ret;
714 if (env->insns_flags & PPC_ALTIVEC) {
715 reg.id = KVM_REG_PPC_VSCR;
716 reg.addr = (uintptr_t)&env->vscr;
717 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
718 if (ret < 0) {
719 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
720 return ret;
723 for (i = 0; i < 32; i++) {
724 reg.id = KVM_REG_PPC_VR(i);
725 reg.addr = (uintptr_t)&env->avr[i];
726 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
727 if (ret < 0) {
728 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
729 return ret;
734 return 0;
737 static int kvm_get_fp(CPUState *cs)
739 PowerPCCPU *cpu = POWERPC_CPU(cs);
740 CPUPPCState *env = &cpu->env;
741 struct kvm_one_reg reg;
742 int i;
743 int ret;
745 if (env->insns_flags & PPC_FLOAT) {
746 uint64_t fpscr;
747 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
749 reg.id = KVM_REG_PPC_FPSCR;
750 reg.addr = (uintptr_t)&fpscr;
751 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
752 if (ret < 0) {
753 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
754 return ret;
755 } else {
756 env->fpscr = fpscr;
759 for (i = 0; i < 32; i++) {
760 uint64_t vsr[2];
762 reg.addr = (uintptr_t) &vsr;
763 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
765 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
766 if (ret < 0) {
767 DPRINTF("Unable to get %s%d from KVM: %s\n",
768 vsx ? "VSR" : "FPR", i, strerror(errno));
769 return ret;
770 } else {
771 #ifdef HOST_WORDS_BIGENDIAN
772 env->fpr[i] = vsr[0];
773 if (vsx) {
774 env->vsr[i] = vsr[1];
776 #else
777 env->fpr[i] = vsr[1];
778 if (vsx) {
779 env->vsr[i] = vsr[0];
781 #endif
786 if (env->insns_flags & PPC_ALTIVEC) {
787 reg.id = KVM_REG_PPC_VSCR;
788 reg.addr = (uintptr_t)&env->vscr;
789 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
790 if (ret < 0) {
791 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
792 return ret;
795 for (i = 0; i < 32; i++) {
796 reg.id = KVM_REG_PPC_VR(i);
797 reg.addr = (uintptr_t)&env->avr[i];
798 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
799 if (ret < 0) {
800 DPRINTF("Unable to get VR%d from KVM: %s\n",
801 i, strerror(errno));
802 return ret;
807 return 0;
810 #if defined(TARGET_PPC64)
811 static int kvm_get_vpa(CPUState *cs)
813 PowerPCCPU *cpu = POWERPC_CPU(cs);
814 CPUPPCState *env = &cpu->env;
815 struct kvm_one_reg reg;
816 int ret;
818 reg.id = KVM_REG_PPC_VPA_ADDR;
819 reg.addr = (uintptr_t)&env->vpa_addr;
820 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
821 if (ret < 0) {
822 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
823 return ret;
826 assert((uintptr_t)&env->slb_shadow_size
827 == ((uintptr_t)&env->slb_shadow_addr + 8));
828 reg.id = KVM_REG_PPC_VPA_SLB;
829 reg.addr = (uintptr_t)&env->slb_shadow_addr;
830 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
831 if (ret < 0) {
832 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
833 strerror(errno));
834 return ret;
837 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
838 reg.id = KVM_REG_PPC_VPA_DTL;
839 reg.addr = (uintptr_t)&env->dtl_addr;
840 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
841 if (ret < 0) {
842 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
843 strerror(errno));
844 return ret;
847 return 0;
850 static int kvm_put_vpa(CPUState *cs)
852 PowerPCCPU *cpu = POWERPC_CPU(cs);
853 CPUPPCState *env = &cpu->env;
854 struct kvm_one_reg reg;
855 int ret;
857 /* SLB shadow or DTL can't be registered unless a master VPA is
858 * registered. That means when restoring state, if a VPA *is*
859 * registered, we need to set that up first. If not, we need to
860 * deregister the others before deregistering the master VPA */
861 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
863 if (env->vpa_addr) {
864 reg.id = KVM_REG_PPC_VPA_ADDR;
865 reg.addr = (uintptr_t)&env->vpa_addr;
866 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
867 if (ret < 0) {
868 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
869 return ret;
873 assert((uintptr_t)&env->slb_shadow_size
874 == ((uintptr_t)&env->slb_shadow_addr + 8));
875 reg.id = KVM_REG_PPC_VPA_SLB;
876 reg.addr = (uintptr_t)&env->slb_shadow_addr;
877 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
878 if (ret < 0) {
879 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
880 return ret;
883 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
884 reg.id = KVM_REG_PPC_VPA_DTL;
885 reg.addr = (uintptr_t)&env->dtl_addr;
886 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
887 if (ret < 0) {
888 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
889 strerror(errno));
890 return ret;
893 if (!env->vpa_addr) {
894 reg.id = KVM_REG_PPC_VPA_ADDR;
895 reg.addr = (uintptr_t)&env->vpa_addr;
896 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
897 if (ret < 0) {
898 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
899 return ret;
903 return 0;
905 #endif /* TARGET_PPC64 */
907 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
909 CPUPPCState *env = &cpu->env;
910 struct kvm_sregs sregs;
911 int i;
913 sregs.pvr = env->spr[SPR_PVR];
915 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
917 /* Sync SLB */
918 #ifdef TARGET_PPC64
919 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
920 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
921 if (env->slb[i].esid & SLB_ESID_V) {
922 sregs.u.s.ppc64.slb[i].slbe |= i;
924 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
926 #endif
928 /* Sync SRs */
929 for (i = 0; i < 16; i++) {
930 sregs.u.s.ppc32.sr[i] = env->sr[i];
933 /* Sync BATs */
934 for (i = 0; i < 8; i++) {
935 /* Beware. We have to swap upper and lower bits here */
936 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
937 | env->DBAT[1][i];
938 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
939 | env->IBAT[1][i];
942 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
945 int kvm_arch_put_registers(CPUState *cs, int level)
947 PowerPCCPU *cpu = POWERPC_CPU(cs);
948 CPUPPCState *env = &cpu->env;
949 struct kvm_regs regs;
950 int ret;
951 int i;
953 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
954 if (ret < 0) {
955 return ret;
958 regs.ctr = env->ctr;
959 regs.lr = env->lr;
960 regs.xer = cpu_read_xer(env);
961 regs.msr = env->msr;
962 regs.pc = env->nip;
964 regs.srr0 = env->spr[SPR_SRR0];
965 regs.srr1 = env->spr[SPR_SRR1];
967 regs.sprg0 = env->spr[SPR_SPRG0];
968 regs.sprg1 = env->spr[SPR_SPRG1];
969 regs.sprg2 = env->spr[SPR_SPRG2];
970 regs.sprg3 = env->spr[SPR_SPRG3];
971 regs.sprg4 = env->spr[SPR_SPRG4];
972 regs.sprg5 = env->spr[SPR_SPRG5];
973 regs.sprg6 = env->spr[SPR_SPRG6];
974 regs.sprg7 = env->spr[SPR_SPRG7];
976 regs.pid = env->spr[SPR_BOOKE_PID];
978 for (i = 0;i < 32; i++)
979 regs.gpr[i] = env->gpr[i];
981 regs.cr = 0;
982 for (i = 0; i < 8; i++) {
983 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
986 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
987 if (ret < 0)
988 return ret;
990 kvm_put_fp(cs);
992 if (env->tlb_dirty) {
993 kvm_sw_tlb_put(cpu);
994 env->tlb_dirty = false;
997 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
998 ret = kvmppc_put_books_sregs(cpu);
999 if (ret < 0) {
1000 return ret;
1004 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1005 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1008 if (cap_one_reg) {
1009 int i;
1011 /* We deliberately ignore errors here, for kernels which have
1012 * the ONE_REG calls, but don't support the specific
1013 * registers, there's a reasonable chance things will still
1014 * work, at least until we try to migrate. */
1015 for (i = 0; i < 1024; i++) {
1016 uint64_t id = env->spr_cb[i].one_reg_id;
1018 if (id != 0) {
1019 kvm_put_one_spr(cs, id, i);
1023 #ifdef TARGET_PPC64
1024 if (msr_ts) {
1025 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1026 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1028 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1029 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1031 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1032 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1033 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1034 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1035 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1036 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1037 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1038 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1039 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1040 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1043 if (cap_papr) {
1044 if (kvm_put_vpa(cs) < 0) {
1045 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1049 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1050 #endif /* TARGET_PPC64 */
1053 return ret;
1056 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1058 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1061 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1063 CPUPPCState *env = &cpu->env;
1064 struct kvm_sregs sregs;
1065 int ret;
1067 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1068 if (ret < 0) {
1069 return ret;
1072 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1073 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1074 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1075 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1076 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1077 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1078 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1079 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1080 env->spr[SPR_DECR] = sregs.u.e.dec;
1081 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1082 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1083 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1086 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1087 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1088 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1089 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1090 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1091 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1094 if (sregs.u.e.features & KVM_SREGS_E_64) {
1095 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1098 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1099 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1102 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1103 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1104 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1105 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1106 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1107 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1108 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1109 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1110 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1111 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1112 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1113 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1114 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1115 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1116 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1117 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1118 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1119 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1120 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1121 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1122 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1123 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1124 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1125 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1126 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1127 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1128 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1129 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1130 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1131 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1132 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1133 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1134 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1136 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1137 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1138 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1139 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1140 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1141 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1142 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1145 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1146 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1147 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1150 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1151 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1152 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1153 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1154 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1158 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1159 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1160 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1161 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1162 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1163 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1164 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1165 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1166 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1167 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1168 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1171 if (sregs.u.e.features & KVM_SREGS_EXP) {
1172 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1175 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1176 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1177 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1180 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1181 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1182 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1183 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1185 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1186 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1187 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1191 return 0;
/* Read the Book3S MMU state (SDR1, SLB, SRs, BATs) from KVM into env.
 * Returns 0 on success or the negative errno from KVM_GET_SREGS.
 */
static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    struct kvm_sregs sregs;
    int ret;
    int i;

    ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
    if (ret < 0) {
        return ret;
    }

    /* Only update SDR1 when QEMU owns the hash table */
    if (!env->external_htab) {
        ppc_store_sdr1(env, sregs.u.s.sdr1);
    }

    /* Sync SLB */
#ifdef TARGET_PPC64
    /*
     * The packed SLB array we get from KVM_GET_SREGS only contains
     * information about valid entries. So we flush our internal copy
     * to get rid of stale ones, then put all valid SLB entries back
     * in.
     */
    memset(env->slb, 0, sizeof(env->slb));
    for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
        target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
        target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
        /*
         * Only restore valid entries
         */
        if (rb & SLB_ESID_V) {
            ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
        }
    }
#endif

    /* Sync SRs (32-bit segment registers) */
    for (i = 0; i < 16; i++) {
        env->sr[i] = sregs.u.s.ppc32.sr[i];
    }

    /* Sync BATs: KVM packs upper/lower halves into one 64-bit value */
    for (i = 0; i < 8; i++) {
        env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
        env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
        env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
        env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
    }

    return 0;
}
1247 int kvm_arch_get_registers(CPUState *cs)
1249 PowerPCCPU *cpu = POWERPC_CPU(cs);
1250 CPUPPCState *env = &cpu->env;
1251 struct kvm_regs regs;
1252 uint32_t cr;
1253 int i, ret;
1255 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1256 if (ret < 0)
1257 return ret;
1259 cr = regs.cr;
1260 for (i = 7; i >= 0; i--) {
1261 env->crf[i] = cr & 15;
1262 cr >>= 4;
1265 env->ctr = regs.ctr;
1266 env->lr = regs.lr;
1267 cpu_write_xer(env, regs.xer);
1268 env->msr = regs.msr;
1269 env->nip = regs.pc;
1271 env->spr[SPR_SRR0] = regs.srr0;
1272 env->spr[SPR_SRR1] = regs.srr1;
1274 env->spr[SPR_SPRG0] = regs.sprg0;
1275 env->spr[SPR_SPRG1] = regs.sprg1;
1276 env->spr[SPR_SPRG2] = regs.sprg2;
1277 env->spr[SPR_SPRG3] = regs.sprg3;
1278 env->spr[SPR_SPRG4] = regs.sprg4;
1279 env->spr[SPR_SPRG5] = regs.sprg5;
1280 env->spr[SPR_SPRG6] = regs.sprg6;
1281 env->spr[SPR_SPRG7] = regs.sprg7;
1283 env->spr[SPR_BOOKE_PID] = regs.pid;
1285 for (i = 0;i < 32; i++)
1286 env->gpr[i] = regs.gpr[i];
1288 kvm_get_fp(cs);
1290 if (cap_booke_sregs) {
1291 ret = kvmppc_get_booke_sregs(cpu);
1292 if (ret < 0) {
1293 return ret;
1297 if (cap_segstate) {
1298 ret = kvmppc_get_books_sregs(cpu);
1299 if (ret < 0) {
1300 return ret;
1304 if (cap_hior) {
1305 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1308 if (cap_one_reg) {
1309 int i;
1311 /* We deliberately ignore errors here, for kernels which have
1312 * the ONE_REG calls, but don't support the specific
1313 * registers, there's a reasonable chance things will still
1314 * work, at least until we try to migrate. */
1315 for (i = 0; i < 1024; i++) {
1316 uint64_t id = env->spr_cb[i].one_reg_id;
1318 if (id != 0) {
1319 kvm_get_one_spr(cs, id, i);
1323 #ifdef TARGET_PPC64
1324 if (msr_ts) {
1325 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1326 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1328 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1329 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1331 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1332 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1333 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1334 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1335 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1336 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1337 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1338 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1339 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1340 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1343 if (cap_papr) {
1344 if (kvm_get_vpa(cs) < 0) {
1345 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1349 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1350 #endif
1353 return 0;
1356 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1358 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1360 if (irq != PPC_INTERRUPT_EXT) {
1361 return 0;
1364 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1365 return 0;
1368 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1370 return 0;
1373 #if defined(TARGET_PPCEMB)
1374 #define PPC_INPUT_INT PPC40x_INPUT_INT
1375 #elif defined(TARGET_PPC64)
1376 #define PPC_INPUT_INT PPC970_INPUT_INT
1377 #else
1378 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1379 #endif
/* Called with the vCPU about to enter KVM_RUN: on kernels without
 * level-triggered interrupt support, manually inject a pending external
 * interrupt and re-arm the idle timer so level interrupts get re-checked.
 */
void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int r;
    unsigned irq;

    qemu_mutex_lock_iothread();

    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
    if (!cap_interrupt_level &&
        run->ready_for_interrupt_injection &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->irq_input_state & (1<<PPC_INPUT_INT)))
    {
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
        if (r < 0) {
            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
        }

        /* Always wake up soon in case the interrupt was level based */
        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
                  (NANOSECONDS_PER_SECOND / 50));
    }

    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyways, so we will get a chance to deliver the rest. */

    qemu_mutex_unlock_iothread();
}
/* Nothing to do after KVM_RUN on PPC; no special memory attributes. */
MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
{
    return MEMTXATTRS_UNSPECIFIED;
}
/* Report whether the vCPU is halted; KVM handles PPC async events itself. */
int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}
/* Handle a KVM_EXIT_HLT: halt the vCPU unless a hard interrupt is already
 * pending while external interrupts are enabled (MSR_EE).
 * Always returns 0 (resume the run loop).
 */
static int kvmppc_handle_halt(PowerPCCPU *cpu)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;

    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
        cs->halted = 1;
        cs->exception_index = EXCP_HLT;
    }

    return 0;
}
1444 /* map dcr access to existing qemu dcr emulation */
1445 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1447 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1448 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1450 return 0;
1453 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1455 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1456 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1458 return 0;
/* Install a software breakpoint: save the original instruction in
 * bp->saved_insn, then overwrite it with the debug trap opcode.
 * Returns 0 on success, -EINVAL if guest memory cannot be accessed.
 */
int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    /* Mixed endian case is not handled */
    uint32_t sc = debug_inst_opcode;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 0) ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}
/* Remove a software breakpoint: verify the trap opcode is still in place,
 * then restore the saved original instruction.
 * Returns 0 on success, -EINVAL on memory-access failure or if the
 * instruction at bp->pc is no longer our trap opcode.
 */
int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
{
    uint32_t sc;

    if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
        sc != debug_inst_opcode ||
        cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
                            sizeof(sc), 1)) {
        return -EINVAL;
    }

    return 0;
}
1489 static int find_hw_breakpoint(target_ulong addr, int type)
1491 int n;
1493 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1494 <= ARRAY_SIZE(hw_debug_points));
1496 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1497 if (hw_debug_points[n].addr == addr &&
1498 hw_debug_points[n].type == type) {
1499 return n;
1503 return -1;
1506 static int find_hw_watchpoint(target_ulong addr, int *flag)
1508 int n;
1510 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1511 if (n >= 0) {
1512 *flag = BP_MEM_ACCESS;
1513 return n;
1516 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1517 if (n >= 0) {
1518 *flag = BP_MEM_WRITE;
1519 return n;
1522 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1523 if (n >= 0) {
1524 *flag = BP_MEM_READ;
1525 return n;
1528 return -1;
/* Record a hardware breakpoint or watchpoint in hw_debug_points.
 * Returns 0 on success, -ENOBUFS when the shared table or the per-kind
 * hardware limit is full, -EEXIST for duplicates, -ENOSYS for unknown
 * types. The entry is written into the next free slot before the type
 * checks; the slot only becomes live once nb_hw_* is incremented.
 */
int kvm_arch_insert_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
        return -ENOBUFS;
    }

    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;

    switch (type) {
    case GDB_BREAKPOINT_HW:
        if (nb_hw_breakpoint >= max_hw_breakpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_breakpoint++;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        if (nb_hw_watchpoint >= max_hw_watchpoint) {
            return -ENOBUFS;
        }

        if (find_hw_breakpoint(addr, type) >= 0) {
            return -EEXIST;
        }

        nb_hw_watchpoint++;
        break;

    default:
        return -ENOSYS;
    }

    return 0;
}
/* Remove a hardware breakpoint/watchpoint. The freed slot is filled with
 * the last live entry, so the table stays densely packed.
 * Returns 0 on success, -ENOENT if not found, -ENOSYS for unknown types.
 */
int kvm_arch_remove_hw_breakpoint(target_ulong addr,
                                  target_ulong len, int type)
{
    int n;

    n = find_hw_breakpoint(addr, type);
    if (n < 0) {
        return -ENOENT;
    }

    switch (type) {
    case GDB_BREAKPOINT_HW:
        nb_hw_breakpoint--;
        break;

    case GDB_WATCHPOINT_WRITE:
    case GDB_WATCHPOINT_READ:
    case GDB_WATCHPOINT_ACCESS:
        nb_hw_watchpoint--;
        break;

    default:
        return -ENOSYS;
    }
    /* Compact: move the last entry into the vacated slot */
    hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];

    return 0;
}
/* Drop all hardware breakpoints and watchpoints by resetting the counters;
 * the table contents become dead without being cleared. */
void kvm_arch_remove_all_hw_breakpoints(void)
{
    nb_hw_breakpoint = nb_hw_watchpoint = 0;
}
/* Translate QEMU's breakpoint/watchpoint bookkeeping into the
 * kvm_guest_debug control structure passed to KVM before each run.
 */
void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
{
    int n;

    /* Software Breakpoint updates */
    if (kvm_sw_breakpoints_active(cs)) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
    }

    assert((nb_hw_breakpoint + nb_hw_watchpoint)
           <= ARRAY_SIZE(hw_debug_points));
    assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));

    /* Hardware breakpoint/watchpoint updates */
    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
        dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
        memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
        for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
            switch (hw_debug_points[n].type) {
            case GDB_BREAKPOINT_HW:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
                break;
            case GDB_WATCHPOINT_WRITE:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
                break;
            case GDB_WATCHPOINT_READ:
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
                break;
            case GDB_WATCHPOINT_ACCESS:
                /* GDB "access" maps to read|write in the KVM interface */
                dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
                                       KVMPPC_DEBUG_WATCH_READ;
                break;
            default:
                cpu_abort(cs, "Unsupported breakpoint type\n");
            }
            dbg->arch.bp[n].addr = hw_debug_points[n].addr;
        }
    }
}
/* Handle a KVM_EXIT_DEBUG. Returns non-zero when the exit belongs to a
 * QEMU-owned debug facility (single-step, hw/sw break/watchpoint) and the
 * run loop should stop with EXCP_DEBUG; returns 0 when the exception came
 * from the guest itself, in which case a program interrupt is injected
 * (see the long rationale comment below) and the guest is resumed.
 */
static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
{
    CPUState *cs = CPU(cpu);
    CPUPPCState *env = &cpu->env;
    struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
    int handle = 0;
    int n;
    int flag = 0;

    if (cs->singlestep_enabled) {
        handle = 1;
    } else if (arch_info->status) {
        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
                if (n >= 0) {
                    handle = 1;
                }
            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
                                            KVMPPC_DEBUG_WATCH_WRITE)) {
                n = find_hw_watchpoint(arch_info->address, &flag);
                if (n >= 0) {
                    handle = 1;
                    cs->watchpoint_hit = &hw_watchpoint;
                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
                    hw_watchpoint.flags = flag;
                }
            }
        }
    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
        handle = 1;
    } else {
        /* QEMU is not able to handle debug exception, so inject
         * program exception to guest;
         * Yes program exception NOT debug exception !!
         * When QEMU is using debug resources then debug exception must
         * be always set. To achieve this we set MSR_DE and also set
         * MSRP_DEP so guest cannot change MSR_DE.
         * When emulating debug resource for guest we want guest
         * to control MSR_DE (enable/disable debug interrupt on need).
         * Supporting both configurations are NOT possible.
         * So the result is that we cannot share debug resources
         * between QEMU and Guest on BOOKE architecture.
         * In the current design QEMU gets the priority over guest,
         * this means that if QEMU is using debug resources then guest
         * cannot use them;
         * For software breakpoint QEMU uses a privileged instruction;
         * So there cannot be any reason that we are here for guest
         * set debug exception, only possibility is guest executed a
         * privileged / illegal instruction and that's why we are
         * injecting a program interrupt.
         */

        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }

    return handle;
}
/* Dispatch a KVM exit reason to the matching PPC handler.
 * Returns 0 to re-enter the guest, EXCP_DEBUG to stop for the debugger,
 * or a negative value on unknown exit reasons. Runs under the iothread
 * lock because handlers touch global device/CPU state.
 */
int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    int ret;

    qemu_mutex_lock_iothread();

    switch (run->exit_reason) {
    case KVM_EXIT_DCR:
        if (run->dcr.is_write) {
            DPRINTF("handle dcr write\n");
            ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
        } else {
            DPRINTF("handle dcr read\n");
            ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
        }
        break;
    case KVM_EXIT_HLT:
        DPRINTF("handle halt\n");
        ret = kvmppc_handle_halt(cpu);
        break;
#if defined(TARGET_PPC64)
    case KVM_EXIT_PAPR_HCALL:
        DPRINTF("handle PAPR hypercall\n");
        run->papr_hcall.ret = spapr_hypercall(cpu,
                                              run->papr_hcall.nr,
                                              run->papr_hcall.args);
        ret = 0;
        break;
#endif
    case KVM_EXIT_EPR:
        /* MPIC proxy: fetch the interrupt vector on the guest's behalf */
        DPRINTF("handle epr\n");
        run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
        ret = 0;
        break;
    case KVM_EXIT_WATCHDOG:
        DPRINTF("handle watchdog expiry\n");
        watchdog_perform_action();
        ret = 0;
        break;

    case KVM_EXIT_DEBUG:
        DPRINTF("handle debug exception\n");
        if (kvm_handle_debug(cpu, run)) {
            ret = EXCP_DEBUG;
            break;
        }
        /* re-enter, this exception was guest-internal */
        ret = 0;
        break;

    default:
        fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
        ret = -1;
        break;
    }

    qemu_mutex_unlock_iothread();
    return ret;
}
1776 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1778 CPUState *cs = CPU(cpu);
1779 uint32_t bits = tsr_bits;
1780 struct kvm_one_reg reg = {
1781 .id = KVM_REG_PPC_OR_TSR,
1782 .addr = (uintptr_t) &bits,
1785 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1788 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1791 CPUState *cs = CPU(cpu);
1792 uint32_t bits = tsr_bits;
1793 struct kvm_one_reg reg = {
1794 .id = KVM_REG_PPC_CLEAR_TSR,
1795 .addr = (uintptr_t) &bits,
1798 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1801 int kvmppc_set_tcr(PowerPCCPU *cpu)
1803 CPUState *cs = CPU(cpu);
1804 CPUPPCState *env = &cpu->env;
1805 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1807 struct kvm_one_reg reg = {
1808 .id = KVM_REG_PPC_TCR,
1809 .addr = (uintptr_t) &tcr,
1812 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1815 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1817 CPUState *cs = CPU(cpu);
1818 int ret;
1820 if (!kvm_enabled()) {
1821 return -1;
1824 if (!cap_ppc_watchdog) {
1825 printf("warning: KVM does not support watchdog");
1826 return -1;
1829 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1830 if (ret < 0) {
1831 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1832 __func__, strerror(-ret));
1833 return ret;
1836 return ret;
/* Scan /proc/cpuinfo for a line starting with 'field' and copy the whole
 * line (prefix included, bounded by 'len') into 'value'.
 * Returns 0 when found, -1 on open failure or when the field is absent.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while(*line);

    fclose(f);

    return ret;
}
/* Read the host timebase frequency from the "timebase" line of
 * /proc/cpuinfo. Falls back to NANOSECONDS_PER_SECOND when the line is
 * missing or malformed.
 * NOTE(review): atoi() silently truncates on overflow and ignores
 * trailing junk after the number — presumably fine for the kernel's
 * "timebase : <n>" format, but worth confirming.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = NANOSECONDS_PER_SECOND;

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    return atoi(ns);
}
/* Read the host serial number from the device tree into *value
 * (caller frees). Returns false if the node cannot be read. */
bool kvmppc_get_host_serial(char **value)
{
    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
                               NULL);
}
/* Read the host model string from the device tree into *value
 * (caller frees). Returns false if the node cannot be read. */
bool kvmppc_get_host_model(char **value)
{
    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
}
/* Try to find a device tree node for a CPU with clock-frequency property */

/* On success, 'buf' holds the path of the first CPU node under
 * /proc/device-tree/cpus/ that has a clock-frequency property and 0 is
 * returned; on failure -1 is returned and a message is printed.
 */
static int kvmppc_find_cpu_dt(char *buf, int buf_len)
{
    struct dirent *dirp;
    DIR *dp;

    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
        printf("Can't open directory " PROC_DEVTREE_CPU "\n");
        return -1;
    }

    buf[0] = '\0';
    while ((dirp = readdir(dp)) != NULL) {
        FILE *f;
        /* Probe for the property by trying to open it */
        snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                 dirp->d_name);
        f = fopen(buf, "r");
        if (f) {
            /* Found one: rewrite buf to hold the node path itself */
            snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
            fclose(f);
            break;
        }
        buf[0] = '\0';
    }
    closedir(dp);
    if (buf[0] == '\0') {
        printf("Unknown host!\n");
        return -1;
    }

    return 0;
}
/* Read a big-endian 32- or 64-bit integer property from a device-tree
 * file. Returns the host-endian value, 0 for unexpected lengths, and
 * (uint64_t)-1 when the file cannot be opened.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } u;
    FILE *f;
    int len;

    f = fopen(filename, "rb");
    if (!f) {
        return -1;
    }

    len = fread(&u, 1, sizeof(u), f);
    fclose(f);
    switch (len) {
    case 4:
        /* property is a 32-bit quantity */
        return be32_to_cpu(u.v32);
    case 8:
        return be64_to_cpu(u.v64);
    }

    return 0;
}
/* Read a CPU node property from the host device tree that's a single
 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
 * (can't find or open the property, or doesn't understand the
 * format) */
static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
{
    char buf[PATH_MAX], *tmp;
    uint64_t val;

    /* (uint64_t)-1 when no suitable CPU node exists at all */
    if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
        return -1;
    }

    tmp = g_strdup_printf("%s/%s", buf, propname);
    val = kvmppc_read_int_dt(tmp);
    g_free(tmp);

    return val;
}
/* Host CPU clock frequency from the device tree; see
 * kvmppc_read_int_cpu_dt() for the error encodings. */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
/* Host AltiVec/VSX capability level from the "ibm,vmx" device-tree
 * property; see kvmppc_read_int_cpu_dt() for error encodings. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
/* Host decimal-FP capability from the "ibm,dfp" device-tree property;
 * see kvmppc_read_int_cpu_dt() for error encodings. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1992 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1994 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1995 CPUState *cs = CPU(cpu);
1997 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1998 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1999 return 0;
2002 return 1;
2005 int kvmppc_get_hasidle(CPUPPCState *env)
2007 struct kvm_ppc_pvinfo pvinfo;
2009 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2010 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2011 return 1;
2014 return 0;
/* Copy the KVM hypercall instruction sequence into buf. Returns 0 when
 * KVM supplied one; otherwise writes a 4-instruction always-fail stub
 * that works in either endianness and returns 1.
 */
int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
{
    uint32_t *hc = (uint32_t*)buf;
    struct kvm_ppc_pvinfo pvinfo;

    if (!kvmppc_get_pvinfo(env, &pvinfo)) {
        memcpy(buf, pvinfo.hcall, buf_len);

        return 0;
    }

    /*
     * Fallback to always fail hypercalls regardless of endianness:
     *
     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
     *     li r3, -1
     *     b .+8       (becomes nop in wrong endian)
     *     bswap32(li r3, -1)
     */

    hc[0] = cpu_to_be32(0x08000048);
    hc[1] = cpu_to_be32(0x3860ffff);
    hc[2] = cpu_to_be32(0x48000008);
    hc[3] = cpu_to_be32(bswap32(0x3860ffff));

    return 1;
}
/* Ask KVM to pass the given hypercall number through its in-kernel
 * handler; returns the enable-cap ioctl result. */
static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
{
    return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
}
/* Enable in-kernel handling of the H_LOGICAL_CI_{LOAD,STORE} hcalls;
 * failures are deliberately ignored (best effort). */
void kvmppc_enable_logical_ci_hcalls(void)
{
    /*
     * FIXME: it would be nice if we could detect the cases where
     * we're using a device which requires the in kernel
     * implementation of these hcalls, but the kernel lacks them and
     * produce a warning.
     */
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
    kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
}
/* Enable in-kernel handling of H_SET_MODE (best effort, result ignored). */
void kvmppc_enable_set_mode_hcall(void)
{
    kvmppc_enable_hcall(kvm_state, H_SET_MODE);
}
/* Enable in-kernel handling of H_CLEAR_REF / H_CLEAR_MOD
 * (best effort, results ignored). */
void kvmppc_enable_clear_ref_mod_hcalls(void)
{
    kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
    kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
}
2072 void kvmppc_set_papr(PowerPCCPU *cpu)
2074 CPUState *cs = CPU(cpu);
2075 int ret;
2077 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2078 if (ret) {
2079 error_report("This vCPU type or KVM version does not support PAPR");
2080 exit(1);
2083 /* Update the capability flag so we sync the right information
2084 * with kvm */
2085 cap_papr = 1;
/* Tell KVM which architecture compatibility level (PVR) the guest should
 * see; returns the set-one-reg result. */
int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
{
    return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
}
2093 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2095 CPUState *cs = CPU(cpu);
2096 int ret;
2098 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2099 if (ret && mpic_proxy) {
2100 error_report("This KVM version does not support EPR");
2101 exit(1);
/* Number of SMT threads per core supported by the host KVM,
 * defaulting to 1 when the capability was not reported. */
int kvmppc_smt_threads(void)
{
    return cap_ppc_smt ? cap_ppc_smt : 1;
}
2110 #ifdef TARGET_PPC64
/* Allocate and map a contiguous Real Mode Area through KVM when the host
 * requires one. Returns the mapped size (capped at 256MiB) with *rma set
 * to the mapping, 0 when no contiguous RMA is needed, or -1 on error.
 * NOTE(review): the fd from KVM_ALLOCATE_RMA is never closed, including
 * on the mmap failure path — presumably the fd must stay open to keep
 * the RMA alive, but the failure-path leak looks unintended; confirm.
 */
off_t kvmppc_alloc_rma(void **rma)
{
    off_t size;
    int fd;
    struct kvm_allocate_rma ret;

    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     *                      not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
        return 0;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
    if (fd < 0) {
        fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
                strerror(errno));
        return -1;
    }

    size = MIN(ret.rma_size, 256ul << 20);

    *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (*rma == MAP_FAILED) {
        fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
        return -1;
    };

    return size;
}
/* Clamp the requested RMA size to what the VRMA mechanism can cover for
 * the given hash table size. When KVM requires a real contiguous RMA
 * (cap_ppc_rma >= 2) the size is already hardware-constrained and is
 * returned unchanged.
 */
uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
{
    struct kvm_ppc_smmu_info info;
    long rampagesize, best_page_shift;
    int i;

    if (cap_ppc_rma >= 2) {
        return current_size;
    }

    /* Find the largest hardware supported page size that's less than
     * or equal to the (logical) backing page size of guest RAM */
    kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
    rampagesize = getrampagesize();
    best_page_shift = 0;

    for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
        struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];

        if (!sps->page_shift) {
            continue;
        }

        if ((sps->page_shift > best_page_shift)
            && ((1UL << sps->page_shift) <= rampagesize)) {
            best_page_shift = sps->page_shift;
        }
    }

    /* VRMA coverage scales with the HPT size: 1/128th per segment page */
    return MIN(current_size,
               1ULL << (best_page_shift + hash_shift - 7));
}
2179 #endif
/* True when the host KVM accelerates H_PUT_TCE_INDIRECT/H_STUFF_TCE. */
bool kvmppc_spapr_use_multitce(void)
{
    return cap_spapr_multitce;
}
/* Create an in-kernel TCE (IOMMU) table for the given LIOBN and mmap it
 * into QEMU. On success returns the mapped table and stores the fd in
 * *pfd; on failure (capability missing, ioctl or mmap error) returns
 * NULL with *pfd == -1.
 */
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
                              bool need_vfio)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    /* One 64-bit TCE entry per IOMMU page in the window */
    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}
/* Unmap and close a TCE table created by kvmppc_create_spapr_tce().
 * Returns 0 on success; -1 when fd < 0 (table was never created by KVM).
 * On munmap/close failure the error is reported and the table is
 * deliberately leaked rather than left half-torn-down.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
/* Ask the kernel to allocate/reset the guest hash page table.
 * Returns the shift of the kernel-allocated HPT, 0 when QEMU must
 * allocate the HPT itself (TCG, PR KVM, or broken PR advertisement),
 * or a negative errno on other ioctl failures.
 */
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;
        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl(). Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls. For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table
     * already. Kernels of this era have the GET_PVINFO capability
     * only on PR, so we use this hack to determine the right
     * answer */
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}
/* Read the host Processor Version Register via the mfpvr instruction
 * (host must be PPC for this file to be built). */
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}
/* Set (on == true) or clear (on == false) the given flag bits in *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
/* Instance init for the "host" CPU type: only valid when KVM is on. */
static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}
/* Class init for the "host" CPU type: patch the generic PPC CPU class
 * with the real host's PVR, AltiVec/VSX/DFP support and cache sizes,
 * all queried from the host device tree / PVR register.
 */
static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    DeviceClass *dc = DEVICE_CLASS(oc);
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    /* kvmppc_get_* return (uint32_t)-1 when the property is unavailable */
    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }

    /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
    dc->cannot_destroy_with_object_finalize_yet = true;
}
/* True when the host KVM supports the EPR (MPIC proxy) capability. */
bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}
/* True when the host KVM supports hash-table fd access (HPT migration). */
bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}
/* True when the host KVM can fix up endianness of guest hcalls. */
bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}
/* Walk up the QOM class hierarchy from a concrete CPU class to the first
 * abstract ancestor, which is the CPU "family" class. */
static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
{
    ObjectClass *oc = OBJECT_CLASS(pcc);

    while (oc && !object_class_is_abstract(oc)) {
        oc = object_class_get_parent(oc);
    }
    assert(oc);

    return POWERPC_CPU_CLASS(oc);
}
2368 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2370 uint32_t host_pvr = mfpvr();
2371 PowerPCCPUClass *pvr_pcc;
2373 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2374 if (pvr_pcc == NULL) {
2375 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2378 return pvr_pcc;
2381 static int kvm_ppc_register_host_cpu_type(void)
2383 TypeInfo type_info = {
2384 .name = TYPE_HOST_POWERPC_CPU,
2385 .instance_init = kvmppc_host_cpu_initfn,
2386 .class_init = kvmppc_host_cpu_class_init,
2388 PowerPCCPUClass *pvr_pcc;
2389 DeviceClass *dc;
2391 pvr_pcc = kvm_ppc_get_host_cpu_class();
2392 if (pvr_pcc == NULL) {
2393 return -1;
2395 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2396 type_register(&type_info);
2398 /* Register generic family CPU class for a family */
2399 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2400 dc = DEVICE_CLASS(pvr_pcc);
2401 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2402 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2403 type_register(&type_info);
2405 #if defined(TARGET_PPC64)
2406 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2407 type_info.parent = TYPE_SPAPR_CPU_CORE,
2408 type_info.instance_size = sizeof(sPAPRCPUCore);
2409 type_info.instance_init = NULL;
2410 type_info.class_init = spapr_cpu_core_class_init;
2411 type_info.class_data = (void *) "host";
2412 type_register(&type_info);
2413 g_free((void *)type_info.name);
2415 /* Register generic spapr CPU family class for current host CPU type */
2416 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, dc->desc);
2417 type_info.class_data = (void *) dc->desc;
2418 type_register(&type_info);
2419 g_free((void *)type_info.name);
2420 #endif
2422 return 0;
2425 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2427 struct kvm_rtas_token_args args = {
2428 .token = token,
2431 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2432 return -ENOENT;
2435 strncpy(args.name, function, sizeof(args.name));
2437 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
/* Open a file descriptor onto the guest hash page table for migration;
 * 'write' selects the restore direction. Returns the fd, or -1 when the
 * host KVM lacks the capability.
 */
int kvmppc_get_htab_fd(bool write)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = 0,
    };

    if (!cap_htab_fd) {
        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
        return -1;
    }

    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
}
/*
 * Stream hash-table chunks from the KVM htab fd into the migration
 * stream f, reading up to bufsize bytes per read() and stopping after
 * max_ns nanoseconds (max_ns < 0 means no time limit).
 *
 * Returns 1 when the fd hit EOF (table fully saved), 0 when the time
 * slice expired first, or the negative read() result on error.
 */
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];   /* VLA sized by the caller's bufsize */
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            /* Each read() returns a sequence of kvm_get_htab_header
             * records, each followed by n_valid HPTEs; walk them and
             * re-emit header fields plus payload into the stream. */
            uint8_t *buffer = buf;
            ssize_t n = rc;
            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                    HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
        /* Keep going until EOF (rc == 0) or the time budget is spent. */
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    /* 1 = done (EOF reached), 0 = more to do on the next call. */
    return (rc == 0) ? 1 : 0;
}
2493 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2494 uint16_t n_valid, uint16_t n_invalid)
2496 struct kvm_get_htab_header *buf;
2497 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2498 ssize_t rc;
2500 buf = alloca(chunksize);
2501 buf->index = index;
2502 buf->n_valid = n_valid;
2503 buf->n_invalid = n_invalid;
2505 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2507 rc = write(fd, buf, chunksize);
2508 if (rc < 0) {
2509 fprintf(stderr, "Error writing KVM hash table: %s\n",
2510 strerror(errno));
2511 return rc;
2513 if (rc != chunksize) {
2514 /* We should never get a short write on a single chunk */
2515 fprintf(stderr, "Short write, restoring KVM hash table\n");
2516 return -1;
2518 return 0;
/* Always ask the common KVM code to stop the guest on emulation errors. */
bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}
/* SIGBUS on a vCPU: always returns 1 — presumably "not handled here",
 * leaving it to generic code; confirm against kvm-all.c's caller. */
int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return 1;
}
/* Process-wide SIGBUS: same convention as the vCPU variant above —
 * returns 1, i.e. no arch-specific handling on ppc. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}
/* No arch-specific IRQ routing setup is needed on ppc. */
void kvm_arch_init_irq_routing(KVMState *s)
{
}
/* Buffer layout for reading/writing one HPTE group through the KVM
 * htab fd: the kernel's chunk header followed by the PTE words
 * (two target_ulongs per entry, HPTES_PER_GROUP entries). */
struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * We require one extra byte for read
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};
/*
 * Read the HPTE group containing pte_index from the kernel.
 *
 * Returns a "token": the address of the hpte[] array inside a
 * heap-allocated kvm_get_htab_buf, cast to uint64_t.  Ownership passes
 * to the caller, who must release it with kvmppc_hash64_free_pteg().
 * Returns 0 on any failure.
 */
uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf *hpte_buf;

    ghf.flags = 0;
    ghf.start_index = pte_index;
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    hpte_buf = g_malloc0(sizeof(*hpte_buf));
    /*
     * Read the hpte group
     */
    if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
        goto out_close;
    }

    /* Success: fd is no longer needed, buffer ownership goes to caller. */
    close(htab_fd);
    return (uint64_t)(uintptr_t) hpte_buf->hpte;

out_close:
    g_free(hpte_buf);
    close(htab_fd);
error_out:
    return 0;
}
2579 void kvmppc_hash64_free_pteg(uint64_t token)
2581 struct kvm_get_htab_buf *htab_buf;
2583 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2584 hpte);
2585 g_free(htab_buf);
2586 return;
/*
 * Write a single HPTE (pte0/pte1) at pte_index into the guest hash
 * table via the KVM htab fd.  Best effort: all failures are silently
 * ignored (the function returns void either way).
 */
void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
                             target_ulong pte0, target_ulong pte1)
{
    int htab_fd;
    struct kvm_get_htab_fd ghf;
    struct kvm_get_htab_buf hpte_buf;

    ghf.flags = 0;
    ghf.start_index = 0;     /* Ignored */
    htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (htab_fd < 0) {
        goto error_out;
    }

    /* One chunk containing exactly one valid entry at pte_index. */
    hpte_buf.header.n_valid = 1;
    hpte_buf.header.n_invalid = 0;
    hpte_buf.header.index = pte_index;
    hpte_buf.hpte[0] = pte0;
    hpte_buf.hpte[1] = pte1;
    /*
     * Write the hpte entry.
     * CAUTION: write() has the warn_unused_result attribute. Hence we
     * need to check the return value, even though we do nothing.
     */
    if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
        goto out_close;     /* same path as success: error is ignored */
    }

out_close:
    close(htab_fd);
    return;

error_out:
    return;
}
/* No MSI route fix-up is needed on ppc; report success unconditionally. */
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}
/* No arch work after adding an MSI route on ppc; always succeeds. */
int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}
/* No arch work after releasing a virq on ppc; always succeeds. */
int kvm_arch_release_virq_post(int virq)
{
    return 0;
}
/*
 * Derive the GSI from an MSI data word: only the low 16 bits carry it,
 * so mask off everything above them.
 */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}
2647 int kvmppc_enable_hwrng(void)
2649 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2650 return -1;
2653 return kvmppc_enable_hcall(kvm_state, H_RANDOM);