ppc: Check the availability of transactional memory
[qemu/ar7.git] / target-ppc / kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "qemu/timer.h"
28 #include "sysemu/sysemu.h"
29 #include "sysemu/kvm.h"
30 #include "sysemu/numa.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "sysemu/hostmem.h"
46 #include "qemu/cutils.h"
47 #if defined(TARGET_PPC64)
48 #include "hw/ppc/spapr_cpu_core.h"
49 #endif
51 //#define DEBUG_KVM
53 #ifdef DEBUG_KVM
54 #define DPRINTF(fmt, ...) \
55 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
56 #else
57 #define DPRINTF(fmt, ...) \
58 do { } while (0)
59 #endif
61 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
63 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
64 KVM_CAP_LAST_INFO
67 static int cap_interrupt_unset = false;
68 static int cap_interrupt_level = false;
69 static int cap_segstate;
70 static int cap_booke_sregs;
71 static int cap_ppc_smt;
72 static int cap_ppc_rma;
73 static int cap_spapr_tce;
74 static int cap_spapr_multitce;
75 static int cap_spapr_vfio;
76 static int cap_hior;
77 static int cap_one_reg;
78 static int cap_epr;
79 static int cap_ppc_watchdog;
80 static int cap_papr;
81 static int cap_htab_fd;
82 static int cap_fixup_hcalls;
83 static int cap_htm; /* Hardware transactional memory support */
85 static uint32_t debug_inst_opcode;
87 /* XXX We have a race condition where we actually have a level triggered
88 * interrupt, but the infrastructure can't expose that yet, so the guest
89 * takes but ignores it, goes to sleep and never gets notified that there's
90 * still an interrupt pending.
92 * As a quick workaround, let's just wake up again 20 ms after we injected
 93  * an interrupt. That way we can ensure that we're always reinjecting
94 * interrupts in case the guest swallowed them.
96 static QEMUTimer *idle_timer;
98 static void kvm_kick_cpu(void *opaque)
100 PowerPCCPU *cpu = opaque;
102 qemu_cpu_kick(CPU(cpu));
105 static int kvm_ppc_register_host_cpu_type(void);
107 int kvm_arch_init(MachineState *ms, KVMState *s)
109 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
110 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
111 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
112 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
113 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
114 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
115 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
116 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
117 cap_spapr_vfio = false;
118 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
119 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
120 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
121 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
122 /* Note: we don't set cap_papr here, because this capability is
123 * only activated after this by kvmppc_set_papr() */
124 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
125 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
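/* KVM_CAP_PPC_HTM is queried on the VM fd (kvm_vm_check_extension()) rather
 * than globally; the flag is presumably meant to let the machine code decide
 * whether hardware transactional memory can be advertised to the guest. */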
126 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
128 if (!cap_interrupt_level) {
129 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
130 "VM to stall at times!\n");
133 kvm_ppc_register_host_cpu_type();
135 return 0;
138 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
140 CPUPPCState *cenv = &cpu->env;
141 CPUState *cs = CPU(cpu);
142 struct kvm_sregs sregs;
143 int ret;
145 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
146 /* What we're really trying to say is "if we're on BookE, we use
147 the native PVR for now". This is the only sane way to check
148        it though, so we may mislead users into thinking they can run
149        BookE guests on BookS. Let's hope nobody dares to try :) */
150 return 0;
151 } else {
152 if (!cap_segstate) {
153 fprintf(stderr, "kvm error: missing PVR setting capability\n");
154 return -ENOSYS;
158 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
159 if (ret) {
160 return ret;
163 sregs.pvr = cenv->spr[SPR_PVR];
164 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
167 /* Set up a shared TLB array with KVM */
168 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
170 CPUPPCState *env = &cpu->env;
171 CPUState *cs = CPU(cpu);
172 struct kvm_book3e_206_tlb_params params = {};
173 struct kvm_config_tlb cfg = {};
174 unsigned int entries = 0;
175 int ret, i;
177 if (!kvm_enabled() ||
178 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
179 return 0;
182 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
184 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
185 params.tlb_sizes[i] = booke206_tlb_size(env, i);
186 params.tlb_ways[i] = booke206_tlb_ways(env, i);
187 entries += params.tlb_sizes[i];
190 assert(entries == env->nb_tlb);
191 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
193 env->tlb_dirty = true;
195 cfg.array = (uintptr_t)env->tlb.tlbm;
196 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
197 cfg.params = (uintptr_t)&params;
198 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
200 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
201 if (ret < 0) {
202 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
203 __func__, strerror(-ret));
204 return ret;
207 env->kvm_sw_tlb = true;
208 return 0;
212 #if defined(TARGET_PPC64)
213 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
214 struct kvm_ppc_smmu_info *info)
216 CPUPPCState *env = &cpu->env;
217 CPUState *cs = CPU(cpu);
219 memset(info, 0, sizeof(*info));
221 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
222 * need to "guess" what the supported page sizes are.
224 * For that to work we make a few assumptions:
226 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
227 * KVM which only supports 4K and 16M pages, but supports them
228  *     regardless of the backing store characteristics. We also don't
229 * support 1T segments.
231 * This is safe as if HV KVM ever supports that capability or PR
232  *   KVM grows support for more page/segment sizes, those versions
233 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
234 * will not hit this fallback
236 * - Else we are running HV KVM. This means we only support page
237 * sizes that fit in the backing store. Additionally we only
238  *   advertise 64K pages if the processor is ARCH 2.06 and we assume
239 * P7 encodings for the SLB and hash table. Here too, we assume
240 * support for any newer processor will mean a kernel that
241 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
242 * this fallback.
244 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
245 /* No flags */
246 info->flags = 0;
247 info->slb_size = 64;
249 /* Standard 4k base page size segment */
250 info->sps[0].page_shift = 12;
251 info->sps[0].slb_enc = 0;
252 info->sps[0].enc[0].page_shift = 12;
253 info->sps[0].enc[0].pte_enc = 0;
255 /* Standard 16M large page size segment */
256 info->sps[1].page_shift = 24;
257 info->sps[1].slb_enc = SLB_VSID_L;
258 info->sps[1].enc[0].page_shift = 24;
259 info->sps[1].enc[0].pte_enc = 0;
260 } else {
261 int i = 0;
263 /* HV KVM has backing store size restrictions */
264 info->flags = KVM_PPC_PAGE_SIZES_REAL;
266 if (env->mmu_model & POWERPC_MMU_1TSEG) {
267 info->flags |= KVM_PPC_1T_SEGMENTS;
270 if (env->mmu_model == POWERPC_MMU_2_06 ||
271 env->mmu_model == POWERPC_MMU_2_07) {
272 info->slb_size = 32;
273 } else {
274 info->slb_size = 64;
277 /* Standard 4k base page size segment */
278 info->sps[i].page_shift = 12;
279 info->sps[i].slb_enc = 0;
280 info->sps[i].enc[0].page_shift = 12;
281 info->sps[i].enc[0].pte_enc = 0;
282 i++;
284 /* 64K on MMU 2.06 and later */
285 if (env->mmu_model == POWERPC_MMU_2_06 ||
286 env->mmu_model == POWERPC_MMU_2_07) {
287 info->sps[i].page_shift = 16;
288 info->sps[i].slb_enc = 0x110;
289 info->sps[i].enc[0].page_shift = 16;
290 info->sps[i].enc[0].pte_enc = 1;
291 i++;
294 /* Standard 16M large page size segment */
295 info->sps[i].page_shift = 24;
296 info->sps[i].slb_enc = SLB_VSID_L;
297 info->sps[i].enc[0].page_shift = 24;
298 info->sps[i].enc[0].pte_enc = 0;
302 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
304 CPUState *cs = CPU(cpu);
305 int ret;
307 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
308 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
309 if (ret == 0) {
310 return;
314 kvm_get_fallback_smmu_info(cpu, info);
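/* Return the page size backing mem_path: the filesystem block size when the
 * path lives on hugetlbfs, otherwise the ordinary host page size. */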
317 static long gethugepagesize(const char *mem_path)
319 struct statfs fs;
320 int ret;
322 do {
323 ret = statfs(mem_path, &fs);
324 } while (ret != 0 && errno == EINTR);
326 if (ret != 0) {
327 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
328 strerror(errno));
329 exit(1);
332 #define HUGETLBFS_MAGIC 0x958458f6
334 if (fs.f_type != HUGETLBFS_MAGIC) {
335 /* Explicit mempath, but it's ordinary pages */
336 return getpagesize();
339     /* It's a hugepage; return the huge page size */
340 return fs.f_bsize;
344 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
345 * may or may not name the same files / on the same filesystem now as
346 * when we actually open and map them. Iterate over the file
347 * descriptors instead, and use qemu_fd_getpagesize().
349 static int find_max_supported_pagesize(Object *obj, void *opaque)
351 char *mem_path;
352 long *hpsize_min = opaque;
354 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
355 mem_path = object_property_get_str(obj, "mem-path", NULL);
356 if (mem_path) {
357 long hpsize = gethugepagesize(mem_path);
358 if (hpsize < *hpsize_min) {
359 *hpsize_min = hpsize;
361 } else {
362 *hpsize_min = getpagesize();
366 return 0;
369 static long getrampagesize(void)
371 long hpsize = LONG_MAX;
372 long mainrampagesize;
373 Object *memdev_root;
375 if (mem_path) {
376 mainrampagesize = gethugepagesize(mem_path);
377 } else {
378 mainrampagesize = getpagesize();
381 /* it's possible we have memory-backend objects with
382 * hugepage-backed RAM. these may get mapped into system
383 * address space via -numa parameters or memory hotplug
384 * hooks. we want to take these into account, but we
385 * also want to make sure these supported hugepage
386 * sizes are applicable across the entire range of memory
387 * we may boot from, so we take the min across all
388 * backends, and assume normal pages in cases where a
389 * backend isn't backed by hugepages.
391 memdev_root = object_resolve_path("/objects", NULL);
392 if (memdev_root) {
393 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
395 if (hpsize == LONG_MAX) {
396 /* No additional memory regions found ==> Report main RAM page size */
397 return mainrampagesize;
400 /* If NUMA is disabled or the NUMA nodes are not backed with a
401 * memory-backend, then there is at least one node using "normal" RAM,
402 * so if its page size is smaller we have got to report that size instead.
404 if (hpsize > mainrampagesize &&
405 (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
406 static bool warned;
407 if (!warned) {
408 error_report("Huge page support disabled (n/a for main memory).");
409 warned = true;
411 return mainrampagesize;
414 return hpsize;
417 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
419 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
420 return true;
423 return (1ul << shift) <= rampgsize;
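/* Reconcile the CPU's advertised segment/page sizes with what KVM and the
 * RAM backing page size actually support, dropping anything unusable. */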
426 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
428 static struct kvm_ppc_smmu_info smmu_info;
429 static bool has_smmu_info;
430 CPUPPCState *env = &cpu->env;
431 long rampagesize;
432 int iq, ik, jq, jk;
433 bool has_64k_pages = false;
435 /* We only handle page sizes for 64-bit server guests for now */
436 if (!(env->mmu_model & POWERPC_MMU_64)) {
437 return;
440 /* Collect MMU info from kernel if not already */
441 if (!has_smmu_info) {
442 kvm_get_smmu_info(cpu, &smmu_info);
443 has_smmu_info = true;
446 rampagesize = getrampagesize();
448 /* Convert to QEMU form */
449 memset(&env->sps, 0, sizeof(env->sps));
451 /* If we have HV KVM, we need to forbid CI large pages if our
452 * host page size is smaller than 64K.
454 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
455 env->ci_large_pages = getpagesize() >= 0x10000;
459 * XXX This loop should be an entry wide AND of the capabilities that
460 * the selected CPU has with the capabilities that KVM supports.
462 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
463 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
464 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
466 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
467 ksps->page_shift)) {
468 continue;
470 qsps->page_shift = ksps->page_shift;
471 qsps->slb_enc = ksps->slb_enc;
472 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
473 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
474 ksps->enc[jk].page_shift)) {
475 continue;
477 if (ksps->enc[jk].page_shift == 16) {
478 has_64k_pages = true;
480 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
481 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
482 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
483 break;
486 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
487 break;
490 env->slb_nr = smmu_info.slb_size;
491 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
492 env->mmu_model &= ~POWERPC_MMU_1TSEG;
494 if (!has_64k_pages) {
495 env->mmu_model &= ~POWERPC_MMU_64K;
498 #else /* defined (TARGET_PPC64) */
500 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
504 #endif /* !defined (TARGET_PPC64) */
506 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
508 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
511 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
512 * book3s supports only 1 watchpoint, so array size
513 * of 4 is sufficient for now.
515 #define MAX_HW_BKPTS 4
517 static struct HWBreakpoint {
518 target_ulong addr;
519 int type;
520 } hw_debug_points[MAX_HW_BKPTS];
522 static CPUWatchpoint hw_watchpoint;
524 /* By default no breakpoints or watchpoints are supported */
525 static int max_hw_breakpoint;
526 static int max_hw_watchpoint;
527 static int nb_hw_breakpoint;
528 static int nb_hw_watchpoint;
530 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
532 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
533 max_hw_breakpoint = 2;
534 max_hw_watchpoint = 2;
537 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
538 fprintf(stderr, "Error initializing h/w breakpoints\n");
539 return;
543 int kvm_arch_init_vcpu(CPUState *cs)
545 PowerPCCPU *cpu = POWERPC_CPU(cs);
546 CPUPPCState *cenv = &cpu->env;
547 int ret;
549 /* Gather server mmu info from KVM and update the CPU state */
550 kvm_fixup_page_sizes(cpu);
552 /* Synchronize sregs with kvm */
553 ret = kvm_arch_sync_sregs(cpu);
554 if (ret) {
555 if (ret == -EINVAL) {
556 error_report("Register sync failed... If you're using kvm-hv.ko,"
557 " only \"-cpu host\" is possible");
559 return ret;
562 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
564 /* Some targets support access to KVM's guest TLB. */
565 switch (cenv->mmu_model) {
566 case POWERPC_MMU_BOOKE206:
567 ret = kvm_booke206_tlb_init(cpu);
568 break;
569 default:
570 break;
573 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
574 kvmppc_hw_debug_points_init(cenv);
576 return ret;
579 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
581 CPUPPCState *env = &cpu->env;
582 CPUState *cs = CPU(cpu);
583 struct kvm_dirty_tlb dirty_tlb;
584 unsigned char *bitmap;
585 int ret;
587 if (!env->kvm_sw_tlb) {
588 return;
591 bitmap = g_malloc((env->nb_tlb + 7) / 8);
592 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
594 dirty_tlb.bitmap = (uintptr_t)bitmap;
595 dirty_tlb.num_dirty = env->nb_tlb;
597 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
598 if (ret) {
599 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
600 __func__, strerror(-ret));
603 g_free(bitmap);
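/* Read a single SPR through the ONE_REG interface; the register width
 * (32 or 64 bit) is encoded in the ONE_REG id itself. */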
606 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
608 PowerPCCPU *cpu = POWERPC_CPU(cs);
609 CPUPPCState *env = &cpu->env;
610 union {
611 uint32_t u32;
612 uint64_t u64;
613 } val;
614 struct kvm_one_reg reg = {
615 .id = id,
616 .addr = (uintptr_t) &val,
618 int ret;
620 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
621 if (ret != 0) {
622 trace_kvm_failed_spr_get(spr, strerror(errno));
623 } else {
624 switch (id & KVM_REG_SIZE_MASK) {
625 case KVM_REG_SIZE_U32:
626 env->spr[spr] = val.u32;
627 break;
629 case KVM_REG_SIZE_U64:
630 env->spr[spr] = val.u64;
631 break;
633 default:
634 /* Don't handle this size yet */
635 abort();
640 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
642 PowerPCCPU *cpu = POWERPC_CPU(cs);
643 CPUPPCState *env = &cpu->env;
644 union {
645 uint32_t u32;
646 uint64_t u64;
647 } val;
648 struct kvm_one_reg reg = {
649 .id = id,
650 .addr = (uintptr_t) &val,
652 int ret;
654 switch (id & KVM_REG_SIZE_MASK) {
655 case KVM_REG_SIZE_U32:
656 val.u32 = env->spr[spr];
657 break;
659 case KVM_REG_SIZE_U64:
660 val.u64 = env->spr[spr];
661 break;
663 default:
664 /* Don't handle this size yet */
665 abort();
668 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
669 if (ret != 0) {
670 trace_kvm_failed_spr_set(spr, strerror(errno));
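/* Push FP/VSX and Altivec state to KVM. The FP and VSX values for each
 * register are assembled into one vsr[2] pair whose element order depends
 * on host endianness. */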
674 static int kvm_put_fp(CPUState *cs)
676 PowerPCCPU *cpu = POWERPC_CPU(cs);
677 CPUPPCState *env = &cpu->env;
678 struct kvm_one_reg reg;
679 int i;
680 int ret;
682 if (env->insns_flags & PPC_FLOAT) {
683 uint64_t fpscr = env->fpscr;
684 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
686 reg.id = KVM_REG_PPC_FPSCR;
687 reg.addr = (uintptr_t)&fpscr;
688 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
689 if (ret < 0) {
690 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
691 return ret;
694 for (i = 0; i < 32; i++) {
695 uint64_t vsr[2];
697 #ifdef HOST_WORDS_BIGENDIAN
698 vsr[0] = float64_val(env->fpr[i]);
699 vsr[1] = env->vsr[i];
700 #else
701 vsr[0] = env->vsr[i];
702 vsr[1] = float64_val(env->fpr[i]);
703 #endif
704 reg.addr = (uintptr_t) &vsr;
705 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
707 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
708 if (ret < 0) {
709 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
710 i, strerror(errno));
711 return ret;
716 if (env->insns_flags & PPC_ALTIVEC) {
717 reg.id = KVM_REG_PPC_VSCR;
718 reg.addr = (uintptr_t)&env->vscr;
719 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
720 if (ret < 0) {
721 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
722 return ret;
725 for (i = 0; i < 32; i++) {
726 reg.id = KVM_REG_PPC_VR(i);
727 reg.addr = (uintptr_t)&env->avr[i];
728 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
729 if (ret < 0) {
730 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
731 return ret;
736 return 0;
739 static int kvm_get_fp(CPUState *cs)
741 PowerPCCPU *cpu = POWERPC_CPU(cs);
742 CPUPPCState *env = &cpu->env;
743 struct kvm_one_reg reg;
744 int i;
745 int ret;
747 if (env->insns_flags & PPC_FLOAT) {
748 uint64_t fpscr;
749 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
751 reg.id = KVM_REG_PPC_FPSCR;
752 reg.addr = (uintptr_t)&fpscr;
753 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
754 if (ret < 0) {
755 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
756 return ret;
757 } else {
758 env->fpscr = fpscr;
761 for (i = 0; i < 32; i++) {
762 uint64_t vsr[2];
764 reg.addr = (uintptr_t) &vsr;
765 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
767 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
768 if (ret < 0) {
769 DPRINTF("Unable to get %s%d from KVM: %s\n",
770 vsx ? "VSR" : "FPR", i, strerror(errno));
771 return ret;
772 } else {
773 #ifdef HOST_WORDS_BIGENDIAN
774 env->fpr[i] = vsr[0];
775 if (vsx) {
776 env->vsr[i] = vsr[1];
778 #else
779 env->fpr[i] = vsr[1];
780 if (vsx) {
781 env->vsr[i] = vsr[0];
783 #endif
788 if (env->insns_flags & PPC_ALTIVEC) {
789 reg.id = KVM_REG_PPC_VSCR;
790 reg.addr = (uintptr_t)&env->vscr;
791 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
792 if (ret < 0) {
793 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
794 return ret;
797 for (i = 0; i < 32; i++) {
798 reg.id = KVM_REG_PPC_VR(i);
799 reg.addr = (uintptr_t)&env->avr[i];
800 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
801 if (ret < 0) {
802 DPRINTF("Unable to get VR%d from KVM: %s\n",
803 i, strerror(errno));
804 return ret;
809 return 0;
812 #if defined(TARGET_PPC64)
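/* The VPA (Virtual Processor Area), SLB shadow buffer and dispatch trace
 * log are areas of guest memory shared with the hypervisor under PAPR;
 * their guest addresses and sizes are transferred via ONE_REG. */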
813 static int kvm_get_vpa(CPUState *cs)
815 PowerPCCPU *cpu = POWERPC_CPU(cs);
816 CPUPPCState *env = &cpu->env;
817 struct kvm_one_reg reg;
818 int ret;
820 reg.id = KVM_REG_PPC_VPA_ADDR;
821 reg.addr = (uintptr_t)&env->vpa_addr;
822 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
823 if (ret < 0) {
824 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
825 return ret;
828 assert((uintptr_t)&env->slb_shadow_size
829 == ((uintptr_t)&env->slb_shadow_addr + 8));
830 reg.id = KVM_REG_PPC_VPA_SLB;
831 reg.addr = (uintptr_t)&env->slb_shadow_addr;
832 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
833 if (ret < 0) {
834 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
835 strerror(errno));
836 return ret;
839 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
840 reg.id = KVM_REG_PPC_VPA_DTL;
841 reg.addr = (uintptr_t)&env->dtl_addr;
842 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
843 if (ret < 0) {
844 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
845 strerror(errno));
846 return ret;
849 return 0;
852 static int kvm_put_vpa(CPUState *cs)
854 PowerPCCPU *cpu = POWERPC_CPU(cs);
855 CPUPPCState *env = &cpu->env;
856 struct kvm_one_reg reg;
857 int ret;
859 /* SLB shadow or DTL can't be registered unless a master VPA is
860 * registered. That means when restoring state, if a VPA *is*
861 * registered, we need to set that up first. If not, we need to
862 * deregister the others before deregistering the master VPA */
863 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
865 if (env->vpa_addr) {
866 reg.id = KVM_REG_PPC_VPA_ADDR;
867 reg.addr = (uintptr_t)&env->vpa_addr;
868 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
869 if (ret < 0) {
870 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
871 return ret;
875 assert((uintptr_t)&env->slb_shadow_size
876 == ((uintptr_t)&env->slb_shadow_addr + 8));
877 reg.id = KVM_REG_PPC_VPA_SLB;
878 reg.addr = (uintptr_t)&env->slb_shadow_addr;
879 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
880 if (ret < 0) {
881 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
882 return ret;
885 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
886 reg.id = KVM_REG_PPC_VPA_DTL;
887 reg.addr = (uintptr_t)&env->dtl_addr;
888 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
889 if (ret < 0) {
890 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
891 strerror(errno));
892 return ret;
895 if (!env->vpa_addr) {
896 reg.id = KVM_REG_PPC_VPA_ADDR;
897 reg.addr = (uintptr_t)&env->vpa_addr;
898 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
899 if (ret < 0) {
900 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
901 return ret;
905 return 0;
907 #endif /* TARGET_PPC64 */
909 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
911 CPUPPCState *env = &cpu->env;
912 struct kvm_sregs sregs;
913 int i;
915 sregs.pvr = env->spr[SPR_PVR];
917 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
919 /* Sync SLB */
920 #ifdef TARGET_PPC64
921 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
922 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
923 if (env->slb[i].esid & SLB_ESID_V) {
924 sregs.u.s.ppc64.slb[i].slbe |= i;
926 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
928 #endif
930 /* Sync SRs */
931 for (i = 0; i < 16; i++) {
932 sregs.u.s.ppc32.sr[i] = env->sr[i];
935 /* Sync BATs */
936 for (i = 0; i < 8; i++) {
937         /* Beware. We have to swap the upper and lower 32-bit halves here */
938 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
939 | env->DBAT[1][i];
940 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
941 | env->IBAT[1][i];
944 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
947 int kvm_arch_put_registers(CPUState *cs, int level)
949 PowerPCCPU *cpu = POWERPC_CPU(cs);
950 CPUPPCState *env = &cpu->env;
951 struct kvm_regs regs;
952 int ret;
953 int i;
955 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
956 if (ret < 0) {
957 return ret;
960 regs.ctr = env->ctr;
961 regs.lr = env->lr;
962 regs.xer = cpu_read_xer(env);
963 regs.msr = env->msr;
964 regs.pc = env->nip;
966 regs.srr0 = env->spr[SPR_SRR0];
967 regs.srr1 = env->spr[SPR_SRR1];
969 regs.sprg0 = env->spr[SPR_SPRG0];
970 regs.sprg1 = env->spr[SPR_SPRG1];
971 regs.sprg2 = env->spr[SPR_SPRG2];
972 regs.sprg3 = env->spr[SPR_SPRG3];
973 regs.sprg4 = env->spr[SPR_SPRG4];
974 regs.sprg5 = env->spr[SPR_SPRG5];
975 regs.sprg6 = env->spr[SPR_SPRG6];
976 regs.sprg7 = env->spr[SPR_SPRG7];
978 regs.pid = env->spr[SPR_BOOKE_PID];
980 for (i = 0;i < 32; i++)
981 regs.gpr[i] = env->gpr[i];
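/* Pack the eight 4-bit CR fields into the single 32-bit regs.cr,
 * with CR0 in the most significant nibble. */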
983 regs.cr = 0;
984 for (i = 0; i < 8; i++) {
985 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
988 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
989 if (ret < 0)
990 return ret;
992 kvm_put_fp(cs);
994 if (env->tlb_dirty) {
995 kvm_sw_tlb_put(cpu);
996 env->tlb_dirty = false;
999 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1000 ret = kvmppc_put_books_sregs(cpu);
1001 if (ret < 0) {
1002 return ret;
1006 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1007 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1010 if (cap_one_reg) {
1011 int i;
1013         /* We deliberately ignore errors here: for kernels which have
1014          * the ONE_REG calls but don't support the specific
1015 * registers, there's a reasonable chance things will still
1016 * work, at least until we try to migrate. */
1017 for (i = 0; i < 1024; i++) {
1018 uint64_t id = env->spr_cb[i].one_reg_id;
1020 if (id != 0) {
1021 kvm_put_one_spr(cs, id, i);
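/* If a transaction is active or suspended (MSR[TS] != 0), the
 * checkpointed "tm_" register set has to be transferred as well. */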
1025 #ifdef TARGET_PPC64
1026 if (msr_ts) {
1027 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1028 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1030 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1031 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1033 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1034 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1035 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1036 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1037 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1038 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1039 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1040 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1041 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1042 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1045 if (cap_papr) {
1046 if (kvm_put_vpa(cs) < 0) {
1047 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1051 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1052 #endif /* TARGET_PPC64 */
1055 return ret;
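/* BookE exception vectors are IVPR (the common prefix) plus the per-vector
 * IVOR offset; keep QEMU's cached copy of the resulting vector in sync. */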
1058 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1060 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1063 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1065 CPUPPCState *env = &cpu->env;
1066 struct kvm_sregs sregs;
1067 int ret;
1069 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1070 if (ret < 0) {
1071 return ret;
1074 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1075 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1076 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1077 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1078 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1079 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1080 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1081 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1082 env->spr[SPR_DECR] = sregs.u.e.dec;
1083 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1084 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1085 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1088 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1089 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1090 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1091 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1092 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1093 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1096 if (sregs.u.e.features & KVM_SREGS_E_64) {
1097 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1100 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1101 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1104 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1105 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1106 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1107 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1108 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1109 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1110 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1111 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1112 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1113 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1114 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1115 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1116 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1117 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1118 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1119 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1120 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1121 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1122 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1123 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1124 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1125 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1126 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1127 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1128 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1129 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1130 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1131 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1132 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1133 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1134 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1135 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1136 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1138 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1139 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1140 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1141 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1142 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1143 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1144 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1147 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1148 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1149 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1152 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1153 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1154 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1155 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1156 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1160 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1161 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1162 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1163 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1164 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1165 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1166 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1167 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1168 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1169 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1170 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1173 if (sregs.u.e.features & KVM_SREGS_EXP) {
1174 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1177 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1178 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1179 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1182 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1183 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1184 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1185 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1187 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1188 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1189 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1193 return 0;
1196 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1198 CPUPPCState *env = &cpu->env;
1199 struct kvm_sregs sregs;
1200 int ret;
1201 int i;
1203 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1204 if (ret < 0) {
1205 return ret;
1208 if (!env->external_htab) {
1209 ppc_store_sdr1(env, sregs.u.s.sdr1);
1212 /* Sync SLB */
1213 #ifdef TARGET_PPC64
1215 * The packed SLB array we get from KVM_GET_SREGS only contains
1216 * information about valid entries. So we flush our internal copy
1217 * to get rid of stale ones, then put all valid SLB entries back
1218 * in.
1220 memset(env->slb, 0, sizeof(env->slb));
1221 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1222 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1223 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1225 * Only restore valid entries
1227 if (rb & SLB_ESID_V) {
1228 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1231 #endif
1233 /* Sync SRs */
1234 for (i = 0; i < 16; i++) {
1235 env->sr[i] = sregs.u.s.ppc32.sr[i];
1238 /* Sync BATs */
1239 for (i = 0; i < 8; i++) {
1240 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1241 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1242 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1243 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1246 return 0;
1249 int kvm_arch_get_registers(CPUState *cs)
1251 PowerPCCPU *cpu = POWERPC_CPU(cs);
1252 CPUPPCState *env = &cpu->env;
1253 struct kvm_regs regs;
1254 uint32_t cr;
1255 int i, ret;
1257 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1258 if (ret < 0)
1259 return ret;
1261 cr = regs.cr;
1262 for (i = 7; i >= 0; i--) {
1263 env->crf[i] = cr & 15;
1264 cr >>= 4;
1267 env->ctr = regs.ctr;
1268 env->lr = regs.lr;
1269 cpu_write_xer(env, regs.xer);
1270 env->msr = regs.msr;
1271 env->nip = regs.pc;
1273 env->spr[SPR_SRR0] = regs.srr0;
1274 env->spr[SPR_SRR1] = regs.srr1;
1276 env->spr[SPR_SPRG0] = regs.sprg0;
1277 env->spr[SPR_SPRG1] = regs.sprg1;
1278 env->spr[SPR_SPRG2] = regs.sprg2;
1279 env->spr[SPR_SPRG3] = regs.sprg3;
1280 env->spr[SPR_SPRG4] = regs.sprg4;
1281 env->spr[SPR_SPRG5] = regs.sprg5;
1282 env->spr[SPR_SPRG6] = regs.sprg6;
1283 env->spr[SPR_SPRG7] = regs.sprg7;
1285 env->spr[SPR_BOOKE_PID] = regs.pid;
1287 for (i = 0;i < 32; i++)
1288 env->gpr[i] = regs.gpr[i];
1290 kvm_get_fp(cs);
1292 if (cap_booke_sregs) {
1293 ret = kvmppc_get_booke_sregs(cpu);
1294 if (ret < 0) {
1295 return ret;
1299 if (cap_segstate) {
1300 ret = kvmppc_get_books_sregs(cpu);
1301 if (ret < 0) {
1302 return ret;
1306 if (cap_hior) {
1307 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1310 if (cap_one_reg) {
1311 int i;
1313         /* We deliberately ignore errors here: for kernels which have
1314          * the ONE_REG calls but don't support the specific
1315 * registers, there's a reasonable chance things will still
1316 * work, at least until we try to migrate. */
1317 for (i = 0; i < 1024; i++) {
1318 uint64_t id = env->spr_cb[i].one_reg_id;
1320 if (id != 0) {
1321 kvm_get_one_spr(cs, id, i);
1325 #ifdef TARGET_PPC64
1326 if (msr_ts) {
1327 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1328 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1330 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1331 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1333 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1334 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1335 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1336 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1337 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1338 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1339 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1340 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1341 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1342 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1345 if (cap_papr) {
1346 if (kvm_get_vpa(cs) < 0) {
1347 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1351 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1352 #endif
1355 return 0;
1358 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1360 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1362 if (irq != PPC_INTERRUPT_EXT) {
1363 return 0;
1366 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1367 return 0;
1370 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1372 return 0;
1375 #if defined(TARGET_PPCEMB)
1376 #define PPC_INPUT_INT PPC40x_INPUT_INT
1377 #elif defined(TARGET_PPC64)
1378 #define PPC_INPUT_INT PPC970_INPUT_INT
1379 #else
1380 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1381 #endif
1383 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1385 PowerPCCPU *cpu = POWERPC_CPU(cs);
1386 CPUPPCState *env = &cpu->env;
1387 int r;
1388 unsigned irq;
1390 qemu_mutex_lock_iothread();
1392 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1393 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1394 if (!cap_interrupt_level &&
1395 run->ready_for_interrupt_injection &&
1396 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1397 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1399 /* For now KVM disregards the 'irq' argument. However, in the
1400 * future KVM could cache it in-kernel to avoid a heavyweight exit
1401 * when reading the UIC.
1403 irq = KVM_INTERRUPT_SET;
1405 DPRINTF("injected interrupt %d\n", irq);
1406 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1407 if (r < 0) {
1408 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1411 /* Always wake up soon in case the interrupt was level based */
1412 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1413 (NANOSECONDS_PER_SECOND / 50));
1416 /* We don't know if there are more interrupts pending after this. However,
1417 * the guest will return to userspace in the course of handling this one
1418      * anyway, so we will get a chance to deliver the rest. */
1420 qemu_mutex_unlock_iothread();
1423 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1425 return MEMTXATTRS_UNSPECIFIED;
1428 int kvm_arch_process_async_events(CPUState *cs)
1430 return cs->halted;
1433 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1435 CPUState *cs = CPU(cpu);
1436 CPUPPCState *env = &cpu->env;
1438 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1439 cs->halted = 1;
1440 cs->exception_index = EXCP_HLT;
1443 return 0;
1446 /* map dcr access to existing qemu dcr emulation */
1447 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1449 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1450 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1452 return 0;
1455 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1457 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1458 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1460 return 0;
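/* Software breakpoints work by saving the original instruction at bp->pc
 * and patching in the KVM-provided debug instruction (debug_inst_opcode). */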
1463 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1465 /* Mixed endian case is not handled */
1466 uint32_t sc = debug_inst_opcode;
1468 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1469 sizeof(sc), 0) ||
1470 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1471 return -EINVAL;
1474 return 0;
1477 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1479 uint32_t sc;
1481 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1482 sc != debug_inst_opcode ||
1483 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1484 sizeof(sc), 1)) {
1485 return -EINVAL;
1488 return 0;
1491 static int find_hw_breakpoint(target_ulong addr, int type)
1493 int n;
1495 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1496 <= ARRAY_SIZE(hw_debug_points));
1498 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1499 if (hw_debug_points[n].addr == addr &&
1500 hw_debug_points[n].type == type) {
1501 return n;
1505 return -1;
1508 static int find_hw_watchpoint(target_ulong addr, int *flag)
1510 int n;
1512 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1513 if (n >= 0) {
1514 *flag = BP_MEM_ACCESS;
1515 return n;
1518 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1519 if (n >= 0) {
1520 *flag = BP_MEM_WRITE;
1521 return n;
1524 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1525 if (n >= 0) {
1526 *flag = BP_MEM_READ;
1527 return n;
1530 return -1;
1533 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1534 target_ulong len, int type)
1536 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1537 return -ENOBUFS;
1540 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1541 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1543 switch (type) {
1544 case GDB_BREAKPOINT_HW:
1545 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1546 return -ENOBUFS;
1549 if (find_hw_breakpoint(addr, type) >= 0) {
1550 return -EEXIST;
1553 nb_hw_breakpoint++;
1554 break;
1556 case GDB_WATCHPOINT_WRITE:
1557 case GDB_WATCHPOINT_READ:
1558 case GDB_WATCHPOINT_ACCESS:
1559 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1560 return -ENOBUFS;
1563 if (find_hw_breakpoint(addr, type) >= 0) {
1564 return -EEXIST;
1567 nb_hw_watchpoint++;
1568 break;
1570 default:
1571 return -ENOSYS;
1574 return 0;
1577 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1578 target_ulong len, int type)
1580 int n;
1582 n = find_hw_breakpoint(addr, type);
1583 if (n < 0) {
1584 return -ENOENT;
1587 switch (type) {
1588 case GDB_BREAKPOINT_HW:
1589 nb_hw_breakpoint--;
1590 break;
1592 case GDB_WATCHPOINT_WRITE:
1593 case GDB_WATCHPOINT_READ:
1594 case GDB_WATCHPOINT_ACCESS:
1595 nb_hw_watchpoint--;
1596 break;
1598 default:
1599 return -ENOSYS;
1601 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1603 return 0;
1606 void kvm_arch_remove_all_hw_breakpoints(void)
1608 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1611 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1613 int n;
1615 /* Software Breakpoint updates */
1616 if (kvm_sw_breakpoints_active(cs)) {
1617 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1620 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1621 <= ARRAY_SIZE(hw_debug_points));
1622 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1624 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1625 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1626 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1627 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1628 switch (hw_debug_points[n].type) {
1629 case GDB_BREAKPOINT_HW:
1630 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1631 break;
1632 case GDB_WATCHPOINT_WRITE:
1633 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1634 break;
1635 case GDB_WATCHPOINT_READ:
1636 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1637 break;
1638 case GDB_WATCHPOINT_ACCESS:
1639 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1640 KVMPPC_DEBUG_WATCH_READ;
1641 break;
1642 default:
1643 cpu_abort(cs, "Unsupported breakpoint type\n");
1645 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1650 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1652 CPUState *cs = CPU(cpu);
1653 CPUPPCState *env = &cpu->env;
1654 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1655 int handle = 0;
1656 int n;
1657 int flag = 0;
1659 if (cs->singlestep_enabled) {
1660 handle = 1;
1661 } else if (arch_info->status) {
1662 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1663 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1664 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1665 if (n >= 0) {
1666 handle = 1;
1668 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1669 KVMPPC_DEBUG_WATCH_WRITE)) {
1670 n = find_hw_watchpoint(arch_info->address, &flag);
1671 if (n >= 0) {
1672 handle = 1;
1673 cs->watchpoint_hit = &hw_watchpoint;
1674 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1675 hw_watchpoint.flags = flag;
1679 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1680 handle = 1;
1681 } else {
1682         /* QEMU is not able to handle this debug exception, so inject
1683          * a program exception into the guest;
1684          * yes, a program exception, NOT a debug exception!
1685          * When QEMU is using the debug resources then the debug exception must
1686          * always be set. To achieve this we set MSR_DE and also set
1687          * MSRP_DEP so the guest cannot change MSR_DE.
1688          * When emulating debug resources for the guest we want the guest
1689          * to control MSR_DE (enable/disable the debug interrupt as needed).
1690          * Supporting both configurations is NOT possible.
1691          * So the result is that we cannot share debug resources
1692          * between QEMU and the guest on the BookE architecture.
1693          * In the current design QEMU gets priority over the guest:
1694          * if QEMU is using the debug resources then the guest
1695          * cannot use them.
1696          * For software breakpoints QEMU uses a privileged instruction,
1697          * so there is no way we got here because the guest set up a
1698          * debug exception; the only possibility is that the guest executed a
1699          * privileged / illegal instruction, and that's why we are
1700          * injecting a program interrupt.
1703 cpu_synchronize_state(cs);
1704 /* env->nip is PC, so increment this by 4 to use
1705          * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1707 env->nip += 4;
1708 cs->exception_index = POWERPC_EXCP_PROGRAM;
1709 env->error_code = POWERPC_EXCP_INVAL;
1710 ppc_cpu_do_interrupt(cs);
1713 return handle;
1716 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1718 PowerPCCPU *cpu = POWERPC_CPU(cs);
1719 CPUPPCState *env = &cpu->env;
1720 int ret;
1722 qemu_mutex_lock_iothread();
1724 switch (run->exit_reason) {
1725 case KVM_EXIT_DCR:
1726 if (run->dcr.is_write) {
1727 DPRINTF("handle dcr write\n");
1728 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1729 } else {
1730 DPRINTF("handle dcr read\n");
1731 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1733 break;
1734 case KVM_EXIT_HLT:
1735 DPRINTF("handle halt\n");
1736 ret = kvmppc_handle_halt(cpu);
1737 break;
1738 #if defined(TARGET_PPC64)
1739 case KVM_EXIT_PAPR_HCALL:
1740 DPRINTF("handle PAPR hypercall\n");
1741 run->papr_hcall.ret = spapr_hypercall(cpu,
1742 run->papr_hcall.nr,
1743 run->papr_hcall.args);
1744 ret = 0;
1745 break;
1746 #endif
1747 case KVM_EXIT_EPR:
1748 DPRINTF("handle epr\n");
1749 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1750 ret = 0;
1751 break;
1752 case KVM_EXIT_WATCHDOG:
1753 DPRINTF("handle watchdog expiry\n");
1754 watchdog_perform_action();
1755 ret = 0;
1756 break;
1758 case KVM_EXIT_DEBUG:
1759 DPRINTF("handle debug exception\n");
1760 if (kvm_handle_debug(cpu, run)) {
1761 ret = EXCP_DEBUG;
1762 break;
1764 /* re-enter, this exception was guest-internal */
1765 ret = 0;
1766 break;
1768 default:
1769 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1770 ret = -1;
1771 break;
1774 qemu_mutex_unlock_iothread();
1775 return ret;
1778 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1780 CPUState *cs = CPU(cpu);
1781 uint32_t bits = tsr_bits;
1782 struct kvm_one_reg reg = {
1783 .id = KVM_REG_PPC_OR_TSR,
1784 .addr = (uintptr_t) &bits,
1787 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1790 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1793 CPUState *cs = CPU(cpu);
1794 uint32_t bits = tsr_bits;
1795 struct kvm_one_reg reg = {
1796 .id = KVM_REG_PPC_CLEAR_TSR,
1797 .addr = (uintptr_t) &bits,
1800 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1803 int kvmppc_set_tcr(PowerPCCPU *cpu)
1805 CPUState *cs = CPU(cpu);
1806 CPUPPCState *env = &cpu->env;
1807 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1809 struct kvm_one_reg reg = {
1810 .id = KVM_REG_PPC_TCR,
1811 .addr = (uintptr_t) &tcr,
1814 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1817 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1819 CPUState *cs = CPU(cpu);
1820 int ret;
1822 if (!kvm_enabled()) {
1823 return -1;
1826 if (!cap_ppc_watchdog) {
1827 printf("warning: KVM does not support watchdog");
1828 return -1;
1831 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1832 if (ret < 0) {
1833 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1834 __func__, strerror(-ret));
1835 return ret;
1838 return ret;
1841 static int read_cpuinfo(const char *field, char *value, int len)
1843 FILE *f;
1844 int ret = -1;
1845 int field_len = strlen(field);
1846 char line[512];
1848 f = fopen("/proc/cpuinfo", "r");
1849 if (!f) {
1850 return -1;
1853 do {
1854 if (!fgets(line, sizeof(line), f)) {
1855 break;
1857 if (!strncmp(line, field, field_len)) {
1858 pstrcpy(value, len, line);
1859 ret = 0;
1860 break;
1862 } while(*line);
1864 fclose(f);
1866 return ret;
1869 uint32_t kvmppc_get_tbfreq(void)
1871 char line[512];
1872 char *ns;
1873 uint32_t retval = NANOSECONDS_PER_SECOND;
1875 if (read_cpuinfo("timebase", line, sizeof(line))) {
1876 return retval;
1879 if (!(ns = strchr(line, ':'))) {
1880 return retval;
1883 ns++;
1885 return atoi(ns);
1888 bool kvmppc_get_host_serial(char **value)
1890 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1891 NULL);
1894 bool kvmppc_get_host_model(char **value)
1896 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1899 /* Try to find a device tree node for a CPU with clock-frequency property */
1900 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1902 struct dirent *dirp;
1903 DIR *dp;
1905 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1906 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1907 return -1;
1910 buf[0] = '\0';
1911 while ((dirp = readdir(dp)) != NULL) {
1912 FILE *f;
1913 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1914 dirp->d_name);
1915 f = fopen(buf, "r");
1916 if (f) {
1917 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1918 fclose(f);
1919 break;
1921 buf[0] = '\0';
1923 closedir(dp);
1924 if (buf[0] == '\0') {
1925 printf("Unknown host!\n");
1926 return -1;
1929 return 0;
1932 static uint64_t kvmppc_read_int_dt(const char *filename)
1934 union {
1935 uint32_t v32;
1936 uint64_t v64;
1937 } u;
1938 FILE *f;
1939 int len;
1941 f = fopen(filename, "rb");
1942 if (!f) {
1943 return -1;
1946 len = fread(&u, 1, sizeof(u), f);
1947 fclose(f);
1948 switch (len) {
1949 case 4:
1950 /* property is a 32-bit quantity */
1951 return be32_to_cpu(u.v32);
1952 case 8:
1953 return be64_to_cpu(u.v64);
1956 return 0;
1959 /* Read a CPU node property from the host device tree that's a single
1960 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1961 * (can't find or open the property, or doesn't understand the
1962 * format) */
1963 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1965 char buf[PATH_MAX], *tmp;
1966 uint64_t val;
1968 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1969 return -1;
1972 tmp = g_strdup_printf("%s/%s", buf, propname);
1973 val = kvmppc_read_int_dt(tmp);
1974 g_free(tmp);
1976 return val;
1979 uint64_t kvmppc_get_clockfreq(void)
1981 return kvmppc_read_int_cpu_dt("clock-frequency");
1984 uint32_t kvmppc_get_vmx(void)
1986 return kvmppc_read_int_cpu_dt("ibm,vmx");
1989 uint32_t kvmppc_get_dfp(void)
1991 return kvmppc_read_int_cpu_dt("ibm,dfp");
1994 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1996 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1997 CPUState *cs = CPU(cpu);
1999 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2000 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2001 return 0;
2004 return 1;
2007 int kvmppc_get_hasidle(CPUPPCState *env)
2009 struct kvm_ppc_pvinfo pvinfo;
2011 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2012 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2013 return 1;
2016 return 0;
2019 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2021 uint32_t *hc = (uint32_t*)buf;
2022 struct kvm_ppc_pvinfo pvinfo;
2024 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2025 memcpy(buf, pvinfo.hcall, buf_len);
2026 return 0;
2030 * Fallback to always fail hypercalls regardless of endianness:
2032 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2033 * li r3, -1
2034 * b .+8 (becomes nop in wrong endian)
2035 * bswap32(li r3, -1)
2038 hc[0] = cpu_to_be32(0x08000048);
2039 hc[1] = cpu_to_be32(0x3860ffff);
2040 hc[2] = cpu_to_be32(0x48000008);
2041 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2043 return 1;
2046 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2048 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2051 void kvmppc_enable_logical_ci_hcalls(void)
2054 * FIXME: it would be nice if we could detect the cases where
2055 * we're using a device which requires the in kernel
2056 * implementation of these hcalls, but the kernel lacks them and
2057 * produce a warning.
2059 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2060 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2063 void kvmppc_enable_set_mode_hcall(void)
2065 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2068 void kvmppc_enable_clear_ref_mod_hcalls(void)
2070 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2071 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2074 void kvmppc_set_papr(PowerPCCPU *cpu)
2076 CPUState *cs = CPU(cpu);
2077 int ret;
2079 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2080 if (ret) {
2081 error_report("This vCPU type or KVM version does not support PAPR");
2082 exit(1);
2085 /* Update the capability flag so we sync the right information
2086 * with kvm */
2087 cap_papr = 1;
2090 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2092 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2095 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2097 CPUState *cs = CPU(cpu);
2098 int ret;
2100 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2101 if (ret && mpic_proxy) {
2102 error_report("This KVM version does not support EPR");
2103 exit(1);
2107 int kvmppc_smt_threads(void)
2109 return cap_ppc_smt ? cap_ppc_smt : 1;
2112 #ifdef TARGET_PPC64
2113 off_t kvmppc_alloc_rma(void **rma)
2115 off_t size;
2116 int fd;
2117 struct kvm_allocate_rma ret;
2119 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2120 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2121 * not necessary on this hardware
2122 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2124 * FIXME: We should allow the user to force contiguous RMA
2125 * allocation in the cap_ppc_rma==1 case.
2127 if (cap_ppc_rma < 2) {
2128 return 0;
2131 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2132 if (fd < 0) {
2133 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2134 strerror(errno));
2135 return -1;
2138 size = MIN(ret.rma_size, 256ul << 20);
2140 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2141 if (*rma == MAP_FAILED) {
2142 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2143 return -1;
2146 return size;
2149 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2151 struct kvm_ppc_smmu_info info;
2152 long rampagesize, best_page_shift;
2153 int i;
2155 if (cap_ppc_rma >= 2) {
2156 return current_size;
2159 /* Find the largest hardware supported page size that's less than
2160 * or equal to the (logical) backing page size of guest RAM */
2161 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2162 rampagesize = getrampagesize();
2163 best_page_shift = 0;
2165 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2166 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2168 if (!sps->page_shift) {
2169 continue;
2172 if ((sps->page_shift > best_page_shift)
2173 && ((1UL << sps->page_shift) <= rampagesize)) {
2174 best_page_shift = sps->page_shift;
2178 return MIN(current_size,
2179 1ULL << (best_page_shift + hash_shift - 7));
2181 #endif
2183 bool kvmppc_spapr_use_multitce(void)
2185 return cap_spapr_multitce;
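/* Create an in-kernel TCE (IOMMU) table for the given LIOBN via
 * KVM_CREATE_SPAPR_TCE and mmap it into QEMU.  On success the table fd
 * is returned through *pfd along with the mapping; otherwise *pfd is set
 * to -1 and NULL is returned, leaving the caller to provide the table by
 * other means.
 */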
2188 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2189 bool need_vfio)
2191 struct kvm_create_spapr_tce args = {
2192 .liobn = liobn,
2193 .window_size = window_size,
2195 long len;
2196 int fd;
2197 void *table;
2199 /* Must set fd to -1 so we don't try to munmap when called for
2200 * destroying the table, which the upper layers -will- do
2202 *pfd = -1;
2203 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2204 return NULL;
2207 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2208 if (fd < 0) {
2209 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2210 liobn);
2211 return NULL;
2214 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2215 /* FIXME: round this up to page size */
2217 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2218 if (table == MAP_FAILED) {
2219 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2220 liobn);
2221 close(fd);
2222 return NULL;
2225 *pfd = fd;
2226 return table;
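/* Undo kvmppc_create_spapr_tce(): unmap the table and close its fd.
 * Returns -1 if fd is negative (no kernel TCE table was in use).
 */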
2229 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2231 long len;
2233 if (fd < 0) {
2234 return -1;
2237 len = nb_table * sizeof(uint64_t);
2238 if ((munmap(table, len) < 0) ||
2239 (close(fd) < 0)) {
2240 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2241 strerror(errno));
2242 /* Leak the table */
2245 return 0;
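/* (Re)allocate the guest hash page table.  Returns the shift (log2 size)
 * of a kernel-managed HPT, 0 when QEMU itself must allocate the HPT
 * (full emulation or PR KVM), or a negative error code.
 */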
2248 int kvmppc_reset_htab(int shift_hint)
2250 uint32_t shift = shift_hint;
2252 if (!kvm_enabled()) {
2253 /* Full emulation, tell caller to allocate htab itself */
2254 return 0;
2256 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2257 int ret;
2258 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2259 if (ret == -ENOTTY) {
2260 /* At least some versions of PR KVM advertise the
2261 * capability, but don't implement the ioctl(). Oops.
2262 * Return 0 so that we allocate the htab in qemu, as is
2263 * correct for PR. */
2264 return 0;
2265 } else if (ret < 0) {
2266 return ret;
2268 return shift;
2271     /* We have a kernel that predates the htab reset calls.  For PR
2272      * KVM we need to allocate the htab ourselves; an HV KVM of this
2273      * era will already have allocated a fixed 16 MiB hash table.
2274      * Kernels of this era advertise the GET_PVINFO capability
2275      * only on PR, so we use this hack to determine the right
2276      * answer. */
2277 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2278 /* PR - tell caller to allocate htab */
2279 return 0;
2280 } else {
2281 /* HV - assume 16MB kernel allocated htab */
2282 return 24;
2286 static inline uint32_t mfpvr(void)
2288 uint32_t pvr;
2290 asm ("mfpvr %0"
2291 : "=r"(pvr));
2292 return pvr;
2295 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2297 if (on) {
2298 *word |= flags;
2299 } else {
2300 *word &= ~flags;
2304 static void kvmppc_host_cpu_initfn(Object *obj)
2306 assert(kvm_enabled());
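/* Fill in the host CPU class with details queried from the host: the
 * PVR, AltiVec/VSX and DFP availability, and the L1 cache sizes read
 * from the device tree.
 */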
2309 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2311 DeviceClass *dc = DEVICE_CLASS(oc);
2312 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2313 uint32_t vmx = kvmppc_get_vmx();
2314 uint32_t dfp = kvmppc_get_dfp();
2315 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2316 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2318 /* Now fix up the class with information we can query from the host */
2319 pcc->pvr = mfpvr();
2321 if (vmx != -1) {
2322 /* Only override when we know what the host supports */
2323 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2324 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2326 if (dfp != -1) {
2327 /* Only override when we know what the host supports */
2328 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2331 if (dcache_size != -1) {
2332 pcc->l1_dcache_size = dcache_size;
2335 if (icache_size != -1) {
2336 pcc->l1_icache_size = icache_size;
2339 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2340 dc->cannot_destroy_with_object_finalize_yet = true;
2343 bool kvmppc_has_cap_epr(void)
2345 return cap_epr;
2348 bool kvmppc_has_cap_htab_fd(void)
2350 return cap_htab_fd;
2353 bool kvmppc_has_cap_fixup_hcalls(void)
2355 return cap_fixup_hcalls;
2358 bool kvmppc_has_cap_htm(void)
2360 return cap_htm;
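/* Walk up the QOM class hierarchy until we reach the first abstract
 * class, which is the generic family class for this CPU model.
 */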
2363 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2365 ObjectClass *oc = OBJECT_CLASS(pcc);
2367 while (oc && !object_class_is_abstract(oc)) {
2368 oc = object_class_get_parent(oc);
2370 assert(oc);
2372 return POWERPC_CPU_CLASS(oc);
2375 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2377 uint32_t host_pvr = mfpvr();
2378 PowerPCCPUClass *pvr_pcc;
2380 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2381 if (pvr_pcc == NULL) {
2382 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2385 return pvr_pcc;
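/* Register the "host" CPU type, modelled on the PVR of the CPU we are
 * running on, plus a generic alias for its CPU family.  On ppc64 the
 * corresponding spapr CPU core types are registered as well.
 */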
2388 static int kvm_ppc_register_host_cpu_type(void)
2390 TypeInfo type_info = {
2391 .name = TYPE_HOST_POWERPC_CPU,
2392 .instance_init = kvmppc_host_cpu_initfn,
2393 .class_init = kvmppc_host_cpu_class_init,
2395 PowerPCCPUClass *pvr_pcc;
2396 DeviceClass *dc;
2398 pvr_pcc = kvm_ppc_get_host_cpu_class();
2399 if (pvr_pcc == NULL) {
2400 return -1;
2402 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2403 type_register(&type_info);
2405     /* Register a generic CPU class for the family this host CPU belongs to */
2406 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2407 dc = DEVICE_CLASS(pvr_pcc);
2408 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2409 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2410 type_register(&type_info);
2412 #if defined(TARGET_PPC64)
2413 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2414     type_info.parent = TYPE_SPAPR_CPU_CORE;
2415 type_info.instance_size = sizeof(sPAPRCPUCore);
2416 type_info.instance_init = NULL;
2417 type_info.class_init = spapr_cpu_core_class_init;
2418 type_info.class_data = (void *) "host";
2419 type_register(&type_info);
2420 g_free((void *)type_info.name);
2422 /* Register generic spapr CPU family class for current host CPU type */
2423 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, dc->desc);
2424 type_info.class_data = (void *) dc->desc;
2425 type_register(&type_info);
2426 g_free((void *)type_info.name);
2427 #endif
2429 return 0;
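/* Bind an RTAS token to the named RTAS call so that KVM handles the call
 * in the kernel (KVM_PPC_RTAS_DEFINE_TOKEN).  Returns -ENOENT if the
 * kernel lacks the KVM_CAP_PPC_RTAS capability.
 */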
2432 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2434 struct kvm_rtas_token_args args = {
2435 .token = token,
2438 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2439 return -ENOENT;
2442 strncpy(args.name, function, sizeof(args.name));
2444 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
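/* Obtain a file descriptor for streaming the guest hash page table,
 * either for reading or (when @write is true) for writing, via
 * KVM_PPC_GET_HTAB_FD.  Returns -1 if the capability is missing.
 */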
2447 int kvmppc_get_htab_fd(bool write)
2449 struct kvm_get_htab_fd s = {
2450 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2451 .start_index = 0,
2454 if (!cap_htab_fd) {
2455 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2456 return -1;
2459 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
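/* Stream the hash page table from the kernel into the migration stream.
 * Each chunk read from the htab fd is a kvm_get_htab_header (index,
 * n_valid, n_invalid) followed by n_valid HPTEs; we forward those fields
 * and the valid HPTEs to @f.  Returns 1 once the whole table has been
 * read (read() returned 0), or 0 if we stopped because max_ns expired.
 */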
2462 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2464 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2465 uint8_t buf[bufsize];
2466 ssize_t rc;
2468 do {
2469 rc = read(fd, buf, bufsize);
2470 if (rc < 0) {
2471 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2472 strerror(errno));
2473 return rc;
2474 } else if (rc) {
2475 uint8_t *buffer = buf;
2476 ssize_t n = rc;
2477 while (n) {
2478 struct kvm_get_htab_header *head =
2479 (struct kvm_get_htab_header *) buffer;
2480 size_t chunksize = sizeof(*head) +
2481 HASH_PTE_SIZE_64 * head->n_valid;
2483 qemu_put_be32(f, head->index);
2484 qemu_put_be16(f, head->n_valid);
2485 qemu_put_be16(f, head->n_invalid);
2486 qemu_put_buffer(f, (void *)(head + 1),
2487 HASH_PTE_SIZE_64 * head->n_valid);
2489 buffer += chunksize;
2490 n -= chunksize;
2493 } while ((rc != 0)
2494 && ((max_ns < 0)
2495 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2497 return (rc == 0) ? 1 : 0;
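/* Rebuild one header + HPTE chunk from the migration stream and write it
 * to the htab fd so the kernel installs the entries.  A short write is
 * treated as an error, since a single chunk should never be split.
 */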
2500 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2501 uint16_t n_valid, uint16_t n_invalid)
2503 struct kvm_get_htab_header *buf;
2504 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2505 ssize_t rc;
2507 buf = alloca(chunksize);
2508 buf->index = index;
2509 buf->n_valid = n_valid;
2510 buf->n_invalid = n_invalid;
2512 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2514 rc = write(fd, buf, chunksize);
2515 if (rc < 0) {
2516 fprintf(stderr, "Error writing KVM hash table: %s\n",
2517 strerror(errno));
2518 return rc;
2520 if (rc != chunksize) {
2521 /* We should never get a short write on a single chunk */
2522 fprintf(stderr, "Short write, restoring KVM hash table\n");
2523 return -1;
2525 return 0;
2528 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2530 return true;
2533 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2535 return 1;
2538 int kvm_arch_on_sigbus(int code, void *addr)
2540 return 1;
2543 void kvm_arch_init_irq_routing(KVMState *s)
2547 struct kvm_get_htab_buf {
2548 struct kvm_get_htab_header header;
2550      * We require one extra target_ulong of space for the read
2552 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
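/* Read one PTE group starting at @pte_index through the htab fd.  The
 * returned token is really a pointer into a freshly allocated
 * kvm_get_htab_buf and must be released with kvmppc_hash64_free_pteg();
 * 0 is returned on failure.
 */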
2555 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2557 int htab_fd;
2558 struct kvm_get_htab_fd ghf;
2559 struct kvm_get_htab_buf *hpte_buf;
2561 ghf.flags = 0;
2562 ghf.start_index = pte_index;
2563 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2564 if (htab_fd < 0) {
2565 goto error_out;
2568 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2570 * Read the hpte group
2572 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2573 goto out_close;
2576 close(htab_fd);
2577 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2579 out_close:
2580 g_free(hpte_buf);
2581 close(htab_fd);
2582 error_out:
2583 return 0;
2586 void kvmppc_hash64_free_pteg(uint64_t token)
2588 struct kvm_get_htab_buf *htab_buf;
2590 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2591 hpte);
2592 g_free(htab_buf);
2593 return;
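/* Write a single hash PTE (pte0/pte1 at @pte_index) into the guest hash
 * table through the htab fd, using a one-entry chunk with n_valid = 1
 * and n_invalid = 0.  Errors are silently ignored here.
 */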
2596 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2597 target_ulong pte0, target_ulong pte1)
2599 int htab_fd;
2600 struct kvm_get_htab_fd ghf;
2601 struct kvm_get_htab_buf hpte_buf;
2603 ghf.flags = 0;
2604 ghf.start_index = 0; /* Ignored */
2605 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2606 if (htab_fd < 0) {
2607 goto error_out;
2610 hpte_buf.header.n_valid = 1;
2611 hpte_buf.header.n_invalid = 0;
2612 hpte_buf.header.index = pte_index;
2613 hpte_buf.hpte[0] = pte0;
2614 hpte_buf.hpte[1] = pte1;
2616      * Write the hpte entry.
2617      * CAUTION: write() has the warn_unused_result attribute.  Hence we
2618      * must check the return value, even though we can do little about a failure here.
2620 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2621 goto out_close;
2624 out_close:
2625 close(htab_fd);
2626 return;
2628 error_out:
2629 return;
2632 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2633 uint64_t address, uint32_t data, PCIDevice *dev)
2635 return 0;
2638 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2639 int vector, PCIDevice *dev)
2641 return 0;
2644 int kvm_arch_release_virq_post(int virq)
2646 return 0;
2649 int kvm_arch_msi_data_to_gsi(uint32_t data)
2651 return data & 0xffff;
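/* Enable in-kernel handling of the H_RANDOM hypercall when the host
 * kernel advertises KVM_CAP_PPC_HWRNG (i.e. it has a hardware RNG to
 * back it).  Returns -1 when the capability is unavailable.
 */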
2654 int kvmppc_enable_hwrng(void)
2656 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2657 return -1;
2660 return kvmppc_enable_hcall(kvm_state, H_RANDOM);