trace: split out trace events for util/ directory
[qemu/ar7.git] / target-ppc / kvm.c
blob e14da60b77a21aed62e53037abc2d156e85d7037
/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu/osdep.h"
#include <dirent.h>
#include <sys/ioctl.h>
#include <sys/vfs.h>

#include <linux/kvm.h>

#include "qemu-common.h"
#include "qemu/error-report.h"
#include "cpu.h"
#include "qemu/timer.h"
#include "sysemu/sysemu.h"
#include "sysemu/kvm.h"
#include "kvm_ppc.h"
#include "sysemu/cpus.h"
#include "sysemu/device_tree.h"
#include "mmu-hash64.h"

#include "hw/sysbus.h"
#include "hw/ppc/spapr.h"
#include "hw/ppc/spapr_vio.h"
#include "hw/ppc/ppc.h"
#include "sysemu/watchdog.h"
#include "trace.h"
#include "exec/gdbstub.h"
#include "exec/memattrs.h"
#include "sysemu/hostmem.h"
#include "qemu/cutils.h"
#if defined(TARGET_PPC64)
#include "hw/ppc/spapr_cpu_core.h"
#endif
//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
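/* Availability of optional KVM capabilities, probed once in kvm_arch_init()
 * and consulted throughout this file */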
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;

/* Opcode of the software breakpoint instruction advertised by KVM
 * (queried via KVM_REG_PPC_DEBUG_INST in kvm_arch_init_vcpu()) */
static uint32_t debug_inst_opcode;
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can ensure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;

static void kvm_kick_cpu(void *opaque)
{
    PowerPCCPU *cpu = opaque;

    qemu_cpu_kick(CPU(cpu));
}
static int kvm_ppc_register_host_cpu_type(void);

int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    cap_spapr_vfio = false;
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}
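/* Push the vcpu's PVR to KVM via the SREGS interface (Book3S only; on BookE
 * the host PVR is kept, see the comment below) */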
134 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
136 CPUPPCState *cenv = &cpu->env;
137 CPUState *cs = CPU(cpu);
138 struct kvm_sregs sregs;
139 int ret;
141 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check it,
           though it may mislead users into thinking they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
146 return 0;
147 } else {
148 if (!cap_segstate) {
149 fprintf(stderr, "kvm error: missing PVR setting capability\n");
150 return -ENOSYS;
154 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
155 if (ret) {
156 return ret;
159 sregs.pvr = cenv->spr[SPR_PVR];
160 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
163 /* Set up a shared TLB array with KVM */
164 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
166 CPUPPCState *env = &cpu->env;
167 CPUState *cs = CPU(cpu);
168 struct kvm_book3e_206_tlb_params params = {};
169 struct kvm_config_tlb cfg = {};
170 unsigned int entries = 0;
171 int ret, i;
173 if (!kvm_enabled() ||
174 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
175 return 0;
178 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
180 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
181 params.tlb_sizes[i] = booke206_tlb_size(env, i);
182 params.tlb_ways[i] = booke206_tlb_ways(env, i);
183 entries += params.tlb_sizes[i];
186 assert(entries == env->nb_tlb);
187 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
189 env->tlb_dirty = true;
191 cfg.array = (uintptr_t)env->tlb.tlbm;
192 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
193 cfg.params = (uintptr_t)&params;
194 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
196 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
197 if (ret < 0) {
198 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
199 __func__, strerror(-ret));
200 return ret;
203 env->kvm_sw_tlb = true;
204 return 0;
208 #if defined(TARGET_PPC64)
209 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
210 struct kvm_ppc_smmu_info *info)
212 CPUPPCState *env = &cpu->env;
213 CPUState *cs = CPU(cpu);
215 memset(info, 0, sizeof(*info));
    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteristics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows support for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback.
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertise 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
240 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
241 /* No flags */
242 info->flags = 0;
243 info->slb_size = 64;
245 /* Standard 4k base page size segment */
246 info->sps[0].page_shift = 12;
247 info->sps[0].slb_enc = 0;
248 info->sps[0].enc[0].page_shift = 12;
249 info->sps[0].enc[0].pte_enc = 0;
251 /* Standard 16M large page size segment */
252 info->sps[1].page_shift = 24;
253 info->sps[1].slb_enc = SLB_VSID_L;
254 info->sps[1].enc[0].page_shift = 24;
255 info->sps[1].enc[0].pte_enc = 0;
256 } else {
257 int i = 0;
259 /* HV KVM has backing store size restrictions */
260 info->flags = KVM_PPC_PAGE_SIZES_REAL;
262 if (env->mmu_model & POWERPC_MMU_1TSEG) {
263 info->flags |= KVM_PPC_1T_SEGMENTS;
266 if (env->mmu_model == POWERPC_MMU_2_06 ||
267 env->mmu_model == POWERPC_MMU_2_07) {
268 info->slb_size = 32;
269 } else {
270 info->slb_size = 64;
273 /* Standard 4k base page size segment */
274 info->sps[i].page_shift = 12;
275 info->sps[i].slb_enc = 0;
276 info->sps[i].enc[0].page_shift = 12;
277 info->sps[i].enc[0].pte_enc = 0;
278 i++;
280 /* 64K on MMU 2.06 and later */
281 if (env->mmu_model == POWERPC_MMU_2_06 ||
282 env->mmu_model == POWERPC_MMU_2_07) {
283 info->sps[i].page_shift = 16;
284 info->sps[i].slb_enc = 0x110;
285 info->sps[i].enc[0].page_shift = 16;
286 info->sps[i].enc[0].pte_enc = 1;
287 i++;
290 /* Standard 16M large page size segment */
291 info->sps[i].page_shift = 24;
292 info->sps[i].slb_enc = SLB_VSID_L;
293 info->sps[i].enc[0].page_shift = 24;
294 info->sps[i].enc[0].pte_enc = 0;
298 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
300 CPUState *cs = CPU(cpu);
301 int ret;
303 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
304 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
305 if (ret == 0) {
306 return;
310 kvm_get_fallback_smmu_info(cpu, info);
313 static long gethugepagesize(const char *mem_path)
315 struct statfs fs;
316 int ret;
318 do {
319 ret = statfs(mem_path, &fs);
320 } while (ret != 0 && errno == EINTR);
322 if (ret != 0) {
323 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
324 strerror(errno));
325 exit(1);
328 #define HUGETLBFS_MAGIC 0x958458f6
330 if (fs.f_type != HUGETLBFS_MAGIC) {
331 /* Explicit mempath, but it's ordinary pages */
332 return getpagesize();
335 /* It's hugepage, return the huge page size */
336 return fs.f_bsize;
/*
 * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
 * may or may not name the same files / on the same filesystem now as
 * when we actually open and map them.  Iterate over the file
 * descriptors instead, and use qemu_fd_getpagesize().
 */
345 static int find_max_supported_pagesize(Object *obj, void *opaque)
347 char *mem_path;
348 long *hpsize_min = opaque;
350 if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
351 mem_path = object_property_get_str(obj, "mem-path", NULL);
352 if (mem_path) {
353 long hpsize = gethugepagesize(mem_path);
354 if (hpsize < *hpsize_min) {
355 *hpsize_min = hpsize;
357 } else {
358 *hpsize_min = getpagesize();
362 return 0;
365 static long getrampagesize(void)
367 long hpsize = LONG_MAX;
368 Object *memdev_root;
370 if (mem_path) {
371 return gethugepagesize(mem_path);
374 /* it's possible we have memory-backend objects with
375 * hugepage-backed RAM. these may get mapped into system
376 * address space via -numa parameters or memory hotplug
377 * hooks. we want to take these into account, but we
378 * also want to make sure these supported hugepage
379 * sizes are applicable across the entire range of memory
380 * we may boot from, so we take the min across all
381 * backends, and assume normal pages in cases where a
382 * backend isn't backed by hugepages.
384 memdev_root = object_resolve_path("/objects", NULL);
385 if (!memdev_root) {
386 return getpagesize();
389 object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
391 return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
394 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
396 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
397 return true;
400 return (1ul << shift) <= rampgsize;
403 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
405 static struct kvm_ppc_smmu_info smmu_info;
406 static bool has_smmu_info;
407 CPUPPCState *env = &cpu->env;
408 long rampagesize;
409 int iq, ik, jq, jk;
411 /* We only handle page sizes for 64-bit server guests for now */
412 if (!(env->mmu_model & POWERPC_MMU_64)) {
413 return;
416 /* Collect MMU info from kernel if not already */
417 if (!has_smmu_info) {
418 kvm_get_smmu_info(cpu, &smmu_info);
419 has_smmu_info = true;
422 rampagesize = getrampagesize();
424 /* Convert to QEMU form */
425 memset(&env->sps, 0, sizeof(env->sps));
427 /* If we have HV KVM, we need to forbid CI large pages if our
428 * host page size is smaller than 64K.
430 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
431 env->ci_large_pages = getpagesize() >= 0x10000;
435 * XXX This loop should be an entry wide AND of the capabilities that
436 * the selected CPU has with the capabilities that KVM supports.
438 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
439 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
440 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
442 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
443 ksps->page_shift)) {
444 continue;
446 qsps->page_shift = ksps->page_shift;
447 qsps->slb_enc = ksps->slb_enc;
448 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
449 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
450 ksps->enc[jk].page_shift)) {
451 continue;
453 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
454 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
455 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
456 break;
459 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
460 break;
463 env->slb_nr = smmu_info.slb_size;
464 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
465 env->mmu_model &= ~POWERPC_MMU_1TSEG;
468 #else /* defined (TARGET_PPC64) */
470 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
474 #endif /* !defined (TARGET_PPC64) */
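/* KVM identifies vCPUs by the same id that is exposed to the guest in the
 * device tree */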
476 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
478 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
/* e500 supports 2 h/w breakpoints and 2 watchpoints.
 * book3s supports only 1 watchpoint, so an array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4
487 static struct HWBreakpoint {
488 target_ulong addr;
489 int type;
490 } hw_debug_points[MAX_HW_BKPTS];
492 static CPUWatchpoint hw_watchpoint;
/* No hardware breakpoints or watchpoints are supported by default */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;
500 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
502 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
503 max_hw_breakpoint = 2;
504 max_hw_watchpoint = 2;
507 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
508 fprintf(stderr, "Error initializing h/w breakpoints\n");
509 return;
513 int kvm_arch_init_vcpu(CPUState *cs)
515 PowerPCCPU *cpu = POWERPC_CPU(cs);
516 CPUPPCState *cenv = &cpu->env;
517 int ret;
519 /* Gather server mmu info from KVM and update the CPU state */
520 kvm_fixup_page_sizes(cpu);
522 /* Synchronize sregs with kvm */
523 ret = kvm_arch_sync_sregs(cpu);
524 if (ret) {
525 if (ret == -EINVAL) {
526 error_report("Register sync failed... If you're using kvm-hv.ko,"
527 " only \"-cpu host\" is possible");
529 return ret;
532 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
534 /* Some targets support access to KVM's guest TLB. */
535 switch (cenv->mmu_model) {
536 case POWERPC_MMU_BOOKE206:
537 ret = kvm_booke206_tlb_init(cpu);
538 break;
539 default:
540 break;
543 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
544 kvmppc_hw_debug_points_init(cenv);
546 return ret;
549 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
551 CPUPPCState *env = &cpu->env;
552 CPUState *cs = CPU(cpu);
553 struct kvm_dirty_tlb dirty_tlb;
554 unsigned char *bitmap;
555 int ret;
557 if (!env->kvm_sw_tlb) {
558 return;
561 bitmap = g_malloc((env->nb_tlb + 7) / 8);
562 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
564 dirty_tlb.bitmap = (uintptr_t)bitmap;
565 dirty_tlb.num_dirty = env->nb_tlb;
567 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
568 if (ret) {
569 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
570 __func__, strerror(-ret));
573 g_free(bitmap);
576 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
578 PowerPCCPU *cpu = POWERPC_CPU(cs);
579 CPUPPCState *env = &cpu->env;
580 union {
581 uint32_t u32;
582 uint64_t u64;
583 } val;
584 struct kvm_one_reg reg = {
585 .id = id,
586 .addr = (uintptr_t) &val,
588 int ret;
590 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
591 if (ret != 0) {
592 trace_kvm_failed_spr_get(spr, strerror(errno));
593 } else {
594 switch (id & KVM_REG_SIZE_MASK) {
595 case KVM_REG_SIZE_U32:
596 env->spr[spr] = val.u32;
597 break;
599 case KVM_REG_SIZE_U64:
600 env->spr[spr] = val.u64;
601 break;
603 default:
604 /* Don't handle this size yet */
605 abort();
610 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
612 PowerPCCPU *cpu = POWERPC_CPU(cs);
613 CPUPPCState *env = &cpu->env;
614 union {
615 uint32_t u32;
616 uint64_t u64;
617 } val;
618 struct kvm_one_reg reg = {
619 .id = id,
620 .addr = (uintptr_t) &val,
622 int ret;
624 switch (id & KVM_REG_SIZE_MASK) {
625 case KVM_REG_SIZE_U32:
626 val.u32 = env->spr[spr];
627 break;
629 case KVM_REG_SIZE_U64:
630 val.u64 = env->spr[spr];
631 break;
633 default:
634 /* Don't handle this size yet */
635 abort();
638 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
639 if (ret != 0) {
640 trace_kvm_failed_spr_set(spr, strerror(errno));
644 static int kvm_put_fp(CPUState *cs)
646 PowerPCCPU *cpu = POWERPC_CPU(cs);
647 CPUPPCState *env = &cpu->env;
648 struct kvm_one_reg reg;
649 int i;
650 int ret;
652 if (env->insns_flags & PPC_FLOAT) {
653 uint64_t fpscr = env->fpscr;
654 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
656 reg.id = KVM_REG_PPC_FPSCR;
657 reg.addr = (uintptr_t)&fpscr;
658 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
659 if (ret < 0) {
660 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
661 return ret;
664 for (i = 0; i < 32; i++) {
665 uint64_t vsr[2];
667 #ifdef HOST_WORDS_BIGENDIAN
668 vsr[0] = float64_val(env->fpr[i]);
669 vsr[1] = env->vsr[i];
670 #else
671 vsr[0] = env->vsr[i];
672 vsr[1] = float64_val(env->fpr[i]);
673 #endif
674 reg.addr = (uintptr_t) &vsr;
675 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
677 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
678 if (ret < 0) {
679 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
680 i, strerror(errno));
681 return ret;
686 if (env->insns_flags & PPC_ALTIVEC) {
687 reg.id = KVM_REG_PPC_VSCR;
688 reg.addr = (uintptr_t)&env->vscr;
689 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
690 if (ret < 0) {
691 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
692 return ret;
695 for (i = 0; i < 32; i++) {
696 reg.id = KVM_REG_PPC_VR(i);
697 reg.addr = (uintptr_t)&env->avr[i];
698 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
699 if (ret < 0) {
700 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
701 return ret;
706 return 0;
709 static int kvm_get_fp(CPUState *cs)
711 PowerPCCPU *cpu = POWERPC_CPU(cs);
712 CPUPPCState *env = &cpu->env;
713 struct kvm_one_reg reg;
714 int i;
715 int ret;
717 if (env->insns_flags & PPC_FLOAT) {
718 uint64_t fpscr;
719 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
721 reg.id = KVM_REG_PPC_FPSCR;
722 reg.addr = (uintptr_t)&fpscr;
723 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
724 if (ret < 0) {
725 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
726 return ret;
727 } else {
728 env->fpscr = fpscr;
731 for (i = 0; i < 32; i++) {
732 uint64_t vsr[2];
734 reg.addr = (uintptr_t) &vsr;
735 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
737 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
738 if (ret < 0) {
739 DPRINTF("Unable to get %s%d from KVM: %s\n",
740 vsx ? "VSR" : "FPR", i, strerror(errno));
741 return ret;
742 } else {
743 #ifdef HOST_WORDS_BIGENDIAN
744 env->fpr[i] = vsr[0];
745 if (vsx) {
746 env->vsr[i] = vsr[1];
748 #else
749 env->fpr[i] = vsr[1];
750 if (vsx) {
751 env->vsr[i] = vsr[0];
753 #endif
758 if (env->insns_flags & PPC_ALTIVEC) {
759 reg.id = KVM_REG_PPC_VSCR;
760 reg.addr = (uintptr_t)&env->vscr;
761 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
762 if (ret < 0) {
763 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
764 return ret;
767 for (i = 0; i < 32; i++) {
768 reg.id = KVM_REG_PPC_VR(i);
769 reg.addr = (uintptr_t)&env->avr[i];
770 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
771 if (ret < 0) {
772 DPRINTF("Unable to get VR%d from KVM: %s\n",
773 i, strerror(errno));
774 return ret;
779 return 0;
782 #if defined(TARGET_PPC64)
783 static int kvm_get_vpa(CPUState *cs)
785 PowerPCCPU *cpu = POWERPC_CPU(cs);
786 CPUPPCState *env = &cpu->env;
787 struct kvm_one_reg reg;
788 int ret;
790 reg.id = KVM_REG_PPC_VPA_ADDR;
791 reg.addr = (uintptr_t)&env->vpa_addr;
792 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
793 if (ret < 0) {
794 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
795 return ret;
798 assert((uintptr_t)&env->slb_shadow_size
799 == ((uintptr_t)&env->slb_shadow_addr + 8));
800 reg.id = KVM_REG_PPC_VPA_SLB;
801 reg.addr = (uintptr_t)&env->slb_shadow_addr;
802 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
803 if (ret < 0) {
804 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
805 strerror(errno));
806 return ret;
809 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
810 reg.id = KVM_REG_PPC_VPA_DTL;
811 reg.addr = (uintptr_t)&env->dtl_addr;
812 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
813 if (ret < 0) {
814 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
815 strerror(errno));
816 return ret;
819 return 0;
822 static int kvm_put_vpa(CPUState *cs)
824 PowerPCCPU *cpu = POWERPC_CPU(cs);
825 CPUPPCState *env = &cpu->env;
826 struct kvm_one_reg reg;
827 int ret;
829 /* SLB shadow or DTL can't be registered unless a master VPA is
830 * registered. That means when restoring state, if a VPA *is*
831 * registered, we need to set that up first. If not, we need to
832 * deregister the others before deregistering the master VPA */
833 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
835 if (env->vpa_addr) {
836 reg.id = KVM_REG_PPC_VPA_ADDR;
837 reg.addr = (uintptr_t)&env->vpa_addr;
838 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
839 if (ret < 0) {
840 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
841 return ret;
845 assert((uintptr_t)&env->slb_shadow_size
846 == ((uintptr_t)&env->slb_shadow_addr + 8));
847 reg.id = KVM_REG_PPC_VPA_SLB;
848 reg.addr = (uintptr_t)&env->slb_shadow_addr;
849 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
850 if (ret < 0) {
851 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
852 return ret;
855 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
856 reg.id = KVM_REG_PPC_VPA_DTL;
857 reg.addr = (uintptr_t)&env->dtl_addr;
858 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
859 if (ret < 0) {
860 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
861 strerror(errno));
862 return ret;
865 if (!env->vpa_addr) {
866 reg.id = KVM_REG_PPC_VPA_ADDR;
867 reg.addr = (uintptr_t)&env->vpa_addr;
868 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
869 if (ret < 0) {
870 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
871 return ret;
875 return 0;
877 #endif /* TARGET_PPC64 */
879 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
881 CPUPPCState *env = &cpu->env;
882 struct kvm_sregs sregs;
883 int i;
885 sregs.pvr = env->spr[SPR_PVR];
887 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
889 /* Sync SLB */
890 #ifdef TARGET_PPC64
891 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
892 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
893 if (env->slb[i].esid & SLB_ESID_V) {
894 sregs.u.s.ppc64.slb[i].slbe |= i;
896 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
898 #endif
900 /* Sync SRs */
901 for (i = 0; i < 16; i++) {
902 sregs.u.s.ppc32.sr[i] = env->sr[i];
905 /* Sync BATs */
906 for (i = 0; i < 8; i++) {
907 /* Beware. We have to swap upper and lower bits here */
908 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
909 | env->DBAT[1][i];
910 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
911 | env->IBAT[1][i];
914 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
917 int kvm_arch_put_registers(CPUState *cs, int level)
919 PowerPCCPU *cpu = POWERPC_CPU(cs);
920 CPUPPCState *env = &cpu->env;
921 struct kvm_regs regs;
922 int ret;
923 int i;
925 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
926 if (ret < 0) {
927 return ret;
930 regs.ctr = env->ctr;
931 regs.lr = env->lr;
932 regs.xer = cpu_read_xer(env);
933 regs.msr = env->msr;
934 regs.pc = env->nip;
936 regs.srr0 = env->spr[SPR_SRR0];
937 regs.srr1 = env->spr[SPR_SRR1];
939 regs.sprg0 = env->spr[SPR_SPRG0];
940 regs.sprg1 = env->spr[SPR_SPRG1];
941 regs.sprg2 = env->spr[SPR_SPRG2];
942 regs.sprg3 = env->spr[SPR_SPRG3];
943 regs.sprg4 = env->spr[SPR_SPRG4];
944 regs.sprg5 = env->spr[SPR_SPRG5];
945 regs.sprg6 = env->spr[SPR_SPRG6];
946 regs.sprg7 = env->spr[SPR_SPRG7];
948 regs.pid = env->spr[SPR_BOOKE_PID];
950 for (i = 0;i < 32; i++)
951 regs.gpr[i] = env->gpr[i];
953 regs.cr = 0;
954 for (i = 0; i < 8; i++) {
955 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
958 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
959 if (ret < 0)
960 return ret;
962 kvm_put_fp(cs);
964 if (env->tlb_dirty) {
965 kvm_sw_tlb_put(cpu);
966 env->tlb_dirty = false;
969 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
970 ret = kvmppc_put_books_sregs(cpu);
971 if (ret < 0) {
972 return ret;
976 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
977 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
980 if (cap_one_reg) {
981 int i;
        /* We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
987 for (i = 0; i < 1024; i++) {
988 uint64_t id = env->spr_cb[i].one_reg_id;
990 if (id != 0) {
991 kvm_put_one_spr(cs, id, i);
995 #ifdef TARGET_PPC64
996 if (msr_ts) {
997 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
998 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1000 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1001 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1003 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1004 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1005 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1006 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1007 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1008 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1009 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1010 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1011 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1012 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1015 if (cap_papr) {
1016 if (kvm_put_vpa(cs) < 0) {
1017 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1021 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1022 #endif /* TARGET_PPC64 */
1025 return ret;
1028 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1030 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1033 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1035 CPUPPCState *env = &cpu->env;
1036 struct kvm_sregs sregs;
1037 int ret;
1039 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1040 if (ret < 0) {
1041 return ret;
1044 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1045 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1046 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1047 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1048 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1049 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1050 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1051 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1052 env->spr[SPR_DECR] = sregs.u.e.dec;
1053 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1054 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1055 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1058 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1059 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1060 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1061 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1062 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1063 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1066 if (sregs.u.e.features & KVM_SREGS_E_64) {
1067 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1070 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1071 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1074 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1075 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1076 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1077 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1078 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1079 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1080 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1081 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1082 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1083 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1084 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1085 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1086 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1087 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1088 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1089 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1090 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1091 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1092 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1093 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1094 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1095 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1096 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1097 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1098 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1099 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1100 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1101 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1102 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1103 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1104 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1105 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1106 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1108 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1109 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1110 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1111 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1112 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1113 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1114 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1117 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1118 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1119 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1122 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1123 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1124 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1125 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1126 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1130 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1131 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1132 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1133 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1134 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1135 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1136 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1137 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1138 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1139 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1140 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1143 if (sregs.u.e.features & KVM_SREGS_EXP) {
1144 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1147 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1148 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1149 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1152 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1153 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1154 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1155 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1157 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1158 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1159 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1163 return 0;
1166 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1168 CPUPPCState *env = &cpu->env;
1169 struct kvm_sregs sregs;
1170 int ret;
1171 int i;
1173 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1174 if (ret < 0) {
1175 return ret;
1178 if (!env->external_htab) {
1179 ppc_store_sdr1(env, sregs.u.s.sdr1);
1182 /* Sync SLB */
1183 #ifdef TARGET_PPC64
1185 * The packed SLB array we get from KVM_GET_SREGS only contains
1186 * information about valid entries. So we flush our internal copy
1187 * to get rid of stale ones, then put all valid SLB entries back
1188 * in.
1190 memset(env->slb, 0, sizeof(env->slb));
1191 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1192 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1193 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1195 * Only restore valid entries
1197 if (rb & SLB_ESID_V) {
1198 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1201 #endif
1203 /* Sync SRs */
1204 for (i = 0; i < 16; i++) {
1205 env->sr[i] = sregs.u.s.ppc32.sr[i];
1208 /* Sync BATs */
1209 for (i = 0; i < 8; i++) {
1210 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1211 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1212 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1213 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1216 return 0;
1219 int kvm_arch_get_registers(CPUState *cs)
1221 PowerPCCPU *cpu = POWERPC_CPU(cs);
1222 CPUPPCState *env = &cpu->env;
1223 struct kvm_regs regs;
1224 uint32_t cr;
1225 int i, ret;
1227 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1228 if (ret < 0)
1229 return ret;
1231 cr = regs.cr;
1232 for (i = 7; i >= 0; i--) {
1233 env->crf[i] = cr & 15;
1234 cr >>= 4;
1237 env->ctr = regs.ctr;
1238 env->lr = regs.lr;
1239 cpu_write_xer(env, regs.xer);
1240 env->msr = regs.msr;
1241 env->nip = regs.pc;
1243 env->spr[SPR_SRR0] = regs.srr0;
1244 env->spr[SPR_SRR1] = regs.srr1;
1246 env->spr[SPR_SPRG0] = regs.sprg0;
1247 env->spr[SPR_SPRG1] = regs.sprg1;
1248 env->spr[SPR_SPRG2] = regs.sprg2;
1249 env->spr[SPR_SPRG3] = regs.sprg3;
1250 env->spr[SPR_SPRG4] = regs.sprg4;
1251 env->spr[SPR_SPRG5] = regs.sprg5;
1252 env->spr[SPR_SPRG6] = regs.sprg6;
1253 env->spr[SPR_SPRG7] = regs.sprg7;
1255 env->spr[SPR_BOOKE_PID] = regs.pid;
1257 for (i = 0;i < 32; i++)
1258 env->gpr[i] = regs.gpr[i];
1260 kvm_get_fp(cs);
1262 if (cap_booke_sregs) {
1263 ret = kvmppc_get_booke_sregs(cpu);
1264 if (ret < 0) {
1265 return ret;
1269 if (cap_segstate) {
1270 ret = kvmppc_get_books_sregs(cpu);
1271 if (ret < 0) {
1272 return ret;
1276 if (cap_hior) {
1277 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1280 if (cap_one_reg) {
1281 int i;
        /* We deliberately ignore errors here: for kernels which have
         * the ONE_REG calls but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
1287 for (i = 0; i < 1024; i++) {
1288 uint64_t id = env->spr_cb[i].one_reg_id;
1290 if (id != 0) {
1291 kvm_get_one_spr(cs, id, i);
1295 #ifdef TARGET_PPC64
1296 if (msr_ts) {
1297 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1298 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1300 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1301 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1303 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1304 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1305 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1306 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1307 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1308 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1309 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1310 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1311 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1312 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1315 if (cap_papr) {
1316 if (kvm_get_vpa(cs) < 0) {
1317 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1321 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1322 #endif
1325 return 0;
1328 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1330 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1332 if (irq != PPC_INTERRUPT_EXT) {
1333 return 0;
1336 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1337 return 0;
1340 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1342 return 0;
1345 #if defined(TARGET_PPCEMB)
1346 #define PPC_INPUT_INT PPC40x_INPUT_INT
1347 #elif defined(TARGET_PPC64)
1348 #define PPC_INPUT_INT PPC970_INPUT_INT
1349 #else
1350 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1351 #endif
1353 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1355 PowerPCCPU *cpu = POWERPC_CPU(cs);
1356 CPUPPCState *env = &cpu->env;
1357 int r;
1358 unsigned irq;
1360 qemu_mutex_lock_iothread();
1362 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1363 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1364 if (!cap_interrupt_level &&
1365 run->ready_for_interrupt_injection &&
1366 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1367 (env->irq_input_state & (1<<PPC_INPUT_INT)))
        /* For now KVM disregards the 'irq' argument. However, in the
         * future KVM could cache it in-kernel to avoid a heavyweight exit
         * when reading the UIC.
         */
        irq = KVM_INTERRUPT_SET;

        DPRINTF("injected interrupt %d\n", irq);
1376 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1377 if (r < 0) {
1378 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1381 /* Always wake up soon in case the interrupt was level based */
1382 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1383 (NANOSECONDS_PER_SECOND / 50));
    /* We don't know if there are more interrupts pending after this. However,
     * the guest will return to userspace in the course of handling this one
     * anyway, so we will get a chance to deliver the rest. */
1390 qemu_mutex_unlock_iothread();
1393 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1395 return MEMTXATTRS_UNSPECIFIED;
1398 int kvm_arch_process_async_events(CPUState *cs)
1400 return cs->halted;
1403 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1405 CPUState *cs = CPU(cpu);
1406 CPUPPCState *env = &cpu->env;
1408 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1409 cs->halted = 1;
1410 cs->exception_index = EXCP_HLT;
1413 return 0;
1416 /* map dcr access to existing qemu dcr emulation */
1417 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1419 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1420 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1422 return 0;
1425 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1427 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1428 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1430 return 0;
1433 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1435 /* Mixed endian case is not handled */
1436 uint32_t sc = debug_inst_opcode;
1438 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1439 sizeof(sc), 0) ||
1440 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1441 return -EINVAL;
1444 return 0;
1447 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1449 uint32_t sc;
1451 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1452 sc != debug_inst_opcode ||
1453 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1454 sizeof(sc), 1)) {
1455 return -EINVAL;
1458 return 0;
1461 static int find_hw_breakpoint(target_ulong addr, int type)
1463 int n;
1465 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1466 <= ARRAY_SIZE(hw_debug_points));
1468 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1469 if (hw_debug_points[n].addr == addr &&
1470 hw_debug_points[n].type == type) {
1471 return n;
1475 return -1;
1478 static int find_hw_watchpoint(target_ulong addr, int *flag)
1480 int n;
1482 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1483 if (n >= 0) {
1484 *flag = BP_MEM_ACCESS;
1485 return n;
1488 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1489 if (n >= 0) {
1490 *flag = BP_MEM_WRITE;
1491 return n;
1494 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1495 if (n >= 0) {
1496 *flag = BP_MEM_READ;
1497 return n;
1500 return -1;
1503 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1504 target_ulong len, int type)
1506 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1507 return -ENOBUFS;
1510 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1511 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1513 switch (type) {
1514 case GDB_BREAKPOINT_HW:
1515 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1516 return -ENOBUFS;
1519 if (find_hw_breakpoint(addr, type) >= 0) {
1520 return -EEXIST;
1523 nb_hw_breakpoint++;
1524 break;
1526 case GDB_WATCHPOINT_WRITE:
1527 case GDB_WATCHPOINT_READ:
1528 case GDB_WATCHPOINT_ACCESS:
1529 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1530 return -ENOBUFS;
1533 if (find_hw_breakpoint(addr, type) >= 0) {
1534 return -EEXIST;
1537 nb_hw_watchpoint++;
1538 break;
1540 default:
1541 return -ENOSYS;
1544 return 0;
1547 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1548 target_ulong len, int type)
1550 int n;
1552 n = find_hw_breakpoint(addr, type);
1553 if (n < 0) {
1554 return -ENOENT;
1557 switch (type) {
1558 case GDB_BREAKPOINT_HW:
1559 nb_hw_breakpoint--;
1560 break;
1562 case GDB_WATCHPOINT_WRITE:
1563 case GDB_WATCHPOINT_READ:
1564 case GDB_WATCHPOINT_ACCESS:
1565 nb_hw_watchpoint--;
1566 break;
1568 default:
1569 return -ENOSYS;
1571 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1573 return 0;
1576 void kvm_arch_remove_all_hw_breakpoints(void)
1578 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1581 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1583 int n;
1585 /* Software Breakpoint updates */
1586 if (kvm_sw_breakpoints_active(cs)) {
1587 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1590 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1591 <= ARRAY_SIZE(hw_debug_points));
1592 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1594 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1595 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1596 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1597 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1598 switch (hw_debug_points[n].type) {
1599 case GDB_BREAKPOINT_HW:
1600 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1601 break;
1602 case GDB_WATCHPOINT_WRITE:
1603 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1604 break;
1605 case GDB_WATCHPOINT_READ:
1606 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1607 break;
1608 case GDB_WATCHPOINT_ACCESS:
1609 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1610 KVMPPC_DEBUG_WATCH_READ;
1611 break;
1612 default:
1613 cpu_abort(cs, "Unsupported breakpoint type\n");
1615 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1620 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1622 CPUState *cs = CPU(cpu);
1623 CPUPPCState *env = &cpu->env;
1624 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1625 int handle = 0;
1626 int n;
1627 int flag = 0;
1629 if (cs->singlestep_enabled) {
1630 handle = 1;
1631 } else if (arch_info->status) {
1632 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1633 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1634 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1635 if (n >= 0) {
1636 handle = 1;
1638 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1639 KVMPPC_DEBUG_WATCH_WRITE)) {
1640 n = find_hw_watchpoint(arch_info->address, &flag);
1641 if (n >= 0) {
1642 handle = 1;
1643 cs->watchpoint_hit = &hw_watchpoint;
1644 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1645 hw_watchpoint.flags = flag;
1649 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1650 handle = 1;
    } else {
        /* QEMU is not able to handle this debug exception, so inject a
         * program exception into the guest;
         * Yes, a program exception, NOT a debug exception !!
         * When QEMU uses the debug resources it must keep the debug
         * exception permanently enabled, i.e. MSR_DE is set and MSRP_DEP
         * is set so the guest cannot clear MSR_DE.
         * When emulating the debug resources for the guest we instead want
         * the guest to control MSR_DE (enable/disable the debug interrupt
         * as needed).
         * Supporting both configurations at once is not possible, so debug
         * resources cannot be shared between QEMU and the guest on BookE.
         * In the current design QEMU gets priority over the guest: if QEMU
         * is using the debug resources then the guest cannot use them.
         * For software breakpoints QEMU uses a privileged instruction, so
         * the only way we can get here without a QEMU-owned debug event is
         * that the guest executed a privileged / illegal instruction, which
         * is why we inject a program interrupt.
         */
        cpu_synchronize_state(cs);
        /* env->nip is PC, so increment this by 4 to use
         * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
         */
        env->nip += 4;
        cs->exception_index = POWERPC_EXCP_PROGRAM;
        env->error_code = POWERPC_EXCP_INVAL;
        ppc_cpu_do_interrupt(cs);
    }
1683 return handle;
1686 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1688 PowerPCCPU *cpu = POWERPC_CPU(cs);
1689 CPUPPCState *env = &cpu->env;
1690 int ret;
1692 qemu_mutex_lock_iothread();
1694 switch (run->exit_reason) {
1695 case KVM_EXIT_DCR:
1696 if (run->dcr.is_write) {
1697 DPRINTF("handle dcr write\n");
1698 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1699 } else {
1700 DPRINTF("handle dcr read\n");
1701 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1703 break;
1704 case KVM_EXIT_HLT:
1705 DPRINTF("handle halt\n");
1706 ret = kvmppc_handle_halt(cpu);
1707 break;
1708 #if defined(TARGET_PPC64)
1709 case KVM_EXIT_PAPR_HCALL:
1710 DPRINTF("handle PAPR hypercall\n");
1711 run->papr_hcall.ret = spapr_hypercall(cpu,
1712 run->papr_hcall.nr,
1713 run->papr_hcall.args);
1714 ret = 0;
1715 break;
1716 #endif
1717 case KVM_EXIT_EPR:
1718 DPRINTF("handle epr\n");
1719 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1720 ret = 0;
1721 break;
1722 case KVM_EXIT_WATCHDOG:
1723 DPRINTF("handle watchdog expiry\n");
1724 watchdog_perform_action();
1725 ret = 0;
1726 break;
1728 case KVM_EXIT_DEBUG:
1729 DPRINTF("handle debug exception\n");
1730 if (kvm_handle_debug(cpu, run)) {
1731 ret = EXCP_DEBUG;
1732 break;
1734 /* re-enter, this exception was guest-internal */
1735 ret = 0;
1736 break;
1738 default:
1739 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1740 ret = -1;
1741 break;
1744 qemu_mutex_unlock_iothread();
1745 return ret;
1748 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1750 CPUState *cs = CPU(cpu);
1751 uint32_t bits = tsr_bits;
1752 struct kvm_one_reg reg = {
1753 .id = KVM_REG_PPC_OR_TSR,
1754 .addr = (uintptr_t) &bits,
1757 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1760 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1763 CPUState *cs = CPU(cpu);
1764 uint32_t bits = tsr_bits;
1765 struct kvm_one_reg reg = {
1766 .id = KVM_REG_PPC_CLEAR_TSR,
1767 .addr = (uintptr_t) &bits,
1770 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1773 int kvmppc_set_tcr(PowerPCCPU *cpu)
1775 CPUState *cs = CPU(cpu);
1776 CPUPPCState *env = &cpu->env;
1777 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1779 struct kvm_one_reg reg = {
1780 .id = KVM_REG_PPC_TCR,
1781 .addr = (uintptr_t) &tcr,
1784 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1787 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1789 CPUState *cs = CPU(cpu);
1790 int ret;
1792 if (!kvm_enabled()) {
1793 return -1;
    if (!cap_ppc_watchdog) {
        printf("warning: KVM does not support watchdog\n");
        return -1;
    }
1801 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1802 if (ret < 0) {
1803 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1804 __func__, strerror(-ret));
1805 return ret;
1808 return ret;
1811 static int read_cpuinfo(const char *field, char *value, int len)
1813 FILE *f;
1814 int ret = -1;
1815 int field_len = strlen(field);
1816 char line[512];
1818 f = fopen("/proc/cpuinfo", "r");
1819 if (!f) {
1820 return -1;
1823 do {
1824 if (!fgets(line, sizeof(line), f)) {
1825 break;
1827 if (!strncmp(line, field, field_len)) {
1828 pstrcpy(value, len, line);
1829 ret = 0;
1830 break;
1832 } while(*line);
1834 fclose(f);
1836 return ret;
1839 uint32_t kvmppc_get_tbfreq(void)
1841 char line[512];
1842 char *ns;
1843 uint32_t retval = NANOSECONDS_PER_SECOND;
1845 if (read_cpuinfo("timebase", line, sizeof(line))) {
1846 return retval;
1849 if (!(ns = strchr(line, ':'))) {
1850 return retval;
1853 ns++;
1855 return atoi(ns);
1858 bool kvmppc_get_host_serial(char **value)
1860 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1861 NULL);
1864 bool kvmppc_get_host_model(char **value)
1866 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1869 /* Try to find a device tree node for a CPU with clock-frequency property */
1870 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1872 struct dirent *dirp;
1873 DIR *dp;
1875 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1876 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1877 return -1;
1880 buf[0] = '\0';
1881 while ((dirp = readdir(dp)) != NULL) {
1882 FILE *f;
1883 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1884 dirp->d_name);
1885 f = fopen(buf, "r");
1886 if (f) {
1887 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1888 fclose(f);
1889 break;
1891 buf[0] = '\0';
1893 closedir(dp);
1894 if (buf[0] == '\0') {
1895 printf("Unknown host!\n");
1896 return -1;
1899 return 0;
1902 static uint64_t kvmppc_read_int_dt(const char *filename)
1904 union {
1905 uint32_t v32;
1906 uint64_t v64;
1907 } u;
1908 FILE *f;
1909 int len;
1911 f = fopen(filename, "rb");
1912 if (!f) {
1913 return -1;
1916 len = fread(&u, 1, sizeof(u), f);
1917 fclose(f);
1918 switch (len) {
1919 case 4:
1920 /* property is a 32-bit quantity */
1921 return be32_to_cpu(u.v32);
1922 case 8:
1923 return be64_to_cpu(u.v64);
1926 return 0;
1929 /* Read a CPU node property from the host device tree that's a single
1930 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1931 * (can't find or open the property, or doesn't understand the
1932 * format) */
1933 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1935 char buf[PATH_MAX], *tmp;
1936 uint64_t val;
1938 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1939 return -1;
1942 tmp = g_strdup_printf("%s/%s", buf, propname);
1943 val = kvmppc_read_int_dt(tmp);
1944 g_free(tmp);
1946 return val;
1949 uint64_t kvmppc_get_clockfreq(void)
1951 return kvmppc_read_int_cpu_dt("clock-frequency");
1954 uint32_t kvmppc_get_vmx(void)
1956 return kvmppc_read_int_cpu_dt("ibm,vmx");
1959 uint32_t kvmppc_get_dfp(void)
1961 return kvmppc_read_int_cpu_dt("ibm,dfp");
1964 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1966 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1967 CPUState *cs = CPU(cpu);
1969 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1970 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1971 return 0;
1974 return 1;
1977 int kvmppc_get_hasidle(CPUPPCState *env)
1979 struct kvm_ppc_pvinfo pvinfo;
1981 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1982 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1983 return 1;
1986 return 0;
1989 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1991 uint32_t *hc = (uint32_t*)buf;
1992 struct kvm_ppc_pvinfo pvinfo;
1994 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1995 memcpy(buf, pvinfo.hcall, buf_len);
1996 return 0;
    /*
     * Fallback to always fail hypercalls regardless of endianness:
     *
     *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
     *     li r3, -1
     *     b .+8       (becomes nop in wrong endian)
     *     bswap32(li r3, -1)
     */
2008 hc[0] = cpu_to_be32(0x08000048);
2009 hc[1] = cpu_to_be32(0x3860ffff);
2010 hc[2] = cpu_to_be32(0x48000008);
2011 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2013 return 1;
2016 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2018 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2021 void kvmppc_enable_logical_ci_hcalls(void)
2024 * FIXME: it would be nice if we could detect the cases where
2025 * we're using a device which requires the in kernel
2026 * implementation of these hcalls, but the kernel lacks them and
2027 * produce a warning.
2029 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2030 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2033 void kvmppc_enable_set_mode_hcall(void)
2035 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2038 void kvmppc_set_papr(PowerPCCPU *cpu)
2040 CPUState *cs = CPU(cpu);
2041 int ret;
2043 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2044 if (ret) {
2045 error_report("This vCPU type or KVM version does not support PAPR");
2046 exit(1);
2049 /* Update the capability flag so we sync the right information
2050 * with kvm */
2051 cap_papr = 1;
2054 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2056 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2059 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2061 CPUState *cs = CPU(cpu);
2062 int ret;
2064 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2065 if (ret && mpic_proxy) {
2066 error_report("This KVM version does not support EPR");
2067 exit(1);
2071 int kvmppc_smt_threads(void)
2073 return cap_ppc_smt ? cap_ppc_smt : 1;
2076 #ifdef TARGET_PPC64
2077 off_t kvmppc_alloc_rma(void **rma)
2079 off_t size;
2080 int fd;
2081 struct kvm_allocate_rma ret;
    /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
     * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
     * not necessary on this hardware
     * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
     *
     * FIXME: We should allow the user to force contiguous RMA
     * allocation in the cap_ppc_rma==1 case.
     */
    if (cap_ppc_rma < 2) {
2092 return 0;
2095 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2096 if (fd < 0) {
2097 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2098 strerror(errno));
2099 return -1;
2102 size = MIN(ret.rma_size, 256ul << 20);
2104 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2105 if (*rma == MAP_FAILED) {
2106 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2107 return -1;
2110 return size;
2113 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2115 struct kvm_ppc_smmu_info info;
2116 long rampagesize, best_page_shift;
2117 int i;
2119 if (cap_ppc_rma >= 2) {
2120 return current_size;
2123 /* Find the largest hardware supported page size that's less than
2124 * or equal to the (logical) backing page size of guest RAM */
2125 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2126 rampagesize = getrampagesize();
2127 best_page_shift = 0;
2129 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2130 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2132 if (!sps->page_shift) {
2133 continue;
2136 if ((sps->page_shift > best_page_shift)
2137 && ((1UL << sps->page_shift) <= rampagesize)) {
2138 best_page_shift = sps->page_shift;
2142 return MIN(current_size,
2143 1ULL << (best_page_shift + hash_shift - 7));
2145 #endif
2147 bool kvmppc_spapr_use_multitce(void)
2149 return cap_spapr_multitce;
2152 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2153 bool need_vfio)
2155 struct kvm_create_spapr_tce args = {
2156 .liobn = liobn,
2157 .window_size = window_size,
2159 long len;
2160 int fd;
2161 void *table;
    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
2167 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2168 return NULL;
2171 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2172 if (fd < 0) {
2173 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2174 liobn);
2175 return NULL;
2178 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2179 /* FIXME: round this up to page size */
2181 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2182 if (table == MAP_FAILED) {
2183 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2184 liobn);
2185 close(fd);
2186 return NULL;
2189 *pfd = fd;
2190 return table;
2193 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2195 long len;
2197 if (fd < 0) {
2198 return -1;
2201 len = nb_table * sizeof(uint64_t);
2202 if ((munmap(table, len) < 0) ||
2203 (close(fd) < 0)) {
2204 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2205 strerror(errno));
2206 /* Leak the table */
2207 }
2209 return 0;
2210 }
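/*
 * Editor's sketch (not part of the original file): one way the two
 * helpers above could be paired by a caller.  The LIOBN, the 256MiB
 * window and the function name below are made-up example values.
 */
static void spapr_tce_example(void)
{
    uint32_t liobn = 0x80000001;                  /* hypothetical LIOBN */
    uint32_t window_size = 256 * 1024 * 1024;     /* 256MiB DMA window */
    uint32_t nb_table = window_size / SPAPR_TCE_PAGE_SIZE;
    int fd;
    void *table;

    /* Try to get an in-kernel TCE table backing this window */
    table = kvmppc_create_spapr_tce(liobn, window_size, &fd, false);
    if (!table) {
        return;        /* caller would fall back to a userspace table */
    }

    /* ... guest runs; H_PUT_TCE updates the mmap'd table ... */

    /* Tear down: munmap()s the table and closes the KVM fd */
    kvmppc_remove_spapr_tce(table, fd, nb_table);
}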
2212 int kvmppc_reset_htab(int shift_hint)
2214 uint32_t shift = shift_hint;
2216 if (!kvm_enabled()) {
2217 /* Full emulation, tell caller to allocate htab itself */
2218 return 0;
2220 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2221 int ret;
2222 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2223 if (ret == -ENOTTY) {
2224 /* At least some versions of PR KVM advertise the
2225 * capability, but don't implement the ioctl(). Oops.
2226 * Return 0 so that we allocate the htab in qemu, as is
2227 * correct for PR. */
2228 return 0;
2229 } else if (ret < 0) {
2230 return ret;
2232 return shift;
2235 /* We have a kernel that predates the htab reset calls.  For PR
2236 * KVM, we need to allocate the htab ourselves; an HV KVM of
2237 * this era will already have allocated a fixed 16MB hash
2238 * table.  Kernels of this era have the GET_PVINFO capability
2239 * only on PR, so we use that as a hack to determine the right
2240 * answer. */
2241 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2242 /* PR - tell caller to allocate htab */
2243 return 0;
2244 } else {
2245 /* HV - assume 16MB kernel allocated htab */
2246 return 24;
2247 }
2248 }
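/*
 * Editor's sketch (not part of the original file): how a caller might
 * act on kvmppc_reset_htab()'s return value.  The function name and the
 * shift hint of 24 (a 16MB hash table) are hypothetical.
 */
static void spapr_htab_setup_example(void)
{
    int shift = kvmppc_reset_htab(24);

    if (shift > 0) {
        /* The kernel owns a hash table of 2^shift bytes (HV KVM or a
         * kernel with KVM_PPC_ALLOCATE_HTAB); QEMU must not allocate
         * its own copy. */
    } else if (shift == 0) {
        /* PR KVM, full emulation, or a pre-ALLOCATE_HTAB kernel:
         * allocate the hash table in QEMU. */
    } else {
        /* shift < 0: the KVM_PPC_ALLOCATE_HTAB ioctl failed; the error
         * would be propagated to the caller. */
    }
}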
2250 static inline uint32_t mfpvr(void)
2252 uint32_t pvr;
2254 asm ("mfpvr %0"
2255 : "=r"(pvr));
2256 return pvr;
2259 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2261 if (on) {
2262 *word |= flags;
2263 } else {
2264 *word &= ~flags;
2268 static void kvmppc_host_cpu_initfn(Object *obj)
2270 assert(kvm_enabled());
2273 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2275 DeviceClass *dc = DEVICE_CLASS(oc);
2276 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2277 uint32_t vmx = kvmppc_get_vmx();
2278 uint32_t dfp = kvmppc_get_dfp();
2279 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2280 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2282 /* Now fix up the class with information we can query from the host */
2283 pcc->pvr = mfpvr();
2285 if (vmx != -1) {
2286 /* Only override when we know what the host supports */
2287 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2288 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2290 if (dfp != -1) {
2291 /* Only override when we know what the host supports */
2292 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2295 if (dcache_size != -1) {
2296 pcc->l1_dcache_size = dcache_size;
2299 if (icache_size != -1) {
2300 pcc->l1_icache_size = icache_size;
2303 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2304 dc->cannot_destroy_with_object_finalize_yet = true;
2307 bool kvmppc_has_cap_epr(void)
2309 return cap_epr;
2312 bool kvmppc_has_cap_htab_fd(void)
2314 return cap_htab_fd;
2317 bool kvmppc_has_cap_fixup_hcalls(void)
2319 return cap_fixup_hcalls;
2322 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2324 ObjectClass *oc = OBJECT_CLASS(pcc);
2326 while (oc && !object_class_is_abstract(oc)) {
2327 oc = object_class_get_parent(oc);
2329 assert(oc);
2331 return POWERPC_CPU_CLASS(oc);
2334 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2336 uint32_t host_pvr = mfpvr();
2337 PowerPCCPUClass *pvr_pcc;
2339 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2340 if (pvr_pcc == NULL) {
2341 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2344 return pvr_pcc;
2347 #if defined(TARGET_PPC64)
2348 static void spapr_cpu_core_host_initfn(Object *obj)
2350 sPAPRCPUCore *core = SPAPR_CPU_CORE(obj);
2351 char *name = g_strdup_printf("%s-" TYPE_POWERPC_CPU, "host");
2352 ObjectClass *oc = object_class_by_name(name);
2354 g_assert(oc);
2355 g_free((void *)name);
2356 core->cpu_class = oc;
2358 #endif
2360 static int kvm_ppc_register_host_cpu_type(void)
2362 TypeInfo type_info = {
2363 .name = TYPE_HOST_POWERPC_CPU,
2364 .instance_init = kvmppc_host_cpu_initfn,
2365 .class_init = kvmppc_host_cpu_class_init,
2366 };
2367 PowerPCCPUClass *pvr_pcc;
2368 DeviceClass *dc;
2370 pvr_pcc = kvm_ppc_get_host_cpu_class();
2371 if (pvr_pcc == NULL) {
2372 return -1;
2374 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2375 type_register(&type_info);
2377 #if defined(TARGET_PPC64)
2378 type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2379 type_info.parent = TYPE_SPAPR_CPU_CORE;
2380 type_info.instance_size = sizeof(sPAPRCPUCore);
2381 type_info.instance_init = spapr_cpu_core_host_initfn;
2382 type_info.class_init = NULL;
2383 type_register(&type_info);
2384 g_free((void *)type_info.name);
2385 type_info.instance_size = 0;
2386 type_info.instance_init = NULL;
2387 #endif
2389 /* Register a generic CPU class for this CPU family */
2390 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2391 dc = DEVICE_CLASS(pvr_pcc);
2392 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2393 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2394 type_register(&type_info);
2396 return 0;
2399 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2401 struct kvm_rtas_token_args args = {
2402 .token = token,
2403 };
2405 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2406 return -ENOENT;
2409 strncpy(args.name, function, sizeof(args.name));
2411 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2414 int kvmppc_get_htab_fd(bool write)
2416 struct kvm_get_htab_fd s = {
2417 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2418 .start_index = 0,
2419 };
2421 if (!cap_htab_fd) {
2422 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2423 return -1;
2426 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2429 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2431 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2432 uint8_t buf[bufsize];
2433 ssize_t rc;
2435 do {
2436 rc = read(fd, buf, bufsize);
2437 if (rc < 0) {
2438 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2439 strerror(errno));
2440 return rc;
2441 } else if (rc) {
2442 uint8_t *buffer = buf;
2443 ssize_t n = rc;
2444 while (n) {
2445 struct kvm_get_htab_header *head =
2446 (struct kvm_get_htab_header *) buffer;
2447 size_t chunksize = sizeof(*head) +
2448 HASH_PTE_SIZE_64 * head->n_valid;
2450 qemu_put_be32(f, head->index);
2451 qemu_put_be16(f, head->n_valid);
2452 qemu_put_be16(f, head->n_invalid);
2453 qemu_put_buffer(f, (void *)(head + 1),
2454 HASH_PTE_SIZE_64 * head->n_valid);
2456 buffer += chunksize;
2457 n -= chunksize;
2460 } while ((rc != 0)
2461 && ((max_ns < 0)
2462 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2464 return (rc == 0) ? 1 : 0;
2465 }
2467 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2468 uint16_t n_valid, uint16_t n_invalid)
2470 struct kvm_get_htab_header *buf;
2471 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2472 ssize_t rc;
2474 buf = alloca(chunksize);
2475 buf->index = index;
2476 buf->n_valid = n_valid;
2477 buf->n_invalid = n_invalid;
2479 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2481 rc = write(fd, buf, chunksize);
2482 if (rc < 0) {
2483 fprintf(stderr, "Error writing KVM hash table: %s\n",
2484 strerror(errno));
2485 return rc;
2487 if (rc != chunksize) {
2488 /* We should never get a short write on a single chunk */
2489 fprintf(stderr, "Short write, restoring KVM hash table\n");
2490 return -1;
2492 return 0;
2493 }
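/*
 * Editor's note (not from the original source): each record that
 * kvmppc_save_htab() writes into the migration stream is a big-endian
 * copy of the struct kvm_get_htab_header fields (be32 index, be16
 * n_valid, be16 n_invalid) followed by n_valid * HASH_PTE_SIZE_64 bytes
 * of raw HPTE data, so a record with n_valid = 2 and n_invalid = 6
 * occupies 8 + 2 * 16 = 40 bytes on the wire.  The sketch below shows
 * how a destination-side caller could read one record back; the
 * function name is hypothetical.
 */
static int kvmppc_load_htab_record_example(QEMUFile *f, int fd)
{
    uint32_t index = qemu_get_be32(f);
    uint16_t n_valid = qemu_get_be16(f);
    uint16_t n_invalid = qemu_get_be16(f);

    /* Pulls the HPTE payload out of the stream and writes it, together
     * with the reconstructed header, to the KVM HTAB fd. */
    return kvmppc_load_htab_chunk(f, fd, index, n_valid, n_invalid);
}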
2495 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2497 return true;
2500 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2502 return 1;
2505 int kvm_arch_on_sigbus(int code, void *addr)
2507 return 1;
2510 void kvm_arch_init_irq_routing(KVMState *s)
2511 {
2512 }
2514 struct kvm_get_htab_buf {
2515 struct kvm_get_htab_header header;
2516 /*
2517 * We require one extra entry for the read
2518 */
2519 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2520 };
2522 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2524 int htab_fd;
2525 struct kvm_get_htab_fd ghf;
2526 struct kvm_get_htab_buf *hpte_buf;
2528 ghf.flags = 0;
2529 ghf.start_index = pte_index;
2530 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2531 if (htab_fd < 0) {
2532 goto error_out;
2535 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2536 /*
2537 * Read the hpte group
2538 */
2539 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2540 goto out_close;
2543 close(htab_fd);
2544 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2546 out_close:
2547 g_free(hpte_buf);
2548 close(htab_fd);
2549 error_out:
2550 return 0;
2553 void kvmppc_hash64_free_pteg(uint64_t token)
2555 struct kvm_get_htab_buf *htab_buf;
2557 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2558 hpte);
2559 g_free(htab_buf);
2560 return;
2561 }
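/*
 * Editor's sketch (not part of the original file): the token returned by
 * kvmppc_hash64_read_pteg() above is simply the address of the hpte[]
 * array inside a heap-allocated kvm_get_htab_buf, so the caller can walk
 * it as pte0/pte1 pairs and must hand it back to
 * kvmppc_hash64_free_pteg() when done.  The function name below is
 * hypothetical.
 */
static void hash64_pteg_walk_example(PowerPCCPU *cpu, target_ulong pteg_index)
{
    uint64_t token = kvmppc_hash64_read_pteg(cpu, pteg_index);
    target_ulong *hptes;
    int i;

    if (!token) {
        return;     /* reading the PTEG through the HTAB fd failed */
    }

    hptes = (target_ulong *)(uintptr_t)token;
    for (i = 0; i < HPTES_PER_GROUP; i++) {
        target_ulong pte0 = hptes[i * 2];        /* first doubleword */
        target_ulong pte1 = hptes[i * 2 + 1];    /* second doubleword */

        /* ... compare pte0/pte1 against the VPN being translated ... */
        (void)pte0;
        (void)pte1;
    }

    kvmppc_hash64_free_pteg(token);
}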
2563 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2564 target_ulong pte0, target_ulong pte1)
2566 int htab_fd;
2567 struct kvm_get_htab_fd ghf;
2568 struct kvm_get_htab_buf hpte_buf;
2570 ghf.flags = 0;
2571 ghf.start_index = 0; /* Ignored */
2572 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2573 if (htab_fd < 0) {
2574 goto error_out;
2577 hpte_buf.header.n_valid = 1;
2578 hpte_buf.header.n_invalid = 0;
2579 hpte_buf.header.index = pte_index;
2580 hpte_buf.hpte[0] = pte0;
2581 hpte_buf.hpte[1] = pte1;
2582 /*
2583 * Write the hpte entry.
2584 * CAUTION: write() has the warn_unused_result attribute. Hence we
2585 * need to check the return value, even though we do nothing.
2586 */
2587 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2588 goto out_close;
2591 out_close:
2592 close(htab_fd);
2593 return;
2595 error_out:
2596 return;
2599 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2600 uint64_t address, uint32_t data, PCIDevice *dev)
2602 return 0;
2605 int kvm_arch_msi_data_to_gsi(uint32_t data)
2607 return data & 0xffff;
2610 int kvmppc_enable_hwrng(void)
2612 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2613 return -1;
2616 return kvmppc_enable_hcall(kvm_state, H_RANDOM);