target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include "qemu/osdep.h"
  18 #include <dirent.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/mman.h>
  21 #include <sys/vfs.h>
  22
  23 #include <linux/kvm.h>
  24
  25 #include "qemu-common.h"
  26 #include "qemu/error-report.h"
  27 #include "cpu.h"
  28 #include "qemu/timer.h"
  29 #include "sysemu/sysemu.h"
  30 #include "sysemu/kvm.h"
  31 #include "kvm_ppc.h"
  32 #include "sysemu/cpus.h"
  33 #include "sysemu/device_tree.h"
  34 #include "mmu-hash64.h"
  35
  36 #include "hw/sysbus.h"
  37 #include "hw/ppc/spapr.h"
  38 #include "hw/ppc/spapr_vio.h"
  39 #include "hw/ppc/ppc.h"
  40 #include "sysemu/watchdog.h"
  41 #include "trace.h"
  42 #include "exec/gdbstub.h"
  43 #include "exec/memattrs.h"
  44 #include "sysemu/hostmem.h"
  45 #include "qemu/cutils.h"
  46
//#define DEBUG_KVM

/*
 * DPRINTF(fmt, ...): debug output to stderr.
 *
 * When DEBUG_KVM is defined (uncomment the line above) this expands to an
 * fprintf(stderr, ...) call.  Otherwise it expands to an empty statement,
 * so its arguments are neither evaluated nor type-checked in normal builds.
 */
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
  56
/* Path of the "cpus" directory in the host's /proc device tree. */
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

/*
 * Capability table exported to the generic KVM code.  It lists nothing
 * besides the KVM_CAP_LAST_INFO entry (presumably the list terminator).
 */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

/*
 * Cached results of the capability probes done in kvm_arch_init().
 * Each flag holds the value returned by kvm_check_extension() for the
 * matching KVM_CAP_* constant.  Two exceptions, both visible in
 * kvm_arch_init(): cap_spapr_vfio is forced to false there, and cap_papr
 * is not probed there at all.
 */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;

/* Value of the KVM_REG_PPC_DEBUG_INST register, read in kvm_arch_init_vcpu() */
static uint32_t debug_inst_opcode;
  81
/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 *
 *     The timer is created in kvm_arch_init_vcpu() with kvm_kick_cpu() as
 *     its callback.
 */
static QEMUTimer *idle_timer;
  92
  93 static void kvm_kick_cpu(void *opaque)
  94 {
  95     PowerPCCPU *cpu = opaque;
  96
  97     qemu_cpu_kick(CPU(cpu));
  98 }
  99
 100 static int kvm_ppc_register_host_cpu_type(void);
 101
 102 int kvm_arch_init(MachineState *ms, KVMState *s)
 103 {
 104     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 105     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 106     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 107     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 108     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 109     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 110     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 111     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 112     cap_spapr_vfio = false;
 113     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 114     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 115     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 116     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 117     /* Note: we don't set cap_papr here, because this capability is
 118      * only activated after this by kvmppc_set_papr() */
 119     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 120     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 121
 122     if (!cap_interrupt_level) {
 123         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 124                         "VM to stall at times!\n");
 125     }
 126
 127     kvm_ppc_register_host_cpu_type();
 128
 129     return 0;
 130 }
 131
 132 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 133 {
 134     CPUPPCState *cenv = &cpu->env;
 135     CPUState *cs = CPU(cpu);
 136     struct kvm_sregs sregs;
 137     int ret;
 138
 139     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 140         /* What we're really trying to say is "if we're on BookE, we use
 141            the native PVR for now". This is the only sane way to check
 142            it though, so we potentially confuse users that they can run
 143            BookE guests on BookS. Let's hope nobody dares enough :) */
 144         return 0;
 145     } else {
 146         if (!cap_segstate) {
 147             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 148             return -ENOSYS;
 149         }
 150     }
 151
 152     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 153     if (ret) {
 154         return ret;
 155     }
 156
 157     sregs.pvr = cenv->spr[SPR_PVR];
 158     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 159 }
 160
 161 /* Set up a shared TLB array with KVM */
 162 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 163 {
 164     CPUPPCState *env = &cpu->env;
 165     CPUState *cs = CPU(cpu);
 166     struct kvm_book3e_206_tlb_params params = {};
 167     struct kvm_config_tlb cfg = {};
 168     unsigned int entries = 0;
 169     int ret, i;
 170
 171     if (!kvm_enabled() ||
 172         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 173         return 0;
 174     }
 175
 176     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 177
 178     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 179         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 180         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 181         entries += params.tlb_sizes[i];
 182     }
 183
 184     assert(entries == env->nb_tlb);
 185     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 186
 187     env->tlb_dirty = true;
 188
 189     cfg.array = (uintptr_t)env->tlb.tlbm;
 190     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 191     cfg.params = (uintptr_t)&params;
 192     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 193
 194     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 195     if (ret < 0) {
 196         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 197                 __func__, strerror(-ret));
 198         return ret;
 199     }
 200
 201     env->kvm_sw_tlb = true;
 202     return 0;
 203 }
 204
 205
 206 #if defined(TARGET_PPC64)
 207 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 208                                        struct kvm_ppc_smmu_info *info)
 209 {
 210     CPUPPCState *env = &cpu->env;
 211     CPUState *cs = CPU(cpu);
 212
 213     memset(info, 0, sizeof(*info));
 214
 215     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 216      * need to "guess" what the supported page sizes are.
 217      *
 218      * For that to work we make a few assumptions:
 219      *
 220      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 221      *   KVM which only supports 4K and 16M pages, but supports them
 222      *   regardless of the backing store characteritics. We also don't
 223      *   support 1T segments.
 224      *
 225      *   This is safe as if HV KVM ever supports that capability or PR
 226      *   KVM grows supports for more page/segment sizes, those versions
 227      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 228      *   will not hit this fallback
 229      *
 230      * - Else we are running HV KVM. This means we only support page
 231      *   sizes that fit in the backing store. Additionally we only
 232      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 233      *   P7 encodings for the SLB and hash table. Here too, we assume
 234      *   support for any newer processor will mean a kernel that
 235      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 236      *   this fallback.
 237      */
 238     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 239         /* No flags */
 240         info->flags = 0;
 241         info->slb_size = 64;
 242
 243         /* Standard 4k base page size segment */
 244         info->sps[0].page_shift = 12;
 245         info->sps[0].slb_enc = 0;
 246         info->sps[0].enc[0].page_shift = 12;
 247         info->sps[0].enc[0].pte_enc = 0;
 248
 249         /* Standard 16M large page size segment */
 250         info->sps[1].page_shift = 24;
 251         info->sps[1].slb_enc = SLB_VSID_L;
 252         info->sps[1].enc[0].page_shift = 24;
 253         info->sps[1].enc[0].pte_enc = 0;
 254     } else {
 255         int i = 0;
 256
 257         /* HV KVM has backing store size restrictions */
 258         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 259
 260         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 261             info->flags |= KVM_PPC_1T_SEGMENTS;
 262         }
 263
 264         if (env->mmu_model == POWERPC_MMU_2_06 ||
 265             env->mmu_model == POWERPC_MMU_2_07) {
 266             info->slb_size = 32;
 267         } else {
 268             info->slb_size = 64;
 269         }
 270
 271         /* Standard 4k base page size segment */
 272         info->sps[i].page_shift = 12;
 273         info->sps[i].slb_enc = 0;
 274         info->sps[i].enc[0].page_shift = 12;
 275         info->sps[i].enc[0].pte_enc = 0;
 276         i++;
 277
 278         /* 64K on MMU 2.06 and later */
 279         if (env->mmu_model == POWERPC_MMU_2_06 ||
 280             env->mmu_model == POWERPC_MMU_2_07) {
 281             info->sps[i].page_shift = 16;
 282             info->sps[i].slb_enc = 0x110;
 283             info->sps[i].enc[0].page_shift = 16;
 284             info->sps[i].enc[0].pte_enc = 1;
 285             i++;
 286         }
 287
 288         /* Standard 16M large page size segment */
 289         info->sps[i].page_shift = 24;
 290         info->sps[i].slb_enc = SLB_VSID_L;
 291         info->sps[i].enc[0].page_shift = 24;
 292         info->sps[i].enc[0].pte_enc = 0;
 293     }
 294 }
 295
 296 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 297 {
 298     CPUState *cs = CPU(cpu);
 299     int ret;
 300
 301     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 302         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 303         if (ret == 0) {
 304             return;
 305         }
 306     }
 307
 308     kvm_get_fallback_smmu_info(cpu, info);
 309 }
 310
 311 static long gethugepagesize(const char *mem_path)
 312 {
 313     struct statfs fs;
 314     int ret;
 315
 316     do {
 317         ret = statfs(mem_path, &fs);
 318     } while (ret != 0 && errno == EINTR);
 319
 320     if (ret != 0) {
 321         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 322                 strerror(errno));
 323         exit(1);
 324     }
 325
 326 #define HUGETLBFS_MAGIC       0x958458f6
 327
 328     if (fs.f_type != HUGETLBFS_MAGIC) {
 329         /* Explicit mempath, but it's ordinary pages */
 330         return getpagesize();
 331     }
 332
 333     /* It's hugepage, return the huge page size */
 334     return fs.f_bsize;
 335 }
 336
 337 /*
 338  * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
 339  * may or may not name the same files / on the same filesystem now as
 340  * when we actually open and map them.  Iterate over the file
 341  * descriptors instead, and use qemu_fd_getpagesize().
 342  */
 343 static int find_max_supported_pagesize(Object *obj, void *opaque)
 344 {
 345     char *mem_path;
 346     long *hpsize_min = opaque;
 347
 348     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
 349         mem_path = object_property_get_str(obj, "mem-path", NULL);
 350         if (mem_path) {
 351             long hpsize = gethugepagesize(mem_path);
 352             if (hpsize < *hpsize_min) {
 353                 *hpsize_min = hpsize;
 354             }
 355         } else {
 356             *hpsize_min = getpagesize();
 357         }
 358     }
 359
 360     return 0;
 361 }
 362
 363 static long getrampagesize(void)
 364 {
 365     long hpsize = LONG_MAX;
 366     Object *memdev_root;
 367
 368     if (mem_path) {
 369         return gethugepagesize(mem_path);
 370     }
 371
 372     /* it's possible we have memory-backend objects with
 373      * hugepage-backed RAM. these may get mapped into system
 374      * address space via -numa parameters or memory hotplug
 375      * hooks. we want to take these into account, but we
 376      * also want to make sure these supported hugepage
 377      * sizes are applicable across the entire range of memory
 378      * we may boot from, so we take the min across all
 379      * backends, and assume normal pages in cases where a
 380      * backend isn't backed by hugepages.
 381      */
 382     memdev_root = object_resolve_path("/objects", NULL);
 383     if (!memdev_root) {
 384         return getpagesize();
 385     }
 386
 387     object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
 388
 389     return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
 390 }
 391
 392 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 393 {
 394     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 395         return true;
 396     }
 397
 398     return (1ul << shift) <= rampgsize;
 399 }
 400
 401 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 402 {
 403     static struct kvm_ppc_smmu_info smmu_info;
 404     static bool has_smmu_info;
 405     CPUPPCState *env = &cpu->env;
 406     long rampagesize;
 407     int iq, ik, jq, jk;
 408
 409     /* We only handle page sizes for 64-bit server guests for now */
 410     if (!(env->mmu_model & POWERPC_MMU_64)) {
 411         return;
 412     }
 413
 414     /* Collect MMU info from kernel if not already */
 415     if (!has_smmu_info) {
 416         kvm_get_smmu_info(cpu, &smmu_info);
 417         has_smmu_info = true;
 418     }
 419
 420     rampagesize = getrampagesize();
 421
 422     /* Convert to QEMU form */
 423     memset(&env->sps, 0, sizeof(env->sps));
 424
 425     /* If we have HV KVM, we need to forbid CI large pages if our
 426      * host page size is smaller than 64K.
 427      */
 428     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 429         env->ci_large_pages = getpagesize() >= 0x10000;
 430     }
 431
 432     /*
 433      * XXX This loop should be an entry wide AND of the capabilities that
 434      *     the selected CPU has with the capabilities that KVM supports.
 435      */
 436     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 437         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 438         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 439
 440         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 441                                  ksps->page_shift)) {
 442             continue;
 443         }
 444         qsps->page_shift = ksps->page_shift;
 445         qsps->slb_enc = ksps->slb_enc;
 446         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 447             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 448                                      ksps->enc[jk].page_shift)) {
 449                 continue;
 450             }
 451             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 452             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 453             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 454                 break;
 455             }
 456         }
 457         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 458             break;
 459         }
 460     }
 461     env->slb_nr = smmu_info.slb_size;
 462     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 463         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 464     }
 465 }
 466 #else /* defined (TARGET_PPC64) */
 467
/*
 * Stub used when TARGET_PPC64 is not defined: there is nothing to fix up,
 * so callers can invoke it unconditionally.
 */
static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}
 471
 472 #endif /* !defined (TARGET_PPC64) */
 473
 474 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 475 {
 476     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 477 }
 478
/* e500 supports 2 h/w breakpoint and 2 watchpoint.
 * book3s supports only 1 watchpoint, so array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

/* Table of hardware debug points: an address plus a type code per slot */
static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

/* NOTE(review): not used in the visible part of the file — confirm usage */
static CPUWatchpoint hw_watchpoint;

/* Default there is no breakpoint and watchpoint supported */
/* Limits; raised to 2 each for BookE in kvmppc_hw_debug_points_init() */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
/* Presumably the number of slots currently in use — confirm against users */
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;
 495 static int nb_hw_breakpoint;
 496 static int nb_hw_watchpoint;
 497
 498 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 499 {
 500     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 501         max_hw_breakpoint = 2;
 502         max_hw_watchpoint = 2;
 503     }
 504
 505     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 506         fprintf(stderr, "Error initializing h/w breakpoints\n");
 507         return;
 508     }
 509 }
 510
 511 int kvm_arch_init_vcpu(CPUState *cs)
 512 {
 513     PowerPCCPU *cpu = POWERPC_CPU(cs);
 514     CPUPPCState *cenv = &cpu->env;
 515     int ret;
 516
 517     /* Gather server mmu info from KVM and update the CPU state */
 518     kvm_fixup_page_sizes(cpu);
 519
 520     /* Synchronize sregs with kvm */
 521     ret = kvm_arch_sync_sregs(cpu);
 522     if (ret) {
 523         if (ret == -EINVAL) {
 524             error_report("Register sync failed... If you're using kvm-hv.ko,"
 525                          " only \"-cpu host\" is possible");
 526         }
 527         return ret;
 528     }
 529
 530     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 531
 532     /* Some targets support access to KVM's guest TLB. */
 533     switch (cenv->mmu_model) {
 534     case POWERPC_MMU_BOOKE206:
 535         ret = kvm_booke206_tlb_init(cpu);
 536         break;
 537     default:
 538         break;
 539     }
 540
 541     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 542     kvmppc_hw_debug_points_init(cenv);
 543
 544     return ret;
 545 }
 546
 547 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 548 {
 549     CPUPPCState *env = &cpu->env;
 550     CPUState *cs = CPU(cpu);
 551     struct kvm_dirty_tlb dirty_tlb;
 552     unsigned char *bitmap;
 553     int ret;
 554
 555     if (!env->kvm_sw_tlb) {
 556         return;
 557     }
 558
 559     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 560     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 561
 562     dirty_tlb.bitmap = (uintptr_t)bitmap;
 563     dirty_tlb.num_dirty = env->nb_tlb;
 564
 565     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 566     if (ret) {
 567         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 568                 __func__, strerror(-ret));
 569     }
 570
 571     g_free(bitmap);
 572 }
 573
 574 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 575 {
 576     PowerPCCPU *cpu = POWERPC_CPU(cs);
 577     CPUPPCState *env = &cpu->env;
 578     union {
 579         uint32_t u32;
 580         uint64_t u64;
 581     } val;
 582     struct kvm_one_reg reg = {
 583         .id = id,
 584         .addr = (uintptr_t) &val,
 585     };
 586     int ret;
 587
 588     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 589     if (ret != 0) {
 590         trace_kvm_failed_spr_get(spr, strerror(errno));
 591     } else {
 592         switch (id & KVM_REG_SIZE_MASK) {
 593         case KVM_REG_SIZE_U32:
 594             env->spr[spr] = val.u32;
 595             break;
 596
 597         case KVM_REG_SIZE_U64:
 598             env->spr[spr] = val.u64;
 599             break;
 600
 601         default:
 602             /* Don't handle this size yet */
 603             abort();
 604         }
 605     }
 606 }
 607
 608 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 609 {
 610     PowerPCCPU *cpu = POWERPC_CPU(cs);
 611     CPUPPCState *env = &cpu->env;
 612     union {
 613         uint32_t u32;
 614         uint64_t u64;
 615     } val;
 616     struct kvm_one_reg reg = {
 617         .id = id,
 618         .addr = (uintptr_t) &val,
 619     };
 620     int ret;
 621
 622     switch (id & KVM_REG_SIZE_MASK) {
 623     case KVM_REG_SIZE_U32:
 624         val.u32 = env->spr[spr];
 625         break;
 626
 627     case KVM_REG_SIZE_U64:
 628         val.u64 = env->spr[spr];
 629         break;
 630
 631     default:
 632         /* Don't handle this size yet */
 633         abort();
 634     }
 635
 636     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 637     if (ret != 0) {
 638         trace_kvm_failed_spr_set(spr, strerror(errno));
 639     }
 640 }
 641
 642 static int kvm_put_fp(CPUState *cs)
 643 {
 644     PowerPCCPU *cpu = POWERPC_CPU(cs);
 645     CPUPPCState *env = &cpu->env;
 646     struct kvm_one_reg reg;
 647     int i;
 648     int ret;
 649
 650     if (env->insns_flags & PPC_FLOAT) {
 651         uint64_t fpscr = env->fpscr;
 652         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 653
 654         reg.id = KVM_REG_PPC_FPSCR;
 655         reg.addr = (uintptr_t)&fpscr;
 656         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 657         if (ret < 0) {
 658             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 659             return ret;
 660         }
 661
 662         for (i = 0; i < 32; i++) {
 663             uint64_t vsr[2];
 664
 665 #ifdef HOST_WORDS_BIGENDIAN
 666             vsr[0] = float64_val(env->fpr[i]);
 667             vsr[1] = env->vsr[i];
 668 #else
 669             vsr[0] = env->vsr[i];
 670             vsr[1] = float64_val(env->fpr[i]);
 671 #endif
 672             reg.addr = (uintptr_t) &vsr;
 673             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 674
 675             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 676             if (ret < 0) {
 677                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 678                         i, strerror(errno));
 679                 return ret;
 680             }
 681         }
 682     }
 683
 684     if (env->insns_flags & PPC_ALTIVEC) {
 685         reg.id = KVM_REG_PPC_VSCR;
 686         reg.addr = (uintptr_t)&env->vscr;
 687         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 688         if (ret < 0) {
 689             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 690             return ret;
 691         }
 692
 693         for (i = 0; i < 32; i++) {
 694             reg.id = KVM_REG_PPC_VR(i);
 695             reg.addr = (uintptr_t)&env->avr[i];
 696             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 697             if (ret < 0) {
 698                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 699                 return ret;
 700             }
 701         }
 702     }
 703
 704     return 0;
 705 }
 706
 707 static int kvm_get_fp(CPUState *cs)
 708 {
 709     PowerPCCPU *cpu = POWERPC_CPU(cs);
 710     CPUPPCState *env = &cpu->env;
 711     struct kvm_one_reg reg;
 712     int i;
 713     int ret;
 714
 715     if (env->insns_flags & PPC_FLOAT) {
 716         uint64_t fpscr;
 717         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 718
 719         reg.id = KVM_REG_PPC_FPSCR;
 720         reg.addr = (uintptr_t)&fpscr;
 721         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 722         if (ret < 0) {
 723             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 724             return ret;
 725         } else {
 726             env->fpscr = fpscr;
 727         }
 728
 729         for (i = 0; i < 32; i++) {
 730             uint64_t vsr[2];
 731
 732             reg.addr = (uintptr_t) &vsr;
 733             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 734
 735             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 736             if (ret < 0) {
 737                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 738                         vsx ? "VSR" : "FPR", i, strerror(errno));
 739                 return ret;
 740             } else {
 741 #ifdef HOST_WORDS_BIGENDIAN
 742                 env->fpr[i] = vsr[0];
 743                 if (vsx) {
 744                     env->vsr[i] = vsr[1];
 745                 }
 746 #else
 747                 env->fpr[i] = vsr[1];
 748                 if (vsx) {
 749                     env->vsr[i] = vsr[0];
 750                 }
 751 #endif
 752             }
 753         }
 754     }
 755
 756     if (env->insns_flags & PPC_ALTIVEC) {
 757         reg.id = KVM_REG_PPC_VSCR;
 758         reg.addr = (uintptr_t)&env->vscr;
 759         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 760         if (ret < 0) {
 761             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 762             return ret;
 763         }
 764
 765         for (i = 0; i < 32; i++) {
 766             reg.id = KVM_REG_PPC_VR(i);
 767             reg.addr = (uintptr_t)&env->avr[i];
 768             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 769             if (ret < 0) {
 770                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 771                         i, strerror(errno));
 772                 return ret;
 773             }
 774         }
 775     }
 776
 777     return 0;
 778 }
 779
 780 #if defined(TARGET_PPC64)
 781 static int kvm_get_vpa(CPUState *cs)
 782 {
 783     PowerPCCPU *cpu = POWERPC_CPU(cs);
 784     CPUPPCState *env = &cpu->env;
 785     struct kvm_one_reg reg;
 786     int ret;
 787
 788     reg.id = KVM_REG_PPC_VPA_ADDR;
 789     reg.addr = (uintptr_t)&env->vpa_addr;
 790     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 791     if (ret < 0) {
 792         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 793         return ret;
 794     }
 795
 796     assert((uintptr_t)&env->slb_shadow_size
 797            == ((uintptr_t)&env->slb_shadow_addr + 8));
 798     reg.id = KVM_REG_PPC_VPA_SLB;
 799     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 800     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 801     if (ret < 0) {
 802         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 803                 strerror(errno));
 804         return ret;
 805     }
 806
 807     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 808     reg.id = KVM_REG_PPC_VPA_DTL;
 809     reg.addr = (uintptr_t)&env->dtl_addr;
 810     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 811     if (ret < 0) {
 812         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 813                 strerror(errno));
 814         return ret;
 815     }
 816
 817     return 0;
 818 }
 819
 820 static int kvm_put_vpa(CPUState *cs)
 821 {
 822     PowerPCCPU *cpu = POWERPC_CPU(cs);
 823     CPUPPCState *env = &cpu->env;
 824     struct kvm_one_reg reg;
 825     int ret;
 826
 827     /* SLB shadow or DTL can't be registered unless a master VPA is
 828      * registered.  That means when restoring state, if a VPA *is*
 829      * registered, we need to set that up first.  If not, we need to
 830      * deregister the others before deregistering the master VPA */
 831     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 832
 833     if (env->vpa_addr) {
 834         reg.id = KVM_REG_PPC_VPA_ADDR;
 835         reg.addr = (uintptr_t)&env->vpa_addr;
 836         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 837         if (ret < 0) {
 838             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 839             return ret;
 840         }
 841     }
 842
 843     assert((uintptr_t)&env->slb_shadow_size
 844            == ((uintptr_t)&env->slb_shadow_addr + 8));
 845     reg.id = KVM_REG_PPC_VPA_SLB;
 846     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 847     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 848     if (ret < 0) {
 849         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 850         return ret;
 851     }
 852
 853     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 854     reg.id = KVM_REG_PPC_VPA_DTL;
 855     reg.addr = (uintptr_t)&env->dtl_addr;
 856     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 857     if (ret < 0) {
 858         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 859                 strerror(errno));
 860         return ret;
 861     }
 862
 863     if (!env->vpa_addr) {
 864         reg.id = KVM_REG_PPC_VPA_ADDR;
 865         reg.addr = (uintptr_t)&env->vpa_addr;
 866         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 867         if (ret < 0) {
 868             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 869             return ret;
 870         }
 871     }
 872
 873     return 0;
 874 }
 875 #endif /* TARGET_PPC64 */
 876
 877 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 878 {
 879     CPUPPCState *env = &cpu->env;
 880     struct kvm_sregs sregs;
 881     int i;
 882
 883     sregs.pvr = env->spr[SPR_PVR];
 884
 885     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 886
 887     /* Sync SLB */
 888 #ifdef TARGET_PPC64
 889     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 890         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 891         if (env->slb[i].esid & SLB_ESID_V) {
 892             sregs.u.s.ppc64.slb[i].slbe |= i;
 893         }
 894         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 895     }
 896 #endif
 897
 898     /* Sync SRs */
 899     for (i = 0; i < 16; i++) {
 900         sregs.u.s.ppc32.sr[i] = env->sr[i];
 901     }
 902
 903     /* Sync BATs */
 904     for (i = 0; i < 8; i++) {
 905         /* Beware. We have to swap upper and lower bits here */
 906         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 907             | env->DBAT[1][i];
 908         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 909             | env->IBAT[1][i];
 910     }
 911
 912     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 913 }
 914
 915 int kvm_arch_put_registers(CPUState *cs, int level)
 916 {
 917     PowerPCCPU *cpu = POWERPC_CPU(cs);
 918     CPUPPCState *env = &cpu->env;
 919     struct kvm_regs regs;
 920     int ret;
 921     int i;
 922
 923     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 924     if (ret < 0) {
 925         return ret;
 926     }
 927
 928     regs.ctr = env->ctr;
 929     regs.lr  = env->lr;
 930     regs.xer = cpu_read_xer(env);
 931     regs.msr = env->msr;
 932     regs.pc = env->nip;
 933
 934     regs.srr0 = env->spr[SPR_SRR0];
 935     regs.srr1 = env->spr[SPR_SRR1];
 936
 937     regs.sprg0 = env->spr[SPR_SPRG0];
 938     regs.sprg1 = env->spr[SPR_SPRG1];
 939     regs.sprg2 = env->spr[SPR_SPRG2];
 940     regs.sprg3 = env->spr[SPR_SPRG3];
 941     regs.sprg4 = env->spr[SPR_SPRG4];
 942     regs.sprg5 = env->spr[SPR_SPRG5];
 943     regs.sprg6 = env->spr[SPR_SPRG6];
 944     regs.sprg7 = env->spr[SPR_SPRG7];
 945
 946     regs.pid = env->spr[SPR_BOOKE_PID];
 947
 948     for (i = 0;i < 32; i++)
 949         regs.gpr[i] = env->gpr[i];
 950
 951     regs.cr = 0;
 952     for (i = 0; i < 8; i++) {
 953         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 954     }
 955
 956     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 957     if (ret < 0)
 958         return ret;
 959
 960     kvm_put_fp(cs);
 961
 962     if (env->tlb_dirty) {
 963         kvm_sw_tlb_put(cpu);
 964         env->tlb_dirty = false;
 965     }
 966
 967     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 968         ret = kvmppc_put_books_sregs(cpu);
 969         if (ret < 0) {
 970             return ret;
 971         }
 972     }
 973
 974     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 975         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 976     }
 977
 978     if (cap_one_reg) {
 979         int i;
 980
 981         /* We deliberately ignore errors here, for kernels which have
 982          * the ONE_REG calls, but don't support the specific
 983          * registers, there's a reasonable chance things will still
 984          * work, at least until we try to migrate. */
 985         for (i = 0; i < 1024; i++) {
 986             uint64_t id = env->spr_cb[i].one_reg_id;
 987
 988             if (id != 0) {
 989                 kvm_put_one_spr(cs, id, i);
 990             }
 991         }
 992
 993 #ifdef TARGET_PPC64
 994         if (msr_ts) {
 995             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
 996                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
 997             }
 998             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
 999                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1000             }
1001             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1002             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1003             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1004             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1005             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1006             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1007             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1008             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1009             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1010             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1011         }
1012
1013         if (cap_papr) {
1014             if (kvm_put_vpa(cs) < 0) {
1015                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1016             }
1017         }
1018
1019         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1020 #endif /* TARGET_PPC64 */
1021     }
1022
1023     return ret;
1024 }
1025
1026 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1027 {
1028      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1029 }
1030
1031 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1032 {
1033     CPUPPCState *env = &cpu->env;
1034     struct kvm_sregs sregs;
1035     int ret;
1036
1037     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1038     if (ret < 0) {
1039         return ret;
1040     }
1041
1042     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1043         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1044         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1045         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1046         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1047         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1048         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1049         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1050         env->spr[SPR_DECR] = sregs.u.e.dec;
1051         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1052         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1053         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1054     }
1055
1056     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1057         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1058         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1059         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1060         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1061         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1062     }
1063
1064     if (sregs.u.e.features & KVM_SREGS_E_64) {
1065         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1066     }
1067
1068     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1069         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1070     }
1071
1072     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1073         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1074         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1075         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1076         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1077         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1078         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1079         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1080         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1081         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1082         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1083         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1084         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1085         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1086         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1087         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1088         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1089         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1090         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1091         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1092         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1093         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1094         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1095         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1096         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1097         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1098         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1099         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1100         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1101         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1102         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1103         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1104         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1105
1106         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1107             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1108             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1109             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1110             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1111             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1112             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1113         }
1114
1115         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1116             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1117             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1118         }
1119
1120         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1121             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1122             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1123             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1124             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1125         }
1126     }
1127
1128     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1129         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1130         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1131         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1132         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1133         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1134         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1135         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1136         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1137         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1138         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1139     }
1140
1141     if (sregs.u.e.features & KVM_SREGS_EXP) {
1142         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1143     }
1144
1145     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1146         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1147         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1148     }
1149
1150     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1151         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1152         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1153         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1154
1155         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1156             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1157             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1158         }
1159     }
1160
1161     return 0;
1162 }
1163
1164 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1165 {
1166     CPUPPCState *env = &cpu->env;
1167     struct kvm_sregs sregs;
1168     int ret;
1169     int i;
1170
1171     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1172     if (ret < 0) {
1173         return ret;
1174     }
1175
1176     if (!env->external_htab) {
1177         ppc_store_sdr1(env, sregs.u.s.sdr1);
1178     }
1179
1180     /* Sync SLB */
1181 #ifdef TARGET_PPC64
1182     /*
1183      * The packed SLB array we get from KVM_GET_SREGS only contains
1184      * information about valid entries. So we flush our internal copy
1185      * to get rid of stale ones, then put all valid SLB entries back
1186      * in.
1187      */
1188     memset(env->slb, 0, sizeof(env->slb));
1189     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1190         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1191         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1192         /*
1193          * Only restore valid entries
1194          */
1195         if (rb & SLB_ESID_V) {
1196             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1197         }
1198     }
1199 #endif
1200
1201     /* Sync SRs */
1202     for (i = 0; i < 16; i++) {
1203         env->sr[i] = sregs.u.s.ppc32.sr[i];
1204     }
1205
1206     /* Sync BATs */
1207     for (i = 0; i < 8; i++) {
1208         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1209         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1210         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1211         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1212     }
1213
1214     return 0;
1215 }
1216
1217 int kvm_arch_get_registers(CPUState *cs)
1218 {
1219     PowerPCCPU *cpu = POWERPC_CPU(cs);
1220     CPUPPCState *env = &cpu->env;
1221     struct kvm_regs regs;
1222     uint32_t cr;
1223     int i, ret;
1224
1225     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1226     if (ret < 0)
1227         return ret;
1228
1229     cr = regs.cr;
1230     for (i = 7; i >= 0; i--) {
1231         env->crf[i] = cr & 15;
1232         cr >>= 4;
1233     }
1234
1235     env->ctr = regs.ctr;
1236     env->lr = regs.lr;
1237     cpu_write_xer(env, regs.xer);
1238     env->msr = regs.msr;
1239     env->nip = regs.pc;
1240
1241     env->spr[SPR_SRR0] = regs.srr0;
1242     env->spr[SPR_SRR1] = regs.srr1;
1243
1244     env->spr[SPR_SPRG0] = regs.sprg0;
1245     env->spr[SPR_SPRG1] = regs.sprg1;
1246     env->spr[SPR_SPRG2] = regs.sprg2;
1247     env->spr[SPR_SPRG3] = regs.sprg3;
1248     env->spr[SPR_SPRG4] = regs.sprg4;
1249     env->spr[SPR_SPRG5] = regs.sprg5;
1250     env->spr[SPR_SPRG6] = regs.sprg6;
1251     env->spr[SPR_SPRG7] = regs.sprg7;
1252
1253     env->spr[SPR_BOOKE_PID] = regs.pid;
1254
1255     for (i = 0;i < 32; i++)
1256         env->gpr[i] = regs.gpr[i];
1257
1258     kvm_get_fp(cs);
1259
1260     if (cap_booke_sregs) {
1261         ret = kvmppc_get_booke_sregs(cpu);
1262         if (ret < 0) {
1263             return ret;
1264         }
1265     }
1266
1267     if (cap_segstate) {
1268         ret = kvmppc_get_books_sregs(cpu);
1269         if (ret < 0) {
1270             return ret;
1271         }
1272     }
1273
1274     if (cap_hior) {
1275         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1276     }
1277
1278     if (cap_one_reg) {
1279         int i;
1280
1281         /* We deliberately ignore errors here, for kernels which have
1282          * the ONE_REG calls, but don't support the specific
1283          * registers, there's a reasonable chance things will still
1284          * work, at least until we try to migrate. */
1285         for (i = 0; i < 1024; i++) {
1286             uint64_t id = env->spr_cb[i].one_reg_id;
1287
1288             if (id != 0) {
1289                 kvm_get_one_spr(cs, id, i);
1290             }
1291         }
1292
1293 #ifdef TARGET_PPC64
1294         if (msr_ts) {
1295             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1296                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1297             }
1298             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1299                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1300             }
1301             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1302             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1303             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1304             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1305             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1306             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1307             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1308             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1309             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1310             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1311         }
1312
1313         if (cap_papr) {
1314             if (kvm_get_vpa(cs) < 0) {
1315                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1316             }
1317         }
1318
1319         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1320 #endif
1321     }
1322
1323     return 0;
1324 }
1325
1326 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1327 {
1328     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1329
1330     if (irq != PPC_INTERRUPT_EXT) {
1331         return 0;
1332     }
1333
1334     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1335         return 0;
1336     }
1337
1338     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1339
1340     return 0;
1341 }
1342
1343 #if defined(TARGET_PPCEMB)
1344 #define PPC_INPUT_INT PPC40x_INPUT_INT
1345 #elif defined(TARGET_PPC64)
1346 #define PPC_INPUT_INT PPC970_INPUT_INT
1347 #else
1348 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1349 #endif
1350
1351 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1352 {
1353     PowerPCCPU *cpu = POWERPC_CPU(cs);
1354     CPUPPCState *env = &cpu->env;
1355     int r;
1356     unsigned irq;
1357
1358     qemu_mutex_lock_iothread();
1359
1360     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1361      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1362     if (!cap_interrupt_level &&
1363         run->ready_for_interrupt_injection &&
1364         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1365         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1366     {
1367         /* For now KVM disregards the 'irq' argument. However, in the
1368          * future KVM could cache it in-kernel to avoid a heavyweight exit
1369          * when reading the UIC.
1370          */
1371         irq = KVM_INTERRUPT_SET;
1372
1373         DPRINTF("injected interrupt %d\n", irq);
1374         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1375         if (r < 0) {
1376             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1377         }
1378
1379         /* Always wake up soon in case the interrupt was level based */
1380         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1381                        (NANOSECONDS_PER_SECOND / 50));
1382     }
1383
1384     /* We don't know if there are more interrupts pending after this. However,
1385      * the guest will return to userspace in the course of handling this one
1386      * anyways, so we will get a chance to deliver the rest. */
1387
1388     qemu_mutex_unlock_iothread();
1389 }
1390
1391 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1392 {
1393     return MEMTXATTRS_UNSPECIFIED;
1394 }
1395
1396 int kvm_arch_process_async_events(CPUState *cs)
1397 {
1398     return cs->halted;
1399 }
1400
1401 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1402 {
1403     CPUState *cs = CPU(cpu);
1404     CPUPPCState *env = &cpu->env;
1405
1406     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1407         cs->halted = 1;
1408         cs->exception_index = EXCP_HLT;
1409     }
1410
1411     return 0;
1412 }
1413
1414 /* map dcr access to existing qemu dcr emulation */
1415 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1416 {
1417     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1418         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1419
1420     return 0;
1421 }
1422
1423 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1424 {
1425     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1426         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1427
1428     return 0;
1429 }
1430
1431 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1432 {
1433     /* Mixed endian case is not handled */
1434     uint32_t sc = debug_inst_opcode;
1435
1436     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1437                             sizeof(sc), 0) ||
1438         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1439         return -EINVAL;
1440     }
1441
1442     return 0;
1443 }
1444
1445 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1446 {
1447     uint32_t sc;
1448
1449     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1450         sc != debug_inst_opcode ||
1451         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1452                             sizeof(sc), 1)) {
1453         return -EINVAL;
1454     }
1455
1456     return 0;
1457 }
1458
1459 static int find_hw_breakpoint(target_ulong addr, int type)
1460 {
1461     int n;
1462
1463     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1464            <= ARRAY_SIZE(hw_debug_points));
1465
1466     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1467         if (hw_debug_points[n].addr == addr &&
1468              hw_debug_points[n].type == type) {
1469             return n;
1470         }
1471     }
1472
1473     return -1;
1474 }
1475
1476 static int find_hw_watchpoint(target_ulong addr, int *flag)
1477 {
1478     int n;
1479
1480     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1481     if (n >= 0) {
1482         *flag = BP_MEM_ACCESS;
1483         return n;
1484     }
1485
1486     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1487     if (n >= 0) {
1488         *flag = BP_MEM_WRITE;
1489         return n;
1490     }
1491
1492     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1493     if (n >= 0) {
1494         *flag = BP_MEM_READ;
1495         return n;
1496     }
1497
1498     return -1;
1499 }
1500
1501 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1502                                   target_ulong len, int type)
1503 {
1504     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1505         return -ENOBUFS;
1506     }
1507
1508     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1509     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1510
1511     switch (type) {
1512     case GDB_BREAKPOINT_HW:
1513         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1514             return -ENOBUFS;
1515         }
1516
1517         if (find_hw_breakpoint(addr, type) >= 0) {
1518             return -EEXIST;
1519         }
1520
1521         nb_hw_breakpoint++;
1522         break;
1523
1524     case GDB_WATCHPOINT_WRITE:
1525     case GDB_WATCHPOINT_READ:
1526     case GDB_WATCHPOINT_ACCESS:
1527         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1528             return -ENOBUFS;
1529         }
1530
1531         if (find_hw_breakpoint(addr, type) >= 0) {
1532             return -EEXIST;
1533         }
1534
1535         nb_hw_watchpoint++;
1536         break;
1537
1538     default:
1539         return -ENOSYS;
1540     }
1541
1542     return 0;
1543 }
1544
1545 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1546                                   target_ulong len, int type)
1547 {
1548     int n;
1549
1550     n = find_hw_breakpoint(addr, type);
1551     if (n < 0) {
1552         return -ENOENT;
1553     }
1554
1555     switch (type) {
1556     case GDB_BREAKPOINT_HW:
1557         nb_hw_breakpoint--;
1558         break;
1559
1560     case GDB_WATCHPOINT_WRITE:
1561     case GDB_WATCHPOINT_READ:
1562     case GDB_WATCHPOINT_ACCESS:
1563         nb_hw_watchpoint--;
1564         break;
1565
1566     default:
1567         return -ENOSYS;
1568     }
1569     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1570
1571     return 0;
1572 }
1573
1574 void kvm_arch_remove_all_hw_breakpoints(void)
1575 {
1576     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1577 }
1578
1579 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1580 {
1581     int n;
1582
1583     /* Software Breakpoint updates */
1584     if (kvm_sw_breakpoints_active(cs)) {
1585         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1586     }
1587
1588     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1589            <= ARRAY_SIZE(hw_debug_points));
1590     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1591
1592     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1593         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1594         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1595         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1596             switch (hw_debug_points[n].type) {
1597             case GDB_BREAKPOINT_HW:
1598                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1599                 break;
1600             case GDB_WATCHPOINT_WRITE:
1601                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1602                 break;
1603             case GDB_WATCHPOINT_READ:
1604                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1605                 break;
1606             case GDB_WATCHPOINT_ACCESS:
1607                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1608                                         KVMPPC_DEBUG_WATCH_READ;
1609                 break;
1610             default:
1611                 cpu_abort(cs, "Unsupported breakpoint type\n");
1612             }
1613             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1614         }
1615     }
1616 }
1617
1618 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1619 {
1620     CPUState *cs = CPU(cpu);
1621     CPUPPCState *env = &cpu->env;
1622     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1623     int handle = 0;
1624     int n;
1625     int flag = 0;
1626
1627     if (cs->singlestep_enabled) {
1628         handle = 1;
1629     } else if (arch_info->status) {
1630         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1631             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1632                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1633                 if (n >= 0) {
1634                     handle = 1;
1635                 }
1636             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1637                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1638                 n = find_hw_watchpoint(arch_info->address,  &flag);
1639                 if (n >= 0) {
1640                     handle = 1;
1641                     cs->watchpoint_hit = &hw_watchpoint;
1642                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1643                     hw_watchpoint.flags = flag;
1644                 }
1645             }
1646         }
1647     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1648         handle = 1;
1649     } else {
1650         /* QEMU is not able to handle debug exception, so inject
1651          * program exception to guest;
1652          * Yes program exception NOT debug exception !!
1653          * When QEMU is using debug resources then debug exception must
1654          * be always set. To achieve this we set MSR_DE and also set
1655          * MSRP_DEP so guest cannot change MSR_DE.
1656          * When emulating debug resource for guest we want guest
1657          * to control MSR_DE (enable/disable debug interrupt on need).
1658          * Supporting both configurations are NOT possible.
1659          * So the result is that we cannot share debug resources
1660          * between QEMU and Guest on BOOKE architecture.
1661          * In the current design QEMU gets the priority over guest,
1662          * this means that if QEMU is using debug resources then guest
1663          * cannot use them;
1664          * For software breakpoint QEMU uses a privileged instruction;
1665          * So there cannot be any reason that we are here for guest
1666          * set debug exception, only possibility is guest executed a
1667          * privileged / illegal instruction and that's why we are
1668          * injecting a program interrupt.
1669          */
1670
1671         cpu_synchronize_state(cs);
1672         /* env->nip is PC, so increment this by 4 to use
1673          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1674          */
1675         env->nip += 4;
1676         cs->exception_index = POWERPC_EXCP_PROGRAM;
1677         env->error_code = POWERPC_EXCP_INVAL;
1678         ppc_cpu_do_interrupt(cs);
1679     }
1680
1681     return handle;
1682 }
1683
1684 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1685 {
1686     PowerPCCPU *cpu = POWERPC_CPU(cs);
1687     CPUPPCState *env = &cpu->env;
1688     int ret;
1689
1690     qemu_mutex_lock_iothread();
1691
1692     switch (run->exit_reason) {
1693     case KVM_EXIT_DCR:
1694         if (run->dcr.is_write) {
1695             DPRINTF("handle dcr write\n");
1696             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1697         } else {
1698             DPRINTF("handle dcr read\n");
1699             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1700         }
1701         break;
1702     case KVM_EXIT_HLT:
1703         DPRINTF("handle halt\n");
1704         ret = kvmppc_handle_halt(cpu);
1705         break;
1706 #if defined(TARGET_PPC64)
1707     case KVM_EXIT_PAPR_HCALL:
1708         DPRINTF("handle PAPR hypercall\n");
1709         run->papr_hcall.ret = spapr_hypercall(cpu,
1710                                               run->papr_hcall.nr,
1711                                               run->papr_hcall.args);
1712         ret = 0;
1713         break;
1714 #endif
1715     case KVM_EXIT_EPR:
1716         DPRINTF("handle epr\n");
1717         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1718         ret = 0;
1719         break;
1720     case KVM_EXIT_WATCHDOG:
1721         DPRINTF("handle watchdog expiry\n");
1722         watchdog_perform_action();
1723         ret = 0;
1724         break;
1725
1726     case KVM_EXIT_DEBUG:
1727         DPRINTF("handle debug exception\n");
1728         if (kvm_handle_debug(cpu, run)) {
1729             ret = EXCP_DEBUG;
1730             break;
1731         }
1732         /* re-enter, this exception was guest-internal */
1733         ret = 0;
1734         break;
1735
1736     default:
1737         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1738         ret = -1;
1739         break;
1740     }
1741
1742     qemu_mutex_unlock_iothread();
1743     return ret;
1744 }
1745
1746 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1747 {
1748     CPUState *cs = CPU(cpu);
1749     uint32_t bits = tsr_bits;
1750     struct kvm_one_reg reg = {
1751         .id = KVM_REG_PPC_OR_TSR,
1752         .addr = (uintptr_t) &bits,
1753     };
1754
1755     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1756 }
1757
1758 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1759 {
1760
1761     CPUState *cs = CPU(cpu);
1762     uint32_t bits = tsr_bits;
1763     struct kvm_one_reg reg = {
1764         .id = KVM_REG_PPC_CLEAR_TSR,
1765         .addr = (uintptr_t) &bits,
1766     };
1767
1768     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1769 }
1770
1771 int kvmppc_set_tcr(PowerPCCPU *cpu)
1772 {
1773     CPUState *cs = CPU(cpu);
1774     CPUPPCState *env = &cpu->env;
1775     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1776
1777     struct kvm_one_reg reg = {
1778         .id = KVM_REG_PPC_TCR,
1779         .addr = (uintptr_t) &tcr,
1780     };
1781
1782     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1783 }
1784
1785 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1786 {
1787     CPUState *cs = CPU(cpu);
1788     int ret;
1789
1790     if (!kvm_enabled()) {
1791         return -1;
1792     }
1793
1794     if (!cap_ppc_watchdog) {
1795         printf("warning: KVM does not support watchdog");
1796         return -1;
1797     }
1798
1799     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1800     if (ret < 0) {
1801         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1802                 __func__, strerror(-ret));
1803         return ret;
1804     }
1805
1806     return ret;
1807 }
1808
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy
 * it (truncated to @len bytes by pstrcpy()) into @value.
 *
 * Returns 0 if such a line was found, -1 if the file could not be
 * opened or no line matched.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        /* Stop scanning on a line that reads back as an empty string */
        if (*line == '\0') {
            break;
        }
    }

    fclose(f);

    return ret;
}
1836
1837 uint32_t kvmppc_get_tbfreq(void)
1838 {
1839     char line[512];
1840     char *ns;
1841     uint32_t retval = NANOSECONDS_PER_SECOND;
1842
1843     if (read_cpuinfo("timebase", line, sizeof(line))) {
1844         return retval;
1845     }
1846
1847     if (!(ns = strchr(line, ':'))) {
1848         return retval;
1849     }
1850
1851     ns++;
1852
1853     return atoi(ns);
1854 }
1855
/*
 * Load the contents of /proc/device-tree/system-id into a buffer
 * returned through *@value.  Returns the result of
 * g_file_get_contents().
 */
bool kvmppc_get_host_serial(char **value)
{
    static const char path[] = "/proc/device-tree/system-id";

    return g_file_get_contents(path, value, NULL, NULL);
}
1861
/*
 * Load the contents of /proc/device-tree/model into a buffer returned
 * through *@value.  Returns the result of g_file_get_contents().
 */
bool kvmppc_get_host_model(char **value)
{
    static const char path[] = "/proc/device-tree/model";

    return g_file_get_contents(path, value, NULL, NULL);
}
1866
1867 /* Try to find a device tree node for a CPU with clock-frequency property */
1868 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1869 {
1870     struct dirent *dirp;
1871     DIR *dp;
1872
1873     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1874         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1875         return -1;
1876     }
1877
1878     buf[0] = '\0';
1879     while ((dirp = readdir(dp)) != NULL) {
1880         FILE *f;
1881         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1882                  dirp->d_name);
1883         f = fopen(buf, "r");
1884         if (f) {
1885             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1886             fclose(f);
1887             break;
1888         }
1889         buf[0] = '\0';
1890     }
1891     closedir(dp);
1892     if (buf[0] == '\0') {
1893         printf("Unknown host!\n");
1894         return -1;
1895     }
1896
1897     return 0;
1898 }
1899
/*
 * Read a big-endian integer from @filename.
 *
 * Returns (uint64_t)-1 if the file cannot be opened, the converted value
 * if exactly 4 or exactly 8 bytes were read, and 0 for any other length.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    int nread;
    FILE *prop = fopen(filename, "rb");

    if (prop == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), prop);
    fclose(prop);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        /* property is a 64-bit quantity */
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1926
1927 /* Read a CPU node property from the host device tree that's a single
1928  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1929  * (can't find or open the property, or doesn't understand the
1930  * format) */
1931 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1932 {
1933     char buf[PATH_MAX], *tmp;
1934     uint64_t val;
1935
1936     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1937         return -1;
1938     }
1939
1940     tmp = g_strdup_printf("%s/%s", buf, propname);
1941     val = kvmppc_read_int_dt(tmp);
1942     g_free(tmp);
1943
1944     return val;
1945 }
1946
/* Value of the host CPU node's "clock-frequency" property. */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1951
/* Value of the host CPU node's "ibm,vmx" property, narrowed to 32 bits. */
uint32_t kvmppc_get_vmx(void)
{
    uint32_t vmx = kvmppc_read_int_cpu_dt("ibm,vmx");

    return vmx;
}
1956
/* Value of the host CPU node's "ibm,dfp" property, narrowed to 32 bits. */
uint32_t kvmppc_get_dfp(void)
{
    uint32_t dfp = kvmppc_read_int_cpu_dt("ibm,dfp");

    return dfp;
}
1961
1962 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1963  {
1964      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1965      CPUState *cs = CPU(cpu);
1966
1967     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1968         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1969         return 0;
1970     }
1971
1972     return 1;
1973 }
1974
1975 int kvmppc_get_hasidle(CPUPPCState *env)
1976 {
1977     struct kvm_ppc_pvinfo pvinfo;
1978
1979     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1980         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1981         return 1;
1982     }
1983
1984     return 0;
1985 }
1986
1987 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1988 {
1989     uint32_t *hc = (uint32_t*)buf;
1990     struct kvm_ppc_pvinfo pvinfo;
1991
1992     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1993         memcpy(buf, pvinfo.hcall, buf_len);
1994         return 0;
1995     }
1996
1997     /*
1998      * Fallback to always fail hypercalls regardless of endianness:
1999      *
2000      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2001      *     li r3, -1
2002      *     b .+8       (becomes nop in wrong endian)
2003      *     bswap32(li r3, -1)
2004      */
2005
2006     hc[0] = cpu_to_be32(0x08000048);
2007     hc[1] = cpu_to_be32(0x3860ffff);
2008     hc[2] = cpu_to_be32(0x48000008);
2009     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2010
2011     return 1;
2012 }
2013
2014 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2015 {
2016     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2017 }
2018
2019 void kvmppc_enable_logical_ci_hcalls(void)
2020 {
2021     /*
2022      * FIXME: it would be nice if we could detect the cases where
2023      * we're using a device which requires the in kernel
2024      * implementation of these hcalls, but the kernel lacks them and
2025      * produce a warning.
2026      */
2027     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2028     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2029 }
2030
2031 void kvmppc_enable_set_mode_hcall(void)
2032 {
2033     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2034 }
2035
2036 void kvmppc_set_papr(PowerPCCPU *cpu)
2037 {
2038     CPUState *cs = CPU(cpu);
2039     int ret;
2040
2041     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2042     if (ret) {
2043         error_report("This vCPU type or KVM version does not support PAPR");
2044         exit(1);
2045     }
2046
2047     /* Update the capability flag so we sync the right information
2048      * with kvm */
2049     cap_papr = 1;
2050 }
2051
2052 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2053 {
2054     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2055 }
2056
2057 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2058 {
2059     CPUState *cs = CPU(cpu);
2060     int ret;
2061
2062     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2063     if (ret && mpic_proxy) {
2064         error_report("This KVM version does not support EPR");
2065         exit(1);
2066     }
2067 }
2068
2069 int kvmppc_smt_threads(void)
2070 {
2071     return cap_ppc_smt ? cap_ppc_smt : 1;
2072 }
2073
2074 #ifdef TARGET_PPC64
2075 off_t kvmppc_alloc_rma(void **rma)
2076 {
2077     off_t size;
2078     int fd;
2079     struct kvm_allocate_rma ret;
2080
2081     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2082      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2083      *                      not necessary on this hardware
2084      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2085      *
2086      * FIXME: We should allow the user to force contiguous RMA
2087      * allocation in the cap_ppc_rma==1 case.
2088      */
2089     if (cap_ppc_rma < 2) {
2090         return 0;
2091     }
2092
2093     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2094     if (fd < 0) {
2095         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2096                 strerror(errno));
2097         return -1;
2098     }
2099
2100     size = MIN(ret.rma_size, 256ul << 20);
2101
2102     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2103     if (*rma == MAP_FAILED) {
2104         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2105         return -1;
2106     };
2107
2108     return size;
2109 }
2110
2111 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2112 {
2113     struct kvm_ppc_smmu_info info;
2114     long rampagesize, best_page_shift;
2115     int i;
2116
2117     if (cap_ppc_rma >= 2) {
2118         return current_size;
2119     }
2120
2121     /* Find the largest hardware supported page size that's less than
2122      * or equal to the (logical) backing page size of guest RAM */
2123     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2124     rampagesize = getrampagesize();
2125     best_page_shift = 0;
2126
2127     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2128         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2129
2130         if (!sps->page_shift) {
2131             continue;
2132         }
2133
2134         if ((sps->page_shift > best_page_shift)
2135             && ((1UL << sps->page_shift) <= rampagesize)) {
2136             best_page_shift = sps->page_shift;
2137         }
2138     }
2139
2140     return MIN(current_size,
2141                1ULL << (best_page_shift + hash_shift - 7));
2142 }
2143 #endif
2144
2145 bool kvmppc_spapr_use_multitce(void)
2146 {
2147     return cap_spapr_multitce;
2148 }
2149
2150 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2151                               bool need_vfio)
2152 {
2153     struct kvm_create_spapr_tce args = {
2154         .liobn = liobn,
2155         .window_size = window_size,
2156     };
2157     long len;
2158     int fd;
2159     void *table;
2160
2161     /* Must set fd to -1 so we don't try to munmap when called for
2162      * destroying the table, which the upper layers -will- do
2163      */
2164     *pfd = -1;
2165     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2166         return NULL;
2167     }
2168
2169     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2170     if (fd < 0) {
2171         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2172                 liobn);
2173         return NULL;
2174     }
2175
2176     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2177     /* FIXME: round this up to page size */
2178
2179     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2180     if (table == MAP_FAILED) {
2181         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2182                 liobn);
2183         close(fd);
2184         return NULL;
2185     }
2186
2187     *pfd = fd;
2188     return table;
2189 }
2190
/*
 * Unmap a TCE table of @nb_table 64-bit entries and close its @fd.
 *
 * Returns -1 without touching anything when @fd is negative, 0 otherwise.
 * A failing munmap() or close() is only reported on stderr; close() is not
 * attempted when munmap() fails.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        /* Terminate the message so it does not run into later output */
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2209
2210 int kvmppc_reset_htab(int shift_hint)
2211 {
2212     uint32_t shift = shift_hint;
2213
2214     if (!kvm_enabled()) {
2215         /* Full emulation, tell caller to allocate htab itself */
2216         return 0;
2217     }
2218     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2219         int ret;
2220         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2221         if (ret == -ENOTTY) {
2222             /* At least some versions of PR KVM advertise the
2223              * capability, but don't implement the ioctl().  Oops.
2224              * Return 0 so that we allocate the htab in qemu, as is
2225              * correct for PR. */
2226             return 0;
2227         } else if (ret < 0) {
2228             return ret;
2229         }
2230         return shift;
2231     }
2232
2233     /* We have a kernel that predates the htab reset calls.  For PR
2234      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2235      * this era, it has allocated a 16MB fixed size hash table
2236      * already.  Kernels of this era have the GET_PVINFO capability
2237      * only on PR, so we use this hack to determine the right
2238      * answer */
2239     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2240         /* PR - tell caller to allocate htab */
2241         return 0;
2242     } else {
2243         /* HV - assume 16MB kernel allocated htab */
2244         return 24;
2245     }
2246 }
2247
2248 static inline uint32_t mfpvr(void)
2249 {
2250     uint32_t pvr;
2251
2252     asm ("mfpvr %0"
2253          : "=r"(pvr));
2254     return pvr;
2255 }
2256
/*
 * Set (@on true) or clear (@on false) the bits of @flags in *@word.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2265
2266 static void kvmppc_host_cpu_initfn(Object *obj)
2267 {
2268     assert(kvm_enabled());
2269 }
2270
2271 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2272 {
2273     DeviceClass *dc = DEVICE_CLASS(oc);
2274     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2275     uint32_t vmx = kvmppc_get_vmx();
2276     uint32_t dfp = kvmppc_get_dfp();
2277     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2278     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2279
2280     /* Now fix up the class with information we can query from the host */
2281     pcc->pvr = mfpvr();
2282
2283     if (vmx != -1) {
2284         /* Only override when we know what the host supports */
2285         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2286         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2287     }
2288     if (dfp != -1) {
2289         /* Only override when we know what the host supports */
2290         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2291     }
2292
2293     if (dcache_size != -1) {
2294         pcc->l1_dcache_size = dcache_size;
2295     }
2296
2297     if (icache_size != -1) {
2298         pcc->l1_icache_size = icache_size;
2299     }
2300
2301     /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2302     dc->cannot_destroy_with_object_finalize_yet = true;
2303 }
2304
2305 bool kvmppc_has_cap_epr(void)
2306 {
2307     return cap_epr;
2308 }
2309
2310 bool kvmppc_has_cap_htab_fd(void)
2311 {
2312     return cap_htab_fd;
2313 }
2314
2315 bool kvmppc_has_cap_fixup_hcalls(void)
2316 {
2317     return cap_fixup_hcalls;
2318 }
2319
2320 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2321 {
2322     ObjectClass *oc = OBJECT_CLASS(pcc);
2323
2324     while (oc && !object_class_is_abstract(oc)) {
2325         oc = object_class_get_parent(oc);
2326     }
2327     assert(oc);
2328
2329     return POWERPC_CPU_CLASS(oc);
2330 }
2331
2332 static int kvm_ppc_register_host_cpu_type(void)
2333 {
2334     TypeInfo type_info = {
2335         .name = TYPE_HOST_POWERPC_CPU,
2336         .instance_init = kvmppc_host_cpu_initfn,
2337         .class_init = kvmppc_host_cpu_class_init,
2338     };
2339     uint32_t host_pvr = mfpvr();
2340     PowerPCCPUClass *pvr_pcc;
2341     DeviceClass *dc;
2342
2343     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2344     if (pvr_pcc == NULL) {
2345         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2346     }
2347     if (pvr_pcc == NULL) {
2348         return -1;
2349     }
2350     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2351     type_register(&type_info);
2352
2353     /* Register generic family CPU class for a family */
2354     pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2355     dc = DEVICE_CLASS(pvr_pcc);
2356     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2357     type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2358     type_register(&type_info);
2359
2360     return 0;
2361 }
2362
2363 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2364 {
2365     struct kvm_rtas_token_args args = {
2366         .token = token,
2367     };
2368
2369     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2370         return -ENOENT;
2371     }
2372
2373     strncpy(args.name, function, sizeof(args.name));
2374
2375     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2376 }
2377
2378 int kvmppc_get_htab_fd(bool write)
2379 {
2380     struct kvm_get_htab_fd s = {
2381         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2382         .start_index = 0,
2383     };
2384
2385     if (!cap_htab_fd) {
2386         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2387         return -1;
2388     }
2389
2390     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2391 }
2392
2393 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2394 {
2395     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2396     uint8_t buf[bufsize];
2397     ssize_t rc;
2398
2399     do {
2400         rc = read(fd, buf, bufsize);
2401         if (rc < 0) {
2402             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2403                     strerror(errno));
2404             return rc;
2405         } else if (rc) {
2406             uint8_t *buffer = buf;
2407             ssize_t n = rc;
2408             while (n) {
2409                 struct kvm_get_htab_header *head =
2410                     (struct kvm_get_htab_header *) buffer;
2411                 size_t chunksize = sizeof(*head) +
2412                      HASH_PTE_SIZE_64 * head->n_valid;
2413
2414                 qemu_put_be32(f, head->index);
2415                 qemu_put_be16(f, head->n_valid);
2416                 qemu_put_be16(f, head->n_invalid);
2417                 qemu_put_buffer(f, (void *)(head + 1),
2418                                 HASH_PTE_SIZE_64 * head->n_valid);
2419
2420                 buffer += chunksize;
2421                 n -= chunksize;
2422             }
2423         }
2424     } while ((rc != 0)
2425              && ((max_ns < 0)
2426                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2427
2428     return (rc == 0) ? 1 : 0;
2429 }
2430
2431 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2432                            uint16_t n_valid, uint16_t n_invalid)
2433 {
2434     struct kvm_get_htab_header *buf;
2435     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2436     ssize_t rc;
2437
2438     buf = alloca(chunksize);
2439     buf->index = index;
2440     buf->n_valid = n_valid;
2441     buf->n_invalid = n_invalid;
2442
2443     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2444
2445     rc = write(fd, buf, chunksize);
2446     if (rc < 0) {
2447         fprintf(stderr, "Error writing KVM hash table: %s\n",
2448                 strerror(errno));
2449         return rc;
2450     }
2451     if (rc != chunksize) {
2452         /* We should never get a short write on a single chunk */
2453         fprintf(stderr, "Short write, restoring KVM hash table\n");
2454         return -1;
2455     }
2456     return 0;
2457 }
2458
2459 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2460 {
2461     return true;
2462 }
2463
2464 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2465 {
2466     return 1;
2467 }
2468
/* Always returns 1; neither @code nor @addr is consulted. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    const int result = 1;

    return result;
}
2473
2474 void kvm_arch_init_irq_routing(KVMState *s)
2475 {
2476 }
2477
/*
 * Buffer exchanged with the hash table fd: one chunk header followed by
 * room for the HPTE words.
 */
struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * Sized for HPTES_PER_GROUP * 2 words plus one spare element; the
     * spare is required for read.
     * NOTE(review): why read needs the extra element is not visible
     * here — confirm.
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};
2485
2486 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2487 {
2488     int htab_fd;
2489     struct kvm_get_htab_fd ghf;
2490     struct kvm_get_htab_buf  *hpte_buf;
2491
2492     ghf.flags = 0;
2493     ghf.start_index = pte_index;
2494     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2495     if (htab_fd < 0) {
2496         goto error_out;
2497     }
2498
2499     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2500     /*
2501      * Read the hpte group
2502      */
2503     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2504         goto out_close;
2505     }
2506
2507     close(htab_fd);
2508     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2509
2510 out_close:
2511     g_free(hpte_buf);
2512     close(htab_fd);
2513 error_out:
2514     return 0;
2515 }
2516
2517 void kvmppc_hash64_free_pteg(uint64_t token)
2518 {
2519     struct kvm_get_htab_buf *htab_buf;
2520
2521     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2522                             hpte);
2523     g_free(htab_buf);
2524     return;
2525 }
2526
2527 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2528                              target_ulong pte0, target_ulong pte1)
2529 {
2530     int htab_fd;
2531     struct kvm_get_htab_fd ghf;
2532     struct kvm_get_htab_buf hpte_buf;
2533
2534     ghf.flags = 0;
2535     ghf.start_index = 0;     /* Ignored */
2536     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2537     if (htab_fd < 0) {
2538         goto error_out;
2539     }
2540
2541     hpte_buf.header.n_valid = 1;
2542     hpte_buf.header.n_invalid = 0;
2543     hpte_buf.header.index = pte_index;
2544     hpte_buf.hpte[0] = pte0;
2545     hpte_buf.hpte[1] = pte1;
2546     /*
2547      * Write the hpte entry.
2548      * CAUTION: write() has the warn_unused_result attribute. Hence we
2549      * need to check the return value, even though we do nothing.
2550      */
2551     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2552         goto out_close;
2553     }
2554
2555 out_close:
2556     close(htab_fd);
2557     return;
2558
2559 error_out:
2560     return;
2561 }
2562
2563 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2564                              uint64_t address, uint32_t data, PCIDevice *dev)
2565 {
2566     return 0;
2567 }
2568
/* Returns the low 16 bits of @data. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    uint32_t low16 = data & 0xffff;

    return low16;
}
2573
2574 int kvmppc_enable_hwrng(void)
2575 {
2576     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2577         return -1;
2578     }
2579
2580     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2581 }