target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include <dirent.h>
  18 #include <sys/types.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/mman.h>
  21 #include <sys/vfs.h>
  22
  23 #include <linux/kvm.h>
  24
  25 #include "qemu-common.h"
  26 #include "qemu/timer.h"
  27 #include "sysemu/sysemu.h"
  28 #include "sysemu/kvm.h"
  29 #include "kvm_ppc.h"
  30 #include "cpu.h"
  31 #include "sysemu/cpus.h"
  32 #include "sysemu/device_tree.h"
  33 #include "mmu-hash64.h"
  34
  35 #include "hw/sysbus.h"
  36 #include "hw/ppc/spapr.h"
  37 #include "hw/ppc/spapr_vio.h"
  38 #include "hw/ppc/ppc.h"
  39 #include "sysemu/watchdog.h"
  40 #include "trace.h"
  41 #include "exec/gdbstub.h"
  42 #include "exec/memattrs.h"
  43 #include "sysemu/hostmem.h"
  44
  45 //#define DEBUG_KVM
  46
  47 #ifdef DEBUG_KVM
  48 #define DPRINTF(fmt, ...) \
  49     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  50 #else
  51 #define DPRINTF(fmt, ...) \
  52     do { } while (0)
  53 #endif
  54
  55 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  56
  57 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  58     KVM_CAP_LAST_INFO
  59 };
  60
  61 static int cap_interrupt_unset = false;
  62 static int cap_interrupt_level = false;
  63 static int cap_segstate;
  64 static int cap_booke_sregs;
  65 static int cap_ppc_smt;
  66 static int cap_ppc_rma;
  67 static int cap_spapr_tce;
  68 static int cap_spapr_multitce;
  69 static int cap_spapr_vfio;
  70 static int cap_hior;
  71 static int cap_one_reg;
  72 static int cap_epr;
  73 static int cap_ppc_watchdog;
  74 static int cap_papr;
  75 static int cap_htab_fd;
  76 static int cap_fixup_hcalls;
  77
  78 static uint32_t debug_inst_opcode;
  79
  80 /* XXX We have a race condition where we actually have a level triggered
  81  *     interrupt, but the infrastructure can't expose that yet, so the guest
  82  *     takes but ignores it, goes to sleep and never gets notified that there's
  83  *     still an interrupt pending.
  84  *
  85  *     As a quick workaround, let's just wake up again 20 ms after we injected
  86  *     an interrupt. That way we can assure that we're always reinjecting
  87  *     interrupts in case the guest swallowed them.
  88  */
  89 static QEMUTimer *idle_timer;
  90
  91 static void kvm_kick_cpu(void *opaque)
  92 {
  93     PowerPCCPU *cpu = opaque;
  94
  95     qemu_cpu_kick(CPU(cpu));
  96 }
  97
  98 static int kvm_ppc_register_host_cpu_type(void);
  99
 100 int kvm_arch_init(MachineState *ms, KVMState *s)
 101 {
 102     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 103     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 104     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 105     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 106     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 107     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 108     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 109     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 110     cap_spapr_vfio = false;
 111     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 112     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 113     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 114     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 115     /* Note: we don't set cap_papr here, because this capability is
 116      * only activated after this by kvmppc_set_papr() */
 117     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 118     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 119
 120     if (!cap_interrupt_level) {
 121         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 122                         "VM to stall at times!\n");
 123     }
 124
 125     kvm_ppc_register_host_cpu_type();
 126
 127     return 0;
 128 }
 129
 130 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 131 {
 132     CPUPPCState *cenv = &cpu->env;
 133     CPUState *cs = CPU(cpu);
 134     struct kvm_sregs sregs;
 135     int ret;
 136
 137     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 138         /* What we're really trying to say is "if we're on BookE, we use
 139            the native PVR for now". This is the only sane way to check
 140            it though, so we potentially confuse users that they can run
 141            BookE guests on BookS. Let's hope nobody dares enough :) */
 142         return 0;
 143     } else {
 144         if (!cap_segstate) {
 145             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 146             return -ENOSYS;
 147         }
 148     }
 149
 150     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 151     if (ret) {
 152         return ret;
 153     }
 154
 155     sregs.pvr = cenv->spr[SPR_PVR];
 156     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 157 }
 158
 159 /* Set up a shared TLB array with KVM */
 160 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 161 {
 162     CPUPPCState *env = &cpu->env;
 163     CPUState *cs = CPU(cpu);
 164     struct kvm_book3e_206_tlb_params params = {};
 165     struct kvm_config_tlb cfg = {};
 166     unsigned int entries = 0;
 167     int ret, i;
 168
 169     if (!kvm_enabled() ||
 170         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 171         return 0;
 172     }
 173
 174     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 175
 176     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 177         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 178         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 179         entries += params.tlb_sizes[i];
 180     }
 181
 182     assert(entries == env->nb_tlb);
 183     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 184
 185     env->tlb_dirty = true;
 186
 187     cfg.array = (uintptr_t)env->tlb.tlbm;
 188     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 189     cfg.params = (uintptr_t)&params;
 190     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 191
 192     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 193     if (ret < 0) {
 194         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 195                 __func__, strerror(-ret));
 196         return ret;
 197     }
 198
 199     env->kvm_sw_tlb = true;
 200     return 0;
 201 }
 202
 203
 204 #if defined(TARGET_PPC64)
 205 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 206                                        struct kvm_ppc_smmu_info *info)
 207 {
 208     CPUPPCState *env = &cpu->env;
 209     CPUState *cs = CPU(cpu);
 210
 211     memset(info, 0, sizeof(*info));
 212
 213     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 214      * need to "guess" what the supported page sizes are.
 215      *
 216      * For that to work we make a few assumptions:
 217      *
 218      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 219      *   KVM which only supports 4K and 16M pages, but supports them
 220      *   regardless of the backing store characteritics. We also don't
 221      *   support 1T segments.
 222      *
 223      *   This is safe as if HV KVM ever supports that capability or PR
 224      *   KVM grows supports for more page/segment sizes, those versions
 225      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 226      *   will not hit this fallback
 227      *
 228      * - Else we are running HV KVM. This means we only support page
 229      *   sizes that fit in the backing store. Additionally we only
 230      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 231      *   P7 encodings for the SLB and hash table. Here too, we assume
 232      *   support for any newer processor will mean a kernel that
 233      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 234      *   this fallback.
 235      */
 236     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 237         /* No flags */
 238         info->flags = 0;
 239         info->slb_size = 64;
 240
 241         /* Standard 4k base page size segment */
 242         info->sps[0].page_shift = 12;
 243         info->sps[0].slb_enc = 0;
 244         info->sps[0].enc[0].page_shift = 12;
 245         info->sps[0].enc[0].pte_enc = 0;
 246
 247         /* Standard 16M large page size segment */
 248         info->sps[1].page_shift = 24;
 249         info->sps[1].slb_enc = SLB_VSID_L;
 250         info->sps[1].enc[0].page_shift = 24;
 251         info->sps[1].enc[0].pte_enc = 0;
 252     } else {
 253         int i = 0;
 254
 255         /* HV KVM has backing store size restrictions */
 256         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 257
 258         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 259             info->flags |= KVM_PPC_1T_SEGMENTS;
 260         }
 261
 262         if (env->mmu_model == POWERPC_MMU_2_06 ||
 263             env->mmu_model == POWERPC_MMU_2_07) {
 264             info->slb_size = 32;
 265         } else {
 266             info->slb_size = 64;
 267         }
 268
 269         /* Standard 4k base page size segment */
 270         info->sps[i].page_shift = 12;
 271         info->sps[i].slb_enc = 0;
 272         info->sps[i].enc[0].page_shift = 12;
 273         info->sps[i].enc[0].pte_enc = 0;
 274         i++;
 275
 276         /* 64K on MMU 2.06 and later */
 277         if (env->mmu_model == POWERPC_MMU_2_06 ||
 278             env->mmu_model == POWERPC_MMU_2_07) {
 279             info->sps[i].page_shift = 16;
 280             info->sps[i].slb_enc = 0x110;
 281             info->sps[i].enc[0].page_shift = 16;
 282             info->sps[i].enc[0].pte_enc = 1;
 283             i++;
 284         }
 285
 286         /* Standard 16M large page size segment */
 287         info->sps[i].page_shift = 24;
 288         info->sps[i].slb_enc = SLB_VSID_L;
 289         info->sps[i].enc[0].page_shift = 24;
 290         info->sps[i].enc[0].pte_enc = 0;
 291     }
 292 }
 293
 294 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 295 {
 296     CPUState *cs = CPU(cpu);
 297     int ret;
 298
 299     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 300         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 301         if (ret == 0) {
 302             return;
 303         }
 304     }
 305
 306     kvm_get_fallback_smmu_info(cpu, info);
 307 }
 308
 309 static long gethugepagesize(const char *mem_path)
 310 {
 311     struct statfs fs;
 312     int ret;
 313
 314     do {
 315         ret = statfs(mem_path, &fs);
 316     } while (ret != 0 && errno == EINTR);
 317
 318     if (ret != 0) {
 319         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 320                 strerror(errno));
 321         exit(1);
 322     }
 323
 324 #define HUGETLBFS_MAGIC       0x958458f6
 325
 326     if (fs.f_type != HUGETLBFS_MAGIC) {
 327         /* Explicit mempath, but it's ordinary pages */
 328         return getpagesize();
 329     }
 330
 331     /* It's hugepage, return the huge page size */
 332     return fs.f_bsize;
 333 }
 334
 335 static int find_max_supported_pagesize(Object *obj, void *opaque)
 336 {
 337     char *mem_path;
 338     long *hpsize_min = opaque;
 339
 340     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
 341         mem_path = object_property_get_str(obj, "mem-path", NULL);
 342         if (mem_path) {
 343             long hpsize = gethugepagesize(mem_path);
 344             if (hpsize < *hpsize_min) {
 345                 *hpsize_min = hpsize;
 346             }
 347         } else {
 348             *hpsize_min = getpagesize();
 349         }
 350     }
 351
 352     return 0;
 353 }
 354
 355 static long getrampagesize(void)
 356 {
 357     long hpsize = LONG_MAX;
 358     Object *memdev_root;
 359
 360     if (mem_path) {
 361         return gethugepagesize(mem_path);
 362     }
 363
 364     /* it's possible we have memory-backend objects with
 365      * hugepage-backed RAM. these may get mapped into system
 366      * address space via -numa parameters or memory hotplug
 367      * hooks. we want to take these into account, but we
 368      * also want to make sure these supported hugepage
 369      * sizes are applicable across the entire range of memory
 370      * we may boot from, so we take the min across all
 371      * backends, and assume normal pages in cases where a
 372      * backend isn't backed by hugepages.
 373      */
 374     memdev_root = object_resolve_path("/objects", NULL);
 375     if (!memdev_root) {
 376         return getpagesize();
 377     }
 378
 379     object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
 380
 381     return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
 382 }
 383
 384 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 385 {
 386     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 387         return true;
 388     }
 389
 390     return (1ul << shift) <= rampgsize;
 391 }
 392
 393 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 394 {
 395     static struct kvm_ppc_smmu_info smmu_info;
 396     static bool has_smmu_info;
 397     CPUPPCState *env = &cpu->env;
 398     long rampagesize;
 399     int iq, ik, jq, jk;
 400
 401     /* We only handle page sizes for 64-bit server guests for now */
 402     if (!(env->mmu_model & POWERPC_MMU_64)) {
 403         return;
 404     }
 405
 406     /* Collect MMU info from kernel if not already */
 407     if (!has_smmu_info) {
 408         kvm_get_smmu_info(cpu, &smmu_info);
 409         has_smmu_info = true;
 410     }
 411
 412     rampagesize = getrampagesize();
 413
 414     /* Convert to QEMU form */
 415     memset(&env->sps, 0, sizeof(env->sps));
 416
 417     /* If we have HV KVM, we need to forbid CI large pages if our
 418      * host page size is smaller than 64K.
 419      */
 420     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 421         env->ci_large_pages = getpagesize() >= 0x10000;
 422     }
 423
 424     /*
 425      * XXX This loop should be an entry wide AND of the capabilities that
 426      *     the selected CPU has with the capabilities that KVM supports.
 427      */
 428     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 429         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 430         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 431
 432         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 433                                  ksps->page_shift)) {
 434             continue;
 435         }
 436         qsps->page_shift = ksps->page_shift;
 437         qsps->slb_enc = ksps->slb_enc;
 438         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 439             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 440                                      ksps->enc[jk].page_shift)) {
 441                 continue;
 442             }
 443             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 444             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 445             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 446                 break;
 447             }
 448         }
 449         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 450             break;
 451         }
 452     }
 453     env->slb_nr = smmu_info.slb_size;
 454     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 455         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 456     }
 457 }
 458 #else /* defined (TARGET_PPC64) */
 459
 460 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 461 {
 462 }
 463
 464 #endif /* !defined (TARGET_PPC64) */
 465
 466 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 467 {
 468     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 469 }
 470
 471 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
 472  * book3s supports only 1 watchpoint, so array size
 473  * of 4 is sufficient for now.
 474  */
 475 #define MAX_HW_BKPTS 4
 476
 477 static struct HWBreakpoint {
 478     target_ulong addr;
 479     int type;
 480 } hw_debug_points[MAX_HW_BKPTS];
 481
 482 static CPUWatchpoint hw_watchpoint;
 483
 484 /* Default there is no breakpoint and watchpoint supported */
 485 static int max_hw_breakpoint;
 486 static int max_hw_watchpoint;
 487 static int nb_hw_breakpoint;
 488 static int nb_hw_watchpoint;
 489
 490 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 491 {
 492     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 493         max_hw_breakpoint = 2;
 494         max_hw_watchpoint = 2;
 495     }
 496
 497     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 498         fprintf(stderr, "Error initializing h/w breakpoints\n");
 499         return;
 500     }
 501 }
 502
 503 int kvm_arch_init_vcpu(CPUState *cs)
 504 {
 505     PowerPCCPU *cpu = POWERPC_CPU(cs);
 506     CPUPPCState *cenv = &cpu->env;
 507     int ret;
 508
 509     /* Gather server mmu info from KVM and update the CPU state */
 510     kvm_fixup_page_sizes(cpu);
 511
 512     /* Synchronize sregs with kvm */
 513     ret = kvm_arch_sync_sregs(cpu);
 514     if (ret) {
 515         return ret;
 516     }
 517
 518     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 519
 520     /* Some targets support access to KVM's guest TLB. */
 521     switch (cenv->mmu_model) {
 522     case POWERPC_MMU_BOOKE206:
 523         ret = kvm_booke206_tlb_init(cpu);
 524         break;
 525     default:
 526         break;
 527     }
 528
 529     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 530     kvmppc_hw_debug_points_init(cenv);
 531
 532     return ret;
 533 }
 534
 535 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 536 {
 537     CPUPPCState *env = &cpu->env;
 538     CPUState *cs = CPU(cpu);
 539     struct kvm_dirty_tlb dirty_tlb;
 540     unsigned char *bitmap;
 541     int ret;
 542
 543     if (!env->kvm_sw_tlb) {
 544         return;
 545     }
 546
 547     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 548     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 549
 550     dirty_tlb.bitmap = (uintptr_t)bitmap;
 551     dirty_tlb.num_dirty = env->nb_tlb;
 552
 553     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 554     if (ret) {
 555         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 556                 __func__, strerror(-ret));
 557     }
 558
 559     g_free(bitmap);
 560 }
 561
 562 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 563 {
 564     PowerPCCPU *cpu = POWERPC_CPU(cs);
 565     CPUPPCState *env = &cpu->env;
 566     union {
 567         uint32_t u32;
 568         uint64_t u64;
 569     } val;
 570     struct kvm_one_reg reg = {
 571         .id = id,
 572         .addr = (uintptr_t) &val,
 573     };
 574     int ret;
 575
 576     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 577     if (ret != 0) {
 578         trace_kvm_failed_spr_get(spr, strerror(errno));
 579     } else {
 580         switch (id & KVM_REG_SIZE_MASK) {
 581         case KVM_REG_SIZE_U32:
 582             env->spr[spr] = val.u32;
 583             break;
 584
 585         case KVM_REG_SIZE_U64:
 586             env->spr[spr] = val.u64;
 587             break;
 588
 589         default:
 590             /* Don't handle this size yet */
 591             abort();
 592         }
 593     }
 594 }
 595
 596 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 597 {
 598     PowerPCCPU *cpu = POWERPC_CPU(cs);
 599     CPUPPCState *env = &cpu->env;
 600     union {
 601         uint32_t u32;
 602         uint64_t u64;
 603     } val;
 604     struct kvm_one_reg reg = {
 605         .id = id,
 606         .addr = (uintptr_t) &val,
 607     };
 608     int ret;
 609
 610     switch (id & KVM_REG_SIZE_MASK) {
 611     case KVM_REG_SIZE_U32:
 612         val.u32 = env->spr[spr];
 613         break;
 614
 615     case KVM_REG_SIZE_U64:
 616         val.u64 = env->spr[spr];
 617         break;
 618
 619     default:
 620         /* Don't handle this size yet */
 621         abort();
 622     }
 623
 624     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 625     if (ret != 0) {
 626         trace_kvm_failed_spr_set(spr, strerror(errno));
 627     }
 628 }
 629
 630 static int kvm_put_fp(CPUState *cs)
 631 {
 632     PowerPCCPU *cpu = POWERPC_CPU(cs);
 633     CPUPPCState *env = &cpu->env;
 634     struct kvm_one_reg reg;
 635     int i;
 636     int ret;
 637
 638     if (env->insns_flags & PPC_FLOAT) {
 639         uint64_t fpscr = env->fpscr;
 640         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 641
 642         reg.id = KVM_REG_PPC_FPSCR;
 643         reg.addr = (uintptr_t)&fpscr;
 644         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 645         if (ret < 0) {
 646             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 647             return ret;
 648         }
 649
 650         for (i = 0; i < 32; i++) {
 651             uint64_t vsr[2];
 652
 653             vsr[0] = float64_val(env->fpr[i]);
 654             vsr[1] = env->vsr[i];
 655             reg.addr = (uintptr_t) &vsr;
 656             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 657
 658             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 659             if (ret < 0) {
 660                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 661                         i, strerror(errno));
 662                 return ret;
 663             }
 664         }
 665     }
 666
 667     if (env->insns_flags & PPC_ALTIVEC) {
 668         reg.id = KVM_REG_PPC_VSCR;
 669         reg.addr = (uintptr_t)&env->vscr;
 670         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 671         if (ret < 0) {
 672             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 673             return ret;
 674         }
 675
 676         for (i = 0; i < 32; i++) {
 677             reg.id = KVM_REG_PPC_VR(i);
 678             reg.addr = (uintptr_t)&env->avr[i];
 679             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 680             if (ret < 0) {
 681                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 682                 return ret;
 683             }
 684         }
 685     }
 686
 687     return 0;
 688 }
 689
 690 static int kvm_get_fp(CPUState *cs)
 691 {
 692     PowerPCCPU *cpu = POWERPC_CPU(cs);
 693     CPUPPCState *env = &cpu->env;
 694     struct kvm_one_reg reg;
 695     int i;
 696     int ret;
 697
 698     if (env->insns_flags & PPC_FLOAT) {
 699         uint64_t fpscr;
 700         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 701
 702         reg.id = KVM_REG_PPC_FPSCR;
 703         reg.addr = (uintptr_t)&fpscr;
 704         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 705         if (ret < 0) {
 706             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 707             return ret;
 708         } else {
 709             env->fpscr = fpscr;
 710         }
 711
 712         for (i = 0; i < 32; i++) {
 713             uint64_t vsr[2];
 714
 715             reg.addr = (uintptr_t) &vsr;
 716             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 717
 718             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 719             if (ret < 0) {
 720                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 721                         vsx ? "VSR" : "FPR", i, strerror(errno));
 722                 return ret;
 723             } else {
 724                 env->fpr[i] = vsr[0];
 725                 if (vsx) {
 726                     env->vsr[i] = vsr[1];
 727                 }
 728             }
 729         }
 730     }
 731
 732     if (env->insns_flags & PPC_ALTIVEC) {
 733         reg.id = KVM_REG_PPC_VSCR;
 734         reg.addr = (uintptr_t)&env->vscr;
 735         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 736         if (ret < 0) {
 737             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 738             return ret;
 739         }
 740
 741         for (i = 0; i < 32; i++) {
 742             reg.id = KVM_REG_PPC_VR(i);
 743             reg.addr = (uintptr_t)&env->avr[i];
 744             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 745             if (ret < 0) {
 746                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 747                         i, strerror(errno));
 748                 return ret;
 749             }
 750         }
 751     }
 752
 753     return 0;
 754 }
 755
 756 #if defined(TARGET_PPC64)
 757 static int kvm_get_vpa(CPUState *cs)
 758 {
 759     PowerPCCPU *cpu = POWERPC_CPU(cs);
 760     CPUPPCState *env = &cpu->env;
 761     struct kvm_one_reg reg;
 762     int ret;
 763
 764     reg.id = KVM_REG_PPC_VPA_ADDR;
 765     reg.addr = (uintptr_t)&env->vpa_addr;
 766     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 767     if (ret < 0) {
 768         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 769         return ret;
 770     }
 771
 772     assert((uintptr_t)&env->slb_shadow_size
 773            == ((uintptr_t)&env->slb_shadow_addr + 8));
 774     reg.id = KVM_REG_PPC_VPA_SLB;
 775     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 776     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 777     if (ret < 0) {
 778         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 779                 strerror(errno));
 780         return ret;
 781     }
 782
 783     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 784     reg.id = KVM_REG_PPC_VPA_DTL;
 785     reg.addr = (uintptr_t)&env->dtl_addr;
 786     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 787     if (ret < 0) {
 788         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 789                 strerror(errno));
 790         return ret;
 791     }
 792
 793     return 0;
 794 }
 795
 796 static int kvm_put_vpa(CPUState *cs)
 797 {
 798     PowerPCCPU *cpu = POWERPC_CPU(cs);
 799     CPUPPCState *env = &cpu->env;
 800     struct kvm_one_reg reg;
 801     int ret;
 802
 803     /* SLB shadow or DTL can't be registered unless a master VPA is
 804      * registered.  That means when restoring state, if a VPA *is*
 805      * registered, we need to set that up first.  If not, we need to
 806      * deregister the others before deregistering the master VPA */
 807     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 808
 809     if (env->vpa_addr) {
 810         reg.id = KVM_REG_PPC_VPA_ADDR;
 811         reg.addr = (uintptr_t)&env->vpa_addr;
 812         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 813         if (ret < 0) {
 814             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 815             return ret;
 816         }
 817     }
 818
 819     assert((uintptr_t)&env->slb_shadow_size
 820            == ((uintptr_t)&env->slb_shadow_addr + 8));
 821     reg.id = KVM_REG_PPC_VPA_SLB;
 822     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 823     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 824     if (ret < 0) {
 825         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 826         return ret;
 827     }
 828
 829     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 830     reg.id = KVM_REG_PPC_VPA_DTL;
 831     reg.addr = (uintptr_t)&env->dtl_addr;
 832     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 833     if (ret < 0) {
 834         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 835                 strerror(errno));
 836         return ret;
 837     }
 838
 839     if (!env->vpa_addr) {
 840         reg.id = KVM_REG_PPC_VPA_ADDR;
 841         reg.addr = (uintptr_t)&env->vpa_addr;
 842         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 843         if (ret < 0) {
 844             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 845             return ret;
 846         }
 847     }
 848
 849     return 0;
 850 }
 851 #endif /* TARGET_PPC64 */
 852
 853 int kvm_arch_put_registers(CPUState *cs, int level)
 854 {
 855     PowerPCCPU *cpu = POWERPC_CPU(cs);
 856     CPUPPCState *env = &cpu->env;
 857     struct kvm_regs regs;
 858     int ret;
 859     int i;
 860
 861     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 862     if (ret < 0) {
 863         return ret;
 864     }
 865
 866     regs.ctr = env->ctr;
 867     regs.lr  = env->lr;
 868     regs.xer = cpu_read_xer(env);
 869     regs.msr = env->msr;
 870     regs.pc = env->nip;
 871
 872     regs.srr0 = env->spr[SPR_SRR0];
 873     regs.srr1 = env->spr[SPR_SRR1];
 874
 875     regs.sprg0 = env->spr[SPR_SPRG0];
 876     regs.sprg1 = env->spr[SPR_SPRG1];
 877     regs.sprg2 = env->spr[SPR_SPRG2];
 878     regs.sprg3 = env->spr[SPR_SPRG3];
 879     regs.sprg4 = env->spr[SPR_SPRG4];
 880     regs.sprg5 = env->spr[SPR_SPRG5];
 881     regs.sprg6 = env->spr[SPR_SPRG6];
 882     regs.sprg7 = env->spr[SPR_SPRG7];
 883
 884     regs.pid = env->spr[SPR_BOOKE_PID];
 885
 886     for (i = 0;i < 32; i++)
 887         regs.gpr[i] = env->gpr[i];
 888
 889     regs.cr = 0;
 890     for (i = 0; i < 8; i++) {
 891         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 892     }
 893
 894     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 895     if (ret < 0)
 896         return ret;
 897
 898     kvm_put_fp(cs);
 899
 900     if (env->tlb_dirty) {
 901         kvm_sw_tlb_put(cpu);
 902         env->tlb_dirty = false;
 903     }
 904
 905     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 906         struct kvm_sregs sregs;
 907
 908         sregs.pvr = env->spr[SPR_PVR];
 909
 910         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 911
 912         /* Sync SLB */
 913 #ifdef TARGET_PPC64
 914         for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 915             sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 916             if (env->slb[i].esid & SLB_ESID_V) {
 917                 sregs.u.s.ppc64.slb[i].slbe |= i;
 918             }
 919             sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 920         }
 921 #endif
 922
 923         /* Sync SRs */
 924         for (i = 0; i < 16; i++) {
 925             sregs.u.s.ppc32.sr[i] = env->sr[i];
 926         }
 927
 928         /* Sync BATs */
 929         for (i = 0; i < 8; i++) {
 930             /* Beware. We have to swap upper and lower bits here */
 931             sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 932                 | env->DBAT[1][i];
 933             sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 934                 | env->IBAT[1][i];
 935         }
 936
 937         ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 938         if (ret) {
 939             return ret;
 940         }
 941     }
 942
 943     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 944         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 945     }
 946
 947     if (cap_one_reg) {
 948         int i;
 949
 950         /* We deliberately ignore errors here, for kernels which have
 951          * the ONE_REG calls, but don't support the specific
 952          * registers, there's a reasonable chance things will still
 953          * work, at least until we try to migrate. */
 954         for (i = 0; i < 1024; i++) {
 955             uint64_t id = env->spr_cb[i].one_reg_id;
 956
 957             if (id != 0) {
 958                 kvm_put_one_spr(cs, id, i);
 959             }
 960         }
 961
 962 #ifdef TARGET_PPC64
 963         if (msr_ts) {
 964             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
 965                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
 966             }
 967             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
 968                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
 969             }
 970             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
 971             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
 972             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
 973             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
 974             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
 975             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
 976             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
 977             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
 978             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
 979             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
 980         }
 981
 982         if (cap_papr) {
 983             if (kvm_put_vpa(cs) < 0) {
 984                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
 985             }
 986         }
 987
 988         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
 989 #endif /* TARGET_PPC64 */
 990     }
 991
 992     return ret;
 993 }
 994
 995 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
 996 {
 997      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
 998 }
 999
1000 int kvm_arch_get_registers(CPUState *cs)
1001 {
1002     PowerPCCPU *cpu = POWERPC_CPU(cs);
1003     CPUPPCState *env = &cpu->env;
1004     struct kvm_regs regs;
1005     struct kvm_sregs sregs;
1006     uint32_t cr;
1007     int i, ret;
1008
1009     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1010     if (ret < 0)
1011         return ret;
1012
1013     cr = regs.cr;
1014     for (i = 7; i >= 0; i--) {
1015         env->crf[i] = cr & 15;
1016         cr >>= 4;
1017     }
1018
1019     env->ctr = regs.ctr;
1020     env->lr = regs.lr;
1021     cpu_write_xer(env, regs.xer);
1022     env->msr = regs.msr;
1023     env->nip = regs.pc;
1024
1025     env->spr[SPR_SRR0] = regs.srr0;
1026     env->spr[SPR_SRR1] = regs.srr1;
1027
1028     env->spr[SPR_SPRG0] = regs.sprg0;
1029     env->spr[SPR_SPRG1] = regs.sprg1;
1030     env->spr[SPR_SPRG2] = regs.sprg2;
1031     env->spr[SPR_SPRG3] = regs.sprg3;
1032     env->spr[SPR_SPRG4] = regs.sprg4;
1033     env->spr[SPR_SPRG5] = regs.sprg5;
1034     env->spr[SPR_SPRG6] = regs.sprg6;
1035     env->spr[SPR_SPRG7] = regs.sprg7;
1036
1037     env->spr[SPR_BOOKE_PID] = regs.pid;
1038
1039     for (i = 0;i < 32; i++)
1040         env->gpr[i] = regs.gpr[i];
1041
1042     kvm_get_fp(cs);
1043
1044     if (cap_booke_sregs) {
1045         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1046         if (ret < 0) {
1047             return ret;
1048         }
1049
1050         if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1051             env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1052             env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1053             env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1054             env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1055             env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1056             env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1057             env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1058             env->spr[SPR_DECR] = sregs.u.e.dec;
1059             env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1060             env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1061             env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1062         }
1063
1064         if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1065             env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1066             env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1067             env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1068             env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1069             env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1070         }
1071
1072         if (sregs.u.e.features & KVM_SREGS_E_64) {
1073             env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1074         }
1075
1076         if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1077             env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1078         }
1079
1080         if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1081             env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1082             kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1083             env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1084             kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1085             env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1086             kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1087             env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1088             kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1089             env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1090             kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1091             env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1092             kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1093             env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1094             kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1095             env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1096             kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1097             env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1098             kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1099             env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1100             kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1101             env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1102             kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1103             env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1104             kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1105             env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1106             kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1107             env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1108             kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1109             env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1110             kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1111             env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1112             kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1113
1114             if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1115                 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1116                 kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1117                 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1118                 kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1119                 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1120                 kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1121             }
1122
1123             if (sregs.u.e.features & KVM_SREGS_E_PM) {
1124                 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1125                 kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1126             }
1127
1128             if (sregs.u.e.features & KVM_SREGS_E_PC) {
1129                 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1130                 kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1131                 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1132                 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1133             }
1134         }
1135
1136         if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1137             env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1138             env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1139             env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1140             env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1141             env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1142             env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1143             env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1144             env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1145             env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1146             env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1147         }
1148
1149         if (sregs.u.e.features & KVM_SREGS_EXP) {
1150             env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1151         }
1152
1153         if (sregs.u.e.features & KVM_SREGS_E_PD) {
1154             env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1155             env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1156         }
1157
1158         if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1159             env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1160             env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1161             env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1162
1163             if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1164                 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1165                 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1166             }
1167         }
1168     }
1169
1170     if (cap_segstate) {
1171         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1172         if (ret < 0) {
1173             return ret;
1174         }
1175
1176         if (!env->external_htab) {
1177             ppc_store_sdr1(env, sregs.u.s.sdr1);
1178         }
1179
1180         /* Sync SLB */
1181 #ifdef TARGET_PPC64
1182         /*
1183          * The packed SLB array we get from KVM_GET_SREGS only contains
1184          * information about valid entries. So we flush our internal
1185          * copy to get rid of stale ones, then put all valid SLB entries
1186          * back in.
1187          */
1188         memset(env->slb, 0, sizeof(env->slb));
1189         for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1190             target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1191             target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1192             /*
1193              * Only restore valid entries
1194              */
1195             if (rb & SLB_ESID_V) {
1196                 ppc_store_slb(env, rb, rs);
1197             }
1198         }
1199 #endif
1200
1201         /* Sync SRs */
1202         for (i = 0; i < 16; i++) {
1203             env->sr[i] = sregs.u.s.ppc32.sr[i];
1204         }
1205
1206         /* Sync BATs */
1207         for (i = 0; i < 8; i++) {
1208             env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1209             env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1210             env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1211             env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1212         }
1213     }
1214
1215     if (cap_hior) {
1216         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1217     }
1218
1219     if (cap_one_reg) {
1220         int i;
1221
1222         /* We deliberately ignore errors here, for kernels which have
1223          * the ONE_REG calls, but don't support the specific
1224          * registers, there's a reasonable chance things will still
1225          * work, at least until we try to migrate. */
1226         for (i = 0; i < 1024; i++) {
1227             uint64_t id = env->spr_cb[i].one_reg_id;
1228
1229             if (id != 0) {
1230                 kvm_get_one_spr(cs, id, i);
1231             }
1232         }
1233
1234 #ifdef TARGET_PPC64
1235         if (msr_ts) {
1236             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1237                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1238             }
1239             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1240                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1241             }
1242             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1243             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1244             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1245             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1246             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1247             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1248             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1249             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1250             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1251             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1252         }
1253
1254         if (cap_papr) {
1255             if (kvm_get_vpa(cs) < 0) {
1256                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1257             }
1258         }
1259
1260         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1261 #endif
1262     }
1263
1264     return 0;
1265 }
1266
1267 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1268 {
1269     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1270
1271     if (irq != PPC_INTERRUPT_EXT) {
1272         return 0;
1273     }
1274
1275     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1276         return 0;
1277     }
1278
1279     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1280
1281     return 0;
1282 }
1283
/* PPC_INPUT_INT selects the per-target-family input pin constant.
 * kvm_arch_pre_run() tests bit (1 << PPC_INPUT_INT) of
 * env->irq_input_state before injecting an interrupt. */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1291
1292 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1293 {
1294     PowerPCCPU *cpu = POWERPC_CPU(cs);
1295     CPUPPCState *env = &cpu->env;
1296     int r;
1297     unsigned irq;
1298
1299     qemu_mutex_lock_iothread();
1300
1301     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1302      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1303     if (!cap_interrupt_level &&
1304         run->ready_for_interrupt_injection &&
1305         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1306         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1307     {
1308         /* For now KVM disregards the 'irq' argument. However, in the
1309          * future KVM could cache it in-kernel to avoid a heavyweight exit
1310          * when reading the UIC.
1311          */
1312         irq = KVM_INTERRUPT_SET;
1313
1314         DPRINTF("injected interrupt %d\n", irq);
1315         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1316         if (r < 0) {
1317             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1318         }
1319
1320         /* Always wake up soon in case the interrupt was level based */
1321         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1322                        (get_ticks_per_sec() / 50));
1323     }
1324
1325     /* We don't know if there are more interrupts pending after this. However,
1326      * the guest will return to userspace in the course of handling this one
1327      * anyways, so we will get a chance to deliver the rest. */
1328
1329     qemu_mutex_unlock_iothread();
1330 }
1331
1332 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1333 {
1334     return MEMTXATTRS_UNSPECIFIED;
1335 }
1336
1337 int kvm_arch_process_async_events(CPUState *cs)
1338 {
1339     return cs->halted;
1340 }
1341
1342 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1343 {
1344     CPUState *cs = CPU(cpu);
1345     CPUPPCState *env = &cpu->env;
1346
1347     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1348         cs->halted = 1;
1349         cs->exception_index = EXCP_HLT;
1350     }
1351
1352     return 0;
1353 }
1354
1355 /* map dcr access to existing qemu dcr emulation */
1356 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1357 {
1358     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1359         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1360
1361     return 0;
1362 }
1363
1364 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1365 {
1366     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1367         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1368
1369     return 0;
1370 }
1371
1372 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1373 {
1374     /* Mixed endian case is not handled */
1375     uint32_t sc = debug_inst_opcode;
1376
1377     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1378                             sizeof(sc), 0) ||
1379         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1380         return -EINVAL;
1381     }
1382
1383     return 0;
1384 }
1385
1386 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1387 {
1388     uint32_t sc;
1389
1390     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1391         sc != debug_inst_opcode ||
1392         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1393                             sizeof(sc), 1)) {
1394         return -EINVAL;
1395     }
1396
1397     return 0;
1398 }
1399
1400 static int find_hw_breakpoint(target_ulong addr, int type)
1401 {
1402     int n;
1403
1404     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1405            <= ARRAY_SIZE(hw_debug_points));
1406
1407     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1408         if (hw_debug_points[n].addr == addr &&
1409              hw_debug_points[n].type == type) {
1410             return n;
1411         }
1412     }
1413
1414     return -1;
1415 }
1416
1417 static int find_hw_watchpoint(target_ulong addr, int *flag)
1418 {
1419     int n;
1420
1421     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1422     if (n >= 0) {
1423         *flag = BP_MEM_ACCESS;
1424         return n;
1425     }
1426
1427     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1428     if (n >= 0) {
1429         *flag = BP_MEM_WRITE;
1430         return n;
1431     }
1432
1433     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1434     if (n >= 0) {
1435         *flag = BP_MEM_READ;
1436         return n;
1437     }
1438
1439     return -1;
1440 }
1441
1442 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1443                                   target_ulong len, int type)
1444 {
1445     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1446         return -ENOBUFS;
1447     }
1448
1449     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1450     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1451
1452     switch (type) {
1453     case GDB_BREAKPOINT_HW:
1454         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1455             return -ENOBUFS;
1456         }
1457
1458         if (find_hw_breakpoint(addr, type) >= 0) {
1459             return -EEXIST;
1460         }
1461
1462         nb_hw_breakpoint++;
1463         break;
1464
1465     case GDB_WATCHPOINT_WRITE:
1466     case GDB_WATCHPOINT_READ:
1467     case GDB_WATCHPOINT_ACCESS:
1468         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1469             return -ENOBUFS;
1470         }
1471
1472         if (find_hw_breakpoint(addr, type) >= 0) {
1473             return -EEXIST;
1474         }
1475
1476         nb_hw_watchpoint++;
1477         break;
1478
1479     default:
1480         return -ENOSYS;
1481     }
1482
1483     return 0;
1484 }
1485
1486 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1487                                   target_ulong len, int type)
1488 {
1489     int n;
1490
1491     n = find_hw_breakpoint(addr, type);
1492     if (n < 0) {
1493         return -ENOENT;
1494     }
1495
1496     switch (type) {
1497     case GDB_BREAKPOINT_HW:
1498         nb_hw_breakpoint--;
1499         break;
1500
1501     case GDB_WATCHPOINT_WRITE:
1502     case GDB_WATCHPOINT_READ:
1503     case GDB_WATCHPOINT_ACCESS:
1504         nb_hw_watchpoint--;
1505         break;
1506
1507     default:
1508         return -ENOSYS;
1509     }
1510     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1511
1512     return 0;
1513 }
1514
1515 void kvm_arch_remove_all_hw_breakpoints(void)
1516 {
1517     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1518 }
1519
1520 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1521 {
1522     int n;
1523
1524     /* Software Breakpoint updates */
1525     if (kvm_sw_breakpoints_active(cs)) {
1526         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1527     }
1528
1529     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1530            <= ARRAY_SIZE(hw_debug_points));
1531     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1532
1533     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1534         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1535         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1536         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1537             switch (hw_debug_points[n].type) {
1538             case GDB_BREAKPOINT_HW:
1539                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1540                 break;
1541             case GDB_WATCHPOINT_WRITE:
1542                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1543                 break;
1544             case GDB_WATCHPOINT_READ:
1545                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1546                 break;
1547             case GDB_WATCHPOINT_ACCESS:
1548                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1549                                         KVMPPC_DEBUG_WATCH_READ;
1550                 break;
1551             default:
1552                 cpu_abort(cs, "Unsupported breakpoint type\n");
1553             }
1554             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1555         }
1556     }
1557 }
1558
1559 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1560 {
1561     CPUState *cs = CPU(cpu);
1562     CPUPPCState *env = &cpu->env;
1563     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1564     int handle = 0;
1565     int n;
1566     int flag = 0;
1567
1568     if (cs->singlestep_enabled) {
1569         handle = 1;
1570     } else if (arch_info->status) {
1571         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1572             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1573                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1574                 if (n >= 0) {
1575                     handle = 1;
1576                 }
1577             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1578                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1579                 n = find_hw_watchpoint(arch_info->address,  &flag);
1580                 if (n >= 0) {
1581                     handle = 1;
1582                     cs->watchpoint_hit = &hw_watchpoint;
1583                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1584                     hw_watchpoint.flags = flag;
1585                 }
1586             }
1587         }
1588     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1589         handle = 1;
1590     } else {
1591         /* QEMU is not able to handle debug exception, so inject
1592          * program exception to guest;
1593          * Yes program exception NOT debug exception !!
1594          * When QEMU is using debug resources then debug exception must
1595          * be always set. To achieve this we set MSR_DE and also set
1596          * MSRP_DEP so guest cannot change MSR_DE.
1597          * When emulating debug resource for guest we want guest
1598          * to control MSR_DE (enable/disable debug interrupt on need).
1599          * Supporting both configurations are NOT possible.
1600          * So the result is that we cannot share debug resources
1601          * between QEMU and Guest on BOOKE architecture.
1602          * In the current design QEMU gets the priority over guest,
1603          * this means that if QEMU is using debug resources then guest
1604          * cannot use them;
1605          * For software breakpoint QEMU uses a privileged instruction;
1606          * So there cannot be any reason that we are here for guest
1607          * set debug exception, only possibility is guest executed a
1608          * privileged / illegal instruction and that's why we are
1609          * injecting a program interrupt.
1610          */
1611
1612         cpu_synchronize_state(cs);
1613         /* env->nip is PC, so increment this by 4 to use
1614          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1615          */
1616         env->nip += 4;
1617         cs->exception_index = POWERPC_EXCP_PROGRAM;
1618         env->error_code = POWERPC_EXCP_INVAL;
1619         ppc_cpu_do_interrupt(cs);
1620     }
1621
1622     return handle;
1623 }
1624
1625 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1626 {
1627     PowerPCCPU *cpu = POWERPC_CPU(cs);
1628     CPUPPCState *env = &cpu->env;
1629     int ret;
1630
1631     qemu_mutex_lock_iothread();
1632
1633     switch (run->exit_reason) {
1634     case KVM_EXIT_DCR:
1635         if (run->dcr.is_write) {
1636             DPRINTF("handle dcr write\n");
1637             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1638         } else {
1639             DPRINTF("handle dcr read\n");
1640             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1641         }
1642         break;
1643     case KVM_EXIT_HLT:
1644         DPRINTF("handle halt\n");
1645         ret = kvmppc_handle_halt(cpu);
1646         break;
1647 #if defined(TARGET_PPC64)
1648     case KVM_EXIT_PAPR_HCALL:
1649         DPRINTF("handle PAPR hypercall\n");
1650         run->papr_hcall.ret = spapr_hypercall(cpu,
1651                                               run->papr_hcall.nr,
1652                                               run->papr_hcall.args);
1653         ret = 0;
1654         break;
1655 #endif
1656     case KVM_EXIT_EPR:
1657         DPRINTF("handle epr\n");
1658         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1659         ret = 0;
1660         break;
1661     case KVM_EXIT_WATCHDOG:
1662         DPRINTF("handle watchdog expiry\n");
1663         watchdog_perform_action();
1664         ret = 0;
1665         break;
1666
1667     case KVM_EXIT_DEBUG:
1668         DPRINTF("handle debug exception\n");
1669         if (kvm_handle_debug(cpu, run)) {
1670             ret = EXCP_DEBUG;
1671             break;
1672         }
1673         /* re-enter, this exception was guest-internal */
1674         ret = 0;
1675         break;
1676
1677     default:
1678         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1679         ret = -1;
1680         break;
1681     }
1682
1683     qemu_mutex_unlock_iothread();
1684     return ret;
1685 }
1686
1687 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1688 {
1689     CPUState *cs = CPU(cpu);
1690     uint32_t bits = tsr_bits;
1691     struct kvm_one_reg reg = {
1692         .id = KVM_REG_PPC_OR_TSR,
1693         .addr = (uintptr_t) &bits,
1694     };
1695
1696     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1697 }
1698
1699 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1700 {
1701
1702     CPUState *cs = CPU(cpu);
1703     uint32_t bits = tsr_bits;
1704     struct kvm_one_reg reg = {
1705         .id = KVM_REG_PPC_CLEAR_TSR,
1706         .addr = (uintptr_t) &bits,
1707     };
1708
1709     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1710 }
1711
1712 int kvmppc_set_tcr(PowerPCCPU *cpu)
1713 {
1714     CPUState *cs = CPU(cpu);
1715     CPUPPCState *env = &cpu->env;
1716     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1717
1718     struct kvm_one_reg reg = {
1719         .id = KVM_REG_PPC_TCR,
1720         .addr = (uintptr_t) &tcr,
1721     };
1722
1723     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1724 }
1725
1726 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1727 {
1728     CPUState *cs = CPU(cpu);
1729     int ret;
1730
1731     if (!kvm_enabled()) {
1732         return -1;
1733     }
1734
1735     if (!cap_ppc_watchdog) {
1736         printf("warning: KVM does not support watchdog");
1737         return -1;
1738     }
1739
1740     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1741     if (ret < 0) {
1742         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1743                 __func__, strerror(-ret));
1744         return ret;
1745     }
1746
1747     return ret;
1748 }
1749
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy
 * it (the whole line, up to @len bytes via pstrcpy) into @value.
 * Returns 0 if such a line was found, -1 otherwise, including when the
 * file cannot be opened.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int found = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            found = 0;
            break;
        }
        /* Stop scanning on a line that begins with a NUL byte */
        if (!*line) {
            break;
        }
    }

    fclose(f);

    return found;
}
1777
/*
 * Return the host timebase frequency as advertised by the "timebase"
 * line of /proc/cpuinfo.
 *
 * Falls back to get_ticks_per_sec() when the line is missing, has no
 * ':' separator, carries no decimal number after the ':', or the number
 * does not fit in 32 bits.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    char *end;
    unsigned long freq;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    ns++;

    /* strtoul() rather than atoi(): atoi() cannot report a parse
     * failure and has undefined behaviour on overflow. */
    errno = 0;
    freq = strtoul(ns, &end, 10);
    if (end == ns || errno == ERANGE || freq > UINT32_MAX) {
        return retval;
    }

    return freq;
}
1796
/* Load the contents of /proc/device-tree/system-id into *@value using
 * g_file_get_contents(); returns that call's success status. */
bool kvmppc_get_host_serial(char **value)
{
    bool ok = g_file_get_contents("/proc/device-tree/system-id", value,
                                  NULL, NULL);

    return ok;
}
1802
/* Load the contents of /proc/device-tree/model into *@value using
 * g_file_get_contents(); returns that call's success status. */
bool kvmppc_get_host_model(char **value)
{
    bool ok = g_file_get_contents("/proc/device-tree/model", value,
                                  NULL, NULL);

    return ok;
}
1807
1808 /* Try to find a device tree node for a CPU with clock-frequency property */
1809 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1810 {
1811     struct dirent *dirp;
1812     DIR *dp;
1813
1814     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1815         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1816         return -1;
1817     }
1818
1819     buf[0] = '\0';
1820     while ((dirp = readdir(dp)) != NULL) {
1821         FILE *f;
1822         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1823                  dirp->d_name);
1824         f = fopen(buf, "r");
1825         if (f) {
1826             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1827             fclose(f);
1828             break;
1829         }
1830         buf[0] = '\0';
1831     }
1832     closedir(dp);
1833     if (buf[0] == '\0') {
1834         printf("Unknown host!\n");
1835         return -1;
1836     }
1837
1838     return 0;
1839 }
1840
1841 /* Read a CPU node property from the host device tree that's a single
1842  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1843  * (can't find or open the property, or doesn't understand the
1844  * format) */
1845 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1846 {
1847     char buf[PATH_MAX], *tmp;
1848     union {
1849         uint32_t v32;
1850         uint64_t v64;
1851     } u;
1852     FILE *f;
1853     int len;
1854
1855     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1856         return -1;
1857     }
1858
1859     tmp = g_strdup_printf("%s/%s", buf, propname);
1860
1861     f = fopen(tmp, "rb");
1862     g_free(tmp);
1863     if (!f) {
1864         return -1;
1865     }
1866
1867     len = fread(&u, 1, sizeof(u), f);
1868     fclose(f);
1869     switch (len) {
1870     case 4:
1871         /* property is a 32-bit quantity */
1872         return be32_to_cpu(u.v32);
1873     case 8:
1874         return be64_to_cpu(u.v64);
1875     }
1876
1877     return 0;
1878 }
1879
/* Return the host CPU node's "clock-frequency" device tree property as
 * read by kvmppc_read_int_cpu_dt(). */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t clock_frequency = kvmppc_read_int_cpu_dt("clock-frequency");

    return clock_frequency;
}
1884
/* Return the host CPU node's "ibm,vmx" device tree property as read by
 * kvmppc_read_int_cpu_dt(), truncated to 32 bits. */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t vmx = kvmppc_read_int_cpu_dt("ibm,vmx");

    return vmx;
}
1889
/* Return the host CPU node's "ibm,dfp" device tree property as read by
 * kvmppc_read_int_cpu_dt(), truncated to 32 bits. */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t dfp = kvmppc_read_int_cpu_dt("ibm,dfp");

    return dfp;
}
1894
1895 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1896  {
1897      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1898      CPUState *cs = CPU(cpu);
1899
1900     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1901         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1902         return 0;
1903     }
1904
1905     return 1;
1906 }
1907
1908 int kvmppc_get_hasidle(CPUPPCState *env)
1909 {
1910     struct kvm_ppc_pvinfo pvinfo;
1911
1912     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1913         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1914         return 1;
1915     }
1916
1917     return 0;
1918 }
1919
1920 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1921 {
1922     uint32_t *hc = (uint32_t*)buf;
1923     struct kvm_ppc_pvinfo pvinfo;
1924
1925     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1926         memcpy(buf, pvinfo.hcall, buf_len);
1927         return 0;
1928     }
1929
1930     /*
1931      * Fallback to always fail hypercalls regardless of endianness:
1932      *
1933      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1934      *     li r3, -1
1935      *     b .+8       (becomes nop in wrong endian)
1936      *     bswap32(li r3, -1)
1937      */
1938
1939     hc[0] = cpu_to_be32(0x08000048);
1940     hc[1] = cpu_to_be32(0x3860ffff);
1941     hc[2] = cpu_to_be32(0x48000008);
1942     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1943
1944     return 0;
1945 }
1946
1947 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1948 {
1949     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1950 }
1951
1952 void kvmppc_enable_logical_ci_hcalls(void)
1953 {
1954     /*
1955      * FIXME: it would be nice if we could detect the cases where
1956      * we're using a device which requires the in kernel
1957      * implementation of these hcalls, but the kernel lacks them and
1958      * produce a warning.
1959      */
1960     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1961     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1962 }
1963
1964 void kvmppc_enable_set_mode_hcall(void)
1965 {
1966     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1967 }
1968
1969 void kvmppc_set_papr(PowerPCCPU *cpu)
1970 {
1971     CPUState *cs = CPU(cpu);
1972     int ret;
1973
1974     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1975     if (ret) {
1976         cpu_abort(cs, "This KVM version does not support PAPR\n");
1977     }
1978
1979     /* Update the capability flag so we sync the right information
1980      * with kvm */
1981     cap_papr = 1;
1982 }
1983
1984 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
1985 {
1986     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
1987 }
1988
1989 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1990 {
1991     CPUState *cs = CPU(cpu);
1992     int ret;
1993
1994     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1995     if (ret && mpic_proxy) {
1996         cpu_abort(cs, "This KVM version does not support EPR\n");
1997     }
1998 }
1999
2000 int kvmppc_smt_threads(void)
2001 {
2002     return cap_ppc_smt ? cap_ppc_smt : 1;
2003 }
2004
2005 #ifdef TARGET_PPC64
2006 off_t kvmppc_alloc_rma(void **rma)
2007 {
2008     off_t size;
2009     int fd;
2010     struct kvm_allocate_rma ret;
2011
2012     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2013      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2014      *                      not necessary on this hardware
2015      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2016      *
2017      * FIXME: We should allow the user to force contiguous RMA
2018      * allocation in the cap_ppc_rma==1 case.
2019      */
2020     if (cap_ppc_rma < 2) {
2021         return 0;
2022     }
2023
2024     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2025     if (fd < 0) {
2026         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2027                 strerror(errno));
2028         return -1;
2029     }
2030
2031     size = MIN(ret.rma_size, 256ul << 20);
2032
2033     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2034     if (*rma == MAP_FAILED) {
2035         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2036         return -1;
2037     };
2038
2039     return size;
2040 }
2041
2042 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2043 {
2044     struct kvm_ppc_smmu_info info;
2045     long rampagesize, best_page_shift;
2046     int i;
2047
2048     if (cap_ppc_rma >= 2) {
2049         return current_size;
2050     }
2051
2052     /* Find the largest hardware supported page size that's less than
2053      * or equal to the (logical) backing page size of guest RAM */
2054     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2055     rampagesize = getrampagesize();
2056     best_page_shift = 0;
2057
2058     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2059         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2060
2061         if (!sps->page_shift) {
2062             continue;
2063         }
2064
2065         if ((sps->page_shift > best_page_shift)
2066             && ((1UL << sps->page_shift) <= rampagesize)) {
2067             best_page_shift = sps->page_shift;
2068         }
2069     }
2070
2071     return MIN(current_size,
2072                1ULL << (best_page_shift + hash_shift - 7));
2073 }
2074 #endif
2075
2076 bool kvmppc_spapr_use_multitce(void)
2077 {
2078     return cap_spapr_multitce;
2079 }
2080
2081 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2082                               bool need_vfio)
2083 {
2084     struct kvm_create_spapr_tce args = {
2085         .liobn = liobn,
2086         .window_size = window_size,
2087     };
2088     long len;
2089     int fd;
2090     void *table;
2091
2092     /* Must set fd to -1 so we don't try to munmap when called for
2093      * destroying the table, which the upper layers -will- do
2094      */
2095     *pfd = -1;
2096     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2097         return NULL;
2098     }
2099
2100     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2101     if (fd < 0) {
2102         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2103                 liobn);
2104         return NULL;
2105     }
2106
2107     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2108     /* FIXME: round this up to page size */
2109
2110     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2111     if (table == MAP_FAILED) {
2112         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2113                 liobn);
2114         close(fd);
2115         return NULL;
2116     }
2117
2118     *pfd = fd;
2119     return table;
2120 }
2121
/* Unmap a TCE table and close its fd.
 *
 * table:    start of the mapping.
 * fd:       the table's fd; a negative value means there is nothing
 *           to tear down here.
 * nb_table: number of 64-bit entries in the table.
 *
 * Returns -1 when fd is negative, 0 otherwise.  A failing munmap() or
 * close() is reported on stderr but still yields 0; note that close()
 * is not attempted when munmap() fails. */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        /* Terminate the message with a newline so it does not run into
         * whatever is printed to stderr next. */
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2140
2141 int kvmppc_reset_htab(int shift_hint)
2142 {
2143     uint32_t shift = shift_hint;
2144
2145     if (!kvm_enabled()) {
2146         /* Full emulation, tell caller to allocate htab itself */
2147         return 0;
2148     }
2149     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2150         int ret;
2151         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2152         if (ret == -ENOTTY) {
2153             /* At least some versions of PR KVM advertise the
2154              * capability, but don't implement the ioctl().  Oops.
2155              * Return 0 so that we allocate the htab in qemu, as is
2156              * correct for PR. */
2157             return 0;
2158         } else if (ret < 0) {
2159             return ret;
2160         }
2161         return shift;
2162     }
2163
2164     /* We have a kernel that predates the htab reset calls.  For PR
2165      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2166      * this era, it has allocated a 16MB fixed size hash table
2167      * already.  Kernels of this era have the GET_PVINFO capability
2168      * only on PR, so we use this hack to determine the right
2169      * answer */
2170     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2171         /* PR - tell caller to allocate htab */
2172         return 0;
2173     } else {
2174         /* HV - assume 16MB kernel allocated htab */
2175         return 24;
2176     }
2177 }
2178
/* Execute the PowerPC "mfpvr" instruction on the host and return the
 * 32-bit value it places in the output register. */
static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    /* Output-only operand: the instruction writes its result to pvr */
    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}
2187
/* Set (on == true) or clear (on == false) the bits given by flags in
 * *word, leaving all other bits untouched. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    uint64_t current = *word;

    *word = on ? (current | flags) : (current & ~flags);
}
2196
/* Instance init hook for the host CPU type (installed as .instance_init
 * in kvm_ppc_register_host_cpu_type()).  It only asserts that KVM is
 * enabled. */
static void kvmppc_host_cpu_initfn(Object *obj)
{
    assert(kvm_enabled());
}
2201
2202 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2203 {
2204     DeviceClass *dc = DEVICE_CLASS(oc);
2205     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2206     uint32_t vmx = kvmppc_get_vmx();
2207     uint32_t dfp = kvmppc_get_dfp();
2208     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2209     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2210
2211     /* Now fix up the class with information we can query from the host */
2212     pcc->pvr = mfpvr();
2213
2214     if (vmx != -1) {
2215         /* Only override when we know what the host supports */
2216         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2217         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2218     }
2219     if (dfp != -1) {
2220         /* Only override when we know what the host supports */
2221         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2222     }
2223
2224     if (dcache_size != -1) {
2225         pcc->l1_dcache_size = dcache_size;
2226     }
2227
2228     if (icache_size != -1) {
2229         pcc->l1_icache_size = icache_size;
2230     }
2231
2232     /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2233     dc->cannot_destroy_with_object_finalize_yet = true;
2234 }
2235
2236 bool kvmppc_has_cap_epr(void)
2237 {
2238     return cap_epr;
2239 }
2240
2241 bool kvmppc_has_cap_htab_fd(void)
2242 {
2243     return cap_htab_fd;
2244 }
2245
2246 bool kvmppc_has_cap_fixup_hcalls(void)
2247 {
2248     return cap_fixup_hcalls;
2249 }
2250
2251 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2252 {
2253     ObjectClass *oc = OBJECT_CLASS(pcc);
2254
2255     while (oc && !object_class_is_abstract(oc)) {
2256         oc = object_class_get_parent(oc);
2257     }
2258     assert(oc);
2259
2260     return POWERPC_CPU_CLASS(oc);
2261 }
2262
2263 static int kvm_ppc_register_host_cpu_type(void)
2264 {
2265     TypeInfo type_info = {
2266         .name = TYPE_HOST_POWERPC_CPU,
2267         .instance_init = kvmppc_host_cpu_initfn,
2268         .class_init = kvmppc_host_cpu_class_init,
2269     };
2270     uint32_t host_pvr = mfpvr();
2271     PowerPCCPUClass *pvr_pcc;
2272     DeviceClass *dc;
2273
2274     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2275     if (pvr_pcc == NULL) {
2276         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2277     }
2278     if (pvr_pcc == NULL) {
2279         return -1;
2280     }
2281     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2282     type_register(&type_info);
2283
2284     /* Register generic family CPU class for a family */
2285     pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2286     dc = DEVICE_CLASS(pvr_pcc);
2287     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2288     type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2289     type_register(&type_info);
2290
2291     return 0;
2292 }
2293
2294 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2295 {
2296     struct kvm_rtas_token_args args = {
2297         .token = token,
2298     };
2299
2300     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2301         return -ENOENT;
2302     }
2303
2304     strncpy(args.name, function, sizeof(args.name));
2305
2306     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2307 }
2308
2309 int kvmppc_get_htab_fd(bool write)
2310 {
2311     struct kvm_get_htab_fd s = {
2312         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2313         .start_index = 0,
2314     };
2315
2316     if (!cap_htab_fd) {
2317         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2318         return -1;
2319     }
2320
2321     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2322 }
2323
2324 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2325 {
2326     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2327     uint8_t buf[bufsize];
2328     ssize_t rc;
2329
2330     do {
2331         rc = read(fd, buf, bufsize);
2332         if (rc < 0) {
2333             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2334                     strerror(errno));
2335             return rc;
2336         } else if (rc) {
2337             uint8_t *buffer = buf;
2338             ssize_t n = rc;
2339             while (n) {
2340                 struct kvm_get_htab_header *head =
2341                     (struct kvm_get_htab_header *) buffer;
2342                 size_t chunksize = sizeof(*head) +
2343                      HASH_PTE_SIZE_64 * head->n_valid;
2344
2345                 qemu_put_be32(f, head->index);
2346                 qemu_put_be16(f, head->n_valid);
2347                 qemu_put_be16(f, head->n_invalid);
2348                 qemu_put_buffer(f, (void *)(head + 1),
2349                                 HASH_PTE_SIZE_64 * head->n_valid);
2350
2351                 buffer += chunksize;
2352                 n -= chunksize;
2353             }
2354         }
2355     } while ((rc != 0)
2356              && ((max_ns < 0)
2357                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2358
2359     return (rc == 0) ? 1 : 0;
2360 }
2361
2362 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2363                            uint16_t n_valid, uint16_t n_invalid)
2364 {
2365     struct kvm_get_htab_header *buf;
2366     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2367     ssize_t rc;
2368
2369     buf = alloca(chunksize);
2370     buf->index = index;
2371     buf->n_valid = n_valid;
2372     buf->n_invalid = n_invalid;
2373
2374     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2375
2376     rc = write(fd, buf, chunksize);
2377     if (rc < 0) {
2378         fprintf(stderr, "Error writing KVM hash table: %s\n",
2379                 strerror(errno));
2380         return rc;
2381     }
2382     if (rc != chunksize) {
2383         /* We should never get a short write on a single chunk */
2384         fprintf(stderr, "Short write, restoring KVM hash table\n");
2385         return -1;
2386     }
2387     return 0;
2388 }
2389
2390 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2391 {
2392     return true;
2393 }
2394
2395 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2396 {
2397     return 1;
2398 }
2399
/* PowerPC implementation of this KVM arch hook: unconditionally returns
 * 1; neither argument is used. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}
2404
2405 void kvm_arch_init_irq_routing(KVMState *s)
2406 {
2407 }
2408
/* Buffer exchanged with the KVM hash table fd by
 * kvmppc_hash64_read_pteg() and kvmppc_hash64_write_pte(): a
 * kvm_get_htab_header followed by the HPTE words. */
struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * Room for HPTES_PER_GROUP * 2 words plus one spare element that is
     * required for read.  Note that the spare is a whole target_ulong,
     * not a single byte.
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};
2416
2417 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2418 {
2419     int htab_fd;
2420     struct kvm_get_htab_fd ghf;
2421     struct kvm_get_htab_buf  *hpte_buf;
2422
2423     ghf.flags = 0;
2424     ghf.start_index = pte_index;
2425     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2426     if (htab_fd < 0) {
2427         goto error_out;
2428     }
2429
2430     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2431     /*
2432      * Read the hpte group
2433      */
2434     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2435         goto out_close;
2436     }
2437
2438     close(htab_fd);
2439     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2440
2441 out_close:
2442     g_free(hpte_buf);
2443     close(htab_fd);
2444 error_out:
2445     return 0;
2446 }
2447
2448 void kvmppc_hash64_free_pteg(uint64_t token)
2449 {
2450     struct kvm_get_htab_buf *htab_buf;
2451
2452     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2453                             hpte);
2454     g_free(htab_buf);
2455     return;
2456 }
2457
2458 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2459                              target_ulong pte0, target_ulong pte1)
2460 {
2461     int htab_fd;
2462     struct kvm_get_htab_fd ghf;
2463     struct kvm_get_htab_buf hpte_buf;
2464
2465     ghf.flags = 0;
2466     ghf.start_index = 0;     /* Ignored */
2467     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2468     if (htab_fd < 0) {
2469         goto error_out;
2470     }
2471
2472     hpte_buf.header.n_valid = 1;
2473     hpte_buf.header.n_invalid = 0;
2474     hpte_buf.header.index = pte_index;
2475     hpte_buf.hpte[0] = pte0;
2476     hpte_buf.hpte[1] = pte1;
2477     /*
2478      * Write the hpte entry.
2479      * CAUTION: write() has the warn_unused_result attribute. Hence we
2480      * need to check the return value, even though we do nothing.
2481      */
2482     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2483         goto out_close;
2484     }
2485
2486 out_close:
2487     close(htab_fd);
2488     return;
2489
2490 error_out:
2491     return;
2492 }
2493
2494 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2495                              uint64_t address, uint32_t data, PCIDevice *dev)
2496 {
2497     return 0;
2498 }
2499
/* Return the low 16 bits of the MSI data word. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t low16_mask = 0xffff;

    return data & low16_mask;
}
2504
2505 int kvmppc_enable_hwrng(void)
2506 {
2507     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2508         return -1;
2509     }
2510
2511     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2512 }