target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include <dirent.h>
  18 #include <sys/types.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/mman.h>
  21 #include <sys/vfs.h>
  22
  23 #include <linux/kvm.h>
  24
  25 #include "qemu-common.h"
  26 #include "qemu/timer.h"
  27 #include "sysemu/sysemu.h"
  28 #include "sysemu/kvm.h"
  29 #include "kvm_ppc.h"
  30 #include "cpu.h"
  31 #include "sysemu/cpus.h"
  32 #include "sysemu/device_tree.h"
  33 #include "mmu-hash64.h"
  34
  35 #include "hw/sysbus.h"
  36 #include "hw/ppc/spapr.h"
  37 #include "hw/ppc/spapr_vio.h"
  38 #include "hw/ppc/ppc.h"
  39 #include "sysemu/watchdog.h"
  40 #include "trace.h"
  41 #include "exec/gdbstub.h"
  42 #include "exec/memattrs.h"
  43 #include "sysemu/hostmem.h"
  44
  45 //#define DEBUG_KVM
  46
  47 #ifdef DEBUG_KVM
  48 #define DPRINTF(fmt, ...) \
  49     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  50 #else
  51 #define DPRINTF(fmt, ...) \
  52     do { } while (0)
  53 #endif
  54
  55 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  56
  57 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  58     KVM_CAP_LAST_INFO
  59 };
  60
  61 static int cap_interrupt_unset = false;
  62 static int cap_interrupt_level = false;
  63 static int cap_segstate;
  64 static int cap_booke_sregs;
  65 static int cap_ppc_smt;
  66 static int cap_ppc_rma;
  67 static int cap_spapr_tce;
  68 static int cap_spapr_multitce;
  69 static int cap_spapr_vfio;
  70 static int cap_hior;
  71 static int cap_one_reg;
  72 static int cap_epr;
  73 static int cap_ppc_watchdog;
  74 static int cap_papr;
  75 static int cap_htab_fd;
  76 static int cap_fixup_hcalls;
  77
  78 static uint32_t debug_inst_opcode;
  79
  80 /* XXX We have a race condition where we actually have a level triggered
  81  *     interrupt, but the infrastructure can't expose that yet, so the guest
  82  *     takes but ignores it, goes to sleep and never gets notified that there's
  83  *     still an interrupt pending.
  84  *
  85  *     As a quick workaround, let's just wake up again 20 ms after we injected
  86  *     an interrupt. That way we can assure that we're always reinjecting
  87  *     interrupts in case the guest swallowed them.
  88  */
  89 static QEMUTimer *idle_timer;
  90
  91 static void kvm_kick_cpu(void *opaque)
  92 {
  93     PowerPCCPU *cpu = opaque;
  94
  95     qemu_cpu_kick(CPU(cpu));
  96 }
  97
  98 static int kvm_ppc_register_host_cpu_type(void);
  99
 100 int kvm_arch_init(MachineState *ms, KVMState *s)
 101 {
 102     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 103     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 104     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 105     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 106     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 107     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 108     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 109     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 110     cap_spapr_vfio = false;
 111     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 112     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 113     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 114     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 115     /* Note: we don't set cap_papr here, because this capability is
 116      * only activated after this by kvmppc_set_papr() */
 117     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 118     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 119
 120     if (!cap_interrupt_level) {
 121         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 122                         "VM to stall at times!\n");
 123     }
 124
 125     kvm_ppc_register_host_cpu_type();
 126
 127     return 0;
 128 }
 129
 130 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 131 {
 132     CPUPPCState *cenv = &cpu->env;
 133     CPUState *cs = CPU(cpu);
 134     struct kvm_sregs sregs;
 135     int ret;
 136
 137     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 138         /* What we're really trying to say is "if we're on BookE, we use
 139            the native PVR for now". This is the only sane way to check
 140            it though, so we potentially confuse users that they can run
 141            BookE guests on BookS. Let's hope nobody dares enough :) */
 142         return 0;
 143     } else {
 144         if (!cap_segstate) {
 145             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 146             return -ENOSYS;
 147         }
 148     }
 149
 150     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 151     if (ret) {
 152         return ret;
 153     }
 154
 155     sregs.pvr = cenv->spr[SPR_PVR];
 156     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 157 }
 158
 159 /* Set up a shared TLB array with KVM */
 160 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 161 {
 162     CPUPPCState *env = &cpu->env;
 163     CPUState *cs = CPU(cpu);
 164     struct kvm_book3e_206_tlb_params params = {};
 165     struct kvm_config_tlb cfg = {};
 166     unsigned int entries = 0;
 167     int ret, i;
 168
 169     if (!kvm_enabled() ||
 170         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 171         return 0;
 172     }
 173
 174     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 175
 176     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 177         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 178         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 179         entries += params.tlb_sizes[i];
 180     }
 181
 182     assert(entries == env->nb_tlb);
 183     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 184
 185     env->tlb_dirty = true;
 186
 187     cfg.array = (uintptr_t)env->tlb.tlbm;
 188     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 189     cfg.params = (uintptr_t)&params;
 190     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 191
 192     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 193     if (ret < 0) {
 194         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 195                 __func__, strerror(-ret));
 196         return ret;
 197     }
 198
 199     env->kvm_sw_tlb = true;
 200     return 0;
 201 }
 202
 203
 204 #if defined(TARGET_PPC64)
 205 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 206                                        struct kvm_ppc_smmu_info *info)
 207 {
 208     CPUPPCState *env = &cpu->env;
 209     CPUState *cs = CPU(cpu);
 210
 211     memset(info, 0, sizeof(*info));
 212
 213     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 214      * need to "guess" what the supported page sizes are.
 215      *
 216      * For that to work we make a few assumptions:
 217      *
 218      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 219      *   KVM which only supports 4K and 16M pages, but supports them
 220      *   regardless of the backing store characteritics. We also don't
 221      *   support 1T segments.
 222      *
 223      *   This is safe as if HV KVM ever supports that capability or PR
 224      *   KVM grows supports for more page/segment sizes, those versions
 225      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 226      *   will not hit this fallback
 227      *
 228      * - Else we are running HV KVM. This means we only support page
 229      *   sizes that fit in the backing store. Additionally we only
 230      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 231      *   P7 encodings for the SLB and hash table. Here too, we assume
 232      *   support for any newer processor will mean a kernel that
 233      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 234      *   this fallback.
 235      */
 236     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 237         /* No flags */
 238         info->flags = 0;
 239         info->slb_size = 64;
 240
 241         /* Standard 4k base page size segment */
 242         info->sps[0].page_shift = 12;
 243         info->sps[0].slb_enc = 0;
 244         info->sps[0].enc[0].page_shift = 12;
 245         info->sps[0].enc[0].pte_enc = 0;
 246
 247         /* Standard 16M large page size segment */
 248         info->sps[1].page_shift = 24;
 249         info->sps[1].slb_enc = SLB_VSID_L;
 250         info->sps[1].enc[0].page_shift = 24;
 251         info->sps[1].enc[0].pte_enc = 0;
 252     } else {
 253         int i = 0;
 254
 255         /* HV KVM has backing store size restrictions */
 256         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 257
 258         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 259             info->flags |= KVM_PPC_1T_SEGMENTS;
 260         }
 261
 262         if (env->mmu_model == POWERPC_MMU_2_06) {
 263             info->slb_size = 32;
 264         } else {
 265             info->slb_size = 64;
 266         }
 267
 268         /* Standard 4k base page size segment */
 269         info->sps[i].page_shift = 12;
 270         info->sps[i].slb_enc = 0;
 271         info->sps[i].enc[0].page_shift = 12;
 272         info->sps[i].enc[0].pte_enc = 0;
 273         i++;
 274
 275         /* 64K on MMU 2.06 */
 276         if (env->mmu_model == POWERPC_MMU_2_06) {
 277             info->sps[i].page_shift = 16;
 278             info->sps[i].slb_enc = 0x110;
 279             info->sps[i].enc[0].page_shift = 16;
 280             info->sps[i].enc[0].pte_enc = 1;
 281             i++;
 282         }
 283
 284         /* Standard 16M large page size segment */
 285         info->sps[i].page_shift = 24;
 286         info->sps[i].slb_enc = SLB_VSID_L;
 287         info->sps[i].enc[0].page_shift = 24;
 288         info->sps[i].enc[0].pte_enc = 0;
 289     }
 290 }
 291
 292 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 293 {
 294     CPUState *cs = CPU(cpu);
 295     int ret;
 296
 297     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 298         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 299         if (ret == 0) {
 300             return;
 301         }
 302     }
 303
 304     kvm_get_fallback_smmu_info(cpu, info);
 305 }
 306
 307 static long gethugepagesize(const char *mem_path)
 308 {
 309     struct statfs fs;
 310     int ret;
 311
 312     do {
 313         ret = statfs(mem_path, &fs);
 314     } while (ret != 0 && errno == EINTR);
 315
 316     if (ret != 0) {
 317         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 318                 strerror(errno));
 319         exit(1);
 320     }
 321
 322 #define HUGETLBFS_MAGIC       0x958458f6
 323
 324     if (fs.f_type != HUGETLBFS_MAGIC) {
 325         /* Explicit mempath, but it's ordinary pages */
 326         return getpagesize();
 327     }
 328
 329     /* It's hugepage, return the huge page size */
 330     return fs.f_bsize;
 331 }
 332
 333 static int find_max_supported_pagesize(Object *obj, void *opaque)
 334 {
 335     char *mem_path;
 336     long *hpsize_min = opaque;
 337
 338     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
 339         mem_path = object_property_get_str(obj, "mem-path", NULL);
 340         if (mem_path) {
 341             long hpsize = gethugepagesize(mem_path);
 342             if (hpsize < *hpsize_min) {
 343                 *hpsize_min = hpsize;
 344             }
 345         } else {
 346             *hpsize_min = getpagesize();
 347         }
 348     }
 349
 350     return 0;
 351 }
 352
 353 static long getrampagesize(void)
 354 {
 355     long hpsize = LONG_MAX;
 356     Object *memdev_root;
 357
 358     if (mem_path) {
 359         return gethugepagesize(mem_path);
 360     }
 361
 362     /* it's possible we have memory-backend objects with
 363      * hugepage-backed RAM. these may get mapped into system
 364      * address space via -numa parameters or memory hotplug
 365      * hooks. we want to take these into account, but we
 366      * also want to make sure these supported hugepage
 367      * sizes are applicable across the entire range of memory
 368      * we may boot from, so we take the min across all
 369      * backends, and assume normal pages in cases where a
 370      * backend isn't backed by hugepages.
 371      */
 372     memdev_root = object_resolve_path("/objects", NULL);
 373     if (!memdev_root) {
 374         return getpagesize();
 375     }
 376
 377     object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
 378
 379     return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
 380 }
 381
 382 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 383 {
 384     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 385         return true;
 386     }
 387
 388     return (1ul << shift) <= rampgsize;
 389 }
 390
 391 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 392 {
 393     static struct kvm_ppc_smmu_info smmu_info;
 394     static bool has_smmu_info;
 395     CPUPPCState *env = &cpu->env;
 396     long rampagesize;
 397     int iq, ik, jq, jk;
 398
 399     /* We only handle page sizes for 64-bit server guests for now */
 400     if (!(env->mmu_model & POWERPC_MMU_64)) {
 401         return;
 402     }
 403
 404     /* Collect MMU info from kernel if not already */
 405     if (!has_smmu_info) {
 406         kvm_get_smmu_info(cpu, &smmu_info);
 407         has_smmu_info = true;
 408     }
 409
 410     rampagesize = getrampagesize();
 411
 412     /* Convert to QEMU form */
 413     memset(&env->sps, 0, sizeof(env->sps));
 414
 415     /*
 416      * XXX This loop should be an entry wide AND of the capabilities that
 417      *     the selected CPU has with the capabilities that KVM supports.
 418      */
 419     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 420         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 421         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 422
 423         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 424                                  ksps->page_shift)) {
 425             continue;
 426         }
 427         qsps->page_shift = ksps->page_shift;
 428         qsps->slb_enc = ksps->slb_enc;
 429         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 430             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 431                                      ksps->enc[jk].page_shift)) {
 432                 continue;
 433             }
 434             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 435             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 436             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 437                 break;
 438             }
 439         }
 440         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 441             break;
 442         }
 443     }
 444     env->slb_nr = smmu_info.slb_size;
 445     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 446         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 447     }
 448 }
 449 #else /* defined (TARGET_PPC64) */
 450
 451 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 452 {
 453 }
 454
 455 #endif /* !defined (TARGET_PPC64) */
 456
 457 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 458 {
 459     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 460 }
 461
 462 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
 463  * book3s supports only 1 watchpoint, so array size
 464  * of 4 is sufficient for now.
 465  */
 466 #define MAX_HW_BKPTS 4
 467
 468 static struct HWBreakpoint {
 469     target_ulong addr;
 470     int type;
 471 } hw_debug_points[MAX_HW_BKPTS];
 472
 473 static CPUWatchpoint hw_watchpoint;
 474
 475 /* Default there is no breakpoint and watchpoint supported */
 476 static int max_hw_breakpoint;
 477 static int max_hw_watchpoint;
 478 static int nb_hw_breakpoint;
 479 static int nb_hw_watchpoint;
 480
 481 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 482 {
 483     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 484         max_hw_breakpoint = 2;
 485         max_hw_watchpoint = 2;
 486     }
 487
 488     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 489         fprintf(stderr, "Error initializing h/w breakpoints\n");
 490         return;
 491     }
 492 }
 493
 494 int kvm_arch_init_vcpu(CPUState *cs)
 495 {
 496     PowerPCCPU *cpu = POWERPC_CPU(cs);
 497     CPUPPCState *cenv = &cpu->env;
 498     int ret;
 499
 500     /* Gather server mmu info from KVM and update the CPU state */
 501     kvm_fixup_page_sizes(cpu);
 502
 503     /* Synchronize sregs with kvm */
 504     ret = kvm_arch_sync_sregs(cpu);
 505     if (ret) {
 506         return ret;
 507     }
 508
 509     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 510
 511     /* Some targets support access to KVM's guest TLB. */
 512     switch (cenv->mmu_model) {
 513     case POWERPC_MMU_BOOKE206:
 514         ret = kvm_booke206_tlb_init(cpu);
 515         break;
 516     default:
 517         break;
 518     }
 519
 520     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 521     kvmppc_hw_debug_points_init(cenv);
 522
 523     return ret;
 524 }
 525
 526 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 527 {
 528     CPUPPCState *env = &cpu->env;
 529     CPUState *cs = CPU(cpu);
 530     struct kvm_dirty_tlb dirty_tlb;
 531     unsigned char *bitmap;
 532     int ret;
 533
 534     if (!env->kvm_sw_tlb) {
 535         return;
 536     }
 537
 538     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 539     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 540
 541     dirty_tlb.bitmap = (uintptr_t)bitmap;
 542     dirty_tlb.num_dirty = env->nb_tlb;
 543
 544     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 545     if (ret) {
 546         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 547                 __func__, strerror(-ret));
 548     }
 549
 550     g_free(bitmap);
 551 }
 552
 553 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 554 {
 555     PowerPCCPU *cpu = POWERPC_CPU(cs);
 556     CPUPPCState *env = &cpu->env;
 557     union {
 558         uint32_t u32;
 559         uint64_t u64;
 560     } val;
 561     struct kvm_one_reg reg = {
 562         .id = id,
 563         .addr = (uintptr_t) &val,
 564     };
 565     int ret;
 566
 567     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 568     if (ret != 0) {
 569         trace_kvm_failed_spr_get(spr, strerror(errno));
 570     } else {
 571         switch (id & KVM_REG_SIZE_MASK) {
 572         case KVM_REG_SIZE_U32:
 573             env->spr[spr] = val.u32;
 574             break;
 575
 576         case KVM_REG_SIZE_U64:
 577             env->spr[spr] = val.u64;
 578             break;
 579
 580         default:
 581             /* Don't handle this size yet */
 582             abort();
 583         }
 584     }
 585 }
 586
 587 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 588 {
 589     PowerPCCPU *cpu = POWERPC_CPU(cs);
 590     CPUPPCState *env = &cpu->env;
 591     union {
 592         uint32_t u32;
 593         uint64_t u64;
 594     } val;
 595     struct kvm_one_reg reg = {
 596         .id = id,
 597         .addr = (uintptr_t) &val,
 598     };
 599     int ret;
 600
 601     switch (id & KVM_REG_SIZE_MASK) {
 602     case KVM_REG_SIZE_U32:
 603         val.u32 = env->spr[spr];
 604         break;
 605
 606     case KVM_REG_SIZE_U64:
 607         val.u64 = env->spr[spr];
 608         break;
 609
 610     default:
 611         /* Don't handle this size yet */
 612         abort();
 613     }
 614
 615     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 616     if (ret != 0) {
 617         trace_kvm_failed_spr_set(spr, strerror(errno));
 618     }
 619 }
 620
 621 static int kvm_put_fp(CPUState *cs)
 622 {
 623     PowerPCCPU *cpu = POWERPC_CPU(cs);
 624     CPUPPCState *env = &cpu->env;
 625     struct kvm_one_reg reg;
 626     int i;
 627     int ret;
 628
 629     if (env->insns_flags & PPC_FLOAT) {
 630         uint64_t fpscr = env->fpscr;
 631         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 632
 633         reg.id = KVM_REG_PPC_FPSCR;
 634         reg.addr = (uintptr_t)&fpscr;
 635         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 636         if (ret < 0) {
 637             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 638             return ret;
 639         }
 640
 641         for (i = 0; i < 32; i++) {
 642             uint64_t vsr[2];
 643
 644             vsr[0] = float64_val(env->fpr[i]);
 645             vsr[1] = env->vsr[i];
 646             reg.addr = (uintptr_t) &vsr;
 647             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 648
 649             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 650             if (ret < 0) {
 651                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 652                         i, strerror(errno));
 653                 return ret;
 654             }
 655         }
 656     }
 657
 658     if (env->insns_flags & PPC_ALTIVEC) {
 659         reg.id = KVM_REG_PPC_VSCR;
 660         reg.addr = (uintptr_t)&env->vscr;
 661         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 662         if (ret < 0) {
 663             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 664             return ret;
 665         }
 666
 667         for (i = 0; i < 32; i++) {
 668             reg.id = KVM_REG_PPC_VR(i);
 669             reg.addr = (uintptr_t)&env->avr[i];
 670             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 671             if (ret < 0) {
 672                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 673                 return ret;
 674             }
 675         }
 676     }
 677
 678     return 0;
 679 }
 680
 681 static int kvm_get_fp(CPUState *cs)
 682 {
 683     PowerPCCPU *cpu = POWERPC_CPU(cs);
 684     CPUPPCState *env = &cpu->env;
 685     struct kvm_one_reg reg;
 686     int i;
 687     int ret;
 688
 689     if (env->insns_flags & PPC_FLOAT) {
 690         uint64_t fpscr;
 691         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 692
 693         reg.id = KVM_REG_PPC_FPSCR;
 694         reg.addr = (uintptr_t)&fpscr;
 695         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 696         if (ret < 0) {
 697             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 698             return ret;
 699         } else {
 700             env->fpscr = fpscr;
 701         }
 702
 703         for (i = 0; i < 32; i++) {
 704             uint64_t vsr[2];
 705
 706             reg.addr = (uintptr_t) &vsr;
 707             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 708
 709             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 710             if (ret < 0) {
 711                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 712                         vsx ? "VSR" : "FPR", i, strerror(errno));
 713                 return ret;
 714             } else {
 715                 env->fpr[i] = vsr[0];
 716                 if (vsx) {
 717                     env->vsr[i] = vsr[1];
 718                 }
 719             }
 720         }
 721     }
 722
 723     if (env->insns_flags & PPC_ALTIVEC) {
 724         reg.id = KVM_REG_PPC_VSCR;
 725         reg.addr = (uintptr_t)&env->vscr;
 726         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 727         if (ret < 0) {
 728             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 729             return ret;
 730         }
 731
 732         for (i = 0; i < 32; i++) {
 733             reg.id = KVM_REG_PPC_VR(i);
 734             reg.addr = (uintptr_t)&env->avr[i];
 735             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 736             if (ret < 0) {
 737                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 738                         i, strerror(errno));
 739                 return ret;
 740             }
 741         }
 742     }
 743
 744     return 0;
 745 }
 746
 747 #if defined(TARGET_PPC64)
 748 static int kvm_get_vpa(CPUState *cs)
 749 {
 750     PowerPCCPU *cpu = POWERPC_CPU(cs);
 751     CPUPPCState *env = &cpu->env;
 752     struct kvm_one_reg reg;
 753     int ret;
 754
 755     reg.id = KVM_REG_PPC_VPA_ADDR;
 756     reg.addr = (uintptr_t)&env->vpa_addr;
 757     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 758     if (ret < 0) {
 759         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 760         return ret;
 761     }
 762
 763     assert((uintptr_t)&env->slb_shadow_size
 764            == ((uintptr_t)&env->slb_shadow_addr + 8));
 765     reg.id = KVM_REG_PPC_VPA_SLB;
 766     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 767     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 768     if (ret < 0) {
 769         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 770                 strerror(errno));
 771         return ret;
 772     }
 773
 774     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 775     reg.id = KVM_REG_PPC_VPA_DTL;
 776     reg.addr = (uintptr_t)&env->dtl_addr;
 777     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 778     if (ret < 0) {
 779         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 780                 strerror(errno));
 781         return ret;
 782     }
 783
 784     return 0;
 785 }
 786
 787 static int kvm_put_vpa(CPUState *cs)
 788 {
 789     PowerPCCPU *cpu = POWERPC_CPU(cs);
 790     CPUPPCState *env = &cpu->env;
 791     struct kvm_one_reg reg;
 792     int ret;
 793
 794     /* SLB shadow or DTL can't be registered unless a master VPA is
 795      * registered.  That means when restoring state, if a VPA *is*
 796      * registered, we need to set that up first.  If not, we need to
 797      * deregister the others before deregistering the master VPA */
 798     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 799
 800     if (env->vpa_addr) {
 801         reg.id = KVM_REG_PPC_VPA_ADDR;
 802         reg.addr = (uintptr_t)&env->vpa_addr;
 803         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 804         if (ret < 0) {
 805             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 806             return ret;
 807         }
 808     }
 809
 810     assert((uintptr_t)&env->slb_shadow_size
 811            == ((uintptr_t)&env->slb_shadow_addr + 8));
 812     reg.id = KVM_REG_PPC_VPA_SLB;
 813     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 814     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 815     if (ret < 0) {
 816         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 817         return ret;
 818     }
 819
 820     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 821     reg.id = KVM_REG_PPC_VPA_DTL;
 822     reg.addr = (uintptr_t)&env->dtl_addr;
 823     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 824     if (ret < 0) {
 825         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 826                 strerror(errno));
 827         return ret;
 828     }
 829
 830     if (!env->vpa_addr) {
 831         reg.id = KVM_REG_PPC_VPA_ADDR;
 832         reg.addr = (uintptr_t)&env->vpa_addr;
 833         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 834         if (ret < 0) {
 835             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 836             return ret;
 837         }
 838     }
 839
 840     return 0;
 841 }
 842 #endif /* TARGET_PPC64 */
 843
 844 int kvm_arch_put_registers(CPUState *cs, int level)
 845 {
 846     PowerPCCPU *cpu = POWERPC_CPU(cs);
 847     CPUPPCState *env = &cpu->env;
 848     struct kvm_regs regs;
 849     int ret;
 850     int i;
 851
 852     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 853     if (ret < 0) {
 854         return ret;
 855     }
 856
 857     regs.ctr = env->ctr;
 858     regs.lr  = env->lr;
 859     regs.xer = cpu_read_xer(env);
 860     regs.msr = env->msr;
 861     regs.pc = env->nip;
 862
 863     regs.srr0 = env->spr[SPR_SRR0];
 864     regs.srr1 = env->spr[SPR_SRR1];
 865
 866     regs.sprg0 = env->spr[SPR_SPRG0];
 867     regs.sprg1 = env->spr[SPR_SPRG1];
 868     regs.sprg2 = env->spr[SPR_SPRG2];
 869     regs.sprg3 = env->spr[SPR_SPRG3];
 870     regs.sprg4 = env->spr[SPR_SPRG4];
 871     regs.sprg5 = env->spr[SPR_SPRG5];
 872     regs.sprg6 = env->spr[SPR_SPRG6];
 873     regs.sprg7 = env->spr[SPR_SPRG7];
 874
 875     regs.pid = env->spr[SPR_BOOKE_PID];
 876
 877     for (i = 0;i < 32; i++)
 878         regs.gpr[i] = env->gpr[i];
 879
 880     regs.cr = 0;
 881     for (i = 0; i < 8; i++) {
 882         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 883     }
 884
 885     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 886     if (ret < 0)
 887         return ret;
 888
 889     kvm_put_fp(cs);
 890
 891     if (env->tlb_dirty) {
 892         kvm_sw_tlb_put(cpu);
 893         env->tlb_dirty = false;
 894     }
 895
 896     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 897         struct kvm_sregs sregs;
 898
 899         sregs.pvr = env->spr[SPR_PVR];
 900
 901         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 902
 903         /* Sync SLB */
 904 #ifdef TARGET_PPC64
 905         for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 906             sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 907             if (env->slb[i].esid & SLB_ESID_V) {
 908                 sregs.u.s.ppc64.slb[i].slbe |= i;
 909             }
 910             sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 911         }
 912 #endif
 913
 914         /* Sync SRs */
 915         for (i = 0; i < 16; i++) {
 916             sregs.u.s.ppc32.sr[i] = env->sr[i];
 917         }
 918
 919         /* Sync BATs */
 920         for (i = 0; i < 8; i++) {
 921             /* Beware. We have to swap upper and lower bits here */
 922             sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 923                 | env->DBAT[1][i];
 924             sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 925                 | env->IBAT[1][i];
 926         }
 927
 928         ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 929         if (ret) {
 930             return ret;
 931         }
 932     }
 933
 934     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 935         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 936     }
 937
 938     if (cap_one_reg) {
 939         int i;
 940
 941         /* We deliberately ignore errors here, for kernels which have
 942          * the ONE_REG calls, but don't support the specific
 943          * registers, there's a reasonable chance things will still
 944          * work, at least until we try to migrate. */
 945         for (i = 0; i < 1024; i++) {
 946             uint64_t id = env->spr_cb[i].one_reg_id;
 947
 948             if (id != 0) {
 949                 kvm_put_one_spr(cs, id, i);
 950             }
 951         }
 952
 953 #ifdef TARGET_PPC64
 954         if (msr_ts) {
 955             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
 956                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
 957             }
 958             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
 959                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
 960             }
 961             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
 962             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
 963             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
 964             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
 965             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
 966             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
 967             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
 968             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
 969             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
 970             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
 971         }
 972
 973         if (cap_papr) {
 974             if (kvm_put_vpa(cs) < 0) {
 975                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
 976             }
 977         }
 978
 979         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
 980 #endif /* TARGET_PPC64 */
 981     }
 982
 983     return ret;
 984 }
 985
 986 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
 987 {
 988      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
 989 }
 990
 991 int kvm_arch_get_registers(CPUState *cs)
 992 {
 993     PowerPCCPU *cpu = POWERPC_CPU(cs);
 994     CPUPPCState *env = &cpu->env;
 995     struct kvm_regs regs;
 996     struct kvm_sregs sregs;
 997     uint32_t cr;
 998     int i, ret;
 999
1000     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1001     if (ret < 0)
1002         return ret;
1003
1004     cr = regs.cr;
1005     for (i = 7; i >= 0; i--) {
1006         env->crf[i] = cr & 15;
1007         cr >>= 4;
1008     }
1009
1010     env->ctr = regs.ctr;
1011     env->lr = regs.lr;
1012     cpu_write_xer(env, regs.xer);
1013     env->msr = regs.msr;
1014     env->nip = regs.pc;
1015
1016     env->spr[SPR_SRR0] = regs.srr0;
1017     env->spr[SPR_SRR1] = regs.srr1;
1018
1019     env->spr[SPR_SPRG0] = regs.sprg0;
1020     env->spr[SPR_SPRG1] = regs.sprg1;
1021     env->spr[SPR_SPRG2] = regs.sprg2;
1022     env->spr[SPR_SPRG3] = regs.sprg3;
1023     env->spr[SPR_SPRG4] = regs.sprg4;
1024     env->spr[SPR_SPRG5] = regs.sprg5;
1025     env->spr[SPR_SPRG6] = regs.sprg6;
1026     env->spr[SPR_SPRG7] = regs.sprg7;
1027
1028     env->spr[SPR_BOOKE_PID] = regs.pid;
1029
1030     for (i = 0;i < 32; i++)
1031         env->gpr[i] = regs.gpr[i];
1032
1033     kvm_get_fp(cs);
1034
1035     if (cap_booke_sregs) {
1036         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1037         if (ret < 0) {
1038             return ret;
1039         }
1040
1041         if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1042             env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1043             env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1044             env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1045             env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1046             env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1047             env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1048             env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1049             env->spr[SPR_DECR] = sregs.u.e.dec;
1050             env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1051             env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1052             env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1053         }
1054
1055         if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1056             env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1057             env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1058             env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1059             env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1060             env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1061         }
1062
1063         if (sregs.u.e.features & KVM_SREGS_E_64) {
1064             env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1065         }
1066
1067         if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1068             env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1069         }
1070
1071         if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1072             env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1073             kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1074             env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1075             kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1076             env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1077             kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1078             env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1079             kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1080             env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1081             kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1082             env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1083             kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1084             env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1085             kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1086             env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1087             kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1088             env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1089             kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1090             env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1091             kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1092             env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1093             kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1094             env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1095             kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1096             env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1097             kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1098             env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1099             kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1100             env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1101             kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1102             env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1103             kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1104
1105             if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1106                 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1107                 kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1108                 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1109                 kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1110                 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1111                 kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1112             }
1113
1114             if (sregs.u.e.features & KVM_SREGS_E_PM) {
1115                 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1116                 kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1117             }
1118
1119             if (sregs.u.e.features & KVM_SREGS_E_PC) {
1120                 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1121                 kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1122                 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1123                 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1124             }
1125         }
1126
1127         if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1128             env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1129             env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1130             env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1131             env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1132             env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1133             env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1134             env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1135             env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1136             env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1137             env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1138         }
1139
1140         if (sregs.u.e.features & KVM_SREGS_EXP) {
1141             env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1142         }
1143
1144         if (sregs.u.e.features & KVM_SREGS_E_PD) {
1145             env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1146             env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1147         }
1148
1149         if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1150             env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1151             env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1152             env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1153
1154             if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1155                 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1156                 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1157             }
1158         }
1159     }
1160
1161     if (cap_segstate) {
1162         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1163         if (ret < 0) {
1164             return ret;
1165         }
1166
1167         if (!env->external_htab) {
1168             ppc_store_sdr1(env, sregs.u.s.sdr1);
1169         }
1170
1171         /* Sync SLB */
1172 #ifdef TARGET_PPC64
1173         /*
1174          * The packed SLB array we get from KVM_GET_SREGS only contains
1175          * information about valid entries. So we flush our internal
1176          * copy to get rid of stale ones, then put all valid SLB entries
1177          * back in.
1178          */
1179         memset(env->slb, 0, sizeof(env->slb));
1180         for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1181             target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1182             target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1183             /*
1184              * Only restore valid entries
1185              */
1186             if (rb & SLB_ESID_V) {
1187                 ppc_store_slb(env, rb, rs);
1188             }
1189         }
1190 #endif
1191
1192         /* Sync SRs */
1193         for (i = 0; i < 16; i++) {
1194             env->sr[i] = sregs.u.s.ppc32.sr[i];
1195         }
1196
1197         /* Sync BATs */
1198         for (i = 0; i < 8; i++) {
1199             env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1200             env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1201             env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1202             env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1203         }
1204     }
1205
1206     if (cap_hior) {
1207         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1208     }
1209
1210     if (cap_one_reg) {
1211         int i;
1212
1213         /* We deliberately ignore errors here, for kernels which have
1214          * the ONE_REG calls, but don't support the specific
1215          * registers, there's a reasonable chance things will still
1216          * work, at least until we try to migrate. */
1217         for (i = 0; i < 1024; i++) {
1218             uint64_t id = env->spr_cb[i].one_reg_id;
1219
1220             if (id != 0) {
1221                 kvm_get_one_spr(cs, id, i);
1222             }
1223         }
1224
1225 #ifdef TARGET_PPC64
1226         if (msr_ts) {
1227             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1228                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1229             }
1230             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1231                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1232             }
1233             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1234             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1235             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1236             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1237             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1238             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1239             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1240             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1241             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1242             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1243         }
1244
1245         if (cap_papr) {
1246             if (kvm_get_vpa(cs) < 0) {
1247                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1248             }
1249         }
1250
1251         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1252 #endif
1253     }
1254
1255     return 0;
1256 }
1257
1258 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1259 {
1260     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1261
1262     if (irq != PPC_INTERRUPT_EXT) {
1263         return 0;
1264     }
1265
1266     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1267         return 0;
1268     }
1269
1270     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1271
1272     return 0;
1273 }
1274
/*
 * Pick the PPC*_INPUT_INT constant that matches the target being built.
 * kvm_arch_pre_run() uses it as a bit index into env->irq_input_state.
 */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1282
1283 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1284 {
1285     PowerPCCPU *cpu = POWERPC_CPU(cs);
1286     CPUPPCState *env = &cpu->env;
1287     int r;
1288     unsigned irq;
1289
1290     qemu_mutex_lock_iothread();
1291
1292     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1293      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1294     if (!cap_interrupt_level &&
1295         run->ready_for_interrupt_injection &&
1296         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1297         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1298     {
1299         /* For now KVM disregards the 'irq' argument. However, in the
1300          * future KVM could cache it in-kernel to avoid a heavyweight exit
1301          * when reading the UIC.
1302          */
1303         irq = KVM_INTERRUPT_SET;
1304
1305         DPRINTF("injected interrupt %d\n", irq);
1306         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1307         if (r < 0) {
1308             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1309         }
1310
1311         /* Always wake up soon in case the interrupt was level based */
1312         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1313                        (get_ticks_per_sec() / 50));
1314     }
1315
1316     /* We don't know if there are more interrupts pending after this. However,
1317      * the guest will return to userspace in the course of handling this one
1318      * anyways, so we will get a chance to deliver the rest. */
1319
1320     qemu_mutex_unlock_iothread();
1321 }
1322
1323 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1324 {
1325     return MEMTXATTRS_UNSPECIFIED;
1326 }
1327
1328 int kvm_arch_process_async_events(CPUState *cs)
1329 {
1330     return cs->halted;
1331 }
1332
1333 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1334 {
1335     CPUState *cs = CPU(cpu);
1336     CPUPPCState *env = &cpu->env;
1337
1338     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1339         cs->halted = 1;
1340         cs->exception_index = EXCP_HLT;
1341     }
1342
1343     return 0;
1344 }
1345
1346 /* map dcr access to existing qemu dcr emulation */
1347 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1348 {
1349     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1350         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1351
1352     return 0;
1353 }
1354
1355 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1356 {
1357     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1358         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1359
1360     return 0;
1361 }
1362
1363 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1364 {
1365     /* Mixed endian case is not handled */
1366     uint32_t sc = debug_inst_opcode;
1367
1368     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1369                             sizeof(sc), 0) ||
1370         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1371         return -EINVAL;
1372     }
1373
1374     return 0;
1375 }
1376
1377 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1378 {
1379     uint32_t sc;
1380
1381     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1382         sc != debug_inst_opcode ||
1383         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1384                             sizeof(sc), 1)) {
1385         return -EINVAL;
1386     }
1387
1388     return 0;
1389 }
1390
1391 static int find_hw_breakpoint(target_ulong addr, int type)
1392 {
1393     int n;
1394
1395     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1396            <= ARRAY_SIZE(hw_debug_points));
1397
1398     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1399         if (hw_debug_points[n].addr == addr &&
1400              hw_debug_points[n].type == type) {
1401             return n;
1402         }
1403     }
1404
1405     return -1;
1406 }
1407
1408 static int find_hw_watchpoint(target_ulong addr, int *flag)
1409 {
1410     int n;
1411
1412     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1413     if (n >= 0) {
1414         *flag = BP_MEM_ACCESS;
1415         return n;
1416     }
1417
1418     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1419     if (n >= 0) {
1420         *flag = BP_MEM_WRITE;
1421         return n;
1422     }
1423
1424     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1425     if (n >= 0) {
1426         *flag = BP_MEM_READ;
1427         return n;
1428     }
1429
1430     return -1;
1431 }
1432
1433 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1434                                   target_ulong len, int type)
1435 {
1436     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1437         return -ENOBUFS;
1438     }
1439
1440     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1441     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1442
1443     switch (type) {
1444     case GDB_BREAKPOINT_HW:
1445         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1446             return -ENOBUFS;
1447         }
1448
1449         if (find_hw_breakpoint(addr, type) >= 0) {
1450             return -EEXIST;
1451         }
1452
1453         nb_hw_breakpoint++;
1454         break;
1455
1456     case GDB_WATCHPOINT_WRITE:
1457     case GDB_WATCHPOINT_READ:
1458     case GDB_WATCHPOINT_ACCESS:
1459         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1460             return -ENOBUFS;
1461         }
1462
1463         if (find_hw_breakpoint(addr, type) >= 0) {
1464             return -EEXIST;
1465         }
1466
1467         nb_hw_watchpoint++;
1468         break;
1469
1470     default:
1471         return -ENOSYS;
1472     }
1473
1474     return 0;
1475 }
1476
1477 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1478                                   target_ulong len, int type)
1479 {
1480     int n;
1481
1482     n = find_hw_breakpoint(addr, type);
1483     if (n < 0) {
1484         return -ENOENT;
1485     }
1486
1487     switch (type) {
1488     case GDB_BREAKPOINT_HW:
1489         nb_hw_breakpoint--;
1490         break;
1491
1492     case GDB_WATCHPOINT_WRITE:
1493     case GDB_WATCHPOINT_READ:
1494     case GDB_WATCHPOINT_ACCESS:
1495         nb_hw_watchpoint--;
1496         break;
1497
1498     default:
1499         return -ENOSYS;
1500     }
1501     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1502
1503     return 0;
1504 }
1505
1506 void kvm_arch_remove_all_hw_breakpoints(void)
1507 {
1508     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1509 }
1510
1511 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1512 {
1513     int n;
1514
1515     /* Software Breakpoint updates */
1516     if (kvm_sw_breakpoints_active(cs)) {
1517         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1518     }
1519
1520     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1521            <= ARRAY_SIZE(hw_debug_points));
1522     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1523
1524     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1525         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1526         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1527         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1528             switch (hw_debug_points[n].type) {
1529             case GDB_BREAKPOINT_HW:
1530                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1531                 break;
1532             case GDB_WATCHPOINT_WRITE:
1533                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1534                 break;
1535             case GDB_WATCHPOINT_READ:
1536                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1537                 break;
1538             case GDB_WATCHPOINT_ACCESS:
1539                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1540                                         KVMPPC_DEBUG_WATCH_READ;
1541                 break;
1542             default:
1543                 cpu_abort(cs, "Unsupported breakpoint type\n");
1544             }
1545             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1546         }
1547     }
1548 }
1549
1550 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1551 {
1552     CPUState *cs = CPU(cpu);
1553     CPUPPCState *env = &cpu->env;
1554     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1555     int handle = 0;
1556     int n;
1557     int flag = 0;
1558
1559     if (cs->singlestep_enabled) {
1560         handle = 1;
1561     } else if (arch_info->status) {
1562         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1563             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1564                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1565                 if (n >= 0) {
1566                     handle = 1;
1567                 }
1568             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1569                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1570                 n = find_hw_watchpoint(arch_info->address,  &flag);
1571                 if (n >= 0) {
1572                     handle = 1;
1573                     cs->watchpoint_hit = &hw_watchpoint;
1574                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1575                     hw_watchpoint.flags = flag;
1576                 }
1577             }
1578         }
1579     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1580         handle = 1;
1581     } else {
1582         /* QEMU is not able to handle debug exception, so inject
1583          * program exception to guest;
1584          * Yes program exception NOT debug exception !!
1585          * When QEMU is using debug resources then debug exception must
1586          * be always set. To achieve this we set MSR_DE and also set
1587          * MSRP_DEP so guest cannot change MSR_DE.
1588          * When emulating debug resource for guest we want guest
1589          * to control MSR_DE (enable/disable debug interrupt on need).
1590          * Supporting both configurations are NOT possible.
1591          * So the result is that we cannot share debug resources
1592          * between QEMU and Guest on BOOKE architecture.
1593          * In the current design QEMU gets the priority over guest,
1594          * this means that if QEMU is using debug resources then guest
1595          * cannot use them;
1596          * For software breakpoint QEMU uses a privileged instruction;
1597          * So there cannot be any reason that we are here for guest
1598          * set debug exception, only possibility is guest executed a
1599          * privileged / illegal instruction and that's why we are
1600          * injecting a program interrupt.
1601          */
1602
1603         cpu_synchronize_state(cs);
1604         /* env->nip is PC, so increment this by 4 to use
1605          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1606          */
1607         env->nip += 4;
1608         cs->exception_index = POWERPC_EXCP_PROGRAM;
1609         env->error_code = POWERPC_EXCP_INVAL;
1610         ppc_cpu_do_interrupt(cs);
1611     }
1612
1613     return handle;
1614 }
1615
1616 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1617 {
1618     PowerPCCPU *cpu = POWERPC_CPU(cs);
1619     CPUPPCState *env = &cpu->env;
1620     int ret;
1621
1622     qemu_mutex_lock_iothread();
1623
1624     switch (run->exit_reason) {
1625     case KVM_EXIT_DCR:
1626         if (run->dcr.is_write) {
1627             DPRINTF("handle dcr write\n");
1628             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1629         } else {
1630             DPRINTF("handle dcr read\n");
1631             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1632         }
1633         break;
1634     case KVM_EXIT_HLT:
1635         DPRINTF("handle halt\n");
1636         ret = kvmppc_handle_halt(cpu);
1637         break;
1638 #if defined(TARGET_PPC64)
1639     case KVM_EXIT_PAPR_HCALL:
1640         DPRINTF("handle PAPR hypercall\n");
1641         run->papr_hcall.ret = spapr_hypercall(cpu,
1642                                               run->papr_hcall.nr,
1643                                               run->papr_hcall.args);
1644         ret = 0;
1645         break;
1646 #endif
1647     case KVM_EXIT_EPR:
1648         DPRINTF("handle epr\n");
1649         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1650         ret = 0;
1651         break;
1652     case KVM_EXIT_WATCHDOG:
1653         DPRINTF("handle watchdog expiry\n");
1654         watchdog_perform_action();
1655         ret = 0;
1656         break;
1657
1658     case KVM_EXIT_DEBUG:
1659         DPRINTF("handle debug exception\n");
1660         if (kvm_handle_debug(cpu, run)) {
1661             ret = EXCP_DEBUG;
1662             break;
1663         }
1664         /* re-enter, this exception was guest-internal */
1665         ret = 0;
1666         break;
1667
1668     default:
1669         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1670         ret = -1;
1671         break;
1672     }
1673
1674     qemu_mutex_unlock_iothread();
1675     return ret;
1676 }
1677
1678 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1679 {
1680     CPUState *cs = CPU(cpu);
1681     uint32_t bits = tsr_bits;
1682     struct kvm_one_reg reg = {
1683         .id = KVM_REG_PPC_OR_TSR,
1684         .addr = (uintptr_t) &bits,
1685     };
1686
1687     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1688 }
1689
1690 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1691 {
1692
1693     CPUState *cs = CPU(cpu);
1694     uint32_t bits = tsr_bits;
1695     struct kvm_one_reg reg = {
1696         .id = KVM_REG_PPC_CLEAR_TSR,
1697         .addr = (uintptr_t) &bits,
1698     };
1699
1700     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1701 }
1702
1703 int kvmppc_set_tcr(PowerPCCPU *cpu)
1704 {
1705     CPUState *cs = CPU(cpu);
1706     CPUPPCState *env = &cpu->env;
1707     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1708
1709     struct kvm_one_reg reg = {
1710         .id = KVM_REG_PPC_TCR,
1711         .addr = (uintptr_t) &tcr,
1712     };
1713
1714     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1715 }
1716
1717 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1718 {
1719     CPUState *cs = CPU(cpu);
1720     int ret;
1721
1722     if (!kvm_enabled()) {
1723         return -1;
1724     }
1725
1726     if (!cap_ppc_watchdog) {
1727         printf("warning: KVM does not support watchdog");
1728         return -1;
1729     }
1730
1731     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1732     if (ret < 0) {
1733         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1734                 __func__, strerror(-ret));
1735         return ret;
1736     }
1737
1738     return ret;
1739 }
1740
/*
 * Find the first line of /proc/cpuinfo that begins with @field and copy
 * that whole line into @value (at most @len bytes, via pstrcpy()).
 *
 * Returns 0 when a line was found, -1 when the file cannot be opened or
 * no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        /* Give up if fgets hands back an empty string */
        if (*line == '\0') {
            break;
        }
    }

    fclose(f);

    return ret;
}
1768
/*
 * Return the host timebase frequency, parsed from the number following
 * the ':' on the "timebase" line of /proc/cpuinfo.
 *
 * get_ticks_per_sec() is returned instead when the line is missing, has
 * no ':', or the text after it does not parse as a decimal number that
 * fits in 32 bits.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    char *end;
    unsigned long freq;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    ns++;

    /*
     * strtoul() rather than atoi(): atoi() has undefined behaviour when
     * the value does not fit in an int and cannot report a parse failure.
     */
    freq = strtoul(ns, &end, 10);
    if (end == ns || freq > UINT32_MAX) {
        return retval;
    }

    return freq;
}
1788
/*
 * Read the contents of /proc/device-tree/system-id into a buffer stored
 * in *value, using g_file_get_contents().
 *
 * Returns the result of g_file_get_contents() converted to bool.
 */
bool kvmppc_get_host_serial(char **value)
{
    static const char path[] = "/proc/device-tree/system-id";

    return g_file_get_contents(path, value, NULL, NULL);
}
1794
/*
 * Read the contents of /proc/device-tree/model into a buffer stored in
 * *value, using g_file_get_contents().
 *
 * Returns the result of g_file_get_contents() converted to bool.
 */
bool kvmppc_get_host_model(char **value)
{
    static const char path[] = "/proc/device-tree/model";

    return g_file_get_contents(path, value, NULL, NULL);
}
1799
1800 /* Try to find a device tree node for a CPU with clock-frequency property */
1801 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1802 {
1803     struct dirent *dirp;
1804     DIR *dp;
1805
1806     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1807         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1808         return -1;
1809     }
1810
1811     buf[0] = '\0';
1812     while ((dirp = readdir(dp)) != NULL) {
1813         FILE *f;
1814         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1815                  dirp->d_name);
1816         f = fopen(buf, "r");
1817         if (f) {
1818             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1819             fclose(f);
1820             break;
1821         }
1822         buf[0] = '\0';
1823     }
1824     closedir(dp);
1825     if (buf[0] == '\0') {
1826         printf("Unknown host!\n");
1827         return -1;
1828     }
1829
1830     return 0;
1831 }
1832
1833 /* Read a CPU node property from the host device tree that's a single
1834  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1835  * (can't find or open the property, or doesn't understand the
1836  * format) */
1837 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1838 {
1839     char buf[PATH_MAX], *tmp;
1840     union {
1841         uint32_t v32;
1842         uint64_t v64;
1843     } u;
1844     FILE *f;
1845     int len;
1846
1847     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1848         return -1;
1849     }
1850
1851     tmp = g_strdup_printf("%s/%s", buf, propname);
1852
1853     f = fopen(tmp, "rb");
1854     g_free(tmp);
1855     if (!f) {
1856         return -1;
1857     }
1858
1859     len = fread(&u, 1, sizeof(u), f);
1860     fclose(f);
1861     switch (len) {
1862     case 4:
1863         /* property is a 32-bit quantity */
1864         return be32_to_cpu(u.v32);
1865     case 8:
1866         return be64_to_cpu(u.v64);
1867     }
1868
1869     return 0;
1870 }
1871
/* Return the "clock-frequency" property of the host CPU device tree
 * node as reported by kvmppc_read_int_cpu_dt(). */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1876
/* Return the "ibm,vmx" property of the host CPU device tree node, as
 * reported by kvmppc_read_int_cpu_dt(), truncated to 32 bits. */
uint32_t kvmppc_get_vmx(void)
{
    uint32_t vmx = kvmppc_read_int_cpu_dt("ibm,vmx");

    return vmx;
}
1881
/* Return the "ibm,dfp" property of the host CPU device tree node, as
 * reported by kvmppc_read_int_cpu_dt(), truncated to 32 bits. */
uint32_t kvmppc_get_dfp(void)
{
    uint32_t dfp = kvmppc_read_int_cpu_dt("ibm,dfp");

    return dfp;
}
1886
1887 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1888  {
1889      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1890      CPUState *cs = CPU(cpu);
1891
1892     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1893         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1894         return 0;
1895     }
1896
1897     return 1;
1898 }
1899
1900 int kvmppc_get_hasidle(CPUPPCState *env)
1901 {
1902     struct kvm_ppc_pvinfo pvinfo;
1903
1904     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1905         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1906         return 1;
1907     }
1908
1909     return 0;
1910 }
1911
1912 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1913 {
1914     uint32_t *hc = (uint32_t*)buf;
1915     struct kvm_ppc_pvinfo pvinfo;
1916
1917     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1918         memcpy(buf, pvinfo.hcall, buf_len);
1919         return 0;
1920     }
1921
1922     /*
1923      * Fallback to always fail hypercalls regardless of endianness:
1924      *
1925      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1926      *     li r3, -1
1927      *     b .+8       (becomes nop in wrong endian)
1928      *     bswap32(li r3, -1)
1929      */
1930
1931     hc[0] = cpu_to_be32(0x08000048);
1932     hc[1] = cpu_to_be32(0x3860ffff);
1933     hc[2] = cpu_to_be32(0x48000008);
1934     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1935
1936     return 0;
1937 }
1938
1939 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1940 {
1941     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1942 }
1943
1944 void kvmppc_enable_logical_ci_hcalls(void)
1945 {
1946     /*
1947      * FIXME: it would be nice if we could detect the cases where
1948      * we're using a device which requires the in kernel
1949      * implementation of these hcalls, but the kernel lacks them and
1950      * produce a warning.
1951      */
1952     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1953     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1954 }
1955
1956 void kvmppc_enable_set_mode_hcall(void)
1957 {
1958     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1959 }
1960
1961 void kvmppc_set_papr(PowerPCCPU *cpu)
1962 {
1963     CPUState *cs = CPU(cpu);
1964     int ret;
1965
1966     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1967     if (ret) {
1968         cpu_abort(cs, "This KVM version does not support PAPR\n");
1969     }
1970
1971     /* Update the capability flag so we sync the right information
1972      * with kvm */
1973     cap_papr = 1;
1974 }
1975
1976 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
1977 {
1978     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
1979 }
1980
1981 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1982 {
1983     CPUState *cs = CPU(cpu);
1984     int ret;
1985
1986     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1987     if (ret && mpic_proxy) {
1988         cpu_abort(cs, "This KVM version does not support EPR\n");
1989     }
1990 }
1991
1992 int kvmppc_smt_threads(void)
1993 {
1994     return cap_ppc_smt ? cap_ppc_smt : 1;
1995 }
1996
1997 #ifdef TARGET_PPC64
1998 off_t kvmppc_alloc_rma(void **rma)
1999 {
2000     off_t size;
2001     int fd;
2002     struct kvm_allocate_rma ret;
2003
2004     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2005      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2006      *                      not necessary on this hardware
2007      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2008      *
2009      * FIXME: We should allow the user to force contiguous RMA
2010      * allocation in the cap_ppc_rma==1 case.
2011      */
2012     if (cap_ppc_rma < 2) {
2013         return 0;
2014     }
2015
2016     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2017     if (fd < 0) {
2018         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2019                 strerror(errno));
2020         return -1;
2021     }
2022
2023     size = MIN(ret.rma_size, 256ul << 20);
2024
2025     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2026     if (*rma == MAP_FAILED) {
2027         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2028         return -1;
2029     };
2030
2031     return size;
2032 }
2033
2034 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2035 {
2036     struct kvm_ppc_smmu_info info;
2037     long rampagesize, best_page_shift;
2038     int i;
2039
2040     if (cap_ppc_rma >= 2) {
2041         return current_size;
2042     }
2043
2044     /* Find the largest hardware supported page size that's less than
2045      * or equal to the (logical) backing page size of guest RAM */
2046     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2047     rampagesize = getrampagesize();
2048     best_page_shift = 0;
2049
2050     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2051         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2052
2053         if (!sps->page_shift) {
2054             continue;
2055         }
2056
2057         if ((sps->page_shift > best_page_shift)
2058             && ((1UL << sps->page_shift) <= rampagesize)) {
2059             best_page_shift = sps->page_shift;
2060         }
2061     }
2062
2063     return MIN(current_size,
2064                1ULL << (best_page_shift + hash_shift - 7));
2065 }
2066 #endif
2067
2068 bool kvmppc_spapr_use_multitce(void)
2069 {
2070     return cap_spapr_multitce;
2071 }
2072
2073 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2074                               bool vfio_accel)
2075 {
2076     struct kvm_create_spapr_tce args = {
2077         .liobn = liobn,
2078         .window_size = window_size,
2079     };
2080     long len;
2081     int fd;
2082     void *table;
2083
2084     /* Must set fd to -1 so we don't try to munmap when called for
2085      * destroying the table, which the upper layers -will- do
2086      */
2087     *pfd = -1;
2088     if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
2089         return NULL;
2090     }
2091
2092     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2093     if (fd < 0) {
2094         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2095                 liobn);
2096         return NULL;
2097     }
2098
2099     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2100     /* FIXME: round this up to page size */
2101
2102     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2103     if (table == MAP_FAILED) {
2104         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2105                 liobn);
2106         close(fd);
2107         return NULL;
2108     }
2109
2110     *pfd = fd;
2111     return table;
2112 }
2113
/* Unmap a TCE table and close its file descriptor.
 *
 * @table:    mapping to release
 * @fd:       descriptor of the table; a negative value means there is
 *            nothing to release
 * @nb_table: number of 64-bit entries in the table
 *
 * Returns -1 if @fd is negative, 0 otherwise.  A failing munmap() or
 * close() is reported on stderr but still yields 0. */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        /* Terminate the line so later diagnostics start on a fresh one */
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2132
2133 int kvmppc_reset_htab(int shift_hint)
2134 {
2135     uint32_t shift = shift_hint;
2136
2137     if (!kvm_enabled()) {
2138         /* Full emulation, tell caller to allocate htab itself */
2139         return 0;
2140     }
2141     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2142         int ret;
2143         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2144         if (ret == -ENOTTY) {
2145             /* At least some versions of PR KVM advertise the
2146              * capability, but don't implement the ioctl().  Oops.
2147              * Return 0 so that we allocate the htab in qemu, as is
2148              * correct for PR. */
2149             return 0;
2150         } else if (ret < 0) {
2151             return ret;
2152         }
2153         return shift;
2154     }
2155
2156     /* We have a kernel that predates the htab reset calls.  For PR
2157      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2158      * this era, it has allocated a 16MB fixed size hash table
2159      * already.  Kernels of this era have the GET_PVINFO capability
2160      * only on PR, so we use this hack to determine the right
2161      * answer */
2162     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2163         /* PR - tell caller to allocate htab */
2164         return 0;
2165     } else {
2166         /* HV - assume 16MB kernel allocated htab */
2167         return 24;
2168     }
2169 }
2170
2171 static inline uint32_t mfpvr(void)
2172 {
2173     uint32_t pvr;
2174
2175     asm ("mfpvr %0"
2176          : "=r"(pvr));
2177     return pvr;
2178 }
2179
/* Set (on == true) or clear (on == false) the bits of @flags in
 * *@word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2188
2189 static void kvmppc_host_cpu_initfn(Object *obj)
2190 {
2191     assert(kvm_enabled());
2192 }
2193
2194 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2195 {
2196     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2197     uint32_t vmx = kvmppc_get_vmx();
2198     uint32_t dfp = kvmppc_get_dfp();
2199     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2200     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2201
2202     /* Now fix up the class with information we can query from the host */
2203     pcc->pvr = mfpvr();
2204
2205     if (vmx != -1) {
2206         /* Only override when we know what the host supports */
2207         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2208         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2209     }
2210     if (dfp != -1) {
2211         /* Only override when we know what the host supports */
2212         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2213     }
2214
2215     if (dcache_size != -1) {
2216         pcc->l1_dcache_size = dcache_size;
2217     }
2218
2219     if (icache_size != -1) {
2220         pcc->l1_icache_size = icache_size;
2221     }
2222 }
2223
2224 bool kvmppc_has_cap_epr(void)
2225 {
2226     return cap_epr;
2227 }
2228
2229 bool kvmppc_has_cap_htab_fd(void)
2230 {
2231     return cap_htab_fd;
2232 }
2233
2234 bool kvmppc_has_cap_fixup_hcalls(void)
2235 {
2236     return cap_fixup_hcalls;
2237 }
2238
2239 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2240 {
2241     ObjectClass *oc = OBJECT_CLASS(pcc);
2242
2243     while (oc && !object_class_is_abstract(oc)) {
2244         oc = object_class_get_parent(oc);
2245     }
2246     assert(oc);
2247
2248     return POWERPC_CPU_CLASS(oc);
2249 }
2250
2251 static int kvm_ppc_register_host_cpu_type(void)
2252 {
2253     TypeInfo type_info = {
2254         .name = TYPE_HOST_POWERPC_CPU,
2255         .instance_init = kvmppc_host_cpu_initfn,
2256         .class_init = kvmppc_host_cpu_class_init,
2257     };
2258     uint32_t host_pvr = mfpvr();
2259     PowerPCCPUClass *pvr_pcc;
2260     DeviceClass *dc;
2261
2262     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2263     if (pvr_pcc == NULL) {
2264         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2265     }
2266     if (pvr_pcc == NULL) {
2267         return -1;
2268     }
2269     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2270     type_register(&type_info);
2271
2272     /* Register generic family CPU class for a family */
2273     pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2274     dc = DEVICE_CLASS(pvr_pcc);
2275     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2276     type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2277     type_register(&type_info);
2278
2279     return 0;
2280 }
2281
2282 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2283 {
2284     struct kvm_rtas_token_args args = {
2285         .token = token,
2286     };
2287
2288     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2289         return -ENOENT;
2290     }
2291
2292     strncpy(args.name, function, sizeof(args.name));
2293
2294     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2295 }
2296
2297 int kvmppc_get_htab_fd(bool write)
2298 {
2299     struct kvm_get_htab_fd s = {
2300         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2301         .start_index = 0,
2302     };
2303
2304     if (!cap_htab_fd) {
2305         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2306         return -1;
2307     }
2308
2309     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2310 }
2311
2312 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2313 {
2314     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2315     uint8_t buf[bufsize];
2316     ssize_t rc;
2317
2318     do {
2319         rc = read(fd, buf, bufsize);
2320         if (rc < 0) {
2321             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2322                     strerror(errno));
2323             return rc;
2324         } else if (rc) {
2325             uint8_t *buffer = buf;
2326             ssize_t n = rc;
2327             while (n) {
2328                 struct kvm_get_htab_header *head =
2329                     (struct kvm_get_htab_header *) buffer;
2330                 size_t chunksize = sizeof(*head) +
2331                      HASH_PTE_SIZE_64 * head->n_valid;
2332
2333                 qemu_put_be32(f, head->index);
2334                 qemu_put_be16(f, head->n_valid);
2335                 qemu_put_be16(f, head->n_invalid);
2336                 qemu_put_buffer(f, (void *)(head + 1),
2337                                 HASH_PTE_SIZE_64 * head->n_valid);
2338
2339                 buffer += chunksize;
2340                 n -= chunksize;
2341             }
2342         }
2343     } while ((rc != 0)
2344              && ((max_ns < 0)
2345                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2346
2347     return (rc == 0) ? 1 : 0;
2348 }
2349
2350 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2351                            uint16_t n_valid, uint16_t n_invalid)
2352 {
2353     struct kvm_get_htab_header *buf;
2354     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2355     ssize_t rc;
2356
2357     buf = alloca(chunksize);
2358     buf->index = index;
2359     buf->n_valid = n_valid;
2360     buf->n_invalid = n_invalid;
2361
2362     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2363
2364     rc = write(fd, buf, chunksize);
2365     if (rc < 0) {
2366         fprintf(stderr, "Error writing KVM hash table: %s\n",
2367                 strerror(errno));
2368         return rc;
2369     }
2370     if (rc != chunksize) {
2371         /* We should never get a short write on a single chunk */
2372         fprintf(stderr, "Short write, restoring KVM hash table\n");
2373         return -1;
2374     }
2375     return 0;
2376 }
2377
2378 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2379 {
2380     return true;
2381 }
2382
2383 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2384 {
2385     return 1;
2386 }
2387
/* Arch hook for a SIGBUS: the arguments are ignored and 1 is always
 * returned. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}
2392
2393 void kvm_arch_init_irq_routing(KVMState *s)
2394 {
2395 }
2396
2397 struct kvm_get_htab_buf {
2398     struct kvm_get_htab_header header;
2399     /*
2400      * We require one extra byte for read
2401      */
2402     target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2403 };
2404
2405 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2406 {
2407     int htab_fd;
2408     struct kvm_get_htab_fd ghf;
2409     struct kvm_get_htab_buf  *hpte_buf;
2410
2411     ghf.flags = 0;
2412     ghf.start_index = pte_index;
2413     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2414     if (htab_fd < 0) {
2415         goto error_out;
2416     }
2417
2418     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2419     /*
2420      * Read the hpte group
2421      */
2422     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2423         goto out_close;
2424     }
2425
2426     close(htab_fd);
2427     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2428
2429 out_close:
2430     g_free(hpte_buf);
2431     close(htab_fd);
2432 error_out:
2433     return 0;
2434 }
2435
2436 void kvmppc_hash64_free_pteg(uint64_t token)
2437 {
2438     struct kvm_get_htab_buf *htab_buf;
2439
2440     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2441                             hpte);
2442     g_free(htab_buf);
2443     return;
2444 }
2445
2446 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2447                              target_ulong pte0, target_ulong pte1)
2448 {
2449     int htab_fd;
2450     struct kvm_get_htab_fd ghf;
2451     struct kvm_get_htab_buf hpte_buf;
2452
2453     ghf.flags = 0;
2454     ghf.start_index = 0;     /* Ignored */
2455     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2456     if (htab_fd < 0) {
2457         goto error_out;
2458     }
2459
2460     hpte_buf.header.n_valid = 1;
2461     hpte_buf.header.n_invalid = 0;
2462     hpte_buf.header.index = pte_index;
2463     hpte_buf.hpte[0] = pte0;
2464     hpte_buf.hpte[1] = pte1;
2465     /*
2466      * Write the hpte entry.
2467      * CAUTION: write() has the warn_unused_result attribute. Hence we
2468      * need to check the return value, even though we do nothing.
2469      */
2470     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2471         goto out_close;
2472     }
2473
2474 out_close:
2475     close(htab_fd);
2476     return;
2477
2478 error_out:
2479     return;
2480 }
2481
/* Arch hook for MSI route fixup: @route is left untouched and 0 is
 * always returned. */
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data)
{
    return 0;
}
2487
/* Return the low 16 bits of the MSI data word. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t gsi_mask = 0xffff;

    return data & gsi_mask;
}
2492
2493 int kvmppc_enable_hwrng(void)
2494 {
2495     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2496         return -1;
2497     }
2498
2499     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2500 }