target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include <dirent.h>
  18 #include <sys/types.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/mman.h>
  21 #include <sys/vfs.h>
  22
  23 #include <linux/kvm.h>
  24
  25 #include "qemu-common.h"
  26 #include "qemu/timer.h"
  27 #include "sysemu/sysemu.h"
  28 #include "sysemu/kvm.h"
  29 #include "kvm_ppc.h"
  30 #include "cpu.h"
  31 #include "sysemu/cpus.h"
  32 #include "sysemu/device_tree.h"
  33 #include "mmu-hash64.h"
  34
  35 #include "hw/sysbus.h"
  36 #include "hw/ppc/spapr.h"
  37 #include "hw/ppc/spapr_vio.h"
  38 #include "hw/ppc/ppc.h"
  39 #include "sysemu/watchdog.h"
  40 #include "trace.h"
  41 #include "exec/gdbstub.h"
  42 #include "exec/memattrs.h"
  43 #include "sysemu/hostmem.h"
  44
  45 //#define DEBUG_KVM
  46
  47 #ifdef DEBUG_KVM
  48 #define DPRINTF(fmt, ...) \
  49     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  50 #else
  51 #define DPRINTF(fmt, ...) \
  52     do { } while (0)
  53 #endif
  54
  55 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  56
  57 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  58     KVM_CAP_LAST_INFO
  59 };
  60
  61 static int cap_interrupt_unset = false;
  62 static int cap_interrupt_level = false;
  63 static int cap_segstate;
  64 static int cap_booke_sregs;
  65 static int cap_ppc_smt;
  66 static int cap_ppc_rma;
  67 static int cap_spapr_tce;
  68 static int cap_spapr_multitce;
  69 static int cap_spapr_vfio;
  70 static int cap_hior;
  71 static int cap_one_reg;
  72 static int cap_epr;
  73 static int cap_ppc_watchdog;
  74 static int cap_papr;
  75 static int cap_htab_fd;
  76 static int cap_fixup_hcalls;
  77
  78 static uint32_t debug_inst_opcode;
  79
  80 /* XXX We have a race condition where we actually have a level triggered
  81  *     interrupt, but the infrastructure can't expose that yet, so the guest
  82  *     takes but ignores it, goes to sleep and never gets notified that there's
  83  *     still an interrupt pending.
  84  *
  85  *     As a quick workaround, let's just wake up again 20 ms after we injected
  86  *     an interrupt. That way we can assure that we're always reinjecting
  87  *     interrupts in case the guest swallowed them.
  88  */
  89 static QEMUTimer *idle_timer;
  90
  91 static void kvm_kick_cpu(void *opaque)
  92 {
  93     PowerPCCPU *cpu = opaque;
  94
  95     qemu_cpu_kick(CPU(cpu));
  96 }
  97
  98 static int kvm_ppc_register_host_cpu_type(void);
  99
 100 int kvm_arch_init(MachineState *ms, KVMState *s)
 101 {
 102     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 103     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 104     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 105     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 106     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 107     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 108     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 109     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 110     cap_spapr_vfio = false;
 111     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 112     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 113     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 114     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 115     /* Note: we don't set cap_papr here, because this capability is
 116      * only activated after this by kvmppc_set_papr() */
 117     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 118     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 119
 120     if (!cap_interrupt_level) {
 121         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 122                         "VM to stall at times!\n");
 123     }
 124
 125     kvm_ppc_register_host_cpu_type();
 126
 127     return 0;
 128 }
 129
 130 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 131 {
 132     CPUPPCState *cenv = &cpu->env;
 133     CPUState *cs = CPU(cpu);
 134     struct kvm_sregs sregs;
 135     int ret;
 136
 137     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 138         /* What we're really trying to say is "if we're on BookE, we use
 139            the native PVR for now". This is the only sane way to check
 140            it though, so we potentially confuse users that they can run
 141            BookE guests on BookS. Let's hope nobody dares enough :) */
 142         return 0;
 143     } else {
 144         if (!cap_segstate) {
 145             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 146             return -ENOSYS;
 147         }
 148     }
 149
 150     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 151     if (ret) {
 152         return ret;
 153     }
 154
 155     sregs.pvr = cenv->spr[SPR_PVR];
 156     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 157 }
 158
 159 /* Set up a shared TLB array with KVM */
 160 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 161 {
 162     CPUPPCState *env = &cpu->env;
 163     CPUState *cs = CPU(cpu);
 164     struct kvm_book3e_206_tlb_params params = {};
 165     struct kvm_config_tlb cfg = {};
 166     unsigned int entries = 0;
 167     int ret, i;
 168
 169     if (!kvm_enabled() ||
 170         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 171         return 0;
 172     }
 173
 174     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 175
 176     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 177         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 178         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 179         entries += params.tlb_sizes[i];
 180     }
 181
 182     assert(entries == env->nb_tlb);
 183     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 184
 185     env->tlb_dirty = true;
 186
 187     cfg.array = (uintptr_t)env->tlb.tlbm;
 188     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 189     cfg.params = (uintptr_t)&params;
 190     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 191
 192     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 193     if (ret < 0) {
 194         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 195                 __func__, strerror(-ret));
 196         return ret;
 197     }
 198
 199     env->kvm_sw_tlb = true;
 200     return 0;
 201 }
 202
 203
 204 #if defined(TARGET_PPC64)
 205 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 206                                        struct kvm_ppc_smmu_info *info)
 207 {
 208     CPUPPCState *env = &cpu->env;
 209     CPUState *cs = CPU(cpu);
 210
 211     memset(info, 0, sizeof(*info));
 212
 213     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 214      * need to "guess" what the supported page sizes are.
 215      *
 216      * For that to work we make a few assumptions:
 217      *
 218      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 219      *   KVM which only supports 4K and 16M pages, but supports them
 220      *   regardless of the backing store characteritics. We also don't
 221      *   support 1T segments.
 222      *
 223      *   This is safe as if HV KVM ever supports that capability or PR
 224      *   KVM grows supports for more page/segment sizes, those versions
 225      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 226      *   will not hit this fallback
 227      *
 228      * - Else we are running HV KVM. This means we only support page
 229      *   sizes that fit in the backing store. Additionally we only
 230      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 231      *   P7 encodings for the SLB and hash table. Here too, we assume
 232      *   support for any newer processor will mean a kernel that
 233      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 234      *   this fallback.
 235      */
 236     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 237         /* No flags */
 238         info->flags = 0;
 239         info->slb_size = 64;
 240
 241         /* Standard 4k base page size segment */
 242         info->sps[0].page_shift = 12;
 243         info->sps[0].slb_enc = 0;
 244         info->sps[0].enc[0].page_shift = 12;
 245         info->sps[0].enc[0].pte_enc = 0;
 246
 247         /* Standard 16M large page size segment */
 248         info->sps[1].page_shift = 24;
 249         info->sps[1].slb_enc = SLB_VSID_L;
 250         info->sps[1].enc[0].page_shift = 24;
 251         info->sps[1].enc[0].pte_enc = 0;
 252     } else {
 253         int i = 0;
 254
 255         /* HV KVM has backing store size restrictions */
 256         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 257
 258         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 259             info->flags |= KVM_PPC_1T_SEGMENTS;
 260         }
 261
 262         if (env->mmu_model == POWERPC_MMU_2_06) {
 263             info->slb_size = 32;
 264         } else {
 265             info->slb_size = 64;
 266         }
 267
 268         /* Standard 4k base page size segment */
 269         info->sps[i].page_shift = 12;
 270         info->sps[i].slb_enc = 0;
 271         info->sps[i].enc[0].page_shift = 12;
 272         info->sps[i].enc[0].pte_enc = 0;
 273         i++;
 274
 275         /* 64K on MMU 2.06 */
 276         if (env->mmu_model == POWERPC_MMU_2_06) {
 277             info->sps[i].page_shift = 16;
 278             info->sps[i].slb_enc = 0x110;
 279             info->sps[i].enc[0].page_shift = 16;
 280             info->sps[i].enc[0].pte_enc = 1;
 281             i++;
 282         }
 283
 284         /* Standard 16M large page size segment */
 285         info->sps[i].page_shift = 24;
 286         info->sps[i].slb_enc = SLB_VSID_L;
 287         info->sps[i].enc[0].page_shift = 24;
 288         info->sps[i].enc[0].pte_enc = 0;
 289     }
 290 }
 291
 292 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 293 {
 294     CPUState *cs = CPU(cpu);
 295     int ret;
 296
 297     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 298         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 299         if (ret == 0) {
 300             return;
 301         }
 302     }
 303
 304     kvm_get_fallback_smmu_info(cpu, info);
 305 }
 306
 307 static long gethugepagesize(const char *mem_path)
 308 {
 309     struct statfs fs;
 310     int ret;
 311
 312     do {
 313         ret = statfs(mem_path, &fs);
 314     } while (ret != 0 && errno == EINTR);
 315
 316     if (ret != 0) {
 317         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 318                 strerror(errno));
 319         exit(1);
 320     }
 321
 322 #define HUGETLBFS_MAGIC       0x958458f6
 323
 324     if (fs.f_type != HUGETLBFS_MAGIC) {
 325         /* Explicit mempath, but it's ordinary pages */
 326         return getpagesize();
 327     }
 328
 329     /* It's hugepage, return the huge page size */
 330     return fs.f_bsize;
 331 }
 332
 333 static int find_max_supported_pagesize(Object *obj, void *opaque)
 334 {
 335     char *mem_path;
 336     long *hpsize_min = opaque;
 337
 338     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
 339         mem_path = object_property_get_str(obj, "mem-path", NULL);
 340         if (mem_path) {
 341             long hpsize = gethugepagesize(mem_path);
 342             if (hpsize < *hpsize_min) {
 343                 *hpsize_min = hpsize;
 344             }
 345         } else {
 346             *hpsize_min = getpagesize();
 347         }
 348     }
 349
 350     return 0;
 351 }
 352
 353 static long getrampagesize(void)
 354 {
 355     long hpsize = LONG_MAX;
 356     Object *memdev_root;
 357
 358     if (mem_path) {
 359         return gethugepagesize(mem_path);
 360     }
 361
 362     /* it's possible we have memory-backend objects with
 363      * hugepage-backed RAM. these may get mapped into system
 364      * address space via -numa parameters or memory hotplug
 365      * hooks. we want to take these into account, but we
 366      * also want to make sure these supported hugepage
 367      * sizes are applicable across the entire range of memory
 368      * we may boot from, so we take the min across all
 369      * backends, and assume normal pages in cases where a
 370      * backend isn't backed by hugepages.
 371      */
 372     memdev_root = object_resolve_path("/objects", NULL);
 373     if (!memdev_root) {
 374         return getpagesize();
 375     }
 376
 377     object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
 378
 379     return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
 380 }
 381
 382 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 383 {
 384     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 385         return true;
 386     }
 387
 388     return (1ul << shift) <= rampgsize;
 389 }
 390
 391 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 392 {
 393     static struct kvm_ppc_smmu_info smmu_info;
 394     static bool has_smmu_info;
 395     CPUPPCState *env = &cpu->env;
 396     long rampagesize;
 397     int iq, ik, jq, jk;
 398
 399     /* We only handle page sizes for 64-bit server guests for now */
 400     if (!(env->mmu_model & POWERPC_MMU_64)) {
 401         return;
 402     }
 403
 404     /* Collect MMU info from kernel if not already */
 405     if (!has_smmu_info) {
 406         kvm_get_smmu_info(cpu, &smmu_info);
 407         has_smmu_info = true;
 408     }
 409
 410     rampagesize = getrampagesize();
 411
 412     /* Convert to QEMU form */
 413     memset(&env->sps, 0, sizeof(env->sps));
 414
 415     /*
 416      * XXX This loop should be an entry wide AND of the capabilities that
 417      *     the selected CPU has with the capabilities that KVM supports.
 418      */
 419     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 420         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 421         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 422
 423         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 424                                  ksps->page_shift)) {
 425             continue;
 426         }
 427         qsps->page_shift = ksps->page_shift;
 428         qsps->slb_enc = ksps->slb_enc;
 429         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 430             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 431                                      ksps->enc[jk].page_shift)) {
 432                 continue;
 433             }
 434             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 435             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 436             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 437                 break;
 438             }
 439         }
 440         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 441             break;
 442         }
 443     }
 444     env->slb_nr = smmu_info.slb_size;
 445     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 446         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 447     }
 448 }
 449 #else /* defined (TARGET_PPC64) */
 450
 451 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 452 {
 453 }
 454
 455 #endif /* !defined (TARGET_PPC64) */
 456
 457 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 458 {
 459     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 460 }
 461
 462 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
 463  * book3s supports only 1 watchpoint, so array size
 464  * of 4 is sufficient for now.
 465  */
 466 #define MAX_HW_BKPTS 4
 467
 468 static struct HWBreakpoint {
 469     target_ulong addr;
 470     int type;
 471 } hw_debug_points[MAX_HW_BKPTS];
 472
 473 static CPUWatchpoint hw_watchpoint;
 474
 475 /* Default there is no breakpoint and watchpoint supported */
 476 static int max_hw_breakpoint;
 477 static int max_hw_watchpoint;
 478 static int nb_hw_breakpoint;
 479 static int nb_hw_watchpoint;
 480
 481 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 482 {
 483     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 484         max_hw_breakpoint = 2;
 485         max_hw_watchpoint = 2;
 486     }
 487
 488     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 489         fprintf(stderr, "Error initializing h/w breakpoints\n");
 490         return;
 491     }
 492 }
 493
 494 int kvm_arch_init_vcpu(CPUState *cs)
 495 {
 496     PowerPCCPU *cpu = POWERPC_CPU(cs);
 497     CPUPPCState *cenv = &cpu->env;
 498     int ret;
 499
 500     /* Gather server mmu info from KVM and update the CPU state */
 501     kvm_fixup_page_sizes(cpu);
 502
 503     /* Synchronize sregs with kvm */
 504     ret = kvm_arch_sync_sregs(cpu);
 505     if (ret) {
 506         return ret;
 507     }
 508
 509     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 510
 511     /* Some targets support access to KVM's guest TLB. */
 512     switch (cenv->mmu_model) {
 513     case POWERPC_MMU_BOOKE206:
 514         ret = kvm_booke206_tlb_init(cpu);
 515         break;
 516     default:
 517         break;
 518     }
 519
 520     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 521     kvmppc_hw_debug_points_init(cenv);
 522
 523     return ret;
 524 }
 525
 526 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 527 {
 528     CPUPPCState *env = &cpu->env;
 529     CPUState *cs = CPU(cpu);
 530     struct kvm_dirty_tlb dirty_tlb;
 531     unsigned char *bitmap;
 532     int ret;
 533
 534     if (!env->kvm_sw_tlb) {
 535         return;
 536     }
 537
 538     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 539     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 540
 541     dirty_tlb.bitmap = (uintptr_t)bitmap;
 542     dirty_tlb.num_dirty = env->nb_tlb;
 543
 544     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 545     if (ret) {
 546         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 547                 __func__, strerror(-ret));
 548     }
 549
 550     g_free(bitmap);
 551 }
 552
 553 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 554 {
 555     PowerPCCPU *cpu = POWERPC_CPU(cs);
 556     CPUPPCState *env = &cpu->env;
 557     union {
 558         uint32_t u32;
 559         uint64_t u64;
 560     } val;
 561     struct kvm_one_reg reg = {
 562         .id = id,
 563         .addr = (uintptr_t) &val,
 564     };
 565     int ret;
 566
 567     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 568     if (ret != 0) {
 569         trace_kvm_failed_spr_get(spr, strerror(errno));
 570     } else {
 571         switch (id & KVM_REG_SIZE_MASK) {
 572         case KVM_REG_SIZE_U32:
 573             env->spr[spr] = val.u32;
 574             break;
 575
 576         case KVM_REG_SIZE_U64:
 577             env->spr[spr] = val.u64;
 578             break;
 579
 580         default:
 581             /* Don't handle this size yet */
 582             abort();
 583         }
 584     }
 585 }
 586
 587 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 588 {
 589     PowerPCCPU *cpu = POWERPC_CPU(cs);
 590     CPUPPCState *env = &cpu->env;
 591     union {
 592         uint32_t u32;
 593         uint64_t u64;
 594     } val;
 595     struct kvm_one_reg reg = {
 596         .id = id,
 597         .addr = (uintptr_t) &val,
 598     };
 599     int ret;
 600
 601     switch (id & KVM_REG_SIZE_MASK) {
 602     case KVM_REG_SIZE_U32:
 603         val.u32 = env->spr[spr];
 604         break;
 605
 606     case KVM_REG_SIZE_U64:
 607         val.u64 = env->spr[spr];
 608         break;
 609
 610     default:
 611         /* Don't handle this size yet */
 612         abort();
 613     }
 614
 615     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 616     if (ret != 0) {
 617         trace_kvm_failed_spr_set(spr, strerror(errno));
 618     }
 619 }
 620
 621 static int kvm_put_fp(CPUState *cs)
 622 {
 623     PowerPCCPU *cpu = POWERPC_CPU(cs);
 624     CPUPPCState *env = &cpu->env;
 625     struct kvm_one_reg reg;
 626     int i;
 627     int ret;
 628
 629     if (env->insns_flags & PPC_FLOAT) {
 630         uint64_t fpscr = env->fpscr;
 631         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 632
 633         reg.id = KVM_REG_PPC_FPSCR;
 634         reg.addr = (uintptr_t)&fpscr;
 635         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 636         if (ret < 0) {
 637             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 638             return ret;
 639         }
 640
 641         for (i = 0; i < 32; i++) {
 642             uint64_t vsr[2];
 643
 644             vsr[0] = float64_val(env->fpr[i]);
 645             vsr[1] = env->vsr[i];
 646             reg.addr = (uintptr_t) &vsr;
 647             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 648
 649             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 650             if (ret < 0) {
 651                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 652                         i, strerror(errno));
 653                 return ret;
 654             }
 655         }
 656     }
 657
 658     if (env->insns_flags & PPC_ALTIVEC) {
 659         reg.id = KVM_REG_PPC_VSCR;
 660         reg.addr = (uintptr_t)&env->vscr;
 661         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 662         if (ret < 0) {
 663             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 664             return ret;
 665         }
 666
 667         for (i = 0; i < 32; i++) {
 668             reg.id = KVM_REG_PPC_VR(i);
 669             reg.addr = (uintptr_t)&env->avr[i];
 670             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 671             if (ret < 0) {
 672                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 673                 return ret;
 674             }
 675         }
 676     }
 677
 678     return 0;
 679 }
 680
 681 static int kvm_get_fp(CPUState *cs)
 682 {
 683     PowerPCCPU *cpu = POWERPC_CPU(cs);
 684     CPUPPCState *env = &cpu->env;
 685     struct kvm_one_reg reg;
 686     int i;
 687     int ret;
 688
 689     if (env->insns_flags & PPC_FLOAT) {
 690         uint64_t fpscr;
 691         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 692
 693         reg.id = KVM_REG_PPC_FPSCR;
 694         reg.addr = (uintptr_t)&fpscr;
 695         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 696         if (ret < 0) {
 697             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 698             return ret;
 699         } else {
 700             env->fpscr = fpscr;
 701         }
 702
 703         for (i = 0; i < 32; i++) {
 704             uint64_t vsr[2];
 705
 706             reg.addr = (uintptr_t) &vsr;
 707             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 708
 709             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 710             if (ret < 0) {
 711                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 712                         vsx ? "VSR" : "FPR", i, strerror(errno));
 713                 return ret;
 714             } else {
 715                 env->fpr[i] = vsr[0];
 716                 if (vsx) {
 717                     env->vsr[i] = vsr[1];
 718                 }
 719             }
 720         }
 721     }
 722
 723     if (env->insns_flags & PPC_ALTIVEC) {
 724         reg.id = KVM_REG_PPC_VSCR;
 725         reg.addr = (uintptr_t)&env->vscr;
 726         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 727         if (ret < 0) {
 728             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 729             return ret;
 730         }
 731
 732         for (i = 0; i < 32; i++) {
 733             reg.id = KVM_REG_PPC_VR(i);
 734             reg.addr = (uintptr_t)&env->avr[i];
 735             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 736             if (ret < 0) {
 737                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 738                         i, strerror(errno));
 739                 return ret;
 740             }
 741         }
 742     }
 743
 744     return 0;
 745 }
 746
 747 #if defined(TARGET_PPC64)
 748 static int kvm_get_vpa(CPUState *cs)
 749 {
 750     PowerPCCPU *cpu = POWERPC_CPU(cs);
 751     CPUPPCState *env = &cpu->env;
 752     struct kvm_one_reg reg;
 753     int ret;
 754
 755     reg.id = KVM_REG_PPC_VPA_ADDR;
 756     reg.addr = (uintptr_t)&env->vpa_addr;
 757     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 758     if (ret < 0) {
 759         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 760         return ret;
 761     }
 762
 763     assert((uintptr_t)&env->slb_shadow_size
 764            == ((uintptr_t)&env->slb_shadow_addr + 8));
 765     reg.id = KVM_REG_PPC_VPA_SLB;
 766     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 767     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 768     if (ret < 0) {
 769         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 770                 strerror(errno));
 771         return ret;
 772     }
 773
 774     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 775     reg.id = KVM_REG_PPC_VPA_DTL;
 776     reg.addr = (uintptr_t)&env->dtl_addr;
 777     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 778     if (ret < 0) {
 779         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 780                 strerror(errno));
 781         return ret;
 782     }
 783
 784     return 0;
 785 }
 786
 787 static int kvm_put_vpa(CPUState *cs)
 788 {
 789     PowerPCCPU *cpu = POWERPC_CPU(cs);
 790     CPUPPCState *env = &cpu->env;
 791     struct kvm_one_reg reg;
 792     int ret;
 793
 794     /* SLB shadow or DTL can't be registered unless a master VPA is
 795      * registered.  That means when restoring state, if a VPA *is*
 796      * registered, we need to set that up first.  If not, we need to
 797      * deregister the others before deregistering the master VPA */
 798     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 799
 800     if (env->vpa_addr) {
 801         reg.id = KVM_REG_PPC_VPA_ADDR;
 802         reg.addr = (uintptr_t)&env->vpa_addr;
 803         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 804         if (ret < 0) {
 805             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 806             return ret;
 807         }
 808     }
 809
 810     assert((uintptr_t)&env->slb_shadow_size
 811            == ((uintptr_t)&env->slb_shadow_addr + 8));
 812     reg.id = KVM_REG_PPC_VPA_SLB;
 813     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 814     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 815     if (ret < 0) {
 816         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 817         return ret;
 818     }
 819
 820     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 821     reg.id = KVM_REG_PPC_VPA_DTL;
 822     reg.addr = (uintptr_t)&env->dtl_addr;
 823     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 824     if (ret < 0) {
 825         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 826                 strerror(errno));
 827         return ret;
 828     }
 829
 830     if (!env->vpa_addr) {
 831         reg.id = KVM_REG_PPC_VPA_ADDR;
 832         reg.addr = (uintptr_t)&env->vpa_addr;
 833         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 834         if (ret < 0) {
 835             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 836             return ret;
 837         }
 838     }
 839
 840     return 0;
 841 }
 842 #endif /* TARGET_PPC64 */
 843
 844 int kvm_arch_put_registers(CPUState *cs, int level)
 845 {
 846     PowerPCCPU *cpu = POWERPC_CPU(cs);
 847     CPUPPCState *env = &cpu->env;
 848     struct kvm_regs regs;
 849     int ret;
 850     int i;
 851
 852     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 853     if (ret < 0) {
 854         return ret;
 855     }
 856
 857     regs.ctr = env->ctr;
 858     regs.lr  = env->lr;
 859     regs.xer = cpu_read_xer(env);
 860     regs.msr = env->msr;
 861     regs.pc = env->nip;
 862
 863     regs.srr0 = env->spr[SPR_SRR0];
 864     regs.srr1 = env->spr[SPR_SRR1];
 865
 866     regs.sprg0 = env->spr[SPR_SPRG0];
 867     regs.sprg1 = env->spr[SPR_SPRG1];
 868     regs.sprg2 = env->spr[SPR_SPRG2];
 869     regs.sprg3 = env->spr[SPR_SPRG3];
 870     regs.sprg4 = env->spr[SPR_SPRG4];
 871     regs.sprg5 = env->spr[SPR_SPRG5];
 872     regs.sprg6 = env->spr[SPR_SPRG6];
 873     regs.sprg7 = env->spr[SPR_SPRG7];
 874
 875     regs.pid = env->spr[SPR_BOOKE_PID];
 876
 877     for (i = 0;i < 32; i++)
 878         regs.gpr[i] = env->gpr[i];
 879
 880     regs.cr = 0;
 881     for (i = 0; i < 8; i++) {
 882         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 883     }
 884
 885     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 886     if (ret < 0)
 887         return ret;
 888
 889     kvm_put_fp(cs);
 890
 891     if (env->tlb_dirty) {
 892         kvm_sw_tlb_put(cpu);
 893         env->tlb_dirty = false;
 894     }
 895
 896     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 897         struct kvm_sregs sregs;
 898
 899         sregs.pvr = env->spr[SPR_PVR];
 900
 901         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 902
 903         /* Sync SLB */
 904 #ifdef TARGET_PPC64
 905         for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 906             sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 907             if (env->slb[i].esid & SLB_ESID_V) {
 908                 sregs.u.s.ppc64.slb[i].slbe |= i;
 909             }
 910             sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 911         }
 912 #endif
 913
 914         /* Sync SRs */
 915         for (i = 0; i < 16; i++) {
 916             sregs.u.s.ppc32.sr[i] = env->sr[i];
 917         }
 918
 919         /* Sync BATs */
 920         for (i = 0; i < 8; i++) {
 921             /* Beware. We have to swap upper and lower bits here */
 922             sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 923                 | env->DBAT[1][i];
 924             sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 925                 | env->IBAT[1][i];
 926         }
 927
 928         ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 929         if (ret) {
 930             return ret;
 931         }
 932     }
 933
 934     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 935         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 936     }
 937
 938     if (cap_one_reg) {
 939         int i;
 940
 941         /* We deliberately ignore errors here, for kernels which have
 942          * the ONE_REG calls, but don't support the specific
 943          * registers, there's a reasonable chance things will still
 944          * work, at least until we try to migrate. */
 945         for (i = 0; i < 1024; i++) {
 946             uint64_t id = env->spr_cb[i].one_reg_id;
 947
 948             if (id != 0) {
 949                 kvm_put_one_spr(cs, id, i);
 950             }
 951         }
 952
 953 #ifdef TARGET_PPC64
 954         if (msr_ts) {
 955             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
 956                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
 957             }
 958             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
 959                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
 960             }
 961             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
 962             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
 963             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
 964             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
 965             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
 966             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
 967             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
 968             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
 969             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
 970             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
 971         }
 972
 973         if (cap_papr) {
 974             if (kvm_put_vpa(cs) < 0) {
 975                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
 976             }
 977         }
 978
 979         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
 980 #endif /* TARGET_PPC64 */
 981     }
 982
 983     return ret;
 984 }
 985
 986 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
 987 {
 988      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
 989 }
 990
 991 int kvm_arch_get_registers(CPUState *cs)
 992 {
 993     PowerPCCPU *cpu = POWERPC_CPU(cs);
 994     CPUPPCState *env = &cpu->env;
 995     struct kvm_regs regs;
 996     struct kvm_sregs sregs;
 997     uint32_t cr;
 998     int i, ret;
 999
1000     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1001     if (ret < 0)
1002         return ret;
1003
1004     cr = regs.cr;
1005     for (i = 7; i >= 0; i--) {
1006         env->crf[i] = cr & 15;
1007         cr >>= 4;
1008     }
1009
1010     env->ctr = regs.ctr;
1011     env->lr = regs.lr;
1012     cpu_write_xer(env, regs.xer);
1013     env->msr = regs.msr;
1014     env->nip = regs.pc;
1015
1016     env->spr[SPR_SRR0] = regs.srr0;
1017     env->spr[SPR_SRR1] = regs.srr1;
1018
1019     env->spr[SPR_SPRG0] = regs.sprg0;
1020     env->spr[SPR_SPRG1] = regs.sprg1;
1021     env->spr[SPR_SPRG2] = regs.sprg2;
1022     env->spr[SPR_SPRG3] = regs.sprg3;
1023     env->spr[SPR_SPRG4] = regs.sprg4;
1024     env->spr[SPR_SPRG5] = regs.sprg5;
1025     env->spr[SPR_SPRG6] = regs.sprg6;
1026     env->spr[SPR_SPRG7] = regs.sprg7;
1027
1028     env->spr[SPR_BOOKE_PID] = regs.pid;
1029
1030     for (i = 0;i < 32; i++)
1031         env->gpr[i] = regs.gpr[i];
1032
1033     kvm_get_fp(cs);
1034
1035     if (cap_booke_sregs) {
1036         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1037         if (ret < 0) {
1038             return ret;
1039         }
1040
1041         if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1042             env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1043             env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1044             env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1045             env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1046             env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1047             env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1048             env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1049             env->spr[SPR_DECR] = sregs.u.e.dec;
1050             env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1051             env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1052             env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1053         }
1054
1055         if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1056             env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1057             env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1058             env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1059             env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1060             env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1061         }
1062
1063         if (sregs.u.e.features & KVM_SREGS_E_64) {
1064             env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1065         }
1066
1067         if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1068             env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1069         }
1070
1071         if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1072             env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1073             kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1074             env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1075             kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1076             env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1077             kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1078             env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1079             kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1080             env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1081             kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1082             env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1083             kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1084             env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1085             kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1086             env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1087             kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1088             env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1089             kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1090             env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1091             kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1092             env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1093             kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1094             env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1095             kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1096             env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1097             kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1098             env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1099             kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1100             env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1101             kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1102             env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1103             kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1104
1105             if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1106                 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1107                 kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1108                 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1109                 kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1110                 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1111                 kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1112             }
1113
1114             if (sregs.u.e.features & KVM_SREGS_E_PM) {
1115                 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1116                 kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1117             }
1118
1119             if (sregs.u.e.features & KVM_SREGS_E_PC) {
1120                 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1121                 kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1122                 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1123                 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1124             }
1125         }
1126
1127         if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1128             env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1129             env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1130             env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1131             env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1132             env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1133             env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1134             env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1135             env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1136             env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1137             env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1138         }
1139
1140         if (sregs.u.e.features & KVM_SREGS_EXP) {
1141             env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1142         }
1143
1144         if (sregs.u.e.features & KVM_SREGS_E_PD) {
1145             env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1146             env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1147         }
1148
1149         if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1150             env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1151             env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1152             env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1153
1154             if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1155                 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1156                 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1157             }
1158         }
1159     }
1160
1161     if (cap_segstate) {
1162         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1163         if (ret < 0) {
1164             return ret;
1165         }
1166
1167         if (!env->external_htab) {
1168             ppc_store_sdr1(env, sregs.u.s.sdr1);
1169         }
1170
1171         /* Sync SLB */
1172 #ifdef TARGET_PPC64
1173         /*
1174          * The packed SLB array we get from KVM_GET_SREGS only contains
1175          * information about valid entries. So we flush our internal
1176          * copy to get rid of stale ones, then put all valid SLB entries
1177          * back in.
1178          */
1179         memset(env->slb, 0, sizeof(env->slb));
1180         for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1181             target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1182             target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1183             /*
1184              * Only restore valid entries
1185              */
1186             if (rb & SLB_ESID_V) {
1187                 ppc_store_slb(env, rb, rs);
1188             }
1189         }
1190 #endif
1191
1192         /* Sync SRs */
1193         for (i = 0; i < 16; i++) {
1194             env->sr[i] = sregs.u.s.ppc32.sr[i];
1195         }
1196
1197         /* Sync BATs */
1198         for (i = 0; i < 8; i++) {
1199             env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1200             env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1201             env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1202             env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1203         }
1204     }
1205
1206     if (cap_hior) {
1207         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1208     }
1209
1210     if (cap_one_reg) {
1211         int i;
1212
1213         /* We deliberately ignore errors here, for kernels which have
1214          * the ONE_REG calls, but don't support the specific
1215          * registers, there's a reasonable chance things will still
1216          * work, at least until we try to migrate. */
1217         for (i = 0; i < 1024; i++) {
1218             uint64_t id = env->spr_cb[i].one_reg_id;
1219
1220             if (id != 0) {
1221                 kvm_get_one_spr(cs, id, i);
1222             }
1223         }
1224
1225 #ifdef TARGET_PPC64
1226         if (msr_ts) {
1227             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1228                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1229             }
1230             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1231                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1232             }
1233             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1234             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1235             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1236             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1237             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1238             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1239             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1240             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1241             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1242             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1243         }
1244
1245         if (cap_papr) {
1246             if (kvm_get_vpa(cs) < 0) {
1247                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1248             }
1249         }
1250
1251         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1252 #endif
1253     }
1254
1255     return 0;
1256 }
1257
1258 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1259 {
1260     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1261
1262     if (irq != PPC_INTERRUPT_EXT) {
1263         return 0;
1264     }
1265
1266     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1267         return 0;
1268     }
1269
1270     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1271
1272     return 0;
1273 }
1274
/*
 * PPC_INPUT_INT aliases the *_INPUT_INT constant that matches the target
 * this file is built for.  kvm_arch_pre_run() uses it as a bit number when
 * testing env->irq_input_state.
 */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1282
1283 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1284 {
1285     PowerPCCPU *cpu = POWERPC_CPU(cs);
1286     CPUPPCState *env = &cpu->env;
1287     int r;
1288     unsigned irq;
1289
1290     qemu_mutex_lock_iothread();
1291
1292     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1293      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1294     if (!cap_interrupt_level &&
1295         run->ready_for_interrupt_injection &&
1296         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1297         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1298     {
1299         /* For now KVM disregards the 'irq' argument. However, in the
1300          * future KVM could cache it in-kernel to avoid a heavyweight exit
1301          * when reading the UIC.
1302          */
1303         irq = KVM_INTERRUPT_SET;
1304
1305         DPRINTF("injected interrupt %d\n", irq);
1306         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1307         if (r < 0) {
1308             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1309         }
1310
1311         /* Always wake up soon in case the interrupt was level based */
1312         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1313                        (get_ticks_per_sec() / 50));
1314     }
1315
1316     /* We don't know if there are more interrupts pending after this. However,
1317      * the guest will return to userspace in the course of handling this one
1318      * anyways, so we will get a chance to deliver the rest. */
1319
1320     qemu_mutex_unlock_iothread();
1321 }
1322
1323 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1324 {
1325     return MEMTXATTRS_UNSPECIFIED;
1326 }
1327
1328 int kvm_arch_process_async_events(CPUState *cs)
1329 {
1330     return cs->halted;
1331 }
1332
1333 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1334 {
1335     CPUState *cs = CPU(cpu);
1336     CPUPPCState *env = &cpu->env;
1337
1338     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1339         cs->halted = 1;
1340         cs->exception_index = EXCP_HLT;
1341     }
1342
1343     return 0;
1344 }
1345
1346 /* map dcr access to existing qemu dcr emulation */
1347 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1348 {
1349     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1350         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1351
1352     return 0;
1353 }
1354
1355 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1356 {
1357     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1358         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1359
1360     return 0;
1361 }
1362
1363 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1364 {
1365     /* Mixed endian case is not handled */
1366     uint32_t sc = debug_inst_opcode;
1367
1368     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1369                             sizeof(sc), 0) ||
1370         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1371         return -EINVAL;
1372     }
1373
1374     return 0;
1375 }
1376
1377 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1378 {
1379     uint32_t sc;
1380
1381     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1382         sc != debug_inst_opcode ||
1383         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1384                             sizeof(sc), 1)) {
1385         return -EINVAL;
1386     }
1387
1388     return 0;
1389 }
1390
1391 static int find_hw_breakpoint(target_ulong addr, int type)
1392 {
1393     int n;
1394
1395     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1396            <= ARRAY_SIZE(hw_debug_points));
1397
1398     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1399         if (hw_debug_points[n].addr == addr &&
1400              hw_debug_points[n].type == type) {
1401             return n;
1402         }
1403     }
1404
1405     return -1;
1406 }
1407
1408 static int find_hw_watchpoint(target_ulong addr, int *flag)
1409 {
1410     int n;
1411
1412     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1413     if (n >= 0) {
1414         *flag = BP_MEM_ACCESS;
1415         return n;
1416     }
1417
1418     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1419     if (n >= 0) {
1420         *flag = BP_MEM_WRITE;
1421         return n;
1422     }
1423
1424     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1425     if (n >= 0) {
1426         *flag = BP_MEM_READ;
1427         return n;
1428     }
1429
1430     return -1;
1431 }
1432
1433 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1434                                   target_ulong len, int type)
1435 {
1436     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1437         return -ENOBUFS;
1438     }
1439
1440     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1441     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1442
1443     switch (type) {
1444     case GDB_BREAKPOINT_HW:
1445         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1446             return -ENOBUFS;
1447         }
1448
1449         if (find_hw_breakpoint(addr, type) >= 0) {
1450             return -EEXIST;
1451         }
1452
1453         nb_hw_breakpoint++;
1454         break;
1455
1456     case GDB_WATCHPOINT_WRITE:
1457     case GDB_WATCHPOINT_READ:
1458     case GDB_WATCHPOINT_ACCESS:
1459         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1460             return -ENOBUFS;
1461         }
1462
1463         if (find_hw_breakpoint(addr, type) >= 0) {
1464             return -EEXIST;
1465         }
1466
1467         nb_hw_watchpoint++;
1468         break;
1469
1470     default:
1471         return -ENOSYS;
1472     }
1473
1474     return 0;
1475 }
1476
1477 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1478                                   target_ulong len, int type)
1479 {
1480     int n;
1481
1482     n = find_hw_breakpoint(addr, type);
1483     if (n < 0) {
1484         return -ENOENT;
1485     }
1486
1487     switch (type) {
1488     case GDB_BREAKPOINT_HW:
1489         nb_hw_breakpoint--;
1490         break;
1491
1492     case GDB_WATCHPOINT_WRITE:
1493     case GDB_WATCHPOINT_READ:
1494     case GDB_WATCHPOINT_ACCESS:
1495         nb_hw_watchpoint--;
1496         break;
1497
1498     default:
1499         return -ENOSYS;
1500     }
1501     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1502
1503     return 0;
1504 }
1505
1506 void kvm_arch_remove_all_hw_breakpoints(void)
1507 {
1508     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1509 }
1510
1511 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1512 {
1513     int n;
1514
1515     /* Software Breakpoint updates */
1516     if (kvm_sw_breakpoints_active(cs)) {
1517         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1518     }
1519
1520     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1521            <= ARRAY_SIZE(hw_debug_points));
1522     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1523
1524     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1525         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1526         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1527         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1528             switch (hw_debug_points[n].type) {
1529             case GDB_BREAKPOINT_HW:
1530                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1531                 break;
1532             case GDB_WATCHPOINT_WRITE:
1533                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1534                 break;
1535             case GDB_WATCHPOINT_READ:
1536                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1537                 break;
1538             case GDB_WATCHPOINT_ACCESS:
1539                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1540                                         KVMPPC_DEBUG_WATCH_READ;
1541                 break;
1542             default:
1543                 cpu_abort(cs, "Unsupported breakpoint type\n");
1544             }
1545             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1546         }
1547     }
1548 }
1549
1550 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1551 {
1552     CPUState *cs = CPU(cpu);
1553     CPUPPCState *env = &cpu->env;
1554     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1555     int handle = 0;
1556     int n;
1557     int flag = 0;
1558
1559     if (cs->singlestep_enabled) {
1560         handle = 1;
1561     } else if (arch_info->status) {
1562         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1563             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1564                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1565                 if (n >= 0) {
1566                     handle = 1;
1567                 }
1568             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1569                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1570                 n = find_hw_watchpoint(arch_info->address,  &flag);
1571                 if (n >= 0) {
1572                     handle = 1;
1573                     cs->watchpoint_hit = &hw_watchpoint;
1574                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1575                     hw_watchpoint.flags = flag;
1576                 }
1577             }
1578         }
1579     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1580         handle = 1;
1581     } else {
1582         /* QEMU is not able to handle debug exception, so inject
1583          * program exception to guest;
1584          * Yes program exception NOT debug exception !!
1585          * When QEMU is using debug resources then debug exception must
1586          * be always set. To achieve this we set MSR_DE and also set
1587          * MSRP_DEP so guest cannot change MSR_DE.
1588          * When emulating debug resource for guest we want guest
1589          * to control MSR_DE (enable/disable debug interrupt on need).
1590          * Supporting both configurations are NOT possible.
1591          * So the result is that we cannot share debug resources
1592          * between QEMU and Guest on BOOKE architecture.
1593          * In the current design QEMU gets the priority over guest,
1594          * this means that if QEMU is using debug resources then guest
1595          * cannot use them;
1596          * For software breakpoint QEMU uses a privileged instruction;
1597          * So there cannot be any reason that we are here for guest
1598          * set debug exception, only possibility is guest executed a
1599          * privileged / illegal instruction and that's why we are
1600          * injecting a program interrupt.
1601          */
1602
1603         cpu_synchronize_state(cs);
1604         /* env->nip is PC, so increment this by 4 to use
1605          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1606          */
1607         env->nip += 4;
1608         cs->exception_index = POWERPC_EXCP_PROGRAM;
1609         env->error_code = POWERPC_EXCP_INVAL;
1610         ppc_cpu_do_interrupt(cs);
1611     }
1612
1613     return handle;
1614 }
1615
1616 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1617 {
1618     PowerPCCPU *cpu = POWERPC_CPU(cs);
1619     CPUPPCState *env = &cpu->env;
1620     int ret;
1621
1622     qemu_mutex_lock_iothread();
1623
1624     switch (run->exit_reason) {
1625     case KVM_EXIT_DCR:
1626         if (run->dcr.is_write) {
1627             DPRINTF("handle dcr write\n");
1628             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1629         } else {
1630             DPRINTF("handle dcr read\n");
1631             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1632         }
1633         break;
1634     case KVM_EXIT_HLT:
1635         DPRINTF("handle halt\n");
1636         ret = kvmppc_handle_halt(cpu);
1637         break;
1638 #if defined(TARGET_PPC64)
1639     case KVM_EXIT_PAPR_HCALL:
1640         DPRINTF("handle PAPR hypercall\n");
1641         run->papr_hcall.ret = spapr_hypercall(cpu,
1642                                               run->papr_hcall.nr,
1643                                               run->papr_hcall.args);
1644         ret = 0;
1645         break;
1646 #endif
1647     case KVM_EXIT_EPR:
1648         DPRINTF("handle epr\n");
1649         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1650         ret = 0;
1651         break;
1652     case KVM_EXIT_WATCHDOG:
1653         DPRINTF("handle watchdog expiry\n");
1654         watchdog_perform_action();
1655         ret = 0;
1656         break;
1657
1658     case KVM_EXIT_DEBUG:
1659         DPRINTF("handle debug exception\n");
1660         if (kvm_handle_debug(cpu, run)) {
1661             ret = EXCP_DEBUG;
1662             break;
1663         }
1664         /* re-enter, this exception was guest-internal */
1665         ret = 0;
1666         break;
1667
1668     default:
1669         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1670         ret = -1;
1671         break;
1672     }
1673
1674     qemu_mutex_unlock_iothread();
1675     return ret;
1676 }
1677
1678 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1679 {
1680     CPUState *cs = CPU(cpu);
1681     uint32_t bits = tsr_bits;
1682     struct kvm_one_reg reg = {
1683         .id = KVM_REG_PPC_OR_TSR,
1684         .addr = (uintptr_t) &bits,
1685     };
1686
1687     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1688 }
1689
1690 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1691 {
1692
1693     CPUState *cs = CPU(cpu);
1694     uint32_t bits = tsr_bits;
1695     struct kvm_one_reg reg = {
1696         .id = KVM_REG_PPC_CLEAR_TSR,
1697         .addr = (uintptr_t) &bits,
1698     };
1699
1700     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1701 }
1702
1703 int kvmppc_set_tcr(PowerPCCPU *cpu)
1704 {
1705     CPUState *cs = CPU(cpu);
1706     CPUPPCState *env = &cpu->env;
1707     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1708
1709     struct kvm_one_reg reg = {
1710         .id = KVM_REG_PPC_TCR,
1711         .addr = (uintptr_t) &tcr,
1712     };
1713
1714     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1715 }
1716
1717 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1718 {
1719     CPUState *cs = CPU(cpu);
1720     int ret;
1721
1722     if (!kvm_enabled()) {
1723         return -1;
1724     }
1725
1726     if (!cap_ppc_watchdog) {
1727         printf("warning: KVM does not support watchdog");
1728         return -1;
1729     }
1730
1731     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1732     if (ret < 0) {
1733         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1734                 __func__, strerror(-ret));
1735         return ret;
1736     }
1737
1738     return ret;
1739 }
1740
/*
 * Find the first line of /proc/cpuinfo that starts with @field and copy it
 * (the whole line, bounded by @len) into @value.
 *
 * Returns 0 when such a line was found, -1 when the file cannot be opened
 * or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        /* Stop scanning on a line that reads back as an empty string */
        if (!*line) {
            break;
        }
    }

    fclose(f);

    return ret;
}
1768
/*
 * Read the host timebase frequency from the "timebase" line of
 * /proc/cpuinfo.
 *
 * Returns the parsed value.  Falls back to get_ticks_per_sec() when the
 * line is missing, has no ':' separator, carries no digits after the
 * separator, or holds a value that does not fit in 32 bits.
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    char *end;
    unsigned long freq;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    ns++;

    /* strtoul, unlike atoi, lets us tell "no number" and overflow apart */
    errno = 0;
    freq = strtoul(ns, &end, 10);
    if (end == ns || errno == ERANGE || freq > UINT32_MAX) {
        return retval;
    }

    return freq;
}
1787
/*
 * Load the contents of /proc/device-tree/system-id into *value.
 *
 * Returns the success indication of g_file_get_contents().
 */
bool kvmppc_get_host_serial(char **value)
{
    const char *path = "/proc/device-tree/system-id";

    return g_file_get_contents(path, value, NULL, NULL);
}
1793
/*
 * Load the contents of /proc/device-tree/model into *value.
 *
 * Returns the success indication of g_file_get_contents().
 */
bool kvmppc_get_host_model(char **value)
{
    const char *path = "/proc/device-tree/model";

    return g_file_get_contents(path, value, NULL, NULL);
}
1798
1799 /* Try to find a device tree node for a CPU with clock-frequency property */
1800 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1801 {
1802     struct dirent *dirp;
1803     DIR *dp;
1804
1805     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1806         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1807         return -1;
1808     }
1809
1810     buf[0] = '\0';
1811     while ((dirp = readdir(dp)) != NULL) {
1812         FILE *f;
1813         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1814                  dirp->d_name);
1815         f = fopen(buf, "r");
1816         if (f) {
1817             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1818             fclose(f);
1819             break;
1820         }
1821         buf[0] = '\0';
1822     }
1823     closedir(dp);
1824     if (buf[0] == '\0') {
1825         printf("Unknown host!\n");
1826         return -1;
1827     }
1828
1829     return 0;
1830 }
1831
1832 /* Read a CPU node property from the host device tree that's a single
1833  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1834  * (can't find or open the property, or doesn't understand the
1835  * format) */
1836 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1837 {
1838     char buf[PATH_MAX], *tmp;
1839     union {
1840         uint32_t v32;
1841         uint64_t v64;
1842     } u;
1843     FILE *f;
1844     int len;
1845
1846     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1847         return -1;
1848     }
1849
1850     tmp = g_strdup_printf("%s/%s", buf, propname);
1851
1852     f = fopen(tmp, "rb");
1853     g_free(tmp);
1854     if (!f) {
1855         return -1;
1856     }
1857
1858     len = fread(&u, 1, sizeof(u), f);
1859     fclose(f);
1860     switch (len) {
1861     case 4:
1862         /* property is a 32-bit quantity */
1863         return be32_to_cpu(u.v32);
1864     case 8:
1865         return be64_to_cpu(u.v64);
1866     }
1867
1868     return 0;
1869 }
1870
/* Value of the host CPU node's "clock-frequency" device tree property,
 * exactly as returned by kvmppc_read_int_cpu_dt() */
uint64_t kvmppc_get_clockfreq(void)
{
    const uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1875
/* Value of the host CPU node's "ibm,vmx" device tree property, truncated
 * to 32 bits */
uint32_t kvmppc_get_vmx(void)
{
    const uint64_t raw = kvmppc_read_int_cpu_dt("ibm,vmx");

    return (uint32_t)raw;
}
1880
/* Value of the host CPU node's "ibm,dfp" device tree property, truncated
 * to 32 bits */
uint32_t kvmppc_get_dfp(void)
{
    const uint64_t raw = kvmppc_read_int_cpu_dt("ibm,dfp");

    return (uint32_t)raw;
}
1885
1886 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1887  {
1888      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1889      CPUState *cs = CPU(cpu);
1890
1891     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1892         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1893         return 0;
1894     }
1895
1896     return 1;
1897 }
1898
1899 int kvmppc_get_hasidle(CPUPPCState *env)
1900 {
1901     struct kvm_ppc_pvinfo pvinfo;
1902
1903     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1904         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1905         return 1;
1906     }
1907
1908     return 0;
1909 }
1910
1911 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1912 {
1913     uint32_t *hc = (uint32_t*)buf;
1914     struct kvm_ppc_pvinfo pvinfo;
1915
1916     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1917         memcpy(buf, pvinfo.hcall, buf_len);
1918         return 0;
1919     }
1920
1921     /*
1922      * Fallback to always fail hypercalls regardless of endianness:
1923      *
1924      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1925      *     li r3, -1
1926      *     b .+8       (becomes nop in wrong endian)
1927      *     bswap32(li r3, -1)
1928      */
1929
1930     hc[0] = cpu_to_be32(0x08000048);
1931     hc[1] = cpu_to_be32(0x3860ffff);
1932     hc[2] = cpu_to_be32(0x48000008);
1933     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1934
1935     return 0;
1936 }
1937
1938 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1939 {
1940     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1941 }
1942
1943 void kvmppc_enable_logical_ci_hcalls(void)
1944 {
1945     /*
1946      * FIXME: it would be nice if we could detect the cases where
1947      * we're using a device which requires the in kernel
1948      * implementation of these hcalls, but the kernel lacks them and
1949      * produce a warning.
1950      */
1951     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1952     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1953 }
1954
1955 void kvmppc_enable_set_mode_hcall(void)
1956 {
1957     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1958 }
1959
1960 void kvmppc_set_papr(PowerPCCPU *cpu)
1961 {
1962     CPUState *cs = CPU(cpu);
1963     int ret;
1964
1965     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1966     if (ret) {
1967         cpu_abort(cs, "This KVM version does not support PAPR\n");
1968     }
1969
1970     /* Update the capability flag so we sync the right information
1971      * with kvm */
1972     cap_papr = 1;
1973 }
1974
1975 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
1976 {
1977     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
1978 }
1979
1980 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1981 {
1982     CPUState *cs = CPU(cpu);
1983     int ret;
1984
1985     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1986     if (ret && mpic_proxy) {
1987         cpu_abort(cs, "This KVM version does not support EPR\n");
1988     }
1989 }
1990
1991 int kvmppc_smt_threads(void)
1992 {
1993     return cap_ppc_smt ? cap_ppc_smt : 1;
1994 }
1995
1996 #ifdef TARGET_PPC64
1997 off_t kvmppc_alloc_rma(void **rma)
1998 {
1999     off_t size;
2000     int fd;
2001     struct kvm_allocate_rma ret;
2002
2003     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2004      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2005      *                      not necessary on this hardware
2006      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2007      *
2008      * FIXME: We should allow the user to force contiguous RMA
2009      * allocation in the cap_ppc_rma==1 case.
2010      */
2011     if (cap_ppc_rma < 2) {
2012         return 0;
2013     }
2014
2015     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2016     if (fd < 0) {
2017         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2018                 strerror(errno));
2019         return -1;
2020     }
2021
2022     size = MIN(ret.rma_size, 256ul << 20);
2023
2024     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2025     if (*rma == MAP_FAILED) {
2026         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2027         return -1;
2028     };
2029
2030     return size;
2031 }
2032
2033 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2034 {
2035     struct kvm_ppc_smmu_info info;
2036     long rampagesize, best_page_shift;
2037     int i;
2038
2039     if (cap_ppc_rma >= 2) {
2040         return current_size;
2041     }
2042
2043     /* Find the largest hardware supported page size that's less than
2044      * or equal to the (logical) backing page size of guest RAM */
2045     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2046     rampagesize = getrampagesize();
2047     best_page_shift = 0;
2048
2049     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2050         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2051
2052         if (!sps->page_shift) {
2053             continue;
2054         }
2055
2056         if ((sps->page_shift > best_page_shift)
2057             && ((1UL << sps->page_shift) <= rampagesize)) {
2058             best_page_shift = sps->page_shift;
2059         }
2060     }
2061
2062     return MIN(current_size,
2063                1ULL << (best_page_shift + hash_shift - 7));
2064 }
2065 #endif
2066
2067 bool kvmppc_spapr_use_multitce(void)
2068 {
2069     return cap_spapr_multitce;
2070 }
2071
2072 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2073                               bool vfio_accel)
2074 {
2075     struct kvm_create_spapr_tce args = {
2076         .liobn = liobn,
2077         .window_size = window_size,
2078     };
2079     long len;
2080     int fd;
2081     void *table;
2082
2083     /* Must set fd to -1 so we don't try to munmap when called for
2084      * destroying the table, which the upper layers -will- do
2085      */
2086     *pfd = -1;
2087     if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
2088         return NULL;
2089     }
2090
2091     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2092     if (fd < 0) {
2093         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2094                 liobn);
2095         return NULL;
2096     }
2097
2098     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2099     /* FIXME: round this up to page size */
2100
2101     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2102     if (table == MAP_FAILED) {
2103         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2104                 liobn);
2105         close(fd);
2106         return NULL;
2107     }
2108
2109     *pfd = fd;
2110     return table;
2111 }
2112
/*
 * Unmap a TCE table of @nb_table 64-bit entries and close its @fd.
 *
 * Returns -1 when @fd is negative (nothing is done), 0 otherwise.  A
 * failing munmap() or close() is reported on stderr but still yields 0.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    /* close() is skipped when munmap() fails (short-circuit) */
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2131
2132 int kvmppc_reset_htab(int shift_hint)
2133 {
2134     uint32_t shift = shift_hint;
2135
2136     if (!kvm_enabled()) {
2137         /* Full emulation, tell caller to allocate htab itself */
2138         return 0;
2139     }
2140     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2141         int ret;
2142         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2143         if (ret == -ENOTTY) {
2144             /* At least some versions of PR KVM advertise the
2145              * capability, but don't implement the ioctl().  Oops.
2146              * Return 0 so that we allocate the htab in qemu, as is
2147              * correct for PR. */
2148             return 0;
2149         } else if (ret < 0) {
2150             return ret;
2151         }
2152         return shift;
2153     }
2154
2155     /* We have a kernel that predates the htab reset calls.  For PR
2156      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2157      * this era, it has allocated a 16MB fixed size hash table
2158      * already.  Kernels of this era have the GET_PVINFO capability
2159      * only on PR, so we use this hack to determine the right
2160      * answer */
2161     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2162         /* PR - tell caller to allocate htab */
2163         return 0;
2164     } else {
2165         /* HV - assume 16MB kernel allocated htab */
2166         return 24;
2167     }
2168 }
2169
2170 static inline uint32_t mfpvr(void)
2171 {
2172     uint32_t pvr;
2173
2174     asm ("mfpvr %0"
2175          : "=r"(pvr));
2176     return pvr;
2177 }
2178
/* Set (@on == true) or clear (@on == false) the bits of @flags in *@word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2187
2188 static void kvmppc_host_cpu_initfn(Object *obj)
2189 {
2190     assert(kvm_enabled());
2191 }
2192
2193 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2194 {
2195     DeviceClass *dc = DEVICE_CLASS(oc);
2196     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2197     uint32_t vmx = kvmppc_get_vmx();
2198     uint32_t dfp = kvmppc_get_dfp();
2199     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2200     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2201
2202     /* Now fix up the class with information we can query from the host */
2203     pcc->pvr = mfpvr();
2204
2205     if (vmx != -1) {
2206         /* Only override when we know what the host supports */
2207         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2208         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2209     }
2210     if (dfp != -1) {
2211         /* Only override when we know what the host supports */
2212         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2213     }
2214
2215     if (dcache_size != -1) {
2216         pcc->l1_dcache_size = dcache_size;
2217     }
2218
2219     if (icache_size != -1) {
2220         pcc->l1_icache_size = icache_size;
2221     }
2222
2223     /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2224     dc->cannot_destroy_with_object_finalize_yet = true;
2225 }
2226
2227 bool kvmppc_has_cap_epr(void)
2228 {
2229     return cap_epr;
2230 }
2231
2232 bool kvmppc_has_cap_htab_fd(void)
2233 {
2234     return cap_htab_fd;
2235 }
2236
2237 bool kvmppc_has_cap_fixup_hcalls(void)
2238 {
2239     return cap_fixup_hcalls;
2240 }
2241
2242 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2243 {
2244     ObjectClass *oc = OBJECT_CLASS(pcc);
2245
2246     while (oc && !object_class_is_abstract(oc)) {
2247         oc = object_class_get_parent(oc);
2248     }
2249     assert(oc);
2250
2251     return POWERPC_CPU_CLASS(oc);
2252 }
2253
2254 static int kvm_ppc_register_host_cpu_type(void)
2255 {
2256     TypeInfo type_info = {
2257         .name = TYPE_HOST_POWERPC_CPU,
2258         .instance_init = kvmppc_host_cpu_initfn,
2259         .class_init = kvmppc_host_cpu_class_init,
2260     };
2261     uint32_t host_pvr = mfpvr();
2262     PowerPCCPUClass *pvr_pcc;
2263     DeviceClass *dc;
2264
2265     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2266     if (pvr_pcc == NULL) {
2267         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2268     }
2269     if (pvr_pcc == NULL) {
2270         return -1;
2271     }
2272     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2273     type_register(&type_info);
2274
2275     /* Register generic family CPU class for a family */
2276     pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2277     dc = DEVICE_CLASS(pvr_pcc);
2278     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2279     type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2280     type_register(&type_info);
2281
2282     return 0;
2283 }
2284
2285 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2286 {
2287     struct kvm_rtas_token_args args = {
2288         .token = token,
2289     };
2290
2291     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2292         return -ENOENT;
2293     }
2294
2295     strncpy(args.name, function, sizeof(args.name));
2296
2297     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2298 }
2299
2300 int kvmppc_get_htab_fd(bool write)
2301 {
2302     struct kvm_get_htab_fd s = {
2303         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2304         .start_index = 0,
2305     };
2306
2307     if (!cap_htab_fd) {
2308         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2309         return -1;
2310     }
2311
2312     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2313 }
2314
2315 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2316 {
2317     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2318     uint8_t buf[bufsize];
2319     ssize_t rc;
2320
2321     do {
2322         rc = read(fd, buf, bufsize);
2323         if (rc < 0) {
2324             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2325                     strerror(errno));
2326             return rc;
2327         } else if (rc) {
2328             uint8_t *buffer = buf;
2329             ssize_t n = rc;
2330             while (n) {
2331                 struct kvm_get_htab_header *head =
2332                     (struct kvm_get_htab_header *) buffer;
2333                 size_t chunksize = sizeof(*head) +
2334                      HASH_PTE_SIZE_64 * head->n_valid;
2335
2336                 qemu_put_be32(f, head->index);
2337                 qemu_put_be16(f, head->n_valid);
2338                 qemu_put_be16(f, head->n_invalid);
2339                 qemu_put_buffer(f, (void *)(head + 1),
2340                                 HASH_PTE_SIZE_64 * head->n_valid);
2341
2342                 buffer += chunksize;
2343                 n -= chunksize;
2344             }
2345         }
2346     } while ((rc != 0)
2347              && ((max_ns < 0)
2348                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2349
2350     return (rc == 0) ? 1 : 0;
2351 }
2352
2353 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2354                            uint16_t n_valid, uint16_t n_invalid)
2355 {
2356     struct kvm_get_htab_header *buf;
2357     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2358     ssize_t rc;
2359
2360     buf = alloca(chunksize);
2361     buf->index = index;
2362     buf->n_valid = n_valid;
2363     buf->n_invalid = n_invalid;
2364
2365     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2366
2367     rc = write(fd, buf, chunksize);
2368     if (rc < 0) {
2369         fprintf(stderr, "Error writing KVM hash table: %s\n",
2370                 strerror(errno));
2371         return rc;
2372     }
2373     if (rc != chunksize) {
2374         /* We should never get a short write on a single chunk */
2375         fprintf(stderr, "Short write, restoring KVM hash table\n");
2376         return -1;
2377     }
2378     return 0;
2379 }
2380
2381 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2382 {
2383     return true;
2384 }
2385
2386 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2387 {
2388     return 1;
2389 }
2390
/* Always returns 1; neither argument is consulted. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}
2395
2396 void kvm_arch_init_irq_routing(KVMState *s)
2397 {
2398 }
2399
/*
 * Buffer exchanged with a KVM hash table fd: a chunk header followed by
 * the HPTE words.  kvmppc_hash64_read_pteg() returns the address of the
 * hpte member as its token, and kvmppc_hash64_free_pteg() recovers the
 * enclosing struct from that address.
 */
struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * We require one extra byte for read
     *
     * NOTE(review): the array actually reserves one extra target_ulong
     * beyond HPTES_PER_GROUP * 2 words, not a single byte.  Why read()
     * needs the slack is not visible here - confirm against the kernel.
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};
2407
2408 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2409 {
2410     int htab_fd;
2411     struct kvm_get_htab_fd ghf;
2412     struct kvm_get_htab_buf  *hpte_buf;
2413
2414     ghf.flags = 0;
2415     ghf.start_index = pte_index;
2416     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2417     if (htab_fd < 0) {
2418         goto error_out;
2419     }
2420
2421     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2422     /*
2423      * Read the hpte group
2424      */
2425     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2426         goto out_close;
2427     }
2428
2429     close(htab_fd);
2430     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2431
2432 out_close:
2433     g_free(hpte_buf);
2434     close(htab_fd);
2435 error_out:
2436     return 0;
2437 }
2438
2439 void kvmppc_hash64_free_pteg(uint64_t token)
2440 {
2441     struct kvm_get_htab_buf *htab_buf;
2442
2443     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2444                             hpte);
2445     g_free(htab_buf);
2446     return;
2447 }
2448
2449 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2450                              target_ulong pte0, target_ulong pte1)
2451 {
2452     int htab_fd;
2453     struct kvm_get_htab_fd ghf;
2454     struct kvm_get_htab_buf hpte_buf;
2455
2456     ghf.flags = 0;
2457     ghf.start_index = 0;     /* Ignored */
2458     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2459     if (htab_fd < 0) {
2460         goto error_out;
2461     }
2462
2463     hpte_buf.header.n_valid = 1;
2464     hpte_buf.header.n_invalid = 0;
2465     hpte_buf.header.index = pte_index;
2466     hpte_buf.hpte[0] = pte0;
2467     hpte_buf.hpte[1] = pte1;
2468     /*
2469      * Write the hpte entry.
2470      * CAUTION: write() has the warn_unused_result attribute. Hence we
2471      * need to check the return value, even though we do nothing.
2472      */
2473     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2474         goto out_close;
2475     }
2476
2477 out_close:
2478     close(htab_fd);
2479     return;
2480
2481 error_out:
2482     return;
2483 }
2484
/* Leaves @route untouched and always returns 0. */
int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data)
{
    (void)route;
    (void)address;
    (void)data;

    return 0;
}
2490
/* Returns the low 16 bits of @data. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t gsi_mask = 0xffff;

    return data & gsi_mask;
}
2495
2496 int kvmppc_enable_hwrng(void)
2497 {
2498     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2499         return -1;
2500     }
2501
2502     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2503 }