target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include <dirent.h>
  18 #include <sys/types.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/mman.h>
  21 #include <sys/vfs.h>
  22
  23 #include <linux/kvm.h>
  24
  25 #include "qemu-common.h"
  26 #include "qemu/timer.h"
  27 #include "sysemu/sysemu.h"
  28 #include "sysemu/kvm.h"
  29 #include "kvm_ppc.h"
  30 #include "cpu.h"
  31 #include "sysemu/cpus.h"
  32 #include "sysemu/device_tree.h"
  33 #include "mmu-hash64.h"
  34
  35 #include "hw/sysbus.h"
  36 #include "hw/ppc/spapr.h"
  37 #include "hw/ppc/spapr_vio.h"
  38 #include "hw/ppc/ppc.h"
  39 #include "sysemu/watchdog.h"
  40 #include "trace.h"
  41 #include "exec/gdbstub.h"
  42 #include "exec/memattrs.h"
  43 #include "sysemu/hostmem.h"
  44
/* Uncomment the following line to turn on the DPRINTF() debug output. */
//#define DEBUG_KVM

/* DPRINTF(fmt, ...): printf-style debug trace.
 * With DEBUG_KVM defined it prints to stderr; otherwise it expands to an
 * empty statement and its arguments are discarded by the preprocessor. */
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
  54
/* Host device-tree directory holding the CPU nodes.
 * NOTE(review): not referenced in the part of the file reviewed here;
 * presumably used by host CPU probing code further down - confirm. */
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

/* Capabilities the generic KVM code must find for this architecture.
 * Only the KVM_CAP_LAST_INFO entry is present, i.e. nothing extra is
 * listed as mandatory here. */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};

/* Cached results of kvm_check_extension(), filled in by kvm_arch_init().
 * Exceptions: cap_spapr_vfio is forced to false there, and cap_papr is
 * not probed in kvm_arch_init() (see the note in that function). */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
static int cap_fixup_hcalls;

/* Value read from KVM_REG_PPC_DEBUG_INST in kvm_arch_init_vcpu(). */
static uint32_t debug_inst_opcode;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;
  90
  91 static void kvm_kick_cpu(void *opaque)
  92 {
  93     PowerPCCPU *cpu = opaque;
  94
  95     qemu_cpu_kick(CPU(cpu));
  96 }
  97
  98 static int kvm_ppc_register_host_cpu_type(void);
  99
 100 int kvm_arch_init(MachineState *ms, KVMState *s)
 101 {
 102     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 103     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 104     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 105     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 106     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 107     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 108     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 109     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 110     cap_spapr_vfio = false;
 111     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 112     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 113     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 114     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 115     /* Note: we don't set cap_papr here, because this capability is
 116      * only activated after this by kvmppc_set_papr() */
 117     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 118     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 119
 120     if (!cap_interrupt_level) {
 121         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 122                         "VM to stall at times!\n");
 123     }
 124
 125     kvm_ppc_register_host_cpu_type();
 126
 127     return 0;
 128 }
 129
 130 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 131 {
 132     CPUPPCState *cenv = &cpu->env;
 133     CPUState *cs = CPU(cpu);
 134     struct kvm_sregs sregs;
 135     int ret;
 136
 137     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 138         /* What we're really trying to say is "if we're on BookE, we use
 139            the native PVR for now". This is the only sane way to check
 140            it though, so we potentially confuse users that they can run
 141            BookE guests on BookS. Let's hope nobody dares enough :) */
 142         return 0;
 143     } else {
 144         if (!cap_segstate) {
 145             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 146             return -ENOSYS;
 147         }
 148     }
 149
 150     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 151     if (ret) {
 152         return ret;
 153     }
 154
 155     sregs.pvr = cenv->spr[SPR_PVR];
 156     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 157 }
 158
 159 /* Set up a shared TLB array with KVM */
 160 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 161 {
 162     CPUPPCState *env = &cpu->env;
 163     CPUState *cs = CPU(cpu);
 164     struct kvm_book3e_206_tlb_params params = {};
 165     struct kvm_config_tlb cfg = {};
 166     unsigned int entries = 0;
 167     int ret, i;
 168
 169     if (!kvm_enabled() ||
 170         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 171         return 0;
 172     }
 173
 174     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 175
 176     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 177         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 178         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 179         entries += params.tlb_sizes[i];
 180     }
 181
 182     assert(entries == env->nb_tlb);
 183     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 184
 185     env->tlb_dirty = true;
 186
 187     cfg.array = (uintptr_t)env->tlb.tlbm;
 188     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 189     cfg.params = (uintptr_t)&params;
 190     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 191
 192     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 193     if (ret < 0) {
 194         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 195                 __func__, strerror(-ret));
 196         return ret;
 197     }
 198
 199     env->kvm_sw_tlb = true;
 200     return 0;
 201 }
 202
 203
 204 #if defined(TARGET_PPC64)
 205 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 206                                        struct kvm_ppc_smmu_info *info)
 207 {
 208     CPUPPCState *env = &cpu->env;
 209     CPUState *cs = CPU(cpu);
 210
 211     memset(info, 0, sizeof(*info));
 212
 213     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 214      * need to "guess" what the supported page sizes are.
 215      *
 216      * For that to work we make a few assumptions:
 217      *
 218      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 219      *   KVM which only supports 4K and 16M pages, but supports them
 220      *   regardless of the backing store characteritics. We also don't
 221      *   support 1T segments.
 222      *
 223      *   This is safe as if HV KVM ever supports that capability or PR
 224      *   KVM grows supports for more page/segment sizes, those versions
 225      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 226      *   will not hit this fallback
 227      *
 228      * - Else we are running HV KVM. This means we only support page
 229      *   sizes that fit in the backing store. Additionally we only
 230      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 231      *   P7 encodings for the SLB and hash table. Here too, we assume
 232      *   support for any newer processor will mean a kernel that
 233      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 234      *   this fallback.
 235      */
 236     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 237         /* No flags */
 238         info->flags = 0;
 239         info->slb_size = 64;
 240
 241         /* Standard 4k base page size segment */
 242         info->sps[0].page_shift = 12;
 243         info->sps[0].slb_enc = 0;
 244         info->sps[0].enc[0].page_shift = 12;
 245         info->sps[0].enc[0].pte_enc = 0;
 246
 247         /* Standard 16M large page size segment */
 248         info->sps[1].page_shift = 24;
 249         info->sps[1].slb_enc = SLB_VSID_L;
 250         info->sps[1].enc[0].page_shift = 24;
 251         info->sps[1].enc[0].pte_enc = 0;
 252     } else {
 253         int i = 0;
 254
 255         /* HV KVM has backing store size restrictions */
 256         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 257
 258         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 259             info->flags |= KVM_PPC_1T_SEGMENTS;
 260         }
 261
 262         if (env->mmu_model == POWERPC_MMU_2_06 ||
 263             env->mmu_model == POWERPC_MMU_2_07) {
 264             info->slb_size = 32;
 265         } else {
 266             info->slb_size = 64;
 267         }
 268
 269         /* Standard 4k base page size segment */
 270         info->sps[i].page_shift = 12;
 271         info->sps[i].slb_enc = 0;
 272         info->sps[i].enc[0].page_shift = 12;
 273         info->sps[i].enc[0].pte_enc = 0;
 274         i++;
 275
 276         /* 64K on MMU 2.06 and later */
 277         if (env->mmu_model == POWERPC_MMU_2_06 ||
 278             env->mmu_model == POWERPC_MMU_2_07) {
 279             info->sps[i].page_shift = 16;
 280             info->sps[i].slb_enc = 0x110;
 281             info->sps[i].enc[0].page_shift = 16;
 282             info->sps[i].enc[0].pte_enc = 1;
 283             i++;
 284         }
 285
 286         /* Standard 16M large page size segment */
 287         info->sps[i].page_shift = 24;
 288         info->sps[i].slb_enc = SLB_VSID_L;
 289         info->sps[i].enc[0].page_shift = 24;
 290         info->sps[i].enc[0].pte_enc = 0;
 291     }
 292 }
 293
 294 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 295 {
 296     CPUState *cs = CPU(cpu);
 297     int ret;
 298
 299     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 300         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 301         if (ret == 0) {
 302             return;
 303         }
 304     }
 305
 306     kvm_get_fallback_smmu_info(cpu, info);
 307 }
 308
 309 static long gethugepagesize(const char *mem_path)
 310 {
 311     struct statfs fs;
 312     int ret;
 313
 314     do {
 315         ret = statfs(mem_path, &fs);
 316     } while (ret != 0 && errno == EINTR);
 317
 318     if (ret != 0) {
 319         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 320                 strerror(errno));
 321         exit(1);
 322     }
 323
 324 #define HUGETLBFS_MAGIC       0x958458f6
 325
 326     if (fs.f_type != HUGETLBFS_MAGIC) {
 327         /* Explicit mempath, but it's ordinary pages */
 328         return getpagesize();
 329     }
 330
 331     /* It's hugepage, return the huge page size */
 332     return fs.f_bsize;
 333 }
 334
 335 static int find_max_supported_pagesize(Object *obj, void *opaque)
 336 {
 337     char *mem_path;
 338     long *hpsize_min = opaque;
 339
 340     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
 341         mem_path = object_property_get_str(obj, "mem-path", NULL);
 342         if (mem_path) {
 343             long hpsize = gethugepagesize(mem_path);
 344             if (hpsize < *hpsize_min) {
 345                 *hpsize_min = hpsize;
 346             }
 347         } else {
 348             *hpsize_min = getpagesize();
 349         }
 350     }
 351
 352     return 0;
 353 }
 354
 355 static long getrampagesize(void)
 356 {
 357     long hpsize = LONG_MAX;
 358     Object *memdev_root;
 359
 360     if (mem_path) {
 361         return gethugepagesize(mem_path);
 362     }
 363
 364     /* it's possible we have memory-backend objects with
 365      * hugepage-backed RAM. these may get mapped into system
 366      * address space via -numa parameters or memory hotplug
 367      * hooks. we want to take these into account, but we
 368      * also want to make sure these supported hugepage
 369      * sizes are applicable across the entire range of memory
 370      * we may boot from, so we take the min across all
 371      * backends, and assume normal pages in cases where a
 372      * backend isn't backed by hugepages.
 373      */
 374     memdev_root = object_resolve_path("/objects", NULL);
 375     if (!memdev_root) {
 376         return getpagesize();
 377     }
 378
 379     object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
 380
 381     return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
 382 }
 383
 384 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 385 {
 386     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 387         return true;
 388     }
 389
 390     return (1ul << shift) <= rampgsize;
 391 }
 392
 393 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 394 {
 395     static struct kvm_ppc_smmu_info smmu_info;
 396     static bool has_smmu_info;
 397     CPUPPCState *env = &cpu->env;
 398     long rampagesize;
 399     int iq, ik, jq, jk;
 400
 401     /* We only handle page sizes for 64-bit server guests for now */
 402     if (!(env->mmu_model & POWERPC_MMU_64)) {
 403         return;
 404     }
 405
 406     /* Collect MMU info from kernel if not already */
 407     if (!has_smmu_info) {
 408         kvm_get_smmu_info(cpu, &smmu_info);
 409         has_smmu_info = true;
 410     }
 411
 412     rampagesize = getrampagesize();
 413
 414     /* Convert to QEMU form */
 415     memset(&env->sps, 0, sizeof(env->sps));
 416
 417     /* If we have HV KVM, we need to forbid CI large pages if our
 418      * host page size is smaller than 64K.
 419      */
 420     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 421         env->ci_large_pages = getpagesize() >= 0x10000;
 422     }
 423
 424     /*
 425      * XXX This loop should be an entry wide AND of the capabilities that
 426      *     the selected CPU has with the capabilities that KVM supports.
 427      */
 428     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 429         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 430         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 431
 432         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 433                                  ksps->page_shift)) {
 434             continue;
 435         }
 436         qsps->page_shift = ksps->page_shift;
 437         qsps->slb_enc = ksps->slb_enc;
 438         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 439             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 440                                      ksps->enc[jk].page_shift)) {
 441                 continue;
 442             }
 443             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 444             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 445             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 446                 break;
 447             }
 448         }
 449         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 450             break;
 451         }
 452     }
 453     env->slb_nr = smmu_info.slb_size;
 454     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 455         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 456     }
 457 }
 458 #else /* defined (TARGET_PPC64) */
 459
/* Builds without TARGET_PPC64 have no page size fixup to perform; this
 * empty stub lets kvm_arch_init_vcpu() call it unconditionally. */
static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}
 463
 464 #endif /* !defined (TARGET_PPC64) */
 465
 466 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 467 {
 468     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 469 }
 470
/* e500 supports 2 h/w breakpoint and 2 watchpoint.
 * book3s supports only 1 watchpoint, so array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

/* Table of hardware debug points, one entry per breakpoint/watchpoint.
 * NOTE(review): the encoding of 'type' is not visible in this part of
 * the file - presumably a GDB_BREAKPOINT_xxx / GDB_WATCHPOINT_xxx value;
 * confirm against the insert/remove helpers. */
static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

/* NOTE(review): not used in the part of the file reviewed here. */
static CPUWatchpoint hw_watchpoint;

/* Default there is no breakpoint and watchpoint supported */
/* max_* are raised by kvmppc_hw_debug_points_init(); nb_* are presumably
 * the numbers currently in use - confirm against their users. */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;
 489
 490 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 491 {
 492     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 493         max_hw_breakpoint = 2;
 494         max_hw_watchpoint = 2;
 495     }
 496
 497     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 498         fprintf(stderr, "Error initializing h/w breakpoints\n");
 499         return;
 500     }
 501 }
 502
 503 int kvm_arch_init_vcpu(CPUState *cs)
 504 {
 505     PowerPCCPU *cpu = POWERPC_CPU(cs);
 506     CPUPPCState *cenv = &cpu->env;
 507     int ret;
 508
 509     /* Gather server mmu info from KVM and update the CPU state */
 510     kvm_fixup_page_sizes(cpu);
 511
 512     /* Synchronize sregs with kvm */
 513     ret = kvm_arch_sync_sregs(cpu);
 514     if (ret) {
 515         return ret;
 516     }
 517
 518     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 519
 520     /* Some targets support access to KVM's guest TLB. */
 521     switch (cenv->mmu_model) {
 522     case POWERPC_MMU_BOOKE206:
 523         ret = kvm_booke206_tlb_init(cpu);
 524         break;
 525     default:
 526         break;
 527     }
 528
 529     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 530     kvmppc_hw_debug_points_init(cenv);
 531
 532     return ret;
 533 }
 534
 535 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 536 {
 537     CPUPPCState *env = &cpu->env;
 538     CPUState *cs = CPU(cpu);
 539     struct kvm_dirty_tlb dirty_tlb;
 540     unsigned char *bitmap;
 541     int ret;
 542
 543     if (!env->kvm_sw_tlb) {
 544         return;
 545     }
 546
 547     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 548     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 549
 550     dirty_tlb.bitmap = (uintptr_t)bitmap;
 551     dirty_tlb.num_dirty = env->nb_tlb;
 552
 553     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 554     if (ret) {
 555         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 556                 __func__, strerror(-ret));
 557     }
 558
 559     g_free(bitmap);
 560 }
 561
 562 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 563 {
 564     PowerPCCPU *cpu = POWERPC_CPU(cs);
 565     CPUPPCState *env = &cpu->env;
 566     union {
 567         uint32_t u32;
 568         uint64_t u64;
 569     } val;
 570     struct kvm_one_reg reg = {
 571         .id = id,
 572         .addr = (uintptr_t) &val,
 573     };
 574     int ret;
 575
 576     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 577     if (ret != 0) {
 578         trace_kvm_failed_spr_get(spr, strerror(errno));
 579     } else {
 580         switch (id & KVM_REG_SIZE_MASK) {
 581         case KVM_REG_SIZE_U32:
 582             env->spr[spr] = val.u32;
 583             break;
 584
 585         case KVM_REG_SIZE_U64:
 586             env->spr[spr] = val.u64;
 587             break;
 588
 589         default:
 590             /* Don't handle this size yet */
 591             abort();
 592         }
 593     }
 594 }
 595
 596 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 597 {
 598     PowerPCCPU *cpu = POWERPC_CPU(cs);
 599     CPUPPCState *env = &cpu->env;
 600     union {
 601         uint32_t u32;
 602         uint64_t u64;
 603     } val;
 604     struct kvm_one_reg reg = {
 605         .id = id,
 606         .addr = (uintptr_t) &val,
 607     };
 608     int ret;
 609
 610     switch (id & KVM_REG_SIZE_MASK) {
 611     case KVM_REG_SIZE_U32:
 612         val.u32 = env->spr[spr];
 613         break;
 614
 615     case KVM_REG_SIZE_U64:
 616         val.u64 = env->spr[spr];
 617         break;
 618
 619     default:
 620         /* Don't handle this size yet */
 621         abort();
 622     }
 623
 624     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 625     if (ret != 0) {
 626         trace_kvm_failed_spr_set(spr, strerror(errno));
 627     }
 628 }
 629
 630 static int kvm_put_fp(CPUState *cs)
 631 {
 632     PowerPCCPU *cpu = POWERPC_CPU(cs);
 633     CPUPPCState *env = &cpu->env;
 634     struct kvm_one_reg reg;
 635     int i;
 636     int ret;
 637
 638     if (env->insns_flags & PPC_FLOAT) {
 639         uint64_t fpscr = env->fpscr;
 640         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 641
 642         reg.id = KVM_REG_PPC_FPSCR;
 643         reg.addr = (uintptr_t)&fpscr;
 644         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 645         if (ret < 0) {
 646             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 647             return ret;
 648         }
 649
 650         for (i = 0; i < 32; i++) {
 651             uint64_t vsr[2];
 652
 653             vsr[0] = float64_val(env->fpr[i]);
 654             vsr[1] = env->vsr[i];
 655             reg.addr = (uintptr_t) &vsr;
 656             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 657
 658             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 659             if (ret < 0) {
 660                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 661                         i, strerror(errno));
 662                 return ret;
 663             }
 664         }
 665     }
 666
 667     if (env->insns_flags & PPC_ALTIVEC) {
 668         reg.id = KVM_REG_PPC_VSCR;
 669         reg.addr = (uintptr_t)&env->vscr;
 670         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 671         if (ret < 0) {
 672             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 673             return ret;
 674         }
 675
 676         for (i = 0; i < 32; i++) {
 677             reg.id = KVM_REG_PPC_VR(i);
 678             reg.addr = (uintptr_t)&env->avr[i];
 679             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 680             if (ret < 0) {
 681                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 682                 return ret;
 683             }
 684         }
 685     }
 686
 687     return 0;
 688 }
 689
 690 static int kvm_get_fp(CPUState *cs)
 691 {
 692     PowerPCCPU *cpu = POWERPC_CPU(cs);
 693     CPUPPCState *env = &cpu->env;
 694     struct kvm_one_reg reg;
 695     int i;
 696     int ret;
 697
 698     if (env->insns_flags & PPC_FLOAT) {
 699         uint64_t fpscr;
 700         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 701
 702         reg.id = KVM_REG_PPC_FPSCR;
 703         reg.addr = (uintptr_t)&fpscr;
 704         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 705         if (ret < 0) {
 706             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 707             return ret;
 708         } else {
 709             env->fpscr = fpscr;
 710         }
 711
 712         for (i = 0; i < 32; i++) {
 713             uint64_t vsr[2];
 714
 715             reg.addr = (uintptr_t) &vsr;
 716             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 717
 718             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 719             if (ret < 0) {
 720                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 721                         vsx ? "VSR" : "FPR", i, strerror(errno));
 722                 return ret;
 723             } else {
 724                 env->fpr[i] = vsr[0];
 725                 if (vsx) {
 726                     env->vsr[i] = vsr[1];
 727                 }
 728             }
 729         }
 730     }
 731
 732     if (env->insns_flags & PPC_ALTIVEC) {
 733         reg.id = KVM_REG_PPC_VSCR;
 734         reg.addr = (uintptr_t)&env->vscr;
 735         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 736         if (ret < 0) {
 737             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 738             return ret;
 739         }
 740
 741         for (i = 0; i < 32; i++) {
 742             reg.id = KVM_REG_PPC_VR(i);
 743             reg.addr = (uintptr_t)&env->avr[i];
 744             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 745             if (ret < 0) {
 746                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 747                         i, strerror(errno));
 748                 return ret;
 749             }
 750         }
 751     }
 752
 753     return 0;
 754 }
 755
 756 #if defined(TARGET_PPC64)
 757 static int kvm_get_vpa(CPUState *cs)
 758 {
 759     PowerPCCPU *cpu = POWERPC_CPU(cs);
 760     CPUPPCState *env = &cpu->env;
 761     struct kvm_one_reg reg;
 762     int ret;
 763
 764     reg.id = KVM_REG_PPC_VPA_ADDR;
 765     reg.addr = (uintptr_t)&env->vpa_addr;
 766     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 767     if (ret < 0) {
 768         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 769         return ret;
 770     }
 771
 772     assert((uintptr_t)&env->slb_shadow_size
 773            == ((uintptr_t)&env->slb_shadow_addr + 8));
 774     reg.id = KVM_REG_PPC_VPA_SLB;
 775     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 776     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 777     if (ret < 0) {
 778         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 779                 strerror(errno));
 780         return ret;
 781     }
 782
 783     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 784     reg.id = KVM_REG_PPC_VPA_DTL;
 785     reg.addr = (uintptr_t)&env->dtl_addr;
 786     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 787     if (ret < 0) {
 788         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 789                 strerror(errno));
 790         return ret;
 791     }
 792
 793     return 0;
 794 }
 795
 796 static int kvm_put_vpa(CPUState *cs)
 797 {
 798     PowerPCCPU *cpu = POWERPC_CPU(cs);
 799     CPUPPCState *env = &cpu->env;
 800     struct kvm_one_reg reg;
 801     int ret;
 802
 803     /* SLB shadow or DTL can't be registered unless a master VPA is
 804      * registered.  That means when restoring state, if a VPA *is*
 805      * registered, we need to set that up first.  If not, we need to
 806      * deregister the others before deregistering the master VPA */
 807     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 808
 809     if (env->vpa_addr) {
 810         reg.id = KVM_REG_PPC_VPA_ADDR;
 811         reg.addr = (uintptr_t)&env->vpa_addr;
 812         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 813         if (ret < 0) {
 814             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 815             return ret;
 816         }
 817     }
 818
 819     assert((uintptr_t)&env->slb_shadow_size
 820            == ((uintptr_t)&env->slb_shadow_addr + 8));
 821     reg.id = KVM_REG_PPC_VPA_SLB;
 822     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 823     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 824     if (ret < 0) {
 825         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 826         return ret;
 827     }
 828
 829     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 830     reg.id = KVM_REG_PPC_VPA_DTL;
 831     reg.addr = (uintptr_t)&env->dtl_addr;
 832     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 833     if (ret < 0) {
 834         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 835                 strerror(errno));
 836         return ret;
 837     }
 838
 839     if (!env->vpa_addr) {
 840         reg.id = KVM_REG_PPC_VPA_ADDR;
 841         reg.addr = (uintptr_t)&env->vpa_addr;
 842         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 843         if (ret < 0) {
 844             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 845             return ret;
 846         }
 847     }
 848
 849     return 0;
 850 }
 851 #endif /* TARGET_PPC64 */
 852
 853 int kvm_arch_put_registers(CPUState *cs, int level)
 854 {
 855     PowerPCCPU *cpu = POWERPC_CPU(cs);
 856     CPUPPCState *env = &cpu->env;
 857     struct kvm_regs regs;
 858     int ret;
 859     int i;
 860
 861     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 862     if (ret < 0) {
 863         return ret;
 864     }
 865
 866     regs.ctr = env->ctr;
 867     regs.lr  = env->lr;
 868     regs.xer = cpu_read_xer(env);
 869     regs.msr = env->msr;
 870     regs.pc = env->nip;
 871
 872     regs.srr0 = env->spr[SPR_SRR0];
 873     regs.srr1 = env->spr[SPR_SRR1];
 874
 875     regs.sprg0 = env->spr[SPR_SPRG0];
 876     regs.sprg1 = env->spr[SPR_SPRG1];
 877     regs.sprg2 = env->spr[SPR_SPRG2];
 878     regs.sprg3 = env->spr[SPR_SPRG3];
 879     regs.sprg4 = env->spr[SPR_SPRG4];
 880     regs.sprg5 = env->spr[SPR_SPRG5];
 881     regs.sprg6 = env->spr[SPR_SPRG6];
 882     regs.sprg7 = env->spr[SPR_SPRG7];
 883
 884     regs.pid = env->spr[SPR_BOOKE_PID];
 885
 886     for (i = 0;i < 32; i++)
 887         regs.gpr[i] = env->gpr[i];
 888
 889     regs.cr = 0;
 890     for (i = 0; i < 8; i++) {
 891         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 892     }
 893
 894     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 895     if (ret < 0)
 896         return ret;
 897
 898     kvm_put_fp(cs);
 899
 900     if (env->tlb_dirty) {
 901         kvm_sw_tlb_put(cpu);
 902         env->tlb_dirty = false;
 903     }
 904
 905     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 906         struct kvm_sregs sregs;
 907
 908         sregs.pvr = env->spr[SPR_PVR];
 909
 910         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 911
 912         /* Sync SLB */
 913 #ifdef TARGET_PPC64
 914         for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 915             sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 916             if (env->slb[i].esid & SLB_ESID_V) {
 917                 sregs.u.s.ppc64.slb[i].slbe |= i;
 918             }
 919             sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 920         }
 921 #endif
 922
 923         /* Sync SRs */
 924         for (i = 0; i < 16; i++) {
 925             sregs.u.s.ppc32.sr[i] = env->sr[i];
 926         }
 927
 928         /* Sync BATs */
 929         for (i = 0; i < 8; i++) {
 930             /* Beware. We have to swap upper and lower bits here */
 931             sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 932                 | env->DBAT[1][i];
 933             sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 934                 | env->IBAT[1][i];
 935         }
 936
 937         ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 938         if (ret) {
 939             return ret;
 940         }
 941     }
 942
 943     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 944         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 945     }
 946
 947     if (cap_one_reg) {
 948         int i;
 949
 950         /* We deliberately ignore errors here, for kernels which have
 951          * the ONE_REG calls, but don't support the specific
 952          * registers, there's a reasonable chance things will still
 953          * work, at least until we try to migrate. */
 954         for (i = 0; i < 1024; i++) {
 955             uint64_t id = env->spr_cb[i].one_reg_id;
 956
 957             if (id != 0) {
 958                 kvm_put_one_spr(cs, id, i);
 959             }
 960         }
 961
 962 #ifdef TARGET_PPC64
 963         if (msr_ts) {
 964             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
 965                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
 966             }
 967             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
 968                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
 969             }
 970             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
 971             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
 972             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
 973             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
 974             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
 975             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
 976             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
 977             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
 978             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
 979             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
 980         }
 981
 982         if (cap_papr) {
 983             if (kvm_put_vpa(cs) < 0) {
 984                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
 985             }
 986         }
 987
 988         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
 989 #endif /* TARGET_PPC64 */
 990     }
 991
 992     return ret;
 993 }
 994
 995 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
 996 {
 997      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
 998 }
 999
1000 int kvm_arch_get_registers(CPUState *cs)
1001 {
1002     PowerPCCPU *cpu = POWERPC_CPU(cs);
1003     CPUPPCState *env = &cpu->env;
1004     struct kvm_regs regs;
1005     struct kvm_sregs sregs;
1006     uint32_t cr;
1007     int i, ret;
1008
1009     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1010     if (ret < 0)
1011         return ret;
1012
1013     cr = regs.cr;
1014     for (i = 7; i >= 0; i--) {
1015         env->crf[i] = cr & 15;
1016         cr >>= 4;
1017     }
1018
1019     env->ctr = regs.ctr;
1020     env->lr = regs.lr;
1021     cpu_write_xer(env, regs.xer);
1022     env->msr = regs.msr;
1023     env->nip = regs.pc;
1024
1025     env->spr[SPR_SRR0] = regs.srr0;
1026     env->spr[SPR_SRR1] = regs.srr1;
1027
1028     env->spr[SPR_SPRG0] = regs.sprg0;
1029     env->spr[SPR_SPRG1] = regs.sprg1;
1030     env->spr[SPR_SPRG2] = regs.sprg2;
1031     env->spr[SPR_SPRG3] = regs.sprg3;
1032     env->spr[SPR_SPRG4] = regs.sprg4;
1033     env->spr[SPR_SPRG5] = regs.sprg5;
1034     env->spr[SPR_SPRG6] = regs.sprg6;
1035     env->spr[SPR_SPRG7] = regs.sprg7;
1036
1037     env->spr[SPR_BOOKE_PID] = regs.pid;
1038
1039     for (i = 0;i < 32; i++)
1040         env->gpr[i] = regs.gpr[i];
1041
1042     kvm_get_fp(cs);
1043
1044     if (cap_booke_sregs) {
1045         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1046         if (ret < 0) {
1047             return ret;
1048         }
1049
1050         if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1051             env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1052             env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1053             env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1054             env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1055             env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1056             env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1057             env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1058             env->spr[SPR_DECR] = sregs.u.e.dec;
1059             env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1060             env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1061             env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1062         }
1063
1064         if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1065             env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1066             env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1067             env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1068             env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1069             env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1070         }
1071
1072         if (sregs.u.e.features & KVM_SREGS_E_64) {
1073             env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1074         }
1075
1076         if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1077             env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1078         }
1079
1080         if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1081             env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1082             kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1083             env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1084             kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1085             env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1086             kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1087             env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1088             kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1089             env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1090             kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1091             env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1092             kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1093             env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1094             kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1095             env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1096             kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1097             env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1098             kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1099             env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1100             kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1101             env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1102             kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1103             env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1104             kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1105             env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1106             kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1107             env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1108             kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1109             env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1110             kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1111             env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1112             kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1113
1114             if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1115                 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1116                 kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1117                 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1118                 kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1119                 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1120                 kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1121             }
1122
1123             if (sregs.u.e.features & KVM_SREGS_E_PM) {
1124                 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1125                 kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1126             }
1127
1128             if (sregs.u.e.features & KVM_SREGS_E_PC) {
1129                 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1130                 kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1131                 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1132                 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1133             }
1134         }
1135
1136         if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1137             env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1138             env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1139             env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1140             env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1141             env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1142             env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1143             env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1144             env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1145             env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1146             env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1147         }
1148
1149         if (sregs.u.e.features & KVM_SREGS_EXP) {
1150             env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1151         }
1152
1153         if (sregs.u.e.features & KVM_SREGS_E_PD) {
1154             env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1155             env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1156         }
1157
1158         if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1159             env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1160             env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1161             env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1162
1163             if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1164                 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1165                 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1166             }
1167         }
1168     }
1169
1170     if (cap_segstate) {
1171         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1172         if (ret < 0) {
1173             return ret;
1174         }
1175
1176         if (!env->external_htab) {
1177             ppc_store_sdr1(env, sregs.u.s.sdr1);
1178         }
1179
1180         /* Sync SLB */
1181 #ifdef TARGET_PPC64
1182         /*
1183          * The packed SLB array we get from KVM_GET_SREGS only contains
1184          * information about valid entries. So we flush our internal
1185          * copy to get rid of stale ones, then put all valid SLB entries
1186          * back in.
1187          */
1188         memset(env->slb, 0, sizeof(env->slb));
1189         for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1190             target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1191             target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1192             /*
1193              * Only restore valid entries
1194              */
1195             if (rb & SLB_ESID_V) {
1196                 ppc_store_slb(env, rb, rs);
1197             }
1198         }
1199 #endif
1200
1201         /* Sync SRs */
1202         for (i = 0; i < 16; i++) {
1203             env->sr[i] = sregs.u.s.ppc32.sr[i];
1204         }
1205
1206         /* Sync BATs */
1207         for (i = 0; i < 8; i++) {
1208             env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1209             env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1210             env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1211             env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1212         }
1213     }
1214
1215     if (cap_hior) {
1216         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1217     }
1218
1219     if (cap_one_reg) {
1220         int i;
1221
1222         /* We deliberately ignore errors here, for kernels which have
1223          * the ONE_REG calls, but don't support the specific
1224          * registers, there's a reasonable chance things will still
1225          * work, at least until we try to migrate. */
1226         for (i = 0; i < 1024; i++) {
1227             uint64_t id = env->spr_cb[i].one_reg_id;
1228
1229             if (id != 0) {
1230                 kvm_get_one_spr(cs, id, i);
1231             }
1232         }
1233
1234 #ifdef TARGET_PPC64
1235         if (msr_ts) {
1236             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1237                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1238             }
1239             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1240                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1241             }
1242             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1243             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1244             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1245             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1246             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1247             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1248             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1249             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1250             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1251             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1252         }
1253
1254         if (cap_papr) {
1255             if (kvm_get_vpa(cs) < 0) {
1256                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1257             }
1258         }
1259
1260         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1261 #endif
1262     }
1263
1264     return 0;
1265 }
1266
1267 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1268 {
1269     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1270
1271     if (irq != PPC_INTERRUPT_EXT) {
1272         return 0;
1273     }
1274
1275     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1276         return 0;
1277     }
1278
1279     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1280
1281     return 0;
1282 }
1283
/* Pick the *_INPUT_INT pin constant that matches the target family being
 * built; kvm_arch_pre_run() tests this bit in env->irq_input_state. */
#if defined(TARGET_PPCEMB)
#  define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#  define PPC_INPUT_INT PPC970_INPUT_INT
#else
#  define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1291
1292 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1293 {
1294     PowerPCCPU *cpu = POWERPC_CPU(cs);
1295     CPUPPCState *env = &cpu->env;
1296     int r;
1297     unsigned irq;
1298
1299     qemu_mutex_lock_iothread();
1300
1301     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1302      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1303     if (!cap_interrupt_level &&
1304         run->ready_for_interrupt_injection &&
1305         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1306         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1307     {
1308         /* For now KVM disregards the 'irq' argument. However, in the
1309          * future KVM could cache it in-kernel to avoid a heavyweight exit
1310          * when reading the UIC.
1311          */
1312         irq = KVM_INTERRUPT_SET;
1313
1314         DPRINTF("injected interrupt %d\n", irq);
1315         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1316         if (r < 0) {
1317             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1318         }
1319
1320         /* Always wake up soon in case the interrupt was level based */
1321         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1322                        (get_ticks_per_sec() / 50));
1323     }
1324
1325     /* We don't know if there are more interrupts pending after this. However,
1326      * the guest will return to userspace in the course of handling this one
1327      * anyways, so we will get a chance to deliver the rest. */
1328
1329     qemu_mutex_unlock_iothread();
1330 }
1331
1332 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1333 {
1334     return MEMTXATTRS_UNSPECIFIED;
1335 }
1336
1337 int kvm_arch_process_async_events(CPUState *cs)
1338 {
1339     return cs->halted;
1340 }
1341
1342 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1343 {
1344     CPUState *cs = CPU(cpu);
1345     CPUPPCState *env = &cpu->env;
1346
1347     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1348         cs->halted = 1;
1349         cs->exception_index = EXCP_HLT;
1350     }
1351
1352     return 0;
1353 }
1354
1355 /* map dcr access to existing qemu dcr emulation */
1356 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1357 {
1358     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1359         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1360
1361     return 0;
1362 }
1363
1364 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1365 {
1366     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1367         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1368
1369     return 0;
1370 }
1371
1372 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1373 {
1374     /* Mixed endian case is not handled */
1375     uint32_t sc = debug_inst_opcode;
1376
1377     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1378                             sizeof(sc), 0) ||
1379         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1380         return -EINVAL;
1381     }
1382
1383     return 0;
1384 }
1385
1386 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1387 {
1388     uint32_t sc;
1389
1390     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1391         sc != debug_inst_opcode ||
1392         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1393                             sizeof(sc), 1)) {
1394         return -EINVAL;
1395     }
1396
1397     return 0;
1398 }
1399
1400 static int find_hw_breakpoint(target_ulong addr, int type)
1401 {
1402     int n;
1403
1404     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1405            <= ARRAY_SIZE(hw_debug_points));
1406
1407     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1408         if (hw_debug_points[n].addr == addr &&
1409              hw_debug_points[n].type == type) {
1410             return n;
1411         }
1412     }
1413
1414     return -1;
1415 }
1416
1417 static int find_hw_watchpoint(target_ulong addr, int *flag)
1418 {
1419     int n;
1420
1421     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1422     if (n >= 0) {
1423         *flag = BP_MEM_ACCESS;
1424         return n;
1425     }
1426
1427     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1428     if (n >= 0) {
1429         *flag = BP_MEM_WRITE;
1430         return n;
1431     }
1432
1433     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1434     if (n >= 0) {
1435         *flag = BP_MEM_READ;
1436         return n;
1437     }
1438
1439     return -1;
1440 }
1441
1442 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1443                                   target_ulong len, int type)
1444 {
1445     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1446         return -ENOBUFS;
1447     }
1448
1449     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1450     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1451
1452     switch (type) {
1453     case GDB_BREAKPOINT_HW:
1454         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1455             return -ENOBUFS;
1456         }
1457
1458         if (find_hw_breakpoint(addr, type) >= 0) {
1459             return -EEXIST;
1460         }
1461
1462         nb_hw_breakpoint++;
1463         break;
1464
1465     case GDB_WATCHPOINT_WRITE:
1466     case GDB_WATCHPOINT_READ:
1467     case GDB_WATCHPOINT_ACCESS:
1468         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1469             return -ENOBUFS;
1470         }
1471
1472         if (find_hw_breakpoint(addr, type) >= 0) {
1473             return -EEXIST;
1474         }
1475
1476         nb_hw_watchpoint++;
1477         break;
1478
1479     default:
1480         return -ENOSYS;
1481     }
1482
1483     return 0;
1484 }
1485
1486 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1487                                   target_ulong len, int type)
1488 {
1489     int n;
1490
1491     n = find_hw_breakpoint(addr, type);
1492     if (n < 0) {
1493         return -ENOENT;
1494     }
1495
1496     switch (type) {
1497     case GDB_BREAKPOINT_HW:
1498         nb_hw_breakpoint--;
1499         break;
1500
1501     case GDB_WATCHPOINT_WRITE:
1502     case GDB_WATCHPOINT_READ:
1503     case GDB_WATCHPOINT_ACCESS:
1504         nb_hw_watchpoint--;
1505         break;
1506
1507     default:
1508         return -ENOSYS;
1509     }
1510     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1511
1512     return 0;
1513 }
1514
1515 void kvm_arch_remove_all_hw_breakpoints(void)
1516 {
1517     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1518 }
1519
1520 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1521 {
1522     int n;
1523
1524     /* Software Breakpoint updates */
1525     if (kvm_sw_breakpoints_active(cs)) {
1526         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1527     }
1528
1529     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1530            <= ARRAY_SIZE(hw_debug_points));
1531     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1532
1533     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1534         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1535         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1536         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1537             switch (hw_debug_points[n].type) {
1538             case GDB_BREAKPOINT_HW:
1539                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1540                 break;
1541             case GDB_WATCHPOINT_WRITE:
1542                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1543                 break;
1544             case GDB_WATCHPOINT_READ:
1545                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1546                 break;
1547             case GDB_WATCHPOINT_ACCESS:
1548                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1549                                         KVMPPC_DEBUG_WATCH_READ;
1550                 break;
1551             default:
1552                 cpu_abort(cs, "Unsupported breakpoint type\n");
1553             }
1554             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1555         }
1556     }
1557 }
1558
1559 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1560 {
1561     CPUState *cs = CPU(cpu);
1562     CPUPPCState *env = &cpu->env;
1563     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1564     int handle = 0;
1565     int n;
1566     int flag = 0;
1567
1568     if (cs->singlestep_enabled) {
1569         handle = 1;
1570     } else if (arch_info->status) {
1571         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1572             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1573                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1574                 if (n >= 0) {
1575                     handle = 1;
1576                 }
1577             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1578                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1579                 n = find_hw_watchpoint(arch_info->address,  &flag);
1580                 if (n >= 0) {
1581                     handle = 1;
1582                     cs->watchpoint_hit = &hw_watchpoint;
1583                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1584                     hw_watchpoint.flags = flag;
1585                 }
1586             }
1587         }
1588     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1589         handle = 1;
1590     } else {
1591         /* QEMU is not able to handle debug exception, so inject
1592          * program exception to guest;
1593          * Yes program exception NOT debug exception !!
1594          * When QEMU is using debug resources then debug exception must
1595          * be always set. To achieve this we set MSR_DE and also set
1596          * MSRP_DEP so guest cannot change MSR_DE.
1597          * When emulating debug resource for guest we want guest
1598          * to control MSR_DE (enable/disable debug interrupt on need).
1599          * Supporting both configurations are NOT possible.
1600          * So the result is that we cannot share debug resources
1601          * between QEMU and Guest on BOOKE architecture.
1602          * In the current design QEMU gets the priority over guest,
1603          * this means that if QEMU is using debug resources then guest
1604          * cannot use them;
1605          * For software breakpoint QEMU uses a privileged instruction;
1606          * So there cannot be any reason that we are here for guest
1607          * set debug exception, only possibility is guest executed a
1608          * privileged / illegal instruction and that's why we are
1609          * injecting a program interrupt.
1610          */
1611
1612         cpu_synchronize_state(cs);
1613         /* env->nip is PC, so increment this by 4 to use
1614          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1615          */
1616         env->nip += 4;
1617         cs->exception_index = POWERPC_EXCP_PROGRAM;
1618         env->error_code = POWERPC_EXCP_INVAL;
1619         ppc_cpu_do_interrupt(cs);
1620     }
1621
1622     return handle;
1623 }
1624
1625 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1626 {
1627     PowerPCCPU *cpu = POWERPC_CPU(cs);
1628     CPUPPCState *env = &cpu->env;
1629     int ret;
1630
1631     qemu_mutex_lock_iothread();
1632
1633     switch (run->exit_reason) {
1634     case KVM_EXIT_DCR:
1635         if (run->dcr.is_write) {
1636             DPRINTF("handle dcr write\n");
1637             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1638         } else {
1639             DPRINTF("handle dcr read\n");
1640             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1641         }
1642         break;
1643     case KVM_EXIT_HLT:
1644         DPRINTF("handle halt\n");
1645         ret = kvmppc_handle_halt(cpu);
1646         break;
1647 #if defined(TARGET_PPC64)
1648     case KVM_EXIT_PAPR_HCALL:
1649         DPRINTF("handle PAPR hypercall\n");
1650         run->papr_hcall.ret = spapr_hypercall(cpu,
1651                                               run->papr_hcall.nr,
1652                                               run->papr_hcall.args);
1653         ret = 0;
1654         break;
1655 #endif
1656     case KVM_EXIT_EPR:
1657         DPRINTF("handle epr\n");
1658         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1659         ret = 0;
1660         break;
1661     case KVM_EXIT_WATCHDOG:
1662         DPRINTF("handle watchdog expiry\n");
1663         watchdog_perform_action();
1664         ret = 0;
1665         break;
1666
1667     case KVM_EXIT_DEBUG:
1668         DPRINTF("handle debug exception\n");
1669         if (kvm_handle_debug(cpu, run)) {
1670             ret = EXCP_DEBUG;
1671             break;
1672         }
1673         /* re-enter, this exception was guest-internal */
1674         ret = 0;
1675         break;
1676
1677     default:
1678         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1679         ret = -1;
1680         break;
1681     }
1682
1683     qemu_mutex_unlock_iothread();
1684     return ret;
1685 }
1686
1687 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1688 {
1689     CPUState *cs = CPU(cpu);
1690     uint32_t bits = tsr_bits;
1691     struct kvm_one_reg reg = {
1692         .id = KVM_REG_PPC_OR_TSR,
1693         .addr = (uintptr_t) &bits,
1694     };
1695
1696     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1697 }
1698
1699 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1700 {
1701
1702     CPUState *cs = CPU(cpu);
1703     uint32_t bits = tsr_bits;
1704     struct kvm_one_reg reg = {
1705         .id = KVM_REG_PPC_CLEAR_TSR,
1706         .addr = (uintptr_t) &bits,
1707     };
1708
1709     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1710 }
1711
1712 int kvmppc_set_tcr(PowerPCCPU *cpu)
1713 {
1714     CPUState *cs = CPU(cpu);
1715     CPUPPCState *env = &cpu->env;
1716     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1717
1718     struct kvm_one_reg reg = {
1719         .id = KVM_REG_PPC_TCR,
1720         .addr = (uintptr_t) &tcr,
1721     };
1722
1723     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1724 }
1725
1726 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1727 {
1728     CPUState *cs = CPU(cpu);
1729     int ret;
1730
1731     if (!kvm_enabled()) {
1732         return -1;
1733     }
1734
1735     if (!cap_ppc_watchdog) {
1736         printf("warning: KVM does not support watchdog");
1737         return -1;
1738     }
1739
1740     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1741     if (ret < 0) {
1742         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1743                 __func__, strerror(-ret));
1744         return ret;
1745     }
1746
1747     return ret;
1748 }
1749
/* Find the first line of /proc/cpuinfo that starts with @field and copy it
 * (the whole line) into @value, bounded by @len.
 * Returns 0 when such a line was found, -1 otherwise. */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *fp = fopen("/proc/cpuinfo", "r");

    if (fp == NULL) {
        return -1;
    }

    while (fgets(line, sizeof(line), fp)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        /* Stop as soon as a line comes back empty. */
        if (*line == '\0') {
            break;
        }
    }

    fclose(fp);

    return ret;
}
1777
/* Return the number parsed from the text after the ':' on the "timebase"
 * line of /proc/cpuinfo. Falls back to get_ticks_per_sec() when that line
 * is missing or contains no ':'. */
uint32_t kvmppc_get_tbfreq(void)
{
    char buf[512];
    char *colon;
    uint32_t fallback = get_ticks_per_sec();

    if (read_cpuinfo("timebase", buf, sizeof(buf)) != 0) {
        return fallback;
    }

    colon = strchr(buf, ':');
    if (colon == NULL) {
        return fallback;
    }

    /* The value starts right after the colon. */
    return atoi(colon + 1);
}
1796
/* Load /proc/device-tree/system-id into *value through
 * g_file_get_contents() and return that call's success flag. */
bool kvmppc_get_host_serial(char **value)
{
    const char *path = "/proc/device-tree/system-id";

    return g_file_get_contents(path, value, NULL, NULL);
}
1802
/* Load /proc/device-tree/model into *value through g_file_get_contents()
 * and return that call's success flag. */
bool kvmppc_get_host_model(char **value)
{
    const char *path = "/proc/device-tree/model";

    return g_file_get_contents(path, value, NULL, NULL);
}
1807
1808 /* Try to find a device tree node for a CPU with clock-frequency property */
1809 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1810 {
1811     struct dirent *dirp;
1812     DIR *dp;
1813
1814     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1815         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1816         return -1;
1817     }
1818
1819     buf[0] = '\0';
1820     while ((dirp = readdir(dp)) != NULL) {
1821         FILE *f;
1822         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1823                  dirp->d_name);
1824         f = fopen(buf, "r");
1825         if (f) {
1826             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1827             fclose(f);
1828             break;
1829         }
1830         buf[0] = '\0';
1831     }
1832     closedir(dp);
1833     if (buf[0] == '\0') {
1834         printf("Unknown host!\n");
1835         return -1;
1836     }
1837
1838     return 0;
1839 }
1840
/* Read a big-endian integer property from the file @filename.
 * A 4-byte file is decoded as 32-bit, an 8-byte file as 64-bit.
 * Returns (uint64_t)-1 if the file cannot be opened and 0 if the number of
 * bytes read is neither 4 nor 8. */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    int nread;
    FILE *fp = fopen(filename, "rb");

    if (fp == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), fp);
    fclose(fp);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1867
1868 /* Read a CPU node property from the host device tree that's a single
1869  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1870  * (can't find or open the property, or doesn't understand the
1871  * format) */
1872 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1873 {
1874     char buf[PATH_MAX], *tmp;
1875     uint64_t val;
1876
1877     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1878         return -1;
1879     }
1880
1881     tmp = g_strdup_printf("%s/%s", buf, propname);
1882     val = kvmppc_read_int_dt(tmp);
1883     g_free(tmp);
1884
1885     return val;
1886 }
1887
/*
 * Return the "clock-frequency" property of the host CPU device tree
 * node, as read by kvmppc_read_int_cpu_dt() (including its failure
 * values).
 */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1892
/*
 * Return the "ibm,vmx" property of the host CPU device tree node,
 * truncated to 32 bits.  A -1 failure value from
 * kvmppc_read_int_cpu_dt() therefore shows up as 0xffffffff.
 */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    return (uint32_t)prop;
}
1897
/*
 * Return the "ibm,dfp" property of the host CPU device tree node,
 * truncated to 32 bits.  A -1 failure value from
 * kvmppc_read_int_cpu_dt() therefore shows up as 0xffffffff.
 */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    return (uint32_t)prop;
}
1902
1903 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1904  {
1905      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1906      CPUState *cs = CPU(cpu);
1907
1908     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1909         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1910         return 0;
1911     }
1912
1913     return 1;
1914 }
1915
1916 int kvmppc_get_hasidle(CPUPPCState *env)
1917 {
1918     struct kvm_ppc_pvinfo pvinfo;
1919
1920     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1921         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1922         return 1;
1923     }
1924
1925     return 0;
1926 }
1927
1928 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1929 {
1930     uint32_t *hc = (uint32_t*)buf;
1931     struct kvm_ppc_pvinfo pvinfo;
1932
1933     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1934         memcpy(buf, pvinfo.hcall, buf_len);
1935         return 0;
1936     }
1937
1938     /*
1939      * Fallback to always fail hypercalls regardless of endianness:
1940      *
1941      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1942      *     li r3, -1
1943      *     b .+8       (becomes nop in wrong endian)
1944      *     bswap32(li r3, -1)
1945      */
1946
1947     hc[0] = cpu_to_be32(0x08000048);
1948     hc[1] = cpu_to_be32(0x3860ffff);
1949     hc[2] = cpu_to_be32(0x48000008);
1950     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1951
1952     return 0;
1953 }
1954
1955 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1956 {
1957     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1958 }
1959
1960 void kvmppc_enable_logical_ci_hcalls(void)
1961 {
1962     /*
1963      * FIXME: it would be nice if we could detect the cases where
1964      * we're using a device which requires the in kernel
1965      * implementation of these hcalls, but the kernel lacks them and
1966      * produce a warning.
1967      */
1968     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1969     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1970 }
1971
1972 void kvmppc_enable_set_mode_hcall(void)
1973 {
1974     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1975 }
1976
1977 void kvmppc_set_papr(PowerPCCPU *cpu)
1978 {
1979     CPUState *cs = CPU(cpu);
1980     int ret;
1981
1982     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1983     if (ret) {
1984         cpu_abort(cs, "This KVM version does not support PAPR\n");
1985     }
1986
1987     /* Update the capability flag so we sync the right information
1988      * with kvm */
1989     cap_papr = 1;
1990 }
1991
1992 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
1993 {
1994     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
1995 }
1996
1997 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1998 {
1999     CPUState *cs = CPU(cpu);
2000     int ret;
2001
2002     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2003     if (ret && mpic_proxy) {
2004         cpu_abort(cs, "This KVM version does not support EPR\n");
2005     }
2006 }
2007
2008 int kvmppc_smt_threads(void)
2009 {
2010     return cap_ppc_smt ? cap_ppc_smt : 1;
2011 }
2012
2013 #ifdef TARGET_PPC64
2014 off_t kvmppc_alloc_rma(void **rma)
2015 {
2016     off_t size;
2017     int fd;
2018     struct kvm_allocate_rma ret;
2019
2020     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2021      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2022      *                      not necessary on this hardware
2023      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2024      *
2025      * FIXME: We should allow the user to force contiguous RMA
2026      * allocation in the cap_ppc_rma==1 case.
2027      */
2028     if (cap_ppc_rma < 2) {
2029         return 0;
2030     }
2031
2032     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2033     if (fd < 0) {
2034         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2035                 strerror(errno));
2036         return -1;
2037     }
2038
2039     size = MIN(ret.rma_size, 256ul << 20);
2040
2041     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2042     if (*rma == MAP_FAILED) {
2043         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2044         return -1;
2045     };
2046
2047     return size;
2048 }
2049
2050 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2051 {
2052     struct kvm_ppc_smmu_info info;
2053     long rampagesize, best_page_shift;
2054     int i;
2055
2056     if (cap_ppc_rma >= 2) {
2057         return current_size;
2058     }
2059
2060     /* Find the largest hardware supported page size that's less than
2061      * or equal to the (logical) backing page size of guest RAM */
2062     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2063     rampagesize = getrampagesize();
2064     best_page_shift = 0;
2065
2066     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2067         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2068
2069         if (!sps->page_shift) {
2070             continue;
2071         }
2072
2073         if ((sps->page_shift > best_page_shift)
2074             && ((1UL << sps->page_shift) <= rampagesize)) {
2075             best_page_shift = sps->page_shift;
2076         }
2077     }
2078
2079     return MIN(current_size,
2080                1ULL << (best_page_shift + hash_shift - 7));
2081 }
2082 #endif
2083
2084 bool kvmppc_spapr_use_multitce(void)
2085 {
2086     return cap_spapr_multitce;
2087 }
2088
2089 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2090                               bool need_vfio)
2091 {
2092     struct kvm_create_spapr_tce args = {
2093         .liobn = liobn,
2094         .window_size = window_size,
2095     };
2096     long len;
2097     int fd;
2098     void *table;
2099
2100     /* Must set fd to -1 so we don't try to munmap when called for
2101      * destroying the table, which the upper layers -will- do
2102      */
2103     *pfd = -1;
2104     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2105         return NULL;
2106     }
2107
2108     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2109     if (fd < 0) {
2110         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2111                 liobn);
2112         return NULL;
2113     }
2114
2115     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2116     /* FIXME: round this up to page size */
2117
2118     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2119     if (table == MAP_FAILED) {
2120         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2121                 liobn);
2122         close(fd);
2123         return NULL;
2124     }
2125
2126     *pfd = fd;
2127     return table;
2128 }
2129
/*
 * Unmap a TCE table of @nb_table 64-bit entries and close its @fd.
 *
 * Returns -1 if @fd is negative, 0 otherwise.  A failing munmap() or
 * close() is only reported on stderr; if munmap() fails, close() is not
 * attempted.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long map_len;
    bool failed;

    if (fd < 0) {
        return -1;
    }

    map_len = nb_table * sizeof(uint64_t);

    failed = munmap(table, map_len) < 0;
    if (!failed) {
        failed = close(fd) < 0;
    }
    if (failed) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2148
2149 int kvmppc_reset_htab(int shift_hint)
2150 {
2151     uint32_t shift = shift_hint;
2152
2153     if (!kvm_enabled()) {
2154         /* Full emulation, tell caller to allocate htab itself */
2155         return 0;
2156     }
2157     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2158         int ret;
2159         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2160         if (ret == -ENOTTY) {
2161             /* At least some versions of PR KVM advertise the
2162              * capability, but don't implement the ioctl().  Oops.
2163              * Return 0 so that we allocate the htab in qemu, as is
2164              * correct for PR. */
2165             return 0;
2166         } else if (ret < 0) {
2167             return ret;
2168         }
2169         return shift;
2170     }
2171
2172     /* We have a kernel that predates the htab reset calls.  For PR
2173      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2174      * this era, it has allocated a 16MB fixed size hash table
2175      * already.  Kernels of this era have the GET_PVINFO capability
2176      * only on PR, so we use this hack to determine the right
2177      * answer */
2178     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2179         /* PR - tell caller to allocate htab */
2180         return 0;
2181     } else {
2182         /* HV - assume 16MB kernel allocated htab */
2183         return 24;
2184     }
2185 }
2186
2187 static inline uint32_t mfpvr(void)
2188 {
2189     uint32_t pvr;
2190
2191     asm ("mfpvr %0"
2192          : "=r"(pvr));
2193     return pvr;
2194 }
2195
/*
 * Set (@on true) or clear (@on false) the bits given by @flags in *@word.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    uint64_t current = *word;

    *word = on ? (current | flags) : (current & ~flags);
}
2204
2205 static void kvmppc_host_cpu_initfn(Object *obj)
2206 {
2207     assert(kvm_enabled());
2208 }
2209
2210 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2211 {
2212     DeviceClass *dc = DEVICE_CLASS(oc);
2213     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2214     uint32_t vmx = kvmppc_get_vmx();
2215     uint32_t dfp = kvmppc_get_dfp();
2216     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2217     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2218
2219     /* Now fix up the class with information we can query from the host */
2220     pcc->pvr = mfpvr();
2221
2222     if (vmx != -1) {
2223         /* Only override when we know what the host supports */
2224         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2225         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2226     }
2227     if (dfp != -1) {
2228         /* Only override when we know what the host supports */
2229         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2230     }
2231
2232     if (dcache_size != -1) {
2233         pcc->l1_dcache_size = dcache_size;
2234     }
2235
2236     if (icache_size != -1) {
2237         pcc->l1_icache_size = icache_size;
2238     }
2239
2240     /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2241     dc->cannot_destroy_with_object_finalize_yet = true;
2242 }
2243
2244 bool kvmppc_has_cap_epr(void)
2245 {
2246     return cap_epr;
2247 }
2248
2249 bool kvmppc_has_cap_htab_fd(void)
2250 {
2251     return cap_htab_fd;
2252 }
2253
2254 bool kvmppc_has_cap_fixup_hcalls(void)
2255 {
2256     return cap_fixup_hcalls;
2257 }
2258
2259 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2260 {
2261     ObjectClass *oc = OBJECT_CLASS(pcc);
2262
2263     while (oc && !object_class_is_abstract(oc)) {
2264         oc = object_class_get_parent(oc);
2265     }
2266     assert(oc);
2267
2268     return POWERPC_CPU_CLASS(oc);
2269 }
2270
2271 static int kvm_ppc_register_host_cpu_type(void)
2272 {
2273     TypeInfo type_info = {
2274         .name = TYPE_HOST_POWERPC_CPU,
2275         .instance_init = kvmppc_host_cpu_initfn,
2276         .class_init = kvmppc_host_cpu_class_init,
2277     };
2278     uint32_t host_pvr = mfpvr();
2279     PowerPCCPUClass *pvr_pcc;
2280     DeviceClass *dc;
2281
2282     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2283     if (pvr_pcc == NULL) {
2284         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2285     }
2286     if (pvr_pcc == NULL) {
2287         return -1;
2288     }
2289     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2290     type_register(&type_info);
2291
2292     /* Register generic family CPU class for a family */
2293     pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2294     dc = DEVICE_CLASS(pvr_pcc);
2295     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2296     type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2297     type_register(&type_info);
2298
2299     return 0;
2300 }
2301
2302 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2303 {
2304     struct kvm_rtas_token_args args = {
2305         .token = token,
2306     };
2307
2308     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2309         return -ENOENT;
2310     }
2311
2312     strncpy(args.name, function, sizeof(args.name));
2313
2314     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2315 }
2316
2317 int kvmppc_get_htab_fd(bool write)
2318 {
2319     struct kvm_get_htab_fd s = {
2320         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2321         .start_index = 0,
2322     };
2323
2324     if (!cap_htab_fd) {
2325         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2326         return -1;
2327     }
2328
2329     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2330 }
2331
2332 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2333 {
2334     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2335     uint8_t buf[bufsize];
2336     ssize_t rc;
2337
2338     do {
2339         rc = read(fd, buf, bufsize);
2340         if (rc < 0) {
2341             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2342                     strerror(errno));
2343             return rc;
2344         } else if (rc) {
2345             uint8_t *buffer = buf;
2346             ssize_t n = rc;
2347             while (n) {
2348                 struct kvm_get_htab_header *head =
2349                     (struct kvm_get_htab_header *) buffer;
2350                 size_t chunksize = sizeof(*head) +
2351                      HASH_PTE_SIZE_64 * head->n_valid;
2352
2353                 qemu_put_be32(f, head->index);
2354                 qemu_put_be16(f, head->n_valid);
2355                 qemu_put_be16(f, head->n_invalid);
2356                 qemu_put_buffer(f, (void *)(head + 1),
2357                                 HASH_PTE_SIZE_64 * head->n_valid);
2358
2359                 buffer += chunksize;
2360                 n -= chunksize;
2361             }
2362         }
2363     } while ((rc != 0)
2364              && ((max_ns < 0)
2365                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2366
2367     return (rc == 0) ? 1 : 0;
2368 }
2369
2370 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2371                            uint16_t n_valid, uint16_t n_invalid)
2372 {
2373     struct kvm_get_htab_header *buf;
2374     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2375     ssize_t rc;
2376
2377     buf = alloca(chunksize);
2378     buf->index = index;
2379     buf->n_valid = n_valid;
2380     buf->n_invalid = n_invalid;
2381
2382     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2383
2384     rc = write(fd, buf, chunksize);
2385     if (rc < 0) {
2386         fprintf(stderr, "Error writing KVM hash table: %s\n",
2387                 strerror(errno));
2388         return rc;
2389     }
2390     if (rc != chunksize) {
2391         /* We should never get a short write on a single chunk */
2392         fprintf(stderr, "Short write, restoring KVM hash table\n");
2393         return -1;
2394     }
2395     return 0;
2396 }
2397
2398 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2399 {
2400     return true;
2401 }
2402
2403 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2404 {
2405     return 1;
2406 }
2407
/*
 * Always returns 1; neither @code nor @addr is examined.
 */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}
2412
2413 void kvm_arch_init_irq_routing(KVMState *s)
2414 {
2415 }
2416
/*
 * Buffer exchanged with the KVM HTAB fd by the hash64 PTEG accessors in
 * this file: one stream header immediately followed by HPTE words
 * (presumably two target_ulong words per HPTE, hence the "* 2").
 */
struct kvm_get_htab_buf {
    struct kvm_get_htab_header header;
    /*
     * We require one extra byte for read
     *
     * NOTE(review): the slack actually provided is one extra target_ulong
     * element, not a single byte; why the read path needs it is not
     * visible here -- confirm.
     */
    target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
};
2424
2425 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2426 {
2427     int htab_fd;
2428     struct kvm_get_htab_fd ghf;
2429     struct kvm_get_htab_buf  *hpte_buf;
2430
2431     ghf.flags = 0;
2432     ghf.start_index = pte_index;
2433     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2434     if (htab_fd < 0) {
2435         goto error_out;
2436     }
2437
2438     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2439     /*
2440      * Read the hpte group
2441      */
2442     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2443         goto out_close;
2444     }
2445
2446     close(htab_fd);
2447     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2448
2449 out_close:
2450     g_free(hpte_buf);
2451     close(htab_fd);
2452 error_out:
2453     return 0;
2454 }
2455
2456 void kvmppc_hash64_free_pteg(uint64_t token)
2457 {
2458     struct kvm_get_htab_buf *htab_buf;
2459
2460     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2461                             hpte);
2462     g_free(htab_buf);
2463     return;
2464 }
2465
2466 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2467                              target_ulong pte0, target_ulong pte1)
2468 {
2469     int htab_fd;
2470     struct kvm_get_htab_fd ghf;
2471     struct kvm_get_htab_buf hpte_buf;
2472
2473     ghf.flags = 0;
2474     ghf.start_index = 0;     /* Ignored */
2475     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2476     if (htab_fd < 0) {
2477         goto error_out;
2478     }
2479
2480     hpte_buf.header.n_valid = 1;
2481     hpte_buf.header.n_invalid = 0;
2482     hpte_buf.header.index = pte_index;
2483     hpte_buf.hpte[0] = pte0;
2484     hpte_buf.hpte[1] = pte1;
2485     /*
2486      * Write the hpte entry.
2487      * CAUTION: write() has the warn_unused_result attribute. Hence we
2488      * need to check the return value, even though we do nothing.
2489      */
2490     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2491         goto out_close;
2492     }
2493
2494 out_close:
2495     close(htab_fd);
2496     return;
2497
2498 error_out:
2499     return;
2500 }
2501
2502 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2503                              uint64_t address, uint32_t data, PCIDevice *dev)
2504 {
2505     return 0;
2506 }
2507
/*
 * Return the low 16 bits of @data.
 */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t low16_mask = 0xffff;

    return data & low16_mask;
}
2512
2513 int kvmppc_enable_hwrng(void)
2514 {
2515     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2516         return -1;
2517     }
2518
2519     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2520 }