/*
 * Source: qemu.git / target-ppc / kvm.c
 * (blob 70ca29637f947ac404385a8e00b1e57a859ba2f3; unrelated gitweb
 * navigation residue removed)
 */
/*
 * PowerPC implementation of KVM hooks
 *
 * Copyright IBM Corp. 2007
 * Copyright (C) 2011 Freescale Semiconductor, Inc.
 *
 * Authors:
 *  Jerone Young <jyoung5@us.ibm.com>
 *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
 *  Hollis Blanchard <hollisb@us.ibm.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
40 #include "trace.h"
41 #include "exec/gdbstub.h"
42 #include "exec/memattrs.h"
43 #include "sysemu/hostmem.h"
/* Uncomment to enable verbose KVM debug output on stderr */
//#define DEBUG_KVM

#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
/* Compiles to nothing, but still type-checks the arguments */
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
55 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
57 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
58 KVM_CAP_LAST_INFO
/* Results of the one-time kernel capability probes done in kvm_arch_init().
 * Each cap_* flag gates use of an optional KVM interface later in this file. */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_spapr_multitce;
static int cap_spapr_vfio;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;          /* not probed here; set by kvmppc_set_papr() */
static int cap_htab_fd;
static int cap_fixup_hcalls;

/* Software-breakpoint opcode reported by the kernel (KVM_REG_PPC_DEBUG_INST) */
static uint32_t debug_inst_opcode;

/* XXX We have a race condition where we actually have a level triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes but ignores it, goes to sleep and never gets notified that there's
 *     still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can assure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 */
static QEMUTimer *idle_timer;
91 static void kvm_kick_cpu(void *opaque)
93 PowerPCCPU *cpu = opaque;
95 qemu_cpu_kick(CPU(cpu));
static int kvm_ppc_register_host_cpu_type(void);

/*
 * Arch-specific KVM initialisation: probe every optional PPC capability
 * once and cache the results in the cap_* globals above, then register
 * the "host" CPU type.  Always succeeds (missing capabilities only
 * degrade functionality, except the level-irq warning below).
 */
int kvm_arch_init(MachineState *ms, KVMState *s)
{
    cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
    cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
    cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
    cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
    cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
    cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
    cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
    /* VFIO-via-SPAPR-TCE acceleration is deliberately disabled here */
    cap_spapr_vfio = false;
    cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
    cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
    cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
    cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
    /* Note: we don't set cap_papr here, because this capability is
     * only activated after this by kvmppc_set_papr() */
    cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
    cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);

    if (!cap_interrupt_level) {
        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
                        "VM to stall at times!\n");
    }

    kvm_ppc_register_host_cpu_type();

    return 0;
}
/*
 * Push the guest PVR into the kernel via KVM_SET_SREGS.
 * BookE vCPUs are skipped (the host PVR is used there); on BookS a
 * missing SEGSTATE capability is a hard error since we'd have no way
 * to set the PVR at all.  Returns 0 or a negative errno.
 */
static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
{
    CPUPPCState *cenv = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_sregs sregs;
    int ret;

    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        /* What we're really trying to say is "if we're on BookE, we use
           the native PVR for now". This is the only sane way to check
           it though, so we potentially confuse users that they can run
           BookE guests on BookS. Let's hope nobody dares enough :) */
        return 0;
    } else {
        if (!cap_segstate) {
            fprintf(stderr, "kvm error: missing PVR setting capability\n");
            return -ENOSYS;
        }
    }

    /* Read-modify-write so unrelated sregs fields are preserved */
    ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
    if (ret) {
        return ret;
    }

    sregs.pvr = cenv->spr[SPR_PVR];
    return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
}
/* Set up a shared TLB array with KVM */
/*
 * Registers QEMU's BookE 2.06 TLB array (env->tlb.tlbm) with the kernel
 * via KVM_CAP_SW_TLB so that QEMU and KVM operate on the same backing
 * store.  A missing capability is not an error (returns 0); on success
 * env->kvm_sw_tlb is set and the whole TLB is marked dirty for the
 * first sync.
 */
static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_book3e_206_tlb_params params = {};
    struct kvm_config_tlb cfg = {};
    unsigned int entries = 0;
    int ret, i;

    if (!kvm_enabled() ||
        !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
        return 0;
    }

    assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);

    for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
        params.tlb_sizes[i] = booke206_tlb_size(env, i);
        params.tlb_ways[i] = booke206_tlb_ways(env, i);
        entries += params.tlb_sizes[i];
    }

    /* The shared layout only works if both sides agree on entry count
     * and entry size. */
    assert(entries == env->nb_tlb);
    assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));

    env->tlb_dirty = true;

    cfg.array = (uintptr_t)env->tlb.tlbm;
    cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
    cfg.params = (uintptr_t)&params;
    cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;

    ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
    if (ret < 0) {
        fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
                __func__, strerror(-ret));
        return ret;
    }

    env->kvm_sw_tlb = true;
    return 0;
}
#if defined(TARGET_PPC64)
/*
 * Synthesise an smmu_info structure for kernels that predate
 * KVM_PPC_GET_SMMU_INFO, distinguishing PR KVM from HV KVM by the
 * presence of KVM_CAP_PPC_GET_PVINFO.
 */
static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
                                       struct kvm_ppc_smmu_info *info)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);

    memset(info, 0, sizeof(*info));

    /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
     * need to "guess" what the supported page sizes are.
     *
     * For that to work we make a few assumptions:
     *
     * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
     *   KVM which only supports 4K and 16M pages, but supports them
     *   regardless of the backing store characteritics. We also don't
     *   support 1T segments.
     *
     *   This is safe as if HV KVM ever supports that capability or PR
     *   KVM grows supports for more page/segment sizes, those versions
     *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
     *   will not hit this fallback
     *
     * - Else we are running HV KVM. This means we only support page
     *   sizes that fit in the backing store. Additionally we only
     *   advertize 64K pages if the processor is ARCH 2.06 and we assume
     *   P7 encodings for the SLB and hash table. Here too, we assume
     *   support for any newer processor will mean a kernel that
     *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
     *   this fallback.
     */
    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
        /* No flags */
        info->flags = 0;
        info->slb_size = 64;

        /* Standard 4k base page size segment */
        info->sps[0].page_shift = 12;
        info->sps[0].slb_enc = 0;
        info->sps[0].enc[0].page_shift = 12;
        info->sps[0].enc[0].pte_enc = 0;

        /* Standard 16M large page size segment */
        info->sps[1].page_shift = 24;
        info->sps[1].slb_enc = SLB_VSID_L;
        info->sps[1].enc[0].page_shift = 24;
        info->sps[1].enc[0].pte_enc = 0;
    } else {
        int i = 0;

        /* HV KVM has backing store size restrictions */
        info->flags = KVM_PPC_PAGE_SIZES_REAL;

        if (env->mmu_model & POWERPC_MMU_1TSEG) {
            info->flags |= KVM_PPC_1T_SEGMENTS;
        }

        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {
            info->slb_size = 32;
        } else {
            info->slb_size = 64;
        }

        /* Standard 4k base page size segment */
        info->sps[i].page_shift = 12;
        info->sps[i].slb_enc = 0;
        info->sps[i].enc[0].page_shift = 12;
        info->sps[i].enc[0].pte_enc = 0;
        i++;

        /* 64K on MMU 2.06 and later */
        if (env->mmu_model == POWERPC_MMU_2_06 ||
            env->mmu_model == POWERPC_MMU_2_07) {
            info->sps[i].page_shift = 16;
            info->sps[i].slb_enc = 0x110;
            info->sps[i].enc[0].page_shift = 16;
            info->sps[i].enc[0].pte_enc = 1;
            i++;
        }

        /* Standard 16M large page size segment */
        info->sps[i].page_shift = 24;
        info->sps[i].slb_enc = SLB_VSID_L;
        info->sps[i].enc[0].page_shift = 24;
        info->sps[i].enc[0].pte_enc = 0;
    }
}
/*
 * Fill *info with the host MMU capabilities: preferably from the
 * KVM_PPC_GET_SMMU_INFO ioctl, otherwise from the heuristic fallback
 * above.  Never fails.
 */
static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
{
    CPUState *cs = CPU(cpu);
    int ret;

    if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
        ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
        if (ret == 0) {
            return;
        }
    }

    kvm_get_fallback_smmu_info(cpu, info);
}
/*
 * Determine the page size backing mem_path: the filesystem block size
 * when the path lives on hugetlbfs, the normal host page size otherwise.
 * A failing statfs() (other than EINTR, which is retried) is fatal.
 */
static long gethugepagesize(const char *mem_path)
{
    struct statfs fs;
    int err;

    /* statfs() may be interrupted by a signal; retry on EINTR. */
    do {
        err = statfs(mem_path, &fs);
    } while (err != 0 && errno == EINTR);

    if (err != 0) {
        fprintf(stderr, "Couldn't statfs() memory path: %s\n",
                strerror(errno));
        exit(1);
    }

#define HUGETLBFS_MAGIC       0x958458f6

    if (fs.f_type == HUGETLBFS_MAGIC) {
        /* It's hugepage, return the huge page size */
        return fs.f_bsize;
    }

    /* Explicit mempath, but it's ordinary pages */
    return getpagesize();
}
/*
 * object_child_foreach() callback: for each memory-backend object,
 * lower *(long *)opaque to the smallest backing page size seen.
 * A backend without a mem-path forces the plain host page size.
 * Always returns 0 so iteration continues over all children.
 */
static int find_max_supported_pagesize(Object *obj, void *opaque)
{
    char *mem_path;
    long *hpsize_min = opaque;

    if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
        /* NOTE(review): object_property_get_str() usually returns an
         * allocated string; mem_path looks like it is never freed here —
         * verify against the QOM property API. */
        mem_path = object_property_get_str(obj, "mem-path", NULL);
        if (mem_path) {
            long hpsize = gethugepagesize(mem_path);
            if (hpsize < *hpsize_min) {
                *hpsize_min = hpsize;
            }
        } else {
            *hpsize_min = getpagesize();
        }
    }

    return 0;
}
/*
 * Smallest page size that can back any guest RAM: the -mem-path
 * filesystem's page size if one was given, otherwise the minimum
 * across all memory-backend objects (normal page size if none exist).
 */
static long getrampagesize(void)
{
    long hpsize = LONG_MAX;
    Object *memdev_root;

    if (mem_path) {
        return gethugepagesize(mem_path);
    }

    /* it's possible we have memory-backend objects with
     * hugepage-backed RAM. these may get mapped into system
     * address space via -numa parameters or memory hotplug
     * hooks. we want to take these into account, but we
     * also want to make sure these supported hugepage
     * sizes are applicable across the entire range of memory
     * we may boot from, so we take the min across all
     * backends, and assume normal pages in cases where a
     * backend isn't backed by hugepages.
     */
    memdev_root = object_resolve_path("/objects", NULL);
    if (!memdev_root) {
        return getpagesize();
    }

    object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);

    /* LONG_MAX means no backend updated the minimum */
    return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
}
/*
 * Whether a page of 2^shift bytes is usable: always true unless HV KVM
 * demands pages fit the backing store (KVM_PPC_PAGE_SIZES_REAL), in
 * which case the page must not exceed rampgsize.
 */
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    bool restricted = (flags & KVM_PPC_PAGE_SIZES_REAL) != 0;

    return !restricted || ((1ul << shift) <= rampgsize);
}
/*
 * Shrink the CPU's advertised segment/page-size table (env->sps) to the
 * intersection with what the host kernel and RAM backing actually
 * support, and clamp slb_nr / 1T-segment support to the host's values.
 * Only applies to 64-bit server (BookS) MMUs; SMMU info is queried from
 * the kernel once and cached in function-static storage.
 */
static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
    static struct kvm_ppc_smmu_info smmu_info;
    static bool has_smmu_info;
    CPUPPCState *env = &cpu->env;
    long rampagesize;
    int iq, ik, jq, jk;

    /* We only handle page sizes for 64-bit server guests for now */
    if (!(env->mmu_model & POWERPC_MMU_64)) {
        return;
    }

    /* Collect MMU info from kernel if not already */
    if (!has_smmu_info) {
        kvm_get_smmu_info(cpu, &smmu_info);
        has_smmu_info = true;
    }

    rampagesize = getrampagesize();

    /* Convert to QEMU form */
    memset(&env->sps, 0, sizeof(env->sps));

    /* If we have HV KVM, we need to forbid CI large pages if our
     * host page size is smaller than 64K.
     */
    if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
        env->ci_large_pages = getpagesize() >= 0x10000;
    }

    /*
     * XXX This loop should be an entry wide AND of the capabilities that
     *     the selected CPU has with the capabilities that KVM supports.
     */
    for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
        struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
        struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];

        if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                 ksps->page_shift)) {
            continue;
        }
        qsps->page_shift = ksps->page_shift;
        qsps->slb_enc = ksps->slb_enc;
        /* Copy only the actual-page-size encodings that fit the backing */
        for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
            if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
                                     ksps->enc[jk].page_shift)) {
                continue;
            }
            qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
            qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
            if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
                break;
            }
        }
        if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
            break;
        }
    }
    env->slb_nr = smmu_info.slb_size;
    if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
        env->mmu_model &= ~POWERPC_MMU_1TSEG;
    }
}
#else /* defined (TARGET_PPC64) */

/* No server-MMU page size fixup needed on 32-bit targets */
static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}

#endif /* !defined (TARGET_PPC64) */
466 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
468 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
/* e500 supports 2 h/w breakpoint and 2 watchpoint.
 * book3s supports only 1 watchpoint, so array size
 * of 4 is sufficient for now.
 */
#define MAX_HW_BKPTS 4

/* Hardware breakpoints/watchpoints currently armed, in insertion order */
static struct HWBreakpoint {
    target_ulong addr;
    int type;
} hw_debug_points[MAX_HW_BKPTS];

/* Scratch watchpoint reported back to the gdbstub on a debug exit */
static CPUWatchpoint hw_watchpoint;

/* Default there is no breakpoint and watchpoint supported */
static int max_hw_breakpoint;
static int max_hw_watchpoint;
static int nb_hw_breakpoint;
static int nb_hw_watchpoint;
/*
 * Set the per-model hardware debug limits.  Only BookE gets non-zero
 * limits here; other models keep the zero defaults above.  The sanity
 * check guards against the limits outgrowing hw_debug_points[].
 */
static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
{
    if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
        max_hw_breakpoint = 2;
        max_hw_watchpoint = 2;
    }

    if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
        fprintf(stderr, "Error initializing h/w breakpoints\n");
        return;
    }
}
/*
 * Per-vCPU KVM initialisation: fix up page sizes, sync sregs, create the
 * interrupt-reinjection timer, optionally share the BookE TLB with the
 * kernel, and fetch the kernel's software-breakpoint opcode.
 * Returns 0 or a negative errno.
 */
int kvm_arch_init_vcpu(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *cenv = &cpu->env;
    int ret;

    /* Gather server mmu info from KVM and update the CPU state */
    kvm_fixup_page_sizes(cpu);

    /* Synchronize sregs with kvm */
    ret = kvm_arch_sync_sregs(cpu);
    if (ret) {
        return ret;
    }

    /* NOTE(review): idle_timer is a single file-scope pointer, so each
     * vCPU created overwrites the previous timer — confirm intended
     * behaviour for SMP guests. */
    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);

    /* Some targets support access to KVM's guest TLB. */
    switch (cenv->mmu_model) {
    case POWERPC_MMU_BOOKE206:
        ret = kvm_booke206_tlb_init(cpu);
        break;
    default:
        break;
    }

    kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
    kvmppc_hw_debug_points_init(cenv);

    return ret;
}
/*
 * Flush QEMU's copy of the shared software TLB back to KVM by marking
 * every entry dirty (all-ones bitmap) in a KVM_DIRTY_TLB call.
 * No-op unless the shared TLB was set up in kvm_booke206_tlb_init().
 * Failure is reported but not propagated.
 */
static void kvm_sw_tlb_put(PowerPCCPU *cpu)
{
    CPUPPCState *env = &cpu->env;
    CPUState *cs = CPU(cpu);
    struct kvm_dirty_tlb dirty_tlb;
    unsigned char *bitmap;
    int ret;

    if (!env->kvm_sw_tlb) {
        return;
    }

    /* One bit per TLB entry, rounded up to whole bytes, all set */
    bitmap = g_malloc((env->nb_tlb + 7) / 8);
    memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);

    dirty_tlb.bitmap = (uintptr_t)bitmap;
    dirty_tlb.num_dirty = env->nb_tlb;

    ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
    if (ret) {
        fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
                __func__, strerror(-ret));
    }

    g_free(bitmap);
}
/*
 * Read one SPR from the kernel via KVM_GET_ONE_REG into env->spr[spr].
 * The register width is encoded in the id (KVM_REG_SIZE_*); unsupported
 * widths abort.  ioctl failure is only traced, not propagated.
 */
static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_get(spr, strerror(errno));
    } else {
        switch (id & KVM_REG_SIZE_MASK) {
        case KVM_REG_SIZE_U32:
            env->spr[spr] = val.u32;
            break;

        case KVM_REG_SIZE_U64:
            env->spr[spr] = val.u64;
            break;

        default:
            /* Don't handle this size yet */
            abort();
        }
    }
}
/*
 * Write env->spr[spr] to the kernel via KVM_SET_ONE_REG, truncating or
 * widening to the register width encoded in id.  Mirror image of
 * kvm_get_one_spr(); failures are only traced.
 */
static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    union {
        uint32_t u32;
        uint64_t u64;
    } val;
    struct kvm_one_reg reg = {
        .id = id,
        .addr = (uintptr_t) &val,
    };
    int ret;

    switch (id & KVM_REG_SIZE_MASK) {
    case KVM_REG_SIZE_U32:
        val.u32 = env->spr[spr];
        break;

    case KVM_REG_SIZE_U64:
        val.u64 = env->spr[spr];
        break;

    default:
        /* Don't handle this size yet */
        abort();
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret != 0) {
        trace_kvm_failed_spr_set(spr, strerror(errno));
    }
}
/*
 * Push FP/VSX and Altivec state to the kernel: FPSCR, the 32 FPR/VSR
 * registers (two 64-bit halves each, host-endian ordered), then VSCR
 * and the 32 VRs when Altivec is present.
 * Returns 0, or the first ioctl error encountered.
 */
static int kvm_put_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr = env->fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            /* The FPR occupies the first doubleword of the VSR on BE
             * hosts and the second on LE hosts. */
#ifdef HOST_WORDS_BIGENDIAN
            vsr[0] = float64_val(env->fpr[i]);
            vsr[1] = env->vsr[i];
#else
            vsr[0] = env->vsr[i];
            vsr[1] = float64_val(env->fpr[i]);
#endif
            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
/*
 * Fetch FP/VSX and Altivec state from the kernel: inverse of
 * kvm_put_fp(), with the same host-endian doubleword layout for the
 * combined FPR/VSR registers.  Returns 0 or the first ioctl error.
 */
static int kvm_get_fp(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int i;
    int ret;

    if (env->insns_flags & PPC_FLOAT) {
        uint64_t fpscr;
        bool vsx = !!(env->insns_flags2 & PPC2_VSX);

        reg.id = KVM_REG_PPC_FPSCR;
        reg.addr = (uintptr_t)&fpscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
            return ret;
        } else {
            env->fpscr = fpscr;
        }

        for (i = 0; i < 32; i++) {
            uint64_t vsr[2];

            reg.addr = (uintptr_t) &vsr;
            reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);

            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get %s%d from KVM: %s\n",
                        vsx ? "VSR" : "FPR", i, strerror(errno));
                return ret;
            } else {
                /* FPR half first on BE hosts, second on LE hosts */
#ifdef HOST_WORDS_BIGENDIAN
                env->fpr[i] = vsr[0];
                if (vsx) {
                    env->vsr[i] = vsr[1];
                }
#else
                env->fpr[i] = vsr[1];
                if (vsx) {
                    env->vsr[i] = vsr[0];
                }
#endif
            }
        }
    }

    if (env->insns_flags & PPC_ALTIVEC) {
        reg.id = KVM_REG_PPC_VSCR;
        reg.addr = (uintptr_t)&env->vscr;
        ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
            return ret;
        }

        for (i = 0; i < 32; i++) {
            reg.id = KVM_REG_PPC_VR(i);
            reg.addr = (uintptr_t)&env->avr[i];
            ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
            if (ret < 0) {
                DPRINTF("Unable to get VR%d from KVM: %s\n",
                        i, strerror(errno));
                return ret;
            }
        }
    }

    return 0;
}
#if defined(TARGET_PPC64)
/*
 * Fetch the three PAPR per-vCPU area registrations (VPA, SLB shadow,
 * dispatch trace log) from the kernel.  The asserts verify that each
 * addr/size pair is laid out contiguously in CPUPPCState, because KVM
 * reads/writes them as a single 128-bit register.
 * Returns 0 or the first ioctl error.
 */
static int kvm_get_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    reg.id = KVM_REG_PPC_VPA_ADDR;
    reg.addr = (uintptr_t)&env->vpa_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
                strerror(errno));
        return ret;
    }

    return 0;
}
/*
 * Push the PAPR per-vCPU area registrations to the kernel, honouring
 * KVM's ordering constraint (see comment below): the master VPA must
 * be registered before, and deregistered after, the SLB shadow and DTL.
 * Returns 0 or the first ioctl error.
 */
static int kvm_put_vpa(CPUState *cs)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_one_reg reg;
    int ret;

    /* SLB shadow or DTL can't be registered unless a master VPA is
     * registered.  That means when restoring state, if a VPA *is*
     * registered, we need to set that up first.  If not, we need to
     * deregister the others before deregistering the master VPA */
    assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));

    if (env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    assert((uintptr_t)&env->slb_shadow_size
           == ((uintptr_t)&env->slb_shadow_addr + 8));
    reg.id = KVM_REG_PPC_VPA_SLB;
    reg.addr = (uintptr_t)&env->slb_shadow_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
        return ret;
    }

    assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
    reg.id = KVM_REG_PPC_VPA_DTL;
    reg.addr = (uintptr_t)&env->dtl_addr;
    ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
    if (ret < 0) {
        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
                strerror(errno));
        return ret;
    }

    /* Deregister the master VPA last (see ordering comment above) */
    if (!env->vpa_addr) {
        reg.id = KVM_REG_PPC_VPA_ADDR;
        reg.addr = (uintptr_t)&env->vpa_addr;
        ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
        if (ret < 0) {
            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
            return ret;
        }
    }

    return 0;
}
#endif /* TARGET_PPC64 */
/*
 * Sync all QEMU-side CPU state into the kernel: GPRs/CR/special regs
 * via KVM_SET_REGS, FP/vector state, the shared software TLB, and —
 * when resetting or migrating (level >= KVM_PUT_RESET_STATE) — sregs,
 * HIOR, every ONE_REG-mapped SPR, transactional-memory state, the VPA
 * areas and the timebase offset.  Returns 0 or a negative errno.
 */
int kvm_arch_put_registers(CPUState *cs, int level)
{
    PowerPCCPU *cpu = POWERPC_CPU(cs);
    CPUPPCState *env = &cpu->env;
    struct kvm_regs regs;
    int ret;
    int i;

    /* Read-modify-write so fields QEMU doesn't model are preserved */
    ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
    if (ret < 0) {
        return ret;
    }

    regs.ctr = env->ctr;
    regs.lr = env->lr;
    regs.xer = cpu_read_xer(env);
    regs.msr = env->msr;
    regs.pc = env->nip;

    regs.srr0 = env->spr[SPR_SRR0];
    regs.srr1 = env->spr[SPR_SRR1];

    regs.sprg0 = env->spr[SPR_SPRG0];
    regs.sprg1 = env->spr[SPR_SPRG1];
    regs.sprg2 = env->spr[SPR_SPRG2];
    regs.sprg3 = env->spr[SPR_SPRG3];
    regs.sprg4 = env->spr[SPR_SPRG4];
    regs.sprg5 = env->spr[SPR_SPRG5];
    regs.sprg6 = env->spr[SPR_SPRG6];
    regs.sprg7 = env->spr[SPR_SPRG7];

    regs.pid = env->spr[SPR_BOOKE_PID];

    for (i = 0;i < 32; i++)
        regs.gpr[i] = env->gpr[i];

    /* Pack the 8 per-field CR nibbles into one 32-bit register,
     * CR0 in the most significant nibble. */
    regs.cr = 0;
    for (i = 0; i < 8; i++) {
        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
    }

    ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
    if (ret < 0)
        return ret;

    kvm_put_fp(cs);

    if (env->tlb_dirty) {
        kvm_sw_tlb_put(cpu);
        env->tlb_dirty = false;
    }

    if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
        struct kvm_sregs sregs;

        sregs.pvr = env->spr[SPR_PVR];

        sregs.u.s.sdr1 = env->spr[SPR_SDR1];

        /* Sync SLB */
#ifdef TARGET_PPC64
        for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
            sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
            /* Valid entries carry their index in the low bits of slbe */
            if (env->slb[i].esid & SLB_ESID_V) {
                sregs.u.s.ppc64.slb[i].slbe |= i;
            }
            sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
        }
#endif

        /* Sync SRs */
        for (i = 0; i < 16; i++) {
            sregs.u.s.ppc32.sr[i] = env->sr[i];
        }

        /* Sync BATs */
        for (i = 0; i < 8; i++) {
            /* Beware. We have to swap upper and lower bits here */
            sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
                | env->DBAT[1][i];
            sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
                | env->IBAT[1][i];
        }

        ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
        if (ret) {
            return ret;
        }
    }

    if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
        kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
    }

    if (cap_one_reg) {
        int i;

        /* We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
         * work, at least until we try to migrate. */
        for (i = 0; i < 1024; i++) {
            uint64_t id = env->spr_cb[i].one_reg_id;

            if (id != 0) {
                kvm_put_one_spr(cs, id, i);
            }
        }

#ifdef TARGET_PPC64
        /* Transactional-memory checkpoint state, only when a
         * transaction is active (MSR[TS] non-zero) */
        if (msr_ts) {
            for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
            }
            for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
                kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
            }
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
            kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
        }

        if (cap_papr) {
            if (kvm_put_vpa(cs) < 0) {
                DPRINTF("Warning: Unable to set VPA information to KVM\n");
            }
        }

        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
#endif /* TARGET_PPC64 */
    }

    return ret;
}
/* Recompute one BookE exception vector as IVOR offset + IVPR base. */
static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
{
    env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
}
1012 int kvm_arch_get_registers(CPUState *cs)
1014 PowerPCCPU *cpu = POWERPC_CPU(cs);
1015 CPUPPCState *env = &cpu->env;
1016 struct kvm_regs regs;
1017 struct kvm_sregs sregs;
1018 uint32_t cr;
1019 int i, ret;
1021 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1022 if (ret < 0)
1023 return ret;
1025 cr = regs.cr;
1026 for (i = 7; i >= 0; i--) {
1027 env->crf[i] = cr & 15;
1028 cr >>= 4;
1031 env->ctr = regs.ctr;
1032 env->lr = regs.lr;
1033 cpu_write_xer(env, regs.xer);
1034 env->msr = regs.msr;
1035 env->nip = regs.pc;
1037 env->spr[SPR_SRR0] = regs.srr0;
1038 env->spr[SPR_SRR1] = regs.srr1;
1040 env->spr[SPR_SPRG0] = regs.sprg0;
1041 env->spr[SPR_SPRG1] = regs.sprg1;
1042 env->spr[SPR_SPRG2] = regs.sprg2;
1043 env->spr[SPR_SPRG3] = regs.sprg3;
1044 env->spr[SPR_SPRG4] = regs.sprg4;
1045 env->spr[SPR_SPRG5] = regs.sprg5;
1046 env->spr[SPR_SPRG6] = regs.sprg6;
1047 env->spr[SPR_SPRG7] = regs.sprg7;
1049 env->spr[SPR_BOOKE_PID] = regs.pid;
1051 for (i = 0;i < 32; i++)
1052 env->gpr[i] = regs.gpr[i];
1054 kvm_get_fp(cs);
1056 if (cap_booke_sregs) {
1057 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1058 if (ret < 0) {
1059 return ret;
1062 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1063 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1064 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1065 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1066 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1067 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1068 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1069 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1070 env->spr[SPR_DECR] = sregs.u.e.dec;
1071 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1072 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1073 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1076 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1077 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1078 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1079 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1080 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1081 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1084 if (sregs.u.e.features & KVM_SREGS_E_64) {
1085 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1088 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1089 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1092 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1093 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1094 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1095 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1096 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1097 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1098 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1099 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1100 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1101 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1102 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1103 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1104 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1105 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1106 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1107 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1108 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1109 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1110 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1111 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1112 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1113 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1114 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1115 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1116 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1117 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1118 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1119 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1120 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1121 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1122 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1123 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1124 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1126 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1127 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1128 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1129 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1130 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1131 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1132 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1135 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1136 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1137 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1140 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1141 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1142 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1143 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1144 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1148 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1149 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1150 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1151 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1152 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1153 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1154 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1155 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1156 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1157 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1158 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1161 if (sregs.u.e.features & KVM_SREGS_EXP) {
1162 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1165 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1166 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1167 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1170 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1171 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1172 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1173 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1175 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1176 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1177 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1182 if (cap_segstate) {
1183 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1184 if (ret < 0) {
1185 return ret;
1188 if (!env->external_htab) {
1189 ppc_store_sdr1(env, sregs.u.s.sdr1);
1192 /* Sync SLB */
1193 #ifdef TARGET_PPC64
1195 * The packed SLB array we get from KVM_GET_SREGS only contains
1196 * information about valid entries. So we flush our internal
1197 * copy to get rid of stale ones, then put all valid SLB entries
1198 * back in.
1200 memset(env->slb, 0, sizeof(env->slb));
1201 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1202 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1203 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1205 * Only restore valid entries
1207 if (rb & SLB_ESID_V) {
1208 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1211 #endif
1213 /* Sync SRs */
1214 for (i = 0; i < 16; i++) {
1215 env->sr[i] = sregs.u.s.ppc32.sr[i];
1218 /* Sync BATs */
1219 for (i = 0; i < 8; i++) {
1220 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1221 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1222 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1223 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1227 if (cap_hior) {
1228 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1231 if (cap_one_reg) {
1232 int i;
1234 /* We deliberately ignore errors here, for kernels which have
1235 * the ONE_REG calls, but don't support the specific
1236 * registers, there's a reasonable chance things will still
1237 * work, at least until we try to migrate. */
1238 for (i = 0; i < 1024; i++) {
1239 uint64_t id = env->spr_cb[i].one_reg_id;
1241 if (id != 0) {
1242 kvm_get_one_spr(cs, id, i);
1246 #ifdef TARGET_PPC64
1247 if (msr_ts) {
1248 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1249 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1251 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1252 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1254 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1255 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1256 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1257 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1258 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1259 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1260 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1261 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1262 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1263 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1266 if (cap_papr) {
1267 if (kvm_get_vpa(cs) < 0) {
1268 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1272 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1273 #endif
1276 return 0;
1279 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1281 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1283 if (irq != PPC_INTERRUPT_EXT) {
1284 return 0;
1287 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1288 return 0;
1291 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1293 return 0;
1296 #if defined(TARGET_PPCEMB)
1297 #define PPC_INPUT_INT PPC40x_INPUT_INT
1298 #elif defined(TARGET_PPC64)
1299 #define PPC_INPUT_INT PPC970_INPUT_INT
1300 #else
1301 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1302 #endif
1304 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1306 PowerPCCPU *cpu = POWERPC_CPU(cs);
1307 CPUPPCState *env = &cpu->env;
1308 int r;
1309 unsigned irq;
1311 qemu_mutex_lock_iothread();
1313 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1314 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1315 if (!cap_interrupt_level &&
1316 run->ready_for_interrupt_injection &&
1317 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1318 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1320 /* For now KVM disregards the 'irq' argument. However, in the
1321 * future KVM could cache it in-kernel to avoid a heavyweight exit
1322 * when reading the UIC.
1324 irq = KVM_INTERRUPT_SET;
1326 DPRINTF("injected interrupt %d\n", irq);
1327 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1328 if (r < 0) {
1329 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1332 /* Always wake up soon in case the interrupt was level based */
1333 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1334 (get_ticks_per_sec() / 50));
1337 /* We don't know if there are more interrupts pending after this. However,
1338 * the guest will return to userspace in the course of handling this one
1339 * anyways, so we will get a chance to deliver the rest. */
1341 qemu_mutex_unlock_iothread();
1344 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1346 return MEMTXATTRS_UNSPECIFIED;
1349 int kvm_arch_process_async_events(CPUState *cs)
1351 return cs->halted;
1354 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1356 CPUState *cs = CPU(cpu);
1357 CPUPPCState *env = &cpu->env;
1359 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1360 cs->halted = 1;
1361 cs->exception_index = EXCP_HLT;
1364 return 0;
1367 /* map dcr access to existing qemu dcr emulation */
1368 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1370 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1371 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1373 return 0;
1376 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1378 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1379 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1381 return 0;
1384 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1386 /* Mixed endian case is not handled */
1387 uint32_t sc = debug_inst_opcode;
1389 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1390 sizeof(sc), 0) ||
1391 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1392 return -EINVAL;
1395 return 0;
1398 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1400 uint32_t sc;
1402 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1403 sc != debug_inst_opcode ||
1404 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1405 sizeof(sc), 1)) {
1406 return -EINVAL;
1409 return 0;
1412 static int find_hw_breakpoint(target_ulong addr, int type)
1414 int n;
1416 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1417 <= ARRAY_SIZE(hw_debug_points));
1419 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1420 if (hw_debug_points[n].addr == addr &&
1421 hw_debug_points[n].type == type) {
1422 return n;
1426 return -1;
1429 static int find_hw_watchpoint(target_ulong addr, int *flag)
1431 int n;
1433 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1434 if (n >= 0) {
1435 *flag = BP_MEM_ACCESS;
1436 return n;
1439 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1440 if (n >= 0) {
1441 *flag = BP_MEM_WRITE;
1442 return n;
1445 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1446 if (n >= 0) {
1447 *flag = BP_MEM_READ;
1448 return n;
1451 return -1;
1454 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1455 target_ulong len, int type)
1457 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1458 return -ENOBUFS;
1461 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1462 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1464 switch (type) {
1465 case GDB_BREAKPOINT_HW:
1466 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1467 return -ENOBUFS;
1470 if (find_hw_breakpoint(addr, type) >= 0) {
1471 return -EEXIST;
1474 nb_hw_breakpoint++;
1475 break;
1477 case GDB_WATCHPOINT_WRITE:
1478 case GDB_WATCHPOINT_READ:
1479 case GDB_WATCHPOINT_ACCESS:
1480 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1481 return -ENOBUFS;
1484 if (find_hw_breakpoint(addr, type) >= 0) {
1485 return -EEXIST;
1488 nb_hw_watchpoint++;
1489 break;
1491 default:
1492 return -ENOSYS;
1495 return 0;
1498 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1499 target_ulong len, int type)
1501 int n;
1503 n = find_hw_breakpoint(addr, type);
1504 if (n < 0) {
1505 return -ENOENT;
1508 switch (type) {
1509 case GDB_BREAKPOINT_HW:
1510 nb_hw_breakpoint--;
1511 break;
1513 case GDB_WATCHPOINT_WRITE:
1514 case GDB_WATCHPOINT_READ:
1515 case GDB_WATCHPOINT_ACCESS:
1516 nb_hw_watchpoint--;
1517 break;
1519 default:
1520 return -ENOSYS;
1522 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1524 return 0;
1527 void kvm_arch_remove_all_hw_breakpoints(void)
1529 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1532 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1534 int n;
1536 /* Software Breakpoint updates */
1537 if (kvm_sw_breakpoints_active(cs)) {
1538 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1541 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1542 <= ARRAY_SIZE(hw_debug_points));
1543 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1545 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1546 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1547 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1548 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1549 switch (hw_debug_points[n].type) {
1550 case GDB_BREAKPOINT_HW:
1551 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1552 break;
1553 case GDB_WATCHPOINT_WRITE:
1554 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1555 break;
1556 case GDB_WATCHPOINT_READ:
1557 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1558 break;
1559 case GDB_WATCHPOINT_ACCESS:
1560 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1561 KVMPPC_DEBUG_WATCH_READ;
1562 break;
1563 default:
1564 cpu_abort(cs, "Unsupported breakpoint type\n");
1566 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1571 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1573 CPUState *cs = CPU(cpu);
1574 CPUPPCState *env = &cpu->env;
1575 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1576 int handle = 0;
1577 int n;
1578 int flag = 0;
1580 if (cs->singlestep_enabled) {
1581 handle = 1;
1582 } else if (arch_info->status) {
1583 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1584 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1585 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1586 if (n >= 0) {
1587 handle = 1;
1589 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1590 KVMPPC_DEBUG_WATCH_WRITE)) {
1591 n = find_hw_watchpoint(arch_info->address, &flag);
1592 if (n >= 0) {
1593 handle = 1;
1594 cs->watchpoint_hit = &hw_watchpoint;
1595 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1596 hw_watchpoint.flags = flag;
1600 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1601 handle = 1;
1602 } else {
1603 /* QEMU is not able to handle debug exception, so inject
1604 * program exception to guest;
1605 * Yes program exception NOT debug exception !!
1606 * When QEMU is using debug resources then debug exception must
1607 * be always set. To achieve this we set MSR_DE and also set
1608 * MSRP_DEP so guest cannot change MSR_DE.
1609 * When emulating debug resource for guest we want guest
1610 * to control MSR_DE (enable/disable debug interrupt on need).
1611 * Supporting both configurations are NOT possible.
1612 * So the result is that we cannot share debug resources
1613 * between QEMU and Guest on BOOKE architecture.
1614 * In the current design QEMU gets the priority over guest,
1615 * this means that if QEMU is using debug resources then guest
1616 * cannot use them;
1617 * For software breakpoint QEMU uses a privileged instruction;
1618 * So there cannot be any reason that we are here for guest
1619 * set debug exception, only possibility is guest executed a
1620 * privileged / illegal instruction and that's why we are
1621 * injecting a program interrupt.
1624 cpu_synchronize_state(cs);
1625 /* env->nip is PC, so increment this by 4 to use
1626 * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1628 env->nip += 4;
1629 cs->exception_index = POWERPC_EXCP_PROGRAM;
1630 env->error_code = POWERPC_EXCP_INVAL;
1631 ppc_cpu_do_interrupt(cs);
1634 return handle;
1637 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1639 PowerPCCPU *cpu = POWERPC_CPU(cs);
1640 CPUPPCState *env = &cpu->env;
1641 int ret;
1643 qemu_mutex_lock_iothread();
1645 switch (run->exit_reason) {
1646 case KVM_EXIT_DCR:
1647 if (run->dcr.is_write) {
1648 DPRINTF("handle dcr write\n");
1649 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1650 } else {
1651 DPRINTF("handle dcr read\n");
1652 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1654 break;
1655 case KVM_EXIT_HLT:
1656 DPRINTF("handle halt\n");
1657 ret = kvmppc_handle_halt(cpu);
1658 break;
1659 #if defined(TARGET_PPC64)
1660 case KVM_EXIT_PAPR_HCALL:
1661 DPRINTF("handle PAPR hypercall\n");
1662 run->papr_hcall.ret = spapr_hypercall(cpu,
1663 run->papr_hcall.nr,
1664 run->papr_hcall.args);
1665 ret = 0;
1666 break;
1667 #endif
1668 case KVM_EXIT_EPR:
1669 DPRINTF("handle epr\n");
1670 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1671 ret = 0;
1672 break;
1673 case KVM_EXIT_WATCHDOG:
1674 DPRINTF("handle watchdog expiry\n");
1675 watchdog_perform_action();
1676 ret = 0;
1677 break;
1679 case KVM_EXIT_DEBUG:
1680 DPRINTF("handle debug exception\n");
1681 if (kvm_handle_debug(cpu, run)) {
1682 ret = EXCP_DEBUG;
1683 break;
1685 /* re-enter, this exception was guest-internal */
1686 ret = 0;
1687 break;
1689 default:
1690 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1691 ret = -1;
1692 break;
1695 qemu_mutex_unlock_iothread();
1696 return ret;
1699 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1701 CPUState *cs = CPU(cpu);
1702 uint32_t bits = tsr_bits;
1703 struct kvm_one_reg reg = {
1704 .id = KVM_REG_PPC_OR_TSR,
1705 .addr = (uintptr_t) &bits,
1708 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1711 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1714 CPUState *cs = CPU(cpu);
1715 uint32_t bits = tsr_bits;
1716 struct kvm_one_reg reg = {
1717 .id = KVM_REG_PPC_CLEAR_TSR,
1718 .addr = (uintptr_t) &bits,
1721 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1724 int kvmppc_set_tcr(PowerPCCPU *cpu)
1726 CPUState *cs = CPU(cpu);
1727 CPUPPCState *env = &cpu->env;
1728 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1730 struct kvm_one_reg reg = {
1731 .id = KVM_REG_PPC_TCR,
1732 .addr = (uintptr_t) &tcr,
1735 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1738 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1740 CPUState *cs = CPU(cpu);
1741 int ret;
1743 if (!kvm_enabled()) {
1744 return -1;
1747 if (!cap_ppc_watchdog) {
1748 printf("warning: KVM does not support watchdog");
1749 return -1;
1752 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1753 if (ret < 0) {
1754 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1755 __func__, strerror(-ret));
1756 return ret;
1759 return ret;
/* Scan /proc/cpuinfo for a line starting with @field and copy it
 * (including the field name) into @value.  Returns 0 on success, -1 if
 * the file can't be opened or the field isn't present. */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);

    return ret;
}

/* Parse the host timebase frequency out of /proc/cpuinfo's "timebase"
 * line; fall back to the emulated tick rate when it can't be found. */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    if (!(ns = strchr(line, ':'))) {
        return retval;
    }

    ns++;

    return atoi(ns);
}
1809 bool kvmppc_get_host_serial(char **value)
1811 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1812 NULL);
1815 bool kvmppc_get_host_model(char **value)
1817 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1820 /* Try to find a device tree node for a CPU with clock-frequency property */
1821 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1823 struct dirent *dirp;
1824 DIR *dp;
1826 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1827 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1828 return -1;
1831 buf[0] = '\0';
1832 while ((dirp = readdir(dp)) != NULL) {
1833 FILE *f;
1834 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1835 dirp->d_name);
1836 f = fopen(buf, "r");
1837 if (f) {
1838 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1839 fclose(f);
1840 break;
1842 buf[0] = '\0';
1844 closedir(dp);
1845 if (buf[0] == '\0') {
1846 printf("Unknown host!\n");
1847 return -1;
1850 return 0;
1853 static uint64_t kvmppc_read_int_dt(const char *filename)
1855 union {
1856 uint32_t v32;
1857 uint64_t v64;
1858 } u;
1859 FILE *f;
1860 int len;
1862 f = fopen(filename, "rb");
1863 if (!f) {
1864 return -1;
1867 len = fread(&u, 1, sizeof(u), f);
1868 fclose(f);
1869 switch (len) {
1870 case 4:
1871 /* property is a 32-bit quantity */
1872 return be32_to_cpu(u.v32);
1873 case 8:
1874 return be64_to_cpu(u.v64);
1877 return 0;
1880 /* Read a CPU node property from the host device tree that's a single
1881 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1882 * (can't find or open the property, or doesn't understand the
1883 * format) */
1884 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1886 char buf[PATH_MAX], *tmp;
1887 uint64_t val;
1889 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1890 return -1;
1893 tmp = g_strdup_printf("%s/%s", buf, propname);
1894 val = kvmppc_read_int_dt(tmp);
1895 g_free(tmp);
1897 return val;
1900 uint64_t kvmppc_get_clockfreq(void)
1902 return kvmppc_read_int_cpu_dt("clock-frequency");
1905 uint32_t kvmppc_get_vmx(void)
1907 return kvmppc_read_int_cpu_dt("ibm,vmx");
1910 uint32_t kvmppc_get_dfp(void)
1912 return kvmppc_read_int_cpu_dt("ibm,dfp");
1915 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1917 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1918 CPUState *cs = CPU(cpu);
1920 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1921 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1922 return 0;
1925 return 1;
1928 int kvmppc_get_hasidle(CPUPPCState *env)
1930 struct kvm_ppc_pvinfo pvinfo;
1932 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1933 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1934 return 1;
1937 return 0;
1940 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1942 uint32_t *hc = (uint32_t*)buf;
1943 struct kvm_ppc_pvinfo pvinfo;
1945 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1946 memcpy(buf, pvinfo.hcall, buf_len);
1947 return 0;
1951 * Fallback to always fail hypercalls regardless of endianness:
1953 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1954 * li r3, -1
1955 * b .+8 (becomes nop in wrong endian)
1956 * bswap32(li r3, -1)
1959 hc[0] = cpu_to_be32(0x08000048);
1960 hc[1] = cpu_to_be32(0x3860ffff);
1961 hc[2] = cpu_to_be32(0x48000008);
1962 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1964 return 0;
1967 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1969 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1972 void kvmppc_enable_logical_ci_hcalls(void)
1975 * FIXME: it would be nice if we could detect the cases where
1976 * we're using a device which requires the in kernel
1977 * implementation of these hcalls, but the kernel lacks them and
1978 * produce a warning.
1980 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1981 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1984 void kvmppc_enable_set_mode_hcall(void)
1986 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1989 void kvmppc_set_papr(PowerPCCPU *cpu)
1991 CPUState *cs = CPU(cpu);
1992 int ret;
1994 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1995 if (ret) {
1996 cpu_abort(cs, "This KVM version does not support PAPR\n");
1999 /* Update the capability flag so we sync the right information
2000 * with kvm */
2001 cap_papr = 1;
2004 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2006 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2009 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2011 CPUState *cs = CPU(cpu);
2012 int ret;
2014 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2015 if (ret && mpic_proxy) {
2016 cpu_abort(cs, "This KVM version does not support EPR\n");
2020 int kvmppc_smt_threads(void)
2022 return cap_ppc_smt ? cap_ppc_smt : 1;
2025 #ifdef TARGET_PPC64
2026 off_t kvmppc_alloc_rma(void **rma)
2028 off_t size;
2029 int fd;
2030 struct kvm_allocate_rma ret;
2032 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2033 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2034 * not necessary on this hardware
2035 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2037 * FIXME: We should allow the user to force contiguous RMA
2038 * allocation in the cap_ppc_rma==1 case.
2040 if (cap_ppc_rma < 2) {
2041 return 0;
2044 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2045 if (fd < 0) {
2046 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2047 strerror(errno));
2048 return -1;
2051 size = MIN(ret.rma_size, 256ul << 20);
2053 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2054 if (*rma == MAP_FAILED) {
2055 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2056 return -1;
2059 return size;
2062 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2064 struct kvm_ppc_smmu_info info;
2065 long rampagesize, best_page_shift;
2066 int i;
2068 if (cap_ppc_rma >= 2) {
2069 return current_size;
2072 /* Find the largest hardware supported page size that's less than
2073 * or equal to the (logical) backing page size of guest RAM */
2074 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2075 rampagesize = getrampagesize();
2076 best_page_shift = 0;
2078 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2079 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2081 if (!sps->page_shift) {
2082 continue;
2085 if ((sps->page_shift > best_page_shift)
2086 && ((1UL << sps->page_shift) <= rampagesize)) {
2087 best_page_shift = sps->page_shift;
2091 return MIN(current_size,
2092 1ULL << (best_page_shift + hash_shift - 7));
2094 #endif
2096 bool kvmppc_spapr_use_multitce(void)
2098 return cap_spapr_multitce;
2101 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2102 bool need_vfio)
2104 struct kvm_create_spapr_tce args = {
2105 .liobn = liobn,
2106 .window_size = window_size,
2108 long len;
2109 int fd;
2110 void *table;
2112 /* Must set fd to -1 so we don't try to munmap when called for
2113 * destroying the table, which the upper layers -will- do
2115 *pfd = -1;
2116 if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2117 return NULL;
2120 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2121 if (fd < 0) {
2122 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2123 liobn);
2124 return NULL;
2127 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2128 /* FIXME: round this up to page size */
2130 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2131 if (table == MAP_FAILED) {
2132 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2133 liobn);
2134 close(fd);
2135 return NULL;
2138 *pfd = fd;
2139 return table;
2142 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2144 long len;
2146 if (fd < 0) {
2147 return -1;
2150 len = nb_table * sizeof(uint64_t);
2151 if ((munmap(table, len) < 0) ||
2152 (close(fd) < 0)) {
2153 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2154 strerror(errno));
2155 /* Leak the table */
2158 return 0;
2161 int kvmppc_reset_htab(int shift_hint)
2163 uint32_t shift = shift_hint;
2165 if (!kvm_enabled()) {
2166 /* Full emulation, tell caller to allocate htab itself */
2167 return 0;
2169 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2170 int ret;
2171 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2172 if (ret == -ENOTTY) {
2173 /* At least some versions of PR KVM advertise the
2174 * capability, but don't implement the ioctl(). Oops.
2175 * Return 0 so that we allocate the htab in qemu, as is
2176 * correct for PR. */
2177 return 0;
2178 } else if (ret < 0) {
2179 return ret;
2181 return shift;
2184 /* We have a kernel that predates the htab reset calls. For PR
2185 * KVM, we need to allocate the htab ourselves, for an HV KVM of
2186 * this era, it has allocated a 16MB fixed size hash table
2187 * already. Kernels of this era have the GET_PVINFO capability
2188 * only on PR, so we use this hack to determine the right
2189 * answer */
2190 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2191 /* PR - tell caller to allocate htab */
2192 return 0;
2193 } else {
2194 /* HV - assume 16MB kernel allocated htab */
2195 return 24;
2199 static inline uint32_t mfpvr(void)
2201 uint32_t pvr;
2203 asm ("mfpvr %0"
2204 : "=r"(pvr));
2205 return pvr;
/* Set (@on true) or clear (@on false) the bits in @flags within *word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}
2217 static void kvmppc_host_cpu_initfn(Object *obj)
2219 assert(kvm_enabled());
2222 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2224 DeviceClass *dc = DEVICE_CLASS(oc);
2225 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2226 uint32_t vmx = kvmppc_get_vmx();
2227 uint32_t dfp = kvmppc_get_dfp();
2228 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2229 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2231 /* Now fix up the class with information we can query from the host */
2232 pcc->pvr = mfpvr();
2234 if (vmx != -1) {
2235 /* Only override when we know what the host supports */
2236 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2237 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2239 if (dfp != -1) {
2240 /* Only override when we know what the host supports */
2241 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2244 if (dcache_size != -1) {
2245 pcc->l1_dcache_size = dcache_size;
2248 if (icache_size != -1) {
2249 pcc->l1_icache_size = icache_size;
2252 /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2253 dc->cannot_destroy_with_object_finalize_yet = true;
2256 bool kvmppc_has_cap_epr(void)
2258 return cap_epr;
2261 bool kvmppc_has_cap_htab_fd(void)
2263 return cap_htab_fd;
2266 bool kvmppc_has_cap_fixup_hcalls(void)
2268 return cap_fixup_hcalls;
2271 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2273 ObjectClass *oc = OBJECT_CLASS(pcc);
2275 while (oc && !object_class_is_abstract(oc)) {
2276 oc = object_class_get_parent(oc);
2278 assert(oc);
2280 return POWERPC_CPU_CLASS(oc);
2283 static int kvm_ppc_register_host_cpu_type(void)
2285 TypeInfo type_info = {
2286 .name = TYPE_HOST_POWERPC_CPU,
2287 .instance_init = kvmppc_host_cpu_initfn,
2288 .class_init = kvmppc_host_cpu_class_init,
2290 uint32_t host_pvr = mfpvr();
2291 PowerPCCPUClass *pvr_pcc;
2292 DeviceClass *dc;
2294 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2295 if (pvr_pcc == NULL) {
2296 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2298 if (pvr_pcc == NULL) {
2299 return -1;
2301 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2302 type_register(&type_info);
2304 /* Register generic family CPU class for a family */
2305 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2306 dc = DEVICE_CLASS(pvr_pcc);
2307 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2308 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2309 type_register(&type_info);
2311 return 0;
2314 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2316 struct kvm_rtas_token_args args = {
2317 .token = token,
2320 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2321 return -ENOENT;
2324 strncpy(args.name, function, sizeof(args.name));
2326 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2329 int kvmppc_get_htab_fd(bool write)
2331 struct kvm_get_htab_fd s = {
2332 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2333 .start_index = 0,
2336 if (!cap_htab_fd) {
2337 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2338 return -1;
2341 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2344 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2346 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2347 uint8_t buf[bufsize];
2348 ssize_t rc;
2350 do {
2351 rc = read(fd, buf, bufsize);
2352 if (rc < 0) {
2353 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2354 strerror(errno));
2355 return rc;
2356 } else if (rc) {
2357 uint8_t *buffer = buf;
2358 ssize_t n = rc;
2359 while (n) {
2360 struct kvm_get_htab_header *head =
2361 (struct kvm_get_htab_header *) buffer;
2362 size_t chunksize = sizeof(*head) +
2363 HASH_PTE_SIZE_64 * head->n_valid;
2365 qemu_put_be32(f, head->index);
2366 qemu_put_be16(f, head->n_valid);
2367 qemu_put_be16(f, head->n_invalid);
2368 qemu_put_buffer(f, (void *)(head + 1),
2369 HASH_PTE_SIZE_64 * head->n_valid);
2371 buffer += chunksize;
2372 n -= chunksize;
2375 } while ((rc != 0)
2376 && ((max_ns < 0)
2377 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2379 return (rc == 0) ? 1 : 0;
2382 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2383 uint16_t n_valid, uint16_t n_invalid)
2385 struct kvm_get_htab_header *buf;
2386 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2387 ssize_t rc;
2389 buf = alloca(chunksize);
2390 buf->index = index;
2391 buf->n_valid = n_valid;
2392 buf->n_invalid = n_invalid;
2394 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2396 rc = write(fd, buf, chunksize);
2397 if (rc < 0) {
2398 fprintf(stderr, "Error writing KVM hash table: %s\n",
2399 strerror(errno));
2400 return rc;
2402 if (rc != chunksize) {
2403 /* We should never get a short write on a single chunk */
2404 fprintf(stderr, "Short write, restoring KVM hash table\n");
2405 return -1;
2407 return 0;
2410 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2412 return true;
2415 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2417 return 1;
2420 int kvm_arch_on_sigbus(int code, void *addr)
2422 return 1;
2425 void kvm_arch_init_irq_routing(KVMState *s)
2429 struct kvm_get_htab_buf {
2430 struct kvm_get_htab_header header;
2432 * We require one extra byte for read
2434 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2437 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2439 int htab_fd;
2440 struct kvm_get_htab_fd ghf;
2441 struct kvm_get_htab_buf *hpte_buf;
2443 ghf.flags = 0;
2444 ghf.start_index = pte_index;
2445 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2446 if (htab_fd < 0) {
2447 goto error_out;
2450 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2452 * Read the hpte group
2454 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2455 goto out_close;
2458 close(htab_fd);
2459 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2461 out_close:
2462 g_free(hpte_buf);
2463 close(htab_fd);
2464 error_out:
2465 return 0;
2468 void kvmppc_hash64_free_pteg(uint64_t token)
2470 struct kvm_get_htab_buf *htab_buf;
2472 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2473 hpte);
2474 g_free(htab_buf);
2475 return;
2478 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2479 target_ulong pte0, target_ulong pte1)
2481 int htab_fd;
2482 struct kvm_get_htab_fd ghf;
2483 struct kvm_get_htab_buf hpte_buf;
2485 ghf.flags = 0;
2486 ghf.start_index = 0; /* Ignored */
2487 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2488 if (htab_fd < 0) {
2489 goto error_out;
2492 hpte_buf.header.n_valid = 1;
2493 hpte_buf.header.n_invalid = 0;
2494 hpte_buf.header.index = pte_index;
2495 hpte_buf.hpte[0] = pte0;
2496 hpte_buf.hpte[1] = pte1;
2498 * Write the hpte entry.
2499 * CAUTION: write() has the warn_unused_result attribute. Hence we
2500 * need to check the return value, even though we do nothing.
2502 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2503 goto out_close;
2506 out_close:
2507 close(htab_fd);
2508 return;
2510 error_out:
2511 return;
2514 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2515 uint64_t address, uint32_t data, PCIDevice *dev)
2517 return 0;
/* MSI data maps directly to a GSI; only the low 16 bits are meaningful. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}
2525 int kvmppc_enable_hwrng(void)
2527 if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2528 return -1;
2531 return kvmppc_enable_hcall(kvm_state, H_RANDOM);