target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include "qemu/osdep.h"
  18 #include <dirent.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/mman.h>
  21 #include <sys/vfs.h>
  22
  23 #include <linux/kvm.h>
  24
  25 #include "qemu-common.h"
  26 #include "qemu/error-report.h"
  27 #include "qemu/timer.h"
  28 #include "sysemu/sysemu.h"
  29 #include "sysemu/kvm.h"
  30 #include "kvm_ppc.h"
  31 #include "cpu.h"
  32 #include "sysemu/cpus.h"
  33 #include "sysemu/device_tree.h"
  34 #include "mmu-hash64.h"
  35
  36 #include "hw/sysbus.h"
  37 #include "hw/ppc/spapr.h"
  38 #include "hw/ppc/spapr_vio.h"
  39 #include "hw/ppc/ppc.h"
  40 #include "sysemu/watchdog.h"
  41 #include "trace.h"
  42 #include "exec/gdbstub.h"
  43 #include "exec/memattrs.h"
  44 #include "sysemu/hostmem.h"
  45
  46 //#define DEBUG_KVM
  47
  48 #ifdef DEBUG_KVM
  49 #define DPRINTF(fmt, ...) \
  50     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  51 #else
  52 #define DPRINTF(fmt, ...) \
  53     do { } while (0)
  54 #endif
  55
  56 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  57
  58 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  59     KVM_CAP_LAST_INFO
  60 };
  61
  62 static int cap_interrupt_unset = false;
  63 static int cap_interrupt_level = false;
  64 static int cap_segstate;
  65 static int cap_booke_sregs;
  66 static int cap_ppc_smt;
  67 static int cap_ppc_rma;
  68 static int cap_spapr_tce;
  69 static int cap_spapr_multitce;
  70 static int cap_spapr_vfio;
  71 static int cap_hior;
  72 static int cap_one_reg;
  73 static int cap_epr;
  74 static int cap_ppc_watchdog;
  75 static int cap_papr;
  76 static int cap_htab_fd;
  77 static int cap_fixup_hcalls;
  78
  79 static uint32_t debug_inst_opcode;
  80
  81 /* XXX We have a race condition where we actually have a level triggered
  82  *     interrupt, but the infrastructure can't expose that yet, so the guest
  83  *     takes but ignores it, goes to sleep and never gets notified that there's
  84  *     still an interrupt pending.
  85  *
  86  *     As a quick workaround, let's just wake up again 20 ms after we injected
  87  *     an interrupt. That way we can assure that we're always reinjecting
  88  *     interrupts in case the guest swallowed them.
  89  */
  90 static QEMUTimer *idle_timer;
  91
  92 static void kvm_kick_cpu(void *opaque)
  93 {
  94     PowerPCCPU *cpu = opaque;
  95
  96     qemu_cpu_kick(CPU(cpu));
  97 }
  98
  99 static int kvm_ppc_register_host_cpu_type(void);
 100
 101 int kvm_arch_init(MachineState *ms, KVMState *s)
 102 {
 103     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 104     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 105     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 106     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 107     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 108     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 109     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 110     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 111     cap_spapr_vfio = false;
 112     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 113     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 114     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 115     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 116     /* Note: we don't set cap_papr here, because this capability is
 117      * only activated after this by kvmppc_set_papr() */
 118     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 119     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 120
 121     if (!cap_interrupt_level) {
 122         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 123                         "VM to stall at times!\n");
 124     }
 125
 126     kvm_ppc_register_host_cpu_type();
 127
 128     return 0;
 129 }
 130
 131 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 132 {
 133     CPUPPCState *cenv = &cpu->env;
 134     CPUState *cs = CPU(cpu);
 135     struct kvm_sregs sregs;
 136     int ret;
 137
 138     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 139         /* What we're really trying to say is "if we're on BookE, we use
 140            the native PVR for now". This is the only sane way to check
 141            it though, so we potentially confuse users that they can run
 142            BookE guests on BookS. Let's hope nobody dares enough :) */
 143         return 0;
 144     } else {
 145         if (!cap_segstate) {
 146             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 147             return -ENOSYS;
 148         }
 149     }
 150
 151     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 152     if (ret) {
 153         return ret;
 154     }
 155
 156     sregs.pvr = cenv->spr[SPR_PVR];
 157     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 158 }
 159
 160 /* Set up a shared TLB array with KVM */
 161 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 162 {
 163     CPUPPCState *env = &cpu->env;
 164     CPUState *cs = CPU(cpu);
 165     struct kvm_book3e_206_tlb_params params = {};
 166     struct kvm_config_tlb cfg = {};
 167     unsigned int entries = 0;
 168     int ret, i;
 169
 170     if (!kvm_enabled() ||
 171         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 172         return 0;
 173     }
 174
 175     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 176
 177     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 178         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 179         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 180         entries += params.tlb_sizes[i];
 181     }
 182
 183     assert(entries == env->nb_tlb);
 184     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 185
 186     env->tlb_dirty = true;
 187
 188     cfg.array = (uintptr_t)env->tlb.tlbm;
 189     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 190     cfg.params = (uintptr_t)&params;
 191     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 192
 193     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 194     if (ret < 0) {
 195         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 196                 __func__, strerror(-ret));
 197         return ret;
 198     }
 199
 200     env->kvm_sw_tlb = true;
 201     return 0;
 202 }
 203
 204
 205 #if defined(TARGET_PPC64)
 206 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 207                                        struct kvm_ppc_smmu_info *info)
 208 {
 209     CPUPPCState *env = &cpu->env;
 210     CPUState *cs = CPU(cpu);
 211
 212     memset(info, 0, sizeof(*info));
 213
 214     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 215      * need to "guess" what the supported page sizes are.
 216      *
 217      * For that to work we make a few assumptions:
 218      *
 219      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 220      *   KVM which only supports 4K and 16M pages, but supports them
 221      *   regardless of the backing store characteritics. We also don't
 222      *   support 1T segments.
 223      *
 224      *   This is safe as if HV KVM ever supports that capability or PR
 225      *   KVM grows supports for more page/segment sizes, those versions
 226      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 227      *   will not hit this fallback
 228      *
 229      * - Else we are running HV KVM. This means we only support page
 230      *   sizes that fit in the backing store. Additionally we only
 231      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 232      *   P7 encodings for the SLB and hash table. Here too, we assume
 233      *   support for any newer processor will mean a kernel that
 234      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 235      *   this fallback.
 236      */
 237     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 238         /* No flags */
 239         info->flags = 0;
 240         info->slb_size = 64;
 241
 242         /* Standard 4k base page size segment */
 243         info->sps[0].page_shift = 12;
 244         info->sps[0].slb_enc = 0;
 245         info->sps[0].enc[0].page_shift = 12;
 246         info->sps[0].enc[0].pte_enc = 0;
 247
 248         /* Standard 16M large page size segment */
 249         info->sps[1].page_shift = 24;
 250         info->sps[1].slb_enc = SLB_VSID_L;
 251         info->sps[1].enc[0].page_shift = 24;
 252         info->sps[1].enc[0].pte_enc = 0;
 253     } else {
 254         int i = 0;
 255
 256         /* HV KVM has backing store size restrictions */
 257         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 258
 259         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 260             info->flags |= KVM_PPC_1T_SEGMENTS;
 261         }
 262
 263         if (env->mmu_model == POWERPC_MMU_2_06 ||
 264             env->mmu_model == POWERPC_MMU_2_07) {
 265             info->slb_size = 32;
 266         } else {
 267             info->slb_size = 64;
 268         }
 269
 270         /* Standard 4k base page size segment */
 271         info->sps[i].page_shift = 12;
 272         info->sps[i].slb_enc = 0;
 273         info->sps[i].enc[0].page_shift = 12;
 274         info->sps[i].enc[0].pte_enc = 0;
 275         i++;
 276
 277         /* 64K on MMU 2.06 and later */
 278         if (env->mmu_model == POWERPC_MMU_2_06 ||
 279             env->mmu_model == POWERPC_MMU_2_07) {
 280             info->sps[i].page_shift = 16;
 281             info->sps[i].slb_enc = 0x110;
 282             info->sps[i].enc[0].page_shift = 16;
 283             info->sps[i].enc[0].pte_enc = 1;
 284             i++;
 285         }
 286
 287         /* Standard 16M large page size segment */
 288         info->sps[i].page_shift = 24;
 289         info->sps[i].slb_enc = SLB_VSID_L;
 290         info->sps[i].enc[0].page_shift = 24;
 291         info->sps[i].enc[0].pte_enc = 0;
 292     }
 293 }
 294
 295 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 296 {
 297     CPUState *cs = CPU(cpu);
 298     int ret;
 299
 300     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 301         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 302         if (ret == 0) {
 303             return;
 304         }
 305     }
 306
 307     kvm_get_fallback_smmu_info(cpu, info);
 308 }
 309
 310 static long gethugepagesize(const char *mem_path)
 311 {
 312     struct statfs fs;
 313     int ret;
 314
 315     do {
 316         ret = statfs(mem_path, &fs);
 317     } while (ret != 0 && errno == EINTR);
 318
 319     if (ret != 0) {
 320         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 321                 strerror(errno));
 322         exit(1);
 323     }
 324
 325 #define HUGETLBFS_MAGIC       0x958458f6
 326
 327     if (fs.f_type != HUGETLBFS_MAGIC) {
 328         /* Explicit mempath, but it's ordinary pages */
 329         return getpagesize();
 330     }
 331
 332     /* It's hugepage, return the huge page size */
 333     return fs.f_bsize;
 334 }
 335
 336 static int find_max_supported_pagesize(Object *obj, void *opaque)
 337 {
 338     char *mem_path;
 339     long *hpsize_min = opaque;
 340
 341     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
 342         mem_path = object_property_get_str(obj, "mem-path", NULL);
 343         if (mem_path) {
 344             long hpsize = gethugepagesize(mem_path);
 345             if (hpsize < *hpsize_min) {
 346                 *hpsize_min = hpsize;
 347             }
 348         } else {
 349             *hpsize_min = getpagesize();
 350         }
 351     }
 352
 353     return 0;
 354 }
 355
 356 static long getrampagesize(void)
 357 {
 358     long hpsize = LONG_MAX;
 359     Object *memdev_root;
 360
 361     if (mem_path) {
 362         return gethugepagesize(mem_path);
 363     }
 364
 365     /* it's possible we have memory-backend objects with
 366      * hugepage-backed RAM. these may get mapped into system
 367      * address space via -numa parameters or memory hotplug
 368      * hooks. we want to take these into account, but we
 369      * also want to make sure these supported hugepage
 370      * sizes are applicable across the entire range of memory
 371      * we may boot from, so we take the min across all
 372      * backends, and assume normal pages in cases where a
 373      * backend isn't backed by hugepages.
 374      */
 375     memdev_root = object_resolve_path("/objects", NULL);
 376     if (!memdev_root) {
 377         return getpagesize();
 378     }
 379
 380     object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
 381
 382     return (hpsize == LONG_MAX) ? getpagesize() : hpsize;
 383 }
 384
 385 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 386 {
 387     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 388         return true;
 389     }
 390
 391     return (1ul << shift) <= rampgsize;
 392 }
 393
 394 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 395 {
 396     static struct kvm_ppc_smmu_info smmu_info;
 397     static bool has_smmu_info;
 398     CPUPPCState *env = &cpu->env;
 399     long rampagesize;
 400     int iq, ik, jq, jk;
 401
 402     /* We only handle page sizes for 64-bit server guests for now */
 403     if (!(env->mmu_model & POWERPC_MMU_64)) {
 404         return;
 405     }
 406
 407     /* Collect MMU info from kernel if not already */
 408     if (!has_smmu_info) {
 409         kvm_get_smmu_info(cpu, &smmu_info);
 410         has_smmu_info = true;
 411     }
 412
 413     rampagesize = getrampagesize();
 414
 415     /* Convert to QEMU form */
 416     memset(&env->sps, 0, sizeof(env->sps));
 417
 418     /* If we have HV KVM, we need to forbid CI large pages if our
 419      * host page size is smaller than 64K.
 420      */
 421     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 422         env->ci_large_pages = getpagesize() >= 0x10000;
 423     }
 424
 425     /*
 426      * XXX This loop should be an entry wide AND of the capabilities that
 427      *     the selected CPU has with the capabilities that KVM supports.
 428      */
 429     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 430         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 431         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 432
 433         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 434                                  ksps->page_shift)) {
 435             continue;
 436         }
 437         qsps->page_shift = ksps->page_shift;
 438         qsps->slb_enc = ksps->slb_enc;
 439         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 440             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 441                                      ksps->enc[jk].page_shift)) {
 442                 continue;
 443             }
 444             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 445             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 446             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 447                 break;
 448             }
 449         }
 450         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 451             break;
 452         }
 453     }
 454     env->slb_nr = smmu_info.slb_size;
 455     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 456         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 457     }
 458 }
 459 #else /* defined (TARGET_PPC64) */
 460
 461 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 462 {
 463 }
 464
 465 #endif /* !defined (TARGET_PPC64) */
 466
 467 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 468 {
 469     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 470 }
 471
 472 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
 473  * book3s supports only 1 watchpoint, so array size
 474  * of 4 is sufficient for now.
 475  */
 476 #define MAX_HW_BKPTS 4
 477
 478 static struct HWBreakpoint {
 479     target_ulong addr;
 480     int type;
 481 } hw_debug_points[MAX_HW_BKPTS];
 482
 483 static CPUWatchpoint hw_watchpoint;
 484
 485 /* Default there is no breakpoint and watchpoint supported */
 486 static int max_hw_breakpoint;
 487 static int max_hw_watchpoint;
 488 static int nb_hw_breakpoint;
 489 static int nb_hw_watchpoint;
 490
 491 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 492 {
 493     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 494         max_hw_breakpoint = 2;
 495         max_hw_watchpoint = 2;
 496     }
 497
 498     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 499         fprintf(stderr, "Error initializing h/w breakpoints\n");
 500         return;
 501     }
 502 }
 503
 504 int kvm_arch_init_vcpu(CPUState *cs)
 505 {
 506     PowerPCCPU *cpu = POWERPC_CPU(cs);
 507     CPUPPCState *cenv = &cpu->env;
 508     int ret;
 509
 510     /* Gather server mmu info from KVM and update the CPU state */
 511     kvm_fixup_page_sizes(cpu);
 512
 513     /* Synchronize sregs with kvm */
 514     ret = kvm_arch_sync_sregs(cpu);
 515     if (ret) {
 516         if (ret == -EINVAL) {
 517             error_report("Register sync failed... If you're using kvm-hv.ko,"
 518                          " only \"-cpu host\" is possible");
 519         }
 520         return ret;
 521     }
 522
 523     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 524
 525     /* Some targets support access to KVM's guest TLB. */
 526     switch (cenv->mmu_model) {
 527     case POWERPC_MMU_BOOKE206:
 528         ret = kvm_booke206_tlb_init(cpu);
 529         break;
 530     default:
 531         break;
 532     }
 533
 534     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 535     kvmppc_hw_debug_points_init(cenv);
 536
 537     return ret;
 538 }
 539
 540 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 541 {
 542     CPUPPCState *env = &cpu->env;
 543     CPUState *cs = CPU(cpu);
 544     struct kvm_dirty_tlb dirty_tlb;
 545     unsigned char *bitmap;
 546     int ret;
 547
 548     if (!env->kvm_sw_tlb) {
 549         return;
 550     }
 551
 552     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 553     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 554
 555     dirty_tlb.bitmap = (uintptr_t)bitmap;
 556     dirty_tlb.num_dirty = env->nb_tlb;
 557
 558     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 559     if (ret) {
 560         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 561                 __func__, strerror(-ret));
 562     }
 563
 564     g_free(bitmap);
 565 }
 566
 567 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 568 {
 569     PowerPCCPU *cpu = POWERPC_CPU(cs);
 570     CPUPPCState *env = &cpu->env;
 571     union {
 572         uint32_t u32;
 573         uint64_t u64;
 574     } val;
 575     struct kvm_one_reg reg = {
 576         .id = id,
 577         .addr = (uintptr_t) &val,
 578     };
 579     int ret;
 580
 581     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 582     if (ret != 0) {
 583         trace_kvm_failed_spr_get(spr, strerror(errno));
 584     } else {
 585         switch (id & KVM_REG_SIZE_MASK) {
 586         case KVM_REG_SIZE_U32:
 587             env->spr[spr] = val.u32;
 588             break;
 589
 590         case KVM_REG_SIZE_U64:
 591             env->spr[spr] = val.u64;
 592             break;
 593
 594         default:
 595             /* Don't handle this size yet */
 596             abort();
 597         }
 598     }
 599 }
 600
 601 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 602 {
 603     PowerPCCPU *cpu = POWERPC_CPU(cs);
 604     CPUPPCState *env = &cpu->env;
 605     union {
 606         uint32_t u32;
 607         uint64_t u64;
 608     } val;
 609     struct kvm_one_reg reg = {
 610         .id = id,
 611         .addr = (uintptr_t) &val,
 612     };
 613     int ret;
 614
 615     switch (id & KVM_REG_SIZE_MASK) {
 616     case KVM_REG_SIZE_U32:
 617         val.u32 = env->spr[spr];
 618         break;
 619
 620     case KVM_REG_SIZE_U64:
 621         val.u64 = env->spr[spr];
 622         break;
 623
 624     default:
 625         /* Don't handle this size yet */
 626         abort();
 627     }
 628
 629     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 630     if (ret != 0) {
 631         trace_kvm_failed_spr_set(spr, strerror(errno));
 632     }
 633 }
 634
 635 static int kvm_put_fp(CPUState *cs)
 636 {
 637     PowerPCCPU *cpu = POWERPC_CPU(cs);
 638     CPUPPCState *env = &cpu->env;
 639     struct kvm_one_reg reg;
 640     int i;
 641     int ret;
 642
 643     if (env->insns_flags & PPC_FLOAT) {
 644         uint64_t fpscr = env->fpscr;
 645         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 646
 647         reg.id = KVM_REG_PPC_FPSCR;
 648         reg.addr = (uintptr_t)&fpscr;
 649         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 650         if (ret < 0) {
 651             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 652             return ret;
 653         }
 654
 655         for (i = 0; i < 32; i++) {
 656             uint64_t vsr[2];
 657
 658 #ifdef HOST_WORDS_BIGENDIAN
 659             vsr[0] = float64_val(env->fpr[i]);
 660             vsr[1] = env->vsr[i];
 661 #else
 662             vsr[0] = env->vsr[i];
 663             vsr[1] = float64_val(env->fpr[i]);
 664 #endif
 665             reg.addr = (uintptr_t) &vsr;
 666             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 667
 668             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 669             if (ret < 0) {
 670                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 671                         i, strerror(errno));
 672                 return ret;
 673             }
 674         }
 675     }
 676
 677     if (env->insns_flags & PPC_ALTIVEC) {
 678         reg.id = KVM_REG_PPC_VSCR;
 679         reg.addr = (uintptr_t)&env->vscr;
 680         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 681         if (ret < 0) {
 682             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 683             return ret;
 684         }
 685
 686         for (i = 0; i < 32; i++) {
 687             reg.id = KVM_REG_PPC_VR(i);
 688             reg.addr = (uintptr_t)&env->avr[i];
 689             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 690             if (ret < 0) {
 691                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 692                 return ret;
 693             }
 694         }
 695     }
 696
 697     return 0;
 698 }
 699
 700 static int kvm_get_fp(CPUState *cs)
 701 {
 702     PowerPCCPU *cpu = POWERPC_CPU(cs);
 703     CPUPPCState *env = &cpu->env;
 704     struct kvm_one_reg reg;
 705     int i;
 706     int ret;
 707
 708     if (env->insns_flags & PPC_FLOAT) {
 709         uint64_t fpscr;
 710         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 711
 712         reg.id = KVM_REG_PPC_FPSCR;
 713         reg.addr = (uintptr_t)&fpscr;
 714         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 715         if (ret < 0) {
 716             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 717             return ret;
 718         } else {
 719             env->fpscr = fpscr;
 720         }
 721
 722         for (i = 0; i < 32; i++) {
 723             uint64_t vsr[2];
 724
 725             reg.addr = (uintptr_t) &vsr;
 726             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 727
 728             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 729             if (ret < 0) {
 730                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 731                         vsx ? "VSR" : "FPR", i, strerror(errno));
 732                 return ret;
 733             } else {
 734 #ifdef HOST_WORDS_BIGENDIAN
 735                 env->fpr[i] = vsr[0];
 736                 if (vsx) {
 737                     env->vsr[i] = vsr[1];
 738                 }
 739 #else
 740                 env->fpr[i] = vsr[1];
 741                 if (vsx) {
 742                     env->vsr[i] = vsr[0];
 743                 }
 744 #endif
 745             }
 746         }
 747     }
 748
 749     if (env->insns_flags & PPC_ALTIVEC) {
 750         reg.id = KVM_REG_PPC_VSCR;
 751         reg.addr = (uintptr_t)&env->vscr;
 752         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 753         if (ret < 0) {
 754             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 755             return ret;
 756         }
 757
 758         for (i = 0; i < 32; i++) {
 759             reg.id = KVM_REG_PPC_VR(i);
 760             reg.addr = (uintptr_t)&env->avr[i];
 761             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 762             if (ret < 0) {
 763                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 764                         i, strerror(errno));
 765                 return ret;
 766             }
 767         }
 768     }
 769
 770     return 0;
 771 }
 772
 773 #if defined(TARGET_PPC64)
 774 static int kvm_get_vpa(CPUState *cs)
 775 {
 776     PowerPCCPU *cpu = POWERPC_CPU(cs);
 777     CPUPPCState *env = &cpu->env;
 778     struct kvm_one_reg reg;
 779     int ret;
 780
 781     reg.id = KVM_REG_PPC_VPA_ADDR;
 782     reg.addr = (uintptr_t)&env->vpa_addr;
 783     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 784     if (ret < 0) {
 785         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 786         return ret;
 787     }
 788
 789     assert((uintptr_t)&env->slb_shadow_size
 790            == ((uintptr_t)&env->slb_shadow_addr + 8));
 791     reg.id = KVM_REG_PPC_VPA_SLB;
 792     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 793     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 794     if (ret < 0) {
 795         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 796                 strerror(errno));
 797         return ret;
 798     }
 799
 800     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 801     reg.id = KVM_REG_PPC_VPA_DTL;
 802     reg.addr = (uintptr_t)&env->dtl_addr;
 803     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 804     if (ret < 0) {
 805         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 806                 strerror(errno));
 807         return ret;
 808     }
 809
 810     return 0;
 811 }
 812
 813 static int kvm_put_vpa(CPUState *cs)
 814 {
 815     PowerPCCPU *cpu = POWERPC_CPU(cs);
 816     CPUPPCState *env = &cpu->env;
 817     struct kvm_one_reg reg;
 818     int ret;
 819
 820     /* SLB shadow or DTL can't be registered unless a master VPA is
 821      * registered.  That means when restoring state, if a VPA *is*
 822      * registered, we need to set that up first.  If not, we need to
 823      * deregister the others before deregistering the master VPA */
 824     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 825
 826     if (env->vpa_addr) {
 827         reg.id = KVM_REG_PPC_VPA_ADDR;
 828         reg.addr = (uintptr_t)&env->vpa_addr;
 829         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 830         if (ret < 0) {
 831             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 832             return ret;
 833         }
 834     }
 835
 836     assert((uintptr_t)&env->slb_shadow_size
 837            == ((uintptr_t)&env->slb_shadow_addr + 8));
 838     reg.id = KVM_REG_PPC_VPA_SLB;
 839     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 840     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 841     if (ret < 0) {
 842         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 843         return ret;
 844     }
 845
 846     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 847     reg.id = KVM_REG_PPC_VPA_DTL;
 848     reg.addr = (uintptr_t)&env->dtl_addr;
 849     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 850     if (ret < 0) {
 851         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 852                 strerror(errno));
 853         return ret;
 854     }
 855
 856     if (!env->vpa_addr) {
 857         reg.id = KVM_REG_PPC_VPA_ADDR;
 858         reg.addr = (uintptr_t)&env->vpa_addr;
 859         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 860         if (ret < 0) {
 861             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 862             return ret;
 863         }
 864     }
 865
 866     return 0;
 867 }
 868 #endif /* TARGET_PPC64 */
 869
 870 int kvm_arch_put_registers(CPUState *cs, int level)
 871 {
 872     PowerPCCPU *cpu = POWERPC_CPU(cs);
 873     CPUPPCState *env = &cpu->env;
 874     struct kvm_regs regs;
 875     int ret;
 876     int i;
 877
 878     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 879     if (ret < 0) {
 880         return ret;
 881     }
 882
 883     regs.ctr = env->ctr;
 884     regs.lr  = env->lr;
 885     regs.xer = cpu_read_xer(env);
 886     regs.msr = env->msr;
 887     regs.pc = env->nip;
 888
 889     regs.srr0 = env->spr[SPR_SRR0];
 890     regs.srr1 = env->spr[SPR_SRR1];
 891
 892     regs.sprg0 = env->spr[SPR_SPRG0];
 893     regs.sprg1 = env->spr[SPR_SPRG1];
 894     regs.sprg2 = env->spr[SPR_SPRG2];
 895     regs.sprg3 = env->spr[SPR_SPRG3];
 896     regs.sprg4 = env->spr[SPR_SPRG4];
 897     regs.sprg5 = env->spr[SPR_SPRG5];
 898     regs.sprg6 = env->spr[SPR_SPRG6];
 899     regs.sprg7 = env->spr[SPR_SPRG7];
 900
 901     regs.pid = env->spr[SPR_BOOKE_PID];
 902
 903     for (i = 0;i < 32; i++)
 904         regs.gpr[i] = env->gpr[i];
 905
 906     regs.cr = 0;
 907     for (i = 0; i < 8; i++) {
 908         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 909     }
 910
 911     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 912     if (ret < 0)
 913         return ret;
 914
 915     kvm_put_fp(cs);
 916
 917     if (env->tlb_dirty) {
 918         kvm_sw_tlb_put(cpu);
 919         env->tlb_dirty = false;
 920     }
 921
 922     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 923         struct kvm_sregs sregs;
 924
 925         sregs.pvr = env->spr[SPR_PVR];
 926
 927         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 928
 929         /* Sync SLB */
 930 #ifdef TARGET_PPC64
 931         for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 932             sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 933             if (env->slb[i].esid & SLB_ESID_V) {
 934                 sregs.u.s.ppc64.slb[i].slbe |= i;
 935             }
 936             sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 937         }
 938 #endif
 939
 940         /* Sync SRs */
 941         for (i = 0; i < 16; i++) {
 942             sregs.u.s.ppc32.sr[i] = env->sr[i];
 943         }
 944
 945         /* Sync BATs */
 946         for (i = 0; i < 8; i++) {
 947             /* Beware. We have to swap upper and lower bits here */
 948             sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 949                 | env->DBAT[1][i];
 950             sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 951                 | env->IBAT[1][i];
 952         }
 953
 954         ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 955         if (ret) {
 956             return ret;
 957         }
 958     }
 959
 960     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 961         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 962     }
 963
 964     if (cap_one_reg) {
 965         int i;
 966
 967         /* We deliberately ignore errors here, for kernels which have
 968          * the ONE_REG calls, but don't support the specific
 969          * registers, there's a reasonable chance things will still
 970          * work, at least until we try to migrate. */
 971         for (i = 0; i < 1024; i++) {
 972             uint64_t id = env->spr_cb[i].one_reg_id;
 973
 974             if (id != 0) {
 975                 kvm_put_one_spr(cs, id, i);
 976             }
 977         }
 978
 979 #ifdef TARGET_PPC64
 980         if (msr_ts) {
 981             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
 982                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
 983             }
 984             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
 985                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
 986             }
 987             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
 988             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
 989             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
 990             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
 991             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
 992             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
 993             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
 994             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
 995             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
 996             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
 997         }
 998
 999         if (cap_papr) {
1000             if (kvm_put_vpa(cs) < 0) {
1001                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1002             }
1003         }
1004
1005         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1006 #endif /* TARGET_PPC64 */
1007     }
1008
1009     return ret;
1010 }
1011
1012 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1013 {
1014      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1015 }
1016
1017 int kvm_arch_get_registers(CPUState *cs)
1018 {
1019     PowerPCCPU *cpu = POWERPC_CPU(cs);
1020     CPUPPCState *env = &cpu->env;
1021     struct kvm_regs regs;
1022     struct kvm_sregs sregs;
1023     uint32_t cr;
1024     int i, ret;
1025
1026     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1027     if (ret < 0)
1028         return ret;
1029
1030     cr = regs.cr;
1031     for (i = 7; i >= 0; i--) {
1032         env->crf[i] = cr & 15;
1033         cr >>= 4;
1034     }
1035
1036     env->ctr = regs.ctr;
1037     env->lr = regs.lr;
1038     cpu_write_xer(env, regs.xer);
1039     env->msr = regs.msr;
1040     env->nip = regs.pc;
1041
1042     env->spr[SPR_SRR0] = regs.srr0;
1043     env->spr[SPR_SRR1] = regs.srr1;
1044
1045     env->spr[SPR_SPRG0] = regs.sprg0;
1046     env->spr[SPR_SPRG1] = regs.sprg1;
1047     env->spr[SPR_SPRG2] = regs.sprg2;
1048     env->spr[SPR_SPRG3] = regs.sprg3;
1049     env->spr[SPR_SPRG4] = regs.sprg4;
1050     env->spr[SPR_SPRG5] = regs.sprg5;
1051     env->spr[SPR_SPRG6] = regs.sprg6;
1052     env->spr[SPR_SPRG7] = regs.sprg7;
1053
1054     env->spr[SPR_BOOKE_PID] = regs.pid;
1055
1056     for (i = 0;i < 32; i++)
1057         env->gpr[i] = regs.gpr[i];
1058
1059     kvm_get_fp(cs);
1060
1061     if (cap_booke_sregs) {
1062         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1063         if (ret < 0) {
1064             return ret;
1065         }
1066
1067         if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1068             env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1069             env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1070             env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1071             env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1072             env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1073             env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1074             env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1075             env->spr[SPR_DECR] = sregs.u.e.dec;
1076             env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1077             env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1078             env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1079         }
1080
1081         if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1082             env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1083             env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1084             env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1085             env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1086             env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1087         }
1088
1089         if (sregs.u.e.features & KVM_SREGS_E_64) {
1090             env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1091         }
1092
1093         if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1094             env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1095         }
1096
1097         if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1098             env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1099             kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1100             env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1101             kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1102             env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1103             kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1104             env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1105             kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1106             env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1107             kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1108             env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1109             kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1110             env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1111             kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1112             env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1113             kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1114             env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1115             kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1116             env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1117             kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1118             env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1119             kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1120             env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1121             kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1122             env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1123             kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1124             env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1125             kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1126             env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1127             kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1128             env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1129             kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1130
1131             if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1132                 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1133                 kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1134                 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1135                 kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1136                 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1137                 kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1138             }
1139
1140             if (sregs.u.e.features & KVM_SREGS_E_PM) {
1141                 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1142                 kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1143             }
1144
1145             if (sregs.u.e.features & KVM_SREGS_E_PC) {
1146                 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1147                 kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1148                 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1149                 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1150             }
1151         }
1152
1153         if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1154             env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1155             env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1156             env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1157             env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1158             env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1159             env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1160             env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1161             env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1162             env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1163             env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1164         }
1165
1166         if (sregs.u.e.features & KVM_SREGS_EXP) {
1167             env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1168         }
1169
1170         if (sregs.u.e.features & KVM_SREGS_E_PD) {
1171             env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1172             env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1173         }
1174
1175         if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1176             env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1177             env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1178             env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1179
1180             if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1181                 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1182                 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1183             }
1184         }
1185     }
1186
1187     if (cap_segstate) {
1188         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1189         if (ret < 0) {
1190             return ret;
1191         }
1192
1193         if (!env->external_htab) {
1194             ppc_store_sdr1(env, sregs.u.s.sdr1);
1195         }
1196
1197         /* Sync SLB */
1198 #ifdef TARGET_PPC64
1199         /*
1200          * The packed SLB array we get from KVM_GET_SREGS only contains
1201          * information about valid entries. So we flush our internal
1202          * copy to get rid of stale ones, then put all valid SLB entries
1203          * back in.
1204          */
1205         memset(env->slb, 0, sizeof(env->slb));
1206         for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1207             target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1208             target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1209             /*
1210              * Only restore valid entries
1211              */
1212             if (rb & SLB_ESID_V) {
1213                 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1214             }
1215         }
1216 #endif
1217
1218         /* Sync SRs */
1219         for (i = 0; i < 16; i++) {
1220             env->sr[i] = sregs.u.s.ppc32.sr[i];
1221         }
1222
1223         /* Sync BATs */
1224         for (i = 0; i < 8; i++) {
1225             env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1226             env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1227             env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1228             env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1229         }
1230     }
1231
1232     if (cap_hior) {
1233         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1234     }
1235
1236     if (cap_one_reg) {
1237         int i;
1238
1239         /* We deliberately ignore errors here, for kernels which have
1240          * the ONE_REG calls, but don't support the specific
1241          * registers, there's a reasonable chance things will still
1242          * work, at least until we try to migrate. */
1243         for (i = 0; i < 1024; i++) {
1244             uint64_t id = env->spr_cb[i].one_reg_id;
1245
1246             if (id != 0) {
1247                 kvm_get_one_spr(cs, id, i);
1248             }
1249         }
1250
1251 #ifdef TARGET_PPC64
1252         if (msr_ts) {
1253             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1254                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1255             }
1256             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1257                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1258             }
1259             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1260             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1261             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1262             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1263             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1264             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1265             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1266             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1267             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1268             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1269         }
1270
1271         if (cap_papr) {
1272             if (kvm_get_vpa(cs) < 0) {
1273                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1274             }
1275         }
1276
1277         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1278 #endif
1279     }
1280
1281     return 0;
1282 }
1283
1284 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1285 {
1286     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1287
1288     if (irq != PPC_INTERRUPT_EXT) {
1289         return 0;
1290     }
1291
1292     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1293         return 0;
1294     }
1295
1296     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1297
1298     return 0;
1299 }
1300
1301 #if defined(TARGET_PPCEMB)
1302 #define PPC_INPUT_INT PPC40x_INPUT_INT
1303 #elif defined(TARGET_PPC64)
1304 #define PPC_INPUT_INT PPC970_INPUT_INT
1305 #else
1306 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1307 #endif
1308
1309 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1310 {
1311     PowerPCCPU *cpu = POWERPC_CPU(cs);
1312     CPUPPCState *env = &cpu->env;
1313     int r;
1314     unsigned irq;
1315
1316     qemu_mutex_lock_iothread();
1317
1318     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1319      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1320     if (!cap_interrupt_level &&
1321         run->ready_for_interrupt_injection &&
1322         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1323         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1324     {
1325         /* For now KVM disregards the 'irq' argument. However, in the
1326          * future KVM could cache it in-kernel to avoid a heavyweight exit
1327          * when reading the UIC.
1328          */
1329         irq = KVM_INTERRUPT_SET;
1330
1331         DPRINTF("injected interrupt %d\n", irq);
1332         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1333         if (r < 0) {
1334             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1335         }
1336
1337         /* Always wake up soon in case the interrupt was level based */
1338         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1339                        (get_ticks_per_sec() / 50));
1340     }
1341
1342     /* We don't know if there are more interrupts pending after this. However,
1343      * the guest will return to userspace in the course of handling this one
1344      * anyways, so we will get a chance to deliver the rest. */
1345
1346     qemu_mutex_unlock_iothread();
1347 }
1348
1349 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1350 {
1351     return MEMTXATTRS_UNSPECIFIED;
1352 }
1353
1354 int kvm_arch_process_async_events(CPUState *cs)
1355 {
1356     return cs->halted;
1357 }
1358
1359 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1360 {
1361     CPUState *cs = CPU(cpu);
1362     CPUPPCState *env = &cpu->env;
1363
1364     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1365         cs->halted = 1;
1366         cs->exception_index = EXCP_HLT;
1367     }
1368
1369     return 0;
1370 }
1371
1372 /* map dcr access to existing qemu dcr emulation */
1373 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1374 {
1375     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1376         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1377
1378     return 0;
1379 }
1380
1381 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1382 {
1383     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1384         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1385
1386     return 0;
1387 }
1388
1389 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1390 {
1391     /* Mixed endian case is not handled */
1392     uint32_t sc = debug_inst_opcode;
1393
1394     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1395                             sizeof(sc), 0) ||
1396         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1397         return -EINVAL;
1398     }
1399
1400     return 0;
1401 }
1402
1403 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1404 {
1405     uint32_t sc;
1406
1407     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1408         sc != debug_inst_opcode ||
1409         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1410                             sizeof(sc), 1)) {
1411         return -EINVAL;
1412     }
1413
1414     return 0;
1415 }
1416
1417 static int find_hw_breakpoint(target_ulong addr, int type)
1418 {
1419     int n;
1420
1421     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1422            <= ARRAY_SIZE(hw_debug_points));
1423
1424     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1425         if (hw_debug_points[n].addr == addr &&
1426              hw_debug_points[n].type == type) {
1427             return n;
1428         }
1429     }
1430
1431     return -1;
1432 }
1433
1434 static int find_hw_watchpoint(target_ulong addr, int *flag)
1435 {
1436     int n;
1437
1438     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1439     if (n >= 0) {
1440         *flag = BP_MEM_ACCESS;
1441         return n;
1442     }
1443
1444     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1445     if (n >= 0) {
1446         *flag = BP_MEM_WRITE;
1447         return n;
1448     }
1449
1450     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1451     if (n >= 0) {
1452         *flag = BP_MEM_READ;
1453         return n;
1454     }
1455
1456     return -1;
1457 }
1458
1459 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1460                                   target_ulong len, int type)
1461 {
1462     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1463         return -ENOBUFS;
1464     }
1465
1466     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1467     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1468
1469     switch (type) {
1470     case GDB_BREAKPOINT_HW:
1471         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1472             return -ENOBUFS;
1473         }
1474
1475         if (find_hw_breakpoint(addr, type) >= 0) {
1476             return -EEXIST;
1477         }
1478
1479         nb_hw_breakpoint++;
1480         break;
1481
1482     case GDB_WATCHPOINT_WRITE:
1483     case GDB_WATCHPOINT_READ:
1484     case GDB_WATCHPOINT_ACCESS:
1485         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1486             return -ENOBUFS;
1487         }
1488
1489         if (find_hw_breakpoint(addr, type) >= 0) {
1490             return -EEXIST;
1491         }
1492
1493         nb_hw_watchpoint++;
1494         break;
1495
1496     default:
1497         return -ENOSYS;
1498     }
1499
1500     return 0;
1501 }
1502
1503 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1504                                   target_ulong len, int type)
1505 {
1506     int n;
1507
1508     n = find_hw_breakpoint(addr, type);
1509     if (n < 0) {
1510         return -ENOENT;
1511     }
1512
1513     switch (type) {
1514     case GDB_BREAKPOINT_HW:
1515         nb_hw_breakpoint--;
1516         break;
1517
1518     case GDB_WATCHPOINT_WRITE:
1519     case GDB_WATCHPOINT_READ:
1520     case GDB_WATCHPOINT_ACCESS:
1521         nb_hw_watchpoint--;
1522         break;
1523
1524     default:
1525         return -ENOSYS;
1526     }
1527     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1528
1529     return 0;
1530 }
1531
1532 void kvm_arch_remove_all_hw_breakpoints(void)
1533 {
1534     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1535 }
1536
1537 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1538 {
1539     int n;
1540
1541     /* Software Breakpoint updates */
1542     if (kvm_sw_breakpoints_active(cs)) {
1543         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1544     }
1545
1546     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1547            <= ARRAY_SIZE(hw_debug_points));
1548     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1549
1550     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1551         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1552         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1553         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1554             switch (hw_debug_points[n].type) {
1555             case GDB_BREAKPOINT_HW:
1556                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1557                 break;
1558             case GDB_WATCHPOINT_WRITE:
1559                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1560                 break;
1561             case GDB_WATCHPOINT_READ:
1562                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1563                 break;
1564             case GDB_WATCHPOINT_ACCESS:
1565                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1566                                         KVMPPC_DEBUG_WATCH_READ;
1567                 break;
1568             default:
1569                 cpu_abort(cs, "Unsupported breakpoint type\n");
1570             }
1571             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1572         }
1573     }
1574 }
1575
1576 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1577 {
1578     CPUState *cs = CPU(cpu);
1579     CPUPPCState *env = &cpu->env;
1580     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1581     int handle = 0;
1582     int n;
1583     int flag = 0;
1584
1585     if (cs->singlestep_enabled) {
1586         handle = 1;
1587     } else if (arch_info->status) {
1588         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1589             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1590                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1591                 if (n >= 0) {
1592                     handle = 1;
1593                 }
1594             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1595                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1596                 n = find_hw_watchpoint(arch_info->address,  &flag);
1597                 if (n >= 0) {
1598                     handle = 1;
1599                     cs->watchpoint_hit = &hw_watchpoint;
1600                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1601                     hw_watchpoint.flags = flag;
1602                 }
1603             }
1604         }
1605     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1606         handle = 1;
1607     } else {
1608         /* QEMU is not able to handle debug exception, so inject
1609          * program exception to guest;
1610          * Yes program exception NOT debug exception !!
1611          * When QEMU is using debug resources then debug exception must
1612          * be always set. To achieve this we set MSR_DE and also set
1613          * MSRP_DEP so guest cannot change MSR_DE.
1614          * When emulating debug resource for guest we want guest
1615          * to control MSR_DE (enable/disable debug interrupt on need).
1616          * Supporting both configurations are NOT possible.
1617          * So the result is that we cannot share debug resources
1618          * between QEMU and Guest on BOOKE architecture.
1619          * In the current design QEMU gets the priority over guest,
1620          * this means that if QEMU is using debug resources then guest
1621          * cannot use them;
1622          * For software breakpoint QEMU uses a privileged instruction;
1623          * So there cannot be any reason that we are here for guest
1624          * set debug exception, only possibility is guest executed a
1625          * privileged / illegal instruction and that's why we are
1626          * injecting a program interrupt.
1627          */
1628
1629         cpu_synchronize_state(cs);
1630         /* env->nip is PC, so increment this by 4 to use
1631          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1632          */
1633         env->nip += 4;
1634         cs->exception_index = POWERPC_EXCP_PROGRAM;
1635         env->error_code = POWERPC_EXCP_INVAL;
1636         ppc_cpu_do_interrupt(cs);
1637     }
1638
1639     return handle;
1640 }
1641
1642 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1643 {
1644     PowerPCCPU *cpu = POWERPC_CPU(cs);
1645     CPUPPCState *env = &cpu->env;
1646     int ret;
1647
1648     qemu_mutex_lock_iothread();
1649
1650     switch (run->exit_reason) {
1651     case KVM_EXIT_DCR:
1652         if (run->dcr.is_write) {
1653             DPRINTF("handle dcr write\n");
1654             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1655         } else {
1656             DPRINTF("handle dcr read\n");
1657             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1658         }
1659         break;
1660     case KVM_EXIT_HLT:
1661         DPRINTF("handle halt\n");
1662         ret = kvmppc_handle_halt(cpu);
1663         break;
1664 #if defined(TARGET_PPC64)
1665     case KVM_EXIT_PAPR_HCALL:
1666         DPRINTF("handle PAPR hypercall\n");
1667         run->papr_hcall.ret = spapr_hypercall(cpu,
1668                                               run->papr_hcall.nr,
1669                                               run->papr_hcall.args);
1670         ret = 0;
1671         break;
1672 #endif
1673     case KVM_EXIT_EPR:
1674         DPRINTF("handle epr\n");
1675         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1676         ret = 0;
1677         break;
1678     case KVM_EXIT_WATCHDOG:
1679         DPRINTF("handle watchdog expiry\n");
1680         watchdog_perform_action();
1681         ret = 0;
1682         break;
1683
1684     case KVM_EXIT_DEBUG:
1685         DPRINTF("handle debug exception\n");
1686         if (kvm_handle_debug(cpu, run)) {
1687             ret = EXCP_DEBUG;
1688             break;
1689         }
1690         /* re-enter, this exception was guest-internal */
1691         ret = 0;
1692         break;
1693
1694     default:
1695         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1696         ret = -1;
1697         break;
1698     }
1699
1700     qemu_mutex_unlock_iothread();
1701     return ret;
1702 }
1703
1704 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1705 {
1706     CPUState *cs = CPU(cpu);
1707     uint32_t bits = tsr_bits;
1708     struct kvm_one_reg reg = {
1709         .id = KVM_REG_PPC_OR_TSR,
1710         .addr = (uintptr_t) &bits,
1711     };
1712
1713     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1714 }
1715
1716 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1717 {
1718
1719     CPUState *cs = CPU(cpu);
1720     uint32_t bits = tsr_bits;
1721     struct kvm_one_reg reg = {
1722         .id = KVM_REG_PPC_CLEAR_TSR,
1723         .addr = (uintptr_t) &bits,
1724     };
1725
1726     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1727 }
1728
1729 int kvmppc_set_tcr(PowerPCCPU *cpu)
1730 {
1731     CPUState *cs = CPU(cpu);
1732     CPUPPCState *env = &cpu->env;
1733     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1734
1735     struct kvm_one_reg reg = {
1736         .id = KVM_REG_PPC_TCR,
1737         .addr = (uintptr_t) &tcr,
1738     };
1739
1740     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1741 }
1742
1743 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1744 {
1745     CPUState *cs = CPU(cpu);
1746     int ret;
1747
1748     if (!kvm_enabled()) {
1749         return -1;
1750     }
1751
1752     if (!cap_ppc_watchdog) {
1753         printf("warning: KVM does not support watchdog");
1754         return -1;
1755     }
1756
1757     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1758     if (ret < 0) {
1759         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1760                 __func__, strerror(-ret));
1761         return ret;
1762     }
1763
1764     return ret;
1765 }
1766
1767 static int read_cpuinfo(const char *field, char *value, int len)
1768 {
1769     FILE *f;
1770     int ret = -1;
1771     int field_len = strlen(field);
1772     char line[512];
1773
1774     f = fopen("/proc/cpuinfo", "r");
1775     if (!f) {
1776         return -1;
1777     }
1778
1779     do {
1780         if (!fgets(line, sizeof(line), f)) {
1781             break;
1782         }
1783         if (!strncmp(line, field, field_len)) {
1784             pstrcpy(value, len, line);
1785             ret = 0;
1786             break;
1787         }
1788     } while(*line);
1789
1790     fclose(f);
1791
1792     return ret;
1793 }
1794
1795 uint32_t kvmppc_get_tbfreq(void)
1796 {
1797     char line[512];
1798     char *ns;
1799     uint32_t retval = get_ticks_per_sec();
1800
1801     if (read_cpuinfo("timebase", line, sizeof(line))) {
1802         return retval;
1803     }
1804
1805     if (!(ns = strchr(line, ':'))) {
1806         return retval;
1807     }
1808
1809     ns++;
1810
1811     return atoi(ns);
1812 }
1813
1814 bool kvmppc_get_host_serial(char **value)
1815 {
1816     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1817                                NULL);
1818 }
1819
1820 bool kvmppc_get_host_model(char **value)
1821 {
1822     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1823 }
1824
1825 /* Try to find a device tree node for a CPU with clock-frequency property */
1826 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1827 {
1828     struct dirent *dirp;
1829     DIR *dp;
1830
1831     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1832         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1833         return -1;
1834     }
1835
1836     buf[0] = '\0';
1837     while ((dirp = readdir(dp)) != NULL) {
1838         FILE *f;
1839         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1840                  dirp->d_name);
1841         f = fopen(buf, "r");
1842         if (f) {
1843             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1844             fclose(f);
1845             break;
1846         }
1847         buf[0] = '\0';
1848     }
1849     closedir(dp);
1850     if (buf[0] == '\0') {
1851         printf("Unknown host!\n");
1852         return -1;
1853     }
1854
1855     return 0;
1856 }
1857
1858 static uint64_t kvmppc_read_int_dt(const char *filename)
1859 {
1860     union {
1861         uint32_t v32;
1862         uint64_t v64;
1863     } u;
1864     FILE *f;
1865     int len;
1866
1867     f = fopen(filename, "rb");
1868     if (!f) {
1869         return -1;
1870     }
1871
1872     len = fread(&u, 1, sizeof(u), f);
1873     fclose(f);
1874     switch (len) {
1875     case 4:
1876         /* property is a 32-bit quantity */
1877         return be32_to_cpu(u.v32);
1878     case 8:
1879         return be64_to_cpu(u.v64);
1880     }
1881
1882     return 0;
1883 }
1884
1885 /* Read a CPU node property from the host device tree that's a single
1886  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1887  * (can't find or open the property, or doesn't understand the
1888  * format) */
1889 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1890 {
1891     char buf[PATH_MAX], *tmp;
1892     uint64_t val;
1893
1894     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1895         return -1;
1896     }
1897
1898     tmp = g_strdup_printf("%s/%s", buf, propname);
1899     val = kvmppc_read_int_dt(tmp);
1900     g_free(tmp);
1901
1902     return val;
1903 }
1904
1905 uint64_t kvmppc_get_clockfreq(void)
1906 {
1907     return kvmppc_read_int_cpu_dt("clock-frequency");
1908 }
1909
1910 uint32_t kvmppc_get_vmx(void)
1911 {
1912     return kvmppc_read_int_cpu_dt("ibm,vmx");
1913 }
1914
1915 uint32_t kvmppc_get_dfp(void)
1916 {
1917     return kvmppc_read_int_cpu_dt("ibm,dfp");
1918 }
1919
1920 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1921  {
1922      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1923      CPUState *cs = CPU(cpu);
1924
1925     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1926         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1927         return 0;
1928     }
1929
1930     return 1;
1931 }
1932
1933 int kvmppc_get_hasidle(CPUPPCState *env)
1934 {
1935     struct kvm_ppc_pvinfo pvinfo;
1936
1937     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1938         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1939         return 1;
1940     }
1941
1942     return 0;
1943 }
1944
1945 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1946 {
1947     uint32_t *hc = (uint32_t*)buf;
1948     struct kvm_ppc_pvinfo pvinfo;
1949
1950     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1951         memcpy(buf, pvinfo.hcall, buf_len);
1952         return 0;
1953     }
1954
1955     /*
1956      * Fallback to always fail hypercalls regardless of endianness:
1957      *
1958      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1959      *     li r3, -1
1960      *     b .+8       (becomes nop in wrong endian)
1961      *     bswap32(li r3, -1)
1962      */
1963
1964     hc[0] = cpu_to_be32(0x08000048);
1965     hc[1] = cpu_to_be32(0x3860ffff);
1966     hc[2] = cpu_to_be32(0x48000008);
1967     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1968
1969     return 0;
1970 }
1971
1972 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1973 {
1974     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1975 }
1976
1977 void kvmppc_enable_logical_ci_hcalls(void)
1978 {
1979     /*
1980      * FIXME: it would be nice if we could detect the cases where
1981      * we're using a device which requires the in kernel
1982      * implementation of these hcalls, but the kernel lacks them and
1983      * produce a warning.
1984      */
1985     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1986     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1987 }
1988
1989 void kvmppc_enable_set_mode_hcall(void)
1990 {
1991     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
1992 }
1993
1994 void kvmppc_set_papr(PowerPCCPU *cpu)
1995 {
1996     CPUState *cs = CPU(cpu);
1997     int ret;
1998
1999     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2000     if (ret) {
2001         error_report("This vCPU type or KVM version does not support PAPR");
2002         exit(1);
2003     }
2004
2005     /* Update the capability flag so we sync the right information
2006      * with kvm */
2007     cap_papr = 1;
2008 }
2009
2010 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2011 {
2012     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2013 }
2014
2015 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2016 {
2017     CPUState *cs = CPU(cpu);
2018     int ret;
2019
2020     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2021     if (ret && mpic_proxy) {
2022         error_report("This KVM version does not support EPR");
2023         exit(1);
2024     }
2025 }
2026
2027 int kvmppc_smt_threads(void)
2028 {
2029     return cap_ppc_smt ? cap_ppc_smt : 1;
2030 }
2031
2032 #ifdef TARGET_PPC64
2033 off_t kvmppc_alloc_rma(void **rma)
2034 {
2035     off_t size;
2036     int fd;
2037     struct kvm_allocate_rma ret;
2038
2039     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2040      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2041      *                      not necessary on this hardware
2042      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2043      *
2044      * FIXME: We should allow the user to force contiguous RMA
2045      * allocation in the cap_ppc_rma==1 case.
2046      */
2047     if (cap_ppc_rma < 2) {
2048         return 0;
2049     }
2050
2051     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2052     if (fd < 0) {
2053         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2054                 strerror(errno));
2055         return -1;
2056     }
2057
2058     size = MIN(ret.rma_size, 256ul << 20);
2059
2060     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2061     if (*rma == MAP_FAILED) {
2062         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2063         return -1;
2064     };
2065
2066     return size;
2067 }
2068
2069 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2070 {
2071     struct kvm_ppc_smmu_info info;
2072     long rampagesize, best_page_shift;
2073     int i;
2074
2075     if (cap_ppc_rma >= 2) {
2076         return current_size;
2077     }
2078
2079     /* Find the largest hardware supported page size that's less than
2080      * or equal to the (logical) backing page size of guest RAM */
2081     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2082     rampagesize = getrampagesize();
2083     best_page_shift = 0;
2084
2085     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2086         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2087
2088         if (!sps->page_shift) {
2089             continue;
2090         }
2091
2092         if ((sps->page_shift > best_page_shift)
2093             && ((1UL << sps->page_shift) <= rampagesize)) {
2094             best_page_shift = sps->page_shift;
2095         }
2096     }
2097
2098     return MIN(current_size,
2099                1ULL << (best_page_shift + hash_shift - 7));
2100 }
2101 #endif
2102
2103 bool kvmppc_spapr_use_multitce(void)
2104 {
2105     return cap_spapr_multitce;
2106 }
2107
2108 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2109                               bool need_vfio)
2110 {
2111     struct kvm_create_spapr_tce args = {
2112         .liobn = liobn,
2113         .window_size = window_size,
2114     };
2115     long len;
2116     int fd;
2117     void *table;
2118
2119     /* Must set fd to -1 so we don't try to munmap when called for
2120      * destroying the table, which the upper layers -will- do
2121      */
2122     *pfd = -1;
2123     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2124         return NULL;
2125     }
2126
2127     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2128     if (fd < 0) {
2129         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2130                 liobn);
2131         return NULL;
2132     }
2133
2134     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2135     /* FIXME: round this up to page size */
2136
2137     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2138     if (table == MAP_FAILED) {
2139         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2140                 liobn);
2141         close(fd);
2142         return NULL;
2143     }
2144
2145     *pfd = fd;
2146     return table;
2147 }
2148
2149 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2150 {
2151     long len;
2152
2153     if (fd < 0) {
2154         return -1;
2155     }
2156
2157     len = nb_table * sizeof(uint64_t);
2158     if ((munmap(table, len) < 0) ||
2159         (close(fd) < 0)) {
2160         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2161                 strerror(errno));
2162         /* Leak the table */
2163     }
2164
2165     return 0;
2166 }
2167
2168 int kvmppc_reset_htab(int shift_hint)
2169 {
2170     uint32_t shift = shift_hint;
2171
2172     if (!kvm_enabled()) {
2173         /* Full emulation, tell caller to allocate htab itself */
2174         return 0;
2175     }
2176     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2177         int ret;
2178         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2179         if (ret == -ENOTTY) {
2180             /* At least some versions of PR KVM advertise the
2181              * capability, but don't implement the ioctl().  Oops.
2182              * Return 0 so that we allocate the htab in qemu, as is
2183              * correct for PR. */
2184             return 0;
2185         } else if (ret < 0) {
2186             return ret;
2187         }
2188         return shift;
2189     }
2190
2191     /* We have a kernel that predates the htab reset calls.  For PR
2192      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2193      * this era, it has allocated a 16MB fixed size hash table
2194      * already.  Kernels of this era have the GET_PVINFO capability
2195      * only on PR, so we use this hack to determine the right
2196      * answer */
2197     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2198         /* PR - tell caller to allocate htab */
2199         return 0;
2200     } else {
2201         /* HV - assume 16MB kernel allocated htab */
2202         return 24;
2203     }
2204 }
2205
2206 static inline uint32_t mfpvr(void)
2207 {
2208     uint32_t pvr;
2209
2210     asm ("mfpvr %0"
2211          : "=r"(pvr));
2212     return pvr;
2213 }
2214
2215 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2216 {
2217     if (on) {
2218         *word |= flags;
2219     } else {
2220         *word &= ~flags;
2221     }
2222 }
2223
2224 static void kvmppc_host_cpu_initfn(Object *obj)
2225 {
2226     assert(kvm_enabled());
2227 }
2228
2229 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2230 {
2231     DeviceClass *dc = DEVICE_CLASS(oc);
2232     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2233     uint32_t vmx = kvmppc_get_vmx();
2234     uint32_t dfp = kvmppc_get_dfp();
2235     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2236     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2237
2238     /* Now fix up the class with information we can query from the host */
2239     pcc->pvr = mfpvr();
2240
2241     if (vmx != -1) {
2242         /* Only override when we know what the host supports */
2243         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2244         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2245     }
2246     if (dfp != -1) {
2247         /* Only override when we know what the host supports */
2248         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2249     }
2250
2251     if (dcache_size != -1) {
2252         pcc->l1_dcache_size = dcache_size;
2253     }
2254
2255     if (icache_size != -1) {
2256         pcc->l1_icache_size = icache_size;
2257     }
2258
2259     /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2260     dc->cannot_destroy_with_object_finalize_yet = true;
2261 }
2262
2263 bool kvmppc_has_cap_epr(void)
2264 {
2265     return cap_epr;
2266 }
2267
2268 bool kvmppc_has_cap_htab_fd(void)
2269 {
2270     return cap_htab_fd;
2271 }
2272
2273 bool kvmppc_has_cap_fixup_hcalls(void)
2274 {
2275     return cap_fixup_hcalls;
2276 }
2277
2278 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2279 {
2280     ObjectClass *oc = OBJECT_CLASS(pcc);
2281
2282     while (oc && !object_class_is_abstract(oc)) {
2283         oc = object_class_get_parent(oc);
2284     }
2285     assert(oc);
2286
2287     return POWERPC_CPU_CLASS(oc);
2288 }
2289
2290 static int kvm_ppc_register_host_cpu_type(void)
2291 {
2292     TypeInfo type_info = {
2293         .name = TYPE_HOST_POWERPC_CPU,
2294         .instance_init = kvmppc_host_cpu_initfn,
2295         .class_init = kvmppc_host_cpu_class_init,
2296     };
2297     uint32_t host_pvr = mfpvr();
2298     PowerPCCPUClass *pvr_pcc;
2299     DeviceClass *dc;
2300
2301     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2302     if (pvr_pcc == NULL) {
2303         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2304     }
2305     if (pvr_pcc == NULL) {
2306         return -1;
2307     }
2308     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2309     type_register(&type_info);
2310
2311     /* Register generic family CPU class for a family */
2312     pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2313     dc = DEVICE_CLASS(pvr_pcc);
2314     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2315     type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2316     type_register(&type_info);
2317
2318     return 0;
2319 }
2320
2321 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2322 {
2323     struct kvm_rtas_token_args args = {
2324         .token = token,
2325     };
2326
2327     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2328         return -ENOENT;
2329     }
2330
2331     strncpy(args.name, function, sizeof(args.name));
2332
2333     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2334 }
2335
2336 int kvmppc_get_htab_fd(bool write)
2337 {
2338     struct kvm_get_htab_fd s = {
2339         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2340         .start_index = 0,
2341     };
2342
2343     if (!cap_htab_fd) {
2344         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2345         return -1;
2346     }
2347
2348     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2349 }
2350
2351 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2352 {
2353     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2354     uint8_t buf[bufsize];
2355     ssize_t rc;
2356
2357     do {
2358         rc = read(fd, buf, bufsize);
2359         if (rc < 0) {
2360             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2361                     strerror(errno));
2362             return rc;
2363         } else if (rc) {
2364             uint8_t *buffer = buf;
2365             ssize_t n = rc;
2366             while (n) {
2367                 struct kvm_get_htab_header *head =
2368                     (struct kvm_get_htab_header *) buffer;
2369                 size_t chunksize = sizeof(*head) +
2370                      HASH_PTE_SIZE_64 * head->n_valid;
2371
2372                 qemu_put_be32(f, head->index);
2373                 qemu_put_be16(f, head->n_valid);
2374                 qemu_put_be16(f, head->n_invalid);
2375                 qemu_put_buffer(f, (void *)(head + 1),
2376                                 HASH_PTE_SIZE_64 * head->n_valid);
2377
2378                 buffer += chunksize;
2379                 n -= chunksize;
2380             }
2381         }
2382     } while ((rc != 0)
2383              && ((max_ns < 0)
2384                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2385
2386     return (rc == 0) ? 1 : 0;
2387 }
2388
2389 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2390                            uint16_t n_valid, uint16_t n_invalid)
2391 {
2392     struct kvm_get_htab_header *buf;
2393     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2394     ssize_t rc;
2395
2396     buf = alloca(chunksize);
2397     buf->index = index;
2398     buf->n_valid = n_valid;
2399     buf->n_invalid = n_invalid;
2400
2401     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2402
2403     rc = write(fd, buf, chunksize);
2404     if (rc < 0) {
2405         fprintf(stderr, "Error writing KVM hash table: %s\n",
2406                 strerror(errno));
2407         return rc;
2408     }
2409     if (rc != chunksize) {
2410         /* We should never get a short write on a single chunk */
2411         fprintf(stderr, "Short write, restoring KVM hash table\n");
2412         return -1;
2413     }
2414     return 0;
2415 }
2416
2417 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2418 {
2419     return true;
2420 }
2421
2422 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2423 {
2424     return 1;
2425 }
2426
2427 int kvm_arch_on_sigbus(int code, void *addr)
2428 {
2429     return 1;
2430 }
2431
2432 void kvm_arch_init_irq_routing(KVMState *s)
2433 {
2434 }
2435
2436 struct kvm_get_htab_buf {
2437     struct kvm_get_htab_header header;
2438     /*
2439      * We require one extra byte for read
2440      */
2441     target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2442 };
2443
2444 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2445 {
2446     int htab_fd;
2447     struct kvm_get_htab_fd ghf;
2448     struct kvm_get_htab_buf  *hpte_buf;
2449
2450     ghf.flags = 0;
2451     ghf.start_index = pte_index;
2452     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2453     if (htab_fd < 0) {
2454         goto error_out;
2455     }
2456
2457     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2458     /*
2459      * Read the hpte group
2460      */
2461     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2462         goto out_close;
2463     }
2464
2465     close(htab_fd);
2466     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2467
2468 out_close:
2469     g_free(hpte_buf);
2470     close(htab_fd);
2471 error_out:
2472     return 0;
2473 }
2474
2475 void kvmppc_hash64_free_pteg(uint64_t token)
2476 {
2477     struct kvm_get_htab_buf *htab_buf;
2478
2479     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2480                             hpte);
2481     g_free(htab_buf);
2482     return;
2483 }
2484
2485 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2486                              target_ulong pte0, target_ulong pte1)
2487 {
2488     int htab_fd;
2489     struct kvm_get_htab_fd ghf;
2490     struct kvm_get_htab_buf hpte_buf;
2491
2492     ghf.flags = 0;
2493     ghf.start_index = 0;     /* Ignored */
2494     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2495     if (htab_fd < 0) {
2496         goto error_out;
2497     }
2498
2499     hpte_buf.header.n_valid = 1;
2500     hpte_buf.header.n_invalid = 0;
2501     hpte_buf.header.index = pte_index;
2502     hpte_buf.hpte[0] = pte0;
2503     hpte_buf.hpte[1] = pte1;
2504     /*
2505      * Write the hpte entry.
2506      * CAUTION: write() has the warn_unused_result attribute. Hence we
2507      * need to check the return value, even though we do nothing.
2508      */
2509     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2510         goto out_close;
2511     }
2512
2513 out_close:
2514     close(htab_fd);
2515     return;
2516
2517 error_out:
2518     return;
2519 }
2520
2521 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2522                              uint64_t address, uint32_t data, PCIDevice *dev)
2523 {
2524     return 0;
2525 }
2526
2527 int kvm_arch_msi_data_to_gsi(uint32_t data)
2528 {
2529     return data & 0xffff;
2530 }
2531
2532 int kvmppc_enable_hwrng(void)
2533 {
2534     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2535         return -1;
2536     }
2537
2538     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2539 }