[qemu.git] / target-ppc / kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "sysemu/watchdog.h"
39 #include "trace.h"
41 //#define DEBUG_KVM
43 #ifdef DEBUG_KVM
44 #define DPRINTF(fmt, ...) \
45 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
46 #else
47 #define DPRINTF(fmt, ...) \
48 do { } while (0)
49 #endif
51 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
53 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
54 KVM_CAP_LAST_INFO
57 static int cap_interrupt_unset = false;
58 static int cap_interrupt_level = false;
59 static int cap_segstate;
60 static int cap_booke_sregs;
61 static int cap_ppc_smt;
62 static int cap_ppc_rma;
63 static int cap_spapr_tce;
64 static int cap_hior;
65 static int cap_one_reg;
66 static int cap_epr;
67 static int cap_ppc_watchdog;
68 static int cap_papr;
69 static int cap_htab_fd;
71 /* XXX We have a race condition where we actually have a level triggered
72 * interrupt, but the infrastructure can't expose that yet, so the guest
73 * takes but ignores it, goes to sleep and never gets notified that there's
74 * still an interrupt pending.
76 * As a quick workaround, let's just wake up again 20 ms after we injected
77 * an interrupt. That way we can ensure that we're always reinjecting
78 * interrupts in case the guest swallowed them.
80 static QEMUTimer *idle_timer;
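/* Timer callback: kick the vCPU so a possibly swallowed level-triggered
 * interrupt gets another chance to be reinjected (see the note above). */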
82 static void kvm_kick_cpu(void *opaque)
84 PowerPCCPU *cpu = opaque;
86 qemu_cpu_kick(CPU(cpu));
89 static int kvm_ppc_register_host_cpu_type(void);
91 int kvm_arch_init(KVMState *s)
93 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
94 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
95 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
96 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
97 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
98 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
99 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
100 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
101 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
102 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
103 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
104 /* Note: we don't set cap_papr here, because this capability is
105 * only activated after this by kvmppc_set_papr() */
106 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
108 if (!cap_interrupt_level) {
109 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
110 "VM to stall at times!\n");
113 kvm_ppc_register_host_cpu_type();
115 return 0;
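/* Tell KVM which CPU model we emulate by pushing the guest PVR through the
 * sregs interface (a no-op on BookE, which keeps the native PVR for now). */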
118 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
120 CPUPPCState *cenv = &cpu->env;
121 CPUState *cs = CPU(cpu);
122 struct kvm_sregs sregs;
123 int ret;
125 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
126 /* What we're really trying to say is "if we're on BookE, we use
127 the native PVR for now". This is the only sane way to check
128 it though, so we potentially confuse users into thinking they can run
129 BookE guests on BookS. Let's hope nobody dares enough :) */
130 return 0;
131 } else {
132 if (!cap_segstate) {
133 fprintf(stderr, "kvm error: missing PVR setting capability\n");
134 return -ENOSYS;
138 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
139 if (ret) {
140 return ret;
143 sregs.pvr = cenv->spr[SPR_PVR];
144 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
147 /* Set up a shared TLB array with KVM */
148 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
150 CPUPPCState *env = &cpu->env;
151 CPUState *cs = CPU(cpu);
152 struct kvm_book3e_206_tlb_params params = {};
153 struct kvm_config_tlb cfg = {};
154 unsigned int entries = 0;
155 int ret, i;
157 if (!kvm_enabled() ||
158 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
159 return 0;
162 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
164 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
165 params.tlb_sizes[i] = booke206_tlb_size(env, i);
166 params.tlb_ways[i] = booke206_tlb_ways(env, i);
167 entries += params.tlb_sizes[i];
170 assert(entries == env->nb_tlb);
171 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
173 env->tlb_dirty = true;
175 cfg.array = (uintptr_t)env->tlb.tlbm;
176 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
177 cfg.params = (uintptr_t)&params;
178 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
180 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
181 if (ret < 0) {
182 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
183 __func__, strerror(-ret));
184 return ret;
187 env->kvm_sw_tlb = true;
188 return 0;
192 #if defined(TARGET_PPC64)
193 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
194 struct kvm_ppc_smmu_info *info)
196 CPUPPCState *env = &cpu->env;
197 CPUState *cs = CPU(cpu);
199 memset(info, 0, sizeof(*info));
201 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
202 * we need to "guess" what the supported page sizes are.
204 * For that to work we make a few assumptions:
206 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
207 * KVM which only supports 4K and 16M pages, but supports them
208 * regardless of the backing store characteristics. We also don't
209 * support 1T segments.
211 * This is safe because if HV KVM ever supports that capability or PR
212 * KVM grows support for more page/segment sizes, those versions
213 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
214 * will not hit this fallback
216 * - Else we are running HV KVM. This means we only support page
217 * sizes that fit in the backing store. Additionally we only
218 * advertise 64K pages if the processor is ARCH 2.06 and we assume
219 * P7 encodings for the SLB and hash table. Here too, we assume
220 * support for any newer processor will mean a kernel that
221 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
222 * this fallback.
224 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
225 /* No flags */
226 info->flags = 0;
227 info->slb_size = 64;
229 /* Standard 4k base page size segment */
230 info->sps[0].page_shift = 12;
231 info->sps[0].slb_enc = 0;
232 info->sps[0].enc[0].page_shift = 12;
233 info->sps[0].enc[0].pte_enc = 0;
235 /* Standard 16M large page size segment */
236 info->sps[1].page_shift = 24;
237 info->sps[1].slb_enc = SLB_VSID_L;
238 info->sps[1].enc[0].page_shift = 24;
239 info->sps[1].enc[0].pte_enc = 0;
240 } else {
241 int i = 0;
243 /* HV KVM has backing store size restrictions */
244 info->flags = KVM_PPC_PAGE_SIZES_REAL;
246 if (env->mmu_model & POWERPC_MMU_1TSEG) {
247 info->flags |= KVM_PPC_1T_SEGMENTS;
250 if (env->mmu_model == POWERPC_MMU_2_06) {
251 info->slb_size = 32;
252 } else {
253 info->slb_size = 64;
256 /* Standard 4k base page size segment */
257 info->sps[i].page_shift = 12;
258 info->sps[i].slb_enc = 0;
259 info->sps[i].enc[0].page_shift = 12;
260 info->sps[i].enc[0].pte_enc = 0;
261 i++;
263 /* 64K on MMU 2.06 */
264 if (env->mmu_model == POWERPC_MMU_2_06) {
265 info->sps[i].page_shift = 16;
266 info->sps[i].slb_enc = 0x110;
267 info->sps[i].enc[0].page_shift = 16;
268 info->sps[i].enc[0].pte_enc = 1;
269 i++;
272 /* Standard 16M large page size segment */
273 info->sps[i].page_shift = 24;
274 info->sps[i].slb_enc = SLB_VSID_L;
275 info->sps[i].enc[0].page_shift = 24;
276 info->sps[i].enc[0].pte_enc = 0;
280 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
282 CPUState *cs = CPU(cpu);
283 int ret;
285 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
286 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
287 if (ret == 0) {
288 return;
292 kvm_get_fallback_smmu_info(cpu, info);
295 static long getrampagesize(void)
297 struct statfs fs;
298 int ret;
300 if (!mem_path) {
301 /* guest RAM is backed by normal anonymous pages */
302 return getpagesize();
305 do {
306 ret = statfs(mem_path, &fs);
307 } while (ret != 0 && errno == EINTR);
309 if (ret != 0) {
310 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
311 strerror(errno));
312 exit(1);
315 #define HUGETLBFS_MAGIC 0x958458f6
317 if (fs.f_type != HUGETLBFS_MAGIC) {
318 /* Explicit mempath, but it's ordinary pages */
319 return getpagesize();
322 /* It's hugetlbfs, so return the huge page size */
323 return fs.f_bsize;
326 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
328 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
329 return true;
332 return (1ul << shift) <= rampgsize;
335 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
337 static struct kvm_ppc_smmu_info smmu_info;
338 static bool has_smmu_info;
339 CPUPPCState *env = &cpu->env;
340 long rampagesize;
341 int iq, ik, jq, jk;
343 /* We only handle page sizes for 64-bit server guests for now */
344 if (!(env->mmu_model & POWERPC_MMU_64)) {
345 return;
348 /* Collect MMU info from kernel if not already */
349 if (!has_smmu_info) {
350 kvm_get_smmu_info(cpu, &smmu_info);
351 has_smmu_info = true;
354 rampagesize = getrampagesize();
356 /* Convert to QEMU form */
357 memset(&env->sps, 0, sizeof(env->sps));
359 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
360 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
361 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
363 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
364 ksps->page_shift)) {
365 continue;
367 qsps->page_shift = ksps->page_shift;
368 qsps->slb_enc = ksps->slb_enc;
369 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
370 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
371 ksps->enc[jk].page_shift)) {
372 continue;
374 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
375 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
376 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
377 break;
380 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
381 break;
384 env->slb_nr = smmu_info.slb_size;
385 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
386 env->mmu_model |= POWERPC_MMU_1TSEG;
387 } else {
388 env->mmu_model &= ~POWERPC_MMU_1TSEG;
391 #else /* defined (TARGET_PPC64) */
393 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
397 #endif /* !defined (TARGET_PPC64) */
399 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
401 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
404 int kvm_arch_init_vcpu(CPUState *cs)
406 PowerPCCPU *cpu = POWERPC_CPU(cs);
407 CPUPPCState *cenv = &cpu->env;
408 int ret;
410 /* Gather server mmu info from KVM and update the CPU state */
411 kvm_fixup_page_sizes(cpu);
413 /* Synchronize sregs with kvm */
414 ret = kvm_arch_sync_sregs(cpu);
415 if (ret) {
416 return ret;
419 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
421 /* Some targets support access to KVM's guest TLB. */
422 switch (cenv->mmu_model) {
423 case POWERPC_MMU_BOOKE206:
424 ret = kvm_booke206_tlb_init(cpu);
425 break;
426 default:
427 break;
430 return ret;
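/* Push QEMU's shadow copy of the guest TLB back into KVM, marking every
 * entry dirty so the kernel reloads the whole array. */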
433 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
435 CPUPPCState *env = &cpu->env;
436 CPUState *cs = CPU(cpu);
437 struct kvm_dirty_tlb dirty_tlb;
438 unsigned char *bitmap;
439 int ret;
441 if (!env->kvm_sw_tlb) {
442 return;
445 bitmap = g_malloc((env->nb_tlb + 7) / 8);
446 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
448 dirty_tlb.bitmap = (uintptr_t)bitmap;
449 dirty_tlb.num_dirty = env->nb_tlb;
451 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
452 if (ret) {
453 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
454 __func__, strerror(-ret));
457 g_free(bitmap);
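/* Read a single SPR from KVM through the ONE_REG interface into env->spr[]. */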
460 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
462 PowerPCCPU *cpu = POWERPC_CPU(cs);
463 CPUPPCState *env = &cpu->env;
464 union {
465 uint32_t u32;
466 uint64_t u64;
467 } val;
468 struct kvm_one_reg reg = {
469 .id = id,
470 .addr = (uintptr_t) &val,
472 int ret;
474 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
475 if (ret != 0) {
476 trace_kvm_failed_spr_get(spr, strerror(errno));
477 } else {
478 switch (id & KVM_REG_SIZE_MASK) {
479 case KVM_REG_SIZE_U32:
480 env->spr[spr] = val.u32;
481 break;
483 case KVM_REG_SIZE_U64:
484 env->spr[spr] = val.u64;
485 break;
487 default:
488 /* Don't handle this size yet */
489 abort();
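/* Write a single SPR from env->spr[] to KVM through the ONE_REG interface. */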
494 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
496 PowerPCCPU *cpu = POWERPC_CPU(cs);
497 CPUPPCState *env = &cpu->env;
498 union {
499 uint32_t u32;
500 uint64_t u64;
501 } val;
502 struct kvm_one_reg reg = {
503 .id = id,
504 .addr = (uintptr_t) &val,
506 int ret;
508 switch (id & KVM_REG_SIZE_MASK) {
509 case KVM_REG_SIZE_U32:
510 val.u32 = env->spr[spr];
511 break;
513 case KVM_REG_SIZE_U64:
514 val.u64 = env->spr[spr];
515 break;
517 default:
518 /* Don't handle this size yet */
519 abort();
522 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
523 if (ret != 0) {
524 trace_kvm_failed_spr_set(spr, strerror(errno));
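/* Push floating point/VSX state (FPSCR plus the FPR/VSR array) and Altivec
 * state (VSCR plus the VR array) to KVM, one ONE_REG call per register. */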
528 static int kvm_put_fp(CPUState *cs)
530 PowerPCCPU *cpu = POWERPC_CPU(cs);
531 CPUPPCState *env = &cpu->env;
532 struct kvm_one_reg reg;
533 int i;
534 int ret;
536 if (env->insns_flags & PPC_FLOAT) {
537 uint64_t fpscr = env->fpscr;
538 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
540 reg.id = KVM_REG_PPC_FPSCR;
541 reg.addr = (uintptr_t)&fpscr;
542 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
543 if (ret < 0) {
544 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
545 return ret;
548 for (i = 0; i < 32; i++) {
549 uint64_t vsr[2];
551 vsr[0] = float64_val(env->fpr[i]);
552 vsr[1] = env->vsr[i];
553 reg.addr = (uintptr_t) &vsr;
554 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
556 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
557 if (ret < 0) {
558 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
559 i, strerror(errno));
560 return ret;
565 if (env->insns_flags & PPC_ALTIVEC) {
566 reg.id = KVM_REG_PPC_VSCR;
567 reg.addr = (uintptr_t)&env->vscr;
568 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
569 if (ret < 0) {
570 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
571 return ret;
574 for (i = 0; i < 32; i++) {
575 reg.id = KVM_REG_PPC_VR(i);
576 reg.addr = (uintptr_t)&env->avr[i];
577 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
578 if (ret < 0) {
579 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
580 return ret;
585 return 0;
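/* Fetch the same floating point/VSX and Altivec state back from KVM; this is
 * the mirror of kvm_put_fp() above. */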
588 static int kvm_get_fp(CPUState *cs)
590 PowerPCCPU *cpu = POWERPC_CPU(cs);
591 CPUPPCState *env = &cpu->env;
592 struct kvm_one_reg reg;
593 int i;
594 int ret;
596 if (env->insns_flags & PPC_FLOAT) {
597 uint64_t fpscr;
598 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
600 reg.id = KVM_REG_PPC_FPSCR;
601 reg.addr = (uintptr_t)&fpscr;
602 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
603 if (ret < 0) {
604 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
605 return ret;
606 } else {
607 env->fpscr = fpscr;
610 for (i = 0; i < 32; i++) {
611 uint64_t vsr[2];
613 reg.addr = (uintptr_t) &vsr;
614 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
616 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
617 if (ret < 0) {
618 DPRINTF("Unable to get %s%d from KVM: %s\n",
619 vsx ? "VSR" : "FPR", i, strerror(errno));
620 return ret;
621 } else {
622 env->fpr[i] = vsr[0];
623 if (vsx) {
624 env->vsr[i] = vsr[1];
630 if (env->insns_flags & PPC_ALTIVEC) {
631 reg.id = KVM_REG_PPC_VSCR;
632 reg.addr = (uintptr_t)&env->vscr;
633 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
634 if (ret < 0) {
635 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
636 return ret;
639 for (i = 0; i < 32; i++) {
640 reg.id = KVM_REG_PPC_VR(i);
641 reg.addr = (uintptr_t)&env->avr[i];
642 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
643 if (ret < 0) {
644 DPRINTF("Unable to get VR%d from KVM: %s\n",
645 i, strerror(errno));
646 return ret;
651 return 0;
654 #if defined(TARGET_PPC64)
655 static int kvm_get_vpa(CPUState *cs)
657 PowerPCCPU *cpu = POWERPC_CPU(cs);
658 CPUPPCState *env = &cpu->env;
659 struct kvm_one_reg reg;
660 int ret;
662 reg.id = KVM_REG_PPC_VPA_ADDR;
663 reg.addr = (uintptr_t)&env->vpa_addr;
664 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
665 if (ret < 0) {
666 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
667 return ret;
670 assert((uintptr_t)&env->slb_shadow_size
671 == ((uintptr_t)&env->slb_shadow_addr + 8));
672 reg.id = KVM_REG_PPC_VPA_SLB;
673 reg.addr = (uintptr_t)&env->slb_shadow_addr;
674 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
675 if (ret < 0) {
676 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
677 strerror(errno));
678 return ret;
681 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
682 reg.id = KVM_REG_PPC_VPA_DTL;
683 reg.addr = (uintptr_t)&env->dtl_addr;
684 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
685 if (ret < 0) {
686 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
687 strerror(errno));
688 return ret;
691 return 0;
694 static int kvm_put_vpa(CPUState *cs)
696 PowerPCCPU *cpu = POWERPC_CPU(cs);
697 CPUPPCState *env = &cpu->env;
698 struct kvm_one_reg reg;
699 int ret;
701 /* SLB shadow or DTL can't be registered unless a master VPA is
702 * registered. That means when restoring state, if a VPA *is*
703 * registered, we need to set that up first. If not, we need to
704 * deregister the others before deregistering the master VPA */
705 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
707 if (env->vpa_addr) {
708 reg.id = KVM_REG_PPC_VPA_ADDR;
709 reg.addr = (uintptr_t)&env->vpa_addr;
710 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
711 if (ret < 0) {
712 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
713 return ret;
717 assert((uintptr_t)&env->slb_shadow_size
718 == ((uintptr_t)&env->slb_shadow_addr + 8));
719 reg.id = KVM_REG_PPC_VPA_SLB;
720 reg.addr = (uintptr_t)&env->slb_shadow_addr;
721 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
722 if (ret < 0) {
723 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
724 return ret;
727 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
728 reg.id = KVM_REG_PPC_VPA_DTL;
729 reg.addr = (uintptr_t)&env->dtl_addr;
730 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
731 if (ret < 0) {
732 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
733 strerror(errno));
734 return ret;
737 if (!env->vpa_addr) {
738 reg.id = KVM_REG_PPC_VPA_ADDR;
739 reg.addr = (uintptr_t)&env->vpa_addr;
740 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
741 if (ret < 0) {
742 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
743 return ret;
747 return 0;
749 #endif /* TARGET_PPC64 */
751 int kvm_arch_put_registers(CPUState *cs, int level)
753 PowerPCCPU *cpu = POWERPC_CPU(cs);
754 CPUPPCState *env = &cpu->env;
755 struct kvm_regs regs;
756 int ret;
757 int i;
759 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
760 if (ret < 0) {
761 return ret;
764 regs.ctr = env->ctr;
765 regs.lr = env->lr;
766 regs.xer = cpu_read_xer(env);
767 regs.msr = env->msr;
768 regs.pc = env->nip;
770 regs.srr0 = env->spr[SPR_SRR0];
771 regs.srr1 = env->spr[SPR_SRR1];
773 regs.sprg0 = env->spr[SPR_SPRG0];
774 regs.sprg1 = env->spr[SPR_SPRG1];
775 regs.sprg2 = env->spr[SPR_SPRG2];
776 regs.sprg3 = env->spr[SPR_SPRG3];
777 regs.sprg4 = env->spr[SPR_SPRG4];
778 regs.sprg5 = env->spr[SPR_SPRG5];
779 regs.sprg6 = env->spr[SPR_SPRG6];
780 regs.sprg7 = env->spr[SPR_SPRG7];
782 regs.pid = env->spr[SPR_BOOKE_PID];
784 for (i = 0;i < 32; i++)
785 regs.gpr[i] = env->gpr[i];
787 regs.cr = 0;
788 for (i = 0; i < 8; i++) {
789 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
792 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
793 if (ret < 0)
794 return ret;
796 kvm_put_fp(cs);
798 if (env->tlb_dirty) {
799 kvm_sw_tlb_put(cpu);
800 env->tlb_dirty = false;
803 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
804 struct kvm_sregs sregs;
806 sregs.pvr = env->spr[SPR_PVR];
808 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
810 /* Sync SLB */
811 #ifdef TARGET_PPC64
812 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
813 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
814 if (env->slb[i].esid & SLB_ESID_V) {
815 sregs.u.s.ppc64.slb[i].slbe |= i;
817 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
819 #endif
821 /* Sync SRs */
822 for (i = 0; i < 16; i++) {
823 sregs.u.s.ppc32.sr[i] = env->sr[i];
826 /* Sync BATs */
827 for (i = 0; i < 8; i++) {
828 /* Beware: we have to swap the upper and lower halves here */
829 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
830 | env->DBAT[1][i];
831 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
832 | env->IBAT[1][i];
835 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
836 if (ret) {
837 return ret;
841 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
842 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
845 if (cap_one_reg) {
846 int i;
848 /* We deliberately ignore errors here; for kernels which have
849 * the ONE_REG calls but don't support the specific
850 * registers, there's a reasonable chance things will still
851 * work, at least until we try to migrate. */
852 for (i = 0; i < 1024; i++) {
853 uint64_t id = env->spr_cb[i].one_reg_id;
855 if (id != 0) {
856 kvm_put_one_spr(cs, id, i);
860 #ifdef TARGET_PPC64
861 if (cap_papr) {
862 if (kvm_put_vpa(cs) < 0) {
863 DPRINTF("Warning: Unable to set VPA information to KVM\n");
866 #endif /* TARGET_PPC64 */
869 return ret;
872 int kvm_arch_get_registers(CPUState *cs)
874 PowerPCCPU *cpu = POWERPC_CPU(cs);
875 CPUPPCState *env = &cpu->env;
876 struct kvm_regs regs;
877 struct kvm_sregs sregs;
878 uint32_t cr;
879 int i, ret;
881 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
882 if (ret < 0)
883 return ret;
885 cr = regs.cr;
886 for (i = 7; i >= 0; i--) {
887 env->crf[i] = cr & 15;
888 cr >>= 4;
891 env->ctr = regs.ctr;
892 env->lr = regs.lr;
893 cpu_write_xer(env, regs.xer);
894 env->msr = regs.msr;
895 env->nip = regs.pc;
897 env->spr[SPR_SRR0] = regs.srr0;
898 env->spr[SPR_SRR1] = regs.srr1;
900 env->spr[SPR_SPRG0] = regs.sprg0;
901 env->spr[SPR_SPRG1] = regs.sprg1;
902 env->spr[SPR_SPRG2] = regs.sprg2;
903 env->spr[SPR_SPRG3] = regs.sprg3;
904 env->spr[SPR_SPRG4] = regs.sprg4;
905 env->spr[SPR_SPRG5] = regs.sprg5;
906 env->spr[SPR_SPRG6] = regs.sprg6;
907 env->spr[SPR_SPRG7] = regs.sprg7;
909 env->spr[SPR_BOOKE_PID] = regs.pid;
911 for (i = 0;i < 32; i++)
912 env->gpr[i] = regs.gpr[i];
914 kvm_get_fp(cs);
916 if (cap_booke_sregs) {
917 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
918 if (ret < 0) {
919 return ret;
922 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
923 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
924 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
925 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
926 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
927 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
928 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
929 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
930 env->spr[SPR_DECR] = sregs.u.e.dec;
931 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
932 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
933 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
936 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
937 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
938 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
939 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
940 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
941 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
944 if (sregs.u.e.features & KVM_SREGS_E_64) {
945 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
948 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
949 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
952 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
953 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
954 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
955 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
956 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
957 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
958 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
959 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
960 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
961 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
962 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
963 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
964 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
965 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
966 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
967 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
968 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
970 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
971 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
972 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
973 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
976 if (sregs.u.e.features & KVM_SREGS_E_PM) {
977 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
980 if (sregs.u.e.features & KVM_SREGS_E_PC) {
981 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
982 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
986 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
987 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
988 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
989 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
990 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
991 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
992 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
993 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
994 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
995 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
996 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
999 if (sregs.u.e.features & KVM_SREGS_EXP) {
1000 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1003 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1004 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1005 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1008 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1009 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1010 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1011 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1013 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1014 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1015 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1020 if (cap_segstate) {
1021 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1022 if (ret < 0) {
1023 return ret;
1026 if (!env->external_htab) {
1027 ppc_store_sdr1(env, sregs.u.s.sdr1);
1030 /* Sync SLB */
1031 #ifdef TARGET_PPC64
1033 * The packed SLB array we get from KVM_GET_SREGS only contains
1034 * information about valid entries. So we flush our internal
1035 * copy to get rid of stale ones, then put all valid SLB entries
1036 * back in.
1038 memset(env->slb, 0, sizeof(env->slb));
1039 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1040 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1041 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1043 * Only restore valid entries
1045 if (rb & SLB_ESID_V) {
1046 ppc_store_slb(env, rb, rs);
1049 #endif
1051 /* Sync SRs */
1052 for (i = 0; i < 16; i++) {
1053 env->sr[i] = sregs.u.s.ppc32.sr[i];
1056 /* Sync BATs */
1057 for (i = 0; i < 8; i++) {
1058 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1059 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1060 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1061 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1065 if (cap_hior) {
1066 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1069 if (cap_one_reg) {
1070 int i;
1072 /* We deliberately ignore errors here; for kernels which have
1073 * the ONE_REG calls but don't support the specific
1074 * registers, there's a reasonable chance things will still
1075 * work, at least until we try to migrate. */
1076 for (i = 0; i < 1024; i++) {
1077 uint64_t id = env->spr_cb[i].one_reg_id;
1079 if (id != 0) {
1080 kvm_get_one_spr(cs, id, i);
1084 #ifdef TARGET_PPC64
1085 if (cap_papr) {
1086 if (kvm_get_vpa(cs) < 0) {
1087 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1090 #endif
1093 return 0;
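/* Assert or deassert the external interrupt line in KVM. Other interrupt
 * sources are ignored here, and the call only takes effect when KVM exposes
 * the level-interrupt and unset-interrupt capabilities. */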
1096 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1098 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1100 if (irq != PPC_INTERRUPT_EXT) {
1101 return 0;
1104 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1105 return 0;
1108 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1110 return 0;
1113 #if defined(TARGET_PPCEMB)
1114 #define PPC_INPUT_INT PPC40x_INPUT_INT
1115 #elif defined(TARGET_PPC64)
1116 #define PPC_INPUT_INT PPC970_INPUT_INT
1117 #else
1118 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1119 #endif
1121 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1123 PowerPCCPU *cpu = POWERPC_CPU(cs);
1124 CPUPPCState *env = &cpu->env;
1125 int r;
1126 unsigned irq;
1128 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1129 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1130 if (!cap_interrupt_level &&
1131 run->ready_for_interrupt_injection &&
1132 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1133 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1135 /* For now KVM disregards the 'irq' argument. However, in the
1136 * future KVM could cache it in-kernel to avoid a heavyweight exit
1137 * when reading the UIC.
1139 irq = KVM_INTERRUPT_SET;
1141 DPRINTF("injected interrupt %d\n", irq);
1142 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1143 if (r < 0) {
1144 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1147 /* Always wake up soon in case the interrupt was level based */
1148 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1149 (get_ticks_per_sec() / 50));
1152 /* We don't know if there are more interrupts pending after this. However,
1153 * the guest will return to userspace in the course of handling this one
1154 * anyways, so we will get a chance to deliver the rest. */
1157 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1161 int kvm_arch_process_async_events(CPUState *cs)
1163 return cs->halted;
1166 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1168 CPUState *cs = CPU(cpu);
1169 CPUPPCState *env = &cpu->env;
1171 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1172 cs->halted = 1;
1173 cs->exception_index = EXCP_HLT;
1176 return 0;
1179 /* map dcr access to existing qemu dcr emulation */
1180 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1182 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1183 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1185 return 0;
1188 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1190 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1191 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1193 return 0;
1196 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1198 PowerPCCPU *cpu = POWERPC_CPU(cs);
1199 CPUPPCState *env = &cpu->env;
1200 int ret;
1202 switch (run->exit_reason) {
1203 case KVM_EXIT_DCR:
1204 if (run->dcr.is_write) {
1205 DPRINTF("handle dcr write\n");
1206 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1207 } else {
1208 DPRINTF("handle dcr read\n");
1209 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1211 break;
1212 case KVM_EXIT_HLT:
1213 DPRINTF("handle halt\n");
1214 ret = kvmppc_handle_halt(cpu);
1215 break;
1216 #if defined(TARGET_PPC64)
1217 case KVM_EXIT_PAPR_HCALL:
1218 DPRINTF("handle PAPR hypercall\n");
1219 run->papr_hcall.ret = spapr_hypercall(cpu,
1220 run->papr_hcall.nr,
1221 run->papr_hcall.args);
1222 ret = 0;
1223 break;
1224 #endif
1225 case KVM_EXIT_EPR:
1226 DPRINTF("handle epr\n");
1227 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1228 ret = 0;
1229 break;
1230 case KVM_EXIT_WATCHDOG:
1231 DPRINTF("handle watchdog expiry\n");
1232 watchdog_perform_action();
1233 ret = 0;
1234 break;
1236 default:
1237 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1238 ret = -1;
1239 break;
1242 return ret;
1245 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1247 CPUState *cs = CPU(cpu);
1248 uint32_t bits = tsr_bits;
1249 struct kvm_one_reg reg = {
1250 .id = KVM_REG_PPC_OR_TSR,
1251 .addr = (uintptr_t) &bits,
1254 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1257 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1260 CPUState *cs = CPU(cpu);
1261 uint32_t bits = tsr_bits;
1262 struct kvm_one_reg reg = {
1263 .id = KVM_REG_PPC_CLEAR_TSR,
1264 .addr = (uintptr_t) &bits,
1267 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1270 int kvmppc_set_tcr(PowerPCCPU *cpu)
1272 CPUState *cs = CPU(cpu);
1273 CPUPPCState *env = &cpu->env;
1274 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1276 struct kvm_one_reg reg = {
1277 .id = KVM_REG_PPC_TCR,
1278 .addr = (uintptr_t) &tcr,
1281 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1284 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1286 CPUState *cs = CPU(cpu);
1287 int ret;
1289 if (!kvm_enabled()) {
1290 return -1;
1293 if (!cap_ppc_watchdog) {
1294 fprintf(stderr, "warning: KVM does not support watchdog\n");
1295 return -1;
1298 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1299 if (ret < 0) {
1300 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1301 __func__, strerror(-ret));
1302 return ret;
1305 return ret;
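/* Look up a field in /proc/cpuinfo and copy the matching line into value.
 * Returns 0 on success, -1 if the field was not found. */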
1308 static int read_cpuinfo(const char *field, char *value, int len)
1310 FILE *f;
1311 int ret = -1;
1312 int field_len = strlen(field);
1313 char line[512];
1315 f = fopen("/proc/cpuinfo", "r");
1316 if (!f) {
1317 return -1;
1320 do {
1321 if(!fgets(line, sizeof(line), f)) {
1322 break;
1324 if (!strncmp(line, field, field_len)) {
1325 pstrcpy(value, len, line);
1326 ret = 0;
1327 break;
1329 } while(*line);
1331 fclose(f);
1333 return ret;
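/* Parse the host timebase frequency out of /proc/cpuinfo, falling back to
 * get_ticks_per_sec() when it cannot be determined. */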
1336 uint32_t kvmppc_get_tbfreq(void)
1338 char line[512];
1339 char *ns;
1340 uint32_t retval = get_ticks_per_sec();
1342 if (read_cpuinfo("timebase", line, sizeof(line))) {
1343 return retval;
1346 if (!(ns = strchr(line, ':'))) {
1347 return retval;
1350 ns++;
1352 retval = atoi(ns);
1353 return retval;
1356 /* Try to find a device tree node for a CPU with clock-frequency property */
1357 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1359 struct dirent *dirp;
1360 DIR *dp;
1362 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1363 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1364 return -1;
1367 buf[0] = '\0';
1368 while ((dirp = readdir(dp)) != NULL) {
1369 FILE *f;
1370 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1371 dirp->d_name);
1372 f = fopen(buf, "r");
1373 if (f) {
1374 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1375 fclose(f);
1376 break;
1378 buf[0] = '\0';
1380 closedir(dp);
1381 if (buf[0] == '\0') {
1382 printf("Unknown host!\n");
1383 return -1;
1386 return 0;
1389 /* Read a CPU node property from the host device tree that's a single
1390 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1391 * (can't find or open the property, or doesn't understand the
1392 * format) */
1393 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1395 char buf[PATH_MAX];
1396 union {
1397 uint32_t v32;
1398 uint64_t v64;
1399 } u;
1400 FILE *f;
1401 int len;
1403 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1404 return -1;
1407 strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
1408 strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);
1410 f = fopen(buf, "rb");
1411 if (!f) {
1412 return -1;
1415 len = fread(&u, 1, sizeof(u), f);
1416 fclose(f);
1417 switch (len) {
1418 case 4:
1419 /* property is a 32-bit quantity */
1420 return be32_to_cpu(u.v32);
1421 case 8:
1422 return be64_to_cpu(u.v64);
1425 return 0;
1428 uint64_t kvmppc_get_clockfreq(void)
1430 return kvmppc_read_int_cpu_dt("clock-frequency");
1433 uint32_t kvmppc_get_vmx(void)
1435 return kvmppc_read_int_cpu_dt("ibm,vmx");
1438 uint32_t kvmppc_get_dfp(void)
1440 return kvmppc_read_int_cpu_dt("ibm,dfp");
1443 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1445 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1446 CPUState *cs = CPU(cpu);
1448 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1449 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1450 return 0;
1453 return 1;
1456 int kvmppc_get_hasidle(CPUPPCState *env)
1458 struct kvm_ppc_pvinfo pvinfo;
1460 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1461 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1462 return 1;
1465 return 0;
1468 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1470 uint32_t *hc = (uint32_t*)buf;
1471 struct kvm_ppc_pvinfo pvinfo;
1473 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1474 memcpy(buf, pvinfo.hcall, buf_len);
1475 return 0;
1479 * Fallback to always fail hypercalls:
1481 * li r3, -1
1482 * nop
1483 * nop
1484 * nop
1487 hc[0] = 0x3860ffff;
1488 hc[1] = 0x60000000;
1489 hc[2] = 0x60000000;
1490 hc[3] = 0x60000000;
1492 return 0;
1495 void kvmppc_set_papr(PowerPCCPU *cpu)
1497 CPUState *cs = CPU(cpu);
1498 int ret;
1500 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1501 if (ret) {
1502 cpu_abort(cs, "This KVM version does not support PAPR\n");
1505 /* Update the capability flag so we sync the right information
1506 * with kvm */
1507 cap_papr = 1;
1510 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1512 CPUState *cs = CPU(cpu);
1513 int ret;
1515 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1516 if (ret && mpic_proxy) {
1517 cpu_abort(cs, "This KVM version does not support EPR\n");
1521 int kvmppc_smt_threads(void)
1523 return cap_ppc_smt ? cap_ppc_smt : 1;
1526 #ifdef TARGET_PPC64
1527 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1529 void *rma;
1530 off_t size;
1531 int fd;
1532 struct kvm_allocate_rma ret;
1533 MemoryRegion *rma_region;
1535 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1536 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1537 * not necessary on this hardware
1538 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1540 * FIXME: We should allow the user to force contiguous RMA
1541 * allocation in the cap_ppc_rma==1 case.
1543 if (cap_ppc_rma < 2) {
1544 return 0;
1547 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1548 if (fd < 0) {
1549 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1550 strerror(errno));
1551 return -1;
1554 size = MIN(ret.rma_size, 256ul << 20);
1556 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1557 if (rma == MAP_FAILED) {
1558 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1559 return -1;
1562 rma_region = g_new(MemoryRegion, 1);
1563 memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
1564 vmstate_register_ram_global(rma_region);
1565 memory_region_add_subregion(sysmem, 0, rma_region);
1567 return size;
1570 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1572 struct kvm_ppc_smmu_info info;
1573 long rampagesize, best_page_shift;
1574 int i;
1576 if (cap_ppc_rma >= 2) {
1577 return current_size;
1580 /* Find the largest hardware supported page size that's less than
1581 * or equal to the (logical) backing page size of guest RAM */
1582 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1583 rampagesize = getrampagesize();
1584 best_page_shift = 0;
1586 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1587 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1589 if (!sps->page_shift) {
1590 continue;
1593 if ((sps->page_shift > best_page_shift)
1594 && ((1UL << sps->page_shift) <= rampagesize)) {
1595 best_page_shift = sps->page_shift;
1599 return MIN(current_size,
1600 1ULL << (best_page_shift + hash_shift - 7));
1602 #endif
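/* Create an in-kernel TCE table for the given LIOBN and mmap it into QEMU.
 * Returns the mapped table (and its fd via *pfd), or NULL if the caller
 * should fall back to a userspace table. */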
1604 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1606 struct kvm_create_spapr_tce args = {
1607 .liobn = liobn,
1608 .window_size = window_size,
1610 long len;
1611 int fd;
1612 void *table;
1614 /* Must set fd to -1 so we don't try to munmap when called for
1615 * destroying the table, which the upper layers -will- do
1617 *pfd = -1;
1618 if (!cap_spapr_tce) {
1619 return NULL;
1622 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1623 if (fd < 0) {
1624 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1625 liobn);
1626 return NULL;
1629 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
1630 /* FIXME: round this up to page size */
1632 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1633 if (table == MAP_FAILED) {
1634 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1635 liobn);
1636 close(fd);
1637 return NULL;
1640 *pfd = fd;
1641 return table;
1644 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1646 long len;
1648 if (fd < 0) {
1649 return -1;
1652 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(uint64_t);
1653 if ((munmap(table, len) < 0) ||
1654 (close(fd) < 0)) {
1655 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1656 strerror(errno));
1657 /* Leak the table */
1660 return 0;
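/* Ask the kernel to (re)allocate the guest hash page table. Returns the
 * log2 size of the kernel-allocated htab, 0 if QEMU must allocate the htab
 * itself, or a negative error code. */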
1663 int kvmppc_reset_htab(int shift_hint)
1665 uint32_t shift = shift_hint;
1667 if (!kvm_enabled()) {
1668 /* Full emulation, tell caller to allocate htab itself */
1669 return 0;
1671 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1672 int ret;
1673 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1674 if (ret == -ENOTTY) {
1675 /* At least some versions of PR KVM advertise the
1676 * capability, but don't implement the ioctl(). Oops.
1677 * Return 0 so that we allocate the htab in qemu, as is
1678 * correct for PR. */
1679 return 0;
1680 } else if (ret < 0) {
1681 return ret;
1683 return shift;
1686 /* We have a kernel that predates the htab reset calls. For PR
1687 * KVM, we need to allocate the htab ourselves; an HV KVM of
1688 * this era has already allocated a fixed 16MB hash table.
1689 * Kernels of this era have the GET_PVINFO capability
1690 * only on PR, so we use this hack to determine the right
1691 * answer */
1692 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1693 /* PR - tell caller to allocate htab */
1694 return 0;
1695 } else {
1696 /* HV - assume 16MB kernel allocated htab */
1697 return 24;
1701 static inline uint32_t mfpvr(void)
1703 uint32_t pvr;
1705 asm ("mfpvr %0"
1706 : "=r"(pvr));
1707 return pvr;
1710 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1712 if (on) {
1713 *word |= flags;
1714 } else {
1715 *word &= ~flags;
1719 static void kvmppc_host_cpu_initfn(Object *obj)
1721 assert(kvm_enabled());
1724 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1726 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1727 uint32_t vmx = kvmppc_get_vmx();
1728 uint32_t dfp = kvmppc_get_dfp();
1729 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1730 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1732 /* Now fix up the class with information we can query from the host */
1733 pcc->pvr = mfpvr();
1735 if (vmx != -1) {
1736 /* Only override when we know what the host supports */
1737 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1738 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1740 if (dfp != -1) {
1741 /* Only override when we know what the host supports */
1742 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1745 if (dcache_size != -1) {
1746 pcc->l1_dcache_size = dcache_size;
1749 if (icache_size != -1) {
1750 pcc->l1_icache_size = icache_size;
1754 bool kvmppc_has_cap_epr(void)
1756 return cap_epr;
1759 bool kvmppc_has_cap_htab_fd(void)
1761 return cap_htab_fd;
1764 static int kvm_ppc_register_host_cpu_type(void)
1766 TypeInfo type_info = {
1767 .name = TYPE_HOST_POWERPC_CPU,
1768 .instance_init = kvmppc_host_cpu_initfn,
1769 .class_init = kvmppc_host_cpu_class_init,
1771 uint32_t host_pvr = mfpvr();
1772 PowerPCCPUClass *pvr_pcc;
1774 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1775 if (pvr_pcc == NULL) {
1776 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
1778 if (pvr_pcc == NULL) {
1779 return -1;
1781 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1782 type_register(&type_info);
1783 return 0;
1786 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1788 struct kvm_rtas_token_args args = {
1789 .token = token,
1792 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
1793 return -ENOENT;
1796 strncpy(args.name, function, sizeof(args.name));
1798 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
1801 int kvmppc_get_htab_fd(bool write)
1803 struct kvm_get_htab_fd s = {
1804 .flags = write ? KVM_GET_HTAB_WRITE : 0,
1805 .start_index = 0,
1808 if (!cap_htab_fd) {
1809 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
1810 return -1;
1813 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
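/* Stream hash table chunks from the kernel's HTAB fd into the migration
 * stream, stopping once max_ns nanoseconds have elapsed (negative means no
 * limit). Returns 1 when the fd is drained, 0 if we stopped early, or a
 * negative value on read error. */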
1816 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
1818 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1819 uint8_t buf[bufsize];
1820 ssize_t rc;
1822 do {
1823 rc = read(fd, buf, bufsize);
1824 if (rc < 0) {
1825 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
1826 strerror(errno));
1827 return rc;
1828 } else if (rc) {
1829 /* Kernel already returns data in BE format for the file */
1830 qemu_put_buffer(f, buf, rc);
1832 } while ((rc != 0)
1833 && ((max_ns < 0)
1834 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
1836 return (rc == 0) ? 1 : 0;
1839 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
1840 uint16_t n_valid, uint16_t n_invalid)
1842 struct kvm_get_htab_header *buf;
1843 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
1844 ssize_t rc;
1846 buf = alloca(chunksize);
1847 /* This is KVM on ppc, so this is all big-endian */
1848 buf->index = index;
1849 buf->n_valid = n_valid;
1850 buf->n_invalid = n_invalid;
1852 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
1854 rc = write(fd, buf, chunksize);
1855 if (rc < 0) {
1856 fprintf(stderr, "Error writing KVM hash table: %s\n",
1857 strerror(errno));
1858 return rc;
1860 if (rc != chunksize) {
1861 /* We should never get a short write on a single chunk */
1862 fprintf(stderr, "Short write, restoring KVM hash table\n");
1863 return -1;
1865 return 0;
1868 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1870 return true;
1873 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1875 return 1;
1878 int kvm_arch_on_sigbus(int code, void *addr)
1880 return 1;
1883 void kvm_arch_init_irq_routing(KVMState *s)
1887 int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1889 return -EINVAL;
1892 int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1894 return -EINVAL;
1897 int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1899 return -EINVAL;
1902 int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1904 return -EINVAL;
1907 void kvm_arch_remove_all_hw_breakpoints(void)
1911 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
1915 struct kvm_get_htab_buf {
1916 struct kvm_get_htab_header header;
1918 * We require one extra entry for the read
1920 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
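/* Read one hash PTE group through a temporary HTAB fd. Returns a token (the
 * address of the hpte array inside a heap-allocated kvm_get_htab_buf) to be
 * released with kvmppc_hash64_free_pteg(), or 0 on failure. */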
1923 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
1925 int htab_fd;
1926 struct kvm_get_htab_fd ghf;
1927 struct kvm_get_htab_buf *hpte_buf;
1929 ghf.flags = 0;
1930 ghf.start_index = pte_index;
1931 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1932 if (htab_fd < 0) {
1933 goto error_out;
1936 hpte_buf = g_malloc0(sizeof(*hpte_buf));
1938 * Read the hpte group
1940 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
1941 goto out_close;
1944 close(htab_fd);
1945 return (uint64_t)(uintptr_t) hpte_buf->hpte;
1947 out_close:
1948 g_free(hpte_buf);
1949 close(htab_fd);
1950 error_out:
1951 return 0;
1954 void kvmppc_hash64_free_pteg(uint64_t token)
1956 struct kvm_get_htab_buf *htab_buf;
1958 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
1959 hpte);
1960 g_free(htab_buf);
1961 return;
1964 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
1965 target_ulong pte0, target_ulong pte1)
1967 int htab_fd;
1968 struct kvm_get_htab_fd ghf;
1969 struct kvm_get_htab_buf hpte_buf;
1971 ghf.flags = 0;
1972 ghf.start_index = 0; /* Ignored */
1973 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1974 if (htab_fd < 0) {
1975 goto error_out;
1978 hpte_buf.header.n_valid = 1;
1979 hpte_buf.header.n_invalid = 0;
1980 hpte_buf.header.index = pte_index;
1981 hpte_buf.hpte[0] = pte0;
1982 hpte_buf.hpte[1] = pte1;
1984 * Write the hpte entry.
1985 * CAUTION: write() has the warn_unused_result attribute. Hence we
1986 * need to check the return value, even though we do nothing.
1988 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
1989 goto out_close;
1992 out_close:
1993 close(htab_fd);
1994 return;
1996 error_out:
1997 return;