qdev: Fix crash by validating the object type
[qemu/cris-port.git] / target-ppc / kvm.c
blob4b81e5f253277f1a39fae7339d038d3c5b546e60
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "sysemu/watchdog.h"
39 #include "trace.h"
41 //#define DEBUG_KVM
43 #ifdef DEBUG_KVM
44 #define DPRINTF(fmt, ...) \
45 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
46 #else
47 #define DPRINTF(fmt, ...) \
48 do { } while (0)
49 #endif
51 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
53 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
54 KVM_CAP_LAST_INFO
57 static int cap_interrupt_unset = false;
58 static int cap_interrupt_level = false;
59 static int cap_segstate;
60 static int cap_booke_sregs;
61 static int cap_ppc_smt;
62 static int cap_ppc_rma;
63 static int cap_spapr_tce;
64 static int cap_hior;
65 static int cap_one_reg;
66 static int cap_epr;
67 static int cap_ppc_watchdog;
68 static int cap_papr;
69 static int cap_htab_fd;
71 /* XXX We have a race condition where we actually have a level triggered
72 * interrupt, but the infrastructure can't expose that yet, so the guest
73 * takes but ignores it, goes to sleep and never gets notified that there's
74 * still an interrupt pending.
76 * As a quick workaround, let's just wake up again 20 ms after we injected
77 * an interrupt. That way we can assure that we're always reinjecting
78 * interrupts in case the guest swallowed them.
80 static QEMUTimer *idle_timer;
82 static void kvm_kick_cpu(void *opaque)
84 PowerPCCPU *cpu = opaque;
86 qemu_cpu_kick(CPU(cpu));
89 static int kvm_ppc_register_host_cpu_type(void);
91 int kvm_arch_init(KVMState *s)
93 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
94 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
95 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
96 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
97 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
98 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
99 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
100 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
101 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
102 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
103 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
104 /* Note: we don't set cap_papr here, because this capability is
105 * only activated after this by kvmppc_set_papr() */
106 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
108 if (!cap_interrupt_level) {
109 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
110 "VM to stall at times!\n");
113 kvm_ppc_register_host_cpu_type();
115 return 0;
118 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
120 CPUPPCState *cenv = &cpu->env;
121 CPUState *cs = CPU(cpu);
122 struct kvm_sregs sregs;
123 int ret;
125 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
126 /* What we're really trying to say is "if we're on BookE, we use
127 the native PVR for now". This is the only sane way to check
128 it though, so we potentially confuse users that they can run
129 BookE guests on BookS. Let's hope nobody dares enough :) */
130 return 0;
131 } else {
132 if (!cap_segstate) {
133 fprintf(stderr, "kvm error: missing PVR setting capability\n");
134 return -ENOSYS;
138 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
139 if (ret) {
140 return ret;
143 sregs.pvr = cenv->spr[SPR_PVR];
144 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
147 /* Set up a shared TLB array with KVM */
148 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
150 CPUPPCState *env = &cpu->env;
151 CPUState *cs = CPU(cpu);
152 struct kvm_book3e_206_tlb_params params = {};
153 struct kvm_config_tlb cfg = {};
154 unsigned int entries = 0;
155 int ret, i;
157 if (!kvm_enabled() ||
158 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
159 return 0;
162 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
164 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
165 params.tlb_sizes[i] = booke206_tlb_size(env, i);
166 params.tlb_ways[i] = booke206_tlb_ways(env, i);
167 entries += params.tlb_sizes[i];
170 assert(entries == env->nb_tlb);
171 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
173 env->tlb_dirty = true;
175 cfg.array = (uintptr_t)env->tlb.tlbm;
176 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
177 cfg.params = (uintptr_t)&params;
178 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
180 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
181 if (ret < 0) {
182 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
183 __func__, strerror(-ret));
184 return ret;
187 env->kvm_sw_tlb = true;
188 return 0;
192 #if defined(TARGET_PPC64)
193 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
194 struct kvm_ppc_smmu_info *info)
196 CPUPPCState *env = &cpu->env;
197 CPUState *cs = CPU(cpu);
199 memset(info, 0, sizeof(*info));
201 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
202 * need to "guess" what the supported page sizes are.
204 * For that to work we make a few assumptions:
206 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
207 * KVM which only supports 4K and 16M pages, but supports them
208 * regardless of the backing store characteritics. We also don't
209 * support 1T segments.
211 * This is safe as if HV KVM ever supports that capability or PR
212 * KVM grows supports for more page/segment sizes, those versions
213 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
214 * will not hit this fallback
216 * - Else we are running HV KVM. This means we only support page
217 * sizes that fit in the backing store. Additionally we only
218 * advertize 64K pages if the processor is ARCH 2.06 and we assume
219 * P7 encodings for the SLB and hash table. Here too, we assume
220 * support for any newer processor will mean a kernel that
221 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
222 * this fallback.
224 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
225 /* No flags */
226 info->flags = 0;
227 info->slb_size = 64;
229 /* Standard 4k base page size segment */
230 info->sps[0].page_shift = 12;
231 info->sps[0].slb_enc = 0;
232 info->sps[0].enc[0].page_shift = 12;
233 info->sps[0].enc[0].pte_enc = 0;
235 /* Standard 16M large page size segment */
236 info->sps[1].page_shift = 24;
237 info->sps[1].slb_enc = SLB_VSID_L;
238 info->sps[1].enc[0].page_shift = 24;
239 info->sps[1].enc[0].pte_enc = 0;
240 } else {
241 int i = 0;
243 /* HV KVM has backing store size restrictions */
244 info->flags = KVM_PPC_PAGE_SIZES_REAL;
246 if (env->mmu_model & POWERPC_MMU_1TSEG) {
247 info->flags |= KVM_PPC_1T_SEGMENTS;
250 if (env->mmu_model == POWERPC_MMU_2_06) {
251 info->slb_size = 32;
252 } else {
253 info->slb_size = 64;
256 /* Standard 4k base page size segment */
257 info->sps[i].page_shift = 12;
258 info->sps[i].slb_enc = 0;
259 info->sps[i].enc[0].page_shift = 12;
260 info->sps[i].enc[0].pte_enc = 0;
261 i++;
263 /* 64K on MMU 2.06 */
264 if (env->mmu_model == POWERPC_MMU_2_06) {
265 info->sps[i].page_shift = 16;
266 info->sps[i].slb_enc = 0x110;
267 info->sps[i].enc[0].page_shift = 16;
268 info->sps[i].enc[0].pte_enc = 1;
269 i++;
272 /* Standard 16M large page size segment */
273 info->sps[i].page_shift = 24;
274 info->sps[i].slb_enc = SLB_VSID_L;
275 info->sps[i].enc[0].page_shift = 24;
276 info->sps[i].enc[0].pte_enc = 0;
280 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
282 CPUState *cs = CPU(cpu);
283 int ret;
285 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
286 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
287 if (ret == 0) {
288 return;
292 kvm_get_fallback_smmu_info(cpu, info);
295 static long getrampagesize(void)
297 struct statfs fs;
298 int ret;
300 if (!mem_path) {
301 /* guest RAM is backed by normal anonymous pages */
302 return getpagesize();
305 do {
306 ret = statfs(mem_path, &fs);
307 } while (ret != 0 && errno == EINTR);
309 if (ret != 0) {
310 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
311 strerror(errno));
312 exit(1);
315 #define HUGETLBFS_MAGIC 0x958458f6
317 if (fs.f_type != HUGETLBFS_MAGIC) {
318 /* Explicit mempath, but it's ordinary pages */
319 return getpagesize();
322 /* It's hugepage, return the huge page size */
323 return fs.f_bsize;
/* A page size is usable when the backing store restriction does not apply,
 * or when 2^shift fits within the RAM backing page size. */
static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
{
    if (flags & KVM_PPC_PAGE_SIZES_REAL) {
        return (1ul << shift) <= rampgsize;
    }

    return true;
}
335 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
337 static struct kvm_ppc_smmu_info smmu_info;
338 static bool has_smmu_info;
339 CPUPPCState *env = &cpu->env;
340 long rampagesize;
341 int iq, ik, jq, jk;
343 /* We only handle page sizes for 64-bit server guests for now */
344 if (!(env->mmu_model & POWERPC_MMU_64)) {
345 return;
348 /* Collect MMU info from kernel if not already */
349 if (!has_smmu_info) {
350 kvm_get_smmu_info(cpu, &smmu_info);
351 has_smmu_info = true;
354 rampagesize = getrampagesize();
356 /* Convert to QEMU form */
357 memset(&env->sps, 0, sizeof(env->sps));
359 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
360 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
361 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
363 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
364 ksps->page_shift)) {
365 continue;
367 qsps->page_shift = ksps->page_shift;
368 qsps->slb_enc = ksps->slb_enc;
369 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
370 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
371 ksps->enc[jk].page_shift)) {
372 continue;
374 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
375 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
376 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
377 break;
380 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
381 break;
384 env->slb_nr = smmu_info.slb_size;
385 if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
386 env->mmu_model |= POWERPC_MMU_1TSEG;
387 } else {
388 env->mmu_model &= ~POWERPC_MMU_1TSEG;
391 #else /* defined (TARGET_PPC64) */
393 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
397 #endif /* !defined (TARGET_PPC64) */
399 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
401 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
404 int kvm_arch_init_vcpu(CPUState *cs)
406 PowerPCCPU *cpu = POWERPC_CPU(cs);
407 CPUPPCState *cenv = &cpu->env;
408 int ret;
410 /* Gather server mmu info from KVM and update the CPU state */
411 kvm_fixup_page_sizes(cpu);
413 /* Synchronize sregs with kvm */
414 ret = kvm_arch_sync_sregs(cpu);
415 if (ret) {
416 return ret;
419 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
421 /* Some targets support access to KVM's guest TLB. */
422 switch (cenv->mmu_model) {
423 case POWERPC_MMU_BOOKE206:
424 ret = kvm_booke206_tlb_init(cpu);
425 break;
426 default:
427 break;
430 return ret;
433 void kvm_arch_reset_vcpu(CPUState *cpu)
437 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
439 CPUPPCState *env = &cpu->env;
440 CPUState *cs = CPU(cpu);
441 struct kvm_dirty_tlb dirty_tlb;
442 unsigned char *bitmap;
443 int ret;
445 if (!env->kvm_sw_tlb) {
446 return;
449 bitmap = g_malloc((env->nb_tlb + 7) / 8);
450 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
452 dirty_tlb.bitmap = (uintptr_t)bitmap;
453 dirty_tlb.num_dirty = env->nb_tlb;
455 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
456 if (ret) {
457 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
458 __func__, strerror(-ret));
461 g_free(bitmap);
464 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
466 PowerPCCPU *cpu = POWERPC_CPU(cs);
467 CPUPPCState *env = &cpu->env;
468 union {
469 uint32_t u32;
470 uint64_t u64;
471 } val;
472 struct kvm_one_reg reg = {
473 .id = id,
474 .addr = (uintptr_t) &val,
476 int ret;
478 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
479 if (ret != 0) {
480 trace_kvm_failed_spr_get(spr, strerror(errno));
481 } else {
482 switch (id & KVM_REG_SIZE_MASK) {
483 case KVM_REG_SIZE_U32:
484 env->spr[spr] = val.u32;
485 break;
487 case KVM_REG_SIZE_U64:
488 env->spr[spr] = val.u64;
489 break;
491 default:
492 /* Don't handle this size yet */
493 abort();
498 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
500 PowerPCCPU *cpu = POWERPC_CPU(cs);
501 CPUPPCState *env = &cpu->env;
502 union {
503 uint32_t u32;
504 uint64_t u64;
505 } val;
506 struct kvm_one_reg reg = {
507 .id = id,
508 .addr = (uintptr_t) &val,
510 int ret;
512 switch (id & KVM_REG_SIZE_MASK) {
513 case KVM_REG_SIZE_U32:
514 val.u32 = env->spr[spr];
515 break;
517 case KVM_REG_SIZE_U64:
518 val.u64 = env->spr[spr];
519 break;
521 default:
522 /* Don't handle this size yet */
523 abort();
526 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
527 if (ret != 0) {
528 trace_kvm_failed_spr_set(spr, strerror(errno));
532 static int kvm_put_fp(CPUState *cs)
534 PowerPCCPU *cpu = POWERPC_CPU(cs);
535 CPUPPCState *env = &cpu->env;
536 struct kvm_one_reg reg;
537 int i;
538 int ret;
540 if (env->insns_flags & PPC_FLOAT) {
541 uint64_t fpscr = env->fpscr;
542 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
544 reg.id = KVM_REG_PPC_FPSCR;
545 reg.addr = (uintptr_t)&fpscr;
546 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
547 if (ret < 0) {
548 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
549 return ret;
552 for (i = 0; i < 32; i++) {
553 uint64_t vsr[2];
555 vsr[0] = float64_val(env->fpr[i]);
556 vsr[1] = env->vsr[i];
557 reg.addr = (uintptr_t) &vsr;
558 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
560 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
561 if (ret < 0) {
562 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
563 i, strerror(errno));
564 return ret;
569 if (env->insns_flags & PPC_ALTIVEC) {
570 reg.id = KVM_REG_PPC_VSCR;
571 reg.addr = (uintptr_t)&env->vscr;
572 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
573 if (ret < 0) {
574 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
575 return ret;
578 for (i = 0; i < 32; i++) {
579 reg.id = KVM_REG_PPC_VR(i);
580 reg.addr = (uintptr_t)&env->avr[i];
581 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
582 if (ret < 0) {
583 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
584 return ret;
589 return 0;
592 static int kvm_get_fp(CPUState *cs)
594 PowerPCCPU *cpu = POWERPC_CPU(cs);
595 CPUPPCState *env = &cpu->env;
596 struct kvm_one_reg reg;
597 int i;
598 int ret;
600 if (env->insns_flags & PPC_FLOAT) {
601 uint64_t fpscr;
602 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
604 reg.id = KVM_REG_PPC_FPSCR;
605 reg.addr = (uintptr_t)&fpscr;
606 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
607 if (ret < 0) {
608 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
609 return ret;
610 } else {
611 env->fpscr = fpscr;
614 for (i = 0; i < 32; i++) {
615 uint64_t vsr[2];
617 reg.addr = (uintptr_t) &vsr;
618 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
620 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
621 if (ret < 0) {
622 DPRINTF("Unable to get %s%d from KVM: %s\n",
623 vsx ? "VSR" : "FPR", i, strerror(errno));
624 return ret;
625 } else {
626 env->fpr[i] = vsr[0];
627 if (vsx) {
628 env->vsr[i] = vsr[1];
634 if (env->insns_flags & PPC_ALTIVEC) {
635 reg.id = KVM_REG_PPC_VSCR;
636 reg.addr = (uintptr_t)&env->vscr;
637 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
638 if (ret < 0) {
639 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
640 return ret;
643 for (i = 0; i < 32; i++) {
644 reg.id = KVM_REG_PPC_VR(i);
645 reg.addr = (uintptr_t)&env->avr[i];
646 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
647 if (ret < 0) {
648 DPRINTF("Unable to get VR%d from KVM: %s\n",
649 i, strerror(errno));
650 return ret;
655 return 0;
658 #if defined(TARGET_PPC64)
659 static int kvm_get_vpa(CPUState *cs)
661 PowerPCCPU *cpu = POWERPC_CPU(cs);
662 CPUPPCState *env = &cpu->env;
663 struct kvm_one_reg reg;
664 int ret;
666 reg.id = KVM_REG_PPC_VPA_ADDR;
667 reg.addr = (uintptr_t)&env->vpa_addr;
668 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
669 if (ret < 0) {
670 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
671 return ret;
674 assert((uintptr_t)&env->slb_shadow_size
675 == ((uintptr_t)&env->slb_shadow_addr + 8));
676 reg.id = KVM_REG_PPC_VPA_SLB;
677 reg.addr = (uintptr_t)&env->slb_shadow_addr;
678 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
679 if (ret < 0) {
680 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
681 strerror(errno));
682 return ret;
685 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
686 reg.id = KVM_REG_PPC_VPA_DTL;
687 reg.addr = (uintptr_t)&env->dtl_addr;
688 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
689 if (ret < 0) {
690 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
691 strerror(errno));
692 return ret;
695 return 0;
698 static int kvm_put_vpa(CPUState *cs)
700 PowerPCCPU *cpu = POWERPC_CPU(cs);
701 CPUPPCState *env = &cpu->env;
702 struct kvm_one_reg reg;
703 int ret;
705 /* SLB shadow or DTL can't be registered unless a master VPA is
706 * registered. That means when restoring state, if a VPA *is*
707 * registered, we need to set that up first. If not, we need to
708 * deregister the others before deregistering the master VPA */
709 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
711 if (env->vpa_addr) {
712 reg.id = KVM_REG_PPC_VPA_ADDR;
713 reg.addr = (uintptr_t)&env->vpa_addr;
714 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
715 if (ret < 0) {
716 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
717 return ret;
721 assert((uintptr_t)&env->slb_shadow_size
722 == ((uintptr_t)&env->slb_shadow_addr + 8));
723 reg.id = KVM_REG_PPC_VPA_SLB;
724 reg.addr = (uintptr_t)&env->slb_shadow_addr;
725 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
726 if (ret < 0) {
727 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
728 return ret;
731 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
732 reg.id = KVM_REG_PPC_VPA_DTL;
733 reg.addr = (uintptr_t)&env->dtl_addr;
734 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
735 if (ret < 0) {
736 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
737 strerror(errno));
738 return ret;
741 if (!env->vpa_addr) {
742 reg.id = KVM_REG_PPC_VPA_ADDR;
743 reg.addr = (uintptr_t)&env->vpa_addr;
744 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
745 if (ret < 0) {
746 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
747 return ret;
751 return 0;
753 #endif /* TARGET_PPC64 */
755 int kvm_arch_put_registers(CPUState *cs, int level)
757 PowerPCCPU *cpu = POWERPC_CPU(cs);
758 CPUPPCState *env = &cpu->env;
759 struct kvm_regs regs;
760 int ret;
761 int i;
763 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
764 if (ret < 0) {
765 return ret;
768 regs.ctr = env->ctr;
769 regs.lr = env->lr;
770 regs.xer = cpu_read_xer(env);
771 regs.msr = env->msr;
772 regs.pc = env->nip;
774 regs.srr0 = env->spr[SPR_SRR0];
775 regs.srr1 = env->spr[SPR_SRR1];
777 regs.sprg0 = env->spr[SPR_SPRG0];
778 regs.sprg1 = env->spr[SPR_SPRG1];
779 regs.sprg2 = env->spr[SPR_SPRG2];
780 regs.sprg3 = env->spr[SPR_SPRG3];
781 regs.sprg4 = env->spr[SPR_SPRG4];
782 regs.sprg5 = env->spr[SPR_SPRG5];
783 regs.sprg6 = env->spr[SPR_SPRG6];
784 regs.sprg7 = env->spr[SPR_SPRG7];
786 regs.pid = env->spr[SPR_BOOKE_PID];
788 for (i = 0;i < 32; i++)
789 regs.gpr[i] = env->gpr[i];
791 regs.cr = 0;
792 for (i = 0; i < 8; i++) {
793 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
796 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
797 if (ret < 0)
798 return ret;
800 kvm_put_fp(cs);
802 if (env->tlb_dirty) {
803 kvm_sw_tlb_put(cpu);
804 env->tlb_dirty = false;
807 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
808 struct kvm_sregs sregs;
810 sregs.pvr = env->spr[SPR_PVR];
812 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
814 /* Sync SLB */
815 #ifdef TARGET_PPC64
816 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
817 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
818 if (env->slb[i].esid & SLB_ESID_V) {
819 sregs.u.s.ppc64.slb[i].slbe |= i;
821 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
823 #endif
825 /* Sync SRs */
826 for (i = 0; i < 16; i++) {
827 sregs.u.s.ppc32.sr[i] = env->sr[i];
830 /* Sync BATs */
831 for (i = 0; i < 8; i++) {
832 /* Beware. We have to swap upper and lower bits here */
833 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
834 | env->DBAT[1][i];
835 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
836 | env->IBAT[1][i];
839 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
840 if (ret) {
841 return ret;
845 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
846 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
849 if (cap_one_reg) {
850 int i;
852 /* We deliberately ignore errors here, for kernels which have
853 * the ONE_REG calls, but don't support the specific
854 * registers, there's a reasonable chance things will still
855 * work, at least until we try to migrate. */
856 for (i = 0; i < 1024; i++) {
857 uint64_t id = env->spr_cb[i].one_reg_id;
859 if (id != 0) {
860 kvm_put_one_spr(cs, id, i);
864 #ifdef TARGET_PPC64
865 if (cap_papr) {
866 if (kvm_put_vpa(cs) < 0) {
867 DPRINTF("Warning: Unable to set VPA information to KVM\n");
870 #endif /* TARGET_PPC64 */
873 return ret;
876 int kvm_arch_get_registers(CPUState *cs)
878 PowerPCCPU *cpu = POWERPC_CPU(cs);
879 CPUPPCState *env = &cpu->env;
880 struct kvm_regs regs;
881 struct kvm_sregs sregs;
882 uint32_t cr;
883 int i, ret;
885 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
886 if (ret < 0)
887 return ret;
889 cr = regs.cr;
890 for (i = 7; i >= 0; i--) {
891 env->crf[i] = cr & 15;
892 cr >>= 4;
895 env->ctr = regs.ctr;
896 env->lr = regs.lr;
897 cpu_write_xer(env, regs.xer);
898 env->msr = regs.msr;
899 env->nip = regs.pc;
901 env->spr[SPR_SRR0] = regs.srr0;
902 env->spr[SPR_SRR1] = regs.srr1;
904 env->spr[SPR_SPRG0] = regs.sprg0;
905 env->spr[SPR_SPRG1] = regs.sprg1;
906 env->spr[SPR_SPRG2] = regs.sprg2;
907 env->spr[SPR_SPRG3] = regs.sprg3;
908 env->spr[SPR_SPRG4] = regs.sprg4;
909 env->spr[SPR_SPRG5] = regs.sprg5;
910 env->spr[SPR_SPRG6] = regs.sprg6;
911 env->spr[SPR_SPRG7] = regs.sprg7;
913 env->spr[SPR_BOOKE_PID] = regs.pid;
915 for (i = 0;i < 32; i++)
916 env->gpr[i] = regs.gpr[i];
918 kvm_get_fp(cs);
920 if (cap_booke_sregs) {
921 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
922 if (ret < 0) {
923 return ret;
926 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
927 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
928 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
929 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
930 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
931 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
932 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
933 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
934 env->spr[SPR_DECR] = sregs.u.e.dec;
935 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
936 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
937 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
940 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
941 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
942 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
943 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
944 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
945 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
948 if (sregs.u.e.features & KVM_SREGS_E_64) {
949 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
952 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
953 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
956 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
957 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
958 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
959 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
960 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
961 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
962 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
963 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
964 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
965 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
966 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
967 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
968 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
969 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
970 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
971 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
972 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
974 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
975 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
976 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
977 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
980 if (sregs.u.e.features & KVM_SREGS_E_PM) {
981 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
984 if (sregs.u.e.features & KVM_SREGS_E_PC) {
985 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
986 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
990 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
991 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
992 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
993 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
994 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
995 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
996 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
997 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
998 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
999 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1000 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1003 if (sregs.u.e.features & KVM_SREGS_EXP) {
1004 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1007 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1008 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1009 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1012 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1013 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1014 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1015 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1017 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1018 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1019 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1024 if (cap_segstate) {
1025 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1026 if (ret < 0) {
1027 return ret;
1030 if (!env->external_htab) {
1031 ppc_store_sdr1(env, sregs.u.s.sdr1);
1034 /* Sync SLB */
1035 #ifdef TARGET_PPC64
1037 * The packed SLB array we get from KVM_GET_SREGS only contains
1038 * information about valid entries. So we flush our internal
1039 * copy to get rid of stale ones, then put all valid SLB entries
1040 * back in.
1042 memset(env->slb, 0, sizeof(env->slb));
1043 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1044 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1045 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1047 * Only restore valid entries
1049 if (rb & SLB_ESID_V) {
1050 ppc_store_slb(env, rb, rs);
1053 #endif
1055 /* Sync SRs */
1056 for (i = 0; i < 16; i++) {
1057 env->sr[i] = sregs.u.s.ppc32.sr[i];
1060 /* Sync BATs */
1061 for (i = 0; i < 8; i++) {
1062 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1063 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1064 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1065 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1069 if (cap_hior) {
1070 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1073 if (cap_one_reg) {
1074 int i;
1076 /* We deliberately ignore errors here, for kernels which have
1077 * the ONE_REG calls, but don't support the specific
1078 * registers, there's a reasonable chance things will still
1079 * work, at least until we try to migrate. */
1080 for (i = 0; i < 1024; i++) {
1081 uint64_t id = env->spr_cb[i].one_reg_id;
1083 if (id != 0) {
1084 kvm_get_one_spr(cs, id, i);
1088 #ifdef TARGET_PPC64
1089 if (cap_papr) {
1090 if (kvm_get_vpa(cs) < 0) {
1091 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1094 #endif
1097 return 0;
1100 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1102 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1104 if (irq != PPC_INTERRUPT_EXT) {
1105 return 0;
1108 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1109 return 0;
1112 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1114 return 0;
1117 #if defined(TARGET_PPCEMB)
1118 #define PPC_INPUT_INT PPC40x_INPUT_INT
1119 #elif defined(TARGET_PPC64)
1120 #define PPC_INPUT_INT PPC970_INPUT_INT
1121 #else
1122 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1123 #endif
1125 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1127 PowerPCCPU *cpu = POWERPC_CPU(cs);
1128 CPUPPCState *env = &cpu->env;
1129 int r;
1130 unsigned irq;
1132 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1133 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1134 if (!cap_interrupt_level &&
1135 run->ready_for_interrupt_injection &&
1136 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1137 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1139 /* For now KVM disregards the 'irq' argument. However, in the
1140 * future KVM could cache it in-kernel to avoid a heavyweight exit
1141 * when reading the UIC.
1143 irq = KVM_INTERRUPT_SET;
1145 DPRINTF("injected interrupt %d\n", irq);
1146 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1147 if (r < 0) {
1148 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1151 /* Always wake up soon in case the interrupt was level based */
1152 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1153 (get_ticks_per_sec() / 50));
1156 /* We don't know if there are more interrupts pending after this. However,
1157 * the guest will return to userspace in the course of handling this one
1158 * anyways, so we will get a chance to deliver the rest. */
1161 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1165 int kvm_arch_process_async_events(CPUState *cs)
1167 return cs->halted;
1170 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1172 CPUState *cs = CPU(cpu);
1173 CPUPPCState *env = &cpu->env;
1175 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1176 cs->halted = 1;
1177 cs->exception_index = EXCP_HLT;
1180 return 0;
1183 /* map dcr access to existing qemu dcr emulation */
1184 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1186 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1187 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1189 return 0;
1192 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1194 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1195 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1197 return 0;
1200 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1202 PowerPCCPU *cpu = POWERPC_CPU(cs);
1203 CPUPPCState *env = &cpu->env;
1204 int ret;
1206 switch (run->exit_reason) {
1207 case KVM_EXIT_DCR:
1208 if (run->dcr.is_write) {
1209 DPRINTF("handle dcr write\n");
1210 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1211 } else {
1212 DPRINTF("handle dcr read\n");
1213 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1215 break;
1216 case KVM_EXIT_HLT:
1217 DPRINTF("handle halt\n");
1218 ret = kvmppc_handle_halt(cpu);
1219 break;
1220 #if defined(TARGET_PPC64)
1221 case KVM_EXIT_PAPR_HCALL:
1222 DPRINTF("handle PAPR hypercall\n");
1223 run->papr_hcall.ret = spapr_hypercall(cpu,
1224 run->papr_hcall.nr,
1225 run->papr_hcall.args);
1226 ret = 0;
1227 break;
1228 #endif
1229 case KVM_EXIT_EPR:
1230 DPRINTF("handle epr\n");
1231 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1232 ret = 0;
1233 break;
1234 case KVM_EXIT_WATCHDOG:
1235 DPRINTF("handle watchdog expiry\n");
1236 watchdog_perform_action();
1237 ret = 0;
1238 break;
1240 default:
1241 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1242 ret = -1;
1243 break;
1246 return ret;
1249 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1251 CPUState *cs = CPU(cpu);
1252 uint32_t bits = tsr_bits;
1253 struct kvm_one_reg reg = {
1254 .id = KVM_REG_PPC_OR_TSR,
1255 .addr = (uintptr_t) &bits,
1258 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1261 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1264 CPUState *cs = CPU(cpu);
1265 uint32_t bits = tsr_bits;
1266 struct kvm_one_reg reg = {
1267 .id = KVM_REG_PPC_CLEAR_TSR,
1268 .addr = (uintptr_t) &bits,
1271 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1274 int kvmppc_set_tcr(PowerPCCPU *cpu)
1276 CPUState *cs = CPU(cpu);
1277 CPUPPCState *env = &cpu->env;
1278 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1280 struct kvm_one_reg reg = {
1281 .id = KVM_REG_PPC_TCR,
1282 .addr = (uintptr_t) &tcr,
1285 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1288 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1290 CPUState *cs = CPU(cpu);
1291 int ret;
1293 if (!kvm_enabled()) {
1294 return -1;
1297 if (!cap_ppc_watchdog) {
1298 printf("warning: KVM does not support watchdog");
1299 return -1;
1302 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1303 if (ret < 0) {
1304 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1305 __func__, strerror(-ret));
1306 return ret;
1309 return ret;
/* Scan /proc/cpuinfo for a line starting with @field and copy it (at most
 * @len bytes, NUL-terminated) into @value.  Returns 0 on a hit, -1 otherwise. */
static int read_cpuinfo(const char *field, char *value, int len)
{
    FILE *f;
    int ret = -1;
    int field_len = strlen(field);
    char line[512];

    f = fopen("/proc/cpuinfo", "r");
    if (!f) {
        return -1;
    }

    do {
        if (!fgets(line, sizeof(line), f)) {
            break;
        }
        if (!strncmp(line, field, field_len)) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
    } while (*line);

    fclose(f);
    return ret;
}
/* Return the host timebase frequency from /proc/cpuinfo, falling back to
 * get_ticks_per_sec() when the "timebase" line is absent or malformed. */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    /* Skip the ':' separator; atoi ignores the leading whitespace */
    ns++;

    retval = atoi(ns);
    return retval;
}
1360 /* Try to find a device tree node for a CPU with clock-frequency property */
1361 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1363 struct dirent *dirp;
1364 DIR *dp;
1366 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1367 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1368 return -1;
1371 buf[0] = '\0';
1372 while ((dirp = readdir(dp)) != NULL) {
1373 FILE *f;
1374 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1375 dirp->d_name);
1376 f = fopen(buf, "r");
1377 if (f) {
1378 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1379 fclose(f);
1380 break;
1382 buf[0] = '\0';
1384 closedir(dp);
1385 if (buf[0] == '\0') {
1386 printf("Unknown host!\n");
1387 return -1;
1390 return 0;
1393 /* Read a CPU node property from the host device tree that's a single
1394 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1395 * (can't find or open the property, or doesn't understand the
1396 * format) */
1397 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1399 char buf[PATH_MAX];
1400 union {
1401 uint32_t v32;
1402 uint64_t v64;
1403 } u;
1404 FILE *f;
1405 int len;
1407 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1408 return -1;
1411 strncat(buf, "/", sizeof(buf) - strlen(buf));
1412 strncat(buf, propname, sizeof(buf) - strlen(buf));
1414 f = fopen(buf, "rb");
1415 if (!f) {
1416 return -1;
1419 len = fread(&u, 1, sizeof(u), f);
1420 fclose(f);
1421 switch (len) {
1422 case 4:
1423 /* property is a 32-bit quantity */
1424 return be32_to_cpu(u.v32);
1425 case 8:
1426 return be64_to_cpu(u.v64);
1429 return 0;
/* Host CPU clock frequency from the device tree (0 on failure). */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
/* "ibm,vmx" device-tree property: Altivec/VSX level reported by the host. */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
/* "ibm,dfp" device-tree property: decimal FP support reported by the host. */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1447 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1449 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1450 CPUState *cs = CPU(cpu);
1452 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1453 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1454 return 0;
1457 return 1;
1460 int kvmppc_get_hasidle(CPUPPCState *env)
1462 struct kvm_ppc_pvinfo pvinfo;
1464 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1465 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1466 return 1;
1469 return 0;
1472 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1474 uint32_t *hc = (uint32_t*)buf;
1475 struct kvm_ppc_pvinfo pvinfo;
1477 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1478 memcpy(buf, pvinfo.hcall, buf_len);
1479 return 0;
1483 * Fallback to always fail hypercalls:
1485 * li r3, -1
1486 * nop
1487 * nop
1488 * nop
1491 hc[0] = 0x3860ffff;
1492 hc[1] = 0x60000000;
1493 hc[2] = 0x60000000;
1494 hc[3] = 0x60000000;
1496 return 0;
1499 void kvmppc_set_papr(PowerPCCPU *cpu)
1501 CPUState *cs = CPU(cpu);
1502 int ret;
1504 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1505 if (ret) {
1506 cpu_abort(cs, "This KVM version does not support PAPR\n");
1509 /* Update the capability flag so we sync the right information
1510 * with kvm */
1511 cap_papr = 1;
1514 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1516 CPUState *cs = CPU(cpu);
1517 int ret;
1519 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1520 if (ret && mpic_proxy) {
1521 cpu_abort(cs, "This KVM version does not support EPR\n");
1525 int kvmppc_smt_threads(void)
1527 return cap_ppc_smt ? cap_ppc_smt : 1;
1530 #ifdef TARGET_PPC64
1531 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1533 void *rma;
1534 off_t size;
1535 int fd;
1536 struct kvm_allocate_rma ret;
1537 MemoryRegion *rma_region;
1539 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1540 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1541 * not necessary on this hardware
1542 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1544 * FIXME: We should allow the user to force contiguous RMA
1545 * allocation in the cap_ppc_rma==1 case.
1547 if (cap_ppc_rma < 2) {
1548 return 0;
1551 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1552 if (fd < 0) {
1553 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1554 strerror(errno));
1555 return -1;
1558 size = MIN(ret.rma_size, 256ul << 20);
1560 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1561 if (rma == MAP_FAILED) {
1562 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1563 return -1;
1566 rma_region = g_new(MemoryRegion, 1);
1567 memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
1568 vmstate_register_ram_global(rma_region);
1569 memory_region_add_subregion(sysmem, 0, rma_region);
1571 return size;
1574 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1576 struct kvm_ppc_smmu_info info;
1577 long rampagesize, best_page_shift;
1578 int i;
1580 if (cap_ppc_rma >= 2) {
1581 return current_size;
1584 /* Find the largest hardware supported page size that's less than
1585 * or equal to the (logical) backing page size of guest RAM */
1586 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1587 rampagesize = getrampagesize();
1588 best_page_shift = 0;
1590 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1591 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1593 if (!sps->page_shift) {
1594 continue;
1597 if ((sps->page_shift > best_page_shift)
1598 && ((1UL << sps->page_shift) <= rampagesize)) {
1599 best_page_shift = sps->page_shift;
1603 return MIN(current_size,
1604 1ULL << (best_page_shift + hash_shift - 7));
1606 #endif
1608 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1610 struct kvm_create_spapr_tce args = {
1611 .liobn = liobn,
1612 .window_size = window_size,
1614 long len;
1615 int fd;
1616 void *table;
1618 /* Must set fd to -1 so we don't try to munmap when called for
1619 * destroying the table, which the upper layers -will- do
1621 *pfd = -1;
1622 if (!cap_spapr_tce) {
1623 return NULL;
1626 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1627 if (fd < 0) {
1628 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1629 liobn);
1630 return NULL;
1633 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
1634 /* FIXME: round this up to page size */
1636 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1637 if (table == MAP_FAILED) {
1638 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1639 liobn);
1640 close(fd);
1641 return NULL;
1644 *pfd = fd;
1645 return table;
1648 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1650 long len;
1652 if (fd < 0) {
1653 return -1;
1656 len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(uint64_t);
1657 if ((munmap(table, len) < 0) ||
1658 (close(fd) < 0)) {
1659 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1660 strerror(errno));
1661 /* Leak the table */
1664 return 0;
1667 int kvmppc_reset_htab(int shift_hint)
1669 uint32_t shift = shift_hint;
1671 if (!kvm_enabled()) {
1672 /* Full emulation, tell caller to allocate htab itself */
1673 return 0;
1675 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1676 int ret;
1677 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1678 if (ret == -ENOTTY) {
1679 /* At least some versions of PR KVM advertise the
1680 * capability, but don't implement the ioctl(). Oops.
1681 * Return 0 so that we allocate the htab in qemu, as is
1682 * correct for PR. */
1683 return 0;
1684 } else if (ret < 0) {
1685 return ret;
1687 return shift;
1690 /* We have a kernel that predates the htab reset calls. For PR
1691 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1692 * this era, it has allocated a 16MB fixed size hash table
1693 * already. Kernels of this era have the GET_PVINFO capability
1694 * only on PR, so we use this hack to determine the right
1695 * answer */
1696 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1697 /* PR - tell caller to allocate htab */
1698 return 0;
1699 } else {
1700 /* HV - assume 16MB kernel allocated htab */
1701 return 24;
1705 static inline uint32_t mfpvr(void)
1707 uint32_t pvr;
1709 asm ("mfpvr %0"
1710 : "=r"(pvr));
1711 return pvr;
/* Set (@on) or clear (!@on) @flags in the instruction-flags word @word. */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
1723 static void kvmppc_host_cpu_initfn(Object *obj)
1725 assert(kvm_enabled());
1728 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1730 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1731 uint32_t vmx = kvmppc_get_vmx();
1732 uint32_t dfp = kvmppc_get_dfp();
1733 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1734 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1736 /* Now fix up the class with information we can query from the host */
1737 pcc->pvr = mfpvr();
1739 if (vmx != -1) {
1740 /* Only override when we know what the host supports */
1741 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1742 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1744 if (dfp != -1) {
1745 /* Only override when we know what the host supports */
1746 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1749 if (dcache_size != -1) {
1750 pcc->l1_dcache_size = dcache_size;
1753 if (icache_size != -1) {
1754 pcc->l1_icache_size = icache_size;
1758 bool kvmppc_has_cap_epr(void)
1760 return cap_epr;
1763 bool kvmppc_has_cap_htab_fd(void)
1765 return cap_htab_fd;
1768 static int kvm_ppc_register_host_cpu_type(void)
1770 TypeInfo type_info = {
1771 .name = TYPE_HOST_POWERPC_CPU,
1772 .instance_init = kvmppc_host_cpu_initfn,
1773 .class_init = kvmppc_host_cpu_class_init,
1775 uint32_t host_pvr = mfpvr();
1776 PowerPCCPUClass *pvr_pcc;
1778 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1779 if (pvr_pcc == NULL) {
1780 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
1782 if (pvr_pcc == NULL) {
1783 return -1;
1785 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1786 type_register(&type_info);
1787 return 0;
1790 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1792 struct kvm_rtas_token_args args = {
1793 .token = token,
1796 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
1797 return -ENOENT;
1800 strncpy(args.name, function, sizeof(args.name));
1802 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
1805 int kvmppc_get_htab_fd(bool write)
1807 struct kvm_get_htab_fd s = {
1808 .flags = write ? KVM_GET_HTAB_WRITE : 0,
1809 .start_index = 0,
1812 if (!cap_htab_fd) {
1813 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
1814 return -1;
1817 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
1820 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
1822 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1823 uint8_t buf[bufsize];
1824 ssize_t rc;
1826 do {
1827 rc = read(fd, buf, bufsize);
1828 if (rc < 0) {
1829 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
1830 strerror(errno));
1831 return rc;
1832 } else if (rc) {
1833 /* Kernel already retuns data in BE format for the file */
1834 qemu_put_buffer(f, buf, rc);
1836 } while ((rc != 0)
1837 && ((max_ns < 0)
1838 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
1840 return (rc == 0) ? 1 : 0;
1843 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
1844 uint16_t n_valid, uint16_t n_invalid)
1846 struct kvm_get_htab_header *buf;
1847 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
1848 ssize_t rc;
1850 buf = alloca(chunksize);
1851 /* This is KVM on ppc, so this is all big-endian */
1852 buf->index = index;
1853 buf->n_valid = n_valid;
1854 buf->n_invalid = n_invalid;
1856 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
1858 rc = write(fd, buf, chunksize);
1859 if (rc < 0) {
1860 fprintf(stderr, "Error writing KVM hash table: %s\n",
1861 strerror(errno));
1862 return rc;
1864 if (rc != chunksize) {
1865 /* We should never get a short write on a single chunk */
1866 fprintf(stderr, "Short write, restoring KVM hash table\n");
1867 return -1;
1869 return 0;
1872 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1874 return true;
1877 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1879 return 1;
/* Process-wide SIGBUS is not handled on ppc; report "unhandled". */
int kvm_arch_on_sigbus(int code, void *addr)
{
    return 1;
}
1887 void kvm_arch_init_irq_routing(KVMState *s)
1891 int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1893 return -EINVAL;
1896 int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1898 return -EINVAL;
1901 int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1903 return -EINVAL;
1906 int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1908 return -EINVAL;
/* Nothing to clear: hardware breakpoints are unsupported on ppc. */
void kvm_arch_remove_all_hw_breakpoints(void)
{
}
1915 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
1919 struct kvm_get_htab_buf {
1920 struct kvm_get_htab_header header;
1922 * We require one extra byte for read
1924 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
1927 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
1929 int htab_fd;
1930 struct kvm_get_htab_fd ghf;
1931 struct kvm_get_htab_buf *hpte_buf;
1933 ghf.flags = 0;
1934 ghf.start_index = pte_index;
1935 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1936 if (htab_fd < 0) {
1937 goto error_out;
1940 hpte_buf = g_malloc0(sizeof(*hpte_buf));
1942 * Read the hpte group
1944 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
1945 goto out_close;
1948 close(htab_fd);
1949 return (uint64_t)(uintptr_t) hpte_buf->hpte;
1951 out_close:
1952 g_free(hpte_buf);
1953 close(htab_fd);
1954 error_out:
1955 return 0;
1958 void kvmppc_hash64_free_pteg(uint64_t token)
1960 struct kvm_get_htab_buf *htab_buf;
1962 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
1963 hpte);
1964 g_free(htab_buf);
1965 return;
1968 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
1969 target_ulong pte0, target_ulong pte1)
1971 int htab_fd;
1972 struct kvm_get_htab_fd ghf;
1973 struct kvm_get_htab_buf hpte_buf;
1975 ghf.flags = 0;
1976 ghf.start_index = 0; /* Ignored */
1977 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1978 if (htab_fd < 0) {
1979 goto error_out;
1982 hpte_buf.header.n_valid = 1;
1983 hpte_buf.header.n_invalid = 0;
1984 hpte_buf.header.index = pte_index;
1985 hpte_buf.hpte[0] = pte0;
1986 hpte_buf.hpte[1] = pte1;
1988 * Write the hpte entry.
1989 * CAUTION: write() has the warn_unused_result attribute. Hence we
1990 * need to check the return value, even though we do nothing.
1992 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
1993 goto out_close;
1996 out_close:
1997 close(htab_fd);
1998 return;
2000 error_out:
2001 return;