target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include <dirent.h>
  18 #include <sys/types.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/mman.h>
  21 #include <sys/vfs.h>
  22
  23 #include <linux/kvm.h>
  24
  25 #include "qemu-common.h"
  26 #include "qemu/timer.h"
  27 #include "sysemu/sysemu.h"
  28 #include "sysemu/kvm.h"
  29 #include "kvm_ppc.h"
  30 #include "cpu.h"
  31 #include "sysemu/cpus.h"
  32 #include "sysemu/device_tree.h"
  33 #include "mmu-hash64.h"
  34
  35 #include "hw/sysbus.h"
  36 #include "hw/ppc/spapr.h"
  37 #include "hw/ppc/spapr_vio.h"
  38 #include "sysemu/watchdog.h"
  39 #include "trace.h"
  40
/* Uncomment the next line to compile in the DPRINTF() debug output. */
//#define DEBUG_KVM

/*
 * DPRINTF(fmt, ...): printf-style debug message written to stderr.
 *
 * When DEBUG_KVM is not defined the macro expands to an empty statement,
 * so neither the format string nor the arguments are evaluated.
 */
#ifdef DEBUG_KVM
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
  50
/* Directory of the CPU nodes under the host's /proc/device-tree. */
#define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"

/*
 * Capability table for this architecture.  It holds only the
 * KVM_CAP_LAST_INFO entry (presumably the list terminator, i.e. no
 * capability is listed as mandatory here -- confirm against sysemu/kvm.h).
 */
const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
    KVM_CAP_LAST_INFO
};
  56
/*
 * Cached kvm_check_extension() results for the optional PowerPC KVM
 * capabilities.  All of them are filled in by kvm_arch_init(), except
 * cap_papr: per the note in kvm_arch_init() that capability is only
 * activated later, by kvmppc_set_papr().
 */
static int cap_interrupt_unset = false;
static int cap_interrupt_level = false;
static int cap_segstate;
static int cap_booke_sregs;
static int cap_ppc_smt;
static int cap_ppc_rma;
static int cap_spapr_tce;
static int cap_hior;
static int cap_one_reg;
static int cap_epr;
static int cap_ppc_watchdog;
static int cap_papr;
static int cap_htab_fd;
  70
/* XXX We have a race condition where we actually have a level-triggered
 *     interrupt, but the infrastructure can't expose that yet, so the guest
 *     takes it but ignores it, goes to sleep and never gets notified that
 *     there's still an interrupt pending.
 *
 *     As a quick workaround, let's just wake up again 20 ms after we injected
 *     an interrupt. That way we can ensure that we're always reinjecting
 *     interrupts in case the guest swallowed them.
 *
 *     The timer is created in kvm_arch_init_vcpu() with kvm_kick_cpu() as
 *     its callback.
 */
static QEMUTimer *idle_timer;
  81
  82 static void kvm_kick_cpu(void *opaque)
  83 {
  84     PowerPCCPU *cpu = opaque;
  85
  86     qemu_cpu_kick(CPU(cpu));
  87 }
  88
  89 static int kvm_ppc_register_host_cpu_type(void);
  90
  91 int kvm_arch_init(KVMState *s)
  92 {
  93     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
  94     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
  95     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
  96     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
  97     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
  98     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
  99     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 100     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 101     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 102     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 103     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 104     /* Note: we don't set cap_papr here, because this capability is
 105      * only activated after this by kvmppc_set_papr() */
 106     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 107
 108     if (!cap_interrupt_level) {
 109         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 110                         "VM to stall at times!\n");
 111     }
 112
 113     kvm_ppc_register_host_cpu_type();
 114
 115     return 0;
 116 }
 117
 118 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 119 {
 120     CPUPPCState *cenv = &cpu->env;
 121     CPUState *cs = CPU(cpu);
 122     struct kvm_sregs sregs;
 123     int ret;
 124
 125     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 126         /* What we're really trying to say is "if we're on BookE, we use
 127            the native PVR for now". This is the only sane way to check
 128            it though, so we potentially confuse users that they can run
 129            BookE guests on BookS. Let's hope nobody dares enough :) */
 130         return 0;
 131     } else {
 132         if (!cap_segstate) {
 133             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 134             return -ENOSYS;
 135         }
 136     }
 137
 138     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 139     if (ret) {
 140         return ret;
 141     }
 142
 143     sregs.pvr = cenv->spr[SPR_PVR];
 144     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 145 }
 146
 147 /* Set up a shared TLB array with KVM */
 148 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 149 {
 150     CPUPPCState *env = &cpu->env;
 151     CPUState *cs = CPU(cpu);
 152     struct kvm_book3e_206_tlb_params params = {};
 153     struct kvm_config_tlb cfg = {};
 154     struct kvm_enable_cap encap = {};
 155     unsigned int entries = 0;
 156     int ret, i;
 157
 158     if (!kvm_enabled() ||
 159         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 160         return 0;
 161     }
 162
 163     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 164
 165     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 166         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 167         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 168         entries += params.tlb_sizes[i];
 169     }
 170
 171     assert(entries == env->nb_tlb);
 172     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 173
 174     env->tlb_dirty = true;
 175
 176     cfg.array = (uintptr_t)env->tlb.tlbm;
 177     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 178     cfg.params = (uintptr_t)&params;
 179     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 180
 181     encap.cap = KVM_CAP_SW_TLB;
 182     encap.args[0] = (uintptr_t)&cfg;
 183
 184     ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
 185     if (ret < 0) {
 186         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 187                 __func__, strerror(-ret));
 188         return ret;
 189     }
 190
 191     env->kvm_sw_tlb = true;
 192     return 0;
 193 }
 194
 195
 196 #if defined(TARGET_PPC64)
 197 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 198                                        struct kvm_ppc_smmu_info *info)
 199 {
 200     CPUPPCState *env = &cpu->env;
 201     CPUState *cs = CPU(cpu);
 202
 203     memset(info, 0, sizeof(*info));
 204
 205     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 206      * need to "guess" what the supported page sizes are.
 207      *
 208      * For that to work we make a few assumptions:
 209      *
 210      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 211      *   KVM which only supports 4K and 16M pages, but supports them
 212      *   regardless of the backing store characteritics. We also don't
 213      *   support 1T segments.
 214      *
 215      *   This is safe as if HV KVM ever supports that capability or PR
 216      *   KVM grows supports for more page/segment sizes, those versions
 217      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 218      *   will not hit this fallback
 219      *
 220      * - Else we are running HV KVM. This means we only support page
 221      *   sizes that fit in the backing store. Additionally we only
 222      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 223      *   P7 encodings for the SLB and hash table. Here too, we assume
 224      *   support for any newer processor will mean a kernel that
 225      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 226      *   this fallback.
 227      */
 228     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 229         /* No flags */
 230         info->flags = 0;
 231         info->slb_size = 64;
 232
 233         /* Standard 4k base page size segment */
 234         info->sps[0].page_shift = 12;
 235         info->sps[0].slb_enc = 0;
 236         info->sps[0].enc[0].page_shift = 12;
 237         info->sps[0].enc[0].pte_enc = 0;
 238
 239         /* Standard 16M large page size segment */
 240         info->sps[1].page_shift = 24;
 241         info->sps[1].slb_enc = SLB_VSID_L;
 242         info->sps[1].enc[0].page_shift = 24;
 243         info->sps[1].enc[0].pte_enc = 0;
 244     } else {
 245         int i = 0;
 246
 247         /* HV KVM has backing store size restrictions */
 248         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 249
 250         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 251             info->flags |= KVM_PPC_1T_SEGMENTS;
 252         }
 253
 254         if (env->mmu_model == POWERPC_MMU_2_06) {
 255             info->slb_size = 32;
 256         } else {
 257             info->slb_size = 64;
 258         }
 259
 260         /* Standard 4k base page size segment */
 261         info->sps[i].page_shift = 12;
 262         info->sps[i].slb_enc = 0;
 263         info->sps[i].enc[0].page_shift = 12;
 264         info->sps[i].enc[0].pte_enc = 0;
 265         i++;
 266
 267         /* 64K on MMU 2.06 */
 268         if (env->mmu_model == POWERPC_MMU_2_06) {
 269             info->sps[i].page_shift = 16;
 270             info->sps[i].slb_enc = 0x110;
 271             info->sps[i].enc[0].page_shift = 16;
 272             info->sps[i].enc[0].pte_enc = 1;
 273             i++;
 274         }
 275
 276         /* Standard 16M large page size segment */
 277         info->sps[i].page_shift = 24;
 278         info->sps[i].slb_enc = SLB_VSID_L;
 279         info->sps[i].enc[0].page_shift = 24;
 280         info->sps[i].enc[0].pte_enc = 0;
 281     }
 282 }
 283
 284 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 285 {
 286     CPUState *cs = CPU(cpu);
 287     int ret;
 288
 289     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 290         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 291         if (ret == 0) {
 292             return;
 293         }
 294     }
 295
 296     kvm_get_fallback_smmu_info(cpu, info);
 297 }
 298
 299 static long getrampagesize(void)
 300 {
 301     struct statfs fs;
 302     int ret;
 303
 304     if (!mem_path) {
 305         /* guest RAM is backed by normal anonymous pages */
 306         return getpagesize();
 307     }
 308
 309     do {
 310         ret = statfs(mem_path, &fs);
 311     } while (ret != 0 && errno == EINTR);
 312
 313     if (ret != 0) {
 314         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 315                 strerror(errno));
 316         exit(1);
 317     }
 318
 319 #define HUGETLBFS_MAGIC       0x958458f6
 320
 321     if (fs.f_type != HUGETLBFS_MAGIC) {
 322         /* Explicit mempath, but it's ordinary pages */
 323         return getpagesize();
 324     }
 325
 326     /* It's hugepage, return the huge page size */
 327     return fs.f_bsize;
 328 }
 329
 330 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 331 {
 332     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 333         return true;
 334     }
 335
 336     return (1ul << shift) <= rampgsize;
 337 }
 338
 339 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 340 {
 341     static struct kvm_ppc_smmu_info smmu_info;
 342     static bool has_smmu_info;
 343     CPUPPCState *env = &cpu->env;
 344     long rampagesize;
 345     int iq, ik, jq, jk;
 346
 347     /* We only handle page sizes for 64-bit server guests for now */
 348     if (!(env->mmu_model & POWERPC_MMU_64)) {
 349         return;
 350     }
 351
 352     /* Collect MMU info from kernel if not already */
 353     if (!has_smmu_info) {
 354         kvm_get_smmu_info(cpu, &smmu_info);
 355         has_smmu_info = true;
 356     }
 357
 358     rampagesize = getrampagesize();
 359
 360     /* Convert to QEMU form */
 361     memset(&env->sps, 0, sizeof(env->sps));
 362
 363     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 364         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 365         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 366
 367         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 368                                  ksps->page_shift)) {
 369             continue;
 370         }
 371         qsps->page_shift = ksps->page_shift;
 372         qsps->slb_enc = ksps->slb_enc;
 373         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 374             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 375                                      ksps->enc[jk].page_shift)) {
 376                 continue;
 377             }
 378             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 379             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 380             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 381                 break;
 382             }
 383         }
 384         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 385             break;
 386         }
 387     }
 388     env->slb_nr = smmu_info.slb_size;
 389     if (smmu_info.flags & KVM_PPC_1T_SEGMENTS) {
 390         env->mmu_model |= POWERPC_MMU_1TSEG;
 391     } else {
 392         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 393     }
 394 }
 395 #else /* defined (TARGET_PPC64) */
 396
/* Variant for builds without TARGET_PPC64: there is nothing to fix up, so
 * this does nothing. */
static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
{
}
 400
 401 #endif /* !defined (TARGET_PPC64) */
 402
 403 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 404 {
 405     return cpu->cpu_index;
 406 }
 407
 408 int kvm_arch_init_vcpu(CPUState *cs)
 409 {
 410     PowerPCCPU *cpu = POWERPC_CPU(cs);
 411     CPUPPCState *cenv = &cpu->env;
 412     int ret;
 413
 414     /* Gather server mmu info from KVM and update the CPU state */
 415     kvm_fixup_page_sizes(cpu);
 416
 417     /* Synchronize sregs with kvm */
 418     ret = kvm_arch_sync_sregs(cpu);
 419     if (ret) {
 420         return ret;
 421     }
 422
 423     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 424
 425     /* Some targets support access to KVM's guest TLB. */
 426     switch (cenv->mmu_model) {
 427     case POWERPC_MMU_BOOKE206:
 428         ret = kvm_booke206_tlb_init(cpu);
 429         break;
 430     default:
 431         break;
 432     }
 433
 434     return ret;
 435 }
 436
/* vCPU reset hook; this implementation performs no action. */
void kvm_arch_reset_vcpu(CPUState *cpu)
{
}
 440
 441 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 442 {
 443     CPUPPCState *env = &cpu->env;
 444     CPUState *cs = CPU(cpu);
 445     struct kvm_dirty_tlb dirty_tlb;
 446     unsigned char *bitmap;
 447     int ret;
 448
 449     if (!env->kvm_sw_tlb) {
 450         return;
 451     }
 452
 453     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 454     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 455
 456     dirty_tlb.bitmap = (uintptr_t)bitmap;
 457     dirty_tlb.num_dirty = env->nb_tlb;
 458
 459     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 460     if (ret) {
 461         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 462                 __func__, strerror(-ret));
 463     }
 464
 465     g_free(bitmap);
 466 }
 467
 468 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 469 {
 470     PowerPCCPU *cpu = POWERPC_CPU(cs);
 471     CPUPPCState *env = &cpu->env;
 472     union {
 473         uint32_t u32;
 474         uint64_t u64;
 475     } val;
 476     struct kvm_one_reg reg = {
 477         .id = id,
 478         .addr = (uintptr_t) &val,
 479     };
 480     int ret;
 481
 482     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 483     if (ret != 0) {
 484         trace_kvm_failed_spr_get(spr, strerror(errno));
 485     } else {
 486         switch (id & KVM_REG_SIZE_MASK) {
 487         case KVM_REG_SIZE_U32:
 488             env->spr[spr] = val.u32;
 489             break;
 490
 491         case KVM_REG_SIZE_U64:
 492             env->spr[spr] = val.u64;
 493             break;
 494
 495         default:
 496             /* Don't handle this size yet */
 497             abort();
 498         }
 499     }
 500 }
 501
 502 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 503 {
 504     PowerPCCPU *cpu = POWERPC_CPU(cs);
 505     CPUPPCState *env = &cpu->env;
 506     union {
 507         uint32_t u32;
 508         uint64_t u64;
 509     } val;
 510     struct kvm_one_reg reg = {
 511         .id = id,
 512         .addr = (uintptr_t) &val,
 513     };
 514     int ret;
 515
 516     switch (id & KVM_REG_SIZE_MASK) {
 517     case KVM_REG_SIZE_U32:
 518         val.u32 = env->spr[spr];
 519         break;
 520
 521     case KVM_REG_SIZE_U64:
 522         val.u64 = env->spr[spr];
 523         break;
 524
 525     default:
 526         /* Don't handle this size yet */
 527         abort();
 528     }
 529
 530     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 531     if (ret != 0) {
 532         trace_kvm_failed_spr_set(spr, strerror(errno));
 533     }
 534 }
 535
 536 static int kvm_put_fp(CPUState *cs)
 537 {
 538     PowerPCCPU *cpu = POWERPC_CPU(cs);
 539     CPUPPCState *env = &cpu->env;
 540     struct kvm_one_reg reg;
 541     int i;
 542     int ret;
 543
 544     if (env->insns_flags & PPC_FLOAT) {
 545         uint64_t fpscr = env->fpscr;
 546         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 547
 548         reg.id = KVM_REG_PPC_FPSCR;
 549         reg.addr = (uintptr_t)&fpscr;
 550         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 551         if (ret < 0) {
 552             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 553             return ret;
 554         }
 555
 556         for (i = 0; i < 32; i++) {
 557             uint64_t vsr[2];
 558
 559             vsr[0] = float64_val(env->fpr[i]);
 560             vsr[1] = env->vsr[i];
 561             reg.addr = (uintptr_t) &vsr;
 562             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 563
 564             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 565             if (ret < 0) {
 566                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 567                         i, strerror(errno));
 568                 return ret;
 569             }
 570         }
 571     }
 572
 573     if (env->insns_flags & PPC_ALTIVEC) {
 574         reg.id = KVM_REG_PPC_VSCR;
 575         reg.addr = (uintptr_t)&env->vscr;
 576         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 577         if (ret < 0) {
 578             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 579             return ret;
 580         }
 581
 582         for (i = 0; i < 32; i++) {
 583             reg.id = KVM_REG_PPC_VR(i);
 584             reg.addr = (uintptr_t)&env->avr[i];
 585             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 586             if (ret < 0) {
 587                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 588                 return ret;
 589             }
 590         }
 591     }
 592
 593     return 0;
 594 }
 595
 596 static int kvm_get_fp(CPUState *cs)
 597 {
 598     PowerPCCPU *cpu = POWERPC_CPU(cs);
 599     CPUPPCState *env = &cpu->env;
 600     struct kvm_one_reg reg;
 601     int i;
 602     int ret;
 603
 604     if (env->insns_flags & PPC_FLOAT) {
 605         uint64_t fpscr;
 606         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 607
 608         reg.id = KVM_REG_PPC_FPSCR;
 609         reg.addr = (uintptr_t)&fpscr;
 610         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 611         if (ret < 0) {
 612             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 613             return ret;
 614         } else {
 615             env->fpscr = fpscr;
 616         }
 617
 618         for (i = 0; i < 32; i++) {
 619             uint64_t vsr[2];
 620
 621             reg.addr = (uintptr_t) &vsr;
 622             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 623
 624             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 625             if (ret < 0) {
 626                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 627                         vsx ? "VSR" : "FPR", i, strerror(errno));
 628                 return ret;
 629             } else {
 630                 env->fpr[i] = vsr[0];
 631                 if (vsx) {
 632                     env->vsr[i] = vsr[1];
 633                 }
 634             }
 635         }
 636     }
 637
 638     if (env->insns_flags & PPC_ALTIVEC) {
 639         reg.id = KVM_REG_PPC_VSCR;
 640         reg.addr = (uintptr_t)&env->vscr;
 641         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 642         if (ret < 0) {
 643             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 644             return ret;
 645         }
 646
 647         for (i = 0; i < 32; i++) {
 648             reg.id = KVM_REG_PPC_VR(i);
 649             reg.addr = (uintptr_t)&env->avr[i];
 650             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 651             if (ret < 0) {
 652                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 653                         i, strerror(errno));
 654                 return ret;
 655             }
 656         }
 657     }
 658
 659     return 0;
 660 }
 661
 662 #if defined(TARGET_PPC64)
 663 static int kvm_get_vpa(CPUState *cs)
 664 {
 665     PowerPCCPU *cpu = POWERPC_CPU(cs);
 666     CPUPPCState *env = &cpu->env;
 667     struct kvm_one_reg reg;
 668     int ret;
 669
 670     reg.id = KVM_REG_PPC_VPA_ADDR;
 671     reg.addr = (uintptr_t)&env->vpa_addr;
 672     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 673     if (ret < 0) {
 674         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 675         return ret;
 676     }
 677
 678     assert((uintptr_t)&env->slb_shadow_size
 679            == ((uintptr_t)&env->slb_shadow_addr + 8));
 680     reg.id = KVM_REG_PPC_VPA_SLB;
 681     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 682     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 683     if (ret < 0) {
 684         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 685                 strerror(errno));
 686         return ret;
 687     }
 688
 689     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 690     reg.id = KVM_REG_PPC_VPA_DTL;
 691     reg.addr = (uintptr_t)&env->dtl_addr;
 692     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 693     if (ret < 0) {
 694         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 695                 strerror(errno));
 696         return ret;
 697     }
 698
 699     return 0;
 700 }
 701
 702 static int kvm_put_vpa(CPUState *cs)
 703 {
 704     PowerPCCPU *cpu = POWERPC_CPU(cs);
 705     CPUPPCState *env = &cpu->env;
 706     struct kvm_one_reg reg;
 707     int ret;
 708
 709     /* SLB shadow or DTL can't be registered unless a master VPA is
 710      * registered.  That means when restoring state, if a VPA *is*
 711      * registered, we need to set that up first.  If not, we need to
 712      * deregister the others before deregistering the master VPA */
 713     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 714
 715     if (env->vpa_addr) {
 716         reg.id = KVM_REG_PPC_VPA_ADDR;
 717         reg.addr = (uintptr_t)&env->vpa_addr;
 718         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 719         if (ret < 0) {
 720             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 721             return ret;
 722         }
 723     }
 724
 725     assert((uintptr_t)&env->slb_shadow_size
 726            == ((uintptr_t)&env->slb_shadow_addr + 8));
 727     reg.id = KVM_REG_PPC_VPA_SLB;
 728     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 729     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 730     if (ret < 0) {
 731         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 732         return ret;
 733     }
 734
 735     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 736     reg.id = KVM_REG_PPC_VPA_DTL;
 737     reg.addr = (uintptr_t)&env->dtl_addr;
 738     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 739     if (ret < 0) {
 740         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 741                 strerror(errno));
 742         return ret;
 743     }
 744
 745     if (!env->vpa_addr) {
 746         reg.id = KVM_REG_PPC_VPA_ADDR;
 747         reg.addr = (uintptr_t)&env->vpa_addr;
 748         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 749         if (ret < 0) {
 750             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 751             return ret;
 752         }
 753     }
 754
 755     return 0;
 756 }
 757 #endif /* TARGET_PPC64 */
 758
 759 int kvm_arch_put_registers(CPUState *cs, int level)
 760 {
 761     PowerPCCPU *cpu = POWERPC_CPU(cs);
 762     CPUPPCState *env = &cpu->env;
 763     struct kvm_regs regs;
 764     int ret;
 765     int i;
 766
 767     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 768     if (ret < 0) {
 769         return ret;
 770     }
 771
 772     regs.ctr = env->ctr;
 773     regs.lr  = env->lr;
 774     regs.xer = cpu_read_xer(env);
 775     regs.msr = env->msr;
 776     regs.pc = env->nip;
 777
 778     regs.srr0 = env->spr[SPR_SRR0];
 779     regs.srr1 = env->spr[SPR_SRR1];
 780
 781     regs.sprg0 = env->spr[SPR_SPRG0];
 782     regs.sprg1 = env->spr[SPR_SPRG1];
 783     regs.sprg2 = env->spr[SPR_SPRG2];
 784     regs.sprg3 = env->spr[SPR_SPRG3];
 785     regs.sprg4 = env->spr[SPR_SPRG4];
 786     regs.sprg5 = env->spr[SPR_SPRG5];
 787     regs.sprg6 = env->spr[SPR_SPRG6];
 788     regs.sprg7 = env->spr[SPR_SPRG7];
 789
 790     regs.pid = env->spr[SPR_BOOKE_PID];
 791
 792     for (i = 0;i < 32; i++)
 793         regs.gpr[i] = env->gpr[i];
 794
 795     regs.cr = 0;
 796     for (i = 0; i < 8; i++) {
 797         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 798     }
 799
 800     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 801     if (ret < 0)
 802         return ret;
 803
 804     kvm_put_fp(cs);
 805
 806     if (env->tlb_dirty) {
 807         kvm_sw_tlb_put(cpu);
 808         env->tlb_dirty = false;
 809     }
 810
 811     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 812         struct kvm_sregs sregs;
 813
 814         sregs.pvr = env->spr[SPR_PVR];
 815
 816         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 817
 818         /* Sync SLB */
 819 #ifdef TARGET_PPC64
 820         for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 821             sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 822             if (env->slb[i].esid & SLB_ESID_V) {
 823                 sregs.u.s.ppc64.slb[i].slbe |= i;
 824             }
 825             sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 826         }
 827 #endif
 828
 829         /* Sync SRs */
 830         for (i = 0; i < 16; i++) {
 831             sregs.u.s.ppc32.sr[i] = env->sr[i];
 832         }
 833
 834         /* Sync BATs */
 835         for (i = 0; i < 8; i++) {
 836             /* Beware. We have to swap upper and lower bits here */
 837             sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 838                 | env->DBAT[1][i];
 839             sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 840                 | env->IBAT[1][i];
 841         }
 842
 843         ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 844         if (ret) {
 845             return ret;
 846         }
 847     }
 848
 849     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 850         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 851     }
 852
 853     if (cap_one_reg) {
 854         int i;
 855
 856         /* We deliberately ignore errors here, for kernels which have
 857          * the ONE_REG calls, but don't support the specific
 858          * registers, there's a reasonable chance things will still
 859          * work, at least until we try to migrate. */
 860         for (i = 0; i < 1024; i++) {
 861             uint64_t id = env->spr_cb[i].one_reg_id;
 862
 863             if (id != 0) {
 864                 kvm_put_one_spr(cs, id, i);
 865             }
 866         }
 867
 868 #ifdef TARGET_PPC64
 869         if (cap_papr) {
 870             if (kvm_put_vpa(cs) < 0) {
 871                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
 872             }
 873         }
 874 #endif /* TARGET_PPC64 */
 875     }
 876
 877     return ret;
 878 }
 879
 880 int kvm_arch_get_registers(CPUState *cs)
 881 {
 882     PowerPCCPU *cpu = POWERPC_CPU(cs);
 883     CPUPPCState *env = &cpu->env;
 884     struct kvm_regs regs;
 885     struct kvm_sregs sregs;
 886     uint32_t cr;
 887     int i, ret;
 888
 889     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 890     if (ret < 0)
 891         return ret;
 892
 893     cr = regs.cr;
 894     for (i = 7; i >= 0; i--) {
 895         env->crf[i] = cr & 15;
 896         cr >>= 4;
 897     }
 898
 899     env->ctr = regs.ctr;
 900     env->lr = regs.lr;
 901     cpu_write_xer(env, regs.xer);
 902     env->msr = regs.msr;
 903     env->nip = regs.pc;
 904
 905     env->spr[SPR_SRR0] = regs.srr0;
 906     env->spr[SPR_SRR1] = regs.srr1;
 907
 908     env->spr[SPR_SPRG0] = regs.sprg0;
 909     env->spr[SPR_SPRG1] = regs.sprg1;
 910     env->spr[SPR_SPRG2] = regs.sprg2;
 911     env->spr[SPR_SPRG3] = regs.sprg3;
 912     env->spr[SPR_SPRG4] = regs.sprg4;
 913     env->spr[SPR_SPRG5] = regs.sprg5;
 914     env->spr[SPR_SPRG6] = regs.sprg6;
 915     env->spr[SPR_SPRG7] = regs.sprg7;
 916
 917     env->spr[SPR_BOOKE_PID] = regs.pid;
 918
 919     for (i = 0;i < 32; i++)
 920         env->gpr[i] = regs.gpr[i];
 921
 922     kvm_get_fp(cs);
 923
 924     if (cap_booke_sregs) {
 925         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 926         if (ret < 0) {
 927             return ret;
 928         }
 929
 930         if (sregs.u.e.features & KVM_SREGS_E_BASE) {
 931             env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
 932             env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
 933             env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
 934             env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
 935             env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
 936             env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
 937             env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
 938             env->spr[SPR_DECR] = sregs.u.e.dec;
 939             env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
 940             env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
 941             env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
 942         }
 943
 944         if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
 945             env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
 946             env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
 947             env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
 948             env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
 949             env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
 950         }
 951
 952         if (sregs.u.e.features & KVM_SREGS_E_64) {
 953             env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
 954         }
 955
 956         if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
 957             env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
 958         }
 959
 960         if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
 961             env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
 962             env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
 963             env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
 964             env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
 965             env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
 966             env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
 967             env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
 968             env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
 969             env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
 970             env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
 971             env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
 972             env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
 973             env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
 974             env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
 975             env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
 976             env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
 977
 978             if (sregs.u.e.features & KVM_SREGS_E_SPE) {
 979                 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
 980                 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
 981                 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
 982             }
 983
 984             if (sregs.u.e.features & KVM_SREGS_E_PM) {
 985                 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
 986             }
 987
 988             if (sregs.u.e.features & KVM_SREGS_E_PC) {
 989                 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
 990                 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
 991             }
 992         }
 993
 994         if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
 995             env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
 996             env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
 997             env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
 998             env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
 999             env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1000             env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1001             env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1002             env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1003             env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1004             env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1005         }
1006
1007         if (sregs.u.e.features & KVM_SREGS_EXP) {
1008             env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1009         }
1010
1011         if (sregs.u.e.features & KVM_SREGS_E_PD) {
1012             env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1013             env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1014         }
1015
1016         if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1017             env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1018             env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1019             env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1020
1021             if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1022                 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1023                 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1024             }
1025         }
1026     }
1027
1028     if (cap_segstate) {
1029         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1030         if (ret < 0) {
1031             return ret;
1032         }
1033
1034         if (!env->external_htab) {
1035             ppc_store_sdr1(env, sregs.u.s.sdr1);
1036         }
1037
1038         /* Sync SLB */
1039 #ifdef TARGET_PPC64
1040         /*
1041          * The packed SLB array we get from KVM_GET_SREGS only contains
1042          * information about valid entries. So we flush our internal
1043          * copy to get rid of stale ones, then put all valid SLB entries
1044          * back in.
1045          */
1046         memset(env->slb, 0, sizeof(env->slb));
1047         for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1048             target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1049             target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1050             /*
1051              * Only restore valid entries
1052              */
1053             if (rb & SLB_ESID_V) {
1054                 ppc_store_slb(env, rb, rs);
1055             }
1056         }
1057 #endif
1058
1059         /* Sync SRs */
1060         for (i = 0; i < 16; i++) {
1061             env->sr[i] = sregs.u.s.ppc32.sr[i];
1062         }
1063
1064         /* Sync BATs */
1065         for (i = 0; i < 8; i++) {
1066             env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1067             env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1068             env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1069             env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1070         }
1071     }
1072
1073     if (cap_hior) {
1074         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1075     }
1076
1077     if (cap_one_reg) {
1078         int i;
1079
1080         /* We deliberately ignore errors here, for kernels which have
1081          * the ONE_REG calls, but don't support the specific
1082          * registers, there's a reasonable chance things will still
1083          * work, at least until we try to migrate. */
1084         for (i = 0; i < 1024; i++) {
1085             uint64_t id = env->spr_cb[i].one_reg_id;
1086
1087             if (id != 0) {
1088                 kvm_get_one_spr(cs, id, i);
1089             }
1090         }
1091
1092 #ifdef TARGET_PPC64
1093         if (cap_papr) {
1094             if (kvm_get_vpa(cs) < 0) {
1095                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1096             }
1097         }
1098 #endif
1099     }
1100
1101     return 0;
1102 }
1103
1104 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1105 {
1106     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1107
1108     if (irq != PPC_INTERRUPT_EXT) {
1109         return 0;
1110     }
1111
1112     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1113         return 0;
1114     }
1115
1116     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1117
1118     return 0;
1119 }
1120
/*
 * PPC_INPUT_INT names the external-interrupt input pin constant that matches
 * the configured target flavour: TARGET_PPCEMB takes precedence, then
 * TARGET_PPC64, otherwise the 6xx pin is used.
 */
#if !defined(TARGET_PPCEMB) && !defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#elif defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#else
#define PPC_INPUT_INT PPC970_INPUT_INT
#endif
1128
1129 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1130 {
1131     PowerPCCPU *cpu = POWERPC_CPU(cs);
1132     CPUPPCState *env = &cpu->env;
1133     int r;
1134     unsigned irq;
1135
1136     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1137      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1138     if (!cap_interrupt_level &&
1139         run->ready_for_interrupt_injection &&
1140         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1141         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1142     {
1143         /* For now KVM disregards the 'irq' argument. However, in the
1144          * future KVM could cache it in-kernel to avoid a heavyweight exit
1145          * when reading the UIC.
1146          */
1147         irq = KVM_INTERRUPT_SET;
1148
1149         DPRINTF("injected interrupt %d\n", irq);
1150         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1151         if (r < 0) {
1152             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1153         }
1154
1155         /* Always wake up soon in case the interrupt was level based */
1156         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1157                        (get_ticks_per_sec() / 50));
1158     }
1159
1160     /* We don't know if there are more interrupts pending after this. However,
1161      * the guest will return to userspace in the course of handling this one
1162      * anyways, so we will get a chance to deliver the rest. */
1163 }
1164
1165 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1166 {
1167 }
1168
1169 int kvm_arch_process_async_events(CPUState *cs)
1170 {
1171     return cs->halted;
1172 }
1173
1174 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1175 {
1176     CPUState *cs = CPU(cpu);
1177     CPUPPCState *env = &cpu->env;
1178
1179     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1180         cs->halted = 1;
1181         env->exception_index = EXCP_HLT;
1182     }
1183
1184     return 0;
1185 }
1186
1187 /* map dcr access to existing qemu dcr emulation */
1188 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1189 {
1190     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1191         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1192
1193     return 0;
1194 }
1195
1196 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1197 {
1198     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1199         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1200
1201     return 0;
1202 }
1203
1204 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1205 {
1206     PowerPCCPU *cpu = POWERPC_CPU(cs);
1207     CPUPPCState *env = &cpu->env;
1208     int ret;
1209
1210     switch (run->exit_reason) {
1211     case KVM_EXIT_DCR:
1212         if (run->dcr.is_write) {
1213             DPRINTF("handle dcr write\n");
1214             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1215         } else {
1216             DPRINTF("handle dcr read\n");
1217             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1218         }
1219         break;
1220     case KVM_EXIT_HLT:
1221         DPRINTF("handle halt\n");
1222         ret = kvmppc_handle_halt(cpu);
1223         break;
1224 #if defined(TARGET_PPC64)
1225     case KVM_EXIT_PAPR_HCALL:
1226         DPRINTF("handle PAPR hypercall\n");
1227         run->papr_hcall.ret = spapr_hypercall(cpu,
1228                                               run->papr_hcall.nr,
1229                                               run->papr_hcall.args);
1230         ret = 0;
1231         break;
1232 #endif
1233     case KVM_EXIT_EPR:
1234         DPRINTF("handle epr\n");
1235         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1236         ret = 0;
1237         break;
1238     case KVM_EXIT_WATCHDOG:
1239         DPRINTF("handle watchdog expiry\n");
1240         watchdog_perform_action();
1241         ret = 0;
1242         break;
1243
1244     default:
1245         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1246         ret = -1;
1247         break;
1248     }
1249
1250     return ret;
1251 }
1252
1253 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1254 {
1255     CPUState *cs = CPU(cpu);
1256     uint32_t bits = tsr_bits;
1257     struct kvm_one_reg reg = {
1258         .id = KVM_REG_PPC_OR_TSR,
1259         .addr = (uintptr_t) &bits,
1260     };
1261
1262     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1263 }
1264
1265 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1266 {
1267
1268     CPUState *cs = CPU(cpu);
1269     uint32_t bits = tsr_bits;
1270     struct kvm_one_reg reg = {
1271         .id = KVM_REG_PPC_CLEAR_TSR,
1272         .addr = (uintptr_t) &bits,
1273     };
1274
1275     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1276 }
1277
1278 int kvmppc_set_tcr(PowerPCCPU *cpu)
1279 {
1280     CPUState *cs = CPU(cpu);
1281     CPUPPCState *env = &cpu->env;
1282     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1283
1284     struct kvm_one_reg reg = {
1285         .id = KVM_REG_PPC_TCR,
1286         .addr = (uintptr_t) &tcr,
1287     };
1288
1289     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1290 }
1291
1292 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1293 {
1294     CPUState *cs = CPU(cpu);
1295     struct kvm_enable_cap encap = {};
1296     int ret;
1297
1298     if (!kvm_enabled()) {
1299         return -1;
1300     }
1301
1302     if (!cap_ppc_watchdog) {
1303         printf("warning: KVM does not support watchdog");
1304         return -1;
1305     }
1306
1307     encap.cap = KVM_CAP_PPC_BOOKE_WATCHDOG;
1308     ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &encap);
1309     if (ret < 0) {
1310         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1311                 __func__, strerror(-ret));
1312         return ret;
1313     }
1314
1315     return ret;
1316 }
1317
/*
 * Find the first line of /proc/cpuinfo that starts with @field.
 *
 * @field: prefix to look for at the beginning of a line
 * @value: receives a copy of the whole matching line (bounded by @len)
 * @len:   size of @value in bytes
 *
 * Returns 0 when a matching line was copied, -1 when the file cannot be
 * opened or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    while (fgets(line, sizeof(line), f)) {
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        /* Stop scanning on an empty string, as the original loop did. */
        if (line[0] == '\0') {
            break;
        }
    }

    fclose(f);

    return ret;
}
1345
/*
 * Determine the host timebase frequency from the "timebase" line of
 * /proc/cpuinfo.
 *
 * Returns the parsed frequency, or get_ticks_per_sec() as a fallback when
 * the line is missing, has no ':' separator, or does not carry a usable
 * number (no digits, zero, or a value that does not fit in 32 bits).
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    char *end;
    unsigned long freq;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    ns++;

    /* strtoul (unlike atoi) lets us detect garbage and out-of-range input */
    errno = 0;
    freq = strtoul(ns, &end, 10);
    if (end == ns || errno == ERANGE || freq == 0 || freq > UINT32_MAX) {
        return retval;
    }

    return freq;
}
1365
1366 /* Try to find a device tree node for a CPU with clock-frequency property */
1367 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1368 {
1369     struct dirent *dirp;
1370     DIR *dp;
1371
1372     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1373         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1374         return -1;
1375     }
1376
1377     buf[0] = '\0';
1378     while ((dirp = readdir(dp)) != NULL) {
1379         FILE *f;
1380         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1381                  dirp->d_name);
1382         f = fopen(buf, "r");
1383         if (f) {
1384             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1385             fclose(f);
1386             break;
1387         }
1388         buf[0] = '\0';
1389     }
1390     closedir(dp);
1391     if (buf[0] == '\0') {
1392         printf("Unknown host!\n");
1393         return -1;
1394     }
1395
1396     return 0;
1397 }
1398
1399 /* Read a CPU node property from the host device tree that's a single
1400  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1401  * (can't find or open the property, or doesn't understand the
1402  * format) */
1403 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1404 {
1405     char buf[PATH_MAX];
1406     union {
1407         uint32_t v32;
1408         uint64_t v64;
1409     } u;
1410     FILE *f;
1411     int len;
1412
1413     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1414         return -1;
1415     }
1416
1417     strncat(buf, "/", sizeof(buf) - strlen(buf));
1418     strncat(buf, propname, sizeof(buf) - strlen(buf));
1419
1420     f = fopen(buf, "rb");
1421     if (!f) {
1422         return -1;
1423     }
1424
1425     len = fread(&u, 1, sizeof(u), f);
1426     fclose(f);
1427     switch (len) {
1428     case 4:
1429         /* property is a 32-bit quantity */
1430         return be32_to_cpu(u.v32);
1431     case 8:
1432         return be64_to_cpu(u.v64);
1433     }
1434
1435     return 0;
1436 }
1437
/*
 * Read the host CPU "clock-frequency" device tree property.
 *
 * Returns whatever kvmppc_read_int_cpu_dt() returns for that property,
 * including its failure values.
 */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
1442
/*
 * Read the host CPU "ibm,vmx" device tree property.
 *
 * The 64-bit result of kvmppc_read_int_cpu_dt() is truncated to 32 bits, so
 * its -1 failure value shows up as (uint32_t)-1.
 */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t raw = kvmppc_read_int_cpu_dt("ibm,vmx");

    return (uint32_t)raw;
}
1447
/*
 * Read the host CPU "ibm,dfp" device tree property.
 *
 * The 64-bit result of kvmppc_read_int_cpu_dt() is truncated to 32 bits, so
 * its -1 failure value shows up as (uint32_t)-1.
 */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t raw = kvmppc_read_int_cpu_dt("ibm,dfp");

    return (uint32_t)raw;
}
1452
1453 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1454  {
1455      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1456      CPUState *cs = CPU(cpu);
1457
1458     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1459         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1460         return 0;
1461     }
1462
1463     return 1;
1464 }
1465
1466 int kvmppc_get_hasidle(CPUPPCState *env)
1467 {
1468     struct kvm_ppc_pvinfo pvinfo;
1469
1470     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1471         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1472         return 1;
1473     }
1474
1475     return 0;
1476 }
1477
1478 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1479 {
1480     uint32_t *hc = (uint32_t*)buf;
1481     struct kvm_ppc_pvinfo pvinfo;
1482
1483     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1484         memcpy(buf, pvinfo.hcall, buf_len);
1485         return 0;
1486     }
1487
1488     /*
1489      * Fallback to always fail hypercalls:
1490      *
1491      *     li r3, -1
1492      *     nop
1493      *     nop
1494      *     nop
1495      */
1496
1497     hc[0] = 0x3860ffff;
1498     hc[1] = 0x60000000;
1499     hc[2] = 0x60000000;
1500     hc[3] = 0x60000000;
1501
1502     return 0;
1503 }
1504
1505 void kvmppc_set_papr(PowerPCCPU *cpu)
1506 {
1507     CPUPPCState *env = &cpu->env;
1508     CPUState *cs = CPU(cpu);
1509     struct kvm_enable_cap cap = {};
1510     int ret;
1511
1512     cap.cap = KVM_CAP_PPC_PAPR;
1513     ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1514
1515     if (ret) {
1516         cpu_abort(env, "This KVM version does not support PAPR\n");
1517     }
1518
1519     /* Update the capability flag so we sync the right information
1520      * with kvm */
1521     cap_papr = 1;
1522 }
1523
1524 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1525 {
1526     CPUPPCState *env = &cpu->env;
1527     CPUState *cs = CPU(cpu);
1528     struct kvm_enable_cap cap = {};
1529     int ret;
1530
1531     cap.cap = KVM_CAP_PPC_EPR;
1532     cap.args[0] = mpic_proxy;
1533     ret = kvm_vcpu_ioctl(cs, KVM_ENABLE_CAP, &cap);
1534
1535     if (ret && mpic_proxy) {
1536         cpu_abort(env, "This KVM version does not support EPR\n");
1537     }
1538 }
1539
1540 int kvmppc_smt_threads(void)
1541 {
1542     return cap_ppc_smt ? cap_ppc_smt : 1;
1543 }
1544
1545 #ifdef TARGET_PPC64
1546 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1547 {
1548     void *rma;
1549     off_t size;
1550     int fd;
1551     struct kvm_allocate_rma ret;
1552     MemoryRegion *rma_region;
1553
1554     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1555      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1556      *                      not necessary on this hardware
1557      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1558      *
1559      * FIXME: We should allow the user to force contiguous RMA
1560      * allocation in the cap_ppc_rma==1 case.
1561      */
1562     if (cap_ppc_rma < 2) {
1563         return 0;
1564     }
1565
1566     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1567     if (fd < 0) {
1568         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1569                 strerror(errno));
1570         return -1;
1571     }
1572
1573     size = MIN(ret.rma_size, 256ul << 20);
1574
1575     rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1576     if (rma == MAP_FAILED) {
1577         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1578         return -1;
1579     };
1580
1581     rma_region = g_new(MemoryRegion, 1);
1582     memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
1583     vmstate_register_ram_global(rma_region);
1584     memory_region_add_subregion(sysmem, 0, rma_region);
1585
1586     return size;
1587 }
1588
1589 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1590 {
1591     struct kvm_ppc_smmu_info info;
1592     long rampagesize, best_page_shift;
1593     int i;
1594
1595     if (cap_ppc_rma >= 2) {
1596         return current_size;
1597     }
1598
1599     /* Find the largest hardware supported page size that's less than
1600      * or equal to the (logical) backing page size of guest RAM */
1601     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1602     rampagesize = getrampagesize();
1603     best_page_shift = 0;
1604
1605     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1606         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1607
1608         if (!sps->page_shift) {
1609             continue;
1610         }
1611
1612         if ((sps->page_shift > best_page_shift)
1613             && ((1UL << sps->page_shift) <= rampagesize)) {
1614             best_page_shift = sps->page_shift;
1615         }
1616     }
1617
1618     return MIN(current_size,
1619                1ULL << (best_page_shift + hash_shift - 7));
1620 }
1621 #endif
1622
1623 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1624 {
1625     struct kvm_create_spapr_tce args = {
1626         .liobn = liobn,
1627         .window_size = window_size,
1628     };
1629     long len;
1630     int fd;
1631     void *table;
1632
1633     /* Must set fd to -1 so we don't try to munmap when called for
1634      * destroying the table, which the upper layers -will- do
1635      */
1636     *pfd = -1;
1637     if (!cap_spapr_tce) {
1638         return NULL;
1639     }
1640
1641     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1642     if (fd < 0) {
1643         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1644                 liobn);
1645         return NULL;
1646     }
1647
1648     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
1649     /* FIXME: round this up to page size */
1650
1651     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1652     if (table == MAP_FAILED) {
1653         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1654                 liobn);
1655         close(fd);
1656         return NULL;
1657     }
1658
1659     *pfd = fd;
1660     return table;
1661 }
1662
1663 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t window_size)
1664 {
1665     long len;
1666
1667     if (fd < 0) {
1668         return -1;
1669     }
1670
1671     len = (window_size / SPAPR_TCE_PAGE_SIZE)*sizeof(uint64_t);
1672     if ((munmap(table, len) < 0) ||
1673         (close(fd) < 0)) {
1674         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1675                 strerror(errno));
1676         /* Leak the table */
1677     }
1678
1679     return 0;
1680 }
1681
1682 int kvmppc_reset_htab(int shift_hint)
1683 {
1684     uint32_t shift = shift_hint;
1685
1686     if (!kvm_enabled()) {
1687         /* Full emulation, tell caller to allocate htab itself */
1688         return 0;
1689     }
1690     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1691         int ret;
1692         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1693         if (ret == -ENOTTY) {
1694             /* At least some versions of PR KVM advertise the
1695              * capability, but don't implement the ioctl().  Oops.
1696              * Return 0 so that we allocate the htab in qemu, as is
1697              * correct for PR. */
1698             return 0;
1699         } else if (ret < 0) {
1700             return ret;
1701         }
1702         return shift;
1703     }
1704
1705     /* We have a kernel that predates the htab reset calls.  For PR
1706      * KVM, we need to allocate the htab ourselves, for an HV KVM of
1707      * this era, it has allocated a 16MB fixed size hash table
1708      * already.  Kernels of this era have the GET_PVINFO capability
1709      * only on PR, so we use this hack to determine the right
1710      * answer */
1711     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1712         /* PR - tell caller to allocate htab */
1713         return 0;
1714     } else {
1715         /* HV - assume 16MB kernel allocated htab */
1716         return 24;
1717     }
1718 }
1719
1720 static inline uint32_t mfpvr(void)
1721 {
1722     uint32_t pvr;
1723
1724     asm ("mfpvr %0"
1725          : "=r"(pvr));
1726     return pvr;
1727 }
1728
/*
 * Set or clear a group of flag bits in place.
 *
 * @word:  flag word to modify
 * @flags: mask of the bits to change
 * @on:    true sets the bits in @flags, false clears them
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    uint64_t updated = *word;

    if (!on) {
        updated &= ~flags;
    } else {
        updated |= flags;
    }

    *word = updated;
}
1737
1738 static void kvmppc_host_cpu_initfn(Object *obj)
1739 {
1740     assert(kvm_enabled());
1741 }
1742
1743 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1744 {
1745     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1746     uint32_t vmx = kvmppc_get_vmx();
1747     uint32_t dfp = kvmppc_get_dfp();
1748     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1749     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1750
1751     /* Now fix up the class with information we can query from the host */
1752     pcc->pvr = mfpvr();
1753
1754     if (vmx != -1) {
1755         /* Only override when we know what the host supports */
1756         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1757         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1758     }
1759     if (dfp != -1) {
1760         /* Only override when we know what the host supports */
1761         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1762     }
1763
1764     if (dcache_size != -1) {
1765         pcc->l1_dcache_size = dcache_size;
1766     }
1767
1768     if (icache_size != -1) {
1769         pcc->l1_icache_size = icache_size;
1770     }
1771 }
1772
1773 int kvmppc_fixup_cpu(PowerPCCPU *cpu)
1774 {
1775     CPUState *cs = CPU(cpu);
1776     int smt;
1777
1778     /* Adjust cpu index for SMT */
1779     smt = kvmppc_smt_threads();
1780     cs->cpu_index = (cs->cpu_index / smp_threads) * smt
1781         + (cs->cpu_index % smp_threads);
1782
1783     return 0;
1784 }
1785
1786 bool kvmppc_has_cap_epr(void)
1787 {
1788     return cap_epr;
1789 }
1790
1791 bool kvmppc_has_cap_htab_fd(void)
1792 {
1793     return cap_htab_fd;
1794 }
1795
1796 static int kvm_ppc_register_host_cpu_type(void)
1797 {
1798     TypeInfo type_info = {
1799         .name = TYPE_HOST_POWERPC_CPU,
1800         .instance_init = kvmppc_host_cpu_initfn,
1801         .class_init = kvmppc_host_cpu_class_init,
1802     };
1803     uint32_t host_pvr = mfpvr();
1804     PowerPCCPUClass *pvr_pcc;
1805
1806     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1807     if (pvr_pcc == NULL) {
1808         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
1809     }
1810     if (pvr_pcc == NULL) {
1811         return -1;
1812     }
1813     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1814     type_register(&type_info);
1815     return 0;
1816 }
1817
1818 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1819 {
1820     struct kvm_rtas_token_args args = {
1821         .token = token,
1822     };
1823
1824     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
1825         return -ENOENT;
1826     }
1827
1828     strncpy(args.name, function, sizeof(args.name));
1829
1830     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
1831 }
1832
1833 int kvmppc_get_htab_fd(bool write)
1834 {
1835     struct kvm_get_htab_fd s = {
1836         .flags = write ? KVM_GET_HTAB_WRITE : 0,
1837         .start_index = 0,
1838     };
1839
1840     if (!cap_htab_fd) {
1841         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
1842         return -1;
1843     }
1844
1845     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
1846 }
1847
1848 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
1849 {
1850     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1851     uint8_t buf[bufsize];
1852     ssize_t rc;
1853
1854     do {
1855         rc = read(fd, buf, bufsize);
1856         if (rc < 0) {
1857             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
1858                     strerror(errno));
1859             return rc;
1860         } else if (rc) {
1861             /* Kernel already retuns data in BE format for the file */
1862             qemu_put_buffer(f, buf, rc);
1863         }
1864     } while ((rc != 0)
1865              && ((max_ns < 0)
1866                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
1867
1868     return (rc == 0) ? 1 : 0;
1869 }
1870
1871 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
1872                            uint16_t n_valid, uint16_t n_invalid)
1873 {
1874     struct kvm_get_htab_header *buf;
1875     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
1876     ssize_t rc;
1877
1878     buf = alloca(chunksize);
1879     /* This is KVM on ppc, so this is all big-endian */
1880     buf->index = index;
1881     buf->n_valid = n_valid;
1882     buf->n_invalid = n_invalid;
1883
1884     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
1885
1886     rc = write(fd, buf, chunksize);
1887     if (rc < 0) {
1888         fprintf(stderr, "Error writing KVM hash table: %s\n",
1889                 strerror(errno));
1890         return rc;
1891     }
1892     if (rc != chunksize) {
1893         /* We should never get a short write on a single chunk */
1894         fprintf(stderr, "Short write, restoring KVM hash table\n");
1895         return -1;
1896     }
1897     return 0;
1898 }
1899
1900 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1901 {
1902     return true;
1903 }
1904
1905 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1906 {
1907     return 1;
1908 }
1909
/*
 * Global SIGBUS hook.  Nothing is done with the arguments; the function
 * always returns 1.
 */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}
1914
1915 void kvm_arch_init_irq_routing(KVMState *s)
1916 {
1917 }
1918
1919 int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1920 {
1921     return -EINVAL;
1922 }
1923
1924 int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1925 {
1926     return -EINVAL;
1927 }
1928
1929 int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1930 {
1931     return -EINVAL;
1932 }
1933
1934 int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1935 {
1936     return -EINVAL;
1937 }
1938
/* Hook for dropping all hardware breakpoints; this implementation is a no-op. */
void kvm_arch_remove_all_hw_breakpoints(void)
{
    return;
}
1942
1943 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
1944 {
1945 }
1946
1947 struct kvm_get_htab_buf {
1948     struct kvm_get_htab_header header;
1949     /*
1950      * We require one extra byte for read
1951      */
1952     target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
1953 };
1954
1955 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
1956 {
1957     int htab_fd;
1958     struct kvm_get_htab_fd ghf;
1959     struct kvm_get_htab_buf  *hpte_buf;
1960
1961     ghf.flags = 0;
1962     ghf.start_index = pte_index;
1963     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1964     if (htab_fd < 0) {
1965         goto error_out;
1966     }
1967
1968     hpte_buf = g_malloc0(sizeof(*hpte_buf));
1969     /*
1970      * Read the hpte group
1971      */
1972     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
1973         goto out_close;
1974     }
1975
1976     close(htab_fd);
1977     return (uint64_t)(uintptr_t) hpte_buf->hpte;
1978
1979 out_close:
1980     g_free(hpte_buf);
1981     close(htab_fd);
1982 error_out:
1983     return 0;
1984 }
1985
1986 void kvmppc_hash64_free_pteg(uint64_t token)
1987 {
1988     struct kvm_get_htab_buf *htab_buf;
1989
1990     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
1991                             hpte);
1992     g_free(htab_buf);
1993     return;
1994 }
1995
1996 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
1997                              target_ulong pte0, target_ulong pte1)
1998 {
1999     int htab_fd;
2000     struct kvm_get_htab_fd ghf;
2001     struct kvm_get_htab_buf hpte_buf;
2002
2003     ghf.flags = 0;
2004     ghf.start_index = 0;     /* Ignored */
2005     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2006     if (htab_fd < 0) {
2007         goto error_out;
2008     }
2009
2010     hpte_buf.header.n_valid = 1;
2011     hpte_buf.header.n_invalid = 0;
2012     hpte_buf.header.index = pte_index;
2013     hpte_buf.hpte[0] = pte0;
2014     hpte_buf.hpte[1] = pte1;
2015     /*
2016      * Write the hpte entry.
2017      * CAUTION: write() has the warn_unused_result attribute. Hence we
2018      * need to check the return value, even though we do nothing.
2019      */
2020     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2021         goto out_close;
2022     }
2023
2024 out_close:
2025     close(htab_fd);
2026     return;
2027
2028 error_out:
2029     return;
2030 }