target-ppc: Add PMC7/8 to 970 class
[qemu/ar7.git] / target-ppc / kvm.c
blob ef691feebce230af9b662a3b91c7dd9efd47fcc4
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
40 #include "trace.h"
42 //#define DEBUG_KVM
44 #ifdef DEBUG_KVM
45 #define DPRINTF(fmt, ...) \
46 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
47 #else
48 #define DPRINTF(fmt, ...) \
49 do { } while (0)
50 #endif
52 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
54 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
55 KVM_CAP_LAST_INFO
58 static int cap_interrupt_unset = false;
59 static int cap_interrupt_level = false;
60 static int cap_segstate;
61 static int cap_booke_sregs;
62 static int cap_ppc_smt;
63 static int cap_ppc_rma;
64 static int cap_spapr_tce;
65 static int cap_spapr_multitce;
66 static int cap_hior;
67 static int cap_one_reg;
68 static int cap_epr;
69 static int cap_ppc_watchdog;
70 static int cap_papr;
71 static int cap_htab_fd;
72 static int cap_fixup_hcalls;
74 /* XXX We have a race condition where we actually have a level triggered
75 * interrupt, but the infrastructure can't expose that yet, so the guest
76 * takes but ignores it, goes to sleep and never gets notified that there's
77 * still an interrupt pending.
79 * As a quick workaround, let's just wake up again 20 ms after we injected
80 * an interrupt. That way we can ensure that we're always reinjecting
81 * interrupts in case the guest swallowed them.
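 *
 * (For reference: the timer is created in kvm_arch_init_vcpu() below and
 * re-armed in kvm_arch_pre_run() for get_ticks_per_sec() / 50 nanoseconds,
 * i.e. the 20 ms mentioned above.)
 */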
83 static QEMUTimer *idle_timer;
85 static void kvm_kick_cpu(void *opaque)
87 PowerPCCPU *cpu = opaque;
89 qemu_cpu_kick(CPU(cpu));
92 static int kvm_ppc_register_host_cpu_type(void);
94 int kvm_arch_init(KVMState *s)
96 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
97 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
98 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
99 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
100 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
101 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
102 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
103 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
104 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
105 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
106 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
107 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
108 /* Note: we don't set cap_papr here, because this capability is
109 * only activated after this by kvmppc_set_papr() */
110 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
111 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
113 if (!cap_interrupt_level) {
114 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
115 "VM to stall at times!\n");
118 kvm_ppc_register_host_cpu_type();
120 return 0;
123 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
125 CPUPPCState *cenv = &cpu->env;
126 CPUState *cs = CPU(cpu);
127 struct kvm_sregs sregs;
128 int ret;
130 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
131 /* What we're really trying to say is "if we're on BookE, we use
132 the native PVR for now". This is the only sane way to check
133 it though, so we potentially mislead users into thinking they can run
134 BookE guests on BookS. Let's hope nobody dares to try :) */
135 return 0;
136 } else {
137 if (!cap_segstate) {
138 fprintf(stderr, "kvm error: missing PVR setting capability\n");
139 return -ENOSYS;
143 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
144 if (ret) {
145 return ret;
148 sregs.pvr = cenv->spr[SPR_PVR];
149 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
152 /* Set up a shared TLB array with KVM */
153 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
155 CPUPPCState *env = &cpu->env;
156 CPUState *cs = CPU(cpu);
157 struct kvm_book3e_206_tlb_params params = {};
158 struct kvm_config_tlb cfg = {};
159 unsigned int entries = 0;
160 int ret, i;
162 if (!kvm_enabled() ||
163 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
164 return 0;
167 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
169 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
170 params.tlb_sizes[i] = booke206_tlb_size(env, i);
171 params.tlb_ways[i] = booke206_tlb_ways(env, i);
172 entries += params.tlb_sizes[i];
175 assert(entries == env->nb_tlb);
176 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
178 env->tlb_dirty = true;
180 cfg.array = (uintptr_t)env->tlb.tlbm;
181 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
182 cfg.params = (uintptr_t)&params;
183 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
185 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
186 if (ret < 0) {
187 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
188 __func__, strerror(-ret));
189 return ret;
192 env->kvm_sw_tlb = true;
193 return 0;
197 #if defined(TARGET_PPC64)
198 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
199 struct kvm_ppc_smmu_info *info)
201 CPUPPCState *env = &cpu->env;
202 CPUState *cs = CPU(cpu);
204 memset(info, 0, sizeof(*info));
206 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
207 * need to "guess" what the supported page sizes are.
209 * For that to work we make a few assumptions:
211 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
212 * KVM which only supports 4K and 16M pages, but supports them
213 * regardless of the backing store characteristics. We also don't
214 * support 1T segments.
216 * This is safe as, if HV KVM ever supports that capability or PR
217 * KVM grows support for more page/segment sizes, those versions
218 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
219 * will not hit this fallback.
221 * - Else we are running HV KVM. This means we only support page
222 * sizes that fit in the backing store. Additionally we only
223 * advertise 64K pages if the processor is ARCH 2.06 and we assume
224 * P7 encodings for the SLB and hash table. Here too, we assume
225 * support for any newer processor will mean a kernel that
226 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
227 * this fallback.
229 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
230 /* No flags */
231 info->flags = 0;
232 info->slb_size = 64;
234 /* Standard 4k base page size segment */
235 info->sps[0].page_shift = 12;
236 info->sps[0].slb_enc = 0;
237 info->sps[0].enc[0].page_shift = 12;
238 info->sps[0].enc[0].pte_enc = 0;
240 /* Standard 16M large page size segment */
241 info->sps[1].page_shift = 24;
242 info->sps[1].slb_enc = SLB_VSID_L;
243 info->sps[1].enc[0].page_shift = 24;
244 info->sps[1].enc[0].pte_enc = 0;
245 } else {
246 int i = 0;
248 /* HV KVM has backing store size restrictions */
249 info->flags = KVM_PPC_PAGE_SIZES_REAL;
251 if (env->mmu_model & POWERPC_MMU_1TSEG) {
252 info->flags |= KVM_PPC_1T_SEGMENTS;
255 if (env->mmu_model == POWERPC_MMU_2_06) {
256 info->slb_size = 32;
257 } else {
258 info->slb_size = 64;
261 /* Standard 4k base page size segment */
262 info->sps[i].page_shift = 12;
263 info->sps[i].slb_enc = 0;
264 info->sps[i].enc[0].page_shift = 12;
265 info->sps[i].enc[0].pte_enc = 0;
266 i++;
268 /* 64K on MMU 2.06 */
269 if (env->mmu_model == POWERPC_MMU_2_06) {
270 info->sps[i].page_shift = 16;
271 info->sps[i].slb_enc = 0x110;
272 info->sps[i].enc[0].page_shift = 16;
273 info->sps[i].enc[0].pte_enc = 1;
274 i++;
277 /* Standard 16M large page size segment */
278 info->sps[i].page_shift = 24;
279 info->sps[i].slb_enc = SLB_VSID_L;
280 info->sps[i].enc[0].page_shift = 24;
281 info->sps[i].enc[0].pte_enc = 0;
285 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
287 CPUState *cs = CPU(cpu);
288 int ret;
290 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
291 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
292 if (ret == 0) {
293 return;
297 kvm_get_fallback_smmu_info(cpu, info);
300 static long getrampagesize(void)
302 struct statfs fs;
303 int ret;
305 if (!mem_path) {
306 /* guest RAM is backed by normal anonymous pages */
307 return getpagesize();
310 do {
311 ret = statfs(mem_path, &fs);
312 } while (ret != 0 && errno == EINTR);
314 if (ret != 0) {
315 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
316 strerror(errno));
317 exit(1);
320 #define HUGETLBFS_MAGIC 0x958458f6
322 if (fs.f_type != HUGETLBFS_MAGIC) {
323 /* Explicit mempath, but it's ordinary pages */
324 return getpagesize();
327 /* It's hugetlbfs, so return the huge page size */
328 return fs.f_bsize;
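/* (For example, a hugetlbfs mount backed by 16 MiB hugepages, the usual size
 * on ppc64 hash-MMU hosts, reports f_bsize == 0x1000000, so guest RAM would
 * be treated as backed by 16 MiB pages.) */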
331 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
333 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
334 return true;
337 return (1ul << shift) <= rampgsize;
340 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
342 static struct kvm_ppc_smmu_info smmu_info;
343 static bool has_smmu_info;
344 CPUPPCState *env = &cpu->env;
345 long rampagesize;
346 int iq, ik, jq, jk;
348 /* We only handle page sizes for 64-bit server guests for now */
349 if (!(env->mmu_model & POWERPC_MMU_64)) {
350 return;
353 /* Collect MMU info from kernel if not already */
354 if (!has_smmu_info) {
355 kvm_get_smmu_info(cpu, &smmu_info);
356 has_smmu_info = true;
359 rampagesize = getrampagesize();
361 /* Convert to QEMU form */
362 memset(&env->sps, 0, sizeof(env->sps));
365 * XXX This loop should be an entry wide AND of the capabilities that
366 * the selected CPU has with the capabilities that KVM supports.
368 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
369 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
370 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
372 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
373 ksps->page_shift)) {
374 continue;
376 qsps->page_shift = ksps->page_shift;
377 qsps->slb_enc = ksps->slb_enc;
378 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
379 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
380 ksps->enc[jk].page_shift)) {
381 continue;
383 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
384 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
385 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
386 break;
389 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
390 break;
393 env->slb_nr = smmu_info.slb_size;
394 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
395 env->mmu_model &= ~POWERPC_MMU_1TSEG;
398 #else /* defined (TARGET_PPC64) */
400 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
404 #endif /* !defined (TARGET_PPC64) */
406 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
408 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
411 int kvm_arch_init_vcpu(CPUState *cs)
413 PowerPCCPU *cpu = POWERPC_CPU(cs);
414 CPUPPCState *cenv = &cpu->env;
415 int ret;
417 /* Gather server mmu info from KVM and update the CPU state */
418 kvm_fixup_page_sizes(cpu);
420 /* Synchronize sregs with kvm */
421 ret = kvm_arch_sync_sregs(cpu);
422 if (ret) {
423 return ret;
426 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
428 /* Some targets support access to KVM's guest TLB. */
429 switch (cenv->mmu_model) {
430 case POWERPC_MMU_BOOKE206:
431 ret = kvm_booke206_tlb_init(cpu);
432 break;
433 default:
434 break;
437 return ret;
440 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
442 CPUPPCState *env = &cpu->env;
443 CPUState *cs = CPU(cpu);
444 struct kvm_dirty_tlb dirty_tlb;
445 unsigned char *bitmap;
446 int ret;
448 if (!env->kvm_sw_tlb) {
449 return;
452 bitmap = g_malloc((env->nb_tlb + 7) / 8);
453 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
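/* (Setting every bit marks every shared TLB entry dirty, so KVM picks up the
 * whole array: e.g. with a 512-entry TLB0 plus a 64-entry TLB1, nb_tlb is 576
 * and the bitmap is (576 + 7) / 8 == 72 bytes.) */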
455 dirty_tlb.bitmap = (uintptr_t)bitmap;
456 dirty_tlb.num_dirty = env->nb_tlb;
458 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
459 if (ret) {
460 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
461 __func__, strerror(-ret));
464 g_free(bitmap);
467 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
469 PowerPCCPU *cpu = POWERPC_CPU(cs);
470 CPUPPCState *env = &cpu->env;
471 union {
472 uint32_t u32;
473 uint64_t u64;
474 } val;
475 struct kvm_one_reg reg = {
476 .id = id,
477 .addr = (uintptr_t) &val,
479 int ret;
481 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
482 if (ret != 0) {
483 trace_kvm_failed_spr_get(spr, strerror(errno));
484 } else {
485 switch (id & KVM_REG_SIZE_MASK) {
486 case KVM_REG_SIZE_U32:
487 env->spr[spr] = val.u32;
488 break;
490 case KVM_REG_SIZE_U64:
491 env->spr[spr] = val.u64;
492 break;
494 default:
495 /* Don't handle this size yet */
496 abort();
501 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
503 PowerPCCPU *cpu = POWERPC_CPU(cs);
504 CPUPPCState *env = &cpu->env;
505 union {
506 uint32_t u32;
507 uint64_t u64;
508 } val;
509 struct kvm_one_reg reg = {
510 .id = id,
511 .addr = (uintptr_t) &val,
513 int ret;
515 switch (id & KVM_REG_SIZE_MASK) {
516 case KVM_REG_SIZE_U32:
517 val.u32 = env->spr[spr];
518 break;
520 case KVM_REG_SIZE_U64:
521 val.u64 = env->spr[spr];
522 break;
524 default:
525 /* Don't handle this size yet */
526 abort();
529 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
530 if (ret != 0) {
531 trace_kvm_failed_spr_set(spr, strerror(errno));
535 static int kvm_put_fp(CPUState *cs)
537 PowerPCCPU *cpu = POWERPC_CPU(cs);
538 CPUPPCState *env = &cpu->env;
539 struct kvm_one_reg reg;
540 int i;
541 int ret;
543 if (env->insns_flags & PPC_FLOAT) {
544 uint64_t fpscr = env->fpscr;
545 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
547 reg.id = KVM_REG_PPC_FPSCR;
548 reg.addr = (uintptr_t)&fpscr;
549 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
550 if (ret < 0) {
551 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
552 return ret;
555 for (i = 0; i < 32; i++) {
556 uint64_t vsr[2];
558 vsr[0] = float64_val(env->fpr[i]);
559 vsr[1] = env->vsr[i];
560 reg.addr = (uintptr_t) &vsr;
561 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
563 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
564 if (ret < 0) {
565 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
566 i, strerror(errno));
567 return ret;
572 if (env->insns_flags & PPC_ALTIVEC) {
573 reg.id = KVM_REG_PPC_VSCR;
574 reg.addr = (uintptr_t)&env->vscr;
575 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
576 if (ret < 0) {
577 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
578 return ret;
581 for (i = 0; i < 32; i++) {
582 reg.id = KVM_REG_PPC_VR(i);
583 reg.addr = (uintptr_t)&env->avr[i];
584 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
585 if (ret < 0) {
586 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
587 return ret;
592 return 0;
595 static int kvm_get_fp(CPUState *cs)
597 PowerPCCPU *cpu = POWERPC_CPU(cs);
598 CPUPPCState *env = &cpu->env;
599 struct kvm_one_reg reg;
600 int i;
601 int ret;
603 if (env->insns_flags & PPC_FLOAT) {
604 uint64_t fpscr;
605 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
607 reg.id = KVM_REG_PPC_FPSCR;
608 reg.addr = (uintptr_t)&fpscr;
609 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
610 if (ret < 0) {
611 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
612 return ret;
613 } else {
614 env->fpscr = fpscr;
617 for (i = 0; i < 32; i++) {
618 uint64_t vsr[2];
620 reg.addr = (uintptr_t) &vsr;
621 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
623 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
624 if (ret < 0) {
625 DPRINTF("Unable to get %s%d from KVM: %s\n",
626 vsx ? "VSR" : "FPR", i, strerror(errno));
627 return ret;
628 } else {
629 env->fpr[i] = vsr[0];
630 if (vsx) {
631 env->vsr[i] = vsr[1];
637 if (env->insns_flags & PPC_ALTIVEC) {
638 reg.id = KVM_REG_PPC_VSCR;
639 reg.addr = (uintptr_t)&env->vscr;
640 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
641 if (ret < 0) {
642 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
643 return ret;
646 for (i = 0; i < 32; i++) {
647 reg.id = KVM_REG_PPC_VR(i);
648 reg.addr = (uintptr_t)&env->avr[i];
649 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
650 if (ret < 0) {
651 DPRINTF("Unable to get VR%d from KVM: %s\n",
652 i, strerror(errno));
653 return ret;
658 return 0;
661 #if defined(TARGET_PPC64)
662 static int kvm_get_vpa(CPUState *cs)
664 PowerPCCPU *cpu = POWERPC_CPU(cs);
665 CPUPPCState *env = &cpu->env;
666 struct kvm_one_reg reg;
667 int ret;
669 reg.id = KVM_REG_PPC_VPA_ADDR;
670 reg.addr = (uintptr_t)&env->vpa_addr;
671 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
672 if (ret < 0) {
673 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
674 return ret;
677 assert((uintptr_t)&env->slb_shadow_size
678 == ((uintptr_t)&env->slb_shadow_addr + 8));
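/* (KVM_REG_PPC_VPA_SLB and KVM_REG_PPC_VPA_DTL each transfer an (address,
 * length) pair in a single access, hence the asserts that the two env fields
 * sit next to each other in memory.) */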
679 reg.id = KVM_REG_PPC_VPA_SLB;
680 reg.addr = (uintptr_t)&env->slb_shadow_addr;
681 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
682 if (ret < 0) {
683 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
684 strerror(errno));
685 return ret;
688 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
689 reg.id = KVM_REG_PPC_VPA_DTL;
690 reg.addr = (uintptr_t)&env->dtl_addr;
691 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
692 if (ret < 0) {
693 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
694 strerror(errno));
695 return ret;
698 return 0;
701 static int kvm_put_vpa(CPUState *cs)
703 PowerPCCPU *cpu = POWERPC_CPU(cs);
704 CPUPPCState *env = &cpu->env;
705 struct kvm_one_reg reg;
706 int ret;
708 /* SLB shadow or DTL can't be registered unless a master VPA is
709 * registered. That means when restoring state, if a VPA *is*
710 * registered, we need to set that up first. If not, we need to
711 * deregister the others before deregistering the master VPA */
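/* (Concretely, the code below registers in the order VPA -> SLB shadow -> DTL
 * when env->vpa_addr is set, and only writes KVM_REG_PPC_VPA_ADDR last, with a
 * zero value, when the VPA is being deregistered, so the dependent areas are
 * always torn down first.) */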
712 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
714 if (env->vpa_addr) {
715 reg.id = KVM_REG_PPC_VPA_ADDR;
716 reg.addr = (uintptr_t)&env->vpa_addr;
717 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
718 if (ret < 0) {
719 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
720 return ret;
724 assert((uintptr_t)&env->slb_shadow_size
725 == ((uintptr_t)&env->slb_shadow_addr + 8));
726 reg.id = KVM_REG_PPC_VPA_SLB;
727 reg.addr = (uintptr_t)&env->slb_shadow_addr;
728 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
729 if (ret < 0) {
730 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
731 return ret;
734 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
735 reg.id = KVM_REG_PPC_VPA_DTL;
736 reg.addr = (uintptr_t)&env->dtl_addr;
737 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
738 if (ret < 0) {
739 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
740 strerror(errno));
741 return ret;
744 if (!env->vpa_addr) {
745 reg.id = KVM_REG_PPC_VPA_ADDR;
746 reg.addr = (uintptr_t)&env->vpa_addr;
747 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
748 if (ret < 0) {
749 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
750 return ret;
754 return 0;
756 #endif /* TARGET_PPC64 */
758 int kvm_arch_put_registers(CPUState *cs, int level)
760 PowerPCCPU *cpu = POWERPC_CPU(cs);
761 CPUPPCState *env = &cpu->env;
762 struct kvm_regs regs;
763 int ret;
764 int i;
766 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
767 if (ret < 0) {
768 return ret;
771 regs.ctr = env->ctr;
772 regs.lr = env->lr;
773 regs.xer = cpu_read_xer(env);
774 regs.msr = env->msr;
775 regs.pc = env->nip;
777 regs.srr0 = env->spr[SPR_SRR0];
778 regs.srr1 = env->spr[SPR_SRR1];
780 regs.sprg0 = env->spr[SPR_SPRG0];
781 regs.sprg1 = env->spr[SPR_SPRG1];
782 regs.sprg2 = env->spr[SPR_SPRG2];
783 regs.sprg3 = env->spr[SPR_SPRG3];
784 regs.sprg4 = env->spr[SPR_SPRG4];
785 regs.sprg5 = env->spr[SPR_SPRG5];
786 regs.sprg6 = env->spr[SPR_SPRG6];
787 regs.sprg7 = env->spr[SPR_SPRG7];
789 regs.pid = env->spr[SPR_BOOKE_PID];
791 for (i = 0; i < 32; i++)
792 regs.gpr[i] = env->gpr[i];
794 regs.cr = 0;
795 for (i = 0; i < 8; i++) {
796 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
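/* (crf[0] is the architecturally most-significant CR field, CR0, so it lands
 * in bits 31..28 of the packed value; crf[7], CR7, ends up in bits 3..0.) */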
799 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
800 if (ret < 0)
801 return ret;
803 kvm_put_fp(cs);
805 if (env->tlb_dirty) {
806 kvm_sw_tlb_put(cpu);
807 env->tlb_dirty = false;
810 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
811 struct kvm_sregs sregs;
813 sregs.pvr = env->spr[SPR_PVR];
815 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
817 /* Sync SLB */
818 #ifdef TARGET_PPC64
819 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
820 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
821 if (env->slb[i].esid & SLB_ESID_V) {
822 sregs.u.s.ppc64.slb[i].slbe |= i;
824 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
826 #endif
828 /* Sync SRs */
829 for (i = 0; i < 16; i++) {
830 sregs.u.s.ppc32.sr[i] = env->sr[i];
833 /* Sync BATs */
834 for (i = 0; i < 8; i++) {
835 /* Beware. We have to swap upper and lower bits here */
836 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
837 | env->DBAT[1][i];
838 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
839 | env->IBAT[1][i];
842 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
843 if (ret) {
844 return ret;
848 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
849 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
852 if (cap_one_reg) {
853 int i;
855 /* We deliberately ignore errors here; for kernels which have
856 * the ONE_REG calls but don't support the specific
857 * registers, there's a reasonable chance things will still
858 * work, at least until we try to migrate. */
859 for (i = 0; i < 1024; i++) {
860 uint64_t id = env->spr_cb[i].one_reg_id;
862 if (id != 0) {
863 kvm_put_one_spr(cs, id, i);
867 #ifdef TARGET_PPC64
868 if (cap_papr) {
869 if (kvm_put_vpa(cs) < 0) {
870 DPRINTF("Warning: Unable to set VPA information to KVM\n");
874 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
875 #endif /* TARGET_PPC64 */
878 return ret;
881 int kvm_arch_get_registers(CPUState *cs)
883 PowerPCCPU *cpu = POWERPC_CPU(cs);
884 CPUPPCState *env = &cpu->env;
885 struct kvm_regs regs;
886 struct kvm_sregs sregs;
887 uint32_t cr;
888 int i, ret;
890 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
891 if (ret < 0)
892 return ret;
894 cr = regs.cr;
895 for (i = 7; i >= 0; i--) {
896 env->crf[i] = cr & 15;
897 cr >>= 4;
900 env->ctr = regs.ctr;
901 env->lr = regs.lr;
902 cpu_write_xer(env, regs.xer);
903 env->msr = regs.msr;
904 env->nip = regs.pc;
906 env->spr[SPR_SRR0] = regs.srr0;
907 env->spr[SPR_SRR1] = regs.srr1;
909 env->spr[SPR_SPRG0] = regs.sprg0;
910 env->spr[SPR_SPRG1] = regs.sprg1;
911 env->spr[SPR_SPRG2] = regs.sprg2;
912 env->spr[SPR_SPRG3] = regs.sprg3;
913 env->spr[SPR_SPRG4] = regs.sprg4;
914 env->spr[SPR_SPRG5] = regs.sprg5;
915 env->spr[SPR_SPRG6] = regs.sprg6;
916 env->spr[SPR_SPRG7] = regs.sprg7;
918 env->spr[SPR_BOOKE_PID] = regs.pid;
920 for (i = 0; i < 32; i++)
921 env->gpr[i] = regs.gpr[i];
923 kvm_get_fp(cs);
925 if (cap_booke_sregs) {
926 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
927 if (ret < 0) {
928 return ret;
931 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
932 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
933 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
934 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
935 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
936 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
937 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
938 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
939 env->spr[SPR_DECR] = sregs.u.e.dec;
940 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
941 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
942 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
945 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
946 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
947 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
948 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
949 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
950 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
953 if (sregs.u.e.features & KVM_SREGS_E_64) {
954 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
957 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
958 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
961 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
962 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
963 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
964 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
965 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
966 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
967 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
968 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
969 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
970 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
971 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
972 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
973 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
974 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
975 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
976 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
977 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
979 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
980 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
981 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
982 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
985 if (sregs.u.e.features & KVM_SREGS_E_PM) {
986 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
989 if (sregs.u.e.features & KVM_SREGS_E_PC) {
990 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
991 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
995 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
996 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
997 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
998 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
999 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1000 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1001 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1002 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1003 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1004 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1005 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1008 if (sregs.u.e.features & KVM_SREGS_EXP) {
1009 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1012 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1013 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1014 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1017 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1018 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1019 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1020 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1022 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1023 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1024 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1029 if (cap_segstate) {
1030 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1031 if (ret < 0) {
1032 return ret;
1035 if (!env->external_htab) {
1036 ppc_store_sdr1(env, sregs.u.s.sdr1);
1039 /* Sync SLB */
1040 #ifdef TARGET_PPC64
1042 * The packed SLB array we get from KVM_GET_SREGS only contains
1043 * information about valid entries. So we flush our internal
1044 * copy to get rid of stale ones, then put all valid SLB entries
1045 * back in.
1047 memset(env->slb, 0, sizeof(env->slb));
1048 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1049 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1050 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1052 * Only restore valid entries
1054 if (rb & SLB_ESID_V) {
1055 ppc_store_slb(env, rb, rs);
1058 #endif
1060 /* Sync SRs */
1061 for (i = 0; i < 16; i++) {
1062 env->sr[i] = sregs.u.s.ppc32.sr[i];
1065 /* Sync BATs */
1066 for (i = 0; i < 8; i++) {
1067 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1068 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1069 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1070 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1074 if (cap_hior) {
1075 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1078 if (cap_one_reg) {
1079 int i;
1081 /* We deliberately ignore errors here; for kernels which have
1082 * the ONE_REG calls but don't support the specific
1083 * registers, there's a reasonable chance things will still
1084 * work, at least until we try to migrate. */
1085 for (i = 0; i < 1024; i++) {
1086 uint64_t id = env->spr_cb[i].one_reg_id;
1088 if (id != 0) {
1089 kvm_get_one_spr(cs, id, i);
1093 #ifdef TARGET_PPC64
1094 if (cap_papr) {
1095 if (kvm_get_vpa(cs) < 0) {
1096 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1100 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1101 #endif
1104 return 0;
1107 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1109 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1111 if (irq != PPC_INTERRUPT_EXT) {
1112 return 0;
1115 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1116 return 0;
1119 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1121 return 0;
1124 #if defined(TARGET_PPCEMB)
1125 #define PPC_INPUT_INT PPC40x_INPUT_INT
1126 #elif defined(TARGET_PPC64)
1127 #define PPC_INPUT_INT PPC970_INPUT_INT
1128 #else
1129 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1130 #endif
1132 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1134 PowerPCCPU *cpu = POWERPC_CPU(cs);
1135 CPUPPCState *env = &cpu->env;
1136 int r;
1137 unsigned irq;
1139 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1140 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1141 if (!cap_interrupt_level &&
1142 run->ready_for_interrupt_injection &&
1143 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1144 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1146 /* For now KVM disregards the 'irq' argument. However, in the
1147 * future KVM could cache it in-kernel to avoid a heavyweight exit
1148 * when reading the UIC.
1150 irq = KVM_INTERRUPT_SET;
1152 DPRINTF("injected interrupt %d\n", irq);
1153 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1154 if (r < 0) {
1155 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1158 /* Always wake up soon in case the interrupt was level based */
1159 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1160 (get_ticks_per_sec() / 50));
1163 /* We don't know if there are more interrupts pending after this. However,
1164 * the guest will return to userspace in the course of handling this one
1165 * anyway, so we will get a chance to deliver the rest. */
1168 void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
1172 int kvm_arch_process_async_events(CPUState *cs)
1174 return cs->halted;
1177 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1179 CPUState *cs = CPU(cpu);
1180 CPUPPCState *env = &cpu->env;
1182 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1183 cs->halted = 1;
1184 cs->exception_index = EXCP_HLT;
1187 return 0;
1190 /* map dcr access to existing qemu dcr emulation */
1191 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1193 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1194 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1196 return 0;
1199 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1201 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1202 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1204 return 0;
1207 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1209 PowerPCCPU *cpu = POWERPC_CPU(cs);
1210 CPUPPCState *env = &cpu->env;
1211 int ret;
1213 switch (run->exit_reason) {
1214 case KVM_EXIT_DCR:
1215 if (run->dcr.is_write) {
1216 DPRINTF("handle dcr write\n");
1217 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1218 } else {
1219 DPRINTF("handle dcr read\n");
1220 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1222 break;
1223 case KVM_EXIT_HLT:
1224 DPRINTF("handle halt\n");
1225 ret = kvmppc_handle_halt(cpu);
1226 break;
1227 #if defined(TARGET_PPC64)
1228 case KVM_EXIT_PAPR_HCALL:
1229 DPRINTF("handle PAPR hypercall\n");
1230 run->papr_hcall.ret = spapr_hypercall(cpu,
1231 run->papr_hcall.nr,
1232 run->papr_hcall.args);
1233 ret = 0;
1234 break;
1235 #endif
1236 case KVM_EXIT_EPR:
1237 DPRINTF("handle epr\n");
1238 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1239 ret = 0;
1240 break;
1241 case KVM_EXIT_WATCHDOG:
1242 DPRINTF("handle watchdog expiry\n");
1243 watchdog_perform_action();
1244 ret = 0;
1245 break;
1247 default:
1248 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1249 ret = -1;
1250 break;
1253 return ret;
1256 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1258 CPUState *cs = CPU(cpu);
1259 uint32_t bits = tsr_bits;
1260 struct kvm_one_reg reg = {
1261 .id = KVM_REG_PPC_OR_TSR,
1262 .addr = (uintptr_t) &bits,
1265 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1268 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1271 CPUState *cs = CPU(cpu);
1272 uint32_t bits = tsr_bits;
1273 struct kvm_one_reg reg = {
1274 .id = KVM_REG_PPC_CLEAR_TSR,
1275 .addr = (uintptr_t) &bits,
1278 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1281 int kvmppc_set_tcr(PowerPCCPU *cpu)
1283 CPUState *cs = CPU(cpu);
1284 CPUPPCState *env = &cpu->env;
1285 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1287 struct kvm_one_reg reg = {
1288 .id = KVM_REG_PPC_TCR,
1289 .addr = (uintptr_t) &tcr,
1292 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1295 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1297 CPUState *cs = CPU(cpu);
1298 int ret;
1300 if (!kvm_enabled()) {
1301 return -1;
1304 if (!cap_ppc_watchdog) {
1305 printf("warning: KVM does not support watchdog");
1306 return -1;
1309 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1310 if (ret < 0) {
1311 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1312 __func__, strerror(-ret));
1313 return ret;
1316 return ret;
1319 static int read_cpuinfo(const char *field, char *value, int len)
1321 FILE *f;
1322 int ret = -1;
1323 int field_len = strlen(field);
1324 char line[512];
1326 f = fopen("/proc/cpuinfo", "r");
1327 if (!f) {
1328 return -1;
1331 do {
1332 if (!fgets(line, sizeof(line), f)) {
1333 break;
1335 if (!strncmp(line, field, field_len)) {
1336 pstrcpy(value, len, line);
1337 ret = 0;
1338 break;
1340 } while (*line);
1342 fclose(f);
1344 return ret;
1347 uint32_t kvmppc_get_tbfreq(void)
1349 char line[512];
1350 char *ns;
1351 uint32_t retval = get_ticks_per_sec();
1353 if (read_cpuinfo("timebase", line, sizeof(line))) {
1354 return retval;
1357 if (!(ns = strchr(line, ':'))) {
1358 return retval;
1361 ns++;
1363 retval = atoi(ns);
1364 return retval;
1367 /* Try to find a device tree node for a CPU with clock-frequency property */
1368 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1370 struct dirent *dirp;
1371 DIR *dp;
1373 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1374 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1375 return -1;
1378 buf[0] = '\0';
1379 while ((dirp = readdir(dp)) != NULL) {
1380 FILE *f;
1381 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1382 dirp->d_name);
1383 f = fopen(buf, "r");
1384 if (f) {
1385 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1386 fclose(f);
1387 break;
1389 buf[0] = '\0';
1391 closedir(dp);
1392 if (buf[0] == '\0') {
1393 printf("Unknown host!\n");
1394 return -1;
1397 return 0;
1400 /* Read a CPU node property from the host device tree that's a single
1401 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1402 * (can't find or open the property, or doesn't understand the
1403 * format) */
1404 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1406 char buf[PATH_MAX];
1407 union {
1408 uint32_t v32;
1409 uint64_t v64;
1410 } u;
1411 FILE *f;
1412 int len;
1414 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1415 return -1;
1418 strncat(buf, "/", sizeof(buf) - strlen(buf) - 1);
1419 strncat(buf, propname, sizeof(buf) - strlen(buf) - 1);
1421 f = fopen(buf, "rb");
1422 if (!f) {
1423 return -1;
1426 len = fread(&u, 1, sizeof(u), f);
1427 fclose(f);
1428 switch (len) {
1429 case 4:
1430 /* property is a 32-bit quantity */
1431 return be32_to_cpu(u.v32);
1432 case 8:
1433 return be64_to_cpu(u.v64);
1436 return 0;
1439 uint64_t kvmppc_get_clockfreq(void)
1441 return kvmppc_read_int_cpu_dt("clock-frequency");
1444 uint32_t kvmppc_get_vmx(void)
1446 return kvmppc_read_int_cpu_dt("ibm,vmx");
1449 uint32_t kvmppc_get_dfp(void)
1451 return kvmppc_read_int_cpu_dt("ibm,dfp");
1454 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1456 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1457 CPUState *cs = CPU(cpu);
1459 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1460 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1461 return 0;
1464 return 1;
1467 int kvmppc_get_hasidle(CPUPPCState *env)
1469 struct kvm_ppc_pvinfo pvinfo;
1471 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1472 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1473 return 1;
1476 return 0;
1479 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1481 uint32_t *hc = (uint32_t*)buf;
1482 struct kvm_ppc_pvinfo pvinfo;
1484 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1485 memcpy(buf, pvinfo.hcall, buf_len);
1486 return 0;
1490 * Fallback to always fail hypercalls:
1492 * li r3, -1
1493 * nop
1494 * nop
1495 * nop
1498 hc[0] = 0x3860ffff;
1499 hc[1] = 0x60000000;
1500 hc[2] = 0x60000000;
1501 hc[3] = 0x60000000;
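/* (Encodings: 0x3860ffff is addi r3, 0, -1, i.e. "li r3, -1"; 0x60000000 is
 * ori 0, 0, 0, the canonical PowerPC nop.) */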
1503 return 0;
1506 void kvmppc_set_papr(PowerPCCPU *cpu)
1508 CPUState *cs = CPU(cpu);
1509 int ret;
1511 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1512 if (ret) {
1513 cpu_abort(cs, "This KVM version does not support PAPR\n");
1516 /* Update the capability flag so we sync the right information
1517 * with kvm */
1518 cap_papr = 1;
1521 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
1523 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
1526 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1528 CPUState *cs = CPU(cpu);
1529 int ret;
1531 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1532 if (ret && mpic_proxy) {
1533 cpu_abort(cs, "This KVM version does not support EPR\n");
1537 int kvmppc_smt_threads(void)
1539 return cap_ppc_smt ? cap_ppc_smt : 1;
1542 #ifdef TARGET_PPC64
1543 off_t kvmppc_alloc_rma(const char *name, MemoryRegion *sysmem)
1545 void *rma;
1546 off_t size;
1547 int fd;
1548 struct kvm_allocate_rma ret;
1549 MemoryRegion *rma_region;
1551 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1552 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1553 * not necessary on this hardware
1554 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1556 * FIXME: We should allow the user to force contiguous RMA
1557 * allocation in the cap_ppc_rma==1 case.
1559 if (cap_ppc_rma < 2) {
1560 return 0;
1563 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1564 if (fd < 0) {
1565 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1566 strerror(errno));
1567 return -1;
1570 size = MIN(ret.rma_size, 256ul << 20);
1572 rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1573 if (rma == MAP_FAILED) {
1574 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1575 return -1;
1578 rma_region = g_new(MemoryRegion, 1);
1579 memory_region_init_ram_ptr(rma_region, NULL, name, size, rma);
1580 vmstate_register_ram_global(rma_region);
1581 memory_region_add_subregion(sysmem, 0, rma_region);
1583 return size;
1586 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1588 struct kvm_ppc_smmu_info info;
1589 long rampagesize, best_page_shift;
1590 int i;
1592 if (cap_ppc_rma >= 2) {
1593 return current_size;
1596 /* Find the largest hardware supported page size that's less than
1597 * or equal to the (logical) backing page size of guest RAM */
1598 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1599 rampagesize = getrampagesize();
1600 best_page_shift = 0;
1602 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1603 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1605 if (!sps->page_shift) {
1606 continue;
1609 if ((sps->page_shift > best_page_shift)
1610 && ((1UL << sps->page_shift) <= rampagesize)) {
1611 best_page_shift = sps->page_shift;
1615 return MIN(current_size,
1616 1ULL << (best_page_shift + hash_shift - 7));
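/* (The 2^(page_shift + hash_shift - 7) bound equals the page size times the
 * number of 128-byte PTEGs in a 2^hash_shift-byte hash table: e.g. with
 * 64 KiB pages and hash_shift == 24, a 16 MiB HPT, the RMA is capped at
 * 1ULL << 33 == 8 GiB.) */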
1618 #endif
1620 bool kvmppc_spapr_use_multitce(void)
1622 return cap_spapr_multitce;
1625 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd)
1627 struct kvm_create_spapr_tce args = {
1628 .liobn = liobn,
1629 .window_size = window_size,
1631 long len;
1632 int fd;
1633 void *table;
1635 /* Must set fd to -1 so we don't try to munmap when called for
1636 * destroying the table, which the upper layers -will- do
1638 *pfd = -1;
1639 if (!cap_spapr_tce) {
1640 return NULL;
1643 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
1644 if (fd < 0) {
1645 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
1646 liobn);
1647 return NULL;
1650 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
1651 /* FIXME: round this up to page size */
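/* (With SPAPR_TCE_PAGE_SIZE == 4 KiB and 8 bytes per TCE, a 256 MiB DMA
 * window, for example, needs 65536 entries, i.e. a 512 KiB table.) */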
1653 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1654 if (table == MAP_FAILED) {
1655 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
1656 liobn);
1657 close(fd);
1658 return NULL;
1661 *pfd = fd;
1662 return table;
1665 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
1667 long len;
1669 if (fd < 0) {
1670 return -1;
1673 len = nb_table * sizeof(uint64_t);
1674 if ((munmap(table, len) < 0) ||
1675 (close(fd) < 0)) {
1676 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
1677 strerror(errno));
1678 /* Leak the table */
1681 return 0;
1684 int kvmppc_reset_htab(int shift_hint)
1686 uint32_t shift = shift_hint;
1688 if (!kvm_enabled()) {
1689 /* Full emulation, tell caller to allocate htab itself */
1690 return 0;
1692 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
1693 int ret;
1694 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
1695 if (ret == -ENOTTY) {
1696 /* At least some versions of PR KVM advertise the
1697 * capability, but don't implement the ioctl(). Oops.
1698 * Return 0 so that we allocate the htab in qemu, as is
1699 * correct for PR. */
1700 return 0;
1701 } else if (ret < 0) {
1702 return ret;
1704 return shift;
1707 /* We have a kernel that predates the htab reset calls. For PR
1708 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1709 * this era, it has allocated a 16MB fixed size hash table
1710 * already. Kernels of this era have the GET_PVINFO capability
1711 * only on PR, so we use this hack to determine the right
1712 * answer */
1713 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
1714 /* PR - tell caller to allocate htab */
1715 return 0;
1716 } else {
1717 /* HV - assume 16MB kernel allocated htab */
1718 return 24;
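/* (Returning 24 reports a 1 << 24 == 16 MiB hash table, matching the fixed
 * size those older HV kernels allocate.) */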
1722 static inline uint32_t mfpvr(void)
1724 uint32_t pvr;
1726 asm ("mfpvr %0"
1727 : "=r"(pvr));
1728 return pvr;
1731 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
1733 if (on) {
1734 *word |= flags;
1735 } else {
1736 *word &= ~flags;
1740 static void kvmppc_host_cpu_initfn(Object *obj)
1742 assert(kvm_enabled());
1745 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
1747 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
1748 uint32_t vmx = kvmppc_get_vmx();
1749 uint32_t dfp = kvmppc_get_dfp();
1750 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
1751 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
1753 /* Now fix up the class with information we can query from the host */
1754 pcc->pvr = mfpvr();
1756 if (vmx != -1) {
1757 /* Only override when we know what the host supports */
1758 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
1759 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
1761 if (dfp != -1) {
1762 /* Only override when we know what the host supports */
1763 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
1766 if (dcache_size != -1) {
1767 pcc->l1_dcache_size = dcache_size;
1770 if (icache_size != -1) {
1771 pcc->l1_icache_size = icache_size;
1775 bool kvmppc_has_cap_epr(void)
1777 return cap_epr;
1780 bool kvmppc_has_cap_htab_fd(void)
1782 return cap_htab_fd;
1785 bool kvmppc_has_cap_fixup_hcalls(void)
1787 return cap_fixup_hcalls;
1790 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
1792 ObjectClass *oc = OBJECT_CLASS(pcc);
1794 while (oc && !object_class_is_abstract(oc)) {
1795 oc = object_class_get_parent(oc);
1797 assert(oc);
1799 return POWERPC_CPU_CLASS(oc);
1802 static int kvm_ppc_register_host_cpu_type(void)
1804 TypeInfo type_info = {
1805 .name = TYPE_HOST_POWERPC_CPU,
1806 .instance_init = kvmppc_host_cpu_initfn,
1807 .class_init = kvmppc_host_cpu_class_init,
1809 uint32_t host_pvr = mfpvr();
1810 PowerPCCPUClass *pvr_pcc;
1811 DeviceClass *dc;
1813 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
1814 if (pvr_pcc == NULL) {
1815 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
1817 if (pvr_pcc == NULL) {
1818 return -1;
1820 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1821 type_register(&type_info);
1823 /* Register generic family CPU class for a family */
1824 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
1825 dc = DEVICE_CLASS(pvr_pcc);
1826 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
1827 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
1828 type_register(&type_info);
1830 return 0;
1833 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
1835 struct kvm_rtas_token_args args = {
1836 .token = token,
1839 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
1840 return -ENOENT;
1843 strncpy(args.name, function, sizeof(args.name));
1845 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
1848 int kvmppc_get_htab_fd(bool write)
1850 struct kvm_get_htab_fd s = {
1851 .flags = write ? KVM_GET_HTAB_WRITE : 0,
1852 .start_index = 0,
1855 if (!cap_htab_fd) {
1856 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
1857 return -1;
1860 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
1863 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
1865 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
1866 uint8_t buf[bufsize];
1867 ssize_t rc;
1869 do {
1870 rc = read(fd, buf, bufsize);
1871 if (rc < 0) {
1872 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
1873 strerror(errno));
1874 return rc;
1875 } else if (rc) {
1876 /* Kernel already returns data in BE format for the file */
1877 qemu_put_buffer(f, buf, rc);
1879 } while ((rc != 0)
1880 && ((max_ns < 0)
1881 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
1883 return (rc == 0) ? 1 : 0;
1886 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
1887 uint16_t n_valid, uint16_t n_invalid)
1889 struct kvm_get_htab_header *buf;
1890 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
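/* (With HASH_PTE_SIZE_64 == 16 and the 8-byte kvm_get_htab_header, a chunk
 * restoring a full PTEG, n_valid == 8, is 8 + 8 * 16 == 136 bytes.) */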
1891 ssize_t rc;
1893 buf = alloca(chunksize);
1894 /* This is KVM on ppc, so this is all big-endian */
1895 buf->index = index;
1896 buf->n_valid = n_valid;
1897 buf->n_invalid = n_invalid;
1899 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
1901 rc = write(fd, buf, chunksize);
1902 if (rc < 0) {
1903 fprintf(stderr, "Error writing KVM hash table: %s\n",
1904 strerror(errno));
1905 return rc;
1907 if (rc != chunksize) {
1908 /* We should never get a short write on a single chunk */
1909 fprintf(stderr, "Short write, restoring KVM hash table\n");
1910 return -1;
1912 return 0;
1915 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
1917 return true;
1920 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
1922 return 1;
1925 int kvm_arch_on_sigbus(int code, void *addr)
1927 return 1;
1930 void kvm_arch_init_irq_routing(KVMState *s)
1934 int kvm_arch_insert_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1936 return -EINVAL;
1939 int kvm_arch_remove_sw_breakpoint(CPUState *cpu, struct kvm_sw_breakpoint *bp)
1941 return -EINVAL;
1944 int kvm_arch_insert_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1946 return -EINVAL;
1949 int kvm_arch_remove_hw_breakpoint(target_ulong addr, target_ulong len, int type)
1951 return -EINVAL;
1954 void kvm_arch_remove_all_hw_breakpoints(void)
1958 void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg)
1962 struct kvm_get_htab_buf {
1963 struct kvm_get_htab_header header;
1965 * We require one extra entry for the read
1967 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
1970 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
1972 int htab_fd;
1973 struct kvm_get_htab_fd ghf;
1974 struct kvm_get_htab_buf *hpte_buf;
1976 ghf.flags = 0;
1977 ghf.start_index = pte_index;
1978 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
1979 if (htab_fd < 0) {
1980 goto error_out;
1983 hpte_buf = g_malloc0(sizeof(*hpte_buf));
1985 * Read the hpte group
1987 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
1988 goto out_close;
1991 close(htab_fd);
1992 return (uint64_t)(uintptr_t) hpte_buf->hpte;
1994 out_close:
1995 g_free(hpte_buf);
1996 close(htab_fd);
1997 error_out:
1998 return 0;
2001 void kvmppc_hash64_free_pteg(uint64_t token)
2003 struct kvm_get_htab_buf *htab_buf;
2005 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2006 hpte);
2007 g_free(htab_buf);
2008 return;
2011 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2012 target_ulong pte0, target_ulong pte1)
2014 int htab_fd;
2015 struct kvm_get_htab_fd ghf;
2016 struct kvm_get_htab_buf hpte_buf;
2018 ghf.flags = 0;
2019 ghf.start_index = 0; /* Ignored */
2020 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2021 if (htab_fd < 0) {
2022 goto error_out;
2025 hpte_buf.header.n_valid = 1;
2026 hpte_buf.header.n_invalid = 0;
2027 hpte_buf.header.index = pte_index;
2028 hpte_buf.hpte[0] = pte0;
2029 hpte_buf.hpte[1] = pte1;
2031 * Write the hpte entry.
2032 * CAUTION: write() has the warn_unused_result attribute. Hence we
2033 * need to check the return value, even though we do nothing.
2035 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2036 goto out_close;
2039 out_close:
2040 close(htab_fd);
2041 return;
2043 error_out:
2044 return;