2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
/* #define DEBUG_KVM */

#ifdef DEBUG_KVM
/* Debug build: DPRINTF() forwards to stderr. */
#define DPRINTF(fmt, ...) \
    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
#else
/* Release build: compiled out, but stays a single valid statement so it is
 * safe in unbraced if/else bodies. */
#define DPRINTF(fmt, ...) \
    do { } while (0)
#endif
52 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
54 const KVMCapabilityInfo kvm_arch_required_capabilities
[] = {
58 static int cap_interrupt_unset
= false;
59 static int cap_interrupt_level
= false;
60 static int cap_segstate
;
61 static int cap_booke_sregs
;
62 static int cap_ppc_smt
;
63 static int cap_ppc_rma
;
64 static int cap_spapr_tce
;
65 static int cap_spapr_multitce
;
67 static int cap_one_reg
;
69 static int cap_ppc_watchdog
;
71 static int cap_htab_fd
;
73 /* XXX We have a race condition where we actually have a level triggered
74 * interrupt, but the infrastructure can't expose that yet, so the guest
75 * takes but ignores it, goes to sleep and never gets notified that there's
76 * still an interrupt pending.
78 * As a quick workaround, let's just wake up again 20 ms after we injected
79 * an interrupt. That way we can assure that we're always reinjecting
80 * interrupts in case the guest swallowed them.
82 static QEMUTimer
*idle_timer
;
84 static void kvm_kick_cpu(void *opaque
)
86 PowerPCCPU
*cpu
= opaque
;
88 qemu_cpu_kick(CPU(cpu
));
91 static int kvm_ppc_register_host_cpu_type(void);
93 int kvm_arch_init(KVMState
*s
)
95 cap_interrupt_unset
= kvm_check_extension(s
, KVM_CAP_PPC_UNSET_IRQ
);
96 cap_interrupt_level
= kvm_check_extension(s
, KVM_CAP_PPC_IRQ_LEVEL
);
97 cap_segstate
= kvm_check_extension(s
, KVM_CAP_PPC_SEGSTATE
);
98 cap_booke_sregs
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_SREGS
);
99 cap_ppc_smt
= kvm_check_extension(s
, KVM_CAP_PPC_SMT
);
100 cap_ppc_rma
= kvm_check_extension(s
, KVM_CAP_PPC_RMA
);
101 cap_spapr_tce
= kvm_check_extension(s
, KVM_CAP_SPAPR_TCE
);
102 cap_spapr_multitce
= kvm_check_extension(s
, KVM_CAP_SPAPR_MULTITCE
);
103 cap_one_reg
= kvm_check_extension(s
, KVM_CAP_ONE_REG
);
104 cap_hior
= kvm_check_extension(s
, KVM_CAP_PPC_HIOR
);
105 cap_epr
= kvm_check_extension(s
, KVM_CAP_PPC_EPR
);
106 cap_ppc_watchdog
= kvm_check_extension(s
, KVM_CAP_PPC_BOOKE_WATCHDOG
);
107 /* Note: we don't set cap_papr here, because this capability is
108 * only activated after this by kvmppc_set_papr() */
109 cap_htab_fd
= kvm_check_extension(s
, KVM_CAP_PPC_HTAB_FD
);
111 if (!cap_interrupt_level
) {
112 fprintf(stderr
, "KVM: Couldn't find level irq capability. Expect the "
113 "VM to stall at times!\n");
116 kvm_ppc_register_host_cpu_type();
121 static int kvm_arch_sync_sregs(PowerPCCPU
*cpu
)
123 CPUPPCState
*cenv
= &cpu
->env
;
124 CPUState
*cs
= CPU(cpu
);
125 struct kvm_sregs sregs
;
128 if (cenv
->excp_model
== POWERPC_EXCP_BOOKE
) {
129 /* What we're really trying to say is "if we're on BookE, we use
130 the native PVR for now". This is the only sane way to check
131 it though, so we potentially confuse users that they can run
132 BookE guests on BookS. Let's hope nobody dares enough :) */
136 fprintf(stderr
, "kvm error: missing PVR setting capability\n");
141 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
146 sregs
.pvr
= cenv
->spr
[SPR_PVR
];
147 return kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
150 /* Set up a shared TLB array with KVM */
151 static int kvm_booke206_tlb_init(PowerPCCPU
*cpu
)
153 CPUPPCState
*env
= &cpu
->env
;
154 CPUState
*cs
= CPU(cpu
);
155 struct kvm_book3e_206_tlb_params params
= {};
156 struct kvm_config_tlb cfg
= {};
157 unsigned int entries
= 0;
160 if (!kvm_enabled() ||
161 !kvm_check_extension(cs
->kvm_state
, KVM_CAP_SW_TLB
)) {
165 assert(ARRAY_SIZE(params
.tlb_sizes
) == BOOKE206_MAX_TLBN
);
167 for (i
= 0; i
< BOOKE206_MAX_TLBN
; i
++) {
168 params
.tlb_sizes
[i
] = booke206_tlb_size(env
, i
);
169 params
.tlb_ways
[i
] = booke206_tlb_ways(env
, i
);
170 entries
+= params
.tlb_sizes
[i
];
173 assert(entries
== env
->nb_tlb
);
174 assert(sizeof(struct kvm_book3e_206_tlb_entry
) == sizeof(ppcmas_tlb_t
));
176 env
->tlb_dirty
= true;
178 cfg
.array
= (uintptr_t)env
->tlb
.tlbm
;
179 cfg
.array_len
= sizeof(ppcmas_tlb_t
) * entries
;
180 cfg
.params
= (uintptr_t)¶ms
;
181 cfg
.mmu_type
= KVM_MMU_FSL_BOOKE_NOHV
;
183 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_SW_TLB
, 0, (uintptr_t)&cfg
);
185 fprintf(stderr
, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
186 __func__
, strerror(-ret
));
190 env
->kvm_sw_tlb
= true;
195 #if defined(TARGET_PPC64)
196 static void kvm_get_fallback_smmu_info(PowerPCCPU
*cpu
,
197 struct kvm_ppc_smmu_info
*info
)
199 CPUPPCState
*env
= &cpu
->env
;
200 CPUState
*cs
= CPU(cpu
);
202 memset(info
, 0, sizeof(*info
));
204 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
205 * need to "guess" what the supported page sizes are.
207 * For that to work we make a few assumptions:
209 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
210 * KVM which only supports 4K and 16M pages, but supports them
211 * regardless of the backing store characteritics. We also don't
212 * support 1T segments.
214 * This is safe as if HV KVM ever supports that capability or PR
215 * KVM grows supports for more page/segment sizes, those versions
216 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
217 * will not hit this fallback
219 * - Else we are running HV KVM. This means we only support page
220 * sizes that fit in the backing store. Additionally we only
221 * advertize 64K pages if the processor is ARCH 2.06 and we assume
222 * P7 encodings for the SLB and hash table. Here too, we assume
223 * support for any newer processor will mean a kernel that
224 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
227 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
232 /* Standard 4k base page size segment */
233 info
->sps
[0].page_shift
= 12;
234 info
->sps
[0].slb_enc
= 0;
235 info
->sps
[0].enc
[0].page_shift
= 12;
236 info
->sps
[0].enc
[0].pte_enc
= 0;
238 /* Standard 16M large page size segment */
239 info
->sps
[1].page_shift
= 24;
240 info
->sps
[1].slb_enc
= SLB_VSID_L
;
241 info
->sps
[1].enc
[0].page_shift
= 24;
242 info
->sps
[1].enc
[0].pte_enc
= 0;
246 /* HV KVM has backing store size restrictions */
247 info
->flags
= KVM_PPC_PAGE_SIZES_REAL
;
249 if (env
->mmu_model
& POWERPC_MMU_1TSEG
) {
250 info
->flags
|= KVM_PPC_1T_SEGMENTS
;
253 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
259 /* Standard 4k base page size segment */
260 info
->sps
[i
].page_shift
= 12;
261 info
->sps
[i
].slb_enc
= 0;
262 info
->sps
[i
].enc
[0].page_shift
= 12;
263 info
->sps
[i
].enc
[0].pte_enc
= 0;
266 /* 64K on MMU 2.06 */
267 if (env
->mmu_model
== POWERPC_MMU_2_06
) {
268 info
->sps
[i
].page_shift
= 16;
269 info
->sps
[i
].slb_enc
= 0x110;
270 info
->sps
[i
].enc
[0].page_shift
= 16;
271 info
->sps
[i
].enc
[0].pte_enc
= 1;
275 /* Standard 16M large page size segment */
276 info
->sps
[i
].page_shift
= 24;
277 info
->sps
[i
].slb_enc
= SLB_VSID_L
;
278 info
->sps
[i
].enc
[0].page_shift
= 24;
279 info
->sps
[i
].enc
[0].pte_enc
= 0;
283 static void kvm_get_smmu_info(PowerPCCPU
*cpu
, struct kvm_ppc_smmu_info
*info
)
285 CPUState
*cs
= CPU(cpu
);
288 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_SMMU_INFO
)) {
289 ret
= kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_SMMU_INFO
, info
);
295 kvm_get_fallback_smmu_info(cpu
, info
);
298 static long getrampagesize(void)
304 /* guest RAM is backed by normal anonymous pages */
305 return getpagesize();
309 ret
= statfs(mem_path
, &fs
);
310 } while (ret
!= 0 && errno
== EINTR
);
313 fprintf(stderr
, "Couldn't statfs() memory path: %s\n",
318 #define HUGETLBFS_MAGIC 0x958458f6
320 if (fs
.f_type
!= HUGETLBFS_MAGIC
) {
321 /* Explicit mempath, but it's ordinary pages */
322 return getpagesize();
325 /* It's hugepage, return the huge page size */
329 static bool kvm_valid_page_size(uint32_t flags
, long rampgsize
, uint32_t shift
)
331 if (!(flags
& KVM_PPC_PAGE_SIZES_REAL
)) {
335 return (1ul << shift
) <= rampgsize
;
338 static void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
340 static struct kvm_ppc_smmu_info smmu_info
;
341 static bool has_smmu_info
;
342 CPUPPCState
*env
= &cpu
->env
;
346 /* We only handle page sizes for 64-bit server guests for now */
347 if (!(env
->mmu_model
& POWERPC_MMU_64
)) {
351 /* Collect MMU info from kernel if not already */
352 if (!has_smmu_info
) {
353 kvm_get_smmu_info(cpu
, &smmu_info
);
354 has_smmu_info
= true;
357 rampagesize
= getrampagesize();
359 /* Convert to QEMU form */
360 memset(&env
->sps
, 0, sizeof(env
->sps
));
363 * XXX This loop should be an entry wide AND of the capabilities that
364 * the selected CPU has with the capabilities that KVM supports.
366 for (ik
= iq
= 0; ik
< KVM_PPC_PAGE_SIZES_MAX_SZ
; ik
++) {
367 struct ppc_one_seg_page_size
*qsps
= &env
->sps
.sps
[iq
];
368 struct kvm_ppc_one_seg_page_size
*ksps
= &smmu_info
.sps
[ik
];
370 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
374 qsps
->page_shift
= ksps
->page_shift
;
375 qsps
->slb_enc
= ksps
->slb_enc
;
376 for (jk
= jq
= 0; jk
< KVM_PPC_PAGE_SIZES_MAX_SZ
; jk
++) {
377 if (!kvm_valid_page_size(smmu_info
.flags
, rampagesize
,
378 ksps
->enc
[jk
].page_shift
)) {
381 qsps
->enc
[jq
].page_shift
= ksps
->enc
[jk
].page_shift
;
382 qsps
->enc
[jq
].pte_enc
= ksps
->enc
[jk
].pte_enc
;
383 if (++jq
>= PPC_PAGE_SIZES_MAX_SZ
) {
387 if (++iq
>= PPC_PAGE_SIZES_MAX_SZ
) {
391 env
->slb_nr
= smmu_info
.slb_size
;
392 if (!(smmu_info
.flags
& KVM_PPC_1T_SEGMENTS
)) {
393 env
->mmu_model
&= ~POWERPC_MMU_1TSEG
;
396 #else /* defined (TARGET_PPC64) */
398 static inline void kvm_fixup_page_sizes(PowerPCCPU
*cpu
)
402 #endif /* !defined (TARGET_PPC64) */
404 unsigned long kvm_arch_vcpu_id(CPUState
*cpu
)
406 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu
));
409 int kvm_arch_init_vcpu(CPUState
*cs
)
411 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
412 CPUPPCState
*cenv
= &cpu
->env
;
415 /* Gather server mmu info from KVM and update the CPU state */
416 kvm_fixup_page_sizes(cpu
);
418 /* Synchronize sregs with kvm */
419 ret
= kvm_arch_sync_sregs(cpu
);
424 idle_timer
= timer_new_ns(QEMU_CLOCK_VIRTUAL
, kvm_kick_cpu
, cpu
);
426 /* Some targets support access to KVM's guest TLB. */
427 switch (cenv
->mmu_model
) {
428 case POWERPC_MMU_BOOKE206
:
429 ret
= kvm_booke206_tlb_init(cpu
);
438 static void kvm_sw_tlb_put(PowerPCCPU
*cpu
)
440 CPUPPCState
*env
= &cpu
->env
;
441 CPUState
*cs
= CPU(cpu
);
442 struct kvm_dirty_tlb dirty_tlb
;
443 unsigned char *bitmap
;
446 if (!env
->kvm_sw_tlb
) {
450 bitmap
= g_malloc((env
->nb_tlb
+ 7) / 8);
451 memset(bitmap
, 0xFF, (env
->nb_tlb
+ 7) / 8);
453 dirty_tlb
.bitmap
= (uintptr_t)bitmap
;
454 dirty_tlb
.num_dirty
= env
->nb_tlb
;
456 ret
= kvm_vcpu_ioctl(cs
, KVM_DIRTY_TLB
, &dirty_tlb
);
458 fprintf(stderr
, "%s: KVM_DIRTY_TLB: %s\n",
459 __func__
, strerror(-ret
));
465 static void kvm_get_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
467 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
468 CPUPPCState
*env
= &cpu
->env
;
473 struct kvm_one_reg reg
= {
475 .addr
= (uintptr_t) &val
,
479 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
481 trace_kvm_failed_spr_get(spr
, strerror(errno
));
483 switch (id
& KVM_REG_SIZE_MASK
) {
484 case KVM_REG_SIZE_U32
:
485 env
->spr
[spr
] = val
.u32
;
488 case KVM_REG_SIZE_U64
:
489 env
->spr
[spr
] = val
.u64
;
493 /* Don't handle this size yet */
499 static void kvm_put_one_spr(CPUState
*cs
, uint64_t id
, int spr
)
501 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
502 CPUPPCState
*env
= &cpu
->env
;
507 struct kvm_one_reg reg
= {
509 .addr
= (uintptr_t) &val
,
513 switch (id
& KVM_REG_SIZE_MASK
) {
514 case KVM_REG_SIZE_U32
:
515 val
.u32
= env
->spr
[spr
];
518 case KVM_REG_SIZE_U64
:
519 val
.u64
= env
->spr
[spr
];
523 /* Don't handle this size yet */
527 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
529 trace_kvm_failed_spr_set(spr
, strerror(errno
));
533 static int kvm_put_fp(CPUState
*cs
)
535 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
536 CPUPPCState
*env
= &cpu
->env
;
537 struct kvm_one_reg reg
;
541 if (env
->insns_flags
& PPC_FLOAT
) {
542 uint64_t fpscr
= env
->fpscr
;
543 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
545 reg
.id
= KVM_REG_PPC_FPSCR
;
546 reg
.addr
= (uintptr_t)&fpscr
;
547 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
549 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno
));
553 for (i
= 0; i
< 32; i
++) {
556 vsr
[0] = float64_val(env
->fpr
[i
]);
557 vsr
[1] = env
->vsr
[i
];
558 reg
.addr
= (uintptr_t) &vsr
;
559 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
561 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
563 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx
? "VSR" : "FPR",
570 if (env
->insns_flags
& PPC_ALTIVEC
) {
571 reg
.id
= KVM_REG_PPC_VSCR
;
572 reg
.addr
= (uintptr_t)&env
->vscr
;
573 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
575 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno
));
579 for (i
= 0; i
< 32; i
++) {
580 reg
.id
= KVM_REG_PPC_VR(i
);
581 reg
.addr
= (uintptr_t)&env
->avr
[i
];
582 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
584 DPRINTF("Unable to set VR%d to KVM: %s\n", i
, strerror(errno
));
593 static int kvm_get_fp(CPUState
*cs
)
595 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
596 CPUPPCState
*env
= &cpu
->env
;
597 struct kvm_one_reg reg
;
601 if (env
->insns_flags
& PPC_FLOAT
) {
603 bool vsx
= !!(env
->insns_flags2
& PPC2_VSX
);
605 reg
.id
= KVM_REG_PPC_FPSCR
;
606 reg
.addr
= (uintptr_t)&fpscr
;
607 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
609 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno
));
615 for (i
= 0; i
< 32; i
++) {
618 reg
.addr
= (uintptr_t) &vsr
;
619 reg
.id
= vsx
? KVM_REG_PPC_VSR(i
) : KVM_REG_PPC_FPR(i
);
621 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
623 DPRINTF("Unable to get %s%d from KVM: %s\n",
624 vsx
? "VSR" : "FPR", i
, strerror(errno
));
627 env
->fpr
[i
] = vsr
[0];
629 env
->vsr
[i
] = vsr
[1];
635 if (env
->insns_flags
& PPC_ALTIVEC
) {
636 reg
.id
= KVM_REG_PPC_VSCR
;
637 reg
.addr
= (uintptr_t)&env
->vscr
;
638 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
640 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno
));
644 for (i
= 0; i
< 32; i
++) {
645 reg
.id
= KVM_REG_PPC_VR(i
);
646 reg
.addr
= (uintptr_t)&env
->avr
[i
];
647 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
649 DPRINTF("Unable to get VR%d from KVM: %s\n",
659 #if defined(TARGET_PPC64)
660 static int kvm_get_vpa(CPUState
*cs
)
662 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
663 CPUPPCState
*env
= &cpu
->env
;
664 struct kvm_one_reg reg
;
667 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
668 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
669 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
671 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno
));
675 assert((uintptr_t)&env
->slb_shadow_size
676 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
677 reg
.id
= KVM_REG_PPC_VPA_SLB
;
678 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
679 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
681 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
686 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
687 reg
.id
= KVM_REG_PPC_VPA_DTL
;
688 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
689 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_ONE_REG
, ®
);
691 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
699 static int kvm_put_vpa(CPUState
*cs
)
701 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
702 CPUPPCState
*env
= &cpu
->env
;
703 struct kvm_one_reg reg
;
706 /* SLB shadow or DTL can't be registered unless a master VPA is
707 * registered. That means when restoring state, if a VPA *is*
708 * registered, we need to set that up first. If not, we need to
709 * deregister the others before deregistering the master VPA */
710 assert(env
->vpa_addr
|| !(env
->slb_shadow_addr
|| env
->dtl_addr
));
713 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
714 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
715 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
717 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
722 assert((uintptr_t)&env
->slb_shadow_size
723 == ((uintptr_t)&env
->slb_shadow_addr
+ 8));
724 reg
.id
= KVM_REG_PPC_VPA_SLB
;
725 reg
.addr
= (uintptr_t)&env
->slb_shadow_addr
;
726 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
728 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno
));
732 assert((uintptr_t)&env
->dtl_size
== ((uintptr_t)&env
->dtl_addr
+ 8));
733 reg
.id
= KVM_REG_PPC_VPA_DTL
;
734 reg
.addr
= (uintptr_t)&env
->dtl_addr
;
735 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
737 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
742 if (!env
->vpa_addr
) {
743 reg
.id
= KVM_REG_PPC_VPA_ADDR
;
744 reg
.addr
= (uintptr_t)&env
->vpa_addr
;
745 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
747 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno
));
754 #endif /* TARGET_PPC64 */
756 int kvm_arch_put_registers(CPUState
*cs
, int level
)
758 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
759 CPUPPCState
*env
= &cpu
->env
;
760 struct kvm_regs regs
;
764 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
771 regs
.xer
= cpu_read_xer(env
);
775 regs
.srr0
= env
->spr
[SPR_SRR0
];
776 regs
.srr1
= env
->spr
[SPR_SRR1
];
778 regs
.sprg0
= env
->spr
[SPR_SPRG0
];
779 regs
.sprg1
= env
->spr
[SPR_SPRG1
];
780 regs
.sprg2
= env
->spr
[SPR_SPRG2
];
781 regs
.sprg3
= env
->spr
[SPR_SPRG3
];
782 regs
.sprg4
= env
->spr
[SPR_SPRG4
];
783 regs
.sprg5
= env
->spr
[SPR_SPRG5
];
784 regs
.sprg6
= env
->spr
[SPR_SPRG6
];
785 regs
.sprg7
= env
->spr
[SPR_SPRG7
];
787 regs
.pid
= env
->spr
[SPR_BOOKE_PID
];
789 for (i
= 0;i
< 32; i
++)
790 regs
.gpr
[i
] = env
->gpr
[i
];
793 for (i
= 0; i
< 8; i
++) {
794 regs
.cr
|= (env
->crf
[i
] & 15) << (4 * (7 - i
));
797 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_REGS
, ®s
);
803 if (env
->tlb_dirty
) {
805 env
->tlb_dirty
= false;
808 if (cap_segstate
&& (level
>= KVM_PUT_RESET_STATE
)) {
809 struct kvm_sregs sregs
;
811 sregs
.pvr
= env
->spr
[SPR_PVR
];
813 sregs
.u
.s
.sdr1
= env
->spr
[SPR_SDR1
];
817 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
818 sregs
.u
.s
.ppc64
.slb
[i
].slbe
= env
->slb
[i
].esid
;
819 if (env
->slb
[i
].esid
& SLB_ESID_V
) {
820 sregs
.u
.s
.ppc64
.slb
[i
].slbe
|= i
;
822 sregs
.u
.s
.ppc64
.slb
[i
].slbv
= env
->slb
[i
].vsid
;
827 for (i
= 0; i
< 16; i
++) {
828 sregs
.u
.s
.ppc32
.sr
[i
] = env
->sr
[i
];
832 for (i
= 0; i
< 8; i
++) {
833 /* Beware. We have to swap upper and lower bits here */
834 sregs
.u
.s
.ppc32
.dbat
[i
] = ((uint64_t)env
->DBAT
[0][i
] << 32)
836 sregs
.u
.s
.ppc32
.ibat
[i
] = ((uint64_t)env
->IBAT
[0][i
] << 32)
840 ret
= kvm_vcpu_ioctl(cs
, KVM_SET_SREGS
, &sregs
);
846 if (cap_hior
&& (level
>= KVM_PUT_RESET_STATE
)) {
847 kvm_put_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
853 /* We deliberately ignore errors here, for kernels which have
854 * the ONE_REG calls, but don't support the specific
855 * registers, there's a reasonable chance things will still
856 * work, at least until we try to migrate. */
857 for (i
= 0; i
< 1024; i
++) {
858 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
861 kvm_put_one_spr(cs
, id
, i
);
867 if (kvm_put_vpa(cs
) < 0) {
868 DPRINTF("Warning: Unable to set VPA information to KVM\n");
872 kvm_set_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
873 #endif /* TARGET_PPC64 */
879 int kvm_arch_get_registers(CPUState
*cs
)
881 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
882 CPUPPCState
*env
= &cpu
->env
;
883 struct kvm_regs regs
;
884 struct kvm_sregs sregs
;
888 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_REGS
, ®s
);
893 for (i
= 7; i
>= 0; i
--) {
894 env
->crf
[i
] = cr
& 15;
900 cpu_write_xer(env
, regs
.xer
);
904 env
->spr
[SPR_SRR0
] = regs
.srr0
;
905 env
->spr
[SPR_SRR1
] = regs
.srr1
;
907 env
->spr
[SPR_SPRG0
] = regs
.sprg0
;
908 env
->spr
[SPR_SPRG1
] = regs
.sprg1
;
909 env
->spr
[SPR_SPRG2
] = regs
.sprg2
;
910 env
->spr
[SPR_SPRG3
] = regs
.sprg3
;
911 env
->spr
[SPR_SPRG4
] = regs
.sprg4
;
912 env
->spr
[SPR_SPRG5
] = regs
.sprg5
;
913 env
->spr
[SPR_SPRG6
] = regs
.sprg6
;
914 env
->spr
[SPR_SPRG7
] = regs
.sprg7
;
916 env
->spr
[SPR_BOOKE_PID
] = regs
.pid
;
918 for (i
= 0;i
< 32; i
++)
919 env
->gpr
[i
] = regs
.gpr
[i
];
923 if (cap_booke_sregs
) {
924 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
929 if (sregs
.u
.e
.features
& KVM_SREGS_E_BASE
) {
930 env
->spr
[SPR_BOOKE_CSRR0
] = sregs
.u
.e
.csrr0
;
931 env
->spr
[SPR_BOOKE_CSRR1
] = sregs
.u
.e
.csrr1
;
932 env
->spr
[SPR_BOOKE_ESR
] = sregs
.u
.e
.esr
;
933 env
->spr
[SPR_BOOKE_DEAR
] = sregs
.u
.e
.dear
;
934 env
->spr
[SPR_BOOKE_MCSR
] = sregs
.u
.e
.mcsr
;
935 env
->spr
[SPR_BOOKE_TSR
] = sregs
.u
.e
.tsr
;
936 env
->spr
[SPR_BOOKE_TCR
] = sregs
.u
.e
.tcr
;
937 env
->spr
[SPR_DECR
] = sregs
.u
.e
.dec
;
938 env
->spr
[SPR_TBL
] = sregs
.u
.e
.tb
& 0xffffffff;
939 env
->spr
[SPR_TBU
] = sregs
.u
.e
.tb
>> 32;
940 env
->spr
[SPR_VRSAVE
] = sregs
.u
.e
.vrsave
;
943 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206
) {
944 env
->spr
[SPR_BOOKE_PIR
] = sregs
.u
.e
.pir
;
945 env
->spr
[SPR_BOOKE_MCSRR0
] = sregs
.u
.e
.mcsrr0
;
946 env
->spr
[SPR_BOOKE_MCSRR1
] = sregs
.u
.e
.mcsrr1
;
947 env
->spr
[SPR_BOOKE_DECAR
] = sregs
.u
.e
.decar
;
948 env
->spr
[SPR_BOOKE_IVPR
] = sregs
.u
.e
.ivpr
;
951 if (sregs
.u
.e
.features
& KVM_SREGS_E_64
) {
952 env
->spr
[SPR_BOOKE_EPCR
] = sregs
.u
.e
.epcr
;
955 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPRG8
) {
956 env
->spr
[SPR_BOOKE_SPRG8
] = sregs
.u
.e
.sprg8
;
959 if (sregs
.u
.e
.features
& KVM_SREGS_E_IVOR
) {
960 env
->spr
[SPR_BOOKE_IVOR0
] = sregs
.u
.e
.ivor_low
[0];
961 env
->spr
[SPR_BOOKE_IVOR1
] = sregs
.u
.e
.ivor_low
[1];
962 env
->spr
[SPR_BOOKE_IVOR2
] = sregs
.u
.e
.ivor_low
[2];
963 env
->spr
[SPR_BOOKE_IVOR3
] = sregs
.u
.e
.ivor_low
[3];
964 env
->spr
[SPR_BOOKE_IVOR4
] = sregs
.u
.e
.ivor_low
[4];
965 env
->spr
[SPR_BOOKE_IVOR5
] = sregs
.u
.e
.ivor_low
[5];
966 env
->spr
[SPR_BOOKE_IVOR6
] = sregs
.u
.e
.ivor_low
[6];
967 env
->spr
[SPR_BOOKE_IVOR7
] = sregs
.u
.e
.ivor_low
[7];
968 env
->spr
[SPR_BOOKE_IVOR8
] = sregs
.u
.e
.ivor_low
[8];
969 env
->spr
[SPR_BOOKE_IVOR9
] = sregs
.u
.e
.ivor_low
[9];
970 env
->spr
[SPR_BOOKE_IVOR10
] = sregs
.u
.e
.ivor_low
[10];
971 env
->spr
[SPR_BOOKE_IVOR11
] = sregs
.u
.e
.ivor_low
[11];
972 env
->spr
[SPR_BOOKE_IVOR12
] = sregs
.u
.e
.ivor_low
[12];
973 env
->spr
[SPR_BOOKE_IVOR13
] = sregs
.u
.e
.ivor_low
[13];
974 env
->spr
[SPR_BOOKE_IVOR14
] = sregs
.u
.e
.ivor_low
[14];
975 env
->spr
[SPR_BOOKE_IVOR15
] = sregs
.u
.e
.ivor_low
[15];
977 if (sregs
.u
.e
.features
& KVM_SREGS_E_SPE
) {
978 env
->spr
[SPR_BOOKE_IVOR32
] = sregs
.u
.e
.ivor_high
[0];
979 env
->spr
[SPR_BOOKE_IVOR33
] = sregs
.u
.e
.ivor_high
[1];
980 env
->spr
[SPR_BOOKE_IVOR34
] = sregs
.u
.e
.ivor_high
[2];
983 if (sregs
.u
.e
.features
& KVM_SREGS_E_PM
) {
984 env
->spr
[SPR_BOOKE_IVOR35
] = sregs
.u
.e
.ivor_high
[3];
987 if (sregs
.u
.e
.features
& KVM_SREGS_E_PC
) {
988 env
->spr
[SPR_BOOKE_IVOR36
] = sregs
.u
.e
.ivor_high
[4];
989 env
->spr
[SPR_BOOKE_IVOR37
] = sregs
.u
.e
.ivor_high
[5];
993 if (sregs
.u
.e
.features
& KVM_SREGS_E_ARCH206_MMU
) {
994 env
->spr
[SPR_BOOKE_MAS0
] = sregs
.u
.e
.mas0
;
995 env
->spr
[SPR_BOOKE_MAS1
] = sregs
.u
.e
.mas1
;
996 env
->spr
[SPR_BOOKE_MAS2
] = sregs
.u
.e
.mas2
;
997 env
->spr
[SPR_BOOKE_MAS3
] = sregs
.u
.e
.mas7_3
& 0xffffffff;
998 env
->spr
[SPR_BOOKE_MAS4
] = sregs
.u
.e
.mas4
;
999 env
->spr
[SPR_BOOKE_MAS6
] = sregs
.u
.e
.mas6
;
1000 env
->spr
[SPR_BOOKE_MAS7
] = sregs
.u
.e
.mas7_3
>> 32;
1001 env
->spr
[SPR_MMUCFG
] = sregs
.u
.e
.mmucfg
;
1002 env
->spr
[SPR_BOOKE_TLB0CFG
] = sregs
.u
.e
.tlbcfg
[0];
1003 env
->spr
[SPR_BOOKE_TLB1CFG
] = sregs
.u
.e
.tlbcfg
[1];
1006 if (sregs
.u
.e
.features
& KVM_SREGS_EXP
) {
1007 env
->spr
[SPR_BOOKE_EPR
] = sregs
.u
.e
.epr
;
1010 if (sregs
.u
.e
.features
& KVM_SREGS_E_PD
) {
1011 env
->spr
[SPR_BOOKE_EPLC
] = sregs
.u
.e
.eplc
;
1012 env
->spr
[SPR_BOOKE_EPSC
] = sregs
.u
.e
.epsc
;
1015 if (sregs
.u
.e
.impl_id
== KVM_SREGS_E_IMPL_FSL
) {
1016 env
->spr
[SPR_E500_SVR
] = sregs
.u
.e
.impl
.fsl
.svr
;
1017 env
->spr
[SPR_Exxx_MCAR
] = sregs
.u
.e
.impl
.fsl
.mcar
;
1018 env
->spr
[SPR_HID0
] = sregs
.u
.e
.impl
.fsl
.hid0
;
1020 if (sregs
.u
.e
.impl
.fsl
.features
& KVM_SREGS_E_FSL_PIDn
) {
1021 env
->spr
[SPR_BOOKE_PID1
] = sregs
.u
.e
.impl
.fsl
.pid1
;
1022 env
->spr
[SPR_BOOKE_PID2
] = sregs
.u
.e
.impl
.fsl
.pid2
;
1028 ret
= kvm_vcpu_ioctl(cs
, KVM_GET_SREGS
, &sregs
);
1033 if (!env
->external_htab
) {
1034 ppc_store_sdr1(env
, sregs
.u
.s
.sdr1
);
1040 * The packed SLB array we get from KVM_GET_SREGS only contains
1041 * information about valid entries. So we flush our internal
1042 * copy to get rid of stale ones, then put all valid SLB entries
1045 memset(env
->slb
, 0, sizeof(env
->slb
));
1046 for (i
= 0; i
< ARRAY_SIZE(env
->slb
); i
++) {
1047 target_ulong rb
= sregs
.u
.s
.ppc64
.slb
[i
].slbe
;
1048 target_ulong rs
= sregs
.u
.s
.ppc64
.slb
[i
].slbv
;
1050 * Only restore valid entries
1052 if (rb
& SLB_ESID_V
) {
1053 ppc_store_slb(env
, rb
, rs
);
1059 for (i
= 0; i
< 16; i
++) {
1060 env
->sr
[i
] = sregs
.u
.s
.ppc32
.sr
[i
];
1064 for (i
= 0; i
< 8; i
++) {
1065 env
->DBAT
[0][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] & 0xffffffff;
1066 env
->DBAT
[1][i
] = sregs
.u
.s
.ppc32
.dbat
[i
] >> 32;
1067 env
->IBAT
[0][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] & 0xffffffff;
1068 env
->IBAT
[1][i
] = sregs
.u
.s
.ppc32
.ibat
[i
] >> 32;
1073 kvm_get_one_spr(cs
, KVM_REG_PPC_HIOR
, SPR_HIOR
);
1079 /* We deliberately ignore errors here, for kernels which have
1080 * the ONE_REG calls, but don't support the specific
1081 * registers, there's a reasonable chance things will still
1082 * work, at least until we try to migrate. */
1083 for (i
= 0; i
< 1024; i
++) {
1084 uint64_t id
= env
->spr_cb
[i
].one_reg_id
;
1087 kvm_get_one_spr(cs
, id
, i
);
1093 if (kvm_get_vpa(cs
) < 0) {
1094 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1098 kvm_get_one_reg(cs
, KVM_REG_PPC_TB_OFFSET
, &env
->tb_env
->tb_offset
);
1105 int kvmppc_set_interrupt(PowerPCCPU
*cpu
, int irq
, int level
)
1107 unsigned virq
= level
? KVM_INTERRUPT_SET_LEVEL
: KVM_INTERRUPT_UNSET
;
1109 if (irq
!= PPC_INTERRUPT_EXT
) {
1113 if (!kvm_enabled() || !cap_interrupt_unset
|| !cap_interrupt_level
) {
1117 kvm_vcpu_ioctl(CPU(cpu
), KVM_INTERRUPT
, &virq
);
1122 #if defined(TARGET_PPCEMB)
1123 #define PPC_INPUT_INT PPC40x_INPUT_INT
1124 #elif defined(TARGET_PPC64)
1125 #define PPC_INPUT_INT PPC970_INPUT_INT
1127 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1130 void kvm_arch_pre_run(CPUState
*cs
, struct kvm_run
*run
)
1132 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1133 CPUPPCState
*env
= &cpu
->env
;
1137 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1138 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1139 if (!cap_interrupt_level
&&
1140 run
->ready_for_interrupt_injection
&&
1141 (cs
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
1142 (env
->irq_input_state
& (1<<PPC_INPUT_INT
)))
1144 /* For now KVM disregards the 'irq' argument. However, in the
1145 * future KVM could cache it in-kernel to avoid a heavyweight exit
1146 * when reading the UIC.
1148 irq
= KVM_INTERRUPT_SET
;
1150 DPRINTF("injected interrupt %d\n", irq
);
1151 r
= kvm_vcpu_ioctl(cs
, KVM_INTERRUPT
, &irq
);
1153 printf("cpu %d fail inject %x\n", cs
->cpu_index
, irq
);
1156 /* Always wake up soon in case the interrupt was level based */
1157 timer_mod(idle_timer
, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL
) +
1158 (get_ticks_per_sec() / 50));
1161 /* We don't know if there are more interrupts pending after this. However,
1162 * the guest will return to userspace in the course of handling this one
1163 * anyways, so we will get a chance to deliver the rest. */
1166 void kvm_arch_post_run(CPUState
*cpu
, struct kvm_run
*run
)
1170 int kvm_arch_process_async_events(CPUState
*cs
)
1175 static int kvmppc_handle_halt(PowerPCCPU
*cpu
)
1177 CPUState
*cs
= CPU(cpu
);
1178 CPUPPCState
*env
= &cpu
->env
;
1180 if (!(cs
->interrupt_request
& CPU_INTERRUPT_HARD
) && (msr_ee
)) {
1182 cs
->exception_index
= EXCP_HLT
;
1188 /* map dcr access to existing qemu dcr emulation */
1189 static int kvmppc_handle_dcr_read(CPUPPCState
*env
, uint32_t dcrn
, uint32_t *data
)
1191 if (ppc_dcr_read(env
->dcr_env
, dcrn
, data
) < 0)
1192 fprintf(stderr
, "Read to unhandled DCR (0x%x)\n", dcrn
);
1197 static int kvmppc_handle_dcr_write(CPUPPCState
*env
, uint32_t dcrn
, uint32_t data
)
1199 if (ppc_dcr_write(env
->dcr_env
, dcrn
, data
) < 0)
1200 fprintf(stderr
, "Write to unhandled DCR (0x%x)\n", dcrn
);
1205 int kvm_arch_handle_exit(CPUState
*cs
, struct kvm_run
*run
)
1207 PowerPCCPU
*cpu
= POWERPC_CPU(cs
);
1208 CPUPPCState
*env
= &cpu
->env
;
1211 switch (run
->exit_reason
) {
1213 if (run
->dcr
.is_write
) {
1214 DPRINTF("handle dcr write\n");
1215 ret
= kvmppc_handle_dcr_write(env
, run
->dcr
.dcrn
, run
->dcr
.data
);
1217 DPRINTF("handle dcr read\n");
1218 ret
= kvmppc_handle_dcr_read(env
, run
->dcr
.dcrn
, &run
->dcr
.data
);
1222 DPRINTF("handle halt\n");
1223 ret
= kvmppc_handle_halt(cpu
);
1225 #if defined(TARGET_PPC64)
1226 case KVM_EXIT_PAPR_HCALL
:
1227 DPRINTF("handle PAPR hypercall\n");
1228 run
->papr_hcall
.ret
= spapr_hypercall(cpu
,
1230 run
->papr_hcall
.args
);
1235 DPRINTF("handle epr\n");
1236 run
->epr
.epr
= ldl_phys(cs
->as
, env
->mpic_iack
);
1239 case KVM_EXIT_WATCHDOG
:
1240 DPRINTF("handle watchdog expiry\n");
1241 watchdog_perform_action();
1246 fprintf(stderr
, "KVM: unknown exit reason %d\n", run
->exit_reason
);
1254 int kvmppc_or_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1256 CPUState
*cs
= CPU(cpu
);
1257 uint32_t bits
= tsr_bits
;
1258 struct kvm_one_reg reg
= {
1259 .id
= KVM_REG_PPC_OR_TSR
,
1260 .addr
= (uintptr_t) &bits
,
1263 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1266 int kvmppc_clear_tsr_bits(PowerPCCPU
*cpu
, uint32_t tsr_bits
)
1269 CPUState
*cs
= CPU(cpu
);
1270 uint32_t bits
= tsr_bits
;
1271 struct kvm_one_reg reg
= {
1272 .id
= KVM_REG_PPC_CLEAR_TSR
,
1273 .addr
= (uintptr_t) &bits
,
1276 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1279 int kvmppc_set_tcr(PowerPCCPU
*cpu
)
1281 CPUState
*cs
= CPU(cpu
);
1282 CPUPPCState
*env
= &cpu
->env
;
1283 uint32_t tcr
= env
->spr
[SPR_BOOKE_TCR
];
1285 struct kvm_one_reg reg
= {
1286 .id
= KVM_REG_PPC_TCR
,
1287 .addr
= (uintptr_t) &tcr
,
1290 return kvm_vcpu_ioctl(cs
, KVM_SET_ONE_REG
, ®
);
1293 int kvmppc_booke_watchdog_enable(PowerPCCPU
*cpu
)
1295 CPUState
*cs
= CPU(cpu
);
1298 if (!kvm_enabled()) {
1302 if (!cap_ppc_watchdog
) {
1303 printf("warning: KVM does not support watchdog");
1307 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_BOOKE_WATCHDOG
, 0);
1309 fprintf(stderr
, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1310 __func__
, strerror(-ret
));
1317 static int read_cpuinfo(const char *field
, char *value
, int len
)
1321 int field_len
= strlen(field
);
1324 f
= fopen("/proc/cpuinfo", "r");
1330 if(!fgets(line
, sizeof(line
), f
)) {
1333 if (!strncmp(line
, field
, field_len
)) {
1334 pstrcpy(value
, len
, line
);
1345 uint32_t kvmppc_get_tbfreq(void)
1349 uint32_t retval
= get_ticks_per_sec();
1351 if (read_cpuinfo("timebase", line
, sizeof(line
))) {
1355 if (!(ns
= strchr(line
, ':'))) {
1365 /* Try to find a device tree node for a CPU with clock-frequency property */
1366 static int kvmppc_find_cpu_dt(char *buf
, int buf_len
)
1368 struct dirent
*dirp
;
1371 if ((dp
= opendir(PROC_DEVTREE_CPU
)) == NULL
) {
1372 printf("Can't open directory " PROC_DEVTREE_CPU
"\n");
1377 while ((dirp
= readdir(dp
)) != NULL
) {
1379 snprintf(buf
, buf_len
, "%s%s/clock-frequency", PROC_DEVTREE_CPU
,
1381 f
= fopen(buf
, "r");
1383 snprintf(buf
, buf_len
, "%s%s", PROC_DEVTREE_CPU
, dirp
->d_name
);
1390 if (buf
[0] == '\0') {
1391 printf("Unknown host!\n");
1398 /* Read a CPU node property from the host device tree that's a single
1399 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1400 * (can't find or open the property, or doesn't understand the
1402 static uint64_t kvmppc_read_int_cpu_dt(const char *propname
)
1412 if (kvmppc_find_cpu_dt(buf
, sizeof(buf
))) {
1416 strncat(buf
, "/", sizeof(buf
) - strlen(buf
));
1417 strncat(buf
, propname
, sizeof(buf
) - strlen(buf
));
1419 f
= fopen(buf
, "rb");
1424 len
= fread(&u
, 1, sizeof(u
), f
);
1428 /* property is a 32-bit quantity */
1429 return be32_to_cpu(u
.v32
);
1431 return be64_to_cpu(u
.v64
);
/* Host CPU clock frequency, from the device tree "clock-frequency"
 * property (-1 on failure, per kvmppc_read_int_cpu_dt). */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
/* Host "ibm,vmx" device-tree property (Altivec/VSX level; -1 when
 * unavailable, truncated to 32 bits). */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
/* Host "ibm,dfp" device-tree property (decimal FP support; -1 when
 * unavailable, truncated to 32 bits). */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1452 static int kvmppc_get_pvinfo(CPUPPCState
*env
, struct kvm_ppc_pvinfo
*pvinfo
)
1454 PowerPCCPU
*cpu
= ppc_env_get_cpu(env
);
1455 CPUState
*cs
= CPU(cpu
);
1457 if (kvm_check_extension(cs
->kvm_state
, KVM_CAP_PPC_GET_PVINFO
) &&
1458 !kvm_vm_ioctl(cs
->kvm_state
, KVM_PPC_GET_PVINFO
, pvinfo
)) {
1465 int kvmppc_get_hasidle(CPUPPCState
*env
)
1467 struct kvm_ppc_pvinfo pvinfo
;
1469 if (!kvmppc_get_pvinfo(env
, &pvinfo
) &&
1470 (pvinfo
.flags
& KVM_PPC_PVINFO_FLAGS_EV_IDLE
)) {
1477 int kvmppc_get_hypercall(CPUPPCState
*env
, uint8_t *buf
, int buf_len
)
1479 uint32_t *hc
= (uint32_t*)buf
;
1480 struct kvm_ppc_pvinfo pvinfo
;
1482 if (!kvmppc_get_pvinfo(env
, &pvinfo
)) {
1483 memcpy(buf
, pvinfo
.hcall
, buf_len
);
1488 * Fallback to always fail hypercalls:
1504 void kvmppc_set_papr(PowerPCCPU
*cpu
)
1506 CPUState
*cs
= CPU(cpu
);
1509 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_PAPR
, 0);
1511 cpu_abort(cs
, "This KVM version does not support PAPR\n");
1514 /* Update the capability flag so we sync the right information
1519 int kvmppc_set_compat(PowerPCCPU
*cpu
, uint32_t cpu_version
)
1521 return kvm_set_one_reg(CPU(cpu
), KVM_REG_PPC_ARCH_COMPAT
, &cpu_version
);
1524 void kvmppc_set_mpic_proxy(PowerPCCPU
*cpu
, int mpic_proxy
)
1526 CPUState
*cs
= CPU(cpu
);
1529 ret
= kvm_vcpu_enable_cap(cs
, KVM_CAP_PPC_EPR
, 0, mpic_proxy
);
1530 if (ret
&& mpic_proxy
) {
1531 cpu_abort(cs
, "This KVM version does not support EPR\n");
1535 int kvmppc_smt_threads(void)
1537 return cap_ppc_smt
? cap_ppc_smt
: 1;
1541 off_t
kvmppc_alloc_rma(const char *name
, MemoryRegion
*sysmem
)
1546 struct kvm_allocate_rma ret
;
1547 MemoryRegion
*rma_region
;
1549 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1550 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1551 * not necessary on this hardware
1552 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1554 * FIXME: We should allow the user to force contiguous RMA
1555 * allocation in the cap_ppc_rma==1 case.
1557 if (cap_ppc_rma
< 2) {
1561 fd
= kvm_vm_ioctl(kvm_state
, KVM_ALLOCATE_RMA
, &ret
);
1563 fprintf(stderr
, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1568 size
= MIN(ret
.rma_size
, 256ul << 20);
1570 rma
= mmap(NULL
, size
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1571 if (rma
== MAP_FAILED
) {
1572 fprintf(stderr
, "KVM: Error mapping RMA: %s\n", strerror(errno
));
1576 rma_region
= g_new(MemoryRegion
, 1);
1577 memory_region_init_ram_ptr(rma_region
, NULL
, name
, size
, rma
);
1578 vmstate_register_ram_global(rma_region
);
1579 memory_region_add_subregion(sysmem
, 0, rma_region
);
1584 uint64_t kvmppc_rma_size(uint64_t current_size
, unsigned int hash_shift
)
1586 struct kvm_ppc_smmu_info info
;
1587 long rampagesize
, best_page_shift
;
1590 if (cap_ppc_rma
>= 2) {
1591 return current_size
;
1594 /* Find the largest hardware supported page size that's less than
1595 * or equal to the (logical) backing page size of guest RAM */
1596 kvm_get_smmu_info(POWERPC_CPU(first_cpu
), &info
);
1597 rampagesize
= getrampagesize();
1598 best_page_shift
= 0;
1600 for (i
= 0; i
< KVM_PPC_PAGE_SIZES_MAX_SZ
; i
++) {
1601 struct kvm_ppc_one_seg_page_size
*sps
= &info
.sps
[i
];
1603 if (!sps
->page_shift
) {
1607 if ((sps
->page_shift
> best_page_shift
)
1608 && ((1UL << sps
->page_shift
) <= rampagesize
)) {
1609 best_page_shift
= sps
->page_shift
;
1613 return MIN(current_size
,
1614 1ULL << (best_page_shift
+ hash_shift
- 7));
1618 bool kvmppc_spapr_use_multitce(void)
1620 return cap_spapr_multitce
;
1623 void *kvmppc_create_spapr_tce(uint32_t liobn
, uint32_t window_size
, int *pfd
)
1625 struct kvm_create_spapr_tce args
= {
1627 .window_size
= window_size
,
1633 /* Must set fd to -1 so we don't try to munmap when called for
1634 * destroying the table, which the upper layers -will- do
1637 if (!cap_spapr_tce
) {
1641 fd
= kvm_vm_ioctl(kvm_state
, KVM_CREATE_SPAPR_TCE
, &args
);
1643 fprintf(stderr
, "KVM: Failed to create TCE table for liobn 0x%x\n",
1648 len
= (window_size
/ SPAPR_TCE_PAGE_SIZE
) * sizeof(uint64_t);
1649 /* FIXME: round this up to page size */
1651 table
= mmap(NULL
, len
, PROT_READ
|PROT_WRITE
, MAP_SHARED
, fd
, 0);
1652 if (table
== MAP_FAILED
) {
1653 fprintf(stderr
, "KVM: Failed to map TCE table for liobn 0x%x\n",
/*
 * Tear down a TCE table created by kvmppc_create_spapr_tce():
 * unmap the @nb_table-entry mapping and close its fd.  A negative fd
 * means no kernel table existed.  Returns 0 (on unmap/close failure
 * the table is deliberately leaked).
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
1682 int kvmppc_reset_htab(int shift_hint
)
1684 uint32_t shift
= shift_hint
;
1686 if (!kvm_enabled()) {
1687 /* Full emulation, tell caller to allocate htab itself */
1690 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_ALLOC_HTAB
)) {
1692 ret
= kvm_vm_ioctl(kvm_state
, KVM_PPC_ALLOCATE_HTAB
, &shift
);
1693 if (ret
== -ENOTTY
) {
1694 /* At least some versions of PR KVM advertise the
1695 * capability, but don't implement the ioctl(). Oops.
1696 * Return 0 so that we allocate the htab in qemu, as is
1697 * correct for PR. */
1699 } else if (ret
< 0) {
1705 /* We have a kernel that predates the htab reset calls. For PR
1706 * KVM, we need to allocate the htab ourselves, for an HV KVM of
1707 * this era, it has allocated a 16MB fixed size hash table
1708 * already. Kernels of this era have the GET_PVINFO capability
1709 * only on PR, so we use this hack to determine the right
1711 if (kvm_check_extension(kvm_state
, KVM_CAP_PPC_GET_PVINFO
)) {
1712 /* PR - tell caller to allocate htab */
1715 /* HV - assume 16MB kernel allocated htab */
1720 static inline uint32_t mfpvr(void)
/*
 * Set (@on true) or clear (@on false) the @flags bits in the
 * instruction-set feature mask pointed to by @word.
 * NOTE(review): the body was reconstructed from the call sites in
 * kvmppc_host_cpu_class_init -- confirm against the original file.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}
1738 static void kvmppc_host_cpu_initfn(Object
*obj
)
1740 assert(kvm_enabled());
1743 static void kvmppc_host_cpu_class_init(ObjectClass
*oc
, void *data
)
1745 PowerPCCPUClass
*pcc
= POWERPC_CPU_CLASS(oc
);
1746 uint32_t vmx
= kvmppc_get_vmx();
1747 uint32_t dfp
= kvmppc_get_dfp();
1748 uint32_t dcache_size
= kvmppc_read_int_cpu_dt("d-cache-size");
1749 uint32_t icache_size
= kvmppc_read_int_cpu_dt("i-cache-size");
1751 /* Now fix up the class with information we can query from the host */
1755 /* Only override when we know what the host supports */
1756 alter_insns(&pcc
->insns_flags
, PPC_ALTIVEC
, vmx
> 0);
1757 alter_insns(&pcc
->insns_flags2
, PPC2_VSX
, vmx
> 1);
1760 /* Only override when we know what the host supports */
1761 alter_insns(&pcc
->insns_flags2
, PPC2_DFP
, dfp
);
1764 if (dcache_size
!= -1) {
1765 pcc
->l1_dcache_size
= dcache_size
;
1768 if (icache_size
!= -1) {
1769 pcc
->l1_icache_size
= icache_size
;
1773 bool kvmppc_has_cap_epr(void)
1778 bool kvmppc_has_cap_htab_fd(void)
1783 static PowerPCCPUClass
*ppc_cpu_get_family_class(PowerPCCPUClass
*pcc
)
1785 ObjectClass
*oc
= OBJECT_CLASS(pcc
);
1787 while (oc
&& !object_class_is_abstract(oc
)) {
1788 oc
= object_class_get_parent(oc
);
1792 return POWERPC_CPU_CLASS(oc
);
1795 static int kvm_ppc_register_host_cpu_type(void)
1797 TypeInfo type_info
= {
1798 .name
= TYPE_HOST_POWERPC_CPU
,
1799 .instance_init
= kvmppc_host_cpu_initfn
,
1800 .class_init
= kvmppc_host_cpu_class_init
,
1802 uint32_t host_pvr
= mfpvr();
1803 PowerPCCPUClass
*pvr_pcc
;
1806 pvr_pcc
= ppc_cpu_class_by_pvr(host_pvr
);
1807 if (pvr_pcc
== NULL
) {
1808 pvr_pcc
= ppc_cpu_class_by_pvr_mask(host_pvr
);
1810 if (pvr_pcc
== NULL
) {
1813 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
1814 type_register(&type_info
);
1816 /* Register generic family CPU class for a family */
1817 pvr_pcc
= ppc_cpu_get_family_class(pvr_pcc
);
1818 dc
= DEVICE_CLASS(pvr_pcc
);
1819 type_info
.parent
= object_class_get_name(OBJECT_CLASS(pvr_pcc
));
1820 type_info
.name
= g_strdup_printf("%s-"TYPE_POWERPC_CPU
, dc
->desc
);
1821 type_register(&type_info
);
1826 int kvmppc_define_rtas_kernel_token(uint32_t token
, const char *function
)
1828 struct kvm_rtas_token_args args
= {
1832 if (!kvm_check_extension(kvm_state
, KVM_CAP_PPC_RTAS
)) {
1836 strncpy(args
.name
, function
, sizeof(args
.name
));
1838 return kvm_vm_ioctl(kvm_state
, KVM_PPC_RTAS_DEFINE_TOKEN
, &args
);
1841 int kvmppc_get_htab_fd(bool write
)
1843 struct kvm_get_htab_fd s
= {
1844 .flags
= write
? KVM_GET_HTAB_WRITE
: 0,
1849 fprintf(stderr
, "KVM version doesn't support saving the hash table\n");
1853 return kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &s
);
1856 int kvmppc_save_htab(QEMUFile
*f
, int fd
, size_t bufsize
, int64_t max_ns
)
1858 int64_t starttime
= qemu_clock_get_ns(QEMU_CLOCK_REALTIME
);
1859 uint8_t buf
[bufsize
];
1863 rc
= read(fd
, buf
, bufsize
);
1865 fprintf(stderr
, "Error reading data from KVM HTAB fd: %s\n",
1869 /* Kernel already retuns data in BE format for the file */
1870 qemu_put_buffer(f
, buf
, rc
);
1874 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME
) - starttime
) < max_ns
)));
1876 return (rc
== 0) ? 1 : 0;
1879 int kvmppc_load_htab_chunk(QEMUFile
*f
, int fd
, uint32_t index
,
1880 uint16_t n_valid
, uint16_t n_invalid
)
1882 struct kvm_get_htab_header
*buf
;
1883 size_t chunksize
= sizeof(*buf
) + n_valid
*HASH_PTE_SIZE_64
;
1886 buf
= alloca(chunksize
);
1887 /* This is KVM on ppc, so this is all big-endian */
1889 buf
->n_valid
= n_valid
;
1890 buf
->n_invalid
= n_invalid
;
1892 qemu_get_buffer(f
, (void *)(buf
+ 1), HASH_PTE_SIZE_64
*n_valid
);
1894 rc
= write(fd
, buf
, chunksize
);
1896 fprintf(stderr
, "Error writing KVM hash table: %s\n",
1900 if (rc
!= chunksize
) {
1901 /* We should never get a short write on a single chunk */
1902 fprintf(stderr
, "Short write, restoring KVM hash table\n");
1908 bool kvm_arch_stop_on_emulation_error(CPUState
*cpu
)
1913 int kvm_arch_on_sigbus_vcpu(CPUState
*cpu
, int code
, void *addr
)
1918 int kvm_arch_on_sigbus(int code
, void *addr
)
1923 void kvm_arch_init_irq_routing(KVMState
*s
)
1927 int kvm_arch_insert_sw_breakpoint(CPUState
*cpu
, struct kvm_sw_breakpoint
*bp
)
1932 int kvm_arch_remove_sw_breakpoint(CPUState
*cpu
, struct kvm_sw_breakpoint
*bp
)
1937 int kvm_arch_insert_hw_breakpoint(target_ulong addr
, target_ulong len
, int type
)
1942 int kvm_arch_remove_hw_breakpoint(target_ulong addr
, target_ulong len
, int type
)
1947 void kvm_arch_remove_all_hw_breakpoints(void)
1951 void kvm_arch_update_guest_debug(CPUState
*cpu
, struct kvm_guest_debug
*dbg
)
1955 struct kvm_get_htab_buf
{
1956 struct kvm_get_htab_header header
;
1958 * We require one extra byte for read
1960 target_ulong hpte
[(HPTES_PER_GROUP
* 2) + 1];
1963 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU
*cpu
, target_ulong pte_index
)
1966 struct kvm_get_htab_fd ghf
;
1967 struct kvm_get_htab_buf
*hpte_buf
;
1970 ghf
.start_index
= pte_index
;
1971 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
1976 hpte_buf
= g_malloc0(sizeof(*hpte_buf
));
1978 * Read the hpte group
1980 if (read(htab_fd
, hpte_buf
, sizeof(*hpte_buf
)) < 0) {
1985 return (uint64_t)(uintptr_t) hpte_buf
->hpte
;
1994 void kvmppc_hash64_free_pteg(uint64_t token
)
1996 struct kvm_get_htab_buf
*htab_buf
;
1998 htab_buf
= container_of((void *)(uintptr_t) token
, struct kvm_get_htab_buf
,
2004 void kvmppc_hash64_write_pte(CPUPPCState
*env
, target_ulong pte_index
,
2005 target_ulong pte0
, target_ulong pte1
)
2008 struct kvm_get_htab_fd ghf
;
2009 struct kvm_get_htab_buf hpte_buf
;
2012 ghf
.start_index
= 0; /* Ignored */
2013 htab_fd
= kvm_vm_ioctl(kvm_state
, KVM_PPC_GET_HTAB_FD
, &ghf
);
2018 hpte_buf
.header
.n_valid
= 1;
2019 hpte_buf
.header
.n_invalid
= 0;
2020 hpte_buf
.header
.index
= pte_index
;
2021 hpte_buf
.hpte
[0] = pte0
;
2022 hpte_buf
.hpte
[1] = pte1
;
2024 * Write the hpte entry.
2025 * CAUTION: write() has the warn_unused_result attribute. Hence we
2026 * need to check the return value, even though we do nothing.
2028 if (write(htab_fd
, &hpte_buf
, sizeof(hpte_buf
)) < 0) {