target-ppc/kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include <dirent.h>
18 #include <sys/types.h>
19 #include <sys/ioctl.h>
20 #include <sys/mman.h>
21 #include <sys/vfs.h>
23 #include <linux/kvm.h>
25 #include "qemu-common.h"
26 #include "qemu/timer.h"
27 #include "sysemu/sysemu.h"
28 #include "sysemu/kvm.h"
29 #include "kvm_ppc.h"
30 #include "cpu.h"
31 #include "sysemu/cpus.h"
32 #include "sysemu/device_tree.h"
33 #include "mmu-hash64.h"
35 #include "hw/sysbus.h"
36 #include "hw/ppc/spapr.h"
37 #include "hw/ppc/spapr_vio.h"
38 #include "hw/ppc/ppc.h"
39 #include "sysemu/watchdog.h"
40 #include "trace.h"
41 #include "exec/gdbstub.h"
42 #include "exec/memattrs.h"
44 //#define DEBUG_KVM
46 #ifdef DEBUG_KVM
47 #define DPRINTF(fmt, ...) \
48 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
49 #else
50 #define DPRINTF(fmt, ...) \
51 do { } while (0)
52 #endif
54 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
56 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
57 KVM_CAP_LAST_INFO
60 static int cap_interrupt_unset = false;
61 static int cap_interrupt_level = false;
62 static int cap_segstate;
63 static int cap_booke_sregs;
64 static int cap_ppc_smt;
65 static int cap_ppc_rma;
66 static int cap_spapr_tce;
67 static int cap_spapr_multitce;
68 static int cap_spapr_vfio;
69 static int cap_hior;
70 static int cap_one_reg;
71 static int cap_epr;
72 static int cap_ppc_watchdog;
73 static int cap_papr;
74 static int cap_htab_fd;
75 static int cap_fixup_hcalls;
77 static uint32_t debug_inst_opcode;
79 /* XXX We have a race condition where we actually have a level triggered
80 * interrupt, but the infrastructure can't expose that yet, so the guest
81 * takes but ignores it, goes to sleep and never gets notified that there's
82 * still an interrupt pending.
84 * As a quick workaround, let's just wake up again 20 ms after we injected
85 * an interrupt. That way we can assure that we're always reinjecting
86 * interrupts in case the guest swallowed them.
88 static QEMUTimer *idle_timer;
90 static void kvm_kick_cpu(void *opaque)
92 PowerPCCPU *cpu = opaque;
94 qemu_cpu_kick(CPU(cpu));
97 static int kvm_ppc_register_host_cpu_type(void);
99 int kvm_arch_init(MachineState *ms, KVMState *s)
101 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
102 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
103 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
104 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
105 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
106 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
107 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
108 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
109 cap_spapr_vfio = false;
110 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
111 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
112 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
113 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
114 /* Note: we don't set cap_papr here, because this capability is
115 * only activated after this by kvmppc_set_papr() */
116 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
117 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
119 if (!cap_interrupt_level) {
120 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
121 "VM to stall at times!\n");
124 kvm_ppc_register_host_cpu_type();
126 return 0;
129 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
131 CPUPPCState *cenv = &cpu->env;
132 CPUState *cs = CPU(cpu);
133 struct kvm_sregs sregs;
134 int ret;
136 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
137 /* What we're really trying to say is "if we're on BookE, we use
138 the native PVR for now". This is the only sane way to check
139 it though, so we potentially confuse users into thinking they can
140 run BookE guests on BookS. Let's hope nobody dares try :) */
141 return 0;
142 } else {
143 if (!cap_segstate) {
144 fprintf(stderr, "kvm error: missing PVR setting capability\n");
145 return -ENOSYS;
149 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
150 if (ret) {
151 return ret;
154 sregs.pvr = cenv->spr[SPR_PVR];
155 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
158 /* Set up a shared TLB array with KVM */
159 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
161 CPUPPCState *env = &cpu->env;
162 CPUState *cs = CPU(cpu);
163 struct kvm_book3e_206_tlb_params params = {};
164 struct kvm_config_tlb cfg = {};
165 unsigned int entries = 0;
166 int ret, i;
168 if (!kvm_enabled() ||
169 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
170 return 0;
173 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
175 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
176 params.tlb_sizes[i] = booke206_tlb_size(env, i);
177 params.tlb_ways[i] = booke206_tlb_ways(env, i);
178 entries += params.tlb_sizes[i];
181 assert(entries == env->nb_tlb);
182 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
184 env->tlb_dirty = true;
186 cfg.array = (uintptr_t)env->tlb.tlbm;
187 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
188 cfg.params = (uintptr_t)&params;
189 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
191 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
192 if (ret < 0) {
193 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
194 __func__, strerror(-ret));
195 return ret;
198 env->kvm_sw_tlb = true;
199 return 0;
203 #if defined(TARGET_PPC64)
204 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
205 struct kvm_ppc_smmu_info *info)
207 CPUPPCState *env = &cpu->env;
208 CPUState *cs = CPU(cpu);
210 memset(info, 0, sizeof(*info));
212 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so we
213 * need to "guess" what the supported page sizes are.
215 * For that to work we make a few assumptions:
217 * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
218 * KVM which only supports 4K and 16M pages, but supports them
219 * regardless of the backing store characteristics. We also don't
220 * support 1T segments.
222 * This is safe because if HV KVM ever supports that capability or PR
223 * KVM grows support for more page/segment sizes, those versions
224 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
225 * will not hit this fallback.
227 * - Else we are running HV KVM. This means we only support page
228 * sizes that fit in the backing store. Additionally we only
229 * advertise 64K pages if the processor is ARCH 2.06 and we assume
230 * P7 encodings for the SLB and hash table. Here too, we assume
231 * support for any newer processor will mean a kernel that
232 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
233 * this fallback.
235 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
236 /* No flags */
237 info->flags = 0;
238 info->slb_size = 64;
240 /* Standard 4k base page size segment */
241 info->sps[0].page_shift = 12;
242 info->sps[0].slb_enc = 0;
243 info->sps[0].enc[0].page_shift = 12;
244 info->sps[0].enc[0].pte_enc = 0;
246 /* Standard 16M large page size segment */
247 info->sps[1].page_shift = 24;
248 info->sps[1].slb_enc = SLB_VSID_L;
249 info->sps[1].enc[0].page_shift = 24;
250 info->sps[1].enc[0].pte_enc = 0;
251 } else {
252 int i = 0;
254 /* HV KVM has backing store size restrictions */
255 info->flags = KVM_PPC_PAGE_SIZES_REAL;
257 if (env->mmu_model & POWERPC_MMU_1TSEG) {
258 info->flags |= KVM_PPC_1T_SEGMENTS;
261 if (env->mmu_model == POWERPC_MMU_2_06) {
262 info->slb_size = 32;
263 } else {
264 info->slb_size = 64;
267 /* Standard 4k base page size segment */
268 info->sps[i].page_shift = 12;
269 info->sps[i].slb_enc = 0;
270 info->sps[i].enc[0].page_shift = 12;
271 info->sps[i].enc[0].pte_enc = 0;
272 i++;
274 /* 64K on MMU 2.06 */
275 if (env->mmu_model == POWERPC_MMU_2_06) {
276 info->sps[i].page_shift = 16;
277 info->sps[i].slb_enc = 0x110;
278 info->sps[i].enc[0].page_shift = 16;
279 info->sps[i].enc[0].pte_enc = 1;
280 i++;
283 /* Standard 16M large page size segment */
284 info->sps[i].page_shift = 24;
285 info->sps[i].slb_enc = SLB_VSID_L;
286 info->sps[i].enc[0].page_shift = 24;
287 info->sps[i].enc[0].pte_enc = 0;
291 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
293 CPUState *cs = CPU(cpu);
294 int ret;
296 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
297 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
298 if (ret == 0) {
299 return;
303 kvm_get_fallback_smmu_info(cpu, info);
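/* Return the page size backing guest RAM: the huge page size when mem_path points at a hugetlbfs mount, the normal host page size otherwise */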
306 static long getrampagesize(void)
308 struct statfs fs;
309 int ret;
311 if (!mem_path) {
312 /* guest RAM is backed by normal anonymous pages */
313 return getpagesize();
316 do {
317 ret = statfs(mem_path, &fs);
318 } while (ret != 0 && errno == EINTR);
320 if (ret != 0) {
321 fprintf(stderr, "Couldn't statfs() memory path: %s\n",
322 strerror(errno));
323 exit(1);
326 #define HUGETLBFS_MAGIC 0x958458f6
328 if (fs.f_type != HUGETLBFS_MAGIC) {
329 /* Explicit mempath, but it's ordinary pages */
330 return getpagesize();
333 /* It's a hugetlbfs mount, return the huge page size */
334 return fs.f_bsize;
337 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
339 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
340 return true;
343 return (1ul << shift) <= rampgsize;
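/* Filter the CPU's advertised segment/page sizes down to what KVM and the RAM backing page size actually allow, and fix up SLB size and 1T-segment support */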
346 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
348 static struct kvm_ppc_smmu_info smmu_info;
349 static bool has_smmu_info;
350 CPUPPCState *env = &cpu->env;
351 long rampagesize;
352 int iq, ik, jq, jk;
354 /* We only handle page sizes for 64-bit server guests for now */
355 if (!(env->mmu_model & POWERPC_MMU_64)) {
356 return;
359 /* Collect MMU info from kernel if not already */
360 if (!has_smmu_info) {
361 kvm_get_smmu_info(cpu, &smmu_info);
362 has_smmu_info = true;
365 rampagesize = getrampagesize();
367 /* Convert to QEMU form */
368 memset(&env->sps, 0, sizeof(env->sps));
371 * XXX This loop should be an entry wide AND of the capabilities that
372 * the selected CPU has with the capabilities that KVM supports.
374 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
375 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
376 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
378 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
379 ksps->page_shift)) {
380 continue;
382 qsps->page_shift = ksps->page_shift;
383 qsps->slb_enc = ksps->slb_enc;
384 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
385 if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
386 ksps->enc[jk].page_shift)) {
387 continue;
389 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
390 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
391 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
392 break;
395 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
396 break;
399 env->slb_nr = smmu_info.slb_size;
400 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
401 env->mmu_model &= ~POWERPC_MMU_1TSEG;
404 #else /* defined (TARGET_PPC64) */
406 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
410 #endif /* !defined (TARGET_PPC64) */
412 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
414 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
417 /* e500 supports 2 h/w breakpoints and 2 watchpoints.
418 * book3s supports only 1 h/w watchpoint, so an array size
419 * of 4 is sufficient for now.
421 #define MAX_HW_BKPTS 4
423 static struct HWBreakpoint {
424 target_ulong addr;
425 int type;
426 } hw_debug_points[MAX_HW_BKPTS];
428 static CPUWatchpoint hw_watchpoint;
430 /* By default no breakpoints or watchpoints are supported */
431 static int max_hw_breakpoint;
432 static int max_hw_watchpoint;
433 static int nb_hw_breakpoint;
434 static int nb_hw_watchpoint;
436 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
438 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
439 max_hw_breakpoint = 2;
440 max_hw_watchpoint = 2;
443 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
444 fprintf(stderr, "Error initializing h/w breakpoints\n");
445 return;
449 int kvm_arch_init_vcpu(CPUState *cs)
451 PowerPCCPU *cpu = POWERPC_CPU(cs);
452 CPUPPCState *cenv = &cpu->env;
453 int ret;
455 /* Gather server mmu info from KVM and update the CPU state */
456 kvm_fixup_page_sizes(cpu);
458 /* Synchronize sregs with kvm */
459 ret = kvm_arch_sync_sregs(cpu);
460 if (ret) {
461 return ret;
464 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
466 /* Some targets support access to KVM's guest TLB. */
467 switch (cenv->mmu_model) {
468 case POWERPC_MMU_BOOKE206:
469 ret = kvm_booke206_tlb_init(cpu);
470 break;
471 default:
472 break;
475 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
476 kvmppc_hw_debug_points_init(cenv);
478 return ret;
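/* Push the whole QEMU-side shadow TLB to KVM by marking every entry dirty */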
481 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
483 CPUPPCState *env = &cpu->env;
484 CPUState *cs = CPU(cpu);
485 struct kvm_dirty_tlb dirty_tlb;
486 unsigned char *bitmap;
487 int ret;
489 if (!env->kvm_sw_tlb) {
490 return;
493 bitmap = g_malloc((env->nb_tlb + 7) / 8);
494 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
496 dirty_tlb.bitmap = (uintptr_t)bitmap;
497 dirty_tlb.num_dirty = env->nb_tlb;
499 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
500 if (ret) {
501 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
502 __func__, strerror(-ret));
505 g_free(bitmap);
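/* Read one SPR from KVM via the ONE_REG interface into env->spr[] */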
508 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
510 PowerPCCPU *cpu = POWERPC_CPU(cs);
511 CPUPPCState *env = &cpu->env;
512 union {
513 uint32_t u32;
514 uint64_t u64;
515 } val;
516 struct kvm_one_reg reg = {
517 .id = id,
518 .addr = (uintptr_t) &val,
520 int ret;
522 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
523 if (ret != 0) {
524 trace_kvm_failed_spr_get(spr, strerror(errno));
525 } else {
526 switch (id & KVM_REG_SIZE_MASK) {
527 case KVM_REG_SIZE_U32:
528 env->spr[spr] = val.u32;
529 break;
531 case KVM_REG_SIZE_U64:
532 env->spr[spr] = val.u64;
533 break;
535 default:
536 /* Don't handle this size yet */
537 abort();
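/* Write one SPR from env->spr[] to KVM via the ONE_REG interface */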
542 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
544 PowerPCCPU *cpu = POWERPC_CPU(cs);
545 CPUPPCState *env = &cpu->env;
546 union {
547 uint32_t u32;
548 uint64_t u64;
549 } val;
550 struct kvm_one_reg reg = {
551 .id = id,
552 .addr = (uintptr_t) &val,
554 int ret;
556 switch (id & KVM_REG_SIZE_MASK) {
557 case KVM_REG_SIZE_U32:
558 val.u32 = env->spr[spr];
559 break;
561 case KVM_REG_SIZE_U64:
562 val.u64 = env->spr[spr];
563 break;
565 default:
566 /* Don't handle this size yet */
567 abort();
570 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
571 if (ret != 0) {
572 trace_kvm_failed_spr_set(spr, strerror(errno));
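/* Upload FPSCR and the FP/VSX registers, plus VSCR and the Altivec registers when present, to KVM */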
576 static int kvm_put_fp(CPUState *cs)
578 PowerPCCPU *cpu = POWERPC_CPU(cs);
579 CPUPPCState *env = &cpu->env;
580 struct kvm_one_reg reg;
581 int i;
582 int ret;
584 if (env->insns_flags & PPC_FLOAT) {
585 uint64_t fpscr = env->fpscr;
586 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
588 reg.id = KVM_REG_PPC_FPSCR;
589 reg.addr = (uintptr_t)&fpscr;
590 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
591 if (ret < 0) {
592 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
593 return ret;
596 for (i = 0; i < 32; i++) {
597 uint64_t vsr[2];
599 vsr[0] = float64_val(env->fpr[i]);
600 vsr[1] = env->vsr[i];
601 reg.addr = (uintptr_t) &vsr;
602 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
604 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
605 if (ret < 0) {
606 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
607 i, strerror(errno));
608 return ret;
613 if (env->insns_flags & PPC_ALTIVEC) {
614 reg.id = KVM_REG_PPC_VSCR;
615 reg.addr = (uintptr_t)&env->vscr;
616 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
617 if (ret < 0) {
618 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
619 return ret;
622 for (i = 0; i < 32; i++) {
623 reg.id = KVM_REG_PPC_VR(i);
624 reg.addr = (uintptr_t)&env->avr[i];
625 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
626 if (ret < 0) {
627 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
628 return ret;
633 return 0;
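/* Fetch FPSCR and the FP/VSX registers, plus VSCR and the Altivec registers when present, from KVM */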
636 static int kvm_get_fp(CPUState *cs)
638 PowerPCCPU *cpu = POWERPC_CPU(cs);
639 CPUPPCState *env = &cpu->env;
640 struct kvm_one_reg reg;
641 int i;
642 int ret;
644 if (env->insns_flags & PPC_FLOAT) {
645 uint64_t fpscr;
646 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
648 reg.id = KVM_REG_PPC_FPSCR;
649 reg.addr = (uintptr_t)&fpscr;
650 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
651 if (ret < 0) {
652 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
653 return ret;
654 } else {
655 env->fpscr = fpscr;
658 for (i = 0; i < 32; i++) {
659 uint64_t vsr[2];
661 reg.addr = (uintptr_t) &vsr;
662 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
664 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
665 if (ret < 0) {
666 DPRINTF("Unable to get %s%d from KVM: %s\n",
667 vsx ? "VSR" : "FPR", i, strerror(errno));
668 return ret;
669 } else {
670 env->fpr[i] = vsr[0];
671 if (vsx) {
672 env->vsr[i] = vsr[1];
678 if (env->insns_flags & PPC_ALTIVEC) {
679 reg.id = KVM_REG_PPC_VSCR;
680 reg.addr = (uintptr_t)&env->vscr;
681 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
682 if (ret < 0) {
683 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
684 return ret;
687 for (i = 0; i < 32; i++) {
688 reg.id = KVM_REG_PPC_VR(i);
689 reg.addr = (uintptr_t)&env->avr[i];
690 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
691 if (ret < 0) {
692 DPRINTF("Unable to get VR%d from KVM: %s\n",
693 i, strerror(errno));
694 return ret;
699 return 0;
702 #if defined(TARGET_PPC64)
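/* Read the VPA, SLB shadow and dispatch trace log registration state from KVM */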
703 static int kvm_get_vpa(CPUState *cs)
705 PowerPCCPU *cpu = POWERPC_CPU(cs);
706 CPUPPCState *env = &cpu->env;
707 struct kvm_one_reg reg;
708 int ret;
710 reg.id = KVM_REG_PPC_VPA_ADDR;
711 reg.addr = (uintptr_t)&env->vpa_addr;
712 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
713 if (ret < 0) {
714 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
715 return ret;
718 assert((uintptr_t)&env->slb_shadow_size
719 == ((uintptr_t)&env->slb_shadow_addr + 8));
720 reg.id = KVM_REG_PPC_VPA_SLB;
721 reg.addr = (uintptr_t)&env->slb_shadow_addr;
722 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
723 if (ret < 0) {
724 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
725 strerror(errno));
726 return ret;
729 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
730 reg.id = KVM_REG_PPC_VPA_DTL;
731 reg.addr = (uintptr_t)&env->dtl_addr;
732 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
733 if (ret < 0) {
734 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
735 strerror(errno));
736 return ret;
739 return 0;
742 static int kvm_put_vpa(CPUState *cs)
744 PowerPCCPU *cpu = POWERPC_CPU(cs);
745 CPUPPCState *env = &cpu->env;
746 struct kvm_one_reg reg;
747 int ret;
749 /* SLB shadow or DTL can't be registered unless a master VPA is
750 * registered. That means when restoring state, if a VPA *is*
751 * registered, we need to set that up first. If not, we need to
752 * deregister the others before deregistering the master VPA */
753 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
755 if (env->vpa_addr) {
756 reg.id = KVM_REG_PPC_VPA_ADDR;
757 reg.addr = (uintptr_t)&env->vpa_addr;
758 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
759 if (ret < 0) {
760 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
761 return ret;
765 assert((uintptr_t)&env->slb_shadow_size
766 == ((uintptr_t)&env->slb_shadow_addr + 8));
767 reg.id = KVM_REG_PPC_VPA_SLB;
768 reg.addr = (uintptr_t)&env->slb_shadow_addr;
769 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
770 if (ret < 0) {
771 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
772 return ret;
775 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
776 reg.id = KVM_REG_PPC_VPA_DTL;
777 reg.addr = (uintptr_t)&env->dtl_addr;
778 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
779 if (ret < 0) {
780 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
781 strerror(errno));
782 return ret;
785 if (!env->vpa_addr) {
786 reg.id = KVM_REG_PPC_VPA_ADDR;
787 reg.addr = (uintptr_t)&env->vpa_addr;
788 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
789 if (ret < 0) {
790 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
791 return ret;
795 return 0;
797 #endif /* TARGET_PPC64 */
799 int kvm_arch_put_registers(CPUState *cs, int level)
801 PowerPCCPU *cpu = POWERPC_CPU(cs);
802 CPUPPCState *env = &cpu->env;
803 struct kvm_regs regs;
804 int ret;
805 int i;
807 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
808 if (ret < 0) {
809 return ret;
812 regs.ctr = env->ctr;
813 regs.lr = env->lr;
814 regs.xer = cpu_read_xer(env);
815 regs.msr = env->msr;
816 regs.pc = env->nip;
818 regs.srr0 = env->spr[SPR_SRR0];
819 regs.srr1 = env->spr[SPR_SRR1];
821 regs.sprg0 = env->spr[SPR_SPRG0];
822 regs.sprg1 = env->spr[SPR_SPRG1];
823 regs.sprg2 = env->spr[SPR_SPRG2];
824 regs.sprg3 = env->spr[SPR_SPRG3];
825 regs.sprg4 = env->spr[SPR_SPRG4];
826 regs.sprg5 = env->spr[SPR_SPRG5];
827 regs.sprg6 = env->spr[SPR_SPRG6];
828 regs.sprg7 = env->spr[SPR_SPRG7];
830 regs.pid = env->spr[SPR_BOOKE_PID];
832 for (i = 0;i < 32; i++)
833 regs.gpr[i] = env->gpr[i];
835 regs.cr = 0;
836 for (i = 0; i < 8; i++) {
837 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
840 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
841 if (ret < 0)
842 return ret;
844 kvm_put_fp(cs);
846 if (env->tlb_dirty) {
847 kvm_sw_tlb_put(cpu);
848 env->tlb_dirty = false;
851 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
852 struct kvm_sregs sregs;
854 sregs.pvr = env->spr[SPR_PVR];
856 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
858 /* Sync SLB */
859 #ifdef TARGET_PPC64
860 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
861 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
862 if (env->slb[i].esid & SLB_ESID_V) {
863 sregs.u.s.ppc64.slb[i].slbe |= i;
865 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
867 #endif
869 /* Sync SRs */
870 for (i = 0; i < 16; i++) {
871 sregs.u.s.ppc32.sr[i] = env->sr[i];
874 /* Sync BATs */
875 for (i = 0; i < 8; i++) {
876 /* Beware. We have to swap upper and lower bits here */
877 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
878 | env->DBAT[1][i];
879 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
880 | env->IBAT[1][i];
883 ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
884 if (ret) {
885 return ret;
889 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
890 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
893 if (cap_one_reg) {
894 int i;
896 /* We deliberately ignore errors here: for kernels which have
897 * the ONE_REG calls but don't support the specific
898 * registers, there's a reasonable chance things will still
899 * work, at least until we try to migrate. */
900 for (i = 0; i < 1024; i++) {
901 uint64_t id = env->spr_cb[i].one_reg_id;
903 if (id != 0) {
904 kvm_put_one_spr(cs, id, i);
908 #ifdef TARGET_PPC64
909 if (msr_ts) {
910 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
911 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
913 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
914 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
916 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
917 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
918 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
919 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
920 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
921 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
922 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
923 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
924 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
925 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
928 if (cap_papr) {
929 if (kvm_put_vpa(cs) < 0) {
930 DPRINTF("Warning: Unable to set VPA information to KVM\n");
934 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
935 #endif /* TARGET_PPC64 */
938 return ret;
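/* Recompute a BookE exception vector address as the IVOR offset plus the IVPR base */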
941 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
943 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
946 int kvm_arch_get_registers(CPUState *cs)
948 PowerPCCPU *cpu = POWERPC_CPU(cs);
949 CPUPPCState *env = &cpu->env;
950 struct kvm_regs regs;
951 struct kvm_sregs sregs;
952 uint32_t cr;
953 int i, ret;
955 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
956 if (ret < 0)
957 return ret;
959 cr = regs.cr;
960 for (i = 7; i >= 0; i--) {
961 env->crf[i] = cr & 15;
962 cr >>= 4;
965 env->ctr = regs.ctr;
966 env->lr = regs.lr;
967 cpu_write_xer(env, regs.xer);
968 env->msr = regs.msr;
969 env->nip = regs.pc;
971 env->spr[SPR_SRR0] = regs.srr0;
972 env->spr[SPR_SRR1] = regs.srr1;
974 env->spr[SPR_SPRG0] = regs.sprg0;
975 env->spr[SPR_SPRG1] = regs.sprg1;
976 env->spr[SPR_SPRG2] = regs.sprg2;
977 env->spr[SPR_SPRG3] = regs.sprg3;
978 env->spr[SPR_SPRG4] = regs.sprg4;
979 env->spr[SPR_SPRG5] = regs.sprg5;
980 env->spr[SPR_SPRG6] = regs.sprg6;
981 env->spr[SPR_SPRG7] = regs.sprg7;
983 env->spr[SPR_BOOKE_PID] = regs.pid;
985 for (i = 0;i < 32; i++)
986 env->gpr[i] = regs.gpr[i];
988 kvm_get_fp(cs);
990 if (cap_booke_sregs) {
991 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
992 if (ret < 0) {
993 return ret;
996 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
997 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
998 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
999 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1000 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1001 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1002 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1003 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1004 env->spr[SPR_DECR] = sregs.u.e.dec;
1005 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1006 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1007 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1010 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1011 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1012 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1013 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1014 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1015 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1018 if (sregs.u.e.features & KVM_SREGS_E_64) {
1019 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1022 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1023 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1026 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1027 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1028 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1029 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1030 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1031 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1032 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1033 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1034 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1035 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1036 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1037 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1038 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1039 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1040 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1041 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1042 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1043 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1044 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1045 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1046 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1047 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1048 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1049 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1050 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1051 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1052 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1053 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1054 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1055 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1056 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1057 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1058 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1060 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1061 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1062 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1063 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1064 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1065 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1066 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1069 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1070 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1071 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1074 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1075 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1076 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1077 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1078 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1082 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1083 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1084 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1085 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1086 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1087 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1088 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1089 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1090 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1091 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1092 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1095 if (sregs.u.e.features & KVM_SREGS_EXP) {
1096 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1099 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1100 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1101 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1104 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1105 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1106 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1107 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1109 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1110 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1111 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1116 if (cap_segstate) {
1117 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1118 if (ret < 0) {
1119 return ret;
1122 if (!env->external_htab) {
1123 ppc_store_sdr1(env, sregs.u.s.sdr1);
1126 /* Sync SLB */
1127 #ifdef TARGET_PPC64
1129 * The packed SLB array we get from KVM_GET_SREGS only contains
1130 * information about valid entries. So we flush our internal
1131 * copy to get rid of stale ones, then put all valid SLB entries
1132 * back in.
1134 memset(env->slb, 0, sizeof(env->slb));
1135 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1136 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1137 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1139 * Only restore valid entries
1141 if (rb & SLB_ESID_V) {
1142 ppc_store_slb(env, rb, rs);
1145 #endif
1147 /* Sync SRs */
1148 for (i = 0; i < 16; i++) {
1149 env->sr[i] = sregs.u.s.ppc32.sr[i];
1152 /* Sync BATs */
1153 for (i = 0; i < 8; i++) {
1154 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1155 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1156 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1157 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1161 if (cap_hior) {
1162 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1165 if (cap_one_reg) {
1166 int i;
1168 /* We deliberately ignore errors here: for kernels which have
1169 * the ONE_REG calls but don't support the specific
1170 * registers, there's a reasonable chance things will still
1171 * work, at least until we try to migrate. */
1172 for (i = 0; i < 1024; i++) {
1173 uint64_t id = env->spr_cb[i].one_reg_id;
1175 if (id != 0) {
1176 kvm_get_one_spr(cs, id, i);
1180 #ifdef TARGET_PPC64
1181 if (msr_ts) {
1182 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1183 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1185 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1186 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1188 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1189 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1190 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1191 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1192 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1193 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1194 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1195 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1196 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1197 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1200 if (cap_papr) {
1201 if (kvm_get_vpa(cs) < 0) {
1202 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1206 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1207 #endif
1210 return 0;
1213 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1215 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1217 if (irq != PPC_INTERRUPT_EXT) {
1218 return 0;
1221 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1222 return 0;
1225 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1227 return 0;
1230 #if defined(TARGET_PPCEMB)
1231 #define PPC_INPUT_INT PPC40x_INPUT_INT
1232 #elif defined(TARGET_PPC64)
1233 #define PPC_INPUT_INT PPC970_INPUT_INT
1234 #else
1235 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1236 #endif
1238 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1240 PowerPCCPU *cpu = POWERPC_CPU(cs);
1241 CPUPPCState *env = &cpu->env;
1242 int r;
1243 unsigned irq;
1245 qemu_mutex_lock_iothread();
1247 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1248 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1249 if (!cap_interrupt_level &&
1250 run->ready_for_interrupt_injection &&
1251 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1252 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1254 /* For now KVM disregards the 'irq' argument. However, in the
1255 * future KVM could cache it in-kernel to avoid a heavyweight exit
1256 * when reading the UIC.
1258 irq = KVM_INTERRUPT_SET;
1260 DPRINTF("injected interrupt %d\n", irq);
1261 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1262 if (r < 0) {
1263 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1266 /* Always wake up soon in case the interrupt was level based */
1267 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1268 (get_ticks_per_sec() / 50));
1271 /* We don't know if there are more interrupts pending after this. However,
1272 * the guest will return to userspace in the course of handling this one
1273 * anyway, so we will get a chance to deliver the rest. */
1275 qemu_mutex_unlock_iothread();
1278 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1280 return MEMTXATTRS_UNSPECIFIED;
1283 int kvm_arch_process_async_events(CPUState *cs)
1285 return cs->halted;
1288 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1290 CPUState *cs = CPU(cpu);
1291 CPUPPCState *env = &cpu->env;
1293 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1294 cs->halted = 1;
1295 cs->exception_index = EXCP_HLT;
1298 return 0;
1301 /* map dcr access to existing qemu dcr emulation */
1302 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1304 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1305 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1307 return 0;
1310 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1312 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1313 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1315 return 0;
1318 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1320 /* Mixed endian case is not handled */
1321 uint32_t sc = debug_inst_opcode;
1323 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1324 sizeof(sc), 0) ||
1325 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1326 return -EINVAL;
1329 return 0;
1332 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1334 uint32_t sc;
1336 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1337 sc != debug_inst_opcode ||
1338 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1339 sizeof(sc), 1)) {
1340 return -EINVAL;
1343 return 0;
1346 static int find_hw_breakpoint(target_ulong addr, int type)
1348 int n;
1350 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1351 <= ARRAY_SIZE(hw_debug_points));
1353 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1354 if (hw_debug_points[n].addr == addr &&
1355 hw_debug_points[n].type == type) {
1356 return n;
1360 return -1;
1363 static int find_hw_watchpoint(target_ulong addr, int *flag)
1365 int n;
1367 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1368 if (n >= 0) {
1369 *flag = BP_MEM_ACCESS;
1370 return n;
1373 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1374 if (n >= 0) {
1375 *flag = BP_MEM_WRITE;
1376 return n;
1379 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1380 if (n >= 0) {
1381 *flag = BP_MEM_READ;
1382 return n;
1385 return -1;
1388 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1389 target_ulong len, int type)
1391 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1392 return -ENOBUFS;
1395 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1396 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1398 switch (type) {
1399 case GDB_BREAKPOINT_HW:
1400 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1401 return -ENOBUFS;
1404 if (find_hw_breakpoint(addr, type) >= 0) {
1405 return -EEXIST;
1408 nb_hw_breakpoint++;
1409 break;
1411 case GDB_WATCHPOINT_WRITE:
1412 case GDB_WATCHPOINT_READ:
1413 case GDB_WATCHPOINT_ACCESS:
1414 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1415 return -ENOBUFS;
1418 if (find_hw_breakpoint(addr, type) >= 0) {
1419 return -EEXIST;
1422 nb_hw_watchpoint++;
1423 break;
1425 default:
1426 return -ENOSYS;
1429 return 0;
1432 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1433 target_ulong len, int type)
1435 int n;
1437 n = find_hw_breakpoint(addr, type);
1438 if (n < 0) {
1439 return -ENOENT;
1442 switch (type) {
1443 case GDB_BREAKPOINT_HW:
1444 nb_hw_breakpoint--;
1445 break;
1447 case GDB_WATCHPOINT_WRITE:
1448 case GDB_WATCHPOINT_READ:
1449 case GDB_WATCHPOINT_ACCESS:
1450 nb_hw_watchpoint--;
1451 break;
1453 default:
1454 return -ENOSYS;
1456 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1458 return 0;
1461 void kvm_arch_remove_all_hw_breakpoints(void)
1463 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1466 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1468 int n;
1470 /* Software Breakpoint updates */
1471 if (kvm_sw_breakpoints_active(cs)) {
1472 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1475 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1476 <= ARRAY_SIZE(hw_debug_points));
1477 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1479 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1480 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1481 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1482 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1483 switch (hw_debug_points[n].type) {
1484 case GDB_BREAKPOINT_HW:
1485 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1486 break;
1487 case GDB_WATCHPOINT_WRITE:
1488 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1489 break;
1490 case GDB_WATCHPOINT_READ:
1491 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1492 break;
1493 case GDB_WATCHPOINT_ACCESS:
1494 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1495 KVMPPC_DEBUG_WATCH_READ;
1496 break;
1497 default:
1498 cpu_abort(cs, "Unsupported breakpoint type\n");
1500 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1505 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1507 CPUState *cs = CPU(cpu);
1508 CPUPPCState *env = &cpu->env;
1509 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1510 int handle = 0;
1511 int n;
1512 int flag = 0;
1514 if (cs->singlestep_enabled) {
1515 handle = 1;
1516 } else if (arch_info->status) {
1517 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1518 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1519 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1520 if (n >= 0) {
1521 handle = 1;
1523 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1524 KVMPPC_DEBUG_WATCH_WRITE)) {
1525 n = find_hw_watchpoint(arch_info->address, &flag);
1526 if (n >= 0) {
1527 handle = 1;
1528 cs->watchpoint_hit = &hw_watchpoint;
1529 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1530 hw_watchpoint.flags = flag;
1534 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1535 handle = 1;
1536 } else {
1537 /* QEMU is not able to handle the debug exception, so inject
1538 * a program exception into the guest;
1539 * yes, a program exception, NOT a debug exception !!
1540 * When QEMU is using debug resources then the debug exception must
1541 * always be set. To achieve this we set MSR_DE and also set
1542 * MSRP_DEP so the guest cannot change MSR_DE.
1543 * When emulating debug resources for the guest we want the guest
1544 * to control MSR_DE (enable/disable the debug interrupt on demand).
1545 * Supporting both configurations at once is NOT possible.
1546 * So the result is that we cannot share debug resources
1547 * between QEMU and the guest on the BookE architecture.
1548 * In the current design QEMU gets priority over the guest:
1549 * if QEMU is using debug resources then the guest
1550 * cannot use them.
1551 * For software breakpoints QEMU uses a privileged instruction,
1552 * so there is no way we can be here because the guest set a
1553 * debug exception; the only possibility is that the guest executed
1554 * a privileged / illegal instruction, and that is why we are
1555 * injecting a program interrupt.
1558 cpu_synchronize_state(cs);
1559 /* env->nip is PC, so increment this by 4 to use
1560 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1562 env->nip += 4;
1563 cs->exception_index = POWERPC_EXCP_PROGRAM;
1564 env->error_code = POWERPC_EXCP_INVAL;
1565 ppc_cpu_do_interrupt(cs);
1568 return handle;
1571 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1573 PowerPCCPU *cpu = POWERPC_CPU(cs);
1574 CPUPPCState *env = &cpu->env;
1575 int ret;
1577 qemu_mutex_lock_iothread();
1579 switch (run->exit_reason) {
1580 case KVM_EXIT_DCR:
1581 if (run->dcr.is_write) {
1582 DPRINTF("handle dcr write\n");
1583 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1584 } else {
1585 DPRINTF("handle dcr read\n");
1586 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1588 break;
1589 case KVM_EXIT_HLT:
1590 DPRINTF("handle halt\n");
1591 ret = kvmppc_handle_halt(cpu);
1592 break;
1593 #if defined(TARGET_PPC64)
1594 case KVM_EXIT_PAPR_HCALL:
1595 DPRINTF("handle PAPR hypercall\n");
1596 run->papr_hcall.ret = spapr_hypercall(cpu,
1597 run->papr_hcall.nr,
1598 run->papr_hcall.args);
1599 ret = 0;
1600 break;
1601 #endif
1602 case KVM_EXIT_EPR:
1603 DPRINTF("handle epr\n");
1604 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1605 ret = 0;
1606 break;
1607 case KVM_EXIT_WATCHDOG:
1608 DPRINTF("handle watchdog expiry\n");
1609 watchdog_perform_action();
1610 ret = 0;
1611 break;
1613 case KVM_EXIT_DEBUG:
1614 DPRINTF("handle debug exception\n");
1615 if (kvm_handle_debug(cpu, run)) {
1616 ret = EXCP_DEBUG;
1617 break;
1619 /* re-enter, this exception was guest-internal */
1620 ret = 0;
1621 break;
1623 default:
1624 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1625 ret = -1;
1626 break;
1629 qemu_mutex_unlock_iothread();
1630 return ret;
1633 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1635 CPUState *cs = CPU(cpu);
1636 uint32_t bits = tsr_bits;
1637 struct kvm_one_reg reg = {
1638 .id = KVM_REG_PPC_OR_TSR,
1639 .addr = (uintptr_t) &bits,
1642 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1645 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1648 CPUState *cs = CPU(cpu);
1649 uint32_t bits = tsr_bits;
1650 struct kvm_one_reg reg = {
1651 .id = KVM_REG_PPC_CLEAR_TSR,
1652 .addr = (uintptr_t) &bits,
1655 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1658 int kvmppc_set_tcr(PowerPCCPU *cpu)
1660 CPUState *cs = CPU(cpu);
1661 CPUPPCState *env = &cpu->env;
1662 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1664 struct kvm_one_reg reg = {
1665 .id = KVM_REG_PPC_TCR,
1666 .addr = (uintptr_t) &tcr,
1669 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1672 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1674 CPUState *cs = CPU(cpu);
1675 int ret;
1677 if (!kvm_enabled()) {
1678 return -1;
1681 if (!cap_ppc_watchdog) {
1682 printf("warning: KVM does not support watchdog\n");
1683 return -1;
1686 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1687 if (ret < 0) {
1688 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1689 __func__, strerror(-ret));
1690 return ret;
1693 return ret;
1696 static int read_cpuinfo(const char *field, char *value, int len)
1698 FILE *f;
1699 int ret = -1;
1700 int field_len = strlen(field);
1701 char line[512];
1703 f = fopen("/proc/cpuinfo", "r");
1704 if (!f) {
1705 return -1;
1708 do {
1709 if (!fgets(line, sizeof(line), f)) {
1710 break;
1712 if (!strncmp(line, field, field_len)) {
1713 pstrcpy(value, len, line);
1714 ret = 0;
1715 break;
1717 } while(*line);
1719 fclose(f);
1721 return ret;
1724 uint32_t kvmppc_get_tbfreq(void)
1726 char line[512];
1727 char *ns;
1728 uint32_t retval = get_ticks_per_sec();
1730 if (read_cpuinfo("timebase", line, sizeof(line))) {
1731 return retval;
1734 if (!(ns = strchr(line, ':'))) {
1735 return retval;
1738 ns++;
1740 retval = atoi(ns);
1741 return retval;
1744 bool kvmppc_get_host_serial(char **value)
1746 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1747 NULL);
1750 bool kvmppc_get_host_model(char **value)
1752 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1755 /* Try to find a device tree node for a CPU with clock-frequency property */
1756 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1758 struct dirent *dirp;
1759 DIR *dp;
1761 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1762 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1763 return -1;
1766 buf[0] = '\0';
1767 while ((dirp = readdir(dp)) != NULL) {
1768 FILE *f;
1769 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1770 dirp->d_name);
1771 f = fopen(buf, "r");
1772 if (f) {
1773 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1774 fclose(f);
1775 break;
1777 buf[0] = '\0';
1779 closedir(dp);
1780 if (buf[0] == '\0') {
1781 printf("Unknown host!\n");
1782 return -1;
1785 return 0;
1788 /* Read a CPU node property from the host device tree that's a single
1789 * integer (32-bit or 64-bit). Returns -1 if it can't find or open
1790 * the property, and 0 if it doesn't understand the
1791 * format */
1792 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1794 char buf[PATH_MAX], *tmp;
1795 union {
1796 uint32_t v32;
1797 uint64_t v64;
1798 } u;
1799 FILE *f;
1800 int len;
1802 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1803 return -1;
1806 tmp = g_strdup_printf("%s/%s", buf, propname);
1808 f = fopen(tmp, "rb");
1809 g_free(tmp);
1810 if (!f) {
1811 return -1;
1814 len = fread(&u, 1, sizeof(u), f);
1815 fclose(f);
1816 switch (len) {
1817 case 4:
1818 /* property is a 32-bit quantity */
1819 return be32_to_cpu(u.v32);
1820 case 8:
1821 return be64_to_cpu(u.v64);
1824 return 0;
1827 uint64_t kvmppc_get_clockfreq(void)
1829 return kvmppc_read_int_cpu_dt("clock-frequency");
1832 uint32_t kvmppc_get_vmx(void)
1834 return kvmppc_read_int_cpu_dt("ibm,vmx");
1837 uint32_t kvmppc_get_dfp(void)
1839 return kvmppc_read_int_cpu_dt("ibm,dfp");
1842 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1844 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1845 CPUState *cs = CPU(cpu);
1847 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1848 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1849 return 0;
1852 return 1;
1855 int kvmppc_get_hasidle(CPUPPCState *env)
1857 struct kvm_ppc_pvinfo pvinfo;
1859 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1860 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1861 return 1;
1864 return 0;
1867 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1869 uint32_t *hc = (uint32_t*)buf;
1870 struct kvm_ppc_pvinfo pvinfo;
1872 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1873 memcpy(buf, pvinfo.hcall, buf_len);
1874 return 0;
1878 * Fallback to always fail hypercalls regardless of endianness:
1880 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1881 * li r3, -1
1882 * b .+8 (becomes nop in wrong endian)
1883 * bswap32(li r3, -1)
1886 hc[0] = cpu_to_be32(0x08000048);
1887 hc[1] = cpu_to_be32(0x3860ffff);
1888 hc[2] = cpu_to_be32(0x48000008);
1889 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1891 return 0;
1894 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1896 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
1899 void kvmppc_enable_logical_ci_hcalls(void)
1902 * FIXME: it would be nice if we could detect the cases where
1903 * we're using a device which requires the in-kernel
1904 * implementation of these hcalls but the kernel lacks them, and
1905 * produce a warning.
1907 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
1908 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
1911 void kvmppc_set_papr(PowerPCCPU *cpu)
1913 CPUState *cs = CPU(cpu);
1914 int ret;
1916 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1917 if (ret) {
1918 cpu_abort(cs, "This KVM version does not support PAPR\n");
1921 /* Update the capability flag so we sync the right information
1922 * with kvm */
1923 cap_papr = 1;
1926 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
1928 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
1931 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1933 CPUState *cs = CPU(cpu);
1934 int ret;
1936 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1937 if (ret && mpic_proxy) {
1938 cpu_abort(cs, "This KVM version does not support EPR\n");
1942 int kvmppc_smt_threads(void)
1944 return cap_ppc_smt ? cap_ppc_smt : 1;
1947 #ifdef TARGET_PPC64
1948 off_t kvmppc_alloc_rma(void **rma)
1950 off_t size;
1951 int fd;
1952 struct kvm_allocate_rma ret;
1954 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1955 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1956 * not necessary on this hardware
1957 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1959 * FIXME: We should allow the user to force contiguous RMA
1960 * allocation in the cap_ppc_rma==1 case.
1962 if (cap_ppc_rma < 2) {
1963 return 0;
1966 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1967 if (fd < 0) {
1968 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1969 strerror(errno));
1970 return -1;
1973 size = MIN(ret.rma_size, 256ul << 20);
1975 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1976 if (*rma == MAP_FAILED) {
1977 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1978 return -1;
1981 return size;
1984 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1986 struct kvm_ppc_smmu_info info;
1987 long rampagesize, best_page_shift;
1988 int i;
1990 if (cap_ppc_rma >= 2) {
1991 return current_size;
1994 /* Find the largest hardware supported page size that's less than
1995 * or equal to the (logical) backing page size of guest RAM */
1996 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1997 rampagesize = getrampagesize();
1998 best_page_shift = 0;
2000 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2001 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2003 if (!sps->page_shift) {
2004 continue;
2007 if ((sps->page_shift > best_page_shift)
2008 && ((1UL << sps->page_shift) <= rampagesize)) {
2009 best_page_shift = sps->page_shift;
2013 return MIN(current_size,
2014 1ULL << (best_page_shift + hash_shift - 7));
2016 #endif
2018 bool kvmppc_spapr_use_multitce(void)
2020 return cap_spapr_multitce;
2023 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2024 bool vfio_accel)
2026 struct kvm_create_spapr_tce args = {
2027 .liobn = liobn,
2028 .window_size = window_size,
2030 long len;
2031 int fd;
2032 void *table;
2034 /* Must set fd to -1 so we don't try to munmap when called for
2035 * destroying the table, which the upper layers -will- do
2037 *pfd = -1;
2038 if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
2039 return NULL;
2042 fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2043 if (fd < 0) {
2044 fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2045 liobn);
2046 return NULL;
2049 len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2050 /* FIXME: round this up to page size */
2052 table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2053 if (table == MAP_FAILED) {
2054 fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2055 liobn);
2056 close(fd);
2057 return NULL;
2060 *pfd = fd;
2061 return table;
2064 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2066 long len;
2068 if (fd < 0) {
2069 return -1;
2072 len = nb_table * sizeof(uint64_t);
2073 if ((munmap(table, len) < 0) ||
2074 (close(fd) < 0)) {
2075 fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2076 strerror(errno));
2077 /* Leak the table */
2080 return 0;
2083 int kvmppc_reset_htab(int shift_hint)
2085 uint32_t shift = shift_hint;
2087 if (!kvm_enabled()) {
2088 /* Full emulation, tell caller to allocate htab itself */
2089 return 0;
2091 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2092 int ret;
2093 ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2094 if (ret == -ENOTTY) {
2095 /* At least some versions of PR KVM advertise the
2096 * capability, but don't implement the ioctl(). Oops.
2097 * Return 0 so that we allocate the htab in qemu, as is
2098 * correct for PR. */
2099 return 0;
2100 } else if (ret < 0) {
2101 return ret;
2103 return shift;
2106 /* We have a kernel that predates the htab reset calls. For PR
2107 * KVM, we need to allocate the htab ourselves; an HV KVM of
2108 * this era has already allocated a 16MB fixed size hash
2109 * table. Kernels of this era have the GET_PVINFO capability
2110 * only on PR, so we use this hack to determine the right
2111 * answer */
2112 if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2113 /* PR - tell caller to allocate htab */
2114 return 0;
2115 } else {
2116 /* HV - assume 16MB kernel allocated htab */
2117 return 24;
2121 static inline uint32_t mfpvr(void)
2123 uint32_t pvr;
2125 asm ("mfpvr %0"
2126 : "=r"(pvr));
2127 return pvr;
2130 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2132 if (on) {
2133 *word |= flags;
2134 } else {
2135 *word &= ~flags;
2139 static void kvmppc_host_cpu_initfn(Object *obj)
2141 assert(kvm_enabled());
2144 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2146 PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2147 uint32_t vmx = kvmppc_get_vmx();
2148 uint32_t dfp = kvmppc_get_dfp();
2149 uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2150 uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2152 /* Now fix up the class with information we can query from the host */
2153 pcc->pvr = mfpvr();
2155 if (vmx != -1) {
2156 /* Only override when we know what the host supports */
2157 alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2158 alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2160 if (dfp != -1) {
2161 /* Only override when we know what the host supports */
2162 alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2165 if (dcache_size != -1) {
2166 pcc->l1_dcache_size = dcache_size;
2169 if (icache_size != -1) {
2170 pcc->l1_icache_size = icache_size;
2174 bool kvmppc_has_cap_epr(void)
2176 return cap_epr;
2179 bool kvmppc_has_cap_htab_fd(void)
2181 return cap_htab_fd;
2184 bool kvmppc_has_cap_fixup_hcalls(void)
2186 return cap_fixup_hcalls;
2189 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2191 ObjectClass *oc = OBJECT_CLASS(pcc);
2193 while (oc && !object_class_is_abstract(oc)) {
2194 oc = object_class_get_parent(oc);
2196 assert(oc);
2198 return POWERPC_CPU_CLASS(oc);
2201 static int kvm_ppc_register_host_cpu_type(void)
2203 TypeInfo type_info = {
2204 .name = TYPE_HOST_POWERPC_CPU,
2205 .instance_init = kvmppc_host_cpu_initfn,
2206 .class_init = kvmppc_host_cpu_class_init,
2208 uint32_t host_pvr = mfpvr();
2209 PowerPCCPUClass *pvr_pcc;
2210 DeviceClass *dc;
2212 pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2213 if (pvr_pcc == NULL) {
2214 pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2216 if (pvr_pcc == NULL) {
2217 return -1;
2219 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2220 type_register(&type_info);
2222 /* Also register a generic CPU class for this CPU's family */
2223 pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2224 dc = DEVICE_CLASS(pvr_pcc);
2225 type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2226 type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2227 type_register(&type_info);
2229 return 0;
2232 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2234 struct kvm_rtas_token_args args = {
2235 .token = token,
2238 if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2239 return -ENOENT;
2242 strncpy(args.name, function, sizeof(args.name));
2244 return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2247 int kvmppc_get_htab_fd(bool write)
2249 struct kvm_get_htab_fd s = {
2250 .flags = write ? KVM_GET_HTAB_WRITE : 0,
2251 .start_index = 0,
2254 if (!cap_htab_fd) {
2255 fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2256 return -1;
2259 return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
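/* Stream HPTE chunks from the KVM HTAB fd into the migration stream; stop early once max_ns nanoseconds have elapsed (when max_ns >= 0). Returns 1 when the whole table has been read, 0 otherwise */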
2262 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2264 int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2265 uint8_t buf[bufsize];
2266 ssize_t rc;
2268 do {
2269 rc = read(fd, buf, bufsize);
2270 if (rc < 0) {
2271 fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2272 strerror(errno));
2273 return rc;
2274 } else if (rc) {
2275 uint8_t *buffer = buf;
2276 ssize_t n = rc;
2277 while (n) {
2278 struct kvm_get_htab_header *head =
2279 (struct kvm_get_htab_header *) buffer;
2280 size_t chunksize = sizeof(*head) +
2281 HASH_PTE_SIZE_64 * head->n_valid;
2283 qemu_put_be32(f, head->index);
2284 qemu_put_be16(f, head->n_valid);
2285 qemu_put_be16(f, head->n_invalid);
2286 qemu_put_buffer(f, (void *)(head + 1),
2287 HASH_PTE_SIZE_64 * head->n_valid);
2289 buffer += chunksize;
2290 n -= chunksize;
2293 } while ((rc != 0)
2294 && ((max_ns < 0)
2295 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2297 return (rc == 0) ? 1 : 0;
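/* Write one HPTE chunk from the incoming migration stream back into KVM via the HTAB fd */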
2300 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2301 uint16_t n_valid, uint16_t n_invalid)
2303 struct kvm_get_htab_header *buf;
2304 size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2305 ssize_t rc;
2307 buf = alloca(chunksize);
2308 buf->index = index;
2309 buf->n_valid = n_valid;
2310 buf->n_invalid = n_invalid;
2312 qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2314 rc = write(fd, buf, chunksize);
2315 if (rc < 0) {
2316 fprintf(stderr, "Error writing KVM hash table: %s\n",
2317 strerror(errno));
2318 return rc;
2320 if (rc != chunksize) {
2321 /* We should never get a short write on a single chunk */
2322 fprintf(stderr, "Short write, restoring KVM hash table\n");
2323 return -1;
2325 return 0;
2328 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2330 return true;
2333 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2335 return 1;
2338 int kvm_arch_on_sigbus(int code, void *addr)
2340 return 1;
2343 void kvm_arch_init_irq_routing(KVMState *s)
2347 struct kvm_get_htab_buf {
2348 struct kvm_get_htab_header header;
2350 * We require one extra byte for read
2352 target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
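/* Read one HPTE group through a temporary KVM HTAB fd; the returned token points at the buffer's hpte array and must be released with kvmppc_hash64_free_pteg() */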
2355 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2357 int htab_fd;
2358 struct kvm_get_htab_fd ghf;
2359 struct kvm_get_htab_buf *hpte_buf;
2361 ghf.flags = 0;
2362 ghf.start_index = pte_index;
2363 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2364 if (htab_fd < 0) {
2365 goto error_out;
2368 hpte_buf = g_malloc0(sizeof(*hpte_buf));
2370 * Read the hpte group
2372 if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2373 goto out_close;
2376 close(htab_fd);
2377 return (uint64_t)(uintptr_t) hpte_buf->hpte;
2379 out_close:
2380 g_free(hpte_buf);
2381 close(htab_fd);
2382 error_out:
2383 return 0;
2386 void kvmppc_hash64_free_pteg(uint64_t token)
2388 struct kvm_get_htab_buf *htab_buf;
2390 htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2391 hpte);
2392 g_free(htab_buf);
2393 return;
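/* Write a single HPTE (pte0/pte1) at pte_index through the KVM HTAB fd */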
2396 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2397 target_ulong pte0, target_ulong pte1)
2399 int htab_fd;
2400 struct kvm_get_htab_fd ghf;
2401 struct kvm_get_htab_buf hpte_buf;
2403 ghf.flags = 0;
2404 ghf.start_index = 0; /* Ignored */
2405 htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2406 if (htab_fd < 0) {
2407 goto error_out;
2410 hpte_buf.header.n_valid = 1;
2411 hpte_buf.header.n_invalid = 0;
2412 hpte_buf.header.index = pte_index;
2413 hpte_buf.hpte[0] = pte0;
2414 hpte_buf.hpte[1] = pte1;
2416 * Write the hpte entry.
2417 * CAUTION: write() has the warn_unused_result attribute. Hence we
2418 * need to check the return value, even though we do nothing.
2420 if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2421 goto out_close;
2424 out_close:
2425 close(htab_fd);
2426 return;
2428 error_out:
2429 return;
2432 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2433 uint64_t address, uint32_t data)
2435 return 0;
2438 int kvm_arch_msi_data_to_gsi(uint32_t data)
2440 return data & 0xffff;