target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include <dirent.h>
  18 #include <sys/types.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/mman.h>
  21 #include <sys/vfs.h>
  22
  23 #include <linux/kvm.h>
  24
  25 #include "qemu-common.h"
  26 #include "qemu/timer.h"
  27 #include "sysemu/sysemu.h"
  28 #include "sysemu/kvm.h"
  29 #include "kvm_ppc.h"
  30 #include "cpu.h"
  31 #include "sysemu/cpus.h"
  32 #include "sysemu/device_tree.h"
  33 #include "mmu-hash64.h"
  34
  35 #include "hw/sysbus.h"
  36 #include "hw/ppc/spapr.h"
  37 #include "hw/ppc/spapr_vio.h"
  38 #include "hw/ppc/ppc.h"
  39 #include "sysemu/watchdog.h"
  40 #include "trace.h"
  41 #include "exec/gdbstub.h"
  42
  43 //#define DEBUG_KVM
  44
  45 #ifdef DEBUG_KVM
  46 #define DPRINTF(fmt, ...) \
  47     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  48 #else
  49 #define DPRINTF(fmt, ...) \
  50     do { } while (0)
  51 #endif
  52
  53 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  54
  55 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  56     KVM_CAP_LAST_INFO
  57 };
  58
  59 static int cap_interrupt_unset = false;
  60 static int cap_interrupt_level = false;
  61 static int cap_segstate;
  62 static int cap_booke_sregs;
  63 static int cap_ppc_smt;
  64 static int cap_ppc_rma;
  65 static int cap_spapr_tce;
  66 static int cap_spapr_multitce;
  67 static int cap_spapr_vfio;
  68 static int cap_hior;
  69 static int cap_one_reg;
  70 static int cap_epr;
  71 static int cap_ppc_watchdog;
  72 static int cap_papr;
  73 static int cap_htab_fd;
  74 static int cap_fixup_hcalls;
  75
  76 static uint32_t debug_inst_opcode;
  77
  78 /* XXX We have a race condition where we actually have a level triggered
  79  *     interrupt, but the infrastructure can't expose that yet, so the guest
  80  *     takes but ignores it, goes to sleep and never gets notified that there's
  81  *     still an interrupt pending.
  82  *
  83  *     As a quick workaround, let's just wake up again 20 ms after we injected
  84  *     an interrupt. That way we can assure that we're always reinjecting
  85  *     interrupts in case the guest swallowed them.
  86  */
  87 static QEMUTimer *idle_timer;
  88
  89 static void kvm_kick_cpu(void *opaque)
  90 {
  91     PowerPCCPU *cpu = opaque;
  92
  93     qemu_cpu_kick(CPU(cpu));
  94 }
  95
  96 static int kvm_ppc_register_host_cpu_type(void);
  97
  98 int kvm_arch_init(KVMState *s)
  99 {
 100     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 101     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 102     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 103     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 104     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 105     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 106     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 107     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 108     cap_spapr_vfio = false;
 109     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 110     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 111     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 112     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 113     /* Note: we don't set cap_papr here, because this capability is
 114      * only activated after this by kvmppc_set_papr() */
 115     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 116     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 117
 118     if (!cap_interrupt_level) {
 119         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 120                         "VM to stall at times!\n");
 121     }
 122
 123     kvm_ppc_register_host_cpu_type();
 124
 125     return 0;
 126 }
 127
 128 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 129 {
 130     CPUPPCState *cenv = &cpu->env;
 131     CPUState *cs = CPU(cpu);
 132     struct kvm_sregs sregs;
 133     int ret;
 134
 135     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 136         /* What we're really trying to say is "if we're on BookE, we use
 137            the native PVR for now". This is the only sane way to check
 138            it though, so we potentially confuse users that they can run
 139            BookE guests on BookS. Let's hope nobody dares enough :) */
 140         return 0;
 141     } else {
 142         if (!cap_segstate) {
 143             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 144             return -ENOSYS;
 145         }
 146     }
 147
 148     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 149     if (ret) {
 150         return ret;
 151     }
 152
 153     sregs.pvr = cenv->spr[SPR_PVR];
 154     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 155 }
 156
 157 /* Set up a shared TLB array with KVM */
 158 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 159 {
 160     CPUPPCState *env = &cpu->env;
 161     CPUState *cs = CPU(cpu);
 162     struct kvm_book3e_206_tlb_params params = {};
 163     struct kvm_config_tlb cfg = {};
 164     unsigned int entries = 0;
 165     int ret, i;
 166
 167     if (!kvm_enabled() ||
 168         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 169         return 0;
 170     }
 171
 172     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 173
 174     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 175         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 176         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 177         entries += params.tlb_sizes[i];
 178     }
 179
 180     assert(entries == env->nb_tlb);
 181     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 182
 183     env->tlb_dirty = true;
 184
 185     cfg.array = (uintptr_t)env->tlb.tlbm;
 186     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 187     cfg.params = (uintptr_t)&params;
 188     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 189
 190     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 191     if (ret < 0) {
 192         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 193                 __func__, strerror(-ret));
 194         return ret;
 195     }
 196
 197     env->kvm_sw_tlb = true;
 198     return 0;
 199 }
 200
 201
 202 #if defined(TARGET_PPC64)
 203 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 204                                        struct kvm_ppc_smmu_info *info)
 205 {
 206     CPUPPCState *env = &cpu->env;
 207     CPUState *cs = CPU(cpu);
 208
 209     memset(info, 0, sizeof(*info));
 210
 211     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 212      * need to "guess" what the supported page sizes are.
 213      *
 214      * For that to work we make a few assumptions:
 215      *
 216      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 217      *   KVM which only supports 4K and 16M pages, but supports them
 218      *   regardless of the backing store characteritics. We also don't
 219      *   support 1T segments.
 220      *
 221      *   This is safe as if HV KVM ever supports that capability or PR
 222      *   KVM grows supports for more page/segment sizes, those versions
 223      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 224      *   will not hit this fallback
 225      *
 226      * - Else we are running HV KVM. This means we only support page
 227      *   sizes that fit in the backing store. Additionally we only
 228      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 229      *   P7 encodings for the SLB and hash table. Here too, we assume
 230      *   support for any newer processor will mean a kernel that
 231      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 232      *   this fallback.
 233      */
 234     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 235         /* No flags */
 236         info->flags = 0;
 237         info->slb_size = 64;
 238
 239         /* Standard 4k base page size segment */
 240         info->sps[0].page_shift = 12;
 241         info->sps[0].slb_enc = 0;
 242         info->sps[0].enc[0].page_shift = 12;
 243         info->sps[0].enc[0].pte_enc = 0;
 244
 245         /* Standard 16M large page size segment */
 246         info->sps[1].page_shift = 24;
 247         info->sps[1].slb_enc = SLB_VSID_L;
 248         info->sps[1].enc[0].page_shift = 24;
 249         info->sps[1].enc[0].pte_enc = 0;
 250     } else {
 251         int i = 0;
 252
 253         /* HV KVM has backing store size restrictions */
 254         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 255
 256         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 257             info->flags |= KVM_PPC_1T_SEGMENTS;
 258         }
 259
 260         if (env->mmu_model == POWERPC_MMU_2_06) {
 261             info->slb_size = 32;
 262         } else {
 263             info->slb_size = 64;
 264         }
 265
 266         /* Standard 4k base page size segment */
 267         info->sps[i].page_shift = 12;
 268         info->sps[i].slb_enc = 0;
 269         info->sps[i].enc[0].page_shift = 12;
 270         info->sps[i].enc[0].pte_enc = 0;
 271         i++;
 272
 273         /* 64K on MMU 2.06 */
 274         if (env->mmu_model == POWERPC_MMU_2_06) {
 275             info->sps[i].page_shift = 16;
 276             info->sps[i].slb_enc = 0x110;
 277             info->sps[i].enc[0].page_shift = 16;
 278             info->sps[i].enc[0].pte_enc = 1;
 279             i++;
 280         }
 281
 282         /* Standard 16M large page size segment */
 283         info->sps[i].page_shift = 24;
 284         info->sps[i].slb_enc = SLB_VSID_L;
 285         info->sps[i].enc[0].page_shift = 24;
 286         info->sps[i].enc[0].pte_enc = 0;
 287     }
 288 }
 289
 290 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 291 {
 292     CPUState *cs = CPU(cpu);
 293     int ret;
 294
 295     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 296         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 297         if (ret == 0) {
 298             return;
 299         }
 300     }
 301
 302     kvm_get_fallback_smmu_info(cpu, info);
 303 }
 304
 305 static long getrampagesize(void)
 306 {
 307     struct statfs fs;
 308     int ret;
 309
 310     if (!mem_path) {
 311         /* guest RAM is backed by normal anonymous pages */
 312         return getpagesize();
 313     }
 314
 315     do {
 316         ret = statfs(mem_path, &fs);
 317     } while (ret != 0 && errno == EINTR);
 318
 319     if (ret != 0) {
 320         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 321                 strerror(errno));
 322         exit(1);
 323     }
 324
 325 #define HUGETLBFS_MAGIC       0x958458f6
 326
 327     if (fs.f_type != HUGETLBFS_MAGIC) {
 328         /* Explicit mempath, but it's ordinary pages */
 329         return getpagesize();
 330     }
 331
 332     /* It's hugepage, return the huge page size */
 333     return fs.f_bsize;
 334 }
 335
 336 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 337 {
 338     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 339         return true;
 340     }
 341
 342     return (1ul << shift) <= rampgsize;
 343 }
 344
 345 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 346 {
 347     static struct kvm_ppc_smmu_info smmu_info;
 348     static bool has_smmu_info;
 349     CPUPPCState *env = &cpu->env;
 350     long rampagesize;
 351     int iq, ik, jq, jk;
 352
 353     /* We only handle page sizes for 64-bit server guests for now */
 354     if (!(env->mmu_model & POWERPC_MMU_64)) {
 355         return;
 356     }
 357
 358     /* Collect MMU info from kernel if not already */
 359     if (!has_smmu_info) {
 360         kvm_get_smmu_info(cpu, &smmu_info);
 361         has_smmu_info = true;
 362     }
 363
 364     rampagesize = getrampagesize();
 365
 366     /* Convert to QEMU form */
 367     memset(&env->sps, 0, sizeof(env->sps));
 368
 369     /*
 370      * XXX This loop should be an entry wide AND of the capabilities that
 371      *     the selected CPU has with the capabilities that KVM supports.
 372      */
 373     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 374         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 375         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 376
 377         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 378                                  ksps->page_shift)) {
 379             continue;
 380         }
 381         qsps->page_shift = ksps->page_shift;
 382         qsps->slb_enc = ksps->slb_enc;
 383         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 384             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 385                                      ksps->enc[jk].page_shift)) {
 386                 continue;
 387             }
 388             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 389             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 390             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 391                 break;
 392             }
 393         }
 394         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 395             break;
 396         }
 397     }
 398     env->slb_nr = smmu_info.slb_size;
 399     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 400         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 401     }
 402 }
 403 #else /* defined (TARGET_PPC64) */
 404
 405 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 406 {
 407 }
 408
 409 #endif /* !defined (TARGET_PPC64) */
 410
 411 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 412 {
 413     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 414 }
 415
 416 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
 417  * book3s supports only 1 watchpoint, so array size
 418  * of 4 is sufficient for now.
 419  */
 420 #define MAX_HW_BKPTS 4
 421
 422 static struct HWBreakpoint {
 423     target_ulong addr;
 424     int type;
 425 } hw_debug_points[MAX_HW_BKPTS];
 426
 427 static CPUWatchpoint hw_watchpoint;
 428
 429 /* Default there is no breakpoint and watchpoint supported */
 430 static int max_hw_breakpoint;
 431 static int max_hw_watchpoint;
 432 static int nb_hw_breakpoint;
 433 static int nb_hw_watchpoint;
 434
 435 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 436 {
 437     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 438         max_hw_breakpoint = 2;
 439         max_hw_watchpoint = 2;
 440     }
 441
 442     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 443         fprintf(stderr, "Error initializing h/w breakpoints\n");
 444         return;
 445     }
 446 }
 447
 448 int kvm_arch_init_vcpu(CPUState *cs)
 449 {
 450     PowerPCCPU *cpu = POWERPC_CPU(cs);
 451     CPUPPCState *cenv = &cpu->env;
 452     int ret;
 453
 454     /* Gather server mmu info from KVM and update the CPU state */
 455     kvm_fixup_page_sizes(cpu);
 456
 457     /* Synchronize sregs with kvm */
 458     ret = kvm_arch_sync_sregs(cpu);
 459     if (ret) {
 460         return ret;
 461     }
 462
 463     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 464
 465     /* Some targets support access to KVM's guest TLB. */
 466     switch (cenv->mmu_model) {
 467     case POWERPC_MMU_BOOKE206:
 468         ret = kvm_booke206_tlb_init(cpu);
 469         break;
 470     default:
 471         break;
 472     }
 473
 474     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 475     kvmppc_hw_debug_points_init(cenv);
 476
 477     return ret;
 478 }
 479
 480 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 481 {
 482     CPUPPCState *env = &cpu->env;
 483     CPUState *cs = CPU(cpu);
 484     struct kvm_dirty_tlb dirty_tlb;
 485     unsigned char *bitmap;
 486     int ret;
 487
 488     if (!env->kvm_sw_tlb) {
 489         return;
 490     }
 491
 492     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 493     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 494
 495     dirty_tlb.bitmap = (uintptr_t)bitmap;
 496     dirty_tlb.num_dirty = env->nb_tlb;
 497
 498     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 499     if (ret) {
 500         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 501                 __func__, strerror(-ret));
 502     }
 503
 504     g_free(bitmap);
 505 }
 506
 507 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 508 {
 509     PowerPCCPU *cpu = POWERPC_CPU(cs);
 510     CPUPPCState *env = &cpu->env;
 511     union {
 512         uint32_t u32;
 513         uint64_t u64;
 514     } val;
 515     struct kvm_one_reg reg = {
 516         .id = id,
 517         .addr = (uintptr_t) &val,
 518     };
 519     int ret;
 520
 521     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 522     if (ret != 0) {
 523         trace_kvm_failed_spr_get(spr, strerror(errno));
 524     } else {
 525         switch (id & KVM_REG_SIZE_MASK) {
 526         case KVM_REG_SIZE_U32:
 527             env->spr[spr] = val.u32;
 528             break;
 529
 530         case KVM_REG_SIZE_U64:
 531             env->spr[spr] = val.u64;
 532             break;
 533
 534         default:
 535             /* Don't handle this size yet */
 536             abort();
 537         }
 538     }
 539 }
 540
 541 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 542 {
 543     PowerPCCPU *cpu = POWERPC_CPU(cs);
 544     CPUPPCState *env = &cpu->env;
 545     union {
 546         uint32_t u32;
 547         uint64_t u64;
 548     } val;
 549     struct kvm_one_reg reg = {
 550         .id = id,
 551         .addr = (uintptr_t) &val,
 552     };
 553     int ret;
 554
 555     switch (id & KVM_REG_SIZE_MASK) {
 556     case KVM_REG_SIZE_U32:
 557         val.u32 = env->spr[spr];
 558         break;
 559
 560     case KVM_REG_SIZE_U64:
 561         val.u64 = env->spr[spr];
 562         break;
 563
 564     default:
 565         /* Don't handle this size yet */
 566         abort();
 567     }
 568
 569     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 570     if (ret != 0) {
 571         trace_kvm_failed_spr_set(spr, strerror(errno));
 572     }
 573 }
 574
 575 static int kvm_put_fp(CPUState *cs)
 576 {
 577     PowerPCCPU *cpu = POWERPC_CPU(cs);
 578     CPUPPCState *env = &cpu->env;
 579     struct kvm_one_reg reg;
 580     int i;
 581     int ret;
 582
 583     if (env->insns_flags & PPC_FLOAT) {
 584         uint64_t fpscr = env->fpscr;
 585         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 586
 587         reg.id = KVM_REG_PPC_FPSCR;
 588         reg.addr = (uintptr_t)&fpscr;
 589         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 590         if (ret < 0) {
 591             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 592             return ret;
 593         }
 594
 595         for (i = 0; i < 32; i++) {
 596             uint64_t vsr[2];
 597
 598             vsr[0] = float64_val(env->fpr[i]);
 599             vsr[1] = env->vsr[i];
 600             reg.addr = (uintptr_t) &vsr;
 601             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 602
 603             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 604             if (ret < 0) {
 605                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 606                         i, strerror(errno));
 607                 return ret;
 608             }
 609         }
 610     }
 611
 612     if (env->insns_flags & PPC_ALTIVEC) {
 613         reg.id = KVM_REG_PPC_VSCR;
 614         reg.addr = (uintptr_t)&env->vscr;
 615         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 616         if (ret < 0) {
 617             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 618             return ret;
 619         }
 620
 621         for (i = 0; i < 32; i++) {
 622             reg.id = KVM_REG_PPC_VR(i);
 623             reg.addr = (uintptr_t)&env->avr[i];
 624             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 625             if (ret < 0) {
 626                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 627                 return ret;
 628             }
 629         }
 630     }
 631
 632     return 0;
 633 }
 634
 635 static int kvm_get_fp(CPUState *cs)
 636 {
 637     PowerPCCPU *cpu = POWERPC_CPU(cs);
 638     CPUPPCState *env = &cpu->env;
 639     struct kvm_one_reg reg;
 640     int i;
 641     int ret;
 642
 643     if (env->insns_flags & PPC_FLOAT) {
 644         uint64_t fpscr;
 645         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 646
 647         reg.id = KVM_REG_PPC_FPSCR;
 648         reg.addr = (uintptr_t)&fpscr;
 649         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 650         if (ret < 0) {
 651             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 652             return ret;
 653         } else {
 654             env->fpscr = fpscr;
 655         }
 656
 657         for (i = 0; i < 32; i++) {
 658             uint64_t vsr[2];
 659
 660             reg.addr = (uintptr_t) &vsr;
 661             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 662
 663             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 664             if (ret < 0) {
 665                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 666                         vsx ? "VSR" : "FPR", i, strerror(errno));
 667                 return ret;
 668             } else {
 669                 env->fpr[i] = vsr[0];
 670                 if (vsx) {
 671                     env->vsr[i] = vsr[1];
 672                 }
 673             }
 674         }
 675     }
 676
 677     if (env->insns_flags & PPC_ALTIVEC) {
 678         reg.id = KVM_REG_PPC_VSCR;
 679         reg.addr = (uintptr_t)&env->vscr;
 680         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 681         if (ret < 0) {
 682             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 683             return ret;
 684         }
 685
 686         for (i = 0; i < 32; i++) {
 687             reg.id = KVM_REG_PPC_VR(i);
 688             reg.addr = (uintptr_t)&env->avr[i];
 689             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 690             if (ret < 0) {
 691                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 692                         i, strerror(errno));
 693                 return ret;
 694             }
 695         }
 696     }
 697
 698     return 0;
 699 }
 700
 701 #if defined(TARGET_PPC64)
 702 static int kvm_get_vpa(CPUState *cs)
 703 {
 704     PowerPCCPU *cpu = POWERPC_CPU(cs);
 705     CPUPPCState *env = &cpu->env;
 706     struct kvm_one_reg reg;
 707     int ret;
 708
 709     reg.id = KVM_REG_PPC_VPA_ADDR;
 710     reg.addr = (uintptr_t)&env->vpa_addr;
 711     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 712     if (ret < 0) {
 713         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 714         return ret;
 715     }
 716
 717     assert((uintptr_t)&env->slb_shadow_size
 718            == ((uintptr_t)&env->slb_shadow_addr + 8));
 719     reg.id = KVM_REG_PPC_VPA_SLB;
 720     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 721     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 722     if (ret < 0) {
 723         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 724                 strerror(errno));
 725         return ret;
 726     }
 727
 728     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 729     reg.id = KVM_REG_PPC_VPA_DTL;
 730     reg.addr = (uintptr_t)&env->dtl_addr;
 731     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 732     if (ret < 0) {
 733         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 734                 strerror(errno));
 735         return ret;
 736     }
 737
 738     return 0;
 739 }
 740
 741 static int kvm_put_vpa(CPUState *cs)
 742 {
 743     PowerPCCPU *cpu = POWERPC_CPU(cs);
 744     CPUPPCState *env = &cpu->env;
 745     struct kvm_one_reg reg;
 746     int ret;
 747
 748     /* SLB shadow or DTL can't be registered unless a master VPA is
 749      * registered.  That means when restoring state, if a VPA *is*
 750      * registered, we need to set that up first.  If not, we need to
 751      * deregister the others before deregistering the master VPA */
 752     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 753
 754     if (env->vpa_addr) {
 755         reg.id = KVM_REG_PPC_VPA_ADDR;
 756         reg.addr = (uintptr_t)&env->vpa_addr;
 757         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 758         if (ret < 0) {
 759             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 760             return ret;
 761         }
 762     }
 763
 764     assert((uintptr_t)&env->slb_shadow_size
 765            == ((uintptr_t)&env->slb_shadow_addr + 8));
 766     reg.id = KVM_REG_PPC_VPA_SLB;
 767     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 768     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 769     if (ret < 0) {
 770         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 771         return ret;
 772     }
 773
 774     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 775     reg.id = KVM_REG_PPC_VPA_DTL;
 776     reg.addr = (uintptr_t)&env->dtl_addr;
 777     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 778     if (ret < 0) {
 779         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 780                 strerror(errno));
 781         return ret;
 782     }
 783
 784     if (!env->vpa_addr) {
 785         reg.id = KVM_REG_PPC_VPA_ADDR;
 786         reg.addr = (uintptr_t)&env->vpa_addr;
 787         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 788         if (ret < 0) {
 789             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 790             return ret;
 791         }
 792     }
 793
 794     return 0;
 795 }
 796 #endif /* TARGET_PPC64 */
 797
 798 int kvm_arch_put_registers(CPUState *cs, int level)
 799 {
 800     PowerPCCPU *cpu = POWERPC_CPU(cs);
 801     CPUPPCState *env = &cpu->env;
 802     struct kvm_regs regs;
 803     int ret;
 804     int i;
 805
 806     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 807     if (ret < 0) {
 808         return ret;
 809     }
 810
 811     regs.ctr = env->ctr;
 812     regs.lr  = env->lr;
 813     regs.xer = cpu_read_xer(env);
 814     regs.msr = env->msr;
 815     regs.pc = env->nip;
 816
 817     regs.srr0 = env->spr[SPR_SRR0];
 818     regs.srr1 = env->spr[SPR_SRR1];
 819
 820     regs.sprg0 = env->spr[SPR_SPRG0];
 821     regs.sprg1 = env->spr[SPR_SPRG1];
 822     regs.sprg2 = env->spr[SPR_SPRG2];
 823     regs.sprg3 = env->spr[SPR_SPRG3];
 824     regs.sprg4 = env->spr[SPR_SPRG4];
 825     regs.sprg5 = env->spr[SPR_SPRG5];
 826     regs.sprg6 = env->spr[SPR_SPRG6];
 827     regs.sprg7 = env->spr[SPR_SPRG7];
 828
 829     regs.pid = env->spr[SPR_BOOKE_PID];
 830
 831     for (i = 0;i < 32; i++)
 832         regs.gpr[i] = env->gpr[i];
 833
 834     regs.cr = 0;
 835     for (i = 0; i < 8; i++) {
 836         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 837     }
 838
 839     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 840     if (ret < 0)
 841         return ret;
 842
 843     kvm_put_fp(cs);
 844
 845     if (env->tlb_dirty) {
 846         kvm_sw_tlb_put(cpu);
 847         env->tlb_dirty = false;
 848     }
 849
 850     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 851         struct kvm_sregs sregs;
 852
 853         sregs.pvr = env->spr[SPR_PVR];
 854
 855         sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 856
 857         /* Sync SLB */
 858 #ifdef TARGET_PPC64
 859         for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 860             sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 861             if (env->slb[i].esid & SLB_ESID_V) {
 862                 sregs.u.s.ppc64.slb[i].slbe |= i;
 863             }
 864             sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 865         }
 866 #endif
 867
 868         /* Sync SRs */
 869         for (i = 0; i < 16; i++) {
 870             sregs.u.s.ppc32.sr[i] = env->sr[i];
 871         }
 872
 873         /* Sync BATs */
 874         for (i = 0; i < 8; i++) {
 875             /* Beware. We have to swap upper and lower bits here */
 876             sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 877                 | env->DBAT[1][i];
 878             sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 879                 | env->IBAT[1][i];
 880         }
 881
 882         ret = kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 883         if (ret) {
 884             return ret;
 885         }
 886     }
 887
 888     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 889         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 890     }
 891
 892     if (cap_one_reg) {
 893         int i;
 894
 895         /* We deliberately ignore errors here, for kernels which have
 896          * the ONE_REG calls, but don't support the specific
 897          * registers, there's a reasonable chance things will still
 898          * work, at least until we try to migrate. */
 899         for (i = 0; i < 1024; i++) {
 900             uint64_t id = env->spr_cb[i].one_reg_id;
 901
 902             if (id != 0) {
 903                 kvm_put_one_spr(cs, id, i);
 904             }
 905         }
 906
 907 #ifdef TARGET_PPC64
 908         if (msr_ts) {
 909             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
 910                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
 911             }
 912             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
 913                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
 914             }
 915             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
 916             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
 917             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
 918             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
 919             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
 920             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
 921             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
 922             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
 923             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
 924             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
 925         }
 926
 927         if (cap_papr) {
 928             if (kvm_put_vpa(cs) < 0) {
 929                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
 930             }
 931         }
 932
 933         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
 934 #endif /* TARGET_PPC64 */
 935     }
 936
 937     return ret;
 938 }
 939
 940 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
 941 {
 942      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
 943 }
 944
 945 int kvm_arch_get_registers(CPUState *cs)
 946 {
 947     PowerPCCPU *cpu = POWERPC_CPU(cs);
 948     CPUPPCState *env = &cpu->env;
 949     struct kvm_regs regs;
 950     struct kvm_sregs sregs;
 951     uint32_t cr;
 952     int i, ret;
 953
 954     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 955     if (ret < 0)
 956         return ret;
 957
 958     cr = regs.cr;
 959     for (i = 7; i >= 0; i--) {
 960         env->crf[i] = cr & 15;
 961         cr >>= 4;
 962     }
 963
 964     env->ctr = regs.ctr;
 965     env->lr = regs.lr;
 966     cpu_write_xer(env, regs.xer);
 967     env->msr = regs.msr;
 968     env->nip = regs.pc;
 969
 970     env->spr[SPR_SRR0] = regs.srr0;
 971     env->spr[SPR_SRR1] = regs.srr1;
 972
 973     env->spr[SPR_SPRG0] = regs.sprg0;
 974     env->spr[SPR_SPRG1] = regs.sprg1;
 975     env->spr[SPR_SPRG2] = regs.sprg2;
 976     env->spr[SPR_SPRG3] = regs.sprg3;
 977     env->spr[SPR_SPRG4] = regs.sprg4;
 978     env->spr[SPR_SPRG5] = regs.sprg5;
 979     env->spr[SPR_SPRG6] = regs.sprg6;
 980     env->spr[SPR_SPRG7] = regs.sprg7;
 981
 982     env->spr[SPR_BOOKE_PID] = regs.pid;
 983
 984     for (i = 0;i < 32; i++)
 985         env->gpr[i] = regs.gpr[i];
 986
 987     kvm_get_fp(cs);
 988
 989     if (cap_booke_sregs) {
 990         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 991         if (ret < 0) {
 992             return ret;
 993         }
 994
 995         if (sregs.u.e.features & KVM_SREGS_E_BASE) {
 996             env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
 997             env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
 998             env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
 999             env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1000             env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1001             env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1002             env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1003             env->spr[SPR_DECR] = sregs.u.e.dec;
1004             env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1005             env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1006             env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1007         }
1008
1009         if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1010             env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1011             env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1012             env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1013             env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1014             env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1015         }
1016
1017         if (sregs.u.e.features & KVM_SREGS_E_64) {
1018             env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1019         }
1020
1021         if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1022             env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1023         }
1024
1025         if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1026             env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1027             kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1028             env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1029             kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1030             env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1031             kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1032             env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1033             kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1034             env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1035             kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1036             env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1037             kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1038             env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1039             kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1040             env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1041             kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1042             env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1043             kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1044             env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1045             kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1046             env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1047             kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1048             env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1049             kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1050             env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1051             kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1052             env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1053             kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1054             env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1055             kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1056             env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1057             kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1058
1059             if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1060                 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1061                 kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1062                 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1063                 kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1064                 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1065                 kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1066             }
1067
1068             if (sregs.u.e.features & KVM_SREGS_E_PM) {
1069                 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1070                 kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1071             }
1072
1073             if (sregs.u.e.features & KVM_SREGS_E_PC) {
1074                 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1075                 kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1076                 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1077                 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1078             }
1079         }
1080
1081         if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1082             env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1083             env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1084             env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1085             env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1086             env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1087             env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1088             env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1089             env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1090             env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1091             env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1092         }
1093
1094         if (sregs.u.e.features & KVM_SREGS_EXP) {
1095             env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1096         }
1097
1098         if (sregs.u.e.features & KVM_SREGS_E_PD) {
1099             env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1100             env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1101         }
1102
1103         if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1104             env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1105             env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1106             env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1107
1108             if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1109                 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1110                 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1111             }
1112         }
1113     }
1114
1115     if (cap_segstate) {
1116         ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
1117         if (ret < 0) {
1118             return ret;
1119         }
1120
1121         if (!env->external_htab) {
1122             ppc_store_sdr1(env, sregs.u.s.sdr1);
1123         }
1124
1125         /* Sync SLB */
1126 #ifdef TARGET_PPC64
1127         /*
1128          * The packed SLB array we get from KVM_GET_SREGS only contains
1129          * information about valid entries. So we flush our internal
1130          * copy to get rid of stale ones, then put all valid SLB entries
1131          * back in.
1132          */
1133         memset(env->slb, 0, sizeof(env->slb));
1134         for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1135             target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1136             target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1137             /*
1138              * Only restore valid entries
1139              */
1140             if (rb & SLB_ESID_V) {
1141                 ppc_store_slb(env, rb, rs);
1142             }
1143         }
1144 #endif
1145
1146         /* Sync SRs */
1147         for (i = 0; i < 16; i++) {
1148             env->sr[i] = sregs.u.s.ppc32.sr[i];
1149         }
1150
1151         /* Sync BATs */
1152         for (i = 0; i < 8; i++) {
1153             env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1154             env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1155             env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1156             env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1157         }
1158     }
1159
1160     if (cap_hior) {
1161         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1162     }
1163
1164     if (cap_one_reg) {
1165         int i;
1166
1167         /* We deliberately ignore errors here, for kernels which have
1168          * the ONE_REG calls, but don't support the specific
1169          * registers, there's a reasonable chance things will still
1170          * work, at least until we try to migrate. */
1171         for (i = 0; i < 1024; i++) {
1172             uint64_t id = env->spr_cb[i].one_reg_id;
1173
1174             if (id != 0) {
1175                 kvm_get_one_spr(cs, id, i);
1176             }
1177         }
1178
1179 #ifdef TARGET_PPC64
1180         if (msr_ts) {
1181             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1182                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1183             }
1184             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1185                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1186             }
1187             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1188             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1189             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1190             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1191             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1192             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1193             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1194             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1195             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1196             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1197         }
1198
1199         if (cap_papr) {
1200             if (kvm_get_vpa(cs) < 0) {
1201                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1202             }
1203         }
1204
1205         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1206 #endif
1207     }
1208
1209     return 0;
1210 }
1211
1212 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1213 {
1214     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1215
1216     if (irq != PPC_INTERRUPT_EXT) {
1217         return 0;
1218     }
1219
1220     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1221         return 0;
1222     }
1223
1224     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1225
1226     return 0;
1227 }
1228
/*
 * PPC_INPUT_INT is the bit index tested in env->irq_input_state by
 * kvm_arch_pre_run(); which per-family constant it maps to depends on the
 * target being built.
 */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1236
1237 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1238 {
1239     PowerPCCPU *cpu = POWERPC_CPU(cs);
1240     CPUPPCState *env = &cpu->env;
1241     int r;
1242     unsigned irq;
1243
1244     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1245      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1246     if (!cap_interrupt_level &&
1247         run->ready_for_interrupt_injection &&
1248         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1249         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1250     {
1251         /* For now KVM disregards the 'irq' argument. However, in the
1252          * future KVM could cache it in-kernel to avoid a heavyweight exit
1253          * when reading the UIC.
1254          */
1255         irq = KVM_INTERRUPT_SET;
1256
1257         DPRINTF("injected interrupt %d\n", irq);
1258         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1259         if (r < 0) {
1260             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1261         }
1262
1263         /* Always wake up soon in case the interrupt was level based */
1264         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1265                        (get_ticks_per_sec() / 50));
1266     }
1267
1268     /* We don't know if there are more interrupts pending after this. However,
1269      * the guest will return to userspace in the course of handling this one
1270      * anyways, so we will get a chance to deliver the rest. */
1271 }
1272
/* Hook executed after the vCPU leaves the guest; nothing to do on PowerPC. */
void kvm_arch_post_run(CPUState *cpu, struct kvm_run *run)
{
}
1276
/* Report the vCPU's halted flag; no other asynchronous event is handled. */
int kvm_arch_process_async_events(CPUState *cs)
{
    return cs->halted;
}
1281
1282 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1283 {
1284     CPUState *cs = CPU(cpu);
1285     CPUPPCState *env = &cpu->env;
1286
1287     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1288         cs->halted = 1;
1289         cs->exception_index = EXCP_HLT;
1290     }
1291
1292     return 0;
1293 }
1294
1295 /* map dcr access to existing qemu dcr emulation */
1296 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1297 {
1298     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1299         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1300
1301     return 0;
1302 }
1303
1304 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1305 {
1306     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1307         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1308
1309     return 0;
1310 }
1311
1312 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1313 {
1314     /* Mixed endian case is not handled */
1315     uint32_t sc = debug_inst_opcode;
1316
1317     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1318                             sizeof(sc), 0) ||
1319         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1320         return -EINVAL;
1321     }
1322
1323     return 0;
1324 }
1325
1326 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1327 {
1328     uint32_t sc;
1329
1330     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1331         sc != debug_inst_opcode ||
1332         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1333                             sizeof(sc), 1)) {
1334         return -EINVAL;
1335     }
1336
1337     return 0;
1338 }
1339
1340 static int find_hw_breakpoint(target_ulong addr, int type)
1341 {
1342     int n;
1343
1344     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1345            <= ARRAY_SIZE(hw_debug_points));
1346
1347     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1348         if (hw_debug_points[n].addr == addr &&
1349              hw_debug_points[n].type == type) {
1350             return n;
1351         }
1352     }
1353
1354     return -1;
1355 }
1356
1357 static int find_hw_watchpoint(target_ulong addr, int *flag)
1358 {
1359     int n;
1360
1361     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1362     if (n >= 0) {
1363         *flag = BP_MEM_ACCESS;
1364         return n;
1365     }
1366
1367     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1368     if (n >= 0) {
1369         *flag = BP_MEM_WRITE;
1370         return n;
1371     }
1372
1373     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1374     if (n >= 0) {
1375         *flag = BP_MEM_READ;
1376         return n;
1377     }
1378
1379     return -1;
1380 }
1381
1382 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1383                                   target_ulong len, int type)
1384 {
1385     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1386         return -ENOBUFS;
1387     }
1388
1389     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1390     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1391
1392     switch (type) {
1393     case GDB_BREAKPOINT_HW:
1394         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1395             return -ENOBUFS;
1396         }
1397
1398         if (find_hw_breakpoint(addr, type) >= 0) {
1399             return -EEXIST;
1400         }
1401
1402         nb_hw_breakpoint++;
1403         break;
1404
1405     case GDB_WATCHPOINT_WRITE:
1406     case GDB_WATCHPOINT_READ:
1407     case GDB_WATCHPOINT_ACCESS:
1408         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1409             return -ENOBUFS;
1410         }
1411
1412         if (find_hw_breakpoint(addr, type) >= 0) {
1413             return -EEXIST;
1414         }
1415
1416         nb_hw_watchpoint++;
1417         break;
1418
1419     default:
1420         return -ENOSYS;
1421     }
1422
1423     return 0;
1424 }
1425
1426 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1427                                   target_ulong len, int type)
1428 {
1429     int n;
1430
1431     n = find_hw_breakpoint(addr, type);
1432     if (n < 0) {
1433         return -ENOENT;
1434     }
1435
1436     switch (type) {
1437     case GDB_BREAKPOINT_HW:
1438         nb_hw_breakpoint--;
1439         break;
1440
1441     case GDB_WATCHPOINT_WRITE:
1442     case GDB_WATCHPOINT_READ:
1443     case GDB_WATCHPOINT_ACCESS:
1444         nb_hw_watchpoint--;
1445         break;
1446
1447     default:
1448         return -ENOSYS;
1449     }
1450     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1451
1452     return 0;
1453 }
1454
1455 void kvm_arch_remove_all_hw_breakpoints(void)
1456 {
1457     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1458 }
1459
1460 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1461 {
1462     int n;
1463
1464     /* Software Breakpoint updates */
1465     if (kvm_sw_breakpoints_active(cs)) {
1466         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1467     }
1468
1469     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1470            <= ARRAY_SIZE(hw_debug_points));
1471     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1472
1473     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1474         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1475         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1476         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1477             switch (hw_debug_points[n].type) {
1478             case GDB_BREAKPOINT_HW:
1479                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1480                 break;
1481             case GDB_WATCHPOINT_WRITE:
1482                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1483                 break;
1484             case GDB_WATCHPOINT_READ:
1485                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1486                 break;
1487             case GDB_WATCHPOINT_ACCESS:
1488                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1489                                         KVMPPC_DEBUG_WATCH_READ;
1490                 break;
1491             default:
1492                 cpu_abort(cs, "Unsupported breakpoint type\n");
1493             }
1494             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1495         }
1496     }
1497 }
1498
1499 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1500 {
1501     CPUState *cs = CPU(cpu);
1502     CPUPPCState *env = &cpu->env;
1503     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1504     int handle = 0;
1505     int n;
1506     int flag = 0;
1507
1508     if (cs->singlestep_enabled) {
1509         handle = 1;
1510     } else if (arch_info->status) {
1511         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1512             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1513                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1514                 if (n >= 0) {
1515                     handle = 1;
1516                 }
1517             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1518                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1519                 n = find_hw_watchpoint(arch_info->address,  &flag);
1520                 if (n >= 0) {
1521                     handle = 1;
1522                     cs->watchpoint_hit = &hw_watchpoint;
1523                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1524                     hw_watchpoint.flags = flag;
1525                 }
1526             }
1527         }
1528     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1529         handle = 1;
1530     } else {
1531         /* QEMU is not able to handle debug exception, so inject
1532          * program exception to guest;
1533          * Yes program exception NOT debug exception !!
1534          * When QEMU is using debug resources then debug exception must
1535          * be always set. To achieve this we set MSR_DE and also set
1536          * MSRP_DEP so guest cannot change MSR_DE.
1537          * When emulating debug resource for guest we want guest
1538          * to control MSR_DE (enable/disable debug interrupt on need).
1539          * Supporting both configurations are NOT possible.
1540          * So the result is that we cannot share debug resources
1541          * between QEMU and Guest on BOOKE architecture.
1542          * In the current design QEMU gets the priority over guest,
1543          * this means that if QEMU is using debug resources then guest
1544          * cannot use them;
1545          * For software breakpoint QEMU uses a privileged instruction;
1546          * So there cannot be any reason that we are here for guest
1547          * set debug exception, only possibility is guest executed a
1548          * privileged / illegal instruction and that's why we are
1549          * injecting a program interrupt.
1550          */
1551
1552         cpu_synchronize_state(cs);
1553         /* env->nip is PC, so increment this by 4 to use
1554          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1555          */
1556         env->nip += 4;
1557         cs->exception_index = POWERPC_EXCP_PROGRAM;
1558         env->error_code = POWERPC_EXCP_INVAL;
1559         ppc_cpu_do_interrupt(cs);
1560     }
1561
1562     return handle;
1563 }
1564
1565 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1566 {
1567     PowerPCCPU *cpu = POWERPC_CPU(cs);
1568     CPUPPCState *env = &cpu->env;
1569     int ret;
1570
1571     switch (run->exit_reason) {
1572     case KVM_EXIT_DCR:
1573         if (run->dcr.is_write) {
1574             DPRINTF("handle dcr write\n");
1575             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1576         } else {
1577             DPRINTF("handle dcr read\n");
1578             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1579         }
1580         break;
1581     case KVM_EXIT_HLT:
1582         DPRINTF("handle halt\n");
1583         ret = kvmppc_handle_halt(cpu);
1584         break;
1585 #if defined(TARGET_PPC64)
1586     case KVM_EXIT_PAPR_HCALL:
1587         DPRINTF("handle PAPR hypercall\n");
1588         run->papr_hcall.ret = spapr_hypercall(cpu,
1589                                               run->papr_hcall.nr,
1590                                               run->papr_hcall.args);
1591         ret = 0;
1592         break;
1593 #endif
1594     case KVM_EXIT_EPR:
1595         DPRINTF("handle epr\n");
1596         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1597         ret = 0;
1598         break;
1599     case KVM_EXIT_WATCHDOG:
1600         DPRINTF("handle watchdog expiry\n");
1601         watchdog_perform_action();
1602         ret = 0;
1603         break;
1604
1605     case KVM_EXIT_DEBUG:
1606         DPRINTF("handle debug exception\n");
1607         if (kvm_handle_debug(cpu, run)) {
1608             ret = EXCP_DEBUG;
1609             break;
1610         }
1611         /* re-enter, this exception was guest-internal */
1612         ret = 0;
1613         break;
1614
1615     default:
1616         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1617         ret = -1;
1618         break;
1619     }
1620
1621     return ret;
1622 }
1623
1624 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1625 {
1626     CPUState *cs = CPU(cpu);
1627     uint32_t bits = tsr_bits;
1628     struct kvm_one_reg reg = {
1629         .id = KVM_REG_PPC_OR_TSR,
1630         .addr = (uintptr_t) &bits,
1631     };
1632
1633     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1634 }
1635
1636 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1637 {
1638
1639     CPUState *cs = CPU(cpu);
1640     uint32_t bits = tsr_bits;
1641     struct kvm_one_reg reg = {
1642         .id = KVM_REG_PPC_CLEAR_TSR,
1643         .addr = (uintptr_t) &bits,
1644     };
1645
1646     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1647 }
1648
1649 int kvmppc_set_tcr(PowerPCCPU *cpu)
1650 {
1651     CPUState *cs = CPU(cpu);
1652     CPUPPCState *env = &cpu->env;
1653     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1654
1655     struct kvm_one_reg reg = {
1656         .id = KVM_REG_PPC_TCR,
1657         .addr = (uintptr_t) &tcr,
1658     };
1659
1660     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1661 }
1662
1663 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1664 {
1665     CPUState *cs = CPU(cpu);
1666     int ret;
1667
1668     if (!kvm_enabled()) {
1669         return -1;
1670     }
1671
1672     if (!cap_ppc_watchdog) {
1673         printf("warning: KVM does not support watchdog");
1674         return -1;
1675     }
1676
1677     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1678     if (ret < 0) {
1679         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1680                 __func__, strerror(-ret));
1681         return ret;
1682     }
1683
1684     return ret;
1685 }
1686
/*
 * Scan /proc/cpuinfo for the first line that starts with @field and copy
 * that whole line into @value (at most @len bytes, via pstrcpy()).
 *
 * Returns 0 when such a line was found, -1 when the file cannot be opened
 * or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int found = -1;
    FILE *cpuinfo = fopen("/proc/cpuinfo", "r");

    if (cpuinfo == NULL) {
        return -1;
    }

    for (;;) {
        if (fgets(line, sizeof(line), cpuinfo) == NULL) {
            break;
        }
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            found = 0;
            break;
        }
        /* An empty read also ends the scan */
        if (*line == '\0') {
            break;
        }
    }

    fclose(cpuinfo);

    return found;
}
1714
/*
 * Return the host time base frequency as advertised by the "timebase" line
 * of /proc/cpuinfo.
 *
 * get_ticks_per_sec() is used as the fallback whenever the line is missing,
 * has no ':' separator, carries no number after the separator, or the
 * number does not fit in 32 bits.  (atoi() could not signal any of these
 * and would have produced a frequency of 0 for a malformed line.)
 */
uint32_t kvmppc_get_tbfreq(void)
{
    char line[512];
    char *ns;
    char *end;
    unsigned long freq;
    uint32_t retval = get_ticks_per_sec();

    if (read_cpuinfo("timebase", line, sizeof(line))) {
        return retval;
    }

    ns = strchr(line, ':');
    if (!ns) {
        return retval;
    }

    ns++;

    /* strtoul() skips the blanks that follow the colon */
    errno = 0;
    freq = strtoul(ns, &end, 10);
    if (end == ns || errno == ERANGE || freq > UINT32_MAX) {
        return retval;
    }

    retval = freq;
    return retval;
}
1734
/*
 * Read /proc/device-tree/system-id into *@value with g_file_get_contents()
 * and return that call's result.  The length and error out-parameters are
 * passed as NULL, so neither is reported.
 * NOTE(review): the buffer presumably has to be released with g_free() by
 * the caller - confirm against the GLib documentation.
 */
bool kvmppc_get_host_serial(char **value)
{
    return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
                               NULL);
}
1740
/*
 * Read /proc/device-tree/model into *@value with g_file_get_contents() and
 * return that call's result.  The length and error out-parameters are
 * passed as NULL, so neither is reported.
 * NOTE(review): the buffer presumably has to be released with g_free() by
 * the caller - confirm against the GLib documentation.
 */
bool kvmppc_get_host_model(char **value)
{
    return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
}
1745
1746 /* Try to find a device tree node for a CPU with clock-frequency property */
1747 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1748 {
1749     struct dirent *dirp;
1750     DIR *dp;
1751
1752     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1753         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1754         return -1;
1755     }
1756
1757     buf[0] = '\0';
1758     while ((dirp = readdir(dp)) != NULL) {
1759         FILE *f;
1760         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1761                  dirp->d_name);
1762         f = fopen(buf, "r");
1763         if (f) {
1764             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1765             fclose(f);
1766             break;
1767         }
1768         buf[0] = '\0';
1769     }
1770     closedir(dp);
1771     if (buf[0] == '\0') {
1772         printf("Unknown host!\n");
1773         return -1;
1774     }
1775
1776     return 0;
1777 }
1778
1779 /* Read a CPU node property from the host device tree that's a single
1780  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1781  * (can't find or open the property, or doesn't understand the
1782  * format) */
1783 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1784 {
1785     char buf[PATH_MAX], *tmp;
1786     union {
1787         uint32_t v32;
1788         uint64_t v64;
1789     } u;
1790     FILE *f;
1791     int len;
1792
1793     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1794         return -1;
1795     }
1796
1797     tmp = g_strdup_printf("%s/%s", buf, propname);
1798
1799     f = fopen(tmp, "rb");
1800     g_free(tmp);
1801     if (!f) {
1802         return -1;
1803     }
1804
1805     len = fread(&u, 1, sizeof(u), f);
1806     fclose(f);
1807     switch (len) {
1808     case 4:
1809         /* property is a 32-bit quantity */
1810         return be32_to_cpu(u.v32);
1811     case 8:
1812         return be64_to_cpu(u.v64);
1813     }
1814
1815     return 0;
1816 }
1817
/*
 * Return the host CPU's "clock-frequency" device tree property, with the
 * failure values of kvmppc_read_int_cpu_dt() passed through unchanged.
 */
uint64_t kvmppc_get_clockfreq(void)
{
    return kvmppc_read_int_cpu_dt("clock-frequency");
}
1822
/*
 * Return the host CPU's "ibm,vmx" device tree property.  The 64-bit result
 * of kvmppc_read_int_cpu_dt() is truncated to 32 bits, so its -1 failure
 * value comes back as 0xffffffff.
 */
uint32_t kvmppc_get_vmx(void)
{
    return kvmppc_read_int_cpu_dt("ibm,vmx");
}
1827
/*
 * Return the host CPU's "ibm,dfp" device tree property.  The 64-bit result
 * of kvmppc_read_int_cpu_dt() is truncated to 32 bits, so its -1 failure
 * value comes back as 0xffffffff.
 */
uint32_t kvmppc_get_dfp(void)
{
    return kvmppc_read_int_cpu_dt("ibm,dfp");
}
1832
1833 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1834  {
1835      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1836      CPUState *cs = CPU(cpu);
1837
1838     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1839         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1840         return 0;
1841     }
1842
1843     return 1;
1844 }
1845
1846 int kvmppc_get_hasidle(CPUPPCState *env)
1847 {
1848     struct kvm_ppc_pvinfo pvinfo;
1849
1850     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1851         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1852         return 1;
1853     }
1854
1855     return 0;
1856 }
1857
1858 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1859 {
1860     uint32_t *hc = (uint32_t*)buf;
1861     struct kvm_ppc_pvinfo pvinfo;
1862
1863     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1864         memcpy(buf, pvinfo.hcall, buf_len);
1865         return 0;
1866     }
1867
1868     /*
1869      * Fallback to always fail hypercalls regardless of endianness:
1870      *
1871      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1872      *     li r3, -1
1873      *     b .+8       (becomes nop in wrong endian)
1874      *     bswap32(li r3, -1)
1875      */
1876
1877     hc[0] = cpu_to_be32(0x08000048);
1878     hc[1] = cpu_to_be32(0x3860ffff);
1879     hc[2] = cpu_to_be32(0x48000008);
1880     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1881
1882     return 0;
1883 }
1884
1885 void kvmppc_set_papr(PowerPCCPU *cpu)
1886 {
1887     CPUState *cs = CPU(cpu);
1888     int ret;
1889
1890     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
1891     if (ret) {
1892         cpu_abort(cs, "This KVM version does not support PAPR\n");
1893     }
1894
1895     /* Update the capability flag so we sync the right information
1896      * with kvm */
1897     cap_papr = 1;
1898 }
1899
1900 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
1901 {
1902     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
1903 }
1904
1905 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
1906 {
1907     CPUState *cs = CPU(cpu);
1908     int ret;
1909
1910     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
1911     if (ret && mpic_proxy) {
1912         cpu_abort(cs, "This KVM version does not support EPR\n");
1913     }
1914 }
1915
1916 int kvmppc_smt_threads(void)
1917 {
1918     return cap_ppc_smt ? cap_ppc_smt : 1;
1919 }
1920
1921 #ifdef TARGET_PPC64
1922 off_t kvmppc_alloc_rma(void **rma)
1923 {
1924     off_t size;
1925     int fd;
1926     struct kvm_allocate_rma ret;
1927
1928     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
1929      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
1930      *                      not necessary on this hardware
1931      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
1932      *
1933      * FIXME: We should allow the user to force contiguous RMA
1934      * allocation in the cap_ppc_rma==1 case.
1935      */
1936     if (cap_ppc_rma < 2) {
1937         return 0;
1938     }
1939
1940     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
1941     if (fd < 0) {
1942         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
1943                 strerror(errno));
1944         return -1;
1945     }
1946
1947     size = MIN(ret.rma_size, 256ul << 20);
1948
1949     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
1950     if (*rma == MAP_FAILED) {
1951         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
1952         return -1;
1953     };
1954
1955     return size;
1956 }
1957
1958 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
1959 {
1960     struct kvm_ppc_smmu_info info;
1961     long rampagesize, best_page_shift;
1962     int i;
1963
1964     if (cap_ppc_rma >= 2) {
1965         return current_size;
1966     }
1967
1968     /* Find the largest hardware supported page size that's less than
1969      * or equal to the (logical) backing page size of guest RAM */
1970     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
1971     rampagesize = getrampagesize();
1972     best_page_shift = 0;
1973
1974     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
1975         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
1976
1977         if (!sps->page_shift) {
1978             continue;
1979         }
1980
1981         if ((sps->page_shift > best_page_shift)
1982             && ((1UL << sps->page_shift) <= rampagesize)) {
1983             best_page_shift = sps->page_shift;
1984         }
1985     }
1986
1987     return MIN(current_size,
1988                1ULL << (best_page_shift + hash_shift - 7));
1989 }
1990 #endif
1991
1992 bool kvmppc_spapr_use_multitce(void)
1993 {
1994     return cap_spapr_multitce;
1995 }
1996
1997 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
1998                               bool vfio_accel)
1999 {
2000     struct kvm_create_spapr_tce args = {
2001         .liobn = liobn,
2002         .window_size = window_size,
2003     };
2004     long len;
2005     int fd;
2006     void *table;
2007
2008     /* Must set fd to -1 so we don't try to munmap when called for
2009      * destroying the table, which the upper layers -will- do
2010      */
2011     *pfd = -1;
2012     if (!cap_spapr_tce || (vfio_accel && !cap_spapr_vfio)) {
2013         return NULL;
2014     }
2015
2016     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2017     if (fd < 0) {
2018         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2019                 liobn);
2020         return NULL;
2021     }
2022
2023     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2024     /* FIXME: round this up to page size */
2025
2026     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2027     if (table == MAP_FAILED) {
2028         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2029                 liobn);
2030         close(fd);
2031         return NULL;
2032     }
2033
2034     *pfd = fd;
2035     return table;
2036 }
2037
/*
 * Unmap a TCE table and close its fd.
 *
 * @table:    mapping to release
 * @fd:       table fd; a negative value means there is nothing to remove
 * @nb_table: number of 64-bit entries in the table
 *
 * Returns -1 when fd is negative, 0 otherwise.  Failures of munmap() or
 * close() are reported on stderr but do not change the return value.
 * The fd is closed even when munmap() fails, so only the mapping leaks
 * in that case.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    size_t len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if (munmap(table, len) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }
    if (close(fd) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
    }

    return 0;
}
2056
2057 int kvmppc_reset_htab(int shift_hint)
2058 {
2059     uint32_t shift = shift_hint;
2060
2061     if (!kvm_enabled()) {
2062         /* Full emulation, tell caller to allocate htab itself */
2063         return 0;
2064     }
2065     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2066         int ret;
2067         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2068         if (ret == -ENOTTY) {
2069             /* At least some versions of PR KVM advertise the
2070              * capability, but don't implement the ioctl().  Oops.
2071              * Return 0 so that we allocate the htab in qemu, as is
2072              * correct for PR. */
2073             return 0;
2074         } else if (ret < 0) {
2075             return ret;
2076         }
2077         return shift;
2078     }
2079
2080     /* We have a kernel that predates the htab reset calls.  For PR
2081      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2082      * this era, it has allocated a 16MB fixed size hash table
2083      * already.  Kernels of this era have the GET_PVINFO capability
2084      * only on PR, so we use this hack to determine the right
2085      * answer */
2086     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2087         /* PR - tell caller to allocate htab */
2088         return 0;
2089     } else {
2090         /* HV - assume 16MB kernel allocated htab */
2091         return 24;
2092     }
2093 }
2094
2095 static inline uint32_t mfpvr(void)
2096 {
2097     uint32_t pvr;
2098
2099     asm ("mfpvr %0"
2100          : "=r"(pvr));
2101     return pvr;
2102 }
2103
/*
 * Set (on == true) or clear (on == false) the bits given in flags
 * inside *word; all other bits of *word are left alone.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2112
2113 static void kvmppc_host_cpu_initfn(Object *obj)
2114 {
2115     assert(kvm_enabled());
2116 }
2117
2118 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2119 {
2120     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2121     uint32_t vmx = kvmppc_get_vmx();
2122     uint32_t dfp = kvmppc_get_dfp();
2123     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2124     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2125
2126     /* Now fix up the class with information we can query from the host */
2127     pcc->pvr = mfpvr();
2128
2129     if (vmx != -1) {
2130         /* Only override when we know what the host supports */
2131         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2132         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2133     }
2134     if (dfp != -1) {
2135         /* Only override when we know what the host supports */
2136         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2137     }
2138
2139     if (dcache_size != -1) {
2140         pcc->l1_dcache_size = dcache_size;
2141     }
2142
2143     if (icache_size != -1) {
2144         pcc->l1_icache_size = icache_size;
2145     }
2146 }
2147
2148 bool kvmppc_has_cap_epr(void)
2149 {
2150     return cap_epr;
2151 }
2152
2153 bool kvmppc_has_cap_htab_fd(void)
2154 {
2155     return cap_htab_fd;
2156 }
2157
2158 bool kvmppc_has_cap_fixup_hcalls(void)
2159 {
2160     return cap_fixup_hcalls;
2161 }
2162
2163 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2164 {
2165     ObjectClass *oc = OBJECT_CLASS(pcc);
2166
2167     while (oc && !object_class_is_abstract(oc)) {
2168         oc = object_class_get_parent(oc);
2169     }
2170     assert(oc);
2171
2172     return POWERPC_CPU_CLASS(oc);
2173 }
2174
2175 static int kvm_ppc_register_host_cpu_type(void)
2176 {
2177     TypeInfo type_info = {
2178         .name = TYPE_HOST_POWERPC_CPU,
2179         .instance_init = kvmppc_host_cpu_initfn,
2180         .class_init = kvmppc_host_cpu_class_init,
2181     };
2182     uint32_t host_pvr = mfpvr();
2183     PowerPCCPUClass *pvr_pcc;
2184     DeviceClass *dc;
2185
2186     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2187     if (pvr_pcc == NULL) {
2188         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2189     }
2190     if (pvr_pcc == NULL) {
2191         return -1;
2192     }
2193     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2194     type_register(&type_info);
2195
2196     /* Register generic family CPU class for a family */
2197     pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2198     dc = DEVICE_CLASS(pvr_pcc);
2199     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2200     type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2201     type_register(&type_info);
2202
2203     return 0;
2204 }
2205
2206 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2207 {
2208     struct kvm_rtas_token_args args = {
2209         .token = token,
2210     };
2211
2212     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2213         return -ENOENT;
2214     }
2215
2216     strncpy(args.name, function, sizeof(args.name));
2217
2218     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2219 }
2220
2221 int kvmppc_get_htab_fd(bool write)
2222 {
2223     struct kvm_get_htab_fd s = {
2224         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2225         .start_index = 0,
2226     };
2227
2228     if (!cap_htab_fd) {
2229         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2230         return -1;
2231     }
2232
2233     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2234 }
2235
2236 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2237 {
2238     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2239     uint8_t buf[bufsize];
2240     ssize_t rc;
2241
2242     do {
2243         rc = read(fd, buf, bufsize);
2244         if (rc < 0) {
2245             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2246                     strerror(errno));
2247             return rc;
2248         } else if (rc) {
2249             /* Kernel already retuns data in BE format for the file */
2250             qemu_put_buffer(f, buf, rc);
2251         }
2252     } while ((rc != 0)
2253              && ((max_ns < 0)
2254                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2255
2256     return (rc == 0) ? 1 : 0;
2257 }
2258
2259 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2260                            uint16_t n_valid, uint16_t n_invalid)
2261 {
2262     struct kvm_get_htab_header *buf;
2263     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2264     ssize_t rc;
2265
2266     buf = alloca(chunksize);
2267     /* This is KVM on ppc, so this is all big-endian */
2268     buf->index = index;
2269     buf->n_valid = n_valid;
2270     buf->n_invalid = n_invalid;
2271
2272     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2273
2274     rc = write(fd, buf, chunksize);
2275     if (rc < 0) {
2276         fprintf(stderr, "Error writing KVM hash table: %s\n",
2277                 strerror(errno));
2278         return rc;
2279     }
2280     if (rc != chunksize) {
2281         /* We should never get a short write on a single chunk */
2282         fprintf(stderr, "Short write, restoring KVM hash table\n");
2283         return -1;
2284     }
2285     return 0;
2286 }
2287
2288 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2289 {
2290     return true;
2291 }
2292
2293 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2294 {
2295     return 1;
2296 }
2297
/* Arch hook: unconditionally returns 1; no argument is examined. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}
2302
2303 void kvm_arch_init_irq_routing(KVMState *s)
2304 {
2305 }
2306
2307 struct kvm_get_htab_buf {
2308     struct kvm_get_htab_header header;
2309     /*
2310      * We require one extra byte for read
2311      */
2312     target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2313 };
2314
2315 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2316 {
2317     int htab_fd;
2318     struct kvm_get_htab_fd ghf;
2319     struct kvm_get_htab_buf  *hpte_buf;
2320
2321     ghf.flags = 0;
2322     ghf.start_index = pte_index;
2323     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2324     if (htab_fd < 0) {
2325         goto error_out;
2326     }
2327
2328     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2329     /*
2330      * Read the hpte group
2331      */
2332     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2333         goto out_close;
2334     }
2335
2336     close(htab_fd);
2337     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2338
2339 out_close:
2340     g_free(hpte_buf);
2341     close(htab_fd);
2342 error_out:
2343     return 0;
2344 }
2345
2346 void kvmppc_hash64_free_pteg(uint64_t token)
2347 {
2348     struct kvm_get_htab_buf *htab_buf;
2349
2350     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2351                             hpte);
2352     g_free(htab_buf);
2353     return;
2354 }
2355
2356 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2357                              target_ulong pte0, target_ulong pte1)
2358 {
2359     int htab_fd;
2360     struct kvm_get_htab_fd ghf;
2361     struct kvm_get_htab_buf hpte_buf;
2362
2363     ghf.flags = 0;
2364     ghf.start_index = 0;     /* Ignored */
2365     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2366     if (htab_fd < 0) {
2367         goto error_out;
2368     }
2369
2370     hpte_buf.header.n_valid = 1;
2371     hpte_buf.header.n_invalid = 0;
2372     hpte_buf.header.index = pte_index;
2373     hpte_buf.hpte[0] = pte0;
2374     hpte_buf.hpte[1] = pte1;
2375     /*
2376      * Write the hpte entry.
2377      * CAUTION: write() has the warn_unused_result attribute. Hence we
2378      * need to check the return value, even though we do nothing.
2379      */
2380     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2381         goto out_close;
2382     }
2383
2384 out_close:
2385     close(htab_fd);
2386     return;
2387
2388 error_out:
2389     return;
2390 }