target-ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include "qemu/osdep.h"
  18 #include <dirent.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/vfs.h>
  21
  22 #include <linux/kvm.h>
  23
  24 #include "qemu-common.h"
  25 #include "qemu/error-report.h"
  26 #include "cpu.h"
  27 #include "qemu/timer.h"
  28 #include "sysemu/sysemu.h"
  29 #include "sysemu/kvm.h"
  30 #include "sysemu/numa.h"
  31 #include "kvm_ppc.h"
  32 #include "sysemu/cpus.h"
  33 #include "sysemu/device_tree.h"
  34 #include "mmu-hash64.h"
  35
  36 #include "hw/sysbus.h"
  37 #include "hw/ppc/spapr.h"
  38 #include "hw/ppc/spapr_vio.h"
  39 #include "hw/ppc/ppc.h"
  40 #include "sysemu/watchdog.h"
  41 #include "trace.h"
  42 #include "exec/gdbstub.h"
  43 #include "exec/memattrs.h"
  44 #include "sysemu/hostmem.h"
  45 #include "qemu/cutils.h"
  46 #if defined(TARGET_PPC64)
  47 #include "hw/ppc/spapr_cpu_core.h"
  48 #endif
  49
  50 //#define DEBUG_KVM
  51
  52 #ifdef DEBUG_KVM
  53 #define DPRINTF(fmt, ...) \
  54     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  55 #else
  56 #define DPRINTF(fmt, ...) \
  57     do { } while (0)
  58 #endif
  59
  60 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  61
  62 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  63     KVM_CAP_LAST_INFO
  64 };
  65
  66 static int cap_interrupt_unset = false;
  67 static int cap_interrupt_level = false;
  68 static int cap_segstate;
  69 static int cap_booke_sregs;
  70 static int cap_ppc_smt;
  71 static int cap_ppc_rma;
  72 static int cap_spapr_tce;
  73 static int cap_spapr_multitce;
  74 static int cap_spapr_vfio;
  75 static int cap_hior;
  76 static int cap_one_reg;
  77 static int cap_epr;
  78 static int cap_ppc_watchdog;
  79 static int cap_papr;
  80 static int cap_htab_fd;
  81 static int cap_fixup_hcalls;
  82
  83 static uint32_t debug_inst_opcode;
  84
  85 /* XXX We have a race condition where we actually have a level triggered
  86  *     interrupt, but the infrastructure can't expose that yet, so the guest
  87  *     takes but ignores it, goes to sleep and never gets notified that there's
  88  *     still an interrupt pending.
  89  *
  90  *     As a quick workaround, let's just wake up again 20 ms after we injected
  91  *     an interrupt. That way we can assure that we're always reinjecting
  92  *     interrupts in case the guest swallowed them.
  93  */
  94 static QEMUTimer *idle_timer;
  95
  96 static void kvm_kick_cpu(void *opaque)
  97 {
  98     PowerPCCPU *cpu = opaque;
  99
 100     qemu_cpu_kick(CPU(cpu));
 101 }
 102
 103 static int kvm_ppc_register_host_cpu_type(void);
 104
 105 int kvm_arch_init(MachineState *ms, KVMState *s)
 106 {
 107     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 108     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 109     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 110     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 111     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 112     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 113     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 114     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 115     cap_spapr_vfio = false;
 116     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 117     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 118     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 119     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 120     /* Note: we don't set cap_papr here, because this capability is
 121      * only activated after this by kvmppc_set_papr() */
 122     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 123     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 124
 125     if (!cap_interrupt_level) {
 126         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 127                         "VM to stall at times!\n");
 128     }
 129
 130     kvm_ppc_register_host_cpu_type();
 131
 132     return 0;
 133 }
 134
 135 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 136 {
 137     CPUPPCState *cenv = &cpu->env;
 138     CPUState *cs = CPU(cpu);
 139     struct kvm_sregs sregs;
 140     int ret;
 141
 142     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 143         /* What we're really trying to say is "if we're on BookE, we use
 144            the native PVR for now". This is the only sane way to check
 145            it though, so we potentially confuse users that they can run
 146            BookE guests on BookS. Let's hope nobody dares enough :) */
 147         return 0;
 148     } else {
 149         if (!cap_segstate) {
 150             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 151             return -ENOSYS;
 152         }
 153     }
 154
 155     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 156     if (ret) {
 157         return ret;
 158     }
 159
 160     sregs.pvr = cenv->spr[SPR_PVR];
 161     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 162 }
 163
 164 /* Set up a shared TLB array with KVM */
 165 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 166 {
 167     CPUPPCState *env = &cpu->env;
 168     CPUState *cs = CPU(cpu);
 169     struct kvm_book3e_206_tlb_params params = {};
 170     struct kvm_config_tlb cfg = {};
 171     unsigned int entries = 0;
 172     int ret, i;
 173
 174     if (!kvm_enabled() ||
 175         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 176         return 0;
 177     }
 178
 179     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 180
 181     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 182         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 183         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 184         entries += params.tlb_sizes[i];
 185     }
 186
 187     assert(entries == env->nb_tlb);
 188     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 189
 190     env->tlb_dirty = true;
 191
 192     cfg.array = (uintptr_t)env->tlb.tlbm;
 193     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 194     cfg.params = (uintptr_t)&params;
 195     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 196
 197     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 198     if (ret < 0) {
 199         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 200                 __func__, strerror(-ret));
 201         return ret;
 202     }
 203
 204     env->kvm_sw_tlb = true;
 205     return 0;
 206 }
 207
 208
 209 #if defined(TARGET_PPC64)
 210 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 211                                        struct kvm_ppc_smmu_info *info)
 212 {
 213     CPUPPCState *env = &cpu->env;
 214     CPUState *cs = CPU(cpu);
 215
 216     memset(info, 0, sizeof(*info));
 217
 218     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 219      * need to "guess" what the supported page sizes are.
 220      *
 221      * For that to work we make a few assumptions:
 222      *
 223      * - If KVM_CAP_PPC_GET_PVINFO is supported we are running "PR"
 224      *   KVM which only supports 4K and 16M pages, but supports them
 225      *   regardless of the backing store characteritics. We also don't
 226      *   support 1T segments.
 227      *
 228      *   This is safe as if HV KVM ever supports that capability or PR
 229      *   KVM grows supports for more page/segment sizes, those versions
 230      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 231      *   will not hit this fallback
 232      *
 233      * - Else we are running HV KVM. This means we only support page
 234      *   sizes that fit in the backing store. Additionally we only
 235      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 236      *   P7 encodings for the SLB and hash table. Here too, we assume
 237      *   support for any newer processor will mean a kernel that
 238      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 239      *   this fallback.
 240      */
 241     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
 242         /* No flags */
 243         info->flags = 0;
 244         info->slb_size = 64;
 245
 246         /* Standard 4k base page size segment */
 247         info->sps[0].page_shift = 12;
 248         info->sps[0].slb_enc = 0;
 249         info->sps[0].enc[0].page_shift = 12;
 250         info->sps[0].enc[0].pte_enc = 0;
 251
 252         /* Standard 16M large page size segment */
 253         info->sps[1].page_shift = 24;
 254         info->sps[1].slb_enc = SLB_VSID_L;
 255         info->sps[1].enc[0].page_shift = 24;
 256         info->sps[1].enc[0].pte_enc = 0;
 257     } else {
 258         int i = 0;
 259
 260         /* HV KVM has backing store size restrictions */
 261         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 262
 263         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 264             info->flags |= KVM_PPC_1T_SEGMENTS;
 265         }
 266
 267         if (env->mmu_model == POWERPC_MMU_2_06 ||
 268             env->mmu_model == POWERPC_MMU_2_07) {
 269             info->slb_size = 32;
 270         } else {
 271             info->slb_size = 64;
 272         }
 273
 274         /* Standard 4k base page size segment */
 275         info->sps[i].page_shift = 12;
 276         info->sps[i].slb_enc = 0;
 277         info->sps[i].enc[0].page_shift = 12;
 278         info->sps[i].enc[0].pte_enc = 0;
 279         i++;
 280
 281         /* 64K on MMU 2.06 and later */
 282         if (env->mmu_model == POWERPC_MMU_2_06 ||
 283             env->mmu_model == POWERPC_MMU_2_07) {
 284             info->sps[i].page_shift = 16;
 285             info->sps[i].slb_enc = 0x110;
 286             info->sps[i].enc[0].page_shift = 16;
 287             info->sps[i].enc[0].pte_enc = 1;
 288             i++;
 289         }
 290
 291         /* Standard 16M large page size segment */
 292         info->sps[i].page_shift = 24;
 293         info->sps[i].slb_enc = SLB_VSID_L;
 294         info->sps[i].enc[0].page_shift = 24;
 295         info->sps[i].enc[0].pte_enc = 0;
 296     }
 297 }
 298
 299 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 300 {
 301     CPUState *cs = CPU(cpu);
 302     int ret;
 303
 304     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 305         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 306         if (ret == 0) {
 307             return;
 308         }
 309     }
 310
 311     kvm_get_fallback_smmu_info(cpu, info);
 312 }
 313
 314 static long gethugepagesize(const char *mem_path)
 315 {
 316     struct statfs fs;
 317     int ret;
 318
 319     do {
 320         ret = statfs(mem_path, &fs);
 321     } while (ret != 0 && errno == EINTR);
 322
 323     if (ret != 0) {
 324         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 325                 strerror(errno));
 326         exit(1);
 327     }
 328
 329 #define HUGETLBFS_MAGIC       0x958458f6
 330
 331     if (fs.f_type != HUGETLBFS_MAGIC) {
 332         /* Explicit mempath, but it's ordinary pages */
 333         return getpagesize();
 334     }
 335
 336     /* It's hugepage, return the huge page size */
 337     return fs.f_bsize;
 338 }
 339
 340 /*
 341  * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
 342  * may or may not name the same files / on the same filesystem now as
 343  * when we actually open and map them.  Iterate over the file
 344  * descriptors instead, and use qemu_fd_getpagesize().
 345  */
 346 static int find_max_supported_pagesize(Object *obj, void *opaque)
 347 {
 348     char *mem_path;
 349     long *hpsize_min = opaque;
 350
 351     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
 352         mem_path = object_property_get_str(obj, "mem-path", NULL);
 353         if (mem_path) {
 354             long hpsize = gethugepagesize(mem_path);
 355             if (hpsize < *hpsize_min) {
 356                 *hpsize_min = hpsize;
 357             }
 358         } else {
 359             *hpsize_min = getpagesize();
 360         }
 361     }
 362
 363     return 0;
 364 }
 365
 366 static long getrampagesize(void)
 367 {
 368     long hpsize = LONG_MAX;
 369     Object *memdev_root;
 370
 371     if (mem_path) {
 372         return gethugepagesize(mem_path);
 373     }
 374
 375     /* it's possible we have memory-backend objects with
 376      * hugepage-backed RAM. these may get mapped into system
 377      * address space via -numa parameters or memory hotplug
 378      * hooks. we want to take these into account, but we
 379      * also want to make sure these supported hugepage
 380      * sizes are applicable across the entire range of memory
 381      * we may boot from, so we take the min across all
 382      * backends, and assume normal pages in cases where a
 383      * backend isn't backed by hugepages.
 384      */
 385     memdev_root = object_resolve_path("/objects", NULL);
 386     if (!memdev_root) {
 387         return getpagesize();
 388     }
 389
 390     object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
 391
 392     if (hpsize == LONG_MAX) {
 393         return getpagesize();
 394     }
 395
 396     if (nb_numa_nodes == 0 && hpsize > getpagesize()) {
 397         /* No NUMA nodes and normal RAM without -mem-path ==> no huge pages! */
 398         static bool warned;
 399         if (!warned) {
 400             error_report("Huge page support disabled (n/a for main memory).");
 401             warned = true;
 402         }
 403         return getpagesize();
 404     }
 405
 406     return hpsize;
 407 }
 408
 409 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 410 {
 411     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 412         return true;
 413     }
 414
 415     return (1ul << shift) <= rampgsize;
 416 }
 417
 418 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 419 {
 420     static struct kvm_ppc_smmu_info smmu_info;
 421     static bool has_smmu_info;
 422     CPUPPCState *env = &cpu->env;
 423     long rampagesize;
 424     int iq, ik, jq, jk;
 425
 426     /* We only handle page sizes for 64-bit server guests for now */
 427     if (!(env->mmu_model & POWERPC_MMU_64)) {
 428         return;
 429     }
 430
 431     /* Collect MMU info from kernel if not already */
 432     if (!has_smmu_info) {
 433         kvm_get_smmu_info(cpu, &smmu_info);
 434         has_smmu_info = true;
 435     }
 436
 437     rampagesize = getrampagesize();
 438
 439     /* Convert to QEMU form */
 440     memset(&env->sps, 0, sizeof(env->sps));
 441
 442     /* If we have HV KVM, we need to forbid CI large pages if our
 443      * host page size is smaller than 64K.
 444      */
 445     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 446         env->ci_large_pages = getpagesize() >= 0x10000;
 447     }
 448
 449     /*
 450      * XXX This loop should be an entry wide AND of the capabilities that
 451      *     the selected CPU has with the capabilities that KVM supports.
 452      */
 453     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 454         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 455         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 456
 457         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 458                                  ksps->page_shift)) {
 459             continue;
 460         }
 461         qsps->page_shift = ksps->page_shift;
 462         qsps->slb_enc = ksps->slb_enc;
 463         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 464             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 465                                      ksps->enc[jk].page_shift)) {
 466                 continue;
 467             }
 468             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 469             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 470             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 471                 break;
 472             }
 473         }
 474         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 475             break;
 476         }
 477     }
 478     env->slb_nr = smmu_info.slb_size;
 479     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 480         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 481     }
 482 }
 483 #else /* defined (TARGET_PPC64) */
 484
 485 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 486 {
 487 }
 488
 489 #endif /* !defined (TARGET_PPC64) */
 490
 491 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 492 {
 493     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 494 }
 495
 496 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
 497  * book3s supports only 1 watchpoint, so array size
 498  * of 4 is sufficient for now.
 499  */
 500 #define MAX_HW_BKPTS 4
 501
 502 static struct HWBreakpoint {
 503     target_ulong addr;
 504     int type;
 505 } hw_debug_points[MAX_HW_BKPTS];
 506
 507 static CPUWatchpoint hw_watchpoint;
 508
 509 /* Default there is no breakpoint and watchpoint supported */
 510 static int max_hw_breakpoint;
 511 static int max_hw_watchpoint;
 512 static int nb_hw_breakpoint;
 513 static int nb_hw_watchpoint;
 514
 515 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 516 {
 517     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 518         max_hw_breakpoint = 2;
 519         max_hw_watchpoint = 2;
 520     }
 521
 522     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 523         fprintf(stderr, "Error initializing h/w breakpoints\n");
 524         return;
 525     }
 526 }
 527
 528 int kvm_arch_init_vcpu(CPUState *cs)
 529 {
 530     PowerPCCPU *cpu = POWERPC_CPU(cs);
 531     CPUPPCState *cenv = &cpu->env;
 532     int ret;
 533
 534     /* Gather server mmu info from KVM and update the CPU state */
 535     kvm_fixup_page_sizes(cpu);
 536
 537     /* Synchronize sregs with kvm */
 538     ret = kvm_arch_sync_sregs(cpu);
 539     if (ret) {
 540         if (ret == -EINVAL) {
 541             error_report("Register sync failed... If you're using kvm-hv.ko,"
 542                          " only \"-cpu host\" is possible");
 543         }
 544         return ret;
 545     }
 546
 547     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 548
 549     /* Some targets support access to KVM's guest TLB. */
 550     switch (cenv->mmu_model) {
 551     case POWERPC_MMU_BOOKE206:
 552         ret = kvm_booke206_tlb_init(cpu);
 553         break;
 554     default:
 555         break;
 556     }
 557
 558     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 559     kvmppc_hw_debug_points_init(cenv);
 560
 561     return ret;
 562 }
 563
 564 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 565 {
 566     CPUPPCState *env = &cpu->env;
 567     CPUState *cs = CPU(cpu);
 568     struct kvm_dirty_tlb dirty_tlb;
 569     unsigned char *bitmap;
 570     int ret;
 571
 572     if (!env->kvm_sw_tlb) {
 573         return;
 574     }
 575
 576     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 577     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 578
 579     dirty_tlb.bitmap = (uintptr_t)bitmap;
 580     dirty_tlb.num_dirty = env->nb_tlb;
 581
 582     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 583     if (ret) {
 584         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 585                 __func__, strerror(-ret));
 586     }
 587
 588     g_free(bitmap);
 589 }
 590
 591 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 592 {
 593     PowerPCCPU *cpu = POWERPC_CPU(cs);
 594     CPUPPCState *env = &cpu->env;
 595     union {
 596         uint32_t u32;
 597         uint64_t u64;
 598     } val;
 599     struct kvm_one_reg reg = {
 600         .id = id,
 601         .addr = (uintptr_t) &val,
 602     };
 603     int ret;
 604
 605     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 606     if (ret != 0) {
 607         trace_kvm_failed_spr_get(spr, strerror(errno));
 608     } else {
 609         switch (id & KVM_REG_SIZE_MASK) {
 610         case KVM_REG_SIZE_U32:
 611             env->spr[spr] = val.u32;
 612             break;
 613
 614         case KVM_REG_SIZE_U64:
 615             env->spr[spr] = val.u64;
 616             break;
 617
 618         default:
 619             /* Don't handle this size yet */
 620             abort();
 621         }
 622     }
 623 }
 624
 625 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 626 {
 627     PowerPCCPU *cpu = POWERPC_CPU(cs);
 628     CPUPPCState *env = &cpu->env;
 629     union {
 630         uint32_t u32;
 631         uint64_t u64;
 632     } val;
 633     struct kvm_one_reg reg = {
 634         .id = id,
 635         .addr = (uintptr_t) &val,
 636     };
 637     int ret;
 638
 639     switch (id & KVM_REG_SIZE_MASK) {
 640     case KVM_REG_SIZE_U32:
 641         val.u32 = env->spr[spr];
 642         break;
 643
 644     case KVM_REG_SIZE_U64:
 645         val.u64 = env->spr[spr];
 646         break;
 647
 648     default:
 649         /* Don't handle this size yet */
 650         abort();
 651     }
 652
 653     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 654     if (ret != 0) {
 655         trace_kvm_failed_spr_set(spr, strerror(errno));
 656     }
 657 }
 658
 659 static int kvm_put_fp(CPUState *cs)
 660 {
 661     PowerPCCPU *cpu = POWERPC_CPU(cs);
 662     CPUPPCState *env = &cpu->env;
 663     struct kvm_one_reg reg;
 664     int i;
 665     int ret;
 666
 667     if (env->insns_flags & PPC_FLOAT) {
 668         uint64_t fpscr = env->fpscr;
 669         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 670
 671         reg.id = KVM_REG_PPC_FPSCR;
 672         reg.addr = (uintptr_t)&fpscr;
 673         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 674         if (ret < 0) {
 675             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 676             return ret;
 677         }
 678
 679         for (i = 0; i < 32; i++) {
 680             uint64_t vsr[2];
 681
 682 #ifdef HOST_WORDS_BIGENDIAN
 683             vsr[0] = float64_val(env->fpr[i]);
 684             vsr[1] = env->vsr[i];
 685 #else
 686             vsr[0] = env->vsr[i];
 687             vsr[1] = float64_val(env->fpr[i]);
 688 #endif
 689             reg.addr = (uintptr_t) &vsr;
 690             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 691
 692             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 693             if (ret < 0) {
 694                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 695                         i, strerror(errno));
 696                 return ret;
 697             }
 698         }
 699     }
 700
 701     if (env->insns_flags & PPC_ALTIVEC) {
 702         reg.id = KVM_REG_PPC_VSCR;
 703         reg.addr = (uintptr_t)&env->vscr;
 704         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 705         if (ret < 0) {
 706             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 707             return ret;
 708         }
 709
 710         for (i = 0; i < 32; i++) {
 711             reg.id = KVM_REG_PPC_VR(i);
 712             reg.addr = (uintptr_t)&env->avr[i];
 713             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 714             if (ret < 0) {
 715                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 716                 return ret;
 717             }
 718         }
 719     }
 720
 721     return 0;
 722 }
 723
 724 static int kvm_get_fp(CPUState *cs)
 725 {
 726     PowerPCCPU *cpu = POWERPC_CPU(cs);
 727     CPUPPCState *env = &cpu->env;
 728     struct kvm_one_reg reg;
 729     int i;
 730     int ret;
 731
 732     if (env->insns_flags & PPC_FLOAT) {
 733         uint64_t fpscr;
 734         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 735
 736         reg.id = KVM_REG_PPC_FPSCR;
 737         reg.addr = (uintptr_t)&fpscr;
 738         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 739         if (ret < 0) {
 740             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 741             return ret;
 742         } else {
 743             env->fpscr = fpscr;
 744         }
 745
 746         for (i = 0; i < 32; i++) {
 747             uint64_t vsr[2];
 748
 749             reg.addr = (uintptr_t) &vsr;
 750             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 751
 752             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 753             if (ret < 0) {
 754                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 755                         vsx ? "VSR" : "FPR", i, strerror(errno));
 756                 return ret;
 757             } else {
 758 #ifdef HOST_WORDS_BIGENDIAN
 759                 env->fpr[i] = vsr[0];
 760                 if (vsx) {
 761                     env->vsr[i] = vsr[1];
 762                 }
 763 #else
 764                 env->fpr[i] = vsr[1];
 765                 if (vsx) {
 766                     env->vsr[i] = vsr[0];
 767                 }
 768 #endif
 769             }
 770         }
 771     }
 772
 773     if (env->insns_flags & PPC_ALTIVEC) {
 774         reg.id = KVM_REG_PPC_VSCR;
 775         reg.addr = (uintptr_t)&env->vscr;
 776         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 777         if (ret < 0) {
 778             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 779             return ret;
 780         }
 781
 782         for (i = 0; i < 32; i++) {
 783             reg.id = KVM_REG_PPC_VR(i);
 784             reg.addr = (uintptr_t)&env->avr[i];
 785             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 786             if (ret < 0) {
 787                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 788                         i, strerror(errno));
 789                 return ret;
 790             }
 791         }
 792     }
 793
 794     return 0;
 795 }
 796
 797 #if defined(TARGET_PPC64)
 798 static int kvm_get_vpa(CPUState *cs)
 799 {
 800     PowerPCCPU *cpu = POWERPC_CPU(cs);
 801     CPUPPCState *env = &cpu->env;
 802     struct kvm_one_reg reg;
 803     int ret;
 804
 805     reg.id = KVM_REG_PPC_VPA_ADDR;
 806     reg.addr = (uintptr_t)&env->vpa_addr;
 807     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 808     if (ret < 0) {
 809         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 810         return ret;
 811     }
 812
 813     assert((uintptr_t)&env->slb_shadow_size
 814            == ((uintptr_t)&env->slb_shadow_addr + 8));
 815     reg.id = KVM_REG_PPC_VPA_SLB;
 816     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 817     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 818     if (ret < 0) {
 819         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 820                 strerror(errno));
 821         return ret;
 822     }
 823
 824     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 825     reg.id = KVM_REG_PPC_VPA_DTL;
 826     reg.addr = (uintptr_t)&env->dtl_addr;
 827     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 828     if (ret < 0) {
 829         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 830                 strerror(errno));
 831         return ret;
 832     }
 833
 834     return 0;
 835 }
 836
 837 static int kvm_put_vpa(CPUState *cs)
 838 {
 839     PowerPCCPU *cpu = POWERPC_CPU(cs);
 840     CPUPPCState *env = &cpu->env;
 841     struct kvm_one_reg reg;
 842     int ret;
 843
 844     /* SLB shadow or DTL can't be registered unless a master VPA is
 845      * registered.  That means when restoring state, if a VPA *is*
 846      * registered, we need to set that up first.  If not, we need to
 847      * deregister the others before deregistering the master VPA */
 848     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 849
 850     if (env->vpa_addr) {
 851         reg.id = KVM_REG_PPC_VPA_ADDR;
 852         reg.addr = (uintptr_t)&env->vpa_addr;
 853         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 854         if (ret < 0) {
 855             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 856             return ret;
 857         }
 858     }
 859
 860     assert((uintptr_t)&env->slb_shadow_size
 861            == ((uintptr_t)&env->slb_shadow_addr + 8));
 862     reg.id = KVM_REG_PPC_VPA_SLB;
 863     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 864     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 865     if (ret < 0) {
 866         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 867         return ret;
 868     }
 869
 870     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 871     reg.id = KVM_REG_PPC_VPA_DTL;
 872     reg.addr = (uintptr_t)&env->dtl_addr;
 873     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 874     if (ret < 0) {
 875         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 876                 strerror(errno));
 877         return ret;
 878     }
 879
 880     if (!env->vpa_addr) {
 881         reg.id = KVM_REG_PPC_VPA_ADDR;
 882         reg.addr = (uintptr_t)&env->vpa_addr;
 883         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 884         if (ret < 0) {
 885             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 886             return ret;
 887         }
 888     }
 889
 890     return 0;
 891 }
 892 #endif /* TARGET_PPC64 */
 893
 894 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 895 {
 896     CPUPPCState *env = &cpu->env;
 897     struct kvm_sregs sregs;
 898     int i;
 899
 900     sregs.pvr = env->spr[SPR_PVR];
 901
 902     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 903
 904     /* Sync SLB */
 905 #ifdef TARGET_PPC64
 906     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 907         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 908         if (env->slb[i].esid & SLB_ESID_V) {
 909             sregs.u.s.ppc64.slb[i].slbe |= i;
 910         }
 911         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 912     }
 913 #endif
 914
 915     /* Sync SRs */
 916     for (i = 0; i < 16; i++) {
 917         sregs.u.s.ppc32.sr[i] = env->sr[i];
 918     }
 919
 920     /* Sync BATs */
 921     for (i = 0; i < 8; i++) {
 922         /* Beware. We have to swap upper and lower bits here */
 923         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 924             | env->DBAT[1][i];
 925         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 926             | env->IBAT[1][i];
 927     }
 928
 929     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 930 }
 931
 932 int kvm_arch_put_registers(CPUState *cs, int level)
 933 {
 934     PowerPCCPU *cpu = POWERPC_CPU(cs);
 935     CPUPPCState *env = &cpu->env;
 936     struct kvm_regs regs;
 937     int ret;
 938     int i;
 939
 940     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 941     if (ret < 0) {
 942         return ret;
 943     }
 944
 945     regs.ctr = env->ctr;
 946     regs.lr  = env->lr;
 947     regs.xer = cpu_read_xer(env);
 948     regs.msr = env->msr;
 949     regs.pc = env->nip;
 950
 951     regs.srr0 = env->spr[SPR_SRR0];
 952     regs.srr1 = env->spr[SPR_SRR1];
 953
 954     regs.sprg0 = env->spr[SPR_SPRG0];
 955     regs.sprg1 = env->spr[SPR_SPRG1];
 956     regs.sprg2 = env->spr[SPR_SPRG2];
 957     regs.sprg3 = env->spr[SPR_SPRG3];
 958     regs.sprg4 = env->spr[SPR_SPRG4];
 959     regs.sprg5 = env->spr[SPR_SPRG5];
 960     regs.sprg6 = env->spr[SPR_SPRG6];
 961     regs.sprg7 = env->spr[SPR_SPRG7];
 962
 963     regs.pid = env->spr[SPR_BOOKE_PID];
 964
 965     for (i = 0;i < 32; i++)
 966         regs.gpr[i] = env->gpr[i];
 967
 968     regs.cr = 0;
 969     for (i = 0; i < 8; i++) {
 970         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
 971     }
 972
 973     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
 974     if (ret < 0)
 975         return ret;
 976
 977     kvm_put_fp(cs);
 978
 979     if (env->tlb_dirty) {
 980         kvm_sw_tlb_put(cpu);
 981         env->tlb_dirty = false;
 982     }
 983
 984     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
 985         ret = kvmppc_put_books_sregs(cpu);
 986         if (ret < 0) {
 987             return ret;
 988         }
 989     }
 990
 991     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
 992         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
 993     }
 994
 995     if (cap_one_reg) {
 996         int i;
 997
 998         /* We deliberately ignore errors here, for kernels which have
 999          * the ONE_REG calls, but don't support the specific
1000          * registers, there's a reasonable chance things will still
1001          * work, at least until we try to migrate. */
1002         for (i = 0; i < 1024; i++) {
1003             uint64_t id = env->spr_cb[i].one_reg_id;
1004
1005             if (id != 0) {
1006                 kvm_put_one_spr(cs, id, i);
1007             }
1008         }
1009
1010 #ifdef TARGET_PPC64
1011         if (msr_ts) {
1012             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1013                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1014             }
1015             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1016                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1017             }
1018             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1019             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1020             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1021             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1022             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1023             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1024             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1025             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1026             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1027             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1028         }
1029
1030         if (cap_papr) {
1031             if (kvm_put_vpa(cs) < 0) {
1032                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1033             }
1034         }
1035
1036         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1037 #endif /* TARGET_PPC64 */
1038     }
1039
1040     return ret;
1041 }
1042
1043 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1044 {
1045      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1046 }
1047
1048 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1049 {
1050     CPUPPCState *env = &cpu->env;
1051     struct kvm_sregs sregs;
1052     int ret;
1053
1054     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1055     if (ret < 0) {
1056         return ret;
1057     }
1058
1059     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1060         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1061         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1062         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1063         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1064         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1065         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1066         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1067         env->spr[SPR_DECR] = sregs.u.e.dec;
1068         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1069         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1070         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1071     }
1072
1073     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1074         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1075         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1076         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1077         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1078         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1079     }
1080
1081     if (sregs.u.e.features & KVM_SREGS_E_64) {
1082         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1083     }
1084
1085     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1086         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1087     }
1088
1089     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1090         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1091         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1092         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1093         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1094         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1095         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1096         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1097         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1098         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1099         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1100         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1101         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1102         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1103         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1104         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1105         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1106         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1107         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1108         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1109         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1110         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1111         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1112         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1113         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1114         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1115         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1116         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1117         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1118         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1119         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1120         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1121         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1122
1123         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1124             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1125             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1126             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1127             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1128             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1129             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1130         }
1131
1132         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1133             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1134             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1135         }
1136
1137         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1138             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1139             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1140             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1141             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1142         }
1143     }
1144
1145     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1146         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1147         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1148         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1149         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1150         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1151         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1152         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1153         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1154         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1155         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1156     }
1157
1158     if (sregs.u.e.features & KVM_SREGS_EXP) {
1159         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1160     }
1161
1162     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1163         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1164         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1165     }
1166
1167     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1168         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1169         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1170         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1171
1172         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1173             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1174             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1175         }
1176     }
1177
1178     return 0;
1179 }
1180
1181 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1182 {
1183     CPUPPCState *env = &cpu->env;
1184     struct kvm_sregs sregs;
1185     int ret;
1186     int i;
1187
1188     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1189     if (ret < 0) {
1190         return ret;
1191     }
1192
1193     if (!env->external_htab) {
1194         ppc_store_sdr1(env, sregs.u.s.sdr1);
1195     }
1196
1197     /* Sync SLB */
1198 #ifdef TARGET_PPC64
1199     /*
1200      * The packed SLB array we get from KVM_GET_SREGS only contains
1201      * information about valid entries. So we flush our internal copy
1202      * to get rid of stale ones, then put all valid SLB entries back
1203      * in.
1204      */
1205     memset(env->slb, 0, sizeof(env->slb));
1206     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1207         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1208         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1209         /*
1210          * Only restore valid entries
1211          */
1212         if (rb & SLB_ESID_V) {
1213             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1214         }
1215     }
1216 #endif
1217
1218     /* Sync SRs */
1219     for (i = 0; i < 16; i++) {
1220         env->sr[i] = sregs.u.s.ppc32.sr[i];
1221     }
1222
1223     /* Sync BATs */
1224     for (i = 0; i < 8; i++) {
1225         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1226         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1227         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1228         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1229     }
1230
1231     return 0;
1232 }
1233
1234 int kvm_arch_get_registers(CPUState *cs)
1235 {
1236     PowerPCCPU *cpu = POWERPC_CPU(cs);
1237     CPUPPCState *env = &cpu->env;
1238     struct kvm_regs regs;
1239     uint32_t cr;
1240     int i, ret;
1241
1242     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1243     if (ret < 0)
1244         return ret;
1245
1246     cr = regs.cr;
1247     for (i = 7; i >= 0; i--) {
1248         env->crf[i] = cr & 15;
1249         cr >>= 4;
1250     }
1251
1252     env->ctr = regs.ctr;
1253     env->lr = regs.lr;
1254     cpu_write_xer(env, regs.xer);
1255     env->msr = regs.msr;
1256     env->nip = regs.pc;
1257
1258     env->spr[SPR_SRR0] = regs.srr0;
1259     env->spr[SPR_SRR1] = regs.srr1;
1260
1261     env->spr[SPR_SPRG0] = regs.sprg0;
1262     env->spr[SPR_SPRG1] = regs.sprg1;
1263     env->spr[SPR_SPRG2] = regs.sprg2;
1264     env->spr[SPR_SPRG3] = regs.sprg3;
1265     env->spr[SPR_SPRG4] = regs.sprg4;
1266     env->spr[SPR_SPRG5] = regs.sprg5;
1267     env->spr[SPR_SPRG6] = regs.sprg6;
1268     env->spr[SPR_SPRG7] = regs.sprg7;
1269
1270     env->spr[SPR_BOOKE_PID] = regs.pid;
1271
1272     for (i = 0;i < 32; i++)
1273         env->gpr[i] = regs.gpr[i];
1274
1275     kvm_get_fp(cs);
1276
1277     if (cap_booke_sregs) {
1278         ret = kvmppc_get_booke_sregs(cpu);
1279         if (ret < 0) {
1280             return ret;
1281         }
1282     }
1283
1284     if (cap_segstate) {
1285         ret = kvmppc_get_books_sregs(cpu);
1286         if (ret < 0) {
1287             return ret;
1288         }
1289     }
1290
1291     if (cap_hior) {
1292         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1293     }
1294
1295     if (cap_one_reg) {
1296         int i;
1297
1298         /* We deliberately ignore errors here, for kernels which have
1299          * the ONE_REG calls, but don't support the specific
1300          * registers, there's a reasonable chance things will still
1301          * work, at least until we try to migrate. */
1302         for (i = 0; i < 1024; i++) {
1303             uint64_t id = env->spr_cb[i].one_reg_id;
1304
1305             if (id != 0) {
1306                 kvm_get_one_spr(cs, id, i);
1307             }
1308         }
1309
1310 #ifdef TARGET_PPC64
1311         if (msr_ts) {
1312             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1313                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1314             }
1315             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1316                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1317             }
1318             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1319             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1320             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1321             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1322             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1323             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1324             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1325             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1326             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1327             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1328         }
1329
1330         if (cap_papr) {
1331             if (kvm_get_vpa(cs) < 0) {
1332                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1333             }
1334         }
1335
1336         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1337 #endif
1338     }
1339
1340     return 0;
1341 }
1342
1343 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1344 {
1345     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1346
1347     if (irq != PPC_INTERRUPT_EXT) {
1348         return 0;
1349     }
1350
1351     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1352         return 0;
1353     }
1354
1355     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1356
1357     return 0;
1358 }
1359
1360 #if defined(TARGET_PPCEMB)
1361 #define PPC_INPUT_INT PPC40x_INPUT_INT
1362 #elif defined(TARGET_PPC64)
1363 #define PPC_INPUT_INT PPC970_INPUT_INT
1364 #else
1365 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1366 #endif
1367
1368 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1369 {
1370     PowerPCCPU *cpu = POWERPC_CPU(cs);
1371     CPUPPCState *env = &cpu->env;
1372     int r;
1373     unsigned irq;
1374
1375     qemu_mutex_lock_iothread();
1376
1377     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1378      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1379     if (!cap_interrupt_level &&
1380         run->ready_for_interrupt_injection &&
1381         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1382         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1383     {
1384         /* For now KVM disregards the 'irq' argument. However, in the
1385          * future KVM could cache it in-kernel to avoid a heavyweight exit
1386          * when reading the UIC.
1387          */
1388         irq = KVM_INTERRUPT_SET;
1389
1390         DPRINTF("injected interrupt %d\n", irq);
1391         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1392         if (r < 0) {
1393             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1394         }
1395
1396         /* Always wake up soon in case the interrupt was level based */
1397         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1398                        (NANOSECONDS_PER_SECOND / 50));
1399     }
1400
1401     /* We don't know if there are more interrupts pending after this. However,
1402      * the guest will return to userspace in the course of handling this one
1403      * anyways, so we will get a chance to deliver the rest. */
1404
1405     qemu_mutex_unlock_iothread();
1406 }
1407
1408 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1409 {
1410     return MEMTXATTRS_UNSPECIFIED;
1411 }
1412
1413 int kvm_arch_process_async_events(CPUState *cs)
1414 {
1415     return cs->halted;
1416 }
1417
1418 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1419 {
1420     CPUState *cs = CPU(cpu);
1421     CPUPPCState *env = &cpu->env;
1422
1423     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1424         cs->halted = 1;
1425         cs->exception_index = EXCP_HLT;
1426     }
1427
1428     return 0;
1429 }
1430
1431 /* map dcr access to existing qemu dcr emulation */
1432 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1433 {
1434     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1435         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1436
1437     return 0;
1438 }
1439
1440 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1441 {
1442     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1443         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1444
1445     return 0;
1446 }
1447
1448 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1449 {
1450     /* Mixed endian case is not handled */
1451     uint32_t sc = debug_inst_opcode;
1452
1453     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1454                             sizeof(sc), 0) ||
1455         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1456         return -EINVAL;
1457     }
1458
1459     return 0;
1460 }
1461
1462 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1463 {
1464     uint32_t sc;
1465
1466     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1467         sc != debug_inst_opcode ||
1468         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1469                             sizeof(sc), 1)) {
1470         return -EINVAL;
1471     }
1472
1473     return 0;
1474 }
1475
1476 static int find_hw_breakpoint(target_ulong addr, int type)
1477 {
1478     int n;
1479
1480     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1481            <= ARRAY_SIZE(hw_debug_points));
1482
1483     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1484         if (hw_debug_points[n].addr == addr &&
1485              hw_debug_points[n].type == type) {
1486             return n;
1487         }
1488     }
1489
1490     return -1;
1491 }
1492
1493 static int find_hw_watchpoint(target_ulong addr, int *flag)
1494 {
1495     int n;
1496
1497     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1498     if (n >= 0) {
1499         *flag = BP_MEM_ACCESS;
1500         return n;
1501     }
1502
1503     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1504     if (n >= 0) {
1505         *flag = BP_MEM_WRITE;
1506         return n;
1507     }
1508
1509     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1510     if (n >= 0) {
1511         *flag = BP_MEM_READ;
1512         return n;
1513     }
1514
1515     return -1;
1516 }
1517
1518 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1519                                   target_ulong len, int type)
1520 {
1521     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1522         return -ENOBUFS;
1523     }
1524
1525     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1526     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1527
1528     switch (type) {
1529     case GDB_BREAKPOINT_HW:
1530         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1531             return -ENOBUFS;
1532         }
1533
1534         if (find_hw_breakpoint(addr, type) >= 0) {
1535             return -EEXIST;
1536         }
1537
1538         nb_hw_breakpoint++;
1539         break;
1540
1541     case GDB_WATCHPOINT_WRITE:
1542     case GDB_WATCHPOINT_READ:
1543     case GDB_WATCHPOINT_ACCESS:
1544         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1545             return -ENOBUFS;
1546         }
1547
1548         if (find_hw_breakpoint(addr, type) >= 0) {
1549             return -EEXIST;
1550         }
1551
1552         nb_hw_watchpoint++;
1553         break;
1554
1555     default:
1556         return -ENOSYS;
1557     }
1558
1559     return 0;
1560 }
1561
1562 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1563                                   target_ulong len, int type)
1564 {
1565     int n;
1566
1567     n = find_hw_breakpoint(addr, type);
1568     if (n < 0) {
1569         return -ENOENT;
1570     }
1571
1572     switch (type) {
1573     case GDB_BREAKPOINT_HW:
1574         nb_hw_breakpoint--;
1575         break;
1576
1577     case GDB_WATCHPOINT_WRITE:
1578     case GDB_WATCHPOINT_READ:
1579     case GDB_WATCHPOINT_ACCESS:
1580         nb_hw_watchpoint--;
1581         break;
1582
1583     default:
1584         return -ENOSYS;
1585     }
1586     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1587
1588     return 0;
1589 }
1590
1591 void kvm_arch_remove_all_hw_breakpoints(void)
1592 {
1593     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1594 }
1595
1596 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1597 {
1598     int n;
1599
1600     /* Software Breakpoint updates */
1601     if (kvm_sw_breakpoints_active(cs)) {
1602         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1603     }
1604
1605     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1606            <= ARRAY_SIZE(hw_debug_points));
1607     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1608
1609     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1610         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1611         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1612         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1613             switch (hw_debug_points[n].type) {
1614             case GDB_BREAKPOINT_HW:
1615                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1616                 break;
1617             case GDB_WATCHPOINT_WRITE:
1618                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1619                 break;
1620             case GDB_WATCHPOINT_READ:
1621                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1622                 break;
1623             case GDB_WATCHPOINT_ACCESS:
1624                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1625                                         KVMPPC_DEBUG_WATCH_READ;
1626                 break;
1627             default:
1628                 cpu_abort(cs, "Unsupported breakpoint type\n");
1629             }
1630             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1631         }
1632     }
1633 }
1634
1635 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1636 {
1637     CPUState *cs = CPU(cpu);
1638     CPUPPCState *env = &cpu->env;
1639     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1640     int handle = 0;
1641     int n;
1642     int flag = 0;
1643
1644     if (cs->singlestep_enabled) {
1645         handle = 1;
1646     } else if (arch_info->status) {
1647         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1648             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1649                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1650                 if (n >= 0) {
1651                     handle = 1;
1652                 }
1653             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1654                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1655                 n = find_hw_watchpoint(arch_info->address,  &flag);
1656                 if (n >= 0) {
1657                     handle = 1;
1658                     cs->watchpoint_hit = &hw_watchpoint;
1659                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1660                     hw_watchpoint.flags = flag;
1661                 }
1662             }
1663         }
1664     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1665         handle = 1;
1666     } else {
1667         /* QEMU is not able to handle debug exception, so inject
1668          * program exception to guest;
1669          * Yes program exception NOT debug exception !!
1670          * When QEMU is using debug resources then debug exception must
1671          * be always set. To achieve this we set MSR_DE and also set
1672          * MSRP_DEP so guest cannot change MSR_DE.
1673          * When emulating debug resource for guest we want guest
1674          * to control MSR_DE (enable/disable debug interrupt on need).
1675          * Supporting both configurations are NOT possible.
1676          * So the result is that we cannot share debug resources
1677          * between QEMU and Guest on BOOKE architecture.
1678          * In the current design QEMU gets the priority over guest,
1679          * this means that if QEMU is using debug resources then guest
1680          * cannot use them;
1681          * For software breakpoint QEMU uses a privileged instruction;
1682          * So there cannot be any reason that we are here for guest
1683          * set debug exception, only possibility is guest executed a
1684          * privileged / illegal instruction and that's why we are
1685          * injecting a program interrupt.
1686          */
1687
1688         cpu_synchronize_state(cs);
1689         /* env->nip is PC, so increment this by 4 to use
1690          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1691          */
1692         env->nip += 4;
1693         cs->exception_index = POWERPC_EXCP_PROGRAM;
1694         env->error_code = POWERPC_EXCP_INVAL;
1695         ppc_cpu_do_interrupt(cs);
1696     }
1697
1698     return handle;
1699 }
1700
1701 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1702 {
1703     PowerPCCPU *cpu = POWERPC_CPU(cs);
1704     CPUPPCState *env = &cpu->env;
1705     int ret;
1706
1707     qemu_mutex_lock_iothread();
1708
1709     switch (run->exit_reason) {
1710     case KVM_EXIT_DCR:
1711         if (run->dcr.is_write) {
1712             DPRINTF("handle dcr write\n");
1713             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1714         } else {
1715             DPRINTF("handle dcr read\n");
1716             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1717         }
1718         break;
1719     case KVM_EXIT_HLT:
1720         DPRINTF("handle halt\n");
1721         ret = kvmppc_handle_halt(cpu);
1722         break;
1723 #if defined(TARGET_PPC64)
1724     case KVM_EXIT_PAPR_HCALL:
1725         DPRINTF("handle PAPR hypercall\n");
1726         run->papr_hcall.ret = spapr_hypercall(cpu,
1727                                               run->papr_hcall.nr,
1728                                               run->papr_hcall.args);
1729         ret = 0;
1730         break;
1731 #endif
1732     case KVM_EXIT_EPR:
1733         DPRINTF("handle epr\n");
1734         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1735         ret = 0;
1736         break;
1737     case KVM_EXIT_WATCHDOG:
1738         DPRINTF("handle watchdog expiry\n");
1739         watchdog_perform_action();
1740         ret = 0;
1741         break;
1742
1743     case KVM_EXIT_DEBUG:
1744         DPRINTF("handle debug exception\n");
1745         if (kvm_handle_debug(cpu, run)) {
1746             ret = EXCP_DEBUG;
1747             break;
1748         }
1749         /* re-enter, this exception was guest-internal */
1750         ret = 0;
1751         break;
1752
1753     default:
1754         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1755         ret = -1;
1756         break;
1757     }
1758
1759     qemu_mutex_unlock_iothread();
1760     return ret;
1761 }
1762
1763 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1764 {
1765     CPUState *cs = CPU(cpu);
1766     uint32_t bits = tsr_bits;
1767     struct kvm_one_reg reg = {
1768         .id = KVM_REG_PPC_OR_TSR,
1769         .addr = (uintptr_t) &bits,
1770     };
1771
1772     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1773 }
1774
1775 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1776 {
1777
1778     CPUState *cs = CPU(cpu);
1779     uint32_t bits = tsr_bits;
1780     struct kvm_one_reg reg = {
1781         .id = KVM_REG_PPC_CLEAR_TSR,
1782         .addr = (uintptr_t) &bits,
1783     };
1784
1785     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1786 }
1787
1788 int kvmppc_set_tcr(PowerPCCPU *cpu)
1789 {
1790     CPUState *cs = CPU(cpu);
1791     CPUPPCState *env = &cpu->env;
1792     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1793
1794     struct kvm_one_reg reg = {
1795         .id = KVM_REG_PPC_TCR,
1796         .addr = (uintptr_t) &tcr,
1797     };
1798
1799     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1800 }
1801
1802 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1803 {
1804     CPUState *cs = CPU(cpu);
1805     int ret;
1806
1807     if (!kvm_enabled()) {
1808         return -1;
1809     }
1810
1811     if (!cap_ppc_watchdog) {
1812         printf("warning: KVM does not support watchdog");
1813         return -1;
1814     }
1815
1816     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1817     if (ret < 0) {
1818         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1819                 __func__, strerror(-ret));
1820         return ret;
1821     }
1822
1823     return ret;
1824 }
1825
1826 static int read_cpuinfo(const char *field, char *value, int len)
1827 {
1828     FILE *f;
1829     int ret = -1;
1830     int field_len = strlen(field);
1831     char line[512];
1832
1833     f = fopen("/proc/cpuinfo", "r");
1834     if (!f) {
1835         return -1;
1836     }
1837
1838     do {
1839         if (!fgets(line, sizeof(line), f)) {
1840             break;
1841         }
1842         if (!strncmp(line, field, field_len)) {
1843             pstrcpy(value, len, line);
1844             ret = 0;
1845             break;
1846         }
1847     } while(*line);
1848
1849     fclose(f);
1850
1851     return ret;
1852 }
1853
1854 uint32_t kvmppc_get_tbfreq(void)
1855 {
1856     char line[512];
1857     char *ns;
1858     uint32_t retval = NANOSECONDS_PER_SECOND;
1859
1860     if (read_cpuinfo("timebase", line, sizeof(line))) {
1861         return retval;
1862     }
1863
1864     if (!(ns = strchr(line, ':'))) {
1865         return retval;
1866     }
1867
1868     ns++;
1869
1870     return atoi(ns);
1871 }
1872
1873 bool kvmppc_get_host_serial(char **value)
1874 {
1875     return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1876                                NULL);
1877 }
1878
1879 bool kvmppc_get_host_model(char **value)
1880 {
1881     return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1882 }
1883
1884 /* Try to find a device tree node for a CPU with clock-frequency property */
1885 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1886 {
1887     struct dirent *dirp;
1888     DIR *dp;
1889
1890     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1891         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1892         return -1;
1893     }
1894
1895     buf[0] = '\0';
1896     while ((dirp = readdir(dp)) != NULL) {
1897         FILE *f;
1898         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1899                  dirp->d_name);
1900         f = fopen(buf, "r");
1901         if (f) {
1902             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1903             fclose(f);
1904             break;
1905         }
1906         buf[0] = '\0';
1907     }
1908     closedir(dp);
1909     if (buf[0] == '\0') {
1910         printf("Unknown host!\n");
1911         return -1;
1912     }
1913
1914     return 0;
1915 }
1916
1917 static uint64_t kvmppc_read_int_dt(const char *filename)
1918 {
1919     union {
1920         uint32_t v32;
1921         uint64_t v64;
1922     } u;
1923     FILE *f;
1924     int len;
1925
1926     f = fopen(filename, "rb");
1927     if (!f) {
1928         return -1;
1929     }
1930
1931     len = fread(&u, 1, sizeof(u), f);
1932     fclose(f);
1933     switch (len) {
1934     case 4:
1935         /* property is a 32-bit quantity */
1936         return be32_to_cpu(u.v32);
1937     case 8:
1938         return be64_to_cpu(u.v64);
1939     }
1940
1941     return 0;
1942 }
1943
1944 /* Read a CPU node property from the host device tree that's a single
1945  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1946  * (can't find or open the property, or doesn't understand the
1947  * format) */
1948 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1949 {
1950     char buf[PATH_MAX], *tmp;
1951     uint64_t val;
1952
1953     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1954         return -1;
1955     }
1956
1957     tmp = g_strdup_printf("%s/%s", buf, propname);
1958     val = kvmppc_read_int_dt(tmp);
1959     g_free(tmp);
1960
1961     return val;
1962 }
1963
1964 uint64_t kvmppc_get_clockfreq(void)
1965 {
1966     return kvmppc_read_int_cpu_dt("clock-frequency");
1967 }
1968
1969 uint32_t kvmppc_get_vmx(void)
1970 {
1971     return kvmppc_read_int_cpu_dt("ibm,vmx");
1972 }
1973
1974 uint32_t kvmppc_get_dfp(void)
1975 {
1976     return kvmppc_read_int_cpu_dt("ibm,dfp");
1977 }
1978
1979 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1980  {
1981      PowerPCCPU *cpu = ppc_env_get_cpu(env);
1982      CPUState *cs = CPU(cpu);
1983
1984     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1985         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1986         return 0;
1987     }
1988
1989     return 1;
1990 }
1991
1992 int kvmppc_get_hasidle(CPUPPCState *env)
1993 {
1994     struct kvm_ppc_pvinfo pvinfo;
1995
1996     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1997         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1998         return 1;
1999     }
2000
2001     return 0;
2002 }
2003
2004 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2005 {
2006     uint32_t *hc = (uint32_t*)buf;
2007     struct kvm_ppc_pvinfo pvinfo;
2008
2009     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2010         memcpy(buf, pvinfo.hcall, buf_len);
2011         return 0;
2012     }
2013
2014     /*
2015      * Fallback to always fail hypercalls regardless of endianness:
2016      *
2017      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2018      *     li r3, -1
2019      *     b .+8       (becomes nop in wrong endian)
2020      *     bswap32(li r3, -1)
2021      */
2022
2023     hc[0] = cpu_to_be32(0x08000048);
2024     hc[1] = cpu_to_be32(0x3860ffff);
2025     hc[2] = cpu_to_be32(0x48000008);
2026     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2027
2028     return 1;
2029 }
2030
2031 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2032 {
2033     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2034 }
2035
2036 void kvmppc_enable_logical_ci_hcalls(void)
2037 {
2038     /*
2039      * FIXME: it would be nice if we could detect the cases where
2040      * we're using a device which requires the in kernel
2041      * implementation of these hcalls, but the kernel lacks them and
2042      * produce a warning.
2043      */
2044     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2045     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2046 }
2047
2048 void kvmppc_enable_set_mode_hcall(void)
2049 {
2050     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2051 }
2052
2053 void kvmppc_set_papr(PowerPCCPU *cpu)
2054 {
2055     CPUState *cs = CPU(cpu);
2056     int ret;
2057
2058     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2059     if (ret) {
2060         error_report("This vCPU type or KVM version does not support PAPR");
2061         exit(1);
2062     }
2063
2064     /* Update the capability flag so we sync the right information
2065      * with kvm */
2066     cap_papr = 1;
2067 }
2068
2069 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2070 {
2071     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2072 }
2073
2074 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2075 {
2076     CPUState *cs = CPU(cpu);
2077     int ret;
2078
2079     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2080     if (ret && mpic_proxy) {
2081         error_report("This KVM version does not support EPR");
2082         exit(1);
2083     }
2084 }
2085
2086 int kvmppc_smt_threads(void)
2087 {
2088     return cap_ppc_smt ? cap_ppc_smt : 1;
2089 }
2090
2091 #ifdef TARGET_PPC64
2092 off_t kvmppc_alloc_rma(void **rma)
2093 {
2094     off_t size;
2095     int fd;
2096     struct kvm_allocate_rma ret;
2097
2098     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2099      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2100      *                      not necessary on this hardware
2101      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2102      *
2103      * FIXME: We should allow the user to force contiguous RMA
2104      * allocation in the cap_ppc_rma==1 case.
2105      */
2106     if (cap_ppc_rma < 2) {
2107         return 0;
2108     }
2109
2110     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2111     if (fd < 0) {
2112         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2113                 strerror(errno));
2114         return -1;
2115     }
2116
2117     size = MIN(ret.rma_size, 256ul << 20);
2118
2119     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2120     if (*rma == MAP_FAILED) {
2121         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2122         return -1;
2123     };
2124
2125     return size;
2126 }
2127
2128 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2129 {
2130     struct kvm_ppc_smmu_info info;
2131     long rampagesize, best_page_shift;
2132     int i;
2133
2134     if (cap_ppc_rma >= 2) {
2135         return current_size;
2136     }
2137
2138     /* Find the largest hardware supported page size that's less than
2139      * or equal to the (logical) backing page size of guest RAM */
2140     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2141     rampagesize = getrampagesize();
2142     best_page_shift = 0;
2143
2144     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2145         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2146
2147         if (!sps->page_shift) {
2148             continue;
2149         }
2150
2151         if ((sps->page_shift > best_page_shift)
2152             && ((1UL << sps->page_shift) <= rampagesize)) {
2153             best_page_shift = sps->page_shift;
2154         }
2155     }
2156
2157     return MIN(current_size,
2158                1ULL << (best_page_shift + hash_shift - 7));
2159 }
2160 #endif
2161
2162 bool kvmppc_spapr_use_multitce(void)
2163 {
2164     return cap_spapr_multitce;
2165 }
2166
2167 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2168                               bool need_vfio)
2169 {
2170     struct kvm_create_spapr_tce args = {
2171         .liobn = liobn,
2172         .window_size = window_size,
2173     };
2174     long len;
2175     int fd;
2176     void *table;
2177
2178     /* Must set fd to -1 so we don't try to munmap when called for
2179      * destroying the table, which the upper layers -will- do
2180      */
2181     *pfd = -1;
2182     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2183         return NULL;
2184     }
2185
2186     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2187     if (fd < 0) {
2188         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2189                 liobn);
2190         return NULL;
2191     }
2192
2193     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2194     /* FIXME: round this up to page size */
2195
2196     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2197     if (table == MAP_FAILED) {
2198         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2199                 liobn);
2200         close(fd);
2201         return NULL;
2202     }
2203
2204     *pfd = fd;
2205     return table;
2206 }
2207
2208 int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
2209 {
2210     long len;
2211
2212     if (fd < 0) {
2213         return -1;
2214     }
2215
2216     len = nb_table * sizeof(uint64_t);
2217     if ((munmap(table, len) < 0) ||
2218         (close(fd) < 0)) {
2219         fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
2220                 strerror(errno));
2221         /* Leak the table */
2222     }
2223
2224     return 0;
2225 }
2226
2227 int kvmppc_reset_htab(int shift_hint)
2228 {
2229     uint32_t shift = shift_hint;
2230
2231     if (!kvm_enabled()) {
2232         /* Full emulation, tell caller to allocate htab itself */
2233         return 0;
2234     }
2235     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2236         int ret;
2237         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2238         if (ret == -ENOTTY) {
2239             /* At least some versions of PR KVM advertise the
2240              * capability, but don't implement the ioctl().  Oops.
2241              * Return 0 so that we allocate the htab in qemu, as is
2242              * correct for PR. */
2243             return 0;
2244         } else if (ret < 0) {
2245             return ret;
2246         }
2247         return shift;
2248     }
2249
2250     /* We have a kernel that predates the htab reset calls.  For PR
2251      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2252      * this era, it has allocated a 16MB fixed size hash table
2253      * already.  Kernels of this era have the GET_PVINFO capability
2254      * only on PR, so we use this hack to determine the right
2255      * answer */
2256     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_GET_PVINFO)) {
2257         /* PR - tell caller to allocate htab */
2258         return 0;
2259     } else {
2260         /* HV - assume 16MB kernel allocated htab */
2261         return 24;
2262     }
2263 }
2264
2265 static inline uint32_t mfpvr(void)
2266 {
2267     uint32_t pvr;
2268
2269     asm ("mfpvr %0"
2270          : "=r"(pvr));
2271     return pvr;
2272 }
2273
2274 static void alter_insns(uint64_t *word, uint64_t flags, bool on)
2275 {
2276     if (on) {
2277         *word |= flags;
2278     } else {
2279         *word &= ~flags;
2280     }
2281 }
2282
2283 static void kvmppc_host_cpu_initfn(Object *obj)
2284 {
2285     assert(kvm_enabled());
2286 }
2287
2288 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2289 {
2290     DeviceClass *dc = DEVICE_CLASS(oc);
2291     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2292     uint32_t vmx = kvmppc_get_vmx();
2293     uint32_t dfp = kvmppc_get_dfp();
2294     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2295     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2296
2297     /* Now fix up the class with information we can query from the host */
2298     pcc->pvr = mfpvr();
2299
2300     if (vmx != -1) {
2301         /* Only override when we know what the host supports */
2302         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2303         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2304     }
2305     if (dfp != -1) {
2306         /* Only override when we know what the host supports */
2307         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2308     }
2309
2310     if (dcache_size != -1) {
2311         pcc->l1_dcache_size = dcache_size;
2312     }
2313
2314     if (icache_size != -1) {
2315         pcc->l1_icache_size = icache_size;
2316     }
2317
2318     /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2319     dc->cannot_destroy_with_object_finalize_yet = true;
2320 }
2321
2322 bool kvmppc_has_cap_epr(void)
2323 {
2324     return cap_epr;
2325 }
2326
2327 bool kvmppc_has_cap_htab_fd(void)
2328 {
2329     return cap_htab_fd;
2330 }
2331
2332 bool kvmppc_has_cap_fixup_hcalls(void)
2333 {
2334     return cap_fixup_hcalls;
2335 }
2336
2337 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2338 {
2339     ObjectClass *oc = OBJECT_CLASS(pcc);
2340
2341     while (oc && !object_class_is_abstract(oc)) {
2342         oc = object_class_get_parent(oc);
2343     }
2344     assert(oc);
2345
2346     return POWERPC_CPU_CLASS(oc);
2347 }
2348
2349 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2350 {
2351     uint32_t host_pvr = mfpvr();
2352     PowerPCCPUClass *pvr_pcc;
2353
2354     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2355     if (pvr_pcc == NULL) {
2356         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2357     }
2358
2359     return pvr_pcc;
2360 }
2361
2362 #if defined(TARGET_PPC64)
2363 static void spapr_cpu_core_host_initfn(Object *obj)
2364 {
2365     sPAPRCPUCore *core = SPAPR_CPU_CORE(obj);
2366     char *name = g_strdup_printf("%s-" TYPE_POWERPC_CPU, "host");
2367     ObjectClass *oc = object_class_by_name(name);
2368
2369     g_assert(oc);
2370     g_free((void *)name);
2371     core->cpu_class = oc;
2372 }
2373 #endif
2374
2375 static int kvm_ppc_register_host_cpu_type(void)
2376 {
2377     TypeInfo type_info = {
2378         .name = TYPE_HOST_POWERPC_CPU,
2379         .instance_init = kvmppc_host_cpu_initfn,
2380         .class_init = kvmppc_host_cpu_class_init,
2381     };
2382     PowerPCCPUClass *pvr_pcc;
2383     DeviceClass *dc;
2384
2385     pvr_pcc = kvm_ppc_get_host_cpu_class();
2386     if (pvr_pcc == NULL) {
2387         return -1;
2388     }
2389     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2390     type_register(&type_info);
2391
2392 #if defined(TARGET_PPC64)
2393     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2394     type_info.parent = TYPE_SPAPR_CPU_CORE,
2395     type_info.instance_size = sizeof(sPAPRCPUCore),
2396     type_info.instance_init = spapr_cpu_core_host_initfn,
2397     type_info.class_init = NULL;
2398     type_register(&type_info);
2399     g_free((void *)type_info.name);
2400     type_info.instance_size = 0;
2401     type_info.instance_init = NULL;
2402 #endif
2403
2404     /* Register generic family CPU class for a family */
2405     pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2406     dc = DEVICE_CLASS(pvr_pcc);
2407     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2408     type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2409     type_register(&type_info);
2410
2411     return 0;
2412 }
2413
2414 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2415 {
2416     struct kvm_rtas_token_args args = {
2417         .token = token,
2418     };
2419
2420     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2421         return -ENOENT;
2422     }
2423
2424     strncpy(args.name, function, sizeof(args.name));
2425
2426     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2427 }
2428
2429 int kvmppc_get_htab_fd(bool write)
2430 {
2431     struct kvm_get_htab_fd s = {
2432         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2433         .start_index = 0,
2434     };
2435
2436     if (!cap_htab_fd) {
2437         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2438         return -1;
2439     }
2440
2441     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2442 }
2443
2444 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2445 {
2446     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2447     uint8_t buf[bufsize];
2448     ssize_t rc;
2449
2450     do {
2451         rc = read(fd, buf, bufsize);
2452         if (rc < 0) {
2453             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2454                     strerror(errno));
2455             return rc;
2456         } else if (rc) {
2457             uint8_t *buffer = buf;
2458             ssize_t n = rc;
2459             while (n) {
2460                 struct kvm_get_htab_header *head =
2461                     (struct kvm_get_htab_header *) buffer;
2462                 size_t chunksize = sizeof(*head) +
2463                      HASH_PTE_SIZE_64 * head->n_valid;
2464
2465                 qemu_put_be32(f, head->index);
2466                 qemu_put_be16(f, head->n_valid);
2467                 qemu_put_be16(f, head->n_invalid);
2468                 qemu_put_buffer(f, (void *)(head + 1),
2469                                 HASH_PTE_SIZE_64 * head->n_valid);
2470
2471                 buffer += chunksize;
2472                 n -= chunksize;
2473             }
2474         }
2475     } while ((rc != 0)
2476              && ((max_ns < 0)
2477                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2478
2479     return (rc == 0) ? 1 : 0;
2480 }
2481
2482 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2483                            uint16_t n_valid, uint16_t n_invalid)
2484 {
2485     struct kvm_get_htab_header *buf;
2486     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2487     ssize_t rc;
2488
2489     buf = alloca(chunksize);
2490     buf->index = index;
2491     buf->n_valid = n_valid;
2492     buf->n_invalid = n_invalid;
2493
2494     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2495
2496     rc = write(fd, buf, chunksize);
2497     if (rc < 0) {
2498         fprintf(stderr, "Error writing KVM hash table: %s\n",
2499                 strerror(errno));
2500         return rc;
2501     }
2502     if (rc != chunksize) {
2503         /* We should never get a short write on a single chunk */
2504         fprintf(stderr, "Short write, restoring KVM hash table\n");
2505         return -1;
2506     }
2507     return 0;
2508 }
2509
2510 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2511 {
2512     return true;
2513 }
2514
2515 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2516 {
2517     return 1;
2518 }
2519
2520 int kvm_arch_on_sigbus(int code, void *addr)
2521 {
2522     return 1;
2523 }
2524
2525 void kvm_arch_init_irq_routing(KVMState *s)
2526 {
2527 }
2528
2529 struct kvm_get_htab_buf {
2530     struct kvm_get_htab_header header;
2531     /*
2532      * We require one extra byte for read
2533      */
2534     target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2535 };
2536
2537 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2538 {
2539     int htab_fd;
2540     struct kvm_get_htab_fd ghf;
2541     struct kvm_get_htab_buf  *hpte_buf;
2542
2543     ghf.flags = 0;
2544     ghf.start_index = pte_index;
2545     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2546     if (htab_fd < 0) {
2547         goto error_out;
2548     }
2549
2550     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2551     /*
2552      * Read the hpte group
2553      */
2554     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2555         goto out_close;
2556     }
2557
2558     close(htab_fd);
2559     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2560
2561 out_close:
2562     g_free(hpte_buf);
2563     close(htab_fd);
2564 error_out:
2565     return 0;
2566 }
2567
2568 void kvmppc_hash64_free_pteg(uint64_t token)
2569 {
2570     struct kvm_get_htab_buf *htab_buf;
2571
2572     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2573                             hpte);
2574     g_free(htab_buf);
2575     return;
2576 }
2577
2578 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2579                              target_ulong pte0, target_ulong pte1)
2580 {
2581     int htab_fd;
2582     struct kvm_get_htab_fd ghf;
2583     struct kvm_get_htab_buf hpte_buf;
2584
2585     ghf.flags = 0;
2586     ghf.start_index = 0;     /* Ignored */
2587     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2588     if (htab_fd < 0) {
2589         goto error_out;
2590     }
2591
2592     hpte_buf.header.n_valid = 1;
2593     hpte_buf.header.n_invalid = 0;
2594     hpte_buf.header.index = pte_index;
2595     hpte_buf.hpte[0] = pte0;
2596     hpte_buf.hpte[1] = pte1;
2597     /*
2598      * Write the hpte entry.
2599      * CAUTION: write() has the warn_unused_result attribute. Hence we
2600      * need to check the return value, even though we do nothing.
2601      */
2602     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2603         goto out_close;
2604     }
2605
2606 out_close:
2607     close(htab_fd);
2608     return;
2609
2610 error_out:
2611     return;
2612 }
2613
2614 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2615                              uint64_t address, uint32_t data, PCIDevice *dev)
2616 {
2617     return 0;
2618 }
2619
2620 int kvm_arch_msi_data_to_gsi(uint32_t data)
2621 {
2622     return data & 0xffff;
2623 }
2624
2625 int kvmppc_enable_hwrng(void)
2626 {
2627     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2628         return -1;
2629     }
2630
2631     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2632 }