target/ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include "qemu/osdep.h"
  18 #include <dirent.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/vfs.h>
  21
  22 #include <linux/kvm.h>
  23
  24 #include "qemu-common.h"
  25 #include "qemu/error-report.h"
  26 #include "cpu.h"
  27 #include "qemu/timer.h"
  28 #include "sysemu/sysemu.h"
  29 #include "sysemu/hw_accel.h"
  30 #include "sysemu/numa.h"
  31 #include "kvm_ppc.h"
  32 #include "sysemu/cpus.h"
  33 #include "sysemu/device_tree.h"
  34 #include "mmu-hash64.h"
  35
  36 #include "hw/sysbus.h"
  37 #include "hw/ppc/spapr.h"
  38 #include "hw/ppc/spapr_vio.h"
  39 #include "hw/ppc/spapr_cpu_core.h"
  40 #include "hw/ppc/ppc.h"
  41 #include "sysemu/watchdog.h"
  42 #include "trace.h"
  43 #include "exec/gdbstub.h"
  44 #include "exec/memattrs.h"
  45 #include "sysemu/hostmem.h"
  46 #include "qemu/cutils.h"
  47 #if defined(TARGET_PPC64)
  48 #include "hw/ppc/spapr_cpu_core.h"
  49 #endif
  50
  51 //#define DEBUG_KVM
  52
  53 #ifdef DEBUG_KVM
  54 #define DPRINTF(fmt, ...) \
  55     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  56 #else
  57 #define DPRINTF(fmt, ...) \
  58     do { } while (0)
  59 #endif
  60
  61 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  62
  63 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  64     KVM_CAP_LAST_INFO
  65 };
  66
  67 static int cap_interrupt_unset = false;
  68 static int cap_interrupt_level = false;
  69 static int cap_segstate;
  70 static int cap_booke_sregs;
  71 static int cap_ppc_smt;
  72 static int cap_ppc_rma;
  73 static int cap_spapr_tce;
  74 static int cap_spapr_multitce;
  75 static int cap_spapr_vfio;
  76 static int cap_hior;
  77 static int cap_one_reg;
  78 static int cap_epr;
  79 static int cap_ppc_watchdog;
  80 static int cap_papr;
  81 static int cap_htab_fd;
  82 static int cap_fixup_hcalls;
  83 static int cap_htm;             /* Hardware transactional memory support */
  84
  85 static uint32_t debug_inst_opcode;
  86
  87 /* XXX We have a race condition where we actually have a level triggered
  88  *     interrupt, but the infrastructure can't expose that yet, so the guest
  89  *     takes but ignores it, goes to sleep and never gets notified that there's
  90  *     still an interrupt pending.
  91  *
  92  *     As a quick workaround, let's just wake up again 20 ms after we injected
  93  *     an interrupt. That way we can assure that we're always reinjecting
  94  *     interrupts in case the guest swallowed them.
  95  */
  96 static QEMUTimer *idle_timer;
  97
  98 static void kvm_kick_cpu(void *opaque)
  99 {
 100     PowerPCCPU *cpu = opaque;
 101
 102     qemu_cpu_kick(CPU(cpu));
 103 }
 104
 105 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 106  * should only be used for fallback tests - generally we should use
 107  * explicit capabilities for the features we want, rather than
 108  * assuming what is/isn't available depending on the KVM variant. */
 109 static bool kvmppc_is_pr(KVMState *ks)
 110 {
 111     /* Assume KVM-PR if the GET_PVINFO capability is available */
 112     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 113 }
 114
 115 static int kvm_ppc_register_host_cpu_type(void);
 116
 117 int kvm_arch_init(MachineState *ms, KVMState *s)
 118 {
 119     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 120     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 121     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 122     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 123     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 124     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 125     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 126     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 127     cap_spapr_vfio = false;
 128     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 129     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 130     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 131     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 132     /* Note: we don't set cap_papr here, because this capability is
 133      * only activated after this by kvmppc_set_papr() */
 134     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 135     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 136     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 137
 138     if (!cap_interrupt_level) {
 139         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 140                         "VM to stall at times!\n");
 141     }
 142
 143     kvm_ppc_register_host_cpu_type();
 144
 145     return 0;
 146 }
 147
 148 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 149 {
 150     return 0;
 151 }
 152
 153 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 154 {
 155     CPUPPCState *cenv = &cpu->env;
 156     CPUState *cs = CPU(cpu);
 157     struct kvm_sregs sregs;
 158     int ret;
 159
 160     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 161         /* What we're really trying to say is "if we're on BookE, we use
 162            the native PVR for now". This is the only sane way to check
 163            it though, so we potentially confuse users that they can run
 164            BookE guests on BookS. Let's hope nobody dares enough :) */
 165         return 0;
 166     } else {
 167         if (!cap_segstate) {
 168             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 169             return -ENOSYS;
 170         }
 171     }
 172
 173     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 174     if (ret) {
 175         return ret;
 176     }
 177
 178     sregs.pvr = cenv->spr[SPR_PVR];
 179     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 180 }
 181
 182 /* Set up a shared TLB array with KVM */
 183 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 184 {
 185     CPUPPCState *env = &cpu->env;
 186     CPUState *cs = CPU(cpu);
 187     struct kvm_book3e_206_tlb_params params = {};
 188     struct kvm_config_tlb cfg = {};
 189     unsigned int entries = 0;
 190     int ret, i;
 191
 192     if (!kvm_enabled() ||
 193         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 194         return 0;
 195     }
 196
 197     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 198
 199     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 200         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 201         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 202         entries += params.tlb_sizes[i];
 203     }
 204
 205     assert(entries == env->nb_tlb);
 206     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 207
 208     env->tlb_dirty = true;
 209
 210     cfg.array = (uintptr_t)env->tlb.tlbm;
 211     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 212     cfg.params = (uintptr_t)&params;
 213     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 214
 215     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 216     if (ret < 0) {
 217         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 218                 __func__, strerror(-ret));
 219         return ret;
 220     }
 221
 222     env->kvm_sw_tlb = true;
 223     return 0;
 224 }
 225
 226
 227 #if defined(TARGET_PPC64)
 228 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 229                                        struct kvm_ppc_smmu_info *info)
 230 {
 231     CPUPPCState *env = &cpu->env;
 232     CPUState *cs = CPU(cpu);
 233
 234     memset(info, 0, sizeof(*info));
 235
 236     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 237      * need to "guess" what the supported page sizes are.
 238      *
 239      * For that to work we make a few assumptions:
 240      *
 241      * - Check whether we are running "PR" KVM which only supports 4K
 242      *   and 16M pages, but supports them regardless of the backing
 243      *   store characteritics. We also don't support 1T segments.
 244      *
 245      *   This is safe as if HV KVM ever supports that capability or PR
 246      *   KVM grows supports for more page/segment sizes, those versions
 247      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 248      *   will not hit this fallback
 249      *
 250      * - Else we are running HV KVM. This means we only support page
 251      *   sizes that fit in the backing store. Additionally we only
 252      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 253      *   P7 encodings for the SLB and hash table. Here too, we assume
 254      *   support for any newer processor will mean a kernel that
 255      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 256      *   this fallback.
 257      */
 258     if (kvmppc_is_pr(cs->kvm_state)) {
 259         /* No flags */
 260         info->flags = 0;
 261         info->slb_size = 64;
 262
 263         /* Standard 4k base page size segment */
 264         info->sps[0].page_shift = 12;
 265         info->sps[0].slb_enc = 0;
 266         info->sps[0].enc[0].page_shift = 12;
 267         info->sps[0].enc[0].pte_enc = 0;
 268
 269         /* Standard 16M large page size segment */
 270         info->sps[1].page_shift = 24;
 271         info->sps[1].slb_enc = SLB_VSID_L;
 272         info->sps[1].enc[0].page_shift = 24;
 273         info->sps[1].enc[0].pte_enc = 0;
 274     } else {
 275         int i = 0;
 276
 277         /* HV KVM has backing store size restrictions */
 278         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 279
 280         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 281             info->flags |= KVM_PPC_1T_SEGMENTS;
 282         }
 283
 284         if (env->mmu_model == POWERPC_MMU_2_06 ||
 285             env->mmu_model == POWERPC_MMU_2_07) {
 286             info->slb_size = 32;
 287         } else {
 288             info->slb_size = 64;
 289         }
 290
 291         /* Standard 4k base page size segment */
 292         info->sps[i].page_shift = 12;
 293         info->sps[i].slb_enc = 0;
 294         info->sps[i].enc[0].page_shift = 12;
 295         info->sps[i].enc[0].pte_enc = 0;
 296         i++;
 297
 298         /* 64K on MMU 2.06 and later */
 299         if (env->mmu_model == POWERPC_MMU_2_06 ||
 300             env->mmu_model == POWERPC_MMU_2_07) {
 301             info->sps[i].page_shift = 16;
 302             info->sps[i].slb_enc = 0x110;
 303             info->sps[i].enc[0].page_shift = 16;
 304             info->sps[i].enc[0].pte_enc = 1;
 305             i++;
 306         }
 307
 308         /* Standard 16M large page size segment */
 309         info->sps[i].page_shift = 24;
 310         info->sps[i].slb_enc = SLB_VSID_L;
 311         info->sps[i].enc[0].page_shift = 24;
 312         info->sps[i].enc[0].pte_enc = 0;
 313     }
 314 }
 315
 316 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 317 {
 318     CPUState *cs = CPU(cpu);
 319     int ret;
 320
 321     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 322         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 323         if (ret == 0) {
 324             return;
 325         }
 326     }
 327
 328     kvm_get_fallback_smmu_info(cpu, info);
 329 }
 330
 331 static long gethugepagesize(const char *mem_path)
 332 {
 333     struct statfs fs;
 334     int ret;
 335
 336     do {
 337         ret = statfs(mem_path, &fs);
 338     } while (ret != 0 && errno == EINTR);
 339
 340     if (ret != 0) {
 341         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 342                 strerror(errno));
 343         exit(1);
 344     }
 345
 346 #define HUGETLBFS_MAGIC       0x958458f6
 347
 348     if (fs.f_type != HUGETLBFS_MAGIC) {
 349         /* Explicit mempath, but it's ordinary pages */
 350         return getpagesize();
 351     }
 352
 353     /* It's hugepage, return the huge page size */
 354     return fs.f_bsize;
 355 }
 356
 357 /*
 358  * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
 359  * may or may not name the same files / on the same filesystem now as
 360  * when we actually open and map them.  Iterate over the file
 361  * descriptors instead, and use qemu_fd_getpagesize().
 362  */
 363 static int find_max_supported_pagesize(Object *obj, void *opaque)
 364 {
 365     char *mem_path;
 366     long *hpsize_min = opaque;
 367
 368     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
 369         mem_path = object_property_get_str(obj, "mem-path", NULL);
 370         if (mem_path) {
 371             long hpsize = gethugepagesize(mem_path);
 372             if (hpsize < *hpsize_min) {
 373                 *hpsize_min = hpsize;
 374             }
 375         } else {
 376             *hpsize_min = getpagesize();
 377         }
 378     }
 379
 380     return 0;
 381 }
 382
 383 static long getrampagesize(void)
 384 {
 385     long hpsize = LONG_MAX;
 386     long mainrampagesize;
 387     Object *memdev_root;
 388
 389     if (mem_path) {
 390         mainrampagesize = gethugepagesize(mem_path);
 391     } else {
 392         mainrampagesize = getpagesize();
 393     }
 394
 395     /* it's possible we have memory-backend objects with
 396      * hugepage-backed RAM. these may get mapped into system
 397      * address space via -numa parameters or memory hotplug
 398      * hooks. we want to take these into account, but we
 399      * also want to make sure these supported hugepage
 400      * sizes are applicable across the entire range of memory
 401      * we may boot from, so we take the min across all
 402      * backends, and assume normal pages in cases where a
 403      * backend isn't backed by hugepages.
 404      */
 405     memdev_root = object_resolve_path("/objects", NULL);
 406     if (memdev_root) {
 407         object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
 408     }
 409     if (hpsize == LONG_MAX) {
 410         /* No additional memory regions found ==> Report main RAM page size */
 411         return mainrampagesize;
 412     }
 413
 414     /* If NUMA is disabled or the NUMA nodes are not backed with a
 415      * memory-backend, then there is at least one node using "normal" RAM,
 416      * so if its page size is smaller we have got to report that size instead.
 417      */
 418     if (hpsize > mainrampagesize &&
 419         (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
 420         static bool warned;
 421         if (!warned) {
 422             error_report("Huge page support disabled (n/a for main memory).");
 423             warned = true;
 424         }
 425         return mainrampagesize;
 426     }
 427
 428     return hpsize;
 429 }
 430
 431 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 432 {
 433     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 434         return true;
 435     }
 436
 437     return (1ul << shift) <= rampgsize;
 438 }
 439
 440 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 441 {
 442     static struct kvm_ppc_smmu_info smmu_info;
 443     static bool has_smmu_info;
 444     CPUPPCState *env = &cpu->env;
 445     long rampagesize;
 446     int iq, ik, jq, jk;
 447     bool has_64k_pages = false;
 448
 449     /* We only handle page sizes for 64-bit server guests for now */
 450     if (!(env->mmu_model & POWERPC_MMU_64)) {
 451         return;
 452     }
 453
 454     /* Collect MMU info from kernel if not already */
 455     if (!has_smmu_info) {
 456         kvm_get_smmu_info(cpu, &smmu_info);
 457         has_smmu_info = true;
 458     }
 459
 460     rampagesize = getrampagesize();
 461
 462     /* Convert to QEMU form */
 463     memset(&env->sps, 0, sizeof(env->sps));
 464
 465     /* If we have HV KVM, we need to forbid CI large pages if our
 466      * host page size is smaller than 64K.
 467      */
 468     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 469         env->ci_large_pages = getpagesize() >= 0x10000;
 470     }
 471
 472     /*
 473      * XXX This loop should be an entry wide AND of the capabilities that
 474      *     the selected CPU has with the capabilities that KVM supports.
 475      */
 476     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 477         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 478         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 479
 480         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 481                                  ksps->page_shift)) {
 482             continue;
 483         }
 484         qsps->page_shift = ksps->page_shift;
 485         qsps->slb_enc = ksps->slb_enc;
 486         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 487             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 488                                      ksps->enc[jk].page_shift)) {
 489                 continue;
 490             }
 491             if (ksps->enc[jk].page_shift == 16) {
 492                 has_64k_pages = true;
 493             }
 494             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 495             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 496             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 497                 break;
 498             }
 499         }
 500         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 501             break;
 502         }
 503     }
 504     env->slb_nr = smmu_info.slb_size;
 505     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 506         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 507     }
 508     if (!has_64k_pages) {
 509         env->mmu_model &= ~POWERPC_MMU_64K;
 510     }
 511 }
 512 #else /* defined (TARGET_PPC64) */
 513
 514 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 515 {
 516 }
 517
 518 #endif /* !defined (TARGET_PPC64) */
 519
 520 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 521 {
 522     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 523 }
 524
 525 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
 526  * book3s supports only 1 watchpoint, so array size
 527  * of 4 is sufficient for now.
 528  */
 529 #define MAX_HW_BKPTS 4
 530
 531 static struct HWBreakpoint {
 532     target_ulong addr;
 533     int type;
 534 } hw_debug_points[MAX_HW_BKPTS];
 535
 536 static CPUWatchpoint hw_watchpoint;
 537
 538 /* Default there is no breakpoint and watchpoint supported */
 539 static int max_hw_breakpoint;
 540 static int max_hw_watchpoint;
 541 static int nb_hw_breakpoint;
 542 static int nb_hw_watchpoint;
 543
 544 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 545 {
 546     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 547         max_hw_breakpoint = 2;
 548         max_hw_watchpoint = 2;
 549     }
 550
 551     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 552         fprintf(stderr, "Error initializing h/w breakpoints\n");
 553         return;
 554     }
 555 }
 556
 557 int kvm_arch_init_vcpu(CPUState *cs)
 558 {
 559     PowerPCCPU *cpu = POWERPC_CPU(cs);
 560     CPUPPCState *cenv = &cpu->env;
 561     int ret;
 562
 563     /* Gather server mmu info from KVM and update the CPU state */
 564     kvm_fixup_page_sizes(cpu);
 565
 566     /* Synchronize sregs with kvm */
 567     ret = kvm_arch_sync_sregs(cpu);
 568     if (ret) {
 569         if (ret == -EINVAL) {
 570             error_report("Register sync failed... If you're using kvm-hv.ko,"
 571                          " only \"-cpu host\" is possible");
 572         }
 573         return ret;
 574     }
 575
 576     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 577
 578     switch (cenv->mmu_model) {
 579     case POWERPC_MMU_BOOKE206:
 580         /* This target supports access to KVM's guest TLB */
 581         ret = kvm_booke206_tlb_init(cpu);
 582         break;
 583     case POWERPC_MMU_2_07:
 584         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 585             /* KVM-HV has transactional memory on POWER8 also without the
 586              * KVM_CAP_PPC_HTM extension, so enable it here instead. */
 587             cap_htm = true;
 588         }
 589         break;
 590     default:
 591         break;
 592     }
 593
 594     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 595     kvmppc_hw_debug_points_init(cenv);
 596
 597     return ret;
 598 }
 599
 600 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 601 {
 602     CPUPPCState *env = &cpu->env;
 603     CPUState *cs = CPU(cpu);
 604     struct kvm_dirty_tlb dirty_tlb;
 605     unsigned char *bitmap;
 606     int ret;
 607
 608     if (!env->kvm_sw_tlb) {
 609         return;
 610     }
 611
 612     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 613     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 614
 615     dirty_tlb.bitmap = (uintptr_t)bitmap;
 616     dirty_tlb.num_dirty = env->nb_tlb;
 617
 618     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 619     if (ret) {
 620         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 621                 __func__, strerror(-ret));
 622     }
 623
 624     g_free(bitmap);
 625 }
 626
 627 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 628 {
 629     PowerPCCPU *cpu = POWERPC_CPU(cs);
 630     CPUPPCState *env = &cpu->env;
 631     union {
 632         uint32_t u32;
 633         uint64_t u64;
 634     } val;
 635     struct kvm_one_reg reg = {
 636         .id = id,
 637         .addr = (uintptr_t) &val,
 638     };
 639     int ret;
 640
 641     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 642     if (ret != 0) {
 643         trace_kvm_failed_spr_get(spr, strerror(errno));
 644     } else {
 645         switch (id & KVM_REG_SIZE_MASK) {
 646         case KVM_REG_SIZE_U32:
 647             env->spr[spr] = val.u32;
 648             break;
 649
 650         case KVM_REG_SIZE_U64:
 651             env->spr[spr] = val.u64;
 652             break;
 653
 654         default:
 655             /* Don't handle this size yet */
 656             abort();
 657         }
 658     }
 659 }
 660
 661 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 662 {
 663     PowerPCCPU *cpu = POWERPC_CPU(cs);
 664     CPUPPCState *env = &cpu->env;
 665     union {
 666         uint32_t u32;
 667         uint64_t u64;
 668     } val;
 669     struct kvm_one_reg reg = {
 670         .id = id,
 671         .addr = (uintptr_t) &val,
 672     };
 673     int ret;
 674
 675     switch (id & KVM_REG_SIZE_MASK) {
 676     case KVM_REG_SIZE_U32:
 677         val.u32 = env->spr[spr];
 678         break;
 679
 680     case KVM_REG_SIZE_U64:
 681         val.u64 = env->spr[spr];
 682         break;
 683
 684     default:
 685         /* Don't handle this size yet */
 686         abort();
 687     }
 688
 689     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 690     if (ret != 0) {
 691         trace_kvm_failed_spr_set(spr, strerror(errno));
 692     }
 693 }
 694
 695 static int kvm_put_fp(CPUState *cs)
 696 {
 697     PowerPCCPU *cpu = POWERPC_CPU(cs);
 698     CPUPPCState *env = &cpu->env;
 699     struct kvm_one_reg reg;
 700     int i;
 701     int ret;
 702
 703     if (env->insns_flags & PPC_FLOAT) {
 704         uint64_t fpscr = env->fpscr;
 705         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 706
 707         reg.id = KVM_REG_PPC_FPSCR;
 708         reg.addr = (uintptr_t)&fpscr;
 709         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 710         if (ret < 0) {
 711             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 712             return ret;
 713         }
 714
 715         for (i = 0; i < 32; i++) {
 716             uint64_t vsr[2];
 717
 718 #ifdef HOST_WORDS_BIGENDIAN
 719             vsr[0] = float64_val(env->fpr[i]);
 720             vsr[1] = env->vsr[i];
 721 #else
 722             vsr[0] = env->vsr[i];
 723             vsr[1] = float64_val(env->fpr[i]);
 724 #endif
 725             reg.addr = (uintptr_t) &vsr;
 726             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 727
 728             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 729             if (ret < 0) {
 730                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 731                         i, strerror(errno));
 732                 return ret;
 733             }
 734         }
 735     }
 736
 737     if (env->insns_flags & PPC_ALTIVEC) {
 738         reg.id = KVM_REG_PPC_VSCR;
 739         reg.addr = (uintptr_t)&env->vscr;
 740         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 741         if (ret < 0) {
 742             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 743             return ret;
 744         }
 745
 746         for (i = 0; i < 32; i++) {
 747             reg.id = KVM_REG_PPC_VR(i);
 748             reg.addr = (uintptr_t)&env->avr[i];
 749             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 750             if (ret < 0) {
 751                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 752                 return ret;
 753             }
 754         }
 755     }
 756
 757     return 0;
 758 }
 759
 760 static int kvm_get_fp(CPUState *cs)
 761 {
 762     PowerPCCPU *cpu = POWERPC_CPU(cs);
 763     CPUPPCState *env = &cpu->env;
 764     struct kvm_one_reg reg;
 765     int i;
 766     int ret;
 767
 768     if (env->insns_flags & PPC_FLOAT) {
 769         uint64_t fpscr;
 770         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 771
 772         reg.id = KVM_REG_PPC_FPSCR;
 773         reg.addr = (uintptr_t)&fpscr;
 774         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 775         if (ret < 0) {
 776             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 777             return ret;
 778         } else {
 779             env->fpscr = fpscr;
 780         }
 781
 782         for (i = 0; i < 32; i++) {
 783             uint64_t vsr[2];
 784
 785             reg.addr = (uintptr_t) &vsr;
 786             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 787
 788             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 789             if (ret < 0) {
 790                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 791                         vsx ? "VSR" : "FPR", i, strerror(errno));
 792                 return ret;
 793             } else {
 794 #ifdef HOST_WORDS_BIGENDIAN
 795                 env->fpr[i] = vsr[0];
 796                 if (vsx) {
 797                     env->vsr[i] = vsr[1];
 798                 }
 799 #else
 800                 env->fpr[i] = vsr[1];
 801                 if (vsx) {
 802                     env->vsr[i] = vsr[0];
 803                 }
 804 #endif
 805             }
 806         }
 807     }
 808
 809     if (env->insns_flags & PPC_ALTIVEC) {
 810         reg.id = KVM_REG_PPC_VSCR;
 811         reg.addr = (uintptr_t)&env->vscr;
 812         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 813         if (ret < 0) {
 814             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 815             return ret;
 816         }
 817
 818         for (i = 0; i < 32; i++) {
 819             reg.id = KVM_REG_PPC_VR(i);
 820             reg.addr = (uintptr_t)&env->avr[i];
 821             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 822             if (ret < 0) {
 823                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 824                         i, strerror(errno));
 825                 return ret;
 826             }
 827         }
 828     }
 829
 830     return 0;
 831 }
 832
 833 #if defined(TARGET_PPC64)
 834 static int kvm_get_vpa(CPUState *cs)
 835 {
 836     PowerPCCPU *cpu = POWERPC_CPU(cs);
 837     CPUPPCState *env = &cpu->env;
 838     struct kvm_one_reg reg;
 839     int ret;
 840
 841     reg.id = KVM_REG_PPC_VPA_ADDR;
 842     reg.addr = (uintptr_t)&env->vpa_addr;
 843     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 844     if (ret < 0) {
 845         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 846         return ret;
 847     }
 848
 849     assert((uintptr_t)&env->slb_shadow_size
 850            == ((uintptr_t)&env->slb_shadow_addr + 8));
 851     reg.id = KVM_REG_PPC_VPA_SLB;
 852     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 853     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 854     if (ret < 0) {
 855         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 856                 strerror(errno));
 857         return ret;
 858     }
 859
 860     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 861     reg.id = KVM_REG_PPC_VPA_DTL;
 862     reg.addr = (uintptr_t)&env->dtl_addr;
 863     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 864     if (ret < 0) {
 865         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 866                 strerror(errno));
 867         return ret;
 868     }
 869
 870     return 0;
 871 }
 872
 873 static int kvm_put_vpa(CPUState *cs)
 874 {
 875     PowerPCCPU *cpu = POWERPC_CPU(cs);
 876     CPUPPCState *env = &cpu->env;
 877     struct kvm_one_reg reg;
 878     int ret;
 879
 880     /* SLB shadow or DTL can't be registered unless a master VPA is
 881      * registered.  That means when restoring state, if a VPA *is*
 882      * registered, we need to set that up first.  If not, we need to
 883      * deregister the others before deregistering the master VPA */
 884     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 885
 886     if (env->vpa_addr) {
 887         reg.id = KVM_REG_PPC_VPA_ADDR;
 888         reg.addr = (uintptr_t)&env->vpa_addr;
 889         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 890         if (ret < 0) {
 891             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 892             return ret;
 893         }
 894     }
 895
 896     assert((uintptr_t)&env->slb_shadow_size
 897            == ((uintptr_t)&env->slb_shadow_addr + 8));
 898     reg.id = KVM_REG_PPC_VPA_SLB;
 899     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 900     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 901     if (ret < 0) {
 902         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 903         return ret;
 904     }
 905
 906     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 907     reg.id = KVM_REG_PPC_VPA_DTL;
 908     reg.addr = (uintptr_t)&env->dtl_addr;
 909     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 910     if (ret < 0) {
 911         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 912                 strerror(errno));
 913         return ret;
 914     }
 915
 916     if (!env->vpa_addr) {
 917         reg.id = KVM_REG_PPC_VPA_ADDR;
 918         reg.addr = (uintptr_t)&env->vpa_addr;
 919         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 920         if (ret < 0) {
 921             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 922             return ret;
 923         }
 924     }
 925
 926     return 0;
 927 }
 928 #endif /* TARGET_PPC64 */
 929
 930 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 931 {
 932     CPUPPCState *env = &cpu->env;
 933     struct kvm_sregs sregs;
 934     int i;
 935
 936     sregs.pvr = env->spr[SPR_PVR];
 937
 938     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 939
 940     /* Sync SLB */
 941 #ifdef TARGET_PPC64
 942     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 943         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 944         if (env->slb[i].esid & SLB_ESID_V) {
 945             sregs.u.s.ppc64.slb[i].slbe |= i;
 946         }
 947         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 948     }
 949 #endif
 950
 951     /* Sync SRs */
 952     for (i = 0; i < 16; i++) {
 953         sregs.u.s.ppc32.sr[i] = env->sr[i];
 954     }
 955
 956     /* Sync BATs */
 957     for (i = 0; i < 8; i++) {
 958         /* Beware. We have to swap upper and lower bits here */
 959         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 960             | env->DBAT[1][i];
 961         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 962             | env->IBAT[1][i];
 963     }
 964
 965     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 966 }
 967
 968 int kvm_arch_put_registers(CPUState *cs, int level)
 969 {
 970     PowerPCCPU *cpu = POWERPC_CPU(cs);
 971     CPUPPCState *env = &cpu->env;
 972     struct kvm_regs regs;
 973     int ret;
 974     int i;
 975
 976     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 977     if (ret < 0) {
 978         return ret;
 979     }
 980
 981     regs.ctr = env->ctr;
 982     regs.lr  = env->lr;
 983     regs.xer = cpu_read_xer(env);
 984     regs.msr = env->msr;
 985     regs.pc = env->nip;
 986
 987     regs.srr0 = env->spr[SPR_SRR0];
 988     regs.srr1 = env->spr[SPR_SRR1];
 989
 990     regs.sprg0 = env->spr[SPR_SPRG0];
 991     regs.sprg1 = env->spr[SPR_SPRG1];
 992     regs.sprg2 = env->spr[SPR_SPRG2];
 993     regs.sprg3 = env->spr[SPR_SPRG3];
 994     regs.sprg4 = env->spr[SPR_SPRG4];
 995     regs.sprg5 = env->spr[SPR_SPRG5];
 996     regs.sprg6 = env->spr[SPR_SPRG6];
 997     regs.sprg7 = env->spr[SPR_SPRG7];
 998
 999     regs.pid = env->spr[SPR_BOOKE_PID];
1000
1001     for (i = 0;i < 32; i++)
1002         regs.gpr[i] = env->gpr[i];
1003
1004     regs.cr = 0;
1005     for (i = 0; i < 8; i++) {
1006         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1007     }
1008
1009     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1010     if (ret < 0)
1011         return ret;
1012
1013     kvm_put_fp(cs);
1014
1015     if (env->tlb_dirty) {
1016         kvm_sw_tlb_put(cpu);
1017         env->tlb_dirty = false;
1018     }
1019
1020     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1021         ret = kvmppc_put_books_sregs(cpu);
1022         if (ret < 0) {
1023             return ret;
1024         }
1025     }
1026
1027     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1028         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1029     }
1030
1031     if (cap_one_reg) {
1032         int i;
1033
1034         /* We deliberately ignore errors here, for kernels which have
1035          * the ONE_REG calls, but don't support the specific
1036          * registers, there's a reasonable chance things will still
1037          * work, at least until we try to migrate. */
1038         for (i = 0; i < 1024; i++) {
1039             uint64_t id = env->spr_cb[i].one_reg_id;
1040
1041             if (id != 0) {
1042                 kvm_put_one_spr(cs, id, i);
1043             }
1044         }
1045
1046 #ifdef TARGET_PPC64
1047         if (msr_ts) {
1048             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1049                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1050             }
1051             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1052                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1053             }
1054             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1055             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1056             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1057             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1058             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1059             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1060             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1061             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1062             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1063             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1064         }
1065
1066         if (cap_papr) {
1067             if (kvm_put_vpa(cs) < 0) {
1068                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1069             }
1070         }
1071
1072         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1073 #endif /* TARGET_PPC64 */
1074     }
1075
1076     return ret;
1077 }
1078
1079 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1080 {
1081      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1082 }
1083
1084 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1085 {
1086     CPUPPCState *env = &cpu->env;
1087     struct kvm_sregs sregs;
1088     int ret;
1089
1090     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1091     if (ret < 0) {
1092         return ret;
1093     }
1094
1095     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1096         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1097         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1098         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1099         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1100         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1101         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1102         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1103         env->spr[SPR_DECR] = sregs.u.e.dec;
1104         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1105         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1106         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1107     }
1108
1109     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1110         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1111         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1112         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1113         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1114         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1115     }
1116
1117     if (sregs.u.e.features & KVM_SREGS_E_64) {
1118         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1119     }
1120
1121     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1122         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1123     }
1124
1125     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1126         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1127         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1128         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1129         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1130         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1131         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1132         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1133         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1134         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1135         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1136         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1137         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1138         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1139         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1140         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1141         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1142         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1143         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1144         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1145         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1146         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1147         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1148         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1149         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1150         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1151         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1152         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1153         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1154         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1155         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1156         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1157         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1158
1159         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1160             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1161             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1162             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1163             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1164             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1165             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1166         }
1167
1168         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1169             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1170             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1171         }
1172
1173         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1174             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1175             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1176             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1177             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1178         }
1179     }
1180
1181     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1182         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1183         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1184         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1185         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1186         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1187         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1188         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1189         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1190         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1191         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1192     }
1193
1194     if (sregs.u.e.features & KVM_SREGS_EXP) {
1195         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1196     }
1197
1198     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1199         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1200         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1201     }
1202
1203     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1204         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1205         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1206         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1207
1208         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1209             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1210             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1211         }
1212     }
1213
1214     return 0;
1215 }
1216
1217 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1218 {
1219     CPUPPCState *env = &cpu->env;
1220     struct kvm_sregs sregs;
1221     int ret;
1222     int i;
1223
1224     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1225     if (ret < 0) {
1226         return ret;
1227     }
1228
1229     if (!env->external_htab) {
1230         ppc_store_sdr1(env, sregs.u.s.sdr1);
1231     }
1232
1233     /* Sync SLB */
1234 #ifdef TARGET_PPC64
1235     /*
1236      * The packed SLB array we get from KVM_GET_SREGS only contains
1237      * information about valid entries. So we flush our internal copy
1238      * to get rid of stale ones, then put all valid SLB entries back
1239      * in.
1240      */
1241     memset(env->slb, 0, sizeof(env->slb));
1242     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1243         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1244         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1245         /*
1246          * Only restore valid entries
1247          */
1248         if (rb & SLB_ESID_V) {
1249             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1250         }
1251     }
1252 #endif
1253
1254     /* Sync SRs */
1255     for (i = 0; i < 16; i++) {
1256         env->sr[i] = sregs.u.s.ppc32.sr[i];
1257     }
1258
1259     /* Sync BATs */
1260     for (i = 0; i < 8; i++) {
1261         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1262         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1263         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1264         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1265     }
1266
1267     return 0;
1268 }
1269
1270 int kvm_arch_get_registers(CPUState *cs)
1271 {
1272     PowerPCCPU *cpu = POWERPC_CPU(cs);
1273     CPUPPCState *env = &cpu->env;
1274     struct kvm_regs regs;
1275     uint32_t cr;
1276     int i, ret;
1277
1278     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1279     if (ret < 0)
1280         return ret;
1281
1282     cr = regs.cr;
1283     for (i = 7; i >= 0; i--) {
1284         env->crf[i] = cr & 15;
1285         cr >>= 4;
1286     }
1287
1288     env->ctr = regs.ctr;
1289     env->lr = regs.lr;
1290     cpu_write_xer(env, regs.xer);
1291     env->msr = regs.msr;
1292     env->nip = regs.pc;
1293
1294     env->spr[SPR_SRR0] = regs.srr0;
1295     env->spr[SPR_SRR1] = regs.srr1;
1296
1297     env->spr[SPR_SPRG0] = regs.sprg0;
1298     env->spr[SPR_SPRG1] = regs.sprg1;
1299     env->spr[SPR_SPRG2] = regs.sprg2;
1300     env->spr[SPR_SPRG3] = regs.sprg3;
1301     env->spr[SPR_SPRG4] = regs.sprg4;
1302     env->spr[SPR_SPRG5] = regs.sprg5;
1303     env->spr[SPR_SPRG6] = regs.sprg6;
1304     env->spr[SPR_SPRG7] = regs.sprg7;
1305
1306     env->spr[SPR_BOOKE_PID] = regs.pid;
1307
1308     for (i = 0;i < 32; i++)
1309         env->gpr[i] = regs.gpr[i];
1310
1311     kvm_get_fp(cs);
1312
1313     if (cap_booke_sregs) {
1314         ret = kvmppc_get_booke_sregs(cpu);
1315         if (ret < 0) {
1316             return ret;
1317         }
1318     }
1319
1320     if (cap_segstate) {
1321         ret = kvmppc_get_books_sregs(cpu);
1322         if (ret < 0) {
1323             return ret;
1324         }
1325     }
1326
1327     if (cap_hior) {
1328         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1329     }
1330
1331     if (cap_one_reg) {
1332         int i;
1333
1334         /* We deliberately ignore errors here, for kernels which have
1335          * the ONE_REG calls, but don't support the specific
1336          * registers, there's a reasonable chance things will still
1337          * work, at least until we try to migrate. */
1338         for (i = 0; i < 1024; i++) {
1339             uint64_t id = env->spr_cb[i].one_reg_id;
1340
1341             if (id != 0) {
1342                 kvm_get_one_spr(cs, id, i);
1343             }
1344         }
1345
1346 #ifdef TARGET_PPC64
1347         if (msr_ts) {
1348             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1349                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1350             }
1351             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1352                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1353             }
1354             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1355             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1356             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1357             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1358             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1359             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1360             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1361             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1362             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1363             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1364         }
1365
1366         if (cap_papr) {
1367             if (kvm_get_vpa(cs) < 0) {
1368                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1369             }
1370         }
1371
1372         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1373 #endif
1374     }
1375
1376     return 0;
1377 }
1378
1379 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1380 {
1381     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1382
1383     if (irq != PPC_INTERRUPT_EXT) {
1384         return 0;
1385     }
1386
1387     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1388         return 0;
1389     }
1390
1391     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1392
1393     return 0;
1394 }
1395
/*
 * PPC_INPUT_INT: per-target alias for the interrupt input pin index.
 * kvm_arch_pre_run() tests this bit in env->irq_input_state.
 */
#if defined(TARGET_PPCEMB)
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1403
1404 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1405 {
1406     PowerPCCPU *cpu = POWERPC_CPU(cs);
1407     CPUPPCState *env = &cpu->env;
1408     int r;
1409     unsigned irq;
1410
1411     qemu_mutex_lock_iothread();
1412
1413     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1414      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1415     if (!cap_interrupt_level &&
1416         run->ready_for_interrupt_injection &&
1417         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1418         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1419     {
1420         /* For now KVM disregards the 'irq' argument. However, in the
1421          * future KVM could cache it in-kernel to avoid a heavyweight exit
1422          * when reading the UIC.
1423          */
1424         irq = KVM_INTERRUPT_SET;
1425
1426         DPRINTF("injected interrupt %d\n", irq);
1427         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1428         if (r < 0) {
1429             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1430         }
1431
1432         /* Always wake up soon in case the interrupt was level based */
1433         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1434                        (NANOSECONDS_PER_SECOND / 50));
1435     }
1436
1437     /* We don't know if there are more interrupts pending after this. However,
1438      * the guest will return to userspace in the course of handling this one
1439      * anyways, so we will get a chance to deliver the rest. */
1440
1441     qemu_mutex_unlock_iothread();
1442 }
1443
1444 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1445 {
1446     return MEMTXATTRS_UNSPECIFIED;
1447 }
1448
1449 int kvm_arch_process_async_events(CPUState *cs)
1450 {
1451     return cs->halted;
1452 }
1453
1454 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1455 {
1456     CPUState *cs = CPU(cpu);
1457     CPUPPCState *env = &cpu->env;
1458
1459     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1460         cs->halted = 1;
1461         cs->exception_index = EXCP_HLT;
1462     }
1463
1464     return 0;
1465 }
1466
1467 /* map dcr access to existing qemu dcr emulation */
1468 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1469 {
1470     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1471         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1472
1473     return 0;
1474 }
1475
1476 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1477 {
1478     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1479         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1480
1481     return 0;
1482 }
1483
1484 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1485 {
1486     /* Mixed endian case is not handled */
1487     uint32_t sc = debug_inst_opcode;
1488
1489     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1490                             sizeof(sc), 0) ||
1491         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1492         return -EINVAL;
1493     }
1494
1495     return 0;
1496 }
1497
1498 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1499 {
1500     uint32_t sc;
1501
1502     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1503         sc != debug_inst_opcode ||
1504         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1505                             sizeof(sc), 1)) {
1506         return -EINVAL;
1507     }
1508
1509     return 0;
1510 }
1511
1512 static int find_hw_breakpoint(target_ulong addr, int type)
1513 {
1514     int n;
1515
1516     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1517            <= ARRAY_SIZE(hw_debug_points));
1518
1519     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1520         if (hw_debug_points[n].addr == addr &&
1521              hw_debug_points[n].type == type) {
1522             return n;
1523         }
1524     }
1525
1526     return -1;
1527 }
1528
1529 static int find_hw_watchpoint(target_ulong addr, int *flag)
1530 {
1531     int n;
1532
1533     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1534     if (n >= 0) {
1535         *flag = BP_MEM_ACCESS;
1536         return n;
1537     }
1538
1539     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1540     if (n >= 0) {
1541         *flag = BP_MEM_WRITE;
1542         return n;
1543     }
1544
1545     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1546     if (n >= 0) {
1547         *flag = BP_MEM_READ;
1548         return n;
1549     }
1550
1551     return -1;
1552 }
1553
1554 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1555                                   target_ulong len, int type)
1556 {
1557     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1558         return -ENOBUFS;
1559     }
1560
1561     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1562     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1563
1564     switch (type) {
1565     case GDB_BREAKPOINT_HW:
1566         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1567             return -ENOBUFS;
1568         }
1569
1570         if (find_hw_breakpoint(addr, type) >= 0) {
1571             return -EEXIST;
1572         }
1573
1574         nb_hw_breakpoint++;
1575         break;
1576
1577     case GDB_WATCHPOINT_WRITE:
1578     case GDB_WATCHPOINT_READ:
1579     case GDB_WATCHPOINT_ACCESS:
1580         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1581             return -ENOBUFS;
1582         }
1583
1584         if (find_hw_breakpoint(addr, type) >= 0) {
1585             return -EEXIST;
1586         }
1587
1588         nb_hw_watchpoint++;
1589         break;
1590
1591     default:
1592         return -ENOSYS;
1593     }
1594
1595     return 0;
1596 }
1597
1598 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1599                                   target_ulong len, int type)
1600 {
1601     int n;
1602
1603     n = find_hw_breakpoint(addr, type);
1604     if (n < 0) {
1605         return -ENOENT;
1606     }
1607
1608     switch (type) {
1609     case GDB_BREAKPOINT_HW:
1610         nb_hw_breakpoint--;
1611         break;
1612
1613     case GDB_WATCHPOINT_WRITE:
1614     case GDB_WATCHPOINT_READ:
1615     case GDB_WATCHPOINT_ACCESS:
1616         nb_hw_watchpoint--;
1617         break;
1618
1619     default:
1620         return -ENOSYS;
1621     }
1622     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1623
1624     return 0;
1625 }
1626
1627 void kvm_arch_remove_all_hw_breakpoints(void)
1628 {
1629     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1630 }
1631
1632 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1633 {
1634     int n;
1635
1636     /* Software Breakpoint updates */
1637     if (kvm_sw_breakpoints_active(cs)) {
1638         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1639     }
1640
1641     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1642            <= ARRAY_SIZE(hw_debug_points));
1643     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1644
1645     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1646         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1647         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1648         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1649             switch (hw_debug_points[n].type) {
1650             case GDB_BREAKPOINT_HW:
1651                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1652                 break;
1653             case GDB_WATCHPOINT_WRITE:
1654                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1655                 break;
1656             case GDB_WATCHPOINT_READ:
1657                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1658                 break;
1659             case GDB_WATCHPOINT_ACCESS:
1660                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1661                                         KVMPPC_DEBUG_WATCH_READ;
1662                 break;
1663             default:
1664                 cpu_abort(cs, "Unsupported breakpoint type\n");
1665             }
1666             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1667         }
1668     }
1669 }
1670
1671 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1672 {
1673     CPUState *cs = CPU(cpu);
1674     CPUPPCState *env = &cpu->env;
1675     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1676     int handle = 0;
1677     int n;
1678     int flag = 0;
1679
1680     if (cs->singlestep_enabled) {
1681         handle = 1;
1682     } else if (arch_info->status) {
1683         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1684             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1685                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1686                 if (n >= 0) {
1687                     handle = 1;
1688                 }
1689             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1690                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1691                 n = find_hw_watchpoint(arch_info->address,  &flag);
1692                 if (n >= 0) {
1693                     handle = 1;
1694                     cs->watchpoint_hit = &hw_watchpoint;
1695                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1696                     hw_watchpoint.flags = flag;
1697                 }
1698             }
1699         }
1700     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1701         handle = 1;
1702     } else {
1703         /* QEMU is not able to handle debug exception, so inject
1704          * program exception to guest;
1705          * Yes program exception NOT debug exception !!
1706          * When QEMU is using debug resources then debug exception must
1707          * be always set. To achieve this we set MSR_DE and also set
1708          * MSRP_DEP so guest cannot change MSR_DE.
1709          * When emulating debug resource for guest we want guest
1710          * to control MSR_DE (enable/disable debug interrupt on need).
1711          * Supporting both configurations are NOT possible.
1712          * So the result is that we cannot share debug resources
1713          * between QEMU and Guest on BOOKE architecture.
1714          * In the current design QEMU gets the priority over guest,
1715          * this means that if QEMU is using debug resources then guest
1716          * cannot use them;
1717          * For software breakpoint QEMU uses a privileged instruction;
1718          * So there cannot be any reason that we are here for guest
1719          * set debug exception, only possibility is guest executed a
1720          * privileged / illegal instruction and that's why we are
1721          * injecting a program interrupt.
1722          */
1723
1724         cpu_synchronize_state(cs);
1725         /* env->nip is PC, so increment this by 4 to use
1726          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1727          */
1728         env->nip += 4;
1729         cs->exception_index = POWERPC_EXCP_PROGRAM;
1730         env->error_code = POWERPC_EXCP_INVAL;
1731         ppc_cpu_do_interrupt(cs);
1732     }
1733
1734     return handle;
1735 }
1736
1737 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1738 {
1739     PowerPCCPU *cpu = POWERPC_CPU(cs);
1740     CPUPPCState *env = &cpu->env;
1741     int ret;
1742
1743     qemu_mutex_lock_iothread();
1744
1745     switch (run->exit_reason) {
1746     case KVM_EXIT_DCR:
1747         if (run->dcr.is_write) {
1748             DPRINTF("handle dcr write\n");
1749             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1750         } else {
1751             DPRINTF("handle dcr read\n");
1752             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1753         }
1754         break;
1755     case KVM_EXIT_HLT:
1756         DPRINTF("handle halt\n");
1757         ret = kvmppc_handle_halt(cpu);
1758         break;
1759 #if defined(TARGET_PPC64)
1760     case KVM_EXIT_PAPR_HCALL:
1761         DPRINTF("handle PAPR hypercall\n");
1762         run->papr_hcall.ret = spapr_hypercall(cpu,
1763                                               run->papr_hcall.nr,
1764                                               run->papr_hcall.args);
1765         ret = 0;
1766         break;
1767 #endif
1768     case KVM_EXIT_EPR:
1769         DPRINTF("handle epr\n");
1770         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1771         ret = 0;
1772         break;
1773     case KVM_EXIT_WATCHDOG:
1774         DPRINTF("handle watchdog expiry\n");
1775         watchdog_perform_action();
1776         ret = 0;
1777         break;
1778
1779     case KVM_EXIT_DEBUG:
1780         DPRINTF("handle debug exception\n");
1781         if (kvm_handle_debug(cpu, run)) {
1782             ret = EXCP_DEBUG;
1783             break;
1784         }
1785         /* re-enter, this exception was guest-internal */
1786         ret = 0;
1787         break;
1788
1789     default:
1790         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1791         ret = -1;
1792         break;
1793     }
1794
1795     qemu_mutex_unlock_iothread();
1796     return ret;
1797 }
1798
1799 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1800 {
1801     CPUState *cs = CPU(cpu);
1802     uint32_t bits = tsr_bits;
1803     struct kvm_one_reg reg = {
1804         .id = KVM_REG_PPC_OR_TSR,
1805         .addr = (uintptr_t) &bits,
1806     };
1807
1808     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1809 }
1810
1811 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1812 {
1813
1814     CPUState *cs = CPU(cpu);
1815     uint32_t bits = tsr_bits;
1816     struct kvm_one_reg reg = {
1817         .id = KVM_REG_PPC_CLEAR_TSR,
1818         .addr = (uintptr_t) &bits,
1819     };
1820
1821     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1822 }
1823
1824 int kvmppc_set_tcr(PowerPCCPU *cpu)
1825 {
1826     CPUState *cs = CPU(cpu);
1827     CPUPPCState *env = &cpu->env;
1828     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1829
1830     struct kvm_one_reg reg = {
1831         .id = KVM_REG_PPC_TCR,
1832         .addr = (uintptr_t) &tcr,
1833     };
1834
1835     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1836 }
1837
1838 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1839 {
1840     CPUState *cs = CPU(cpu);
1841     int ret;
1842
1843     if (!kvm_enabled()) {
1844         return -1;
1845     }
1846
1847     if (!cap_ppc_watchdog) {
1848         printf("warning: KVM does not support watchdog");
1849         return -1;
1850     }
1851
1852     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1853     if (ret < 0) {
1854         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1855                 __func__, strerror(-ret));
1856         return ret;
1857     }
1858
1859     return ret;
1860 }
1861
/*
 * Scan /proc/cpuinfo for the first line that starts with @field and copy
 * that whole line into @value (at most @len bytes, via pstrcpy()).
 *
 * Returns 0 when a matching line was found, -1 when the file could not be
 * opened or no line matched.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int ret = -1;
    FILE *f = fopen("/proc/cpuinfo", "r");

    if (!f) {
        return -1;
    }

    for (;;) {
        if (!fgets(line, sizeof(line), f)) {
            /* end of file or read error */
            break;
        }
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            ret = 0;
            break;
        }
        if (!*line) {
            /* an empty string ends the scan as well */
            break;
        }
    }

    fclose(f);

    return ret;
}
1889
1890 uint32_t kvmppc_get_tbfreq(void)
1891 {
1892     char line[512];
1893     char *ns;
1894     uint32_t retval = NANOSECONDS_PER_SECOND;
1895
1896     if (read_cpuinfo("timebase", line, sizeof(line))) {
1897         return retval;
1898     }
1899
1900     if (!(ns = strchr(line, ':'))) {
1901         return retval;
1902     }
1903
1904     ns++;
1905
1906     return atoi(ns);
1907 }
1908
/*
 * Load the contents of /proc/device-tree/system-id into a buffer stored in
 * *@value, using g_file_get_contents().  Returns that function's result.
 */
bool kvmppc_get_host_serial(char **value)
{
    static const char path[] = "/proc/device-tree/system-id";

    return g_file_get_contents(path, value, NULL, NULL);
}
1914
/*
 * Load the contents of /proc/device-tree/model into a buffer stored in
 * *@value, using g_file_get_contents().  Returns that function's result.
 */
bool kvmppc_get_host_model(char **value)
{
    static const char path[] = "/proc/device-tree/model";

    return g_file_get_contents(path, value, NULL, NULL);
}
1919
1920 /* Try to find a device tree node for a CPU with clock-frequency property */
1921 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1922 {
1923     struct dirent *dirp;
1924     DIR *dp;
1925
1926     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1927         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1928         return -1;
1929     }
1930
1931     buf[0] = '\0';
1932     while ((dirp = readdir(dp)) != NULL) {
1933         FILE *f;
1934         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1935                  dirp->d_name);
1936         f = fopen(buf, "r");
1937         if (f) {
1938             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1939             fclose(f);
1940             break;
1941         }
1942         buf[0] = '\0';
1943     }
1944     closedir(dp);
1945     if (buf[0] == '\0') {
1946         printf("Unknown host!\n");
1947         return -1;
1948     }
1949
1950     return 0;
1951 }
1952
/*
 * Read a big-endian integer from @filename.
 *
 * Up to 8 bytes are read.  Exactly 4 bytes are decoded as a 32-bit value
 * and exactly 8 bytes as a 64-bit value; any other length yields 0.
 * Returns -1 (as uint64_t) if the file cannot be opened.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    FILE *fp = fopen(filename, "rb");
    int nread;

    if (fp == NULL) {
        return -1;
    }

    nread = fread(&raw, 1, sizeof(raw), fp);
    fclose(fp);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1979
1980 /* Read a CPU node property from the host device tree that's a single
1981  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1982  * (can't find or open the property, or doesn't understand the
1983  * format) */
1984 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1985 {
1986     char buf[PATH_MAX], *tmp;
1987     uint64_t val;
1988
1989     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1990         return -1;
1991     }
1992
1993     tmp = g_strdup_printf("%s/%s", buf, propname);
1994     val = kvmppc_read_int_dt(tmp);
1995     g_free(tmp);
1996
1997     return val;
1998 }
1999
/* Return the host CPU node's "clock-frequency" device tree property. */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
2004
/*
 * Return the host CPU node's "ibm,vmx" device tree property, truncated to
 * 32 bits.
 */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t raw = kvmppc_read_int_cpu_dt("ibm,vmx");

    return (uint32_t)raw;
}
2009
/*
 * Return the host CPU node's "ibm,dfp" device tree property, truncated to
 * 32 bits.
 */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t raw = kvmppc_read_int_cpu_dt("ibm,dfp");

    return (uint32_t)raw;
}
2014
2015 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2016  {
2017      PowerPCCPU *cpu = ppc_env_get_cpu(env);
2018      CPUState *cs = CPU(cpu);
2019
2020     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2021         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2022         return 0;
2023     }
2024
2025     return 1;
2026 }
2027
2028 int kvmppc_get_hasidle(CPUPPCState *env)
2029 {
2030     struct kvm_ppc_pvinfo pvinfo;
2031
2032     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2033         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2034         return 1;
2035     }
2036
2037     return 0;
2038 }
2039
2040 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2041 {
2042     uint32_t *hc = (uint32_t*)buf;
2043     struct kvm_ppc_pvinfo pvinfo;
2044
2045     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2046         memcpy(buf, pvinfo.hcall, buf_len);
2047         return 0;
2048     }
2049
2050     /*
2051      * Fallback to always fail hypercalls regardless of endianness:
2052      *
2053      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2054      *     li r3, -1
2055      *     b .+8       (becomes nop in wrong endian)
2056      *     bswap32(li r3, -1)
2057      */
2058
2059     hc[0] = cpu_to_be32(0x08000048);
2060     hc[1] = cpu_to_be32(0x3860ffff);
2061     hc[2] = cpu_to_be32(0x48000008);
2062     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2063
2064     return 1;
2065 }
2066
2067 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2068 {
2069     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2070 }
2071
2072 void kvmppc_enable_logical_ci_hcalls(void)
2073 {
2074     /*
2075      * FIXME: it would be nice if we could detect the cases where
2076      * we're using a device which requires the in kernel
2077      * implementation of these hcalls, but the kernel lacks them and
2078      * produce a warning.
2079      */
2080     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2081     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2082 }
2083
2084 void kvmppc_enable_set_mode_hcall(void)
2085 {
2086     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2087 }
2088
2089 void kvmppc_enable_clear_ref_mod_hcalls(void)
2090 {
2091     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2092     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2093 }
2094
2095 void kvmppc_set_papr(PowerPCCPU *cpu)
2096 {
2097     CPUState *cs = CPU(cpu);
2098     int ret;
2099
2100     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2101     if (ret) {
2102         error_report("This vCPU type or KVM version does not support PAPR");
2103         exit(1);
2104     }
2105
2106     /* Update the capability flag so we sync the right information
2107      * with kvm */
2108     cap_papr = 1;
2109 }
2110
2111 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t cpu_version)
2112 {
2113     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &cpu_version);
2114 }
2115
2116 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2117 {
2118     CPUState *cs = CPU(cpu);
2119     int ret;
2120
2121     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2122     if (ret && mpic_proxy) {
2123         error_report("This KVM version does not support EPR");
2124         exit(1);
2125     }
2126 }
2127
2128 int kvmppc_smt_threads(void)
2129 {
2130     return cap_ppc_smt ? cap_ppc_smt : 1;
2131 }
2132
2133 #ifdef TARGET_PPC64
2134 off_t kvmppc_alloc_rma(void **rma)
2135 {
2136     off_t size;
2137     int fd;
2138     struct kvm_allocate_rma ret;
2139
2140     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2141      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2142      *                      not necessary on this hardware
2143      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2144      *
2145      * FIXME: We should allow the user to force contiguous RMA
2146      * allocation in the cap_ppc_rma==1 case.
2147      */
2148     if (cap_ppc_rma < 2) {
2149         return 0;
2150     }
2151
2152     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2153     if (fd < 0) {
2154         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2155                 strerror(errno));
2156         return -1;
2157     }
2158
2159     size = MIN(ret.rma_size, 256ul << 20);
2160
2161     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2162     if (*rma == MAP_FAILED) {
2163         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2164         return -1;
2165     };
2166
2167     return size;
2168 }
2169
2170 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2171 {
2172     struct kvm_ppc_smmu_info info;
2173     long rampagesize, best_page_shift;
2174     int i;
2175
2176     if (cap_ppc_rma >= 2) {
2177         return current_size;
2178     }
2179
2180     /* Find the largest hardware supported page size that's less than
2181      * or equal to the (logical) backing page size of guest RAM */
2182     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2183     rampagesize = getrampagesize();
2184     best_page_shift = 0;
2185
2186     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2187         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2188
2189         if (!sps->page_shift) {
2190             continue;
2191         }
2192
2193         if ((sps->page_shift > best_page_shift)
2194             && ((1UL << sps->page_shift) <= rampagesize)) {
2195             best_page_shift = sps->page_shift;
2196         }
2197     }
2198
2199     return MIN(current_size,
2200                1ULL << (best_page_shift + hash_shift - 7));
2201 }
2202 #endif
2203
2204 bool kvmppc_spapr_use_multitce(void)
2205 {
2206     return cap_spapr_multitce;
2207 }
2208
2209 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2210                               bool need_vfio)
2211 {
2212     struct kvm_create_spapr_tce args = {
2213         .liobn = liobn,
2214         .window_size = window_size,
2215     };
2216     long len;
2217     int fd;
2218     void *table;
2219
2220     /* Must set fd to -1 so we don't try to munmap when called for
2221      * destroying the table, which the upper layers -will- do
2222      */
2223     *pfd = -1;
2224     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2225         return NULL;
2226     }
2227
2228     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2229     if (fd < 0) {
2230         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2231                 liobn);
2232         return NULL;
2233     }
2234
2235     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2236     /* FIXME: round this up to page size */
2237
2238     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2239     if (table == MAP_FAILED) {
2240         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2241                 liobn);
2242         close(fd);
2243         return NULL;
2244     }
2245
2246     *pfd = fd;
2247     return table;
2248 }
2249
/*
 * Tear down a TCE table set up by kvmppc_create_spapr_tce(): unmap
 * @nb_table 64-bit entries at @table and close @fd.
 *
 * Returns -1 when @fd is negative (nothing to remove), 0 otherwise.
 * Failures of munmap()/close() are reported on stderr but do not change
 * the return value.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);

    if (munmap(table, len) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    /*
     * Close the fd even if the unmap failed, so a munmap() error does not
     * additionally leak the descriptor.
     */
    if (close(fd) < 0) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
    }

    return 0;
}
2268
2269 int kvmppc_reset_htab(int shift_hint)
2270 {
2271     uint32_t shift = shift_hint;
2272
2273     if (!kvm_enabled()) {
2274         /* Full emulation, tell caller to allocate htab itself */
2275         return 0;
2276     }
2277     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2278         int ret;
2279         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2280         if (ret == -ENOTTY) {
2281             /* At least some versions of PR KVM advertise the
2282              * capability, but don't implement the ioctl().  Oops.
2283              * Return 0 so that we allocate the htab in qemu, as is
2284              * correct for PR. */
2285             return 0;
2286         } else if (ret < 0) {
2287             return ret;
2288         }
2289         return shift;
2290     }
2291
2292     /* We have a kernel that predates the htab reset calls.  For PR
2293      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2294      * this era, it has allocated a 16MB fixed size hash table already. */
2295     if (kvmppc_is_pr(kvm_state)) {
2296         /* PR - tell caller to allocate htab */
2297         return 0;
2298     } else {
2299         /* HV - assume 16MB kernel allocated htab */
2300         return 24;
2301     }
2302 }
2303
2304 static inline uint32_t mfpvr(void)
2305 {
2306     uint32_t pvr;
2307
2308     asm ("mfpvr %0"
2309          : "=r"(pvr));
2310     return pvr;
2311 }
2312
/*
 * Set (@on == true) or clear (@on == false) every bit of @flags in *@word.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2321
2322 static void kvmppc_host_cpu_initfn(Object *obj)
2323 {
2324     assert(kvm_enabled());
2325 }
2326
2327 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2328 {
2329     DeviceClass *dc = DEVICE_CLASS(oc);
2330     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2331     uint32_t vmx = kvmppc_get_vmx();
2332     uint32_t dfp = kvmppc_get_dfp();
2333     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2334     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2335
2336     /* Now fix up the class with information we can query from the host */
2337     pcc->pvr = mfpvr();
2338
2339     if (vmx != -1) {
2340         /* Only override when we know what the host supports */
2341         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2342         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2343     }
2344     if (dfp != -1) {
2345         /* Only override when we know what the host supports */
2346         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2347     }
2348
2349     if (dcache_size != -1) {
2350         pcc->l1_dcache_size = dcache_size;
2351     }
2352
2353     if (icache_size != -1) {
2354         pcc->l1_icache_size = icache_size;
2355     }
2356
2357     /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2358     dc->cannot_destroy_with_object_finalize_yet = true;
2359 }
2360
2361 bool kvmppc_has_cap_epr(void)
2362 {
2363     return cap_epr;
2364 }
2365
2366 bool kvmppc_has_cap_htab_fd(void)
2367 {
2368     return cap_htab_fd;
2369 }
2370
2371 bool kvmppc_has_cap_fixup_hcalls(void)
2372 {
2373     return cap_fixup_hcalls;
2374 }
2375
2376 bool kvmppc_has_cap_htm(void)
2377 {
2378     return cap_htm;
2379 }
2380
2381 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2382 {
2383     ObjectClass *oc = OBJECT_CLASS(pcc);
2384
2385     while (oc && !object_class_is_abstract(oc)) {
2386         oc = object_class_get_parent(oc);
2387     }
2388     assert(oc);
2389
2390     return POWERPC_CPU_CLASS(oc);
2391 }
2392
2393 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2394 {
2395     uint32_t host_pvr = mfpvr();
2396     PowerPCCPUClass *pvr_pcc;
2397
2398     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2399     if (pvr_pcc == NULL) {
2400         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2401     }
2402
2403     return pvr_pcc;
2404 }
2405
2406 static int kvm_ppc_register_host_cpu_type(void)
2407 {
2408     TypeInfo type_info = {
2409         .name = TYPE_HOST_POWERPC_CPU,
2410         .instance_init = kvmppc_host_cpu_initfn,
2411         .class_init = kvmppc_host_cpu_class_init,
2412     };
2413     PowerPCCPUClass *pvr_pcc;
2414     DeviceClass *dc;
2415
2416     pvr_pcc = kvm_ppc_get_host_cpu_class();
2417     if (pvr_pcc == NULL) {
2418         return -1;
2419     }
2420     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2421     type_register(&type_info);
2422
2423     /* Register generic family CPU class for a family */
2424     pvr_pcc = ppc_cpu_get_family_class(pvr_pcc);
2425     dc = DEVICE_CLASS(pvr_pcc);
2426     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2427     type_info.name = g_strdup_printf("%s-"TYPE_POWERPC_CPU, dc->desc);
2428     type_register(&type_info);
2429
2430 #if defined(TARGET_PPC64)
2431     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2432     type_info.parent = TYPE_SPAPR_CPU_CORE,
2433     type_info.instance_size = sizeof(sPAPRCPUCore);
2434     type_info.instance_init = NULL;
2435     type_info.class_init = spapr_cpu_core_class_init;
2436     type_info.class_data = (void *) "host";
2437     type_register(&type_info);
2438     g_free((void *)type_info.name);
2439
2440     /* Register generic spapr CPU family class for current host CPU type */
2441     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, dc->desc);
2442     type_info.class_data = (void *) dc->desc;
2443     type_register(&type_info);
2444     g_free((void *)type_info.name);
2445 #endif
2446
2447     return 0;
2448 }
2449
2450 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2451 {
2452     struct kvm_rtas_token_args args = {
2453         .token = token,
2454     };
2455
2456     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2457         return -ENOENT;
2458     }
2459
2460     strncpy(args.name, function, sizeof(args.name));
2461
2462     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2463 }
2464
2465 int kvmppc_get_htab_fd(bool write)
2466 {
2467     struct kvm_get_htab_fd s = {
2468         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2469         .start_index = 0,
2470     };
2471
2472     if (!cap_htab_fd) {
2473         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2474         return -1;
2475     }
2476
2477     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2478 }
2479
2480 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2481 {
2482     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2483     uint8_t buf[bufsize];
2484     ssize_t rc;
2485
2486     do {
2487         rc = read(fd, buf, bufsize);
2488         if (rc < 0) {
2489             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2490                     strerror(errno));
2491             return rc;
2492         } else if (rc) {
2493             uint8_t *buffer = buf;
2494             ssize_t n = rc;
2495             while (n) {
2496                 struct kvm_get_htab_header *head =
2497                     (struct kvm_get_htab_header *) buffer;
2498                 size_t chunksize = sizeof(*head) +
2499                      HASH_PTE_SIZE_64 * head->n_valid;
2500
2501                 qemu_put_be32(f, head->index);
2502                 qemu_put_be16(f, head->n_valid);
2503                 qemu_put_be16(f, head->n_invalid);
2504                 qemu_put_buffer(f, (void *)(head + 1),
2505                                 HASH_PTE_SIZE_64 * head->n_valid);
2506
2507                 buffer += chunksize;
2508                 n -= chunksize;
2509             }
2510         }
2511     } while ((rc != 0)
2512              && ((max_ns < 0)
2513                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2514
2515     return (rc == 0) ? 1 : 0;
2516 }
2517
2518 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2519                            uint16_t n_valid, uint16_t n_invalid)
2520 {
2521     struct kvm_get_htab_header *buf;
2522     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2523     ssize_t rc;
2524
2525     buf = alloca(chunksize);
2526     buf->index = index;
2527     buf->n_valid = n_valid;
2528     buf->n_invalid = n_invalid;
2529
2530     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2531
2532     rc = write(fd, buf, chunksize);
2533     if (rc < 0) {
2534         fprintf(stderr, "Error writing KVM hash table: %s\n",
2535                 strerror(errno));
2536         return rc;
2537     }
2538     if (rc != chunksize) {
2539         /* We should never get a short write on a single chunk */
2540         fprintf(stderr, "Short write, restoring KVM hash table\n");
2541         return -1;
2542     }
2543     return 0;
2544 }
2545
2546 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2547 {
2548     return true;
2549 }
2550
2551 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2552 {
2553     return 1;
2554 }
2555
/* Arch hook: this implementation ignores its arguments and returns 1. */
int kvm_arch_on_sigbus(int code, void *addr)
{
    (void)code;
    (void)addr;

    return 1;
}
2560
2561 void kvm_arch_init_irq_routing(KVMState *s)
2562 {
2563 }
2564
2565 struct kvm_get_htab_buf {
2566     struct kvm_get_htab_header header;
2567     /*
2568      * We require one extra byte for read
2569      */
2570     target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2571 };
2572
2573 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2574 {
2575     int htab_fd;
2576     struct kvm_get_htab_fd ghf;
2577     struct kvm_get_htab_buf  *hpte_buf;
2578
2579     ghf.flags = 0;
2580     ghf.start_index = pte_index;
2581     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2582     if (htab_fd < 0) {
2583         goto error_out;
2584     }
2585
2586     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2587     /*
2588      * Read the hpte group
2589      */
2590     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2591         goto out_close;
2592     }
2593
2594     close(htab_fd);
2595     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2596
2597 out_close:
2598     g_free(hpte_buf);
2599     close(htab_fd);
2600 error_out:
2601     return 0;
2602 }
2603
2604 void kvmppc_hash64_free_pteg(uint64_t token)
2605 {
2606     struct kvm_get_htab_buf *htab_buf;
2607
2608     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2609                             hpte);
2610     g_free(htab_buf);
2611     return;
2612 }
2613
2614 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2615                              target_ulong pte0, target_ulong pte1)
2616 {
2617     int htab_fd;
2618     struct kvm_get_htab_fd ghf;
2619     struct kvm_get_htab_buf hpte_buf;
2620
2621     ghf.flags = 0;
2622     ghf.start_index = 0;     /* Ignored */
2623     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2624     if (htab_fd < 0) {
2625         goto error_out;
2626     }
2627
2628     hpte_buf.header.n_valid = 1;
2629     hpte_buf.header.n_invalid = 0;
2630     hpte_buf.header.index = pte_index;
2631     hpte_buf.hpte[0] = pte0;
2632     hpte_buf.hpte[1] = pte1;
2633     /*
2634      * Write the hpte entry.
2635      * CAUTION: write() has the warn_unused_result attribute. Hence we
2636      * need to check the return value, even though we do nothing.
2637      */
2638     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2639         goto out_close;
2640     }
2641
2642 out_close:
2643     close(htab_fd);
2644     return;
2645
2646 error_out:
2647     return;
2648 }
2649
2650 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2651                              uint64_t address, uint32_t data, PCIDevice *dev)
2652 {
2653     return 0;
2654 }
2655
2656 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2657                                 int vector, PCIDevice *dev)
2658 {
2659     return 0;
2660 }
2661
/* Arch hook: this implementation ignores @virq and returns 0. */
int kvm_arch_release_virq_post(int virq)
{
    (void)virq;

    return 0;
}
2666
/* Return the low 16 bits of the MSI @data word. */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    const uint32_t low16 = 0xffff;

    return data & low16;
}
2671
2672 int kvmppc_enable_hwrng(void)
2673 {
2674     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2675         return -1;
2676     }
2677
2678     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2679 }