target/ppc/kvm.c

   1 /*
   2  * PowerPC implementation of KVM hooks
   3  *
   4  * Copyright IBM Corp. 2007
   5  * Copyright (C) 2011 Freescale Semiconductor, Inc.
   6  *
   7  * Authors:
   8  *  Jerone Young <jyoung5@us.ibm.com>
   9  *  Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
  10  *  Hollis Blanchard <hollisb@us.ibm.com>
  11  *
  12  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  13  * See the COPYING file in the top-level directory.
  14  *
  15  */
  16
  17 #include "qemu/osdep.h"
  18 #include <dirent.h>
  19 #include <sys/ioctl.h>
  20 #include <sys/vfs.h>
  21
  22 #include <linux/kvm.h>
  23
  24 #include "qemu-common.h"
  25 #include "qemu/error-report.h"
  26 #include "cpu.h"
  27 #include "cpu-models.h"
  28 #include "qemu/timer.h"
  29 #include "sysemu/sysemu.h"
  30 #include "sysemu/hw_accel.h"
  31 #include "sysemu/numa.h"
  32 #include "kvm_ppc.h"
  33 #include "sysemu/cpus.h"
  34 #include "sysemu/device_tree.h"
  35 #include "mmu-hash64.h"
  36
  37 #include "hw/sysbus.h"
  38 #include "hw/ppc/spapr.h"
  39 #include "hw/ppc/spapr_vio.h"
  40 #include "hw/ppc/spapr_cpu_core.h"
  41 #include "hw/ppc/ppc.h"
  42 #include "sysemu/watchdog.h"
  43 #include "trace.h"
  44 #include "exec/gdbstub.h"
  45 #include "exec/memattrs.h"
  46 #include "sysemu/hostmem.h"
  47 #include "qemu/cutils.h"
  48 #if defined(TARGET_PPC64)
  49 #include "hw/ppc/spapr_cpu_core.h"
  50 #endif
  51
  52 //#define DEBUG_KVM
  53
  54 #ifdef DEBUG_KVM
  55 #define DPRINTF(fmt, ...) \
  56     do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
  57 #else
  58 #define DPRINTF(fmt, ...) \
  59     do { } while (0)
  60 #endif
  61
  62 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
  63
  64 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
  65     KVM_CAP_LAST_INFO
  66 };
  67
  68 static int cap_interrupt_unset = false;
  69 static int cap_interrupt_level = false;
  70 static int cap_segstate;
  71 static int cap_booke_sregs;
  72 static int cap_ppc_smt;
  73 static int cap_ppc_rma;
  74 static int cap_spapr_tce;
  75 static int cap_spapr_multitce;
  76 static int cap_spapr_vfio;
  77 static int cap_hior;
  78 static int cap_one_reg;
  79 static int cap_epr;
  80 static int cap_ppc_watchdog;
  81 static int cap_papr;
  82 static int cap_htab_fd;
  83 static int cap_fixup_hcalls;
  84 static int cap_htm;             /* Hardware transactional memory support */
  85
  86 static uint32_t debug_inst_opcode;
  87
  88 /* XXX We have a race condition where we actually have a level triggered
  89  *     interrupt, but the infrastructure can't expose that yet, so the guest
  90  *     takes but ignores it, goes to sleep and never gets notified that there's
  91  *     still an interrupt pending.
  92  *
  93  *     As a quick workaround, let's just wake up again 20 ms after we injected
  94  *     an interrupt. That way we can assure that we're always reinjecting
  95  *     interrupts in case the guest swallowed them.
  96  */
  97 static QEMUTimer *idle_timer;
  98
  99 static void kvm_kick_cpu(void *opaque)
 100 {
 101     PowerPCCPU *cpu = opaque;
 102
 103     qemu_cpu_kick(CPU(cpu));
 104 }
 105
 106 /* Check whether we are running with KVM-PR (instead of KVM-HV).  This
 107  * should only be used for fallback tests - generally we should use
 108  * explicit capabilities for the features we want, rather than
 109  * assuming what is/isn't available depending on the KVM variant. */
 110 static bool kvmppc_is_pr(KVMState *ks)
 111 {
 112     /* Assume KVM-PR if the GET_PVINFO capability is available */
 113     return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 114 }
 115
 116 static int kvm_ppc_register_host_cpu_type(void);
 117
 118 int kvm_arch_init(MachineState *ms, KVMState *s)
 119 {
 120     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
 121     cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
 122     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
 123     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
 124     cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
 125     cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
 126     cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
 127     cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
 128     cap_spapr_vfio = false;
 129     cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
 130     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
 131     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
 132     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
 133     /* Note: we don't set cap_papr here, because this capability is
 134      * only activated after this by kvmppc_set_papr() */
 135     cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
 136     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
 137     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
 138
 139     if (!cap_interrupt_level) {
 140         fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
 141                         "VM to stall at times!\n");
 142     }
 143
 144     kvm_ppc_register_host_cpu_type();
 145
 146     return 0;
 147 }
 148
 149 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
 150 {
 151     return 0;
 152 }
 153
 154 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
 155 {
 156     CPUPPCState *cenv = &cpu->env;
 157     CPUState *cs = CPU(cpu);
 158     struct kvm_sregs sregs;
 159     int ret;
 160
 161     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 162         /* What we're really trying to say is "if we're on BookE, we use
 163            the native PVR for now". This is the only sane way to check
 164            it though, so we potentially confuse users that they can run
 165            BookE guests on BookS. Let's hope nobody dares enough :) */
 166         return 0;
 167     } else {
 168         if (!cap_segstate) {
 169             fprintf(stderr, "kvm error: missing PVR setting capability\n");
 170             return -ENOSYS;
 171         }
 172     }
 173
 174     ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
 175     if (ret) {
 176         return ret;
 177     }
 178
 179     sregs.pvr = cenv->spr[SPR_PVR];
 180     return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
 181 }
 182
 183 /* Set up a shared TLB array with KVM */
 184 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
 185 {
 186     CPUPPCState *env = &cpu->env;
 187     CPUState *cs = CPU(cpu);
 188     struct kvm_book3e_206_tlb_params params = {};
 189     struct kvm_config_tlb cfg = {};
 190     unsigned int entries = 0;
 191     int ret, i;
 192
 193     if (!kvm_enabled() ||
 194         !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
 195         return 0;
 196     }
 197
 198     assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
 199
 200     for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
 201         params.tlb_sizes[i] = booke206_tlb_size(env, i);
 202         params.tlb_ways[i] = booke206_tlb_ways(env, i);
 203         entries += params.tlb_sizes[i];
 204     }
 205
 206     assert(entries == env->nb_tlb);
 207     assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
 208
 209     env->tlb_dirty = true;
 210
 211     cfg.array = (uintptr_t)env->tlb.tlbm;
 212     cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
 213     cfg.params = (uintptr_t)&params;
 214     cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
 215
 216     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
 217     if (ret < 0) {
 218         fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
 219                 __func__, strerror(-ret));
 220         return ret;
 221     }
 222
 223     env->kvm_sw_tlb = true;
 224     return 0;
 225 }
 226
 227
 228 #if defined(TARGET_PPC64)
 229 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
 230                                        struct kvm_ppc_smmu_info *info)
 231 {
 232     CPUPPCState *env = &cpu->env;
 233     CPUState *cs = CPU(cpu);
 234
 235     memset(info, 0, sizeof(*info));
 236
 237     /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
 238      * need to "guess" what the supported page sizes are.
 239      *
 240      * For that to work we make a few assumptions:
 241      *
 242      * - Check whether we are running "PR" KVM which only supports 4K
 243      *   and 16M pages, but supports them regardless of the backing
 244      *   store characteritics. We also don't support 1T segments.
 245      *
 246      *   This is safe as if HV KVM ever supports that capability or PR
 247      *   KVM grows supports for more page/segment sizes, those versions
 248      *   will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
 249      *   will not hit this fallback
 250      *
 251      * - Else we are running HV KVM. This means we only support page
 252      *   sizes that fit in the backing store. Additionally we only
 253      *   advertize 64K pages if the processor is ARCH 2.06 and we assume
 254      *   P7 encodings for the SLB and hash table. Here too, we assume
 255      *   support for any newer processor will mean a kernel that
 256      *   implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
 257      *   this fallback.
 258      */
 259     if (kvmppc_is_pr(cs->kvm_state)) {
 260         /* No flags */
 261         info->flags = 0;
 262         info->slb_size = 64;
 263
 264         /* Standard 4k base page size segment */
 265         info->sps[0].page_shift = 12;
 266         info->sps[0].slb_enc = 0;
 267         info->sps[0].enc[0].page_shift = 12;
 268         info->sps[0].enc[0].pte_enc = 0;
 269
 270         /* Standard 16M large page size segment */
 271         info->sps[1].page_shift = 24;
 272         info->sps[1].slb_enc = SLB_VSID_L;
 273         info->sps[1].enc[0].page_shift = 24;
 274         info->sps[1].enc[0].pte_enc = 0;
 275     } else {
 276         int i = 0;
 277
 278         /* HV KVM has backing store size restrictions */
 279         info->flags = KVM_PPC_PAGE_SIZES_REAL;
 280
 281         if (env->mmu_model & POWERPC_MMU_1TSEG) {
 282             info->flags |= KVM_PPC_1T_SEGMENTS;
 283         }
 284
 285         if (env->mmu_model == POWERPC_MMU_2_06 ||
 286             env->mmu_model == POWERPC_MMU_2_07) {
 287             info->slb_size = 32;
 288         } else {
 289             info->slb_size = 64;
 290         }
 291
 292         /* Standard 4k base page size segment */
 293         info->sps[i].page_shift = 12;
 294         info->sps[i].slb_enc = 0;
 295         info->sps[i].enc[0].page_shift = 12;
 296         info->sps[i].enc[0].pte_enc = 0;
 297         i++;
 298
 299         /* 64K on MMU 2.06 and later */
 300         if (env->mmu_model == POWERPC_MMU_2_06 ||
 301             env->mmu_model == POWERPC_MMU_2_07) {
 302             info->sps[i].page_shift = 16;
 303             info->sps[i].slb_enc = 0x110;
 304             info->sps[i].enc[0].page_shift = 16;
 305             info->sps[i].enc[0].pte_enc = 1;
 306             i++;
 307         }
 308
 309         /* Standard 16M large page size segment */
 310         info->sps[i].page_shift = 24;
 311         info->sps[i].slb_enc = SLB_VSID_L;
 312         info->sps[i].enc[0].page_shift = 24;
 313         info->sps[i].enc[0].pte_enc = 0;
 314     }
 315 }
 316
 317 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
 318 {
 319     CPUState *cs = CPU(cpu);
 320     int ret;
 321
 322     if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
 323         ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
 324         if (ret == 0) {
 325             return;
 326         }
 327     }
 328
 329     kvm_get_fallback_smmu_info(cpu, info);
 330 }
 331
 332 static long gethugepagesize(const char *mem_path)
 333 {
 334     struct statfs fs;
 335     int ret;
 336
 337     do {
 338         ret = statfs(mem_path, &fs);
 339     } while (ret != 0 && errno == EINTR);
 340
 341     if (ret != 0) {
 342         fprintf(stderr, "Couldn't statfs() memory path: %s\n",
 343                 strerror(errno));
 344         exit(1);
 345     }
 346
 347 #define HUGETLBFS_MAGIC       0x958458f6
 348
 349     if (fs.f_type != HUGETLBFS_MAGIC) {
 350         /* Explicit mempath, but it's ordinary pages */
 351         return getpagesize();
 352     }
 353
 354     /* It's hugepage, return the huge page size */
 355     return fs.f_bsize;
 356 }
 357
 358 /*
 359  * FIXME TOCTTOU: this iterates over memory backends' mem-path, which
 360  * may or may not name the same files / on the same filesystem now as
 361  * when we actually open and map them.  Iterate over the file
 362  * descriptors instead, and use qemu_fd_getpagesize().
 363  */
 364 static int find_max_supported_pagesize(Object *obj, void *opaque)
 365 {
 366     char *mem_path;
 367     long *hpsize_min = opaque;
 368
 369     if (object_dynamic_cast(obj, TYPE_MEMORY_BACKEND)) {
 370         mem_path = object_property_get_str(obj, "mem-path", NULL);
 371         if (mem_path) {
 372             long hpsize = gethugepagesize(mem_path);
 373             if (hpsize < *hpsize_min) {
 374                 *hpsize_min = hpsize;
 375             }
 376         } else {
 377             *hpsize_min = getpagesize();
 378         }
 379     }
 380
 381     return 0;
 382 }
 383
 384 static long getrampagesize(void)
 385 {
 386     long hpsize = LONG_MAX;
 387     long mainrampagesize;
 388     Object *memdev_root;
 389
 390     if (mem_path) {
 391         mainrampagesize = gethugepagesize(mem_path);
 392     } else {
 393         mainrampagesize = getpagesize();
 394     }
 395
 396     /* it's possible we have memory-backend objects with
 397      * hugepage-backed RAM. these may get mapped into system
 398      * address space via -numa parameters or memory hotplug
 399      * hooks. we want to take these into account, but we
 400      * also want to make sure these supported hugepage
 401      * sizes are applicable across the entire range of memory
 402      * we may boot from, so we take the min across all
 403      * backends, and assume normal pages in cases where a
 404      * backend isn't backed by hugepages.
 405      */
 406     memdev_root = object_resolve_path("/objects", NULL);
 407     if (memdev_root) {
 408         object_child_foreach(memdev_root, find_max_supported_pagesize, &hpsize);
 409     }
 410     if (hpsize == LONG_MAX) {
 411         /* No additional memory regions found ==> Report main RAM page size */
 412         return mainrampagesize;
 413     }
 414
 415     /* If NUMA is disabled or the NUMA nodes are not backed with a
 416      * memory-backend, then there is at least one node using "normal" RAM,
 417      * so if its page size is smaller we have got to report that size instead.
 418      */
 419     if (hpsize > mainrampagesize &&
 420         (nb_numa_nodes == 0 || numa_info[0].node_memdev == NULL)) {
 421         static bool warned;
 422         if (!warned) {
 423             error_report("Huge page support disabled (n/a for main memory).");
 424             warned = true;
 425         }
 426         return mainrampagesize;
 427     }
 428
 429     return hpsize;
 430 }
 431
 432 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
 433 {
 434     if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
 435         return true;
 436     }
 437
 438     return (1ul << shift) <= rampgsize;
 439 }
 440
 441 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 442 {
 443     static struct kvm_ppc_smmu_info smmu_info;
 444     static bool has_smmu_info;
 445     CPUPPCState *env = &cpu->env;
 446     long rampagesize;
 447     int iq, ik, jq, jk;
 448     bool has_64k_pages = false;
 449
 450     /* We only handle page sizes for 64-bit server guests for now */
 451     if (!(env->mmu_model & POWERPC_MMU_64)) {
 452         return;
 453     }
 454
 455     /* Collect MMU info from kernel if not already */
 456     if (!has_smmu_info) {
 457         kvm_get_smmu_info(cpu, &smmu_info);
 458         has_smmu_info = true;
 459     }
 460
 461     rampagesize = getrampagesize();
 462
 463     /* Convert to QEMU form */
 464     memset(&env->sps, 0, sizeof(env->sps));
 465
 466     /* If we have HV KVM, we need to forbid CI large pages if our
 467      * host page size is smaller than 64K.
 468      */
 469     if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
 470         env->ci_large_pages = getpagesize() >= 0x10000;
 471     }
 472
 473     /*
 474      * XXX This loop should be an entry wide AND of the capabilities that
 475      *     the selected CPU has with the capabilities that KVM supports.
 476      */
 477     for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
 478         struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
 479         struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
 480
 481         if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 482                                  ksps->page_shift)) {
 483             continue;
 484         }
 485         qsps->page_shift = ksps->page_shift;
 486         qsps->slb_enc = ksps->slb_enc;
 487         for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
 488             if (!kvm_valid_page_size(smmu_info.flags, rampagesize,
 489                                      ksps->enc[jk].page_shift)) {
 490                 continue;
 491             }
 492             if (ksps->enc[jk].page_shift == 16) {
 493                 has_64k_pages = true;
 494             }
 495             qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
 496             qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
 497             if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
 498                 break;
 499             }
 500         }
 501         if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
 502             break;
 503         }
 504     }
 505     env->slb_nr = smmu_info.slb_size;
 506     if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
 507         env->mmu_model &= ~POWERPC_MMU_1TSEG;
 508     }
 509     if (!has_64k_pages) {
 510         env->mmu_model &= ~POWERPC_MMU_64K;
 511     }
 512 }
 513 #else /* defined (TARGET_PPC64) */
 514
 515 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
 516 {
 517 }
 518
 519 #endif /* !defined (TARGET_PPC64) */
 520
 521 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 522 {
 523     return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
 524 }
 525
 526 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
 527  * book3s supports only 1 watchpoint, so array size
 528  * of 4 is sufficient for now.
 529  */
 530 #define MAX_HW_BKPTS 4
 531
 532 static struct HWBreakpoint {
 533     target_ulong addr;
 534     int type;
 535 } hw_debug_points[MAX_HW_BKPTS];
 536
 537 static CPUWatchpoint hw_watchpoint;
 538
 539 /* Default there is no breakpoint and watchpoint supported */
 540 static int max_hw_breakpoint;
 541 static int max_hw_watchpoint;
 542 static int nb_hw_breakpoint;
 543 static int nb_hw_watchpoint;
 544
 545 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
 546 {
 547     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
 548         max_hw_breakpoint = 2;
 549         max_hw_watchpoint = 2;
 550     }
 551
 552     if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
 553         fprintf(stderr, "Error initializing h/w breakpoints\n");
 554         return;
 555     }
 556 }
 557
 558 int kvm_arch_init_vcpu(CPUState *cs)
 559 {
 560     PowerPCCPU *cpu = POWERPC_CPU(cs);
 561     CPUPPCState *cenv = &cpu->env;
 562     int ret;
 563
 564     /* Gather server mmu info from KVM and update the CPU state */
 565     kvm_fixup_page_sizes(cpu);
 566
 567     /* Synchronize sregs with kvm */
 568     ret = kvm_arch_sync_sregs(cpu);
 569     if (ret) {
 570         if (ret == -EINVAL) {
 571             error_report("Register sync failed... If you're using kvm-hv.ko,"
 572                          " only \"-cpu host\" is possible");
 573         }
 574         return ret;
 575     }
 576
 577     idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
 578
 579     switch (cenv->mmu_model) {
 580     case POWERPC_MMU_BOOKE206:
 581         /* This target supports access to KVM's guest TLB */
 582         ret = kvm_booke206_tlb_init(cpu);
 583         break;
 584     case POWERPC_MMU_2_07:
 585         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
 586             /* KVM-HV has transactional memory on POWER8 also without the
 587              * KVM_CAP_PPC_HTM extension, so enable it here instead. */
 588             cap_htm = true;
 589         }
 590         break;
 591     default:
 592         break;
 593     }
 594
 595     kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
 596     kvmppc_hw_debug_points_init(cenv);
 597
 598     return ret;
 599 }
 600
 601 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 602 {
 603     CPUPPCState *env = &cpu->env;
 604     CPUState *cs = CPU(cpu);
 605     struct kvm_dirty_tlb dirty_tlb;
 606     unsigned char *bitmap;
 607     int ret;
 608
 609     if (!env->kvm_sw_tlb) {
 610         return;
 611     }
 612
 613     bitmap = g_malloc((env->nb_tlb + 7) / 8);
 614     memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
 615
 616     dirty_tlb.bitmap = (uintptr_t)bitmap;
 617     dirty_tlb.num_dirty = env->nb_tlb;
 618
 619     ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
 620     if (ret) {
 621         fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
 622                 __func__, strerror(-ret));
 623     }
 624
 625     g_free(bitmap);
 626 }
 627
 628 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 629 {
 630     PowerPCCPU *cpu = POWERPC_CPU(cs);
 631     CPUPPCState *env = &cpu->env;
 632     union {
 633         uint32_t u32;
 634         uint64_t u64;
 635     } val;
 636     struct kvm_one_reg reg = {
 637         .id = id,
 638         .addr = (uintptr_t) &val,
 639     };
 640     int ret;
 641
 642     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 643     if (ret != 0) {
 644         trace_kvm_failed_spr_get(spr, strerror(errno));
 645     } else {
 646         switch (id & KVM_REG_SIZE_MASK) {
 647         case KVM_REG_SIZE_U32:
 648             env->spr[spr] = val.u32;
 649             break;
 650
 651         case KVM_REG_SIZE_U64:
 652             env->spr[spr] = val.u64;
 653             break;
 654
 655         default:
 656             /* Don't handle this size yet */
 657             abort();
 658         }
 659     }
 660 }
 661
 662 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 663 {
 664     PowerPCCPU *cpu = POWERPC_CPU(cs);
 665     CPUPPCState *env = &cpu->env;
 666     union {
 667         uint32_t u32;
 668         uint64_t u64;
 669     } val;
 670     struct kvm_one_reg reg = {
 671         .id = id,
 672         .addr = (uintptr_t) &val,
 673     };
 674     int ret;
 675
 676     switch (id & KVM_REG_SIZE_MASK) {
 677     case KVM_REG_SIZE_U32:
 678         val.u32 = env->spr[spr];
 679         break;
 680
 681     case KVM_REG_SIZE_U64:
 682         val.u64 = env->spr[spr];
 683         break;
 684
 685     default:
 686         /* Don't handle this size yet */
 687         abort();
 688     }
 689
 690     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 691     if (ret != 0) {
 692         trace_kvm_failed_spr_set(spr, strerror(errno));
 693     }
 694 }
 695
 696 static int kvm_put_fp(CPUState *cs)
 697 {
 698     PowerPCCPU *cpu = POWERPC_CPU(cs);
 699     CPUPPCState *env = &cpu->env;
 700     struct kvm_one_reg reg;
 701     int i;
 702     int ret;
 703
 704     if (env->insns_flags & PPC_FLOAT) {
 705         uint64_t fpscr = env->fpscr;
 706         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 707
 708         reg.id = KVM_REG_PPC_FPSCR;
 709         reg.addr = (uintptr_t)&fpscr;
 710         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 711         if (ret < 0) {
 712             DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
 713             return ret;
 714         }
 715
 716         for (i = 0; i < 32; i++) {
 717             uint64_t vsr[2];
 718
 719 #ifdef HOST_WORDS_BIGENDIAN
 720             vsr[0] = float64_val(env->fpr[i]);
 721             vsr[1] = env->vsr[i];
 722 #else
 723             vsr[0] = env->vsr[i];
 724             vsr[1] = float64_val(env->fpr[i]);
 725 #endif
 726             reg.addr = (uintptr_t) &vsr;
 727             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 728
 729             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 730             if (ret < 0) {
 731                 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
 732                         i, strerror(errno));
 733                 return ret;
 734             }
 735         }
 736     }
 737
 738     if (env->insns_flags & PPC_ALTIVEC) {
 739         reg.id = KVM_REG_PPC_VSCR;
 740         reg.addr = (uintptr_t)&env->vscr;
 741         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 742         if (ret < 0) {
 743             DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
 744             return ret;
 745         }
 746
 747         for (i = 0; i < 32; i++) {
 748             reg.id = KVM_REG_PPC_VR(i);
 749             reg.addr = (uintptr_t)&env->avr[i];
 750             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 751             if (ret < 0) {
 752                 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
 753                 return ret;
 754             }
 755         }
 756     }
 757
 758     return 0;
 759 }
 760
 761 static int kvm_get_fp(CPUState *cs)
 762 {
 763     PowerPCCPU *cpu = POWERPC_CPU(cs);
 764     CPUPPCState *env = &cpu->env;
 765     struct kvm_one_reg reg;
 766     int i;
 767     int ret;
 768
 769     if (env->insns_flags & PPC_FLOAT) {
 770         uint64_t fpscr;
 771         bool vsx = !!(env->insns_flags2 & PPC2_VSX);
 772
 773         reg.id = KVM_REG_PPC_FPSCR;
 774         reg.addr = (uintptr_t)&fpscr;
 775         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 776         if (ret < 0) {
 777             DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
 778             return ret;
 779         } else {
 780             env->fpscr = fpscr;
 781         }
 782
 783         for (i = 0; i < 32; i++) {
 784             uint64_t vsr[2];
 785
 786             reg.addr = (uintptr_t) &vsr;
 787             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 788
 789             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 790             if (ret < 0) {
 791                 DPRINTF("Unable to get %s%d from KVM: %s\n",
 792                         vsx ? "VSR" : "FPR", i, strerror(errno));
 793                 return ret;
 794             } else {
 795 #ifdef HOST_WORDS_BIGENDIAN
 796                 env->fpr[i] = vsr[0];
 797                 if (vsx) {
 798                     env->vsr[i] = vsr[1];
 799                 }
 800 #else
 801                 env->fpr[i] = vsr[1];
 802                 if (vsx) {
 803                     env->vsr[i] = vsr[0];
 804                 }
 805 #endif
 806             }
 807         }
 808     }
 809
 810     if (env->insns_flags & PPC_ALTIVEC) {
 811         reg.id = KVM_REG_PPC_VSCR;
 812         reg.addr = (uintptr_t)&env->vscr;
 813         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 814         if (ret < 0) {
 815             DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
 816             return ret;
 817         }
 818
 819         for (i = 0; i < 32; i++) {
 820             reg.id = KVM_REG_PPC_VR(i);
 821             reg.addr = (uintptr_t)&env->avr[i];
 822             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 823             if (ret < 0) {
 824                 DPRINTF("Unable to get VR%d from KVM: %s\n",
 825                         i, strerror(errno));
 826                 return ret;
 827             }
 828         }
 829     }
 830
 831     return 0;
 832 }
 833
 834 #if defined(TARGET_PPC64)
 835 static int kvm_get_vpa(CPUState *cs)
 836 {
 837     PowerPCCPU *cpu = POWERPC_CPU(cs);
 838     CPUPPCState *env = &cpu->env;
 839     struct kvm_one_reg reg;
 840     int ret;
 841
 842     reg.id = KVM_REG_PPC_VPA_ADDR;
 843     reg.addr = (uintptr_t)&env->vpa_addr;
 844     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 845     if (ret < 0) {
 846         DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
 847         return ret;
 848     }
 849
 850     assert((uintptr_t)&env->slb_shadow_size
 851            == ((uintptr_t)&env->slb_shadow_addr + 8));
 852     reg.id = KVM_REG_PPC_VPA_SLB;
 853     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 854     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 855     if (ret < 0) {
 856         DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
 857                 strerror(errno));
 858         return ret;
 859     }
 860
 861     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 862     reg.id = KVM_REG_PPC_VPA_DTL;
 863     reg.addr = (uintptr_t)&env->dtl_addr;
 864     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
 865     if (ret < 0) {
 866         DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
 867                 strerror(errno));
 868         return ret;
 869     }
 870
 871     return 0;
 872 }
 873
 874 static int kvm_put_vpa(CPUState *cs)
 875 {
 876     PowerPCCPU *cpu = POWERPC_CPU(cs);
 877     CPUPPCState *env = &cpu->env;
 878     struct kvm_one_reg reg;
 879     int ret;
 880
 881     /* SLB shadow or DTL can't be registered unless a master VPA is
 882      * registered.  That means when restoring state, if a VPA *is*
 883      * registered, we need to set that up first.  If not, we need to
 884      * deregister the others before deregistering the master VPA */
 885     assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
 886
 887     if (env->vpa_addr) {
 888         reg.id = KVM_REG_PPC_VPA_ADDR;
 889         reg.addr = (uintptr_t)&env->vpa_addr;
 890         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 891         if (ret < 0) {
 892             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 893             return ret;
 894         }
 895     }
 896
 897     assert((uintptr_t)&env->slb_shadow_size
 898            == ((uintptr_t)&env->slb_shadow_addr + 8));
 899     reg.id = KVM_REG_PPC_VPA_SLB;
 900     reg.addr = (uintptr_t)&env->slb_shadow_addr;
 901     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 902     if (ret < 0) {
 903         DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
 904         return ret;
 905     }
 906
 907     assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
 908     reg.id = KVM_REG_PPC_VPA_DTL;
 909     reg.addr = (uintptr_t)&env->dtl_addr;
 910     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 911     if (ret < 0) {
 912         DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
 913                 strerror(errno));
 914         return ret;
 915     }
 916
 917     if (!env->vpa_addr) {
 918         reg.id = KVM_REG_PPC_VPA_ADDR;
 919         reg.addr = (uintptr_t)&env->vpa_addr;
 920         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 921         if (ret < 0) {
 922             DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
 923             return ret;
 924         }
 925     }
 926
 927     return 0;
 928 }
 929 #endif /* TARGET_PPC64 */
 930
 931 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 932 {
 933     CPUPPCState *env = &cpu->env;
 934     struct kvm_sregs sregs;
 935     int i;
 936
 937     sregs.pvr = env->spr[SPR_PVR];
 938
 939     sregs.u.s.sdr1 = env->spr[SPR_SDR1];
 940
 941     /* Sync SLB */
 942 #ifdef TARGET_PPC64
 943     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
 944         sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
 945         if (env->slb[i].esid & SLB_ESID_V) {
 946             sregs.u.s.ppc64.slb[i].slbe |= i;
 947         }
 948         sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
 949     }
 950 #endif
 951
 952     /* Sync SRs */
 953     for (i = 0; i < 16; i++) {
 954         sregs.u.s.ppc32.sr[i] = env->sr[i];
 955     }
 956
 957     /* Sync BATs */
 958     for (i = 0; i < 8; i++) {
 959         /* Beware. We have to swap upper and lower bits here */
 960         sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
 961             | env->DBAT[1][i];
 962         sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
 963             | env->IBAT[1][i];
 964     }
 965
 966     return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
 967 }
 968
 969 int kvm_arch_put_registers(CPUState *cs, int level)
 970 {
 971     PowerPCCPU *cpu = POWERPC_CPU(cs);
 972     CPUPPCState *env = &cpu->env;
 973     struct kvm_regs regs;
 974     int ret;
 975     int i;
 976
 977     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
 978     if (ret < 0) {
 979         return ret;
 980     }
 981
 982     regs.ctr = env->ctr;
 983     regs.lr  = env->lr;
 984     regs.xer = cpu_read_xer(env);
 985     regs.msr = env->msr;
 986     regs.pc = env->nip;
 987
 988     regs.srr0 = env->spr[SPR_SRR0];
 989     regs.srr1 = env->spr[SPR_SRR1];
 990
 991     regs.sprg0 = env->spr[SPR_SPRG0];
 992     regs.sprg1 = env->spr[SPR_SPRG1];
 993     regs.sprg2 = env->spr[SPR_SPRG2];
 994     regs.sprg3 = env->spr[SPR_SPRG3];
 995     regs.sprg4 = env->spr[SPR_SPRG4];
 996     regs.sprg5 = env->spr[SPR_SPRG5];
 997     regs.sprg6 = env->spr[SPR_SPRG6];
 998     regs.sprg7 = env->spr[SPR_SPRG7];
 999
1000     regs.pid = env->spr[SPR_BOOKE_PID];
1001
1002     for (i = 0;i < 32; i++)
1003         regs.gpr[i] = env->gpr[i];
1004
1005     regs.cr = 0;
1006     for (i = 0; i < 8; i++) {
1007         regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
1008     }
1009
1010     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
1011     if (ret < 0)
1012         return ret;
1013
1014     kvm_put_fp(cs);
1015
1016     if (env->tlb_dirty) {
1017         kvm_sw_tlb_put(cpu);
1018         env->tlb_dirty = false;
1019     }
1020
1021     if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
1022         ret = kvmppc_put_books_sregs(cpu);
1023         if (ret < 0) {
1024             return ret;
1025         }
1026     }
1027
1028     if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
1029         kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1030     }
1031
1032     if (cap_one_reg) {
1033         int i;
1034
1035         /* We deliberately ignore errors here, for kernels which have
1036          * the ONE_REG calls, but don't support the specific
1037          * registers, there's a reasonable chance things will still
1038          * work, at least until we try to migrate. */
1039         for (i = 0; i < 1024; i++) {
1040             uint64_t id = env->spr_cb[i].one_reg_id;
1041
1042             if (id != 0) {
1043                 kvm_put_one_spr(cs, id, i);
1044             }
1045         }
1046
1047 #ifdef TARGET_PPC64
1048         if (msr_ts) {
1049             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1050                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1051             }
1052             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1053                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1054             }
1055             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1056             kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1057             kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1058             kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1059             kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1060             kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1061             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1062             kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1063             kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1064             kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1065         }
1066
1067         if (cap_papr) {
1068             if (kvm_put_vpa(cs) < 0) {
1069                 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1070             }
1071         }
1072
1073         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1074 #endif /* TARGET_PPC64 */
1075     }
1076
1077     return ret;
1078 }
1079
1080 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1081 {
1082      env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1083 }
1084
1085 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1086 {
1087     CPUPPCState *env = &cpu->env;
1088     struct kvm_sregs sregs;
1089     int ret;
1090
1091     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1092     if (ret < 0) {
1093         return ret;
1094     }
1095
1096     if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1097         env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1098         env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1099         env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1100         env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1101         env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1102         env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1103         env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1104         env->spr[SPR_DECR] = sregs.u.e.dec;
1105         env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1106         env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1107         env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1108     }
1109
1110     if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1111         env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1112         env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1113         env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1114         env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1115         env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1116     }
1117
1118     if (sregs.u.e.features & KVM_SREGS_E_64) {
1119         env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1120     }
1121
1122     if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1123         env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1124     }
1125
1126     if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1127         env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1128         kvm_sync_excp(env, POWERPC_EXCP_CRITICAL,  SPR_BOOKE_IVOR0);
1129         env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1130         kvm_sync_excp(env, POWERPC_EXCP_MCHECK,  SPR_BOOKE_IVOR1);
1131         env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1132         kvm_sync_excp(env, POWERPC_EXCP_DSI,  SPR_BOOKE_IVOR2);
1133         env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1134         kvm_sync_excp(env, POWERPC_EXCP_ISI,  SPR_BOOKE_IVOR3);
1135         env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1136         kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL,  SPR_BOOKE_IVOR4);
1137         env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1138         kvm_sync_excp(env, POWERPC_EXCP_ALIGN,  SPR_BOOKE_IVOR5);
1139         env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1140         kvm_sync_excp(env, POWERPC_EXCP_PROGRAM,  SPR_BOOKE_IVOR6);
1141         env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1142         kvm_sync_excp(env, POWERPC_EXCP_FPU,  SPR_BOOKE_IVOR7);
1143         env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1144         kvm_sync_excp(env, POWERPC_EXCP_SYSCALL,  SPR_BOOKE_IVOR8);
1145         env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1146         kvm_sync_excp(env, POWERPC_EXCP_APU,  SPR_BOOKE_IVOR9);
1147         env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1148         kvm_sync_excp(env, POWERPC_EXCP_DECR,  SPR_BOOKE_IVOR10);
1149         env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1150         kvm_sync_excp(env, POWERPC_EXCP_FIT,  SPR_BOOKE_IVOR11);
1151         env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1152         kvm_sync_excp(env, POWERPC_EXCP_WDT,  SPR_BOOKE_IVOR12);
1153         env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1154         kvm_sync_excp(env, POWERPC_EXCP_DTLB,  SPR_BOOKE_IVOR13);
1155         env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1156         kvm_sync_excp(env, POWERPC_EXCP_ITLB,  SPR_BOOKE_IVOR14);
1157         env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1158         kvm_sync_excp(env, POWERPC_EXCP_DEBUG,  SPR_BOOKE_IVOR15);
1159
1160         if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1161             env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1162             kvm_sync_excp(env, POWERPC_EXCP_SPEU,  SPR_BOOKE_IVOR32);
1163             env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1164             kvm_sync_excp(env, POWERPC_EXCP_EFPDI,  SPR_BOOKE_IVOR33);
1165             env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1166             kvm_sync_excp(env, POWERPC_EXCP_EFPRI,  SPR_BOOKE_IVOR34);
1167         }
1168
1169         if (sregs.u.e.features & KVM_SREGS_E_PM) {
1170             env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1171             kvm_sync_excp(env, POWERPC_EXCP_EPERFM,  SPR_BOOKE_IVOR35);
1172         }
1173
1174         if (sregs.u.e.features & KVM_SREGS_E_PC) {
1175             env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1176             kvm_sync_excp(env, POWERPC_EXCP_DOORI,  SPR_BOOKE_IVOR36);
1177             env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1178             kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1179         }
1180     }
1181
1182     if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1183         env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1184         env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1185         env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1186         env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1187         env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1188         env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1189         env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1190         env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1191         env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1192         env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1193     }
1194
1195     if (sregs.u.e.features & KVM_SREGS_EXP) {
1196         env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1197     }
1198
1199     if (sregs.u.e.features & KVM_SREGS_E_PD) {
1200         env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1201         env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1202     }
1203
1204     if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1205         env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1206         env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1207         env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1208
1209         if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1210             env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1211             env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1212         }
1213     }
1214
1215     return 0;
1216 }
1217
1218 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1219 {
1220     CPUPPCState *env = &cpu->env;
1221     struct kvm_sregs sregs;
1222     int ret;
1223     int i;
1224
1225     ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1226     if (ret < 0) {
1227         return ret;
1228     }
1229
1230     if (!env->external_htab) {
1231         ppc_store_sdr1(env, sregs.u.s.sdr1);
1232     }
1233
1234     /* Sync SLB */
1235 #ifdef TARGET_PPC64
1236     /*
1237      * The packed SLB array we get from KVM_GET_SREGS only contains
1238      * information about valid entries. So we flush our internal copy
1239      * to get rid of stale ones, then put all valid SLB entries back
1240      * in.
1241      */
1242     memset(env->slb, 0, sizeof(env->slb));
1243     for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1244         target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1245         target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1246         /*
1247          * Only restore valid entries
1248          */
1249         if (rb & SLB_ESID_V) {
1250             ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1251         }
1252     }
1253 #endif
1254
1255     /* Sync SRs */
1256     for (i = 0; i < 16; i++) {
1257         env->sr[i] = sregs.u.s.ppc32.sr[i];
1258     }
1259
1260     /* Sync BATs */
1261     for (i = 0; i < 8; i++) {
1262         env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1263         env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1264         env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1265         env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1266     }
1267
1268     return 0;
1269 }
1270
1271 int kvm_arch_get_registers(CPUState *cs)
1272 {
1273     PowerPCCPU *cpu = POWERPC_CPU(cs);
1274     CPUPPCState *env = &cpu->env;
1275     struct kvm_regs regs;
1276     uint32_t cr;
1277     int i, ret;
1278
1279     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1280     if (ret < 0)
1281         return ret;
1282
1283     cr = regs.cr;
1284     for (i = 7; i >= 0; i--) {
1285         env->crf[i] = cr & 15;
1286         cr >>= 4;
1287     }
1288
1289     env->ctr = regs.ctr;
1290     env->lr = regs.lr;
1291     cpu_write_xer(env, regs.xer);
1292     env->msr = regs.msr;
1293     env->nip = regs.pc;
1294
1295     env->spr[SPR_SRR0] = regs.srr0;
1296     env->spr[SPR_SRR1] = regs.srr1;
1297
1298     env->spr[SPR_SPRG0] = regs.sprg0;
1299     env->spr[SPR_SPRG1] = regs.sprg1;
1300     env->spr[SPR_SPRG2] = regs.sprg2;
1301     env->spr[SPR_SPRG3] = regs.sprg3;
1302     env->spr[SPR_SPRG4] = regs.sprg4;
1303     env->spr[SPR_SPRG5] = regs.sprg5;
1304     env->spr[SPR_SPRG6] = regs.sprg6;
1305     env->spr[SPR_SPRG7] = regs.sprg7;
1306
1307     env->spr[SPR_BOOKE_PID] = regs.pid;
1308
1309     for (i = 0;i < 32; i++)
1310         env->gpr[i] = regs.gpr[i];
1311
1312     kvm_get_fp(cs);
1313
1314     if (cap_booke_sregs) {
1315         ret = kvmppc_get_booke_sregs(cpu);
1316         if (ret < 0) {
1317             return ret;
1318         }
1319     }
1320
1321     if (cap_segstate) {
1322         ret = kvmppc_get_books_sregs(cpu);
1323         if (ret < 0) {
1324             return ret;
1325         }
1326     }
1327
1328     if (cap_hior) {
1329         kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1330     }
1331
1332     if (cap_one_reg) {
1333         int i;
1334
1335         /* We deliberately ignore errors here, for kernels which have
1336          * the ONE_REG calls, but don't support the specific
1337          * registers, there's a reasonable chance things will still
1338          * work, at least until we try to migrate. */
1339         for (i = 0; i < 1024; i++) {
1340             uint64_t id = env->spr_cb[i].one_reg_id;
1341
1342             if (id != 0) {
1343                 kvm_get_one_spr(cs, id, i);
1344             }
1345         }
1346
1347 #ifdef TARGET_PPC64
1348         if (msr_ts) {
1349             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1350                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1351             }
1352             for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1353                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1354             }
1355             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1356             kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1357             kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1358             kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1359             kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1360             kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1361             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1362             kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1363             kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1364             kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1365         }
1366
1367         if (cap_papr) {
1368             if (kvm_get_vpa(cs) < 0) {
1369                 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1370             }
1371         }
1372
1373         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1374 #endif
1375     }
1376
1377     return 0;
1378 }
1379
1380 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1381 {
1382     unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1383
1384     if (irq != PPC_INTERRUPT_EXT) {
1385         return 0;
1386     }
1387
1388     if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1389         return 0;
1390     }
1391
1392     kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1393
1394     return 0;
1395 }
1396
/*
 * PPC_INPUT_INT names the interrupt input pin constant for the build
 * target: PPC40x for TARGET_PPCEMB, PPC970 for TARGET_PPC64, PPC6xx
 * otherwise.
 */
#ifdef TARGET_PPCEMB
#define PPC_INPUT_INT PPC40x_INPUT_INT
#elif defined(TARGET_PPC64)
#define PPC_INPUT_INT PPC970_INPUT_INT
#else
#define PPC_INPUT_INT PPC6xx_INPUT_INT
#endif
1404
1405 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1406 {
1407     PowerPCCPU *cpu = POWERPC_CPU(cs);
1408     CPUPPCState *env = &cpu->env;
1409     int r;
1410     unsigned irq;
1411
1412     qemu_mutex_lock_iothread();
1413
1414     /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1415      * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1416     if (!cap_interrupt_level &&
1417         run->ready_for_interrupt_injection &&
1418         (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1419         (env->irq_input_state & (1<<PPC_INPUT_INT)))
1420     {
1421         /* For now KVM disregards the 'irq' argument. However, in the
1422          * future KVM could cache it in-kernel to avoid a heavyweight exit
1423          * when reading the UIC.
1424          */
1425         irq = KVM_INTERRUPT_SET;
1426
1427         DPRINTF("injected interrupt %d\n", irq);
1428         r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1429         if (r < 0) {
1430             printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1431         }
1432
1433         /* Always wake up soon in case the interrupt was level based */
1434         timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1435                        (NANOSECONDS_PER_SECOND / 50));
1436     }
1437
1438     /* We don't know if there are more interrupts pending after this. However,
1439      * the guest will return to userspace in the course of handling this one
1440      * anyways, so we will get a chance to deliver the rest. */
1441
1442     qemu_mutex_unlock_iothread();
1443 }
1444
1445 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1446 {
1447     return MEMTXATTRS_UNSPECIFIED;
1448 }
1449
1450 int kvm_arch_process_async_events(CPUState *cs)
1451 {
1452     return cs->halted;
1453 }
1454
1455 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1456 {
1457     CPUState *cs = CPU(cpu);
1458     CPUPPCState *env = &cpu->env;
1459
1460     if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1461         cs->halted = 1;
1462         cs->exception_index = EXCP_HLT;
1463     }
1464
1465     return 0;
1466 }
1467
1468 /* map dcr access to existing qemu dcr emulation */
1469 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1470 {
1471     if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1472         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1473
1474     return 0;
1475 }
1476
1477 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1478 {
1479     if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1480         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1481
1482     return 0;
1483 }
1484
1485 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1486 {
1487     /* Mixed endian case is not handled */
1488     uint32_t sc = debug_inst_opcode;
1489
1490     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1491                             sizeof(sc), 0) ||
1492         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1493         return -EINVAL;
1494     }
1495
1496     return 0;
1497 }
1498
1499 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1500 {
1501     uint32_t sc;
1502
1503     if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1504         sc != debug_inst_opcode ||
1505         cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1506                             sizeof(sc), 1)) {
1507         return -EINVAL;
1508     }
1509
1510     return 0;
1511 }
1512
1513 static int find_hw_breakpoint(target_ulong addr, int type)
1514 {
1515     int n;
1516
1517     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1518            <= ARRAY_SIZE(hw_debug_points));
1519
1520     for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1521         if (hw_debug_points[n].addr == addr &&
1522              hw_debug_points[n].type == type) {
1523             return n;
1524         }
1525     }
1526
1527     return -1;
1528 }
1529
1530 static int find_hw_watchpoint(target_ulong addr, int *flag)
1531 {
1532     int n;
1533
1534     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1535     if (n >= 0) {
1536         *flag = BP_MEM_ACCESS;
1537         return n;
1538     }
1539
1540     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1541     if (n >= 0) {
1542         *flag = BP_MEM_WRITE;
1543         return n;
1544     }
1545
1546     n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1547     if (n >= 0) {
1548         *flag = BP_MEM_READ;
1549         return n;
1550     }
1551
1552     return -1;
1553 }
1554
1555 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1556                                   target_ulong len, int type)
1557 {
1558     if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1559         return -ENOBUFS;
1560     }
1561
1562     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1563     hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1564
1565     switch (type) {
1566     case GDB_BREAKPOINT_HW:
1567         if (nb_hw_breakpoint >= max_hw_breakpoint) {
1568             return -ENOBUFS;
1569         }
1570
1571         if (find_hw_breakpoint(addr, type) >= 0) {
1572             return -EEXIST;
1573         }
1574
1575         nb_hw_breakpoint++;
1576         break;
1577
1578     case GDB_WATCHPOINT_WRITE:
1579     case GDB_WATCHPOINT_READ:
1580     case GDB_WATCHPOINT_ACCESS:
1581         if (nb_hw_watchpoint >= max_hw_watchpoint) {
1582             return -ENOBUFS;
1583         }
1584
1585         if (find_hw_breakpoint(addr, type) >= 0) {
1586             return -EEXIST;
1587         }
1588
1589         nb_hw_watchpoint++;
1590         break;
1591
1592     default:
1593         return -ENOSYS;
1594     }
1595
1596     return 0;
1597 }
1598
1599 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1600                                   target_ulong len, int type)
1601 {
1602     int n;
1603
1604     n = find_hw_breakpoint(addr, type);
1605     if (n < 0) {
1606         return -ENOENT;
1607     }
1608
1609     switch (type) {
1610     case GDB_BREAKPOINT_HW:
1611         nb_hw_breakpoint--;
1612         break;
1613
1614     case GDB_WATCHPOINT_WRITE:
1615     case GDB_WATCHPOINT_READ:
1616     case GDB_WATCHPOINT_ACCESS:
1617         nb_hw_watchpoint--;
1618         break;
1619
1620     default:
1621         return -ENOSYS;
1622     }
1623     hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1624
1625     return 0;
1626 }
1627
1628 void kvm_arch_remove_all_hw_breakpoints(void)
1629 {
1630     nb_hw_breakpoint = nb_hw_watchpoint = 0;
1631 }
1632
1633 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1634 {
1635     int n;
1636
1637     /* Software Breakpoint updates */
1638     if (kvm_sw_breakpoints_active(cs)) {
1639         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1640     }
1641
1642     assert((nb_hw_breakpoint + nb_hw_watchpoint)
1643            <= ARRAY_SIZE(hw_debug_points));
1644     assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1645
1646     if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1647         dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1648         memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1649         for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1650             switch (hw_debug_points[n].type) {
1651             case GDB_BREAKPOINT_HW:
1652                 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1653                 break;
1654             case GDB_WATCHPOINT_WRITE:
1655                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1656                 break;
1657             case GDB_WATCHPOINT_READ:
1658                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1659                 break;
1660             case GDB_WATCHPOINT_ACCESS:
1661                 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1662                                         KVMPPC_DEBUG_WATCH_READ;
1663                 break;
1664             default:
1665                 cpu_abort(cs, "Unsupported breakpoint type\n");
1666             }
1667             dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1668         }
1669     }
1670 }
1671
1672 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1673 {
1674     CPUState *cs = CPU(cpu);
1675     CPUPPCState *env = &cpu->env;
1676     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1677     int handle = 0;
1678     int n;
1679     int flag = 0;
1680
1681     if (cs->singlestep_enabled) {
1682         handle = 1;
1683     } else if (arch_info->status) {
1684         if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1685             if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1686                 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1687                 if (n >= 0) {
1688                     handle = 1;
1689                 }
1690             } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1691                                             KVMPPC_DEBUG_WATCH_WRITE)) {
1692                 n = find_hw_watchpoint(arch_info->address,  &flag);
1693                 if (n >= 0) {
1694                     handle = 1;
1695                     cs->watchpoint_hit = &hw_watchpoint;
1696                     hw_watchpoint.vaddr = hw_debug_points[n].addr;
1697                     hw_watchpoint.flags = flag;
1698                 }
1699             }
1700         }
1701     } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1702         handle = 1;
1703     } else {
1704         /* QEMU is not able to handle debug exception, so inject
1705          * program exception to guest;
1706          * Yes program exception NOT debug exception !!
1707          * When QEMU is using debug resources then debug exception must
1708          * be always set. To achieve this we set MSR_DE and also set
1709          * MSRP_DEP so guest cannot change MSR_DE.
1710          * When emulating debug resource for guest we want guest
1711          * to control MSR_DE (enable/disable debug interrupt on need).
1712          * Supporting both configurations are NOT possible.
1713          * So the result is that we cannot share debug resources
1714          * between QEMU and Guest on BOOKE architecture.
1715          * In the current design QEMU gets the priority over guest,
1716          * this means that if QEMU is using debug resources then guest
1717          * cannot use them;
1718          * For software breakpoint QEMU uses a privileged instruction;
1719          * So there cannot be any reason that we are here for guest
1720          * set debug exception, only possibility is guest executed a
1721          * privileged / illegal instruction and that's why we are
1722          * injecting a program interrupt.
1723          */
1724
1725         cpu_synchronize_state(cs);
1726         /* env->nip is PC, so increment this by 4 to use
1727          * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
1728          */
1729         env->nip += 4;
1730         cs->exception_index = POWERPC_EXCP_PROGRAM;
1731         env->error_code = POWERPC_EXCP_INVAL;
1732         ppc_cpu_do_interrupt(cs);
1733     }
1734
1735     return handle;
1736 }
1737
1738 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1739 {
1740     PowerPCCPU *cpu = POWERPC_CPU(cs);
1741     CPUPPCState *env = &cpu->env;
1742     int ret;
1743
1744     qemu_mutex_lock_iothread();
1745
1746     switch (run->exit_reason) {
1747     case KVM_EXIT_DCR:
1748         if (run->dcr.is_write) {
1749             DPRINTF("handle dcr write\n");
1750             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1751         } else {
1752             DPRINTF("handle dcr read\n");
1753             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1754         }
1755         break;
1756     case KVM_EXIT_HLT:
1757         DPRINTF("handle halt\n");
1758         ret = kvmppc_handle_halt(cpu);
1759         break;
1760 #if defined(TARGET_PPC64)
1761     case KVM_EXIT_PAPR_HCALL:
1762         DPRINTF("handle PAPR hypercall\n");
1763         run->papr_hcall.ret = spapr_hypercall(cpu,
1764                                               run->papr_hcall.nr,
1765                                               run->papr_hcall.args);
1766         ret = 0;
1767         break;
1768 #endif
1769     case KVM_EXIT_EPR:
1770         DPRINTF("handle epr\n");
1771         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1772         ret = 0;
1773         break;
1774     case KVM_EXIT_WATCHDOG:
1775         DPRINTF("handle watchdog expiry\n");
1776         watchdog_perform_action();
1777         ret = 0;
1778         break;
1779
1780     case KVM_EXIT_DEBUG:
1781         DPRINTF("handle debug exception\n");
1782         if (kvm_handle_debug(cpu, run)) {
1783             ret = EXCP_DEBUG;
1784             break;
1785         }
1786         /* re-enter, this exception was guest-internal */
1787         ret = 0;
1788         break;
1789
1790     default:
1791         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1792         ret = -1;
1793         break;
1794     }
1795
1796     qemu_mutex_unlock_iothread();
1797     return ret;
1798 }
1799
1800 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1801 {
1802     CPUState *cs = CPU(cpu);
1803     uint32_t bits = tsr_bits;
1804     struct kvm_one_reg reg = {
1805         .id = KVM_REG_PPC_OR_TSR,
1806         .addr = (uintptr_t) &bits,
1807     };
1808
1809     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1810 }
1811
1812 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1813 {
1814
1815     CPUState *cs = CPU(cpu);
1816     uint32_t bits = tsr_bits;
1817     struct kvm_one_reg reg = {
1818         .id = KVM_REG_PPC_CLEAR_TSR,
1819         .addr = (uintptr_t) &bits,
1820     };
1821
1822     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1823 }
1824
1825 int kvmppc_set_tcr(PowerPCCPU *cpu)
1826 {
1827     CPUState *cs = CPU(cpu);
1828     CPUPPCState *env = &cpu->env;
1829     uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1830
1831     struct kvm_one_reg reg = {
1832         .id = KVM_REG_PPC_TCR,
1833         .addr = (uintptr_t) &tcr,
1834     };
1835
1836     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1837 }
1838
1839 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1840 {
1841     CPUState *cs = CPU(cpu);
1842     int ret;
1843
1844     if (!kvm_enabled()) {
1845         return -1;
1846     }
1847
1848     if (!cap_ppc_watchdog) {
1849         printf("warning: KVM does not support watchdog");
1850         return -1;
1851     }
1852
1853     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1854     if (ret < 0) {
1855         fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1856                 __func__, strerror(-ret));
1857         return ret;
1858     }
1859
1860     return ret;
1861 }
1862
/*
 * Scan /proc/cpuinfo for the first line that begins with @field and
 * copy that whole line into @value (at most @len bytes, via pstrcpy()).
 *
 * Returns 0 when a matching line was found, -1 when the file cannot be
 * opened or no line matches.
 */
static int read_cpuinfo(const char *field, char *value, int len)
{
    char line[512];
    int field_len = strlen(field);
    int status = -1;
    FILE *cpuinfo = fopen("/proc/cpuinfo", "r");

    if (cpuinfo == NULL) {
        return -1;
    }

    for (;;) {
        if (fgets(line, sizeof(line), cpuinfo) == NULL) {
            /* End of file or read error: nothing matched */
            break;
        }
        if (strncmp(line, field, field_len) == 0) {
            pstrcpy(value, len, line);
            status = 0;
            break;
        }
        if (line[0] == '\0') {
            /* An empty buffer also ends the scan */
            break;
        }
    }

    fclose(cpuinfo);

    return status;
}
1890
1891 uint32_t kvmppc_get_tbfreq(void)
1892 {
1893     char line[512];
1894     char *ns;
1895     uint32_t retval = NANOSECONDS_PER_SECOND;
1896
1897     if (read_cpuinfo("timebase", line, sizeof(line))) {
1898         return retval;
1899     }
1900
1901     if (!(ns = strchr(line, ':'))) {
1902         return retval;
1903     }
1904
1905     ns++;
1906
1907     return atoi(ns);
1908 }
1909
/*
 * Load the contents of /proc/device-tree/system-id into a buffer
 * returned through @value (via g_file_get_contents()).
 *
 * Returns the result of g_file_get_contents().
 */
bool kvmppc_get_host_serial(char **value)
{
    const char *path = "/proc/device-tree/system-id";

    return g_file_get_contents(path, value, NULL, NULL);
}
1915
/*
 * Load the contents of /proc/device-tree/model into a buffer returned
 * through @value (via g_file_get_contents()).
 *
 * Returns the result of g_file_get_contents().
 */
bool kvmppc_get_host_model(char **value)
{
    const char *path = "/proc/device-tree/model";

    return g_file_get_contents(path, value, NULL, NULL);
}
1920
1921 /* Try to find a device tree node for a CPU with clock-frequency property */
1922 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1923 {
1924     struct dirent *dirp;
1925     DIR *dp;
1926
1927     if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1928         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1929         return -1;
1930     }
1931
1932     buf[0] = '\0';
1933     while ((dirp = readdir(dp)) != NULL) {
1934         FILE *f;
1935         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1936                  dirp->d_name);
1937         f = fopen(buf, "r");
1938         if (f) {
1939             snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1940             fclose(f);
1941             break;
1942         }
1943         buf[0] = '\0';
1944     }
1945     closedir(dp);
1946     if (buf[0] == '\0') {
1947         printf("Unknown host!\n");
1948         return -1;
1949     }
1950
1951     return 0;
1952 }
1953
/*
 * Read a big-endian integer property from the file @filename.
 *
 * A 4-byte file is decoded as a 32-bit value and an 8-byte file as a
 * 64-bit value.  Returns -1 (as uint64_t) when the file cannot be
 * opened and 0 when the number of bytes read is neither 4 nor 8.
 */
static uint64_t kvmppc_read_int_dt(const char *filename)
{
    union {
        uint32_t v32;
        uint64_t v64;
    } raw;
    int nread;
    FILE *prop = fopen(filename, "rb");

    if (prop == NULL) {
        return -1;
    }

    /* Ask for up to 8 bytes; the byte count tells us the property width */
    nread = fread(&raw, 1, sizeof(raw), prop);
    fclose(prop);

    if (nread == 4) {
        /* property is a 32-bit quantity */
        return be32_to_cpu(raw.v32);
    }
    if (nread == 8) {
        /* property is a 64-bit quantity */
        return be64_to_cpu(raw.v64);
    }

    return 0;
}
1980
1981 /* Read a CPU node property from the host device tree that's a single
1982  * integer (32-bit or 64-bit).  Returns 0 if anything goes wrong
1983  * (can't find or open the property, or doesn't understand the
1984  * format) */
1985 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1986 {
1987     char buf[PATH_MAX], *tmp;
1988     uint64_t val;
1989
1990     if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1991         return -1;
1992     }
1993
1994     tmp = g_strdup_printf("%s/%s", buf, propname);
1995     val = kvmppc_read_int_dt(tmp);
1996     g_free(tmp);
1997
1998     return val;
1999 }
2000
/*
 * Return the "clock-frequency" property of the host CPU device tree
 * node, as read by kvmppc_read_int_cpu_dt().
 */
uint64_t kvmppc_get_clockfreq(void)
{
    uint64_t freq = kvmppc_read_int_cpu_dt("clock-frequency");

    return freq;
}
2005
/*
 * Return the "ibm,vmx" property of the host CPU device tree node, as
 * read by kvmppc_read_int_cpu_dt() and converted to 32 bits.
 */
uint32_t kvmppc_get_vmx(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,vmx");

    /* Implicitly narrowed to the 32-bit return type */
    return prop;
}
2010
/*
 * Return the "ibm,dfp" property of the host CPU device tree node, as
 * read by kvmppc_read_int_cpu_dt() and converted to 32 bits.
 */
uint32_t kvmppc_get_dfp(void)
{
    uint64_t prop = kvmppc_read_int_cpu_dt("ibm,dfp");

    /* Implicitly narrowed to the 32-bit return type */
    return prop;
}
2015
2016 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
2017  {
2018      PowerPCCPU *cpu = ppc_env_get_cpu(env);
2019      CPUState *cs = CPU(cpu);
2020
2021     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
2022         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
2023         return 0;
2024     }
2025
2026     return 1;
2027 }
2028
2029 int kvmppc_get_hasidle(CPUPPCState *env)
2030 {
2031     struct kvm_ppc_pvinfo pvinfo;
2032
2033     if (!kvmppc_get_pvinfo(env, &pvinfo) &&
2034         (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
2035         return 1;
2036     }
2037
2038     return 0;
2039 }
2040
2041 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
2042 {
2043     uint32_t *hc = (uint32_t*)buf;
2044     struct kvm_ppc_pvinfo pvinfo;
2045
2046     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
2047         memcpy(buf, pvinfo.hcall, buf_len);
2048         return 0;
2049     }
2050
2051     /*
2052      * Fallback to always fail hypercalls regardless of endianness:
2053      *
2054      *     tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
2055      *     li r3, -1
2056      *     b .+8       (becomes nop in wrong endian)
2057      *     bswap32(li r3, -1)
2058      */
2059
2060     hc[0] = cpu_to_be32(0x08000048);
2061     hc[1] = cpu_to_be32(0x3860ffff);
2062     hc[2] = cpu_to_be32(0x48000008);
2063     hc[3] = cpu_to_be32(bswap32(0x3860ffff));
2064
2065     return 1;
2066 }
2067
2068 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
2069 {
2070     return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2071 }
2072
2073 void kvmppc_enable_logical_ci_hcalls(void)
2074 {
2075     /*
2076      * FIXME: it would be nice if we could detect the cases where
2077      * we're using a device which requires the in kernel
2078      * implementation of these hcalls, but the kernel lacks them and
2079      * produce a warning.
2080      */
2081     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2082     kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2083 }
2084
2085 void kvmppc_enable_set_mode_hcall(void)
2086 {
2087     kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2088 }
2089
2090 void kvmppc_enable_clear_ref_mod_hcalls(void)
2091 {
2092     kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2093     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2094 }
2095
2096 void kvmppc_set_papr(PowerPCCPU *cpu)
2097 {
2098     CPUState *cs = CPU(cpu);
2099     int ret;
2100
2101     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2102     if (ret) {
2103         error_report("This vCPU type or KVM version does not support PAPR");
2104         exit(1);
2105     }
2106
2107     /* Update the capability flag so we sync the right information
2108      * with kvm */
2109     cap_papr = 1;
2110 }
2111
2112 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2113 {
2114     return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2115 }
2116
2117 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2118 {
2119     CPUState *cs = CPU(cpu);
2120     int ret;
2121
2122     ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2123     if (ret && mpic_proxy) {
2124         error_report("This KVM version does not support EPR");
2125         exit(1);
2126     }
2127 }
2128
2129 int kvmppc_smt_threads(void)
2130 {
2131     return cap_ppc_smt ? cap_ppc_smt : 1;
2132 }
2133
2134 #ifdef TARGET_PPC64
2135 off_t kvmppc_alloc_rma(void **rma)
2136 {
2137     off_t size;
2138     int fd;
2139     struct kvm_allocate_rma ret;
2140
2141     /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2142      * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2143      *                      not necessary on this hardware
2144      * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2145      *
2146      * FIXME: We should allow the user to force contiguous RMA
2147      * allocation in the cap_ppc_rma==1 case.
2148      */
2149     if (cap_ppc_rma < 2) {
2150         return 0;
2151     }
2152
2153     fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2154     if (fd < 0) {
2155         fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2156                 strerror(errno));
2157         return -1;
2158     }
2159
2160     size = MIN(ret.rma_size, 256ul << 20);
2161
2162     *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2163     if (*rma == MAP_FAILED) {
2164         fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2165         return -1;
2166     };
2167
2168     return size;
2169 }
2170
2171 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2172 {
2173     struct kvm_ppc_smmu_info info;
2174     long rampagesize, best_page_shift;
2175     int i;
2176
2177     if (cap_ppc_rma >= 2) {
2178         return current_size;
2179     }
2180
2181     /* Find the largest hardware supported page size that's less than
2182      * or equal to the (logical) backing page size of guest RAM */
2183     kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2184     rampagesize = getrampagesize();
2185     best_page_shift = 0;
2186
2187     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2188         struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2189
2190         if (!sps->page_shift) {
2191             continue;
2192         }
2193
2194         if ((sps->page_shift > best_page_shift)
2195             && ((1UL << sps->page_shift) <= rampagesize)) {
2196             best_page_shift = sps->page_shift;
2197         }
2198     }
2199
2200     return MIN(current_size,
2201                1ULL << (best_page_shift + hash_shift - 7));
2202 }
2203 #endif
2204
2205 bool kvmppc_spapr_use_multitce(void)
2206 {
2207     return cap_spapr_multitce;
2208 }
2209
2210 void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
2211                               bool need_vfio)
2212 {
2213     struct kvm_create_spapr_tce args = {
2214         .liobn = liobn,
2215         .window_size = window_size,
2216     };
2217     long len;
2218     int fd;
2219     void *table;
2220
2221     /* Must set fd to -1 so we don't try to munmap when called for
2222      * destroying the table, which the upper layers -will- do
2223      */
2224     *pfd = -1;
2225     if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
2226         return NULL;
2227     }
2228
2229     fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
2230     if (fd < 0) {
2231         fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
2232                 liobn);
2233         return NULL;
2234     }
2235
2236     len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
2237     /* FIXME: round this up to page size */
2238
2239     table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2240     if (table == MAP_FAILED) {
2241         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
2242                 liobn);
2243         close(fd);
2244         return NULL;
2245     }
2246
2247     *pfd = fd;
2248     return table;
2249 }
2250
/*
 * Unmap a TCE table of @nb_table uint64_t entries mapped at @table and
 * close its file descriptor @fd.
 *
 * Returns -1 when @fd is negative (nothing is done), 0 otherwise.  A
 * failing munmap() or close() is only reported on stderr; note that
 * close() is not attempted when munmap() fails.
 */
int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        /* Terminate the line so the message doesn't run into whatever
         * is printed next */
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s\n",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}
2269
2270 int kvmppc_reset_htab(int shift_hint)
2271 {
2272     uint32_t shift = shift_hint;
2273
2274     if (!kvm_enabled()) {
2275         /* Full emulation, tell caller to allocate htab itself */
2276         return 0;
2277     }
2278     if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
2279         int ret;
2280         ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
2281         if (ret == -ENOTTY) {
2282             /* At least some versions of PR KVM advertise the
2283              * capability, but don't implement the ioctl().  Oops.
2284              * Return 0 so that we allocate the htab in qemu, as is
2285              * correct for PR. */
2286             return 0;
2287         } else if (ret < 0) {
2288             return ret;
2289         }
2290         return shift;
2291     }
2292
2293     /* We have a kernel that predates the htab reset calls.  For PR
2294      * KVM, we need to allocate the htab ourselves, for an HV KVM of
2295      * this era, it has allocated a 16MB fixed size hash table already. */
2296     if (kvmppc_is_pr(kvm_state)) {
2297         /* PR - tell caller to allocate htab */
2298         return 0;
2299     } else {
2300         /* HV - assume 16MB kernel allocated htab */
2301         return 24;
2302     }
2303 }
2304
2305 static inline uint32_t mfpvr(void)
2306 {
2307     uint32_t pvr;
2308
2309     asm ("mfpvr %0"
2310          : "=r"(pvr));
2311     return pvr;
2312 }
2313
/*
 * Set (@on true) or clear (@on false) the bits of @flags in *@word.
 */
static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    *word = on ? (*word | flags) : (*word & ~flags);
}
2322
2323 static void kvmppc_host_cpu_initfn(Object *obj)
2324 {
2325     assert(kvm_enabled());
2326 }
2327
2328 static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
2329 {
2330     DeviceClass *dc = DEVICE_CLASS(oc);
2331     PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
2332     uint32_t vmx = kvmppc_get_vmx();
2333     uint32_t dfp = kvmppc_get_dfp();
2334     uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
2335     uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");
2336
2337     /* Now fix up the class with information we can query from the host */
2338     pcc->pvr = mfpvr();
2339
2340     if (vmx != -1) {
2341         /* Only override when we know what the host supports */
2342         alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
2343         alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
2344     }
2345     if (dfp != -1) {
2346         /* Only override when we know what the host supports */
2347         alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
2348     }
2349
2350     if (dcache_size != -1) {
2351         pcc->l1_dcache_size = dcache_size;
2352     }
2353
2354     if (icache_size != -1) {
2355         pcc->l1_icache_size = icache_size;
2356     }
2357
2358     /* Reason: kvmppc_host_cpu_initfn() dies when !kvm_enabled() */
2359     dc->cannot_destroy_with_object_finalize_yet = true;
2360 }
2361
2362 bool kvmppc_has_cap_epr(void)
2363 {
2364     return cap_epr;
2365 }
2366
2367 bool kvmppc_has_cap_htab_fd(void)
2368 {
2369     return cap_htab_fd;
2370 }
2371
2372 bool kvmppc_has_cap_fixup_hcalls(void)
2373 {
2374     return cap_fixup_hcalls;
2375 }
2376
2377 bool kvmppc_has_cap_htm(void)
2378 {
2379     return cap_htm;
2380 }
2381
2382 static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
2383 {
2384     ObjectClass *oc = OBJECT_CLASS(pcc);
2385
2386     while (oc && !object_class_is_abstract(oc)) {
2387         oc = object_class_get_parent(oc);
2388     }
2389     assert(oc);
2390
2391     return POWERPC_CPU_CLASS(oc);
2392 }
2393
2394 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
2395 {
2396     uint32_t host_pvr = mfpvr();
2397     PowerPCCPUClass *pvr_pcc;
2398
2399     pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
2400     if (pvr_pcc == NULL) {
2401         pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
2402     }
2403
2404     return pvr_pcc;
2405 }
2406
2407 static int kvm_ppc_register_host_cpu_type(void)
2408 {
2409     TypeInfo type_info = {
2410         .name = TYPE_HOST_POWERPC_CPU,
2411         .instance_init = kvmppc_host_cpu_initfn,
2412         .class_init = kvmppc_host_cpu_class_init,
2413     };
2414     PowerPCCPUClass *pvr_pcc;
2415     DeviceClass *dc;
2416     int i;
2417
2418     pvr_pcc = kvm_ppc_get_host_cpu_class();
2419     if (pvr_pcc == NULL) {
2420         return -1;
2421     }
2422     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
2423     type_register(&type_info);
2424
2425 #if defined(TARGET_PPC64)
2426     type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
2427     type_info.parent = TYPE_SPAPR_CPU_CORE,
2428     type_info.instance_size = sizeof(sPAPRCPUCore);
2429     type_info.instance_init = NULL;
2430     type_info.class_init = spapr_cpu_core_class_init;
2431     type_info.class_data = (void *) "host";
2432     type_register(&type_info);
2433     g_free((void *)type_info.name);
2434 #endif
2435
2436     /*
2437      * Update generic CPU family class alias (e.g. on a POWER8NVL host,
2438      * we want "POWER8" to be a "family" alias that points to the current
2439      * host CPU type, too)
2440      */
2441     dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
2442     for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
2443         if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
2444             ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
2445             char *suffix;
2446
2447             ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
2448             suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
2449             if (suffix) {
2450                 *suffix = 0;
2451             }
2452             ppc_cpu_aliases[i].oc = oc;
2453             break;
2454         }
2455     }
2456
2457     return 0;
2458 }
2459
2460 int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
2461 {
2462     struct kvm_rtas_token_args args = {
2463         .token = token,
2464     };
2465
2466     if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
2467         return -ENOENT;
2468     }
2469
2470     strncpy(args.name, function, sizeof(args.name));
2471
2472     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
2473 }
2474
2475 int kvmppc_get_htab_fd(bool write)
2476 {
2477     struct kvm_get_htab_fd s = {
2478         .flags = write ? KVM_GET_HTAB_WRITE : 0,
2479         .start_index = 0,
2480     };
2481
2482     if (!cap_htab_fd) {
2483         fprintf(stderr, "KVM version doesn't support saving the hash table\n");
2484         return -1;
2485     }
2486
2487     return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
2488 }
2489
2490 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
2491 {
2492     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
2493     uint8_t buf[bufsize];
2494     ssize_t rc;
2495
2496     do {
2497         rc = read(fd, buf, bufsize);
2498         if (rc < 0) {
2499             fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
2500                     strerror(errno));
2501             return rc;
2502         } else if (rc) {
2503             uint8_t *buffer = buf;
2504             ssize_t n = rc;
2505             while (n) {
2506                 struct kvm_get_htab_header *head =
2507                     (struct kvm_get_htab_header *) buffer;
2508                 size_t chunksize = sizeof(*head) +
2509                      HASH_PTE_SIZE_64 * head->n_valid;
2510
2511                 qemu_put_be32(f, head->index);
2512                 qemu_put_be16(f, head->n_valid);
2513                 qemu_put_be16(f, head->n_invalid);
2514                 qemu_put_buffer(f, (void *)(head + 1),
2515                                 HASH_PTE_SIZE_64 * head->n_valid);
2516
2517                 buffer += chunksize;
2518                 n -= chunksize;
2519             }
2520         }
2521     } while ((rc != 0)
2522              && ((max_ns < 0)
2523                  || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
2524
2525     return (rc == 0) ? 1 : 0;
2526 }
2527
2528 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
2529                            uint16_t n_valid, uint16_t n_invalid)
2530 {
2531     struct kvm_get_htab_header *buf;
2532     size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
2533     ssize_t rc;
2534
2535     buf = alloca(chunksize);
2536     buf->index = index;
2537     buf->n_valid = n_valid;
2538     buf->n_invalid = n_invalid;
2539
2540     qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
2541
2542     rc = write(fd, buf, chunksize);
2543     if (rc < 0) {
2544         fprintf(stderr, "Error writing KVM hash table: %s\n",
2545                 strerror(errno));
2546         return rc;
2547     }
2548     if (rc != chunksize) {
2549         /* We should never get a short write on a single chunk */
2550         fprintf(stderr, "Short write, restoring KVM hash table\n");
2551         return -1;
2552     }
2553     return 0;
2554 }
2555
2556 bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
2557 {
2558     return true;
2559 }
2560
2561 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
2562 {
2563     return 1;
2564 }
2565
/*
 * Arch SIGBUS hook; this implementation ignores @code and @addr and
 * always returns 1.
 */
int kvm_arch_on_sigbus(int code, void *addr)
{
    /* No handling is attempted here */
    return 1;
}
2570
2571 void kvm_arch_init_irq_routing(KVMState *s)
2572 {
2573 }
2574
2575 struct kvm_get_htab_buf {
2576     struct kvm_get_htab_header header;
2577     /*
2578      * We require one extra byte for read
2579      */
2580     target_ulong hpte[(HPTES_PER_GROUP * 2) + 1];
2581 };
2582
2583 uint64_t kvmppc_hash64_read_pteg(PowerPCCPU *cpu, target_ulong pte_index)
2584 {
2585     int htab_fd;
2586     struct kvm_get_htab_fd ghf;
2587     struct kvm_get_htab_buf  *hpte_buf;
2588
2589     ghf.flags = 0;
2590     ghf.start_index = pte_index;
2591     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2592     if (htab_fd < 0) {
2593         goto error_out;
2594     }
2595
2596     hpte_buf = g_malloc0(sizeof(*hpte_buf));
2597     /*
2598      * Read the hpte group
2599      */
2600     if (read(htab_fd, hpte_buf, sizeof(*hpte_buf)) < 0) {
2601         goto out_close;
2602     }
2603
2604     close(htab_fd);
2605     return (uint64_t)(uintptr_t) hpte_buf->hpte;
2606
2607 out_close:
2608     g_free(hpte_buf);
2609     close(htab_fd);
2610 error_out:
2611     return 0;
2612 }
2613
2614 void kvmppc_hash64_free_pteg(uint64_t token)
2615 {
2616     struct kvm_get_htab_buf *htab_buf;
2617
2618     htab_buf = container_of((void *)(uintptr_t) token, struct kvm_get_htab_buf,
2619                             hpte);
2620     g_free(htab_buf);
2621     return;
2622 }
2623
2624 void kvmppc_hash64_write_pte(CPUPPCState *env, target_ulong pte_index,
2625                              target_ulong pte0, target_ulong pte1)
2626 {
2627     int htab_fd;
2628     struct kvm_get_htab_fd ghf;
2629     struct kvm_get_htab_buf hpte_buf;
2630
2631     ghf.flags = 0;
2632     ghf.start_index = 0;     /* Ignored */
2633     htab_fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
2634     if (htab_fd < 0) {
2635         goto error_out;
2636     }
2637
2638     hpte_buf.header.n_valid = 1;
2639     hpte_buf.header.n_invalid = 0;
2640     hpte_buf.header.index = pte_index;
2641     hpte_buf.hpte[0] = pte0;
2642     hpte_buf.hpte[1] = pte1;
2643     /*
2644      * Write the hpte entry.
2645      * CAUTION: write() has the warn_unused_result attribute. Hence we
2646      * need to check the return value, even though we do nothing.
2647      */
2648     if (write(htab_fd, &hpte_buf, sizeof(hpte_buf)) < 0) {
2649         goto out_close;
2650     }
2651
2652 out_close:
2653     close(htab_fd);
2654     return;
2655
2656 error_out:
2657     return;
2658 }
2659
2660 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
2661                              uint64_t address, uint32_t data, PCIDevice *dev)
2662 {
2663     return 0;
2664 }
2665
2666 int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
2667                                 int vector, PCIDevice *dev)
2668 {
2669     return 0;
2670 }
2671
/*
 * Arch hook run after a virq is released; this implementation ignores
 * @virq and always returns 0.
 */
int kvm_arch_release_virq_post(int virq)
{
    /* Nothing to do */
    return 0;
}
2676
/*
 * Return the low 16 bits of the MSI @data value.
 */
int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    uint32_t low16 = data & 0xffff;

    return low16;
}
2681
2682 int kvmppc_enable_hwrng(void)
2683 {
2684     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
2685         return -1;
2686     }
2687
2688     return kvmppc_enable_hcall(kvm_state, H_RANDOM);
2689 }