target/ppc: Improve accuracy of guest HTM availability on P8s
qemu/ar7.git: target/ppc/kvm.c
1 /*
2 * PowerPC implementation of KVM hooks
4 * Copyright IBM Corp. 2007
5 * Copyright (C) 2011 Freescale Semiconductor, Inc.
7 * Authors:
8 * Jerone Young <jyoung5@us.ibm.com>
9 * Christian Ehrhardt <ehrhardt@linux.vnet.ibm.com>
10 * Hollis Blanchard <hollisb@us.ibm.com>
12 * This work is licensed under the terms of the GNU GPL, version 2 or later.
13 * See the COPYING file in the top-level directory.
17 #include "qemu/osdep.h"
18 #include <dirent.h>
19 #include <sys/ioctl.h>
20 #include <sys/vfs.h>
22 #include <linux/kvm.h>
24 #include "qemu-common.h"
25 #include "qemu/error-report.h"
26 #include "cpu.h"
27 #include "cpu-models.h"
28 #include "qemu/timer.h"
29 #include "sysemu/sysemu.h"
30 #include "sysemu/hw_accel.h"
31 #include "kvm_ppc.h"
32 #include "sysemu/cpus.h"
33 #include "sysemu/device_tree.h"
34 #include "mmu-hash64.h"
36 #include "hw/sysbus.h"
37 #include "hw/ppc/spapr.h"
38 #include "hw/ppc/spapr_vio.h"
39 #include "hw/ppc/spapr_cpu_core.h"
40 #include "hw/ppc/ppc.h"
41 #include "sysemu/watchdog.h"
42 #include "trace.h"
43 #include "exec/gdbstub.h"
44 #include "exec/memattrs.h"
45 #include "exec/ram_addr.h"
46 #include "sysemu/hostmem.h"
47 #include "qemu/cutils.h"
48 #include "qemu/mmap-alloc.h"
49 #if defined(TARGET_PPC64)
50 #include "hw/ppc/spapr_cpu_core.h"
51 #endif
52 #include "elf.h"
54 //#define DEBUG_KVM
56 #ifdef DEBUG_KVM
57 #define DPRINTF(fmt, ...) \
58 do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
59 #else
60 #define DPRINTF(fmt, ...) \
61 do { } while (0)
62 #endif
64 #define PROC_DEVTREE_CPU "/proc/device-tree/cpus/"
66 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
67 KVM_CAP_LAST_INFO
70 static int cap_interrupt_unset = false;
71 static int cap_interrupt_level = false;
72 static int cap_segstate;
73 static int cap_booke_sregs;
74 static int cap_ppc_smt;
75 static int cap_ppc_rma;
76 static int cap_spapr_tce;
77 static int cap_spapr_multitce;
78 static int cap_spapr_vfio;
79 static int cap_hior;
80 static int cap_one_reg;
81 static int cap_epr;
82 static int cap_ppc_watchdog;
83 static int cap_papr;
84 static int cap_htab_fd;
85 static int cap_fixup_hcalls;
86 static int cap_htm; /* Hardware transactional memory support */
88 static uint32_t debug_inst_opcode;
90 /* XXX We have a race condition where we actually have a level triggered
91 * interrupt, but the infrastructure can't expose that yet, so the guest
92 * takes but ignores it, goes to sleep and never gets notified that there's
93 * still an interrupt pending.
95 * As a quick workaround, let's just wake up again 20 ms after we injected
96 * an interrupt. That way we can ensure that we're always reinjecting
97 * interrupts in case the guest swallowed them.
99 static QEMUTimer *idle_timer;
101 static void kvm_kick_cpu(void *opaque)
103 PowerPCCPU *cpu = opaque;
105 qemu_cpu_kick(CPU(cpu));
108 /* Check whether we are running with KVM-PR (instead of KVM-HV). This
109 * should only be used for fallback tests - generally we should use
110 * explicit capabilities for the features we want, rather than
111 * assuming what is/isn't available depending on the KVM variant. */
112 static bool kvmppc_is_pr(KVMState *ks)
114 /* Assume KVM-PR if the GET_PVINFO capability is available */
115 return kvm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
118 static int kvm_ppc_register_host_cpu_type(void);
120 int kvm_arch_init(MachineState *ms, KVMState *s)
122 cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
123 cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
124 cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
125 cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
126 cap_ppc_smt = kvm_check_extension(s, KVM_CAP_PPC_SMT);
127 cap_ppc_rma = kvm_check_extension(s, KVM_CAP_PPC_RMA);
128 cap_spapr_tce = kvm_check_extension(s, KVM_CAP_SPAPR_TCE);
129 cap_spapr_multitce = kvm_check_extension(s, KVM_CAP_SPAPR_MULTITCE);
130 cap_spapr_vfio = false;
131 cap_one_reg = kvm_check_extension(s, KVM_CAP_ONE_REG);
132 cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
133 cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
134 cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
135 /* Note: we don't set cap_papr here, because this capability is
136 * only activated after this by kvmppc_set_papr() */
137 cap_htab_fd = kvm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
138 cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
139 cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
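/* Note: KVM_CAP_PPC_HTM is queried on the VM here. On POWER8 hosts running
 * KVM-HV the capability may be absent even though HTM works, in which case
 * cap_htm is force-enabled later in kvm_arch_init_vcpu(). */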
141 if (!cap_interrupt_level) {
142 fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
143 "VM to stall at times!\n");
146 kvm_ppc_register_host_cpu_type();
148 return 0;
151 int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
153 return 0;
156 static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
158 CPUPPCState *cenv = &cpu->env;
159 CPUState *cs = CPU(cpu);
160 struct kvm_sregs sregs;
161 int ret;
163 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
164 /* What we're really trying to say is "if we're on BookE, we use
165 the native PVR for now". This is the only sane way to check
166 it though, so we potentially confuse users into thinking they can
167 run BookE guests on BookS. Let's hope nobody dares enough :) */
168 return 0;
169 } else {
170 if (!cap_segstate) {
171 fprintf(stderr, "kvm error: missing PVR setting capability\n");
172 return -ENOSYS;
176 ret = kvm_vcpu_ioctl(cs, KVM_GET_SREGS, &sregs);
177 if (ret) {
178 return ret;
181 sregs.pvr = cenv->spr[SPR_PVR];
182 return kvm_vcpu_ioctl(cs, KVM_SET_SREGS, &sregs);
185 /* Set up a shared TLB array with KVM */
186 static int kvm_booke206_tlb_init(PowerPCCPU *cpu)
188 CPUPPCState *env = &cpu->env;
189 CPUState *cs = CPU(cpu);
190 struct kvm_book3e_206_tlb_params params = {};
191 struct kvm_config_tlb cfg = {};
192 unsigned int entries = 0;
193 int ret, i;
195 if (!kvm_enabled() ||
196 !kvm_check_extension(cs->kvm_state, KVM_CAP_SW_TLB)) {
197 return 0;
200 assert(ARRAY_SIZE(params.tlb_sizes) == BOOKE206_MAX_TLBN);
202 for (i = 0; i < BOOKE206_MAX_TLBN; i++) {
203 params.tlb_sizes[i] = booke206_tlb_size(env, i);
204 params.tlb_ways[i] = booke206_tlb_ways(env, i);
205 entries += params.tlb_sizes[i];
208 assert(entries == env->nb_tlb);
209 assert(sizeof(struct kvm_book3e_206_tlb_entry) == sizeof(ppcmas_tlb_t));
211 env->tlb_dirty = true;
213 cfg.array = (uintptr_t)env->tlb.tlbm;
214 cfg.array_len = sizeof(ppcmas_tlb_t) * entries;
215 cfg.params = (uintptr_t)&params;
216 cfg.mmu_type = KVM_MMU_FSL_BOOKE_NOHV;
218 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_SW_TLB, 0, (uintptr_t)&cfg);
219 if (ret < 0) {
220 fprintf(stderr, "%s: couldn't enable KVM_CAP_SW_TLB: %s\n",
221 __func__, strerror(-ret));
222 return ret;
225 env->kvm_sw_tlb = true;
226 return 0;
230 #if defined(TARGET_PPC64)
231 static void kvm_get_fallback_smmu_info(PowerPCCPU *cpu,
232 struct kvm_ppc_smmu_info *info)
234 CPUPPCState *env = &cpu->env;
235 CPUState *cs = CPU(cpu);
237 memset(info, 0, sizeof(*info));
239 /* We don't have the new KVM_PPC_GET_SMMU_INFO ioctl, so
240 * need to "guess" what the supported page sizes are.
242 * For that to work we make a few assumptions:
244 * - Check whether we are running "PR" KVM which only supports 4K
245 * and 16M pages, but supports them regardless of the backing
246 * store characteristics. We also don't support 1T segments.
248 * This is safe as if HV KVM ever supports that capability or PR
249 * KVM grows support for more page/segment sizes, those versions
250 * will have implemented KVM_CAP_PPC_GET_SMMU_INFO and thus we
251 * will not hit this fallback
253 * - Else we are running HV KVM. This means we only support page
254 * sizes that fit in the backing store. Additionally we only
255 * advertise 64K pages if the processor is ARCH 2.06 and we assume
256 * P7 encodings for the SLB and hash table. Here too, we assume
257 * support for any newer processor will mean a kernel that
258 * implements KVM_CAP_PPC_GET_SMMU_INFO and thus doesn't hit
259 * this fallback.
261 if (kvmppc_is_pr(cs->kvm_state)) {
262 /* No flags */
263 info->flags = 0;
264 info->slb_size = 64;
266 /* Standard 4k base page size segment */
267 info->sps[0].page_shift = 12;
268 info->sps[0].slb_enc = 0;
269 info->sps[0].enc[0].page_shift = 12;
270 info->sps[0].enc[0].pte_enc = 0;
272 /* Standard 16M large page size segment */
273 info->sps[1].page_shift = 24;
274 info->sps[1].slb_enc = SLB_VSID_L;
275 info->sps[1].enc[0].page_shift = 24;
276 info->sps[1].enc[0].pte_enc = 0;
277 } else {
278 int i = 0;
280 /* HV KVM has backing store size restrictions */
281 info->flags = KVM_PPC_PAGE_SIZES_REAL;
283 if (env->mmu_model & POWERPC_MMU_1TSEG) {
284 info->flags |= KVM_PPC_1T_SEGMENTS;
287 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
288 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
289 info->slb_size = 32;
290 } else {
291 info->slb_size = 64;
294 /* Standard 4k base page size segment */
295 info->sps[i].page_shift = 12;
296 info->sps[i].slb_enc = 0;
297 info->sps[i].enc[0].page_shift = 12;
298 info->sps[i].enc[0].pte_enc = 0;
299 i++;
301 /* 64K on MMU 2.06 and later */
302 if (POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_06 ||
303 POWERPC_MMU_VER(env->mmu_model) == POWERPC_MMU_VER_2_07) {
304 info->sps[i].page_shift = 16;
305 info->sps[i].slb_enc = 0x110;
306 info->sps[i].enc[0].page_shift = 16;
307 info->sps[i].enc[0].pte_enc = 1;
308 i++;
311 /* Standard 16M large page size segment */
312 info->sps[i].page_shift = 24;
313 info->sps[i].slb_enc = SLB_VSID_L;
314 info->sps[i].enc[0].page_shift = 24;
315 info->sps[i].enc[0].pte_enc = 0;
319 static void kvm_get_smmu_info(PowerPCCPU *cpu, struct kvm_ppc_smmu_info *info)
321 CPUState *cs = CPU(cpu);
322 int ret;
324 if (kvm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_SMMU_INFO)) {
325 ret = kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_SMMU_INFO, info);
326 if (ret == 0) {
327 return;
331 kvm_get_fallback_smmu_info(cpu, info);
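/* When KVM reports KVM_PPC_PAGE_SIZES_REAL, a segment/page size is only
 * usable if it is no larger than the page size backing guest RAM. */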
334 static bool kvm_valid_page_size(uint32_t flags, long rampgsize, uint32_t shift)
336 if (!(flags & KVM_PPC_PAGE_SIZES_REAL)) {
337 return true;
340 return (1ul << shift) <= rampgsize;
343 static long max_cpu_page_size;
345 static void kvm_fixup_page_sizes(PowerPCCPU *cpu)
347 static struct kvm_ppc_smmu_info smmu_info;
348 static bool has_smmu_info;
349 CPUPPCState *env = &cpu->env;
350 int iq, ik, jq, jk;
351 bool has_64k_pages = false;
353 /* We only handle page sizes for 64-bit server guests for now */
354 if (!(env->mmu_model & POWERPC_MMU_64)) {
355 return;
358 /* Collect MMU info from kernel if not already */
359 if (!has_smmu_info) {
360 kvm_get_smmu_info(cpu, &smmu_info);
361 has_smmu_info = true;
364 if (!max_cpu_page_size) {
365 max_cpu_page_size = qemu_getrampagesize();
368 /* Convert to QEMU form */
369 memset(&env->sps, 0, sizeof(env->sps));
371 /* If we have HV KVM, we need to forbid CI large pages if our
372 * host page size is smaller than 64K.
374 if (smmu_info.flags & KVM_PPC_PAGE_SIZES_REAL) {
375 env->ci_large_pages = getpagesize() >= 0x10000;
379 * XXX This loop should be an entry wide AND of the capabilities that
380 * the selected CPU has with the capabilities that KVM supports.
382 for (ik = iq = 0; ik < KVM_PPC_PAGE_SIZES_MAX_SZ; ik++) {
383 struct ppc_one_seg_page_size *qsps = &env->sps.sps[iq];
384 struct kvm_ppc_one_seg_page_size *ksps = &smmu_info.sps[ik];
386 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
387 ksps->page_shift)) {
388 continue;
390 qsps->page_shift = ksps->page_shift;
391 qsps->slb_enc = ksps->slb_enc;
392 for (jk = jq = 0; jk < KVM_PPC_PAGE_SIZES_MAX_SZ; jk++) {
393 if (!kvm_valid_page_size(smmu_info.flags, max_cpu_page_size,
394 ksps->enc[jk].page_shift)) {
395 continue;
397 if (ksps->enc[jk].page_shift == 16) {
398 has_64k_pages = true;
400 qsps->enc[jq].page_shift = ksps->enc[jk].page_shift;
401 qsps->enc[jq].pte_enc = ksps->enc[jk].pte_enc;
402 if (++jq >= PPC_PAGE_SIZES_MAX_SZ) {
403 break;
406 if (++iq >= PPC_PAGE_SIZES_MAX_SZ) {
407 break;
410 env->slb_nr = smmu_info.slb_size;
411 if (!(smmu_info.flags & KVM_PPC_1T_SEGMENTS)) {
412 env->mmu_model &= ~POWERPC_MMU_1TSEG;
414 if (!has_64k_pages) {
415 env->mmu_model &= ~POWERPC_MMU_64K;
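/* Check that a memory backend's pages are at least as large as
 * max_cpu_page_size, which kvm_fixup_page_sizes() derives from the page
 * size backing main guest RAM. */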
419 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
421 Object *mem_obj = object_resolve_path(obj_path, NULL);
422 char *mempath = object_property_get_str(mem_obj, "mem-path", NULL);
423 long pagesize;
425 if (mempath) {
426 pagesize = qemu_mempath_getpagesize(mempath);
427 } else {
428 pagesize = getpagesize();
431 return pagesize >= max_cpu_page_size;
434 #else /* defined (TARGET_PPC64) */
436 static inline void kvm_fixup_page_sizes(PowerPCCPU *cpu)
440 bool kvmppc_is_mem_backend_page_size_ok(char *obj_path)
442 return true;
445 #endif /* !defined (TARGET_PPC64) */
447 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
449 return ppc_get_vcpu_dt_id(POWERPC_CPU(cpu));
452 /* e500 supports 2 h/w breakpoint and 2 watchpoint.
453 * book3s supports only 1 watchpoint, so array size
454 * of 4 is sufficient for now.
456 #define MAX_HW_BKPTS 4
458 static struct HWBreakpoint {
459 target_ulong addr;
460 int type;
461 } hw_debug_points[MAX_HW_BKPTS];
463 static CPUWatchpoint hw_watchpoint;
465 /* Default there is no breakpoint and watchpoint supported */
466 static int max_hw_breakpoint;
467 static int max_hw_watchpoint;
468 static int nb_hw_breakpoint;
469 static int nb_hw_watchpoint;
471 static void kvmppc_hw_debug_points_init(CPUPPCState *cenv)
473 if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
474 max_hw_breakpoint = 2;
475 max_hw_watchpoint = 2;
478 if ((max_hw_breakpoint + max_hw_watchpoint) > MAX_HW_BKPTS) {
479 fprintf(stderr, "Error initializing h/w breakpoints\n");
480 return;
484 int kvm_arch_init_vcpu(CPUState *cs)
486 PowerPCCPU *cpu = POWERPC_CPU(cs);
487 CPUPPCState *cenv = &cpu->env;
488 int ret;
490 /* Gather server mmu info from KVM and update the CPU state */
491 kvm_fixup_page_sizes(cpu);
493 /* Synchronize sregs with kvm */
494 ret = kvm_arch_sync_sregs(cpu);
495 if (ret) {
496 if (ret == -EINVAL) {
497 error_report("Register sync failed... If you're using kvm-hv.ko,"
498 " only \"-cpu host\" is possible");
500 return ret;
503 idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
505 switch (cenv->mmu_model) {
506 case POWERPC_MMU_BOOKE206:
507 /* This target supports access to KVM's guest TLB */
508 ret = kvm_booke206_tlb_init(cpu);
509 break;
510 case POWERPC_MMU_2_07:
511 if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
512 /* KVM-HV provides transactional memory on POWER8 even without the
513 * KVM_CAP_PPC_HTM extension, so enable it here instead as
514 * long as it's available to userspace on the host. */
515 if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
516 cap_htm = true;
519 break;
520 default:
521 break;
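/* Ask KVM which instruction it uses as the software-breakpoint trap. */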
524 kvm_get_one_reg(cs, KVM_REG_PPC_DEBUG_INST, &debug_inst_opcode);
525 kvmppc_hw_debug_points_init(cenv);
527 return ret;
530 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
532 CPUPPCState *env = &cpu->env;
533 CPUState *cs = CPU(cpu);
534 struct kvm_dirty_tlb dirty_tlb;
535 unsigned char *bitmap;
536 int ret;
538 if (!env->kvm_sw_tlb) {
539 return;
542 bitmap = g_malloc((env->nb_tlb + 7) / 8);
543 memset(bitmap, 0xFF, (env->nb_tlb + 7) / 8);
545 dirty_tlb.bitmap = (uintptr_t)bitmap;
546 dirty_tlb.num_dirty = env->nb_tlb;
548 ret = kvm_vcpu_ioctl(cs, KVM_DIRTY_TLB, &dirty_tlb);
549 if (ret) {
550 fprintf(stderr, "%s: KVM_DIRTY_TLB: %s\n",
551 __func__, strerror(-ret));
554 g_free(bitmap);
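/* Transfer a single SPR between env->spr[] and KVM via the ONE_REG
 * interface; the register width is encoded in the ONE_REG id. */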
557 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
559 PowerPCCPU *cpu = POWERPC_CPU(cs);
560 CPUPPCState *env = &cpu->env;
561 union {
562 uint32_t u32;
563 uint64_t u64;
564 } val;
565 struct kvm_one_reg reg = {
566 .id = id,
567 .addr = (uintptr_t) &val,
569 int ret;
571 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
572 if (ret != 0) {
573 trace_kvm_failed_spr_get(spr, strerror(errno));
574 } else {
575 switch (id & KVM_REG_SIZE_MASK) {
576 case KVM_REG_SIZE_U32:
577 env->spr[spr] = val.u32;
578 break;
580 case KVM_REG_SIZE_U64:
581 env->spr[spr] = val.u64;
582 break;
584 default:
585 /* Don't handle this size yet */
586 abort();
591 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
593 PowerPCCPU *cpu = POWERPC_CPU(cs);
594 CPUPPCState *env = &cpu->env;
595 union {
596 uint32_t u32;
597 uint64_t u64;
598 } val;
599 struct kvm_one_reg reg = {
600 .id = id,
601 .addr = (uintptr_t) &val,
603 int ret;
605 switch (id & KVM_REG_SIZE_MASK) {
606 case KVM_REG_SIZE_U32:
607 val.u32 = env->spr[spr];
608 break;
610 case KVM_REG_SIZE_U64:
611 val.u64 = env->spr[spr];
612 break;
614 default:
615 /* Don't handle this size yet */
616 abort();
619 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
620 if (ret != 0) {
621 trace_kvm_failed_spr_set(spr, strerror(errno));
625 static int kvm_put_fp(CPUState *cs)
627 PowerPCCPU *cpu = POWERPC_CPU(cs);
628 CPUPPCState *env = &cpu->env;
629 struct kvm_one_reg reg;
630 int i;
631 int ret;
633 if (env->insns_flags & PPC_FLOAT) {
634 uint64_t fpscr = env->fpscr;
635 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
637 reg.id = KVM_REG_PPC_FPSCR;
638 reg.addr = (uintptr_t)&fpscr;
639 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
640 if (ret < 0) {
641 DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
642 return ret;
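/* Each VSX register is transferred as two 64-bit halves; the FPR lives in
 * the first doubleword on big-endian hosts and in the second on
 * little-endian hosts. */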
645 for (i = 0; i < 32; i++) {
646 uint64_t vsr[2];
648 #ifdef HOST_WORDS_BIGENDIAN
649 vsr[0] = float64_val(env->fpr[i]);
650 vsr[1] = env->vsr[i];
651 #else
652 vsr[0] = env->vsr[i];
653 vsr[1] = float64_val(env->fpr[i]);
654 #endif
655 reg.addr = (uintptr_t) &vsr;
656 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
658 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
659 if (ret < 0) {
660 DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
661 i, strerror(errno));
662 return ret;
667 if (env->insns_flags & PPC_ALTIVEC) {
668 reg.id = KVM_REG_PPC_VSCR;
669 reg.addr = (uintptr_t)&env->vscr;
670 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
671 if (ret < 0) {
672 DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
673 return ret;
676 for (i = 0; i < 32; i++) {
677 reg.id = KVM_REG_PPC_VR(i);
678 reg.addr = (uintptr_t)&env->avr[i];
679 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
680 if (ret < 0) {
681 DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
682 return ret;
687 return 0;
690 static int kvm_get_fp(CPUState *cs)
692 PowerPCCPU *cpu = POWERPC_CPU(cs);
693 CPUPPCState *env = &cpu->env;
694 struct kvm_one_reg reg;
695 int i;
696 int ret;
698 if (env->insns_flags & PPC_FLOAT) {
699 uint64_t fpscr;
700 bool vsx = !!(env->insns_flags2 & PPC2_VSX);
702 reg.id = KVM_REG_PPC_FPSCR;
703 reg.addr = (uintptr_t)&fpscr;
704 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
705 if (ret < 0) {
706 DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
707 return ret;
708 } else {
709 env->fpscr = fpscr;
712 for (i = 0; i < 32; i++) {
713 uint64_t vsr[2];
715 reg.addr = (uintptr_t) &vsr;
716 reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
718 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
719 if (ret < 0) {
720 DPRINTF("Unable to get %s%d from KVM: %s\n",
721 vsx ? "VSR" : "FPR", i, strerror(errno));
722 return ret;
723 } else {
724 #ifdef HOST_WORDS_BIGENDIAN
725 env->fpr[i] = vsr[0];
726 if (vsx) {
727 env->vsr[i] = vsr[1];
729 #else
730 env->fpr[i] = vsr[1];
731 if (vsx) {
732 env->vsr[i] = vsr[0];
734 #endif
739 if (env->insns_flags & PPC_ALTIVEC) {
740 reg.id = KVM_REG_PPC_VSCR;
741 reg.addr = (uintptr_t)&env->vscr;
742 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
743 if (ret < 0) {
744 DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
745 return ret;
748 for (i = 0; i < 32; i++) {
749 reg.id = KVM_REG_PPC_VR(i);
750 reg.addr = (uintptr_t)&env->avr[i];
751 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
752 if (ret < 0) {
753 DPRINTF("Unable to get VR%d from KVM: %s\n",
754 i, strerror(errno));
755 return ret;
760 return 0;
763 #if defined(TARGET_PPC64)
764 static int kvm_get_vpa(CPUState *cs)
766 PowerPCCPU *cpu = POWERPC_CPU(cs);
767 CPUPPCState *env = &cpu->env;
768 struct kvm_one_reg reg;
769 int ret;
771 reg.id = KVM_REG_PPC_VPA_ADDR;
772 reg.addr = (uintptr_t)&env->vpa_addr;
773 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
774 if (ret < 0) {
775 DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
776 return ret;
779 assert((uintptr_t)&env->slb_shadow_size
780 == ((uintptr_t)&env->slb_shadow_addr + 8));
781 reg.id = KVM_REG_PPC_VPA_SLB;
782 reg.addr = (uintptr_t)&env->slb_shadow_addr;
783 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
784 if (ret < 0) {
785 DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
786 strerror(errno));
787 return ret;
790 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
791 reg.id = KVM_REG_PPC_VPA_DTL;
792 reg.addr = (uintptr_t)&env->dtl_addr;
793 ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
794 if (ret < 0) {
795 DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
796 strerror(errno));
797 return ret;
800 return 0;
803 static int kvm_put_vpa(CPUState *cs)
805 PowerPCCPU *cpu = POWERPC_CPU(cs);
806 CPUPPCState *env = &cpu->env;
807 struct kvm_one_reg reg;
808 int ret;
810 /* SLB shadow or DTL can't be registered unless a master VPA is
811 * registered. That means when restoring state, if a VPA *is*
812 * registered, we need to set that up first. If not, we need to
813 * deregister the others before deregistering the master VPA */
814 assert(env->vpa_addr || !(env->slb_shadow_addr || env->dtl_addr));
816 if (env->vpa_addr) {
817 reg.id = KVM_REG_PPC_VPA_ADDR;
818 reg.addr = (uintptr_t)&env->vpa_addr;
819 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
820 if (ret < 0) {
821 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
822 return ret;
826 assert((uintptr_t)&env->slb_shadow_size
827 == ((uintptr_t)&env->slb_shadow_addr + 8));
828 reg.id = KVM_REG_PPC_VPA_SLB;
829 reg.addr = (uintptr_t)&env->slb_shadow_addr;
830 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
831 if (ret < 0) {
832 DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
833 return ret;
836 assert((uintptr_t)&env->dtl_size == ((uintptr_t)&env->dtl_addr + 8));
837 reg.id = KVM_REG_PPC_VPA_DTL;
838 reg.addr = (uintptr_t)&env->dtl_addr;
839 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
840 if (ret < 0) {
841 DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
842 strerror(errno));
843 return ret;
846 if (!env->vpa_addr) {
847 reg.id = KVM_REG_PPC_VPA_ADDR;
848 reg.addr = (uintptr_t)&env->vpa_addr;
849 ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
850 if (ret < 0) {
851 DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
852 return ret;
856 return 0;
858 #endif /* TARGET_PPC64 */
860 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
862 CPUPPCState *env = &cpu->env;
863 struct kvm_sregs sregs;
864 int i;
866 sregs.pvr = env->spr[SPR_PVR];
868 sregs.u.s.sdr1 = env->spr[SPR_SDR1];
870 /* Sync SLB */
871 #ifdef TARGET_PPC64
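/* KVM expects the SLB slot index in the low bits of each valid ESID image
 * (kvmppc_get_books_sregs masks it back out on the way in). */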
872 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
873 sregs.u.s.ppc64.slb[i].slbe = env->slb[i].esid;
874 if (env->slb[i].esid & SLB_ESID_V) {
875 sregs.u.s.ppc64.slb[i].slbe |= i;
877 sregs.u.s.ppc64.slb[i].slbv = env->slb[i].vsid;
879 #endif
881 /* Sync SRs */
882 for (i = 0; i < 16; i++) {
883 sregs.u.s.ppc32.sr[i] = env->sr[i];
886 /* Sync BATs */
887 for (i = 0; i < 8; i++) {
888 /* Beware. We have to swap upper and lower bits here */
889 sregs.u.s.ppc32.dbat[i] = ((uint64_t)env->DBAT[0][i] << 32)
890 | env->DBAT[1][i];
891 sregs.u.s.ppc32.ibat[i] = ((uint64_t)env->IBAT[0][i] << 32)
892 | env->IBAT[1][i];
895 return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs);
898 int kvm_arch_put_registers(CPUState *cs, int level)
900 PowerPCCPU *cpu = POWERPC_CPU(cs);
901 CPUPPCState *env = &cpu->env;
902 struct kvm_regs regs;
903 int ret;
904 int i;
906 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
907 if (ret < 0) {
908 return ret;
911 regs.ctr = env->ctr;
912 regs.lr = env->lr;
913 regs.xer = cpu_read_xer(env);
914 regs.msr = env->msr;
915 regs.pc = env->nip;
917 regs.srr0 = env->spr[SPR_SRR0];
918 regs.srr1 = env->spr[SPR_SRR1];
920 regs.sprg0 = env->spr[SPR_SPRG0];
921 regs.sprg1 = env->spr[SPR_SPRG1];
922 regs.sprg2 = env->spr[SPR_SPRG2];
923 regs.sprg3 = env->spr[SPR_SPRG3];
924 regs.sprg4 = env->spr[SPR_SPRG4];
925 regs.sprg5 = env->spr[SPR_SPRG5];
926 regs.sprg6 = env->spr[SPR_SPRG6];
927 regs.sprg7 = env->spr[SPR_SPRG7];
929 regs.pid = env->spr[SPR_BOOKE_PID];
931 for (i = 0;i < 32; i++)
932 regs.gpr[i] = env->gpr[i];
934 regs.cr = 0;
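/* Pack the eight 4-bit CR fields into the single 32-bit register KVM
 * expects, with CR0 in the most significant nibble. */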
935 for (i = 0; i < 8; i++) {
936 regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
939 ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
940 if (ret < 0)
941 return ret;
943 kvm_put_fp(cs);
945 if (env->tlb_dirty) {
946 kvm_sw_tlb_put(cpu);
947 env->tlb_dirty = false;
950 if (cap_segstate && (level >= KVM_PUT_RESET_STATE)) {
951 ret = kvmppc_put_books_sregs(cpu);
952 if (ret < 0) {
953 return ret;
957 if (cap_hior && (level >= KVM_PUT_RESET_STATE)) {
958 kvm_put_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
961 if (cap_one_reg) {
962 int i;
964 /* We deliberately ignore errors here, for kernels which have
965 * the ONE_REG calls, but don't support the specific
966 * registers, there's a reasonable chance things will still
967 * work, at least until we try to migrate. */
968 for (i = 0; i < 1024; i++) {
969 uint64_t id = env->spr_cb[i].one_reg_id;
971 if (id != 0) {
972 kvm_put_one_spr(cs, id, i);
976 #ifdef TARGET_PPC64
977 if (msr_ts) {
978 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
979 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
981 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
982 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
984 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
985 kvm_set_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
986 kvm_set_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
987 kvm_set_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
988 kvm_set_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
989 kvm_set_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
990 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
991 kvm_set_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
992 kvm_set_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
993 kvm_set_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
996 if (cap_papr) {
997 if (kvm_put_vpa(cs) < 0) {
998 DPRINTF("Warning: Unable to set VPA information to KVM\n");
1002 kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1003 #endif /* TARGET_PPC64 */
1006 return ret;
1009 static void kvm_sync_excp(CPUPPCState *env, int vector, int ivor)
1011 env->excp_vectors[vector] = env->spr[ivor] + env->spr[SPR_BOOKE_IVPR];
1014 static int kvmppc_get_booke_sregs(PowerPCCPU *cpu)
1016 CPUPPCState *env = &cpu->env;
1017 struct kvm_sregs sregs;
1018 int ret;
1020 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1021 if (ret < 0) {
1022 return ret;
1025 if (sregs.u.e.features & KVM_SREGS_E_BASE) {
1026 env->spr[SPR_BOOKE_CSRR0] = sregs.u.e.csrr0;
1027 env->spr[SPR_BOOKE_CSRR1] = sregs.u.e.csrr1;
1028 env->spr[SPR_BOOKE_ESR] = sregs.u.e.esr;
1029 env->spr[SPR_BOOKE_DEAR] = sregs.u.e.dear;
1030 env->spr[SPR_BOOKE_MCSR] = sregs.u.e.mcsr;
1031 env->spr[SPR_BOOKE_TSR] = sregs.u.e.tsr;
1032 env->spr[SPR_BOOKE_TCR] = sregs.u.e.tcr;
1033 env->spr[SPR_DECR] = sregs.u.e.dec;
1034 env->spr[SPR_TBL] = sregs.u.e.tb & 0xffffffff;
1035 env->spr[SPR_TBU] = sregs.u.e.tb >> 32;
1036 env->spr[SPR_VRSAVE] = sregs.u.e.vrsave;
1039 if (sregs.u.e.features & KVM_SREGS_E_ARCH206) {
1040 env->spr[SPR_BOOKE_PIR] = sregs.u.e.pir;
1041 env->spr[SPR_BOOKE_MCSRR0] = sregs.u.e.mcsrr0;
1042 env->spr[SPR_BOOKE_MCSRR1] = sregs.u.e.mcsrr1;
1043 env->spr[SPR_BOOKE_DECAR] = sregs.u.e.decar;
1044 env->spr[SPR_BOOKE_IVPR] = sregs.u.e.ivpr;
1047 if (sregs.u.e.features & KVM_SREGS_E_64) {
1048 env->spr[SPR_BOOKE_EPCR] = sregs.u.e.epcr;
1051 if (sregs.u.e.features & KVM_SREGS_E_SPRG8) {
1052 env->spr[SPR_BOOKE_SPRG8] = sregs.u.e.sprg8;
1055 if (sregs.u.e.features & KVM_SREGS_E_IVOR) {
1056 env->spr[SPR_BOOKE_IVOR0] = sregs.u.e.ivor_low[0];
1057 kvm_sync_excp(env, POWERPC_EXCP_CRITICAL, SPR_BOOKE_IVOR0);
1058 env->spr[SPR_BOOKE_IVOR1] = sregs.u.e.ivor_low[1];
1059 kvm_sync_excp(env, POWERPC_EXCP_MCHECK, SPR_BOOKE_IVOR1);
1060 env->spr[SPR_BOOKE_IVOR2] = sregs.u.e.ivor_low[2];
1061 kvm_sync_excp(env, POWERPC_EXCP_DSI, SPR_BOOKE_IVOR2);
1062 env->spr[SPR_BOOKE_IVOR3] = sregs.u.e.ivor_low[3];
1063 kvm_sync_excp(env, POWERPC_EXCP_ISI, SPR_BOOKE_IVOR3);
1064 env->spr[SPR_BOOKE_IVOR4] = sregs.u.e.ivor_low[4];
1065 kvm_sync_excp(env, POWERPC_EXCP_EXTERNAL, SPR_BOOKE_IVOR4);
1066 env->spr[SPR_BOOKE_IVOR5] = sregs.u.e.ivor_low[5];
1067 kvm_sync_excp(env, POWERPC_EXCP_ALIGN, SPR_BOOKE_IVOR5);
1068 env->spr[SPR_BOOKE_IVOR6] = sregs.u.e.ivor_low[6];
1069 kvm_sync_excp(env, POWERPC_EXCP_PROGRAM, SPR_BOOKE_IVOR6);
1070 env->spr[SPR_BOOKE_IVOR7] = sregs.u.e.ivor_low[7];
1071 kvm_sync_excp(env, POWERPC_EXCP_FPU, SPR_BOOKE_IVOR7);
1072 env->spr[SPR_BOOKE_IVOR8] = sregs.u.e.ivor_low[8];
1073 kvm_sync_excp(env, POWERPC_EXCP_SYSCALL, SPR_BOOKE_IVOR8);
1074 env->spr[SPR_BOOKE_IVOR9] = sregs.u.e.ivor_low[9];
1075 kvm_sync_excp(env, POWERPC_EXCP_APU, SPR_BOOKE_IVOR9);
1076 env->spr[SPR_BOOKE_IVOR10] = sregs.u.e.ivor_low[10];
1077 kvm_sync_excp(env, POWERPC_EXCP_DECR, SPR_BOOKE_IVOR10);
1078 env->spr[SPR_BOOKE_IVOR11] = sregs.u.e.ivor_low[11];
1079 kvm_sync_excp(env, POWERPC_EXCP_FIT, SPR_BOOKE_IVOR11);
1080 env->spr[SPR_BOOKE_IVOR12] = sregs.u.e.ivor_low[12];
1081 kvm_sync_excp(env, POWERPC_EXCP_WDT, SPR_BOOKE_IVOR12);
1082 env->spr[SPR_BOOKE_IVOR13] = sregs.u.e.ivor_low[13];
1083 kvm_sync_excp(env, POWERPC_EXCP_DTLB, SPR_BOOKE_IVOR13);
1084 env->spr[SPR_BOOKE_IVOR14] = sregs.u.e.ivor_low[14];
1085 kvm_sync_excp(env, POWERPC_EXCP_ITLB, SPR_BOOKE_IVOR14);
1086 env->spr[SPR_BOOKE_IVOR15] = sregs.u.e.ivor_low[15];
1087 kvm_sync_excp(env, POWERPC_EXCP_DEBUG, SPR_BOOKE_IVOR15);
1089 if (sregs.u.e.features & KVM_SREGS_E_SPE) {
1090 env->spr[SPR_BOOKE_IVOR32] = sregs.u.e.ivor_high[0];
1091 kvm_sync_excp(env, POWERPC_EXCP_SPEU, SPR_BOOKE_IVOR32);
1092 env->spr[SPR_BOOKE_IVOR33] = sregs.u.e.ivor_high[1];
1093 kvm_sync_excp(env, POWERPC_EXCP_EFPDI, SPR_BOOKE_IVOR33);
1094 env->spr[SPR_BOOKE_IVOR34] = sregs.u.e.ivor_high[2];
1095 kvm_sync_excp(env, POWERPC_EXCP_EFPRI, SPR_BOOKE_IVOR34);
1098 if (sregs.u.e.features & KVM_SREGS_E_PM) {
1099 env->spr[SPR_BOOKE_IVOR35] = sregs.u.e.ivor_high[3];
1100 kvm_sync_excp(env, POWERPC_EXCP_EPERFM, SPR_BOOKE_IVOR35);
1103 if (sregs.u.e.features & KVM_SREGS_E_PC) {
1104 env->spr[SPR_BOOKE_IVOR36] = sregs.u.e.ivor_high[4];
1105 kvm_sync_excp(env, POWERPC_EXCP_DOORI, SPR_BOOKE_IVOR36);
1106 env->spr[SPR_BOOKE_IVOR37] = sregs.u.e.ivor_high[5];
1107 kvm_sync_excp(env, POWERPC_EXCP_DOORCI, SPR_BOOKE_IVOR37);
1111 if (sregs.u.e.features & KVM_SREGS_E_ARCH206_MMU) {
1112 env->spr[SPR_BOOKE_MAS0] = sregs.u.e.mas0;
1113 env->spr[SPR_BOOKE_MAS1] = sregs.u.e.mas1;
1114 env->spr[SPR_BOOKE_MAS2] = sregs.u.e.mas2;
1115 env->spr[SPR_BOOKE_MAS3] = sregs.u.e.mas7_3 & 0xffffffff;
1116 env->spr[SPR_BOOKE_MAS4] = sregs.u.e.mas4;
1117 env->spr[SPR_BOOKE_MAS6] = sregs.u.e.mas6;
1118 env->spr[SPR_BOOKE_MAS7] = sregs.u.e.mas7_3 >> 32;
1119 env->spr[SPR_MMUCFG] = sregs.u.e.mmucfg;
1120 env->spr[SPR_BOOKE_TLB0CFG] = sregs.u.e.tlbcfg[0];
1121 env->spr[SPR_BOOKE_TLB1CFG] = sregs.u.e.tlbcfg[1];
1124 if (sregs.u.e.features & KVM_SREGS_EXP) {
1125 env->spr[SPR_BOOKE_EPR] = sregs.u.e.epr;
1128 if (sregs.u.e.features & KVM_SREGS_E_PD) {
1129 env->spr[SPR_BOOKE_EPLC] = sregs.u.e.eplc;
1130 env->spr[SPR_BOOKE_EPSC] = sregs.u.e.epsc;
1133 if (sregs.u.e.impl_id == KVM_SREGS_E_IMPL_FSL) {
1134 env->spr[SPR_E500_SVR] = sregs.u.e.impl.fsl.svr;
1135 env->spr[SPR_Exxx_MCAR] = sregs.u.e.impl.fsl.mcar;
1136 env->spr[SPR_HID0] = sregs.u.e.impl.fsl.hid0;
1138 if (sregs.u.e.impl.fsl.features & KVM_SREGS_E_FSL_PIDn) {
1139 env->spr[SPR_BOOKE_PID1] = sregs.u.e.impl.fsl.pid1;
1140 env->spr[SPR_BOOKE_PID2] = sregs.u.e.impl.fsl.pid2;
1144 return 0;
1147 static int kvmppc_get_books_sregs(PowerPCCPU *cpu)
1149 CPUPPCState *env = &cpu->env;
1150 struct kvm_sregs sregs;
1151 int ret;
1152 int i;
1154 ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs);
1155 if (ret < 0) {
1156 return ret;
1159 if (!cpu->vhyp) {
1160 ppc_store_sdr1(env, sregs.u.s.sdr1);
1163 /* Sync SLB */
1164 #ifdef TARGET_PPC64
1166 * The packed SLB array we get from KVM_GET_SREGS only contains
1167 * information about valid entries. So we flush our internal copy
1168 * to get rid of stale ones, then put all valid SLB entries back
1169 * in.
1171 memset(env->slb, 0, sizeof(env->slb));
1172 for (i = 0; i < ARRAY_SIZE(env->slb); i++) {
1173 target_ulong rb = sregs.u.s.ppc64.slb[i].slbe;
1174 target_ulong rs = sregs.u.s.ppc64.slb[i].slbv;
1176 * Only restore valid entries
1178 if (rb & SLB_ESID_V) {
1179 ppc_store_slb(cpu, rb & 0xfff, rb & ~0xfffULL, rs);
1182 #endif
1184 /* Sync SRs */
1185 for (i = 0; i < 16; i++) {
1186 env->sr[i] = sregs.u.s.ppc32.sr[i];
1189 /* Sync BATs */
1190 for (i = 0; i < 8; i++) {
1191 env->DBAT[0][i] = sregs.u.s.ppc32.dbat[i] & 0xffffffff;
1192 env->DBAT[1][i] = sregs.u.s.ppc32.dbat[i] >> 32;
1193 env->IBAT[0][i] = sregs.u.s.ppc32.ibat[i] & 0xffffffff;
1194 env->IBAT[1][i] = sregs.u.s.ppc32.ibat[i] >> 32;
1197 return 0;
1200 int kvm_arch_get_registers(CPUState *cs)
1202 PowerPCCPU *cpu = POWERPC_CPU(cs);
1203 CPUPPCState *env = &cpu->env;
1204 struct kvm_regs regs;
1205 uint32_t cr;
1206 int i, ret;
1208 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
1209 if (ret < 0)
1210 return ret;
1212 cr = regs.cr;
1213 for (i = 7; i >= 0; i--) {
1214 env->crf[i] = cr & 15;
1215 cr >>= 4;
1218 env->ctr = regs.ctr;
1219 env->lr = regs.lr;
1220 cpu_write_xer(env, regs.xer);
1221 env->msr = regs.msr;
1222 env->nip = regs.pc;
1224 env->spr[SPR_SRR0] = regs.srr0;
1225 env->spr[SPR_SRR1] = regs.srr1;
1227 env->spr[SPR_SPRG0] = regs.sprg0;
1228 env->spr[SPR_SPRG1] = regs.sprg1;
1229 env->spr[SPR_SPRG2] = regs.sprg2;
1230 env->spr[SPR_SPRG3] = regs.sprg3;
1231 env->spr[SPR_SPRG4] = regs.sprg4;
1232 env->spr[SPR_SPRG5] = regs.sprg5;
1233 env->spr[SPR_SPRG6] = regs.sprg6;
1234 env->spr[SPR_SPRG7] = regs.sprg7;
1236 env->spr[SPR_BOOKE_PID] = regs.pid;
1238 for (i = 0;i < 32; i++)
1239 env->gpr[i] = regs.gpr[i];
1241 kvm_get_fp(cs);
1243 if (cap_booke_sregs) {
1244 ret = kvmppc_get_booke_sregs(cpu);
1245 if (ret < 0) {
1246 return ret;
1250 if (cap_segstate) {
1251 ret = kvmppc_get_books_sregs(cpu);
1252 if (ret < 0) {
1253 return ret;
1257 if (cap_hior) {
1258 kvm_get_one_spr(cs, KVM_REG_PPC_HIOR, SPR_HIOR);
1261 if (cap_one_reg) {
1262 int i;
1264 /* We deliberately ignore errors here, for kernels which have
1265 * the ONE_REG calls, but don't support the specific
1266 * registers, there's a reasonable chance things will still
1267 * work, at least until we try to migrate. */
1268 for (i = 0; i < 1024; i++) {
1269 uint64_t id = env->spr_cb[i].one_reg_id;
1271 if (id != 0) {
1272 kvm_get_one_spr(cs, id, i);
1276 #ifdef TARGET_PPC64
1277 if (msr_ts) {
1278 for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
1279 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
1281 for (i = 0; i < ARRAY_SIZE(env->tm_vsr); i++) {
1282 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSR(i), &env->tm_vsr[i]);
1284 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CR, &env->tm_cr);
1285 kvm_get_one_reg(cs, KVM_REG_PPC_TM_LR, &env->tm_lr);
1286 kvm_get_one_reg(cs, KVM_REG_PPC_TM_CTR, &env->tm_ctr);
1287 kvm_get_one_reg(cs, KVM_REG_PPC_TM_FPSCR, &env->tm_fpscr);
1288 kvm_get_one_reg(cs, KVM_REG_PPC_TM_AMR, &env->tm_amr);
1289 kvm_get_one_reg(cs, KVM_REG_PPC_TM_PPR, &env->tm_ppr);
1290 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VRSAVE, &env->tm_vrsave);
1291 kvm_get_one_reg(cs, KVM_REG_PPC_TM_VSCR, &env->tm_vscr);
1292 kvm_get_one_reg(cs, KVM_REG_PPC_TM_DSCR, &env->tm_dscr);
1293 kvm_get_one_reg(cs, KVM_REG_PPC_TM_TAR, &env->tm_tar);
1296 if (cap_papr) {
1297 if (kvm_get_vpa(cs) < 0) {
1298 DPRINTF("Warning: Unable to get VPA information from KVM\n");
1302 kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
1303 #endif
1306 return 0;
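/* Raise or lower the external interrupt line for a vCPU. Only
 * PPC_INTERRUPT_EXT is forwarded to KVM, and only when the kernel
 * supports the unset/level interrupt capabilities. */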
1309 int kvmppc_set_interrupt(PowerPCCPU *cpu, int irq, int level)
1311 unsigned virq = level ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
1313 if (irq != PPC_INTERRUPT_EXT) {
1314 return 0;
1317 if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
1318 return 0;
1321 kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, &virq);
1323 return 0;
1326 #if defined(TARGET_PPCEMB)
1327 #define PPC_INPUT_INT PPC40x_INPUT_INT
1328 #elif defined(TARGET_PPC64)
1329 #define PPC_INPUT_INT PPC970_INPUT_INT
1330 #else
1331 #define PPC_INPUT_INT PPC6xx_INPUT_INT
1332 #endif
1334 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
1336 PowerPCCPU *cpu = POWERPC_CPU(cs);
1337 CPUPPCState *env = &cpu->env;
1338 int r;
1339 unsigned irq;
1341 qemu_mutex_lock_iothread();
1343 /* PowerPC QEMU tracks the various core input pins (interrupt, critical
1344 * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
1345 if (!cap_interrupt_level &&
1346 run->ready_for_interrupt_injection &&
1347 (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
1348 (env->irq_input_state & (1<<PPC_INPUT_INT)))
1350 /* For now KVM disregards the 'irq' argument. However, in the
1351 * future KVM could cache it in-kernel to avoid a heavyweight exit
1352 * when reading the UIC.
1354 irq = KVM_INTERRUPT_SET;
1356 DPRINTF("injected interrupt %d\n", irq);
1357 r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
1358 if (r < 0) {
1359 printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
1362 /* Always wake up soon in case the interrupt was level based */
1363 timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
1364 (NANOSECONDS_PER_SECOND / 50));
1367 /* We don't know if there are more interrupts pending after this. However,
1368 * the guest will return to userspace in the course of handling this one
1369 * anyways, so we will get a chance to deliver the rest. */
1371 qemu_mutex_unlock_iothread();
1374 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
1376 return MEMTXATTRS_UNSPECIFIED;
1379 int kvm_arch_process_async_events(CPUState *cs)
1381 return cs->halted;
1384 static int kvmppc_handle_halt(PowerPCCPU *cpu)
1386 CPUState *cs = CPU(cpu);
1387 CPUPPCState *env = &cpu->env;
1389 if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
1390 cs->halted = 1;
1391 cs->exception_index = EXCP_HLT;
1394 return 0;
1397 /* map dcr access to existing qemu dcr emulation */
1398 static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
1400 if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
1401 fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
1403 return 0;
1406 static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
1408 if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
1409 fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
1411 return 0;
1414 int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1416 /* Mixed endian case is not handled */
1417 uint32_t sc = debug_inst_opcode;
1419 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1420 sizeof(sc), 0) ||
1421 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 1)) {
1422 return -EINVAL;
1425 return 0;
1428 int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp)
1430 uint32_t sc;
1432 if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&sc, sizeof(sc), 0) ||
1433 sc != debug_inst_opcode ||
1434 cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn,
1435 sizeof(sc), 1)) {
1436 return -EINVAL;
1439 return 0;
1442 static int find_hw_breakpoint(target_ulong addr, int type)
1444 int n;
1446 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1447 <= ARRAY_SIZE(hw_debug_points));
1449 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1450 if (hw_debug_points[n].addr == addr &&
1451 hw_debug_points[n].type == type) {
1452 return n;
1456 return -1;
1459 static int find_hw_watchpoint(target_ulong addr, int *flag)
1461 int n;
1463 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_ACCESS);
1464 if (n >= 0) {
1465 *flag = BP_MEM_ACCESS;
1466 return n;
1469 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_WRITE);
1470 if (n >= 0) {
1471 *flag = BP_MEM_WRITE;
1472 return n;
1475 n = find_hw_breakpoint(addr, GDB_WATCHPOINT_READ);
1476 if (n >= 0) {
1477 *flag = BP_MEM_READ;
1478 return n;
1481 return -1;
1484 int kvm_arch_insert_hw_breakpoint(target_ulong addr,
1485 target_ulong len, int type)
1487 if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
1488 return -ENOBUFS;
1491 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
1492 hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
1494 switch (type) {
1495 case GDB_BREAKPOINT_HW:
1496 if (nb_hw_breakpoint >= max_hw_breakpoint) {
1497 return -ENOBUFS;
1500 if (find_hw_breakpoint(addr, type) >= 0) {
1501 return -EEXIST;
1504 nb_hw_breakpoint++;
1505 break;
1507 case GDB_WATCHPOINT_WRITE:
1508 case GDB_WATCHPOINT_READ:
1509 case GDB_WATCHPOINT_ACCESS:
1510 if (nb_hw_watchpoint >= max_hw_watchpoint) {
1511 return -ENOBUFS;
1514 if (find_hw_breakpoint(addr, type) >= 0) {
1515 return -EEXIST;
1518 nb_hw_watchpoint++;
1519 break;
1521 default:
1522 return -ENOSYS;
1525 return 0;
1528 int kvm_arch_remove_hw_breakpoint(target_ulong addr,
1529 target_ulong len, int type)
1531 int n;
1533 n = find_hw_breakpoint(addr, type);
1534 if (n < 0) {
1535 return -ENOENT;
1538 switch (type) {
1539 case GDB_BREAKPOINT_HW:
1540 nb_hw_breakpoint--;
1541 break;
1543 case GDB_WATCHPOINT_WRITE:
1544 case GDB_WATCHPOINT_READ:
1545 case GDB_WATCHPOINT_ACCESS:
1546 nb_hw_watchpoint--;
1547 break;
1549 default:
1550 return -ENOSYS;
1552 hw_debug_points[n] = hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint];
1554 return 0;
1557 void kvm_arch_remove_all_hw_breakpoints(void)
1559 nb_hw_breakpoint = nb_hw_watchpoint = 0;
1562 void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
1564 int n;
1566 /* Software Breakpoint updates */
1567 if (kvm_sw_breakpoints_active(cs)) {
1568 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
1571 assert((nb_hw_breakpoint + nb_hw_watchpoint)
1572 <= ARRAY_SIZE(hw_debug_points));
1573 assert((nb_hw_breakpoint + nb_hw_watchpoint) <= ARRAY_SIZE(dbg->arch.bp));
1575 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1576 dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
1577 memset(dbg->arch.bp, 0, sizeof(dbg->arch.bp));
1578 for (n = 0; n < nb_hw_breakpoint + nb_hw_watchpoint; n++) {
1579 switch (hw_debug_points[n].type) {
1580 case GDB_BREAKPOINT_HW:
1581 dbg->arch.bp[n].type = KVMPPC_DEBUG_BREAKPOINT;
1582 break;
1583 case GDB_WATCHPOINT_WRITE:
1584 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE;
1585 break;
1586 case GDB_WATCHPOINT_READ:
1587 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_READ;
1588 break;
1589 case GDB_WATCHPOINT_ACCESS:
1590 dbg->arch.bp[n].type = KVMPPC_DEBUG_WATCH_WRITE |
1591 KVMPPC_DEBUG_WATCH_READ;
1592 break;
1593 default:
1594 cpu_abort(cs, "Unsupported breakpoint type\n");
1596 dbg->arch.bp[n].addr = hw_debug_points[n].addr;
1601 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
1603 CPUState *cs = CPU(cpu);
1604 CPUPPCState *env = &cpu->env;
1605 struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
1606 int handle = 0;
1607 int n;
1608 int flag = 0;
1610 if (cs->singlestep_enabled) {
1611 handle = 1;
1612 } else if (arch_info->status) {
1613 if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
1614 if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
1615 n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
1616 if (n >= 0) {
1617 handle = 1;
1619 } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
1620 KVMPPC_DEBUG_WATCH_WRITE)) {
1621 n = find_hw_watchpoint(arch_info->address, &flag);
1622 if (n >= 0) {
1623 handle = 1;
1624 cs->watchpoint_hit = &hw_watchpoint;
1625 hw_watchpoint.vaddr = hw_debug_points[n].addr;
1626 hw_watchpoint.flags = flag;
1630 } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
1631 handle = 1;
1632 } else {
1633 /* QEMU is not able to handle debug exception, so inject
1634 * program exception to guest;
1635 * Yes program exception NOT debug exception !!
1636 * When QEMU is using debug resources then debug exception must
1637 * be always set. To achieve this we set MSR_DE and also set
1638 * MSRP_DEP so guest cannot change MSR_DE.
1639 * When emulating debug resource for guest we want guest
1640 * to control MSR_DE (enable/disable debug interrupt on need).
1641 * Supporting both configurations is NOT possible.
1642 * So the result is that we cannot share debug resources
1643 * between QEMU and Guest on BOOKE architecture.
1644 * In the current design QEMU gets the priority over guest,
1645 * this means that if QEMU is using debug resources then guest
1646 * cannot use them;
1647 * For software breakpoint QEMU uses a privileged instruction;
1648 * So there cannot be any reason that we are here for a guest-set
1649 * debug exception; the only possibility is that the guest executed
1650 * a privileged / illegal instruction, and that's why we are
1651 * injecting a program interrupt.
1654 cpu_synchronize_state(cs);
1655 /* env->nip is PC, so increment this by 4 to use
1656 * ppc_cpu_do_interrupt(), which sets srr0 = env->nip - 4.
1658 env->nip += 4;
1659 cs->exception_index = POWERPC_EXCP_PROGRAM;
1660 env->error_code = POWERPC_EXCP_INVAL;
1661 ppc_cpu_do_interrupt(cs);
1664 return handle;
1667 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
1669 PowerPCCPU *cpu = POWERPC_CPU(cs);
1670 CPUPPCState *env = &cpu->env;
1671 int ret;
1673 qemu_mutex_lock_iothread();
1675 switch (run->exit_reason) {
1676 case KVM_EXIT_DCR:
1677 if (run->dcr.is_write) {
1678 DPRINTF("handle dcr write\n");
1679 ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
1680 } else {
1681 DPRINTF("handle dcr read\n");
1682 ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
1684 break;
1685 case KVM_EXIT_HLT:
1686 DPRINTF("handle halt\n");
1687 ret = kvmppc_handle_halt(cpu);
1688 break;
1689 #if defined(TARGET_PPC64)
1690 case KVM_EXIT_PAPR_HCALL:
1691 DPRINTF("handle PAPR hypercall\n");
1692 run->papr_hcall.ret = spapr_hypercall(cpu,
1693 run->papr_hcall.nr,
1694 run->papr_hcall.args);
1695 ret = 0;
1696 break;
1697 #endif
1698 case KVM_EXIT_EPR:
1699 DPRINTF("handle epr\n");
1700 run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
1701 ret = 0;
1702 break;
1703 case KVM_EXIT_WATCHDOG:
1704 DPRINTF("handle watchdog expiry\n");
1705 watchdog_perform_action();
1706 ret = 0;
1707 break;
1709 case KVM_EXIT_DEBUG:
1710 DPRINTF("handle debug exception\n");
1711 if (kvm_handle_debug(cpu, run)) {
1712 ret = EXCP_DEBUG;
1713 break;
1715 /* re-enter, this exception was guest-internal */
1716 ret = 0;
1717 break;
1719 default:
1720 fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
1721 ret = -1;
1722 break;
1725 qemu_mutex_unlock_iothread();
1726 return ret;
1729 int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1731 CPUState *cs = CPU(cpu);
1732 uint32_t bits = tsr_bits;
1733 struct kvm_one_reg reg = {
1734 .id = KVM_REG_PPC_OR_TSR,
1735 .addr = (uintptr_t) &bits,
1738 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1741 int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
1744 CPUState *cs = CPU(cpu);
1745 uint32_t bits = tsr_bits;
1746 struct kvm_one_reg reg = {
1747 .id = KVM_REG_PPC_CLEAR_TSR,
1748 .addr = (uintptr_t) &bits,
1751 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1754 int kvmppc_set_tcr(PowerPCCPU *cpu)
1756 CPUState *cs = CPU(cpu);
1757 CPUPPCState *env = &cpu->env;
1758 uint32_t tcr = env->spr[SPR_BOOKE_TCR];
1760 struct kvm_one_reg reg = {
1761 .id = KVM_REG_PPC_TCR,
1762 .addr = (uintptr_t) &tcr,
1765 return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
1768 int kvmppc_booke_watchdog_enable(PowerPCCPU *cpu)
1770 CPUState *cs = CPU(cpu);
1771 int ret;
1773 if (!kvm_enabled()) {
1774 return -1;
1777 if (!cap_ppc_watchdog) {
1778 printf("warning: KVM does not support watchdog");
1779 return -1;
1782 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_BOOKE_WATCHDOG, 0);
1783 if (ret < 0) {
1784 fprintf(stderr, "%s: couldn't enable KVM_CAP_PPC_BOOKE_WATCHDOG: %s\n",
1785 __func__, strerror(-ret));
1786 return ret;
1789 return ret;
1792 static int read_cpuinfo(const char *field, char *value, int len)
1794 FILE *f;
1795 int ret = -1;
1796 int field_len = strlen(field);
1797 char line[512];
1799 f = fopen("/proc/cpuinfo", "r");
1800 if (!f) {
1801 return -1;
1804 do {
1805 if (!fgets(line, sizeof(line), f)) {
1806 break;
1808 if (!strncmp(line, field, field_len)) {
1809 pstrcpy(value, len, line);
1810 ret = 0;
1811 break;
1813 } while(*line);
1815 fclose(f);
1817 return ret;
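/* Parse the timebase frequency from the "timebase" line of /proc/cpuinfo;
 * fall back to 1 GHz if it cannot be determined. */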
1820 uint32_t kvmppc_get_tbfreq(void)
1822 char line[512];
1823 char *ns;
1824 uint32_t retval = NANOSECONDS_PER_SECOND;
1826 if (read_cpuinfo("timebase", line, sizeof(line))) {
1827 return retval;
1830 if (!(ns = strchr(line, ':'))) {
1831 return retval;
1834 ns++;
1836 return atoi(ns);
1839 bool kvmppc_get_host_serial(char **value)
1841 return g_file_get_contents("/proc/device-tree/system-id", value, NULL,
1842 NULL);
1845 bool kvmppc_get_host_model(char **value)
1847 return g_file_get_contents("/proc/device-tree/model", value, NULL, NULL);
1850 /* Try to find a device tree node for a CPU with clock-frequency property */
1851 static int kvmppc_find_cpu_dt(char *buf, int buf_len)
1853 struct dirent *dirp;
1854 DIR *dp;
1856 if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
1857 printf("Can't open directory " PROC_DEVTREE_CPU "\n");
1858 return -1;
1861 buf[0] = '\0';
1862 while ((dirp = readdir(dp)) != NULL) {
1863 FILE *f;
1864 snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
1865 dirp->d_name);
1866 f = fopen(buf, "r");
1867 if (f) {
1868 snprintf(buf, buf_len, "%s%s", PROC_DEVTREE_CPU, dirp->d_name);
1869 fclose(f);
1870 break;
1872 buf[0] = '\0';
1874 closedir(dp);
1875 if (buf[0] == '\0') {
1876 printf("Unknown host!\n");
1877 return -1;
1880 return 0;
1883 static uint64_t kvmppc_read_int_dt(const char *filename)
1885 union {
1886 uint32_t v32;
1887 uint64_t v64;
1888 } u;
1889 FILE *f;
1890 int len;
1892 f = fopen(filename, "rb");
1893 if (!f) {
1894 return -1;
1897 len = fread(&u, 1, sizeof(u), f);
1898 fclose(f);
1899 switch (len) {
1900 case 4:
1901 /* property is a 32-bit quantity */
1902 return be32_to_cpu(u.v32);
1903 case 8:
1904 return be64_to_cpu(u.v64);
1907 return 0;
1910 /* Read a CPU node property from the host device tree that's a single
1911 * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
1912 * (can't find or open the property, or doesn't understand the
1913 * format) */
1914 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
1916 char buf[PATH_MAX], *tmp;
1917 uint64_t val;
1919 if (kvmppc_find_cpu_dt(buf, sizeof(buf))) {
1920 return -1;
1923 tmp = g_strdup_printf("%s/%s", buf, propname);
1924 val = kvmppc_read_int_dt(tmp);
1925 g_free(tmp);
1927 return val;
1930 uint64_t kvmppc_get_clockfreq(void)
1932 return kvmppc_read_int_cpu_dt("clock-frequency");
1935 uint32_t kvmppc_get_vmx(void)
1937 return kvmppc_read_int_cpu_dt("ibm,vmx");
1940 uint32_t kvmppc_get_dfp(void)
1942 return kvmppc_read_int_cpu_dt("ibm,dfp");
1945 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
1947 PowerPCCPU *cpu = ppc_env_get_cpu(env);
1948 CPUState *cs = CPU(cpu);
1950 if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
1951 !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
1952 return 0;
1955 return 1;
1958 int kvmppc_get_hasidle(CPUPPCState *env)
1960 struct kvm_ppc_pvinfo pvinfo;
1962 if (!kvmppc_get_pvinfo(env, &pvinfo) &&
1963 (pvinfo.flags & KVM_PPC_PVINFO_FLAGS_EV_IDLE)) {
1964 return 1;
1967 return 0;
1970 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
1972 uint32_t *hc = (uint32_t*)buf;
1973 struct kvm_ppc_pvinfo pvinfo;
1975 if (!kvmppc_get_pvinfo(env, &pvinfo)) {
1976 memcpy(buf, pvinfo.hcall, buf_len);
1977 return 0;
1981 * Fallback to always fail hypercalls regardless of endianness:
1983 * tdi 0,r0,72 (becomes b .+8 in wrong endian, nop in good endian)
1984 * li r3, -1
1985 * b .+8 (becomes nop in wrong endian)
1986 * bswap32(li r3, -1)
1989 hc[0] = cpu_to_be32(0x08000048);
1990 hc[1] = cpu_to_be32(0x3860ffff);
1991 hc[2] = cpu_to_be32(0x48000008);
1992 hc[3] = cpu_to_be32(bswap32(0x3860ffff));
1994 return 1;
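/* Enable in-kernel handling of a single sPAPR hypercall via
 * KVM_CAP_PPC_ENABLE_HCALL. */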
1997 static inline int kvmppc_enable_hcall(KVMState *s, target_ulong hcall)
1999 return kvm_vm_enable_cap(s, KVM_CAP_PPC_ENABLE_HCALL, 0, hcall, 1);
2002 void kvmppc_enable_logical_ci_hcalls(void)
2005 * FIXME: it would be nice if we could detect the cases where
2006 * we're using a device which requires the in kernel
2007 * implementation of these hcalls, but the kernel lacks them and
2008 * produce a warning.
2010 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_LOAD);
2011 kvmppc_enable_hcall(kvm_state, H_LOGICAL_CI_STORE);
2014 void kvmppc_enable_set_mode_hcall(void)
2016 kvmppc_enable_hcall(kvm_state, H_SET_MODE);
2019 void kvmppc_enable_clear_ref_mod_hcalls(void)
2021 kvmppc_enable_hcall(kvm_state, H_CLEAR_REF);
2022 kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
2025 void kvmppc_set_papr(PowerPCCPU *cpu)
2027 CPUState *cs = CPU(cpu);
2028 int ret;
2030 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_PAPR, 0);
2031 if (ret) {
2032 error_report("This vCPU type or KVM version does not support PAPR");
2033 exit(1);
2036 /* Update the capability flag so we sync the right information
2037 * with kvm */
2038 cap_papr = 1;
2041 int kvmppc_set_compat(PowerPCCPU *cpu, uint32_t compat_pvr)
2043 return kvm_set_one_reg(CPU(cpu), KVM_REG_PPC_ARCH_COMPAT, &compat_pvr);
2046 void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
2048 CPUState *cs = CPU(cpu);
2049 int ret;
2051 ret = kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_EPR, 0, mpic_proxy);
2052 if (ret && mpic_proxy) {
2053 error_report("This KVM version does not support EPR");
2054 exit(1);
2058 int kvmppc_smt_threads(void)
2060 return cap_ppc_smt ? cap_ppc_smt : 1;
2063 #ifdef TARGET_PPC64
2064 off_t kvmppc_alloc_rma(void **rma)
2066 off_t size;
2067 int fd;
2068 struct kvm_allocate_rma ret;
2070 /* If cap_ppc_rma == 0, contiguous RMA allocation is not supported
2071 * if cap_ppc_rma == 1, contiguous RMA allocation is supported, but
2072 * not necessary on this hardware
2073 * if cap_ppc_rma == 2, contiguous RMA allocation is needed on this hardware
2075 * FIXME: We should allow the user to force contiguous RMA
2076 * allocation in the cap_ppc_rma==1 case.
2078 if (cap_ppc_rma < 2) {
2079 return 0;
2082 fd = kvm_vm_ioctl(kvm_state, KVM_ALLOCATE_RMA, &ret);
2083 if (fd < 0) {
2084 fprintf(stderr, "KVM: Error on KVM_ALLOCATE_RMA: %s\n",
2085 strerror(errno));
2086 return -1;
2089 size = MIN(ret.rma_size, 256ul << 20);
2091 *rma = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
2092 if (*rma == MAP_FAILED) {
2093 fprintf(stderr, "KVM: Error mapping RMA: %s\n", strerror(errno));
2094 return -1;
2097 return size;
2100 uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
2102 struct kvm_ppc_smmu_info info;
2103 long rampagesize, best_page_shift;
2104 int i;
2106 if (cap_ppc_rma >= 2) {
2107 return current_size;
2110 /* Find the largest hardware supported page size that's less than
2111 * or equal to the (logical) backing page size of guest RAM */
2112 kvm_get_smmu_info(POWERPC_CPU(first_cpu), &info);
2113 rampagesize = qemu_getrampagesize();
2114 best_page_shift = 0;
2116 for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
2117 struct kvm_ppc_one_seg_page_size *sps = &info.sps[i];
2119 if (!sps->page_shift) {
2120 continue;
2123 if ((sps->page_shift > best_page_shift)
2124 && ((1UL << sps->page_shift) <= rampagesize)) {
2125 best_page_shift = sps->page_shift;
2129 return MIN(current_size,
2130 1ULL << (best_page_shift + hash_shift - 7));
2132 #endif
2134 bool kvmppc_spapr_use_multitce(void)
2136 return cap_spapr_multitce;
void *kvmppc_create_spapr_tce(uint32_t liobn, uint32_t window_size, int *pfd,
                              bool need_vfio)
{
    struct kvm_create_spapr_tce args = {
        .liobn = liobn,
        .window_size = window_size,
    };
    long len;
    int fd;
    void *table;

    /* Must set fd to -1 so we don't try to munmap when called for
     * destroying the table, which the upper layers -will- do
     */
    *pfd = -1;
    if (!cap_spapr_tce || (need_vfio && !cap_spapr_vfio)) {
        return NULL;
    }

    fd = kvm_vm_ioctl(kvm_state, KVM_CREATE_SPAPR_TCE, &args);
    if (fd < 0) {
        fprintf(stderr, "KVM: Failed to create TCE table for liobn 0x%x\n",
                liobn);
        return NULL;
    }

    len = (window_size / SPAPR_TCE_PAGE_SIZE) * sizeof(uint64_t);
    /* FIXME: round this up to page size */

    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
    if (table == MAP_FAILED) {
        fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                liobn);
        close(fd);
        return NULL;
    }

    *pfd = fd;
    return table;
}

int kvmppc_remove_spapr_tce(void *table, int fd, uint32_t nb_table)
{
    long len;

    if (fd < 0) {
        return -1;
    }

    len = nb_table * sizeof(uint64_t);
    if ((munmap(table, len) < 0) ||
        (close(fd) < 0)) {
        fprintf(stderr, "KVM: Unexpected error removing TCE table: %s",
                strerror(errno));
        /* Leak the table */
    }

    return 0;
}

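/* Allocate or reset the guest hash page table in the kernel.  Returns the
 * log2 size (shift) of a kernel-managed HPT, 0 if QEMU must allocate the
 * HPT itself, or a negative error code */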
int kvmppc_reset_htab(int shift_hint)
{
    uint32_t shift = shift_hint;

    if (!kvm_enabled()) {
        /* Full emulation, tell caller to allocate htab itself */
        return 0;
    }
    if (kvm_check_extension(kvm_state, KVM_CAP_PPC_ALLOC_HTAB)) {
        int ret;

        ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
        if (ret == -ENOTTY) {
            /* At least some versions of PR KVM advertise the
             * capability, but don't implement the ioctl().  Oops.
             * Return 0 so that we allocate the htab in qemu, as is
             * correct for PR. */
            return 0;
        } else if (ret < 0) {
            return ret;
        }
        return shift;
    }

    /* We have a kernel that predates the htab reset calls.  For PR
     * KVM, we need to allocate the htab ourselves, for an HV KVM of
     * this era, it has allocated a 16MB fixed size hash table already. */
    if (kvmppc_is_pr(kvm_state)) {
        /* PR - tell caller to allocate htab */
        return 0;
    } else {
        /* HV - assume 16MB kernel allocated htab */
        return 24;
    }
}

static inline uint32_t mfpvr(void)
{
    uint32_t pvr;

    asm ("mfpvr %0"
         : "=r"(pvr));
    return pvr;
}

static void alter_insns(uint64_t *word, uint64_t flags, bool on)
{
    if (on) {
        *word |= flags;
    } else {
        *word &= ~flags;
    }
}

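/* Fix up the "host" CPU class with whatever we can query from the host:
 * the real PVR, Altivec/VSX and DFP availability, and the L1 cache sizes */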
static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
{
    PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
    uint32_t vmx = kvmppc_get_vmx();
    uint32_t dfp = kvmppc_get_dfp();
    uint32_t dcache_size = kvmppc_read_int_cpu_dt("d-cache-size");
    uint32_t icache_size = kvmppc_read_int_cpu_dt("i-cache-size");

    /* Now fix up the class with information we can query from the host */
    pcc->pvr = mfpvr();

    if (vmx != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags, PPC_ALTIVEC, vmx > 0);
        alter_insns(&pcc->insns_flags2, PPC2_VSX, vmx > 1);
    }
    if (dfp != -1) {
        /* Only override when we know what the host supports */
        alter_insns(&pcc->insns_flags2, PPC2_DFP, dfp);
    }

    if (dcache_size != -1) {
        pcc->l1_dcache_size = dcache_size;
    }

    if (icache_size != -1) {
        pcc->l1_icache_size = icache_size;
    }
}

bool kvmppc_has_cap_epr(void)
{
    return cap_epr;
}

bool kvmppc_has_cap_htab_fd(void)
{
    return cap_htab_fd;
}

bool kvmppc_has_cap_fixup_hcalls(void)
{
    return cap_fixup_hcalls;
}

bool kvmppc_has_cap_htm(void)
{
    return cap_htm;
}

static PowerPCCPUClass *ppc_cpu_get_family_class(PowerPCCPUClass *pcc)
{
    ObjectClass *oc = OBJECT_CLASS(pcc);

    while (oc && !object_class_is_abstract(oc)) {
        oc = object_class_get_parent(oc);
    }
    assert(oc);

    return POWERPC_CPU_CLASS(oc);
}

PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
{
    uint32_t host_pvr = mfpvr();
    PowerPCCPUClass *pvr_pcc;

    pvr_pcc = ppc_cpu_class_by_pvr(host_pvr);
    if (pvr_pcc == NULL) {
        pvr_pcc = ppc_cpu_class_by_pvr_mask(host_pvr);
    }

    return pvr_pcc;
}

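/* Register the "host" CPU type (and, for pseries, the matching spapr core
 * type) derived from the host PVR, and point the generic family alias
 * (e.g. "POWER8") at it */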
static int kvm_ppc_register_host_cpu_type(void)
{
    TypeInfo type_info = {
        .name = TYPE_HOST_POWERPC_CPU,
        .class_init = kvmppc_host_cpu_class_init,
    };
    PowerPCCPUClass *pvr_pcc;
    DeviceClass *dc;
    int i;

    pvr_pcc = kvm_ppc_get_host_cpu_class();
    if (pvr_pcc == NULL) {
        return -1;
    }
    type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
    type_register(&type_info);

#if defined(TARGET_PPC64)
    type_info.name = g_strdup_printf("%s-"TYPE_SPAPR_CPU_CORE, "host");
    type_info.parent = TYPE_SPAPR_CPU_CORE;
    type_info.instance_size = sizeof(sPAPRCPUCore);
    type_info.instance_init = NULL;
    type_info.class_init = spapr_cpu_core_class_init;
    type_info.class_data = (void *) "host";
    type_register(&type_info);
    g_free((void *)type_info.name);
#endif

    /*
     * Update generic CPU family class alias (e.g. on a POWER8NVL host,
     * we want "POWER8" to be a "family" alias that points to the current
     * host CPU type, too)
     */
    dc = DEVICE_CLASS(ppc_cpu_get_family_class(pvr_pcc));
    for (i = 0; ppc_cpu_aliases[i].alias != NULL; i++) {
        if (strcmp(ppc_cpu_aliases[i].alias, dc->desc) == 0) {
            ObjectClass *oc = OBJECT_CLASS(pvr_pcc);
            char *suffix;

            ppc_cpu_aliases[i].model = g_strdup(object_class_get_name(oc));
            suffix = strstr(ppc_cpu_aliases[i].model, "-"TYPE_POWERPC_CPU);
            if (suffix) {
                *suffix = 0;
            }
            ppc_cpu_aliases[i].oc = oc;
            break;
        }
    }

    return 0;
}

int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
{
    struct kvm_rtas_token_args args = {
        .token = token,
    };

    if (!kvm_check_extension(kvm_state, KVM_CAP_PPC_RTAS)) {
        return -ENOENT;
    }

    strncpy(args.name, function, sizeof(args.name));

    return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
}

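/* Get a file descriptor from KVM for reading or writing the guest hash
 * page table (used for HPT migration); returns -1 if the kernel lacks
 * the capability */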
int kvmppc_get_htab_fd(bool write)
{
    struct kvm_get_htab_fd s = {
        .flags = write ? KVM_GET_HTAB_WRITE : 0,
        .start_index = 0,
    };

    if (!cap_htab_fd) {
        fprintf(stderr, "KVM version doesn't support saving the hash table\n");
        return -1;
    }

    return kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &s);
}

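/* Read HPT chunks from the KVM fd and emit them to the migration stream as
 * (index, n_valid, n_invalid, valid PTEs) records.  Returns 1 once the whole
 * table has been sent, or 0 when it stops early because roughly max_ns
 * nanoseconds have elapsed (max_ns < 0 means no time limit) */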
int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
{
    int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
    uint8_t buf[bufsize];
    ssize_t rc;

    do {
        rc = read(fd, buf, bufsize);
        if (rc < 0) {
            fprintf(stderr, "Error reading data from KVM HTAB fd: %s\n",
                    strerror(errno));
            return rc;
        } else if (rc) {
            uint8_t *buffer = buf;
            ssize_t n = rc;

            while (n) {
                struct kvm_get_htab_header *head =
                    (struct kvm_get_htab_header *) buffer;
                size_t chunksize = sizeof(*head) +
                    HASH_PTE_SIZE_64 * head->n_valid;

                qemu_put_be32(f, head->index);
                qemu_put_be16(f, head->n_valid);
                qemu_put_be16(f, head->n_invalid);
                qemu_put_buffer(f, (void *)(head + 1),
                                HASH_PTE_SIZE_64 * head->n_valid);

                buffer += chunksize;
                n -= chunksize;
            }
        }
    } while ((rc != 0)
             && ((max_ns < 0)
                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));

    return (rc == 0) ? 1 : 0;
}

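/* Write one chunk of HPT entries received from the migration stream back
 * into the kernel's hash table via the KVM fd */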
int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
                           uint16_t n_valid, uint16_t n_invalid)
{
    struct kvm_get_htab_header *buf;
    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
    ssize_t rc;

    buf = alloca(chunksize);
    buf->index = index;
    buf->n_valid = n_valid;
    buf->n_invalid = n_invalid;

    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);

    rc = write(fd, buf, chunksize);
    if (rc < 0) {
        fprintf(stderr, "Error writing KVM hash table: %s\n",
                strerror(errno));
        return rc;
    }
    if (rc != chunksize) {
        /* We should never get a short write on a single chunk */
        fprintf(stderr, "Short write, restoring KVM hash table\n");
        return -1;
    }
    return 0;
}

bool kvm_arch_stop_on_emulation_error(CPUState *cpu)
{
    return true;
}

void kvm_arch_init_irq_routing(KVMState *s)
{
}

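/* Read n HPTEs starting at ptex from the kernel's hash table into hptes[],
 * zeroing any entries the kernel reports as invalid */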
void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
{
    struct kvm_get_htab_fd ghf = {
        .flags = 0,
        .start_index = ptex,
    };
    int fd, rc;
    int i;

    fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (fd < 0) {
        hw_error("kvmppc_read_hptes: Unable to open HPT fd");
    }

    i = 0;
    while (i < n) {
        struct kvm_get_htab_header *hdr;
        int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];

        rc = read(fd, buf, sizeof(buf));
        if (rc < 0) {
            hw_error("kvmppc_read_hptes: Unable to read HPTEs");
        }

        hdr = (struct kvm_get_htab_header *)buf;
        while ((i < n) && ((char *)hdr < (buf + rc))) {
            int invalid = hdr->n_invalid;

            if (hdr->index != (ptex + i)) {
                hw_error("kvmppc_read_hptes: Unexpected HPTE index %"PRIu32
                         " != (%"HWADDR_PRIu" + %d)", hdr->index, ptex, i);
            }

            memcpy(hptes + i, hdr + 1, HASH_PTE_SIZE_64 * hdr->n_valid);
            i += hdr->n_valid;

            if ((n - i) < invalid) {
                invalid = n - i;
            }
            memset(hptes + i, 0, invalid * HASH_PTE_SIZE_64);
            i += hdr->n_invalid;

            hdr = (struct kvm_get_htab_header *)
                ((char *)(hdr + 1) + HASH_PTE_SIZE_64 * hdr->n_valid);
        }
    }

    close(fd);
}

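/* Rewrite a single HPTE in the kernel's hash table */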
void kvmppc_write_hpte(hwaddr ptex, uint64_t pte0, uint64_t pte1)
{
    int fd, rc;
    struct kvm_get_htab_fd ghf;
    struct {
        struct kvm_get_htab_header hdr;
        uint64_t pte0;
        uint64_t pte1;
    } buf;

    ghf.flags = 0;
    ghf.start_index = 0;     /* Ignored */
    fd = kvm_vm_ioctl(kvm_state, KVM_PPC_GET_HTAB_FD, &ghf);
    if (fd < 0) {
        hw_error("kvmppc_write_hpte: Unable to open HPT fd");
    }

    buf.hdr.n_valid = 1;
    buf.hdr.n_invalid = 0;
    buf.hdr.index = ptex;
    buf.pte0 = cpu_to_be64(pte0);
    buf.pte1 = cpu_to_be64(pte1);

    rc = write(fd, &buf, sizeof(buf));
    if (rc != sizeof(buf)) {
        hw_error("kvmppc_write_hpte: Unable to update KVM HPT");
    }

    close(fd);
}

int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                             uint64_t address, uint32_t data, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
                                int vector, PCIDevice *dev)
{
    return 0;
}

int kvm_arch_release_virq_post(int virq)
{
    return 0;
}

int kvm_arch_msi_data_to_gsi(uint32_t data)
{
    return data & 0xffff;
}

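/* Enable the H_RANDOM hypercall, backed by the host hardware RNG, if this
 * KVM supports it; returns -1 when unavailable */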
int kvmppc_enable_hwrng(void)
{
    if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
        return -1;
    }

    return kvmppc_enable_hcall(kvm_state, H_RANDOM);
}