target/i386/kvm/xen-emu.c
1 /*
2 * Xen HVM emulation support in KVM
4 * Copyright © 2019 Oracle and/or its affiliates. All rights reserved.
5 * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
7 * This work is licensed under the terms of the GNU GPL, version 2 or later.
8 * See the COPYING file in the top-level directory.
12 #include "qemu/osdep.h"
13 #include "qemu/log.h"
14 #include "qemu/main-loop.h"
15 #include "qemu/error-report.h"
16 #include "hw/xen/xen.h"
17 #include "sysemu/kvm_int.h"
18 #include "sysemu/kvm_xen.h"
19 #include "kvm/kvm_i386.h"
20 #include "exec/address-spaces.h"
21 #include "xen-emu.h"
22 #include "trace.h"
23 #include "sysemu/runstate.h"
25 #include "hw/pci/msi.h"
26 #include "hw/i386/apic-msidef.h"
27 #include "hw/i386/e820_memory_layout.h"
28 #include "hw/i386/kvm/xen_overlay.h"
29 #include "hw/i386/kvm/xen_evtchn.h"
30 #include "hw/i386/kvm/xen_gnttab.h"
31 #include "hw/i386/kvm/xen_primary_console.h"
32 #include "hw/i386/kvm/xen_xenstore.h"
34 #include "hw/xen/interface/version.h"
35 #include "hw/xen/interface/sched.h"
36 #include "hw/xen/interface/memory.h"
37 #include "hw/xen/interface/hvm/hvm_op.h"
38 #include "hw/xen/interface/hvm/params.h"
39 #include "hw/xen/interface/vcpu.h"
40 #include "hw/xen/interface/event_channel.h"
41 #include "hw/xen/interface/grant_table.h"
43 #include "xen-compat.h"
45 static void xen_vcpu_singleshot_timer_event(void *opaque);
46 static void xen_vcpu_periodic_timer_event(void *opaque);
47 static int vcpuop_stop_singleshot_timer(CPUState *cs);
49 #ifdef TARGET_X86_64
50 #define hypercall_compat32(longmode) (!(longmode))
51 #else
52 #define hypercall_compat32(longmode) (false)
53 #endif
55 static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa,
56 size_t *len, bool is_write)
58 struct kvm_translation tr = {
59 .linear_address = gva,
62 if (len) {
63 *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK);
66 if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr) || !tr.valid ||
67 (is_write && !tr.writeable)) {
68 return false;
70 *gpa = tr.physical_address;
71 return true;
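/*
 * Worked example (illustrative): with 4KiB pages, a gva whose page offset
 * is 0xf80 yields *len = TARGET_PAGE_SIZE - 0xf80 = 0x80, i.e. only the 128
 * bytes up to the page boundary; kvm_gva_rw() below relies on this to split
 * copies that straddle pages.
 */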
74 static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz,
75 bool is_write)
77 uint8_t *buf = (uint8_t *)_buf;
78 uint64_t gpa;
79 size_t len;
81 while (sz) {
82 if (!kvm_gva_to_gpa(cs, gva, &gpa, &len, is_write)) {
83 return -EFAULT;
85 if (len > sz) {
86 len = sz;
89 cpu_physical_memory_rw(gpa, buf, len, is_write);
91 buf += len;
92 sz -= len;
93 gva += len;
96 return 0;
99 static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf,
100 size_t sz)
102 return kvm_gva_rw(cs, gva, buf, sz, false);
105 static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf,
106 size_t sz)
108 return kvm_gva_rw(cs, gva, buf, sz, true);
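/*
 * Usage sketch (illustrative; it mirrors the hypercall handlers later in
 * this file): guest argument structures are copied in from a guest virtual
 * address, operated on, and copied back out on success:
 *
 *     struct evtchn_unmask op;
 *     if (kvm_copy_from_gva(cs, arg, &op, sizeof(op))) {
 *         return -EFAULT;
 *     }
 *     err = xen_evtchn_unmask_op(&op);
 */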
111 int kvm_xen_init(KVMState *s, uint32_t hypercall_msr)
113 const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
114 KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO;
115 struct kvm_xen_hvm_config cfg = {
116 .msr = hypercall_msr,
117 .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
119 int xen_caps, ret;
121 xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM);
122 if (required_caps & ~xen_caps) {
123 error_report("kvm: Xen HVM guest support not present or insufficient");
124 return -ENOSYS;
127 if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) {
128 struct kvm_xen_hvm_attr ha = {
129 .type = KVM_XEN_ATTR_TYPE_XEN_VERSION,
130 .u.xen_version = s->xen_version,
132 (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha);
134 cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
137 ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg);
138 if (ret < 0) {
139 error_report("kvm: Failed to enable Xen HVM support: %s",
140 strerror(-ret));
141 return ret;
144 /* If called a second time, don't repeat the rest of the setup. */
145 if (s->xen_caps) {
146 return 0;
150 * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info
151 * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared.
153 * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows
154 * such things to be polled at precisely the right time. We *could* do
155 * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at
156 * the moment the IRQ is acked, and see if it should be reasserted.
158 * But the in-kernel irqchip is deprecated, so we're unlikely to add
159 * that support in the kernel. Insist on using the split irqchip mode
160 * instead.
162 * This leaves us polling for the level going low in QEMU, which lacks
163 * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a
164 * spurious 'ack' to an INTX IRQ every time there's any MMIO access to
165 * the device (for which it has to unmap the device and trap access, for
166 * some period after an IRQ!!). In the Xen case, we do it on exit from
167 * KVM_RUN, if the flag is set to say that the GSI is currently asserted.
168 * Which is kind of icky, but less so than the VFIO one. I may fix them
169 * both later...
171 if (!kvm_kernel_irqchip_split()) {
172 error_report("kvm: Xen support requires kernel-irqchip=split");
173 return -EINVAL;
176 s->xen_caps = xen_caps;
178 /* Tell fw_cfg to notify the BIOS to reserve the range. */
179 e820_add_entry(XEN_SPECIAL_AREA_ADDR, XEN_SPECIAL_AREA_SIZE, E820_RESERVED);
181 /* The pages couldn't be overlaid until KVM was initialized */
182 xen_primary_console_reset();
183 xen_xenstore_reset();
185 return 0;
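/*
 * Usage sketch (illustrative, spelling per QEMU's -accel documentation):
 * this init path runs when the KVM accelerator is given a Xen version, e.g.
 *
 *     qemu-system-x86_64 -accel kvm,xen-version=0x40011,kernel-irqchip=split
 *
 * kernel-irqchip=split is required, as enforced above.
 */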
188 int kvm_xen_init_vcpu(CPUState *cs)
190 X86CPU *cpu = X86_CPU(cs);
191 CPUX86State *env = &cpu->env;
192 int err;
195 * The kernel needs to know the Xen/ACPI vCPU ID because that's
196 * what the guest uses in hypercalls such as timers. It doesn't
197 * match the APIC ID which is generally used for talking to the
198 * kernel about vCPUs. And if vCPU threads race with creating
199 * their KVM vCPUs out of order, it doesn't necessarily match
200 * with the kernel's internal vCPU indices either.
202 if (kvm_xen_has_cap(EVTCHN_SEND)) {
203 struct kvm_xen_vcpu_attr va = {
204 .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID,
205 .u.vcpu_id = cs->cpu_index,
207 err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
208 if (err) {
209 error_report("kvm: Failed to set Xen vCPU ID attribute: %s",
210 strerror(-err));
211 return err;
215 env->xen_vcpu_info_gpa = INVALID_GPA;
216 env->xen_vcpu_info_default_gpa = INVALID_GPA;
217 env->xen_vcpu_time_info_gpa = INVALID_GPA;
218 env->xen_vcpu_runstate_gpa = INVALID_GPA;
220 qemu_mutex_init(&env->xen_timers_lock);
221 env->xen_singleshot_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
222 xen_vcpu_singleshot_timer_event,
223 cpu);
224 if (!env->xen_singleshot_timer) {
225 return -ENOMEM;
227 env->xen_singleshot_timer->opaque = cs;
229 env->xen_periodic_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
230 xen_vcpu_periodic_timer_event,
231 cpu);
232 if (!env->xen_periodic_timer) {
233 return -ENOMEM;
235 env->xen_periodic_timer->opaque = cs;
237 return 0;
240 uint32_t kvm_xen_get_caps(void)
242 return kvm_state->xen_caps;
245 static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu,
246 int cmd, uint64_t arg)
248 int err = 0;
250 switch (cmd) {
251 case XENVER_get_features: {
252 struct xen_feature_info fi;
254 /* No need for 32/64 compat handling */
255 qemu_build_assert(sizeof(fi) == 8);
257 err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi));
258 if (err) {
259 break;
262 fi.submap = 0;
263 if (fi.submap_idx == 0) {
264 fi.submap |= 1 << XENFEAT_writable_page_tables |
265 1 << XENFEAT_writable_descriptor_tables |
266 1 << XENFEAT_auto_translated_physmap |
267 1 << XENFEAT_hvm_callback_vector |
268 1 << XENFEAT_hvm_safe_pvclock |
269 1 << XENFEAT_hvm_pirqs;
272 err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi));
273 break;
276 default:
277 return false;
280 exit->u.hcall.result = err;
281 return true;
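/*
 * Worked value (illustrative, assuming the standard Xen public-header bit
 * numbers 0, 1, 2, 8, 9 and 10 for the six features above): a guest asking
 * for submap_idx 0 gets fi.submap == 0x707.
 */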
284 static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa)
286 struct kvm_xen_vcpu_attr xhsi;
288 xhsi.type = type;
289 xhsi.u.gpa = gpa;
291 trace_kvm_xen_set_vcpu_attr(cs->cpu_index, type, gpa);
293 return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xhsi);
296 static int kvm_xen_set_vcpu_callback_vector(CPUState *cs)
298 uint8_t vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
299 struct kvm_xen_vcpu_attr xva;
301 xva.type = KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR;
302 xva.u.vector = vector;
304 trace_kvm_xen_set_vcpu_callback(cs->cpu_index, vector);
306 return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &xva);
309 static void do_set_vcpu_callback_vector(CPUState *cs, run_on_cpu_data data)
311 X86CPU *cpu = X86_CPU(cs);
312 CPUX86State *env = &cpu->env;
314 env->xen_vcpu_callback_vector = data.host_int;
316 if (kvm_xen_has_cap(EVTCHN_SEND)) {
317 kvm_xen_set_vcpu_callback_vector(cs);
321 static int set_vcpu_info(CPUState *cs, uint64_t gpa)
323 X86CPU *cpu = X86_CPU(cs);
324 CPUX86State *env = &cpu->env;
325 MemoryRegionSection mrs = { .mr = NULL };
326 void *vcpu_info_hva = NULL;
327 int ret;
329 ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
330 if (ret || gpa == INVALID_GPA) {
331 goto out;
334 mrs = memory_region_find(get_system_memory(), gpa,
335 sizeof(struct vcpu_info));
336 if (mrs.mr && mrs.mr->ram_block &&
337 !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
338 vcpu_info_hva = qemu_map_ram_ptr(mrs.mr->ram_block,
339 mrs.offset_within_region);
341 if (!vcpu_info_hva) {
342 if (mrs.mr) {
343 memory_region_unref(mrs.mr);
344 mrs.mr = NULL;
346 ret = -EINVAL;
349 out:
350 if (env->xen_vcpu_info_mr) {
351 memory_region_unref(env->xen_vcpu_info_mr);
353 env->xen_vcpu_info_hva = vcpu_info_hva;
354 env->xen_vcpu_info_mr = mrs.mr;
355 return ret;
358 static void do_set_vcpu_info_default_gpa(CPUState *cs, run_on_cpu_data data)
360 X86CPU *cpu = X86_CPU(cs);
361 CPUX86State *env = &cpu->env;
363 env->xen_vcpu_info_default_gpa = data.host_ulong;
365 /* Changing the default does nothing if a vcpu_info was explicitly set. */
366 if (env->xen_vcpu_info_gpa == INVALID_GPA) {
367 set_vcpu_info(cs, env->xen_vcpu_info_default_gpa);
371 static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data)
373 X86CPU *cpu = X86_CPU(cs);
374 CPUX86State *env = &cpu->env;
376 env->xen_vcpu_info_gpa = data.host_ulong;
378 set_vcpu_info(cs, env->xen_vcpu_info_gpa);
381 void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id)
383 CPUState *cs = qemu_get_cpu(vcpu_id);
384 if (!cs) {
385 return NULL;
388 return X86_CPU(cs)->env.xen_vcpu_info_hva;
391 void kvm_xen_maybe_deassert_callback(CPUState *cs)
393 CPUX86State *env = &X86_CPU(cs)->env;
394 struct vcpu_info *vi = env->xen_vcpu_info_hva;
395 if (!vi) {
396 return;
399 /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */
400 if (!vi->evtchn_upcall_pending) {
401 bql_lock();
403 * Check again now we have the lock, because it may have been
404 * asserted in the interim. And we don't want to take the lock
405 * every time because this is a fast path.
407 if (!vi->evtchn_upcall_pending) {
408 X86_CPU(cs)->env.xen_callback_asserted = false;
409 xen_evtchn_set_callback_level(0);
411 bql_unlock();
415 void kvm_xen_set_callback_asserted(void)
417 CPUState *cs = qemu_get_cpu(0);
419 if (cs) {
420 X86_CPU(cs)->env.xen_callback_asserted = true;
424 bool kvm_xen_has_vcpu_callback_vector(void)
426 CPUState *cs = qemu_get_cpu(0);
428 return cs && !!X86_CPU(cs)->env.xen_vcpu_callback_vector;
431 void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type)
433 CPUState *cs = qemu_get_cpu(vcpu_id);
434 uint8_t vector;
436 if (!cs) {
437 return;
440 vector = X86_CPU(cs)->env.xen_vcpu_callback_vector;
441 if (vector) {
443                 * The per-vCPU callback vector is injected via the local APIC. Just
444 * deliver it as an MSI.
446 MSIMessage msg = {
447 .address = APIC_DEFAULT_ADDRESS |
448 (X86_CPU(cs)->apic_id << MSI_ADDR_DEST_ID_SHIFT),
449 .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT),
451 kvm_irqchip_send_msi(kvm_state, msg);
452 return;
455 switch (type) {
456 case HVM_PARAM_CALLBACK_TYPE_VECTOR:
458 * If the evtchn_upcall_pending field in the vcpu_info is set, then
459 * KVM will automatically deliver the vector on entering the vCPU
460 * so all we have to do is kick it out.
462 qemu_cpu_kick(cs);
463 break;
465 case HVM_PARAM_CALLBACK_TYPE_GSI:
466 case HVM_PARAM_CALLBACK_TYPE_PCI_INTX:
467 if (vcpu_id == 0) {
468 xen_evtchn_set_callback_level(1);
470 break;
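/*
 * Worked example (illustrative): for apic_id 3 and callback vector 0xf3,
 * and assuming APIC_DEFAULT_ADDRESS 0xfee00000, MSI_ADDR_DEST_ID_SHIFT 12
 * and MSI_DATA_LEVEL_SHIFT 14 as in the x86 MSI layout, the message built
 * above is address 0xfee03000, data 0x40f3.
 */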
474 /* Must always be called with xen_timers_lock held */
475 static int kvm_xen_set_vcpu_timer(CPUState *cs)
477 X86CPU *cpu = X86_CPU(cs);
478 CPUX86State *env = &cpu->env;
480 struct kvm_xen_vcpu_attr va = {
481 .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
482 .u.timer.port = env->xen_virq[VIRQ_TIMER],
483 .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
484 .u.timer.expires_ns = env->xen_singleshot_timer_ns,
487 return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va);
490 static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data)
492 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
493 kvm_xen_set_vcpu_timer(cs);
496 int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port)
498 CPUState *cs = qemu_get_cpu(vcpu_id);
500 if (!cs) {
501 return -ENOENT;
504 /* cpu.h doesn't include the actual Xen header. */
505 qemu_build_assert(NR_VIRQS == XEN_NR_VIRQS);
507 if (virq >= NR_VIRQS) {
508 return -EINVAL;
511 if (port && X86_CPU(cs)->env.xen_virq[virq]) {
512 return -EEXIST;
515 X86_CPU(cs)->env.xen_virq[virq] = port;
516 if (virq == VIRQ_TIMER && kvm_xen_has_cap(EVTCHN_SEND)) {
517 async_run_on_cpu(cs, do_set_vcpu_timer_virq,
518 RUN_ON_CPU_HOST_INT(port));
520 return 0;
523 static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data)
525 X86CPU *cpu = X86_CPU(cs);
526 CPUX86State *env = &cpu->env;
528 env->xen_vcpu_time_info_gpa = data.host_ulong;
530 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
531 env->xen_vcpu_time_info_gpa);
534 static void do_set_vcpu_runstate_gpa(CPUState *cs, run_on_cpu_data data)
536 X86CPU *cpu = X86_CPU(cs);
537 CPUX86State *env = &cpu->env;
539 env->xen_vcpu_runstate_gpa = data.host_ulong;
541 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
542 env->xen_vcpu_runstate_gpa);
545 static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data)
547 X86CPU *cpu = X86_CPU(cs);
548 CPUX86State *env = &cpu->env;
550 env->xen_vcpu_info_gpa = INVALID_GPA;
551 env->xen_vcpu_info_default_gpa = INVALID_GPA;
552 env->xen_vcpu_time_info_gpa = INVALID_GPA;
553 env->xen_vcpu_runstate_gpa = INVALID_GPA;
554 env->xen_vcpu_callback_vector = 0;
555 memset(env->xen_virq, 0, sizeof(env->xen_virq));
557 set_vcpu_info(cs, INVALID_GPA);
558 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
559 INVALID_GPA);
560 kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
561 INVALID_GPA);
562 if (kvm_xen_has_cap(EVTCHN_SEND)) {
563 kvm_xen_set_vcpu_callback_vector(cs);
565 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
566 env->xen_singleshot_timer_ns = 0;
567 kvm_xen_set_vcpu_timer(cs);
568 } else {
569 vcpuop_stop_singleshot_timer(cs);
574 static int xen_set_shared_info(uint64_t gfn)
576 uint64_t gpa = gfn << TARGET_PAGE_BITS;
577 int i, err;
579 BQL_LOCK_GUARD();
582 * The xen_overlay device tells KVM about it too, since it had to
583 * do that on migration load anyway (unless we're going to jump
584 * through lots of hoops to maintain the fiction that this isn't
585      * KVM-specific).
587 err = xen_overlay_map_shinfo_page(gpa);
588 if (err) {
589 return err;
592 trace_kvm_xen_set_shared_info(gfn);
594 for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) {
595 CPUState *cpu = qemu_get_cpu(i);
596 if (cpu) {
597 async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa,
598 RUN_ON_CPU_HOST_ULONG(gpa));
600 gpa += sizeof(vcpu_info_t);
603 return err;
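/*
 * Layout note (illustrative): the legacy per-vCPU vcpu_info structures sit
 * back to back at the start of the shared_info page, so vCPU i's default
 * vcpu_info is at gfn * PAGE_SIZE + i * sizeof(vcpu_info_t); with the
 * 64-byte vcpu_info asserted elsewhere in this file, vCPU 2's copy begins
 * at offset 128.
 */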
606 static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn)
608 switch (space) {
609 case XENMAPSPACE_shared_info:
610 if (idx > 0) {
611 return -EINVAL;
613 return xen_set_shared_info(gfn);
615 case XENMAPSPACE_grant_table:
616 return xen_gnttab_map_page(idx, gfn);
618 case XENMAPSPACE_gmfn:
619 case XENMAPSPACE_gmfn_range:
620 return -ENOTSUP;
622 case XENMAPSPACE_gmfn_foreign:
623 case XENMAPSPACE_dev_mmio:
624 return -EPERM;
626 default:
627 return -EINVAL;
631 static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu,
632 uint64_t arg)
634 struct xen_add_to_physmap xatp;
635 CPUState *cs = CPU(cpu);
637 if (hypercall_compat32(exit->u.hcall.longmode)) {
638 struct compat_xen_add_to_physmap xatp32;
640 qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16);
641 if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) {
642 return -EFAULT;
644 xatp.domid = xatp32.domid;
645 xatp.size = xatp32.size;
646 xatp.space = xatp32.space;
647 xatp.idx = xatp32.idx;
648 xatp.gpfn = xatp32.gpfn;
649 } else {
650 if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) {
651 return -EFAULT;
655 if (xatp.domid != DOMID_SELF && xatp.domid != xen_domid) {
656 return -ESRCH;
659 return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn);
662 static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu,
663 uint64_t arg)
665 struct xen_add_to_physmap_batch xatpb;
666 unsigned long idxs_gva, gpfns_gva, errs_gva;
667 CPUState *cs = CPU(cpu);
668 size_t op_sz;
670 if (hypercall_compat32(exit->u.hcall.longmode)) {
671 struct compat_xen_add_to_physmap_batch xatpb32;
673 qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20);
674 if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) {
675 return -EFAULT;
677 xatpb.domid = xatpb32.domid;
678 xatpb.space = xatpb32.space;
679 xatpb.size = xatpb32.size;
681 idxs_gva = xatpb32.idxs.c;
682 gpfns_gva = xatpb32.gpfns.c;
683 errs_gva = xatpb32.errs.c;
684 op_sz = sizeof(uint32_t);
685 } else {
686 if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) {
687 return -EFAULT;
689 op_sz = sizeof(unsigned long);
690 idxs_gva = (unsigned long)xatpb.idxs.p;
691 gpfns_gva = (unsigned long)xatpb.gpfns.p;
692 errs_gva = (unsigned long)xatpb.errs.p;
695 if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) {
696 return -ESRCH;
699 /* Explicitly invalid for the batch op. Not that we implement it anyway. */
700 if (xatpb.space == XENMAPSPACE_gmfn_range) {
701 return -EINVAL;
704 while (xatpb.size--) {
705 unsigned long idx = 0;
706 unsigned long gpfn = 0;
707 int err;
709 /* For 32-bit compat this only copies the low 32 bits of each */
710 if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) ||
711 kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) {
712 return -EFAULT;
714 idxs_gva += op_sz;
715 gpfns_gva += op_sz;
717 err = add_to_physmap_one(xatpb.space, idx, gpfn);
719 if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) {
720 return -EFAULT;
722 errs_gva += sizeof(err);
724 return 0;
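/*
 * Compat note (illustrative): idx and gpfn above are zero-initialised
 * unsigned longs, so with op_sz == sizeof(uint32_t) only their low 32 bits
 * are filled from the 32-bit guest arrays, matching the "low 32 bits"
 * comment. The errs entries are plain ints in both ABIs, which is why the
 * write-back always advances by sizeof(err).
 */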
727 static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu,
728 int cmd, uint64_t arg)
730 int err;
732 switch (cmd) {
733 case XENMEM_add_to_physmap:
734 err = do_add_to_physmap(exit, cpu, arg);
735 break;
737 case XENMEM_add_to_physmap_batch:
738 err = do_add_to_physmap_batch(exit, cpu, arg);
739 break;
741 default:
742 return false;
745 exit->u.hcall.result = err;
746 return true;
749 static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu,
750 uint64_t arg)
752 CPUState *cs = CPU(cpu);
753 struct xen_hvm_param hp;
754 int err = 0;
756 /* No need for 32/64 compat handling */
757 qemu_build_assert(sizeof(hp) == 16);
759 if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
760 err = -EFAULT;
761 goto out;
764 if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
765 err = -ESRCH;
766 goto out;
769 switch (hp.index) {
770 case HVM_PARAM_CALLBACK_IRQ:
771 bql_lock();
772 err = xen_evtchn_set_callback_param(hp.value);
773 bql_unlock();
774 xen_set_long_mode(exit->u.hcall.longmode);
775 break;
776 default:
777 return false;
780 out:
781 exit->u.hcall.result = err;
782 return true;
785 static bool handle_get_param(struct kvm_xen_exit *exit, X86CPU *cpu,
786 uint64_t arg)
788 CPUState *cs = CPU(cpu);
789 struct xen_hvm_param hp;
790 int err = 0;
792 /* No need for 32/64 compat handling */
793 qemu_build_assert(sizeof(hp) == 16);
795 if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) {
796 err = -EFAULT;
797 goto out;
800 if (hp.domid != DOMID_SELF && hp.domid != xen_domid) {
801 err = -ESRCH;
802 goto out;
805 switch (hp.index) {
806 case HVM_PARAM_STORE_PFN:
807 hp.value = XEN_SPECIAL_PFN(XENSTORE);
808 break;
809 case HVM_PARAM_STORE_EVTCHN:
810 hp.value = xen_xenstore_get_port();
811 break;
812 case HVM_PARAM_CONSOLE_PFN:
813 hp.value = xen_primary_console_get_pfn();
814 if (!hp.value) {
815 err = -EINVAL;
817 break;
818 case HVM_PARAM_CONSOLE_EVTCHN:
819 hp.value = xen_primary_console_get_port();
820 if (!hp.value) {
821 err = -EINVAL;
823 break;
824 default:
825 return false;
828 if (!err && kvm_copy_to_gva(cs, arg, &hp, sizeof(hp))) {
829 err = -EFAULT;
831 out:
832 exit->u.hcall.result = err;
833 return true;
836 static int kvm_xen_hcall_evtchn_upcall_vector(struct kvm_xen_exit *exit,
837 X86CPU *cpu, uint64_t arg)
839 struct xen_hvm_evtchn_upcall_vector up;
840 CPUState *target_cs;
842 /* No need for 32/64 compat handling */
843 qemu_build_assert(sizeof(up) == 8);
845 if (kvm_copy_from_gva(CPU(cpu), arg, &up, sizeof(up))) {
846 return -EFAULT;
849 if (up.vector < 0x10) {
850 return -EINVAL;
853 target_cs = qemu_get_cpu(up.vcpu);
854 if (!target_cs) {
855 return -EINVAL;
858 async_run_on_cpu(target_cs, do_set_vcpu_callback_vector,
859 RUN_ON_CPU_HOST_INT(up.vector));
860 return 0;
863 static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu,
864 int cmd, uint64_t arg)
866 int ret = -ENOSYS;
867 switch (cmd) {
868 case HVMOP_set_evtchn_upcall_vector:
869 ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, arg);
870 break;
872 case HVMOP_pagetable_dying:
873 ret = -ENOSYS;
874 break;
876 case HVMOP_set_param:
877 return handle_set_param(exit, cpu, arg);
879 case HVMOP_get_param:
880 return handle_get_param(exit, cpu, arg);
882 default:
883 return false;
886 exit->u.hcall.result = ret;
887 return true;
890 static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target,
891 uint64_t arg)
893 struct vcpu_register_vcpu_info rvi;
894 uint64_t gpa;
896 /* No need for 32/64 compat handling */
897 qemu_build_assert(sizeof(rvi) == 16);
898 qemu_build_assert(sizeof(struct vcpu_info) == 64);
900 if (!target) {
901 return -ENOENT;
904 if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) {
905 return -EFAULT;
908 if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) {
909 return -EINVAL;
912 gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset);
913 async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa));
914 return 0;
917 static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target,
918 uint64_t arg)
920 struct vcpu_register_time_memory_area tma;
921 uint64_t gpa;
922 size_t len;
924 /* No need for 32/64 compat handling */
925 qemu_build_assert(sizeof(tma) == 8);
926 qemu_build_assert(sizeof(struct vcpu_time_info) == 32);
928 if (!target) {
929 return -ENOENT;
932 if (kvm_copy_from_gva(cs, arg, &tma, sizeof(tma))) {
933 return -EFAULT;
937 * Xen actually uses the GVA and does the translation through the guest
938 * page tables each time. But Linux/KVM uses the GPA, on the assumption
939 * that guests only ever use *global* addresses (kernel virtual addresses)
940 * for it. If Linux is changed to redo the GVA→GPA translation each time,
941 * it will offer a new vCPU attribute for that, and we'll use it instead.
943 if (!kvm_gva_to_gpa(cs, tma.addr.p, &gpa, &len, false) ||
944 len < sizeof(struct vcpu_time_info)) {
945 return -EFAULT;
948 async_run_on_cpu(target, do_set_vcpu_time_info_gpa,
949 RUN_ON_CPU_HOST_ULONG(gpa));
950 return 0;
953 static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target,
954 uint64_t arg)
956 struct vcpu_register_runstate_memory_area rma;
957 uint64_t gpa;
958 size_t len;
960 /* No need for 32/64 compat handling */
961 qemu_build_assert(sizeof(rma) == 8);
962 /* The runstate area actually does change size, but Linux copes. */
964 if (!target) {
965 return -ENOENT;
968 if (kvm_copy_from_gva(cs, arg, &rma, sizeof(rma))) {
969 return -EFAULT;
972 /* As with vcpu_time_info, Xen actually uses the GVA but KVM doesn't. */
973 if (!kvm_gva_to_gpa(cs, rma.addr.p, &gpa, &len, false)) {
974 return -EFAULT;
977 async_run_on_cpu(target, do_set_vcpu_runstate_gpa,
978 RUN_ON_CPU_HOST_ULONG(gpa));
979 return 0;
982 static uint64_t kvm_get_current_ns(void)
984 struct kvm_clock_data data;
985 int ret;
987 ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
988 if (ret < 0) {
989 fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
990 abort();
993 return data.clock;
996 static void xen_vcpu_singleshot_timer_event(void *opaque)
998 CPUState *cpu = opaque;
999 CPUX86State *env = &X86_CPU(cpu)->env;
1000 uint16_t port = env->xen_virq[VIRQ_TIMER];
1002 if (likely(port)) {
1003 xen_evtchn_set_port(port);
1006 qemu_mutex_lock(&env->xen_timers_lock);
1007 env->xen_singleshot_timer_ns = 0;
1008 qemu_mutex_unlock(&env->xen_timers_lock);
1011 static void xen_vcpu_periodic_timer_event(void *opaque)
1013 CPUState *cpu = opaque;
1014 CPUX86State *env = &X86_CPU(cpu)->env;
1015 uint16_t port = env->xen_virq[VIRQ_TIMER];
1016 int64_t qemu_now;
1018 if (likely(port)) {
1019 xen_evtchn_set_port(port);
1022 qemu_mutex_lock(&env->xen_timers_lock);
1024 qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1025 timer_mod_ns(env->xen_periodic_timer,
1026 qemu_now + env->xen_periodic_timer_period);
1028 qemu_mutex_unlock(&env->xen_timers_lock);
1031 static int do_set_periodic_timer(CPUState *target, uint64_t period_ns)
1033 CPUX86State *tenv = &X86_CPU(target)->env;
1034 int64_t qemu_now;
1036 timer_del(tenv->xen_periodic_timer);
1038 qemu_mutex_lock(&tenv->xen_timers_lock);
1040 qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1041 timer_mod_ns(tenv->xen_periodic_timer, qemu_now + period_ns);
1042 tenv->xen_periodic_timer_period = period_ns;
1044 qemu_mutex_unlock(&tenv->xen_timers_lock);
1045 return 0;
1048 #define MILLISECS(_ms) ((int64_t)((_ms) * 1000000ULL))
1049 #define MICROSECS(_us) ((int64_t)((_us) * 1000ULL))
1050 #define STIME_MAX ((time_t)((int64_t)~0ull >> 1))
1051 /* Chosen so (NOW() + delta) won't overflow without an uptime of 200 years */
1052 #define STIME_DELTA_MAX ((int64_t)((uint64_t)~0ull >> 2))
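/*
 * Worked values (illustrative): MILLISECS(1) is 1,000,000 ns, the minimum
 * periodic timer period accepted below, and STIME_DELTA_MAX is (2^62 - 1) ns,
 * roughly 4.6e9 seconds.
 */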
1054 static int vcpuop_set_periodic_timer(CPUState *cs, CPUState *target,
1055 uint64_t arg)
1057 struct vcpu_set_periodic_timer spt;
1059 qemu_build_assert(sizeof(spt) == 8);
1060 if (kvm_copy_from_gva(cs, arg, &spt, sizeof(spt))) {
1061 return -EFAULT;
1064 if (spt.period_ns < MILLISECS(1) || spt.period_ns > STIME_DELTA_MAX) {
1065 return -EINVAL;
1068 return do_set_periodic_timer(target, spt.period_ns);
1071 static int vcpuop_stop_periodic_timer(CPUState *target)
1073 CPUX86State *tenv = &X86_CPU(target)->env;
1075 qemu_mutex_lock(&tenv->xen_timers_lock);
1077 timer_del(tenv->xen_periodic_timer);
1078 tenv->xen_periodic_timer_period = 0;
1080 qemu_mutex_unlock(&tenv->xen_timers_lock);
1081 return 0;
1085 * Userspace handling of timer, for older kernels.
1086 * Must always be called with xen_timers_lock held.
1088 static int do_set_singleshot_timer(CPUState *cs, uint64_t timeout_abs,
1089 bool linux_wa)
1091 CPUX86State *env = &X86_CPU(cs)->env;
1092 int64_t now = kvm_get_current_ns();
1093 int64_t qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL);
1094 int64_t delta = timeout_abs - now;
1096 if (linux_wa && unlikely((int64_t)timeout_abs < 0 ||
1097 (delta > 0 && (uint32_t)(delta >> 50) != 0))) {
1099 * Xen has a 'Linux workaround' in do_set_timer_op() which checks
1100 * for negative absolute timeout values (caused by integer
1101 * overflow), and for values about 13 days in the future (2^50ns)
1102 * which would be caused by jiffies overflow. For those cases, it
1103 * sets the timeout 100ms in the future (not *too* soon, since if
1104 * a guest really did set a long timeout on purpose we don't want
1105 * to keep churning CPU time by waking it up).
1107 delta = (100 * SCALE_MS);
1108 timeout_abs = now + delta;
1111 timer_mod_ns(env->xen_singleshot_timer, qemu_now + delta);
1112 env->xen_singleshot_timer_ns = now + delta;
1113 return 0;
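/*
 * Worked numbers for the workaround above (illustrative): 2^50 ns is about
 * 1.13e15 ns, i.e. roughly 13 days, so (uint32_t)(delta >> 50) != 0 trips
 * for any requested timeout more than ~13 days out, and the timer is
 * rewritten to fire 100ms from now instead.
 */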
1116 static int vcpuop_set_singleshot_timer(CPUState *cs, uint64_t arg)
1118 struct vcpu_set_singleshot_timer sst = { 0 };
1121 * The struct is a uint64_t followed by a uint32_t. On 32-bit that
1122 * makes it 12 bytes. On 64-bit it gets padded to 16. The parts
1123 * that get used are identical, and there's four bytes of padding
1124 * unused at the end. For true Xen compatibility we should attempt
1125 * to copy the full 16 bytes from 64-bit guests, and return -EFAULT
1126 * if we can't get the padding too. But that's daft. Just copy what
1127 * we need.
1129 qemu_build_assert(offsetof(struct vcpu_set_singleshot_timer, flags) == 8);
1130 qemu_build_assert(sizeof(sst) >= 12);
1132 if (kvm_copy_from_gva(cs, arg, &sst, 12)) {
1133 return -EFAULT;
1136 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
1139 * We ignore the VCPU_SSHOTTMR_future flag, just as Xen now does.
1140 * The only guest that ever used it, got it wrong.
1141 * https://xenbits.xen.org/gitweb/?p=xen.git;a=commitdiff;h=19c6cbd909
1143 return do_set_singleshot_timer(cs, sst.timeout_abs_ns, false);
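/*
 * Layout sketch for the 12-byte copy above (illustrative):
 *     offset 0:  uint64_t timeout_abs_ns
 *     offset 8:  uint32_t flags
 * The 64-bit ABI pads the structure out to 16 bytes after 'flags'; that
 * trailing padding is the part deliberately left uncopied.
 */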
1146 static int vcpuop_stop_singleshot_timer(CPUState *cs)
1148 CPUX86State *env = &X86_CPU(cs)->env;
1150 qemu_mutex_lock(&env->xen_timers_lock);
1152 timer_del(env->xen_singleshot_timer);
1153 env->xen_singleshot_timer_ns = 0;
1155 qemu_mutex_unlock(&env->xen_timers_lock);
1156 return 0;
1159 static bool kvm_xen_hcall_set_timer_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1160 uint64_t timeout)
1162 int err;
1164 if (unlikely(timeout == 0)) {
1165 err = vcpuop_stop_singleshot_timer(CPU(cpu));
1166 } else {
1167 QEMU_LOCK_GUARD(&X86_CPU(cpu)->env.xen_timers_lock);
1168 err = do_set_singleshot_timer(CPU(cpu), timeout, true);
1170 exit->u.hcall.result = err;
1171 return true;
1174 static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1175 int cmd, int vcpu_id, uint64_t arg)
1177 CPUState *cs = CPU(cpu);
1178 CPUState *dest = cs->cpu_index == vcpu_id ? cs : qemu_get_cpu(vcpu_id);
1179 int err;
1181 if (!dest) {
1182 err = -ENOENT;
1183 goto out;
1186 switch (cmd) {
1187 case VCPUOP_register_runstate_memory_area:
1188 err = vcpuop_register_runstate_info(cs, dest, arg);
1189 break;
1190 case VCPUOP_register_vcpu_time_memory_area:
1191 err = vcpuop_register_vcpu_time_info(cs, dest, arg);
1192 break;
1193 case VCPUOP_register_vcpu_info:
1194 err = vcpuop_register_vcpu_info(cs, dest, arg);
1195 break;
1196 case VCPUOP_set_singleshot_timer: {
1197 if (cs->cpu_index == vcpu_id) {
1198 err = vcpuop_set_singleshot_timer(dest, arg);
1199 } else {
1200 err = -EINVAL;
1202 break;
1204 case VCPUOP_stop_singleshot_timer:
1205 if (cs->cpu_index == vcpu_id) {
1206 err = vcpuop_stop_singleshot_timer(dest);
1207 } else {
1208 err = -EINVAL;
1210 break;
1211 case VCPUOP_set_periodic_timer: {
1212 err = vcpuop_set_periodic_timer(cs, dest, arg);
1213 break;
1215 case VCPUOP_stop_periodic_timer:
1216 err = vcpuop_stop_periodic_timer(dest);
1217 break;
1219 default:
1220 return false;
1223 out:
1224 exit->u.hcall.result = err;
1225 return true;
1228 static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1229 int cmd, uint64_t arg)
1231 CPUState *cs = CPU(cpu);
1232 int err = -ENOSYS;
1234 switch (cmd) {
1235 case EVTCHNOP_init_control:
1236 case EVTCHNOP_expand_array:
1237 case EVTCHNOP_set_priority:
1238 /* We do not support FIFO channels at this point */
1239 err = -ENOSYS;
1240 break;
1242 case EVTCHNOP_status: {
1243 struct evtchn_status status;
1245 qemu_build_assert(sizeof(status) == 24);
1246 if (kvm_copy_from_gva(cs, arg, &status, sizeof(status))) {
1247 err = -EFAULT;
1248 break;
1251 err = xen_evtchn_status_op(&status);
1252 if (!err && kvm_copy_to_gva(cs, arg, &status, sizeof(status))) {
1253 err = -EFAULT;
1255 break;
1257 case EVTCHNOP_close: {
1258 struct evtchn_close close;
1260 qemu_build_assert(sizeof(close) == 4);
1261 if (kvm_copy_from_gva(cs, arg, &close, sizeof(close))) {
1262 err = -EFAULT;
1263 break;
1266 err = xen_evtchn_close_op(&close);
1267 break;
1269 case EVTCHNOP_unmask: {
1270 struct evtchn_unmask unmask;
1272 qemu_build_assert(sizeof(unmask) == 4);
1273 if (kvm_copy_from_gva(cs, arg, &unmask, sizeof(unmask))) {
1274 err = -EFAULT;
1275 break;
1278 err = xen_evtchn_unmask_op(&unmask);
1279 break;
1281 case EVTCHNOP_bind_virq: {
1282 struct evtchn_bind_virq virq;
1284 qemu_build_assert(sizeof(virq) == 12);
1285 if (kvm_copy_from_gva(cs, arg, &virq, sizeof(virq))) {
1286 err = -EFAULT;
1287 break;
1290 err = xen_evtchn_bind_virq_op(&virq);
1291 if (!err && kvm_copy_to_gva(cs, arg, &virq, sizeof(virq))) {
1292 err = -EFAULT;
1294 break;
1296 case EVTCHNOP_bind_pirq: {
1297 struct evtchn_bind_pirq pirq;
1299 qemu_build_assert(sizeof(pirq) == 12);
1300 if (kvm_copy_from_gva(cs, arg, &pirq, sizeof(pirq))) {
1301 err = -EFAULT;
1302 break;
1305 err = xen_evtchn_bind_pirq_op(&pirq);
1306 if (!err && kvm_copy_to_gva(cs, arg, &pirq, sizeof(pirq))) {
1307 err = -EFAULT;
1309 break;
1311 case EVTCHNOP_bind_ipi: {
1312 struct evtchn_bind_ipi ipi;
1314 qemu_build_assert(sizeof(ipi) == 8);
1315 if (kvm_copy_from_gva(cs, arg, &ipi, sizeof(ipi))) {
1316 err = -EFAULT;
1317 break;
1320 err = xen_evtchn_bind_ipi_op(&ipi);
1321 if (!err && kvm_copy_to_gva(cs, arg, &ipi, sizeof(ipi))) {
1322 err = -EFAULT;
1324 break;
1326 case EVTCHNOP_send: {
1327 struct evtchn_send send;
1329 qemu_build_assert(sizeof(send) == 4);
1330 if (kvm_copy_from_gva(cs, arg, &send, sizeof(send))) {
1331 err = -EFAULT;
1332 break;
1335 err = xen_evtchn_send_op(&send);
1336 break;
1338 case EVTCHNOP_alloc_unbound: {
1339 struct evtchn_alloc_unbound alloc;
1341 qemu_build_assert(sizeof(alloc) == 8);
1342 if (kvm_copy_from_gva(cs, arg, &alloc, sizeof(alloc))) {
1343 err = -EFAULT;
1344 break;
1347 err = xen_evtchn_alloc_unbound_op(&alloc);
1348 if (!err && kvm_copy_to_gva(cs, arg, &alloc, sizeof(alloc))) {
1349 err = -EFAULT;
1351 break;
1353 case EVTCHNOP_bind_interdomain: {
1354 struct evtchn_bind_interdomain interdomain;
1356 qemu_build_assert(sizeof(interdomain) == 12);
1357 if (kvm_copy_from_gva(cs, arg, &interdomain, sizeof(interdomain))) {
1358 err = -EFAULT;
1359 break;
1362 err = xen_evtchn_bind_interdomain_op(&interdomain);
1363 if (!err &&
1364 kvm_copy_to_gva(cs, arg, &interdomain, sizeof(interdomain))) {
1365 err = -EFAULT;
1367 break;
1369 case EVTCHNOP_bind_vcpu: {
1370 struct evtchn_bind_vcpu vcpu;
1372 qemu_build_assert(sizeof(vcpu) == 8);
1373 if (kvm_copy_from_gva(cs, arg, &vcpu, sizeof(vcpu))) {
1374 err = -EFAULT;
1375 break;
1378 err = xen_evtchn_bind_vcpu_op(&vcpu);
1379 break;
1381 case EVTCHNOP_reset: {
1382 struct evtchn_reset reset;
1384 qemu_build_assert(sizeof(reset) == 2);
1385 if (kvm_copy_from_gva(cs, arg, &reset, sizeof(reset))) {
1386 err = -EFAULT;
1387 break;
1390 err = xen_evtchn_reset_op(&reset);
1391 break;
1393 default:
1394 return false;
1397 exit->u.hcall.result = err;
1398 return true;
1401 int kvm_xen_soft_reset(void)
1403 CPUState *cpu;
1404 int err;
1406 assert(bql_locked());
1408 trace_kvm_xen_soft_reset();
1410 err = xen_evtchn_soft_reset();
1411 if (err) {
1412 return err;
1416 * Zero is the reset/startup state for HVM_PARAM_CALLBACK_IRQ. Strictly,
1417 * it maps to HVM_PARAM_CALLBACK_TYPE_GSI with GSI#0, but Xen refuses to
1418      * deliver to the timer interrupt and treats that as 'disabled'.
1420 err = xen_evtchn_set_callback_param(0);
1421 if (err) {
1422 return err;
1425 CPU_FOREACH(cpu) {
1426 async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL);
1429 err = xen_overlay_map_shinfo_page(INVALID_GFN);
1430 if (err) {
1431 return err;
1434 err = xen_gnttab_reset();
1435 if (err) {
1436 return err;
1439 err = xen_primary_console_reset();
1440 if (err) {
1441 return err;
1444 err = xen_xenstore_reset();
1445 if (err) {
1446 return err;
1449 return 0;
1452 static int schedop_shutdown(CPUState *cs, uint64_t arg)
1454 struct sched_shutdown shutdown;
1455 int ret = 0;
1457 /* No need for 32/64 compat handling */
1458 qemu_build_assert(sizeof(shutdown) == 4);
1460 if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) {
1461 return -EFAULT;
1464 switch (shutdown.reason) {
1465 case SHUTDOWN_crash:
1466 cpu_dump_state(cs, stderr, CPU_DUMP_CODE);
1467 qemu_system_guest_panicked(NULL);
1468 break;
1470 case SHUTDOWN_reboot:
1471 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
1472 break;
1474 case SHUTDOWN_poweroff:
1475 qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
1476 break;
1478 case SHUTDOWN_soft_reset:
1479 bql_lock();
1480 ret = kvm_xen_soft_reset();
1481 bql_unlock();
1482 break;
1484 default:
1485 ret = -EINVAL;
1486 break;
1489 return ret;
1492 static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1493 int cmd, uint64_t arg)
1495 CPUState *cs = CPU(cpu);
1496 int err = -ENOSYS;
1498 switch (cmd) {
1499 case SCHEDOP_shutdown:
1500 err = schedop_shutdown(cs, arg);
1501 break;
1503 case SCHEDOP_poll:
1505 * Linux will panic if this doesn't work. Just yield; it's not
1506 * worth overthinking it because with event channel handling
1507 * in KVM, the kernel will intercept this and it will never
1508          * reach QEMU anyway. The semantics of the hypercall explicitly
1509 * permit spurious wakeups.
1511 case SCHEDOP_yield:
1512 sched_yield();
1513 err = 0;
1514 break;
1516 default:
1517 return false;
1520 exit->u.hcall.result = err;
1521 return true;
1524 static bool kvm_xen_hcall_gnttab_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1525 int cmd, uint64_t arg, int count)
1527 CPUState *cs = CPU(cpu);
1528 int err;
1530 switch (cmd) {
1531 case GNTTABOP_set_version: {
1532 struct gnttab_set_version set;
1534 qemu_build_assert(sizeof(set) == 4);
1535 if (kvm_copy_from_gva(cs, arg, &set, sizeof(set))) {
1536 err = -EFAULT;
1537 break;
1540 err = xen_gnttab_set_version_op(&set);
1541 if (!err && kvm_copy_to_gva(cs, arg, &set, sizeof(set))) {
1542 err = -EFAULT;
1544 break;
1546 case GNTTABOP_get_version: {
1547 struct gnttab_get_version get;
1549 qemu_build_assert(sizeof(get) == 8);
1550 if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
1551 err = -EFAULT;
1552 break;
1555 err = xen_gnttab_get_version_op(&get);
1556 if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
1557 err = -EFAULT;
1559 break;
1561 case GNTTABOP_query_size: {
1562 struct gnttab_query_size size;
1564 qemu_build_assert(sizeof(size) == 16);
1565 if (kvm_copy_from_gva(cs, arg, &size, sizeof(size))) {
1566 err = -EFAULT;
1567 break;
1570 err = xen_gnttab_query_size_op(&size);
1571 if (!err && kvm_copy_to_gva(cs, arg, &size, sizeof(size))) {
1572 err = -EFAULT;
1574 break;
1576 case GNTTABOP_setup_table:
1577 case GNTTABOP_copy:
1578 case GNTTABOP_map_grant_ref:
1579 case GNTTABOP_unmap_grant_ref:
1580 case GNTTABOP_swap_grant_ref:
1581 return false;
1583 default:
1584 /* Xen explicitly returns -ENOSYS to HVM guests for all others */
1585 err = -ENOSYS;
1586 break;
1589 exit->u.hcall.result = err;
1590 return true;
1593 static bool kvm_xen_hcall_physdev_op(struct kvm_xen_exit *exit, X86CPU *cpu,
1594 int cmd, uint64_t arg)
1596 CPUState *cs = CPU(cpu);
1597 int err;
1599 switch (cmd) {
1600 case PHYSDEVOP_map_pirq: {
1601 struct physdev_map_pirq map;
1603 if (hypercall_compat32(exit->u.hcall.longmode)) {
1604 struct compat_physdev_map_pirq *map32 = (void *)&map;
1606 if (kvm_copy_from_gva(cs, arg, map32, sizeof(*map32))) {
1607 return -EFAULT;
1611 * The only thing that's different is the alignment of the
1612 * uint64_t table_base at the end, which gets padding to make
1613 * it 64-bit aligned in the 64-bit version.
1615 qemu_build_assert(sizeof(*map32) == 36);
1616 qemu_build_assert(offsetof(struct physdev_map_pirq, entry_nr) ==
1617 offsetof(struct compat_physdev_map_pirq, entry_nr));
1618 memmove(&map.table_base, &map32->table_base, sizeof(map.table_base));
1619 } else {
1620 if (kvm_copy_from_gva(cs, arg, &map, sizeof(map))) {
1621 err = -EFAULT;
1622 break;
1625 err = xen_physdev_map_pirq(&map);
1627 * Since table_base is an IN parameter and won't be changed, just
1628 * copy the size of the compat structure back to the guest.
1630 if (!err && kvm_copy_to_gva(cs, arg, &map,
1631 sizeof(struct compat_physdev_map_pirq))) {
1632 err = -EFAULT;
1634 break;
1636 case PHYSDEVOP_unmap_pirq: {
1637 struct physdev_unmap_pirq unmap;
1639 qemu_build_assert(sizeof(unmap) == 8);
1640 if (kvm_copy_from_gva(cs, arg, &unmap, sizeof(unmap))) {
1641 err = -EFAULT;
1642 break;
1645 err = xen_physdev_unmap_pirq(&unmap);
1646 if (!err && kvm_copy_to_gva(cs, arg, &unmap, sizeof(unmap))) {
1647 err = -EFAULT;
1649 break;
1651 case PHYSDEVOP_eoi: {
1652 struct physdev_eoi eoi;
1654 qemu_build_assert(sizeof(eoi) == 4);
1655 if (kvm_copy_from_gva(cs, arg, &eoi, sizeof(eoi))) {
1656 err = -EFAULT;
1657 break;
1660 err = xen_physdev_eoi_pirq(&eoi);
1661 if (!err && kvm_copy_to_gva(cs, arg, &eoi, sizeof(eoi))) {
1662 err = -EFAULT;
1664 break;
1666 case PHYSDEVOP_irq_status_query: {
1667 struct physdev_irq_status_query query;
1669 qemu_build_assert(sizeof(query) == 8);
1670 if (kvm_copy_from_gva(cs, arg, &query, sizeof(query))) {
1671 err = -EFAULT;
1672 break;
1675 err = xen_physdev_query_pirq(&query);
1676 if (!err && kvm_copy_to_gva(cs, arg, &query, sizeof(query))) {
1677 err = -EFAULT;
1679 break;
1681 case PHYSDEVOP_get_free_pirq: {
1682 struct physdev_get_free_pirq get;
1684 qemu_build_assert(sizeof(get) == 8);
1685 if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) {
1686 err = -EFAULT;
1687 break;
1690 err = xen_physdev_get_free_pirq(&get);
1691 if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) {
1692 err = -EFAULT;
1694 break;
1696 case PHYSDEVOP_pirq_eoi_gmfn_v2: /* FreeBSD 13 makes this hypercall */
1697 err = -ENOSYS;
1698 break;
1700 default:
1701 return false;
1704 exit->u.hcall.result = err;
1705 return true;
1708 static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
1710 uint16_t code = exit->u.hcall.input;
1712 if (exit->u.hcall.cpl > 0) {
1713 exit->u.hcall.result = -EPERM;
1714 return true;
1717 switch (code) {
1718 case __HYPERVISOR_set_timer_op:
1719 if (exit->u.hcall.longmode) {
1720 return kvm_xen_hcall_set_timer_op(exit, cpu,
1721 exit->u.hcall.params[0]);
1722 } else {
1723 /* In 32-bit mode, the 64-bit timer value is in two args. */
1724 uint64_t val = ((uint64_t)exit->u.hcall.params[1]) << 32 |
1725 (uint32_t)exit->u.hcall.params[0];
1726 return kvm_xen_hcall_set_timer_op(exit, cpu, val);
1728 case __HYPERVISOR_grant_table_op:
1729 return kvm_xen_hcall_gnttab_op(exit, cpu, exit->u.hcall.params[0],
1730 exit->u.hcall.params[1],
1731 exit->u.hcall.params[2]);
1732 case __HYPERVISOR_sched_op:
1733 return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0],
1734 exit->u.hcall.params[1]);
1735 case __HYPERVISOR_event_channel_op:
1736 return kvm_xen_hcall_evtchn_op(exit, cpu, exit->u.hcall.params[0],
1737 exit->u.hcall.params[1]);
1738 case __HYPERVISOR_vcpu_op:
1739 return kvm_xen_hcall_vcpu_op(exit, cpu,
1740 exit->u.hcall.params[0],
1741 exit->u.hcall.params[1],
1742 exit->u.hcall.params[2]);
1743 case __HYPERVISOR_hvm_op:
1744 return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0],
1745 exit->u.hcall.params[1]);
1746 case __HYPERVISOR_memory_op:
1747 return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0],
1748 exit->u.hcall.params[1]);
1749 case __HYPERVISOR_physdev_op:
1750 return kvm_xen_hcall_physdev_op(exit, cpu, exit->u.hcall.params[0],
1751 exit->u.hcall.params[1]);
1752 case __HYPERVISOR_xen_version:
1753 return kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0],
1754 exit->u.hcall.params[1]);
1755 default:
1756 return false;
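/*
 * Worked example for the 32-bit set_timer_op case above (illustrative):
 * with params[0] = 0x9abcdef0 and params[1] = 0x12345678, the reassembled
 * 64-bit timeout is 0x123456789abcdef0 ns.
 */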
1760 int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit)
1762 if (exit->type != KVM_EXIT_XEN_HCALL) {
1763 return -1;
1767 * The kernel latches the guest 32/64 mode when the MSR is used to fill
1768 * the hypercall page. So if we see a hypercall in a mode that doesn't
1769 * match our own idea of the guest mode, fetch the kernel's idea of the
1770 * "long mode" to remain in sync.
1772 if (exit->u.hcall.longmode != xen_is_long_mode()) {
1773 xen_sync_long_mode();
1776 if (!do_kvm_xen_handle_exit(cpu, exit)) {
1778 * Some hypercalls will be deliberately "implemented" by returning
1779 * -ENOSYS. This case is for hypercalls which are unexpected.
1781 exit->u.hcall.result = -ENOSYS;
1782 qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %"
1783 PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n",
1784 (uint64_t)exit->u.hcall.input,
1785 (uint64_t)exit->u.hcall.params[0],
1786 (uint64_t)exit->u.hcall.params[1],
1787 (uint64_t)exit->u.hcall.params[2]);
1790 trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl,
1791 exit->u.hcall.input, exit->u.hcall.params[0],
1792 exit->u.hcall.params[1], exit->u.hcall.params[2],
1793 exit->u.hcall.result);
1794 return 0;
1797 uint16_t kvm_xen_get_gnttab_max_frames(void)
1799 KVMState *s = KVM_STATE(current_accel());
1800 return s->xen_gnttab_max_frames;
1803 uint16_t kvm_xen_get_evtchn_max_pirq(void)
1805 KVMState *s = KVM_STATE(current_accel());
1806 return s->xen_evtchn_max_pirq;
1809 int kvm_put_xen_state(CPUState *cs)
1811 X86CPU *cpu = X86_CPU(cs);
1812 CPUX86State *env = &cpu->env;
1813 uint64_t gpa;
1814 int ret;
1816 gpa = env->xen_vcpu_info_gpa;
1817 if (gpa == INVALID_GPA) {
1818 gpa = env->xen_vcpu_info_default_gpa;
1821 if (gpa != INVALID_GPA) {
1822 ret = set_vcpu_info(cs, gpa);
1823 if (ret < 0) {
1824 return ret;
1828 gpa = env->xen_vcpu_time_info_gpa;
1829 if (gpa != INVALID_GPA) {
1830 ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
1831 gpa);
1832 if (ret < 0) {
1833 return ret;
1837 gpa = env->xen_vcpu_runstate_gpa;
1838 if (gpa != INVALID_GPA) {
1839 ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
1840 gpa);
1841 if (ret < 0) {
1842 return ret;
1846 if (env->xen_periodic_timer_period) {
1847 ret = do_set_periodic_timer(cs, env->xen_periodic_timer_period);
1848 if (ret < 0) {
1849 return ret;
1853 if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1855 * If the kernel has EVTCHN_SEND support then it handles timers too,
1856 * so the timer will be restored by kvm_xen_set_vcpu_timer() below.
1858 QEMU_LOCK_GUARD(&env->xen_timers_lock);
1859 if (env->xen_singleshot_timer_ns) {
1860 ret = do_set_singleshot_timer(cs, env->xen_singleshot_timer_ns,
1861 false);
1862 if (ret < 0) {
1863 return ret;
1866 return 0;
1869 if (env->xen_vcpu_callback_vector) {
1870 ret = kvm_xen_set_vcpu_callback_vector(cs);
1871 if (ret < 0) {
1872 return ret;
1876 if (env->xen_virq[VIRQ_TIMER]) {
1877 do_set_vcpu_timer_virq(cs,
1878 RUN_ON_CPU_HOST_INT(env->xen_virq[VIRQ_TIMER]));
1880 return 0;
1883 int kvm_get_xen_state(CPUState *cs)
1885 X86CPU *cpu = X86_CPU(cs);
1886 CPUX86State *env = &cpu->env;
1887 uint64_t gpa;
1888 int ret;
1891 * The kernel does not mark vcpu_info as dirty when it delivers interrupts
1892 * to it. It's up to userspace to *assume* that any page shared thus is
1893 * always considered dirty. The shared_info page is different since it's
1894 * an overlay and migrated separately anyway.
1896 gpa = env->xen_vcpu_info_gpa;
1897 if (gpa == INVALID_GPA) {
1898 gpa = env->xen_vcpu_info_default_gpa;
1900 if (gpa != INVALID_GPA) {
1901 MemoryRegionSection mrs = memory_region_find(get_system_memory(),
1902 gpa,
1903 sizeof(struct vcpu_info));
1904 if (mrs.mr &&
1905 !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
1906 memory_region_set_dirty(mrs.mr, mrs.offset_within_region,
1907 sizeof(struct vcpu_info));
1911 if (!kvm_xen_has_cap(EVTCHN_SEND)) {
1912 return 0;
1916 * If the kernel is accelerating timers, read out the current value of the
1917 * singleshot timer deadline.
1919 if (env->xen_virq[VIRQ_TIMER]) {
1920 struct kvm_xen_vcpu_attr va = {
1921 .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
1923 ret = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_GET_ATTR, &va);
1924 if (ret < 0) {
1925 return ret;
1929 * This locking is fairly pointless, and is here to appease Coverity.
1930 * There is an unavoidable race condition if a different vCPU sets a
1931 * timer for this vCPU after the value has been read out. But that's
1932 * OK in practice because *all* the vCPUs need to be stopped before
1933 * we set about migrating their state.
1935 QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock);
1936 env->xen_singleshot_timer_ns = va.u.timer.expires_ns;
1939 return 0;