target/i386/hax-all.c

   1 /*
   2  * QEMU HAX support
   3  *
   4  * Copyright IBM, Corp. 2008
   5  *           Red Hat, Inc. 2008
   6  *
   7  * Authors:
   8  *  Anthony Liguori   <aliguori@us.ibm.com>
   9  *  Glauber Costa     <gcosta@redhat.com>
  10  *
  11  * Copyright (c) 2011 Intel Corporation
  12  *  Written by:
  13  *  Jiang Yunhong<yunhong.jiang@intel.com>
  14  *  Xin Xiaohui<xiaohui.xin@intel.com>
  15  *  Zhang Xiantao<xiantao.zhang@intel.com>
  16  *
  17  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  18  * See the COPYING file in the top-level directory.
  19  *
  20  */
  21
  22 /*
  23  * HAX common code for both windows and darwin
  24  */
  25
  26 #include "qemu/osdep.h"
  27 #include "cpu.h"
  28 #include "exec/address-spaces.h"
  29
  30 #include "qemu-common.h"
  31 #include "hax-i386.h"
  32 #include "sysemu/accel.h"
  33 #include "sysemu/reset.h"
  34 #include "sysemu/runstate.h"
  35 #include "qemu/main-loop.h"
  36 #include "hw/boards.h"
  37
/* Set to a non-zero value to enable the DPRINTF() traces in this file. */
#define DEBUG_HAX 0

/*
 * Debug trace helper: printf-style output to stdout, emitted only when
 * DEBUG_HAX is non-zero. The call is still compiled when tracing is off,
 * and the do { } while (0) wrapper lets the macro be used as one statement.
 */
#define DPRINTF(fmt, ...) \
    do { \
        if (DEBUG_HAX) { \
            fprintf(stdout, fmt, ## __VA_ARGS__); \
        } \
    } while (0)
  46
/* HAX API version implemented by this code (reported to the driver) */
const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
/* Oldest HAX kernel module API version this code can work with */
const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */

/* Flag returned by hax_enabled() */
static bool hax_allowed;

/* Global HAX state; cleared and filled in by hax_init() */
struct hax_state hax_global;
  55
  56 static void hax_vcpu_sync_state(CPUArchState *env, int modified);
  57 static int hax_arch_get_registers(CPUArchState *env);
  58
  59 int hax_enabled(void)
  60 {
  61     return hax_allowed;
  62 }
  63
  64 int valid_hax_tunnel_size(uint16_t size)
  65 {
  66     return size >= sizeof(struct hax_tunnel);
  67 }
  68
  69 hax_fd hax_vcpu_get_fd(CPUArchState *env)
  70 {
  71     struct hax_vcpu_state *vcpu = env_cpu(env)->hax_vcpu;
  72     if (!vcpu) {
  73         return HAX_INVALID_FD;
  74     }
  75     return vcpu->fd;
  76 }
  77
  78 static int hax_get_capability(struct hax_state *hax)
  79 {
  80     int ret;
  81     struct hax_capabilityinfo capinfo, *cap = &capinfo;
  82
  83     ret = hax_capability(hax, cap);
  84     if (ret) {
  85         return ret;
  86     }
  87
  88     if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
  89         if (cap->winfo & HAX_CAP_FAILREASON_VT) {
  90             DPRINTF
  91                 ("VTX feature is not enabled, HAX driver will not work.\n");
  92         } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
  93             DPRINTF
  94                 ("NX feature is not enabled, HAX driver will not work.\n");
  95         }
  96         return -ENXIO;
  97
  98     }
  99
 100     if (!(cap->winfo & HAX_CAP_UG)) {
 101         fprintf(stderr, "UG mode is not supported by the hardware.\n");
 102         return -ENOTSUP;
 103     }
 104
 105     hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK);
 106
 107     if (cap->wstatus & HAX_CAP_MEMQUOTA) {
 108         if (cap->mem_quota < hax->mem_quota) {
 109             fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
 110             return -ENOSPC;
 111         }
 112     }
 113     return 0;
 114 }
 115
 116 static int hax_version_support(struct hax_state *hax)
 117 {
 118     int ret;
 119     struct hax_module_version version;
 120
 121     ret = hax_mod_version(hax, &version);
 122     if (ret < 0) {
 123         return 0;
 124     }
 125
 126     if (hax_min_version > version.cur_version) {
 127         fprintf(stderr, "Incompatible HAX module version %d,",
 128                 version.cur_version);
 129         fprintf(stderr, "requires minimum version %d\n", hax_min_version);
 130         return 0;
 131     }
 132     if (hax_cur_version < version.compat_version) {
 133         fprintf(stderr, "Incompatible QEMU HAX API version %x,",
 134                 hax_cur_version);
 135         fprintf(stderr, "requires minimum HAX API version %x\n",
 136                 version.compat_version);
 137         return 0;
 138     }
 139
 140     return 1;
 141 }
 142
 143 int hax_vcpu_create(int id)
 144 {
 145     struct hax_vcpu_state *vcpu = NULL;
 146     int ret;
 147
 148     if (!hax_global.vm) {
 149         fprintf(stderr, "vcpu %x created failed, vm is null\n", id);
 150         return -1;
 151     }
 152
 153     if (hax_global.vm->vcpus[id]) {
 154         fprintf(stderr, "vcpu %x allocated already\n", id);
 155         return 0;
 156     }
 157
 158     vcpu = g_new0(struct hax_vcpu_state, 1);
 159
 160     ret = hax_host_create_vcpu(hax_global.vm->fd, id);
 161     if (ret) {
 162         fprintf(stderr, "Failed to create vcpu %x\n", id);
 163         goto error;
 164     }
 165
 166     vcpu->vcpu_id = id;
 167     vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
 168     if (hax_invalid_fd(vcpu->fd)) {
 169         fprintf(stderr, "Failed to open the vcpu\n");
 170         ret = -ENODEV;
 171         goto error;
 172     }
 173
 174     hax_global.vm->vcpus[id] = vcpu;
 175
 176     ret = hax_host_setup_vcpu_channel(vcpu);
 177     if (ret) {
 178         fprintf(stderr, "Invalid hax tunnel size\n");
 179         ret = -EINVAL;
 180         goto error;
 181     }
 182     return 0;
 183
 184   error:
 185     /* vcpu and tunnel will be closed automatically */
 186     if (vcpu && !hax_invalid_fd(vcpu->fd)) {
 187         hax_close_fd(vcpu->fd);
 188     }
 189
 190     hax_global.vm->vcpus[id] = NULL;
 191     g_free(vcpu);
 192     return -1;
 193 }
 194
 195 int hax_vcpu_destroy(CPUState *cpu)
 196 {
 197     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 198
 199     if (!hax_global.vm) {
 200         fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id);
 201         return -1;
 202     }
 203
 204     if (!vcpu) {
 205         return 0;
 206     }
 207
 208     /*
 209      * 1. The hax_tunnel is also destroyed when vcpu is destroyed
 210      * 2. close fd will cause hax module vcpu be cleaned
 211      */
 212     hax_close_fd(vcpu->fd);
 213     hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
 214     g_free(vcpu);
 215     return 0;
 216 }
 217
 218 int hax_init_vcpu(CPUState *cpu)
 219 {
 220     int ret;
 221
 222     ret = hax_vcpu_create(cpu->cpu_index);
 223     if (ret < 0) {
 224         fprintf(stderr, "Failed to create HAX vcpu\n");
 225         exit(-1);
 226     }
 227
 228     cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
 229     cpu->vcpu_dirty = true;
 230     qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr));
 231
 232     return ret;
 233 }
 234
 235 struct hax_vm *hax_vm_create(struct hax_state *hax, int max_cpus)
 236 {
 237     struct hax_vm *vm;
 238     int vm_id = 0, ret, i;
 239
 240     if (hax_invalid_fd(hax->fd)) {
 241         return NULL;
 242     }
 243
 244     if (hax->vm) {
 245         return hax->vm;
 246     }
 247
 248     if (max_cpus > HAX_MAX_VCPU) {
 249         fprintf(stderr, "Maximum VCPU number QEMU supported is %d\n", HAX_MAX_VCPU);
 250         return NULL;
 251     }
 252
 253     vm = g_new0(struct hax_vm, 1);
 254
 255     ret = hax_host_create_vm(hax, &vm_id);
 256     if (ret) {
 257         fprintf(stderr, "Failed to create vm %x\n", ret);
 258         goto error;
 259     }
 260     vm->id = vm_id;
 261     vm->fd = hax_host_open_vm(hax, vm_id);
 262     if (hax_invalid_fd(vm->fd)) {
 263         fprintf(stderr, "Failed to open vm %d\n", vm_id);
 264         goto error;
 265     }
 266
 267     vm->numvcpus = max_cpus;
 268     vm->vcpus = g_new0(struct hax_vcpu_state *, vm->numvcpus);
 269     for (i = 0; i < vm->numvcpus; i++) {
 270         vm->vcpus[i] = NULL;
 271     }
 272
 273     hax->vm = vm;
 274     return vm;
 275
 276   error:
 277     g_free(vm);
 278     hax->vm = NULL;
 279     return NULL;
 280 }
 281
 282 int hax_vm_destroy(struct hax_vm *vm)
 283 {
 284     int i;
 285
 286     for (i = 0; i < vm->numvcpus; i++)
 287         if (vm->vcpus[i]) {
 288             fprintf(stderr, "VCPU should be cleaned before vm clean\n");
 289             return -1;
 290         }
 291     hax_close_fd(vm->fd);
 292     vm->numvcpus = 0;
 293     g_free(vm->vcpus);
 294     g_free(vm);
 295     hax_global.vm = NULL;
 296     return 0;
 297 }
 298
 299 static void hax_handle_interrupt(CPUState *cpu, int mask)
 300 {
 301     cpu->interrupt_request |= mask;
 302
 303     if (!qemu_cpu_is_self(cpu)) {
 304         qemu_cpu_kick(cpu);
 305     }
 306 }
 307
 308 static int hax_init(ram_addr_t ram_size, int max_cpus)
 309 {
 310     struct hax_state *hax = NULL;
 311     struct hax_qemu_version qversion;
 312     int ret;
 313
 314     hax = &hax_global;
 315
 316     memset(hax, 0, sizeof(struct hax_state));
 317     hax->mem_quota = ram_size;
 318
 319     hax->fd = hax_mod_open();
 320     if (hax_invalid_fd(hax->fd)) {
 321         hax->fd = 0;
 322         ret = -ENODEV;
 323         goto error;
 324     }
 325
 326     ret = hax_get_capability(hax);
 327
 328     if (ret) {
 329         if (ret != -ENOSPC) {
 330             ret = -EINVAL;
 331         }
 332         goto error;
 333     }
 334
 335     if (!hax_version_support(hax)) {
 336         ret = -EINVAL;
 337         goto error;
 338     }
 339
 340     hax->vm = hax_vm_create(hax, max_cpus);
 341     if (!hax->vm) {
 342         fprintf(stderr, "Failed to create HAX VM\n");
 343         ret = -EINVAL;
 344         goto error;
 345     }
 346
 347     hax_memory_init();
 348
 349     qversion.cur_version = hax_cur_version;
 350     qversion.min_version = hax_min_version;
 351     hax_notify_qemu_version(hax->vm->fd, &qversion);
 352     cpu_interrupt_handler = hax_handle_interrupt;
 353
 354     return ret;
 355   error:
 356     if (hax->vm) {
 357         hax_vm_destroy(hax->vm);
 358     }
 359     if (hax->fd) {
 360         hax_mod_close(hax);
 361     }
 362
 363     return ret;
 364 }
 365
 366 static int hax_accel_init(MachineState *ms)
 367 {
 368     int ret = hax_init(ms->ram_size, (int)ms->smp.max_cpus);
 369
 370     if (ret && (ret != -ENOSPC)) {
 371         fprintf(stderr, "No accelerator found.\n");
 372     } else {
 373         fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
 374                 !ret ? "working" : "not working",
 375                 !ret ? "fast virt" : "emulation");
 376     }
 377     return ret;
 378 }
 379
 380 static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
 381 {
 382     if (hft->direction < 2) {
 383         cpu_physical_memory_rw(hft->gpa, &hft->value, hft->size,
 384                                hft->direction);
 385     } else {
 386         /*
 387          * HAX API v4 supports transferring data between two MMIO addresses,
 388          * hft->gpa and hft->gpa2 (instructions such as MOVS require this):
 389          *  hft->direction == 2: gpa ==> gpa2
 390          */
 391         uint64_t value;
 392         cpu_physical_memory_read(hft->gpa, &value, hft->size);
 393         cpu_physical_memory_write(hft->gpa2, &value, hft->size);
 394     }
 395
 396     return 0;
 397 }
 398
 399 static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
 400                          int direction, int size, int count, void *buffer)
 401 {
 402     uint8_t *ptr;
 403     int i;
 404     MemTxAttrs attrs = { 0 };
 405
 406     if (!df) {
 407         ptr = (uint8_t *) buffer;
 408     } else {
 409         ptr = buffer + size * count - size;
 410     }
 411     for (i = 0; i < count; i++) {
 412         address_space_rw(&address_space_io, port, attrs,
 413                          ptr, size, direction == HAX_EXIT_IO_OUT);
 414         if (!df) {
 415             ptr += size;
 416         } else {
 417             ptr -= size;
 418         }
 419     }
 420
 421     return 0;
 422 }
 423
 424 static int hax_vcpu_interrupt(CPUArchState *env)
 425 {
 426     CPUState *cpu = env_cpu(env);
 427     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 428     struct hax_tunnel *ht = vcpu->tunnel;
 429
 430     /*
 431      * Try to inject an interrupt if the guest can accept it
 432      * Unlike KVM, HAX kernel check for the eflags, instead of qemu
 433      */
 434     if (ht->ready_for_interrupt_injection &&
 435         (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
 436         int irq;
 437
 438         irq = cpu_get_pic_interrupt(env);
 439         if (irq >= 0) {
 440             hax_inject_interrupt(env, irq);
 441             cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
 442         }
 443     }
 444
 445     /* If we have an interrupt but the guest is not ready to receive an
 446      * interrupt, request an interrupt window exit.  This will
 447      * cause a return to userspace as soon as the guest is ready to
 448      * receive interrupts. */
 449     if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
 450         ht->request_interrupt_window = 1;
 451     } else {
 452         ht->request_interrupt_window = 0;
 453     }
 454     return 0;
 455 }
 456
 457 void hax_raise_event(CPUState *cpu)
 458 {
 459     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 460
 461     if (!vcpu) {
 462         return;
 463     }
 464     vcpu->tunnel->user_event_pending = 1;
 465 }
 466
/*
 * Ask the HAX kernel module to run the vcpu for us until one of:
 * 1. The guest crashes or shuts down
 * 2. QEMU emulation is needed, e.g. the guest executes an MMIO instruction
 * 3. The guest executes HLT
 * 4. QEMU has a signal/event pending
 * 5. An unknown VMX exit happens
 *
 * Returns 1 when the run ended with a fatal condition (an exit handler
 * set ret < 0) and 0 otherwise. When the vcpu is halted or HAX is not
 * enabled, returns 0 without entering the guest.
 */
static int hax_vcpu_hax_exec(CPUArchState *env)
{
    int ret = 0;
    CPUState *cpu = env_cpu(env);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    struct hax_tunnel *ht = vcpu->tunnel;

    if (!hax_enabled()) {
        DPRINTF("Trying to vcpu execute at eip:" TARGET_FMT_lx "\n", env->eip);
        return 0;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    /* After a vcpu is halted (either because it is an AP and has just been
     * reset, or because it has executed the HLT instruction), it will not be
     * run (hax_vcpu_run()) until it is unhalted. The next few if blocks check
     * for events that may change the halted state of this vcpu:
     *  a) Maskable interrupt, when RFLAGS.IF is 1;
     *     Note: env->eflags may not reflect the current RFLAGS state, because
     *           it is not updated after each hax_vcpu_run(). We cannot afford
     *           to fail to recognize any unhalt-by-maskable-interrupt event
     *           (in which case the vcpu will halt forever), and yet we cannot
     *           afford the overhead of hax_vcpu_sync_state(). The current
     *           solution is to err on the side of caution and have the HLT
     *           handler (see case HAX_EXIT_HLT below) unconditionally set the
     *           IF_MASK bit in env->eflags, which, in effect, disables the
     *           RFLAGS.IF check.
     *  b) NMI;
     *  c) INIT signal;
     *  d) SIPI signal.
     */
    if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
         (env->eflags & IF_MASK)) ||
        (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
        cpu->halted = 0;
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
                cpu->cpu_index);
        do_cpu_init(x86_cpu);
        /* Push the freshly initialised state to the HAX module. */
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
                cpu->cpu_index);
        /* Pull the current state, apply the SIPI, then push it back. */
        hax_vcpu_sync_state(env, 0);
        do_cpu_sipi(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->halted) {
        /* If this vcpu is halted, we must not ask HAXM to run it. Instead, we
         * break out of hax_smp_cpu_exec() as if this vcpu had executed HLT.
         * That way, this vcpu thread will be trapped in qemu_wait_io_event(),
         * until the vcpu is unhalted.
         */
        cpu->exception_index = EXCP_HLT;
        return 0;
    }

    /*
     * Run loop: keep re-entering the guest until an exit handler sets ret.
     * ret > 0 leaves the loop normally, ret < 0 leaves it and is reported
     * to the caller as fatal.
     */
    do {
        int hax_ret;

        if (cpu->exit_request) {
            ret = 1;
            break;
        }

        hax_vcpu_interrupt(env);

        /* The iothread lock is dropped for the duration of the guest run. */
        qemu_mutex_unlock_iothread();
        cpu_exec_start(cpu);
        hax_ret = hax_vcpu_run(vcpu);
        cpu_exec_end(cpu);
        qemu_mutex_lock_iothread();

        /* Simply continue the vcpu_run if system call interrupted */
        if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
            DPRINTF("io window interrupted\n");
            continue;
        }

        if (hax_ret < 0) {
            fprintf(stderr, "vcpu run failed for vcpu  %x\n", vcpu->vcpu_id);
            abort();
        }
        switch (ht->_exit_status) {
        case HAX_EXIT_IO:
            ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
                            ht->pio._direction,
                            ht->pio._size, ht->pio._count, vcpu->iobuf);
            break;
        case HAX_EXIT_FAST_MMIO:
            ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
            break;
        /* Guest state changed, currently only for shutdown */
        case HAX_EXIT_STATECHANGE:
            fprintf(stdout, "VCPU shutdown request\n");
            qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
            hax_vcpu_sync_state(env, 0);
            ret = 1;
            break;
        case HAX_EXIT_UNKNOWN_VMEXIT:
            fprintf(stderr, "Unknown VMX exit %x from guest\n",
                    ht->_exit_reason);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, 0);
            /* Negative ret makes this function report a fatal exit. */
            ret = -1;
            break;
        case HAX_EXIT_HLT:
            if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
                /*
                 * No hard interrupt or NMI is pending, so mark the vcpu as
                 * halted. IF_MASK is forced on in env->eflags so that the
                 * unhalt check at the top of this function is not defeated
                 * by a stale RFLAGS.IF (see the note there). If an
                 * interrupt is already pending, ret stays 0 and the guest
                 * is simply re-entered.
                 */
                env->eflags |= IF_MASK;
                cpu->halted = 1;
                cpu->exception_index = EXCP_HLT;
                ret = 1;
            }
            break;
        /* these situations will continue to hax module */
        case HAX_EXIT_INTERRUPT:
        case HAX_EXIT_PAUSED:
            break;
        case HAX_EXIT_MMIO:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unsupported MMIO emulation\n");
            ret = -1;
            break;
        case HAX_EXIT_REAL:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unimplemented real mode emulation\n");
            ret = -1;
            break;
        default:
            fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
            qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, 0);
            ret = 1;
            break;
        }
    } while (!ret);

    /* Acknowledge an exit request and report it as EXCP_INTERRUPT. */
    if (cpu->exit_request) {
        cpu->exit_request = 0;
        cpu->exception_index = EXCP_INTERRUPT;
    }
    return ret < 0;
}
 632
 633 static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
 634 {
 635     CPUArchState *env = cpu->env_ptr;
 636
 637     hax_arch_get_registers(env);
 638     cpu->vcpu_dirty = true;
 639 }
 640
 641 void hax_cpu_synchronize_state(CPUState *cpu)
 642 {
 643     if (!cpu->vcpu_dirty) {
 644         run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
 645     }
 646 }
 647
 648 static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
 649                                               run_on_cpu_data arg)
 650 {
 651     CPUArchState *env = cpu->env_ptr;
 652
 653     hax_vcpu_sync_state(env, 1);
 654     cpu->vcpu_dirty = false;
 655 }
 656
/* Run do_hax_cpu_synchronize_post_reset() for @cpu via run_on_cpu(). */
void hax_cpu_synchronize_post_reset(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
}
 661
 662 static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
 663 {
 664     CPUArchState *env = cpu->env_ptr;
 665
 666     hax_vcpu_sync_state(env, 1);
 667     cpu->vcpu_dirty = false;
 668 }
 669
/* Run do_hax_cpu_synchronize_post_init() for @cpu via run_on_cpu(). */
void hax_cpu_synchronize_post_init(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
}
 674
/*
 * run_on_cpu() worker used before loading a VM snapshot: only marks the
 * QEMU-side CPU state as dirty. @arg is unused.
 */
static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
{
    cpu->vcpu_dirty = true;
}
 679
/* Run do_hax_cpu_synchronize_pre_loadvm() for @cpu via run_on_cpu(). */
void hax_cpu_synchronize_pre_loadvm(CPUState *cpu)
{
    run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
}
 684
 685 int hax_smp_cpu_exec(CPUState *cpu)
 686 {
 687     CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
 688     int fatal;
 689     int ret;
 690
 691     while (1) {
 692         if (cpu->exception_index >= EXCP_INTERRUPT) {
 693             ret = cpu->exception_index;
 694             cpu->exception_index = -1;
 695             break;
 696         }
 697
 698         fatal = hax_vcpu_hax_exec(env);
 699
 700         if (fatal) {
 701             fprintf(stderr, "Unsupported HAX vcpu return\n");
 702             abort();
 703         }
 704     }
 705
 706     return ret;
 707 }
 708
 709 static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
 710 {
 711     memset(lhs, 0, sizeof(struct segment_desc_t));
 712     lhs->selector = rhs->selector;
 713     lhs->base = rhs->base;
 714     lhs->limit = rhs->limit;
 715     lhs->type = 3;
 716     lhs->present = 1;
 717     lhs->dpl = 3;
 718     lhs->operand_size = 0;
 719     lhs->desc = 1;
 720     lhs->long_mode = 0;
 721     lhs->granularity = 0;
 722     lhs->available = 0;
 723 }
 724
 725 static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
 726 {
 727     lhs->selector = rhs->selector;
 728     lhs->base = rhs->base;
 729     lhs->limit = rhs->limit;
 730     lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
 731         | (rhs->present * DESC_P_MASK)
 732         | (rhs->dpl << DESC_DPL_SHIFT)
 733         | (rhs->operand_size << DESC_B_SHIFT)
 734         | (rhs->desc * DESC_S_MASK)
 735         | (rhs->long_mode << DESC_L_SHIFT)
 736         | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
 737 }
 738
 739 static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
 740 {
 741     unsigned flags = rhs->flags;
 742
 743     memset(lhs, 0, sizeof(struct segment_desc_t));
 744     lhs->selector = rhs->selector;
 745     lhs->base = rhs->base;
 746     lhs->limit = rhs->limit;
 747     lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
 748     lhs->present = (flags & DESC_P_MASK) != 0;
 749     lhs->dpl = rhs->selector & 3;
 750     lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
 751     lhs->desc = (flags & DESC_S_MASK) != 0;
 752     lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
 753     lhs->granularity = (flags & DESC_G_MASK) != 0;
 754     lhs->available = (flags & DESC_AVL_MASK) != 0;
 755 }
 756
 757 static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
 758 {
 759     target_ulong reg = *hax_reg;
 760
 761     if (set) {
 762         *hax_reg = *qemu_reg;
 763     } else {
 764         *qemu_reg = reg;
 765     }
 766 }
 767
 768 /* The sregs has been synced with HAX kernel already before this call */
 769 static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
 770 {
 771     get_seg(&env->segs[R_CS], &sregs->_cs);
 772     get_seg(&env->segs[R_DS], &sregs->_ds);
 773     get_seg(&env->segs[R_ES], &sregs->_es);
 774     get_seg(&env->segs[R_FS], &sregs->_fs);
 775     get_seg(&env->segs[R_GS], &sregs->_gs);
 776     get_seg(&env->segs[R_SS], &sregs->_ss);
 777
 778     get_seg(&env->tr, &sregs->_tr);
 779     get_seg(&env->ldt, &sregs->_ldt);
 780     env->idt.limit = sregs->_idt.limit;
 781     env->idt.base = sregs->_idt.base;
 782     env->gdt.limit = sregs->_gdt.limit;
 783     env->gdt.base = sregs->_gdt.base;
 784     return 0;
 785 }
 786
 787 static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
 788 {
 789     if ((env->eflags & VM_MASK)) {
 790         set_v8086_seg(&sregs->_cs, &env->segs[R_CS]);
 791         set_v8086_seg(&sregs->_ds, &env->segs[R_DS]);
 792         set_v8086_seg(&sregs->_es, &env->segs[R_ES]);
 793         set_v8086_seg(&sregs->_fs, &env->segs[R_FS]);
 794         set_v8086_seg(&sregs->_gs, &env->segs[R_GS]);
 795         set_v8086_seg(&sregs->_ss, &env->segs[R_SS]);
 796     } else {
 797         set_seg(&sregs->_cs, &env->segs[R_CS]);
 798         set_seg(&sregs->_ds, &env->segs[R_DS]);
 799         set_seg(&sregs->_es, &env->segs[R_ES]);
 800         set_seg(&sregs->_fs, &env->segs[R_FS]);
 801         set_seg(&sregs->_gs, &env->segs[R_GS]);
 802         set_seg(&sregs->_ss, &env->segs[R_SS]);
 803
 804         if (env->cr[0] & CR0_PE_MASK) {
 805             /* force ss cpl to cs cpl */
 806             sregs->_ss.selector = (sregs->_ss.selector & ~3) |
 807                                   (sregs->_cs.selector & 3);
 808             sregs->_ss.dpl = sregs->_ss.selector & 3;
 809         }
 810     }
 811
 812     set_seg(&sregs->_tr, &env->tr);
 813     set_seg(&sregs->_ldt, &env->ldt);
 814     sregs->_idt.limit = env->idt.limit;
 815     sregs->_idt.base = env->idt.base;
 816     sregs->_gdt.limit = env->gdt.limit;
 817     sregs->_gdt.base = env->gdt.base;
 818     return 0;
 819 }
 820
 821 static int hax_sync_vcpu_register(CPUArchState *env, int set)
 822 {
 823     struct vcpu_state_t regs;
 824     int ret;
 825     memset(&regs, 0, sizeof(struct vcpu_state_t));
 826
 827     if (!set) {
 828         ret = hax_sync_vcpu_state(env, &regs, 0);
 829         if (ret < 0) {
 830             return -1;
 831         }
 832     }
 833
 834     /* generic register */
 835     hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
 836     hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
 837     hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
 838     hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
 839     hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
 840     hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
 841     hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
 842     hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
 843 #ifdef TARGET_X86_64
 844     hax_getput_reg(&regs._r8, &env->regs[8], set);
 845     hax_getput_reg(&regs._r9, &env->regs[9], set);
 846     hax_getput_reg(&regs._r10, &env->regs[10], set);
 847     hax_getput_reg(&regs._r11, &env->regs[11], set);
 848     hax_getput_reg(&regs._r12, &env->regs[12], set);
 849     hax_getput_reg(&regs._r13, &env->regs[13], set);
 850     hax_getput_reg(&regs._r14, &env->regs[14], set);
 851     hax_getput_reg(&regs._r15, &env->regs[15], set);
 852 #endif
 853     hax_getput_reg(&regs._rflags, &env->eflags, set);
 854     hax_getput_reg(&regs._rip, &env->eip, set);
 855
 856     if (set) {
 857         regs._cr0 = env->cr[0];
 858         regs._cr2 = env->cr[2];
 859         regs._cr3 = env->cr[3];
 860         regs._cr4 = env->cr[4];
 861         hax_set_segments(env, &regs);
 862     } else {
 863         env->cr[0] = regs._cr0;
 864         env->cr[2] = regs._cr2;
 865         env->cr[3] = regs._cr3;
 866         env->cr[4] = regs._cr4;
 867         hax_get_segments(env, &regs);
 868     }
 869
 870     if (set) {
 871         ret = hax_sync_vcpu_state(env, &regs, 1);
 872         if (ret < 0) {
 873             return -1;
 874         }
 875     }
 876     return 0;
 877 }
 878
 879 static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
 880                               uint64_t value)
 881 {
 882     item->entry = index;
 883     item->value = value;
 884 }
 885
 886 static int hax_get_msrs(CPUArchState *env)
 887 {
 888     struct hax_msr_data md;
 889     struct vmx_msr *msrs = md.entries;
 890     int ret, i, n;
 891
 892     n = 0;
 893     msrs[n++].entry = MSR_IA32_SYSENTER_CS;
 894     msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
 895     msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
 896     msrs[n++].entry = MSR_IA32_TSC;
 897 #ifdef TARGET_X86_64
 898     msrs[n++].entry = MSR_EFER;
 899     msrs[n++].entry = MSR_STAR;
 900     msrs[n++].entry = MSR_LSTAR;
 901     msrs[n++].entry = MSR_CSTAR;
 902     msrs[n++].entry = MSR_FMASK;
 903     msrs[n++].entry = MSR_KERNELGSBASE;
 904 #endif
 905     md.nr_msr = n;
 906     ret = hax_sync_msr(env, &md, 0);
 907     if (ret < 0) {
 908         return ret;
 909     }
 910
 911     for (i = 0; i < md.done; i++) {
 912         switch (msrs[i].entry) {
 913         case MSR_IA32_SYSENTER_CS:
 914             env->sysenter_cs = msrs[i].value;
 915             break;
 916         case MSR_IA32_SYSENTER_ESP:
 917             env->sysenter_esp = msrs[i].value;
 918             break;
 919         case MSR_IA32_SYSENTER_EIP:
 920             env->sysenter_eip = msrs[i].value;
 921             break;
 922         case MSR_IA32_TSC:
 923             env->tsc = msrs[i].value;
 924             break;
 925 #ifdef TARGET_X86_64
 926         case MSR_EFER:
 927             env->efer = msrs[i].value;
 928             break;
 929         case MSR_STAR:
 930             env->star = msrs[i].value;
 931             break;
 932         case MSR_LSTAR:
 933             env->lstar = msrs[i].value;
 934             break;
 935         case MSR_CSTAR:
 936             env->cstar = msrs[i].value;
 937             break;
 938         case MSR_FMASK:
 939             env->fmask = msrs[i].value;
 940             break;
 941         case MSR_KERNELGSBASE:
 942             env->kernelgsbase = msrs[i].value;
 943             break;
 944 #endif
 945         }
 946     }
 947
 948     return 0;
 949 }
 950
 951 static int hax_set_msrs(CPUArchState *env)
 952 {
 953     struct hax_msr_data md;
 954     struct vmx_msr *msrs;
 955     msrs = md.entries;
 956     int n = 0;
 957
 958     memset(&md, 0, sizeof(struct hax_msr_data));
 959     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
 960     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
 961     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
 962     hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
 963 #ifdef TARGET_X86_64
 964     hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
 965     hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
 966     hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
 967     hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
 968     hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
 969     hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
 970 #endif
 971     md.nr_msr = n;
 972     md.done = 0;
 973
 974     return hax_sync_msr(env, &md, 1);
 975 }
 976
 977 static int hax_get_fpu(CPUArchState *env)
 978 {
 979     struct fx_layout fpu;
 980     int i, ret;
 981
 982     ret = hax_sync_fpu(env, &fpu, 0);
 983     if (ret < 0) {
 984         return ret;
 985     }
 986
 987     env->fpstt = (fpu.fsw >> 11) & 7;
 988     env->fpus = fpu.fsw;
 989     env->fpuc = fpu.fcw;
 990     for (i = 0; i < 8; ++i) {
 991         env->fptags[i] = !((fpu.ftw >> i) & 1);
 992     }
 993     memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));
 994
 995     for (i = 0; i < 8; i++) {
 996         env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
 997         env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
 998         if (CPU_NB_REGS > 8) {
 999             env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
1000             env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
1001         }
1002     }
1003     env->mxcsr = fpu.mxcsr;
1004
1005     return 0;
1006 }
1007
1008 static int hax_set_fpu(CPUArchState *env)
1009 {
1010     struct fx_layout fpu;
1011     int i;
1012
1013     memset(&fpu, 0, sizeof(fpu));
1014     fpu.fsw = env->fpus & ~(7 << 11);
1015     fpu.fsw |= (env->fpstt & 7) << 11;
1016     fpu.fcw = env->fpuc;
1017
1018     for (i = 0; i < 8; ++i) {
1019         fpu.ftw |= (!env->fptags[i]) << i;
1020     }
1021
1022     memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
1023     for (i = 0; i < 8; i++) {
1024         stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
1025         stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
1026         if (CPU_NB_REGS > 8) {
1027             stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
1028             stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
1029         }
1030     }
1031
1032     fpu.mxcsr = env->mxcsr;
1033
1034     return hax_sync_fpu(env, &fpu, 1);
1035 }
1036
1037 static int hax_arch_get_registers(CPUArchState *env)
1038 {
1039     int ret;
1040
1041     ret = hax_sync_vcpu_register(env, 0);
1042     if (ret < 0) {
1043         return ret;
1044     }
1045
1046     ret = hax_get_fpu(env);
1047     if (ret < 0) {
1048         return ret;
1049     }
1050
1051     ret = hax_get_msrs(env);
1052     if (ret < 0) {
1053         return ret;
1054     }
1055
1056     x86_update_hflags(env);
1057     return 0;
1058 }
1059
1060 static int hax_arch_set_registers(CPUArchState *env)
1061 {
1062     int ret;
1063     ret = hax_sync_vcpu_register(env, 1);
1064
1065     if (ret < 0) {
1066         fprintf(stderr, "Failed to sync vcpu reg\n");
1067         return ret;
1068     }
1069     ret = hax_set_fpu(env);
1070     if (ret < 0) {
1071         fprintf(stderr, "FPU failed\n");
1072         return ret;
1073     }
1074     ret = hax_set_msrs(env);
1075     if (ret < 0) {
1076         fprintf(stderr, "MSR failed\n");
1077         return ret;
1078     }
1079
1080     return 0;
1081 }
1082
1083 static void hax_vcpu_sync_state(CPUArchState *env, int modified)
1084 {
1085     if (hax_enabled()) {
1086         if (modified) {
1087             hax_arch_set_registers(env);
1088         } else {
1089             hax_arch_get_registers(env);
1090         }
1091     }
1092 }
1093
/*
 * Much simpler than KVM, at least in the first stage, because we do not
 * need to consider device pass-through or the framebuffer, and we may
 * even remove the BIOS altogether.
 */
1099 int hax_sync_vcpus(void)
1100 {
1101     if (hax_enabled()) {
1102         CPUState *cpu;
1103
1104         cpu = first_cpu;
1105         if (!cpu) {
1106             return 0;
1107         }
1108
1109         for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1110             int ret;
1111
1112             ret = hax_arch_set_registers(cpu->env_ptr);
1113             if (ret < 0) {
1114                 return ret;
1115             }
1116         }
1117     }
1118
1119     return 0;
1120 }
1121
1122 void hax_reset_vcpu_state(void *opaque)
1123 {
1124     CPUState *cpu;
1125     for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1126         cpu->hax_vcpu->tunnel->user_event_pending = 0;
1127         cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
1128     }
1129 }
1130
1131 static void hax_accel_class_init(ObjectClass *oc, void *data)
1132 {
1133     AccelClass *ac = ACCEL_CLASS(oc);
1134     ac->name = "HAX";
1135     ac->init_machine = hax_accel_init;
1136     ac->allowed = &hax_allowed;
1137 }
1138
1139 static const TypeInfo hax_accel_type = {
1140     .name = ACCEL_CLASS_NAME("hax"),
1141     .parent = TYPE_ACCEL,
1142     .class_init = hax_accel_class_init,
1143 };
1144
1145 static void hax_type_init(void)
1146 {
1147     type_register_static(&hax_accel_type);
1148 }
1149
1150 type_init(hax_type_init);