target/i386/hax-all.c

   1 /*
   2  * QEMU HAX support
   3  *
   4  * Copyright IBM, Corp. 2008
   5  *           Red Hat, Inc. 2008
   6  *
   7  * Authors:
   8  *  Anthony Liguori   <aliguori@us.ibm.com>
   9  *  Glauber Costa     <gcosta@redhat.com>
  10  *
  11  * Copyright (c) 2011 Intel Corporation
  12  *  Written by:
  13  *  Jiang Yunhong<yunhong.jiang@intel.com>
  14  *  Xin Xiaohui<xiaohui.xin@intel.com>
  15  *  Zhang Xiantao<xiantao.zhang@intel.com>
  16  *
  17  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  18  * See the COPYING file in the top-level directory.
  19  *
  20  */
  21
  22 /*
  23  * HAX common code for both windows and darwin
  24  */
  25
  26 #include "qemu/osdep.h"
  27 #include "cpu.h"
  28 #include "exec/address-spaces.h"
  29
  30 #include "qemu-common.h"
  31 #include "hax-i386.h"
  32 #include "sysemu/accel.h"
  33 #include "sysemu/reset.h"
  34 #include "sysemu/runstate.h"
  35 #include "hw/boards.h"
  36
  37 #include "hax-cpus.h"
  38
  39 #define DEBUG_HAX 0
  40
  41 #define DPRINTF(fmt, ...) \
  42     do { \
  43         if (DEBUG_HAX) { \
  44             fprintf(stdout, fmt, ## __VA_ARGS__); \
  45         } \
  46     } while (0)
  47
  48 /* Current version */
  49 const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
  50 /* Minimum HAX kernel version */
  51 const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */
  52
  53 static bool hax_allowed;
  54
  55 struct hax_state hax_global;
  56
  57 static void hax_vcpu_sync_state(CPUArchState *env, int modified);
  58 static int hax_arch_get_registers(CPUArchState *env);
  59
  60 int hax_enabled(void)
  61 {
  62     return hax_allowed;
  63 }
  64
  65 int valid_hax_tunnel_size(uint16_t size)
  66 {
  67     return size >= sizeof(struct hax_tunnel);
  68 }
  69
  70 hax_fd hax_vcpu_get_fd(CPUArchState *env)
  71 {
  72     struct hax_vcpu_state *vcpu = env_cpu(env)->hax_vcpu;
  73     if (!vcpu) {
  74         return HAX_INVALID_FD;
  75     }
  76     return vcpu->fd;
  77 }
  78
  79 static int hax_get_capability(struct hax_state *hax)
  80 {
  81     int ret;
  82     struct hax_capabilityinfo capinfo, *cap = &capinfo;
  83
  84     ret = hax_capability(hax, cap);
  85     if (ret) {
  86         return ret;
  87     }
  88
  89     if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
  90         if (cap->winfo & HAX_CAP_FAILREASON_VT) {
  91             DPRINTF
  92                 ("VTX feature is not enabled, HAX driver will not work.\n");
  93         } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
  94             DPRINTF
  95                 ("NX feature is not enabled, HAX driver will not work.\n");
  96         }
  97         return -ENXIO;
  98
  99     }
 100
 101     if (!(cap->winfo & HAX_CAP_UG)) {
 102         fprintf(stderr, "UG mode is not supported by the hardware.\n");
 103         return -ENOTSUP;
 104     }
 105
 106     hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK);
 107
 108     if (cap->wstatus & HAX_CAP_MEMQUOTA) {
 109         if (cap->mem_quota < hax->mem_quota) {
 110             fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
 111             return -ENOSPC;
 112         }
 113     }
 114     return 0;
 115 }
 116
 117 static int hax_version_support(struct hax_state *hax)
 118 {
 119     int ret;
 120     struct hax_module_version version;
 121
 122     ret = hax_mod_version(hax, &version);
 123     if (ret < 0) {
 124         return 0;
 125     }
 126
 127     if (hax_min_version > version.cur_version) {
 128         fprintf(stderr, "Incompatible HAX module version %d,",
 129                 version.cur_version);
 130         fprintf(stderr, "requires minimum version %d\n", hax_min_version);
 131         return 0;
 132     }
 133     if (hax_cur_version < version.compat_version) {
 134         fprintf(stderr, "Incompatible QEMU HAX API version %x,",
 135                 hax_cur_version);
 136         fprintf(stderr, "requires minimum HAX API version %x\n",
 137                 version.compat_version);
 138         return 0;
 139     }
 140
 141     return 1;
 142 }
 143
 144 int hax_vcpu_create(int id)
 145 {
 146     struct hax_vcpu_state *vcpu = NULL;
 147     int ret;
 148
 149     if (!hax_global.vm) {
 150         fprintf(stderr, "vcpu %x created failed, vm is null\n", id);
 151         return -1;
 152     }
 153
 154     if (hax_global.vm->vcpus[id]) {
 155         fprintf(stderr, "vcpu %x allocated already\n", id);
 156         return 0;
 157     }
 158
 159     vcpu = g_new0(struct hax_vcpu_state, 1);
 160
 161     ret = hax_host_create_vcpu(hax_global.vm->fd, id);
 162     if (ret) {
 163         fprintf(stderr, "Failed to create vcpu %x\n", id);
 164         goto error;
 165     }
 166
 167     vcpu->vcpu_id = id;
 168     vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
 169     if (hax_invalid_fd(vcpu->fd)) {
 170         fprintf(stderr, "Failed to open the vcpu\n");
 171         ret = -ENODEV;
 172         goto error;
 173     }
 174
 175     hax_global.vm->vcpus[id] = vcpu;
 176
 177     ret = hax_host_setup_vcpu_channel(vcpu);
 178     if (ret) {
 179         fprintf(stderr, "Invalid hax tunnel size\n");
 180         ret = -EINVAL;
 181         goto error;
 182     }
 183     return 0;
 184
 185   error:
 186     /* vcpu and tunnel will be closed automatically */
 187     if (vcpu && !hax_invalid_fd(vcpu->fd)) {
 188         hax_close_fd(vcpu->fd);
 189     }
 190
 191     hax_global.vm->vcpus[id] = NULL;
 192     g_free(vcpu);
 193     return -1;
 194 }
 195
 196 int hax_vcpu_destroy(CPUState *cpu)
 197 {
 198     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 199
 200     if (!hax_global.vm) {
 201         fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id);
 202         return -1;
 203     }
 204
 205     if (!vcpu) {
 206         return 0;
 207     }
 208
 209     /*
 210      * 1. The hax_tunnel is also destroyed when vcpu is destroyed
 211      * 2. close fd will cause hax module vcpu be cleaned
 212      */
 213     hax_close_fd(vcpu->fd);
 214     hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
 215     g_free(vcpu);
 216     return 0;
 217 }
 218
 219 int hax_init_vcpu(CPUState *cpu)
 220 {
 221     int ret;
 222
 223     ret = hax_vcpu_create(cpu->cpu_index);
 224     if (ret < 0) {
 225         fprintf(stderr, "Failed to create HAX vcpu\n");
 226         exit(-1);
 227     }
 228
 229     cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
 230     cpu->vcpu_dirty = true;
 231     qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr));
 232
 233     return ret;
 234 }
 235
 236 struct hax_vm *hax_vm_create(struct hax_state *hax, int max_cpus)
 237 {
 238     struct hax_vm *vm;
 239     int vm_id = 0, ret, i;
 240
 241     if (hax_invalid_fd(hax->fd)) {
 242         return NULL;
 243     }
 244
 245     if (hax->vm) {
 246         return hax->vm;
 247     }
 248
 249     if (max_cpus > HAX_MAX_VCPU) {
 250         fprintf(stderr, "Maximum VCPU number QEMU supported is %d\n", HAX_MAX_VCPU);
 251         return NULL;
 252     }
 253
 254     vm = g_new0(struct hax_vm, 1);
 255
 256     ret = hax_host_create_vm(hax, &vm_id);
 257     if (ret) {
 258         fprintf(stderr, "Failed to create vm %x\n", ret);
 259         goto error;
 260     }
 261     vm->id = vm_id;
 262     vm->fd = hax_host_open_vm(hax, vm_id);
 263     if (hax_invalid_fd(vm->fd)) {
 264         fprintf(stderr, "Failed to open vm %d\n", vm_id);
 265         goto error;
 266     }
 267
 268     vm->numvcpus = max_cpus;
 269     vm->vcpus = g_new0(struct hax_vcpu_state *, vm->numvcpus);
 270     for (i = 0; i < vm->numvcpus; i++) {
 271         vm->vcpus[i] = NULL;
 272     }
 273
 274     hax->vm = vm;
 275     return vm;
 276
 277   error:
 278     g_free(vm);
 279     hax->vm = NULL;
 280     return NULL;
 281 }
 282
 283 int hax_vm_destroy(struct hax_vm *vm)
 284 {
 285     int i;
 286
 287     for (i = 0; i < vm->numvcpus; i++)
 288         if (vm->vcpus[i]) {
 289             fprintf(stderr, "VCPU should be cleaned before vm clean\n");
 290             return -1;
 291         }
 292     hax_close_fd(vm->fd);
 293     vm->numvcpus = 0;
 294     g_free(vm->vcpus);
 295     g_free(vm);
 296     hax_global.vm = NULL;
 297     return 0;
 298 }
 299
 300 static void hax_handle_interrupt(CPUState *cpu, int mask)
 301 {
 302     cpu->interrupt_request |= mask;
 303
 304     if (!qemu_cpu_is_self(cpu)) {
 305         qemu_cpu_kick(cpu);
 306     }
 307 }
 308
 309 static int hax_init(ram_addr_t ram_size, int max_cpus)
 310 {
 311     struct hax_state *hax = NULL;
 312     struct hax_qemu_version qversion;
 313     int ret;
 314
 315     hax = &hax_global;
 316
 317     memset(hax, 0, sizeof(struct hax_state));
 318     hax->mem_quota = ram_size;
 319
 320     hax->fd = hax_mod_open();
 321     if (hax_invalid_fd(hax->fd)) {
 322         hax->fd = 0;
 323         ret = -ENODEV;
 324         goto error;
 325     }
 326
 327     ret = hax_get_capability(hax);
 328
 329     if (ret) {
 330         if (ret != -ENOSPC) {
 331             ret = -EINVAL;
 332         }
 333         goto error;
 334     }
 335
 336     if (!hax_version_support(hax)) {
 337         ret = -EINVAL;
 338         goto error;
 339     }
 340
 341     hax->vm = hax_vm_create(hax, max_cpus);
 342     if (!hax->vm) {
 343         fprintf(stderr, "Failed to create HAX VM\n");
 344         ret = -EINVAL;
 345         goto error;
 346     }
 347
 348     hax_memory_init();
 349
 350     qversion.cur_version = hax_cur_version;
 351     qversion.min_version = hax_min_version;
 352     hax_notify_qemu_version(hax->vm->fd, &qversion);
 353     cpu_interrupt_handler = hax_handle_interrupt;
 354
 355     return ret;
 356   error:
 357     if (hax->vm) {
 358         hax_vm_destroy(hax->vm);
 359     }
 360     if (hax->fd) {
 361         hax_mod_close(hax);
 362     }
 363
 364     return ret;
 365 }
 366
 367 static int hax_accel_init(MachineState *ms)
 368 {
 369     int ret = hax_init(ms->ram_size, (int)ms->smp.max_cpus);
 370
 371     if (ret && (ret != -ENOSPC)) {
 372         fprintf(stderr, "No accelerator found.\n");
 373     } else {
 374         fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
 375                 !ret ? "working" : "not working",
 376                 !ret ? "fast virt" : "emulation");
 377     }
 378     if (ret == 0) {
 379         cpus_register_accel(&hax_cpus);
 380     }
 381     return ret;
 382 }
 383
 384 static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
 385 {
 386     if (hft->direction < 2) {
 387         cpu_physical_memory_rw(hft->gpa, &hft->value, hft->size,
 388                                hft->direction);
 389     } else {
 390         /*
 391          * HAX API v4 supports transferring data between two MMIO addresses,
 392          * hft->gpa and hft->gpa2 (instructions such as MOVS require this):
 393          *  hft->direction == 2: gpa ==> gpa2
 394          */
 395         uint64_t value;
 396         cpu_physical_memory_read(hft->gpa, &value, hft->size);
 397         cpu_physical_memory_write(hft->gpa2, &value, hft->size);
 398     }
 399
 400     return 0;
 401 }
 402
 403 static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
 404                          int direction, int size, int count, void *buffer)
 405 {
 406     uint8_t *ptr;
 407     int i;
 408     MemTxAttrs attrs = { 0 };
 409
 410     if (!df) {
 411         ptr = (uint8_t *) buffer;
 412     } else {
 413         ptr = buffer + size * count - size;
 414     }
 415     for (i = 0; i < count; i++) {
 416         address_space_rw(&address_space_io, port, attrs,
 417                          ptr, size, direction == HAX_EXIT_IO_OUT);
 418         if (!df) {
 419             ptr += size;
 420         } else {
 421             ptr -= size;
 422         }
 423     }
 424
 425     return 0;
 426 }
 427
 428 static int hax_vcpu_interrupt(CPUArchState *env)
 429 {
 430     CPUState *cpu = env_cpu(env);
 431     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 432     struct hax_tunnel *ht = vcpu->tunnel;
 433
 434     /*
 435      * Try to inject an interrupt if the guest can accept it
 436      * Unlike KVM, HAX kernel check for the eflags, instead of qemu
 437      */
 438     if (ht->ready_for_interrupt_injection &&
 439         (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
 440         int irq;
 441
 442         irq = cpu_get_pic_interrupt(env);
 443         if (irq >= 0) {
 444             hax_inject_interrupt(env, irq);
 445             cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
 446         }
 447     }
 448
 449     /* If we have an interrupt but the guest is not ready to receive an
 450      * interrupt, request an interrupt window exit.  This will
 451      * cause a return to userspace as soon as the guest is ready to
 452      * receive interrupts. */
 453     if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
 454         ht->request_interrupt_window = 1;
 455     } else {
 456         ht->request_interrupt_window = 0;
 457     }
 458     return 0;
 459 }
 460
 461 void hax_raise_event(CPUState *cpu)
 462 {
 463     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 464
 465     if (!vcpu) {
 466         return;
 467     }
 468     vcpu->tunnel->user_event_pending = 1;
 469 }
 470
 471 /*
 472  * Ask hax kernel module to run the CPU for us till:
 473  * 1. Guest crash or shutdown
 474  * 2. Need QEMU's emulation like guest execute MMIO instruction
 475  * 3. Guest execute HLT
 476  * 4. QEMU have Signal/event pending
 477  * 5. An unknown VMX exit happens
 478  */
 479 static int hax_vcpu_hax_exec(CPUArchState *env)
 480 {
 481     int ret = 0;
 482     CPUState *cpu = env_cpu(env);
 483     X86CPU *x86_cpu = X86_CPU(cpu);
 484     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 485     struct hax_tunnel *ht = vcpu->tunnel;
 486
 487     if (!hax_enabled()) {
 488         DPRINTF("Trying to vcpu execute at eip:" TARGET_FMT_lx "\n", env->eip);
 489         return 0;
 490     }
 491
 492     if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
 493         cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
 494         apic_poll_irq(x86_cpu->apic_state);
 495     }
 496
 497     /* After a vcpu is halted (either because it is an AP and has just been
 498      * reset, or because it has executed the HLT instruction), it will not be
 499      * run (hax_vcpu_run()) until it is unhalted. The next few if blocks check
 500      * for events that may change the halted state of this vcpu:
 501      *  a) Maskable interrupt, when RFLAGS.IF is 1;
 502      *     Note: env->eflags may not reflect the current RFLAGS state, because
 503      *           it is not updated after each hax_vcpu_run(). We cannot afford
 504      *           to fail to recognize any unhalt-by-maskable-interrupt event
 505      *           (in which case the vcpu will halt forever), and yet we cannot
 506      *           afford the overhead of hax_vcpu_sync_state(). The current
 507      *           solution is to err on the side of caution and have the HLT
 508      *           handler (see case HAX_EXIT_HLT below) unconditionally set the
 509      *           IF_MASK bit in env->eflags, which, in effect, disables the
 510      *           RFLAGS.IF check.
 511      *  b) NMI;
 512      *  c) INIT signal;
 513      *  d) SIPI signal.
 514      */
 515     if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
 516          (env->eflags & IF_MASK)) ||
 517         (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
 518         cpu->halted = 0;
 519     }
 520
 521     if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
 522         DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
 523                 cpu->cpu_index);
 524         do_cpu_init(x86_cpu);
 525         hax_vcpu_sync_state(env, 1);
 526     }
 527
 528     if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
 529         DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
 530                 cpu->cpu_index);
 531         hax_vcpu_sync_state(env, 0);
 532         do_cpu_sipi(x86_cpu);
 533         hax_vcpu_sync_state(env, 1);
 534     }
 535
 536     if (cpu->halted) {
 537         /* If this vcpu is halted, we must not ask HAXM to run it. Instead, we
 538          * break out of hax_smp_cpu_exec() as if this vcpu had executed HLT.
 539          * That way, this vcpu thread will be trapped in qemu_wait_io_event(),
 540          * until the vcpu is unhalted.
 541          */
 542         cpu->exception_index = EXCP_HLT;
 543         return 0;
 544     }
 545
 546     do {
 547         int hax_ret;
 548
 549         if (cpu->exit_request) {
 550             ret = 1;
 551             break;
 552         }
 553
 554         hax_vcpu_interrupt(env);
 555
 556         qemu_mutex_unlock_iothread();
 557         cpu_exec_start(cpu);
 558         hax_ret = hax_vcpu_run(vcpu);
 559         cpu_exec_end(cpu);
 560         qemu_mutex_lock_iothread();
 561
 562         /* Simply continue the vcpu_run if system call interrupted */
 563         if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
 564             DPRINTF("io window interrupted\n");
 565             continue;
 566         }
 567
 568         if (hax_ret < 0) {
 569             fprintf(stderr, "vcpu run failed for vcpu  %x\n", vcpu->vcpu_id);
 570             abort();
 571         }
 572         switch (ht->_exit_status) {
 573         case HAX_EXIT_IO:
 574             ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
 575                             ht->pio._direction,
 576                             ht->pio._size, ht->pio._count, vcpu->iobuf);
 577             break;
 578         case HAX_EXIT_FAST_MMIO:
 579             ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
 580             break;
 581         /* Guest state changed, currently only for shutdown */
 582         case HAX_EXIT_STATECHANGE:
 583             fprintf(stdout, "VCPU shutdown request\n");
 584             qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN);
 585             hax_vcpu_sync_state(env, 0);
 586             ret = 1;
 587             break;
 588         case HAX_EXIT_UNKNOWN_VMEXIT:
 589             fprintf(stderr, "Unknown VMX exit %x from guest\n",
 590                     ht->_exit_reason);
 591             qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
 592             hax_vcpu_sync_state(env, 0);
 593             cpu_dump_state(cpu, stderr, 0);
 594             ret = -1;
 595             break;
 596         case HAX_EXIT_HLT:
 597             if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
 598                 !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
 599                 /* hlt instruction with interrupt disabled is shutdown */
 600                 env->eflags |= IF_MASK;
 601                 cpu->halted = 1;
 602                 cpu->exception_index = EXCP_HLT;
 603                 ret = 1;
 604             }
 605             break;
 606         /* these situations will continue to hax module */
 607         case HAX_EXIT_INTERRUPT:
 608         case HAX_EXIT_PAUSED:
 609             break;
 610         case HAX_EXIT_MMIO:
 611             /* Should not happen on UG system */
 612             fprintf(stderr, "HAX: unsupported MMIO emulation\n");
 613             ret = -1;
 614             break;
 615         case HAX_EXIT_REAL:
 616             /* Should not happen on UG system */
 617             fprintf(stderr, "HAX: unimplemented real mode emulation\n");
 618             ret = -1;
 619             break;
 620         default:
 621             fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
 622             qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET);
 623             hax_vcpu_sync_state(env, 0);
 624             cpu_dump_state(cpu, stderr, 0);
 625             ret = 1;
 626             break;
 627         }
 628     } while (!ret);
 629
 630     if (cpu->exit_request) {
 631         cpu->exit_request = 0;
 632         cpu->exception_index = EXCP_INTERRUPT;
 633     }
 634     return ret < 0;
 635 }
 636
 637 static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
 638 {
 639     CPUArchState *env = cpu->env_ptr;
 640
 641     hax_arch_get_registers(env);
 642     cpu->vcpu_dirty = true;
 643 }
 644
 645 void hax_cpu_synchronize_state(CPUState *cpu)
 646 {
 647     if (!cpu->vcpu_dirty) {
 648         run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
 649     }
 650 }
 651
 652 static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
 653                                               run_on_cpu_data arg)
 654 {
 655     CPUArchState *env = cpu->env_ptr;
 656
 657     hax_vcpu_sync_state(env, 1);
 658     cpu->vcpu_dirty = false;
 659 }
 660
 661 void hax_cpu_synchronize_post_reset(CPUState *cpu)
 662 {
 663     run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
 664 }
 665
 666 static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
 667 {
 668     CPUArchState *env = cpu->env_ptr;
 669
 670     hax_vcpu_sync_state(env, 1);
 671     cpu->vcpu_dirty = false;
 672 }
 673
 674 void hax_cpu_synchronize_post_init(CPUState *cpu)
 675 {
 676     run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
 677 }
 678
 679 static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg)
 680 {
 681     cpu->vcpu_dirty = true;
 682 }
 683
 684 void hax_cpu_synchronize_pre_loadvm(CPUState *cpu)
 685 {
 686     run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
 687 }
 688
 689 int hax_smp_cpu_exec(CPUState *cpu)
 690 {
 691     CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
 692     int fatal;
 693     int ret;
 694
 695     while (1) {
 696         if (cpu->exception_index >= EXCP_INTERRUPT) {
 697             ret = cpu->exception_index;
 698             cpu->exception_index = -1;
 699             break;
 700         }
 701
 702         fatal = hax_vcpu_hax_exec(env);
 703
 704         if (fatal) {
 705             fprintf(stderr, "Unsupported HAX vcpu return\n");
 706             abort();
 707         }
 708     }
 709
 710     return ret;
 711 }
 712
 713 static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
 714 {
 715     memset(lhs, 0, sizeof(struct segment_desc_t));
 716     lhs->selector = rhs->selector;
 717     lhs->base = rhs->base;
 718     lhs->limit = rhs->limit;
 719     lhs->type = 3;
 720     lhs->present = 1;
 721     lhs->dpl = 3;
 722     lhs->operand_size = 0;
 723     lhs->desc = 1;
 724     lhs->long_mode = 0;
 725     lhs->granularity = 0;
 726     lhs->available = 0;
 727 }
 728
 729 static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
 730 {
 731     lhs->selector = rhs->selector;
 732     lhs->base = rhs->base;
 733     lhs->limit = rhs->limit;
 734     lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
 735         | (rhs->present * DESC_P_MASK)
 736         | (rhs->dpl << DESC_DPL_SHIFT)
 737         | (rhs->operand_size << DESC_B_SHIFT)
 738         | (rhs->desc * DESC_S_MASK)
 739         | (rhs->long_mode << DESC_L_SHIFT)
 740         | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
 741 }
 742
 743 static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
 744 {
 745     unsigned flags = rhs->flags;
 746
 747     memset(lhs, 0, sizeof(struct segment_desc_t));
 748     lhs->selector = rhs->selector;
 749     lhs->base = rhs->base;
 750     lhs->limit = rhs->limit;
 751     lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
 752     lhs->present = (flags & DESC_P_MASK) != 0;
 753     lhs->dpl = rhs->selector & 3;
 754     lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
 755     lhs->desc = (flags & DESC_S_MASK) != 0;
 756     lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
 757     lhs->granularity = (flags & DESC_G_MASK) != 0;
 758     lhs->available = (flags & DESC_AVL_MASK) != 0;
 759 }
 760
 761 static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
 762 {
 763     target_ulong reg = *hax_reg;
 764
 765     if (set) {
 766         *hax_reg = *qemu_reg;
 767     } else {
 768         *qemu_reg = reg;
 769     }
 770 }
 771
 772 /* The sregs has been synced with HAX kernel already before this call */
 773 static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
 774 {
 775     get_seg(&env->segs[R_CS], &sregs->_cs);
 776     get_seg(&env->segs[R_DS], &sregs->_ds);
 777     get_seg(&env->segs[R_ES], &sregs->_es);
 778     get_seg(&env->segs[R_FS], &sregs->_fs);
 779     get_seg(&env->segs[R_GS], &sregs->_gs);
 780     get_seg(&env->segs[R_SS], &sregs->_ss);
 781
 782     get_seg(&env->tr, &sregs->_tr);
 783     get_seg(&env->ldt, &sregs->_ldt);
 784     env->idt.limit = sregs->_idt.limit;
 785     env->idt.base = sregs->_idt.base;
 786     env->gdt.limit = sregs->_gdt.limit;
 787     env->gdt.base = sregs->_gdt.base;
 788     return 0;
 789 }
 790
 791 static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
 792 {
 793     if ((env->eflags & VM_MASK)) {
 794         set_v8086_seg(&sregs->_cs, &env->segs[R_CS]);
 795         set_v8086_seg(&sregs->_ds, &env->segs[R_DS]);
 796         set_v8086_seg(&sregs->_es, &env->segs[R_ES]);
 797         set_v8086_seg(&sregs->_fs, &env->segs[R_FS]);
 798         set_v8086_seg(&sregs->_gs, &env->segs[R_GS]);
 799         set_v8086_seg(&sregs->_ss, &env->segs[R_SS]);
 800     } else {
 801         set_seg(&sregs->_cs, &env->segs[R_CS]);
 802         set_seg(&sregs->_ds, &env->segs[R_DS]);
 803         set_seg(&sregs->_es, &env->segs[R_ES]);
 804         set_seg(&sregs->_fs, &env->segs[R_FS]);
 805         set_seg(&sregs->_gs, &env->segs[R_GS]);
 806         set_seg(&sregs->_ss, &env->segs[R_SS]);
 807
 808         if (env->cr[0] & CR0_PE_MASK) {
 809             /* force ss cpl to cs cpl */
 810             sregs->_ss.selector = (sregs->_ss.selector & ~3) |
 811                                   (sregs->_cs.selector & 3);
 812             sregs->_ss.dpl = sregs->_ss.selector & 3;
 813         }
 814     }
 815
 816     set_seg(&sregs->_tr, &env->tr);
 817     set_seg(&sregs->_ldt, &env->ldt);
 818     sregs->_idt.limit = env->idt.limit;
 819     sregs->_idt.base = env->idt.base;
 820     sregs->_gdt.limit = env->gdt.limit;
 821     sregs->_gdt.base = env->gdt.base;
 822     return 0;
 823 }
 824
 825 static int hax_sync_vcpu_register(CPUArchState *env, int set)
 826 {
 827     struct vcpu_state_t regs;
 828     int ret;
 829     memset(&regs, 0, sizeof(struct vcpu_state_t));
 830
 831     if (!set) {
 832         ret = hax_sync_vcpu_state(env, &regs, 0);
 833         if (ret < 0) {
 834             return -1;
 835         }
 836     }
 837
 838     /* generic register */
 839     hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
 840     hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
 841     hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
 842     hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
 843     hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
 844     hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
 845     hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
 846     hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
 847 #ifdef TARGET_X86_64
 848     hax_getput_reg(&regs._r8, &env->regs[8], set);
 849     hax_getput_reg(&regs._r9, &env->regs[9], set);
 850     hax_getput_reg(&regs._r10, &env->regs[10], set);
 851     hax_getput_reg(&regs._r11, &env->regs[11], set);
 852     hax_getput_reg(&regs._r12, &env->regs[12], set);
 853     hax_getput_reg(&regs._r13, &env->regs[13], set);
 854     hax_getput_reg(&regs._r14, &env->regs[14], set);
 855     hax_getput_reg(&regs._r15, &env->regs[15], set);
 856 #endif
 857     hax_getput_reg(&regs._rflags, &env->eflags, set);
 858     hax_getput_reg(&regs._rip, &env->eip, set);
 859
 860     if (set) {
 861         regs._cr0 = env->cr[0];
 862         regs._cr2 = env->cr[2];
 863         regs._cr3 = env->cr[3];
 864         regs._cr4 = env->cr[4];
 865         hax_set_segments(env, &regs);
 866     } else {
 867         env->cr[0] = regs._cr0;
 868         env->cr[2] = regs._cr2;
 869         env->cr[3] = regs._cr3;
 870         env->cr[4] = regs._cr4;
 871         hax_get_segments(env, &regs);
 872     }
 873
 874     if (set) {
 875         ret = hax_sync_vcpu_state(env, &regs, 1);
 876         if (ret < 0) {
 877             return -1;
 878         }
 879     }
 880     return 0;
 881 }
 882
 883 static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
 884                               uint64_t value)
 885 {
 886     item->entry = index;
 887     item->value = value;
 888 }
 889
 890 static int hax_get_msrs(CPUArchState *env)
 891 {
 892     struct hax_msr_data md;
 893     struct vmx_msr *msrs = md.entries;
 894     int ret, i, n;
 895
 896     n = 0;
 897     msrs[n++].entry = MSR_IA32_SYSENTER_CS;
 898     msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
 899     msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
 900     msrs[n++].entry = MSR_IA32_TSC;
 901 #ifdef TARGET_X86_64
 902     msrs[n++].entry = MSR_EFER;
 903     msrs[n++].entry = MSR_STAR;
 904     msrs[n++].entry = MSR_LSTAR;
 905     msrs[n++].entry = MSR_CSTAR;
 906     msrs[n++].entry = MSR_FMASK;
 907     msrs[n++].entry = MSR_KERNELGSBASE;
 908 #endif
 909     md.nr_msr = n;
 910     ret = hax_sync_msr(env, &md, 0);
 911     if (ret < 0) {
 912         return ret;
 913     }
 914
 915     for (i = 0; i < md.done; i++) {
 916         switch (msrs[i].entry) {
 917         case MSR_IA32_SYSENTER_CS:
 918             env->sysenter_cs = msrs[i].value;
 919             break;
 920         case MSR_IA32_SYSENTER_ESP:
 921             env->sysenter_esp = msrs[i].value;
 922             break;
 923         case MSR_IA32_SYSENTER_EIP:
 924             env->sysenter_eip = msrs[i].value;
 925             break;
 926         case MSR_IA32_TSC:
 927             env->tsc = msrs[i].value;
 928             break;
 929 #ifdef TARGET_X86_64
 930         case MSR_EFER:
 931             env->efer = msrs[i].value;
 932             break;
 933         case MSR_STAR:
 934             env->star = msrs[i].value;
 935             break;
 936         case MSR_LSTAR:
 937             env->lstar = msrs[i].value;
 938             break;
 939         case MSR_CSTAR:
 940             env->cstar = msrs[i].value;
 941             break;
 942         case MSR_FMASK:
 943             env->fmask = msrs[i].value;
 944             break;
 945         case MSR_KERNELGSBASE:
 946             env->kernelgsbase = msrs[i].value;
 947             break;
 948 #endif
 949         }
 950     }
 951
 952     return 0;
 953 }
 954
 955 static int hax_set_msrs(CPUArchState *env)
 956 {
 957     struct hax_msr_data md;
 958     struct vmx_msr *msrs;
 959     msrs = md.entries;
 960     int n = 0;
 961
 962     memset(&md, 0, sizeof(struct hax_msr_data));
 963     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
 964     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
 965     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
 966     hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
 967 #ifdef TARGET_X86_64
 968     hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
 969     hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
 970     hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
 971     hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
 972     hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
 973     hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
 974 #endif
 975     md.nr_msr = n;
 976     md.done = 0;
 977
 978     return hax_sync_msr(env, &md, 1);
 979 }
 980
 981 static int hax_get_fpu(CPUArchState *env)
 982 {
 983     struct fx_layout fpu;
 984     int i, ret;
 985
 986     ret = hax_sync_fpu(env, &fpu, 0);
 987     if (ret < 0) {
 988         return ret;
 989     }
 990
 991     env->fpstt = (fpu.fsw >> 11) & 7;
 992     env->fpus = fpu.fsw;
 993     env->fpuc = fpu.fcw;
 994     for (i = 0; i < 8; ++i) {
 995         env->fptags[i] = !((fpu.ftw >> i) & 1);
 996     }
 997     memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));
 998
 999     for (i = 0; i < 8; i++) {
1000         env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
1001         env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
1002         if (CPU_NB_REGS > 8) {
1003             env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
1004             env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
1005         }
1006     }
1007     env->mxcsr = fpu.mxcsr;
1008
1009     return 0;
1010 }
1011
1012 static int hax_set_fpu(CPUArchState *env)
1013 {
1014     struct fx_layout fpu;
1015     int i;
1016
1017     memset(&fpu, 0, sizeof(fpu));
1018     fpu.fsw = env->fpus & ~(7 << 11);
1019     fpu.fsw |= (env->fpstt & 7) << 11;
1020     fpu.fcw = env->fpuc;
1021
1022     for (i = 0; i < 8; ++i) {
1023         fpu.ftw |= (!env->fptags[i]) << i;
1024     }
1025
1026     memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
1027     for (i = 0; i < 8; i++) {
1028         stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
1029         stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
1030         if (CPU_NB_REGS > 8) {
1031             stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
1032             stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
1033         }
1034     }
1035
1036     fpu.mxcsr = env->mxcsr;
1037
1038     return hax_sync_fpu(env, &fpu, 1);
1039 }
1040
1041 static int hax_arch_get_registers(CPUArchState *env)
1042 {
1043     int ret;
1044
1045     ret = hax_sync_vcpu_register(env, 0);
1046     if (ret < 0) {
1047         return ret;
1048     }
1049
1050     ret = hax_get_fpu(env);
1051     if (ret < 0) {
1052         return ret;
1053     }
1054
1055     ret = hax_get_msrs(env);
1056     if (ret < 0) {
1057         return ret;
1058     }
1059
1060     x86_update_hflags(env);
1061     return 0;
1062 }
1063
1064 static int hax_arch_set_registers(CPUArchState *env)
1065 {
1066     int ret;
1067     ret = hax_sync_vcpu_register(env, 1);
1068
1069     if (ret < 0) {
1070         fprintf(stderr, "Failed to sync vcpu reg\n");
1071         return ret;
1072     }
1073     ret = hax_set_fpu(env);
1074     if (ret < 0) {
1075         fprintf(stderr, "FPU failed\n");
1076         return ret;
1077     }
1078     ret = hax_set_msrs(env);
1079     if (ret < 0) {
1080         fprintf(stderr, "MSR failed\n");
1081         return ret;
1082     }
1083
1084     return 0;
1085 }
1086
1087 static void hax_vcpu_sync_state(CPUArchState *env, int modified)
1088 {
1089     if (hax_enabled()) {
1090         if (modified) {
1091             hax_arch_set_registers(env);
1092         } else {
1093             hax_arch_get_registers(env);
1094         }
1095     }
1096 }
1097
1098 /*
1099  * much simpler than kvm, at least in first stage because:
1100  * We don't need consider the device pass-through, we don't need
1101  * consider the framebuffer, and we may even remove the bios at all
1102  */
1103 int hax_sync_vcpus(void)
1104 {
1105     if (hax_enabled()) {
1106         CPUState *cpu;
1107
1108         cpu = first_cpu;
1109         if (!cpu) {
1110             return 0;
1111         }
1112
1113         for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1114             int ret;
1115
1116             ret = hax_arch_set_registers(cpu->env_ptr);
1117             if (ret < 0) {
1118                 return ret;
1119             }
1120         }
1121     }
1122
1123     return 0;
1124 }
1125
1126 void hax_reset_vcpu_state(void *opaque)
1127 {
1128     CPUState *cpu;
1129     for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1130         cpu->hax_vcpu->tunnel->user_event_pending = 0;
1131         cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
1132     }
1133 }
1134
1135 static void hax_accel_class_init(ObjectClass *oc, void *data)
1136 {
1137     AccelClass *ac = ACCEL_CLASS(oc);
1138     ac->name = "HAX";
1139     ac->init_machine = hax_accel_init;
1140     ac->allowed = &hax_allowed;
1141 }
1142
1143 static const TypeInfo hax_accel_type = {
1144     .name = ACCEL_CLASS_NAME("hax"),
1145     .parent = TYPE_ACCEL,
1146     .class_init = hax_accel_class_init,
1147 };
1148
1149 static void hax_type_init(void)
1150 {
1151     type_register_static(&hax_accel_type);
1152 }
1153
1154 type_init(hax_type_init);