target/i386/hax-all.c

   1 /*
   2  * QEMU HAX support
   3  *
   4  * Copyright IBM, Corp. 2008
   5  *           Red Hat, Inc. 2008
   6  *
   7  * Authors:
   8  *  Anthony Liguori   <aliguori@us.ibm.com>
   9  *  Glauber Costa     <gcosta@redhat.com>
  10  *
  11  * Copyright (c) 2011 Intel Corporation
  12  *  Written by:
  13  *  Jiang Yunhong<yunhong.jiang@intel.com>
  14  *  Xin Xiaohui<xiaohui.xin@intel.com>
  15  *  Zhang Xiantao<xiantao.zhang@intel.com>
  16  *
  17  * This work is licensed under the terms of the GNU GPL, version 2 or later.
  18  * See the COPYING file in the top-level directory.
  19  *
  20  */
  21
  22 /*
  23  * HAX common code for both windows and darwin
  24  */
  25
  26 #include "qemu/osdep.h"
  27 #include "cpu.h"
  28 #include "exec/address-spaces.h"
  29 #include "exec/exec-all.h"
  30 #include "exec/ioport.h"
  31
  32 #include "qemu-common.h"
  33 #include "strings.h"
  34 #include "hax-i386.h"
  35 #include "sysemu/accel.h"
  36 #include "sysemu/sysemu.h"
  37 #include "qemu/main-loop.h"
  38 #include "hw/boards.h"
  39
/* Compile-time switch for HAX debug tracing: DPRINTF prints only if non-zero. */
#define DEBUG_HAX 0

/*
 * printf-style debug trace to stdout, active only when DEBUG_HAX is non-zero.
 * The call sits inside an ordinary if () rather than an #if, so the format
 * string and its arguments are still compiled when tracing is switched off.
 * The do { } while (0) wrapper makes the macro expand to a single statement.
 */
#define DPRINTF(fmt, ...) \
    do { \
        if (DEBUG_HAX) { \
            fprintf(stdout, fmt, ## __VA_ARGS__); \
        } \
    } while (0)
  48
/*
 * HAX API version implemented by this file.  hax_version_support() compares
 * it with the module's compat_version, and hax_init() reports it to the
 * module through hax_notify_qemu_version().
 */
const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */
/*
 * Oldest HAX kernel module version accepted: hax_version_support() rejects a
 * module whose cur_version is lower than this.
 */
const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */

/*
 * Flag returned by hax_enabled().  It is not assigned anywhere in the part of
 * the file visible here, so it keeps its static zero value unless code
 * elsewhere in this file sets it.
 */
static bool hax_allowed;

/* The single global HAX state; hax_init() zeroes and fills it in. */
struct hax_state hax_global;

/* Forward declarations for helpers that are used before their definition. */
static void hax_vcpu_sync_state(CPUArchState *env, int modified);
static int hax_arch_get_registers(CPUArchState *env);
  60
  61 int hax_enabled(void)
  62 {
  63     return hax_allowed;
  64 }
  65
  66 int valid_hax_tunnel_size(uint16_t size)
  67 {
  68     return size >= sizeof(struct hax_tunnel);
  69 }
  70
  71 hax_fd hax_vcpu_get_fd(CPUArchState *env)
  72 {
  73     struct hax_vcpu_state *vcpu = ENV_GET_CPU(env)->hax_vcpu;
  74     if (!vcpu) {
  75         return HAX_INVALID_FD;
  76     }
  77     return vcpu->fd;
  78 }
  79
  80 static int hax_get_capability(struct hax_state *hax)
  81 {
  82     int ret;
  83     struct hax_capabilityinfo capinfo, *cap = &capinfo;
  84
  85     ret = hax_capability(hax, cap);
  86     if (ret) {
  87         return ret;
  88     }
  89
  90     if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) {
  91         if (cap->winfo & HAX_CAP_FAILREASON_VT) {
  92             DPRINTF
  93                 ("VTX feature is not enabled, HAX driver will not work.\n");
  94         } else if (cap->winfo & HAX_CAP_FAILREASON_NX) {
  95             DPRINTF
  96                 ("NX feature is not enabled, HAX driver will not work.\n");
  97         }
  98         return -ENXIO;
  99
 100     }
 101
 102     if (!(cap->winfo & HAX_CAP_UG)) {
 103         fprintf(stderr, "UG mode is not supported by the hardware.\n");
 104         return -ENOTSUP;
 105     }
 106
 107     if (cap->wstatus & HAX_CAP_MEMQUOTA) {
 108         if (cap->mem_quota < hax->mem_quota) {
 109             fprintf(stderr, "The VM memory needed exceeds the driver limit.\n");
 110             return -ENOSPC;
 111         }
 112     }
 113     return 0;
 114 }
 115
 116 static int hax_version_support(struct hax_state *hax)
 117 {
 118     int ret;
 119     struct hax_module_version version;
 120
 121     ret = hax_mod_version(hax, &version);
 122     if (ret < 0) {
 123         return 0;
 124     }
 125
 126     if (hax_min_version > version.cur_version) {
 127         fprintf(stderr, "Incompatible HAX module version %d,",
 128                 version.cur_version);
 129         fprintf(stderr, "requires minimum version %d\n", hax_min_version);
 130         return 0;
 131     }
 132     if (hax_cur_version < version.compat_version) {
 133         fprintf(stderr, "Incompatible QEMU HAX API version %x,",
 134                 hax_cur_version);
 135         fprintf(stderr, "requires minimum HAX API version %x\n",
 136                 version.compat_version);
 137         return 0;
 138     }
 139
 140     return 1;
 141 }
 142
 143 int hax_vcpu_create(int id)
 144 {
 145     struct hax_vcpu_state *vcpu = NULL;
 146     int ret;
 147
 148     if (!hax_global.vm) {
 149         fprintf(stderr, "vcpu %x created failed, vm is null\n", id);
 150         return -1;
 151     }
 152
 153     if (hax_global.vm->vcpus[id]) {
 154         fprintf(stderr, "vcpu %x allocated already\n", id);
 155         return 0;
 156     }
 157
 158     vcpu = g_malloc(sizeof(struct hax_vcpu_state));
 159     if (!vcpu) {
 160         fprintf(stderr, "Failed to alloc vcpu state\n");
 161         return -ENOMEM;
 162     }
 163
 164     memset(vcpu, 0, sizeof(struct hax_vcpu_state));
 165
 166     ret = hax_host_create_vcpu(hax_global.vm->fd, id);
 167     if (ret) {
 168         fprintf(stderr, "Failed to create vcpu %x\n", id);
 169         goto error;
 170     }
 171
 172     vcpu->vcpu_id = id;
 173     vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id);
 174     if (hax_invalid_fd(vcpu->fd)) {
 175         fprintf(stderr, "Failed to open the vcpu\n");
 176         ret = -ENODEV;
 177         goto error;
 178     }
 179
 180     hax_global.vm->vcpus[id] = vcpu;
 181
 182     ret = hax_host_setup_vcpu_channel(vcpu);
 183     if (ret) {
 184         fprintf(stderr, "Invalid hax tunnel size\n");
 185         ret = -EINVAL;
 186         goto error;
 187     }
 188     return 0;
 189
 190   error:
 191     /* vcpu and tunnel will be closed automatically */
 192     if (vcpu && !hax_invalid_fd(vcpu->fd)) {
 193         hax_close_fd(vcpu->fd);
 194     }
 195
 196     hax_global.vm->vcpus[id] = NULL;
 197     g_free(vcpu);
 198     return -1;
 199 }
 200
 201 int hax_vcpu_destroy(CPUState *cpu)
 202 {
 203     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 204
 205     if (!hax_global.vm) {
 206         fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id);
 207         return -1;
 208     }
 209
 210     if (!vcpu) {
 211         return 0;
 212     }
 213
 214     /*
 215      * 1. The hax_tunnel is also destroied when vcpu destroy
 216      * 2. close fd will cause hax module vcpu be cleaned
 217      */
 218     hax_close_fd(vcpu->fd);
 219     hax_global.vm->vcpus[vcpu->vcpu_id] = NULL;
 220     g_free(vcpu);
 221     return 0;
 222 }
 223
 224 int hax_init_vcpu(CPUState *cpu)
 225 {
 226     int ret;
 227
 228     ret = hax_vcpu_create(cpu->cpu_index);
 229     if (ret < 0) {
 230         fprintf(stderr, "Failed to create HAX vcpu\n");
 231         exit(-1);
 232     }
 233
 234     cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index];
 235     cpu->hax_vcpu_dirty = true;
 236     qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr));
 237
 238     return ret;
 239 }
 240
 241 struct hax_vm *hax_vm_create(struct hax_state *hax)
 242 {
 243     struct hax_vm *vm;
 244     int vm_id = 0, ret;
 245
 246     if (hax_invalid_fd(hax->fd)) {
 247         return NULL;
 248     }
 249
 250     if (hax->vm) {
 251         return hax->vm;
 252     }
 253
 254     vm = g_malloc(sizeof(struct hax_vm));
 255     if (!vm) {
 256         return NULL;
 257     }
 258     memset(vm, 0, sizeof(struct hax_vm));
 259     ret = hax_host_create_vm(hax, &vm_id);
 260     if (ret) {
 261         fprintf(stderr, "Failed to create vm %x\n", ret);
 262         goto error;
 263     }
 264     vm->id = vm_id;
 265     vm->fd = hax_host_open_vm(hax, vm_id);
 266     if (hax_invalid_fd(vm->fd)) {
 267         fprintf(stderr, "Failed to open vm %d\n", vm_id);
 268         goto error;
 269     }
 270
 271     hax->vm = vm;
 272     return vm;
 273
 274   error:
 275     g_free(vm);
 276     hax->vm = NULL;
 277     return NULL;
 278 }
 279
 280 int hax_vm_destroy(struct hax_vm *vm)
 281 {
 282     int i;
 283
 284     for (i = 0; i < HAX_MAX_VCPU; i++)
 285         if (vm->vcpus[i]) {
 286             fprintf(stderr, "VCPU should be cleaned before vm clean\n");
 287             return -1;
 288         }
 289     hax_close_fd(vm->fd);
 290     g_free(vm);
 291     hax_global.vm = NULL;
 292     return 0;
 293 }
 294
 295 static void hax_handle_interrupt(CPUState *cpu, int mask)
 296 {
 297     cpu->interrupt_request |= mask;
 298
 299     if (!qemu_cpu_is_self(cpu)) {
 300         qemu_cpu_kick(cpu);
 301     }
 302 }
 303
 304 static int hax_init(ram_addr_t ram_size)
 305 {
 306     struct hax_state *hax = NULL;
 307     struct hax_qemu_version qversion;
 308     int ret;
 309
 310     hax = &hax_global;
 311
 312     memset(hax, 0, sizeof(struct hax_state));
 313     hax->mem_quota = ram_size;
 314
 315     hax->fd = hax_mod_open();
 316     if (hax_invalid_fd(hax->fd)) {
 317         hax->fd = 0;
 318         ret = -ENODEV;
 319         goto error;
 320     }
 321
 322     ret = hax_get_capability(hax);
 323
 324     if (ret) {
 325         if (ret != -ENOSPC) {
 326             ret = -EINVAL;
 327         }
 328         goto error;
 329     }
 330
 331     if (!hax_version_support(hax)) {
 332         ret = -EINVAL;
 333         goto error;
 334     }
 335
 336     hax->vm = hax_vm_create(hax);
 337     if (!hax->vm) {
 338         fprintf(stderr, "Failed to create HAX VM\n");
 339         ret = -EINVAL;
 340         goto error;
 341     }
 342
 343     hax_memory_init();
 344
 345     qversion.cur_version = hax_cur_version;
 346     qversion.min_version = hax_min_version;
 347     hax_notify_qemu_version(hax->vm->fd, &qversion);
 348     cpu_interrupt_handler = hax_handle_interrupt;
 349
 350     return ret;
 351   error:
 352     if (hax->vm) {
 353         hax_vm_destroy(hax->vm);
 354     }
 355     if (hax->fd) {
 356         hax_mod_close(hax);
 357     }
 358
 359     return ret;
 360 }
 361
 362 static int hax_accel_init(MachineState *ms)
 363 {
 364     int ret = hax_init(ms->ram_size);
 365
 366     if (ret && (ret != -ENOSPC)) {
 367         fprintf(stderr, "No accelerator found.\n");
 368     } else {
 369         fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n",
 370                 !ret ? "working" : "not working",
 371                 !ret ? "fast virt" : "emulation");
 372     }
 373     return ret;
 374 }
 375
 376 static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft)
 377 {
 378     if (hft->direction < 2) {
 379         cpu_physical_memory_rw(hft->gpa, (uint8_t *) &hft->value, hft->size,
 380                                hft->direction);
 381     } else {
 382         /*
 383          * HAX API v4 supports transferring data between two MMIO addresses,
 384          * hft->gpa and hft->gpa2 (instructions such as MOVS require this):
 385          *  hft->direction == 2: gpa ==> gpa2
 386          */
 387         uint64_t value;
 388         cpu_physical_memory_rw(hft->gpa, (uint8_t *) &value, hft->size, 0);
 389         cpu_physical_memory_rw(hft->gpa2, (uint8_t *) &value, hft->size, 1);
 390     }
 391
 392     return 0;
 393 }
 394
 395 static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port,
 396                          int direction, int size, int count, void *buffer)
 397 {
 398     uint8_t *ptr;
 399     int i;
 400     MemTxAttrs attrs = { 0 };
 401
 402     if (!df) {
 403         ptr = (uint8_t *) buffer;
 404     } else {
 405         ptr = buffer + size * count - size;
 406     }
 407     for (i = 0; i < count; i++) {
 408         address_space_rw(&address_space_io, port, attrs,
 409                          ptr, size, direction == HAX_EXIT_IO_OUT);
 410         if (!df) {
 411             ptr += size;
 412         } else {
 413             ptr -= size;
 414         }
 415     }
 416
 417     return 0;
 418 }
 419
 420 static int hax_vcpu_interrupt(CPUArchState *env)
 421 {
 422     CPUState *cpu = ENV_GET_CPU(env);
 423     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 424     struct hax_tunnel *ht = vcpu->tunnel;
 425
 426     /*
 427      * Try to inject an interrupt if the guest can accept it
 428      * Unlike KVM, HAX kernel check for the eflags, instead of qemu
 429      */
 430     if (ht->ready_for_interrupt_injection &&
 431         (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
 432         int irq;
 433
 434         irq = cpu_get_pic_interrupt(env);
 435         if (irq >= 0) {
 436             hax_inject_interrupt(env, irq);
 437             cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
 438         }
 439     }
 440
 441     /* If we have an interrupt but the guest is not ready to receive an
 442      * interrupt, request an interrupt window exit.  This will
 443      * cause a return to userspace as soon as the guest is ready to
 444      * receive interrupts. */
 445     if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
 446         ht->request_interrupt_window = 1;
 447     } else {
 448         ht->request_interrupt_window = 0;
 449     }
 450     return 0;
 451 }
 452
 453 void hax_raise_event(CPUState *cpu)
 454 {
 455     struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
 456
 457     if (!vcpu) {
 458         return;
 459     }
 460     vcpu->tunnel->user_event_pending = 1;
 461 }
 462
/*
 * Ask the HAX kernel module to run the vcpu for us until one of:
 * 1. The guest crashes or shuts down
 * 2. QEMU has to emulate something (e.g. the guest executes an MMIO
 *    instruction)
 * 3. The guest executes HLT
 * 4. QEMU has a signal/event pending
 * 5. An unknown VMX exit happens
 *
 * Returns 1 when an exit handler reported a fatal condition (ret < 0) and
 * 0 otherwise; hax_smp_cpu_exec() aborts on a non-zero result.
 */
static int hax_vcpu_hax_exec(CPUArchState *env)
{
    /* 0: keep running the guest; > 0: leave the loop; < 0: fatal. */
    int ret = 0;
    CPUState *cpu = ENV_GET_CPU(env);
    X86CPU *x86_cpu = X86_CPU(cpu);
    struct hax_vcpu_state *vcpu = cpu->hax_vcpu;
    struct hax_tunnel *ht = vcpu->tunnel;

    if (!hax_enabled()) {
        DPRINTF("Trying to vcpu execute at eip:" TARGET_FMT_lx "\n", env->eip);
        return 0;
    }

    cpu->halted = 0;

    /* Handle the requests that QEMU services itself before entering HAX. */
    if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
        cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(x86_cpu->apic_state);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_INIT) {
        DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n",
                cpu->cpu_index);
        do_cpu_init(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
        DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n",
                cpu->cpu_index);
        /* Sync with argument 0 before the SIPI and with 1 after it. */
        hax_vcpu_sync_state(env, 0);
        do_cpu_sipi(x86_cpu);
        hax_vcpu_sync_state(env, 1);
    }

    do {
        int hax_ret;

        if (cpu->exit_request) {
            ret = 1;
            break;
        }

        hax_vcpu_interrupt(env);

        /* The iothread lock is dropped while the guest runs. */
        qemu_mutex_unlock_iothread();
        hax_ret = hax_vcpu_run(vcpu);
        qemu_mutex_lock_iothread();
        current_cpu = cpu;

        /* Simply continue the vcpu_run if system call interrupted */
        if (hax_ret == -EINTR || hax_ret == -EAGAIN) {
            DPRINTF("io window interrupted\n");
            /* ret is still 0 here, so the loop condition re-enters. */
            continue;
        }

        if (hax_ret < 0) {
            fprintf(stderr, "vcpu run failed for vcpu  %x\n", vcpu->vcpu_id);
            abort();
        }
        /* Dispatch on the exit status the module left in the tunnel. */
        switch (ht->_exit_status) {
        case HAX_EXIT_IO:
            ret = hax_handle_io(env, ht->pio._df, ht->pio._port,
                            ht->pio._direction,
                            ht->pio._size, ht->pio._count, vcpu->iobuf);
            break;
        case HAX_EXIT_FAST_MMIO:
            ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf);
            break;
        /* Guest state changed, currently only for shutdown */
        case HAX_EXIT_STATECHANGE:
            fprintf(stdout, "VCPU shutdown request\n");
            qemu_system_shutdown_request();
            hax_vcpu_sync_state(env, 0);
            ret = 1;
            break;
        case HAX_EXIT_UNKNOWN_VMEXIT:
            fprintf(stderr, "Unknown VMX exit %x from guest\n",
                    ht->_exit_reason);
            qemu_system_reset_request();
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, fprintf, 0);
            /* Negative: reported to the caller as fatal. */
            ret = -1;
            break;
        case HAX_EXIT_HLT:
            /*
             * With a hard interrupt or NMI pending, ret stays 0 and the
             * guest is simply re-entered.
             */
            if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
                !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
                /*
                 * Nothing pending: set IF in QEMU's copy of eflags, mark the
                 * vcpu halted and leave the loop with EXCP_HLT.
                 * NOTE(review): the previous comment here read "hlt
                 * instruction with interrupt disabled is shutdown", which
                 * does not describe what the code does -- confirm intent.
                 */
                env->eflags |= IF_MASK;
                cpu->halted = 1;
                cpu->exception_index = EXCP_HLT;
                ret = 1;
            }
            break;
        /* these exits just go back into the hax module */
        case HAX_EXIT_INTERRUPT:
        case HAX_EXIT_PAUSED:
            break;
        case HAX_EXIT_MMIO:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unsupported MMIO emulation\n");
            ret = -1;
            break;
        case HAX_EXIT_REAL:
            /* Should not happen on UG system */
            fprintf(stderr, "HAX: unimplemented real mode emulation\n");
            ret = -1;
            break;
        default:
            fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status);
            qemu_system_reset_request();
            hax_vcpu_sync_state(env, 0);
            cpu_dump_state(cpu, stderr, fprintf, 0);
            /* Positive: leaves the loop without being treated as fatal. */
            ret = 1;
            break;
        }
    } while (!ret);

    /* Acknowledge an exit request and surface it as EXCP_INTERRUPT. */
    if (cpu->exit_request) {
        cpu->exit_request = 0;
        cpu->exception_index = EXCP_INTERRUPT;
    }
    return ret < 0;
}
 595
 596 static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
 597 {
 598     CPUArchState *env = cpu->env_ptr;
 599
 600     hax_arch_get_registers(env);
 601     cpu->hax_vcpu_dirty = true;
 602 }
 603
 604 void hax_cpu_synchronize_state(CPUState *cpu)
 605 {
 606     if (!cpu->hax_vcpu_dirty) {
 607         run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL);
 608     }
 609 }
 610
 611 static void do_hax_cpu_synchronize_post_reset(CPUState *cpu,
 612                                               run_on_cpu_data arg)
 613 {
 614     CPUArchState *env = cpu->env_ptr;
 615
 616     hax_vcpu_sync_state(env, 1);
 617     cpu->hax_vcpu_dirty = false;
 618 }
 619
 620 void hax_cpu_synchronize_post_reset(CPUState *cpu)
 621 {
 622     run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
 623 }
 624
 625 static void do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg)
 626 {
 627     CPUArchState *env = cpu->env_ptr;
 628
 629     hax_vcpu_sync_state(env, 1);
 630     cpu->hax_vcpu_dirty = false;
 631 }
 632
 633 void hax_cpu_synchronize_post_init(CPUState *cpu)
 634 {
 635     run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
 636 }
 637
 638 int hax_smp_cpu_exec(CPUState *cpu)
 639 {
 640     CPUArchState *env = (CPUArchState *) (cpu->env_ptr);
 641     int fatal;
 642     int ret;
 643
 644     while (1) {
 645         if (cpu->exception_index >= EXCP_INTERRUPT) {
 646             ret = cpu->exception_index;
 647             cpu->exception_index = -1;
 648             break;
 649         }
 650
 651         fatal = hax_vcpu_hax_exec(env);
 652
 653         if (fatal) {
 654             fprintf(stderr, "Unsupported HAX vcpu return\n");
 655             abort();
 656         }
 657     }
 658
 659     return ret;
 660 }
 661
 662 static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
 663 {
 664     memset(lhs, 0, sizeof(struct segment_desc_t));
 665     lhs->selector = rhs->selector;
 666     lhs->base = rhs->base;
 667     lhs->limit = rhs->limit;
 668     lhs->type = 3;
 669     lhs->present = 1;
 670     lhs->dpl = 3;
 671     lhs->operand_size = 0;
 672     lhs->desc = 1;
 673     lhs->long_mode = 0;
 674     lhs->granularity = 0;
 675     lhs->available = 0;
 676 }
 677
 678 static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs)
 679 {
 680     lhs->selector = rhs->selector;
 681     lhs->base = rhs->base;
 682     lhs->limit = rhs->limit;
 683     lhs->flags = (rhs->type << DESC_TYPE_SHIFT)
 684         | (rhs->present * DESC_P_MASK)
 685         | (rhs->dpl << DESC_DPL_SHIFT)
 686         | (rhs->operand_size << DESC_B_SHIFT)
 687         | (rhs->desc * DESC_S_MASK)
 688         | (rhs->long_mode << DESC_L_SHIFT)
 689         | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK);
 690 }
 691
 692 static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs)
 693 {
 694     unsigned flags = rhs->flags;
 695
 696     memset(lhs, 0, sizeof(struct segment_desc_t));
 697     lhs->selector = rhs->selector;
 698     lhs->base = rhs->base;
 699     lhs->limit = rhs->limit;
 700     lhs->type = (flags >> DESC_TYPE_SHIFT) & 15;
 701     lhs->present = (flags & DESC_P_MASK) != 0;
 702     lhs->dpl = rhs->selector & 3;
 703     lhs->operand_size = (flags >> DESC_B_SHIFT) & 1;
 704     lhs->desc = (flags & DESC_S_MASK) != 0;
 705     lhs->long_mode = (flags >> DESC_L_SHIFT) & 1;
 706     lhs->granularity = (flags & DESC_G_MASK) != 0;
 707     lhs->available = (flags & DESC_AVL_MASK) != 0;
 708 }
 709
 710 static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set)
 711 {
 712     target_ulong reg = *hax_reg;
 713
 714     if (set) {
 715         *hax_reg = *qemu_reg;
 716     } else {
 717         *qemu_reg = reg;
 718     }
 719 }
 720
 721 /* The sregs has been synced with HAX kernel already before this call */
 722 static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs)
 723 {
 724     get_seg(&env->segs[R_CS], &sregs->_cs);
 725     get_seg(&env->segs[R_DS], &sregs->_ds);
 726     get_seg(&env->segs[R_ES], &sregs->_es);
 727     get_seg(&env->segs[R_FS], &sregs->_fs);
 728     get_seg(&env->segs[R_GS], &sregs->_gs);
 729     get_seg(&env->segs[R_SS], &sregs->_ss);
 730
 731     get_seg(&env->tr, &sregs->_tr);
 732     get_seg(&env->ldt, &sregs->_ldt);
 733     env->idt.limit = sregs->_idt.limit;
 734     env->idt.base = sregs->_idt.base;
 735     env->gdt.limit = sregs->_gdt.limit;
 736     env->gdt.base = sregs->_gdt.base;
 737     return 0;
 738 }
 739
 740 static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs)
 741 {
 742     if ((env->eflags & VM_MASK)) {
 743         set_v8086_seg(&sregs->_cs, &env->segs[R_CS]);
 744         set_v8086_seg(&sregs->_ds, &env->segs[R_DS]);
 745         set_v8086_seg(&sregs->_es, &env->segs[R_ES]);
 746         set_v8086_seg(&sregs->_fs, &env->segs[R_FS]);
 747         set_v8086_seg(&sregs->_gs, &env->segs[R_GS]);
 748         set_v8086_seg(&sregs->_ss, &env->segs[R_SS]);
 749     } else {
 750         set_seg(&sregs->_cs, &env->segs[R_CS]);
 751         set_seg(&sregs->_ds, &env->segs[R_DS]);
 752         set_seg(&sregs->_es, &env->segs[R_ES]);
 753         set_seg(&sregs->_fs, &env->segs[R_FS]);
 754         set_seg(&sregs->_gs, &env->segs[R_GS]);
 755         set_seg(&sregs->_ss, &env->segs[R_SS]);
 756
 757         if (env->cr[0] & CR0_PE_MASK) {
 758             /* force ss cpl to cs cpl */
 759             sregs->_ss.selector = (sregs->_ss.selector & ~3) |
 760                                   (sregs->_cs.selector & 3);
 761             sregs->_ss.dpl = sregs->_ss.selector & 3;
 762         }
 763     }
 764
 765     set_seg(&sregs->_tr, &env->tr);
 766     set_seg(&sregs->_ldt, &env->ldt);
 767     sregs->_idt.limit = env->idt.limit;
 768     sregs->_idt.base = env->idt.base;
 769     sregs->_gdt.limit = env->gdt.limit;
 770     sregs->_gdt.base = env->gdt.base;
 771     return 0;
 772 }
 773
/*
 * After the register state has been fetched from the kernel module, some
 * derived QEMU emulator state has to be updated as well: this recomputes
 * the bits of env->hflags that depend on CS/SS, CR0, CR4, EFLAGS and EFER.
 * Always returns 0.
 */
static int hax_setup_qemu_emulator(CPUArchState *env)
{

/*
 * Despite its name this mask selects the hflags bits that are NOT recomputed
 * below; those bits of the old env->hflags are carried over unchanged.
 * The macro is defined inside the function but, being a preprocessor
 * definition, stays visible for the rest of the file.
 */
#define HFLAG_COPY_MASK (~( \
  HF_CPL_MASK | HF_PE_MASK | HF_MP_MASK | HF_EM_MASK | \
  HF_TS_MASK | HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK | \
  HF_OSFXSR_MASK | HF_LMA_MASK | HF_CS32_MASK | \
  HF_SS32_MASK | HF_CS64_MASK | HF_ADDSEG_MASK))

    uint32_t hflags;

    /* CPL comes from the DPL field of the CS descriptor flags. */
    hflags = (env->segs[R_CS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK;
    /* Move CR0.PE, then CR0.MP/EM/TS, to their hflags positions. */
    hflags |= (env->cr[0] & CR0_PE_MASK) << (HF_PE_SHIFT - CR0_PE_SHIFT);
    hflags |= (env->cr[0] << (HF_MP_SHIFT - CR0_MP_SHIFT)) &
        (HF_MP_MASK | HF_EM_MASK | HF_TS_MASK);
    /* TF, VM and IOPL are taken from eflags without any shifting. */
    hflags |= (env->eflags & (HF_TF_MASK | HF_VM_MASK | HF_IOPL_MASK));
    hflags |= (env->cr[4] & CR4_OSFXSR_MASK) <<
              (HF_OSFXSR_SHIFT - CR4_OSFXSR_SHIFT);

    if (env->efer & MSR_EFER_LMA) {
        hflags |= HF_LMA_MASK;
    }

    if ((hflags & HF_LMA_MASK) && (env->segs[R_CS].flags & DESC_L_MASK)) {
        /* Long mode active and CS.L set: 64-bit code segment. */
        hflags |= HF_CS32_MASK | HF_SS32_MASK | HF_CS64_MASK;
    } else {
        /* Otherwise CS32/SS32 follow the B bit of CS and SS respectively. */
        hflags |= (env->segs[R_CS].flags & DESC_B_MASK) >>
            (DESC_B_SHIFT - HF_CS32_SHIFT);
        hflags |= (env->segs[R_SS].flags & DESC_B_MASK) >>
            (DESC_B_SHIFT - HF_SS32_SHIFT);
        if (!(env->cr[0] & CR0_PE_MASK) ||
            (env->eflags & VM_MASK) || !(hflags & HF_CS32_MASK)) {
            /* PE clear, VM set, or 16-bit code: ADDSEG is always set. */
            hflags |= HF_ADDSEG_MASK;
        } else {
            /* Otherwise ADDSEG is set only if DS, ES or SS has a base != 0. */
            hflags |= ((env->segs[R_DS].base |
                        env->segs[R_ES].base |
                        env->segs[R_SS].base) != 0) << HF_ADDSEG_SHIFT;
        }
    }

    /*
     * Clear SMM in the freshly computed value.  HF_SMM_MASK is not one of
     * the bits excluded by HFLAG_COPY_MASK, so whatever SMM state env->hflags
     * already holds is still carried over by the merge below.
     */
    hflags &= ~HF_SMM_MASK;

    /* Keep the bits that were not recomputed and merge in the new ones. */
    env->hflags = (env->hflags & HFLAG_COPY_MASK) | hflags;
    return 0;
}
 823
 824 static int hax_sync_vcpu_register(CPUArchState *env, int set)
 825 {
 826     struct vcpu_state_t regs;
 827     int ret;
 828     memset(&regs, 0, sizeof(struct vcpu_state_t));
 829
 830     if (!set) {
 831         ret = hax_sync_vcpu_state(env, &regs, 0);
 832         if (ret < 0) {
 833             return -1;
 834         }
 835     }
 836
 837     /* generic register */
 838     hax_getput_reg(&regs._rax, &env->regs[R_EAX], set);
 839     hax_getput_reg(&regs._rbx, &env->regs[R_EBX], set);
 840     hax_getput_reg(&regs._rcx, &env->regs[R_ECX], set);
 841     hax_getput_reg(&regs._rdx, &env->regs[R_EDX], set);
 842     hax_getput_reg(&regs._rsi, &env->regs[R_ESI], set);
 843     hax_getput_reg(&regs._rdi, &env->regs[R_EDI], set);
 844     hax_getput_reg(&regs._rsp, &env->regs[R_ESP], set);
 845     hax_getput_reg(&regs._rbp, &env->regs[R_EBP], set);
 846 #ifdef TARGET_X86_64
 847     hax_getput_reg(&regs._r8, &env->regs[8], set);
 848     hax_getput_reg(&regs._r9, &env->regs[9], set);
 849     hax_getput_reg(&regs._r10, &env->regs[10], set);
 850     hax_getput_reg(&regs._r11, &env->regs[11], set);
 851     hax_getput_reg(&regs._r12, &env->regs[12], set);
 852     hax_getput_reg(&regs._r13, &env->regs[13], set);
 853     hax_getput_reg(&regs._r14, &env->regs[14], set);
 854     hax_getput_reg(&regs._r15, &env->regs[15], set);
 855 #endif
 856     hax_getput_reg(&regs._rflags, &env->eflags, set);
 857     hax_getput_reg(&regs._rip, &env->eip, set);
 858
 859     if (set) {
 860         regs._cr0 = env->cr[0];
 861         regs._cr2 = env->cr[2];
 862         regs._cr3 = env->cr[3];
 863         regs._cr4 = env->cr[4];
 864         hax_set_segments(env, &regs);
 865     } else {
 866         env->cr[0] = regs._cr0;
 867         env->cr[2] = regs._cr2;
 868         env->cr[3] = regs._cr3;
 869         env->cr[4] = regs._cr4;
 870         hax_get_segments(env, &regs);
 871     }
 872
 873     if (set) {
 874         ret = hax_sync_vcpu_state(env, &regs, 1);
 875         if (ret < 0) {
 876             return -1;
 877         }
 878     }
 879     if (!set) {
 880         hax_setup_qemu_emulator(env);
 881     }
 882     return 0;
 883 }
 884
 885 static void hax_msr_entry_set(struct vmx_msr *item, uint32_t index,
 886                               uint64_t value)
 887 {
 888     item->entry = index;
 889     item->value = value;
 890 }
 891
 892 static int hax_get_msrs(CPUArchState *env)
 893 {
 894     struct hax_msr_data md;
 895     struct vmx_msr *msrs = md.entries;
 896     int ret, i, n;
 897
 898     n = 0;
 899     msrs[n++].entry = MSR_IA32_SYSENTER_CS;
 900     msrs[n++].entry = MSR_IA32_SYSENTER_ESP;
 901     msrs[n++].entry = MSR_IA32_SYSENTER_EIP;
 902     msrs[n++].entry = MSR_IA32_TSC;
 903 #ifdef TARGET_X86_64
 904     msrs[n++].entry = MSR_EFER;
 905     msrs[n++].entry = MSR_STAR;
 906     msrs[n++].entry = MSR_LSTAR;
 907     msrs[n++].entry = MSR_CSTAR;
 908     msrs[n++].entry = MSR_FMASK;
 909     msrs[n++].entry = MSR_KERNELGSBASE;
 910 #endif
 911     md.nr_msr = n;
 912     ret = hax_sync_msr(env, &md, 0);
 913     if (ret < 0) {
 914         return ret;
 915     }
 916
 917     for (i = 0; i < md.done; i++) {
 918         switch (msrs[i].entry) {
 919         case MSR_IA32_SYSENTER_CS:
 920             env->sysenter_cs = msrs[i].value;
 921             break;
 922         case MSR_IA32_SYSENTER_ESP:
 923             env->sysenter_esp = msrs[i].value;
 924             break;
 925         case MSR_IA32_SYSENTER_EIP:
 926             env->sysenter_eip = msrs[i].value;
 927             break;
 928         case MSR_IA32_TSC:
 929             env->tsc = msrs[i].value;
 930             break;
 931 #ifdef TARGET_X86_64
 932         case MSR_EFER:
 933             env->efer = msrs[i].value;
 934             break;
 935         case MSR_STAR:
 936             env->star = msrs[i].value;
 937             break;
 938         case MSR_LSTAR:
 939             env->lstar = msrs[i].value;
 940             break;
 941         case MSR_CSTAR:
 942             env->cstar = msrs[i].value;
 943             break;
 944         case MSR_FMASK:
 945             env->fmask = msrs[i].value;
 946             break;
 947         case MSR_KERNELGSBASE:
 948             env->kernelgsbase = msrs[i].value;
 949             break;
 950 #endif
 951         }
 952     }
 953
 954     return 0;
 955 }
 956
 957 static int hax_set_msrs(CPUArchState *env)
 958 {
 959     struct hax_msr_data md;
 960     struct vmx_msr *msrs;
 961     msrs = md.entries;
 962     int n = 0;
 963
 964     memset(&md, 0, sizeof(struct hax_msr_data));
 965     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
 966     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
 967     hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
 968     hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
 969 #ifdef TARGET_X86_64
 970     hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer);
 971     hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
 972     hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
 973     hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
 974     hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
 975     hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
 976 #endif
 977     md.nr_msr = n;
 978     md.done = 0;
 979
 980     return hax_sync_msr(env, &md, 1);
 981 }
 982
 983 static int hax_get_fpu(CPUArchState *env)
 984 {
 985     struct fx_layout fpu;
 986     int i, ret;
 987
 988     ret = hax_sync_fpu(env, &fpu, 0);
 989     if (ret < 0) {
 990         return ret;
 991     }
 992
 993     env->fpstt = (fpu.fsw >> 11) & 7;
 994     env->fpus = fpu.fsw;
 995     env->fpuc = fpu.fcw;
 996     for (i = 0; i < 8; ++i) {
 997         env->fptags[i] = !((fpu.ftw >> i) & 1);
 998     }
 999     memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs));
1000
1001     for (i = 0; i < 8; i++) {
1002         env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]);
1003         env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]);
1004         if (CPU_NB_REGS > 8) {
1005             env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]);
1006             env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]);
1007         }
1008     }
1009     env->mxcsr = fpu.mxcsr;
1010
1011     return 0;
1012 }
1013
1014 static int hax_set_fpu(CPUArchState *env)
1015 {
1016     struct fx_layout fpu;
1017     int i;
1018
1019     memset(&fpu, 0, sizeof(fpu));
1020     fpu.fsw = env->fpus & ~(7 << 11);
1021     fpu.fsw |= (env->fpstt & 7) << 11;
1022     fpu.fcw = env->fpuc;
1023
1024     for (i = 0; i < 8; ++i) {
1025         fpu.ftw |= (!env->fptags[i]) << i;
1026     }
1027
1028     memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs));
1029     for (i = 0; i < 8; i++) {
1030         stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0));
1031         stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1));
1032         if (CPU_NB_REGS > 8) {
1033             stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0));
1034             stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1));
1035         }
1036     }
1037
1038     fpu.mxcsr = env->mxcsr;
1039
1040     return hax_sync_fpu(env, &fpu, 1);
1041 }
1042
1043 static int hax_arch_get_registers(CPUArchState *env)
1044 {
1045     int ret;
1046
1047     ret = hax_sync_vcpu_register(env, 0);
1048     if (ret < 0) {
1049         return ret;
1050     }
1051
1052     ret = hax_get_fpu(env);
1053     if (ret < 0) {
1054         return ret;
1055     }
1056
1057     ret = hax_get_msrs(env);
1058     if (ret < 0) {
1059         return ret;
1060     }
1061
1062     return 0;
1063 }
1064
1065 static int hax_arch_set_registers(CPUArchState *env)
1066 {
1067     int ret;
1068     ret = hax_sync_vcpu_register(env, 1);
1069
1070     if (ret < 0) {
1071         fprintf(stderr, "Failed to sync vcpu reg\n");
1072         return ret;
1073     }
1074     ret = hax_set_fpu(env);
1075     if (ret < 0) {
1076         fprintf(stderr, "FPU failed\n");
1077         return ret;
1078     }
1079     ret = hax_set_msrs(env);
1080     if (ret < 0) {
1081         fprintf(stderr, "MSR failed\n");
1082         return ret;
1083     }
1084
1085     return 0;
1086 }
1087
1088 static void hax_vcpu_sync_state(CPUArchState *env, int modified)
1089 {
1090     if (hax_enabled()) {
1091         if (modified) {
1092             hax_arch_set_registers(env);
1093         } else {
1094             hax_arch_get_registers(env);
1095         }
1096     }
1097 }
1098
/*
 * Much simpler than KVM, at least in the first stage, because we don't
 * need to consider device pass-through or the framebuffer, and we may
 * even remove the BIOS altogether.
 */
1104 int hax_sync_vcpus(void)
1105 {
1106     if (hax_enabled()) {
1107         CPUState *cpu;
1108
1109         cpu = first_cpu;
1110         if (!cpu) {
1111             return 0;
1112         }
1113
1114         for (; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1115             int ret;
1116
1117             ret = hax_arch_set_registers(cpu->env_ptr);
1118             if (ret < 0) {
1119                 return ret;
1120             }
1121         }
1122     }
1123
1124     return 0;
1125 }
1126
1127 void hax_reset_vcpu_state(void *opaque)
1128 {
1129     CPUState *cpu;
1130     for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) {
1131         cpu->hax_vcpu->tunnel->user_event_pending = 0;
1132         cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0;
1133     }
1134 }
1135
1136 static void hax_accel_class_init(ObjectClass *oc, void *data)
1137 {
1138     AccelClass *ac = ACCEL_CLASS(oc);
1139     ac->name = "HAX";
1140     ac->init_machine = hax_accel_init;
1141     ac->allowed = &hax_allowed;
1142 }
1143
1144 static const TypeInfo hax_accel_type = {
1145     .name = ACCEL_CLASS_NAME("hax"),
1146     .parent = TYPE_ACCEL,
1147     .class_init = hax_accel_class_init,
1148 };
1149
1150 static void hax_type_init(void)
1151 {
1152     type_register_static(&hax_accel_type);
1153 }
1154
1155 type_init(hax_type_init);