target/i386/whpx/whpx-all.c
1 /*
2 * QEMU Windows Hypervisor Platform accelerator (WHPX)
4 * Copyright Microsoft Corp. 2017
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
9 */
11 #include "qemu/osdep.h"
12 #include "cpu.h"
13 #include "exec/address-spaces.h"
14 #include "exec/ioport.h"
15 #include "qemu-common.h"
16 #include "sysemu/accel.h"
17 #include "sysemu/whpx.h"
18 #include "sysemu/cpus.h"
19 #include "sysemu/runstate.h"
20 #include "qemu/main-loop.h"
21 #include "hw/boards.h"
22 #include "hw/i386/ioapic.h"
23 #include "hw/i386/apic_internal.h"
24 #include "qemu/error-report.h"
25 #include "qapi/error.h"
26 #include "qapi/qapi-types-common.h"
27 #include "qapi/qapi-visit-common.h"
28 #include "migration/blocker.h"
29 #include <winerror.h>
31 #include "whpx-cpus.h"
32 #include "whpx-internal.h"
34 #define HYPERV_APIC_BUS_FREQUENCY (200000000ULL)
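/*
 * Note (added): whpx_set_registers() and whpx_get_registers() below walk this
 * array in order and assert the expected register name at key indices, so the
 * ordering of the entries is load-bearing.
 */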
36 static const WHV_REGISTER_NAME whpx_register_names[] = {
38 /* X64 General purpose registers */
39 WHvX64RegisterRax,
40 WHvX64RegisterRcx,
41 WHvX64RegisterRdx,
42 WHvX64RegisterRbx,
43 WHvX64RegisterRsp,
44 WHvX64RegisterRbp,
45 WHvX64RegisterRsi,
46 WHvX64RegisterRdi,
47 WHvX64RegisterR8,
48 WHvX64RegisterR9,
49 WHvX64RegisterR10,
50 WHvX64RegisterR11,
51 WHvX64RegisterR12,
52 WHvX64RegisterR13,
53 WHvX64RegisterR14,
54 WHvX64RegisterR15,
55 WHvX64RegisterRip,
56 WHvX64RegisterRflags,
58 /* X64 Segment registers */
59 WHvX64RegisterEs,
60 WHvX64RegisterCs,
61 WHvX64RegisterSs,
62 WHvX64RegisterDs,
63 WHvX64RegisterFs,
64 WHvX64RegisterGs,
65 WHvX64RegisterLdtr,
66 WHvX64RegisterTr,
68 /* X64 Table registers */
69 WHvX64RegisterIdtr,
70 WHvX64RegisterGdtr,
72 /* X64 Control Registers */
73 WHvX64RegisterCr0,
74 WHvX64RegisterCr2,
75 WHvX64RegisterCr3,
76 WHvX64RegisterCr4,
77 WHvX64RegisterCr8,
79 /* X64 Debug Registers */
80 /*
81 * WHvX64RegisterDr0,
82 * WHvX64RegisterDr1,
83 * WHvX64RegisterDr2,
84 * WHvX64RegisterDr3,
85 * WHvX64RegisterDr6,
86 * WHvX64RegisterDr7,
87 */
89 /* X64 Floating Point and Vector Registers */
90 WHvX64RegisterXmm0,
91 WHvX64RegisterXmm1,
92 WHvX64RegisterXmm2,
93 WHvX64RegisterXmm3,
94 WHvX64RegisterXmm4,
95 WHvX64RegisterXmm5,
96 WHvX64RegisterXmm6,
97 WHvX64RegisterXmm7,
98 WHvX64RegisterXmm8,
99 WHvX64RegisterXmm9,
100 WHvX64RegisterXmm10,
101 WHvX64RegisterXmm11,
102 WHvX64RegisterXmm12,
103 WHvX64RegisterXmm13,
104 WHvX64RegisterXmm14,
105 WHvX64RegisterXmm15,
106 WHvX64RegisterFpMmx0,
107 WHvX64RegisterFpMmx1,
108 WHvX64RegisterFpMmx2,
109 WHvX64RegisterFpMmx3,
110 WHvX64RegisterFpMmx4,
111 WHvX64RegisterFpMmx5,
112 WHvX64RegisterFpMmx6,
113 WHvX64RegisterFpMmx7,
114 WHvX64RegisterFpControlStatus,
115 WHvX64RegisterXmmControlStatus,
117 /* X64 MSRs */
118 WHvX64RegisterEfer,
119 #ifdef TARGET_X86_64
120 WHvX64RegisterKernelGsBase,
121 #endif
122 WHvX64RegisterApicBase,
123 /* WHvX64RegisterPat, */
124 WHvX64RegisterSysenterCs,
125 WHvX64RegisterSysenterEip,
126 WHvX64RegisterSysenterEsp,
127 WHvX64RegisterStar,
128 #ifdef TARGET_X86_64
129 WHvX64RegisterLstar,
130 WHvX64RegisterCstar,
131 WHvX64RegisterSfmask,
132 #endif
134 /* Interrupt / Event Registers */
135 /*
136 * WHvRegisterPendingInterruption,
137 * WHvRegisterInterruptState,
138 * WHvRegisterPendingEvent0,
139 * WHvRegisterPendingEvent1
140 * WHvX64RegisterDeliverabilityNotifications,
141 */
142 };
144 struct whpx_register_set {
145 WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)];
146 };
148 struct whpx_vcpu {
149 WHV_EMULATOR_HANDLE emulator;
150 bool window_registered;
151 bool interruptable;
152 bool ready_for_pic_interrupt;
153 uint64_t tpr;
154 uint64_t apic_base;
155 bool interruption_pending;
157 /* Must be the last field as it may have a tail */
158 WHV_RUN_VP_EXIT_CONTEXT exit_ctx;
159 };
161 static bool whpx_allowed;
162 static bool whp_dispatch_initialized;
163 static HMODULE hWinHvPlatform, hWinHvEmulation;
164 static uint32_t max_vcpu_index;
165 struct whpx_state whpx_global;
166 struct WHPDispatch whp_dispatch;
169 /*
170 * VP support
171 */
173 static struct whpx_vcpu *get_whpx_vcpu(CPUState *cpu)
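    /* Added note: the WHPX per-vCPU state is stored in the generic
     * cpu->hax_vcpu pointer by whpx_init_vcpu(); recover it with a cast. */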
175 return (struct whpx_vcpu *)cpu->hax_vcpu;
178 static WHV_X64_SEGMENT_REGISTER whpx_seg_q2h(const SegmentCache *qs, int v86,
179 int r86)
181 WHV_X64_SEGMENT_REGISTER hs;
182 unsigned flags = qs->flags;
184 hs.Base = qs->base;
185 hs.Limit = qs->limit;
186 hs.Selector = qs->selector;
188 if (v86) {
189 hs.Attributes = 0;
190 hs.SegmentType = 3;
191 hs.Present = 1;
192 hs.DescriptorPrivilegeLevel = 3;
193 hs.NonSystemSegment = 1;
195 } else {
196 hs.Attributes = (flags >> DESC_TYPE_SHIFT);
198 if (r86) {
199 /* hs.Base &= 0xfffff; */
200 }
201 }
203 return hs;
204 }
206 static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs)
208 SegmentCache qs;
210 qs.base = hs->Base;
211 qs.limit = hs->Limit;
212 qs.selector = hs->Selector;
214 qs.flags = ((uint32_t)hs->Attributes) << DESC_TYPE_SHIFT;
216 return qs;
219 static int whpx_set_tsc(CPUState *cpu)
221 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
222 WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
223 WHV_REGISTER_VALUE tsc_val;
224 HRESULT hr;
225 struct whpx_state *whpx = &whpx_global;
227 /*
228 * Suspend the partition prior to setting the TSC to reduce the variance
229 * in TSC across vCPUs. When the first vCPU runs post suspend, the
230 * partition is automatically resumed.
231 */
232 if (whp_dispatch.WHvSuspendPartitionTime) {
234 /*
235 * Failing to suspend the partition while setting the TSC is not a fatal
236 * error. It just increases the likelihood of TSC variance between
237 * vCPUs, and some guest OSes are able to handle that just fine.
238 */
239 hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition);
240 if (FAILED(hr)) {
241 warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr);
245 tsc_val.Reg64 = env->tsc;
246 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
247 whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
248 if (FAILED(hr)) {
249 error_report("WHPX: Failed to set TSC, hr=%08lx", hr);
250 return -1;
253 return 0;
256 static void whpx_set_registers(CPUState *cpu, int level)
258 struct whpx_state *whpx = &whpx_global;
259 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
260 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
261 X86CPU *x86_cpu = X86_CPU(cpu);
262 struct whpx_register_set vcxt;
263 HRESULT hr;
264 int idx;
265 int idx_next;
266 int i;
267 int v86, r86;
269 assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
271 /*
272 * The following MSRs have side effects on the guest or are too heavy for
273 * runtime. Limit them to full state update.
274 */
275 if (level >= WHPX_SET_RESET_STATE) {
276 whpx_set_tsc(cpu);
279 memset(&vcxt, 0, sizeof(struct whpx_register_set));
281 v86 = (env->eflags & VM_MASK);
282 r86 = !(env->cr[0] & CR0_PE_MASK);
284 vcpu->tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
285 vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state);
287 idx = 0;
289 /* Indexes for first 16 registers match between HV and QEMU definitions */
290 idx_next = 16;
291 for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
292 vcxt.values[idx].Reg64 = (uint64_t)env->regs[idx];
294 idx = idx_next;
296 /* Same goes for RIP and RFLAGS */
297 assert(whpx_register_names[idx] == WHvX64RegisterRip);
298 vcxt.values[idx++].Reg64 = env->eip;
300 assert(whpx_register_names[idx] == WHvX64RegisterRflags);
301 vcxt.values[idx++].Reg64 = env->eflags;
303 /* Translate 6+4 segment registers. HV and QEMU order matches */
304 assert(idx == WHvX64RegisterEs);
305 for (i = 0; i < 6; i += 1, idx += 1) {
306 vcxt.values[idx].Segment = whpx_seg_q2h(&env->segs[i], v86, r86);
309 assert(idx == WHvX64RegisterLdtr);
310 vcxt.values[idx++].Segment = whpx_seg_q2h(&env->ldt, 0, 0);
312 assert(idx == WHvX64RegisterTr);
313 vcxt.values[idx++].Segment = whpx_seg_q2h(&env->tr, 0, 0);
315 assert(idx == WHvX64RegisterIdtr);
316 vcxt.values[idx].Table.Base = env->idt.base;
317 vcxt.values[idx].Table.Limit = env->idt.limit;
318 idx += 1;
320 assert(idx == WHvX64RegisterGdtr);
321 vcxt.values[idx].Table.Base = env->gdt.base;
322 vcxt.values[idx].Table.Limit = env->gdt.limit;
323 idx += 1;
325 /* CR0, 2, 3, 4, 8 */
326 assert(whpx_register_names[idx] == WHvX64RegisterCr0);
327 vcxt.values[idx++].Reg64 = env->cr[0];
328 assert(whpx_register_names[idx] == WHvX64RegisterCr2);
329 vcxt.values[idx++].Reg64 = env->cr[2];
330 assert(whpx_register_names[idx] == WHvX64RegisterCr3);
331 vcxt.values[idx++].Reg64 = env->cr[3];
332 assert(whpx_register_names[idx] == WHvX64RegisterCr4);
333 vcxt.values[idx++].Reg64 = env->cr[4];
334 assert(whpx_register_names[idx] == WHvX64RegisterCr8);
335 vcxt.values[idx++].Reg64 = vcpu->tpr;
337 /* 8 Debug Registers - Skipped */
339 /* 16 XMM registers */
340 assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
341 idx_next = idx + 16;
342 for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
343 vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0);
344 vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1);
346 idx = idx_next;
348 /* 8 FP registers */
349 assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
350 for (i = 0; i < 8; i += 1, idx += 1) {
351 vcxt.values[idx].Fp.AsUINT128.Low64 = env->fpregs[i].mmx.MMX_Q(0);
352 /* vcxt.values[idx].Fp.AsUINT128.High64 =
353 env->fpregs[i].mmx.MMX_Q(1);
354 */
355 }
357 /* FP control status register */
358 assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
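    /*
     * Added note: QEMU tracks the x87 top-of-stack pointer (fpstt) separately
     * from the status word (fpus); the hypervisor expects a single FSW, so
     * TOP is folded back into bits 11-13 (mask 0x3800) below.
     */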
359 vcxt.values[idx].FpControlStatus.FpControl = env->fpuc;
360 vcxt.values[idx].FpControlStatus.FpStatus =
361 (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
362 vcxt.values[idx].FpControlStatus.FpTag = 0;
363 for (i = 0; i < 8; ++i) {
364 vcxt.values[idx].FpControlStatus.FpTag |= (!env->fptags[i]) << i;
366 vcxt.values[idx].FpControlStatus.Reserved = 0;
367 vcxt.values[idx].FpControlStatus.LastFpOp = env->fpop;
368 vcxt.values[idx].FpControlStatus.LastFpRip = env->fpip;
369 idx += 1;
371 /* XMM control status register */
372 assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
373 vcxt.values[idx].XmmControlStatus.LastFpRdp = 0;
374 vcxt.values[idx].XmmControlStatus.XmmStatusControl = env->mxcsr;
375 vcxt.values[idx].XmmControlStatus.XmmStatusControlMask = 0x0000ffff;
376 idx += 1;
378 /* MSRs */
379 assert(whpx_register_names[idx] == WHvX64RegisterEfer);
380 vcxt.values[idx++].Reg64 = env->efer;
381 #ifdef TARGET_X86_64
382 assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
383 vcxt.values[idx++].Reg64 = env->kernelgsbase;
384 #endif
386 assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
387 vcxt.values[idx++].Reg64 = vcpu->apic_base;
389 /* WHvX64RegisterPat - Skipped */
391 assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
392 vcxt.values[idx++].Reg64 = env->sysenter_cs;
393 assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
394 vcxt.values[idx++].Reg64 = env->sysenter_eip;
395 assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
396 vcxt.values[idx++].Reg64 = env->sysenter_esp;
397 assert(whpx_register_names[idx] == WHvX64RegisterStar);
398 vcxt.values[idx++].Reg64 = env->star;
399 #ifdef TARGET_X86_64
400 assert(whpx_register_names[idx] == WHvX64RegisterLstar);
401 vcxt.values[idx++].Reg64 = env->lstar;
402 assert(whpx_register_names[idx] == WHvX64RegisterCstar);
403 vcxt.values[idx++].Reg64 = env->cstar;
404 assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
405 vcxt.values[idx++].Reg64 = env->fmask;
406 #endif
408 /* Interrupt / Event Registers - Skipped */
410 assert(idx == RTL_NUMBER_OF(whpx_register_names));
412 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
413 whpx->partition, cpu->cpu_index,
414 whpx_register_names,
415 RTL_NUMBER_OF(whpx_register_names),
416 &vcxt.values[0]);
418 if (FAILED(hr)) {
419 error_report("WHPX: Failed to set virtual processor context, hr=%08lx",
420 hr);
423 return;
426 static int whpx_get_tsc(CPUState *cpu)
428 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
429 WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
430 WHV_REGISTER_VALUE tsc_val;
431 HRESULT hr;
432 struct whpx_state *whpx = &whpx_global;
434 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
435 whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
436 if (FAILED(hr)) {
437 error_report("WHPX: Failed to get TSC, hr=%08lx", hr);
438 return -1;
441 env->tsc = tsc_val.Reg64;
442 return 0;
445 static void whpx_get_registers(CPUState *cpu)
447 struct whpx_state *whpx = &whpx_global;
448 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
449 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
450 X86CPU *x86_cpu = X86_CPU(cpu);
451 struct whpx_register_set vcxt;
452 uint64_t tpr, apic_base;
453 HRESULT hr;
454 int idx;
455 int idx_next;
456 int i;
458 assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
460 if (!env->tsc_valid) {
461 whpx_get_tsc(cpu);
462 env->tsc_valid = !runstate_is_running();
465 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
466 whpx->partition, cpu->cpu_index,
467 whpx_register_names,
468 RTL_NUMBER_OF(whpx_register_names),
469 &vcxt.values[0]);
470 if (FAILED(hr)) {
471 error_report("WHPX: Failed to get virtual processor context, hr=%08lx",
472 hr);
475 idx = 0;
477 /* Indexes for first 16 registers match between HV and QEMU definitions */
478 idx_next = 16;
479 for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
480 env->regs[idx] = vcxt.values[idx].Reg64;
482 idx = idx_next;
484 /* Same goes for RIP and RFLAGS */
485 assert(whpx_register_names[idx] == WHvX64RegisterRip);
486 env->eip = vcxt.values[idx++].Reg64;
487 assert(whpx_register_names[idx] == WHvX64RegisterRflags);
488 env->eflags = vcxt.values[idx++].Reg64;
490 /* Translate 6+4 segment registers. HV and QEMU order matches */
491 assert(idx == WHvX64RegisterEs);
492 for (i = 0; i < 6; i += 1, idx += 1) {
493 env->segs[i] = whpx_seg_h2q(&vcxt.values[idx].Segment);
496 assert(idx == WHvX64RegisterLdtr);
497 env->ldt = whpx_seg_h2q(&vcxt.values[idx++].Segment);
498 assert(idx == WHvX64RegisterTr);
499 env->tr = whpx_seg_h2q(&vcxt.values[idx++].Segment);
500 assert(idx == WHvX64RegisterIdtr);
501 env->idt.base = vcxt.values[idx].Table.Base;
502 env->idt.limit = vcxt.values[idx].Table.Limit;
503 idx += 1;
504 assert(idx == WHvX64RegisterGdtr);
505 env->gdt.base = vcxt.values[idx].Table.Base;
506 env->gdt.limit = vcxt.values[idx].Table.Limit;
507 idx += 1;
509 /* CR0, 2, 3, 4, 8 */
510 assert(whpx_register_names[idx] == WHvX64RegisterCr0);
511 env->cr[0] = vcxt.values[idx++].Reg64;
512 assert(whpx_register_names[idx] == WHvX64RegisterCr2);
513 env->cr[2] = vcxt.values[idx++].Reg64;
514 assert(whpx_register_names[idx] == WHvX64RegisterCr3);
515 env->cr[3] = vcxt.values[idx++].Reg64;
516 assert(whpx_register_names[idx] == WHvX64RegisterCr4);
517 env->cr[4] = vcxt.values[idx++].Reg64;
518 assert(whpx_register_names[idx] == WHvX64RegisterCr8);
519 tpr = vcxt.values[idx++].Reg64;
520 if (tpr != vcpu->tpr) {
521 vcpu->tpr = tpr;
522 cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
525 /* 8 Debug Registers - Skipped */
527 /* 16 XMM registers */
528 assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
529 idx_next = idx + 16;
530 for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
531 env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64;
532 env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64;
534 idx = idx_next;
536 /* 8 FP registers */
537 assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
538 for (i = 0; i < 8; i += 1, idx += 1) {
539 env->fpregs[i].mmx.MMX_Q(0) = vcxt.values[idx].Fp.AsUINT128.Low64;
540 /* env->fpregs[i].mmx.MMX_Q(1) =
541 vcxt.values[idx].Fp.AsUINT128.High64;
542 */
543 }
545 /* FP control status register */
546 assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
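    /* Added note: split the hypervisor's FSW back into QEMU's fpus (status
     * flags) and fpstt (TOP, bits 11-13). */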
547 env->fpuc = vcxt.values[idx].FpControlStatus.FpControl;
548 env->fpstt = (vcxt.values[idx].FpControlStatus.FpStatus >> 11) & 0x7;
549 env->fpus = vcxt.values[idx].FpControlStatus.FpStatus & ~0x3800;
550 for (i = 0; i < 8; ++i) {
551 env->fptags[i] = !((vcxt.values[idx].FpControlStatus.FpTag >> i) & 1);
553 env->fpop = vcxt.values[idx].FpControlStatus.LastFpOp;
554 env->fpip = vcxt.values[idx].FpControlStatus.LastFpRip;
555 idx += 1;
557 /* XMM control status register */
558 assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
559 env->mxcsr = vcxt.values[idx].XmmControlStatus.XmmStatusControl;
560 idx += 1;
562 /* MSRs */
563 assert(whpx_register_names[idx] == WHvX64RegisterEfer);
564 env->efer = vcxt.values[idx++].Reg64;
565 #ifdef TARGET_X86_64
566 assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
567 env->kernelgsbase = vcxt.values[idx++].Reg64;
568 #endif
570 assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
571 apic_base = vcxt.values[idx++].Reg64;
572 if (apic_base != vcpu->apic_base) {
573 vcpu->apic_base = apic_base;
574 cpu_set_apic_base(x86_cpu->apic_state, vcpu->apic_base);
577 /* WHvX64RegisterPat - Skipped */
579 assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
580 env->sysenter_cs = vcxt.values[idx++].Reg64;
581 assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
582 env->sysenter_eip = vcxt.values[idx++].Reg64;
583 assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
584 env->sysenter_esp = vcxt.values[idx++].Reg64;
585 assert(whpx_register_names[idx] == WHvX64RegisterStar);
586 env->star = vcxt.values[idx++].Reg64;
587 #ifdef TARGET_X86_64
588 assert(whpx_register_names[idx] == WHvX64RegisterLstar);
589 env->lstar = vcxt.values[idx++].Reg64;
590 assert(whpx_register_names[idx] == WHvX64RegisterCstar);
591 env->cstar = vcxt.values[idx++].Reg64;
592 assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
593 env->fmask = vcxt.values[idx++].Reg64;
594 #endif
596 /* Interrupt / Event Registers - Skipped */
598 assert(idx == RTL_NUMBER_OF(whpx_register_names));
600 if (whpx_apic_in_platform()) {
601 whpx_apic_get(x86_cpu->apic_state);
604 return;
607 static HRESULT CALLBACK whpx_emu_ioport_callback(
608 void *ctx,
609 WHV_EMULATOR_IO_ACCESS_INFO *IoAccess)
611 MemTxAttrs attrs = { 0 };
612 address_space_rw(&address_space_io, IoAccess->Port, attrs,
613 &IoAccess->Data, IoAccess->AccessSize,
614 IoAccess->Direction);
615 return S_OK;
618 static HRESULT CALLBACK whpx_emu_mmio_callback(
619 void *ctx,
620 WHV_EMULATOR_MEMORY_ACCESS_INFO *ma)
622 cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize,
623 ma->Direction);
624 return S_OK;
627 static HRESULT CALLBACK whpx_emu_getreg_callback(
628 void *ctx,
629 const WHV_REGISTER_NAME *RegisterNames,
630 UINT32 RegisterCount,
631 WHV_REGISTER_VALUE *RegisterValues)
633 HRESULT hr;
634 struct whpx_state *whpx = &whpx_global;
635 CPUState *cpu = (CPUState *)ctx;
637 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
638 whpx->partition, cpu->cpu_index,
639 RegisterNames, RegisterCount,
640 RegisterValues);
641 if (FAILED(hr)) {
642 error_report("WHPX: Failed to get virtual processor registers,"
643 " hr=%08lx", hr);
646 return hr;
649 static HRESULT CALLBACK whpx_emu_setreg_callback(
650 void *ctx,
651 const WHV_REGISTER_NAME *RegisterNames,
652 UINT32 RegisterCount,
653 const WHV_REGISTER_VALUE *RegisterValues)
655 HRESULT hr;
656 struct whpx_state *whpx = &whpx_global;
657 CPUState *cpu = (CPUState *)ctx;
659 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
660 whpx->partition, cpu->cpu_index,
661 RegisterNames, RegisterCount,
662 RegisterValues);
663 if (FAILED(hr)) {
664 error_report("WHPX: Failed to set virtual processor registers,"
665 " hr=%08lx", hr);
668 /*
669 * The emulator just successfully wrote the register state. We clear the
670 * dirty state so we avoid the double write on resume of the VP.
671 */
672 cpu->vcpu_dirty = false;
674 return hr;
677 static HRESULT CALLBACK whpx_emu_translate_callback(
678 void *ctx,
679 WHV_GUEST_VIRTUAL_ADDRESS Gva,
680 WHV_TRANSLATE_GVA_FLAGS TranslateFlags,
681 WHV_TRANSLATE_GVA_RESULT_CODE *TranslationResult,
682 WHV_GUEST_PHYSICAL_ADDRESS *Gpa)
684 HRESULT hr;
685 struct whpx_state *whpx = &whpx_global;
686 CPUState *cpu = (CPUState *)ctx;
687 WHV_TRANSLATE_GVA_RESULT res;
689 hr = whp_dispatch.WHvTranslateGva(whpx->partition, cpu->cpu_index,
690 Gva, TranslateFlags, &res, Gpa);
691 if (FAILED(hr)) {
692 error_report("WHPX: Failed to translate GVA, hr=%08lx", hr);
693 } else {
694 *TranslationResult = res.ResultCode;
697 return hr;
700 static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks = {
701 .Size = sizeof(WHV_EMULATOR_CALLBACKS),
702 .WHvEmulatorIoPortCallback = whpx_emu_ioport_callback,
703 .WHvEmulatorMemoryCallback = whpx_emu_mmio_callback,
704 .WHvEmulatorGetVirtualProcessorRegisters = whpx_emu_getreg_callback,
705 .WHvEmulatorSetVirtualProcessorRegisters = whpx_emu_setreg_callback,
706 .WHvEmulatorTranslateGvaPage = whpx_emu_translate_callback,
709 static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx)
711 HRESULT hr;
712 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
713 WHV_EMULATOR_STATUS emu_status;
715 hr = whp_dispatch.WHvEmulatorTryMmioEmulation(
716 vcpu->emulator, cpu,
717 &vcpu->exit_ctx.VpContext, ctx,
718 &emu_status);
719 if (FAILED(hr)) {
720 error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr);
721 return -1;
724 if (!emu_status.EmulationSuccessful) {
725 error_report("WHPX: Failed to emulate MMIO access with"
726 " EmulatorReturnStatus: %u", emu_status.AsUINT32);
727 return -1;
730 return 0;
733 static int whpx_handle_portio(CPUState *cpu,
734 WHV_X64_IO_PORT_ACCESS_CONTEXT *ctx)
736 HRESULT hr;
737 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
738 WHV_EMULATOR_STATUS emu_status;
740 hr = whp_dispatch.WHvEmulatorTryIoEmulation(
741 vcpu->emulator, cpu,
742 &vcpu->exit_ctx.VpContext, ctx,
743 &emu_status);
744 if (FAILED(hr)) {
745 error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr);
746 return -1;
749 if (!emu_status.EmulationSuccessful) {
750 error_report("WHPX: Failed to emulate PortIO access with"
751 " EmulatorReturnStatus: %u", emu_status.AsUINT32);
752 return -1;
755 return 0;
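/*
 * Added note: whpx_handle_halt() returns 1 when the vCPU really goes to sleep
 * (no unmasked hard interrupt and no pending NMI), which makes
 * whpx_vcpu_run() leave its inner loop with EXCP_HLT; it returns 0 when the
 * halt should be ignored because an interrupt is deliverable.
 */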
758 static int whpx_handle_halt(CPUState *cpu)
760 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
761 int ret = 0;
763 qemu_mutex_lock_iothread();
764 if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
765 (env->eflags & IF_MASK)) &&
766 !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
767 cpu->exception_index = EXCP_HLT;
768 cpu->halted = true;
769 ret = 1;
771 qemu_mutex_unlock_iothread();
773 return ret;
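/*
 * Added note: whpx_vcpu_pre_run() prepares the vCPU for the next
 * WHvRunVirtualProcessor() call. It queues a pending NMI or PIC interrupt as
 * a WHvRegisterPendingInterruption / WHvRegisterPendingEvent, propagates the
 * APIC TPR into CR8, and requests an interrupt-window notification when a
 * pending interrupt must wait for IF.
 */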
776 static void whpx_vcpu_pre_run(CPUState *cpu)
778 HRESULT hr;
779 struct whpx_state *whpx = &whpx_global;
780 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
781 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
782 X86CPU *x86_cpu = X86_CPU(cpu);
783 int irq;
784 uint8_t tpr;
785 WHV_X64_PENDING_INTERRUPTION_REGISTER new_int;
786 UINT32 reg_count = 0;
787 WHV_REGISTER_VALUE reg_values[3];
788 WHV_REGISTER_NAME reg_names[3];
790 memset(&new_int, 0, sizeof(new_int));
791 memset(reg_values, 0, sizeof(reg_values));
793 qemu_mutex_lock_iothread();
795 /* Inject NMI */
796 if (!vcpu->interruption_pending &&
797 cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
798 if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
799 cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
800 vcpu->interruptable = false;
801 new_int.InterruptionType = WHvX64PendingNmi;
802 new_int.InterruptionPending = 1;
803 new_int.InterruptionVector = 2;
805 if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
806 cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
810 /*
811 * Force the VCPU out of its inner loop to process any INIT requests or
812 * commit pending TPR access.
813 */
814 if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
815 if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
816 !(env->hflags & HF_SMM_MASK)) {
817 cpu->exit_request = 1;
819 if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
820 cpu->exit_request = 1;
824 /* Get pending hard interruption or replay one that was overwritten */
825 if (!whpx_apic_in_platform()) {
826 if (!vcpu->interruption_pending &&
827 vcpu->interruptable && (env->eflags & IF_MASK)) {
828 assert(!new_int.InterruptionPending);
829 if (cpu->interrupt_request & CPU_INTERRUPT_HARD) {
830 cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
831 irq = cpu_get_pic_interrupt(env);
832 if (irq >= 0) {
833 new_int.InterruptionType = WHvX64PendingInterrupt;
834 new_int.InterruptionPending = 1;
835 new_int.InterruptionVector = irq;
840 /* Setup interrupt state if new one was prepared */
841 if (new_int.InterruptionPending) {
842 reg_values[reg_count].PendingInterruption = new_int;
843 reg_names[reg_count] = WHvRegisterPendingInterruption;
844 reg_count += 1;
846 } else if (vcpu->ready_for_pic_interrupt &&
847 (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
848 cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
849 irq = cpu_get_pic_interrupt(env);
850 if (irq >= 0) {
851 reg_names[reg_count] = WHvRegisterPendingEvent;
852 reg_values[reg_count].ExtIntEvent = (WHV_X64_PENDING_EXT_INT_EVENT)
854 .EventPending = 1,
855 .EventType = WHvX64PendingEventExtInt,
856 .Vector = irq,
858 reg_count += 1;
862 /* Sync the TPR to the CR8 if it was modified during the intercept */
863 tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
864 if (tpr != vcpu->tpr) {
865 vcpu->tpr = tpr;
866 reg_values[reg_count].Reg64 = tpr;
867 cpu->exit_request = 1;
868 reg_names[reg_count] = WHvX64RegisterCr8;
869 reg_count += 1;
872 /* Update the state of the interrupt delivery notification */
873 if (!vcpu->window_registered &&
874 cpu->interrupt_request & CPU_INTERRUPT_HARD) {
875 reg_values[reg_count].DeliverabilityNotifications =
876 (WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER) {
877 .InterruptNotification = 1
879 vcpu->window_registered = 1;
880 reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications;
881 reg_count += 1;
884 qemu_mutex_unlock_iothread();
885 vcpu->ready_for_pic_interrupt = false;
887 if (reg_count) {
888 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
889 whpx->partition, cpu->cpu_index,
890 reg_names, reg_count, reg_values);
891 if (FAILED(hr)) {
892 error_report("WHPX: Failed to set interrupt state registers,"
893 " hr=%08lx", hr);
897 return;
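/*
 * Added note: whpx_vcpu_post_run() captures state reported in the exit
 * context: RFLAGS, the CR8/TPR value (pushed back to the APIC if it changed),
 * whether an interruption is still pending, and whether the vCPU sits in an
 * interrupt shadow.
 */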
900 static void whpx_vcpu_post_run(CPUState *cpu)
902 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
903 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
904 X86CPU *x86_cpu = X86_CPU(cpu);
906 env->eflags = vcpu->exit_ctx.VpContext.Rflags;
908 uint64_t tpr = vcpu->exit_ctx.VpContext.Cr8;
909 if (vcpu->tpr != tpr) {
910 vcpu->tpr = tpr;
911 qemu_mutex_lock_iothread();
912 cpu_set_apic_tpr(x86_cpu->apic_state, vcpu->tpr);
913 qemu_mutex_unlock_iothread();
916 vcpu->interruption_pending =
917 vcpu->exit_ctx.VpContext.ExecutionState.InterruptionPending;
919 vcpu->interruptable =
920 !vcpu->exit_ctx.VpContext.ExecutionState.InterruptShadow;
922 return;
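/*
 * Added note: whpx_vcpu_process_async_events() handles events that must be
 * processed outside the hypervisor run loop: INIT, SIPI, pending APIC polls,
 * reported TPR accesses, and waking the vCPU from halt when an interrupt or
 * NMI becomes deliverable.
 */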
925 static void whpx_vcpu_process_async_events(CPUState *cpu)
927 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
928 X86CPU *x86_cpu = X86_CPU(cpu);
929 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
931 if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
932 !(env->hflags & HF_SMM_MASK)) {
933 whpx_cpu_synchronize_state(cpu);
934 do_cpu_init(x86_cpu);
935 vcpu->interruptable = true;
938 if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
939 cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
940 apic_poll_irq(x86_cpu->apic_state);
943 if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
944 (env->eflags & IF_MASK)) ||
945 (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
946 cpu->halted = false;
949 if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
950 whpx_cpu_synchronize_state(cpu);
951 do_cpu_sipi(x86_cpu);
954 if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
955 cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
956 whpx_cpu_synchronize_state(cpu);
957 apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
958 env->tpr_access_type);
961 return;
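/*
 * Added note: whpx_vcpu_run() is the main execution loop of a WHPX vCPU:
 * flush dirty register state, let whpx_vcpu_pre_run() queue pending
 * interrupts, run the virtual processor, then dispatch on the exit reason
 * until a handler returns non-zero or an error occurs.
 */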
964 static int whpx_vcpu_run(CPUState *cpu)
966 HRESULT hr;
967 struct whpx_state *whpx = &whpx_global;
968 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
969 int ret;
971 whpx_vcpu_process_async_events(cpu);
972 if (cpu->halted && !whpx_apic_in_platform()) {
973 cpu->exception_index = EXCP_HLT;
974 qatomic_set(&cpu->exit_request, false);
975 return 0;
978 qemu_mutex_unlock_iothread();
979 cpu_exec_start(cpu);
981 do {
982 if (cpu->vcpu_dirty) {
983 whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
984 cpu->vcpu_dirty = false;
987 whpx_vcpu_pre_run(cpu);
989 if (qatomic_read(&cpu->exit_request)) {
990 whpx_vcpu_kick(cpu);
993 hr = whp_dispatch.WHvRunVirtualProcessor(
994 whpx->partition, cpu->cpu_index,
995 &vcpu->exit_ctx, sizeof(vcpu->exit_ctx));
997 if (FAILED(hr)) {
998 error_report("WHPX: Failed to exec a virtual processor,"
999 " hr=%08lx", hr);
1000 ret = -1;
1001 break;
1004 whpx_vcpu_post_run(cpu);
1006 switch (vcpu->exit_ctx.ExitReason) {
1007 case WHvRunVpExitReasonMemoryAccess:
1008 ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess);
1009 break;
1011 case WHvRunVpExitReasonX64IoPortAccess:
1012 ret = whpx_handle_portio(cpu, &vcpu->exit_ctx.IoPortAccess);
1013 break;
1015 case WHvRunVpExitReasonX64InterruptWindow:
1016 vcpu->ready_for_pic_interrupt = 1;
1017 vcpu->window_registered = 0;
1018 ret = 0;
1019 break;
1021 case WHvRunVpExitReasonX64ApicEoi:
1022 assert(whpx_apic_in_platform());
1023 ioapic_eoi_broadcast(vcpu->exit_ctx.ApicEoi.InterruptVector);
1024 break;
1026 case WHvRunVpExitReasonX64Halt:
1027 ret = whpx_handle_halt(cpu);
1028 break;
1030 case WHvRunVpExitReasonX64ApicInitSipiTrap: {
1031 WHV_INTERRUPT_CONTROL ipi = {0};
1032 uint64_t icr = vcpu->exit_ctx.ApicInitSipi.ApicIcr;
1033 uint32_t delivery_mode =
1034 (icr & APIC_ICR_DELIV_MOD) >> APIC_ICR_DELIV_MOD_SHIFT;
1035 int dest_shorthand =
1036 (icr & APIC_ICR_DEST_SHORT) >> APIC_ICR_DEST_SHORT_SHIFT;
1037 bool broadcast = false;
1038 bool include_self = false;
1039 uint32_t i;
1041 /* We only registered for INIT and SIPI exits. */
1042 if ((delivery_mode != APIC_DM_INIT) &&
1043 (delivery_mode != APIC_DM_SIPI)) {
1044 error_report(
1045 "WHPX: Unexpected APIC exit that is not a INIT or SIPI");
1046 break;
1049 if (delivery_mode == APIC_DM_INIT) {
1050 ipi.Type = WHvX64InterruptTypeInit;
1051 } else {
1052 ipi.Type = WHvX64InterruptTypeSipi;
1055 ipi.DestinationMode =
1056 ((icr & APIC_ICR_DEST_MOD) >> APIC_ICR_DEST_MOD_SHIFT) ?
1057 WHvX64InterruptDestinationModeLogical :
1058 WHvX64InterruptDestinationModePhysical;
1060 ipi.TriggerMode =
1061 ((icr & APIC_ICR_TRIGGER_MOD) >> APIC_ICR_TRIGGER_MOD_SHIFT) ?
1062 WHvX64InterruptTriggerModeLevel :
1063 WHvX64InterruptTriggerModeEdge;
1065 ipi.Vector = icr & APIC_VECTOR_MASK;
1066 switch (dest_shorthand) {
1067 /* no shorthand. Bits 56-63 contain the destination. */
1068 case 0:
1069 ipi.Destination = (icr >> 56) & APIC_VECTOR_MASK;
1070 hr = whp_dispatch.WHvRequestInterrupt(whpx->partition,
1071 &ipi, sizeof(ipi));
1072 if (FAILED(hr)) {
1073 error_report("WHPX: Failed to request interrupt hr=%08lx",
1074 hr);
1077 break;
1079 /* self */
1080 case 1:
1081 include_self = true;
1082 break;
1084 /* broadcast, including self */
1085 case 2:
1086 broadcast = true;
1087 include_self = true;
1088 break;
1090 /* broadcast, excluding self */
1091 case 3:
1092 broadcast = true;
1093 break;
1096 if (!broadcast && !include_self) {
1097 break;
1100 for (i = 0; i <= max_vcpu_index; i++) {
1101 if (i == cpu->cpu_index && !include_self) {
1102 continue;
1105 /*
1106 * Assuming that APIC Ids are identity mapped since
1107 * WHvX64RegisterApicId & WHvX64RegisterInitialApicId registers
1108 * are not handled yet and the hypervisor doesn't allow the
1109 * guest to modify the APIC ID.
1110 */
1111 ipi.Destination = i;
1112 hr = whp_dispatch.WHvRequestInterrupt(whpx->partition,
1113 &ipi, sizeof(ipi));
1114 if (FAILED(hr)) {
1115 error_report(
1116 "WHPX: Failed to request SIPI for %d, hr=%08lx",
1117 i, hr);
1121 break;
1124 case WHvRunVpExitReasonCanceled:
1125 cpu->exception_index = EXCP_INTERRUPT;
1126 ret = 1;
1127 break;
1129 case WHvRunVpExitReasonX64MsrAccess: {
1130 WHV_REGISTER_VALUE reg_values[3] = {0};
1131 WHV_REGISTER_NAME reg_names[3];
1132 UINT32 reg_count;
1134 reg_names[0] = WHvX64RegisterRip;
1135 reg_names[1] = WHvX64RegisterRax;
1136 reg_names[2] = WHvX64RegisterRdx;
1138 reg_values[0].Reg64 =
1139 vcpu->exit_ctx.VpContext.Rip +
1140 vcpu->exit_ctx.VpContext.InstructionLength;
1142 /*
1143 * For all unsupported MSR access we:
1144 * ignore writes
1145 * return 0 on read.
1146 */
1147 reg_count = vcpu->exit_ctx.MsrAccess.AccessInfo.IsWrite ?
1148 1 : 3;
1150 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1151 whpx->partition,
1152 cpu->cpu_index,
1153 reg_names, reg_count,
1154 reg_values);
1156 if (FAILED(hr)) {
1157 error_report("WHPX: Failed to set MsrAccess state "
1158 " registers, hr=%08lx", hr);
1160 ret = 0;
1161 break;
1163 case WHvRunVpExitReasonX64Cpuid: {
1164 WHV_REGISTER_VALUE reg_values[5];
1165 WHV_REGISTER_NAME reg_names[5];
1166 UINT32 reg_count = 5;
1167 UINT64 cpuid_fn, rip = 0, rax = 0, rcx = 0, rdx = 0, rbx = 0;
1168 X86CPU *x86_cpu = X86_CPU(cpu);
1169 CPUX86State *env = &x86_cpu->env;
1171 memset(reg_values, 0, sizeof(reg_values));
1173 rip = vcpu->exit_ctx.VpContext.Rip +
1174 vcpu->exit_ctx.VpContext.InstructionLength;
1175 cpuid_fn = vcpu->exit_ctx.CpuidAccess.Rax;
1177 /*
1178 * Ideally, these should be supplied to the hypervisor during VCPU
1179 * initialization and it should be able to satisfy this request.
1180 * But, currently, WHPX doesn't support setting CPUID values in the
1181 * hypervisor once the partition has been setup, which is too late
1182 * since VCPUs are realized later. For now, use the values from
1183 * QEMU to satisfy these requests, until WHPX adds support for
1184 * being able to set these values in the hypervisor at runtime.
1185 */
1186 cpu_x86_cpuid(env, cpuid_fn, 0, (UINT32 *)&rax, (UINT32 *)&rbx,
1187 (UINT32 *)&rcx, (UINT32 *)&rdx);
1188 switch (cpuid_fn) {
1189 case 0x40000000:
1190 /* Expose the vmware cpu frequency cpuid leaf */
1191 rax = 0x40000010;
1192 rbx = rcx = rdx = 0;
1193 break;
1195 case 0x40000010:
1196 rax = env->tsc_khz;
1197 rbx = env->apic_bus_freq / 1000; /* Hz to KHz */
1198 rcx = rdx = 0;
1199 break;
1201 case 0x80000001:
1202 /* Remove any support of OSVW */
1203 rcx &= ~CPUID_EXT3_OSVW;
1204 break;
1207 reg_names[0] = WHvX64RegisterRip;
1208 reg_names[1] = WHvX64RegisterRax;
1209 reg_names[2] = WHvX64RegisterRcx;
1210 reg_names[3] = WHvX64RegisterRdx;
1211 reg_names[4] = WHvX64RegisterRbx;
1213 reg_values[0].Reg64 = rip;
1214 reg_values[1].Reg64 = rax;
1215 reg_values[2].Reg64 = rcx;
1216 reg_values[3].Reg64 = rdx;
1217 reg_values[4].Reg64 = rbx;
1219 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1220 whpx->partition, cpu->cpu_index,
1221 reg_names,
1222 reg_count,
1223 reg_values);
1225 if (FAILED(hr)) {
1226 error_report("WHPX: Failed to set CpuidAccess state registers,"
1227 " hr=%08lx", hr);
1229 ret = 0;
1230 break;
1232 case WHvRunVpExitReasonNone:
1233 case WHvRunVpExitReasonUnrecoverableException:
1234 case WHvRunVpExitReasonInvalidVpRegisterValue:
1235 case WHvRunVpExitReasonUnsupportedFeature:
1236 case WHvRunVpExitReasonException:
1237 default:
1238 error_report("WHPX: Unexpected VP exit code %d",
1239 vcpu->exit_ctx.ExitReason);
1240 whpx_get_registers(cpu);
1241 qemu_mutex_lock_iothread();
1242 qemu_system_guest_panicked(cpu_get_crash_info(cpu));
1243 qemu_mutex_unlock_iothread();
1244 break;
1247 } while (!ret);
1249 cpu_exec_end(cpu);
1250 qemu_mutex_lock_iothread();
1251 current_cpu = cpu;
1253 qatomic_set(&cpu->exit_request, false);
1255 return ret < 0;
1258 static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
1260 if (!cpu->vcpu_dirty) {
1261 whpx_get_registers(cpu);
1262 cpu->vcpu_dirty = true;
1266 static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
1267 run_on_cpu_data arg)
1269 whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
1270 cpu->vcpu_dirty = false;
1273 static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
1274 run_on_cpu_data arg)
1276 whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
1277 cpu->vcpu_dirty = false;
1280 static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu,
1281 run_on_cpu_data arg)
1283 cpu->vcpu_dirty = true;
1286 /*
1287 * CPU support.
1288 */
1290 void whpx_cpu_synchronize_state(CPUState *cpu)
1292 if (!cpu->vcpu_dirty) {
1293 run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL);
1297 void whpx_cpu_synchronize_post_reset(CPUState *cpu)
1299 run_on_cpu(cpu, do_whpx_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
1302 void whpx_cpu_synchronize_post_init(CPUState *cpu)
1304 run_on_cpu(cpu, do_whpx_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
1307 void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
1309 run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
1312 /*
1313 * Vcpu support.
1314 */
1316 static Error *whpx_migration_blocker;
1318 static void whpx_cpu_update_state(void *opaque, int running, RunState state)
1320 CPUX86State *env = opaque;
1322 if (running) {
1323 env->tsc_valid = false;
1327 int whpx_init_vcpu(CPUState *cpu)
1329 HRESULT hr;
1330 struct whpx_state *whpx = &whpx_global;
1331 struct whpx_vcpu *vcpu = NULL;
1332 Error *local_error = NULL;
1333 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
1334 X86CPU *x86_cpu = X86_CPU(cpu);
1335 UINT64 freq = 0;
1336 int ret;
1338 /* Add migration blockers for all unsupported features of the
1339 * Windows Hypervisor Platform
1340 */
1341 if (whpx_migration_blocker == NULL) {
1342 error_setg(&whpx_migration_blocker,
1343 "State blocked due to non-migratable CPUID feature support,"
1344 "dirty memory tracking support, and XSAVE/XRSTOR support");
1346 (void)migrate_add_blocker(whpx_migration_blocker, &local_error);
1347 if (local_error) {
1348 error_report_err(local_error);
1349 migrate_del_blocker(whpx_migration_blocker);
1350 error_free(whpx_migration_blocker);
1351 ret = -EINVAL;
1352 goto error;
1356 vcpu = g_malloc0(sizeof(struct whpx_vcpu));
1358 if (!vcpu) {
1359 error_report("WHPX: Failed to allocte VCPU context.");
1360 ret = -ENOMEM;
1361 goto error;
1364 hr = whp_dispatch.WHvEmulatorCreateEmulator(
1365 &whpx_emu_callbacks,
1366 &vcpu->emulator);
1367 if (FAILED(hr)) {
1368 error_report("WHPX: Failed to setup instruction completion support,"
1369 " hr=%08lx", hr);
1370 ret = -EINVAL;
1371 goto error;
1374 hr = whp_dispatch.WHvCreateVirtualProcessor(
1375 whpx->partition, cpu->cpu_index, 0);
1376 if (FAILED(hr)) {
1377 error_report("WHPX: Failed to create a virtual processor,"
1378 " hr=%08lx", hr);
1379 whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
1380 ret = -EINVAL;
1381 goto error;
1384 /*
1385 * vcpu's TSC frequency is either specified by the user, or taken from
1386 * Hyper-V if the former is not present. In the latter case, we query it
1387 * from Hyper-V and record it in env->tsc_khz, so that vcpu's TSC
1388 * frequency can be migrated later via this field.
1389 */
1390 if (!env->tsc_khz) {
1391 hr = whp_dispatch.WHvGetCapability(
1392 WHvCapabilityCodeProcessorClockFrequency, &freq, sizeof(freq),
1393 NULL);
1394 if (hr != WHV_E_UNKNOWN_CAPABILITY) {
1395 if (FAILED(hr)) {
1396 printf("WHPX: Failed to query tsc frequency, hr=0x%08lx\n", hr);
1397 } else {
1398 env->tsc_khz = freq / 1000; /* Hz to KHz */
1403 env->apic_bus_freq = HYPERV_APIC_BUS_FREQUENCY;
1404 hr = whp_dispatch.WHvGetCapability(
1405 WHvCapabilityCodeInterruptClockFrequency, &freq, sizeof(freq), NULL);
1406 if (hr != WHV_E_UNKNOWN_CAPABILITY) {
1407 if (FAILED(hr)) {
1408 printf("WHPX: Failed to query apic bus frequency hr=0x%08lx\n", hr);
1409 } else {
1410 env->apic_bus_freq = freq;
1414 /*
1415 * If the vmware cpuid frequency leaf option is set, and we have a valid
1416 * tsc value, trap the corresponding cpuid's.
1417 */
1418 if (x86_cpu->vmware_cpuid_freq && env->tsc_khz) {
1419 UINT32 cpuidExitList[] = {1, 0x80000001, 0x40000000, 0x40000010};
1421 hr = whp_dispatch.WHvSetPartitionProperty(
1422 whpx->partition,
1423 WHvPartitionPropertyCodeCpuidExitList,
1424 cpuidExitList,
1425 RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
1427 if (FAILED(hr)) {
1428 error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
1429 hr);
1430 ret = -EINVAL;
1431 goto error;
1435 vcpu->interruptable = true;
1436 cpu->vcpu_dirty = true;
1437 cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu;
1438 max_vcpu_index = max(max_vcpu_index, cpu->cpu_index);
1439 qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr);
1441 return 0;
1443 error:
1444 g_free(vcpu);
1446 return ret;
1449 int whpx_vcpu_exec(CPUState *cpu)
1451 int ret;
1452 int fatal;
1454 for (;;) {
1455 if (cpu->exception_index >= EXCP_INTERRUPT) {
1456 ret = cpu->exception_index;
1457 cpu->exception_index = -1;
1458 break;
1461 fatal = whpx_vcpu_run(cpu);
1463 if (fatal) {
1464 error_report("WHPX: Failed to exec a virtual processor");
1465 abort();
1469 return ret;
1472 void whpx_destroy_vcpu(CPUState *cpu)
1474 struct whpx_state *whpx = &whpx_global;
1475 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
1477 whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index);
1478 whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
1479 g_free(cpu->hax_vcpu);
1480 return;
1483 void whpx_vcpu_kick(CPUState *cpu)
1485 struct whpx_state *whpx = &whpx_global;
1486 whp_dispatch.WHvCancelRunVirtualProcessor(
1487 whpx->partition, cpu->cpu_index, 0);
1490 /*
1491 * Memory support.
1492 */
1494 static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size,
1495 void *host_va, int add, int rom,
1496 const char *name)
1498 struct whpx_state *whpx = &whpx_global;
1499 HRESULT hr;
1502 if (add) {
1503 printf("WHPX: ADD PA:%p Size:%p, Host:%p, %s, '%s'\n",
1504 (void*)start_pa, (void*)size, host_va,
1505 (rom ? "ROM" : "RAM"), name);
1506 } else {
1507 printf("WHPX: DEL PA:%p Size:%p, Host:%p, '%s'\n",
1508 (void*)start_pa, (void*)size, host_va, name);
1512 if (add) {
1513 hr = whp_dispatch.WHvMapGpaRange(whpx->partition,
1514 host_va,
1515 start_pa,
1516 size,
1517 (WHvMapGpaRangeFlagRead |
1518 WHvMapGpaRangeFlagExecute |
1519 (rom ? 0 : WHvMapGpaRangeFlagWrite)));
1520 } else {
1521 hr = whp_dispatch.WHvUnmapGpaRange(whpx->partition,
1522 start_pa,
1523 size);
1526 if (FAILED(hr)) {
1527 error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes,"
1528 " Host:%p, hr=%08lx",
1529 (add ? "MAP" : "UNMAP"), name,
1530 (void *)(uintptr_t)start_pa, (void *)size, host_va, hr);
1534 static void whpx_process_section(MemoryRegionSection *section, int add)
1536 MemoryRegion *mr = section->mr;
1537 hwaddr start_pa = section->offset_within_address_space;
1538 ram_addr_t size = int128_get64(section->size);
1539 unsigned int delta;
1540 uint64_t host_va;
1542 if (!memory_region_is_ram(mr)) {
1543 return;
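    /*
     * Added note: only the host-page-aligned, page-sized portion of the
     * section is mapped below; anything smaller than a host page at either
     * end of the section is skipped.
     */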
1546 delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask);
1547 delta &= ~qemu_real_host_page_mask;
1548 if (delta > size) {
1549 return;
1551 start_pa += delta;
1552 size -= delta;
1553 size &= qemu_real_host_page_mask;
1554 if (!size || (start_pa & ~qemu_real_host_page_mask)) {
1555 return;
1558 host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
1559 + section->offset_within_region + delta;
1561 whpx_update_mapping(start_pa, size, (void *)(uintptr_t)host_va, add,
1562 memory_region_is_rom(mr), mr->name);
1565 static void whpx_region_add(MemoryListener *listener,
1566 MemoryRegionSection *section)
1568 memory_region_ref(section->mr);
1569 whpx_process_section(section, 1);
1572 static void whpx_region_del(MemoryListener *listener,
1573 MemoryRegionSection *section)
1575 whpx_process_section(section, 0);
1576 memory_region_unref(section->mr);
1579 static void whpx_transaction_begin(MemoryListener *listener)
1583 static void whpx_transaction_commit(MemoryListener *listener)
1587 static void whpx_log_sync(MemoryListener *listener,
1588 MemoryRegionSection *section)
1590 MemoryRegion *mr = section->mr;
1592 if (!memory_region_is_ram(mr)) {
1593 return;
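    /* Added note: WHPX has no dirty-page tracking (see the migration blocker
     * added in whpx_init_vcpu()), so conservatively mark the whole section
     * dirty. */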
1596 memory_region_set_dirty(mr, 0, int128_get64(section->size));
1599 static MemoryListener whpx_memory_listener = {
1600 .begin = whpx_transaction_begin,
1601 .commit = whpx_transaction_commit,
1602 .region_add = whpx_region_add,
1603 .region_del = whpx_region_del,
1604 .log_sync = whpx_log_sync,
1605 .priority = 10,
1608 static void whpx_memory_init(void)
1610 memory_listener_register(&whpx_memory_listener, &address_space_memory);
1613 /*
1614 * Load the functions from the given library, using the given handle. If a
1615 * handle is provided, it is used, otherwise the library is opened. The
1616 * handle will be updated on return with the opened one.
1617 */
1618 static bool load_whp_dispatch_fns(HMODULE *handle,
1619 WHPFunctionList function_list)
1621 HMODULE hLib = *handle;
1623 #define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
1624 #define WINHV_EMULATION_DLL "WinHvEmulation.dll"
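/*
 * Added note: WHP_LOAD_FIELD_OPTIONAL resolves a symbol but tolerates its
 * absence (used for the supplemental platform functions that may not exist in
 * older WinHvPlatform.dll builds), while WHP_LOAD_FIELD treats a missing
 * symbol as a hard error.
 */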
1625 #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
1626 whp_dispatch.function_name = \
1627 (function_name ## _t)GetProcAddress(hLib, #function_name); \
1629 #define WHP_LOAD_FIELD(return_type, function_name, signature) \
1630 whp_dispatch.function_name = \
1631 (function_name ## _t)GetProcAddress(hLib, #function_name); \
1632 if (!whp_dispatch.function_name) { \
1633 error_report("Could not load function %s", #function_name); \
1634 goto error; \
1637 #define WHP_LOAD_LIB(lib_name, handle_lib) \
1638 if (!handle_lib) { \
1639 handle_lib = LoadLibrary(lib_name); \
1640 if (!handle_lib) { \
1641 error_report("Could not load library %s.", lib_name); \
1642 goto error; \
1646 switch (function_list) {
1647 case WINHV_PLATFORM_FNS_DEFAULT:
1648 WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
1649 LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD)
1650 break;
1652 case WINHV_EMULATION_FNS_DEFAULT:
1653 WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
1654 LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
1655 break;
1657 case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
1658 WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
1659 LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
1660 break;
1663 *handle = hLib;
1664 return true;
1666 error:
1667 if (hLib) {
1668 FreeLibrary(hLib);
1671 return false;
1674 static void whpx_set_kernel_irqchip(Object *obj, Visitor *v,
1675 const char *name, void *opaque,
1676 Error **errp)
1678 struct whpx_state *whpx = &whpx_global;
1679 OnOffSplit mode;
1681 if (!visit_type_OnOffSplit(v, name, &mode, errp)) {
1682 return;
1685 switch (mode) {
1686 case ON_OFF_SPLIT_ON:
1687 whpx->kernel_irqchip_allowed = true;
1688 whpx->kernel_irqchip_required = true;
1689 break;
1691 case ON_OFF_SPLIT_OFF:
1692 whpx->kernel_irqchip_allowed = false;
1693 whpx->kernel_irqchip_required = false;
1694 break;
1696 case ON_OFF_SPLIT_SPLIT:
1697 error_setg(errp, "WHPX: split irqchip currently not supported");
1698 error_append_hint(errp,
1699 "Try without kernel-irqchip or with kernel-irqchip=on|off");
1700 break;
1702 default:
1703 /*
1704 * The value was checked in visit_type_OnOffSplit() above. If
1705 * we get here, then something is wrong in QEMU.
1706 */
1707 abort();
1711 /*
1712 * Partition support
1713 */
1715 static int whpx_accel_init(MachineState *ms)
1717 struct whpx_state *whpx;
1718 int ret;
1719 HRESULT hr;
1720 WHV_CAPABILITY whpx_cap;
1721 UINT32 whpx_cap_size;
1722 WHV_PARTITION_PROPERTY prop;
1723 UINT32 cpuidExitList[] = {1, 0x80000001};
1724 WHV_CAPABILITY_FEATURES features = {0};
1726 whpx = &whpx_global;
1728 if (!init_whp_dispatch()) {
1729 ret = -ENOSYS;
1730 goto error;
1733 whpx->mem_quota = ms->ram_size;
1735 hr = whp_dispatch.WHvGetCapability(
1736 WHvCapabilityCodeHypervisorPresent, &whpx_cap,
1737 sizeof(whpx_cap), &whpx_cap_size);
1738 if (FAILED(hr) || !whpx_cap.HypervisorPresent) {
1739 error_report("WHPX: No accelerator found, hr=%08lx", hr);
1740 ret = -ENOSPC;
1741 goto error;
1744 hr = whp_dispatch.WHvGetCapability(
1745 WHvCapabilityCodeFeatures, &features, sizeof(features), NULL);
1746 if (FAILED(hr)) {
1747 error_report("WHPX: Failed to query capabilities, hr=%08lx", hr);
1748 ret = -EINVAL;
1749 goto error;
1752 hr = whp_dispatch.WHvCreatePartition(&whpx->partition);
1753 if (FAILED(hr)) {
1754 error_report("WHPX: Failed to create partition, hr=%08lx", hr);
1755 ret = -EINVAL;
1756 goto error;
1759 memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
1760 prop.ProcessorCount = ms->smp.cpus;
1761 hr = whp_dispatch.WHvSetPartitionProperty(
1762 whpx->partition,
1763 WHvPartitionPropertyCodeProcessorCount,
1764 &prop,
1765 sizeof(WHV_PARTITION_PROPERTY));
1767 if (FAILED(hr)) {
1768 error_report("WHPX: Failed to set partition core count to %d,"
1769 " hr=%08lx", ms->smp.cores, hr);
1770 ret = -EINVAL;
1771 goto error;
1774 /*
1775 * Error out if WHP doesn't support apic emulation and the user requires
1776 * it.
1777 */
1778 if (whpx->kernel_irqchip_required && (!features.LocalApicEmulation ||
1779 !whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2)) {
1780 error_report("WHPX: kernel irqchip requested, but unavailable. "
1781 "Try without kernel-irqchip or with kernel-irqchip=off");
1782 ret = -EINVAL;
1783 goto error;
1786 if (whpx->kernel_irqchip_allowed && features.LocalApicEmulation &&
1787 whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2) {
1788 WHV_X64_LOCAL_APIC_EMULATION_MODE mode =
1789 WHvX64LocalApicEmulationModeXApic;
1790 printf("WHPX: setting APIC emulation mode in the hypervisor\n");
1791 hr = whp_dispatch.WHvSetPartitionProperty(
1792 whpx->partition,
1793 WHvPartitionPropertyCodeLocalApicEmulationMode,
1794 &mode,
1795 sizeof(mode));
1796 if (FAILED(hr)) {
1797 error_report("WHPX: Failed to enable kernel irqchip hr=%08lx", hr);
1798 if (whpx->kernel_irqchip_required) {
1799 error_report("WHPX: kernel irqchip requested, but unavailable");
1800 ret = -EINVAL;
1801 goto error;
1803 } else {
1804 whpx->apic_in_platform = true;
1808 /* Register for MSR and CPUID exits */
1809 memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
1810 prop.ExtendedVmExits.X64MsrExit = 1;
1811 prop.ExtendedVmExits.X64CpuidExit = 1;
1812 if (whpx_apic_in_platform()) {
1813 prop.ExtendedVmExits.X64ApicInitSipiExitTrap = 1;
1816 hr = whp_dispatch.WHvSetPartitionProperty(
1817 whpx->partition,
1818 WHvPartitionPropertyCodeExtendedVmExits,
1819 &prop,
1820 sizeof(WHV_PARTITION_PROPERTY));
1821 if (FAILED(hr)) {
1822 error_report("WHPX: Failed to enable MSR & CPUIDexit, hr=%08lx", hr);
1823 ret = -EINVAL;
1824 goto error;
1827 hr = whp_dispatch.WHvSetPartitionProperty(
1828 whpx->partition,
1829 WHvPartitionPropertyCodeCpuidExitList,
1830 cpuidExitList,
1831 RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
1833 if (FAILED(hr)) {
1834 error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
1835 hr);
1836 ret = -EINVAL;
1837 goto error;
1840 hr = whp_dispatch.WHvSetupPartition(whpx->partition);
1841 if (FAILED(hr)) {
1842 error_report("WHPX: Failed to setup partition, hr=%08lx", hr);
1843 ret = -EINVAL;
1844 goto error;
1847 whpx_memory_init();
1849 cpus_register_accel(&whpx_cpus);
1851 printf("Windows Hypervisor Platform accelerator is operational\n");
1852 return 0;
1854 error:
1856 if (NULL != whpx->partition) {
1857 whp_dispatch.WHvDeletePartition(whpx->partition);
1858 whpx->partition = NULL;
1861 return ret;
1864 int whpx_enabled(void)
1866 return whpx_allowed;
1869 bool whpx_apic_in_platform(void) {
1870 return whpx_global.apic_in_platform;
1873 static void whpx_accel_class_init(ObjectClass *oc, void *data)
1875 AccelClass *ac = ACCEL_CLASS(oc);
1876 ac->name = "WHPX";
1877 ac->init_machine = whpx_accel_init;
1878 ac->allowed = &whpx_allowed;
1880 object_class_property_add(oc, "kernel-irqchip", "on|off|split",
1881 NULL, whpx_set_kernel_irqchip,
1882 NULL, NULL);
1883 object_class_property_set_description(oc, "kernel-irqchip",
1884 "Configure WHPX in-kernel irqchip");
1887 static void whpx_accel_instance_init(Object *obj)
1889 struct whpx_state *whpx = &whpx_global;
1891 memset(whpx, 0, sizeof(struct whpx_state));
1892 /* Turn on kernel-irqchip, by default */
1893 whpx->kernel_irqchip_allowed = true;
1896 static const TypeInfo whpx_accel_type = {
1897 .name = ACCEL_CLASS_NAME("whpx"),
1898 .parent = TYPE_ACCEL,
1899 .instance_init = whpx_accel_instance_init,
1900 .class_init = whpx_accel_class_init,
1903 static void whpx_type_init(void)
1905 type_register_static(&whpx_accel_type);
1908 bool init_whp_dispatch(void)
1910 if (whp_dispatch_initialized) {
1911 return true;
1914 if (!load_whp_dispatch_fns(&hWinHvPlatform, WINHV_PLATFORM_FNS_DEFAULT)) {
1915 goto error;
1918 if (!load_whp_dispatch_fns(&hWinHvEmulation, WINHV_EMULATION_FNS_DEFAULT)) {
1919 goto error;
1922 assert(load_whp_dispatch_fns(&hWinHvPlatform,
1923 WINHV_PLATFORM_FNS_SUPPLEMENTAL));
1924 whp_dispatch_initialized = true;
1926 return true;
1927 error:
1928 if (hWinHvPlatform) {
1929 FreeLibrary(hWinHvPlatform);
1932 if (hWinHvEmulation) {
1933 FreeLibrary(hWinHvEmulation);
1936 return false;
1939 type_init(whpx_type_init);