target/arm: Let vfp_access_check() handle late NOCP checks
[qemu/ar7.git] / target / i386 / whpx / whpx-all.c
blobf832f286ac3d957b634cde5c998fdbb22b4672fd
1 /*
2 * QEMU Windows Hypervisor Platform accelerator (WHPX)
4 * Copyright Microsoft Corp. 2017
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
9 */
11 #include "qemu/osdep.h"
12 #include "cpu.h"
13 #include "exec/address-spaces.h"
14 #include "exec/ioport.h"
15 #include "qemu-common.h"
16 #include "qemu/accel.h"
17 #include "sysemu/whpx.h"
18 #include "sysemu/cpus.h"
19 #include "sysemu/runstate.h"
20 #include "qemu/main-loop.h"
21 #include "hw/boards.h"
22 #include "hw/i386/ioapic.h"
23 #include "hw/i386/apic_internal.h"
24 #include "qemu/error-report.h"
25 #include "qapi/error.h"
26 #include "qapi/qapi-types-common.h"
27 #include "qapi/qapi-visit-common.h"
28 #include "migration/blocker.h"
29 #include <winerror.h>
31 #include "whpx-internal.h"
32 #include "whpx-accel-ops.h"
34 #include <WinHvPlatform.h>
35 #include <WinHvEmulation.h>
37 #define HYPERV_APIC_BUS_FREQUENCY (200000000ULL)
39 static const WHV_REGISTER_NAME whpx_register_names[] = {
41 /* X64 General purpose registers */
42 WHvX64RegisterRax,
43 WHvX64RegisterRcx,
44 WHvX64RegisterRdx,
45 WHvX64RegisterRbx,
46 WHvX64RegisterRsp,
47 WHvX64RegisterRbp,
48 WHvX64RegisterRsi,
49 WHvX64RegisterRdi,
50 WHvX64RegisterR8,
51 WHvX64RegisterR9,
52 WHvX64RegisterR10,
53 WHvX64RegisterR11,
54 WHvX64RegisterR12,
55 WHvX64RegisterR13,
56 WHvX64RegisterR14,
57 WHvX64RegisterR15,
58 WHvX64RegisterRip,
59 WHvX64RegisterRflags,
61 /* X64 Segment registers */
62 WHvX64RegisterEs,
63 WHvX64RegisterCs,
64 WHvX64RegisterSs,
65 WHvX64RegisterDs,
66 WHvX64RegisterFs,
67 WHvX64RegisterGs,
68 WHvX64RegisterLdtr,
69 WHvX64RegisterTr,
71 /* X64 Table registers */
72 WHvX64RegisterIdtr,
73 WHvX64RegisterGdtr,
75 /* X64 Control Registers */
76 WHvX64RegisterCr0,
77 WHvX64RegisterCr2,
78 WHvX64RegisterCr3,
79 WHvX64RegisterCr4,
80 WHvX64RegisterCr8,
82 /* X64 Debug Registers */
84 * WHvX64RegisterDr0,
85 * WHvX64RegisterDr1,
86 * WHvX64RegisterDr2,
87 * WHvX64RegisterDr3,
88 * WHvX64RegisterDr6,
89 * WHvX64RegisterDr7,
92 /* X64 Floating Point and Vector Registers */
93 WHvX64RegisterXmm0,
94 WHvX64RegisterXmm1,
95 WHvX64RegisterXmm2,
96 WHvX64RegisterXmm3,
97 WHvX64RegisterXmm4,
98 WHvX64RegisterXmm5,
99 WHvX64RegisterXmm6,
100 WHvX64RegisterXmm7,
101 WHvX64RegisterXmm8,
102 WHvX64RegisterXmm9,
103 WHvX64RegisterXmm10,
104 WHvX64RegisterXmm11,
105 WHvX64RegisterXmm12,
106 WHvX64RegisterXmm13,
107 WHvX64RegisterXmm14,
108 WHvX64RegisterXmm15,
109 WHvX64RegisterFpMmx0,
110 WHvX64RegisterFpMmx1,
111 WHvX64RegisterFpMmx2,
112 WHvX64RegisterFpMmx3,
113 WHvX64RegisterFpMmx4,
114 WHvX64RegisterFpMmx5,
115 WHvX64RegisterFpMmx6,
116 WHvX64RegisterFpMmx7,
117 WHvX64RegisterFpControlStatus,
118 WHvX64RegisterXmmControlStatus,
120 /* X64 MSRs */
121 WHvX64RegisterEfer,
122 #ifdef TARGET_X86_64
123 WHvX64RegisterKernelGsBase,
124 #endif
125 WHvX64RegisterApicBase,
126 /* WHvX64RegisterPat, */
127 WHvX64RegisterSysenterCs,
128 WHvX64RegisterSysenterEip,
129 WHvX64RegisterSysenterEsp,
130 WHvX64RegisterStar,
131 #ifdef TARGET_X86_64
132 WHvX64RegisterLstar,
133 WHvX64RegisterCstar,
134 WHvX64RegisterSfmask,
135 #endif
137 /* Interrupt / Event Registers */
139 * WHvRegisterPendingInterruption,
140 * WHvRegisterInterruptState,
141 * WHvRegisterPendingEvent0,
142 * WHvRegisterPendingEvent1
143 * WHvX64RegisterDeliverabilityNotifications,
147 struct whpx_register_set {
148 WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)];
151 struct whpx_vcpu {
152 WHV_EMULATOR_HANDLE emulator;
153 bool window_registered;
154 bool interruptable;
155 bool ready_for_pic_interrupt;
156 uint64_t tpr;
157 uint64_t apic_base;
158 bool interruption_pending;
160 /* Must be the last field as it may have a tail */
161 WHV_RUN_VP_EXIT_CONTEXT exit_ctx;
164 static bool whpx_allowed;
165 static bool whp_dispatch_initialized;
166 static HMODULE hWinHvPlatform, hWinHvEmulation;
167 static uint32_t max_vcpu_index;
168 struct whpx_state whpx_global;
169 struct WHPDispatch whp_dispatch;
/* VP support */
176 static struct whpx_vcpu *get_whpx_vcpu(CPUState *cpu)
178 return (struct whpx_vcpu *)cpu->hax_vcpu;
181 static WHV_X64_SEGMENT_REGISTER whpx_seg_q2h(const SegmentCache *qs, int v86,
182 int r86)
184 WHV_X64_SEGMENT_REGISTER hs;
185 unsigned flags = qs->flags;
187 hs.Base = qs->base;
188 hs.Limit = qs->limit;
189 hs.Selector = qs->selector;
191 if (v86) {
192 hs.Attributes = 0;
193 hs.SegmentType = 3;
194 hs.Present = 1;
195 hs.DescriptorPrivilegeLevel = 3;
196 hs.NonSystemSegment = 1;
198 } else {
199 hs.Attributes = (flags >> DESC_TYPE_SHIFT);
201 if (r86) {
202 /* hs.Base &= 0xfffff; */
206 return hs;
209 static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs)
211 SegmentCache qs;
213 qs.base = hs->Base;
214 qs.limit = hs->Limit;
215 qs.selector = hs->Selector;
217 qs.flags = ((uint32_t)hs->Attributes) << DESC_TYPE_SHIFT;
219 return qs;
222 static int whpx_set_tsc(CPUState *cpu)
224 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
225 WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
226 WHV_REGISTER_VALUE tsc_val;
227 HRESULT hr;
228 struct whpx_state *whpx = &whpx_global;
231 * Suspend the partition prior to setting the TSC to reduce the variance
232 * in TSC across vCPUs. When the first vCPU runs post suspend, the
233 * partition is automatically resumed.
235 if (whp_dispatch.WHvSuspendPartitionTime) {
238 * Unable to suspend partition while setting TSC is not a fatal
239 * error. It just increases the likelihood of TSC variance between
240 * vCPUs and some guest OS are able to handle that just fine.
242 hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition);
243 if (FAILED(hr)) {
244 warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr);
248 tsc_val.Reg64 = env->tsc;
249 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
250 whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
251 if (FAILED(hr)) {
252 error_report("WHPX: Failed to set TSC, hr=%08lx", hr);
253 return -1;
256 return 0;
259 static void whpx_set_registers(CPUState *cpu, int level)
261 struct whpx_state *whpx = &whpx_global;
262 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
263 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
264 X86CPU *x86_cpu = X86_CPU(cpu);
265 struct whpx_register_set vcxt;
266 HRESULT hr;
267 int idx;
268 int idx_next;
269 int i;
270 int v86, r86;
272 assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
275 * Following MSRs have side effects on the guest or are too heavy for
276 * runtime. Limit them to full state update.
278 if (level >= WHPX_SET_RESET_STATE) {
279 whpx_set_tsc(cpu);
282 memset(&vcxt, 0, sizeof(struct whpx_register_set));
284 v86 = (env->eflags & VM_MASK);
285 r86 = !(env->cr[0] & CR0_PE_MASK);
287 vcpu->tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
288 vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state);
290 idx = 0;
292 /* Indexes for first 16 registers match between HV and QEMU definitions */
293 idx_next = 16;
294 for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
295 vcxt.values[idx].Reg64 = (uint64_t)env->regs[idx];
297 idx = idx_next;
299 /* Same goes for RIP and RFLAGS */
300 assert(whpx_register_names[idx] == WHvX64RegisterRip);
301 vcxt.values[idx++].Reg64 = env->eip;
303 assert(whpx_register_names[idx] == WHvX64RegisterRflags);
304 vcxt.values[idx++].Reg64 = env->eflags;
306 /* Translate 6+4 segment registers. HV and QEMU order matches */
307 assert(idx == WHvX64RegisterEs);
308 for (i = 0; i < 6; i += 1, idx += 1) {
309 vcxt.values[idx].Segment = whpx_seg_q2h(&env->segs[i], v86, r86);
312 assert(idx == WHvX64RegisterLdtr);
313 vcxt.values[idx++].Segment = whpx_seg_q2h(&env->ldt, 0, 0);
315 assert(idx == WHvX64RegisterTr);
316 vcxt.values[idx++].Segment = whpx_seg_q2h(&env->tr, 0, 0);
318 assert(idx == WHvX64RegisterIdtr);
319 vcxt.values[idx].Table.Base = env->idt.base;
320 vcxt.values[idx].Table.Limit = env->idt.limit;
321 idx += 1;
323 assert(idx == WHvX64RegisterGdtr);
324 vcxt.values[idx].Table.Base = env->gdt.base;
325 vcxt.values[idx].Table.Limit = env->gdt.limit;
326 idx += 1;
328 /* CR0, 2, 3, 4, 8 */
329 assert(whpx_register_names[idx] == WHvX64RegisterCr0);
330 vcxt.values[idx++].Reg64 = env->cr[0];
331 assert(whpx_register_names[idx] == WHvX64RegisterCr2);
332 vcxt.values[idx++].Reg64 = env->cr[2];
333 assert(whpx_register_names[idx] == WHvX64RegisterCr3);
334 vcxt.values[idx++].Reg64 = env->cr[3];
335 assert(whpx_register_names[idx] == WHvX64RegisterCr4);
336 vcxt.values[idx++].Reg64 = env->cr[4];
337 assert(whpx_register_names[idx] == WHvX64RegisterCr8);
338 vcxt.values[idx++].Reg64 = vcpu->tpr;
340 /* 8 Debug Registers - Skipped */
342 /* 16 XMM registers */
343 assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
344 idx_next = idx + 16;
345 for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
346 vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0);
347 vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1);
349 idx = idx_next;
351 /* 8 FP registers */
352 assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
353 for (i = 0; i < 8; i += 1, idx += 1) {
354 vcxt.values[idx].Fp.AsUINT128.Low64 = env->fpregs[i].mmx.MMX_Q(0);
355 /* vcxt.values[idx].Fp.AsUINT128.High64 =
356 env->fpregs[i].mmx.MMX_Q(1);
360 /* FP control status register */
361 assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
362 vcxt.values[idx].FpControlStatus.FpControl = env->fpuc;
363 vcxt.values[idx].FpControlStatus.FpStatus =
364 (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
365 vcxt.values[idx].FpControlStatus.FpTag = 0;
366 for (i = 0; i < 8; ++i) {
367 vcxt.values[idx].FpControlStatus.FpTag |= (!env->fptags[i]) << i;
369 vcxt.values[idx].FpControlStatus.Reserved = 0;
370 vcxt.values[idx].FpControlStatus.LastFpOp = env->fpop;
371 vcxt.values[idx].FpControlStatus.LastFpRip = env->fpip;
372 idx += 1;
374 /* XMM control status register */
375 assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
376 vcxt.values[idx].XmmControlStatus.LastFpRdp = 0;
377 vcxt.values[idx].XmmControlStatus.XmmStatusControl = env->mxcsr;
378 vcxt.values[idx].XmmControlStatus.XmmStatusControlMask = 0x0000ffff;
379 idx += 1;
381 /* MSRs */
382 assert(whpx_register_names[idx] == WHvX64RegisterEfer);
383 vcxt.values[idx++].Reg64 = env->efer;
384 #ifdef TARGET_X86_64
385 assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
386 vcxt.values[idx++].Reg64 = env->kernelgsbase;
387 #endif
389 assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
390 vcxt.values[idx++].Reg64 = vcpu->apic_base;
392 /* WHvX64RegisterPat - Skipped */
394 assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
395 vcxt.values[idx++].Reg64 = env->sysenter_cs;
396 assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
397 vcxt.values[idx++].Reg64 = env->sysenter_eip;
398 assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
399 vcxt.values[idx++].Reg64 = env->sysenter_esp;
400 assert(whpx_register_names[idx] == WHvX64RegisterStar);
401 vcxt.values[idx++].Reg64 = env->star;
402 #ifdef TARGET_X86_64
403 assert(whpx_register_names[idx] == WHvX64RegisterLstar);
404 vcxt.values[idx++].Reg64 = env->lstar;
405 assert(whpx_register_names[idx] == WHvX64RegisterCstar);
406 vcxt.values[idx++].Reg64 = env->cstar;
407 assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
408 vcxt.values[idx++].Reg64 = env->fmask;
409 #endif
411 /* Interrupt / Event Registers - Skipped */
413 assert(idx == RTL_NUMBER_OF(whpx_register_names));
415 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
416 whpx->partition, cpu->cpu_index,
417 whpx_register_names,
418 RTL_NUMBER_OF(whpx_register_names),
419 &vcxt.values[0]);
421 if (FAILED(hr)) {
422 error_report("WHPX: Failed to set virtual processor context, hr=%08lx",
423 hr);
426 return;
429 static int whpx_get_tsc(CPUState *cpu)
431 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
432 WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc;
433 WHV_REGISTER_VALUE tsc_val;
434 HRESULT hr;
435 struct whpx_state *whpx = &whpx_global;
437 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
438 whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val);
439 if (FAILED(hr)) {
440 error_report("WHPX: Failed to get TSC, hr=%08lx", hr);
441 return -1;
444 env->tsc = tsc_val.Reg64;
445 return 0;
448 static void whpx_get_registers(CPUState *cpu)
450 struct whpx_state *whpx = &whpx_global;
451 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
452 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
453 X86CPU *x86_cpu = X86_CPU(cpu);
454 struct whpx_register_set vcxt;
455 uint64_t tpr, apic_base;
456 HRESULT hr;
457 int idx;
458 int idx_next;
459 int i;
461 assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu));
463 if (!env->tsc_valid) {
464 whpx_get_tsc(cpu);
465 env->tsc_valid = !runstate_is_running();
468 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
469 whpx->partition, cpu->cpu_index,
470 whpx_register_names,
471 RTL_NUMBER_OF(whpx_register_names),
472 &vcxt.values[0]);
473 if (FAILED(hr)) {
474 error_report("WHPX: Failed to get virtual processor context, hr=%08lx",
475 hr);
478 idx = 0;
480 /* Indexes for first 16 registers match between HV and QEMU definitions */
481 idx_next = 16;
482 for (idx = 0; idx < CPU_NB_REGS; idx += 1) {
483 env->regs[idx] = vcxt.values[idx].Reg64;
485 idx = idx_next;
487 /* Same goes for RIP and RFLAGS */
488 assert(whpx_register_names[idx] == WHvX64RegisterRip);
489 env->eip = vcxt.values[idx++].Reg64;
490 assert(whpx_register_names[idx] == WHvX64RegisterRflags);
491 env->eflags = vcxt.values[idx++].Reg64;
493 /* Translate 6+4 segment registers. HV and QEMU order matches */
494 assert(idx == WHvX64RegisterEs);
495 for (i = 0; i < 6; i += 1, idx += 1) {
496 env->segs[i] = whpx_seg_h2q(&vcxt.values[idx].Segment);
499 assert(idx == WHvX64RegisterLdtr);
500 env->ldt = whpx_seg_h2q(&vcxt.values[idx++].Segment);
501 assert(idx == WHvX64RegisterTr);
502 env->tr = whpx_seg_h2q(&vcxt.values[idx++].Segment);
503 assert(idx == WHvX64RegisterIdtr);
504 env->idt.base = vcxt.values[idx].Table.Base;
505 env->idt.limit = vcxt.values[idx].Table.Limit;
506 idx += 1;
507 assert(idx == WHvX64RegisterGdtr);
508 env->gdt.base = vcxt.values[idx].Table.Base;
509 env->gdt.limit = vcxt.values[idx].Table.Limit;
510 idx += 1;
512 /* CR0, 2, 3, 4, 8 */
513 assert(whpx_register_names[idx] == WHvX64RegisterCr0);
514 env->cr[0] = vcxt.values[idx++].Reg64;
515 assert(whpx_register_names[idx] == WHvX64RegisterCr2);
516 env->cr[2] = vcxt.values[idx++].Reg64;
517 assert(whpx_register_names[idx] == WHvX64RegisterCr3);
518 env->cr[3] = vcxt.values[idx++].Reg64;
519 assert(whpx_register_names[idx] == WHvX64RegisterCr4);
520 env->cr[4] = vcxt.values[idx++].Reg64;
521 assert(whpx_register_names[idx] == WHvX64RegisterCr8);
522 tpr = vcxt.values[idx++].Reg64;
523 if (tpr != vcpu->tpr) {
524 vcpu->tpr = tpr;
525 cpu_set_apic_tpr(x86_cpu->apic_state, tpr);
528 /* 8 Debug Registers - Skipped */
530 /* 16 XMM registers */
531 assert(whpx_register_names[idx] == WHvX64RegisterXmm0);
532 idx_next = idx + 16;
533 for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) {
534 env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64;
535 env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64;
537 idx = idx_next;
539 /* 8 FP registers */
540 assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0);
541 for (i = 0; i < 8; i += 1, idx += 1) {
542 env->fpregs[i].mmx.MMX_Q(0) = vcxt.values[idx].Fp.AsUINT128.Low64;
543 /* env->fpregs[i].mmx.MMX_Q(1) =
544 vcxt.values[idx].Fp.AsUINT128.High64;
548 /* FP control status register */
549 assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus);
550 env->fpuc = vcxt.values[idx].FpControlStatus.FpControl;
551 env->fpstt = (vcxt.values[idx].FpControlStatus.FpStatus >> 11) & 0x7;
552 env->fpus = vcxt.values[idx].FpControlStatus.FpStatus & ~0x3800;
553 for (i = 0; i < 8; ++i) {
554 env->fptags[i] = !((vcxt.values[idx].FpControlStatus.FpTag >> i) & 1);
556 env->fpop = vcxt.values[idx].FpControlStatus.LastFpOp;
557 env->fpip = vcxt.values[idx].FpControlStatus.LastFpRip;
558 idx += 1;
560 /* XMM control status register */
561 assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus);
562 env->mxcsr = vcxt.values[idx].XmmControlStatus.XmmStatusControl;
563 idx += 1;
565 /* MSRs */
566 assert(whpx_register_names[idx] == WHvX64RegisterEfer);
567 env->efer = vcxt.values[idx++].Reg64;
568 #ifdef TARGET_X86_64
569 assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase);
570 env->kernelgsbase = vcxt.values[idx++].Reg64;
571 #endif
573 assert(whpx_register_names[idx] == WHvX64RegisterApicBase);
574 apic_base = vcxt.values[idx++].Reg64;
575 if (apic_base != vcpu->apic_base) {
576 vcpu->apic_base = apic_base;
577 cpu_set_apic_base(x86_cpu->apic_state, vcpu->apic_base);
580 /* WHvX64RegisterPat - Skipped */
582 assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs);
583 env->sysenter_cs = vcxt.values[idx++].Reg64;
584 assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip);
585 env->sysenter_eip = vcxt.values[idx++].Reg64;
586 assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp);
587 env->sysenter_esp = vcxt.values[idx++].Reg64;
588 assert(whpx_register_names[idx] == WHvX64RegisterStar);
589 env->star = vcxt.values[idx++].Reg64;
590 #ifdef TARGET_X86_64
591 assert(whpx_register_names[idx] == WHvX64RegisterLstar);
592 env->lstar = vcxt.values[idx++].Reg64;
593 assert(whpx_register_names[idx] == WHvX64RegisterCstar);
594 env->cstar = vcxt.values[idx++].Reg64;
595 assert(whpx_register_names[idx] == WHvX64RegisterSfmask);
596 env->fmask = vcxt.values[idx++].Reg64;
597 #endif
599 /* Interrupt / Event Registers - Skipped */
601 assert(idx == RTL_NUMBER_OF(whpx_register_names));
603 if (whpx_apic_in_platform()) {
604 whpx_apic_get(x86_cpu->apic_state);
607 return;
610 static HRESULT CALLBACK whpx_emu_ioport_callback(
611 void *ctx,
612 WHV_EMULATOR_IO_ACCESS_INFO *IoAccess)
614 MemTxAttrs attrs = { 0 };
615 address_space_rw(&address_space_io, IoAccess->Port, attrs,
616 &IoAccess->Data, IoAccess->AccessSize,
617 IoAccess->Direction);
618 return S_OK;
621 static HRESULT CALLBACK whpx_emu_mmio_callback(
622 void *ctx,
623 WHV_EMULATOR_MEMORY_ACCESS_INFO *ma)
625 cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize,
626 ma->Direction);
627 return S_OK;
630 static HRESULT CALLBACK whpx_emu_getreg_callback(
631 void *ctx,
632 const WHV_REGISTER_NAME *RegisterNames,
633 UINT32 RegisterCount,
634 WHV_REGISTER_VALUE *RegisterValues)
636 HRESULT hr;
637 struct whpx_state *whpx = &whpx_global;
638 CPUState *cpu = (CPUState *)ctx;
640 hr = whp_dispatch.WHvGetVirtualProcessorRegisters(
641 whpx->partition, cpu->cpu_index,
642 RegisterNames, RegisterCount,
643 RegisterValues);
644 if (FAILED(hr)) {
645 error_report("WHPX: Failed to get virtual processor registers,"
646 " hr=%08lx", hr);
649 return hr;
652 static HRESULT CALLBACK whpx_emu_setreg_callback(
653 void *ctx,
654 const WHV_REGISTER_NAME *RegisterNames,
655 UINT32 RegisterCount,
656 const WHV_REGISTER_VALUE *RegisterValues)
658 HRESULT hr;
659 struct whpx_state *whpx = &whpx_global;
660 CPUState *cpu = (CPUState *)ctx;
662 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
663 whpx->partition, cpu->cpu_index,
664 RegisterNames, RegisterCount,
665 RegisterValues);
666 if (FAILED(hr)) {
667 error_report("WHPX: Failed to set virtual processor registers,"
668 " hr=%08lx", hr);
672 * The emulator just successfully wrote the register state. We clear the
673 * dirty state so we avoid the double write on resume of the VP.
675 cpu->vcpu_dirty = false;
677 return hr;
680 static HRESULT CALLBACK whpx_emu_translate_callback(
681 void *ctx,
682 WHV_GUEST_VIRTUAL_ADDRESS Gva,
683 WHV_TRANSLATE_GVA_FLAGS TranslateFlags,
684 WHV_TRANSLATE_GVA_RESULT_CODE *TranslationResult,
685 WHV_GUEST_PHYSICAL_ADDRESS *Gpa)
687 HRESULT hr;
688 struct whpx_state *whpx = &whpx_global;
689 CPUState *cpu = (CPUState *)ctx;
690 WHV_TRANSLATE_GVA_RESULT res;
692 hr = whp_dispatch.WHvTranslateGva(whpx->partition, cpu->cpu_index,
693 Gva, TranslateFlags, &res, Gpa);
694 if (FAILED(hr)) {
695 error_report("WHPX: Failed to translate GVA, hr=%08lx", hr);
696 } else {
697 *TranslationResult = res.ResultCode;
700 return hr;
703 static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks = {
704 .Size = sizeof(WHV_EMULATOR_CALLBACKS),
705 .WHvEmulatorIoPortCallback = whpx_emu_ioport_callback,
706 .WHvEmulatorMemoryCallback = whpx_emu_mmio_callback,
707 .WHvEmulatorGetVirtualProcessorRegisters = whpx_emu_getreg_callback,
708 .WHvEmulatorSetVirtualProcessorRegisters = whpx_emu_setreg_callback,
709 .WHvEmulatorTranslateGvaPage = whpx_emu_translate_callback,
712 static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx)
714 HRESULT hr;
715 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
716 WHV_EMULATOR_STATUS emu_status;
718 hr = whp_dispatch.WHvEmulatorTryMmioEmulation(
719 vcpu->emulator, cpu,
720 &vcpu->exit_ctx.VpContext, ctx,
721 &emu_status);
722 if (FAILED(hr)) {
723 error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr);
724 return -1;
727 if (!emu_status.EmulationSuccessful) {
728 error_report("WHPX: Failed to emulate MMIO access with"
729 " EmulatorReturnStatus: %u", emu_status.AsUINT32);
730 return -1;
733 return 0;
736 static int whpx_handle_portio(CPUState *cpu,
737 WHV_X64_IO_PORT_ACCESS_CONTEXT *ctx)
739 HRESULT hr;
740 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
741 WHV_EMULATOR_STATUS emu_status;
743 hr = whp_dispatch.WHvEmulatorTryIoEmulation(
744 vcpu->emulator, cpu,
745 &vcpu->exit_ctx.VpContext, ctx,
746 &emu_status);
747 if (FAILED(hr)) {
748 error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr);
749 return -1;
752 if (!emu_status.EmulationSuccessful) {
753 error_report("WHPX: Failed to emulate PortIO access with"
754 " EmulatorReturnStatus: %u", emu_status.AsUINT32);
755 return -1;
758 return 0;
761 static int whpx_handle_halt(CPUState *cpu)
763 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
764 int ret = 0;
766 qemu_mutex_lock_iothread();
767 if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
768 (env->eflags & IF_MASK)) &&
769 !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
770 cpu->exception_index = EXCP_HLT;
771 cpu->halted = true;
772 ret = 1;
774 qemu_mutex_unlock_iothread();
776 return ret;
/*
 * Stage interrupt-related state before the virtual processor is run.
 *
 * Under the iothread lock this inspects cpu->interrupt_request and collects
 * up to three register updates: a pending interruption or external
 * interrupt event, CR8 when the APIC TPR changed, and an interrupt
 * deliverability notification request. The collected updates are written
 * with one WHvSetVirtualProcessorRegisters call after the lock has been
 * released; a failure of that call is reported but not propagated.
 */
779 static void whpx_vcpu_pre_run(CPUState *cpu)
781 HRESULT hr;
782 struct whpx_state *whpx = &whpx_global;
783 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
784 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
785 X86CPU *x86_cpu = X86_CPU(cpu);
786 int irq;
787 uint8_t tpr;
788 WHV_X64_PENDING_INTERRUPTION_REGISTER new_int;
789 UINT32 reg_count = 0;
/*
 * Three slots are enough: the pending-interruption and pending-event
 * updates sit in opposite branches of the same if/else, leaving at most one
 * of them plus CR8 plus the deliverability notification.
 */
790 WHV_REGISTER_VALUE reg_values[3];
791 WHV_REGISTER_NAME reg_names[3];
793 memset(&new_int, 0, sizeof(new_int));
794 memset(reg_values, 0, sizeof(reg_values));
796 qemu_mutex_lock_iothread();
798 /* Inject NMI */
799 if (!vcpu->interruption_pending &&
800 cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) {
801 if (cpu->interrupt_request & CPU_INTERRUPT_NMI) {
802 cpu->interrupt_request &= ~CPU_INTERRUPT_NMI;
803 vcpu->interruptable = false;
804 new_int.InterruptionType = WHvX64PendingNmi;
805 new_int.InterruptionPending = 1;
806 new_int.InterruptionVector = 2;
/* An SMI request is only acknowledged here; nothing is injected for it. */
808 if (cpu->interrupt_request & CPU_INTERRUPT_SMI) {
809 cpu->interrupt_request &= ~CPU_INTERRUPT_SMI;
814 * Force the VCPU out of its inner loop to process any INIT requests or
815 * commit pending TPR access.
817 if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
818 if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
819 !(env->hflags & HF_SMM_MASK)) {
820 cpu->exit_request = 1;
822 if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
823 cpu->exit_request = 1;
827 /* Get pending hard interruption or replay one that was overwritten */
/*
 * NOTE(review): new_int appears to be committed only on the
 * !whpx_apic_in_platform() path, so an NMI prepared above looks like it is
 * dropped when the APIC is in the platform - confirm this is intended.
 */
828 if (!whpx_apic_in_platform()) {
829 if (!vcpu->interruption_pending &&
830 vcpu->interruptable && (env->eflags & IF_MASK)) {
831 assert(!new_int.InterruptionPending);
832 if (cpu->interrupt_request & CPU_INTERRUPT_HARD) {
833 cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
834 irq = cpu_get_pic_interrupt(env);
835 if (irq >= 0) {
836 new_int.InterruptionType = WHvX64PendingInterrupt;
837 new_int.InterruptionPending = 1;
838 new_int.InterruptionVector = irq;
843 /* Setup interrupt state if new one was prepared */
844 if (new_int.InterruptionPending) {
845 reg_values[reg_count].PendingInterruption = new_int;
846 reg_names[reg_count] = WHvRegisterPendingInterruption;
847 reg_count += 1;
/*
 * Otherwise a PIC interrupt is handed over as an external interrupt event,
 * but only when ready_for_pic_interrupt has been set.
 */
849 } else if (vcpu->ready_for_pic_interrupt &&
850 (cpu->interrupt_request & CPU_INTERRUPT_HARD)) {
851 cpu->interrupt_request &= ~CPU_INTERRUPT_HARD;
852 irq = cpu_get_pic_interrupt(env);
853 if (irq >= 0) {
854 reg_names[reg_count] = WHvRegisterPendingEvent;
855 reg_values[reg_count].ExtIntEvent = (WHV_X64_PENDING_EXT_INT_EVENT)
857 .EventPending = 1,
858 .EventType = WHvX64PendingEventExtInt,
859 .Vector = irq,
861 reg_count += 1;
865 /* Sync the TPR to the CR8 if was modified during the intercept */
866 tpr = cpu_get_apic_tpr(x86_cpu->apic_state);
867 if (tpr != vcpu->tpr) {
868 vcpu->tpr = tpr;
869 reg_values[reg_count].Reg64 = tpr;
870 cpu->exit_request = 1;
871 reg_names[reg_count] = WHvX64RegisterCr8;
872 reg_count += 1;
875 /* Update the state of the interrupt delivery notification */
/* Requested only once; window_registered stays set until it is cleared elsewhere. */
876 if (!vcpu->window_registered &&
877 cpu->interrupt_request & CPU_INTERRUPT_HARD) {
878 reg_values[reg_count].DeliverabilityNotifications =
879 (WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER) {
880 .InterruptNotification = 1
882 vcpu->window_registered = 1;
883 reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications;
884 reg_count += 1;
887 qemu_mutex_unlock_iothread();
/* The ready flag is consumed on every pass, whether or not it was used. */
888 vcpu->ready_for_pic_interrupt = false;
/* Skip the hypervisor call entirely when nothing was staged. */
890 if (reg_count) {
891 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
892 whpx->partition, cpu->cpu_index,
893 reg_names, reg_count, reg_values);
894 if (FAILED(hr)) {
895 error_report("WHPX: Failed to set interrupt state registers,"
896 " hr=%08lx", hr);
900 return;
903 static void whpx_vcpu_post_run(CPUState *cpu)
905 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
906 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
907 X86CPU *x86_cpu = X86_CPU(cpu);
909 env->eflags = vcpu->exit_ctx.VpContext.Rflags;
911 uint64_t tpr = vcpu->exit_ctx.VpContext.Cr8;
912 if (vcpu->tpr != tpr) {
913 vcpu->tpr = tpr;
914 qemu_mutex_lock_iothread();
915 cpu_set_apic_tpr(x86_cpu->apic_state, vcpu->tpr);
916 qemu_mutex_unlock_iothread();
919 vcpu->interruption_pending =
920 vcpu->exit_ctx.VpContext.ExecutionState.InterruptionPending;
922 vcpu->interruptable =
923 !vcpu->exit_ctx.VpContext.ExecutionState.InterruptShadow;
925 return;
928 static void whpx_vcpu_process_async_events(CPUState *cpu)
930 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
931 X86CPU *x86_cpu = X86_CPU(cpu);
932 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
934 if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) &&
935 !(env->hflags & HF_SMM_MASK)) {
936 whpx_cpu_synchronize_state(cpu);
937 do_cpu_init(x86_cpu);
938 vcpu->interruptable = true;
941 if (cpu->interrupt_request & CPU_INTERRUPT_POLL) {
942 cpu->interrupt_request &= ~CPU_INTERRUPT_POLL;
943 apic_poll_irq(x86_cpu->apic_state);
946 if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
947 (env->eflags & IF_MASK)) ||
948 (cpu->interrupt_request & CPU_INTERRUPT_NMI)) {
949 cpu->halted = false;
952 if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) {
953 whpx_cpu_synchronize_state(cpu);
954 do_cpu_sipi(x86_cpu);
957 if (cpu->interrupt_request & CPU_INTERRUPT_TPR) {
958 cpu->interrupt_request &= ~CPU_INTERRUPT_TPR;
959 whpx_cpu_synchronize_state(cpu);
960 apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip,
961 env->tpr_access_type);
964 return;
967 static int whpx_vcpu_run(CPUState *cpu)
969 HRESULT hr;
970 struct whpx_state *whpx = &whpx_global;
971 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
972 int ret;
974 whpx_vcpu_process_async_events(cpu);
975 if (cpu->halted && !whpx_apic_in_platform()) {
976 cpu->exception_index = EXCP_HLT;
977 qatomic_set(&cpu->exit_request, false);
978 return 0;
981 qemu_mutex_unlock_iothread();
982 cpu_exec_start(cpu);
984 do {
985 if (cpu->vcpu_dirty) {
986 whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE);
987 cpu->vcpu_dirty = false;
990 whpx_vcpu_pre_run(cpu);
992 if (qatomic_read(&cpu->exit_request)) {
993 whpx_vcpu_kick(cpu);
996 hr = whp_dispatch.WHvRunVirtualProcessor(
997 whpx->partition, cpu->cpu_index,
998 &vcpu->exit_ctx, sizeof(vcpu->exit_ctx));
1000 if (FAILED(hr)) {
1001 error_report("WHPX: Failed to exec a virtual processor,"
1002 " hr=%08lx", hr);
1003 ret = -1;
1004 break;
1007 whpx_vcpu_post_run(cpu);
1009 switch (vcpu->exit_ctx.ExitReason) {
1010 case WHvRunVpExitReasonMemoryAccess:
1011 ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess);
1012 break;
1014 case WHvRunVpExitReasonX64IoPortAccess:
1015 ret = whpx_handle_portio(cpu, &vcpu->exit_ctx.IoPortAccess);
1016 break;
1018 case WHvRunVpExitReasonX64InterruptWindow:
1019 vcpu->ready_for_pic_interrupt = 1;
1020 vcpu->window_registered = 0;
1021 ret = 0;
1022 break;
1024 case WHvRunVpExitReasonX64ApicEoi:
1025 assert(whpx_apic_in_platform());
1026 ioapic_eoi_broadcast(vcpu->exit_ctx.ApicEoi.InterruptVector);
1027 break;
1029 case WHvRunVpExitReasonX64Halt:
1030 ret = whpx_handle_halt(cpu);
1031 break;
1033 case WHvRunVpExitReasonX64ApicInitSipiTrap: {
1034 WHV_INTERRUPT_CONTROL ipi = {0};
1035 uint64_t icr = vcpu->exit_ctx.ApicInitSipi.ApicIcr;
1036 uint32_t delivery_mode =
1037 (icr & APIC_ICR_DELIV_MOD) >> APIC_ICR_DELIV_MOD_SHIFT;
1038 int dest_shorthand =
1039 (icr & APIC_ICR_DEST_SHORT) >> APIC_ICR_DEST_SHORT_SHIFT;
1040 bool broadcast = false;
1041 bool include_self = false;
1042 uint32_t i;
1044 /* We only registered for INIT and SIPI exits. */
1045 if ((delivery_mode != APIC_DM_INIT) &&
1046 (delivery_mode != APIC_DM_SIPI)) {
1047 error_report(
1048 "WHPX: Unexpected APIC exit that is not a INIT or SIPI");
1049 break;
1052 if (delivery_mode == APIC_DM_INIT) {
1053 ipi.Type = WHvX64InterruptTypeInit;
1054 } else {
1055 ipi.Type = WHvX64InterruptTypeSipi;
1058 ipi.DestinationMode =
1059 ((icr & APIC_ICR_DEST_MOD) >> APIC_ICR_DEST_MOD_SHIFT) ?
1060 WHvX64InterruptDestinationModeLogical :
1061 WHvX64InterruptDestinationModePhysical;
1063 ipi.TriggerMode =
1064 ((icr & APIC_ICR_TRIGGER_MOD) >> APIC_ICR_TRIGGER_MOD_SHIFT) ?
1065 WHvX64InterruptTriggerModeLevel :
1066 WHvX64InterruptTriggerModeEdge;
1068 ipi.Vector = icr & APIC_VECTOR_MASK;
1069 switch (dest_shorthand) {
1070 /* no shorthand. Bits 56-63 contain the destination. */
1071 case 0:
1072 ipi.Destination = (icr >> 56) & APIC_VECTOR_MASK;
1073 hr = whp_dispatch.WHvRequestInterrupt(whpx->partition,
1074 &ipi, sizeof(ipi));
1075 if (FAILED(hr)) {
1076 error_report("WHPX: Failed to request interrupt hr=%08lx",
1077 hr);
1080 break;
1082 /* self */
1083 case 1:
1084 include_self = true;
1085 break;
1087 /* broadcast, including self */
1088 case 2:
1089 broadcast = true;
1090 include_self = true;
1091 break;
1093 /* broadcast, excluding self */
1094 case 3:
1095 broadcast = true;
1096 break;
1099 if (!broadcast && !include_self) {
1100 break;
1103 for (i = 0; i <= max_vcpu_index; i++) {
1104 if (i == cpu->cpu_index && !include_self) {
1105 continue;
1109 * Assuming that APIC Ids are identity mapped since
1110 * WHvX64RegisterApicId & WHvX64RegisterInitialApicId registers
1111 * are not handled yet and the hypervisor doesn't allow the
1112 * guest to modify the APIC ID.
1114 ipi.Destination = i;
1115 hr = whp_dispatch.WHvRequestInterrupt(whpx->partition,
1116 &ipi, sizeof(ipi));
1117 if (FAILED(hr)) {
1118 error_report(
1119 "WHPX: Failed to request SIPI for %d, hr=%08lx",
1120 i, hr);
1124 break;
1127 case WHvRunVpExitReasonCanceled:
1128 cpu->exception_index = EXCP_INTERRUPT;
1129 ret = 1;
1130 break;
1132 case WHvRunVpExitReasonX64MsrAccess: {
1133 WHV_REGISTER_VALUE reg_values[3] = {0};
1134 WHV_REGISTER_NAME reg_names[3];
1135 UINT32 reg_count;
1137 reg_names[0] = WHvX64RegisterRip;
1138 reg_names[1] = WHvX64RegisterRax;
1139 reg_names[2] = WHvX64RegisterRdx;
1141 reg_values[0].Reg64 =
1142 vcpu->exit_ctx.VpContext.Rip +
1143 vcpu->exit_ctx.VpContext.InstructionLength;
1146 * For all unsupported MSR access we:
1147 * ignore writes
1148 * return 0 on read.
1150 reg_count = vcpu->exit_ctx.MsrAccess.AccessInfo.IsWrite ?
1151 1 : 3;
1153 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1154 whpx->partition,
1155 cpu->cpu_index,
1156 reg_names, reg_count,
1157 reg_values);
1159 if (FAILED(hr)) {
1160 error_report("WHPX: Failed to set MsrAccess state "
1161 " registers, hr=%08lx", hr);
1163 ret = 0;
1164 break;
1166 case WHvRunVpExitReasonX64Cpuid: {
1167 WHV_REGISTER_VALUE reg_values[5];
1168 WHV_REGISTER_NAME reg_names[5];
1169 UINT32 reg_count = 5;
1170 UINT64 cpuid_fn, rip = 0, rax = 0, rcx = 0, rdx = 0, rbx = 0;
1171 X86CPU *x86_cpu = X86_CPU(cpu);
1172 CPUX86State *env = &x86_cpu->env;
1174 memset(reg_values, 0, sizeof(reg_values));
1176 rip = vcpu->exit_ctx.VpContext.Rip +
1177 vcpu->exit_ctx.VpContext.InstructionLength;
1178 cpuid_fn = vcpu->exit_ctx.CpuidAccess.Rax;
1181 * Ideally, these should be supplied to the hypervisor during VCPU
1182 * initialization and it should be able to satisfy this request.
1183 * But, currently, WHPX doesn't support setting CPUID values in the
1184 * hypervisor once the partition has been setup, which is too late
1185 * since VCPUs are realized later. For now, use the values from
1186 * QEMU to satisfy these requests, until WHPX adds support for
1187 * being able to set these values in the hypervisor at runtime.
1189 cpu_x86_cpuid(env, cpuid_fn, 0, (UINT32 *)&rax, (UINT32 *)&rbx,
1190 (UINT32 *)&rcx, (UINT32 *)&rdx);
1191 switch (cpuid_fn) {
1192 case 0x40000000:
1193 /* Expose the vmware cpu frequency cpuid leaf */
1194 rax = 0x40000010;
1195 rbx = rcx = rdx = 0;
1196 break;
1198 case 0x40000010:
1199 rax = env->tsc_khz;
1200 rbx = env->apic_bus_freq / 1000; /* Hz to KHz */
1201 rcx = rdx = 0;
1202 break;
1204 case 0x80000001:
1205 /* Remove any support of OSVW */
1206 rcx &= ~CPUID_EXT3_OSVW;
1207 break;
1210 reg_names[0] = WHvX64RegisterRip;
1211 reg_names[1] = WHvX64RegisterRax;
1212 reg_names[2] = WHvX64RegisterRcx;
1213 reg_names[3] = WHvX64RegisterRdx;
1214 reg_names[4] = WHvX64RegisterRbx;
1216 reg_values[0].Reg64 = rip;
1217 reg_values[1].Reg64 = rax;
1218 reg_values[2].Reg64 = rcx;
1219 reg_values[3].Reg64 = rdx;
1220 reg_values[4].Reg64 = rbx;
1222 hr = whp_dispatch.WHvSetVirtualProcessorRegisters(
1223 whpx->partition, cpu->cpu_index,
1224 reg_names,
1225 reg_count,
1226 reg_values);
1228 if (FAILED(hr)) {
1229 error_report("WHPX: Failed to set CpuidAccess state registers,"
1230 " hr=%08lx", hr);
1232 ret = 0;
1233 break;
1235 case WHvRunVpExitReasonNone:
1236 case WHvRunVpExitReasonUnrecoverableException:
1237 case WHvRunVpExitReasonInvalidVpRegisterValue:
1238 case WHvRunVpExitReasonUnsupportedFeature:
1239 case WHvRunVpExitReasonException:
1240 default:
1241 error_report("WHPX: Unexpected VP exit code %d",
1242 vcpu->exit_ctx.ExitReason);
1243 whpx_get_registers(cpu);
1244 qemu_mutex_lock_iothread();
1245 qemu_system_guest_panicked(cpu_get_crash_info(cpu));
1246 qemu_mutex_unlock_iothread();
1247 break;
1250 } while (!ret);
1252 cpu_exec_end(cpu);
1253 qemu_mutex_lock_iothread();
1254 current_cpu = cpu;
1256 qatomic_set(&cpu->exit_request, false);
1258 return ret < 0;
1261 static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg)
1263 if (!cpu->vcpu_dirty) {
1264 whpx_get_registers(cpu);
1265 cpu->vcpu_dirty = true;
1269 static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu,
1270 run_on_cpu_data arg)
1272 whpx_set_registers(cpu, WHPX_SET_RESET_STATE);
1273 cpu->vcpu_dirty = false;
1276 static void do_whpx_cpu_synchronize_post_init(CPUState *cpu,
1277 run_on_cpu_data arg)
1279 whpx_set_registers(cpu, WHPX_SET_FULL_STATE);
1280 cpu->vcpu_dirty = false;
1283 static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu,
1284 run_on_cpu_data arg)
1286 cpu->vcpu_dirty = true;
/*
 * CPU support.
 */
1293 void whpx_cpu_synchronize_state(CPUState *cpu)
1295 if (!cpu->vcpu_dirty) {
1296 run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL);
1300 void whpx_cpu_synchronize_post_reset(CPUState *cpu)
1302 run_on_cpu(cpu, do_whpx_cpu_synchronize_post_reset, RUN_ON_CPU_NULL);
1305 void whpx_cpu_synchronize_post_init(CPUState *cpu)
1307 run_on_cpu(cpu, do_whpx_cpu_synchronize_post_init, RUN_ON_CPU_NULL);
1310 void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu)
1312 run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL);
/*
 * Vcpu support.
 */
1319 static Error *whpx_migration_blocker;
1321 static void whpx_cpu_update_state(void *opaque, bool running, RunState state)
1323 CPUX86State *env = opaque;
1325 if (running) {
1326 env->tsc_valid = false;
1330 int whpx_init_vcpu(CPUState *cpu)
1332 HRESULT hr;
1333 struct whpx_state *whpx = &whpx_global;
1334 struct whpx_vcpu *vcpu = NULL;
1335 Error *local_error = NULL;
1336 struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr);
1337 X86CPU *x86_cpu = X86_CPU(cpu);
1338 UINT64 freq = 0;
1339 int ret;
1341 /* Add migration blockers for all unsupported features of the
1342 * Windows Hypervisor Platform
1344 if (whpx_migration_blocker == NULL) {
1345 error_setg(&whpx_migration_blocker,
1346 "State blocked due to non-migratable CPUID feature support,"
1347 "dirty memory tracking support, and XSAVE/XRSTOR support");
1349 (void)migrate_add_blocker(whpx_migration_blocker, &local_error);
1350 if (local_error) {
1351 error_report_err(local_error);
1352 migrate_del_blocker(whpx_migration_blocker);
1353 error_free(whpx_migration_blocker);
1354 ret = -EINVAL;
1355 goto error;
1359 vcpu = g_malloc0(sizeof(struct whpx_vcpu));
1361 if (!vcpu) {
1362 error_report("WHPX: Failed to allocte VCPU context.");
1363 ret = -ENOMEM;
1364 goto error;
1367 hr = whp_dispatch.WHvEmulatorCreateEmulator(
1368 &whpx_emu_callbacks,
1369 &vcpu->emulator);
1370 if (FAILED(hr)) {
1371 error_report("WHPX: Failed to setup instruction completion support,"
1372 " hr=%08lx", hr);
1373 ret = -EINVAL;
1374 goto error;
1377 hr = whp_dispatch.WHvCreateVirtualProcessor(
1378 whpx->partition, cpu->cpu_index, 0);
1379 if (FAILED(hr)) {
1380 error_report("WHPX: Failed to create a virtual processor,"
1381 " hr=%08lx", hr);
1382 whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
1383 ret = -EINVAL;
1384 goto error;
1388 * vcpu's TSC frequency is either specified by user, or use the value
1389 * provided by Hyper-V if the former is not present. In the latter case, we
1390 * query it from Hyper-V and record in env->tsc_khz, so that vcpu's TSC
1391 * frequency can be migrated later via this field.
1393 if (!env->tsc_khz) {
1394 hr = whp_dispatch.WHvGetCapability(
1395 WHvCapabilityCodeProcessorClockFrequency, &freq, sizeof(freq),
1396 NULL);
1397 if (hr != WHV_E_UNKNOWN_CAPABILITY) {
1398 if (FAILED(hr)) {
1399 printf("WHPX: Failed to query tsc frequency, hr=0x%08lx\n", hr);
1400 } else {
1401 env->tsc_khz = freq / 1000; /* Hz to KHz */
1406 env->apic_bus_freq = HYPERV_APIC_BUS_FREQUENCY;
1407 hr = whp_dispatch.WHvGetCapability(
1408 WHvCapabilityCodeInterruptClockFrequency, &freq, sizeof(freq), NULL);
1409 if (hr != WHV_E_UNKNOWN_CAPABILITY) {
1410 if (FAILED(hr)) {
1411 printf("WHPX: Failed to query apic bus frequency hr=0x%08lx\n", hr);
1412 } else {
1413 env->apic_bus_freq = freq;
1418 * If the vmware cpuid frequency leaf option is set, and we have a valid
1419 * tsc value, trap the corresponding cpuid's.
1421 if (x86_cpu->vmware_cpuid_freq && env->tsc_khz) {
1422 UINT32 cpuidExitList[] = {1, 0x80000001, 0x40000000, 0x40000010};
1424 hr = whp_dispatch.WHvSetPartitionProperty(
1425 whpx->partition,
1426 WHvPartitionPropertyCodeCpuidExitList,
1427 cpuidExitList,
1428 RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
1430 if (FAILED(hr)) {
1431 error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
1432 hr);
1433 ret = -EINVAL;
1434 goto error;
1438 vcpu->interruptable = true;
1439 cpu->vcpu_dirty = true;
1440 cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu;
1441 max_vcpu_index = max(max_vcpu_index, cpu->cpu_index);
1442 qemu_add_vm_change_state_handler(whpx_cpu_update_state, cpu->env_ptr);
1444 return 0;
1446 error:
1447 g_free(vcpu);
1449 return ret;
1452 int whpx_vcpu_exec(CPUState *cpu)
1454 int ret;
1455 int fatal;
1457 for (;;) {
1458 if (cpu->exception_index >= EXCP_INTERRUPT) {
1459 ret = cpu->exception_index;
1460 cpu->exception_index = -1;
1461 break;
1464 fatal = whpx_vcpu_run(cpu);
1466 if (fatal) {
1467 error_report("WHPX: Failed to exec a virtual processor");
1468 abort();
1472 return ret;
1475 void whpx_destroy_vcpu(CPUState *cpu)
1477 struct whpx_state *whpx = &whpx_global;
1478 struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu);
1480 whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index);
1481 whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator);
1482 g_free(cpu->hax_vcpu);
1483 return;
1486 void whpx_vcpu_kick(CPUState *cpu)
1488 struct whpx_state *whpx = &whpx_global;
1489 whp_dispatch.WHvCancelRunVirtualProcessor(
1490 whpx->partition, cpu->cpu_index, 0);
/*
 * Memory support.
 */
1497 static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size,
1498 void *host_va, int add, int rom,
1499 const char *name)
1501 struct whpx_state *whpx = &whpx_global;
1502 HRESULT hr;
1505 if (add) {
1506 printf("WHPX: ADD PA:%p Size:%p, Host:%p, %s, '%s'\n",
1507 (void*)start_pa, (void*)size, host_va,
1508 (rom ? "ROM" : "RAM"), name);
1509 } else {
1510 printf("WHPX: DEL PA:%p Size:%p, Host:%p, '%s'\n",
1511 (void*)start_pa, (void*)size, host_va, name);
1515 if (add) {
1516 hr = whp_dispatch.WHvMapGpaRange(whpx->partition,
1517 host_va,
1518 start_pa,
1519 size,
1520 (WHvMapGpaRangeFlagRead |
1521 WHvMapGpaRangeFlagExecute |
1522 (rom ? 0 : WHvMapGpaRangeFlagWrite)));
1523 } else {
1524 hr = whp_dispatch.WHvUnmapGpaRange(whpx->partition,
1525 start_pa,
1526 size);
1529 if (FAILED(hr)) {
1530 error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes,"
1531 " Host:%p, hr=%08lx",
1532 (add ? "MAP" : "UNMAP"), name,
1533 (void *)(uintptr_t)start_pa, (void *)size, host_va, hr);
1537 static void whpx_process_section(MemoryRegionSection *section, int add)
1539 MemoryRegion *mr = section->mr;
1540 hwaddr start_pa = section->offset_within_address_space;
1541 ram_addr_t size = int128_get64(section->size);
1542 unsigned int delta;
1543 uint64_t host_va;
1545 if (!memory_region_is_ram(mr)) {
1546 return;
1549 delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask);
1550 delta &= ~qemu_real_host_page_mask;
1551 if (delta > size) {
1552 return;
1554 start_pa += delta;
1555 size -= delta;
1556 size &= qemu_real_host_page_mask;
1557 if (!size || (start_pa & ~qemu_real_host_page_mask)) {
1558 return;
1561 host_va = (uintptr_t)memory_region_get_ram_ptr(mr)
1562 + section->offset_within_region + delta;
1564 whpx_update_mapping(start_pa, size, (void *)(uintptr_t)host_va, add,
1565 memory_region_is_rom(mr), mr->name);
1568 static void whpx_region_add(MemoryListener *listener,
1569 MemoryRegionSection *section)
1571 memory_region_ref(section->mr);
1572 whpx_process_section(section, 1);
1575 static void whpx_region_del(MemoryListener *listener,
1576 MemoryRegionSection *section)
1578 whpx_process_section(section, 0);
1579 memory_region_unref(section->mr);
1582 static void whpx_transaction_begin(MemoryListener *listener)
1586 static void whpx_transaction_commit(MemoryListener *listener)
1590 static void whpx_log_sync(MemoryListener *listener,
1591 MemoryRegionSection *section)
1593 MemoryRegion *mr = section->mr;
1595 if (!memory_region_is_ram(mr)) {
1596 return;
1599 memory_region_set_dirty(mr, 0, int128_get64(section->size));
1602 static MemoryListener whpx_memory_listener = {
1603 .begin = whpx_transaction_begin,
1604 .commit = whpx_transaction_commit,
1605 .region_add = whpx_region_add,
1606 .region_del = whpx_region_del,
1607 .log_sync = whpx_log_sync,
1608 .priority = 10,
1611 static void whpx_memory_init(void)
1613 memory_listener_register(&whpx_memory_listener, &address_space_memory);
1617 * Load the functions from the given library, using the given handle. If a
1618 * handle is provided, it is used, otherwise the library is opened. The
1619 * handle will be updated on return with the opened one.
1621 static bool load_whp_dispatch_fns(HMODULE *handle,
1622 WHPFunctionList function_list)
1624 HMODULE hLib = *handle;
1626 #define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
1627 #define WINHV_EMULATION_DLL "WinHvEmulation.dll"
1628 #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
1629 whp_dispatch.function_name = \
1630 (function_name ## _t)GetProcAddress(hLib, #function_name); \
1632 #define WHP_LOAD_FIELD(return_type, function_name, signature) \
1633 whp_dispatch.function_name = \
1634 (function_name ## _t)GetProcAddress(hLib, #function_name); \
1635 if (!whp_dispatch.function_name) { \
1636 error_report("Could not load function %s", #function_name); \
1637 goto error; \
1640 #define WHP_LOAD_LIB(lib_name, handle_lib) \
1641 if (!handle_lib) { \
1642 handle_lib = LoadLibrary(lib_name); \
1643 if (!handle_lib) { \
1644 error_report("Could not load library %s.", lib_name); \
1645 goto error; \
1649 switch (function_list) {
1650 case WINHV_PLATFORM_FNS_DEFAULT:
1651 WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
1652 LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD)
1653 break;
1655 case WINHV_EMULATION_FNS_DEFAULT:
1656 WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib)
1657 LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD)
1658 break;
1660 case WINHV_PLATFORM_FNS_SUPPLEMENTAL:
1661 WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib)
1662 LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL)
1663 break;
1666 *handle = hLib;
1667 return true;
1669 error:
1670 if (hLib) {
1671 FreeLibrary(hLib);
1674 return false;
1677 static void whpx_set_kernel_irqchip(Object *obj, Visitor *v,
1678 const char *name, void *opaque,
1679 Error **errp)
1681 struct whpx_state *whpx = &whpx_global;
1682 OnOffSplit mode;
1684 if (!visit_type_OnOffSplit(v, name, &mode, errp)) {
1685 return;
1688 switch (mode) {
1689 case ON_OFF_SPLIT_ON:
1690 whpx->kernel_irqchip_allowed = true;
1691 whpx->kernel_irqchip_required = true;
1692 break;
1694 case ON_OFF_SPLIT_OFF:
1695 whpx->kernel_irqchip_allowed = false;
1696 whpx->kernel_irqchip_required = false;
1697 break;
1699 case ON_OFF_SPLIT_SPLIT:
1700 error_setg(errp, "WHPX: split irqchip currently not supported");
1701 error_append_hint(errp,
1702 "Try without kernel-irqchip or with kernel-irqchip=on|off");
1703 break;
1705 default:
1707 * The value was checked in visit_type_OnOffSplit() above. If
1708 * we get here, then something is wrong in QEMU.
1710 abort();
/*
 * Partition support
 */
1718 static int whpx_accel_init(MachineState *ms)
1720 struct whpx_state *whpx;
1721 int ret;
1722 HRESULT hr;
1723 WHV_CAPABILITY whpx_cap;
1724 UINT32 whpx_cap_size;
1725 WHV_PARTITION_PROPERTY prop;
1726 UINT32 cpuidExitList[] = {1, 0x80000001};
1727 WHV_CAPABILITY_FEATURES features = {0};
1729 whpx = &whpx_global;
1731 if (!init_whp_dispatch()) {
1732 ret = -ENOSYS;
1733 goto error;
1736 whpx->mem_quota = ms->ram_size;
1738 hr = whp_dispatch.WHvGetCapability(
1739 WHvCapabilityCodeHypervisorPresent, &whpx_cap,
1740 sizeof(whpx_cap), &whpx_cap_size);
1741 if (FAILED(hr) || !whpx_cap.HypervisorPresent) {
1742 error_report("WHPX: No accelerator found, hr=%08lx", hr);
1743 ret = -ENOSPC;
1744 goto error;
1747 hr = whp_dispatch.WHvGetCapability(
1748 WHvCapabilityCodeFeatures, &features, sizeof(features), NULL);
1749 if (FAILED(hr)) {
1750 error_report("WHPX: Failed to query capabilities, hr=%08lx", hr);
1751 ret = -EINVAL;
1752 goto error;
1755 hr = whp_dispatch.WHvCreatePartition(&whpx->partition);
1756 if (FAILED(hr)) {
1757 error_report("WHPX: Failed to create partition, hr=%08lx", hr);
1758 ret = -EINVAL;
1759 goto error;
1762 memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
1763 prop.ProcessorCount = ms->smp.cpus;
1764 hr = whp_dispatch.WHvSetPartitionProperty(
1765 whpx->partition,
1766 WHvPartitionPropertyCodeProcessorCount,
1767 &prop,
1768 sizeof(WHV_PARTITION_PROPERTY));
1770 if (FAILED(hr)) {
1771 error_report("WHPX: Failed to set partition core count to %d,"
1772 " hr=%08lx", ms->smp.cores, hr);
1773 ret = -EINVAL;
1774 goto error;
1778 * Error out if WHP doesn't support apic emulation and user is requiring
1779 * it.
1781 if (whpx->kernel_irqchip_required && (!features.LocalApicEmulation ||
1782 !whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2)) {
1783 error_report("WHPX: kernel irqchip requested, but unavailable. "
1784 "Try without kernel-irqchip or with kernel-irqchip=off");
1785 ret = -EINVAL;
1786 goto error;
1789 if (whpx->kernel_irqchip_allowed && features.LocalApicEmulation &&
1790 whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2) {
1791 WHV_X64_LOCAL_APIC_EMULATION_MODE mode =
1792 WHvX64LocalApicEmulationModeXApic;
1793 printf("WHPX: setting APIC emulation mode in the hypervisor\n");
1794 hr = whp_dispatch.WHvSetPartitionProperty(
1795 whpx->partition,
1796 WHvPartitionPropertyCodeLocalApicEmulationMode,
1797 &mode,
1798 sizeof(mode));
1799 if (FAILED(hr)) {
1800 error_report("WHPX: Failed to enable kernel irqchip hr=%08lx", hr);
1801 if (whpx->kernel_irqchip_required) {
1802 error_report("WHPX: kernel irqchip requested, but unavailable");
1803 ret = -EINVAL;
1804 goto error;
1806 } else {
1807 whpx->apic_in_platform = true;
1811 /* Register for MSR and CPUID exits */
1812 memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY));
1813 prop.ExtendedVmExits.X64MsrExit = 1;
1814 prop.ExtendedVmExits.X64CpuidExit = 1;
1815 if (whpx_apic_in_platform()) {
1816 prop.ExtendedVmExits.X64ApicInitSipiExitTrap = 1;
1819 hr = whp_dispatch.WHvSetPartitionProperty(
1820 whpx->partition,
1821 WHvPartitionPropertyCodeExtendedVmExits,
1822 &prop,
1823 sizeof(WHV_PARTITION_PROPERTY));
1824 if (FAILED(hr)) {
1825 error_report("WHPX: Failed to enable MSR & CPUIDexit, hr=%08lx", hr);
1826 ret = -EINVAL;
1827 goto error;
1830 hr = whp_dispatch.WHvSetPartitionProperty(
1831 whpx->partition,
1832 WHvPartitionPropertyCodeCpuidExitList,
1833 cpuidExitList,
1834 RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32));
1836 if (FAILED(hr)) {
1837 error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
1838 hr);
1839 ret = -EINVAL;
1840 goto error;
1843 hr = whp_dispatch.WHvSetupPartition(whpx->partition);
1844 if (FAILED(hr)) {
1845 error_report("WHPX: Failed to setup partition, hr=%08lx", hr);
1846 ret = -EINVAL;
1847 goto error;
1850 whpx_memory_init();
1852 printf("Windows Hypervisor Platform accelerator is operational\n");
1853 return 0;
1855 error:
1857 if (NULL != whpx->partition) {
1858 whp_dispatch.WHvDeletePartition(whpx->partition);
1859 whpx->partition = NULL;
1862 return ret;
1865 int whpx_enabled(void)
1867 return whpx_allowed;
1870 bool whpx_apic_in_platform(void) {
1871 return whpx_global.apic_in_platform;
1874 static void whpx_accel_class_init(ObjectClass *oc, void *data)
1876 AccelClass *ac = ACCEL_CLASS(oc);
1877 ac->name = "WHPX";
1878 ac->init_machine = whpx_accel_init;
1879 ac->allowed = &whpx_allowed;
1881 object_class_property_add(oc, "kernel-irqchip", "on|off|split",
1882 NULL, whpx_set_kernel_irqchip,
1883 NULL, NULL);
1884 object_class_property_set_description(oc, "kernel-irqchip",
1885 "Configure WHPX in-kernel irqchip");
1888 static void whpx_accel_instance_init(Object *obj)
1890 struct whpx_state *whpx = &whpx_global;
1892 memset(whpx, 0, sizeof(struct whpx_state));
1893 /* Turn on kernel-irqchip, by default */
1894 whpx->kernel_irqchip_allowed = true;
1897 static const TypeInfo whpx_accel_type = {
1898 .name = ACCEL_CLASS_NAME("whpx"),
1899 .parent = TYPE_ACCEL,
1900 .instance_init = whpx_accel_instance_init,
1901 .class_init = whpx_accel_class_init,
1904 static void whpx_type_init(void)
1906 type_register_static(&whpx_accel_type);
1909 bool init_whp_dispatch(void)
1911 if (whp_dispatch_initialized) {
1912 return true;
1915 if (!load_whp_dispatch_fns(&hWinHvPlatform, WINHV_PLATFORM_FNS_DEFAULT)) {
1916 goto error;
1919 if (!load_whp_dispatch_fns(&hWinHvEmulation, WINHV_EMULATION_FNS_DEFAULT)) {
1920 goto error;
1923 assert(load_whp_dispatch_fns(&hWinHvPlatform,
1924 WINHV_PLATFORM_FNS_SUPPLEMENTAL));
1925 whp_dispatch_initialized = true;
1927 return true;
1928 error:
1929 if (hWinHvPlatform) {
1930 FreeLibrary(hWinHvPlatform);
1933 if (hWinHvEmulation) {
1934 FreeLibrary(hWinHvEmulation);
1937 return false;
/* Hook whpx_type_init() into QEMU's type_init() registration. */
type_init(whpx_type_init);