2 * QEMU Windows Hypervisor Platform accelerator (WHPX)
4 * Copyright Microsoft Corp. 2017
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
11 #include "qemu/osdep.h"
13 #include "exec/address-spaces.h"
14 #include "exec/ioport.h"
15 #include "qemu-common.h"
16 #include "sysemu/accel.h"
17 #include "sysemu/whpx.h"
18 #include "sysemu/cpus.h"
19 #include "sysemu/runstate.h"
20 #include "qemu/main-loop.h"
21 #include "hw/boards.h"
22 #include "hw/i386/ioapic.h"
23 #include "hw/i386/apic_internal.h"
24 #include "qemu/error-report.h"
25 #include "qapi/error.h"
26 #include "qapi/qapi-types-common.h"
27 #include "qapi/qapi-visit-common.h"
28 #include "migration/blocker.h"
31 #include "whpx-cpus.h"
32 #include "whpx-internal.h"
34 #define HYPERV_APIC_BUS_FREQUENCY (200000000ULL)
36 static const WHV_REGISTER_NAME whpx_register_names
[] = {
38 /* X64 General purpose registers */
58 /* X64 Segment registers */
68 /* X64 Table registers */
72 /* X64 Control Registers */
79 /* X64 Debug Registers */
89 /* X64 Floating Point and Vector Registers */
106 WHvX64RegisterFpMmx0
,
107 WHvX64RegisterFpMmx1
,
108 WHvX64RegisterFpMmx2
,
109 WHvX64RegisterFpMmx3
,
110 WHvX64RegisterFpMmx4
,
111 WHvX64RegisterFpMmx5
,
112 WHvX64RegisterFpMmx6
,
113 WHvX64RegisterFpMmx7
,
114 WHvX64RegisterFpControlStatus
,
115 WHvX64RegisterXmmControlStatus
,
120 WHvX64RegisterKernelGsBase
,
122 WHvX64RegisterApicBase
,
123 /* WHvX64RegisterPat, */
124 WHvX64RegisterSysenterCs
,
125 WHvX64RegisterSysenterEip
,
126 WHvX64RegisterSysenterEsp
,
131 WHvX64RegisterSfmask
,
134 /* Interrupt / Event Registers */
136 * WHvRegisterPendingInterruption,
137 * WHvRegisterInterruptState,
138 * WHvRegisterPendingEvent0,
139 * WHvRegisterPendingEvent1
140 * WHvX64RegisterDeliverabilityNotifications,
144 struct whpx_register_set
{
145 WHV_REGISTER_VALUE values
[RTL_NUMBER_OF(whpx_register_names
)];
149 WHV_EMULATOR_HANDLE emulator
;
150 bool window_registered
;
152 bool ready_for_pic_interrupt
;
155 bool interruption_pending
;
157 /* Must be the last field as it may have a tail */
158 WHV_RUN_VP_EXIT_CONTEXT exit_ctx
;
161 static bool whpx_allowed
;
162 static bool whp_dispatch_initialized
;
163 static HMODULE hWinHvPlatform
, hWinHvEmulation
;
164 static uint32_t max_vcpu_index
;
165 struct whpx_state whpx_global
;
166 struct WHPDispatch whp_dispatch
;
173 static struct whpx_vcpu
*get_whpx_vcpu(CPUState
*cpu
)
175 return (struct whpx_vcpu
*)cpu
->hax_vcpu
;
178 static WHV_X64_SEGMENT_REGISTER
whpx_seg_q2h(const SegmentCache
*qs
, int v86
,
181 WHV_X64_SEGMENT_REGISTER hs
;
182 unsigned flags
= qs
->flags
;
185 hs
.Limit
= qs
->limit
;
186 hs
.Selector
= qs
->selector
;
192 hs
.DescriptorPrivilegeLevel
= 3;
193 hs
.NonSystemSegment
= 1;
196 hs
.Attributes
= (flags
>> DESC_TYPE_SHIFT
);
199 /* hs.Base &= 0xfffff; */
206 static SegmentCache
whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER
*hs
)
211 qs
.limit
= hs
->Limit
;
212 qs
.selector
= hs
->Selector
;
214 qs
.flags
= ((uint32_t)hs
->Attributes
) << DESC_TYPE_SHIFT
;
219 static int whpx_set_tsc(CPUState
*cpu
)
221 struct CPUX86State
*env
= (CPUArchState
*)(cpu
->env_ptr
);
222 WHV_REGISTER_NAME tsc_reg
= WHvX64RegisterTsc
;
223 WHV_REGISTER_VALUE tsc_val
;
225 struct whpx_state
*whpx
= &whpx_global
;
228 * Suspend the partition prior to setting the TSC to reduce the variance
229 * in TSC across vCPUs. When the first vCPU runs post suspend, the
230 * partition is automatically resumed.
232 if (whp_dispatch
.WHvSuspendPartitionTime
) {
235 * Unable to suspend partition while setting TSC is not a fatal
236 * error. It just increases the likelihood of TSC variance between
237 * vCPUs and some guest OS are able to handle that just fine.
239 hr
= whp_dispatch
.WHvSuspendPartitionTime(whpx
->partition
);
241 warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr
);
245 tsc_val
.Reg64
= env
->tsc
;
246 hr
= whp_dispatch
.WHvSetVirtualProcessorRegisters(
247 whpx
->partition
, cpu
->cpu_index
, &tsc_reg
, 1, &tsc_val
);
249 error_report("WHPX: Failed to set TSC, hr=%08lx", hr
);
256 static void whpx_set_registers(CPUState
*cpu
, int level
)
258 struct whpx_state
*whpx
= &whpx_global
;
259 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
260 struct CPUX86State
*env
= (CPUArchState
*)(cpu
->env_ptr
);
261 X86CPU
*x86_cpu
= X86_CPU(cpu
);
262 struct whpx_register_set vcxt
;
269 assert(cpu_is_stopped(cpu
) || qemu_cpu_is_self(cpu
));
272 * Following MSRs have side effects on the guest or are too heavy for
273 * runtime. Limit them to full state update.
275 if (level
>= WHPX_SET_RESET_STATE
) {
279 memset(&vcxt
, 0, sizeof(struct whpx_register_set
));
281 v86
= (env
->eflags
& VM_MASK
);
282 r86
= !(env
->cr
[0] & CR0_PE_MASK
);
284 vcpu
->tpr
= cpu_get_apic_tpr(x86_cpu
->apic_state
);
285 vcpu
->apic_base
= cpu_get_apic_base(x86_cpu
->apic_state
);
289 /* Indexes for first 16 registers match between HV and QEMU definitions */
291 for (idx
= 0; idx
< CPU_NB_REGS
; idx
+= 1) {
292 vcxt
.values
[idx
].Reg64
= (uint64_t)env
->regs
[idx
];
296 /* Same goes for RIP and RFLAGS */
297 assert(whpx_register_names
[idx
] == WHvX64RegisterRip
);
298 vcxt
.values
[idx
++].Reg64
= env
->eip
;
300 assert(whpx_register_names
[idx
] == WHvX64RegisterRflags
);
301 vcxt
.values
[idx
++].Reg64
= env
->eflags
;
303 /* Translate 6+4 segment registers. HV and QEMU order matches */
304 assert(idx
== WHvX64RegisterEs
);
305 for (i
= 0; i
< 6; i
+= 1, idx
+= 1) {
306 vcxt
.values
[idx
].Segment
= whpx_seg_q2h(&env
->segs
[i
], v86
, r86
);
309 assert(idx
== WHvX64RegisterLdtr
);
310 vcxt
.values
[idx
++].Segment
= whpx_seg_q2h(&env
->ldt
, 0, 0);
312 assert(idx
== WHvX64RegisterTr
);
313 vcxt
.values
[idx
++].Segment
= whpx_seg_q2h(&env
->tr
, 0, 0);
315 assert(idx
== WHvX64RegisterIdtr
);
316 vcxt
.values
[idx
].Table
.Base
= env
->idt
.base
;
317 vcxt
.values
[idx
].Table
.Limit
= env
->idt
.limit
;
320 assert(idx
== WHvX64RegisterGdtr
);
321 vcxt
.values
[idx
].Table
.Base
= env
->gdt
.base
;
322 vcxt
.values
[idx
].Table
.Limit
= env
->gdt
.limit
;
325 /* CR0, 2, 3, 4, 8 */
326 assert(whpx_register_names
[idx
] == WHvX64RegisterCr0
);
327 vcxt
.values
[idx
++].Reg64
= env
->cr
[0];
328 assert(whpx_register_names
[idx
] == WHvX64RegisterCr2
);
329 vcxt
.values
[idx
++].Reg64
= env
->cr
[2];
330 assert(whpx_register_names
[idx
] == WHvX64RegisterCr3
);
331 vcxt
.values
[idx
++].Reg64
= env
->cr
[3];
332 assert(whpx_register_names
[idx
] == WHvX64RegisterCr4
);
333 vcxt
.values
[idx
++].Reg64
= env
->cr
[4];
334 assert(whpx_register_names
[idx
] == WHvX64RegisterCr8
);
335 vcxt
.values
[idx
++].Reg64
= vcpu
->tpr
;
337 /* 8 Debug Registers - Skipped */
339 /* 16 XMM registers */
340 assert(whpx_register_names
[idx
] == WHvX64RegisterXmm0
);
342 for (i
= 0; i
< sizeof(env
->xmm_regs
) / sizeof(ZMMReg
); i
+= 1, idx
+= 1) {
343 vcxt
.values
[idx
].Reg128
.Low64
= env
->xmm_regs
[i
].ZMM_Q(0);
344 vcxt
.values
[idx
].Reg128
.High64
= env
->xmm_regs
[i
].ZMM_Q(1);
349 assert(whpx_register_names
[idx
] == WHvX64RegisterFpMmx0
);
350 for (i
= 0; i
< 8; i
+= 1, idx
+= 1) {
351 vcxt
.values
[idx
].Fp
.AsUINT128
.Low64
= env
->fpregs
[i
].mmx
.MMX_Q(0);
352 /* vcxt.values[idx].Fp.AsUINT128.High64 =
353 env->fpregs[i].mmx.MMX_Q(1);
357 /* FP control status register */
358 assert(whpx_register_names
[idx
] == WHvX64RegisterFpControlStatus
);
359 vcxt
.values
[idx
].FpControlStatus
.FpControl
= env
->fpuc
;
360 vcxt
.values
[idx
].FpControlStatus
.FpStatus
=
361 (env
->fpus
& ~0x3800) | (env
->fpstt
& 0x7) << 11;
362 vcxt
.values
[idx
].FpControlStatus
.FpTag
= 0;
363 for (i
= 0; i
< 8; ++i
) {
364 vcxt
.values
[idx
].FpControlStatus
.FpTag
|= (!env
->fptags
[i
]) << i
;
366 vcxt
.values
[idx
].FpControlStatus
.Reserved
= 0;
367 vcxt
.values
[idx
].FpControlStatus
.LastFpOp
= env
->fpop
;
368 vcxt
.values
[idx
].FpControlStatus
.LastFpRip
= env
->fpip
;
371 /* XMM control status register */
372 assert(whpx_register_names
[idx
] == WHvX64RegisterXmmControlStatus
);
373 vcxt
.values
[idx
].XmmControlStatus
.LastFpRdp
= 0;
374 vcxt
.values
[idx
].XmmControlStatus
.XmmStatusControl
= env
->mxcsr
;
375 vcxt
.values
[idx
].XmmControlStatus
.XmmStatusControlMask
= 0x0000ffff;
379 assert(whpx_register_names
[idx
] == WHvX64RegisterEfer
);
380 vcxt
.values
[idx
++].Reg64
= env
->efer
;
382 assert(whpx_register_names
[idx
] == WHvX64RegisterKernelGsBase
);
383 vcxt
.values
[idx
++].Reg64
= env
->kernelgsbase
;
386 assert(whpx_register_names
[idx
] == WHvX64RegisterApicBase
);
387 vcxt
.values
[idx
++].Reg64
= vcpu
->apic_base
;
389 /* WHvX64RegisterPat - Skipped */
391 assert(whpx_register_names
[idx
] == WHvX64RegisterSysenterCs
);
392 vcxt
.values
[idx
++].Reg64
= env
->sysenter_cs
;
393 assert(whpx_register_names
[idx
] == WHvX64RegisterSysenterEip
);
394 vcxt
.values
[idx
++].Reg64
= env
->sysenter_eip
;
395 assert(whpx_register_names
[idx
] == WHvX64RegisterSysenterEsp
);
396 vcxt
.values
[idx
++].Reg64
= env
->sysenter_esp
;
397 assert(whpx_register_names
[idx
] == WHvX64RegisterStar
);
398 vcxt
.values
[idx
++].Reg64
= env
->star
;
400 assert(whpx_register_names
[idx
] == WHvX64RegisterLstar
);
401 vcxt
.values
[idx
++].Reg64
= env
->lstar
;
402 assert(whpx_register_names
[idx
] == WHvX64RegisterCstar
);
403 vcxt
.values
[idx
++].Reg64
= env
->cstar
;
404 assert(whpx_register_names
[idx
] == WHvX64RegisterSfmask
);
405 vcxt
.values
[idx
++].Reg64
= env
->fmask
;
408 /* Interrupt / Event Registers - Skipped */
410 assert(idx
== RTL_NUMBER_OF(whpx_register_names
));
412 hr
= whp_dispatch
.WHvSetVirtualProcessorRegisters(
413 whpx
->partition
, cpu
->cpu_index
,
415 RTL_NUMBER_OF(whpx_register_names
),
419 error_report("WHPX: Failed to set virtual processor context, hr=%08lx",
426 static int whpx_get_tsc(CPUState
*cpu
)
428 struct CPUX86State
*env
= (CPUArchState
*)(cpu
->env_ptr
);
429 WHV_REGISTER_NAME tsc_reg
= WHvX64RegisterTsc
;
430 WHV_REGISTER_VALUE tsc_val
;
432 struct whpx_state
*whpx
= &whpx_global
;
434 hr
= whp_dispatch
.WHvGetVirtualProcessorRegisters(
435 whpx
->partition
, cpu
->cpu_index
, &tsc_reg
, 1, &tsc_val
);
437 error_report("WHPX: Failed to get TSC, hr=%08lx", hr
);
441 env
->tsc
= tsc_val
.Reg64
;
445 static void whpx_get_registers(CPUState
*cpu
)
447 struct whpx_state
*whpx
= &whpx_global
;
448 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
449 struct CPUX86State
*env
= (CPUArchState
*)(cpu
->env_ptr
);
450 X86CPU
*x86_cpu
= X86_CPU(cpu
);
451 struct whpx_register_set vcxt
;
452 uint64_t tpr
, apic_base
;
458 assert(cpu_is_stopped(cpu
) || qemu_cpu_is_self(cpu
));
460 if (!env
->tsc_valid
) {
462 env
->tsc_valid
= !runstate_is_running();
465 hr
= whp_dispatch
.WHvGetVirtualProcessorRegisters(
466 whpx
->partition
, cpu
->cpu_index
,
468 RTL_NUMBER_OF(whpx_register_names
),
471 error_report("WHPX: Failed to get virtual processor context, hr=%08lx",
477 /* Indexes for first 16 registers match between HV and QEMU definitions */
479 for (idx
= 0; idx
< CPU_NB_REGS
; idx
+= 1) {
480 env
->regs
[idx
] = vcxt
.values
[idx
].Reg64
;
484 /* Same goes for RIP and RFLAGS */
485 assert(whpx_register_names
[idx
] == WHvX64RegisterRip
);
486 env
->eip
= vcxt
.values
[idx
++].Reg64
;
487 assert(whpx_register_names
[idx
] == WHvX64RegisterRflags
);
488 env
->eflags
= vcxt
.values
[idx
++].Reg64
;
490 /* Translate 6+4 segment registers. HV and QEMU order matches */
491 assert(idx
== WHvX64RegisterEs
);
492 for (i
= 0; i
< 6; i
+= 1, idx
+= 1) {
493 env
->segs
[i
] = whpx_seg_h2q(&vcxt
.values
[idx
].Segment
);
496 assert(idx
== WHvX64RegisterLdtr
);
497 env
->ldt
= whpx_seg_h2q(&vcxt
.values
[idx
++].Segment
);
498 assert(idx
== WHvX64RegisterTr
);
499 env
->tr
= whpx_seg_h2q(&vcxt
.values
[idx
++].Segment
);
500 assert(idx
== WHvX64RegisterIdtr
);
501 env
->idt
.base
= vcxt
.values
[idx
].Table
.Base
;
502 env
->idt
.limit
= vcxt
.values
[idx
].Table
.Limit
;
504 assert(idx
== WHvX64RegisterGdtr
);
505 env
->gdt
.base
= vcxt
.values
[idx
].Table
.Base
;
506 env
->gdt
.limit
= vcxt
.values
[idx
].Table
.Limit
;
509 /* CR0, 2, 3, 4, 8 */
510 assert(whpx_register_names
[idx
] == WHvX64RegisterCr0
);
511 env
->cr
[0] = vcxt
.values
[idx
++].Reg64
;
512 assert(whpx_register_names
[idx
] == WHvX64RegisterCr2
);
513 env
->cr
[2] = vcxt
.values
[idx
++].Reg64
;
514 assert(whpx_register_names
[idx
] == WHvX64RegisterCr3
);
515 env
->cr
[3] = vcxt
.values
[idx
++].Reg64
;
516 assert(whpx_register_names
[idx
] == WHvX64RegisterCr4
);
517 env
->cr
[4] = vcxt
.values
[idx
++].Reg64
;
518 assert(whpx_register_names
[idx
] == WHvX64RegisterCr8
);
519 tpr
= vcxt
.values
[idx
++].Reg64
;
520 if (tpr
!= vcpu
->tpr
) {
522 cpu_set_apic_tpr(x86_cpu
->apic_state
, tpr
);
525 /* 8 Debug Registers - Skipped */
527 /* 16 XMM registers */
528 assert(whpx_register_names
[idx
] == WHvX64RegisterXmm0
);
530 for (i
= 0; i
< sizeof(env
->xmm_regs
) / sizeof(ZMMReg
); i
+= 1, idx
+= 1) {
531 env
->xmm_regs
[i
].ZMM_Q(0) = vcxt
.values
[idx
].Reg128
.Low64
;
532 env
->xmm_regs
[i
].ZMM_Q(1) = vcxt
.values
[idx
].Reg128
.High64
;
537 assert(whpx_register_names
[idx
] == WHvX64RegisterFpMmx0
);
538 for (i
= 0; i
< 8; i
+= 1, idx
+= 1) {
539 env
->fpregs
[i
].mmx
.MMX_Q(0) = vcxt
.values
[idx
].Fp
.AsUINT128
.Low64
;
540 /* env->fpregs[i].mmx.MMX_Q(1) =
541 vcxt.values[idx].Fp.AsUINT128.High64;
545 /* FP control status register */
546 assert(whpx_register_names
[idx
] == WHvX64RegisterFpControlStatus
);
547 env
->fpuc
= vcxt
.values
[idx
].FpControlStatus
.FpControl
;
548 env
->fpstt
= (vcxt
.values
[idx
].FpControlStatus
.FpStatus
>> 11) & 0x7;
549 env
->fpus
= vcxt
.values
[idx
].FpControlStatus
.FpStatus
& ~0x3800;
550 for (i
= 0; i
< 8; ++i
) {
551 env
->fptags
[i
] = !((vcxt
.values
[idx
].FpControlStatus
.FpTag
>> i
) & 1);
553 env
->fpop
= vcxt
.values
[idx
].FpControlStatus
.LastFpOp
;
554 env
->fpip
= vcxt
.values
[idx
].FpControlStatus
.LastFpRip
;
557 /* XMM control status register */
558 assert(whpx_register_names
[idx
] == WHvX64RegisterXmmControlStatus
);
559 env
->mxcsr
= vcxt
.values
[idx
].XmmControlStatus
.XmmStatusControl
;
563 assert(whpx_register_names
[idx
] == WHvX64RegisterEfer
);
564 env
->efer
= vcxt
.values
[idx
++].Reg64
;
566 assert(whpx_register_names
[idx
] == WHvX64RegisterKernelGsBase
);
567 env
->kernelgsbase
= vcxt
.values
[idx
++].Reg64
;
570 assert(whpx_register_names
[idx
] == WHvX64RegisterApicBase
);
571 apic_base
= vcxt
.values
[idx
++].Reg64
;
572 if (apic_base
!= vcpu
->apic_base
) {
573 vcpu
->apic_base
= apic_base
;
574 cpu_set_apic_base(x86_cpu
->apic_state
, vcpu
->apic_base
);
577 /* WHvX64RegisterPat - Skipped */
579 assert(whpx_register_names
[idx
] == WHvX64RegisterSysenterCs
);
580 env
->sysenter_cs
= vcxt
.values
[idx
++].Reg64
;
581 assert(whpx_register_names
[idx
] == WHvX64RegisterSysenterEip
);
582 env
->sysenter_eip
= vcxt
.values
[idx
++].Reg64
;
583 assert(whpx_register_names
[idx
] == WHvX64RegisterSysenterEsp
);
584 env
->sysenter_esp
= vcxt
.values
[idx
++].Reg64
;
585 assert(whpx_register_names
[idx
] == WHvX64RegisterStar
);
586 env
->star
= vcxt
.values
[idx
++].Reg64
;
588 assert(whpx_register_names
[idx
] == WHvX64RegisterLstar
);
589 env
->lstar
= vcxt
.values
[idx
++].Reg64
;
590 assert(whpx_register_names
[idx
] == WHvX64RegisterCstar
);
591 env
->cstar
= vcxt
.values
[idx
++].Reg64
;
592 assert(whpx_register_names
[idx
] == WHvX64RegisterSfmask
);
593 env
->fmask
= vcxt
.values
[idx
++].Reg64
;
596 /* Interrupt / Event Registers - Skipped */
598 assert(idx
== RTL_NUMBER_OF(whpx_register_names
));
600 if (whpx_apic_in_platform()) {
601 whpx_apic_get(x86_cpu
->apic_state
);
607 static HRESULT CALLBACK
whpx_emu_ioport_callback(
609 WHV_EMULATOR_IO_ACCESS_INFO
*IoAccess
)
611 MemTxAttrs attrs
= { 0 };
612 address_space_rw(&address_space_io
, IoAccess
->Port
, attrs
,
613 &IoAccess
->Data
, IoAccess
->AccessSize
,
614 IoAccess
->Direction
);
618 static HRESULT CALLBACK
whpx_emu_mmio_callback(
620 WHV_EMULATOR_MEMORY_ACCESS_INFO
*ma
)
622 cpu_physical_memory_rw(ma
->GpaAddress
, ma
->Data
, ma
->AccessSize
,
627 static HRESULT CALLBACK
whpx_emu_getreg_callback(
629 const WHV_REGISTER_NAME
*RegisterNames
,
630 UINT32 RegisterCount
,
631 WHV_REGISTER_VALUE
*RegisterValues
)
634 struct whpx_state
*whpx
= &whpx_global
;
635 CPUState
*cpu
= (CPUState
*)ctx
;
637 hr
= whp_dispatch
.WHvGetVirtualProcessorRegisters(
638 whpx
->partition
, cpu
->cpu_index
,
639 RegisterNames
, RegisterCount
,
642 error_report("WHPX: Failed to get virtual processor registers,"
649 static HRESULT CALLBACK
whpx_emu_setreg_callback(
651 const WHV_REGISTER_NAME
*RegisterNames
,
652 UINT32 RegisterCount
,
653 const WHV_REGISTER_VALUE
*RegisterValues
)
656 struct whpx_state
*whpx
= &whpx_global
;
657 CPUState
*cpu
= (CPUState
*)ctx
;
659 hr
= whp_dispatch
.WHvSetVirtualProcessorRegisters(
660 whpx
->partition
, cpu
->cpu_index
,
661 RegisterNames
, RegisterCount
,
664 error_report("WHPX: Failed to set virtual processor registers,"
669 * The emulator just successfully wrote the register state. We clear the
670 * dirty state so we avoid the double write on resume of the VP.
672 cpu
->vcpu_dirty
= false;
677 static HRESULT CALLBACK
whpx_emu_translate_callback(
679 WHV_GUEST_VIRTUAL_ADDRESS Gva
,
680 WHV_TRANSLATE_GVA_FLAGS TranslateFlags
,
681 WHV_TRANSLATE_GVA_RESULT_CODE
*TranslationResult
,
682 WHV_GUEST_PHYSICAL_ADDRESS
*Gpa
)
685 struct whpx_state
*whpx
= &whpx_global
;
686 CPUState
*cpu
= (CPUState
*)ctx
;
687 WHV_TRANSLATE_GVA_RESULT res
;
689 hr
= whp_dispatch
.WHvTranslateGva(whpx
->partition
, cpu
->cpu_index
,
690 Gva
, TranslateFlags
, &res
, Gpa
);
692 error_report("WHPX: Failed to translate GVA, hr=%08lx", hr
);
694 *TranslationResult
= res
.ResultCode
;
700 static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks
= {
701 .Size
= sizeof(WHV_EMULATOR_CALLBACKS
),
702 .WHvEmulatorIoPortCallback
= whpx_emu_ioport_callback
,
703 .WHvEmulatorMemoryCallback
= whpx_emu_mmio_callback
,
704 .WHvEmulatorGetVirtualProcessorRegisters
= whpx_emu_getreg_callback
,
705 .WHvEmulatorSetVirtualProcessorRegisters
= whpx_emu_setreg_callback
,
706 .WHvEmulatorTranslateGvaPage
= whpx_emu_translate_callback
,
709 static int whpx_handle_mmio(CPUState
*cpu
, WHV_MEMORY_ACCESS_CONTEXT
*ctx
)
712 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
713 WHV_EMULATOR_STATUS emu_status
;
715 hr
= whp_dispatch
.WHvEmulatorTryMmioEmulation(
717 &vcpu
->exit_ctx
.VpContext
, ctx
,
720 error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr
);
724 if (!emu_status
.EmulationSuccessful
) {
725 error_report("WHPX: Failed to emulate MMIO access with"
726 " EmulatorReturnStatus: %u", emu_status
.AsUINT32
);
733 static int whpx_handle_portio(CPUState
*cpu
,
734 WHV_X64_IO_PORT_ACCESS_CONTEXT
*ctx
)
737 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
738 WHV_EMULATOR_STATUS emu_status
;
740 hr
= whp_dispatch
.WHvEmulatorTryIoEmulation(
742 &vcpu
->exit_ctx
.VpContext
, ctx
,
745 error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr
);
749 if (!emu_status
.EmulationSuccessful
) {
750 error_report("WHPX: Failed to emulate PortIO access with"
751 " EmulatorReturnStatus: %u", emu_status
.AsUINT32
);
758 static int whpx_handle_halt(CPUState
*cpu
)
760 struct CPUX86State
*env
= (CPUArchState
*)(cpu
->env_ptr
);
763 qemu_mutex_lock_iothread();
764 if (!((cpu
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
765 (env
->eflags
& IF_MASK
)) &&
766 !(cpu
->interrupt_request
& CPU_INTERRUPT_NMI
)) {
767 cpu
->exception_index
= EXCP_HLT
;
771 qemu_mutex_unlock_iothread();
776 static void whpx_vcpu_pre_run(CPUState
*cpu
)
779 struct whpx_state
*whpx
= &whpx_global
;
780 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
781 struct CPUX86State
*env
= (CPUArchState
*)(cpu
->env_ptr
);
782 X86CPU
*x86_cpu
= X86_CPU(cpu
);
785 WHV_X64_PENDING_INTERRUPTION_REGISTER new_int
;
786 UINT32 reg_count
= 0;
787 WHV_REGISTER_VALUE reg_values
[3];
788 WHV_REGISTER_NAME reg_names
[3];
790 memset(&new_int
, 0, sizeof(new_int
));
791 memset(reg_values
, 0, sizeof(reg_values
));
793 qemu_mutex_lock_iothread();
796 if (!vcpu
->interruption_pending
&&
797 cpu
->interrupt_request
& (CPU_INTERRUPT_NMI
| CPU_INTERRUPT_SMI
)) {
798 if (cpu
->interrupt_request
& CPU_INTERRUPT_NMI
) {
799 cpu
->interrupt_request
&= ~CPU_INTERRUPT_NMI
;
800 vcpu
->interruptable
= false;
801 new_int
.InterruptionType
= WHvX64PendingNmi
;
802 new_int
.InterruptionPending
= 1;
803 new_int
.InterruptionVector
= 2;
805 if (cpu
->interrupt_request
& CPU_INTERRUPT_SMI
) {
806 cpu
->interrupt_request
&= ~CPU_INTERRUPT_SMI
;
811 * Force the VCPU out of its inner loop to process any INIT requests or
812 * commit pending TPR access.
814 if (cpu
->interrupt_request
& (CPU_INTERRUPT_INIT
| CPU_INTERRUPT_TPR
)) {
815 if ((cpu
->interrupt_request
& CPU_INTERRUPT_INIT
) &&
816 !(env
->hflags
& HF_SMM_MASK
)) {
817 cpu
->exit_request
= 1;
819 if (cpu
->interrupt_request
& CPU_INTERRUPT_TPR
) {
820 cpu
->exit_request
= 1;
824 /* Get pending hard interruption or replay one that was overwritten */
825 if (!whpx_apic_in_platform()) {
826 if (!vcpu
->interruption_pending
&&
827 vcpu
->interruptable
&& (env
->eflags
& IF_MASK
)) {
828 assert(!new_int
.InterruptionPending
);
829 if (cpu
->interrupt_request
& CPU_INTERRUPT_HARD
) {
830 cpu
->interrupt_request
&= ~CPU_INTERRUPT_HARD
;
831 irq
= cpu_get_pic_interrupt(env
);
833 new_int
.InterruptionType
= WHvX64PendingInterrupt
;
834 new_int
.InterruptionPending
= 1;
835 new_int
.InterruptionVector
= irq
;
840 /* Setup interrupt state if new one was prepared */
841 if (new_int
.InterruptionPending
) {
842 reg_values
[reg_count
].PendingInterruption
= new_int
;
843 reg_names
[reg_count
] = WHvRegisterPendingInterruption
;
846 } else if (vcpu
->ready_for_pic_interrupt
&&
847 (cpu
->interrupt_request
& CPU_INTERRUPT_HARD
)) {
848 cpu
->interrupt_request
&= ~CPU_INTERRUPT_HARD
;
849 irq
= cpu_get_pic_interrupt(env
);
851 reg_names
[reg_count
] = WHvRegisterPendingEvent
;
852 reg_values
[reg_count
].ExtIntEvent
= (WHV_X64_PENDING_EXT_INT_EVENT
)
855 .EventType
= WHvX64PendingEventExtInt
,
862 /* Sync the TPR to the CR8 if was modified during the intercept */
863 tpr
= cpu_get_apic_tpr(x86_cpu
->apic_state
);
864 if (tpr
!= vcpu
->tpr
) {
866 reg_values
[reg_count
].Reg64
= tpr
;
867 cpu
->exit_request
= 1;
868 reg_names
[reg_count
] = WHvX64RegisterCr8
;
872 /* Update the state of the interrupt delivery notification */
873 if (!vcpu
->window_registered
&&
874 cpu
->interrupt_request
& CPU_INTERRUPT_HARD
) {
875 reg_values
[reg_count
].DeliverabilityNotifications
=
876 (WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER
) {
877 .InterruptNotification
= 1
879 vcpu
->window_registered
= 1;
880 reg_names
[reg_count
] = WHvX64RegisterDeliverabilityNotifications
;
884 qemu_mutex_unlock_iothread();
885 vcpu
->ready_for_pic_interrupt
= false;
888 hr
= whp_dispatch
.WHvSetVirtualProcessorRegisters(
889 whpx
->partition
, cpu
->cpu_index
,
890 reg_names
, reg_count
, reg_values
);
892 error_report("WHPX: Failed to set interrupt state registers,"
900 static void whpx_vcpu_post_run(CPUState
*cpu
)
902 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
903 struct CPUX86State
*env
= (CPUArchState
*)(cpu
->env_ptr
);
904 X86CPU
*x86_cpu
= X86_CPU(cpu
);
906 env
->eflags
= vcpu
->exit_ctx
.VpContext
.Rflags
;
908 uint64_t tpr
= vcpu
->exit_ctx
.VpContext
.Cr8
;
909 if (vcpu
->tpr
!= tpr
) {
911 qemu_mutex_lock_iothread();
912 cpu_set_apic_tpr(x86_cpu
->apic_state
, vcpu
->tpr
);
913 qemu_mutex_unlock_iothread();
916 vcpu
->interruption_pending
=
917 vcpu
->exit_ctx
.VpContext
.ExecutionState
.InterruptionPending
;
919 vcpu
->interruptable
=
920 !vcpu
->exit_ctx
.VpContext
.ExecutionState
.InterruptShadow
;
925 static void whpx_vcpu_process_async_events(CPUState
*cpu
)
927 struct CPUX86State
*env
= (CPUArchState
*)(cpu
->env_ptr
);
928 X86CPU
*x86_cpu
= X86_CPU(cpu
);
929 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
931 if ((cpu
->interrupt_request
& CPU_INTERRUPT_INIT
) &&
932 !(env
->hflags
& HF_SMM_MASK
)) {
933 whpx_cpu_synchronize_state(cpu
);
934 do_cpu_init(x86_cpu
);
935 vcpu
->interruptable
= true;
938 if (cpu
->interrupt_request
& CPU_INTERRUPT_POLL
) {
939 cpu
->interrupt_request
&= ~CPU_INTERRUPT_POLL
;
940 apic_poll_irq(x86_cpu
->apic_state
);
943 if (((cpu
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
944 (env
->eflags
& IF_MASK
)) ||
945 (cpu
->interrupt_request
& CPU_INTERRUPT_NMI
)) {
949 if (cpu
->interrupt_request
& CPU_INTERRUPT_SIPI
) {
950 whpx_cpu_synchronize_state(cpu
);
951 do_cpu_sipi(x86_cpu
);
954 if (cpu
->interrupt_request
& CPU_INTERRUPT_TPR
) {
955 cpu
->interrupt_request
&= ~CPU_INTERRUPT_TPR
;
956 whpx_cpu_synchronize_state(cpu
);
957 apic_handle_tpr_access_report(x86_cpu
->apic_state
, env
->eip
,
958 env
->tpr_access_type
);
964 static int whpx_vcpu_run(CPUState
*cpu
)
967 struct whpx_state
*whpx
= &whpx_global
;
968 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
971 whpx_vcpu_process_async_events(cpu
);
972 if (cpu
->halted
&& !whpx_apic_in_platform()) {
973 cpu
->exception_index
= EXCP_HLT
;
974 qatomic_set(&cpu
->exit_request
, false);
978 qemu_mutex_unlock_iothread();
982 if (cpu
->vcpu_dirty
) {
983 whpx_set_registers(cpu
, WHPX_SET_RUNTIME_STATE
);
984 cpu
->vcpu_dirty
= false;
987 whpx_vcpu_pre_run(cpu
);
989 if (qatomic_read(&cpu
->exit_request
)) {
993 hr
= whp_dispatch
.WHvRunVirtualProcessor(
994 whpx
->partition
, cpu
->cpu_index
,
995 &vcpu
->exit_ctx
, sizeof(vcpu
->exit_ctx
));
998 error_report("WHPX: Failed to exec a virtual processor,"
1004 whpx_vcpu_post_run(cpu
);
1006 switch (vcpu
->exit_ctx
.ExitReason
) {
1007 case WHvRunVpExitReasonMemoryAccess
:
1008 ret
= whpx_handle_mmio(cpu
, &vcpu
->exit_ctx
.MemoryAccess
);
1011 case WHvRunVpExitReasonX64IoPortAccess
:
1012 ret
= whpx_handle_portio(cpu
, &vcpu
->exit_ctx
.IoPortAccess
);
1015 case WHvRunVpExitReasonX64InterruptWindow
:
1016 vcpu
->ready_for_pic_interrupt
= 1;
1017 vcpu
->window_registered
= 0;
1021 case WHvRunVpExitReasonX64ApicEoi
:
1022 assert(whpx_apic_in_platform());
1023 ioapic_eoi_broadcast(vcpu
->exit_ctx
.ApicEoi
.InterruptVector
);
1026 case WHvRunVpExitReasonX64Halt
:
1027 ret
= whpx_handle_halt(cpu
);
1030 case WHvRunVpExitReasonX64ApicInitSipiTrap
: {
1031 WHV_INTERRUPT_CONTROL ipi
= {0};
1032 uint64_t icr
= vcpu
->exit_ctx
.ApicInitSipi
.ApicIcr
;
1033 uint32_t delivery_mode
=
1034 (icr
& APIC_ICR_DELIV_MOD
) >> APIC_ICR_DELIV_MOD_SHIFT
;
1035 int dest_shorthand
=
1036 (icr
& APIC_ICR_DEST_SHORT
) >> APIC_ICR_DEST_SHORT_SHIFT
;
1037 bool broadcast
= false;
1038 bool include_self
= false;
1041 /* We only registered for INIT and SIPI exits. */
1042 if ((delivery_mode
!= APIC_DM_INIT
) &&
1043 (delivery_mode
!= APIC_DM_SIPI
)) {
1045 "WHPX: Unexpected APIC exit that is not a INIT or SIPI");
1049 if (delivery_mode
== APIC_DM_INIT
) {
1050 ipi
.Type
= WHvX64InterruptTypeInit
;
1052 ipi
.Type
= WHvX64InterruptTypeSipi
;
1055 ipi
.DestinationMode
=
1056 ((icr
& APIC_ICR_DEST_MOD
) >> APIC_ICR_DEST_MOD_SHIFT
) ?
1057 WHvX64InterruptDestinationModeLogical
:
1058 WHvX64InterruptDestinationModePhysical
;
1061 ((icr
& APIC_ICR_TRIGGER_MOD
) >> APIC_ICR_TRIGGER_MOD_SHIFT
) ?
1062 WHvX64InterruptTriggerModeLevel
:
1063 WHvX64InterruptTriggerModeEdge
;
1065 ipi
.Vector
= icr
& APIC_VECTOR_MASK
;
1066 switch (dest_shorthand
) {
1067 /* no shorthand. Bits 56-63 contain the destination. */
1069 ipi
.Destination
= (icr
>> 56) & APIC_VECTOR_MASK
;
1070 hr
= whp_dispatch
.WHvRequestInterrupt(whpx
->partition
,
1073 error_report("WHPX: Failed to request interrupt hr=%08lx",
1081 include_self
= true;
1084 /* broadcast, including self */
1087 include_self
= true;
1090 /* broadcast, excluding self */
1096 if (!broadcast
&& !include_self
) {
1100 for (i
= 0; i
<= max_vcpu_index
; i
++) {
1101 if (i
== cpu
->cpu_index
&& !include_self
) {
1106 * Assuming that APIC Ids are identity mapped since
1107 * WHvX64RegisterApicId & WHvX64RegisterInitialApicId registers
1108 * are not handled yet and the hypervisor doesn't allow the
1109 * guest to modify the APIC ID.
1111 ipi
.Destination
= i
;
1112 hr
= whp_dispatch
.WHvRequestInterrupt(whpx
->partition
,
1116 "WHPX: Failed to request SIPI for %d, hr=%08lx",
1124 case WHvRunVpExitReasonCanceled
:
1125 cpu
->exception_index
= EXCP_INTERRUPT
;
1129 case WHvRunVpExitReasonX64MsrAccess
: {
1130 WHV_REGISTER_VALUE reg_values
[3] = {0};
1131 WHV_REGISTER_NAME reg_names
[3];
1134 reg_names
[0] = WHvX64RegisterRip
;
1135 reg_names
[1] = WHvX64RegisterRax
;
1136 reg_names
[2] = WHvX64RegisterRdx
;
1138 reg_values
[0].Reg64
=
1139 vcpu
->exit_ctx
.VpContext
.Rip
+
1140 vcpu
->exit_ctx
.VpContext
.InstructionLength
;
1143 * For all unsupported MSR access we:
1147 reg_count
= vcpu
->exit_ctx
.MsrAccess
.AccessInfo
.IsWrite
?
1150 hr
= whp_dispatch
.WHvSetVirtualProcessorRegisters(
1153 reg_names
, reg_count
,
1157 error_report("WHPX: Failed to set MsrAccess state "
1158 " registers, hr=%08lx", hr
);
1163 case WHvRunVpExitReasonX64Cpuid
: {
1164 WHV_REGISTER_VALUE reg_values
[5];
1165 WHV_REGISTER_NAME reg_names
[5];
1166 UINT32 reg_count
= 5;
1167 UINT64 cpuid_fn
, rip
= 0, rax
= 0, rcx
= 0, rdx
= 0, rbx
= 0;
1168 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1169 CPUX86State
*env
= &x86_cpu
->env
;
1171 memset(reg_values
, 0, sizeof(reg_values
));
1173 rip
= vcpu
->exit_ctx
.VpContext
.Rip
+
1174 vcpu
->exit_ctx
.VpContext
.InstructionLength
;
1175 cpuid_fn
= vcpu
->exit_ctx
.CpuidAccess
.Rax
;
1178 * Ideally, these should be supplied to the hypervisor during VCPU
1179 * initialization and it should be able to satisfy this request.
1180 * But, currently, WHPX doesn't support setting CPUID values in the
1181 * hypervisor once the partition has been setup, which is too late
1182 * since VCPUs are realized later. For now, use the values from
1183 * QEMU to satisfy these requests, until WHPX adds support for
1184 * being able to set these values in the hypervisor at runtime.
1186 cpu_x86_cpuid(env
, cpuid_fn
, 0, (UINT32
*)&rax
, (UINT32
*)&rbx
,
1187 (UINT32
*)&rcx
, (UINT32
*)&rdx
);
1190 /* Expose the vmware cpu frequency cpuid leaf */
1192 rbx
= rcx
= rdx
= 0;
1197 rbx
= env
->apic_bus_freq
/ 1000; /* Hz to KHz */
1202 /* Remove any support of OSVW */
1203 rcx
&= ~CPUID_EXT3_OSVW
;
1207 reg_names
[0] = WHvX64RegisterRip
;
1208 reg_names
[1] = WHvX64RegisterRax
;
1209 reg_names
[2] = WHvX64RegisterRcx
;
1210 reg_names
[3] = WHvX64RegisterRdx
;
1211 reg_names
[4] = WHvX64RegisterRbx
;
1213 reg_values
[0].Reg64
= rip
;
1214 reg_values
[1].Reg64
= rax
;
1215 reg_values
[2].Reg64
= rcx
;
1216 reg_values
[3].Reg64
= rdx
;
1217 reg_values
[4].Reg64
= rbx
;
1219 hr
= whp_dispatch
.WHvSetVirtualProcessorRegisters(
1220 whpx
->partition
, cpu
->cpu_index
,
1226 error_report("WHPX: Failed to set CpuidAccess state registers,"
1232 case WHvRunVpExitReasonNone
:
1233 case WHvRunVpExitReasonUnrecoverableException
:
1234 case WHvRunVpExitReasonInvalidVpRegisterValue
:
1235 case WHvRunVpExitReasonUnsupportedFeature
:
1236 case WHvRunVpExitReasonException
:
1238 error_report("WHPX: Unexpected VP exit code %d",
1239 vcpu
->exit_ctx
.ExitReason
);
1240 whpx_get_registers(cpu
);
1241 qemu_mutex_lock_iothread();
1242 qemu_system_guest_panicked(cpu_get_crash_info(cpu
));
1243 qemu_mutex_unlock_iothread();
1250 qemu_mutex_lock_iothread();
1253 qatomic_set(&cpu
->exit_request
, false);
1258 static void do_whpx_cpu_synchronize_state(CPUState
*cpu
, run_on_cpu_data arg
)
1260 if (!cpu
->vcpu_dirty
) {
1261 whpx_get_registers(cpu
);
1262 cpu
->vcpu_dirty
= true;
1266 static void do_whpx_cpu_synchronize_post_reset(CPUState
*cpu
,
1267 run_on_cpu_data arg
)
1269 whpx_set_registers(cpu
, WHPX_SET_RESET_STATE
);
1270 cpu
->vcpu_dirty
= false;
1273 static void do_whpx_cpu_synchronize_post_init(CPUState
*cpu
,
1274 run_on_cpu_data arg
)
1276 whpx_set_registers(cpu
, WHPX_SET_FULL_STATE
);
1277 cpu
->vcpu_dirty
= false;
1280 static void do_whpx_cpu_synchronize_pre_loadvm(CPUState
*cpu
,
1281 run_on_cpu_data arg
)
1283 cpu
->vcpu_dirty
= true;
1290 void whpx_cpu_synchronize_state(CPUState
*cpu
)
1292 if (!cpu
->vcpu_dirty
) {
1293 run_on_cpu(cpu
, do_whpx_cpu_synchronize_state
, RUN_ON_CPU_NULL
);
1297 void whpx_cpu_synchronize_post_reset(CPUState
*cpu
)
1299 run_on_cpu(cpu
, do_whpx_cpu_synchronize_post_reset
, RUN_ON_CPU_NULL
);
1302 void whpx_cpu_synchronize_post_init(CPUState
*cpu
)
1304 run_on_cpu(cpu
, do_whpx_cpu_synchronize_post_init
, RUN_ON_CPU_NULL
);
1307 void whpx_cpu_synchronize_pre_loadvm(CPUState
*cpu
)
1309 run_on_cpu(cpu
, do_whpx_cpu_synchronize_pre_loadvm
, RUN_ON_CPU_NULL
);
1316 static Error
*whpx_migration_blocker
;
1318 static void whpx_cpu_update_state(void *opaque
, int running
, RunState state
)
1320 CPUX86State
*env
= opaque
;
1323 env
->tsc_valid
= false;
1327 int whpx_init_vcpu(CPUState
*cpu
)
1330 struct whpx_state
*whpx
= &whpx_global
;
1331 struct whpx_vcpu
*vcpu
= NULL
;
1332 Error
*local_error
= NULL
;
1333 struct CPUX86State
*env
= (CPUArchState
*)(cpu
->env_ptr
);
1334 X86CPU
*x86_cpu
= X86_CPU(cpu
);
1338 /* Add migration blockers for all unsupported features of the
1339 * Windows Hypervisor Platform
1341 if (whpx_migration_blocker
== NULL
) {
1342 error_setg(&whpx_migration_blocker
,
1343 "State blocked due to non-migratable CPUID feature support,"
1344 "dirty memory tracking support, and XSAVE/XRSTOR support");
1346 (void)migrate_add_blocker(whpx_migration_blocker
, &local_error
);
1348 error_report_err(local_error
);
1349 migrate_del_blocker(whpx_migration_blocker
);
1350 error_free(whpx_migration_blocker
);
1356 vcpu
= g_malloc0(sizeof(struct whpx_vcpu
));
1359 error_report("WHPX: Failed to allocte VCPU context.");
1364 hr
= whp_dispatch
.WHvEmulatorCreateEmulator(
1365 &whpx_emu_callbacks
,
1368 error_report("WHPX: Failed to setup instruction completion support,"
1374 hr
= whp_dispatch
.WHvCreateVirtualProcessor(
1375 whpx
->partition
, cpu
->cpu_index
, 0);
1377 error_report("WHPX: Failed to create a virtual processor,"
1379 whp_dispatch
.WHvEmulatorDestroyEmulator(vcpu
->emulator
);
1385 * vcpu's TSC frequency is either specified by user, or use the value
1386 * provided by Hyper-V if the former is not present. In the latter case, we
1387 * query it from Hyper-V and record in env->tsc_khz, so that vcpu's TSC
1388 * frequency can be migrated later via this field.
1390 if (!env
->tsc_khz
) {
1391 hr
= whp_dispatch
.WHvGetCapability(
1392 WHvCapabilityCodeProcessorClockFrequency
, &freq
, sizeof(freq
),
1394 if (hr
!= WHV_E_UNKNOWN_CAPABILITY
) {
1396 printf("WHPX: Failed to query tsc frequency, hr=0x%08lx\n", hr
);
1398 env
->tsc_khz
= freq
/ 1000; /* Hz to KHz */
1403 env
->apic_bus_freq
= HYPERV_APIC_BUS_FREQUENCY
;
1404 hr
= whp_dispatch
.WHvGetCapability(
1405 WHvCapabilityCodeInterruptClockFrequency
, &freq
, sizeof(freq
), NULL
);
1406 if (hr
!= WHV_E_UNKNOWN_CAPABILITY
) {
1408 printf("WHPX: Failed to query apic bus frequency hr=0x%08lx\n", hr
);
1410 env
->apic_bus_freq
= freq
;
1415 * If the vmware cpuid frequency leaf option is set, and we have a valid
1416 * tsc value, trap the corresponding cpuid's.
1418 if (x86_cpu
->vmware_cpuid_freq
&& env
->tsc_khz
) {
1419 UINT32 cpuidExitList
[] = {1, 0x80000001, 0x40000000, 0x40000010};
1421 hr
= whp_dispatch
.WHvSetPartitionProperty(
1423 WHvPartitionPropertyCodeCpuidExitList
,
1425 RTL_NUMBER_OF(cpuidExitList
) * sizeof(UINT32
));
1428 error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
1435 vcpu
->interruptable
= true;
1436 cpu
->vcpu_dirty
= true;
1437 cpu
->hax_vcpu
= (struct hax_vcpu_state
*)vcpu
;
1438 max_vcpu_index
= max(max_vcpu_index
, cpu
->cpu_index
);
1439 qemu_add_vm_change_state_handler(whpx_cpu_update_state
, cpu
->env_ptr
);
1449 int whpx_vcpu_exec(CPUState
*cpu
)
1455 if (cpu
->exception_index
>= EXCP_INTERRUPT
) {
1456 ret
= cpu
->exception_index
;
1457 cpu
->exception_index
= -1;
1461 fatal
= whpx_vcpu_run(cpu
);
1464 error_report("WHPX: Failed to exec a virtual processor");
1472 void whpx_destroy_vcpu(CPUState
*cpu
)
1474 struct whpx_state
*whpx
= &whpx_global
;
1475 struct whpx_vcpu
*vcpu
= get_whpx_vcpu(cpu
);
1477 whp_dispatch
.WHvDeleteVirtualProcessor(whpx
->partition
, cpu
->cpu_index
);
1478 whp_dispatch
.WHvEmulatorDestroyEmulator(vcpu
->emulator
);
1479 g_free(cpu
->hax_vcpu
);
1483 void whpx_vcpu_kick(CPUState
*cpu
)
1485 struct whpx_state
*whpx
= &whpx_global
;
1486 whp_dispatch
.WHvCancelRunVirtualProcessor(
1487 whpx
->partition
, cpu
->cpu_index
, 0);
1494 static void whpx_update_mapping(hwaddr start_pa
, ram_addr_t size
,
1495 void *host_va
, int add
, int rom
,
1498 struct whpx_state
*whpx
= &whpx_global
;
1503 printf("WHPX: ADD PA:%p Size:%p, Host:%p, %s, '%s'\n",
1504 (void*)start_pa, (void*)size, host_va,
1505 (rom ? "ROM" : "RAM"), name);
1507 printf("WHPX: DEL PA:%p Size:%p, Host:%p, '%s'\n",
1508 (void*)start_pa, (void*)size, host_va, name);
1513 hr
= whp_dispatch
.WHvMapGpaRange(whpx
->partition
,
1517 (WHvMapGpaRangeFlagRead
|
1518 WHvMapGpaRangeFlagExecute
|
1519 (rom
? 0 : WHvMapGpaRangeFlagWrite
)));
1521 hr
= whp_dispatch
.WHvUnmapGpaRange(whpx
->partition
,
1527 error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes,"
1528 " Host:%p, hr=%08lx",
1529 (add
? "MAP" : "UNMAP"), name
,
1530 (void *)(uintptr_t)start_pa
, (void *)size
, host_va
, hr
);
1534 static void whpx_process_section(MemoryRegionSection
*section
, int add
)
1536 MemoryRegion
*mr
= section
->mr
;
1537 hwaddr start_pa
= section
->offset_within_address_space
;
1538 ram_addr_t size
= int128_get64(section
->size
);
1542 if (!memory_region_is_ram(mr
)) {
1546 delta
= qemu_real_host_page_size
- (start_pa
& ~qemu_real_host_page_mask
);
1547 delta
&= ~qemu_real_host_page_mask
;
1553 size
&= qemu_real_host_page_mask
;
1554 if (!size
|| (start_pa
& ~qemu_real_host_page_mask
)) {
1558 host_va
= (uintptr_t)memory_region_get_ram_ptr(mr
)
1559 + section
->offset_within_region
+ delta
;
1561 whpx_update_mapping(start_pa
, size
, (void *)(uintptr_t)host_va
, add
,
1562 memory_region_is_rom(mr
), mr
->name
);
1565 static void whpx_region_add(MemoryListener
*listener
,
1566 MemoryRegionSection
*section
)
1568 memory_region_ref(section
->mr
);
1569 whpx_process_section(section
, 1);
1572 static void whpx_region_del(MemoryListener
*listener
,
1573 MemoryRegionSection
*section
)
1575 whpx_process_section(section
, 0);
1576 memory_region_unref(section
->mr
);
1579 static void whpx_transaction_begin(MemoryListener
*listener
)
1583 static void whpx_transaction_commit(MemoryListener
*listener
)
1587 static void whpx_log_sync(MemoryListener
*listener
,
1588 MemoryRegionSection
*section
)
1590 MemoryRegion
*mr
= section
->mr
;
1592 if (!memory_region_is_ram(mr
)) {
1596 memory_region_set_dirty(mr
, 0, int128_get64(section
->size
));
1599 static MemoryListener whpx_memory_listener
= {
1600 .begin
= whpx_transaction_begin
,
1601 .commit
= whpx_transaction_commit
,
1602 .region_add
= whpx_region_add
,
1603 .region_del
= whpx_region_del
,
1604 .log_sync
= whpx_log_sync
,
1608 static void whpx_memory_init(void)
1610 memory_listener_register(&whpx_memory_listener
, &address_space_memory
);
1614 * Load the functions from the given library, using the given handle. If a
1615 * handle is provided, it is used, otherwise the library is opened. The
1616 * handle will be updated on return with the opened one.
1618 static bool load_whp_dispatch_fns(HMODULE
*handle
,
1619 WHPFunctionList function_list
)
1621 HMODULE hLib
= *handle
;
1623 #define WINHV_PLATFORM_DLL "WinHvPlatform.dll"
1624 #define WINHV_EMULATION_DLL "WinHvEmulation.dll"
1625 #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \
1626 whp_dispatch.function_name = \
1627 (function_name ## _t)GetProcAddress(hLib, #function_name); \
1629 #define WHP_LOAD_FIELD(return_type, function_name, signature) \
1630 whp_dispatch.function_name = \
1631 (function_name ## _t)GetProcAddress(hLib, #function_name); \
1632 if (!whp_dispatch.function_name) { \
1633 error_report("Could not load function %s", #function_name); \
1637 #define WHP_LOAD_LIB(lib_name, handle_lib) \
1638 if (!handle_lib) { \
1639 handle_lib = LoadLibrary(lib_name); \
1640 if (!handle_lib) { \
1641 error_report("Could not load library %s.", lib_name); \
1646 switch (function_list) {
1647 case WINHV_PLATFORM_FNS_DEFAULT
:
1648 WHP_LOAD_LIB(WINHV_PLATFORM_DLL
, hLib
)
1649 LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD
)
1652 case WINHV_EMULATION_FNS_DEFAULT
:
1653 WHP_LOAD_LIB(WINHV_EMULATION_DLL
, hLib
)
1654 LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD
)
1657 case WINHV_PLATFORM_FNS_SUPPLEMENTAL
:
1658 WHP_LOAD_LIB(WINHV_PLATFORM_DLL
, hLib
)
1659 LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL
)
1674 static void whpx_set_kernel_irqchip(Object
*obj
, Visitor
*v
,
1675 const char *name
, void *opaque
,
1678 struct whpx_state
*whpx
= &whpx_global
;
1681 if (!visit_type_OnOffSplit(v
, name
, &mode
, errp
)) {
1686 case ON_OFF_SPLIT_ON
:
1687 whpx
->kernel_irqchip_allowed
= true;
1688 whpx
->kernel_irqchip_required
= true;
1691 case ON_OFF_SPLIT_OFF
:
1692 whpx
->kernel_irqchip_allowed
= false;
1693 whpx
->kernel_irqchip_required
= false;
1696 case ON_OFF_SPLIT_SPLIT
:
1697 error_setg(errp
, "WHPX: split irqchip currently not supported");
1698 error_append_hint(errp
,
1699 "Try without kernel-irqchip or with kernel-irqchip=on|off");
1704 * The value was checked in visit_type_OnOffSplit() above. If
1705 * we get here, then something is wrong in QEMU.
1715 static int whpx_accel_init(MachineState
*ms
)
1717 struct whpx_state
*whpx
;
1720 WHV_CAPABILITY whpx_cap
;
1721 UINT32 whpx_cap_size
;
1722 WHV_PARTITION_PROPERTY prop
;
1723 UINT32 cpuidExitList
[] = {1, 0x80000001};
1724 WHV_CAPABILITY_FEATURES features
= {0};
1726 whpx
= &whpx_global
;
1728 if (!init_whp_dispatch()) {
1733 whpx
->mem_quota
= ms
->ram_size
;
1735 hr
= whp_dispatch
.WHvGetCapability(
1736 WHvCapabilityCodeHypervisorPresent
, &whpx_cap
,
1737 sizeof(whpx_cap
), &whpx_cap_size
);
1738 if (FAILED(hr
) || !whpx_cap
.HypervisorPresent
) {
1739 error_report("WHPX: No accelerator found, hr=%08lx", hr
);
1744 hr
= whp_dispatch
.WHvGetCapability(
1745 WHvCapabilityCodeFeatures
, &features
, sizeof(features
), NULL
);
1747 error_report("WHPX: Failed to query capabilities, hr=%08lx", hr
);
1752 hr
= whp_dispatch
.WHvCreatePartition(&whpx
->partition
);
1754 error_report("WHPX: Failed to create partition, hr=%08lx", hr
);
1759 memset(&prop
, 0, sizeof(WHV_PARTITION_PROPERTY
));
1760 prop
.ProcessorCount
= ms
->smp
.cpus
;
1761 hr
= whp_dispatch
.WHvSetPartitionProperty(
1763 WHvPartitionPropertyCodeProcessorCount
,
1765 sizeof(WHV_PARTITION_PROPERTY
));
1768 error_report("WHPX: Failed to set partition core count to %d,"
1769 " hr=%08lx", ms
->smp
.cores
, hr
);
1775 * Error out if WHP doesn't support apic emulation and user is requiring
1778 if (whpx
->kernel_irqchip_required
&& (!features
.LocalApicEmulation
||
1779 !whp_dispatch
.WHvSetVirtualProcessorInterruptControllerState2
)) {
1780 error_report("WHPX: kernel irqchip requested, but unavailable. "
1781 "Try without kernel-irqchip or with kernel-irqchip=off");
1786 if (whpx
->kernel_irqchip_allowed
&& features
.LocalApicEmulation
&&
1787 whp_dispatch
.WHvSetVirtualProcessorInterruptControllerState2
) {
1788 WHV_X64_LOCAL_APIC_EMULATION_MODE mode
=
1789 WHvX64LocalApicEmulationModeXApic
;
1790 printf("WHPX: setting APIC emulation mode in the hypervisor\n");
1791 hr
= whp_dispatch
.WHvSetPartitionProperty(
1793 WHvPartitionPropertyCodeLocalApicEmulationMode
,
1797 error_report("WHPX: Failed to enable kernel irqchip hr=%08lx", hr
);
1798 if (whpx
->kernel_irqchip_required
) {
1799 error_report("WHPX: kernel irqchip requested, but unavailable");
1804 whpx
->apic_in_platform
= true;
1808 /* Register for MSR and CPUID exits */
1809 memset(&prop
, 0, sizeof(WHV_PARTITION_PROPERTY
));
1810 prop
.ExtendedVmExits
.X64MsrExit
= 1;
1811 prop
.ExtendedVmExits
.X64CpuidExit
= 1;
1812 if (whpx_apic_in_platform()) {
1813 prop
.ExtendedVmExits
.X64ApicInitSipiExitTrap
= 1;
1816 hr
= whp_dispatch
.WHvSetPartitionProperty(
1818 WHvPartitionPropertyCodeExtendedVmExits
,
1820 sizeof(WHV_PARTITION_PROPERTY
));
1822 error_report("WHPX: Failed to enable MSR & CPUIDexit, hr=%08lx", hr
);
1827 hr
= whp_dispatch
.WHvSetPartitionProperty(
1829 WHvPartitionPropertyCodeCpuidExitList
,
1831 RTL_NUMBER_OF(cpuidExitList
) * sizeof(UINT32
));
1834 error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx",
1840 hr
= whp_dispatch
.WHvSetupPartition(whpx
->partition
);
1842 error_report("WHPX: Failed to setup partition, hr=%08lx", hr
);
1849 cpus_register_accel(&whpx_cpus
);
1851 printf("Windows Hypervisor Platform accelerator is operational\n");
1856 if (NULL
!= whpx
->partition
) {
1857 whp_dispatch
.WHvDeletePartition(whpx
->partition
);
1858 whpx
->partition
= NULL
;
1864 int whpx_enabled(void)
1866 return whpx_allowed
;
1869 bool whpx_apic_in_platform(void) {
1870 return whpx_global
.apic_in_platform
;
1873 static void whpx_accel_class_init(ObjectClass
*oc
, void *data
)
1875 AccelClass
*ac
= ACCEL_CLASS(oc
);
1877 ac
->init_machine
= whpx_accel_init
;
1878 ac
->allowed
= &whpx_allowed
;
1880 object_class_property_add(oc
, "kernel-irqchip", "on|off|split",
1881 NULL
, whpx_set_kernel_irqchip
,
1883 object_class_property_set_description(oc
, "kernel-irqchip",
1884 "Configure WHPX in-kernel irqchip");
1887 static void whpx_accel_instance_init(Object
*obj
)
1889 struct whpx_state
*whpx
= &whpx_global
;
1891 memset(whpx
, 0, sizeof(struct whpx_state
));
1892 /* Turn on kernel-irqchip, by default */
1893 whpx
->kernel_irqchip_allowed
= true;
1896 static const TypeInfo whpx_accel_type
= {
1897 .name
= ACCEL_CLASS_NAME("whpx"),
1898 .parent
= TYPE_ACCEL
,
1899 .instance_init
= whpx_accel_instance_init
,
1900 .class_init
= whpx_accel_class_init
,
1903 static void whpx_type_init(void)
1905 type_register_static(&whpx_accel_type
);
1908 bool init_whp_dispatch(void)
1910 if (whp_dispatch_initialized
) {
1914 if (!load_whp_dispatch_fns(&hWinHvPlatform
, WINHV_PLATFORM_FNS_DEFAULT
)) {
1918 if (!load_whp_dispatch_fns(&hWinHvEmulation
, WINHV_EMULATION_FNS_DEFAULT
)) {
1922 assert(load_whp_dispatch_fns(&hWinHvPlatform
,
1923 WINHV_PLATFORM_FNS_SUPPLEMENTAL
));
1924 whp_dispatch_initialized
= true;
1928 if (hWinHvPlatform
) {
1929 FreeLibrary(hWinHvPlatform
);
1932 if (hWinHvEmulation
) {
1933 FreeLibrary(hWinHvEmulation
);
/* Hand whpx_type_init() to the type_init() registration macro. */
type_init(whpx_type_init);