/*
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (C) 2016 Veertu Inc,
 * Copyright (C) 2017 Google Inc,
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
#include "qemu/osdep.h"

#include "x86hvf.h"
#include "vmx.h"
#include "vmcs.h"
#include "cpu.h"
#include "x86_descr.h"
#include "x86_decode.h"
#include "sysemu/hw_accel.h"

#include "hw/i386/apic_internal.h"

#include <Hypervisor/hv.h>
#include <Hypervisor/hv_vmx.h>
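
/*
 * Convert a QEMU SegmentCache into the VMX guest-segment layout,
 * packing the descriptor flags into the VMCS access-rights (AR) field.
 */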
void hvf_set_segment(CPUState *cs, struct vmx_segment *vmx_seg,
                     SegmentCache *qseg, bool is_tr)
{
    vmx_seg->sel = qseg->selector;
    vmx_seg->base = qseg->base;
    vmx_seg->limit = qseg->limit;

    if (!qseg->selector && !x86_is_real(cs) && !is_tr) {
        /* the TR register is usable after processor reset despite
         * having a null selector */
        vmx_seg->ar = 1 << 16; /* mark the segment unusable */
        return;
    }
    vmx_seg->ar = (qseg->flags >> DESC_TYPE_SHIFT) & 0xf;
    vmx_seg->ar |= ((qseg->flags >> DESC_G_SHIFT) & 1) << 15;
    vmx_seg->ar |= ((qseg->flags >> DESC_B_SHIFT) & 1) << 14;
    vmx_seg->ar |= ((qseg->flags >> DESC_L_SHIFT) & 1) << 13;
    vmx_seg->ar |= ((qseg->flags >> DESC_AVL_SHIFT) & 1) << 12;
    vmx_seg->ar |= ((qseg->flags >> DESC_P_SHIFT) & 1) << 7;
    vmx_seg->ar |= ((qseg->flags >> DESC_DPL_SHIFT) & 3) << 5;
    vmx_seg->ar |= ((qseg->flags >> DESC_S_SHIFT) & 1) << 4;
}
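
/*
 * Inverse of hvf_set_segment: unpack a VMX guest segment (selector,
 * base, limit, AR bits) back into QEMU's SegmentCache flag layout.
 */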
void hvf_get_segment(SegmentCache *qseg, struct vmx_segment *vmx_seg)
{
    qseg->limit = vmx_seg->limit;
    qseg->base = vmx_seg->base;
    qseg->selector = vmx_seg->sel;
    qseg->flags = ((vmx_seg->ar & 0xf) << DESC_TYPE_SHIFT) |
                  (((vmx_seg->ar >> 4) & 1) << DESC_S_SHIFT) |
                  (((vmx_seg->ar >> 5) & 3) << DESC_DPL_SHIFT) |
                  (((vmx_seg->ar >> 7) & 1) << DESC_P_SHIFT) |
                  (((vmx_seg->ar >> 12) & 1) << DESC_AVL_SHIFT) |
                  (((vmx_seg->ar >> 13) & 1) << DESC_L_SHIFT) |
                  (((vmx_seg->ar >> 14) & 1) << DESC_B_SHIFT) |
                  (((vmx_seg->ar >> 15) & 1) << DESC_G_SHIFT);
}
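
/* Push QEMU's FPU/SSE/AVX state into the vCPU's XSAVE area. */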
void hvf_put_xsave(CPUState *cs)
{
    void *xsave = X86_CPU(cs)->env.xsave_buf;
    uint32_t xsave_len = X86_CPU(cs)->env.xsave_buf_len;

    x86_cpu_xsave_all_areas(X86_CPU(cs), xsave, xsave_len);

    if (hv_vcpu_write_fpstate(cs->accel->fd, xsave, xsave_len)) {
        abort();
    }
}
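
/*
 * Write the descriptor tables, control registers and all segment
 * registers from QEMU's CPU state into the VMCS guest-state area.
 */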
static void hvf_put_segments(CPUState *cs)
{
    CPUX86State *env = &X86_CPU(cs)->env;
    struct vmx_segment seg;

    wvmcs(cs->accel->fd, VMCS_GUEST_IDTR_LIMIT, env->idt.limit);
    wvmcs(cs->accel->fd, VMCS_GUEST_IDTR_BASE, env->idt.base);

    wvmcs(cs->accel->fd, VMCS_GUEST_GDTR_LIMIT, env->gdt.limit);
    wvmcs(cs->accel->fd, VMCS_GUEST_GDTR_BASE, env->gdt.base);

    /* wvmcs(cs->accel->fd, VMCS_GUEST_CR2, env->cr[2]); */
    wvmcs(cs->accel->fd, VMCS_GUEST_CR3, env->cr[3]);

    wvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER, env->efer);

    macvm_set_cr4(cs->accel->fd, env->cr[4]);
    macvm_set_cr0(cs->accel->fd, env->cr[0]);

    hvf_set_segment(cs, &seg, &env->segs[R_CS], false);
    vmx_write_segment_descriptor(cs, &seg, R_CS);

    hvf_set_segment(cs, &seg, &env->segs[R_DS], false);
    vmx_write_segment_descriptor(cs, &seg, R_DS);

    hvf_set_segment(cs, &seg, &env->segs[R_ES], false);
    vmx_write_segment_descriptor(cs, &seg, R_ES);

    hvf_set_segment(cs, &seg, &env->segs[R_SS], false);
    vmx_write_segment_descriptor(cs, &seg, R_SS);

    hvf_set_segment(cs, &seg, &env->segs[R_FS], false);
    vmx_write_segment_descriptor(cs, &seg, R_FS);

    hvf_set_segment(cs, &seg, &env->segs[R_GS], false);
    vmx_write_segment_descriptor(cs, &seg, R_GS);

    hvf_set_segment(cs, &seg, &env->tr, true);
    vmx_write_segment_descriptor(cs, &seg, R_TR);

    hvf_set_segment(cs, &seg, &env->ldt, false);
    vmx_write_segment_descriptor(cs, &seg, R_LDTR);
}
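
/* Write the SYSENTER/SYSCALL and FS/GS base MSRs to the vCPU. */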
void hvf_put_msrs(CPUState *cs)
{
    CPUX86State *env = &X86_CPU(cs)->env;

    hv_vcpu_write_msr(cs->accel->fd, MSR_IA32_SYSENTER_CS,
                      env->sysenter_cs);
    hv_vcpu_write_msr(cs->accel->fd, MSR_IA32_SYSENTER_ESP,
                      env->sysenter_esp);
    hv_vcpu_write_msr(cs->accel->fd, MSR_IA32_SYSENTER_EIP,
                      env->sysenter_eip);

    hv_vcpu_write_msr(cs->accel->fd, MSR_STAR, env->star);

#ifdef TARGET_X86_64
    hv_vcpu_write_msr(cs->accel->fd, MSR_CSTAR, env->cstar);
    hv_vcpu_write_msr(cs->accel->fd, MSR_KERNELGSBASE, env->kernelgsbase);
    hv_vcpu_write_msr(cs->accel->fd, MSR_FMASK, env->fmask);
    hv_vcpu_write_msr(cs->accel->fd, MSR_LSTAR, env->lstar);
#endif

    hv_vcpu_write_msr(cs->accel->fd, MSR_GSBASE, env->segs[R_GS].base);
    hv_vcpu_write_msr(cs->accel->fd, MSR_FSBASE, env->segs[R_FS].base);
}
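
/* Read the vCPU's XSAVE area and load it back into QEMU's FPU state. */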
void hvf_get_xsave(CPUState *cs)
{
    void *xsave = X86_CPU(cs)->env.xsave_buf;
    uint32_t xsave_len = X86_CPU(cs)->env.xsave_buf_len;

    if (hv_vcpu_read_fpstate(cs->accel->fd, xsave, xsave_len)) {
        abort();
    }

    x86_cpu_xrstor_all_areas(X86_CPU(cs), xsave, xsave_len);
}
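
/*
 * Read the segment registers, descriptor tables and control registers
 * from the VMCS guest-state area back into QEMU's CPU state.
 */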
static void hvf_get_segments(CPUState *cs)
{
    CPUX86State *env = &X86_CPU(cs)->env;

    struct vmx_segment seg;

    env->interrupt_injected = -1;

    vmx_read_segment_descriptor(cs, &seg, R_CS);
    hvf_get_segment(&env->segs[R_CS], &seg);

    vmx_read_segment_descriptor(cs, &seg, R_DS);
    hvf_get_segment(&env->segs[R_DS], &seg);

    vmx_read_segment_descriptor(cs, &seg, R_ES);
    hvf_get_segment(&env->segs[R_ES], &seg);

    vmx_read_segment_descriptor(cs, &seg, R_FS);
    hvf_get_segment(&env->segs[R_FS], &seg);

    vmx_read_segment_descriptor(cs, &seg, R_GS);
    hvf_get_segment(&env->segs[R_GS], &seg);

    vmx_read_segment_descriptor(cs, &seg, R_SS);
    hvf_get_segment(&env->segs[R_SS], &seg);

    vmx_read_segment_descriptor(cs, &seg, R_TR);
    hvf_get_segment(&env->tr, &seg);

    vmx_read_segment_descriptor(cs, &seg, R_LDTR);
    hvf_get_segment(&env->ldt, &seg);

    env->idt.limit = rvmcs(cs->accel->fd, VMCS_GUEST_IDTR_LIMIT);
    env->idt.base = rvmcs(cs->accel->fd, VMCS_GUEST_IDTR_BASE);
    env->gdt.limit = rvmcs(cs->accel->fd, VMCS_GUEST_GDTR_LIMIT);
    env->gdt.base = rvmcs(cs->accel->fd, VMCS_GUEST_GDTR_BASE);

    env->cr[0] = rvmcs(cs->accel->fd, VMCS_GUEST_CR0);
    env->cr[3] = rvmcs(cs->accel->fd, VMCS_GUEST_CR3);
    env->cr[4] = rvmcs(cs->accel->fd, VMCS_GUEST_CR4);

    env->efer = rvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER);
}
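
/*
 * Read the SYSENTER/SYSCALL MSRs from the vCPU, and derive the guest
 * TSC from the host TSC plus the VMCS TSC offset.
 */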
void hvf_get_msrs(CPUState *cs)
{
    CPUX86State *env = &X86_CPU(cs)->env;
    uint64_t tmp;

    hv_vcpu_read_msr(cs->accel->fd, MSR_IA32_SYSENTER_CS, &tmp);
    env->sysenter_cs = tmp;

    hv_vcpu_read_msr(cs->accel->fd, MSR_IA32_SYSENTER_ESP, &tmp);
    env->sysenter_esp = tmp;

    hv_vcpu_read_msr(cs->accel->fd, MSR_IA32_SYSENTER_EIP, &tmp);
    env->sysenter_eip = tmp;

    hv_vcpu_read_msr(cs->accel->fd, MSR_STAR, &env->star);

#ifdef TARGET_X86_64
    hv_vcpu_read_msr(cs->accel->fd, MSR_CSTAR, &env->cstar);
    hv_vcpu_read_msr(cs->accel->fd, MSR_KERNELGSBASE, &env->kernelgsbase);
    hv_vcpu_read_msr(cs->accel->fd, MSR_FMASK, &env->fmask);
    hv_vcpu_read_msr(cs->accel->fd, MSR_LSTAR, &env->lstar);
#endif

    hv_vcpu_read_msr(cs->accel->fd, MSR_IA32_APICBASE, &tmp);

    env->tsc = rdtscp() + rvmcs(cs->accel->fd, VMCS_TSC_OFFSET);
}
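
/*
 * Copy the complete QEMU CPU state (GPRs, flags, XSAVE state, segments,
 * MSRs and debug registers) into the hvf vCPU.
 */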
int hvf_put_registers(CPUState *cs)
{
    X86CPU *x86cpu = X86_CPU(cs);
    CPUX86State *env = &x86cpu->env;

    wreg(cs->accel->fd, HV_X86_RAX, env->regs[R_EAX]);
    wreg(cs->accel->fd, HV_X86_RBX, env->regs[R_EBX]);
    wreg(cs->accel->fd, HV_X86_RCX, env->regs[R_ECX]);
    wreg(cs->accel->fd, HV_X86_RDX, env->regs[R_EDX]);
    wreg(cs->accel->fd, HV_X86_RBP, env->regs[R_EBP]);
    wreg(cs->accel->fd, HV_X86_RSP, env->regs[R_ESP]);
    wreg(cs->accel->fd, HV_X86_RSI, env->regs[R_ESI]);
    wreg(cs->accel->fd, HV_X86_RDI, env->regs[R_EDI]);
    wreg(cs->accel->fd, HV_X86_R8, env->regs[8]);
    wreg(cs->accel->fd, HV_X86_R9, env->regs[9]);
    wreg(cs->accel->fd, HV_X86_R10, env->regs[10]);
    wreg(cs->accel->fd, HV_X86_R11, env->regs[11]);
    wreg(cs->accel->fd, HV_X86_R12, env->regs[12]);
    wreg(cs->accel->fd, HV_X86_R13, env->regs[13]);
    wreg(cs->accel->fd, HV_X86_R14, env->regs[14]);
    wreg(cs->accel->fd, HV_X86_R15, env->regs[15]);
    wreg(cs->accel->fd, HV_X86_RFLAGS, env->eflags);
    wreg(cs->accel->fd, HV_X86_RIP, env->eip);

    wreg(cs->accel->fd, HV_X86_XCR0, env->xcr0);

    hvf_put_xsave(cs);

    hvf_put_segments(cs);

    hvf_put_msrs(cs);

    wreg(cs->accel->fd, HV_X86_DR0, env->dr[0]);
    wreg(cs->accel->fd, HV_X86_DR1, env->dr[1]);
    wreg(cs->accel->fd, HV_X86_DR2, env->dr[2]);
    wreg(cs->accel->fd, HV_X86_DR3, env->dr[3]);
    wreg(cs->accel->fd, HV_X86_DR4, env->dr[4]);
    wreg(cs->accel->fd, HV_X86_DR5, env->dr[5]);
    wreg(cs->accel->fd, HV_X86_DR6, env->dr[6]);
    wreg(cs->accel->fd, HV_X86_DR7, env->dr[7]);

    return 0;
}
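
/*
 * Inverse of hvf_put_registers: read the full vCPU state back into
 * QEMU's CPU state and recompute the hidden flags.
 */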
int hvf_get_registers(CPUState *cs)
{
    X86CPU *x86cpu = X86_CPU(cs);
    CPUX86State *env = &x86cpu->env;

    env->regs[R_EAX] = rreg(cs->accel->fd, HV_X86_RAX);
    env->regs[R_EBX] = rreg(cs->accel->fd, HV_X86_RBX);
    env->regs[R_ECX] = rreg(cs->accel->fd, HV_X86_RCX);
    env->regs[R_EDX] = rreg(cs->accel->fd, HV_X86_RDX);
    env->regs[R_EBP] = rreg(cs->accel->fd, HV_X86_RBP);
    env->regs[R_ESP] = rreg(cs->accel->fd, HV_X86_RSP);
    env->regs[R_ESI] = rreg(cs->accel->fd, HV_X86_RSI);
    env->regs[R_EDI] = rreg(cs->accel->fd, HV_X86_RDI);
    env->regs[8] = rreg(cs->accel->fd, HV_X86_R8);
    env->regs[9] = rreg(cs->accel->fd, HV_X86_R9);
    env->regs[10] = rreg(cs->accel->fd, HV_X86_R10);
    env->regs[11] = rreg(cs->accel->fd, HV_X86_R11);
    env->regs[12] = rreg(cs->accel->fd, HV_X86_R12);
    env->regs[13] = rreg(cs->accel->fd, HV_X86_R13);
    env->regs[14] = rreg(cs->accel->fd, HV_X86_R14);
    env->regs[15] = rreg(cs->accel->fd, HV_X86_R15);

    env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS);
    env->eip = rreg(cs->accel->fd, HV_X86_RIP);

    hvf_get_xsave(cs);
    env->xcr0 = rreg(cs->accel->fd, HV_X86_XCR0);

    hvf_get_segments(cs);
    hvf_get_msrs(cs);

    env->dr[0] = rreg(cs->accel->fd, HV_X86_DR0);
    env->dr[1] = rreg(cs->accel->fd, HV_X86_DR1);
    env->dr[2] = rreg(cs->accel->fd, HV_X86_DR2);
    env->dr[3] = rreg(cs->accel->fd, HV_X86_DR3);
    env->dr[4] = rreg(cs->accel->fd, HV_X86_DR4);
    env->dr[5] = rreg(cs->accel->fd, HV_X86_DR5);
    env->dr[6] = rreg(cs->accel->fd, HV_X86_DR6);
    env->dr[7] = rreg(cs->accel->fd, HV_X86_DR7);

    x86_update_hflags(env);
    return 0;
}
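
/*
 * Request a VM exit as soon as the guest can accept an external
 * interrupt (the "interrupt window"), so a pending IRQ can be injected.
 */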
static void vmx_set_int_window_exiting(CPUState *cs)
{
    uint64_t val;
    val = rvmcs(cs->accel->fd, VMCS_PRI_PROC_BASED_CTLS);
    wvmcs(cs->accel->fd, VMCS_PRI_PROC_BASED_CTLS, val |
          VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING);
}
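
/* Stop exiting on the interrupt window once nothing is pending. */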
void vmx_clear_int_window_exiting(CPUState *cs)
{
    uint64_t val;
    val = rvmcs(cs->accel->fd, VMCS_PRI_PROC_BASED_CTLS);
    wvmcs(cs->accel->fd, VMCS_PRI_PROC_BASED_CTLS, val &
          ~VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING);
}
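
/*
 * Inject any pending exception, software/hardware interrupt or NMI into
 * the guest via the VM-entry interruption-information field, arming
 * NMI/interrupt-window exiting for events that cannot be delivered yet.
 * Returns whether an INIT or TPR event still needs servicing.
 */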
bool hvf_inject_interrupts(CPUState *cs)
{
    X86CPU *x86cpu = X86_CPU(cs);
    CPUX86State *env = &x86cpu->env;

    uint8_t vector;
    uint64_t intr_type;
    bool have_event = true;
    if (env->interrupt_injected != -1) {
        vector = env->interrupt_injected;
        if (env->ins_len) {
            intr_type = VMCS_INTR_T_SWINTR;
        } else {
            intr_type = VMCS_INTR_T_HWINTR;
        }
    } else if (env->exception_nr != -1) {
        vector = env->exception_nr;
        if (vector == EXCP03_INT3 || vector == EXCP04_INTO) {
            intr_type = VMCS_INTR_T_SWEXCEPTION;
        } else {
            intr_type = VMCS_INTR_T_HWEXCEPTION;
        }
    } else if (env->nmi_injected) {
        vector = EXCP02_NMI;
        intr_type = VMCS_INTR_T_NMI;
    } else {
        have_event = false;
    }

    uint64_t info = 0;
    if (have_event) {
        info = vector | intr_type | VMCS_INTR_VALID;
        uint64_t reason = rvmcs(cs->accel->fd, VMCS_EXIT_REASON);
        if (env->nmi_injected && reason != EXIT_REASON_TASK_SWITCH) {
            vmx_clear_nmi_blocking(cs);
        }

        if (!(env->hflags2 & HF2_NMI_MASK) || intr_type != VMCS_INTR_T_NMI) {
            info &= ~(1 << 12); /* clear undefined bit */
            if (intr_type == VMCS_INTR_T_SWINTR ||
                intr_type == VMCS_INTR_T_SWEXCEPTION) {
                wvmcs(cs->accel->fd, VMCS_ENTRY_INST_LENGTH, env->ins_len);
            }

            if (env->has_error_code) {
                wvmcs(cs->accel->fd, VMCS_ENTRY_EXCEPTION_ERROR,
                      env->error_code);
                /* Indicate that VMCS_ENTRY_EXCEPTION_ERROR is valid */
                info |= VMCS_INTR_DEL_ERRCODE;
            }
            /*printf("reinject %lx err %d\n", info, err);*/
            wvmcs(cs->accel->fd, VMCS_ENTRY_INTR_INFO, info);
        }
    }

    if (cs->interrupt_request & CPU_INTERRUPT_NMI) {
        if (!(env->hflags2 & HF2_NMI_MASK) && !(info & VMCS_INTR_VALID)) {
            cs->interrupt_request &= ~CPU_INTERRUPT_NMI;
            info = VMCS_INTR_VALID | VMCS_INTR_T_NMI | EXCP02_NMI;
            wvmcs(cs->accel->fd, VMCS_ENTRY_INTR_INFO, info);
        } else {
            vmx_set_nmi_window_exiting(cs);
        }
    }

    if (!(env->hflags & HF_INHIBIT_IRQ_MASK) &&
        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->eflags & IF_MASK) && !(info & VMCS_INTR_VALID)) {
        int line = cpu_get_pic_interrupt(env);
        cs->interrupt_request &= ~CPU_INTERRUPT_HARD;
        if (line >= 0) {
            wvmcs(cs->accel->fd, VMCS_ENTRY_INTR_INFO, line |
                  VMCS_INTR_VALID | VMCS_INTR_T_HWINTR);
        }
    }
    if (cs->interrupt_request & CPU_INTERRUPT_HARD) {
        vmx_set_int_window_exiting(cs);
    }
    return (cs->interrupt_request
            & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR));
}
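
/*
 * Handle pending CPU-level events (INIT, SIPI, APIC poll, TPR access)
 * before the next VM entry; returns whether the vCPU remains halted.
 */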
int hvf_process_events(CPUState *cs)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;

    if (!cs->accel->dirty) {
        /* light weight sync for CPU_INTERRUPT_HARD and IF_MASK */
        env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS);
    }

    if (cs->interrupt_request & CPU_INTERRUPT_INIT) {
        cpu_synchronize_state(cs);
        do_cpu_init(cpu);
    }

    if (cs->interrupt_request & CPU_INTERRUPT_POLL) {
        cs->interrupt_request &= ~CPU_INTERRUPT_POLL;
        apic_poll_irq(cpu->apic_state);
    }
    if (((cs->interrupt_request & CPU_INTERRUPT_HARD) &&
        (env->eflags & IF_MASK)) ||
        (cs->interrupt_request & CPU_INTERRUPT_NMI)) {
        cs->halted = 0;
    }
    if (cs->interrupt_request & CPU_INTERRUPT_SIPI) {
        cpu_synchronize_state(cs);
        do_cpu_sipi(cpu);
    }
    if (cs->interrupt_request & CPU_INTERRUPT_TPR) {
        cs->interrupt_request &= ~CPU_INTERRUPT_TPR;
        cpu_synchronize_state(cs);
        apic_handle_tpr_access_report(cpu->apic_state, env->eip,
                                      env->tpr_access_type);
    }
    return cs->halted;
}