2 * Copyright (c) 2018-2019 Maxime Villard, All rights reserved.
4 * NetBSD Virtual Machine Monitor (NVMM) accelerator for QEMU.
6 * This work is licensed under the terms of the GNU GPL, version 2 or later.
7 * See the COPYING file in the top-level directory.
10 #include "qemu/osdep.h"
12 #include "exec/address-spaces.h"
13 #include "exec/ioport.h"
14 #include "qemu-common.h"
15 #include "qemu/accel.h"
16 #include "sysemu/nvmm.h"
17 #include "sysemu/cpus.h"
18 #include "sysemu/runstate.h"
19 #include "qemu/main-loop.h"
20 #include "qemu/error-report.h"
21 #include "qapi/error.h"
22 #include "qemu/queue.h"
23 #include "migration/blocker.h"
26 #include "nvmm-accel-ops.h"
31 struct nvmm_vcpu vcpu
;
35 /* Window-exiting for INTs/NMIs. */
39 /* The guest is in an interrupt shadow (POP SS, etc). */
44 struct nvmm_capability cap
;
45 struct nvmm_machine mach
;
48 /* -------------------------------------------------------------------------- */
50 static bool nvmm_allowed
;
51 static struct qemu_machine qemu_mach
;
53 static struct qemu_vcpu
*
54 get_qemu_vcpu(CPUState
*cpu
)
56 return (struct qemu_vcpu
*)cpu
->hax_vcpu
;
59 static struct nvmm_machine
*
62 return &qemu_mach
.mach
;
65 /* -------------------------------------------------------------------------- */
68 nvmm_set_segment(struct nvmm_x64_state_seg
*nseg
, const SegmentCache
*qseg
)
70 uint32_t attrib
= qseg
->flags
;
72 nseg
->selector
= qseg
->selector
;
73 nseg
->limit
= qseg
->limit
;
74 nseg
->base
= qseg
->base
;
75 nseg
->attrib
.type
= __SHIFTOUT(attrib
, DESC_TYPE_MASK
);
76 nseg
->attrib
.s
= __SHIFTOUT(attrib
, DESC_S_MASK
);
77 nseg
->attrib
.dpl
= __SHIFTOUT(attrib
, DESC_DPL_MASK
);
78 nseg
->attrib
.p
= __SHIFTOUT(attrib
, DESC_P_MASK
);
79 nseg
->attrib
.avl
= __SHIFTOUT(attrib
, DESC_AVL_MASK
);
80 nseg
->attrib
.l
= __SHIFTOUT(attrib
, DESC_L_MASK
);
81 nseg
->attrib
.def
= __SHIFTOUT(attrib
, DESC_B_MASK
);
82 nseg
->attrib
.g
= __SHIFTOUT(attrib
, DESC_G_MASK
);
86 nvmm_set_registers(CPUState
*cpu
)
88 struct CPUX86State
*env
= (CPUArchState
*)cpu
->env_ptr
;
89 struct nvmm_machine
*mach
= get_nvmm_mach();
90 struct qemu_vcpu
*qcpu
= get_qemu_vcpu(cpu
);
91 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
92 struct nvmm_x64_state
*state
= vcpu
->state
;
97 assert(cpu_is_stopped(cpu
) || qemu_cpu_is_self(cpu
));
100 state
->gprs
[NVMM_X64_GPR_RAX
] = env
->regs
[R_EAX
];
101 state
->gprs
[NVMM_X64_GPR_RCX
] = env
->regs
[R_ECX
];
102 state
->gprs
[NVMM_X64_GPR_RDX
] = env
->regs
[R_EDX
];
103 state
->gprs
[NVMM_X64_GPR_RBX
] = env
->regs
[R_EBX
];
104 state
->gprs
[NVMM_X64_GPR_RSP
] = env
->regs
[R_ESP
];
105 state
->gprs
[NVMM_X64_GPR_RBP
] = env
->regs
[R_EBP
];
106 state
->gprs
[NVMM_X64_GPR_RSI
] = env
->regs
[R_ESI
];
107 state
->gprs
[NVMM_X64_GPR_RDI
] = env
->regs
[R_EDI
];
109 state
->gprs
[NVMM_X64_GPR_R8
] = env
->regs
[R_R8
];
110 state
->gprs
[NVMM_X64_GPR_R9
] = env
->regs
[R_R9
];
111 state
->gprs
[NVMM_X64_GPR_R10
] = env
->regs
[R_R10
];
112 state
->gprs
[NVMM_X64_GPR_R11
] = env
->regs
[R_R11
];
113 state
->gprs
[NVMM_X64_GPR_R12
] = env
->regs
[R_R12
];
114 state
->gprs
[NVMM_X64_GPR_R13
] = env
->regs
[R_R13
];
115 state
->gprs
[NVMM_X64_GPR_R14
] = env
->regs
[R_R14
];
116 state
->gprs
[NVMM_X64_GPR_R15
] = env
->regs
[R_R15
];
119 /* RIP and RFLAGS. */
120 state
->gprs
[NVMM_X64_GPR_RIP
] = env
->eip
;
121 state
->gprs
[NVMM_X64_GPR_RFLAGS
] = env
->eflags
;
124 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_CS
], &env
->segs
[R_CS
]);
125 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_DS
], &env
->segs
[R_DS
]);
126 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_ES
], &env
->segs
[R_ES
]);
127 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_FS
], &env
->segs
[R_FS
]);
128 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_GS
], &env
->segs
[R_GS
]);
129 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_SS
], &env
->segs
[R_SS
]);
131 /* Special segments. */
132 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_GDT
], &env
->gdt
);
133 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_LDT
], &env
->ldt
);
134 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_TR
], &env
->tr
);
135 nvmm_set_segment(&state
->segs
[NVMM_X64_SEG_IDT
], &env
->idt
);
137 /* Control registers. */
138 state
->crs
[NVMM_X64_CR_CR0
] = env
->cr
[0];
139 state
->crs
[NVMM_X64_CR_CR2
] = env
->cr
[2];
140 state
->crs
[NVMM_X64_CR_CR3
] = env
->cr
[3];
141 state
->crs
[NVMM_X64_CR_CR4
] = env
->cr
[4];
142 state
->crs
[NVMM_X64_CR_CR8
] = qcpu
->tpr
;
143 state
->crs
[NVMM_X64_CR_XCR0
] = env
->xcr0
;
145 /* Debug registers. */
146 state
->drs
[NVMM_X64_DR_DR0
] = env
->dr
[0];
147 state
->drs
[NVMM_X64_DR_DR1
] = env
->dr
[1];
148 state
->drs
[NVMM_X64_DR_DR2
] = env
->dr
[2];
149 state
->drs
[NVMM_X64_DR_DR3
] = env
->dr
[3];
150 state
->drs
[NVMM_X64_DR_DR6
] = env
->dr
[6];
151 state
->drs
[NVMM_X64_DR_DR7
] = env
->dr
[7];
154 state
->fpu
.fx_cw
= env
->fpuc
;
155 state
->fpu
.fx_sw
= (env
->fpus
& ~0x3800) | ((env
->fpstt
& 0x7) << 11);
156 state
->fpu
.fx_tw
= 0;
157 for (i
= 0; i
< 8; i
++) {
158 state
->fpu
.fx_tw
|= (!env
->fptags
[i
]) << i
;
160 state
->fpu
.fx_opcode
= env
->fpop
;
161 state
->fpu
.fx_ip
.fa_64
= env
->fpip
;
162 state
->fpu
.fx_dp
.fa_64
= env
->fpdp
;
163 state
->fpu
.fx_mxcsr
= env
->mxcsr
;
164 state
->fpu
.fx_mxcsr_mask
= 0x0000FFFF;
165 assert(sizeof(state
->fpu
.fx_87_ac
) == sizeof(env
->fpregs
));
166 memcpy(state
->fpu
.fx_87_ac
, env
->fpregs
, sizeof(env
->fpregs
));
167 for (i
= 0; i
< CPU_NB_REGS
; i
++) {
168 memcpy(&state
->fpu
.fx_xmm
[i
].xmm_bytes
[0],
169 &env
->xmm_regs
[i
].ZMM_Q(0), 8);
170 memcpy(&state
->fpu
.fx_xmm
[i
].xmm_bytes
[8],
171 &env
->xmm_regs
[i
].ZMM_Q(1), 8);
175 state
->msrs
[NVMM_X64_MSR_EFER
] = env
->efer
;
176 state
->msrs
[NVMM_X64_MSR_STAR
] = env
->star
;
178 state
->msrs
[NVMM_X64_MSR_LSTAR
] = env
->lstar
;
179 state
->msrs
[NVMM_X64_MSR_CSTAR
] = env
->cstar
;
180 state
->msrs
[NVMM_X64_MSR_SFMASK
] = env
->fmask
;
181 state
->msrs
[NVMM_X64_MSR_KERNELGSBASE
] = env
->kernelgsbase
;
183 state
->msrs
[NVMM_X64_MSR_SYSENTER_CS
] = env
->sysenter_cs
;
184 state
->msrs
[NVMM_X64_MSR_SYSENTER_ESP
] = env
->sysenter_esp
;
185 state
->msrs
[NVMM_X64_MSR_SYSENTER_EIP
] = env
->sysenter_eip
;
186 state
->msrs
[NVMM_X64_MSR_PAT
] = env
->pat
;
187 state
->msrs
[NVMM_X64_MSR_TSC
] = env
->tsc
;
190 NVMM_X64_STATE_SEGS
|
191 NVMM_X64_STATE_GPRS
|
194 NVMM_X64_STATE_MSRS
|
197 ret
= nvmm_vcpu_setstate(mach
, vcpu
, bitmap
);
199 error_report("NVMM: Failed to set virtual processor context,"
205 nvmm_get_segment(SegmentCache
*qseg
, const struct nvmm_x64_state_seg
*nseg
)
207 qseg
->selector
= nseg
->selector
;
208 qseg
->limit
= nseg
->limit
;
209 qseg
->base
= nseg
->base
;
212 __SHIFTIN((uint32_t)nseg
->attrib
.type
, DESC_TYPE_MASK
) |
213 __SHIFTIN((uint32_t)nseg
->attrib
.s
, DESC_S_MASK
) |
214 __SHIFTIN((uint32_t)nseg
->attrib
.dpl
, DESC_DPL_MASK
) |
215 __SHIFTIN((uint32_t)nseg
->attrib
.p
, DESC_P_MASK
) |
216 __SHIFTIN((uint32_t)nseg
->attrib
.avl
, DESC_AVL_MASK
) |
217 __SHIFTIN((uint32_t)nseg
->attrib
.l
, DESC_L_MASK
) |
218 __SHIFTIN((uint32_t)nseg
->attrib
.def
, DESC_B_MASK
) |
219 __SHIFTIN((uint32_t)nseg
->attrib
.g
, DESC_G_MASK
);
223 nvmm_get_registers(CPUState
*cpu
)
225 struct CPUX86State
*env
= (CPUArchState
*)cpu
->env_ptr
;
226 struct nvmm_machine
*mach
= get_nvmm_mach();
227 struct qemu_vcpu
*qcpu
= get_qemu_vcpu(cpu
);
228 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
229 X86CPU
*x86_cpu
= X86_CPU(cpu
);
230 struct nvmm_x64_state
*state
= vcpu
->state
;
231 uint64_t bitmap
, tpr
;
235 assert(cpu_is_stopped(cpu
) || qemu_cpu_is_self(cpu
));
238 NVMM_X64_STATE_SEGS
|
239 NVMM_X64_STATE_GPRS
|
242 NVMM_X64_STATE_MSRS
|
245 ret
= nvmm_vcpu_getstate(mach
, vcpu
, bitmap
);
247 error_report("NVMM: Failed to get virtual processor context,"
252 env
->regs
[R_EAX
] = state
->gprs
[NVMM_X64_GPR_RAX
];
253 env
->regs
[R_ECX
] = state
->gprs
[NVMM_X64_GPR_RCX
];
254 env
->regs
[R_EDX
] = state
->gprs
[NVMM_X64_GPR_RDX
];
255 env
->regs
[R_EBX
] = state
->gprs
[NVMM_X64_GPR_RBX
];
256 env
->regs
[R_ESP
] = state
->gprs
[NVMM_X64_GPR_RSP
];
257 env
->regs
[R_EBP
] = state
->gprs
[NVMM_X64_GPR_RBP
];
258 env
->regs
[R_ESI
] = state
->gprs
[NVMM_X64_GPR_RSI
];
259 env
->regs
[R_EDI
] = state
->gprs
[NVMM_X64_GPR_RDI
];
261 env
->regs
[R_R8
] = state
->gprs
[NVMM_X64_GPR_R8
];
262 env
->regs
[R_R9
] = state
->gprs
[NVMM_X64_GPR_R9
];
263 env
->regs
[R_R10
] = state
->gprs
[NVMM_X64_GPR_R10
];
264 env
->regs
[R_R11
] = state
->gprs
[NVMM_X64_GPR_R11
];
265 env
->regs
[R_R12
] = state
->gprs
[NVMM_X64_GPR_R12
];
266 env
->regs
[R_R13
] = state
->gprs
[NVMM_X64_GPR_R13
];
267 env
->regs
[R_R14
] = state
->gprs
[NVMM_X64_GPR_R14
];
268 env
->regs
[R_R15
] = state
->gprs
[NVMM_X64_GPR_R15
];
271 /* RIP and RFLAGS. */
272 env
->eip
= state
->gprs
[NVMM_X64_GPR_RIP
];
273 env
->eflags
= state
->gprs
[NVMM_X64_GPR_RFLAGS
];
276 nvmm_get_segment(&env
->segs
[R_ES
], &state
->segs
[NVMM_X64_SEG_ES
]);
277 nvmm_get_segment(&env
->segs
[R_CS
], &state
->segs
[NVMM_X64_SEG_CS
]);
278 nvmm_get_segment(&env
->segs
[R_SS
], &state
->segs
[NVMM_X64_SEG_SS
]);
279 nvmm_get_segment(&env
->segs
[R_DS
], &state
->segs
[NVMM_X64_SEG_DS
]);
280 nvmm_get_segment(&env
->segs
[R_FS
], &state
->segs
[NVMM_X64_SEG_FS
]);
281 nvmm_get_segment(&env
->segs
[R_GS
], &state
->segs
[NVMM_X64_SEG_GS
]);
283 /* Special segments. */
284 nvmm_get_segment(&env
->gdt
, &state
->segs
[NVMM_X64_SEG_GDT
]);
285 nvmm_get_segment(&env
->ldt
, &state
->segs
[NVMM_X64_SEG_LDT
]);
286 nvmm_get_segment(&env
->tr
, &state
->segs
[NVMM_X64_SEG_TR
]);
287 nvmm_get_segment(&env
->idt
, &state
->segs
[NVMM_X64_SEG_IDT
]);
289 /* Control registers. */
290 env
->cr
[0] = state
->crs
[NVMM_X64_CR_CR0
];
291 env
->cr
[2] = state
->crs
[NVMM_X64_CR_CR2
];
292 env
->cr
[3] = state
->crs
[NVMM_X64_CR_CR3
];
293 env
->cr
[4] = state
->crs
[NVMM_X64_CR_CR4
];
294 tpr
= state
->crs
[NVMM_X64_CR_CR8
];
295 if (tpr
!= qcpu
->tpr
) {
297 cpu_set_apic_tpr(x86_cpu
->apic_state
, tpr
);
299 env
->xcr0
= state
->crs
[NVMM_X64_CR_XCR0
];
301 /* Debug registers. */
302 env
->dr
[0] = state
->drs
[NVMM_X64_DR_DR0
];
303 env
->dr
[1] = state
->drs
[NVMM_X64_DR_DR1
];
304 env
->dr
[2] = state
->drs
[NVMM_X64_DR_DR2
];
305 env
->dr
[3] = state
->drs
[NVMM_X64_DR_DR3
];
306 env
->dr
[6] = state
->drs
[NVMM_X64_DR_DR6
];
307 env
->dr
[7] = state
->drs
[NVMM_X64_DR_DR7
];
310 env
->fpuc
= state
->fpu
.fx_cw
;
311 env
->fpstt
= (state
->fpu
.fx_sw
>> 11) & 0x7;
312 env
->fpus
= state
->fpu
.fx_sw
& ~0x3800;
313 for (i
= 0; i
< 8; i
++) {
314 env
->fptags
[i
] = !((state
->fpu
.fx_tw
>> i
) & 1);
316 env
->fpop
= state
->fpu
.fx_opcode
;
317 env
->fpip
= state
->fpu
.fx_ip
.fa_64
;
318 env
->fpdp
= state
->fpu
.fx_dp
.fa_64
;
319 env
->mxcsr
= state
->fpu
.fx_mxcsr
;
320 assert(sizeof(state
->fpu
.fx_87_ac
) == sizeof(env
->fpregs
));
321 memcpy(env
->fpregs
, state
->fpu
.fx_87_ac
, sizeof(env
->fpregs
));
322 for (i
= 0; i
< CPU_NB_REGS
; i
++) {
323 memcpy(&env
->xmm_regs
[i
].ZMM_Q(0),
324 &state
->fpu
.fx_xmm
[i
].xmm_bytes
[0], 8);
325 memcpy(&env
->xmm_regs
[i
].ZMM_Q(1),
326 &state
->fpu
.fx_xmm
[i
].xmm_bytes
[8], 8);
330 env
->efer
= state
->msrs
[NVMM_X64_MSR_EFER
];
331 env
->star
= state
->msrs
[NVMM_X64_MSR_STAR
];
333 env
->lstar
= state
->msrs
[NVMM_X64_MSR_LSTAR
];
334 env
->cstar
= state
->msrs
[NVMM_X64_MSR_CSTAR
];
335 env
->fmask
= state
->msrs
[NVMM_X64_MSR_SFMASK
];
336 env
->kernelgsbase
= state
->msrs
[NVMM_X64_MSR_KERNELGSBASE
];
338 env
->sysenter_cs
= state
->msrs
[NVMM_X64_MSR_SYSENTER_CS
];
339 env
->sysenter_esp
= state
->msrs
[NVMM_X64_MSR_SYSENTER_ESP
];
340 env
->sysenter_eip
= state
->msrs
[NVMM_X64_MSR_SYSENTER_EIP
];
341 env
->pat
= state
->msrs
[NVMM_X64_MSR_PAT
];
342 env
->tsc
= state
->msrs
[NVMM_X64_MSR_TSC
];
344 x86_update_hflags(env
);
348 nvmm_can_take_int(CPUState
*cpu
)
350 struct CPUX86State
*env
= (CPUArchState
*)cpu
->env_ptr
;
351 struct qemu_vcpu
*qcpu
= get_qemu_vcpu(cpu
);
352 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
353 struct nvmm_machine
*mach
= get_nvmm_mach();
355 if (qcpu
->int_window_exit
) {
359 if (qcpu
->int_shadow
|| !(env
->eflags
& IF_MASK
)) {
360 struct nvmm_x64_state
*state
= vcpu
->state
;
362 /* Exit on interrupt window. */
363 nvmm_vcpu_getstate(mach
, vcpu
, NVMM_X64_STATE_INTR
);
364 state
->intr
.int_window_exiting
= 1;
365 nvmm_vcpu_setstate(mach
, vcpu
, NVMM_X64_STATE_INTR
);
374 nvmm_can_take_nmi(CPUState
*cpu
)
376 struct qemu_vcpu
*qcpu
= get_qemu_vcpu(cpu
);
379 * Contrary to INTs, NMIs always schedule an exit when they are
380 * completed. Therefore, if window-exiting is enabled, it means
383 if (qcpu
->nmi_window_exit
) {
391 * Called before the VCPU is run. We inject events generated by the I/O
392 * thread, and synchronize the guest TPR.
395 nvmm_vcpu_pre_run(CPUState
*cpu
)
397 struct CPUX86State
*env
= (CPUArchState
*)cpu
->env_ptr
;
398 struct nvmm_machine
*mach
= get_nvmm_mach();
399 struct qemu_vcpu
*qcpu
= get_qemu_vcpu(cpu
);
400 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
401 X86CPU
*x86_cpu
= X86_CPU(cpu
);
402 struct nvmm_x64_state
*state
= vcpu
->state
;
403 struct nvmm_vcpu_event
*event
= vcpu
->event
;
404 bool has_event
= false;
405 bool sync_tpr
= false;
409 qemu_mutex_lock_iothread();
411 tpr
= cpu_get_apic_tpr(x86_cpu
->apic_state
);
412 if (tpr
!= qcpu
->tpr
) {
418 * Force the VCPU out of its inner loop to process any INIT requests
419 * or commit pending TPR access.
421 if (cpu
->interrupt_request
& (CPU_INTERRUPT_INIT
| CPU_INTERRUPT_TPR
)) {
422 cpu
->exit_request
= 1;
425 if (!has_event
&& (cpu
->interrupt_request
& CPU_INTERRUPT_NMI
)) {
426 if (nvmm_can_take_nmi(cpu
)) {
427 cpu
->interrupt_request
&= ~CPU_INTERRUPT_NMI
;
428 event
->type
= NVMM_VCPU_EVENT_INTR
;
434 if (!has_event
&& (cpu
->interrupt_request
& CPU_INTERRUPT_HARD
)) {
435 if (nvmm_can_take_int(cpu
)) {
436 cpu
->interrupt_request
&= ~CPU_INTERRUPT_HARD
;
437 event
->type
= NVMM_VCPU_EVENT_INTR
;
438 event
->vector
= cpu_get_pic_interrupt(env
);
443 /* Don't want SMIs. */
444 if (cpu
->interrupt_request
& CPU_INTERRUPT_SMI
) {
445 cpu
->interrupt_request
&= ~CPU_INTERRUPT_SMI
;
449 ret
= nvmm_vcpu_getstate(mach
, vcpu
, NVMM_X64_STATE_CRS
);
451 error_report("NVMM: Failed to get CPU state,"
455 state
->crs
[NVMM_X64_CR_CR8
] = qcpu
->tpr
;
457 ret
= nvmm_vcpu_setstate(mach
, vcpu
, NVMM_X64_STATE_CRS
);
459 error_report("NVMM: Failed to set CPU state,"
465 ret
= nvmm_vcpu_inject(mach
, vcpu
);
467 error_report("NVMM: Failed to inject event,"
472 qemu_mutex_unlock_iothread();
476 * Called after the VCPU ran. We synchronize the host view of the TPR and
480 nvmm_vcpu_post_run(CPUState
*cpu
, struct nvmm_vcpu_exit
*exit
)
482 struct qemu_vcpu
*qcpu
= get_qemu_vcpu(cpu
);
483 struct CPUX86State
*env
= (CPUArchState
*)cpu
->env_ptr
;
484 X86CPU
*x86_cpu
= X86_CPU(cpu
);
487 env
->eflags
= exit
->exitstate
.rflags
;
488 qcpu
->int_shadow
= exit
->exitstate
.int_shadow
;
489 qcpu
->int_window_exit
= exit
->exitstate
.int_window_exiting
;
490 qcpu
->nmi_window_exit
= exit
->exitstate
.nmi_window_exiting
;
492 tpr
= exit
->exitstate
.cr8
;
493 if (qcpu
->tpr
!= tpr
) {
495 qemu_mutex_lock_iothread();
496 cpu_set_apic_tpr(x86_cpu
->apic_state
, qcpu
->tpr
);
497 qemu_mutex_unlock_iothread();
501 /* -------------------------------------------------------------------------- */
504 nvmm_io_callback(struct nvmm_io
*io
)
506 MemTxAttrs attrs
= { 0 };
509 ret
= address_space_rw(&address_space_io
, io
->port
, attrs
, io
->data
,
511 if (ret
!= MEMTX_OK
) {
512 error_report("NVMM: I/O Transaction Failed "
513 "[%s, port=%u, size=%zu]", (io
->in
? "in" : "out"),
517 /* Needed, otherwise infinite loop. */
518 current_cpu
->vcpu_dirty
= false;
522 nvmm_mem_callback(struct nvmm_mem
*mem
)
524 cpu_physical_memory_rw(mem
->gpa
, mem
->data
, mem
->size
, mem
->write
);
526 /* Needed, otherwise infinite loop. */
527 current_cpu
->vcpu_dirty
= false;
530 static struct nvmm_assist_callbacks nvmm_callbacks
= {
531 .io
= nvmm_io_callback
,
532 .mem
= nvmm_mem_callback
535 /* -------------------------------------------------------------------------- */
538 nvmm_handle_mem(struct nvmm_machine
*mach
, struct nvmm_vcpu
*vcpu
)
542 ret
= nvmm_assist_mem(mach
, vcpu
);
544 error_report("NVMM: Mem Assist Failed [gpa=%p]",
545 (void *)vcpu
->exit
->u
.mem
.gpa
);
552 nvmm_handle_io(struct nvmm_machine
*mach
, struct nvmm_vcpu
*vcpu
)
556 ret
= nvmm_assist_io(mach
, vcpu
);
558 error_report("NVMM: I/O Assist Failed [port=%d]",
559 (int)vcpu
->exit
->u
.io
.port
);
566 nvmm_handle_rdmsr(struct nvmm_machine
*mach
, CPUState
*cpu
,
567 struct nvmm_vcpu_exit
*exit
)
569 struct qemu_vcpu
*qcpu
= get_qemu_vcpu(cpu
);
570 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
571 X86CPU
*x86_cpu
= X86_CPU(cpu
);
572 struct nvmm_x64_state
*state
= vcpu
->state
;
576 switch (exit
->u
.rdmsr
.msr
) {
577 case MSR_IA32_APICBASE
:
578 val
= cpu_get_apic_base(x86_cpu
->apic_state
);
581 case MSR_MTRRdefType
:
586 default: /* More MSRs to add? */
588 error_report("NVMM: Unexpected RDMSR 0x%x, ignored",
593 ret
= nvmm_vcpu_getstate(mach
, vcpu
, NVMM_X64_STATE_GPRS
);
598 state
->gprs
[NVMM_X64_GPR_RAX
] = (val
& 0xFFFFFFFF);
599 state
->gprs
[NVMM_X64_GPR_RDX
] = (val
>> 32);
600 state
->gprs
[NVMM_X64_GPR_RIP
] = exit
->u
.rdmsr
.npc
;
602 ret
= nvmm_vcpu_setstate(mach
, vcpu
, NVMM_X64_STATE_GPRS
);
611 nvmm_handle_wrmsr(struct nvmm_machine
*mach
, CPUState
*cpu
,
612 struct nvmm_vcpu_exit
*exit
)
614 struct qemu_vcpu
*qcpu
= get_qemu_vcpu(cpu
);
615 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
616 X86CPU
*x86_cpu
= X86_CPU(cpu
);
617 struct nvmm_x64_state
*state
= vcpu
->state
;
621 val
= exit
->u
.wrmsr
.val
;
623 switch (exit
->u
.wrmsr
.msr
) {
624 case MSR_IA32_APICBASE
:
625 cpu_set_apic_base(x86_cpu
->apic_state
, val
);
627 case MSR_MTRRdefType
:
630 default: /* More MSRs to add? */
631 error_report("NVMM: Unexpected WRMSR 0x%x [val=0x%lx], ignored",
632 exit
->u
.wrmsr
.msr
, val
);
636 ret
= nvmm_vcpu_getstate(mach
, vcpu
, NVMM_X64_STATE_GPRS
);
641 state
->gprs
[NVMM_X64_GPR_RIP
] = exit
->u
.wrmsr
.npc
;
643 ret
= nvmm_vcpu_setstate(mach
, vcpu
, NVMM_X64_STATE_GPRS
);
652 nvmm_handle_halted(struct nvmm_machine
*mach
, CPUState
*cpu
,
653 struct nvmm_vcpu_exit
*exit
)
655 struct CPUX86State
*env
= (CPUArchState
*)cpu
->env_ptr
;
658 qemu_mutex_lock_iothread();
660 if (!((cpu
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
661 (env
->eflags
& IF_MASK
)) &&
662 !(cpu
->interrupt_request
& CPU_INTERRUPT_NMI
)) {
663 cpu
->exception_index
= EXCP_HLT
;
668 qemu_mutex_unlock_iothread();
674 nvmm_inject_ud(struct nvmm_machine
*mach
, struct nvmm_vcpu
*vcpu
)
676 struct nvmm_vcpu_event
*event
= vcpu
->event
;
678 event
->type
= NVMM_VCPU_EVENT_EXCP
;
680 event
->u
.excp
.error
= 0;
682 return nvmm_vcpu_inject(mach
, vcpu
);
686 nvmm_vcpu_loop(CPUState
*cpu
)
688 struct CPUX86State
*env
= (CPUArchState
*)cpu
->env_ptr
;
689 struct nvmm_machine
*mach
= get_nvmm_mach();
690 struct qemu_vcpu
*qcpu
= get_qemu_vcpu(cpu
);
691 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
692 X86CPU
*x86_cpu
= X86_CPU(cpu
);
693 struct nvmm_vcpu_exit
*exit
= vcpu
->exit
;
697 * Some asynchronous events must be handled outside of the inner
698 * VCPU loop. They are handled here.
700 if (cpu
->interrupt_request
& CPU_INTERRUPT_INIT
) {
701 nvmm_cpu_synchronize_state(cpu
);
702 do_cpu_init(x86_cpu
);
703 /* set int/nmi windows back to the reset state */
705 if (cpu
->interrupt_request
& CPU_INTERRUPT_POLL
) {
706 cpu
->interrupt_request
&= ~CPU_INTERRUPT_POLL
;
707 apic_poll_irq(x86_cpu
->apic_state
);
709 if (((cpu
->interrupt_request
& CPU_INTERRUPT_HARD
) &&
710 (env
->eflags
& IF_MASK
)) ||
711 (cpu
->interrupt_request
& CPU_INTERRUPT_NMI
)) {
714 if (cpu
->interrupt_request
& CPU_INTERRUPT_SIPI
) {
715 nvmm_cpu_synchronize_state(cpu
);
716 do_cpu_sipi(x86_cpu
);
718 if (cpu
->interrupt_request
& CPU_INTERRUPT_TPR
) {
719 cpu
->interrupt_request
&= ~CPU_INTERRUPT_TPR
;
720 nvmm_cpu_synchronize_state(cpu
);
721 apic_handle_tpr_access_report(x86_cpu
->apic_state
, env
->eip
,
722 env
->tpr_access_type
);
726 cpu
->exception_index
= EXCP_HLT
;
727 qatomic_set(&cpu
->exit_request
, false);
731 qemu_mutex_unlock_iothread();
738 if (cpu
->vcpu_dirty
) {
739 nvmm_set_registers(cpu
);
740 cpu
->vcpu_dirty
= false;
744 cpu
->exception_index
= EXCP_INTERRUPT
;
750 nvmm_vcpu_pre_run(cpu
);
752 if (qatomic_read(&cpu
->exit_request
)) {
753 nvmm_vcpu_stop(vcpu
);
756 /* Read exit_request before the kernel reads the immediate exit flag */
758 ret
= nvmm_vcpu_run(mach
, vcpu
);
760 error_report("NVMM: Failed to exec a virtual processor,"
765 nvmm_vcpu_post_run(cpu
, exit
);
767 switch (exit
->reason
) {
768 case NVMM_VCPU_EXIT_NONE
:
770 case NVMM_VCPU_EXIT_STOPPED
:
772 * The kernel cleared the immediate exit flag; cpu->exit_request
773 * must be cleared after
778 case NVMM_VCPU_EXIT_MEMORY
:
779 ret
= nvmm_handle_mem(mach
, vcpu
);
781 case NVMM_VCPU_EXIT_IO
:
782 ret
= nvmm_handle_io(mach
, vcpu
);
784 case NVMM_VCPU_EXIT_INT_READY
:
785 case NVMM_VCPU_EXIT_NMI_READY
:
786 case NVMM_VCPU_EXIT_TPR_CHANGED
:
788 case NVMM_VCPU_EXIT_HALTED
:
789 ret
= nvmm_handle_halted(mach
, cpu
, exit
);
791 case NVMM_VCPU_EXIT_SHUTDOWN
:
792 qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET
);
793 cpu
->exception_index
= EXCP_INTERRUPT
;
796 case NVMM_VCPU_EXIT_RDMSR
:
797 ret
= nvmm_handle_rdmsr(mach
, cpu
, exit
);
799 case NVMM_VCPU_EXIT_WRMSR
:
800 ret
= nvmm_handle_wrmsr(mach
, cpu
, exit
);
802 case NVMM_VCPU_EXIT_MONITOR
:
803 case NVMM_VCPU_EXIT_MWAIT
:
804 ret
= nvmm_inject_ud(mach
, vcpu
);
807 error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]",
808 exit
->reason
, exit
->u
.inv
.hwcode
);
809 nvmm_get_registers(cpu
);
810 qemu_mutex_lock_iothread();
811 qemu_system_guest_panicked(cpu_get_crash_info(cpu
));
812 qemu_mutex_unlock_iothread();
819 qemu_mutex_lock_iothread();
821 qatomic_set(&cpu
->exit_request
, false);
826 /* -------------------------------------------------------------------------- */
829 do_nvmm_cpu_synchronize_state(CPUState
*cpu
, run_on_cpu_data arg
)
831 nvmm_get_registers(cpu
);
832 cpu
->vcpu_dirty
= true;
836 do_nvmm_cpu_synchronize_post_reset(CPUState
*cpu
, run_on_cpu_data arg
)
838 nvmm_set_registers(cpu
);
839 cpu
->vcpu_dirty
= false;
843 do_nvmm_cpu_synchronize_post_init(CPUState
*cpu
, run_on_cpu_data arg
)
845 nvmm_set_registers(cpu
);
846 cpu
->vcpu_dirty
= false;
850 do_nvmm_cpu_synchronize_pre_loadvm(CPUState
*cpu
, run_on_cpu_data arg
)
852 cpu
->vcpu_dirty
= true;
855 void nvmm_cpu_synchronize_state(CPUState
*cpu
)
857 if (!cpu
->vcpu_dirty
) {
858 run_on_cpu(cpu
, do_nvmm_cpu_synchronize_state
, RUN_ON_CPU_NULL
);
862 void nvmm_cpu_synchronize_post_reset(CPUState
*cpu
)
864 run_on_cpu(cpu
, do_nvmm_cpu_synchronize_post_reset
, RUN_ON_CPU_NULL
);
867 void nvmm_cpu_synchronize_post_init(CPUState
*cpu
)
869 run_on_cpu(cpu
, do_nvmm_cpu_synchronize_post_init
, RUN_ON_CPU_NULL
);
872 void nvmm_cpu_synchronize_pre_loadvm(CPUState
*cpu
)
874 run_on_cpu(cpu
, do_nvmm_cpu_synchronize_pre_loadvm
, RUN_ON_CPU_NULL
);
877 /* -------------------------------------------------------------------------- */
879 static Error
*nvmm_migration_blocker
;
882 * The nvmm_vcpu_stop() mechanism breaks races between entering the VMM
883 * and another thread signaling the vCPU thread to exit.
887 nvmm_ipi_signal(int sigcpu
)
890 struct qemu_vcpu
*qcpu
= get_qemu_vcpu(current_cpu
);
891 struct nvmm_vcpu
*vcpu
= &qcpu
->vcpu
;
892 nvmm_vcpu_stop(vcpu
);
897 nvmm_init_cpu_signals(void)
899 struct sigaction sigact
;
902 /* Install the IPI handler. */
903 memset(&sigact
, 0, sizeof(sigact
));
904 sigact
.sa_handler
= nvmm_ipi_signal
;
905 sigaction(SIG_IPI
, &sigact
, NULL
);
907 /* Allow IPIs on the current thread. */
908 sigprocmask(SIG_BLOCK
, NULL
, &set
);
909 sigdelset(&set
, SIG_IPI
);
910 pthread_sigmask(SIG_SETMASK
, &set
, NULL
);
914 nvmm_init_vcpu(CPUState
*cpu
)
916 struct nvmm_machine
*mach
= get_nvmm_mach();
917 struct nvmm_vcpu_conf_cpuid cpuid
;
918 struct nvmm_vcpu_conf_tpr tpr
;
919 Error
*local_error
= NULL
;
920 struct qemu_vcpu
*qcpu
;
923 nvmm_init_cpu_signals();
925 if (nvmm_migration_blocker
== NULL
) {
926 error_setg(&nvmm_migration_blocker
,
927 "NVMM: Migration not supported");
929 (void)migrate_add_blocker(nvmm_migration_blocker
, &local_error
);
931 error_report_err(local_error
);
932 migrate_del_blocker(nvmm_migration_blocker
);
933 error_free(nvmm_migration_blocker
);
938 qcpu
= g_malloc0(sizeof(*qcpu
));
940 error_report("NVMM: Failed to allocate VCPU context.");
944 ret
= nvmm_vcpu_create(mach
, cpu
->cpu_index
, &qcpu
->vcpu
);
947 error_report("NVMM: Failed to create a virtual processor,"
953 memset(&cpuid
, 0, sizeof(cpuid
));
955 cpuid
.leaf
= 0x00000001;
956 cpuid
.u
.mask
.set
.edx
= CPUID_MCE
| CPUID_MCA
| CPUID_MTRR
;
957 ret
= nvmm_vcpu_configure(mach
, &qcpu
->vcpu
, NVMM_VCPU_CONF_CPUID
,
961 error_report("NVMM: Failed to configure a virtual processor,"
967 ret
= nvmm_vcpu_configure(mach
, &qcpu
->vcpu
, NVMM_VCPU_CONF_CALLBACKS
,
971 error_report("NVMM: Failed to configure a virtual processor,"
977 if (qemu_mach
.cap
.arch
.vcpu_conf_support
& NVMM_CAP_ARCH_VCPU_CONF_TPR
) {
978 memset(&tpr
, 0, sizeof(tpr
));
979 tpr
.exit_changed
= 1;
980 ret
= nvmm_vcpu_configure(mach
, &qcpu
->vcpu
, NVMM_VCPU_CONF_TPR
, &tpr
);
983 error_report("NVMM: Failed to configure a virtual processor,"
990 cpu
->vcpu_dirty
= true;
991 cpu
->hax_vcpu
= (struct hax_vcpu_state
*)qcpu
;
997 nvmm_vcpu_exec(CPUState
*cpu
)
1002 if (cpu
->exception_index
>= EXCP_INTERRUPT
) {
1003 ret
= cpu
->exception_index
;
1004 cpu
->exception_index
= -1;
1008 fatal
= nvmm_vcpu_loop(cpu
);
1011 error_report("NVMM: Failed to execute a VCPU.");
1020 nvmm_destroy_vcpu(CPUState
*cpu
)
1022 struct nvmm_machine
*mach
= get_nvmm_mach();
1023 struct qemu_vcpu
*qcpu
= get_qemu_vcpu(cpu
);
1025 nvmm_vcpu_destroy(mach
, &qcpu
->vcpu
);
1026 g_free(cpu
->hax_vcpu
);
1029 /* -------------------------------------------------------------------------- */
1032 nvmm_update_mapping(hwaddr start_pa
, ram_addr_t size
, uintptr_t hva
,
1033 bool add
, bool rom
, const char *name
)
1035 struct nvmm_machine
*mach
= get_nvmm_mach();
1039 prot
= PROT_READ
| PROT_EXEC
;
1043 ret
= nvmm_gpa_map(mach
, hva
, start_pa
, size
, prot
);
1045 ret
= nvmm_gpa_unmap(mach
, hva
, start_pa
, size
);
1049 error_report("NVMM: Failed to %s GPA range '%s' PA:%p, "
1050 "Size:%p bytes, HostVA:%p, error=%d",
1051 (add
? "map" : "unmap"), name
, (void *)(uintptr_t)start_pa
,
1052 (void *)size
, (void *)hva
, errno
);
1057 nvmm_process_section(MemoryRegionSection
*section
, int add
)
1059 MemoryRegion
*mr
= section
->mr
;
1060 hwaddr start_pa
= section
->offset_within_address_space
;
1061 ram_addr_t size
= int128_get64(section
->size
);
1065 if (!memory_region_is_ram(mr
)) {
1069 /* Adjust start_pa and size so that they are page-aligned. */
1070 delta
= qemu_real_host_page_size
- (start_pa
& ~qemu_real_host_page_mask
);
1071 delta
&= ~qemu_real_host_page_mask
;
1077 size
&= qemu_real_host_page_mask
;
1078 if (!size
|| (start_pa
& ~qemu_real_host_page_mask
)) {
1082 hva
= (uintptr_t)memory_region_get_ram_ptr(mr
) +
1083 section
->offset_within_region
+ delta
;
1085 nvmm_update_mapping(start_pa
, size
, hva
, add
,
1086 memory_region_is_rom(mr
), mr
->name
);
1090 nvmm_region_add(MemoryListener
*listener
, MemoryRegionSection
*section
)
1092 memory_region_ref(section
->mr
);
1093 nvmm_process_section(section
, 1);
1097 nvmm_region_del(MemoryListener
*listener
, MemoryRegionSection
*section
)
1099 nvmm_process_section(section
, 0);
1100 memory_region_unref(section
->mr
);
1104 nvmm_transaction_begin(MemoryListener
*listener
)
1110 nvmm_transaction_commit(MemoryListener
*listener
)
1116 nvmm_log_sync(MemoryListener
*listener
, MemoryRegionSection
*section
)
1118 MemoryRegion
*mr
= section
->mr
;
1120 if (!memory_region_is_ram(mr
)) {
1124 memory_region_set_dirty(mr
, 0, int128_get64(section
->size
));
1127 static MemoryListener nvmm_memory_listener
= {
1128 .begin
= nvmm_transaction_begin
,
1129 .commit
= nvmm_transaction_commit
,
1130 .region_add
= nvmm_region_add
,
1131 .region_del
= nvmm_region_del
,
1132 .log_sync
= nvmm_log_sync
,
1137 nvmm_ram_block_added(RAMBlockNotifier
*n
, void *host
, size_t size
)
1139 struct nvmm_machine
*mach
= get_nvmm_mach();
1140 uintptr_t hva
= (uintptr_t)host
;
1143 ret
= nvmm_hva_map(mach
, hva
, size
);
1146 error_report("NVMM: Failed to map HVA, HostVA:%p "
1147 "Size:%p bytes, error=%d",
1148 (void *)hva
, (void *)size
, errno
);
1152 static struct RAMBlockNotifier nvmm_ram_notifier
= {
1153 .ram_block_added
= nvmm_ram_block_added
1156 /* -------------------------------------------------------------------------- */
1159 nvmm_accel_init(MachineState
*ms
)
1166 error_report("NVMM: Initialization failed, error=%d", errno
);
1170 ret
= nvmm_capability(&qemu_mach
.cap
);
1173 error_report("NVMM: Unable to fetch capability, error=%d", errno
);
1176 if (qemu_mach
.cap
.version
< NVMM_KERN_VERSION
) {
1177 error_report("NVMM: Unsupported version %u", qemu_mach
.cap
.version
);
1178 return -EPROGMISMATCH
;
1180 if (qemu_mach
.cap
.state_size
!= sizeof(struct nvmm_x64_state
)) {
1181 error_report("NVMM: Wrong state size %u", qemu_mach
.cap
.state_size
);
1182 return -EPROGMISMATCH
;
1185 ret
= nvmm_machine_create(&qemu_mach
.mach
);
1188 error_report("NVMM: Machine creation failed, error=%d", errno
);
1192 memory_listener_register(&nvmm_memory_listener
, &address_space_memory
);
1193 ram_block_notifier_add(&nvmm_ram_notifier
);
1195 printf("NetBSD Virtual Machine Monitor accelerator is operational\n");
1202 return nvmm_allowed
;
1206 nvmm_accel_class_init(ObjectClass
*oc
, void *data
)
1208 AccelClass
*ac
= ACCEL_CLASS(oc
);
1210 ac
->init_machine
= nvmm_accel_init
;
1211 ac
->allowed
= &nvmm_allowed
;
1214 static const TypeInfo nvmm_accel_type
= {
1215 .name
= ACCEL_CLASS_NAME("nvmm"),
1216 .parent
= TYPE_ACCEL
,
1217 .class_init
= nvmm_accel_class_init
,
1221 nvmm_type_init(void)
1223 type_register_static(&nvmm_accel_type
);
/* Hook nvmm_type_init() into QEMU's type-initialization machinery. */
type_init(nvmm_type_init);