2 * Copyright (c) 2008 The FreeBSD Project
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/malloc.h>
32 #include <sys/sysproto.h>
38 #include <vm/vm_extern.h>
39 #include <vm/vm_map.h>
40 #include <vm/vm_object.h>
41 #include <vm/vm_param.h>
42 #include <machine/_inttypes.h>
43 #include <machine/specialreg.h>
44 #include <machine/segments.h>
45 #include <machine/vmcb.h>
/*
 * Sizes of the SVM permission maps referenced from the VMCB.
 *
 * One bit per I/O port only needs 8 KB, but the AMD64 Architecture
 * Programmer's Manual specifies that the I/O permission map occupies
 * 12 KB of contiguous physical memory, so three full pages are reserved.
 * The MSR permission map occupies 8 KB.
 */
#define IOPM_SIZE	(12 * 1024)
#define MSRPM_SIZE	(8 * 1024)
/*
 * Module-wide SVM buffers.  They start out NULL; fkvm_load() fills them in
 * and fkvm_unload() releases them.
 *
 * Open question from the original author: should the two permission maps
 * be backed by a vm_object_t instead of raw allocations?
 */
static void *iopm;		/* I/O permission map */
static void *msrpm;		/* MSR permission map */
static void *hsave_area;	/* host state-save area */
82 unsigned long vmcb_pa
;
84 unsigned long regs
[NR_VCPU_REGS
];
85 u_int64_t host_gs_base
;
89 struct guestvm
*guest_vm
;
95 struct vcpu
*vcpus
[MAX_VCPUS
];
/*
 * Raw encodings of the SVM instructions, for use in inline assembly.
 * Every one of them is the two-byte opcode 0f 01 followed by a single
 * selector byte, so the strings are assembled from a common prefix.
 */
#define SVM_OPCODE(last_byte)	".byte 0x0f, 0x01, " last_byte

#define SVM_VMLOAD	SVM_OPCODE("0xda")
#define SVM_VMRUN	SVM_OPCODE("0xd8")
#define SVM_VMSAVE	SVM_OPCODE("0xdb")
#define SVM_CLGI	SVM_OPCODE("0xdd")
#define SVM_STGI	SVM_OPCODE("0xdc")
#define SVM_INVLPGA	SVM_OPCODE("0xdf")
/*
 * Accessors for the per-thread vcpu pointer and the per-process guest VM
 * pointer.
 *
 * Every macro argument is parenthesized so that expression arguments such
 * as `tdp + 1` expand correctly.  The value parameter of TD_SET_VCPU is
 * deliberately not called `vcpu`: a parameter with that name would also
 * replace the `vcpu` member name inside the expansion whenever the caller
 * passed anything other than a variable literally named `vcpu`.
 */
#define TD_GET_VCPU(td)		((td)->vcpu)

#define TD_SET_VCPU(td, new_vcpu) do {			\
	(td)->vcpu = (new_vcpu);			\
} while (0)

#define PROC_GET_GUESTVM(p)	((p)->p_guestvm)

#define PROC_SET_GUESTVM(p, guestvm) do {		\
	(p)->p_guestvm = (guestvm);			\
} while (0)

/* Guest VM that owns the vcpu attached to `thread`; the vcpu must be set. */
#define GET_GUESTVM(thread)	(TD_GET_VCPU(thread)->guest_vm)
125 print_vmcb_seg(struct vmcb_seg
* vmcb_seg
, const char* name
)
127 printf("%s Selector\n", name
);
128 printf("Selector : %" PRIx16
"\n", vmcb_seg
->selector
);
129 printf("Attributes : %" PRIx16
"\n", vmcb_seg
->attrib
);
130 printf("Limit : %" PRIx32
"\n", vmcb_seg
->limit
);
131 printf("Base Address : %" PRIx64
"\n", vmcb_seg
->base
);
136 print_vmcb(struct vmcb
*vmcb
)
138 printf("VMCB Control Area\n");
139 printf("Intercept CR Reads : %" PRIx16
"\n", vmcb
->control
.intercept_cr_reads
);
140 printf("Intercept CR Writes : %" PRIx16
"\n", vmcb
->control
.intercept_cr_writes
);
141 printf("Intercept DR Reads : %" PRIx16
"\n", vmcb
->control
.intercept_dr_reads
);
142 printf("Intercept DR Writes : %" PRIx16
"\n", vmcb
->control
.intercept_dr_writes
);
143 printf("Intercept Exceptions : %" PRIx32
"\n", vmcb
->control
.intercept_exceptions
);
144 printf("Intercepts : %" PRIx64
"\n", vmcb
->control
.intercepts
);
145 printf("Reserved 1: \n");
146 for(int i
=0; i
< 44; i
++) {
147 printf("%" PRIx8
"", vmcb
->control
.reserved_1
[i
]); /* Should be Zero */
150 printf("IOPM Base PA : %" PRIx64
"\n", vmcb
->control
.iopm_base_pa
);
151 printf("MSRPM Base PA : %" PRIx64
"\n", vmcb
->control
.msrpm_base_pa
);
152 printf("TSC Offset : %" PRIx64
"\n", vmcb
->control
.tsc_offset
);
153 printf("Guest ASID : %" PRIx32
"\n", vmcb
->control
.guest_asid
);
154 printf("TLB Control : %" PRIx8
"\n", vmcb
->control
.tlb_control
);
155 printf("Reserved 2 : \n");
156 for(int i
=0; i
< 3; i
++) {
157 printf("%" PRIx8
"", vmcb
->control
.reserved_1
[i
]); /* Should be Zero */
160 printf("Virtual TPR : %" PRIx8
"\n", vmcb
->control
.v_tpr
);
161 printf("Virtual IRQ : %" PRIx8
"\n", vmcb
->control
.v_irq
);
162 printf("Virtual Interrupt : %" PRIx8
"\n", vmcb
->control
.v_intr
);
163 printf("Virtual Interrupt Masking: %" PRIx8
"\n", vmcb
->control
.v_intr_masking
);
164 printf("Virtual Interrupt Vector : %" PRIx8
"\n", vmcb
->control
.v_intr_vector
);
165 printf("Reserved 6 : \n");
166 for(int i
=0; i
< 3; i
++) {
167 printf("%" PRIx8
"", vmcb
->control
.reserved_6
[i
]); /* Should be Zero */
170 printf("Interrupt Shadow : %" PRIx8
"\n", vmcb
->control
.intr_shadow
);
171 printf("Reserved 7 : \n");
172 for(int i
=0; i
< 7; i
++) {
173 printf("%" PRIx8
"", vmcb
->control
.reserved_7
[i
]); /* Should be Zero */
176 printf("Exit Code : %" PRIx64
"\n", vmcb
->control
.exit_code
);
177 printf("Exit Info 1 : %" PRIx64
"\n", vmcb
->control
.exit_info_1
);
178 printf("Exit Info 2 : %" PRIx64
"\n", vmcb
->control
.exit_info_2
);
179 printf("Exit Interrupt Info : %" PRIx32
"\n", vmcb
->control
.exit_int_info
);
180 printf("Exit Interrupt Info Err Code: %" PRIx32
"\n", vmcb
->control
.exit_int_info_err_code
);
181 printf("Nested Control : %" PRIx64
"\n", vmcb
->control
.nested_ctl
);
182 printf("Reserved 8 : \n");
183 for(int i
=0; i
< 16; i
++) {
184 printf("%" PRIx8
"", vmcb
->control
.reserved_8
[i
]); /* Should be Zero */
187 printf("Event Injection : %" PRIx64
"\n", vmcb
->control
.event_inj
);
188 printf("Nested CR3 : %" PRIx64
"\n", vmcb
->control
.nested_cr3
);
189 printf("LBR Virtualization Enable: %" PRIx64
"\n", vmcb
->control
.lbr_virt_enable
);
190 printf("Reserved 9 : \n");
191 for(int i
=0; i
< 832; i
++) {
192 printf("%" PRIx8
"", vmcb
->control
.reserved_9
[i
]); /* Should be Zero */
198 printf("VMCB Save Area\n");
199 print_vmcb_seg(&(vmcb
->save
.es
), "ES");
200 print_vmcb_seg(&(vmcb
->save
.es
), "CS");
201 print_vmcb_seg(&(vmcb
->save
.es
), "SS");
202 print_vmcb_seg(&(vmcb
->save
.es
), "DS");
203 print_vmcb_seg(&(vmcb
->save
.es
), "FS");
204 print_vmcb_seg(&(vmcb
->save
.es
), "GS");
205 print_vmcb_seg(&(vmcb
->save
.es
), "GDTR");
206 print_vmcb_seg(&(vmcb
->save
.es
), "LDTR");
207 print_vmcb_seg(&(vmcb
->save
.es
), "IDTR");
208 print_vmcb_seg(&(vmcb
->save
.es
), "TR");
209 printf("Reserved 1 : \n");
210 for(int i
=0; i
< 43; i
++) {
211 printf("%" PRIx8
"", vmcb
->save
.reserved_1
[i
]); /* Should be Zero */
214 printf("Current Processor Level : %" PRIx8
"\n", vmcb
->save
.cpl
);
215 printf("Reserved 2 : \n");
216 for(int i
=0; i
< 4; i
++) {
217 printf("%" PRIx8
"", vmcb
->save
.reserved_2
[i
]); /* Should be Zero */
220 printf("EFER : %" PRIx64
"\n", vmcb
->save
.efer
);
221 printf("Reserved 3 : \n");
222 for(int i
=0; i
< 112; i
++) {
223 printf("%" PRIx8
"", vmcb
->save
.reserved_3
[i
]); /* Should be Zero */
226 printf("Control Register 4 : %" PRIx64
"\n", vmcb
->save
.cr4
);
227 printf("Control Register 3 : %" PRIx64
"\n", vmcb
->save
.cr3
);
228 printf("Control Register 0 : %" PRIx64
"\n", vmcb
->save
.cr0
);
229 printf("Debug Register 7 : %" PRIx64
"\n", vmcb
->save
.dr7
);
230 printf("Debug Register 6 : %" PRIx64
"\n", vmcb
->save
.dr6
);
231 printf("RFlags : %" PRIx64
"\n", vmcb
->save
.rflags
);
232 printf("RIP : %" PRIx64
"\n", vmcb
->save
.rip
);
233 printf("Reserved 4 : \n");
234 for(int i
=0; i
< 88; i
++) {
235 printf("%" PRIx8
"", vmcb
->save
.reserved_4
[i
]); /* Should be Zero */
238 printf("RSP : %" PRIx64
"\n", vmcb
->save
.rsp
);
239 printf("Reserved 5 : \n");
240 for(int i
=0; i
< 24; i
++) {
241 printf("%" PRIx8
"", vmcb
->save
.reserved_5
[i
]); /* Should be Zero */
244 printf("RAX : %" PRIx64
"\n", vmcb
->save
.rax
);
245 printf("STAR : %" PRIx64
"\n", vmcb
->save
.star
);
246 printf("LSTAR : %" PRIx64
"\n", vmcb
->save
.lstar
);
247 printf("CSTAR : %" PRIx64
"\n", vmcb
->save
.cstar
);
248 printf("SFMASK : %" PRIx64
"\n", vmcb
->save
.sfmask
);
249 printf("Kernel GS Base : %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
250 printf("SYSENTER CS : %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
251 printf("SYSENTER ESP : %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
252 printf("SYSENTER EIP : %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
253 printf("Control Register 2 : %" PRIx64
"\n", vmcb
->save
.cr2
);
254 printf("Reserved 6 : \n");
255 for(int i
=0; i
< 32; i
++) {
256 printf("%" PRIx8
"", vmcb
->save
.reserved_6
[i
]); /* Should be Zero */
259 printf("Global PAT : %" PRIx64
"\n", vmcb
->save
.g_pat
);
260 printf("Debug Control : %" PRIx64
"\n", vmcb
->save
.dbg_ctl
);
261 printf("BR From : %" PRIx64
"\n", vmcb
->save
.br_from
);
262 printf("BR To : %" PRIx64
"\n", vmcb
->save
.br_to
);
263 printf("Last Exception From : %" PRIx64
"\n", vmcb
->save
.last_excp_from
);
264 printf("Last Exception To : %" PRIx64
"\n", vmcb
->save
.last_excp_to
);
271 print_tss_desc(struct system_segment_descriptor
*tss_desc
)
273 printf("TSS desc @ %p:\n", tss_desc
);
274 printf("sd_lolimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lolimit
);
275 printf("sd_lobase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lobase
);
276 printf("sd_type: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_type
);
277 printf("sd_dpl: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_dpl
);
278 printf("sd_p: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_p
);
279 printf("sd_hilimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hilimit
);
280 printf("sd_xx0: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx0
);
281 printf("sd_gran: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_gran
);
282 printf("sd_hibase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hibase
);
283 printf("sd_xx1: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx1
);
284 printf("sd_mbz: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_mbz
);
285 printf("sd_xx2: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx2
);
290 print_tss(struct system_segment_descriptor
*tss_desc
)
296 base
= (u_int32_t
*) ((((u_int64_t
) tss_desc
->sd_hibase
) << 24) | ((u_int64_t
) tss_desc
->sd_lobase
));
297 limit
= ((tss_desc
->sd_hilimit
<< 16) | tss_desc
->sd_lolimit
) / 4;
299 printf("TSS: @ %p\n", base
);
300 for (i
= 0; i
<= limit
; i
++)
301 printf("%x: 0x%" PRIx32
"\n", i
, base
[i
]);
307 print_vmcb_save_area(struct vmcb
*vmcb
)
309 printf("VMCB save area:\n");
310 printf(" cs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
311 vmcb
->save
.cs
.selector
,
312 vmcb
->save
.cs
.attrib
,
315 printf(" fs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
316 vmcb
->save
.fs
.selector
,
317 vmcb
->save
.fs
.attrib
,
320 printf(" gs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
321 vmcb
->save
.gs
.selector
,
322 vmcb
->save
.gs
.attrib
,
325 printf(" tr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
326 vmcb
->save
.tr
.selector
,
327 vmcb
->save
.tr
.attrib
,
330 printf(" ldtr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
331 vmcb
->save
.ldtr
.selector
,
332 vmcb
->save
.ldtr
.attrib
,
333 vmcb
->save
.ldtr
.limit
,
334 vmcb
->save
.ldtr
.base
);
335 printf(" rip: %" PRIx64
"\n", vmcb
->save
.rip
);
336 printf(" kernel_gs_base: %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
337 printf(" star: %" PRIx64
"\n", vmcb
->save
.star
);
338 printf(" lstar: %" PRIx64
"\n", vmcb
->save
.lstar
);
339 printf(" cstar: %" PRIx64
"\n", vmcb
->save
.cstar
);
340 printf(" sfmask: %" PRIx64
"\n", vmcb
->save
.sfmask
);
341 printf(" sysenter_cs: %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
342 printf(" sysenter_esp: %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
343 printf(" sysenter_eip: %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
348 vmrun_assert(struct vmcb
*vmcb
)
350 #define A(cond) do { if ((cond)) { printf("Error: assertion not met on line %d\n", __LINE__); bad = 1; } } while (0)
356 // The following are illegal:
359 A((vmcb
->save
.efer
& 0x0000000000001000) == 0);
361 // CR0.CD is zero and CR0.NW is set
362 A( ((vmcb
->save
.cr0
& 0x0000000040000000) == 0) &&
363 ((vmcb
->save
.cr0
& 0x0000000020000000) != 0));
365 // CR0[63:32] are not zero.
366 A((vmcb
->save
.cr0
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
368 // Any MBZ bit of CR3 is set.
369 A((vmcb
->save
.cr3
& 0xFFF0000000000000) != 0);
371 // CR4[63:11] are not zero.
372 A((vmcb
->save
.cr4
& 0xFFFFFFFFFFFFF800) == 0xFFFFFFFFFFFFF800);
374 // DR6[63:32] are not zero.
375 A((vmcb
->save
.dr6
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
377 // DR7[63:32] are not zero.
378 A((vmcb
->save
.dr7
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
380 // EFER[63:15] are not zero.
381 A((vmcb
->save
.efer
& 0xFFFFFFFFFFFF8000) == 0xFFFFFFFFFFF8000);
383 // EFER.LMA or EFER.LME is non-zero and this processor does not support long mode.
384 //// A((vmcb->save.efer & 0x0000000000000500) != 0);
386 // EFER.LME and CR0.PG are both set and CR4.PAE is zero.
387 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
388 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
389 ((vmcb
->save
.cr4
& 0x0000000000000020) != 0));
391 // EFER.LME and CR0.PG are both non-zero and CR0.PE is zero.
392 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
393 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
394 ((vmcb
->save
.cr0
& 0x0000000000000001) == 0));
396 // EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
397 // cs.attrib = concat 55-52 and 47-40 (p372 v2)
398 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
399 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
400 ((vmcb
->save
.cr4
& 0x0000000000000020) != 0) &&
401 ((vmcb
->save
.cs
.attrib
& 0x0200) != 0) &&
402 ((vmcb
->save
.cs
.attrib
& 0x0400) != 0));
404 // The VMRUN intercept bit is clear.
405 A((vmcb
->control
.intercepts
& 0x0000000100000000) == 0);
407 // The MSR or IOIO intercept tables extend to a physical address that is
408 // greater than or equal to the maximum supported physical address.
410 // Illegal event injection (see Section 15.19 on page 391).
412 // ASID is equal to zero.
413 A(vmcb
->control
.guest_asid
== 0);
415 // VMRUN can load a guest value of CR0 with PE = 0 but PG = 1, a
416 // combination that is otherwise illegal (see Section 15.18).
418 // In addition to consistency checks, VMRUN and #VMEXIT canonicalize (i.e.,
419 // sign-extend to 63 bits) all base addresses in the segment registers
420 // that have been loaded.
428 fkvm_vcpu_run(struct vcpu
*vcpu
)
437 u_short ldt_selector
;
439 unsigned long host_cr2
;
440 unsigned long host_dr6
;
441 unsigned long host_dr7
;
443 struct system_segment_descriptor
*tss_desc
;
448 printf("begin fkvm_vcpu_run\n");
452 if (vmrun_assert(vmcb
))
455 tss_desc
= (struct system_segment_descriptor
*) (&gdt
[GPROC0_SEL
]);
456 sel
= GSEL(GPROC0_SEL
, SEL_KPL
);
458 // printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64 "\n", sel);
459 // print_tss_desc(tss_desc);
460 // print_tss(tss_desc);
462 print_vmcb_save_area(vmcb
);
463 printf("vcpu->regs[VCPU_REGS_RIP]: 0x%lx\n", vcpu
->regs
[VCPU_REGS_RIP
]);
466 vmcb
->save
.rax
= vcpu
->regs
[VCPU_REGS_RAX
];
467 vmcb
->save
.rsp
= vcpu
->regs
[VCPU_REGS_RSP
];
468 vmcb
->save
.rip
= vcpu
->regs
[VCPU_REGS_RIP
];
470 /* meh: kvm has pre_svm_run(svm); */
472 vcpu
->host_gs_base
= rdmsr(MSR_GSBASE
);
473 // printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);
477 ldt_selector
= rldt();
478 // printf("fs selector: %hx\n", fs_selector);
479 // printf("gs selector: %hx\n", gs_selector);
480 // printf("ldt selector: %hx\n", ldt_selector);
487 vmcb
->save
.cr2
= vcpu
->cr2
;
490 /* meh: dr7? db_regs? */
492 // printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
493 // printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
494 // printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
495 // printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));
497 star
= rdmsr(MSR_STAR
);
498 lstar
= rdmsr(MSR_LSTAR
);
499 cstar
= rdmsr(MSR_CSTAR
);
500 sfmask
= rdmsr(MSR_SF_MASK
);
504 __asm
__volatile (SVM_CLGI
);
511 "mov %c[rbx](%[svm]), %%rbx \n\t"
512 "mov %c[rcx](%[svm]), %%rcx \n\t"
513 "mov %c[rdx](%[svm]), %%rdx \n\t"
514 "mov %c[rsi](%[svm]), %%rsi \n\t"
515 "mov %c[rdi](%[svm]), %%rdi \n\t"
516 "mov %c[rbp](%[svm]), %%rbp \n\t"
517 "mov %c[r8](%[svm]), %%r8 \n\t"
518 "mov %c[r9](%[svm]), %%r9 \n\t"
519 "mov %c[r10](%[svm]), %%r10 \n\t"
520 "mov %c[r11](%[svm]), %%r11 \n\t"
521 "mov %c[r12](%[svm]), %%r12 \n\t"
522 "mov %c[r13](%[svm]), %%r13 \n\t"
523 "mov %c[r14](%[svm]), %%r14 \n\t"
524 "mov %c[r15](%[svm]), %%r15 \n\t"
526 /* Enter guest mode */
528 "mov %c[vmcb](%[svm]), %%rax \n\t"
534 /* Save guest registers, load host registers */
535 "mov %%rbx, %c[rbx](%[svm]) \n\t"
536 "mov %%rcx, %c[rcx](%[svm]) \n\t"
537 "mov %%rdx, %c[rdx](%[svm]) \n\t"
538 "mov %%rsi, %c[rsi](%[svm]) \n\t"
539 "mov %%rdi, %c[rdi](%[svm]) \n\t"
540 "mov %%rbp, %c[rbp](%[svm]) \n\t"
541 "mov %%r8, %c[r8](%[svm]) \n\t"
542 "mov %%r9, %c[r9](%[svm]) \n\t"
543 "mov %%r10, %c[r10](%[svm]) \n\t"
544 "mov %%r11, %c[r11](%[svm]) \n\t"
545 "mov %%r12, %c[r12](%[svm]) \n\t"
546 "mov %%r13, %c[r13](%[svm]) \n\t"
547 "mov %%r14, %c[r14](%[svm]) \n\t"
548 "mov %%r15, %c[r15](%[svm]) \n\t"
552 [vmcb
]"i"(offsetof(struct vcpu
, vmcb_pa
)),
553 [rbx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBX
])),
554 [rcx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RCX
])),
555 [rdx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDX
])),
556 [rsi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RSI
])),
557 [rdi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDI
])),
558 [rbp
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBP
])),
559 [r8
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R8
])),
560 [r9
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R9
])),
561 [r10
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R10
])),
562 [r11
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R11
])),
563 [r12
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R12
])),
564 [r13
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R13
])),
565 [r14
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R14
])),
566 [r15
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R15
]))
568 "rbx", "rcx", "rdx", "rsi", "rdi",
569 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
573 /* meh: dr7? db_regs? */
575 vcpu
->cr2
= vmcb
->save
.cr2
;
577 vcpu
->regs
[VCPU_REGS_RAX
] = vmcb
->save
.rax
;
578 vcpu
->regs
[VCPU_REGS_RSP
] = vmcb
->save
.rsp
;
579 vcpu
->regs
[VCPU_REGS_RIP
] = vmcb
->save
.rip
;
586 load_fs(fs_selector
);
587 load_gs(gs_selector
);
590 wrmsr(MSR_GSBASE
, vcpu
->host_gs_base
);
592 tss_desc
->sd_type
= SDT_SYSTSS
;
595 wrmsr(MSR_STAR
, star
);
596 wrmsr(MSR_LSTAR
, lstar
);
597 wrmsr(MSR_CSTAR
, cstar
);
598 wrmsr(MSR_SF_MASK
, sfmask
);
602 __asm
__volatile (SVM_STGI
);
606 printf("exit_code: %" PRIx64
"\n", vmcb
->control
.exit_code
);
608 // print_tss_desc(tss_desc);
609 // print_tss(tss_desc);
611 print_vmcb_save_area(vmcb
);
619 _fkvm_init_seg(struct vmcb_seg
*seg
, uint16_t attrib
)
622 seg
->attrib
= VMCB_SELECTOR_P_MASK
| attrib
;
628 fkvm_init_seg(struct vmcb_seg
*seg
)
630 _fkvm_init_seg(seg
, VMCB_SELECTOR_S_MASK
| VMCB_SELECTOR_WRITE_MASK
);
/*
 * Initialise a system segment register (the callers in this file use it
 * for LDTR and TR).  `attrib` is handed unchanged to _fkvm_init_seg().
 */
static void
fkvm_init_sys_seg(struct vmcb_seg *seg, uint16_t attrib)
{

	_fkvm_init_seg(seg, attrib);
}
640 fkvm_iopm_alloc(void)
642 return contigmalloc(IOPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
646 fkvm_iopm_init(void *iopm
)
648 memset(iopm
, 0xff, IOPM_SIZE
); /* TODO: we may want to allow access to PC debug port */
652 fkvm_iopm_free(void *iopm
)
654 contigfree(iopm
, IOPM_SIZE
, M_DEVBUF
);
658 fkvm_msrpm_alloc(void)
660 return contigmalloc(MSRPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
664 fkvm_msrpm_init(void *msrpm
)
666 memset(msrpm
, 0xff, MSRPM_SIZE
); /* TODO: we may want to allow some MSR accesses */
670 fkvm_msrpm_free(void *msrpm
)
672 contigfree(msrpm
, MSRPM_SIZE
, M_DEVBUF
);
676 fkvm_hsave_area_alloc(void)
678 return contigmalloc(PAGE_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
682 fkvm_hsave_area_init(void *hsave_area
)
687 fkvm_hsave_area_free(void *hsave_area
)
689 contigfree(hsave_area
, PAGE_SIZE
, M_DEVBUF
);
692 static struct vmspace
*
693 fkvm_make_vmspace(void)
697 sp
= vmspace_alloc(0, 0xffffffffffffffff);
699 printf("vmspace_alloc failed\n");
707 fkvm_destroy_vmspace(struct vmspace
* sp
)
713 fkvm_vmcb_alloc(void)
715 return contigmalloc(PAGE_SIZE
, M_DEVBUF
, M_ZERO
, 0, -1UL,
720 fkvm_vmcb_init(struct vmcb
*vmcb
)
722 struct vmcb_control_area
*control
= &vmcb
->control
;
723 struct vmcb_save_area
*save
= &vmcb
->save
;
725 control
->intercept_cr_reads
= INTERCEPT_CR4_MASK
;
727 control
->intercept_cr_writes
= INTERCEPT_CR4_MASK
|
730 control
->intercept_dr_reads
= INTERCEPT_DR0_MASK
|
735 control
->intercept_dr_writes
= INTERCEPT_DR0_MASK
|
742 control
->intercept_exceptions
= (1 << IDT_UD
) | // Invalid Opcode
743 (1 << IDT_MC
); // Machine Check
745 control
->intercepts
= INTERCEPT_INTR
|
752 INTERCEPT_IOIO_PROT
|
764 INTERCEPT_MWAIT_UNCOND
;
766 control
->iopm_base_pa
= vtophys(iopm
);
767 control
->msrpm_base_pa
= vtophys(msrpm
);
768 control
->tsc_offset
= 0;
770 /* TODO: remove this once we assign asid's to distinct VM's */
771 control
->guest_asid
= 1;
772 control
->tlb_control
= VMCB_TLB_CONTROL_FLUSH_ALL
;
774 /* let v_tpr default to 0 */
775 /* let v_irq default to 0 */
776 /* let v_intr default to 0 */
778 control
->v_intr_masking
= 1;
780 /* let v_intr_vector default to 0 */
781 /* let intr_shadow default to 0 */
782 /* let exit_code, exit_info_1, exit_info_2, exit_int_info,
783 exit_int_info_err_code default to 0 */
785 control
->nested_ctl
= 1;
787 /* let event_inj default to 0 */
789 // (nested_cr3 is later)
791 /* let lbr_virt_enable default to 0 */
794 fkvm_init_seg(&save
->ds
);
795 fkvm_init_seg(&save
->es
);
796 fkvm_init_seg(&save
->fs
);
797 fkvm_init_seg(&save
->gs
);
798 fkvm_init_seg(&save
->ss
);
800 _fkvm_init_seg(&save
->cs
, VMCB_SELECTOR_READ_MASK
| VMCB_SELECTOR_S_MASK
|
801 VMCB_SELECTOR_CODE_MASK
);
802 save
->cs
.selector
= 0xf000;
803 save
->cs
.base
= 0xffff0000;
805 save
->gdtr
.limit
= 0xffff;
806 save
->idtr
.limit
= 0xffff;
808 fkvm_init_sys_seg(&save
->ldtr
, SDT_SYSLDT
);
809 fkvm_init_sys_seg(&save
->tr
, SDT_SYS286BSY
);
811 save
->g_pat
= PAT_VALUE(PAT_WRITE_BACK
, 0) | PAT_VALUE(PAT_WRITE_THROUGH
, 1) |
812 PAT_VALUE(PAT_UNCACHED
, 2) | PAT_VALUE(PAT_UNCACHEABLE
, 3) |
813 PAT_VALUE(PAT_WRITE_BACK
, 4) | PAT_VALUE(PAT_WRITE_THROUGH
, 5) |
814 PAT_VALUE(PAT_UNCACHED
, 6) | PAT_VALUE(PAT_UNCACHEABLE
, 7);
816 /* CR0 = 6000_0010h at boot */
817 save
->cr0
= CR0_ET
| CR0_NW
| CR0_CD
;
818 save
->dr6
= 0xffff0ff0;
821 save
->rip
= 0x0000fff0;
823 save
->efer
= EFER_SVME
;
827 fkvm_vmcb_free(struct vmcb
*vmcb
)
829 contigfree(vmcb
, PAGE_SIZE
, M_DEVBUF
);
833 fkvm_vcpu_create(struct guestvm
*guest_vm
)
836 vcpu
= malloc(sizeof(struct vcpu
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
838 vcpu
->vmcb
= fkvm_vmcb_alloc();
839 vcpu
->vmcb_pa
= vtophys(vcpu
->vmcb
);
840 printf("vmcb = 0x%p\n", vcpu
->vmcb
);
841 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu
->vmcb_pa
);
843 fkvm_vmcb_init(vcpu
->vmcb
);
844 vcpu
->vmcb
->control
.nested_cr3
= guest_vm
->nested_cr3
;
845 vcpu
->regs
[VCPU_REGS_RIP
] = vcpu
->vmcb
->save
.rip
;
847 vcpu
->guest_vm
= guest_vm
;
853 fkvm_vcpu_destroy(struct vcpu
*vcpu
)
855 fkvm_vmcb_free(vcpu
->vmcb
);
856 free(vcpu
, M_DEVBUF
);
859 static struct guestvm
*
860 fkvm_guestvm_alloc(void)
862 return malloc(sizeof(struct guestvm
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
866 fkvm_guestvm_free(struct guestvm
* guest_vm
)
868 free(guest_vm
, M_DEVBUF
);
872 fkvm_guestvm_add_vcpu(struct guestvm
*guest_vm
, struct vcpu
*vcpu
)
874 guest_vm
->vcpus
[guest_vm
->nr_vcpus
] = vcpu
;
875 guest_vm
->nr_vcpus
++; /* TODO: Probably not safe to increment */
876 /* How about a lock to protect all of this? */
881 fkvm_userpoke(struct thread
*td
, struct fkvm_userpoke_args
*uap
)
883 printf("fkvm_userpoke\n");
888 fkvm_mem_has_entry(vm_map_entry_t expected_entry
, vm_map_t vm_map
, vm_offset_t vaddr
)
890 vm_map_entry_t lookup_entry
;
891 vm_object_t throwaway_object
;
892 vm_pindex_t throwaway_pindex
;
893 vm_prot_t throwaway_prot
;
894 boolean_t throwaway_wired
;
897 error
= vm_map_lookup(&vm_map
, /* IN/OUT */
899 VM_PROT_READ
|VM_PROT_WRITE
,
900 &lookup_entry
, /* OUT */
901 &throwaway_object
, /* OUT */
902 &throwaway_pindex
, /* OUT */
903 &throwaway_prot
, /* OUT */
904 &throwaway_wired
); /* OUT */
905 if (error
!= KERN_SUCCESS
)
907 vm_map_lookup_done(vm_map
, lookup_entry
);
908 return (lookup_entry
== expected_entry
);
912 fkvm_guest_check_range(struct guestvm
*guest_vm
, uint64_t start
, uint64_t end
)
914 vm_map_t guest_vm_map
;
915 vm_map_entry_t lookup_entry
;
916 vm_object_t throwaway_object
;
917 vm_pindex_t throwaway_pindex
;
918 vm_prot_t throwaway_prot
;
919 boolean_t throwaway_wired
;
923 guest_vm_map
= &guest_vm
->sp
->vm_map
;
925 error
= vm_map_lookup(&guest_vm_map
, /* IN/OUT */
927 VM_PROT_READ
|VM_PROT_WRITE
,
928 &lookup_entry
, /* OUT */
929 &throwaway_object
, /* OUT */
930 &throwaway_pindex
, /* OUT */
931 &throwaway_prot
, /* OUT */
932 &throwaway_wired
); /* OUT */
933 if (error
!= KERN_SUCCESS
)
936 if (fkvm_mem_has_entry(lookup_entry
, guest_vm_map
, end
))
941 vm_map_lookup_done(guest_vm_map
, lookup_entry
);
946 /* This function can only be called with multiples of page sizes */
947 /* vaddr as NULL overloads to fkvm_guest_check_range */
949 fkvm_set_user_mem_region(struct thread
*td
, struct fkvm_set_user_mem_region_args
*uap
)
951 struct guestvm
*guest_vm
= GET_GUESTVM(td
);
956 struct vmspace
*user_vm_space
;
957 vm_map_t user_vm_map
;
959 vm_object_t vm_object
;
960 vm_pindex_t vm_object_pindex
;
961 vm_ooffset_t vm_object_offset
;
962 vm_prot_t throwaway_prot
;
963 boolean_t throwaway_wired
;
964 vm_map_entry_t lookup_entry
;
968 start
= uap
->guest_pa
;
969 end
= uap
->guest_pa
+ uap
->size
- 1;
970 printf("start: 0x%" PRIx64
" bytes\n", start
);
971 printf("end: 0x%" PRIx64
" bytes\n", end
);
974 return fkvm_guest_check_range(guest_vm
, start
, end
);
976 user_vm_space
= td
->td_proc
->p_vmspace
;
977 user_vm_map
= &user_vm_space
->vm_map
;
978 printf("user vm space: %p\n", user_vm_space
);
979 printf("user vm map: %p\n", user_vm_map
);
981 error
= vm_map_lookup(&user_vm_map
, /* IN/OUT */
983 VM_PROT_READ
|VM_PROT_WRITE
,
984 &lookup_entry
, /* OUT */
985 &vm_object
, /* OUT */
986 &vm_object_pindex
, /* OUT */
987 &throwaway_prot
, /* OUT */
988 &throwaway_wired
); /* OUT */
989 if (error
!= KERN_SUCCESS
) {
990 printf("vm_map_lookup failed: %d\n", error
);
994 if (!fkvm_mem_has_entry(lookup_entry
, user_vm_map
, uap
->vaddr
+ uap
->size
)) {
995 printf("end of range not contained in same vm map entry as start\n");
999 printf("vm object: %p\n", vm_object
);
1000 printf(" size: %d pages\n", (int) vm_object
->size
);
1002 vm_object_offset
= IDX_TO_OFF(vm_object_pindex
);
1003 printf("vm_ooffset: 0x%" PRIx64
"\n", vm_object_offset
);
1005 vm_object_reference(vm_object
); // TODO: this might be a mem leak
1007 vm_map_lookup_done(user_vm_map
, lookup_entry
);
1009 error
= vm_map_insert(&guest_vm
->sp
->vm_map
,
1014 VM_PROT_ALL
, VM_PROT_ALL
,
1016 if (error
!= KERN_SUCCESS
) {
1017 printf("vm_map_insert failed: %d\n", error
);
1019 case KERN_INVALID_ADDRESS
:
1032 fkvm_create_vm(struct thread
*td
, struct fkvm_create_vm_args
*uap
)
1035 struct guestvm
*guest_vm
;
1037 printf("SYSCALL : fkvm_create_vm\n");
1039 /* Allocate Guest VM */
1040 guest_vm
= fkvm_guestvm_alloc();
1042 /* Set up the vm address space */
1043 guest_vm
->sp
= fkvm_make_vmspace();
1044 if (guest_vm
->sp
== NULL
) {
1045 fkvm_guestvm_free(guest_vm
);
1048 guest_vm
->nested_cr3
= vtophys(vmspace_pmap(guest_vm
->sp
)->pm_pml4
);
1051 printf(" vm space: %p\n", guest_vm
->sp
);
1052 printf(" vm map: %p\n", &guest_vm
->sp
->vm_map
);
1053 printf(" ncr3: 0x%" PRIx64
"\n", guest_vm
->nested_cr3
);
1055 /* Allocate VCPU0 */
1056 vcpu
= fkvm_vcpu_create(guest_vm
);
1057 fkvm_guestvm_add_vcpu(guest_vm
, vcpu
);
1059 PROC_SET_GUESTVM(td
->td_proc
, guest_vm
);
1060 TD_SET_VCPU(td
, vcpu
);
1062 printf("fkvm_create_vm done\n");
1067 fkvm_destroy_vm(struct thread
*td
, struct fkvm_destroy_vm_args
*uap
)
1069 struct guestvm
*guest_vm
= GET_GUESTVM(td
);
1071 /* Destroy the VCPUs */
1072 while (guest_vm
->nr_vcpus
> 0) {
1073 guest_vm
->nr_vcpus
--;
1074 fkvm_vcpu_destroy(guest_vm
->vcpus
[guest_vm
->nr_vcpus
]);
1075 guest_vm
->vcpus
[guest_vm
->nr_vcpus
] = NULL
;
1078 /* Destroy the vmspace */
1079 fkvm_destroy_vmspace(guest_vm
->sp
);
1081 /* Destroy the Guest VM itself */
1082 fkvm_guestvm_free(guest_vm
);
1088 fkvm_vm_run(struct thread
*td
, struct fkvm_vm_run_args
*uap
)
1090 struct vcpu
*vcpu
= TD_GET_VCPU(td
);
1091 struct guestvm
*guest_vm
= GET_GUESTVM(td
);
1092 struct vmcb
*vmcb
= vcpu
->vmcb
;
1095 fkvm_vcpu_run(vcpu
);
1097 switch (vmcb
->control
.exit_code
) {
1099 case VMCB_EXIT_EXCP_BASE
... (VMCB_EXIT_EXCP_BASE
+ 31): {
1102 excp_vector
= vmcb
->control
.exit_code
- VMCB_EXIT_EXCP_BASE
;
1104 printf("VMCB_EXIT_EXCP_BASE, exception vector: 0x%x\n",
1110 case VMCB_EXIT_INTR
: {
1111 printf("VMCB_EXIT_INTR - nothing to do\n");
1115 case VMCB_EXIT_NPF
: {
1116 /* EXITINFO1 contains fault error code */
1117 /* EXITINFO2 contains the guest physical address causing the fault. */
1119 u_int64_t fault_code
;
1120 u_int64_t fault_gpa
;
1122 vm_prot_t fault_type
;
1126 fault_code
= vmcb
->control
.exit_info_1
;
1127 fault_gpa
= vmcb
->control
.exit_info_2
;
1129 printf("VMCB_EXIT_NPF:\n");
1130 printf("gpa=0x%" PRIx64
"\n", fault_gpa
);
1131 printf("fault code=0x%" PRIx64
" [P=%x, R/W=%x, U/S=%x, I/D=%x]\n",
1133 (fault_code
& PGEX_P
) != 0,
1134 (fault_code
& PGEX_W
) != 0,
1135 (fault_code
& PGEX_U
) != 0,
1136 (fault_code
& PGEX_I
) != 0);
1138 if (fault_code
& PGEX_W
)
1139 fault_type
= VM_PROT_WRITE
;
1140 else if (fault_code
& PGEX_I
)
1141 fault_type
= VM_PROT_EXECUTE
;
1143 fault_type
= VM_PROT_READ
;
1145 fault_flags
= 0; /* TODO: is that right? */
1146 rc
= vm_fault(&guest_vm
->sp
->vm_map
, (fault_gpa
& (~PAGE_MASK
)), fault_type
, fault_flags
);
1147 if (rc
!= KERN_SUCCESS
) {
1148 printf("vm_fault failed: %d\n", rc
);
1153 printf("Unhandled vmexit:\n"
1154 " code: 0x%" PRIx64
"\n"
1155 " info1: 0x%" PRIx64
"\n"
1156 " info2: 0x%" PRIx64
"\n",
1157 vmcb
->control
.exit_code
,
1158 vmcb
->control
.exit_info_1
,
1159 vmcb
->control
.exit_info_2
);
1170 fkvm_create_vcpu(struct thread
*td
, struct fkvm_create_vcpu_args
*uap
)
1172 struct guestvm
*guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
1176 vcpu
= fkvm_vcpu_create(guest_vm
);
1177 fkvm_guestvm_add_vcpu(guest_vm
, vcpu
);
1179 TD_SET_VCPU(td
, vcpu
);
1184 fkvm_load(void *unused
)
1188 printf("fkvm_load\n");
1189 printf("sizeof(struct vmcb) = %" PRIx64
"\n", sizeof(struct vmcb
));
1195 /* TODO: check for the presence of extensions */
1197 /* allocate structures */
1198 hsave_area
= fkvm_hsave_area_alloc();
1199 iopm
= fkvm_iopm_alloc();
1200 msrpm
= fkvm_msrpm_alloc();
1202 /* Initialize structures */
1203 fkvm_hsave_area_init(hsave_area
);
1204 fkvm_iopm_init(iopm
);
1205 fkvm_msrpm_init(msrpm
);
1207 /* Enable SVM in EFER */
1208 efer
= rdmsr(MSR_EFER
);
1209 printf("EFER = %" PRIx64
"\n", efer
);
1210 wrmsr(MSR_EFER
, efer
| EFER_SVME
);
1211 efer
= rdmsr(MSR_EFER
);
1212 printf("new EFER = %" PRIx64
"\n", efer
);
1214 /* Write Host save address in MSR_VM_HSAVE_PA */
1215 wrmsr(MSR_VM_HSAVE_PA
, vtophys(hsave_area
));
1217 SYSINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_load
, NULL
);
1220 fkvm_unload(void *unused
)
1222 printf("fkvm_unload\n");
1224 if (msrpm
!= NULL
) {
1225 fkvm_msrpm_free(iopm
);
1229 fkvm_iopm_free(iopm
);
1232 if (hsave_area
!= NULL
) {
1233 fkvm_hsave_area_free(hsave_area
);
1237 SYSUNINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_unload
, NULL
);