2 * Copyright (c) 2008 The FreeBSD Project
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/malloc.h>
32 #include <sys/sysproto.h>
38 #include <vm/vm_extern.h>
39 #include <vm/vm_map.h>
40 #include <vm/vm_object.h>
41 #include <vm/vm_param.h>
42 #include <machine/_inttypes.h>
43 #include <machine/specialreg.h>
44 #include <machine/segments.h>
45 #include <machine/vmcb.h>
/*
 * Sizes in bytes of the I/O and MSR permission maps that are allocated in
 * fkvm_load() and filled with 0xff by fkvm_iopm_init()/fkvm_msrpm_init().
 */
47 #define IOPM_SIZE (8*1024 + 1) /* TODO: confirm whether this must be 12 Kbytes rather than 8 Kbytes + 1 */
48 #define MSRPM_SIZE (8*1024)
/* I/O permission map; its physical address is stored in each VMCB (iopm_base_pa). */
51 static void *iopm
= NULL
; /* Should I allocate a vm_object_t instead? */
/* MSR permission map; its physical address is stored in each VMCB (msrpm_base_pa). */
52 static void *msrpm
= NULL
; /* Should I allocate a vm_object_t instead? */
/* Host save area; its physical address is written to MSR_VM_HSAVE_PA in fkvm_load(). */
54 static void *hsave_area
= NULL
;
/*
 * Members reached through a struct vcpu pointer in the code below.
 * NOTE(review): the enclosing struct declarations and some members (for
 * example vmcb, cr2, nr_vcpus, sp) are not visible in this listing.
 */
/* Physical address of the VMCB; assigned from vtophys(vmcb) in fkvm_vcpu_run(). */
82 unsigned long vmcb_pa
;
/* Guest register values, indexed by the VCPU_REGS_* constants. */
84 unsigned long regs
[NR_VCPU_REGS
];
/* Value of MSR_GSBASE read on the host before the guest is entered. */
85 u_int64_t host_gs_base
;
/* Back pointer to the guest VM that owns this VCPU. */
89 struct guestvm
*guest_vm
;
/* Members reached through a struct guestvm pointer in the code below. */
/* VCPUs of the guest; fkvm_create_vm() fills slot 0. */
95 struct vcpu
*vcpus
[MAX_VCPUS
];
/* Value returned by fkvm_make_vm_map(): physical address of the guest pmap's PML4. */
101 u_int64_t nested_cr3
;
/*
 * SVM instructions spelled as raw opcode bytes so they can be pasted into
 * inline assembly strings (see the __asm __volatile uses below).
 */
#define	SVM_VMLOAD	".byte 0x0f, 0x01, 0xda"
#define	SVM_VMRUN	".byte 0x0f, 0x01, 0xd8"
#define	SVM_VMSAVE	".byte 0x0f, 0x01, 0xdb"
#define	SVM_CLGI	".byte 0x0f, 0x01, 0xdd"
#define	SVM_STGI	".byte 0x0f, 0x01, 0xdc"
#define	SVM_INVLPGA	".byte 0x0f, 0x01, 0xdf"

/*
 * Guest VM owning the VCPU attached to 'thread'.
 *
 * The whole expansion is parenthesized so the result can be used inside a
 * larger expression such as GET_GUESTVM(td)->nr_vcpus; without the outer
 * parentheses the cast would bind to the trailing member access instead.
 * The macro dereferences the thread's VCPU pointer without checking it, so
 * it must only be used on threads that have a VCPU attached.
 */
#define	GET_GUESTVM(thread)						\
	((struct guestvm *)((struct vcpu *)TD_GET_VCPU(thread))->guest_vm)
114 print_vmcb_seg(struct vmcb_seg
* vmcb_seg
, const char* name
)
116 printf("%s Selector\n", name
);
117 printf("Selector : %" PRIx16
"\n", vmcb_seg
->selector
);
118 printf("Attributes : %" PRIx16
"\n", vmcb_seg
->attrib
);
119 printf("Limit : %" PRIx32
"\n", vmcb_seg
->limit
);
120 printf("Base Address : %" PRIx64
"\n", vmcb_seg
->base
);
125 print_vmcb(struct vmcb
*vmcb
)
127 printf("VMCB Control Area\n");
128 printf("Intercept CR Reads : %" PRIx16
"\n", vmcb
->control
.intercept_cr_reads
);
129 printf("Intercept CR Writes : %" PRIx16
"\n", vmcb
->control
.intercept_cr_writes
);
130 printf("Intercept DR Reads : %" PRIx16
"\n", vmcb
->control
.intercept_dr_reads
);
131 printf("Intercept DR Writes : %" PRIx16
"\n", vmcb
->control
.intercept_dr_writes
);
132 printf("Intercept Exceptions : %" PRIx32
"\n", vmcb
->control
.intercept_exceptions
);
133 printf("Intercepts : %" PRIx64
"\n", vmcb
->control
.intercepts
);
134 printf("Reserved 1: \n");
135 for(int i
=0; i
< 44; i
++) {
136 printf("%" PRIx8
"", vmcb
->control
.reserved_1
[i
]); /* Should be Zero */
139 printf("IOPM Base PA : %" PRIx64
"\n", vmcb
->control
.iopm_base_pa
);
140 printf("MSRPM Base PA : %" PRIx64
"\n", vmcb
->control
.msrpm_base_pa
);
141 printf("TSC Offset : %" PRIx64
"\n", vmcb
->control
.tsc_offset
);
142 printf("Guest ASID : %" PRIx32
"\n", vmcb
->control
.guest_asid
);
143 printf("TLB Control : %" PRIx8
"\n", vmcb
->control
.tlb_control
);
144 printf("Reserved 2 : \n");
145 for(int i
=0; i
< 3; i
++) {
146 printf("%" PRIx8
"", vmcb
->control
.reserved_1
[i
]); /* Should be Zero */
149 printf("Virtual TPR : %" PRIx8
"\n", vmcb
->control
.v_tpr
);
150 printf("Virtual IRQ : %" PRIx8
"\n", vmcb
->control
.v_irq
);
151 printf("Virtual Interrupt : %" PRIx8
"\n", vmcb
->control
.v_intr
);
152 printf("Virtual Interrupt Masking: %" PRIx8
"\n", vmcb
->control
.v_intr_masking
);
153 printf("Virtual Interrupt Vector : %" PRIx8
"\n", vmcb
->control
.v_intr_vector
);
154 printf("Reserved 6 : \n");
155 for(int i
=0; i
< 3; i
++) {
156 printf("%" PRIx8
"", vmcb
->control
.reserved_6
[i
]); /* Should be Zero */
159 printf("Interrupt Shadow : %" PRIx8
"\n", vmcb
->control
.intr_shadow
);
160 printf("Reserved 7 : \n");
161 for(int i
=0; i
< 7; i
++) {
162 printf("%" PRIx8
"", vmcb
->control
.reserved_7
[i
]); /* Should be Zero */
165 printf("Exit Code : %" PRIx64
"\n", vmcb
->control
.exit_code
);
166 printf("Exit Info 1 : %" PRIx64
"\n", vmcb
->control
.exit_info_1
);
167 printf("Exit Info 2 : %" PRIx64
"\n", vmcb
->control
.exit_info_2
);
168 printf("Exit Interrupt Info : %" PRIx32
"\n", vmcb
->control
.exit_int_info
);
169 printf("Exit Interrupt Info Err Code: %" PRIx32
"\n", vmcb
->control
.exit_int_info_err_code
);
170 printf("Nested Control : %" PRIx64
"\n", vmcb
->control
.nested_ctl
);
171 printf("Reserved 8 : \n");
172 for(int i
=0; i
< 16; i
++) {
173 printf("%" PRIx8
"", vmcb
->control
.reserved_8
[i
]); /* Should be Zero */
176 printf("Event Injection : %" PRIx64
"\n", vmcb
->control
.event_inj
);
177 printf("Nested CR3 : %" PRIx64
"\n", vmcb
->control
.nested_cr3
);
178 printf("LBR Virtualization Enable: %" PRIx64
"\n", vmcb
->control
.lbr_virt_enable
);
179 printf("Reserved 9 : \n");
180 for(int i
=0; i
< 832; i
++) {
181 printf("%" PRIx8
"", vmcb
->control
.reserved_9
[i
]); /* Should be Zero */
187 printf("VMCB Save Area\n");
188 print_vmcb_seg(&(vmcb
->save
.es
), "ES");
189 print_vmcb_seg(&(vmcb
->save
.es
), "CS");
190 print_vmcb_seg(&(vmcb
->save
.es
), "SS");
191 print_vmcb_seg(&(vmcb
->save
.es
), "DS");
192 print_vmcb_seg(&(vmcb
->save
.es
), "FS");
193 print_vmcb_seg(&(vmcb
->save
.es
), "GS");
194 print_vmcb_seg(&(vmcb
->save
.es
), "GDTR");
195 print_vmcb_seg(&(vmcb
->save
.es
), "LDTR");
196 print_vmcb_seg(&(vmcb
->save
.es
), "IDTR");
197 print_vmcb_seg(&(vmcb
->save
.es
), "TR");
198 printf("Reserved 1 : \n");
199 for(int i
=0; i
< 43; i
++) {
200 printf("%" PRIx8
"", vmcb
->save
.reserved_1
[i
]); /* Should be Zero */
203 printf("Current Processor Level : %" PRIx8
"\n", vmcb
->save
.cpl
);
204 printf("Reserved 2 : \n");
205 for(int i
=0; i
< 4; i
++) {
206 printf("%" PRIx8
"", vmcb
->save
.reserved_2
[i
]); /* Should be Zero */
209 printf("EFER : %" PRIx64
"\n", vmcb
->save
.efer
);
210 printf("Reserved 3 : \n");
211 for(int i
=0; i
< 112; i
++) {
212 printf("%" PRIx8
"", vmcb
->save
.reserved_3
[i
]); /* Should be Zero */
215 printf("Control Register 4 : %" PRIx64
"\n", vmcb
->save
.cr4
);
216 printf("Control Register 3 : %" PRIx64
"\n", vmcb
->save
.cr3
);
217 printf("Control Register 0 : %" PRIx64
"\n", vmcb
->save
.cr0
);
218 printf("Debug Register 7 : %" PRIx64
"\n", vmcb
->save
.dr7
);
219 printf("Debug Register 6 : %" PRIx64
"\n", vmcb
->save
.dr6
);
220 printf("RFlags : %" PRIx64
"\n", vmcb
->save
.rflags
);
221 printf("RIP : %" PRIx64
"\n", vmcb
->save
.rip
);
222 printf("Reserved 4 : \n");
223 for(int i
=0; i
< 88; i
++) {
224 printf("%" PRIx8
"", vmcb
->save
.reserved_4
[i
]); /* Should be Zero */
227 printf("RSP : %" PRIx64
"\n", vmcb
->save
.rsp
);
228 printf("Reserved 5 : \n");
229 for(int i
=0; i
< 24; i
++) {
230 printf("%" PRIx8
"", vmcb
->save
.reserved_5
[i
]); /* Should be Zero */
233 printf("RAX : %" PRIx64
"\n", vmcb
->save
.rax
);
234 printf("STAR : %" PRIx64
"\n", vmcb
->save
.star
);
235 printf("LSTAR : %" PRIx64
"\n", vmcb
->save
.lstar
);
236 printf("CSTAR : %" PRIx64
"\n", vmcb
->save
.cstar
);
237 printf("SFMASK : %" PRIx64
"\n", vmcb
->save
.sfmask
);
238 printf("Kernel GS Base : %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
239 printf("SYSENTER CS : %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
240 printf("SYSENTER ESP : %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
241 printf("SYSENTER EIP : %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
242 printf("Control Register 2 : %" PRIx64
"\n", vmcb
->save
.cr2
);
243 printf("Reserved 6 : \n");
244 for(int i
=0; i
< 32; i
++) {
245 printf("%" PRIx8
"", vmcb
->save
.reserved_6
[i
]); /* Should be Zero */
248 printf("Global PAT : %" PRIx64
"\n", vmcb
->save
.g_pat
);
249 printf("Debug Control : %" PRIx64
"\n", vmcb
->save
.dbg_ctl
);
250 printf("BR From : %" PRIx64
"\n", vmcb
->save
.br_from
);
251 printf("BR To : %" PRIx64
"\n", vmcb
->save
.br_to
);
252 printf("Last Exception From : %" PRIx64
"\n", vmcb
->save
.last_excp_from
);
253 printf("Last Exception To : %" PRIx64
"\n", vmcb
->save
.last_excp_to
);
259 print_tss_desc(struct system_segment_descriptor
*tss_desc
)
261 printf("TSS desc @ %p:\n", tss_desc
);
262 printf("sd_lolimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lolimit
);
263 printf("sd_lobase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lobase
);
264 printf("sd_type: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_type
);
265 printf("sd_dpl: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_dpl
);
266 printf("sd_p: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_p
);
267 printf("sd_hilimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hilimit
);
268 printf("sd_xx0: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx0
);
269 printf("sd_gran: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_gran
);
270 printf("sd_hibase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hibase
);
271 printf("sd_xx1: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx1
);
272 printf("sd_mbz: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_mbz
);
273 printf("sd_xx2: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx2
);
278 print_tss(struct system_segment_descriptor
*tss_desc
)
284 base
= (u_int32_t
*) ((((u_int64_t
) tss_desc
->sd_hibase
) << 24) | ((u_int64_t
) tss_desc
->sd_lobase
));
285 limit
= ((tss_desc
->sd_hilimit
<< 16) | tss_desc
->sd_lolimit
) / 4;
287 printf("TSS: @ %p\n", base
);
288 for (i
= 0; i
<= limit
; i
++)
289 printf("%x: 0x%" PRIx32
"\n", i
, base
[i
]);
294 print_vmcb_save_area(struct vmcb
*vmcb
)
296 printf("VMCB save area:\n");
297 printf("fs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
298 vmcb
->save
.fs
.selector
,
299 vmcb
->save
.fs
.attrib
,
302 printf("gs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
303 vmcb
->save
.gs
.selector
,
304 vmcb
->save
.gs
.attrib
,
307 printf("tr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
308 vmcb
->save
.tr
.selector
,
309 vmcb
->save
.tr
.attrib
,
312 printf("ldtr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
313 vmcb
->save
.ldtr
.selector
,
314 vmcb
->save
.ldtr
.attrib
,
315 vmcb
->save
.ldtr
.limit
,
316 vmcb
->save
.ldtr
.base
);
317 printf("kernel_gs_base: %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
318 printf("star: %" PRIx64
"\n", vmcb
->save
.star
);
319 printf("lstar: %" PRIx64
"\n", vmcb
->save
.lstar
);
320 printf("cstar: %" PRIx64
"\n", vmcb
->save
.cstar
);
321 printf("sfmask: %" PRIx64
"\n", vmcb
->save
.sfmask
);
322 printf("sysenter_cs: %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
323 printf("sysenter_esp: %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
324 printf("sysenter_eip: %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
329 vmrun_assert(struct vmcb
*vmcb
)
331 #define A(cond) do { if ((cond)) { printf("Error: assertion not met on line %d\n", __LINE__); bad = 1; } } while (0)
337 // The following are illegal:
340 A((vmcb
->save
.efer
& 0x0000000000001000) == 0);
342 // CR0.CD is zero and CR0.NW is set
343 A( ((vmcb
->save
.cr0
& 0x0000000040000000) == 0) &&
344 ((vmcb
->save
.cr0
& 0x0000000020000000) != 0));
346 // CR0[63:32] are not zero.
347 A((vmcb
->save
.cr0
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
349 // Any MBZ bit of CR3 is set.
350 A((vmcb
->save
.cr3
& 0xFFF0000000000000) != 0);
352 // CR4[63:11] are not zero.
353 A((vmcb
->save
.cr4
& 0xFFFFFFFFFFFFF800) == 0xFFFFFFFFFFFFF800);
355 // DR6[63:32] are not zero.
356 A((vmcb
->save
.dr6
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
358 // DR7[63:32] are not zero.
359 A((vmcb
->save
.dr7
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
361 // EFER[63:15] are not zero.
362 A((vmcb
->save
.efer
& 0xFFFFFFFFFFFF8000) == 0xFFFFFFFFFFF8000);
364 // EFER.LMA or EFER.LME is non-zero and this processor does not support long mode.
365 //// A((vmcb->save.efer & 0x0000000000000500) != 0);
367 // EFER.LME and CR0.PG are both set and CR4.PAE is zero.
368 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
369 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
370 ((vmcb
->save
.cr4
& 0x0000000000000020) != 0));
372 // EFER.LME and CR0.PG are both non-zero and CR0.PE is zero.
373 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
374 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
375 ((vmcb
->save
.cr0
& 0x0000000000000001) == 0));
377 // EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
378 // cs.attrib = concat 55-52 and 47-40 (p372 v2)
379 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
380 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
381 ((vmcb
->save
.cr4
& 0x0000000000000020) != 0) &&
382 ((vmcb
->save
.cs
.attrib
& 0x0200) != 0) &&
383 ((vmcb
->save
.cs
.attrib
& 0x0400) != 0));
385 // The VMRUN intercept bit is clear.
386 A((vmcb
->control
.intercepts
& 0x0000000100000000) == 0);
388 // The MSR or IOIO intercept tables extend to a physical address that is
389 // greater than or equal to the maximum supported physical address.
391 // Illegal event injection (see Section 15.19 on page 391).
393 // ASID is equal to zero.
394 A(vmcb
->control
.guest_asid
== 0);
396 // VMRUN can load a guest value of CR0 with PE = 0 but PG = 1, a
397 // combination that is otherwise illegal (see Section 15.18).
399 // In addition to consistency checks, VMRUN and #VMEXIT canonicalize (i.e.,
400 // sign-extend to 63 bits) all base addresses in the segment registers
401 // that have been loaded.
/*
 * Run the guest described by 'vmcb' on 'vcpu' once and return after the
 * guest exits.  Steps visible in this listing:
 *   - the VMCB is checked with vmrun_assert();
 *   - RAX, RSP, RIP and CR2 are copied from the vcpu into the VMCB save area;
 *   - host MSR_GSBASE and the STAR/LSTAR/CSTAR/SF_MASK MSRs are sampled;
 *   - with global interrupts disabled (CLGI), the remaining guest registers
 *     are loaded from vcpu->regs, the guest is entered, and the registers
 *     are stored back to vcpu->regs afterwards;
 *   - CR2, RAX, RSP and RIP are copied back from the VMCB, host selectors
 *     and MSRs are restored, and global interrupts are re-enabled (STGI).
 * NOTE(review): several statements of the original function (including the
 * VMLOAD/VMRUN/VMSAVE sequence itself) are not visible in this listing.
 */
409 fkvm_vcpu_run(struct vcpu
*vcpu
, struct vmcb
*vmcb
)
418 u_short ldt_selector
;
420 unsigned long host_cr2
;
421 unsigned long host_dr6
;
422 unsigned long host_dr7
;
424 struct system_segment_descriptor
*tss_desc
;
427 printf("begin fkvm_vcpu_run\n");
/* A non-zero result means the VMCB holds state that is illegal for VMRUN. */
429 if (vmrun_assert(vmcb
))
/* Descriptor of the host TSS in the GDT; its type is rewritten after the guest exits. */
433 tss_desc
= (struct system_segment_descriptor
*) (&gdt
[GPROC0_SEL
]);
434 sel
= GSEL(GPROC0_SEL
, SEL_KPL
);
436 printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64
"\n", sel
);
437 print_tss_desc(tss_desc
);
440 print_vmcb_save_area(vmcb
);
/* The assembly below loads the VMCB physical address from vcpu->vmcb_pa. */
443 vcpu
->vmcb_pa
= vtophys(vmcb
);
444 printf("vmcb = 0x%p\n", vmcb
);
445 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu
->vmcb_pa
);
/* RAX, RSP and RIP travel through the VMCB save area rather than through the register loads below. */
447 vmcb
->save
.rax
= vcpu
->regs
[VCPU_REGS_RAX
];
448 vmcb
->save
.rsp
= vcpu
->regs
[VCPU_REGS_RSP
];
449 vmcb
->save
.rip
= vcpu
->regs
[VCPU_REGS_RIP
];
451 /* meh: kvm has pre_svm_run(svm); */
/* Remember the host GS base so it can be written back after the guest exits. */
453 vcpu
->host_gs_base
= rdmsr(MSR_GSBASE
);
454 printf("host_gs_base: 0x%" PRIx64
"\n", vcpu
->host_gs_base
);
458 ldt_selector
= rldt();
459 printf("fs selector: %hx\n", fs_selector
);
460 printf("gs selector: %hx\n", gs_selector
);
461 printf("ldt selector: %hx\n", ldt_selector
);
468 vmcb
->save
.cr2
= vcpu
->cr2
;
471 /* meh: dr7? db_regs? */
473 printf("MSR_STAR: %" PRIx64
"\n", rdmsr(MSR_STAR
));
474 printf("MSR_LSTAR: %" PRIx64
"\n", rdmsr(MSR_LSTAR
));
475 printf("MSR_CSTAR: %" PRIx64
"\n", rdmsr(MSR_CSTAR
));
476 printf("MSR_SF_MASK: %" PRIx64
"\n", rdmsr(MSR_SF_MASK
));
/* Host SYSCALL MSR values, written back with wrmsr() after the guest exits. */
478 star
= rdmsr(MSR_STAR
);
479 lstar
= rdmsr(MSR_LSTAR
);
480 cstar
= rdmsr(MSR_CSTAR
);
481 sfmask
= rdmsr(MSR_SF_MASK
);
/* Disable global interrupts for the duration of the world switch. */
485 __asm
__volatile (SVM_CLGI
);
/* Load guest registers from vcpu->regs; [svm] is the base the offsets below are relative to. */
492 "mov %c[rbx](%[svm]), %%rbx \n\t"
493 "mov %c[rcx](%[svm]), %%rcx \n\t"
494 "mov %c[rdx](%[svm]), %%rdx \n\t"
495 "mov %c[rsi](%[svm]), %%rsi \n\t"
496 "mov %c[rdi](%[svm]), %%rdi \n\t"
497 "mov %c[rbp](%[svm]), %%rbp \n\t"
498 "mov %c[r8](%[svm]), %%r8 \n\t"
499 "mov %c[r9](%[svm]), %%r9 \n\t"
500 "mov %c[r10](%[svm]), %%r10 \n\t"
501 "mov %c[r11](%[svm]), %%r11 \n\t"
502 "mov %c[r12](%[svm]), %%r12 \n\t"
503 "mov %c[r13](%[svm]), %%r13 \n\t"
504 "mov %c[r14](%[svm]), %%r14 \n\t"
505 "mov %c[r15](%[svm]), %%r15 \n\t"
507 /* Enter guest mode */
/* RAX receives the VMCB physical address stored in vcpu->vmcb_pa. */
509 "mov %c[vmcb](%[svm]), %%rax \n\t"
515 /* Save guest registers, load host registers */
516 "mov %%rbx, %c[rbx](%[svm]) \n\t"
517 "mov %%rcx, %c[rcx](%[svm]) \n\t"
518 "mov %%rdx, %c[rdx](%[svm]) \n\t"
519 "mov %%rsi, %c[rsi](%[svm]) \n\t"
520 "mov %%rdi, %c[rdi](%[svm]) \n\t"
521 "mov %%rbp, %c[rbp](%[svm]) \n\t"
522 "mov %%r8, %c[r8](%[svm]) \n\t"
523 "mov %%r9, %c[r9](%[svm]) \n\t"
524 "mov %%r10, %c[r10](%[svm]) \n\t"
525 "mov %%r11, %c[r11](%[svm]) \n\t"
526 "mov %%r12, %c[r12](%[svm]) \n\t"
527 "mov %%r13, %c[r13](%[svm]) \n\t"
528 "mov %%r14, %c[r14](%[svm]) \n\t"
529 "mov %%r15, %c[r15](%[svm]) \n\t"
/* Immediate operands: byte offsets of the fields inside struct vcpu. */
533 [vmcb
]"i"(offsetof(struct vcpu
, vmcb_pa
)),
534 [rbx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBX
])),
535 [rcx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RCX
])),
536 [rdx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDX
])),
537 [rsi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RSI
])),
538 [rdi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDI
])),
539 [rbp
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBP
])),
540 [r8
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R8
])),
541 [r9
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R9
])),
542 [r10
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R10
])),
543 [r11
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R11
])),
544 [r12
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R12
])),
545 [r13
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R13
])),
546 [r14
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R14
])),
547 [r15
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R15
]))
/* Clobber list: registers overwritten with guest values by the assembly above. */
549 "rbx", "rcx", "rdx", "rsi", "rdi",
550 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
554 /* meh: dr7? db_regs? */
/* Copy the guest state that lives in the VMCB back into the vcpu. */
556 vcpu
->cr2
= vmcb
->save
.cr2
;
558 vcpu
->regs
[VCPU_REGS_RAX
] = vmcb
->save
.rax
;
559 vcpu
->regs
[VCPU_REGS_RSP
] = vmcb
->save
.rsp
;
560 vcpu
->regs
[VCPU_REGS_RIP
] = vmcb
->save
.rip
;
/* Restore the host segment selectors and GS base sampled before the run. */
567 load_fs(fs_selector
);
568 load_gs(gs_selector
);
571 wrmsr(MSR_GSBASE
, vcpu
->host_gs_base
);
/* NOTE(review): presumably resets the descriptor type so the task register can be reloaded - the reload itself is not visible here. */
573 tss_desc
->sd_type
= SDT_SYSTSS
;
576 wrmsr(MSR_STAR
, star
);
577 wrmsr(MSR_LSTAR
, lstar
);
578 wrmsr(MSR_CSTAR
, cstar
);
579 wrmsr(MSR_SF_MASK
, sfmask
);
/* Re-enable global interrupts now that host state has been written back. */
583 __asm
__volatile (SVM_STGI
);
587 printf("exit_code: %" PRIx64
"\n", vmcb
->control
.exit_code
);
589 printf("MSR_STAR: %" PRIx64
"\n", rdmsr(MSR_STAR
));
590 printf("MSR_LSTAR: %" PRIx64
"\n", rdmsr(MSR_LSTAR
));
591 printf("MSR_CSTAR: %" PRIx64
"\n", rdmsr(MSR_CSTAR
));
592 printf("MSR_SF_MASK: %" PRIx64
"\n", rdmsr(MSR_SF_MASK
));
596 ldt_selector
= rldt();
597 printf("fs selector: %hx\n", fs_selector
);
598 printf("gs selector: %hx\n", gs_selector
);
599 printf("ldt selector: %hx\n", ldt_selector
);
/* Re-read after the restore above; the value is only printed here. */
601 vcpu
->host_gs_base
= rdmsr(MSR_GSBASE
);
602 printf("host_gs_base: 0x%" PRIx64
"\n", vcpu
->host_gs_base
);
604 print_tss_desc(tss_desc
);
607 print_vmcb_save_area(vmcb
);
/*
 * Common segment initializer: sets the attribute word of 'seg' to
 * VMCB_SELECTOR_P_MASK combined with the caller-supplied 'attrib' bits.
 * NOTE(review): further statements of this function are not visible in
 * this listing.
 */
615 _fkvm_init_seg(struct vmcb_seg
*seg
, uint16_t attrib
)
618 seg
->attrib
= VMCB_SELECTOR_P_MASK
| attrib
;
624 fkvm_init_seg(struct vmcb_seg
*seg
)
626 _fkvm_init_seg(seg
, VMCB_SELECTOR_S_MASK
| VMCB_SELECTOR_WRITE_MASK
);
/*
 * Initialize a system segment (used for ldtr and tr): forwards 'attrib',
 * the descriptor type supplied by the caller, to _fkvm_init_seg() unchanged.
 */
static void
fkvm_init_sys_seg(struct vmcb_seg *seg, uint16_t attrib)
{
	const uint16_t sys_attrib = attrib;

	_fkvm_init_seg(seg, sys_attrib);
}
636 fkvm_iopm_init(void *iopm
)
638 memset(iopm
, 0xff, IOPM_SIZE
); /* TODO: we may want to allow access to PC debug port */
642 fkvm_msrpm_init(void *msrpm
)
644 memset(msrpm
, 0xff, MSRPM_SIZE
); /* TODO: we may want to allow some MSR accesses */
/*
 * Allocate a vmspace for a guest and return the physical address of its
 * pmap's PML4, which the caller stores as the nested CR3.
 * NOTE(review): the failure branch and the store through 'sp' are not
 * visible in this listing; confirm what is returned when vmspace_alloc()
 * fails, because the caller uses the result without checking it.
 */
649 fkvm_make_vm_map(struct vmspace
**sp
)
/* Address range of the new vmspace: 0 up to the maximum 64-bit address. */
653 _sp
= vmspace_alloc(0, 0xffffffffffffffff);
655 printf("vmspace_alloc failed\n");
660 return vtophys(vmspace_pmap(_sp
)->pm_pml4
);
/*
 * Fill in a VMCB with its initial control settings and guest register state.
 * Fields described as "default to 0" are left untouched here; the VMCB is
 * allocated with M_ZERO by the callers visible in this file.
 * NOTE(review): parts of the intercept lists are not visible in this listing.
 */
673 fkvm_vmcb_init(struct vmcb
*vmcb
)
675 struct vmcb_control_area
*control
= &vmcb
->control
;
676 struct vmcb_save_area
*save
= &vmcb
->save
;
678 control
->intercept_cr_reads
= INTERCEPT_CR4_MASK
;
680 control
->intercept_cr_writes
= INTERCEPT_CR4_MASK
|
683 control
->intercept_dr_reads
= INTERCEPT_DR0_MASK
|
688 control
->intercept_dr_writes
= INTERCEPT_DR0_MASK
|
695 control
->intercept_exceptions
= (1 << IDT_UD
) | // Invalid Opcode
696 (1 << IDT_MC
); // Machine Check
698 control
->intercepts
= INTERCEPT_INTR
|
705 INTERCEPT_IOIO_PROT
|
717 INTERCEPT_MWAIT_UNCOND
;
/* Permission maps are the file-scope buffers allocated in fkvm_load(). */
719 control
->iopm_base_pa
= vtophys(iopm
);
720 control
->msrpm_base_pa
= vtophys(msrpm
);
721 control
->tsc_offset
= 0;
723 /* TODO: remove this once we assign asid's to distinct VM's */
724 control
->guest_asid
= 1;
725 control
->tlb_control
= VMCB_TLB_CONTROL_FLUSH_ALL
;
727 /* let v_tpr default to 0 */
728 /* let v_irq default to 0 */
729 /* let v_intr default to 0 */
731 control
->v_intr_masking
= 1;
733 /* let v_intr_vector default to 0 */
734 /* let intr_shadow default to 0 */
735 /* let exit_code, exit_info_1, exit_info_2, exit_int_info,
736 exit_int_info_err_code default to 0 */
/* NOTE(review): presumably bit 0 enables nested paging - confirm against the VMCB layout. */
738 control
->nested_ctl
= 1;
740 /* let event_inj default to 0 */
742 // (nested_cr3 is later)
744 /* let lbr_virt_enable default to 0 */
747 fkvm_init_seg(&save
->ds
);
748 fkvm_init_seg(&save
->es
);
749 fkvm_init_seg(&save
->fs
);
750 fkvm_init_seg(&save
->gs
);
751 fkvm_init_seg(&save
->ss
);
753 _fkvm_init_seg(&save
->cs
, VMCB_SELECTOR_READ_MASK
| VMCB_SELECTOR_S_MASK
|
754 VMCB_SELECTOR_CODE_MASK
);
/* With rip = 0xfff0 below, cs.base + rip = 0xfffffff0. */
755 save
->cs
.selector
= 0xf000;
756 save
->cs
.base
= 0xffff0000;
758 save
->gdtr
.limit
= 0xffff;
759 save
->idtr
.limit
= 0xffff;
761 fkvm_init_sys_seg(&save
->ldtr
, SDT_SYSLDT
);
762 fkvm_init_sys_seg(&save
->tr
, SDT_SYS286BSY
);
/* Guest PAT: entries 0-3 are WB, WT, UC-, UC and entries 4-7 repeat them. */
764 save
->g_pat
= PAT_VALUE(PAT_WRITE_BACK
, 0) | PAT_VALUE(PAT_WRITE_THROUGH
, 1) |
765 PAT_VALUE(PAT_UNCACHED
, 2) | PAT_VALUE(PAT_UNCACHEABLE
, 3) |
766 PAT_VALUE(PAT_WRITE_BACK
, 4) | PAT_VALUE(PAT_WRITE_THROUGH
, 5) |
767 PAT_VALUE(PAT_UNCACHED
, 6) | PAT_VALUE(PAT_UNCACHEABLE
, 7);
769 /* CR0 = 6000_0010h at boot */
770 save
->cr0
= CR0_ET
| CR0_NW
| CR0_CD
;
771 save
->dr6
= 0xffff0ff0;
774 save
->rip
= 0x0000fff0;
/* vmrun_assert() treats a guest EFER without SVME as illegal. */
776 save
->efer
= EFER_SVME
;
778 //control->nested_cr3 = fkvm_make_vm_map();
/* NOTE(review): nested_cr3 has not been assigned at this point, so this prints whatever the VMCB already held. */
780 printf("ncr3: %" PRIx64
"\n", control
->nested_cr3
);
/*
 * fkvm_userpoke system call handler.  The only visible action is a console
 * message.  NOTE(review): the rest of the body, including the return value,
 * is not visible in this listing.
 */
785 fkvm_userpoke(struct thread
*td
, struct fkvm_userpoke_args
*uap
)
787 printf("fkvm_userpoke\n");
/*
 * fkvm_set_user_mem_region system call handler: maps the VM object behind
 * the shared-memory descriptor uap->fd into the calling thread's guest
 * address space at [uap->guest_pa, uap->guest_pa + uap->size).
 * NOTE(review): several statements (declarations, error handling, some
 * vm_map_insert() arguments, the return) are not visible in this listing.
 */
792 /* This function can only be called with multiples of page sizes */
794 fkvm_set_user_mem_region(struct thread
*td
, struct fkvm_set_user_mem_region_args
*uap
)
/* NOTE(review): GET_GUESTVM() dereferences the thread's VCPU pointer without a NULL check. */
796 struct guestvm
*guest_vm
= GET_GUESTVM(td
);
803 error
= fget(td
, uap
->fd
, &fp
);
/* NOTE(review): the (int) casts below truncate large sizes and addresses in the debug output. */
809 printf(" size: %d bytes\n", (int) shmfd
->shm_size
);
810 printf(" vm object: %p\n", shmfd
->shm_object
);
811 printf(" size: %d pages\n", (int) shmfd
->shm_object
->size
);
813 start
= uap
->guest_pa
;
814 end
= uap
->guest_pa
+ uap
->size
;
815 printf("start: %d bytes\n", (int) start
);
816 printf("end: %d bytes\n", (int) end
);
/* Take a reference on the object before inserting it into the guest map. */
818 vm_object_reference(shmfd
->shm_object
); // TODO: this might be a mem leak
819 error
= vm_map_insert(&guest_vm
->sp
->vm_map
,
824 VM_PROT_ALL
, VM_PROT_ALL
,
826 if (error
!= KERN_SUCCESS
) {
827 printf("vm_map_insert failed: %d\n", error
);
835 fkvm_create_vm(struct thread
*td
, struct fkvm_create_vm_args
*uap
)
838 struct guestvm
*guest_vm
;
840 printf("SYSCALL : fkvm_create_vm\n");
842 /* Allocate Guest VM */
843 guest_vm
= (struct guestvm
*)malloc(sizeof(struct guestvm
), M_DEVBUF
,
848 /* TODO: Set up the vm address space */
851 vcpu
= (struct vcpu
*)malloc(sizeof(struct vcpu
), M_DEVBUF
, M_WAITOK
);
855 guest_vm
->vcpus
[0] = vcpu
;
856 guest_vm
->nr_vcpus
= 1;
857 vcpu
->guest_vm
= guest_vm
;
859 TD_SET_VCPU(td
, vcpu
);
862 vcpu
->vmcb
= (struct vmcb
*)contigmalloc(PAGE_SIZE
, M_DEVBUF
, M_ZERO
, 0, -1UL,
864 if(vcpu
->vmcb
== NULL
)
867 /* Initialize VMCB */
868 fkvm_vmcb_init(vcpu
->vmcb
);
871 vcpu
->vmcb
->control
.nested_cr3
= fkvm_make_vm_map(&guest_vm
->sp
);
872 guest_vm
->nested_cr3
= vcpu
->vmcb
->control
.nested_cr3
;
873 printf("fkvm_create_vm done. ncr3 %" PRIx64
"\n", guest_vm
->nested_cr3
);
877 contigfree(vcpu
, PAGE_SIZE
, M_DEVBUF
);
879 contigfree(guest_vm
, PAGE_SIZE
, M_DEVBUF
);
885 fkvm_destroy_vm(struct thread
*td
, struct fkvm_destroy_vm_args
*uap
)
887 struct guestvm
*guest_vm
= GET_GUESTVM(td
);
890 /* Destroy the VCPUs */
891 for(i
= 0; i
< guest_vm
->nr_vcpus
; i
++) {
892 if(guest_vm
->vcpus
[i
] != NULL
) {
893 if(guest_vm
->vcpus
[i
]->vmcb
!= NULL
)
894 contigfree(guest_vm
->vcpus
[i
]->vmcb
, PAGE_SIZE
, M_DEVBUF
);
895 contigfree(guest_vm
->vcpus
[i
], PAGE_SIZE
, M_DEVBUF
);
899 /* Destroy the Guest VM itself */
900 contigfree(guest_vm
, PAGE_SIZE
, M_DEVBUF
);
/*
 * fkvm_vm_run system call handler: runs the calling thread's VCPU once with
 * fkvm_vcpu_run() and then dispatches on the VMCB exit code.  Visible cases:
 * intercepted exceptions (logged), external interrupts (nothing to do),
 * nested page faults (resolved through vm_fault() on the guest map), and a
 * fallback that logs the exit code and both exit info words.
 * NOTE(review): declarations, break statements and the return are not
 * visible in this listing.
 */
906 fkvm_vm_run(struct thread
*td
, struct fkvm_vm_run_args
*uap
)
/* NOTE(review): vcpu is dereferenced below without a NULL check; confirm a VM always exists when this is called. */
908 struct vcpu
*vcpu
= TD_GET_VCPU(td
);
909 struct guestvm
*guest_vm
= GET_GUESTVM(td
);
910 struct vmcb
*vmcb
= vcpu
->vmcb
;
912 fkvm_vcpu_run(vcpu
, vmcb
);
914 switch (vmcb
->control
.exit_code
) {
/* GNU case-range extension: covers 32 consecutive exit codes starting at VMCB_EXIT_EXCP_BASE. */
916 case VMCB_EXIT_EXCP_BASE
... (VMCB_EXIT_EXCP_BASE
+ 31): {
/* The vector is the offset of the exit code from VMCB_EXIT_EXCP_BASE. */
919 excp_vector
= vmcb
->control
.exit_code
- VMCB_EXIT_EXCP_BASE
;
921 printf("VMCB_EXIT_EXCP_BASE, exception vector: 0x%x\n",
926 case VMCB_EXIT_INTR
: {
927 printf("VMCB_EXIT_INTR - nothing to do\n");
931 case VMCB_EXIT_NPF
: {
932 /* EXITINFO1 contains fault error code */
933 /* EXITINFO2 contains the guest physical address causing the fault. */
935 u_int64_t fault_code
;
938 vm_prot_t fault_type
;
942 fault_code
= vmcb
->control
.exit_info_1
;
943 fault_gpa
= vmcb
->control
.exit_info_2
;
945 printf("VMCB_EXIT_NPF:\n");
946 printf("gpa=0x%" PRIx64
"\n", fault_gpa
);
947 printf("fault code=0x%" PRIx64
" [P=%x, R/W=%x, U/S=%x, I/D=%x]\n",
949 (fault_code
& PGEX_P
) != 0,
950 (fault_code
& PGEX_W
) != 0,
951 (fault_code
& PGEX_U
) != 0,
952 (fault_code
& PGEX_I
) != 0);
/* Translate the fault error code into the access type requested from vm_fault(); write takes precedence over execute. */
954 if (fault_code
& PGEX_W
)
955 fault_type
= VM_PROT_WRITE
;
956 else if (fault_code
& PGEX_I
)
957 fault_type
= VM_PROT_EXECUTE
;
959 fault_type
= VM_PROT_READ
;
961 fault_flags
= 0; /* TODO: is that right? */
/* Resolve the fault in the guest's address space; a failure is only logged here. */
962 rc
= vm_fault(&guest_vm
->sp
->vm_map
, fault_gpa
, fault_type
, fault_flags
);
963 if (rc
!= KERN_SUCCESS
)
964 printf("vm_fault failed: %d\n", rc
);
968 printf("Unhandled vmexit:\n"
969 " code: 0x%" PRIx64
"\n"
970 " info1: 0x%" PRIx64
"\n"
971 " info2: 0x%" PRIx64
"\n",
972 vmcb
->control
.exit_code
,
973 vmcb
->control
.exit_info_1
,
974 vmcb
->control
.exit_info_2
);
/*
 * fkvm_create_vcpu system call handler: allocates an additional VCPU with
 * its VMCB and links it into a guest VM.
 * NOTE(review): as written, guest_vm is initialized to NULL and no visible
 * statement assigns it before it is dereferenced; unless a hidden line sets
 * it, this handler faults on first use.  Parts of the body are not visible
 * in this listing.
 */
982 fkvm_create_vcpu(struct thread
*td
, struct fkvm_create_vcpu_args
*uap
)
984 struct guestvm
*guest_vm
= NULL
; /* TODO: How to get this? */
/* NOTE(review): allocated without M_ZERO, so the fields of *vcpu start with unspecified contents. */
988 vcpu
= (struct vcpu
*)malloc(sizeof(struct vcpu
), M_DEVBUF
, M_WAITOK
);
/*
 * NOTE(review): the counter is incremented before it is used as the index
 * below, so the slot at the old count is skipped, and no visible check
 * keeps the index below MAX_VCPUS.
 */
992 guest_vm
->nr_vcpus
++; /* TODO: Probably not safe to increment */
993 /* How about a lock to protect all of this? */
995 guest_vm
->vcpus
[guest_vm
->nr_vcpus
] = vcpu
;
996 vcpu
->guest_vm
= guest_vm
;
1001 vcpu
->vmcb
= (struct vmcb
*)contigmalloc(PAGE_SIZE
, M_DEVBUF
, M_ZERO
, 0, -1UL,
1003 if(vcpu
->vmcb
== NULL
)
1006 /* Initialize VMCB */
1007 fkvm_vmcb_init(vcpu
->vmcb
);
/* All VCPUs of a guest share the guest's nested page table root. */
1008 vcpu
->vmcb
->control
.nested_cr3
= guest_vm
->nested_cr3
;
/* NOTE(review): vcpu was obtained from malloc(9) above; releasing it with contigfree() looks wrong - free(vcpu, M_DEVBUF) is the matching call. */
1013 contigfree(vcpu
, PAGE_SIZE
, M_DEVBUF
);
/*
 * Module initialization, registered with SYSINIT below: allocates the host
 * save area and the I/O and MSR permission maps, fills the maps, sets
 * EFER.SVME and writes the host save area's physical address to
 * MSR_VM_HSAVE_PA.
 * NOTE(review): the failure branches are only partly visible in this
 * listing.  The visible cleanup calls do not reset iopm/hsave_area to NULL;
 * confirm that fkvm_unload() cannot free them a second time.
 */
1019 fkvm_load(void *unused
)
1023 printf("fkvm_load\n");
1024 printf("sizeof(struct vmcb) = %" PRIx64
"\n", sizeof(struct vmcb
));
1026 /* TODO: check for the presence of extensions */
/* NOTE(review): the flags argument of these contigmalloc() calls is 0 (neither M_WAITOK nor M_NOWAIT) - confirm this is intended. */
1028 hsave_area
= contigmalloc(PAGE_SIZE
, M_DEVBUF
, 0, 0, -1UL,
1030 if(hsave_area
== NULL
)
1033 iopm
= contigmalloc(IOPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
1037 msrpm
= contigmalloc(MSRPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
1041 /* Initialize iopm and msrpm */
1042 fkvm_iopm_init(iopm
);
1043 fkvm_msrpm_init(msrpm
);
1045 /* Enable SVM in EFER */
1046 efer
= rdmsr(MSR_EFER
);
1047 printf("EFER = %" PRIx64
"\n", efer
);
1048 wrmsr(MSR_EFER
, efer
| EFER_SVME
);
/* Read back so the log shows the value after the write. */
1049 efer
= rdmsr(MSR_EFER
);
1050 printf("new EFER = %" PRIx64
"\n", efer
);
1052 /* Write Host save address in MSR_VM_HSAVE_PA */
1053 wrmsr(MSR_VM_HSAVE_PA
, vtophys(hsave_area
));
/* Visible part of the failure cleanup. */
1058 contigfree(iopm
, IOPM_SIZE
, M_DEVBUF
);
1061 contigfree(hsave_area
, PAGE_SIZE
, M_DEVBUF
);
/* Run fkvm_load() during boot at SI_SUB_PSEUDO / SI_ORDER_MIDDLE. */
1064 SYSINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_load
, NULL
);
/*
 * Module teardown, registered with SYSUNINIT below: releases the MSR and
 * I/O permission maps and the host save area allocated by fkvm_load().
 * NOTE(review): the guards around the first two contigfree() calls are not
 * visible in this listing, and no visible statement clears EFER.SVME or
 * MSR_VM_HSAVE_PA before the host save area is freed - confirm.
 */
1067 fkvm_unload(void *unused
)
1069 printf("fkvm_unload\n");
1073 contigfree(msrpm
, MSRPM_SIZE
, M_DEVBUF
);
1076 contigfree(iopm
, IOPM_SIZE
, M_DEVBUF
);
1078 if(hsave_area
!= NULL
)
1079 contigfree(hsave_area
, PAGE_SIZE
, M_DEVBUF
);
/* Run fkvm_unload() at shutdown, mirroring the SYSINIT registration. */
1081 SYSUNINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_unload
, NULL
);