add print_vmcb_save_area()
[freebsd-src/fkvm-freebsd.git] / sys / kern / kern_fkvm.c
blobcfccd288c8ff308dcc9cc8846cded08d06e7bccd
1 /*-
2 * Copyright (c) 2008 The FreeBSD Project
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
27 #include <sys/cdefs.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/malloc.h>
32 #include <sys/sysproto.h>
33 #include <sys/file.h>
34 #include <sys/mman.h>
35 #include <sys/proc.h>
36 #include <vm/vm.h>
37 #include <vm/pmap.h>
38 #include <vm/vm_extern.h>
39 #include <vm/vm_map.h>
40 #include <vm/vm_object.h>
41 #include <vm/vm_param.h>
42 #include <machine/_inttypes.h>
43 #include <machine/specialreg.h>
44 #include <machine/segments.h>
45 #include <machine/vmcb.h>
/*
 * Sizes, in bytes, of the I/O and MSR permission bitmaps.
 * TODO: ensure that the IOPM need not be 12 Kbytes, not just 8 Kbytes + 1.
 */
#define IOPM_SIZE	((8 * 1024) + 1)
#define MSRPM_SIZE	(8 * 1024)

/*
 * fkvm data: module-wide buffers, allocated in fkvm_load() and released
 * in fkvm_unload().  File-scope statics start out as NULL.
 */
static void *iopm;		/* Should I allocate a vm_object_t instead? */
static void *msrpm;		/* Should I allocate a vm_object_t instead? */
static void *hsave_area;
56 /* per-guest data */
/*
 * Indices into vcpu->regs[].  NR_VCPU_REGS is the number of slots and is
 * used to size that array.
 */
enum {
	VCPU_REGS_RAX = 0,
	VCPU_REGS_RCX = 1,
	VCPU_REGS_RDX = 2,
	VCPU_REGS_RBX = 3,
	VCPU_REGS_RSP = 4,
	VCPU_REGS_RBP = 5,
	VCPU_REGS_RSI = 6,
	VCPU_REGS_RDI = 7,
	VCPU_REGS_R8  = 8,
	VCPU_REGS_R9  = 9,
	VCPU_REGS_R10 = 10,
	VCPU_REGS_R11 = 11,
	VCPU_REGS_R12 = 12,
	VCPU_REGS_R13 = 13,
	VCPU_REGS_R14 = 14,
	VCPU_REGS_R15 = 15,
	VCPU_REGS_RIP = 16,
	NR_VCPU_REGS  = 17
};
79 struct vcpu {
80 /* VCPU data */
81 struct vmcb *vmcb;
82 unsigned long vmcb_pa;
84 unsigned long regs[NR_VCPU_REGS];
85 u_int64_t host_gs_base;
86 u_int64_t cr2;
87 u_int64_t cr3;
89 struct guestvm *guest_vm;
92 #define MAX_VCPUS 8
94 struct guestvm {
95 struct vcpu *vcpus[MAX_VCPUS];
96 int nr_vcpus;
98 struct vmspace *sp;
99 vm_object_t vm_obj;
101 u_int64_t nested_cr3;
/*
 * SVM instructions, emitted as raw opcode bytes for use in inline asm.
 */
#define SVM_VMLOAD	".byte 0x0f, 0x01, 0xda"
#define SVM_VMRUN	".byte 0x0f, 0x01, 0xd8"
#define SVM_VMSAVE	".byte 0x0f, 0x01, 0xdb"
#define SVM_CLGI	".byte 0x0f, 0x01, 0xdd"
#define SVM_STGI	".byte 0x0f, 0x01, 0xdc"
#define SVM_INVLPGA	".byte 0x0f, 0x01, 0xdf"

/*
 * Guest VM that owns the VCPU attached to `thread`.
 *
 * The whole expansion is parenthesized so that the macro can be followed
 * by `->field`; previously a trailing member access would have been pulled
 * inside the leading cast.  The thread must already have a VCPU attached:
 * a NULL TD_GET_VCPU() result is dereferenced.
 */
#define GET_GUESTVM(thread) \
	(((struct vcpu *)TD_GET_VCPU(thread))->guest_vm)
113 static void
114 print_vmcb_seg(struct vmcb_seg* vmcb_seg, const char* name)
116 printf("%s Selector\n", name);
117 printf("Selector : %" PRIx16 "\n", vmcb_seg->selector);
118 printf("Attributes : %" PRIx16 "\n", vmcb_seg->attrib);
119 printf("Limit : %" PRIx32 "\n", vmcb_seg->limit);
120 printf("Base Address : %" PRIx64 "\n", vmcb_seg->base);
121 printf("\n");
124 static void
125 print_vmcb(struct vmcb *vmcb)
127 printf("VMCB Control Area\n");
128 printf("Intercept CR Reads : %" PRIx16 "\n", vmcb->control.intercept_cr_reads);
129 printf("Intercept CR Writes : %" PRIx16 "\n", vmcb->control.intercept_cr_writes);
130 printf("Intercept DR Reads : %" PRIx16 "\n", vmcb->control.intercept_dr_reads);
131 printf("Intercept DR Writes : %" PRIx16 "\n", vmcb->control.intercept_dr_writes);
132 printf("Intercept Exceptions : %" PRIx32 "\n", vmcb->control.intercept_exceptions);
133 printf("Intercepts : %" PRIx64 "\n", vmcb->control.intercepts);
134 printf("Reserved 1: \n");
135 for(int i=0; i < 44; i++) {
136 printf("%" PRIx8 "", vmcb->control.reserved_1[i]); /* Should be Zero */
138 printf("\n");
139 printf("IOPM Base PA : %" PRIx64 "\n", vmcb->control.iopm_base_pa);
140 printf("MSRPM Base PA : %" PRIx64 "\n", vmcb->control.msrpm_base_pa);
141 printf("TSC Offset : %" PRIx64 "\n", vmcb->control.tsc_offset);
142 printf("Guest ASID : %" PRIx32 "\n", vmcb->control.guest_asid);
143 printf("TLB Control : %" PRIx8 "\n", vmcb->control.tlb_control);
144 printf("Reserved 2 : \n");
145 for(int i=0; i < 3; i++) {
146 printf("%" PRIx8 "", vmcb->control.reserved_1[i]); /* Should be Zero */
148 printf("\n");
149 printf("Virtual TPR : %" PRIx8 "\n", vmcb->control.v_tpr);
150 printf("Virtual IRQ : %" PRIx8 "\n", vmcb->control.v_irq);
151 printf("Virtual Interrupt : %" PRIx8 "\n", vmcb->control.v_intr);
152 printf("Virtual Interrupt Masking: %" PRIx8 "\n", vmcb->control.v_intr_masking);
153 printf("Virtual Interrupt Vector : %" PRIx8 "\n", vmcb->control.v_intr_vector);
154 printf("Reserved 6 : \n");
155 for(int i=0; i < 3; i++) {
156 printf("%" PRIx8 "", vmcb->control.reserved_6[i]); /* Should be Zero */
158 printf("\n");
159 printf("Interrupt Shadow : %" PRIx8 "\n", vmcb->control.intr_shadow);
160 printf("Reserved 7 : \n");
161 for(int i=0; i < 7; i++) {
162 printf("%" PRIx8 "", vmcb->control.reserved_7[i]); /* Should be Zero */
164 printf("\n");
165 printf("Exit Code : %" PRIx64 "\n", vmcb->control.exit_code);
166 printf("Exit Info 1 : %" PRIx64 "\n", vmcb->control.exit_info_1);
167 printf("Exit Info 2 : %" PRIx64 "\n", vmcb->control.exit_info_2);
168 printf("Exit Interrupt Info : %" PRIx32 "\n", vmcb->control.exit_int_info);
169 printf("Exit Interrupt Info Err Code: %" PRIx32 "\n", vmcb->control.exit_int_info_err_code);
170 printf("Nested Control : %" PRIx64 "\n", vmcb->control.nested_ctl);
171 printf("Reserved 8 : \n");
172 for(int i=0; i < 16; i++) {
173 printf("%" PRIx8 "", vmcb->control.reserved_8[i]); /* Should be Zero */
175 printf("\n");
176 printf("Event Injection : %" PRIx64 "\n", vmcb->control.event_inj);
177 printf("Nested CR3 : %" PRIx64 "\n", vmcb->control.nested_cr3);
178 printf("LBR Virtualization Enable: %" PRIx64 "\n", vmcb->control.lbr_virt_enable);
179 printf("Reserved 9 : \n");
180 for(int i=0; i < 832; i++) {
181 printf("%" PRIx8 "", vmcb->control.reserved_9[i]); /* Should be Zero */
183 printf("\n");
185 printf("\n");
187 printf("VMCB Save Area\n");
188 print_vmcb_seg(&(vmcb->save.es), "ES");
189 print_vmcb_seg(&(vmcb->save.es), "CS");
190 print_vmcb_seg(&(vmcb->save.es), "SS");
191 print_vmcb_seg(&(vmcb->save.es), "DS");
192 print_vmcb_seg(&(vmcb->save.es), "FS");
193 print_vmcb_seg(&(vmcb->save.es), "GS");
194 print_vmcb_seg(&(vmcb->save.es), "GDTR");
195 print_vmcb_seg(&(vmcb->save.es), "LDTR");
196 print_vmcb_seg(&(vmcb->save.es), "IDTR");
197 print_vmcb_seg(&(vmcb->save.es), "TR");
198 printf("Reserved 1 : \n");
199 for(int i=0; i < 43; i++) {
200 printf("%" PRIx8 "", vmcb->save.reserved_1[i]); /* Should be Zero */
202 printf("\n");
203 printf("Current Processor Level : %" PRIx8 "\n", vmcb->save.cpl);
204 printf("Reserved 2 : \n");
205 for(int i=0; i < 4; i++) {
206 printf("%" PRIx8 "", vmcb->save.reserved_2[i]); /* Should be Zero */
208 printf("\n");
209 printf("EFER : %" PRIx64 "\n", vmcb->save.efer);
210 printf("Reserved 3 : \n");
211 for(int i=0; i < 112; i++) {
212 printf("%" PRIx8 "", vmcb->save.reserved_3[i]); /* Should be Zero */
214 printf("\n");
215 printf("Control Register 4 : %" PRIx64 "\n", vmcb->save.cr4);
216 printf("Control Register 3 : %" PRIx64 "\n", vmcb->save.cr3);
217 printf("Control Register 0 : %" PRIx64 "\n", vmcb->save.cr0);
218 printf("Debug Register 7 : %" PRIx64 "\n", vmcb->save.dr7);
219 printf("Debug Register 6 : %" PRIx64 "\n", vmcb->save.dr6);
220 printf("RFlags : %" PRIx64 "\n", vmcb->save.rflags);
221 printf("RIP : %" PRIx64 "\n", vmcb->save.rip);
222 printf("Reserved 4 : \n");
223 for(int i=0; i < 88; i++) {
224 printf("%" PRIx8 "", vmcb->save.reserved_4[i]); /* Should be Zero */
226 printf("\n");
227 printf("RSP : %" PRIx64 "\n", vmcb->save.rsp);
228 printf("Reserved 5 : \n");
229 for(int i=0; i < 24; i++) {
230 printf("%" PRIx8 "", vmcb->save.reserved_5[i]); /* Should be Zero */
232 printf("\n");
233 printf("RAX : %" PRIx64 "\n", vmcb->save.rax);
234 printf("STAR : %" PRIx64 "\n", vmcb->save.star);
235 printf("LSTAR : %" PRIx64 "\n", vmcb->save.lstar);
236 printf("CSTAR : %" PRIx64 "\n", vmcb->save.cstar);
237 printf("SFMASK : %" PRIx64 "\n", vmcb->save.sfmask);
238 printf("Kernel GS Base : %" PRIx64 "\n", vmcb->save.kernel_gs_base);
239 printf("SYSENTER CS : %" PRIx64 "\n", vmcb->save.sysenter_cs);
240 printf("SYSENTER ESP : %" PRIx64 "\n", vmcb->save.sysenter_esp);
241 printf("SYSENTER EIP : %" PRIx64 "\n", vmcb->save.sysenter_eip);
242 printf("Control Register 2 : %" PRIx64 "\n", vmcb->save.cr2);
243 printf("Reserved 6 : \n");
244 for(int i=0; i < 32; i++) {
245 printf("%" PRIx8 "", vmcb->save.reserved_6[i]); /* Should be Zero */
247 printf("\n");
248 printf("Global PAT : %" PRIx64 "\n", vmcb->save.g_pat);
249 printf("Debug Control : %" PRIx64 "\n", vmcb->save.dbg_ctl);
250 printf("BR From : %" PRIx64 "\n", vmcb->save.br_from);
251 printf("BR To : %" PRIx64 "\n", vmcb->save.br_to);
252 printf("Last Exception From : %" PRIx64 "\n", vmcb->save.last_excp_from);
253 printf("Last Exception To : %" PRIx64 "\n", vmcb->save.last_excp_to);
255 printf("\n\n");
258 static void
259 print_tss_desc(struct system_segment_descriptor *tss_desc)
261 printf("TSS desc @ %p:\n", tss_desc);
262 printf("sd_lolimit: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_lolimit);
263 printf("sd_lobase: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_lobase);
264 printf("sd_type: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_type);
265 printf("sd_dpl: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_dpl);
266 printf("sd_p: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_p);
267 printf("sd_hilimit: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_hilimit);
268 printf("sd_xx0: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx0);
269 printf("sd_gran: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_gran);
270 printf("sd_hibase: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_hibase);
271 printf("sd_xx1: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx1);
272 printf("sd_mbz: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_mbz);
273 printf("sd_xx2: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx2);
274 printf("\n\n");
277 static void
278 print_tss(struct system_segment_descriptor *tss_desc)
280 u_int32_t *base;
281 int limit;
282 int i;
284 base = (u_int32_t*) ((((u_int64_t) tss_desc->sd_hibase) << 24) | ((u_int64_t) tss_desc->sd_lobase));
285 limit = ((tss_desc->sd_hilimit << 16) | tss_desc->sd_lolimit) / 4;
287 printf("TSS: @ %p\n", base);
288 for (i = 0; i <= limit; i++)
289 printf("%x: 0x%" PRIx32 "\n", i, base[i]);
290 printf("\n\n");
293 static void
294 print_vmcb_save_area(struct vmcb *vmcb)
296 printf("VMCB save area:\n");
297 printf("fs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
298 vmcb->save.fs.selector,
299 vmcb->save.fs.attrib,
300 vmcb->save.fs.limit,
301 vmcb->save.fs.base);
302 printf("gs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
303 vmcb->save.gs.selector,
304 vmcb->save.gs.attrib,
305 vmcb->save.gs.limit,
306 vmcb->save.gs.base);
307 printf("tr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
308 vmcb->save.tr.selector,
309 vmcb->save.tr.attrib,
310 vmcb->save.tr.limit,
311 vmcb->save.tr.base);
312 printf("ldtr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
313 vmcb->save.ldtr.selector,
314 vmcb->save.ldtr.attrib,
315 vmcb->save.ldtr.limit,
316 vmcb->save.ldtr.base);
317 printf("kernel_gs_base: %" PRIx64 "\n", vmcb->save.kernel_gs_base);
318 printf("star: %" PRIx64 "\n", vmcb->save.star);
319 printf("lstar: %" PRIx64 "\n", vmcb->save.lstar);
320 printf("cstar: %" PRIx64 "\n", vmcb->save.cstar);
321 printf("sfmask: %" PRIx64 "\n", vmcb->save.sfmask);
322 printf("sysenter_cs: %" PRIx64 "\n", vmcb->save.sysenter_cs);
323 printf("sysenter_esp: %" PRIx64 "\n", vmcb->save.sysenter_esp);
324 printf("sysenter_eip: %" PRIx64 "\n", vmcb->save.sysenter_eip);
325 printf("\n\n");
328 static int
329 vmrun_assert(struct vmcb *vmcb)
331 #define A(cond) do { if ((cond)) { printf("Error: assertion not met on line %d\n", __LINE__); bad = 1; } } while (0)
333 int bad;
335 bad = 0;
337 // The following are illegal:
339 //EFER.SVME is zero.
340 A((vmcb->save.efer & 0x0000000000001000) == 0);
342 // CR0.CD is zero and CR0.NW is set
343 A( ((vmcb->save.cr0 & 0x0000000040000000) == 0) &&
344 ((vmcb->save.cr0 & 0x0000000020000000) != 0));
346 // CR0[63:32] are not zero.
347 A((vmcb->save.cr0 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
349 // Any MBZ bit of CR3 is set.
350 A((vmcb->save.cr3 & 0xFFF0000000000000) != 0);
352 // CR4[63:11] are not zero.
353 A((vmcb->save.cr4 & 0xFFFFFFFFFFFFF800) == 0xFFFFFFFFFFFFF800);
355 // DR6[63:32] are not zero.
356 A((vmcb->save.dr6 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
358 // DR7[63:32] are not zero.
359 A((vmcb->save.dr7 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
361 // EFER[63:15] are not zero.
362 A((vmcb->save.efer & 0xFFFFFFFFFFFF8000) == 0xFFFFFFFFFFF8000);
364 // EFER.LMA or EFER.LME is non-zero and this processor does not support long mode.
365 //// A((vmcb->save.efer & 0x0000000000000500) != 0);
367 // EFER.LME and CR0.PG are both set and CR4.PAE is zero.
368 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
369 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
370 ((vmcb->save.cr4 & 0x0000000000000020) != 0));
372 // EFER.LME and CR0.PG are both non-zero and CR0.PE is zero.
373 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
374 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
375 ((vmcb->save.cr0 & 0x0000000000000001) == 0));
377 // EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
378 // cs.attrib = concat 55-52 and 47-40 (p372 v2)
379 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
380 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
381 ((vmcb->save.cr4 & 0x0000000000000020) != 0) &&
382 ((vmcb->save.cs.attrib & 0x0200) != 0) &&
383 ((vmcb->save.cs.attrib & 0x0400) != 0));
385 // The VMRUN intercept bit is clear.
386 A((vmcb->control.intercepts & 0x0000000100000000) == 0);
388 // The MSR or IOIO intercept tables extend to a physical address that is
389 // greater than or equal to the maximum supported physical address.
391 // Illegal event injection (see Section 15.19 on page 391).
393 // ASID is equal to zero.
394 A(vmcb->control.guest_asid == 0);
396 // VMRUN can load a guest value of CR0 with PE = 0 but PG = 1, a
397 // combination that is otherwise illegal (see Section 15.18).
399 // In addition to consistency checks, VMRUN and #VMEXIT canonicalize (i.e.,
400 // sign-extend to 63 bits) all base addresses in the segment registers
401 // that have been loaded.
403 return bad;
405 #undef A
/*
 * Run the guest described by vcpu/vmcb once and return after it exits.
 *
 * Sequence, as written below: check the VMCB with vmrun_assert() and return
 * without running if it reports a problem; record host state (GS base,
 * FS/GS/LDT selectors, CR2, DR6/DR7, STAR/LSTAR/CSTAR/SF_MASK); copy
 * RAX/RSP/RIP and CR2 from vcpu into the VMCB; CLGI; load the remaining
 * guest registers from vcpu->regs and execute VMLOAD/VMRUN/VMSAVE; store
 * the guest registers back; restore the recorded host state and reload the
 * task register; STGI.  Debug output is printed throughout.
 *
 * The exit reason is left in vmcb->control.exit_code for the caller.
 * NOTE(review): on the early return after vmrun_assert() fails nothing is
 * written to exit_code, so the caller sees whatever was there before.
 */
408 static void
409 fkvm_vcpu_run(struct vcpu *vcpu, struct vmcb *vmcb)
411 u_int64_t lstar;
412 u_int64_t cstar;
413 u_int64_t star;
414 u_int64_t sfmask;
416 u_short fs_selector;
417 u_short gs_selector;
418 u_short ldt_selector;
420 unsigned long host_cr2;
421 unsigned long host_dr6;
422 unsigned long host_dr7;
424 struct system_segment_descriptor *tss_desc;
425 u_int64_t sel;
427 printf("begin fkvm_vcpu_run\n");
/* Refuse to run a VMCB that fails the software consistency checks. */
429 if (vmrun_assert(vmcb))
430 return;
/* Host TSS descriptor and selector; used for ltr() after the guest exits. */
433 tss_desc = (struct system_segment_descriptor*) (&gdt[GPROC0_SEL]);
434 sel = GSEL(GPROC0_SEL, SEL_KPL);
436 printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64 "\n", sel);
437 print_tss_desc(tss_desc);
438 print_tss(tss_desc);
440 print_vmcb_save_area(vmcb);
441 // disable_intr();
/* The asm below reads vmcb_pa from the vcpu and loads it into rax. */
443 vcpu->vmcb_pa = vtophys(vmcb);
444 printf("vmcb = 0x%p\n", vmcb);
445 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu->vmcb_pa);
/* RAX, RSP and RIP travel through the VMCB save area, not through the asm. */
447 vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX];
448 vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP];
449 vmcb->save.rip = vcpu->regs[VCPU_REGS_RIP];
451 /* meh: kvm has pre_svm_run(svm); */
/* Record the host state that is explicitly restored after the guest exits. */
453 vcpu->host_gs_base = rdmsr(MSR_GSBASE);
454 printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);
456 fs_selector = rfs();
457 gs_selector = rgs();
458 ldt_selector = rldt();
459 printf("fs selector: %hx\n", fs_selector);
460 printf("gs selector: %hx\n", gs_selector);
461 printf("ldt selector: %hx\n", ldt_selector);
463 host_cr2 = rcr2();
465 host_dr6 = rdr6();
466 host_dr7 = rdr7();
468 vmcb->save.cr2 = vcpu->cr2;
469 /* meh: cr3? */
471 /* meh: dr7? db_regs? */
473 printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
474 printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
475 printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
476 printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));
478 star = rdmsr(MSR_STAR);
479 lstar = rdmsr(MSR_LSTAR);
480 cstar = rdmsr(MSR_CSTAR);
481 sfmask = rdmsr(MSR_SF_MASK);
483 printf("CLGI...\n");
485 __asm __volatile (SVM_CLGI);
488 // enable_intr();
/*
 * Guest entry/exit.  [svm] is the vcpu pointer, held in rax ("a"
 * constraint); the other operands are immediate offsets into struct vcpu.
 * The host rbp is pushed first and popped last.  Guest rbx, rcx, rdx, rsi,
 * rdi, rbp and r8-r15 are loaded from vcpu->regs; then rax (the vcpu
 * pointer) is pushed, rax is loaded with vcpu->vmcb_pa for
 * VMLOAD/VMRUN/VMSAVE, and popped again so the guest registers can be
 * written back to vcpu->regs.
 */
490 #define R "r"
491 __asm __volatile (
492 "push %%"R"bp; \n\t"
493 "mov %c[rbx](%[svm]), %%"R"bx \n\t"
494 "mov %c[rcx](%[svm]), %%"R"cx \n\t"
495 "mov %c[rdx](%[svm]), %%"R"dx \n\t"
496 "mov %c[rsi](%[svm]), %%"R"si \n\t"
497 "mov %c[rdi](%[svm]), %%"R"di \n\t"
498 "mov %c[rbp](%[svm]), %%"R"bp \n\t"
499 "mov %c[r8](%[svm]), %%r8 \n\t"
500 "mov %c[r9](%[svm]), %%r9 \n\t"
501 "mov %c[r10](%[svm]), %%r10 \n\t"
502 "mov %c[r11](%[svm]), %%r11 \n\t"
503 "mov %c[r12](%[svm]), %%r12 \n\t"
504 "mov %c[r13](%[svm]), %%r13 \n\t"
505 "mov %c[r14](%[svm]), %%r14 \n\t"
506 "mov %c[r15](%[svm]), %%r15 \n\t"
508 /* Enter guest mode */
509 "push %%"R"ax \n\t"
510 "mov %c[vmcb](%[svm]), %%"R"ax \n\t"
511 SVM_VMLOAD "\n\t"
512 SVM_VMRUN "\n\t"
513 SVM_VMSAVE "\n\t"
514 "pop %%"R"ax \n\t"
516 /* Save guest registers, load host registers */
517 "mov %%"R"bx, %c[rbx](%[svm]) \n\t"
518 "mov %%"R"cx, %c[rcx](%[svm]) \n\t"
519 "mov %%"R"dx, %c[rdx](%[svm]) \n\t"
520 "mov %%"R"si, %c[rsi](%[svm]) \n\t"
521 "mov %%"R"di, %c[rdi](%[svm]) \n\t"
522 "mov %%"R"bp, %c[rbp](%[svm]) \n\t"
523 "mov %%r8, %c[r8](%[svm]) \n\t"
524 "mov %%r9, %c[r9](%[svm]) \n\t"
525 "mov %%r10, %c[r10](%[svm]) \n\t"
526 "mov %%r11, %c[r11](%[svm]) \n\t"
527 "mov %%r12, %c[r12](%[svm]) \n\t"
528 "mov %%r13, %c[r13](%[svm]) \n\t"
529 "mov %%r14, %c[r14](%[svm]) \n\t"
530 "mov %%r15, %c[r15](%[svm]) \n\t"
531 "pop %%"R"bp"
533 : [svm]"a"(vcpu),
534 [vmcb]"i"(offsetof(struct vcpu, vmcb_pa)),
535 [rbx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBX])),
536 [rcx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RCX])),
537 [rdx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDX])),
538 [rsi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RSI])),
539 [rdi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDI])),
540 [rbp]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBP])),
541 [r8 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R8 ])),
542 [r9 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R9 ])),
543 [r10]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R10])),
544 [r11]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R11])),
545 [r12]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R12])),
546 [r13]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R13])),
547 [r14]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R14])),
548 [r15]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R15]))
549 : "cc", "memory",
550 R"bx", R"cx", R"dx", R"si", R"di",
551 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
555 /* meh: dr7? db_regs? */
/* Copy the guest state that lives in the VMCB back into the vcpu. */
557 vcpu->cr2 = vmcb->save.cr2;
559 vcpu->regs[VCPU_REGS_RAX] = vmcb->save.rax;
560 vcpu->regs[VCPU_REGS_RSP] = vmcb->save.rsp;
561 vcpu->regs[VCPU_REGS_RIP] = vmcb->save.rip;
/*
 * Restore the host state recorded before the run.
 * NOTE(review): the VMCB save area also carries kernel_gs_base and the
 * sysenter_* fields (see print_vmcb_save_area()), but no host copies of
 * the corresponding MSRs are saved or restored here - confirm whether
 * that is required.
 */
563 load_dr6(host_dr6);
564 load_dr7(host_dr7);
566 load_cr2(host_cr2);
568 load_fs(fs_selector);
569 load_gs(gs_selector);
570 lldt(ldt_selector);
/* The host GS base is written back after the GS selector has been reloaded. */
572 wrmsr(MSR_GSBASE, vcpu->host_gs_base);
/* Set the descriptor type back to SDT_SYSTSS, then reload the task register. */
574 tss_desc->sd_type = SDT_SYSTSS;
575 ltr(sel);
577 wrmsr(MSR_STAR, star);
578 wrmsr(MSR_LSTAR, lstar);
579 wrmsr(MSR_CSTAR, cstar);
580 wrmsr(MSR_SF_MASK, sfmask);
582 // disable_intr();
584 __asm __volatile (SVM_STGI);
586 printf("STGI\n");
588 printf("exit_code: %" PRIx64 "\n", vmcb->control.exit_code);
/* Everything below re-reads host state only to print it for debugging. */
590 printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
591 printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
592 printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
593 printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));
595 fs_selector = rfs();
596 gs_selector = rgs();
597 ldt_selector = rldt();
598 printf("fs selector: %hx\n", fs_selector);
599 printf("gs selector: %hx\n", gs_selector);
600 printf("ldt selector: %hx\n", ldt_selector);
602 vcpu->host_gs_base = rdmsr(MSR_GSBASE);
603 printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);
605 print_tss_desc(tss_desc);
606 print_tss(tss_desc);
608 print_vmcb_save_area(vmcb);
610 // enable_intr();
612 /* meh: next_rip */
615 static void
616 _fkvm_init_seg(struct vmcb_seg *seg, uint16_t attrib)
618 seg->selector = 0;
619 seg->attrib = VMCB_SELECTOR_P_MASK | attrib;
620 seg->limit = 0xffff;
621 seg->base = 0;
624 static inline void
625 fkvm_init_seg(struct vmcb_seg *seg)
627 _fkvm_init_seg(seg, VMCB_SELECTOR_S_MASK | VMCB_SELECTOR_WRITE_MASK);
/*
 * Reset a system segment register (used for LDTR and TR) with the given
 * attribute bits; a thin wrapper around _fkvm_init_seg().
 */
static inline void
fkvm_init_sys_seg(struct vmcb_seg *seg, uint16_t attrib)
{

	_fkvm_init_seg(seg, attrib);
}
636 static void
637 fkvm_iopm_init(void *iopm)
639 memset(iopm, 0xff, IOPM_SIZE); /* TODO: we may want to allow access to PC debug port */
642 static void
643 fkvm_msrpm_init(void *msrpm)
645 memset(msrpm, 0xff, MSRPM_SIZE); /* TODO: we may want to allow some MSR accesses */
649 static u_int64_t
650 fkvm_make_vm_map(struct vmspace **sp)
652 struct vmspace *_sp;
654 _sp = vmspace_alloc(0, 0xffffffffffffffff);
655 if (_sp == NULL) {
656 printf("vmspace_alloc failed\n");
657 goto fail;
660 *sp = _sp;
661 return vtophys(vmspace_pmap(_sp)->pm_pml4);
663 fail:
664 if (_sp != NULL) {
665 vmspace_free(_sp);
666 *sp = NULL;
668 return 0;
673 static void
674 fkvm_vmcb_init(struct vmcb *vmcb)
676 struct vmcb_control_area *control = &vmcb->control;
677 struct vmcb_save_area *save = &vmcb->save;
679 control->intercept_cr_reads = INTERCEPT_CR4_MASK;
681 control->intercept_cr_writes = INTERCEPT_CR4_MASK |
682 INTERCEPT_CR8_MASK;
684 control->intercept_dr_reads = INTERCEPT_DR0_MASK |
685 INTERCEPT_DR1_MASK |
686 INTERCEPT_DR2_MASK |
687 INTERCEPT_DR3_MASK;
689 control->intercept_dr_writes = INTERCEPT_DR0_MASK |
690 INTERCEPT_DR1_MASK |
691 INTERCEPT_DR2_MASK |
692 INTERCEPT_DR3_MASK |
693 INTERCEPT_DR5_MASK |
694 INTERCEPT_DR7_MASK;
696 control->intercept_exceptions = (1 << IDT_UD) | // Invalid Opcode
697 (1 << IDT_MC); // Machine Check
699 control->intercepts = INTERCEPT_INTR |
700 INTERCEPT_NMI |
701 INTERCEPT_SMI |
702 INTERCEPT_CPUID |
703 INTERCEPT_INVD |
704 INTERCEPT_HLT |
705 INTERCEPT_INVLPGA |
706 INTERCEPT_IOIO_PROT |
707 INTERCEPT_MSR_PROT |
708 INTERCEPT_SHUTDOWN |
709 INTERCEPT_VMRUN |
710 INTERCEPT_VMMCALL |
711 INTERCEPT_VMLOAD |
712 INTERCEPT_VMSAVE |
713 INTERCEPT_STGI |
714 INTERCEPT_CLGI |
715 INTERCEPT_SKINIT |
716 INTERCEPT_WBINVD |
717 INTERCEPT_MONITOR |
718 INTERCEPT_MWAIT_UNCOND;
720 control->iopm_base_pa = vtophys(iopm);
721 control->msrpm_base_pa = vtophys(msrpm);
722 control->tsc_offset = 0;
724 /* TODO: remove this once we assign asid's to distinct VM's */
725 control->guest_asid = 1;
726 control->tlb_control = VMCB_TLB_CONTROL_FLUSH_ALL;
728 /* let v_tpr default to 0 */
729 /* let v_irq default to 0 */
730 /* let v_intr default to 0 */
732 control->v_intr_masking = 1;
734 /* let v_intr_vector default to 0 */
735 /* let intr_shadow default to 0 */
736 /* let exit_code, exit_info_1, exit_info_2, exit_int_info,
737 exit_int_info_err_code default to 0 */
739 control->nested_ctl = 1;
741 /* let event_inj default to 0 */
743 // (nested_cr3 is later)
745 /* let lbr_virt_enable default to 0 */
748 fkvm_init_seg(&save->ds);
749 fkvm_init_seg(&save->es);
750 fkvm_init_seg(&save->fs);
751 fkvm_init_seg(&save->gs);
752 fkvm_init_seg(&save->ss);
754 _fkvm_init_seg(&save->cs, VMCB_SELECTOR_READ_MASK | VMCB_SELECTOR_S_MASK |
755 VMCB_SELECTOR_CODE_MASK);
756 save->cs.selector = 0xf000;
757 save->cs.base = 0xffff0000;
759 save->gdtr.limit = 0xffff;
760 save->idtr.limit = 0xffff;
762 fkvm_init_sys_seg(&save->ldtr, SDT_SYSLDT);
763 fkvm_init_sys_seg(&save->tr, SDT_SYS286BSY);
765 save->g_pat = PAT_VALUE(PAT_WRITE_BACK, 0) | PAT_VALUE(PAT_WRITE_THROUGH, 1) |
766 PAT_VALUE(PAT_UNCACHED, 2) | PAT_VALUE(PAT_UNCACHEABLE, 3) |
767 PAT_VALUE(PAT_WRITE_BACK, 4) | PAT_VALUE(PAT_WRITE_THROUGH, 5) |
768 PAT_VALUE(PAT_UNCACHED, 6) | PAT_VALUE(PAT_UNCACHEABLE, 7);
770 /* CR0 = 6000_0010h at boot */
771 save->cr0 = CR0_ET | CR0_NW | CR0_CD;
772 save->dr6 = 0xffff0ff0;
773 save->dr7 = 0x400;
774 save->rflags = 2;
775 save->rip = 0x0000fff0;
777 save->efer = EFER_SVME;
779 //control->nested_cr3 = fkvm_make_vm_map();
781 printf("ncr3: %" PRIx64 "\n", control->nested_cr3);
/*
 * Debug system call: prints "fkvm_userpoke" and returns 1.
 * Neither argument is used.
 */
int
fkvm_userpoke(struct thread *td, struct fkvm_userpoke_args *uap)
{

	printf("fkvm_userpoke\n");

	return 1;
}
792 /* System Calls */
793 /* This function can only be called with multiples of page sizes */
795 fkvm_set_user_mem_region(struct thread *td, struct fkvm_set_user_mem_region_args *uap)
797 struct guestvm *guest_vm = GET_GUESTVM(td);
798 struct file *fp;
799 struct shmfd *shmfd;
800 vm_offset_t start;
801 vm_offset_t end;
802 int error;
804 error = fget(td, uap->fd, &fp);
805 if(error)
806 return error;
808 shmfd = fp->f_data;
809 printf("shm:\n");
810 printf(" size: %d bytes\n", (int) shmfd->shm_size);
811 printf(" vm object: %p\n", shmfd->shm_object);
812 printf(" size: %d pages\n", (int) shmfd->shm_object->size);
814 start = uap->guest_pa;
815 end = uap->guest_pa + uap->size;
816 printf("start: %d bytes\n", (int) start);
817 printf("end: %d bytes\n", (int) end);
819 vm_object_reference(shmfd->shm_object); // TODO: this might be a mem leak
820 error = vm_map_insert(&guest_vm->sp->vm_map,
821 shmfd->shm_object,
822 uap->vm_ooffset,
823 start,
824 end,
825 VM_PROT_ALL, VM_PROT_ALL,
827 if (error != KERN_SUCCESS) {
828 printf("vm_map_insert failed: %d\n", error);
829 return 1;
832 return 0;
836 fkvm_create_vm(struct thread *td, struct fkvm_create_vm_args *uap)
838 struct vcpu *vcpu;
839 struct guestvm *guest_vm;
841 printf("SYSCALL : fkvm_create_vm\n");
843 /* Allocate Guest VM */
844 guest_vm = (struct guestvm *)malloc(sizeof(struct guestvm), M_DEVBUF,
845 M_WAITOK);
846 if(guest_vm == NULL)
847 return 0;
849 /* TODO: Set up the vm address space */
851 /* Allocate VCPU0 */
852 vcpu = (struct vcpu*)malloc(sizeof(struct vcpu), M_DEVBUF, M_WAITOK);
853 if(vcpu == NULL)
854 goto errout0;
856 guest_vm->vcpus[0] = vcpu;
857 guest_vm->nr_vcpus = 1;
858 vcpu->guest_vm = guest_vm;
860 TD_SET_VCPU(td, vcpu);
862 /* Allocate VMCB */
863 vcpu->vmcb = (struct vmcb *)contigmalloc(PAGE_SIZE, M_DEVBUF, M_ZERO, 0, -1UL,
864 PAGE_SIZE, 0);
865 if(vcpu->vmcb == NULL)
866 goto errout1;
868 /* Initialize VMCB */
869 fkvm_vmcb_init(vcpu->vmcb);
871 /* TODO: Ugly */
872 vcpu->vmcb->control.nested_cr3 = fkvm_make_vm_map(&guest_vm->sp);
873 guest_vm->nested_cr3 = vcpu->vmcb->control.nested_cr3;
874 printf("fkvm_create_vm done. ncr3 %" PRIx64 "\n", guest_vm->nested_cr3);
875 return 1;
877 errout1:
878 contigfree(vcpu, PAGE_SIZE, M_DEVBUF);
879 errout0:
880 contigfree(guest_vm, PAGE_SIZE, M_DEVBUF);
882 return 0;
886 fkvm_destroy_vm(struct thread *td, struct fkvm_destroy_vm_args *uap)
888 struct guestvm *guest_vm = GET_GUESTVM(td);
889 int i;
891 /* Destroy the VCPUs */
892 for(i = 0; i < guest_vm->nr_vcpus; i++) {
893 if(guest_vm->vcpus[i] != NULL) {
894 if(guest_vm->vcpus[i]->vmcb != NULL)
895 contigfree(guest_vm->vcpus[i]->vmcb, PAGE_SIZE, M_DEVBUF);
896 contigfree(guest_vm->vcpus[i], PAGE_SIZE, M_DEVBUF);
900 /* Destroy the Guest VM itself */
901 contigfree(guest_vm, PAGE_SIZE, M_DEVBUF);
903 return 1;
907 fkvm_vm_run(struct thread *td, struct fkvm_vm_run_args *uap)
909 struct vcpu *vcpu = TD_GET_VCPU(td);
910 struct guestvm *guest_vm = GET_GUESTVM(td);
911 struct vmcb *vmcb = vcpu->vmcb;
913 fkvm_vcpu_run(vcpu, vmcb);
915 switch (vmcb->control.exit_code) {
917 case VMCB_EXIT_EXCP_BASE ... (VMCB_EXIT_EXCP_BASE + 31): {
918 int excp_vector;
920 excp_vector = vmcb->control.exit_code - VMCB_EXIT_EXCP_BASE;
922 printf("VMCB_EXIT_EXCP_BASE, exception vector: 0x%x\n",
923 excp_vector);
924 break;
927 case VMCB_EXIT_INTR: {
928 printf("VMCB_EXIT_INTR - nothing to do\n");
929 break;
932 case VMCB_EXIT_NPF: {
933 /* EXITINFO1 contains fault error code */
934 /* EXITINFO2 contains the guest physical address causing the fault. */
936 u_int64_t fault_code;
937 u_int64_t fault_gpa;
939 vm_prot_t fault_type;
940 int fault_flags;
941 int rc;
943 fault_code = vmcb->control.exit_info_1;
944 fault_gpa = vmcb->control.exit_info_2;
946 printf("VMCB_EXIT_NPF:\n");
947 printf("gpa=0x%" PRIx64 "\n", fault_gpa);
948 printf("fault code=0x%" PRIx64 " [P=%x, R/W=%x, U/S=%x, I/D=%x]\n",
949 fault_code,
950 (fault_code & PGEX_P) != 0,
951 (fault_code & PGEX_W) != 0,
952 (fault_code & PGEX_U) != 0,
953 (fault_code & PGEX_I) != 0);
955 if (fault_code & PGEX_W)
956 fault_type = VM_PROT_WRITE;
957 else if (fault_code & PGEX_I)
958 fault_type = VM_PROT_EXECUTE;
959 else
960 fault_type = VM_PROT_READ;
962 fault_flags = 0; /* TODO: is that right? */
963 rc = vm_fault(&guest_vm->sp->vm_map, fault_gpa, fault_type, fault_flags);
964 if (rc != KERN_SUCCESS)
965 printf("vm_fault failed: %d\n", rc);
966 break;
968 default:
969 printf("Unhandled vmexit:\n"
970 " code: 0x%" PRIx64 "\n"
971 " info1: 0x%" PRIx64 "\n"
972 " info2: 0x%" PRIx64 "\n",
973 vmcb->control.exit_code,
974 vmcb->control.exit_info_1,
975 vmcb->control.exit_info_2);
976 print_vmcb(vmcb);
979 return 1;
982 int
983 fkvm_create_vcpu(struct thread *td, struct fkvm_create_vcpu_args *uap)
985 struct guestvm *guest_vm = NULL; /* TODO: How to get this? */
986 struct vcpu *vcpu;
988 /* Allocate VCPU */
989 vcpu = (struct vcpu*)malloc(sizeof(struct vcpu), M_DEVBUF, M_WAITOK);
990 if(vcpu == NULL)
991 return 0;
993 guest_vm->nr_vcpus++; /* TODO: Probably not safe to increment */
994 /* How about a lock to protect all of this? */
996 guest_vm->vcpus[guest_vm->nr_vcpus] = vcpu;
997 vcpu->guest_vm = guest_vm;
999 td->vcpu = vcpu;
1001 /* Allocate VMCB */
1002 vcpu->vmcb = (struct vmcb *)contigmalloc(PAGE_SIZE, M_DEVBUF, M_ZERO, 0, -1UL,
1003 PAGE_SIZE, 0);
1004 if(vcpu->vmcb == NULL)
1005 goto errout0;
1007 /* Initialize VMCB */
1008 fkvm_vmcb_init(vcpu->vmcb);
1009 vcpu->vmcb->control.nested_cr3 = guest_vm->nested_cr3;
1011 return 1;
1013 errout0:
1014 contigfree(vcpu, PAGE_SIZE, M_DEVBUF);
1016 return 0;
1019 static void
1020 fkvm_load(void *unused)
1022 u_int64_t efer;
1024 printf("fkvm_load\n");
1025 printf("sizeof(struct vmcb) = %" PRIx64 "\n", sizeof(struct vmcb));
1027 /* TODO: check for the presense of extensions */
1029 hsave_area = contigmalloc(PAGE_SIZE, M_DEVBUF, 0, 0, -1UL,
1030 PAGE_SIZE, 0);
1031 if(hsave_area == NULL)
1032 return;
1034 iopm = contigmalloc(IOPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
1035 if(iopm == NULL)
1036 goto errout0;
1038 msrpm = contigmalloc(MSRPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
1039 if(msrpm == NULL)
1040 goto errout1;
1042 /* Initialize iopm and msrpm */
1043 fkvm_iopm_init(iopm);
1044 fkvm_msrpm_init(msrpm);
1046 /* Enable SVM in EFER */
1047 efer = rdmsr(MSR_EFER);
1048 printf("EFER = %" PRIx64 "\n", efer);
1049 wrmsr(MSR_EFER, efer | EFER_SVME);
1050 efer = rdmsr(MSR_EFER);
1051 printf("new EFER = %" PRIx64 "\n", efer);
1053 /* Write Host save address in MSR_VM_HSAVE_PA */
1054 wrmsr(MSR_VM_HSAVE_PA, vtophys(hsave_area));
1056 return;
1058 errout1:
1059 contigfree(iopm, IOPM_SIZE, M_DEVBUF);
1060 iopm = NULL;
1061 errout0:
1062 contigfree(hsave_area, PAGE_SIZE, M_DEVBUF);
1063 hsave_area = NULL;
1065 SYSINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_load, NULL);
1067 static void
1068 fkvm_unload(void *unused)
1070 printf("fkvm_unload\n");
1071 /* TODO */
1073 if(msrpm != NULL)
1074 contigfree(msrpm, MSRPM_SIZE, M_DEVBUF);
1076 if(iopm != NULL)
1077 contigfree(iopm, IOPM_SIZE, M_DEVBUF);
1079 if(hsave_area != NULL)
1080 contigfree(hsave_area, PAGE_SIZE, M_DEVBUF);
1082 SYSUNINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_unload, NULL);