implements fkvm_unset_user_mem_region
[freebsd-src/fkvm-freebsd.git] / sys / kern / kern_fkvm.c
blobb8921f287f26a7a4989beeb7a333bf350a65ecf0
1 /*-
2 * Copyright (c) 2008 The FreeBSD Project
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
27 #include <sys/cdefs.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/malloc.h>
32 #include <sys/sysproto.h>
33 #include <sys/file.h>
34 #include <sys/mman.h>
35 #include <sys/proc.h>
36 #include <vm/vm.h>
37 #include <vm/pmap.h>
38 #include <vm/vm_extern.h>
39 #include <vm/vm_map.h>
40 #include <vm/vm_object.h>
41 #include <vm/vm_param.h>
42 #include <machine/_inttypes.h>
43 #include <machine/specialreg.h>
44 #include <machine/segments.h>
45 #include <machine/vmcb.h>
/*
 * Sizes, in bytes, of the IOPM and MSRPM buffers allocated below.
 * TODO: confirm whether the IOPM needs to be 12 Kbytes rather than
 * just 8 Kbytes + 1.
 */
#define IOPM_SIZE	(8*1024 + 1)
#define MSRPM_SIZE	(8*1024)

/* fkvm data */
/* Buffers whose physical addresses are written into each VMCB. */
static void *iopm = NULL;	/* Should I allocate a vm_object_t instead? */
static void *msrpm = NULL;	/* Should I allocate a vm_object_t instead? */

/* One page reserved as the host save area. */
static void *hsave_area = NULL;
/* per-guest data */

/*
 * Indices into vcpu->regs[].  RAX starts at 0 and every following
 * enumerator is one larger than its predecessor, so R15 is 15, RIP is
 * 16 and NR_VCPU_REGS (the array length) is 17.
 */
enum {
	VCPU_REGS_RAX = 0,
	VCPU_REGS_RCX,		/*  1 */
	VCPU_REGS_RDX,		/*  2 */
	VCPU_REGS_RBX,		/*  3 */
	VCPU_REGS_RSP,		/*  4 */
	VCPU_REGS_RBP,		/*  5 */
	VCPU_REGS_RSI,		/*  6 */
	VCPU_REGS_RDI,		/*  7 */
	VCPU_REGS_R8,		/*  8 */
	VCPU_REGS_R9,		/*  9 */
	VCPU_REGS_R10,		/* 10 */
	VCPU_REGS_R11,		/* 11 */
	VCPU_REGS_R12,		/* 12 */
	VCPU_REGS_R13,		/* 13 */
	VCPU_REGS_R14,		/* 14 */
	VCPU_REGS_R15,		/* 15 */
	VCPU_REGS_RIP,		/* 16 */
	NR_VCPU_REGS		/* 17: number of slots in vcpu->regs[] */
};
79 struct vcpu {
80 /* VCPU data */
81 struct vmcb *vmcb;
82 unsigned long vmcb_pa;
84 unsigned long regs[NR_VCPU_REGS];
85 u_int64_t host_gs_base;
86 u_int64_t cr2;
87 u_int64_t cr3;
89 struct guestvm *guest_vm;
92 #define MAX_VCPUS 8
94 struct guestvm {
95 struct vcpu *vcpus[MAX_VCPUS];
96 int nr_vcpus;
98 struct vmspace *sp;
99 u_int64_t nested_cr3;
/*
 * SVM instructions spelled out as raw opcode bytes so they can be pasted
 * into inline assembly strings.
 */
#define SVM_VMRUN	".byte 0x0f, 0x01, 0xd8"
#define SVM_VMLOAD	".byte 0x0f, 0x01, 0xda"
#define SVM_VMSAVE	".byte 0x0f, 0x01, 0xdb"
#define SVM_STGI	".byte 0x0f, 0x01, 0xdc"
#define SVM_CLGI	".byte 0x0f, 0x01, 0xdd"
#define SVM_INVLPGA	".byte 0x0f, 0x01, 0xdf"
/*
 * Accessors for the per-thread VCPU pointer and the per-process guest VM
 * pointer.
 *
 * Every macro argument is parenthesized so that expressions such as
 * `&proc` or `cond ? a : b` expand correctly.  The value parameters are
 * deliberately NOT named after the struct members they assign to: a macro
 * parameter called `vcpu` would also replace the member name in
 * `td->vcpu`, so the macro would only work when the caller's argument
 * happened to be spelled `vcpu`.
 */
#define TD_GET_VCPU(td)			((td)->vcpu)

#define TD_SET_VCPU(td, new_vcpu_) do {		\
	(td)->vcpu = (new_vcpu_);		\
} while (0)

#define PROC_GET_GUESTVM(p)		((p)->p_guestvm)

#define PROC_SET_GUESTVM(p, new_guestvm_) do {	\
	(p)->p_guestvm = (new_guestvm_);	\
} while (0)

/* Guest VM owning the VCPU attached to `thread`; the VCPU must be non-NULL. */
#define GET_GUESTVM(thread)		(TD_GET_VCPU(thread)->guest_vm)
124 static void
125 print_vmcb_seg(struct vmcb_seg* vmcb_seg, const char* name)
127 printf("%s Selector\n", name);
128 printf("Selector : %" PRIx16 "\n", vmcb_seg->selector);
129 printf("Attributes : %" PRIx16 "\n", vmcb_seg->attrib);
130 printf("Limit : %" PRIx32 "\n", vmcb_seg->limit);
131 printf("Base Address : %" PRIx64 "\n", vmcb_seg->base);
132 printf("\n");
135 static void
136 print_vmcb(struct vmcb *vmcb)
138 printf("VMCB Control Area\n");
139 printf("Intercept CR Reads : %" PRIx16 "\n", vmcb->control.intercept_cr_reads);
140 printf("Intercept CR Writes : %" PRIx16 "\n", vmcb->control.intercept_cr_writes);
141 printf("Intercept DR Reads : %" PRIx16 "\n", vmcb->control.intercept_dr_reads);
142 printf("Intercept DR Writes : %" PRIx16 "\n", vmcb->control.intercept_dr_writes);
143 printf("Intercept Exceptions : %" PRIx32 "\n", vmcb->control.intercept_exceptions);
144 printf("Intercepts : %" PRIx64 "\n", vmcb->control.intercepts);
145 printf("Reserved 1: \n");
146 for(int i=0; i < 44; i++) {
147 printf("%" PRIx8 "", vmcb->control.reserved_1[i]); /* Should be Zero */
149 printf("\n");
150 printf("IOPM Base PA : %" PRIx64 "\n", vmcb->control.iopm_base_pa);
151 printf("MSRPM Base PA : %" PRIx64 "\n", vmcb->control.msrpm_base_pa);
152 printf("TSC Offset : %" PRIx64 "\n", vmcb->control.tsc_offset);
153 printf("Guest ASID : %" PRIx32 "\n", vmcb->control.guest_asid);
154 printf("TLB Control : %" PRIx8 "\n", vmcb->control.tlb_control);
155 printf("Reserved 2 : \n");
156 for(int i=0; i < 3; i++) {
157 printf("%" PRIx8 "", vmcb->control.reserved_1[i]); /* Should be Zero */
159 printf("\n");
160 printf("Virtual TPR : %" PRIx8 "\n", vmcb->control.v_tpr);
161 printf("Virtual IRQ : %" PRIx8 "\n", vmcb->control.v_irq);
162 printf("Virtual Interrupt : %" PRIx8 "\n", vmcb->control.v_intr);
163 printf("Virtual Interrupt Masking: %" PRIx8 "\n", vmcb->control.v_intr_masking);
164 printf("Virtual Interrupt Vector : %" PRIx8 "\n", vmcb->control.v_intr_vector);
165 printf("Reserved 6 : \n");
166 for(int i=0; i < 3; i++) {
167 printf("%" PRIx8 "", vmcb->control.reserved_6[i]); /* Should be Zero */
169 printf("\n");
170 printf("Interrupt Shadow : %" PRIx8 "\n", vmcb->control.intr_shadow);
171 printf("Reserved 7 : \n");
172 for(int i=0; i < 7; i++) {
173 printf("%" PRIx8 "", vmcb->control.reserved_7[i]); /* Should be Zero */
175 printf("\n");
176 printf("Exit Code : %" PRIx64 "\n", vmcb->control.exit_code);
177 printf("Exit Info 1 : %" PRIx64 "\n", vmcb->control.exit_info_1);
178 printf("Exit Info 2 : %" PRIx64 "\n", vmcb->control.exit_info_2);
179 printf("Exit Interrupt Info : %" PRIx32 "\n", vmcb->control.exit_int_info);
180 printf("Exit Interrupt Info Err Code: %" PRIx32 "\n", vmcb->control.exit_int_info_err_code);
181 printf("Nested Control : %" PRIx64 "\n", vmcb->control.nested_ctl);
182 printf("Reserved 8 : \n");
183 for(int i=0; i < 16; i++) {
184 printf("%" PRIx8 "", vmcb->control.reserved_8[i]); /* Should be Zero */
186 printf("\n");
187 printf("Event Injection : %" PRIx64 "\n", vmcb->control.event_inj);
188 printf("Nested CR3 : %" PRIx64 "\n", vmcb->control.nested_cr3);
189 printf("LBR Virtualization Enable: %" PRIx64 "\n", vmcb->control.lbr_virt_enable);
190 printf("Reserved 9 : \n");
191 for(int i=0; i < 832; i++) {
192 printf("%" PRIx8 "", vmcb->control.reserved_9[i]); /* Should be Zero */
194 printf("\n");
196 printf("\n");
198 printf("VMCB Save Area\n");
199 print_vmcb_seg(&(vmcb->save.es), "ES");
200 print_vmcb_seg(&(vmcb->save.es), "CS");
201 print_vmcb_seg(&(vmcb->save.es), "SS");
202 print_vmcb_seg(&(vmcb->save.es), "DS");
203 print_vmcb_seg(&(vmcb->save.es), "FS");
204 print_vmcb_seg(&(vmcb->save.es), "GS");
205 print_vmcb_seg(&(vmcb->save.es), "GDTR");
206 print_vmcb_seg(&(vmcb->save.es), "LDTR");
207 print_vmcb_seg(&(vmcb->save.es), "IDTR");
208 print_vmcb_seg(&(vmcb->save.es), "TR");
209 printf("Reserved 1 : \n");
210 for(int i=0; i < 43; i++) {
211 printf("%" PRIx8 "", vmcb->save.reserved_1[i]); /* Should be Zero */
213 printf("\n");
214 printf("Current Processor Level : %" PRIx8 "\n", vmcb->save.cpl);
215 printf("Reserved 2 : \n");
216 for(int i=0; i < 4; i++) {
217 printf("%" PRIx8 "", vmcb->save.reserved_2[i]); /* Should be Zero */
219 printf("\n");
220 printf("EFER : %" PRIx64 "\n", vmcb->save.efer);
221 printf("Reserved 3 : \n");
222 for(int i=0; i < 112; i++) {
223 printf("%" PRIx8 "", vmcb->save.reserved_3[i]); /* Should be Zero */
225 printf("\n");
226 printf("Control Register 4 : %" PRIx64 "\n", vmcb->save.cr4);
227 printf("Control Register 3 : %" PRIx64 "\n", vmcb->save.cr3);
228 printf("Control Register 0 : %" PRIx64 "\n", vmcb->save.cr0);
229 printf("Debug Register 7 : %" PRIx64 "\n", vmcb->save.dr7);
230 printf("Debug Register 6 : %" PRIx64 "\n", vmcb->save.dr6);
231 printf("RFlags : %" PRIx64 "\n", vmcb->save.rflags);
232 printf("RIP : %" PRIx64 "\n", vmcb->save.rip);
233 printf("Reserved 4 : \n");
234 for(int i=0; i < 88; i++) {
235 printf("%" PRIx8 "", vmcb->save.reserved_4[i]); /* Should be Zero */
237 printf("\n");
238 printf("RSP : %" PRIx64 "\n", vmcb->save.rsp);
239 printf("Reserved 5 : \n");
240 for(int i=0; i < 24; i++) {
241 printf("%" PRIx8 "", vmcb->save.reserved_5[i]); /* Should be Zero */
243 printf("\n");
244 printf("RAX : %" PRIx64 "\n", vmcb->save.rax);
245 printf("STAR : %" PRIx64 "\n", vmcb->save.star);
246 printf("LSTAR : %" PRIx64 "\n", vmcb->save.lstar);
247 printf("CSTAR : %" PRIx64 "\n", vmcb->save.cstar);
248 printf("SFMASK : %" PRIx64 "\n", vmcb->save.sfmask);
249 printf("Kernel GS Base : %" PRIx64 "\n", vmcb->save.kernel_gs_base);
250 printf("SYSENTER CS : %" PRIx64 "\n", vmcb->save.sysenter_cs);
251 printf("SYSENTER ESP : %" PRIx64 "\n", vmcb->save.sysenter_esp);
252 printf("SYSENTER EIP : %" PRIx64 "\n", vmcb->save.sysenter_eip);
253 printf("Control Register 2 : %" PRIx64 "\n", vmcb->save.cr2);
254 printf("Reserved 6 : \n");
255 for(int i=0; i < 32; i++) {
256 printf("%" PRIx8 "", vmcb->save.reserved_6[i]); /* Should be Zero */
258 printf("\n");
259 printf("Global PAT : %" PRIx64 "\n", vmcb->save.g_pat);
260 printf("Debug Control : %" PRIx64 "\n", vmcb->save.dbg_ctl);
261 printf("BR From : %" PRIx64 "\n", vmcb->save.br_from);
262 printf("BR To : %" PRIx64 "\n", vmcb->save.br_to);
263 printf("Last Exception From : %" PRIx64 "\n", vmcb->save.last_excp_from);
264 printf("Last Exception To : %" PRIx64 "\n", vmcb->save.last_excp_to);
266 printf("\n\n");
#if 0
/*
 * Debug helper (compiled out): print every field of a TSS descriptor,
 * each widened to 64 bits.
 */
static void
print_tss_desc(struct system_segment_descriptor *tss_desc)
{
	const struct system_segment_descriptor *d = tss_desc;

	printf("TSS desc @ %p:\n", tss_desc);
	printf("sd_lolimit: 0x%" PRIx64 "\n", (u_int64_t) d->sd_lolimit);
	printf("sd_lobase: 0x%" PRIx64 "\n", (u_int64_t) d->sd_lobase);
	printf("sd_type: 0x%" PRIx64 "\n", (u_int64_t) d->sd_type);
	printf("sd_dpl: 0x%" PRIx64 "\n", (u_int64_t) d->sd_dpl);
	printf("sd_p: 0x%" PRIx64 "\n", (u_int64_t) d->sd_p);
	printf("sd_hilimit: 0x%" PRIx64 "\n", (u_int64_t) d->sd_hilimit);
	printf("sd_xx0: 0x%" PRIx64 "\n", (u_int64_t) d->sd_xx0);
	printf("sd_gran: 0x%" PRIx64 "\n", (u_int64_t) d->sd_gran);
	printf("sd_hibase: 0x%" PRIx64 "\n", (u_int64_t) d->sd_hibase);
	printf("sd_xx1: 0x%" PRIx64 "\n", (u_int64_t) d->sd_xx1);
	printf("sd_mbz: 0x%" PRIx64 "\n", (u_int64_t) d->sd_mbz);
	printf("sd_xx2: 0x%" PRIx64 "\n", (u_int64_t) d->sd_xx2);
	printf("\n\n");
}

/*
 * Debug helper (compiled out): dump the TSS that a descriptor points at
 * as 32-bit words.  The base is assembled from sd_hibase/sd_lobase and
 * the word count from sd_hilimit/sd_lolimit divided by 4.
 */
static void
print_tss(struct system_segment_descriptor *tss_desc)
{
	u_int64_t base_hi = ((u_int64_t) tss_desc->sd_hibase) << 24;
	u_int64_t base_lo = (u_int64_t) tss_desc->sd_lobase;
	u_int32_t *base;
	int limit;
	int i;

	base = (u_int32_t*) (base_hi | base_lo);
	limit = ((tss_desc->sd_hilimit << 16) | tss_desc->sd_lolimit) / 4;

	printf("TSS: @ %p\n", base);
	for (i = 0; i <= limit; i++) {
		printf("%x: 0x%" PRIx32 "\n", i, base[i]);
	}
	printf("\n\n");
}
#endif
306 static void
307 print_vmcb_save_area(struct vmcb *vmcb)
309 printf("VMCB save area:\n");
310 printf(" cs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
311 vmcb->save.cs.selector,
312 vmcb->save.cs.attrib,
313 vmcb->save.cs.limit,
314 vmcb->save.cs.base);
315 printf(" fs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
316 vmcb->save.fs.selector,
317 vmcb->save.fs.attrib,
318 vmcb->save.fs.limit,
319 vmcb->save.fs.base);
320 printf(" gs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
321 vmcb->save.gs.selector,
322 vmcb->save.gs.attrib,
323 vmcb->save.gs.limit,
324 vmcb->save.gs.base);
325 printf(" tr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
326 vmcb->save.tr.selector,
327 vmcb->save.tr.attrib,
328 vmcb->save.tr.limit,
329 vmcb->save.tr.base);
330 printf(" ldtr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
331 vmcb->save.ldtr.selector,
332 vmcb->save.ldtr.attrib,
333 vmcb->save.ldtr.limit,
334 vmcb->save.ldtr.base);
335 printf(" rip: %" PRIx64 "\n", vmcb->save.rip);
336 printf(" kernel_gs_base: %" PRIx64 "\n", vmcb->save.kernel_gs_base);
337 printf(" star: %" PRIx64 "\n", vmcb->save.star);
338 printf(" lstar: %" PRIx64 "\n", vmcb->save.lstar);
339 printf(" cstar: %" PRIx64 "\n", vmcb->save.cstar);
340 printf(" sfmask: %" PRIx64 "\n", vmcb->save.sfmask);
341 printf(" sysenter_cs: %" PRIx64 "\n", vmcb->save.sysenter_cs);
342 printf(" sysenter_esp: %" PRIx64 "\n", vmcb->save.sysenter_esp);
343 printf(" sysenter_eip: %" PRIx64 "\n", vmcb->save.sysenter_eip);
344 printf("\n\n");
347 static int
348 vmrun_assert(struct vmcb *vmcb)
350 #define A(cond) do { if ((cond)) { printf("Error: assertion not met on line %d\n", __LINE__); bad = 1; } } while (0)
352 int bad;
354 bad = 0;
356 // The following are illegal:
358 //EFER.SVME is zero.
359 A((vmcb->save.efer & 0x0000000000001000) == 0);
361 // CR0.CD is zero and CR0.NW is set
362 A( ((vmcb->save.cr0 & 0x0000000040000000) == 0) &&
363 ((vmcb->save.cr0 & 0x0000000020000000) != 0));
365 // CR0[63:32] are not zero.
366 A((vmcb->save.cr0 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
368 // Any MBZ bit of CR3 is set.
369 A((vmcb->save.cr3 & 0xFFF0000000000000) != 0);
371 // CR4[63:11] are not zero.
372 A((vmcb->save.cr4 & 0xFFFFFFFFFFFFF800) == 0xFFFFFFFFFFFFF800);
374 // DR6[63:32] are not zero.
375 A((vmcb->save.dr6 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
377 // DR7[63:32] are not zero.
378 A((vmcb->save.dr7 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
380 // EFER[63:15] are not zero.
381 A((vmcb->save.efer & 0xFFFFFFFFFFFF8000) == 0xFFFFFFFFFFF8000);
383 // EFER.LMA or EFER.LME is non-zero and this processor does not support long mode.
384 //// A((vmcb->save.efer & 0x0000000000000500) != 0);
386 // EFER.LME and CR0.PG are both set and CR4.PAE is zero.
387 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
388 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
389 ((vmcb->save.cr4 & 0x0000000000000020) != 0));
391 // EFER.LME and CR0.PG are both non-zero and CR0.PE is zero.
392 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
393 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
394 ((vmcb->save.cr0 & 0x0000000000000001) == 0));
396 // EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
397 // cs.attrib = concat 55-52 and 47-40 (p372 v2)
398 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
399 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
400 ((vmcb->save.cr4 & 0x0000000000000020) != 0) &&
401 ((vmcb->save.cs.attrib & 0x0200) != 0) &&
402 ((vmcb->save.cs.attrib & 0x0400) != 0));
404 // The VMRUN intercept bit is clear.
405 A((vmcb->control.intercepts & 0x0000000100000000) == 0);
407 // The MSR or IOIO intercept tables extend to a physical address that is
408 // greater than or equal to the maximum supported physical address.
410 // Illegal event injection (see Section 15.19 on page 391).
412 // ASID is equal to zero.
413 A(vmcb->control.guest_asid == 0);
415 // VMRUN can load a guest value of CR0 with PE = 0 but PG = 1, a
416 // combination that is otherwise illegal (see Section 15.18).
418 // In addition to consistency checks, VMRUN and #VMEXIT canonicalize (i.e.,
419 // sign-extend to 63 bits) all base addresses in the segment registers
420 // that have been loaded.
422 return bad;
424 #undef A
/*
 * Run the guest described by vcpu->vmcb once and return after it exits.
 *
 * Steps, in order:
 *  - validate the VMCB with vmrun_assert(); if it reports a problem the
 *    function returns without running the guest and without touching
 *    vmcb->control.exit_code;
 *  - copy RAX, RSP and RIP from vcpu->regs[] into the VMCB save area;
 *  - save host state that is restored by hand afterwards: MSR_GSBASE,
 *    the FS/GS/LDT selectors, CR2, DR6/DR7 and the STAR/LSTAR/CSTAR/
 *    SF_MASK MSRs;
 *  - execute CLGI, load the remaining guest registers from vcpu->regs[],
 *    and issue VMLOAD / VMRUN / VMSAVE with vcpu->vmcb_pa in %rax;
 *  - store the guest registers back into vcpu->regs[], restore the saved
 *    host state, reload the task register, and execute STGI.
 *
 * The order of the statements matters; do not reorder them.
 */
427 static void
428 fkvm_vcpu_run(struct vcpu *vcpu)
430 u_int64_t lstar;
431 u_int64_t cstar;
432 u_int64_t star;
433 u_int64_t sfmask;
435 u_short fs_selector;
436 u_short gs_selector;
437 u_short ldt_selector;
439 unsigned long host_cr2;
440 unsigned long host_dr6;
441 unsigned long host_dr7;
443 struct system_segment_descriptor *tss_desc;
444 u_int64_t sel;
446 struct vmcb *vmcb;
448 printf("begin fkvm_vcpu_run\n");
450 vmcb = vcpu->vmcb;
/* Refuse to run a VMCB that fails the consistency checks. */
452 if (vmrun_assert(vmcb))
453 return;
/* Descriptor and selector used to reload the task register after the run. */
455 tss_desc = (struct system_segment_descriptor*) (&gdt[GPROC0_SEL]);
456 sel = GSEL(GPROC0_SEL, SEL_KPL);
458 // printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64 "\n", sel);
459 // print_tss_desc(tss_desc);
460 // print_tss(tss_desc);
462 print_vmcb_save_area(vmcb);
463 printf("vcpu->regs[VCPU_REGS_RIP]: 0x%lx\n", vcpu->regs[VCPU_REGS_RIP]);
464 // disable_intr();
/* RAX, RSP and RIP travel through the VMCB save area, not the asm block below. */
466 vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX];
467 vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP];
468 vmcb->save.rip = vcpu->regs[VCPU_REGS_RIP];
470 /* meh: kvm has pre_svm_run(svm); */
/* Save host state that is restored manually once the guest has exited. */
472 vcpu->host_gs_base = rdmsr(MSR_GSBASE);
473 // printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);
475 fs_selector = rfs();
476 gs_selector = rgs();
477 ldt_selector = rldt();
478 // printf("fs selector: %hx\n", fs_selector);
479 // printf("gs selector: %hx\n", gs_selector);
480 // printf("ldt selector: %hx\n", ldt_selector);
482 host_cr2 = rcr2();
484 host_dr6 = rdr6();
485 host_dr7 = rdr7();
487 vmcb->save.cr2 = vcpu->cr2;
488 /* meh: cr3? */
490 /* meh: dr7? db_regs? */
492 // printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
493 // printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
494 // printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
495 // printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));
497 star = rdmsr(MSR_STAR);
498 lstar = rdmsr(MSR_LSTAR);
499 cstar = rdmsr(MSR_CSTAR);
500 sfmask = rdmsr(MSR_SF_MASK);
502 printf("CLGI...\n");
504 __asm __volatile (SVM_CLGI);
507 // enable_intr();
/*
 * %rax holds the vcpu pointer ([svm]) on entry.  It is pushed before being
 * overwritten with vcpu->vmcb_pa for VMLOAD/VMRUN/VMSAVE and popped again
 * afterwards so the guest registers can be stored back through it.  The
 * host %rbp is likewise pushed at the start and popped at the end.
 */
509 __asm __volatile (
510 "push %%rbp; \n\t"
511 "mov %c[rbx](%[svm]), %%rbx \n\t"
512 "mov %c[rcx](%[svm]), %%rcx \n\t"
513 "mov %c[rdx](%[svm]), %%rdx \n\t"
514 "mov %c[rsi](%[svm]), %%rsi \n\t"
515 "mov %c[rdi](%[svm]), %%rdi \n\t"
516 "mov %c[rbp](%[svm]), %%rbp \n\t"
517 "mov %c[r8](%[svm]), %%r8 \n\t"
518 "mov %c[r9](%[svm]), %%r9 \n\t"
519 "mov %c[r10](%[svm]), %%r10 \n\t"
520 "mov %c[r11](%[svm]), %%r11 \n\t"
521 "mov %c[r12](%[svm]), %%r12 \n\t"
522 "mov %c[r13](%[svm]), %%r13 \n\t"
523 "mov %c[r14](%[svm]), %%r14 \n\t"
524 "mov %c[r15](%[svm]), %%r15 \n\t"
526 /* Enter guest mode */
527 "push %%rax \n\t"
528 "mov %c[vmcb](%[svm]), %%rax \n\t"
529 SVM_VMLOAD "\n\t"
530 SVM_VMRUN "\n\t"
531 SVM_VMSAVE "\n\t"
532 "pop %%rax \n\t"
534 /* Save guest registers, load host registers */
535 "mov %%rbx, %c[rbx](%[svm]) \n\t"
536 "mov %%rcx, %c[rcx](%[svm]) \n\t"
537 "mov %%rdx, %c[rdx](%[svm]) \n\t"
538 "mov %%rsi, %c[rsi](%[svm]) \n\t"
539 "mov %%rdi, %c[rdi](%[svm]) \n\t"
540 "mov %%rbp, %c[rbp](%[svm]) \n\t"
541 "mov %%r8, %c[r8](%[svm]) \n\t"
542 "mov %%r9, %c[r9](%[svm]) \n\t"
543 "mov %%r10, %c[r10](%[svm]) \n\t"
544 "mov %%r11, %c[r11](%[svm]) \n\t"
545 "mov %%r12, %c[r12](%[svm]) \n\t"
546 "mov %%r13, %c[r13](%[svm]) \n\t"
547 "mov %%r14, %c[r14](%[svm]) \n\t"
548 "mov %%r15, %c[r15](%[svm]) \n\t"
549 "pop %%rbp"
/* Operands: the vcpu pointer in %rax plus constant offsets into struct vcpu. */
551 : [svm]"a"(vcpu),
552 [vmcb]"i"(offsetof(struct vcpu, vmcb_pa)),
553 [rbx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBX])),
554 [rcx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RCX])),
555 [rdx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDX])),
556 [rsi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RSI])),
557 [rdi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDI])),
558 [rbp]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBP])),
559 [r8 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R8 ])),
560 [r9 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R9 ])),
561 [r10]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R10])),
562 [r11]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R11])),
563 [r12]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R12])),
564 [r13]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R13])),
565 [r14]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R14])),
566 [r15]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R15]))
567 : "cc", "memory",
568 "rbx", "rcx", "rdx", "rsi", "rdi",
569 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
573 /* meh: dr7? db_regs? */
/* Pull the guest state that lives in the VMCB back into the vcpu. */
575 vcpu->cr2 = vmcb->save.cr2;
577 vcpu->regs[VCPU_REGS_RAX] = vmcb->save.rax;
578 vcpu->regs[VCPU_REGS_RSP] = vmcb->save.rsp;
579 vcpu->regs[VCPU_REGS_RIP] = vmcb->save.rip;
/* Restore the host state saved before the run. */
581 load_dr6(host_dr6);
582 load_dr7(host_dr7);
584 load_cr2(host_cr2);
586 load_fs(fs_selector);
587 load_gs(gs_selector);
588 lldt(ldt_selector);
/* GS base is written after load_gs() so the saved host value is the one left in place. */
590 wrmsr(MSR_GSBASE, vcpu->host_gs_base);
/* Reset the descriptor type to SDT_SYSTSS, then reload the task register. */
592 tss_desc->sd_type = SDT_SYSTSS;
593 ltr(sel);
595 wrmsr(MSR_STAR, star);
596 wrmsr(MSR_LSTAR, lstar);
597 wrmsr(MSR_CSTAR, cstar);
598 wrmsr(MSR_SF_MASK, sfmask);
600 // disable_intr();
602 __asm __volatile (SVM_STGI);
604 printf("STGI\n");
606 printf("exit_code: %" PRIx64 "\n", vmcb->control.exit_code);
608 // print_tss_desc(tss_desc);
609 // print_tss(tss_desc);
611 print_vmcb_save_area(vmcb);
613 // enable_intr();
615 /* meh: next_rip */
618 static void
619 _fkvm_init_seg(struct vmcb_seg *seg, uint16_t attrib)
621 seg->selector = 0;
622 seg->attrib = VMCB_SELECTOR_P_MASK | attrib;
623 seg->limit = 0xffff;
624 seg->base = 0;
627 static inline void
628 fkvm_init_seg(struct vmcb_seg *seg)
630 _fkvm_init_seg(seg, VMCB_SELECTOR_S_MASK | VMCB_SELECTOR_WRITE_MASK);
/*
 * Reset a VMCB system segment (e.g. LDTR, TR); `attrib` carries the
 * descriptor type and is passed through to _fkvm_init_seg() unchanged.
 */
static inline void
fkvm_init_sys_seg(struct vmcb_seg *seg, uint16_t attrib)
{
	_fkvm_init_seg(seg, attrib);
}
639 static void*
640 fkvm_iopm_alloc(void)
642 return contigmalloc(IOPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
645 static void
646 fkvm_iopm_init(void *iopm)
648 memset(iopm, 0xff, IOPM_SIZE); /* TODO: we may want to allow access to PC debug port */
651 static void
652 fkvm_iopm_free(void *iopm)
654 contigfree(iopm, IOPM_SIZE, M_DEVBUF);
657 static void*
658 fkvm_msrpm_alloc(void)
660 return contigmalloc(MSRPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
663 static void
664 fkvm_msrpm_init(void *msrpm)
666 memset(msrpm, 0xff, MSRPM_SIZE); /* TODO: we may want to allow some MSR accesses */
669 static void
670 fkvm_msrpm_free(void *msrpm)
672 contigfree(msrpm, MSRPM_SIZE, M_DEVBUF);
675 static void*
676 fkvm_hsave_area_alloc(void)
678 return contigmalloc(PAGE_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
/*
 * Placeholder: the host save area currently needs no initialization.
 */
static void
fkvm_hsave_area_init(void *hsave_area)
{
	/* intentionally empty */
}
686 static void
687 fkvm_hsave_area_free(void *hsave_area)
689 contigfree(hsave_area, PAGE_SIZE, M_DEVBUF);
/*
 * Allocate the vmspace used for a guest's memory mappings, covering
 * addresses 0 through 0xffffffffffffffff.
 *
 * Returns the new vmspace, or NULL (after logging) if vmspace_alloc()
 * fails.
 */
static struct vmspace*
fkvm_make_vmspace(void)
{
	struct vmspace *guest_space = vmspace_alloc(0, 0xffffffffffffffff);

	if (guest_space != NULL)
		return guest_space;

	printf("vmspace_alloc failed\n");
	return NULL;
}
/*
 * Drop the guest vmspace created by fkvm_make_vmspace().
 */
static void
fkvm_destroy_vmspace(struct vmspace* sp)
{
	vmspace_free(sp);
}
712 static struct vmcb*
713 fkvm_vmcb_alloc(void)
715 return contigmalloc(PAGE_SIZE, M_DEVBUF, M_ZERO, 0, -1UL,
716 PAGE_SIZE, 0);
719 static void
720 fkvm_vmcb_init(struct vmcb *vmcb)
722 struct vmcb_control_area *control = &vmcb->control;
723 struct vmcb_save_area *save = &vmcb->save;
725 control->intercept_cr_reads = INTERCEPT_CR4_MASK;
727 control->intercept_cr_writes = INTERCEPT_CR4_MASK |
728 INTERCEPT_CR8_MASK;
730 control->intercept_dr_reads = INTERCEPT_DR0_MASK |
731 INTERCEPT_DR1_MASK |
732 INTERCEPT_DR2_MASK |
733 INTERCEPT_DR3_MASK;
735 control->intercept_dr_writes = INTERCEPT_DR0_MASK |
736 INTERCEPT_DR1_MASK |
737 INTERCEPT_DR2_MASK |
738 INTERCEPT_DR3_MASK |
739 INTERCEPT_DR5_MASK |
740 INTERCEPT_DR7_MASK;
742 control->intercept_exceptions = (1 << IDT_UD) | // Invalid Opcode
743 (1 << IDT_MC); // Machine Check
745 control->intercepts = INTERCEPT_INTR |
746 INTERCEPT_NMI |
747 INTERCEPT_SMI |
748 INTERCEPT_CPUID |
749 INTERCEPT_INVD |
750 INTERCEPT_HLT |
751 INTERCEPT_INVLPGA |
752 INTERCEPT_IOIO_PROT |
753 INTERCEPT_MSR_PROT |
754 INTERCEPT_SHUTDOWN |
755 INTERCEPT_VMRUN |
756 INTERCEPT_VMMCALL |
757 INTERCEPT_VMLOAD |
758 INTERCEPT_VMSAVE |
759 INTERCEPT_STGI |
760 INTERCEPT_CLGI |
761 INTERCEPT_SKINIT |
762 INTERCEPT_WBINVD |
763 INTERCEPT_MONITOR |
764 INTERCEPT_MWAIT_UNCOND;
766 control->iopm_base_pa = vtophys(iopm);
767 control->msrpm_base_pa = vtophys(msrpm);
768 control->tsc_offset = 0;
770 /* TODO: remove this once we assign asid's to distinct VM's */
771 control->guest_asid = 1;
772 control->tlb_control = VMCB_TLB_CONTROL_FLUSH_ALL;
774 /* let v_tpr default to 0 */
775 /* let v_irq default to 0 */
776 /* let v_intr default to 0 */
778 control->v_intr_masking = 1;
780 /* let v_intr_vector default to 0 */
781 /* let intr_shadow default to 0 */
782 /* let exit_code, exit_info_1, exit_info_2, exit_int_info,
783 exit_int_info_err_code default to 0 */
785 control->nested_ctl = 1;
787 /* let event_inj default to 0 */
789 // (nested_cr3 is later)
791 /* let lbr_virt_enable default to 0 */
794 fkvm_init_seg(&save->ds);
795 fkvm_init_seg(&save->es);
796 fkvm_init_seg(&save->fs);
797 fkvm_init_seg(&save->gs);
798 fkvm_init_seg(&save->ss);
800 _fkvm_init_seg(&save->cs, VMCB_SELECTOR_READ_MASK | VMCB_SELECTOR_S_MASK |
801 VMCB_SELECTOR_CODE_MASK);
802 save->cs.selector = 0xf000;
803 save->cs.base = 0xffff0000;
805 save->gdtr.limit = 0xffff;
806 save->idtr.limit = 0xffff;
808 fkvm_init_sys_seg(&save->ldtr, SDT_SYSLDT);
809 fkvm_init_sys_seg(&save->tr, SDT_SYS286BSY);
811 save->g_pat = PAT_VALUE(PAT_WRITE_BACK, 0) | PAT_VALUE(PAT_WRITE_THROUGH, 1) |
812 PAT_VALUE(PAT_UNCACHED, 2) | PAT_VALUE(PAT_UNCACHEABLE, 3) |
813 PAT_VALUE(PAT_WRITE_BACK, 4) | PAT_VALUE(PAT_WRITE_THROUGH, 5) |
814 PAT_VALUE(PAT_UNCACHED, 6) | PAT_VALUE(PAT_UNCACHEABLE, 7);
816 /* CR0 = 6000_0010h at boot */
817 save->cr0 = CR0_ET | CR0_NW | CR0_CD;
818 save->dr6 = 0xffff0ff0;
819 save->dr7 = 0x400;
820 save->rflags = 2;
821 save->rip = 0x0000fff0;
823 save->efer = EFER_SVME;
826 static void
827 fkvm_vmcb_free(struct vmcb *vmcb)
829 contigfree(vmcb, PAGE_SIZE, M_DEVBUF);
832 static struct vcpu*
833 fkvm_vcpu_create(struct guestvm *guest_vm)
835 struct vcpu *vcpu;
836 vcpu = malloc(sizeof(struct vcpu), M_DEVBUF, M_WAITOK|M_ZERO);
838 vcpu->vmcb = fkvm_vmcb_alloc();
839 vcpu->vmcb_pa = vtophys(vcpu->vmcb);
840 printf("vmcb = 0x%p\n", vcpu->vmcb);
841 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu->vmcb_pa);
843 fkvm_vmcb_init(vcpu->vmcb);
844 vcpu->vmcb->control.nested_cr3 = guest_vm->nested_cr3;
845 vcpu->regs[VCPU_REGS_RIP] = vcpu->vmcb->save.rip;
847 vcpu->guest_vm = guest_vm;
849 return vcpu;
852 static void
853 fkvm_vcpu_destroy(struct vcpu *vcpu)
855 fkvm_vmcb_free(vcpu->vmcb);
856 free(vcpu, M_DEVBUF);
859 static struct guestvm*
860 fkvm_guestvm_alloc(void)
862 return malloc(sizeof(struct guestvm), M_DEVBUF, M_WAITOK|M_ZERO);
865 static void
866 fkvm_guestvm_free(struct guestvm* guest_vm)
868 free(guest_vm, M_DEVBUF);
871 static void
872 fkvm_guestvm_add_vcpu(struct guestvm *guest_vm, struct vcpu *vcpu)
874 guest_vm->vcpus[guest_vm->nr_vcpus] = vcpu;
875 guest_vm->nr_vcpus++; /* TODO: Probably not safe to increment */
876 /* How about a lock to protect all of this? */
/*
 * System call stub: logs its name and always fails with ENOSYS.
 */
int
fkvm_userpoke(struct thread *td, struct fkvm_userpoke_args *uap)
{
	int rc = ENOSYS;

	printf("fkvm_userpoke\n");
	return rc;
}
887 static int
888 fkvm_mem_has_entry(vm_map_entry_t expected_entry, vm_map_t vm_map, vm_offset_t vaddr)
890 vm_map_entry_t lookup_entry;
891 vm_object_t throwaway_object;
892 vm_pindex_t throwaway_pindex;
893 vm_prot_t throwaway_prot;
894 boolean_t throwaway_wired;
895 int error;
897 error = vm_map_lookup(&vm_map, /* IN/OUT */
898 vaddr,
899 VM_PROT_READ|VM_PROT_WRITE,
900 &lookup_entry, /* OUT */
901 &throwaway_object, /* OUT */
902 &throwaway_pindex, /* OUT */
903 &throwaway_prot, /* OUT */
904 &throwaway_wired); /* OUT */
905 if (error != KERN_SUCCESS)
906 return 0;
907 vm_map_lookup_done(vm_map, lookup_entry);
908 return (lookup_entry == expected_entry);
911 static int
912 fkvm_guest_check_range(struct guestvm *guest_vm, uint64_t start, uint64_t end)
914 vm_map_t guest_vm_map;
915 vm_map_entry_t lookup_entry;
916 vm_object_t throwaway_object;
917 vm_pindex_t throwaway_pindex;
918 vm_prot_t throwaway_prot;
919 boolean_t throwaway_wired;
920 int ret;
921 int error;
923 guest_vm_map = &guest_vm->sp->vm_map;
925 error = vm_map_lookup(&guest_vm_map, /* IN/OUT */
926 start,
927 VM_PROT_READ|VM_PROT_WRITE,
928 &lookup_entry, /* OUT */
929 &throwaway_object, /* OUT */
930 &throwaway_pindex, /* OUT */
931 &throwaway_prot, /* OUT */
932 &throwaway_wired); /* OUT */
933 if (error != KERN_SUCCESS)
934 return EFAULT;
936 if (fkvm_mem_has_entry(lookup_entry, guest_vm_map, end))
937 ret = 0;
938 else
939 ret = EFAULT;
941 vm_map_lookup_done(guest_vm_map, lookup_entry);
942 return ret;
945 /* System Calls */
946 /* This function can only be called with multiples of page sizes */
947 /* vaddr as NULL overloads to fkvm_guest_check_range */
949 fkvm_set_user_mem_region(struct thread *td, struct fkvm_set_user_mem_region_args *uap)
951 struct guestvm *guest_vm = GET_GUESTVM(td);
953 vm_offset_t start;
954 vm_offset_t end;
956 struct vmspace *user_vm_space;
957 vm_map_t user_vm_map;
959 vm_object_t vm_object;
960 vm_pindex_t vm_object_pindex;
961 vm_ooffset_t vm_object_offset;
962 vm_prot_t throwaway_prot;
963 boolean_t throwaway_wired;
964 vm_map_entry_t lookup_entry;
966 int error;
968 start = uap->guest_pa;
969 end = uap->guest_pa + uap->size - 1;
970 printf("start: 0x%" PRIx64 " bytes\n", start);
971 printf("end: 0x%" PRIx64 " bytes\n", end);
973 if (uap->vaddr == 0)
974 return fkvm_guest_check_range(guest_vm, start, end);
976 user_vm_space = td->td_proc->p_vmspace;
977 user_vm_map = &user_vm_space->vm_map;
978 printf("user vm space: %p\n", user_vm_space);
979 printf("user vm map: %p\n", user_vm_map);
981 error = vm_map_lookup(&user_vm_map, /* IN/OUT */
982 uap->vaddr,
983 VM_PROT_READ|VM_PROT_WRITE,
984 &lookup_entry, /* OUT */
985 &vm_object, /* OUT */
986 &vm_object_pindex, /* OUT */
987 &throwaway_prot, /* OUT */
988 &throwaway_wired); /* OUT */
989 if (error != KERN_SUCCESS) {
990 printf("vm_map_lookup failed: %d\n", error);
991 return EFAULT;
994 if (!fkvm_mem_has_entry(lookup_entry, user_vm_map, uap->vaddr + uap->size)) {
995 printf("end of range not contained in same vm map entry as start\n");
996 return EFAULT;
999 printf("vm object: %p\n", vm_object);
1000 printf(" size: %d pages\n", (int) vm_object->size);
1002 vm_object_offset = IDX_TO_OFF(vm_object_pindex);
1003 printf("vm_ooffset: 0x%" PRIx64 "\n", vm_object_offset);
1005 vm_object_reference(vm_object); // TODO: this might be a mem leak
1007 vm_map_lookup_done(user_vm_map, lookup_entry);
1009 error = vm_map_insert(&guest_vm->sp->vm_map,
1010 vm_object,
1011 vm_object_offset,
1012 start,
1013 end,
1014 VM_PROT_ALL, VM_PROT_ALL,
1016 if (error != KERN_SUCCESS) {
1017 printf("vm_map_insert failed: %d\n", error);
1018 switch (error) {
1019 case KERN_INVALID_ADDRESS:
1020 return EINVAL;
1021 case KERN_NO_SPACE:
1022 return ENOMEM;
1023 default:
1024 return 1;
1028 return 0;
1032 fkvm_unset_user_mem_region(struct thread *td, struct fkvm_unset_user_mem_region_args *uap)
1034 struct guestvm *guest_vm = GET_GUESTVM(td);
1036 vm_offset_t start;
1037 vm_offset_t end;
1039 vm_map_t guest_vm_map;
1041 int error;
1043 start = uap->guest_pa;
1044 end = uap->guest_pa + uap->size - 1;
1045 printf("start: 0x%" PRIx64 " bytes\n", start);
1046 printf("end: 0x%" PRIx64 " bytes\n", end);
1048 guest_vm_map = &guest_vm->sp->vm_map;
1050 error = vm_map_remove(guest_vm_map, start, end);
1051 if (error != KERN_SUCCESS)
1052 return -1;
1054 return 0;
1058 fkvm_create_vm(struct thread *td, struct fkvm_create_vm_args *uap)
1060 struct vcpu *vcpu;
1061 struct guestvm *guest_vm;
1063 printf("SYSCALL : fkvm_create_vm\n");
1065 /* Allocate Guest VM */
1066 guest_vm = fkvm_guestvm_alloc();
1068 /* Set up the vm address space */
1069 guest_vm->sp = fkvm_make_vmspace();
1070 if (guest_vm->sp == NULL) {
1071 fkvm_guestvm_free(guest_vm);
1072 return ENOMEM;
1074 guest_vm->nested_cr3 = vtophys(vmspace_pmap(guest_vm->sp)->pm_pml4);
1076 printf("guest:\n");
1077 printf(" vm space: %p\n", guest_vm->sp);
1078 printf(" vm map: %p\n", &guest_vm->sp->vm_map);
1079 printf(" ncr3: 0x%" PRIx64 "\n", guest_vm->nested_cr3);
1081 /* Allocate VCPU0 */
1082 vcpu = fkvm_vcpu_create(guest_vm);
1083 fkvm_guestvm_add_vcpu(guest_vm, vcpu);
1085 PROC_SET_GUESTVM(td->td_proc, guest_vm);
1086 TD_SET_VCPU(td, vcpu);
1088 printf("fkvm_create_vm done\n");
1089 return 0;
1093 fkvm_destroy_vm(struct thread *td, struct fkvm_destroy_vm_args *uap)
1095 struct guestvm *guest_vm = GET_GUESTVM(td);
1097 /* Destroy the VCPUs */
1098 while (guest_vm->nr_vcpus > 0) {
1099 guest_vm->nr_vcpus--;
1100 fkvm_vcpu_destroy(guest_vm->vcpus[guest_vm->nr_vcpus]);
1101 guest_vm->vcpus[guest_vm->nr_vcpus] = NULL;
1104 /* Destroy the vmspace */
1105 fkvm_destroy_vmspace(guest_vm->sp);
1107 /* Destroy the Guest VM itself */
1108 fkvm_guestvm_free(guest_vm);
1110 return 0;
1114 fkvm_vm_run(struct thread *td, struct fkvm_vm_run_args *uap)
1116 struct vcpu *vcpu = TD_GET_VCPU(td);
1117 struct guestvm *guest_vm = GET_GUESTVM(td);
1118 struct vmcb *vmcb = vcpu->vmcb;
1119 int ret = 0;
1121 fkvm_vcpu_run(vcpu);
1123 switch (vmcb->control.exit_code) {
1125 case VMCB_EXIT_EXCP_BASE ... (VMCB_EXIT_EXCP_BASE + 31): {
1126 int excp_vector;
1128 excp_vector = vmcb->control.exit_code - VMCB_EXIT_EXCP_BASE;
1130 printf("VMCB_EXIT_EXCP_BASE, exception vector: 0x%x\n",
1131 excp_vector);
1132 ret = ENOSYS;
1133 break;
1136 case VMCB_EXIT_INTR: {
1137 printf("VMCB_EXIT_INTR - nothing to do\n");
1138 break;
1141 case VMCB_EXIT_NPF: {
1142 /* EXITINFO1 contains fault error code */
1143 /* EXITINFO2 contains the guest physical address causing the fault. */
1145 u_int64_t fault_code;
1146 u_int64_t fault_gpa;
1148 vm_prot_t fault_type;
1149 int fault_flags;
1150 int rc;
1152 fault_code = vmcb->control.exit_info_1;
1153 fault_gpa = vmcb->control.exit_info_2;
1155 printf("VMCB_EXIT_NPF:\n");
1156 printf("gpa=0x%" PRIx64 "\n", fault_gpa);
1157 printf("fault code=0x%" PRIx64 " [P=%x, R/W=%x, U/S=%x, I/D=%x]\n",
1158 fault_code,
1159 (fault_code & PGEX_P) != 0,
1160 (fault_code & PGEX_W) != 0,
1161 (fault_code & PGEX_U) != 0,
1162 (fault_code & PGEX_I) != 0);
1164 if (fault_code & PGEX_W)
1165 fault_type = VM_PROT_WRITE;
1166 else if (fault_code & PGEX_I)
1167 fault_type = VM_PROT_EXECUTE;
1168 else
1169 fault_type = VM_PROT_READ;
1171 fault_flags = 0; /* TODO: is that right? */
1172 rc = vm_fault(&guest_vm->sp->vm_map, (fault_gpa & (~PAGE_MASK)), fault_type, fault_flags);
1173 if (rc != KERN_SUCCESS) {
1174 printf("vm_fault failed: %d\n", rc);
1176 break;
1178 default:
1179 printf("Unhandled vmexit:\n"
1180 " code: 0x%" PRIx64 "\n"
1181 " info1: 0x%" PRIx64 "\n"
1182 " info2: 0x%" PRIx64 "\n",
1183 vmcb->control.exit_code,
1184 vmcb->control.exit_info_1,
1185 vmcb->control.exit_info_2);
1186 print_vmcb(vmcb);
1187 ret = ENOSYS;
1190 printf("\n\n");
1192 return ret;
1196 fkvm_create_vcpu(struct thread *td, struct fkvm_create_vcpu_args *uap)
1198 struct guestvm *guest_vm = PROC_GET_GUESTVM(td->td_proc);
1199 struct vcpu *vcpu;
1201 /* Allocate VCPU */
1202 vcpu = fkvm_vcpu_create(guest_vm);
1203 fkvm_guestvm_add_vcpu(guest_vm, vcpu);
1205 TD_SET_VCPU(td, vcpu);
1206 return 0;
1209 static void
1210 fkvm_load(void *unused)
1212 u_int64_t efer;
1214 printf("fkvm_load\n");
1215 printf("sizeof(struct vmcb) = %" PRIx64 "\n", sizeof(struct vmcb));
1217 hsave_area = NULL;
1218 iopm = NULL;
1219 msrpm = NULL;
1221 /* TODO: check for the presense of extensions */
1223 /* allocate structures */
1224 hsave_area = fkvm_hsave_area_alloc();
1225 iopm = fkvm_iopm_alloc();
1226 msrpm = fkvm_msrpm_alloc();
1228 /* Initialize structures */
1229 fkvm_hsave_area_init(hsave_area);
1230 fkvm_iopm_init(iopm);
1231 fkvm_msrpm_init(msrpm);
1233 /* Enable SVM in EFER */
1234 efer = rdmsr(MSR_EFER);
1235 printf("EFER = %" PRIx64 "\n", efer);
1236 wrmsr(MSR_EFER, efer | EFER_SVME);
1237 efer = rdmsr(MSR_EFER);
1238 printf("new EFER = %" PRIx64 "\n", efer);
1240 /* Write Host save address in MSR_VM_HSAVE_PA */
1241 wrmsr(MSR_VM_HSAVE_PA, vtophys(hsave_area));
1243 SYSINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_load, NULL);
1245 static void
1246 fkvm_unload(void *unused)
1248 printf("fkvm_unload\n");
1250 if (msrpm != NULL) {
1251 fkvm_msrpm_free(iopm);
1252 msrpm = NULL;
1254 if (iopm != NULL) {
1255 fkvm_iopm_free(iopm);
1256 iopm = NULL;
1258 if (hsave_area != NULL) {
1259 fkvm_hsave_area_free(hsave_area);
1260 hsave_area = NULL;
1263 SYSUNINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_unload, NULL);