stub get/set sregs
[freebsd-src/fkvm-freebsd.git] / sys / kern / kern_fkvm.c
blob8ae11b88abc8938d50606175a793391aed193789
1 /*-
2 * Copyright (c) 2008 The FreeBSD Project
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
27 #include <sys/cdefs.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/malloc.h>
32 #include <sys/sysproto.h>
33 #include <sys/file.h>
34 #include <sys/mman.h>
35 #include <sys/proc.h>
36 #include <vm/vm.h>
37 #include <vm/pmap.h>
38 #include <vm/vm_extern.h>
39 #include <vm/vm_map.h>
40 #include <vm/vm_object.h>
41 #include <vm/vm_param.h>
42 #include <machine/_inttypes.h>
43 #include <machine/specialreg.h>
44 #include <machine/segments.h>
45 #include <machine/vmcb.h>
/* Sizes, in bytes, used when allocating, filling and freeing the I/O and MSR permission maps. */
47 #define IOPM_SIZE (8*1024 + 1) /* TODO: ensure that this need not be 12 Kbytes, not just 8 Kbytes + 1 */
48 #define MSRPM_SIZE (8*1024)
50 /* fkvm data */
/* File-wide buffers; fkvm_vmcb_init() stores the physical addresses of iopm and msrpm in each VMCB. */
51 static void *iopm = NULL; /* Should I allocate a vm_object_t instead? */
52 static void *msrpm = NULL; /* Should I allocate a vm_object_t instead? */
/* NOTE(review): presumably the SVM host save area; it is not referenced by any code visible here. */
54 static void *hsave_area = NULL;
56 /* per-guest data */
/*
 * Indices into struct vcpu's regs[] array.  RAX..R15 are numbered 0..15,
 * RIP takes the next value (16), and NR_VCPU_REGS is the resulting array
 * length.
 */
58 enum {
59 VCPU_REGS_RAX = 0,
60 VCPU_REGS_RCX = 1,
61 VCPU_REGS_RDX = 2,
62 VCPU_REGS_RBX = 3,
63 VCPU_REGS_RSP = 4,
64 VCPU_REGS_RBP = 5,
65 VCPU_REGS_RSI = 6,
66 VCPU_REGS_RDI = 7,
67 VCPU_REGS_R8 = 8,
68 VCPU_REGS_R9 = 9,
69 VCPU_REGS_R10 = 10,
70 VCPU_REGS_R11 = 11,
71 VCPU_REGS_R12 = 12,
72 VCPU_REGS_R13 = 13,
73 VCPU_REGS_R14 = 14,
74 VCPU_REGS_R15 = 15,
75 VCPU_REGS_RIP,
76 NR_VCPU_REGS
/* State kept for one virtual CPU. */
79 struct vcpu {
80 /* VCPU data */
/* Kernel virtual address of the VMCB, and its physical address as returned by vtophys(). */
81 struct vmcb *vmcb;
82 unsigned long vmcb_pa;
/* Guest registers indexed by VCPU_REGS_*; RAX, RSP and RIP are copied to/from the VMCB around each run. */
84 unsigned long regs[NR_VCPU_REGS];
/* Host MSR_GSBASE value saved before entering the guest and written back afterwards. */
85 u_int64_t host_gs_base;
/* Guest CR2, copied into the VMCB before a run and read back after it. */
86 u_int64_t cr2;
/* NOTE(review): cr3 is not read or written by any code visible here. */
87 u_int64_t cr3;
/* Back-pointer to the owning guest, set in fkvm_vcpu_create(). */
89 struct guestvm *guest_vm;
/* Capacity of the vcpus[] array in struct guestvm. */
92 #define MAX_VCPUS 8
/* State kept for one guest virtual machine. */
94 struct guestvm {
/* VCPUs registered through fkvm_guestvm_add_vcpu(); nr_vcpus counts the used slots. */
95 struct vcpu *vcpus[MAX_VCPUS];
96 int nr_vcpus;
/* NOTE(review): presumably the vmspace produced by fkvm_make_vmspace(); its vm_map is searched by fkvm_guest_check_range(). */
98 struct vmspace *sp;
/* Copied into each VCPU's VMCB control.nested_cr3 when the VCPU is created. */
99 u_int64_t nested_cr3;
/*
 * SVM instructions spelled as raw opcode bytes so they can be pasted into
 * inline asm strings (presumably because the assembler may not know the
 * mnemonics -- confirm).
 */
103 #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
104 #define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"
105 #define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb"
106 #define SVM_CLGI ".byte 0x0f, 0x01, 0xdd"
107 #define SVM_STGI ".byte 0x0f, 0x01, 0xdc"
108 #define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf"
/*
 * Accessors for the fkvm pointers stored in the thread (vcpu) and process
 * (p_guestvm) structures.
 *
 * Every macro argument is parenthesized so that expressions such as `&obj`
 * expand correctly.  The value parameter of TD_SET_VCPU is deliberately NOT
 * called `vcpu`: a parameter with that name would also replace the member
 * name in `->vcpu`, making the macro work only when the caller's argument
 * happened to be spelled `vcpu`.
 */
#define TD_GET_VCPU(td)		((td)->vcpu)

#define TD_SET_VCPU(td, new_vcpu) do {		\
	(td)->vcpu = (new_vcpu);		\
} while (0)

#define PROC_GET_GUESTVM(p)	((p)->p_guestvm)

#define PROC_SET_GUESTVM(p, new_guestvm) do {	\
	(p)->p_guestvm = (new_guestvm);		\
} while (0)
122 static void
123 print_vmcb_seg(struct vmcb_seg* vmcb_seg, const char* name)
125 printf("%s Selector\n", name);
126 printf("Selector : %" PRIx16 "\n", vmcb_seg->selector);
127 printf("Attributes : %" PRIx16 "\n", vmcb_seg->attrib);
128 printf("Limit : %" PRIx32 "\n", vmcb_seg->limit);
129 printf("Base Address : %" PRIx64 "\n", vmcb_seg->base);
130 printf("\n");
133 static void
134 print_vmcb(struct vmcb *vmcb)
136 printf("VMCB Control Area\n");
137 printf("Intercept CR Reads : %" PRIx16 "\n", vmcb->control.intercept_cr_reads);
138 printf("Intercept CR Writes : %" PRIx16 "\n", vmcb->control.intercept_cr_writes);
139 printf("Intercept DR Reads : %" PRIx16 "\n", vmcb->control.intercept_dr_reads);
140 printf("Intercept DR Writes : %" PRIx16 "\n", vmcb->control.intercept_dr_writes);
141 printf("Intercept Exceptions : %" PRIx32 "\n", vmcb->control.intercept_exceptions);
142 printf("Intercepts : %" PRIx64 "\n", vmcb->control.intercepts);
143 printf("Reserved 1: \n");
144 for(int i=0; i < 44; i++) {
145 printf("%" PRIx8 "", vmcb->control.reserved_1[i]); /* Should be Zero */
147 printf("\n");
148 printf("IOPM Base PA : %" PRIx64 "\n", vmcb->control.iopm_base_pa);
149 printf("MSRPM Base PA : %" PRIx64 "\n", vmcb->control.msrpm_base_pa);
150 printf("TSC Offset : %" PRIx64 "\n", vmcb->control.tsc_offset);
151 printf("Guest ASID : %" PRIx32 "\n", vmcb->control.guest_asid);
152 printf("TLB Control : %" PRIx8 "\n", vmcb->control.tlb_control);
153 printf("Reserved 2 : \n");
154 for(int i=0; i < 3; i++) {
155 printf("%" PRIx8 "", vmcb->control.reserved_1[i]); /* Should be Zero */
157 printf("\n");
158 printf("Virtual TPR : %" PRIx8 "\n", vmcb->control.v_tpr);
159 printf("Virtual IRQ : %" PRIx8 "\n", vmcb->control.v_irq);
160 printf("Virtual Interrupt : %" PRIx8 "\n", vmcb->control.v_intr);
161 printf("Virtual Interrupt Masking: %" PRIx8 "\n", vmcb->control.v_intr_masking);
162 printf("Virtual Interrupt Vector : %" PRIx8 "\n", vmcb->control.v_intr_vector);
163 printf("Reserved 6 : \n");
164 for(int i=0; i < 3; i++) {
165 printf("%" PRIx8 "", vmcb->control.reserved_6[i]); /* Should be Zero */
167 printf("\n");
168 printf("Interrupt Shadow : %" PRIx8 "\n", vmcb->control.intr_shadow);
169 printf("Reserved 7 : \n");
170 for(int i=0; i < 7; i++) {
171 printf("%" PRIx8 "", vmcb->control.reserved_7[i]); /* Should be Zero */
173 printf("\n");
174 printf("Exit Code : %" PRIx64 "\n", vmcb->control.exit_code);
175 printf("Exit Info 1 : %" PRIx64 "\n", vmcb->control.exit_info_1);
176 printf("Exit Info 2 : %" PRIx64 "\n", vmcb->control.exit_info_2);
177 printf("Exit Interrupt Info : %" PRIx32 "\n", vmcb->control.exit_int_info);
178 printf("Exit Interrupt Info Err Code: %" PRIx32 "\n", vmcb->control.exit_int_info_err_code);
179 printf("Nested Control : %" PRIx64 "\n", vmcb->control.nested_ctl);
180 printf("Reserved 8 : \n");
181 for(int i=0; i < 16; i++) {
182 printf("%" PRIx8 "", vmcb->control.reserved_8[i]); /* Should be Zero */
184 printf("\n");
185 printf("Event Injection : %" PRIx64 "\n", vmcb->control.event_inj);
186 printf("Nested CR3 : %" PRIx64 "\n", vmcb->control.nested_cr3);
187 printf("LBR Virtualization Enable: %" PRIx64 "\n", vmcb->control.lbr_virt_enable);
188 printf("Reserved 9 : \n");
189 for(int i=0; i < 832; i++) {
190 printf("%" PRIx8 "", vmcb->control.reserved_9[i]); /* Should be Zero */
192 printf("\n");
194 printf("\n");
196 printf("VMCB Save Area\n");
197 print_vmcb_seg(&(vmcb->save.es), "ES");
198 print_vmcb_seg(&(vmcb->save.es), "CS");
199 print_vmcb_seg(&(vmcb->save.es), "SS");
200 print_vmcb_seg(&(vmcb->save.es), "DS");
201 print_vmcb_seg(&(vmcb->save.es), "FS");
202 print_vmcb_seg(&(vmcb->save.es), "GS");
203 print_vmcb_seg(&(vmcb->save.es), "GDTR");
204 print_vmcb_seg(&(vmcb->save.es), "LDTR");
205 print_vmcb_seg(&(vmcb->save.es), "IDTR");
206 print_vmcb_seg(&(vmcb->save.es), "TR");
207 printf("Reserved 1 : \n");
208 for(int i=0; i < 43; i++) {
209 printf("%" PRIx8 "", vmcb->save.reserved_1[i]); /* Should be Zero */
211 printf("\n");
212 printf("Current Processor Level : %" PRIx8 "\n", vmcb->save.cpl);
213 printf("Reserved 2 : \n");
214 for(int i=0; i < 4; i++) {
215 printf("%" PRIx8 "", vmcb->save.reserved_2[i]); /* Should be Zero */
217 printf("\n");
218 printf("EFER : %" PRIx64 "\n", vmcb->save.efer);
219 printf("Reserved 3 : \n");
220 for(int i=0; i < 112; i++) {
221 printf("%" PRIx8 "", vmcb->save.reserved_3[i]); /* Should be Zero */
223 printf("\n");
224 printf("Control Register 4 : %" PRIx64 "\n", vmcb->save.cr4);
225 printf("Control Register 3 : %" PRIx64 "\n", vmcb->save.cr3);
226 printf("Control Register 0 : %" PRIx64 "\n", vmcb->save.cr0);
227 printf("Debug Register 7 : %" PRIx64 "\n", vmcb->save.dr7);
228 printf("Debug Register 6 : %" PRIx64 "\n", vmcb->save.dr6);
229 printf("RFlags : %" PRIx64 "\n", vmcb->save.rflags);
230 printf("RIP : %" PRIx64 "\n", vmcb->save.rip);
231 printf("Reserved 4 : \n");
232 for(int i=0; i < 88; i++) {
233 printf("%" PRIx8 "", vmcb->save.reserved_4[i]); /* Should be Zero */
235 printf("\n");
236 printf("RSP : %" PRIx64 "\n", vmcb->save.rsp);
237 printf("Reserved 5 : \n");
238 for(int i=0; i < 24; i++) {
239 printf("%" PRIx8 "", vmcb->save.reserved_5[i]); /* Should be Zero */
241 printf("\n");
242 printf("RAX : %" PRIx64 "\n", vmcb->save.rax);
243 printf("STAR : %" PRIx64 "\n", vmcb->save.star);
244 printf("LSTAR : %" PRIx64 "\n", vmcb->save.lstar);
245 printf("CSTAR : %" PRIx64 "\n", vmcb->save.cstar);
246 printf("SFMASK : %" PRIx64 "\n", vmcb->save.sfmask);
247 printf("Kernel GS Base : %" PRIx64 "\n", vmcb->save.kernel_gs_base);
248 printf("SYSENTER CS : %" PRIx64 "\n", vmcb->save.sysenter_cs);
249 printf("SYSENTER ESP : %" PRIx64 "\n", vmcb->save.sysenter_esp);
250 printf("SYSENTER EIP : %" PRIx64 "\n", vmcb->save.sysenter_eip);
251 printf("Control Register 2 : %" PRIx64 "\n", vmcb->save.cr2);
252 printf("Reserved 6 : \n");
253 for(int i=0; i < 32; i++) {
254 printf("%" PRIx8 "", vmcb->save.reserved_6[i]); /* Should be Zero */
256 printf("\n");
257 printf("Global PAT : %" PRIx64 "\n", vmcb->save.g_pat);
258 printf("Debug Control : %" PRIx64 "\n", vmcb->save.dbg_ctl);
259 printf("BR From : %" PRIx64 "\n", vmcb->save.br_from);
260 printf("BR To : %" PRIx64 "\n", vmcb->save.br_to);
261 printf("Last Exception From : %" PRIx64 "\n", vmcb->save.last_excp_from);
262 printf("Last Exception To : %" PRIx64 "\n", vmcb->save.last_excp_to);
264 printf("\n\n");
/*
 * Compiled-out debugging helpers: print_tss_desc() prints every field of a
 * system segment descriptor, and print_tss() prints the 32-bit words of the
 * region the descriptor's base and limit fields describe.  Their only call
 * sites in fkvm_vcpu_run() are commented out.
 */
267 #if 0
268 static void
269 print_tss_desc(struct system_segment_descriptor *tss_desc)
271 printf("TSS desc @ %p:\n", tss_desc);
272 printf("sd_lolimit: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_lolimit);
273 printf("sd_lobase: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_lobase);
274 printf("sd_type: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_type);
275 printf("sd_dpl: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_dpl);
276 printf("sd_p: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_p);
277 printf("sd_hilimit: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_hilimit);
278 printf("sd_xx0: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx0);
279 printf("sd_gran: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_gran);
280 printf("sd_hibase: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_hibase);
281 printf("sd_xx1: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx1);
282 printf("sd_mbz: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_mbz);
283 printf("sd_xx2: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx2);
284 printf("\n\n");
287 static void
288 print_tss(struct system_segment_descriptor *tss_desc)
290 u_int32_t *base;
291 int limit;
292 int i;
/* Rebuild the base address from its high and low parts; the limit is divided by 4 to count 32-bit words. */
294 base = (u_int32_t*) ((((u_int64_t) tss_desc->sd_hibase) << 24) | ((u_int64_t) tss_desc->sd_lobase));
295 limit = ((tss_desc->sd_hilimit << 16) | tss_desc->sd_lolimit) / 4;
297 printf("TSS: @ %p\n", base);
298 for (i = 0; i <= limit; i++)
299 printf("%x: 0x%" PRIx32 "\n", i, base[i]);
300 printf("\n\n");
302 #endif
304 static void
305 print_vmcb_save_area(struct vmcb *vmcb)
307 printf("VMCB save area:\n");
308 printf(" cs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
309 vmcb->save.cs.selector,
310 vmcb->save.cs.attrib,
311 vmcb->save.cs.limit,
312 vmcb->save.cs.base);
313 printf(" fs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
314 vmcb->save.fs.selector,
315 vmcb->save.fs.attrib,
316 vmcb->save.fs.limit,
317 vmcb->save.fs.base);
318 printf(" gs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
319 vmcb->save.gs.selector,
320 vmcb->save.gs.attrib,
321 vmcb->save.gs.limit,
322 vmcb->save.gs.base);
323 printf(" tr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
324 vmcb->save.tr.selector,
325 vmcb->save.tr.attrib,
326 vmcb->save.tr.limit,
327 vmcb->save.tr.base);
328 printf(" ldtr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
329 vmcb->save.ldtr.selector,
330 vmcb->save.ldtr.attrib,
331 vmcb->save.ldtr.limit,
332 vmcb->save.ldtr.base);
333 printf(" rip: %" PRIx64 "\n", vmcb->save.rip);
334 printf(" kernel_gs_base: %" PRIx64 "\n", vmcb->save.kernel_gs_base);
335 printf(" star: %" PRIx64 "\n", vmcb->save.star);
336 printf(" lstar: %" PRIx64 "\n", vmcb->save.lstar);
337 printf(" cstar: %" PRIx64 "\n", vmcb->save.cstar);
338 printf(" sfmask: %" PRIx64 "\n", vmcb->save.sfmask);
339 printf(" sysenter_cs: %" PRIx64 "\n", vmcb->save.sysenter_cs);
340 printf(" sysenter_esp: %" PRIx64 "\n", vmcb->save.sysenter_esp);
341 printf(" sysenter_eip: %" PRIx64 "\n", vmcb->save.sysenter_eip);
342 printf("\n\n");
345 static int
346 vmrun_assert(struct vmcb *vmcb)
348 #define A(cond) do { if ((cond)) { printf("Error: assertion not met on line %d\n", __LINE__); bad = 1; } } while (0)
350 int bad;
352 bad = 0;
354 // The following are illegal:
356 //EFER.SVME is zero.
357 A((vmcb->save.efer & 0x0000000000001000) == 0);
359 // CR0.CD is zero and CR0.NW is set
360 A( ((vmcb->save.cr0 & 0x0000000040000000) == 0) &&
361 ((vmcb->save.cr0 & 0x0000000020000000) != 0));
363 // CR0[63:32] are not zero.
364 A((vmcb->save.cr0 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
366 // Any MBZ bit of CR3 is set.
367 A((vmcb->save.cr3 & 0xFFF0000000000000) != 0);
369 // CR4[63:11] are not zero.
370 A((vmcb->save.cr4 & 0xFFFFFFFFFFFFF800) == 0xFFFFFFFFFFFFF800);
372 // DR6[63:32] are not zero.
373 A((vmcb->save.dr6 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
375 // DR7[63:32] are not zero.
376 A((vmcb->save.dr7 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
378 // EFER[63:15] are not zero.
379 A((vmcb->save.efer & 0xFFFFFFFFFFFF8000) == 0xFFFFFFFFFFF8000);
381 // EFER.LMA or EFER.LME is non-zero and this processor does not support long mode.
382 //// A((vmcb->save.efer & 0x0000000000000500) != 0);
384 // EFER.LME and CR0.PG are both set and CR4.PAE is zero.
385 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
386 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
387 ((vmcb->save.cr4 & 0x0000000000000020) != 0));
389 // EFER.LME and CR0.PG are both non-zero and CR0.PE is zero.
390 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
391 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
392 ((vmcb->save.cr0 & 0x0000000000000001) == 0));
394 // EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
395 // cs.attrib = concat 55-52 and 47-40 (p372 v2)
396 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
397 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
398 ((vmcb->save.cr4 & 0x0000000000000020) != 0) &&
399 ((vmcb->save.cs.attrib & 0x0200) != 0) &&
400 ((vmcb->save.cs.attrib & 0x0400) != 0));
402 // The VMRUN intercept bit is clear.
403 A((vmcb->control.intercepts & 0x0000000100000000) == 0);
405 // The MSR or IOIO intercept tables extend to a physical address that is
406 // greater than or equal to the maximum supported physical address.
408 // Illegal event injection (see Section 15.19 on page 391).
410 // ASID is equal to zero.
411 A(vmcb->control.guest_asid == 0);
413 // VMRUN can load a guest value of CR0 with PE = 0 but PG = 1, a
414 // combination that is otherwise illegal (see Section 15.18).
416 // In addition to consistency checks, VMRUN and #VMEXIT canonicalize (i.e.,
417 // sign-extend to 63 bits) all base addresses in the segment registers
418 // that have been loaded.
420 return bad;
422 #undef A
/*
 * Enter the guest described by vcpu once and return after the guest exits.
 *
 * Sequence, as written below: validate the VMCB with vmrun_assert() (and
 * return without running on failure), copy RAX/RSP/RIP and CR2 into the
 * VMCB, snapshot host state, execute CLGI, load the remaining guest
 * registers and issue VMLOAD/VMRUN/VMSAVE, store the guest registers back
 * into vcpu, restore the host state, execute STGI, and print the exit code.
 *
 * The function returns nothing; the exit code is only printed.
 */
425 static void
426 fkvm_vcpu_run(struct vcpu *vcpu)
428 u_int64_t lstar;
429 u_int64_t cstar;
430 u_int64_t star;
431 u_int64_t sfmask;
433 u_short fs_selector;
434 u_short gs_selector;
435 u_short ldt_selector;
437 unsigned long host_cr2;
438 unsigned long host_dr6;
439 unsigned long host_dr7;
441 struct system_segment_descriptor *tss_desc;
442 u_int64_t sel;
444 struct vmcb *vmcb;
446 printf("begin fkvm_vcpu_run\n");
448 vmcb = vcpu->vmcb;
/* Refuse to run when the pre-check reports an illegal VMCB state. */
450 if (vmrun_assert(vmcb))
451 return;
/* GDT descriptor and selector for GPROC0_SEL; used after the guest exits to reload TR. */
453 tss_desc = (struct system_segment_descriptor*) (&gdt[GPROC0_SEL]);
454 sel = GSEL(GPROC0_SEL, SEL_KPL);
456 // printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64 "\n", sel);
457 // print_tss_desc(tss_desc);
458 // print_tss(tss_desc);
460 print_vmcb_save_area(vmcb);
461 printf("vcpu->regs[VCPU_REGS_RIP]: 0x%lx\n", vcpu->regs[VCPU_REGS_RIP]);
462 // disable_intr();
/* RAX, RSP and RIP travel through the VMCB save area; the asm block below loads every other general-purpose register itself. */
464 vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX];
465 vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP];
466 vmcb->save.rip = vcpu->regs[VCPU_REGS_RIP];
468 /* meh: kvm has pre_svm_run(svm); */
/* Snapshot host state that is written back after the guest exits: GS base, FS/GS/LDT selectors, CR2, DR6/DR7. */
470 vcpu->host_gs_base = rdmsr(MSR_GSBASE);
471 // printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);
473 fs_selector = rfs();
474 gs_selector = rgs();
475 ldt_selector = rldt();
476 // printf("fs selector: %hx\n", fs_selector);
477 // printf("gs selector: %hx\n", gs_selector);
478 // printf("ldt selector: %hx\n", ldt_selector);
480 host_cr2 = rcr2();
482 host_dr6 = rdr6();
483 host_dr7 = rdr7();
485 vmcb->save.cr2 = vcpu->cr2;
486 /* meh: cr3? */
488 /* meh: dr7? db_regs? */
490 // printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
491 // printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
492 // printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
493 // printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));
/* Host syscall MSRs, rewritten after the guest exits. */
495 star = rdmsr(MSR_STAR);
496 lstar = rdmsr(MSR_LSTAR);
497 cstar = rdmsr(MSR_CSTAR);
498 sfmask = rdmsr(MSR_SF_MASK);
500 printf("CLGI...\n");
502 __asm __volatile (SVM_CLGI);
505 // enable_intr();
/*
 * %rax enters holding the vcpu pointer (the "a" constraint).  Guest
 * registers are loaded from vcpu->regs[], then %rax is pushed and replaced
 * with vcpu->vmcb_pa for VMLOAD/VMRUN/VMSAVE and popped again so the guest
 * registers can be stored back through the same pointer.  The host %rbp is
 * preserved with the outer push/pop.
 */
507 __asm __volatile (
508 "push %%rbp; \n\t"
509 "mov %c[rbx](%[svm]), %%rbx \n\t"
510 "mov %c[rcx](%[svm]), %%rcx \n\t"
511 "mov %c[rdx](%[svm]), %%rdx \n\t"
512 "mov %c[rsi](%[svm]), %%rsi \n\t"
513 "mov %c[rdi](%[svm]), %%rdi \n\t"
514 "mov %c[rbp](%[svm]), %%rbp \n\t"
515 "mov %c[r8](%[svm]), %%r8 \n\t"
516 "mov %c[r9](%[svm]), %%r9 \n\t"
517 "mov %c[r10](%[svm]), %%r10 \n\t"
518 "mov %c[r11](%[svm]), %%r11 \n\t"
519 "mov %c[r12](%[svm]), %%r12 \n\t"
520 "mov %c[r13](%[svm]), %%r13 \n\t"
521 "mov %c[r14](%[svm]), %%r14 \n\t"
522 "mov %c[r15](%[svm]), %%r15 \n\t"
524 /* Enter guest mode */
525 "push %%rax \n\t"
526 "mov %c[vmcb](%[svm]), %%rax \n\t"
527 SVM_VMLOAD "\n\t"
528 SVM_VMRUN "\n\t"
529 SVM_VMSAVE "\n\t"
530 "pop %%rax \n\t"
532 /* Save guest registers, load host registers */
533 "mov %%rbx, %c[rbx](%[svm]) \n\t"
534 "mov %%rcx, %c[rcx](%[svm]) \n\t"
535 "mov %%rdx, %c[rdx](%[svm]) \n\t"
536 "mov %%rsi, %c[rsi](%[svm]) \n\t"
537 "mov %%rdi, %c[rdi](%[svm]) \n\t"
538 "mov %%rbp, %c[rbp](%[svm]) \n\t"
539 "mov %%r8, %c[r8](%[svm]) \n\t"
540 "mov %%r9, %c[r9](%[svm]) \n\t"
541 "mov %%r10, %c[r10](%[svm]) \n\t"
542 "mov %%r11, %c[r11](%[svm]) \n\t"
543 "mov %%r12, %c[r12](%[svm]) \n\t"
544 "mov %%r13, %c[r13](%[svm]) \n\t"
545 "mov %%r14, %c[r14](%[svm]) \n\t"
546 "mov %%r15, %c[r15](%[svm]) \n\t"
547 "pop %%rbp"
549 : [svm]"a"(vcpu),
550 [vmcb]"i"(offsetof(struct vcpu, vmcb_pa)),
551 [rbx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBX])),
552 [rcx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RCX])),
553 [rdx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDX])),
554 [rsi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RSI])),
555 [rdi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDI])),
556 [rbp]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBP])),
557 [r8 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R8 ])),
558 [r9 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R9 ])),
559 [r10]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R10])),
560 [r11]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R11])),
561 [r12]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R12])),
562 [r13]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R13])),
563 [r14]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R14])),
564 [r15]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R15]))
565 : "cc", "memory",
566 "rbx", "rcx", "rdx", "rsi", "rdi",
567 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
571 /* meh: dr7? db_regs? */
/* Pull the guest CR2/RAX/RSP/RIP values back out of the VMCB save area. */
573 vcpu->cr2 = vmcb->save.cr2;
575 vcpu->regs[VCPU_REGS_RAX] = vmcb->save.rax;
576 vcpu->regs[VCPU_REGS_RSP] = vmcb->save.rsp;
577 vcpu->regs[VCPU_REGS_RIP] = vmcb->save.rip;
/* Write back the host state captured before CLGI. */
579 load_dr6(host_dr6);
580 load_dr7(host_dr7);
582 load_cr2(host_cr2);
584 load_fs(fs_selector);
585 load_gs(gs_selector);
586 lldt(ldt_selector);
588 wrmsr(MSR_GSBASE, vcpu->host_gs_base);
/* Reset the descriptor type before reloading TR (presumably because ltr rejects a TSS descriptor still marked busy -- confirm). */
590 tss_desc->sd_type = SDT_SYSTSS;
591 ltr(sel);
593 wrmsr(MSR_STAR, star);
594 wrmsr(MSR_LSTAR, lstar);
595 wrmsr(MSR_CSTAR, cstar);
596 wrmsr(MSR_SF_MASK, sfmask);
598 // disable_intr();
600 __asm __volatile (SVM_STGI);
602 printf("STGI\n");
604 printf("exit_code: %" PRIx64 "\n", vmcb->control.exit_code);
606 // print_tss_desc(tss_desc);
607 // print_tss(tss_desc);
609 print_vmcb_save_area(vmcb);
611 // enable_intr();
613 /* meh: next_rip */
616 static void
617 _fkvm_init_seg(struct vmcb_seg *seg, uint16_t attrib)
619 seg->selector = 0;
620 seg->attrib = VMCB_SELECTOR_P_MASK | attrib;
621 seg->limit = 0xffff;
622 seg->base = 0;
625 static inline void
626 fkvm_init_seg(struct vmcb_seg *seg)
628 _fkvm_init_seg(seg, VMCB_SELECTOR_S_MASK | VMCB_SELECTOR_WRITE_MASK);
/*
 * Reset a system segment (used for LDTR and TR) with a caller-chosen type;
 * this is a direct pass-through to _fkvm_init_seg().
 */
static inline void
fkvm_init_sys_seg(struct vmcb_seg *seg, uint16_t attrib)
{
	_fkvm_init_seg(seg, attrib);
	return;
}
637 static void*
638 fkvm_iopm_alloc(void)
640 return contigmalloc(IOPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
643 static void
644 fkvm_iopm_init(void *iopm)
646 memset(iopm, 0xff, IOPM_SIZE); /* TODO: we may want to allow access to PC debug port */
649 static void
650 fkvm_iopm_free(void *iopm)
652 contigfree(iopm, IOPM_SIZE, M_DEVBUF);
655 static void*
656 fkvm_msrpm_alloc(void)
658 return contigmalloc(MSRPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
661 static void
662 fkvm_msrpm_init(void *msrpm)
664 memset(msrpm, 0xff, MSRPM_SIZE); /* TODO: we may want to allow some MSR accesses */
667 static void
668 fkvm_msrpm_free(void *msrpm)
670 contigfree(msrpm, MSRPM_SIZE, M_DEVBUF);
673 static void*
674 fkvm_hsave_area_alloc(void)
676 return contigmalloc(PAGE_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
/*
 * Initialization hook for the host save area.  No statements are visible
 * for its body in this view, so it appears to be a placeholder that keeps
 * the alloc/init/free trio symmetric with the IOPM and MSRPM helpers.
 */
679 static void
680 fkvm_hsave_area_init(void *hsave_area)
684 static void
685 fkvm_hsave_area_free(void *hsave_area)
687 contigfree(hsave_area, PAGE_SIZE, M_DEVBUF);
/*
 * Create a vmspace spanning addresses 0 .. 0xffffffffffffffff.
 *
 * Returns the new vmspace, or NULL (after printing a message) when
 * vmspace_alloc() fails.
 */
static struct vmspace*
fkvm_make_vmspace(void)
{
	struct vmspace *space = vmspace_alloc(0, 0xffffffffffffffff);

	if (space != NULL)
		return space;

	printf("vmspace_alloc failed\n");
	return NULL;
}
/* Hand a vmspace created by fkvm_make_vmspace() to vmspace_free(). */
static void
fkvm_destroy_vmspace(struct vmspace* sp)
{
	vmspace_free(sp);
	return;
}
710 static struct vmcb*
711 fkvm_vmcb_alloc(void)
713 return contigmalloc(PAGE_SIZE, M_DEVBUF, M_ZERO, 0, -1UL,
714 PAGE_SIZE, 0);
717 static void
718 fkvm_vmcb_init(struct vmcb *vmcb)
720 struct vmcb_control_area *control = &vmcb->control;
721 struct vmcb_save_area *save = &vmcb->save;
723 control->intercept_cr_reads = INTERCEPT_CR4_MASK;
725 control->intercept_cr_writes = INTERCEPT_CR4_MASK |
726 INTERCEPT_CR8_MASK;
728 control->intercept_dr_reads = INTERCEPT_DR0_MASK |
729 INTERCEPT_DR1_MASK |
730 INTERCEPT_DR2_MASK |
731 INTERCEPT_DR3_MASK;
733 control->intercept_dr_writes = INTERCEPT_DR0_MASK |
734 INTERCEPT_DR1_MASK |
735 INTERCEPT_DR2_MASK |
736 INTERCEPT_DR3_MASK |
737 INTERCEPT_DR5_MASK |
738 INTERCEPT_DR7_MASK;
740 control->intercept_exceptions = (1 << IDT_UD) | // Invalid Opcode
741 (1 << IDT_MC); // Machine Check
743 control->intercepts = INTERCEPT_INTR |
744 INTERCEPT_NMI |
745 INTERCEPT_SMI |
746 INTERCEPT_CPUID |
747 INTERCEPT_INVD |
748 INTERCEPT_HLT |
749 INTERCEPT_INVLPGA |
750 INTERCEPT_IOIO_PROT |
751 INTERCEPT_MSR_PROT |
752 INTERCEPT_SHUTDOWN |
753 INTERCEPT_VMRUN |
754 INTERCEPT_VMMCALL |
755 INTERCEPT_VMLOAD |
756 INTERCEPT_VMSAVE |
757 INTERCEPT_STGI |
758 INTERCEPT_CLGI |
759 INTERCEPT_SKINIT |
760 INTERCEPT_WBINVD |
761 INTERCEPT_MONITOR |
762 INTERCEPT_MWAIT_UNCOND;
764 control->iopm_base_pa = vtophys(iopm);
765 control->msrpm_base_pa = vtophys(msrpm);
766 control->tsc_offset = 0;
768 /* TODO: remove this once we assign asid's to distinct VM's */
769 control->guest_asid = 1;
770 control->tlb_control = VMCB_TLB_CONTROL_FLUSH_ALL;
772 /* let v_tpr default to 0 */
773 /* let v_irq default to 0 */
774 /* let v_intr default to 0 */
776 control->v_intr_masking = 1;
778 /* let v_intr_vector default to 0 */
779 /* let intr_shadow default to 0 */
780 /* let exit_code, exit_info_1, exit_info_2, exit_int_info,
781 exit_int_info_err_code default to 0 */
783 control->nested_ctl = 1;
785 /* let event_inj default to 0 */
787 // (nested_cr3 is later)
789 /* let lbr_virt_enable default to 0 */
792 fkvm_init_seg(&save->ds);
793 fkvm_init_seg(&save->es);
794 fkvm_init_seg(&save->fs);
795 fkvm_init_seg(&save->gs);
796 fkvm_init_seg(&save->ss);
798 _fkvm_init_seg(&save->cs, VMCB_SELECTOR_READ_MASK | VMCB_SELECTOR_S_MASK |
799 VMCB_SELECTOR_CODE_MASK);
800 save->cs.selector = 0xf000;
801 save->cs.base = 0xffff0000;
803 save->gdtr.limit = 0xffff;
804 save->idtr.limit = 0xffff;
806 fkvm_init_sys_seg(&save->ldtr, SDT_SYSLDT);
807 fkvm_init_sys_seg(&save->tr, SDT_SYS286BSY);
809 save->g_pat = PAT_VALUE(PAT_WRITE_BACK, 0) | PAT_VALUE(PAT_WRITE_THROUGH, 1) |
810 PAT_VALUE(PAT_UNCACHED, 2) | PAT_VALUE(PAT_UNCACHEABLE, 3) |
811 PAT_VALUE(PAT_WRITE_BACK, 4) | PAT_VALUE(PAT_WRITE_THROUGH, 5) |
812 PAT_VALUE(PAT_UNCACHED, 6) | PAT_VALUE(PAT_UNCACHEABLE, 7);
814 /* CR0 = 6000_0010h at boot */
815 save->cr0 = CR0_ET | CR0_NW | CR0_CD;
816 save->dr6 = 0xffff0ff0;
817 save->dr7 = 0x400;
818 save->rflags = 2;
819 save->rip = 0x0000fff0;
821 save->efer = EFER_SVME;
824 static void
825 fkvm_vmcb_free(struct vmcb *vmcb)
827 contigfree(vmcb, PAGE_SIZE, M_DEVBUF);
830 static struct vcpu*
831 fkvm_vcpu_create(struct guestvm *guest_vm)
833 struct vcpu *vcpu;
834 vcpu = malloc(sizeof(struct vcpu), M_DEVBUF, M_WAITOK|M_ZERO);
836 vcpu->vmcb = fkvm_vmcb_alloc();
837 vcpu->vmcb_pa = vtophys(vcpu->vmcb);
838 printf("vmcb = 0x%p\n", vcpu->vmcb);
839 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu->vmcb_pa);
841 fkvm_vmcb_init(vcpu->vmcb);
842 vcpu->vmcb->control.nested_cr3 = guest_vm->nested_cr3;
843 vcpu->regs[VCPU_REGS_RIP] = vcpu->vmcb->save.rip;
845 vcpu->guest_vm = guest_vm;
847 return vcpu;
850 static void
851 fkvm_vcpu_destroy(struct vcpu *vcpu)
853 fkvm_vmcb_free(vcpu->vmcb);
854 free(vcpu, M_DEVBUF);
857 static struct guestvm*
858 fkvm_guestvm_alloc(void)
860 return malloc(sizeof(struct guestvm), M_DEVBUF, M_WAITOK|M_ZERO);
863 static void
864 fkvm_guestvm_free(struct guestvm* guest_vm)
866 free(guest_vm, M_DEVBUF);
869 static void
870 fkvm_guestvm_add_vcpu(struct guestvm *guest_vm, struct vcpu *vcpu)
872 guest_vm->vcpus[guest_vm->nr_vcpus] = vcpu;
873 guest_vm->nr_vcpus++; /* TODO: Probably not safe to increment */
874 /* How about a lock to protect all of this? */
/*
 * Placeholder system call: prints its name and fails with ENOSYS.
 * Neither td nor uap is examined.
 */
int
fkvm_userpoke(struct thread *td, struct fkvm_userpoke_args *uap)
{
	const int result = ENOSYS;

	printf("fkvm_userpoke\n");
	return result;
}
885 static int
886 fkvm_mem_has_entry(vm_map_entry_t expected_entry, vm_map_t vm_map, vm_offset_t vaddr)
888 vm_map_entry_t lookup_entry;
889 vm_object_t throwaway_object;
890 vm_pindex_t throwaway_pindex;
891 vm_prot_t throwaway_prot;
892 boolean_t throwaway_wired;
893 int error;
895 error = vm_map_lookup(&vm_map, /* IN/OUT */
896 vaddr,
897 VM_PROT_READ|VM_PROT_WRITE,
898 &lookup_entry, /* OUT */
899 &throwaway_object, /* OUT */
900 &throwaway_pindex, /* OUT */
901 &throwaway_prot, /* OUT */
902 &throwaway_wired); /* OUT */
903 if (error != KERN_SUCCESS)
904 return 0;
905 vm_map_lookup_done(vm_map, lookup_entry);
906 return (lookup_entry == expected_entry);
909 static int
910 fkvm_guest_check_range(struct guestvm *guest_vm, uint64_t start, uint64_t end)
912 vm_map_t guest_vm_map;
913 vm_map_entry_t lookup_entry;
914 vm_object_t throwaway_object;
915 vm_pindex_t throwaway_pindex;
916 vm_prot_t throwaway_prot;
917 boolean_t throwaway_wired;
918 int ret;
919 int error;
921 guest_vm_map = &guest_vm->sp->vm_map;
923 error = vm_map_lookup(&guest_vm_map, /* IN/OUT */
924 start,
925 VM_PROT_READ|VM_PROT_WRITE,
926 &lookup_entry, /* OUT */
927 &throwaway_object, /* OUT */
928 &throwaway_pindex, /* OUT */
929 &throwaway_prot, /* OUT */
930 &throwaway_wired); /* OUT */
931 if (error != KERN_SUCCESS)
932 return EFAULT;
933 vm_map_lookup_done(guest_vm_map, lookup_entry);
936 TODO: We can't actually nest the lookups:
937 panic: _sx_xlock_hard: recursed on non-recursive sx user map @ ../../../vm/vm_map.c:3115
938 Therefore, I've moved the lookup_done above for now, but we really need a lock here.
940 Maybe it's better to use vm_map_lookup_entry directly.
944 if (fkvm_mem_has_entry(lookup_entry, guest_vm_map, end))
945 ret = 0;
946 else
947 ret = EFAULT;
949 return ret;
/*
 * Values of the `type` argument of fkvm_get_regs()/fkvm_set_regs(): REGS
 * selects struct fkvm_regs_regs, SREGS selects struct fkvm_regs_sregs.
 */
952 enum {
953 FKVM_REGS_TYPE_REGS=1,
954 FKVM_REGS_TYPE_SREGS=2,
/*
 * General-purpose register block copied to and from userland by the
 * get/set regs system calls.  rax..rip map onto vcpu->regs[]; rflags maps
 * onto the VMCB save area.
 */
957 struct fkvm_regs_regs {
958 uint64_t rax;
959 uint64_t rbx;
960 uint64_t rcx;
961 uint64_t rdx;
962 uint64_t rsi;
963 uint64_t rdi;
964 uint64_t rsp;
965 uint64_t rbp;
966 uint64_t r8;
967 uint64_t r9;
968 uint64_t r10;
969 uint64_t r11;
970 uint64_t r12;
971 uint64_t r13;
972 uint64_t r14;
973 uint64_t r15;
974 uint64_t rip;
975 uint64_t rflags;
/*
 * One segment register as exchanged with userland inside struct
 * fkvm_regs_sregs.  No code visible here reads or writes these fields yet
 * (the sregs getter and setter are stubs).
 */
978 struct fkvm_seg {
979 uint64_t base;
980 uint32_t limit;
981 uint16_t selectors;
982 uint8_t type;
983 uint8_t present;
984 uint8_t dpl;
985 uint8_t db;
986 uint8_t s;
987 uint8_t l;
988 uint8_t g;
989 uint8_t avl;
990 uint8_t unusable;
991 uint8_t padding;
/* Base and limit of one descriptor table (used for the gdt and idt members of struct fkvm_regs_sregs). */
994 struct fkvm_dtable {
995 uint64_t base;
996 uint16_t limit;
/* Three 16-bit pad words bring the structure to 16 bytes. */
997 uint16_t padding[3];
/*
 * Special-register block copied to and from userland when the regs type is
 * FKVM_REGS_TYPE_SREGS.  The kernel-side getter and setter are still stubs,
 * so none of these fields is filled in or consumed yet.
 */
1000 struct fkvm_regs_sregs {
1001 struct fkvm_seg cs;
1002 struct fkvm_seg ds;
1003 struct fkvm_seg es;
1004 struct fkvm_seg fs;
1005 struct fkvm_seg gs;
1006 struct fkvm_seg ss;
1007 struct fkvm_seg tr;
1008 struct fkvm_seg ldt;
1009 struct fkvm_dtable gdt;
1010 struct fkvm_dtable idt;
1011 uint64_t cr0;
1012 uint64_t cr2;
1013 uint64_t cr3;
1014 uint64_t cr4;
1015 uint64_t cr8;
1016 uint64_t efer;
1017 uint64_t apic_base;
/* (255 + 63) / 64 evaluates to 4, so the bitmap below has four 64-bit words. */
1018 #define KVM_NR_INTERRUPTS 255
1019 uint64_t interrupt_bitmap[(KVM_NR_INTERRUPTS + 63) / 64];
1022 static void
1023 fkvm_get_regs_regs(struct vcpu *vcpu, struct fkvm_regs_regs *out)
1025 out->rax = vcpu->regs[VCPU_REGS_RAX];
1026 out->rbx = vcpu->regs[VCPU_REGS_RBX];
1027 out->rcx = vcpu->regs[VCPU_REGS_RCX];
1028 out->rdx = vcpu->regs[VCPU_REGS_RDX];
1029 out->rsi = vcpu->regs[VCPU_REGS_RSI];
1030 out->rdi = vcpu->regs[VCPU_REGS_RDI];
1031 out->rsp = vcpu->regs[VCPU_REGS_RSP];
1032 out->rbp = vcpu->regs[VCPU_REGS_RBP];
1033 out->r8 = vcpu->regs[VCPU_REGS_R8];
1034 out->r9 = vcpu->regs[VCPU_REGS_R9];
1035 out->r10 = vcpu->regs[VCPU_REGS_R10];
1036 out->r11 = vcpu->regs[VCPU_REGS_R11];
1037 out->r12 = vcpu->regs[VCPU_REGS_R12];
1038 out->r13 = vcpu->regs[VCPU_REGS_R13];
1039 out->r14 = vcpu->regs[VCPU_REGS_R14];
1040 out->r15 = vcpu->regs[VCPU_REGS_R15];
1041 out->rip = vcpu->regs[VCPU_REGS_RIP];
1042 out->rflags = vcpu->vmcb->save.rflags;
1045 static void
1046 fkvm_set_regs_regs(struct vcpu *vcpu, const struct fkvm_regs_regs *in)
1048 vcpu->regs[VCPU_REGS_RAX] = in->rax;
1049 vcpu->regs[VCPU_REGS_RBX] = in->rbx;
1050 vcpu->regs[VCPU_REGS_RCX] = in->rcx;
1051 vcpu->regs[VCPU_REGS_RDX] = in->rdx;
1052 vcpu->regs[VCPU_REGS_RSI] = in->rsi;
1053 vcpu->regs[VCPU_REGS_RDI] = in->rdi;
1054 vcpu->regs[VCPU_REGS_RSP] = in->rsp;
1055 vcpu->regs[VCPU_REGS_RBP] = in->rbp;
1056 vcpu->regs[VCPU_REGS_R8] = in->r8;
1057 vcpu->regs[VCPU_REGS_R9] = in->r9;
1058 vcpu->regs[VCPU_REGS_R10] = in->r10;
1059 vcpu->regs[VCPU_REGS_R11] = in->r11;
1060 vcpu->regs[VCPU_REGS_R12] = in->r12;
1061 vcpu->regs[VCPU_REGS_R13] = in->r13;
1062 vcpu->regs[VCPU_REGS_R14] = in->r14;
1063 vcpu->regs[VCPU_REGS_R15] = in->r15;
1064 vcpu->regs[VCPU_REGS_RIP] = in->rip;
1065 vcpu->vmcb->save.rflags = in->rflags;
/*
 * Stub: meant to fill *out with the VCPU's special registers, but the only
 * content visible for its body is the XXX marker, so *out is left exactly
 * as the caller passed it in.
 */
1068 static void
1069 fkvm_get_regs_sregs(struct vcpu *vcpu, struct fkvm_regs_sregs *out)
1071 /* XXX */
/*
 * Load the VCPU state described by struct fkvm_regs_sregs from *in.
 *
 * XXX: not implemented yet; the supplied values are currently ignored and
 * the VCPU is left unchanged.
 */
static void
fkvm_set_regs_sregs(struct vcpu *vcpu, const struct fkvm_regs_sregs *in)
{
	/* XXX */
}
1080 /* System Calls */
1083 fkvm_get_regs(struct thread *td, struct fkvm_get_regs_args *uap)
1085 struct vcpu *vcpu = TD_GET_VCPU(td);
1087 switch (uap->type) {
1089 case FKVM_REGS_TYPE_REGS: {
1090 struct fkvm_regs_regs out;
1091 fkvm_get_regs_regs(vcpu, &out);
1092 return copyout(&out, uap->regs, sizeof(out));
1095 case FKVM_REGS_TYPE_SREGS: {
1096 struct fkvm_regs_sregs out;
1097 fkvm_get_regs_sregs(vcpu, &out);
1098 return copyout(&out, uap->regs, sizeof(out));
1102 default:
1103 return EINVAL;
1108 fkvm_set_regs(struct thread *td, struct fkvm_set_regs_args *uap)
1110 struct vcpu *vcpu = TD_GET_VCPU(td);
1111 int error = 0;
1113 switch (uap->type) {
1115 case FKVM_REGS_TYPE_REGS: {
1116 struct fkvm_regs_regs in;
1117 error = copyin(uap->regs, &in, sizeof(in));
1118 if (error != 0)
1119 return error;
1120 fkvm_set_regs_regs(vcpu, &in);
1121 return 0;
1124 case FKVM_REGS_TYPE_SREGS: {
1125 struct fkvm_regs_sregs in;
1126 error = copyin(uap->regs, &in, sizeof(in));
1127 if (error != 0)
1128 return error;
1129 fkvm_set_regs_sregs(vcpu, &in);
1130 return 0;
1133 default:
1134 return EINVAL;
1138 /* This function can only be called with multiples of page sizes */
1139 /* vaddr as NULL overloads to fkvm_guest_check_range */
1141 fkvm_set_user_mem_region(struct thread *td, struct fkvm_set_user_mem_region_args *uap)
1143 struct guestvm *guest_vm = PROC_GET_GUESTVM(td->td_proc);
1145 vm_offset_t start;
1146 vm_offset_t end;
1148 struct vmspace *user_vm_space;
1149 vm_map_t user_vm_map;
1151 vm_object_t vm_object;
1152 vm_pindex_t vm_object_pindex;
1153 vm_ooffset_t vm_object_offset;
1154 vm_prot_t throwaway_prot;
1155 boolean_t throwaway_wired;
1156 vm_map_entry_t lookup_entry;
1158 int error;
1160 start = uap->guest_pa;
1161 end = uap->guest_pa + uap->size - 1;
1162 printf("start: 0x%" PRIx64 " bytes\n", start);
1163 printf("end: 0x%" PRIx64 " bytes\n", end);
1165 if (uap->vaddr == 0)
1166 return fkvm_guest_check_range(guest_vm, start, end);
1168 user_vm_space = td->td_proc->p_vmspace;
1169 user_vm_map = &user_vm_space->vm_map;
1170 printf("user vm space: %p\n", user_vm_space);
1171 printf("user vm map: %p\n", user_vm_map);
1173 error = vm_map_lookup(&user_vm_map, /* IN/OUT */
1174 uap->vaddr,
1175 VM_PROT_READ|VM_PROT_WRITE,
1176 &lookup_entry, /* OUT */
1177 &vm_object, /* OUT */
1178 &vm_object_pindex, /* OUT */
1179 &throwaway_prot, /* OUT */
1180 &throwaway_wired); /* OUT */
1181 if (error != KERN_SUCCESS) {
1182 printf("vm_map_lookup failed: %d\n", error);
1183 return EFAULT;
1186 /* TODO: Trust the user that the full region is valid.
1187 * This is very bad. See the note in fkvm_guest_check_range
1188 * on nesting vm lookups. */
1189 #if 0
1190 if (!fkvm_mem_has_entry(lookup_entry, user_vm_map, uap->vaddr + uap->size)) {
1191 printf("end of range not contained in same vm map entry as start\n");
1192 return EFAULT;
1194 #endif
1196 printf("vm object: %p\n", vm_object);
1197 printf(" size: %d pages\n", (int) vm_object->size);
1199 vm_object_offset = IDX_TO_OFF(vm_object_pindex);
1200 printf("vm_ooffset: 0x%" PRIx64 "\n", vm_object_offset);
1202 vm_object_reference(vm_object); // TODO: this might be a mem leak
1204 vm_map_lookup_done(user_vm_map, lookup_entry);
1206 error = vm_map_insert(&guest_vm->sp->vm_map,
1207 vm_object,
1208 vm_object_offset,
1209 start,
1210 end,
1211 VM_PROT_ALL, VM_PROT_ALL,
1213 if (error != KERN_SUCCESS) {
1214 printf("vm_map_insert failed: %d\n", error);
1215 switch (error) {
1216 case KERN_INVALID_ADDRESS:
1217 return EINVAL;
1218 case KERN_NO_SPACE:
1219 return ENOMEM;
1220 default:
1221 return 1;
1225 return 0;
1229 fkvm_unset_user_mem_region(struct thread *td, struct fkvm_unset_user_mem_region_args *uap)
1231 struct guestvm *guest_vm = PROC_GET_GUESTVM(td->td_proc);
1233 vm_offset_t start;
1234 vm_offset_t end;
1236 vm_map_t guest_vm_map;
1238 int error;
1240 start = uap->guest_pa;
1241 end = uap->guest_pa + uap->size - 1;
1242 printf("start: 0x%" PRIx64 " bytes\n", start);
1243 printf("end: 0x%" PRIx64 " bytes\n", end);
1245 guest_vm_map = &guest_vm->sp->vm_map;
1247 error = vm_map_remove(guest_vm_map, start, end);
1248 if (error != KERN_SUCCESS)
1249 return -1;
1251 return 0;
1255 fkvm_create_vm(struct thread *td, struct fkvm_create_vm_args *uap)
1257 struct guestvm *guest_vm;
1259 printf("SYSCALL : fkvm_create_vm\n");
1261 /* Allocate Guest VM */
1262 guest_vm = fkvm_guestvm_alloc();
1264 /* Set up the vm address space */
1265 guest_vm->sp = fkvm_make_vmspace();
1266 if (guest_vm->sp == NULL) {
1267 fkvm_guestvm_free(guest_vm);
1268 return ENOMEM;
1270 guest_vm->nested_cr3 = vtophys(vmspace_pmap(guest_vm->sp)->pm_pml4);
1272 printf("guest:\n");
1273 printf(" vm space: %p\n", guest_vm->sp);
1274 printf(" vm map: %p\n", &guest_vm->sp->vm_map);
1275 printf(" ncr3: 0x%" PRIx64 "\n", guest_vm->nested_cr3);
1277 PROC_SET_GUESTVM(td->td_proc, guest_vm);
1279 printf("fkvm_create_vm done\n");
1280 return 0;
1284 fkvm_destroy_vm(struct thread *td, struct fkvm_destroy_vm_args *uap)
1286 struct guestvm *guest_vm = PROC_GET_GUESTVM(td->td_proc);
1288 /* Destroy the VCPUs */
1289 while (guest_vm->nr_vcpus > 0) {
1290 guest_vm->nr_vcpus--;
1291 fkvm_vcpu_destroy(guest_vm->vcpus[guest_vm->nr_vcpus]);
1292 guest_vm->vcpus[guest_vm->nr_vcpus] = NULL;
1295 /* Destroy the vmspace */
1296 fkvm_destroy_vmspace(guest_vm->sp);
1298 /* Destroy the Guest VM itself */
1299 fkvm_guestvm_free(guest_vm);
1301 return 0;
1305 fkvm_vm_run(struct thread *td, struct fkvm_vm_run_args *uap)
1307 struct vcpu *vcpu = TD_GET_VCPU(td);
1308 struct guestvm *guest_vm = vcpu->guest_vm;
1309 struct vmcb *vmcb = vcpu->vmcb;
1310 int ret = 0;
1312 fkvm_vcpu_run(vcpu);
1314 switch (vmcb->control.exit_code) {
1316 case VMCB_EXIT_EXCP_BASE ... (VMCB_EXIT_EXCP_BASE + 31): {
1317 int excp_vector;
1319 excp_vector = vmcb->control.exit_code - VMCB_EXIT_EXCP_BASE;
1321 printf("VMCB_EXIT_EXCP_BASE, exception vector: 0x%x\n",
1322 excp_vector);
1323 ret = ENOSYS;
1324 break;
1327 case VMCB_EXIT_INTR: {
1328 printf("VMCB_EXIT_INTR - nothing to do\n");
1329 break;
1332 case VMCB_EXIT_NPF: {
1333 /* EXITINFO1 contains fault error code */
1334 /* EXITINFO2 contains the guest physical address causing the fault. */
1336 u_int64_t fault_code;
1337 u_int64_t fault_gpa;
1339 vm_prot_t fault_type;
1340 int fault_flags;
1341 int rc;
1343 fault_code = vmcb->control.exit_info_1;
1344 fault_gpa = vmcb->control.exit_info_2;
1346 printf("VMCB_EXIT_NPF:\n");
1347 printf("gpa=0x%" PRIx64 "\n", fault_gpa);
1348 printf("fault code=0x%" PRIx64 " [P=%x, R/W=%x, U/S=%x, I/D=%x]\n",
1349 fault_code,
1350 (fault_code & PGEX_P) != 0,
1351 (fault_code & PGEX_W) != 0,
1352 (fault_code & PGEX_U) != 0,
1353 (fault_code & PGEX_I) != 0);
1355 if (fault_code & PGEX_W)
1356 fault_type = VM_PROT_WRITE;
1357 else if (fault_code & PGEX_I)
1358 fault_type = VM_PROT_EXECUTE;
1359 else
1360 fault_type = VM_PROT_READ;
1362 fault_flags = 0; /* TODO: is that right? */
1363 rc = vm_fault(&guest_vm->sp->vm_map, (fault_gpa & (~PAGE_MASK)), fault_type, fault_flags);
1364 if (rc != KERN_SUCCESS) {
1365 printf("vm_fault failed: %d\n", rc);
1367 break;
1369 default:
1370 printf("Unhandled vmexit:\n"
1371 " code: 0x%" PRIx64 "\n"
1372 " info1: 0x%" PRIx64 "\n"
1373 " info2: 0x%" PRIx64 "\n",
1374 vmcb->control.exit_code,
1375 vmcb->control.exit_info_1,
1376 vmcb->control.exit_info_2);
1377 print_vmcb(vmcb);
1378 ret = ENOSYS;
1381 printf("\n\n");
1383 return ret;
1387 fkvm_create_vcpu(struct thread *td, struct fkvm_create_vcpu_args *uap)
1389 struct guestvm *guest_vm = PROC_GET_GUESTVM(td->td_proc);
1390 struct vcpu *vcpu;
1392 /* Allocate VCPU */
1393 printf("fkvm_create_vcpu: td = %p\n", td);
1394 vcpu = fkvm_vcpu_create(guest_vm);
1395 fkvm_guestvm_add_vcpu(guest_vm, vcpu);
1397 TD_SET_VCPU(td, vcpu);
1398 printf("fkvm_create_vcpu: vcpu = %p\n", vcpu);
1399 return 0;
1402 static void
1403 fkvm_load(void *unused)
1405 u_int64_t efer;
1407 printf("fkvm_load\n");
1408 printf("sizeof(struct vmcb) = %" PRIx64 "\n", sizeof(struct vmcb));
1410 hsave_area = NULL;
1411 iopm = NULL;
1412 msrpm = NULL;
1414 /* TODO: check for the presense of extensions */
1416 /* allocate structures */
1417 hsave_area = fkvm_hsave_area_alloc();
1418 iopm = fkvm_iopm_alloc();
1419 msrpm = fkvm_msrpm_alloc();
1421 /* Initialize structures */
1422 fkvm_hsave_area_init(hsave_area);
1423 fkvm_iopm_init(iopm);
1424 fkvm_msrpm_init(msrpm);
1426 /* Enable SVM in EFER */
1427 efer = rdmsr(MSR_EFER);
1428 printf("EFER = %" PRIx64 "\n", efer);
1429 wrmsr(MSR_EFER, efer | EFER_SVME);
1430 efer = rdmsr(MSR_EFER);
1431 printf("new EFER = %" PRIx64 "\n", efer);
1433 /* Write Host save address in MSR_VM_HSAVE_PA */
1434 wrmsr(MSR_VM_HSAVE_PA, vtophys(hsave_area));
1436 SYSINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_load, NULL);
1438 static void
1439 fkvm_unload(void *unused)
1441 printf("fkvm_unload\n");
1443 if (msrpm != NULL) {
1444 fkvm_msrpm_free(iopm);
1445 msrpm = NULL;
1447 if (iopm != NULL) {
1448 fkvm_iopm_free(iopm);
1449 iopm = NULL;
1451 if (hsave_area != NULL) {
1452 fkvm_hsave_area_free(hsave_area);
1453 hsave_area = NULL;
1456 SYSUNINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_unload, NULL);