handle VMCB_EXIT_NPF
[freebsd-src/fkvm-freebsd.git] / sys / kern / kern_fkvm.c
blob2ddc1b596919e674cb99703df214cc2d2887438d
1 /*-
2 * Copyright (c) 2008 The FreeBSD Project
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
27 #include <sys/cdefs.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/malloc.h>
32 #include <vm/vm.h>
33 #include <vm/pmap.h>
34 #include <vm/vm_extern.h>
35 #include <vm/vm_map.h>
36 #include <vm/vm_object.h>
37 #include <vm/vm_param.h>
38 #include <machine/_inttypes.h>
39 #include <machine/specialreg.h>
40 #include <machine/segments.h>
41 #include <machine/vmcb.h>
/*
 * Sizes, in bytes, of the two permission maps allocated by this file.
 * TODO: ensure that the I/O permission map need not be 12 Kbytes, not just
 * 8 Kbytes + 1.
 */
#define IOPM_SIZE	(8*1024 + 1)
#define MSRPM_SIZE	(8*1024)

/* fkvm data: permission maps, filled in at load time. */
static void *iopm = NULL;	/* Should I allocate a vm_object_t instead? */
static void *msrpm = NULL;	/* Should I allocate a vm_object_t instead? */

/* per-guest data */

/* VCPU data: host save area and the single VMCB used by this file. */
static void *hsave_area = NULL;
static struct vmcb *vmcb = NULL;
/*
 * Indices into the per-VCPU register array.  The sixteen general purpose
 * registers take the values 0..15 in the order listed, followed by RIP;
 * NR_VCPU_REGS is the number of slots.
 */
enum {
	VCPU_REGS_RAX = 0,
	VCPU_REGS_RCX,
	VCPU_REGS_RDX,
	VCPU_REGS_RBX,
	VCPU_REGS_RSP,
	VCPU_REGS_RBP,
	VCPU_REGS_RSI,
	VCPU_REGS_RDI,
	VCPU_REGS_R8,
	VCPU_REGS_R9,
	VCPU_REGS_R10,
	VCPU_REGS_R11,
	VCPU_REGS_R12,
	VCPU_REGS_R13,
	VCPU_REGS_R14,
	VCPU_REGS_R15,
	VCPU_REGS_RIP,
	NR_VCPU_REGS
};
77 static struct vcpu {
78 unsigned long vmcb_pa;
79 unsigned long regs[NR_VCPU_REGS];
80 u_int64_t host_gs_base;
81 u_int64_t cr2;
82 u_int64_t cr3;
83 } vcpu;
/*
 * SVM instructions, emitted as raw opcode bytes for use inside inline
 * assembly.  All of them share the 0x0f, 0x01 prefix and differ only in
 * the final byte.
 */
#define SVM_OPCODE(last)	".byte 0x0f, 0x01, " last
#define SVM_VMLOAD		SVM_OPCODE("0xda")
#define SVM_VMRUN		SVM_OPCODE("0xd8")
#define SVM_VMSAVE		SVM_OPCODE("0xdb")
#define SVM_CLGI		SVM_OPCODE("0xdd")
#define SVM_STGI		SVM_OPCODE("0xdc")
#define SVM_INVLPGA		SVM_OPCODE("0xdf")
92 static void
93 print_tss_desc(struct system_segment_descriptor *tss_desc)
95 printf("TSS desc @ %p:\n", tss_desc);
96 printf("sd_lolimit: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_lolimit);
97 printf("sd_lobase: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_lobase);
98 printf("sd_type: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_type);
99 printf("sd_dpl: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_dpl);
100 printf("sd_p: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_p);
101 printf("sd_hilimit: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_hilimit);
102 printf("sd_xx0: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx0);
103 printf("sd_gran: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_gran);
104 printf("sd_hibase: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_hibase);
105 printf("sd_xx1: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx1);
106 printf("sd_mbz: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_mbz);
107 printf("sd_xx2: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx2);
108 printf("\n\n");
111 static void
112 print_tss(struct system_segment_descriptor *tss_desc)
114 u_int32_t *base;
115 int limit;
116 int i;
118 base = (u_int32_t*) ((((u_int64_t) tss_desc->sd_hibase) << 24) | ((u_int64_t) tss_desc->sd_lobase));
119 limit = ((tss_desc->sd_hilimit << 16) | tss_desc->sd_lolimit) / 4;
121 printf("TSS: @ %p\n", base);
122 for (i = 0; i <= limit; i++)
123 printf("%x: 0x%" PRIx32 "\n", i, base[i]);
124 printf("\n\n");
127 static int
128 vmrun_assert(struct vmcb *vmcb)
130 #define A(cond) do { if ((cond)) { printf("Error: assertion not met on line %d\n", __LINE__); bad = 1; } } while (0)
132 int bad;
134 bad = 0;
136 // The following are illegal:
138 //EFER.SVME is zero.
139 A((vmcb->save.efer & 0x0000000000001000) == 0);
141 // CR0.CD is zero and CR0.NW is set
142 A( ((vmcb->save.cr0 & 0x0000000040000000) == 0) &&
143 ((vmcb->save.cr0 & 0x0000000020000000) != 0));
145 // CR0[63:32] are not zero.
146 A((vmcb->save.cr0 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
148 // Any MBZ bit of CR3 is set.
149 A((vmcb->save.cr3 & 0xFFF0000000000000) != 0);
151 // CR4[63:11] are not zero.
152 A((vmcb->save.cr4 & 0xFFFFFFFFFFFFF800) == 0xFFFFFFFFFFFFF800);
154 // DR6[63:32] are not zero.
155 A((vmcb->save.dr6 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
157 // DR7[63:32] are not zero.
158 A((vmcb->save.dr7 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
160 // EFER[63:15] are not zero.
161 A((vmcb->save.efer & 0xFFFFFFFFFFFF8000) == 0xFFFFFFFFFFF8000);
163 // EFER.LMA or EFER.LME is non-zero and this processor does not support long mode.
164 //// A((vmcb->save.efer & 0x0000000000000500) != 0);
166 // EFER.LME and CR0.PG are both set and CR4.PAE is zero.
167 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
168 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
169 ((vmcb->save.cr4 & 0x0000000000000020) != 0));
171 // EFER.LME and CR0.PG are both non-zero and CR0.PE is zero.
172 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
173 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
174 ((vmcb->save.cr0 & 0x0000000000000001) == 0));
176 // EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
177 // cs.attrib = concat 55-52 and 47-40 (p372 v2)
178 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
179 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
180 ((vmcb->save.cr4 & 0x0000000000000020) != 0) &&
181 ((vmcb->save.cs.attrib & 0x0200) != 0) &&
182 ((vmcb->save.cs.attrib & 0x0400) != 0));
184 // The VMRUN intercept bit is clear.
185 A((vmcb->control.intercepts & 0x0000000100000000) == 0);
187 // The MSR or IOIO intercept tables extend to a physical address that is
188 // greater than or equal to the maximum supported physical address.
190 // Illegal event injection (see Section 15.19 on page 391).
192 // ASID is equal to zero.
193 A(vmcb->control.guest_asid == 0);
195 // VMRUN can load a guest value of CR0 with PE = 0 but PG = 1, a
196 // combination that is otherwise illegal (see Section 15.18).
198 // In addition to consistency checks, VMRUN and #VMEXIT canonicalize (i.e.,
199 // sign-extend to 63 bits) all base addresses in the segment registers
200 // that have been loaded.
202 return bad;
204 #undef A
/*
 * Enter the guest described by vmcb once and return after the next #VMEXIT.
 *
 * Sequence, as implemented below:
 *  - bail out without running if vmrun_assert() reports a bad VMCB;
 *  - dump the host TSS descriptor/TSS and parts of the VMCB save area;
 *  - copy RAX, RSP and RIP from vcpu->regs[] into the VMCB and CR2 from
 *    vcpu->cr2;
 *  - remember host state in locals (FS/GS/LDT selectors, CR2, DR6, DR7,
 *    STAR/LSTAR/CSTAR/SF_MASK) and in vcpu->host_gs_base;
 *  - CLGI, then the assembly block: load guest GPRs from vcpu->regs[],
 *    VMLOAD/VMRUN/VMSAVE with the VMCB physical address in RAX, store the
 *    guest GPRs back into vcpu->regs[];
 *  - copy CR2, RAX, RSP and RIP back from the VMCB, restore the remembered
 *    host state, reload the task register, STGI, and dump state again.
 *
 * The exit reason is left in vmcb->control.exit_code for the caller.
 *
 * NOTE(review): this listing appears to be missing some very short source
 * lines (braces, and probably the ":" that introduces the empty asm output
 * list and the closing ");" of the asm statement) -- confirm against the
 * repository before editing the code.
 */
207 static void
208 fkvm_vcpu_run(struct vcpu *vcpu, struct vmcb *vmcb)
210 u_int64_t lstar;
211 u_int64_t cstar;
212 u_int64_t star;
213 u_int64_t sfmask;
215 u_short fs_selector;
216 u_short gs_selector;
217 u_short ldt_selector;
219 unsigned long host_cr2;
220 unsigned long host_dr6;
221 unsigned long host_dr7;
223 struct system_segment_descriptor *tss_desc;
224 u_int64_t sel;
226 printf("begin fkvm_vcpu_run\n");
/* Refuse to run when the VMCB fails the consistency checks. */
228 if (vmrun_assert(vmcb))
229 return;
/* Locate the host TSS descriptor in the GDT; it is re-armed after the exit. */
232 tss_desc = (struct system_segment_descriptor*) (&gdt[GPROC0_SEL]);
233 sel = GSEL(GPROC0_SEL, SEL_KPL);
235 printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64 "\n", sel);
236 print_tss_desc(tss_desc);
237 print_tss(tss_desc);
/* Debug dump of the VMCB save area before entering the guest. */
239 printf("VMCB save area:\n");
240 printf("fs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
241 vmcb->save.fs.selector,
242 vmcb->save.fs.attrib,
243 vmcb->save.fs.limit,
244 vmcb->save.fs.base);
245 printf("gs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
246 vmcb->save.gs.selector,
247 vmcb->save.gs.attrib,
248 vmcb->save.gs.limit,
249 vmcb->save.gs.base);
250 printf("tr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
251 vmcb->save.tr.selector,
252 vmcb->save.tr.attrib,
253 vmcb->save.tr.limit,
254 vmcb->save.tr.base);
255 printf("ldtr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
256 vmcb->save.ldtr.selector,
257 vmcb->save.ldtr.attrib,
258 vmcb->save.ldtr.limit,
259 vmcb->save.ldtr.base);
260 printf("kernel_gs_base: %" PRIx64 "\n", vmcb->save.kernel_gs_base);
261 printf("star: %" PRIx64 "\n", vmcb->save.star);
262 printf("lstar: %" PRIx64 "\n", vmcb->save.lstar);
263 printf("cstar: %" PRIx64 "\n", vmcb->save.cstar);
264 printf("sfmask: %" PRIx64 "\n", vmcb->save.sfmask);
265 printf("sysenter_cs: %" PRIx64 "\n", vmcb->save.sysenter_cs);
266 printf("sysenter_esp: %" PRIx64 "\n", vmcb->save.sysenter_esp);
267 printf("sysenter_eip: %" PRIx64 "\n", vmcb->save.sysenter_eip);
268 printf("\n\n");
270 // disable_intr();
/* The assembly below loads this physical address into RAX for VMLOAD/VMRUN/VMSAVE. */
272 vcpu->vmcb_pa = vtophys(vmcb);
273 printf("vmcb = 0x%p\n", vmcb);
274 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu->vmcb_pa);
/* RAX, RSP and RIP travel through the VMCB rather than the register loads in the asm. */
276 vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX];
277 vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP];
278 vmcb->save.rip = vcpu->regs[VCPU_REGS_RIP];
280 /* meh: kvm has pre_svm_run(svm); */
/* Remember host state that is restored by hand after the guest exits. */
282 vcpu->host_gs_base = rdmsr(MSR_GSBASE);
283 printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);
285 fs_selector = rfs();
286 gs_selector = rgs();
287 ldt_selector = rldt();
288 printf("fs selector: %hx\n", fs_selector);
289 printf("gs selector: %hx\n", gs_selector);
290 printf("ldt selector: %hx\n", ldt_selector);
292 host_cr2 = rcr2();
294 host_dr6 = rdr6();
295 host_dr7 = rdr7();
297 vmcb->save.cr2 = vcpu->cr2;
298 /* meh: cr3? */
300 /* meh: dr7? db_regs? */
302 printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
303 printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
304 printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
305 printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));
307 star = rdmsr(MSR_STAR);
308 lstar = rdmsr(MSR_LSTAR);
309 cstar = rdmsr(MSR_CSTAR);
310 sfmask = rdmsr(MSR_SF_MASK);
312 printf("CLGI...\n");
/* Executes CLGI; the matching STGI is issued only after host state has been restored below. */
314 __asm __volatile (SVM_CLGI);
317 // enable_intr();
/*
 * World switch.  On entry RAX holds the vcpu pointer (the "a" constraint);
 * host RBP is saved on the stack because it is loaded with the guest value.
 * RAX is pushed before being overwritten with vcpu->vmcb_pa and popped
 * after VMSAVE, so the stores after the exit again address through the
 * vcpu pointer.
 */
319 #define R "r"
320 __asm __volatile (
321 "push %%"R"bp; \n\t"
322 "mov %c[rbx](%[svm]), %%"R"bx \n\t"
323 "mov %c[rcx](%[svm]), %%"R"cx \n\t"
324 "mov %c[rdx](%[svm]), %%"R"dx \n\t"
325 "mov %c[rsi](%[svm]), %%"R"si \n\t"
326 "mov %c[rdi](%[svm]), %%"R"di \n\t"
327 "mov %c[rbp](%[svm]), %%"R"bp \n\t"
328 "mov %c[r8](%[svm]), %%r8 \n\t"
329 "mov %c[r9](%[svm]), %%r9 \n\t"
330 "mov %c[r10](%[svm]), %%r10 \n\t"
331 "mov %c[r11](%[svm]), %%r11 \n\t"
332 "mov %c[r12](%[svm]), %%r12 \n\t"
333 "mov %c[r13](%[svm]), %%r13 \n\t"
334 "mov %c[r14](%[svm]), %%r14 \n\t"
335 "mov %c[r15](%[svm]), %%r15 \n\t"
337 /* Enter guest mode */
338 "push %%"R"ax \n\t"
339 "mov %c[vmcb](%[svm]), %%"R"ax \n\t"
340 SVM_VMLOAD "\n\t"
341 SVM_VMRUN "\n\t"
342 SVM_VMSAVE "\n\t"
343 "pop %%"R"ax \n\t"
345 /* Save guest registers, load host registers */
346 "mov %%"R"bx, %c[rbx](%[svm]) \n\t"
347 "mov %%"R"cx, %c[rcx](%[svm]) \n\t"
348 "mov %%"R"dx, %c[rdx](%[svm]) \n\t"
349 "mov %%"R"si, %c[rsi](%[svm]) \n\t"
350 "mov %%"R"di, %c[rdi](%[svm]) \n\t"
351 "mov %%"R"bp, %c[rbp](%[svm]) \n\t"
352 "mov %%r8, %c[r8](%[svm]) \n\t"
353 "mov %%r9, %c[r9](%[svm]) \n\t"
354 "mov %%r10, %c[r10](%[svm]) \n\t"
355 "mov %%r11, %c[r11](%[svm]) \n\t"
356 "mov %%r12, %c[r12](%[svm]) \n\t"
357 "mov %%r13, %c[r13](%[svm]) \n\t"
358 "mov %%r14, %c[r14](%[svm]) \n\t"
359 "mov %%r15, %c[r15](%[svm]) \n\t"
360 "pop %%"R"bp"
362 : [svm]"a"(vcpu),
363 [vmcb]"i"(offsetof(struct vcpu, vmcb_pa)),
364 [rbx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBX])),
365 [rcx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RCX])),
366 [rdx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDX])),
367 [rsi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RSI])),
368 [rdi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDI])),
369 [rbp]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBP])),
370 [r8 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R8 ])),
371 [r9 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R9 ])),
372 [r10]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R10])),
373 [r11]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R11])),
374 [r12]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R12])),
375 [r13]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R13])),
376 [r14]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R14])),
377 [r15]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R15]))
378 : "cc", "memory",
379 R"bx", R"cx", R"dx", R"si", R"di",
380 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
384 /* meh: dr7? db_regs? */
/* Back in the host: pull the guest values that live in the VMCB into vcpu. */
386 vcpu->cr2 = vmcb->save.cr2;
388 vcpu->regs[VCPU_REGS_RAX] = vmcb->save.rax;
389 vcpu->regs[VCPU_REGS_RSP] = vmcb->save.rsp;
390 vcpu->regs[VCPU_REGS_RIP] = vmcb->save.rip;
/* Restore the host state remembered before CLGI. */
392 load_dr6(host_dr6);
393 load_dr7(host_dr7);
395 load_cr2(host_cr2);
397 load_fs(fs_selector);
398 load_gs(gs_selector);
399 lldt(ldt_selector);
401 wrmsr(MSR_GSBASE, vcpu->host_gs_base);
/*
 * Reset the TSS descriptor type before reloading the task register.
 * NOTE(review): presumably needed because ltr rejects a descriptor that is
 * still marked busy -- confirm against the architecture manual.
 */
403 tss_desc->sd_type = SDT_SYSTSS;
404 ltr(sel);
406 wrmsr(MSR_STAR, star);
407 wrmsr(MSR_LSTAR, lstar);
408 wrmsr(MSR_CSTAR, cstar);
409 wrmsr(MSR_SF_MASK, sfmask);
411 // disable_intr();
413 __asm __volatile (SVM_STGI);
415 printf("STGI\n");
417 printf("exit_code: %" PRIx64 "\n", vmcb->control.exit_code);
/* Debug only: re-read and print the host state that was just restored. */
419 printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
420 printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
421 printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
422 printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));
424 fs_selector = rfs();
425 gs_selector = rgs();
426 ldt_selector = rldt();
427 printf("fs selector: %hx\n", fs_selector);
428 printf("gs selector: %hx\n", gs_selector);
429 printf("ldt selector: %hx\n", ldt_selector);
/* Note: this overwrites vcpu->host_gs_base with the value currently in MSR_GSBASE. */
431 vcpu->host_gs_base = rdmsr(MSR_GSBASE);
432 printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);
434 print_tss_desc(tss_desc);
435 print_tss(tss_desc);
437 printf("VMCB save area:\n");
438 printf("fs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
439 vmcb->save.fs.selector,
440 vmcb->save.fs.attrib,
441 vmcb->save.fs.limit,
442 vmcb->save.fs.base);
443 printf("gs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
444 vmcb->save.gs.selector,
445 vmcb->save.gs.attrib,
446 vmcb->save.gs.limit,
447 vmcb->save.gs.base);
448 printf("tr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
449 vmcb->save.tr.selector,
450 vmcb->save.tr.attrib,
451 vmcb->save.tr.limit,
452 vmcb->save.tr.base);
453 printf("ldtr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
454 vmcb->save.ldtr.selector,
455 vmcb->save.ldtr.attrib,
456 vmcb->save.ldtr.limit,
457 vmcb->save.ldtr.base);
458 printf("kernel_gs_base: %" PRIx64 "\n", vmcb->save.kernel_gs_base);
459 printf("star: %" PRIx64 "\n", vmcb->save.star);
460 printf("lstar: %" PRIx64 "\n", vmcb->save.lstar);
461 printf("cstar: %" PRIx64 "\n", vmcb->save.cstar);
462 printf("sfmask: %" PRIx64 "\n", vmcb->save.sfmask);
463 printf("sysenter_cs: %" PRIx64 "\n", vmcb->save.sysenter_cs);
464 printf("sysenter_esp: %" PRIx64 "\n", vmcb->save.sysenter_esp);
465 printf("sysenter_eip: %" PRIx64 "\n", vmcb->save.sysenter_eip);
466 printf("\n\n");
468 // enable_intr();
470 /* meh: next_rip */
473 static void
474 _fkvm_init_seg(struct vmcb_seg *seg, uint16_t attrib)
476 seg->selector = 0;
477 seg->attrib = VMCB_SELECTOR_P_MASK | attrib;
478 seg->limit = 0xffff;
479 seg->base = 0;
482 static inline void
483 fkvm_init_seg(struct vmcb_seg *seg)
485 _fkvm_init_seg(seg, VMCB_SELECTOR_S_MASK | VMCB_SELECTOR_WRITE_MASK);
/*
 * Reset a VMCB system segment register (LDTR, TR).  The caller supplies
 * the attribute/type bits; this simply forwards to _fkvm_init_seg().
 */
static inline void
fkvm_init_sys_seg(struct vmcb_seg *seg, uint16_t attrib)
{
	_fkvm_init_seg(seg, attrib);
	return;
}
494 static void
495 fkvm_iopm_init(void *iopm)
497 memset(iopm, 0xff, IOPM_SIZE); /* TODO: we may want to allow access to PC debug port */
500 static void
501 fkvm_msrpm_init(void *msrpm)
503 memset(msrpm, 0xff, MSRPM_SIZE); /* TODO: we may want to allow some MSR accesses */
506 static struct vmspace *sp;
508 static u_int64_t
509 fkvm_make_vm_map(void)
511 int rc;
512 vm_object_t obj = NULL;
514 sp = vmspace_alloc(0, 0xffffffffffffffff);
515 if (sp == NULL) {
516 printf("vmspace_alloc failed\n");
517 goto fail;
520 obj = vm_object_allocate(OBJT_DEFAULT, 0xffffffffffffffff >> PAGE_SHIFT);
522 vm_object_reference(obj);
523 rc = vm_map_insert(&sp->vm_map,
524 obj,
525 0, 0, 0xffffffffffffffff >> PAGE_SHIFT,
526 VM_PROT_ALL, VM_PROT_ALL,
528 if (rc != KERN_SUCCESS) {
529 printf("vm_map_insert failed: %d\n", rc);
530 vm_object_deallocate(obj);
531 goto fail;
534 return vtophys(vmspace_pmap(sp)->pm_pml4);
536 fail:
537 if (obj != NULL) {
538 vm_object_deallocate(obj);
539 obj = NULL;
541 if (sp != NULL) {
542 vmspace_free(sp);
543 sp = NULL;
545 return 0;
550 static void
551 fkvm_vmcb_init(struct vmcb *vmcb)
553 struct vmcb_control_area *control = &vmcb->control;
554 struct vmcb_save_area *save = &vmcb->save;
556 control->intercept_cr_reads = INTERCEPT_CR4_MASK;
558 control->intercept_cr_writes = INTERCEPT_CR4_MASK |
559 INTERCEPT_CR8_MASK;
561 control->intercept_dr_reads = INTERCEPT_DR0_MASK |
562 INTERCEPT_DR1_MASK |
563 INTERCEPT_DR2_MASK |
564 INTERCEPT_DR3_MASK;
566 control->intercept_dr_writes = INTERCEPT_DR0_MASK |
567 INTERCEPT_DR1_MASK |
568 INTERCEPT_DR2_MASK |
569 INTERCEPT_DR3_MASK |
570 INTERCEPT_DR5_MASK |
571 INTERCEPT_DR7_MASK;
573 control->intercept_exceptions = (1 << IDT_UD) | // Invalid Opcode
574 (1 << IDT_MC); // Machine Check
576 control->intercepts = INTERCEPT_INTR |
577 INTERCEPT_NMI |
578 INTERCEPT_SMI |
579 INTERCEPT_CPUID |
580 INTERCEPT_INVD |
581 INTERCEPT_HLT |
582 INTERCEPT_INVLPGA |
583 INTERCEPT_IOIO_PROT |
584 INTERCEPT_MSR_PROT |
585 INTERCEPT_SHUTDOWN |
586 INTERCEPT_VMRUN |
587 INTERCEPT_VMMCALL |
588 INTERCEPT_VMLOAD |
589 INTERCEPT_VMSAVE |
590 INTERCEPT_STGI |
591 INTERCEPT_CLGI |
592 INTERCEPT_SKINIT |
593 INTERCEPT_WBINVD |
594 INTERCEPT_MONITOR |
595 INTERCEPT_MWAIT_UNCOND;
597 control->iopm_base_pa = vtophys(iopm);
598 control->msrpm_base_pa = vtophys(msrpm);
599 control->tsc_offset = 0;
601 /* TODO: remove this once we assign asid's to distinct VM's */
602 control->guest_asid = 1;
603 control->tlb_control = VMCB_TLB_CONTROL_FLUSH_ALL;
605 /* let v_tpr default to 0 */
606 /* let v_irq default to 0 */
607 /* let v_intr default to 0 */
609 control->v_intr_masking = 1;
611 /* let v_intr_vector default to 0 */
612 /* let intr_shadow default to 0 */
613 /* let exit_code, exit_info_1, exit_info_2, exit_int_info,
614 exit_int_info_err_code default to 0 */
616 control->nested_ctl = 1;
618 /* let event_inj default to 0 */
620 // (nested_cr3 is later)
622 /* let lbr_virt_enable default to 0 */
625 fkvm_init_seg(&save->es);
626 fkvm_init_seg(&save->ss);
627 fkvm_init_seg(&save->ds);
628 fkvm_init_seg(&save->fs);
629 fkvm_init_seg(&save->gs);
631 _fkvm_init_seg(&save->cs, VMCB_SELECTOR_READ_MASK | VMCB_SELECTOR_S_MASK |
632 VMCB_SELECTOR_CODE_MASK);
633 save->cs.selector = 0xf000;
634 save->cs.base = 0xffff0000;
636 save->gdtr.limit = 0xffff;
637 save->idtr.limit = 0xffff;
639 fkvm_init_sys_seg(&save->ldtr, SDT_SYSLDT);
640 fkvm_init_sys_seg(&save->tr, SDT_SYS286BSY);
642 save->g_pat = PAT_VALUE(PAT_WRITE_BACK, 0) | PAT_VALUE(PAT_WRITE_THROUGH, 1) |
643 PAT_VALUE(PAT_UNCACHED, 2) | PAT_VALUE(PAT_UNCACHEABLE, 3) |
644 PAT_VALUE(PAT_WRITE_BACK, 4) | PAT_VALUE(PAT_WRITE_THROUGH, 5) |
645 PAT_VALUE(PAT_UNCACHED, 6) | PAT_VALUE(PAT_UNCACHEABLE, 7);
647 /* CR0_ET is forced to 1 by processor */
648 save->cr0 = CR0_ET;
649 save->dr6 = 0xffff0ff0;
650 save->dr7 = 0x400;
651 //save->rflags = 2; /* It seems like bit 1 is reserved. This line makes no sense. */
652 save->rip = 0x0000fff0;
654 save->efer = 0x0000000000001000;
656 control->nested_cr3 = fkvm_make_vm_map();
657 printf("ncr3: %" PRIx64 "\n", control->nested_cr3);
661 vm_object_t obj = NULL;
663 int fkvm_userpoke(void *data);
666 fkvm_userpoke(void *data)
668 printf("fkvm_userpoke\n");
670 /* VMRUN */
671 fkvm_vcpu_run(&vcpu, vmcb);
673 switch (vmcb->control.exit_code) {
674 case VMCB_EXIT_NPF: {
675 /* EXITINFO1 contains fault error code */
676 /* EXITINFO2 contains the guest physical address causing the fault. */
678 u_int64_t fault_code;
679 u_int64_t fault_gpa;
681 vm_prot_t fault_type;
682 int fault_flags;
683 int rc;
685 fault_code = vmcb->control.exit_info_1;
686 fault_gpa = vmcb->control.exit_info_2;
688 printf("VMCB_EXIT_NPF:\n");
689 printf("gpa=0x%" PRIx64 "\n", fault_gpa);
690 printf("fault code=0x%" PRIx64 " [P=%x, R/W=%x, U/S=%x, I/D=%x]\n",
691 fault_code,
692 (fault_code & PGEX_P) != 0,
693 (fault_code & PGEX_W) != 0,
694 (fault_code & PGEX_U) != 0,
695 (fault_code & PGEX_I) != 0);
697 if (fault_code & PGEX_W)
698 fault_type = VM_PROT_WRITE;
699 else if (fault_code & PGEX_I)
700 fault_type = VM_PROT_EXECUTE;
701 else
702 fault_type = VM_PROT_READ;
704 fault_flags = 0; /* TODO: is that right? */
705 rc = vm_fault(&sp->vm_map, fault_gpa, fault_type, fault_flags);
706 if (rc != KERN_SUCCESS)
707 printf("vm_fault failed: %d\n", rc);
708 break;
710 default:
711 printf("Unhandled vmexit, code=0x%" PRIx64 "\n", vmcb->control.exit_code);
714 return 1;
717 static void
718 fkvm_load(void *unused)
720 u_int64_t efer;
722 printf("fkvm_load\n");
723 printf("sizeof(struct vmcb) = %" PRIx64 "\n", sizeof(struct vmcb));
725 /* TODO: check for the presense of extensions */
727 hsave_area = contigmalloc(PAGE_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
728 if(hsave_area == NULL)
729 return;
731 vmcb = (struct vmcb *)contigmalloc(PAGE_SIZE, M_DEVBUF, M_ZERO, 0, -1UL, PAGE_SIZE, 0);
732 if(vmcb == NULL)
733 goto errout0;
735 iopm = contigmalloc(IOPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
736 if(iopm == NULL)
737 goto errout1;
739 msrpm = contigmalloc(MSRPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
740 if(msrpm == NULL)
741 goto errout2;
743 /* Initialize iopm and msrpm */
744 fkvm_iopm_init(iopm);
745 fkvm_msrpm_init(msrpm);
747 /* Initialize VMCB */
748 fkvm_vmcb_init(vmcb);
750 /* Enable SVM in EFER */
751 efer = rdmsr(MSR_EFER);
752 printf("EFER = %" PRIx64 "\n", efer);
753 wrmsr(MSR_EFER, efer | EFER_SVME);
754 efer = rdmsr(MSR_EFER);
755 printf("new EFER = %" PRIx64 "\n", efer);
757 /* Write Host save address in MSR_VM_HSAVE_PA */
758 wrmsr(MSR_VM_HSAVE_PA, vtophys(hsave_area));
760 return;
762 errout2:
763 contigfree(iopm, IOPM_SIZE, M_DEVBUF);
764 iopm = NULL;
765 errout1:
766 contigfree(vmcb, PAGE_SIZE, M_DEVBUF);
767 vmcb = NULL;
768 errout0:
769 contigfree(hsave_area, PAGE_SIZE, M_DEVBUF);
770 hsave_area = NULL;
772 SYSINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_load, NULL);
774 static void
775 fkvm_unload(void *unused)
777 printf("fkvm_unload\n");
778 /* TODO */
780 if(msrpm != NULL)
781 contigfree(msrpm, MSRPM_SIZE, M_DEVBUF);
783 if(iopm != NULL)
784 contigfree(iopm, IOPM_SIZE, M_DEVBUF);
786 if(vmcb != NULL)
787 contigfree(vmcb, PAGE_SIZE, M_DEVBUF);
789 if(hsave_area != NULL)
790 contigfree(hsave_area, PAGE_SIZE, M_DEVBUF);
792 SYSUNINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_unload, NULL);