set efer in guest to get past vmrun exit code -1
[freebsd-src/fkvm-freebsd.git] / sys / kern / kern_fkvm.c
blobf9643f3c70f084e85a956385928875f488b5dd29
1 /*-
2 * Copyright (c) 2008 The FreeBSD Project
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
27 #include <sys/cdefs.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/malloc.h>
32 #include <vm/vm.h>
33 #include <vm/pmap.h>
34 #include <vm/vm_extern.h>
35 #include <vm/vm_map.h>
36 #include <vm/vm_object.h>
37 #include <vm/vm_param.h>
38 #include <machine/_inttypes.h>
39 #include <machine/specialreg.h>
40 #include <machine/segments.h>
41 #include <machine/vmcb.h>
/*
 * Sizes, in bytes, of the buffers allocated for the I/O and MSR
 * permission maps.
 * TODO: confirm whether the I/O map has to be 12 Kbytes rather than just
 * 8 Kbytes + 1.
 */
#define IOPM_SIZE	(8*1024 + 1)
#define MSRPM_SIZE	(8*1024)

/* fkvm data */
static void *iopm = NULL;	/* Should I allocate a vm_object_t instead? */
static void *msrpm = NULL;	/* Should I allocate a vm_object_t instead? */

/* per-guest data */

/* VCPU data */
static void *hsave_area = NULL;		/* address is written to MSR_VM_HSAVE_PA */
static struct vmcb *vmcb = NULL;	/* the single VMCB used by this file */
/*
 * Indices into the regs[] array of struct vcpu.  The first sixteen entries
 * are numbered 0..15 in the order listed; RIP follows them and
 * NR_VCPU_REGS is the resulting array length.
 */
enum {
	VCPU_REGS_RAX = 0,
	VCPU_REGS_RCX,
	VCPU_REGS_RDX,
	VCPU_REGS_RBX,
	VCPU_REGS_RSP,
	VCPU_REGS_RBP,
	VCPU_REGS_RSI,
	VCPU_REGS_RDI,
	VCPU_REGS_R8,
	VCPU_REGS_R9,
	VCPU_REGS_R10,
	VCPU_REGS_R11,
	VCPU_REGS_R12,
	VCPU_REGS_R13,
	VCPU_REGS_R14,
	VCPU_REGS_R15,
	VCPU_REGS_RIP,
	NR_VCPU_REGS
};
77 static struct vcpu {
78 unsigned long vmcb_pa;
79 unsigned long regs[NR_VCPU_REGS];
80 u_int64_t host_gs_base;
81 u_int64_t cr2;
82 u_int64_t cr3;
83 } vcpu;
/*
 * SVM instructions spelled as raw opcode bytes so they can be pasted into
 * inline assembly.  All six share the 0x0f 0x01 prefix and differ only in
 * the last byte.
 */
#define SVM_VMRUN	".byte 0x0f, 0x01, 0xd8"
#define SVM_VMLOAD	".byte 0x0f, 0x01, 0xda"
#define SVM_VMSAVE	".byte 0x0f, 0x01, 0xdb"
#define SVM_STGI	".byte 0x0f, 0x01, 0xdc"
#define SVM_CLGI	".byte 0x0f, 0x01, 0xdd"
#define SVM_INVLPGA	".byte 0x0f, 0x01, 0xdf"
92 static void
93 print_tss_desc(struct system_segment_descriptor *tss_desc)
95 printf("TSS desc @ %p:\n", tss_desc);
96 printf("sd_lolimit: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_lolimit);
97 printf("sd_lobase: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_lobase);
98 printf("sd_type: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_type);
99 printf("sd_dpl: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_dpl);
100 printf("sd_p: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_p);
101 printf("sd_hilimit: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_hilimit);
102 printf("sd_xx0: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx0);
103 printf("sd_gran: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_gran);
104 printf("sd_hibase: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_hibase);
105 printf("sd_xx1: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx1);
106 printf("sd_mbz: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_mbz);
107 printf("sd_xx2: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx2);
108 printf("\n\n");
111 static void
112 print_tss(struct system_segment_descriptor *tss_desc)
114 u_int32_t *base;
115 int limit;
116 int i;
118 base = (u_int32_t*) ((((u_int64_t) tss_desc->sd_hibase) << 24) | ((u_int64_t) tss_desc->sd_lobase));
119 limit = ((tss_desc->sd_hilimit << 16) | tss_desc->sd_lolimit) / 4;
121 printf("TSS: @ %p\n", base);
122 for (i = 0; i <= limit; i++)
123 printf("%x: 0x%" PRIx32 "\n", i, base[i]);
124 printf("\n\n");
127 static int
128 vmrun_assert(struct vmcb *vmcb)
130 #define A(cond) do { if ((cond)) { printf("Error: assertion not met on line %d\n", __LINE__); bad = 1; } } while (0)
132 int bad;
134 bad = 0;
136 // The following are illegal:
138 //EFER.SVME is zero.
139 A((vmcb->save.efer & 0x0000000000001000) == 0);
141 // CR0.CD is zero and CR0.NW is set
142 A( ((vmcb->save.cr0 & 0x0000000040000000) == 0) &&
143 ((vmcb->save.cr0 & 0x0000000020000000) != 0));
145 // CR0[63:32] are not zero.
146 A((vmcb->save.cr0 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
148 // Any MBZ bit of CR3 is set.
149 A((vmcb->save.cr3 & 0xFFF0000000000000) != 0);
151 // CR4[63:11] are not zero.
152 A((vmcb->save.cr4 & 0xFFFFFFFFFFFFF800) == 0xFFFFFFFFFFFFF800);
154 // DR6[63:32] are not zero.
155 A((vmcb->save.dr6 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
157 // DR7[63:32] are not zero.
158 A((vmcb->save.dr7 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
160 // EFER[63:15] are not zero.
161 A((vmcb->save.efer & 0xFFFFFFFFFFFF8000) == 0xFFFFFFFFFFF8000);
163 // EFER.LMA or EFER.LME is non-zero and this processor does not support long mode.
164 //// A((vmcb->save.efer & 0x0000000000000500) != 0);
166 // EFER.LME and CR0.PG are both set and CR4.PAE is zero.
167 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
168 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
169 ((vmcb->save.cr4 & 0x0000000000000020) != 0));
171 // EFER.LME and CR0.PG are both non-zero and CR0.PE is zero.
172 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
173 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
174 ((vmcb->save.cr0 & 0x0000000000000001) == 0));
176 // EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
177 // cs.attrib = concat 55-52 and 47-40 (p372 v2)
178 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
179 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
180 ((vmcb->save.cr4 & 0x0000000000000020) != 0) &&
181 ((vmcb->save.cs.attrib & 0x0200) != 0) &&
182 ((vmcb->save.cs.attrib & 0x0400) != 0));
184 // The VMRUN intercept bit is clear.
185 A((vmcb->control.intercepts & 0x0000000100000000) == 0);
187 // The MSR or IOIO intercept tables extend to a physical address that is
188 // greater than or equal to the maximum supported physical address.
190 // Illegal event injection (see Section 15.19 on page 391).
192 // ASID is equal to zero.
193 A(vmcb->control.guest_asid == 0);
195 // VMRUN can load a guest value of CR0 with PE = 0 but PG = 1, a
196 // combination that is otherwise illegal (see Section 15.18).
198 // In addition to consistency checks, VMRUN and #VMEXIT canonicalize (i.e.,
199 // sign-extend to 63 bits) all base addresses in the segment registers
200 // that have been loaded.
202 return bad;
204 #undef A
207 static void
208 fkvm_vcpu_run(struct vcpu *vcpu, struct vmcb *vmcb)
210 u_int64_t lstar;
211 u_int64_t cstar;
212 u_int64_t star;
213 u_int64_t sfmask;
215 u_short fs_selector;
216 u_short gs_selector;
217 u_short ldt_selector;
219 unsigned long host_cr2;
220 unsigned long host_dr6;
221 unsigned long host_dr7;
223 struct system_segment_descriptor *tss_desc;
224 u_int64_t sel;
226 printf("begin fkvm_vcpu_run\n");
228 if (vmrun_assert(vmcb))
229 return;
232 tss_desc = (struct system_segment_descriptor*) (&gdt[GPROC0_SEL]);
233 sel = GSEL(GPROC0_SEL, SEL_KPL);
235 printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64 "\n", sel);
236 print_tss_desc(tss_desc);
237 print_tss(tss_desc);
239 printf("VMCB save area:\n");
240 printf("fs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
241 vmcb->save.fs.selector,
242 vmcb->save.fs.attrib,
243 vmcb->save.fs.limit,
244 vmcb->save.fs.base);
245 printf("gs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
246 vmcb->save.gs.selector,
247 vmcb->save.gs.attrib,
248 vmcb->save.gs.limit,
249 vmcb->save.gs.base);
250 printf("tr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
251 vmcb->save.tr.selector,
252 vmcb->save.tr.attrib,
253 vmcb->save.tr.limit,
254 vmcb->save.tr.base);
255 printf("ldtr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
256 vmcb->save.ldtr.selector,
257 vmcb->save.ldtr.attrib,
258 vmcb->save.ldtr.limit,
259 vmcb->save.ldtr.base);
260 printf("kernel_gs_base: %" PRIx64 "\n", vmcb->save.kernel_gs_base);
261 printf("star: %" PRIx64 "\n", vmcb->save.star);
262 printf("lstar: %" PRIx64 "\n", vmcb->save.lstar);
263 printf("cstar: %" PRIx64 "\n", vmcb->save.cstar);
264 printf("sfmask: %" PRIx64 "\n", vmcb->save.sfmask);
265 printf("sysenter_cs: %" PRIx64 "\n", vmcb->save.sysenter_cs);
266 printf("sysenter_esp: %" PRIx64 "\n", vmcb->save.sysenter_esp);
267 printf("sysenter_eip: %" PRIx64 "\n", vmcb->save.sysenter_eip);
268 printf("\n\n");
270 // disable_intr();
272 vcpu->vmcb_pa = vtophys(vmcb);
273 printf("vmcb = 0x%p\n", vmcb);
274 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu->vmcb_pa);
276 vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX];
277 vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP];
278 vmcb->save.rip = vcpu->regs[VCPU_REGS_RIP];
280 /* meh: kvm has pre_svm_run(svm); */
282 vcpu->host_gs_base = rdmsr(MSR_GSBASE);
283 printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);
285 fs_selector = rfs();
286 gs_selector = rgs();
287 ldt_selector = rldt();
288 printf("fs selector: %hx\n", fs_selector);
289 printf("gs selector: %hx\n", gs_selector);
290 printf("ldt selector: %hx\n", ldt_selector);
292 host_cr2 = rcr2();
294 host_dr6 = rdr6();
295 host_dr7 = rdr7();
297 vmcb->save.cr2 = vcpu->cr2;
298 /* meh: cr3? */
300 /* meh: dr7? db_regs? */
302 printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
303 printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
304 printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
305 printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));
307 star = rdmsr(MSR_STAR);
308 lstar = rdmsr(MSR_LSTAR);
309 cstar = rdmsr(MSR_CSTAR);
310 sfmask = rdmsr(MSR_SF_MASK);
312 printf("CLGI...\n");
314 __asm __volatile (SVM_CLGI);
317 // enable_intr();
319 #define R "r"
320 __asm __volatile (
321 "push %%"R"bp; \n\t"
322 "mov %c[rbx](%[svm]), %%"R"bx \n\t"
323 "mov %c[rcx](%[svm]), %%"R"cx \n\t"
324 "mov %c[rdx](%[svm]), %%"R"dx \n\t"
325 "mov %c[rsi](%[svm]), %%"R"si \n\t"
326 "mov %c[rdi](%[svm]), %%"R"di \n\t"
327 "mov %c[rbp](%[svm]), %%"R"bp \n\t"
328 "mov %c[r8](%[svm]), %%r8 \n\t"
329 "mov %c[r9](%[svm]), %%r9 \n\t"
330 "mov %c[r10](%[svm]), %%r10 \n\t"
331 "mov %c[r11](%[svm]), %%r11 \n\t"
332 "mov %c[r12](%[svm]), %%r12 \n\t"
333 "mov %c[r13](%[svm]), %%r13 \n\t"
334 "mov %c[r14](%[svm]), %%r14 \n\t"
335 "mov %c[r15](%[svm]), %%r15 \n\t"
337 /* Enter guest mode */
338 "push %%"R"ax \n\t"
339 "mov %c[vmcb](%[svm]), %%"R"ax \n\t"
340 SVM_VMLOAD "\n\t"
341 SVM_VMRUN "\n\t"
342 SVM_VMSAVE "\n\t"
343 "pop %%"R"ax \n\t"
345 /* Save guest registers, load host registers */
346 "mov %%"R"bx, %c[rbx](%[svm]) \n\t"
347 "mov %%"R"cx, %c[rcx](%[svm]) \n\t"
348 "mov %%"R"dx, %c[rdx](%[svm]) \n\t"
349 "mov %%"R"si, %c[rsi](%[svm]) \n\t"
350 "mov %%"R"di, %c[rdi](%[svm]) \n\t"
351 "mov %%"R"bp, %c[rbp](%[svm]) \n\t"
352 "mov %%r8, %c[r8](%[svm]) \n\t"
353 "mov %%r9, %c[r9](%[svm]) \n\t"
354 "mov %%r10, %c[r10](%[svm]) \n\t"
355 "mov %%r11, %c[r11](%[svm]) \n\t"
356 "mov %%r12, %c[r12](%[svm]) \n\t"
357 "mov %%r13, %c[r13](%[svm]) \n\t"
358 "mov %%r14, %c[r14](%[svm]) \n\t"
359 "mov %%r15, %c[r15](%[svm]) \n\t"
360 "pop %%"R"bp"
362 : [svm]"a"(vcpu),
363 [vmcb]"i"(offsetof(struct vcpu, vmcb_pa)),
364 [rbx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBX])),
365 [rcx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RCX])),
366 [rdx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDX])),
367 [rsi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RSI])),
368 [rdi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDI])),
369 [rbp]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBP])),
370 [r8 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R8 ])),
371 [r9 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R9 ])),
372 [r10]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R10])),
373 [r11]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R11])),
374 [r12]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R12])),
375 [r13]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R13])),
376 [r14]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R14])),
377 [r15]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R15]))
378 : "cc", "memory",
379 R"bx", R"cx", R"dx", R"si", R"di",
380 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
384 /* meh: dr7? db_regs? */
386 vcpu->cr2 = vmcb->save.cr2;
388 vcpu->regs[VCPU_REGS_RAX] = vmcb->save.rax;
389 vcpu->regs[VCPU_REGS_RSP] = vmcb->save.rsp;
390 vcpu->regs[VCPU_REGS_RIP] = vmcb->save.rip;
392 load_dr6(host_dr6);
393 load_dr7(host_dr7);
395 load_cr2(host_cr2);
397 load_fs(fs_selector);
398 load_gs(gs_selector);
399 lldt(ldt_selector);
401 wrmsr(MSR_GSBASE, vcpu->host_gs_base);
403 tss_desc->sd_type = SDT_SYSTSS;
404 ltr(sel);
406 wrmsr(MSR_STAR, star);
407 wrmsr(MSR_LSTAR, lstar);
408 wrmsr(MSR_CSTAR, cstar);
409 wrmsr(MSR_SF_MASK, sfmask);
411 // disable_intr();
413 __asm __volatile (SVM_STGI);
415 printf("STGI\n");
417 printf("exit_code: %" PRIx64 "\n", vmcb->control.exit_code);
419 printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
420 printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
421 printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
422 printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));
424 fs_selector = rfs();
425 gs_selector = rgs();
426 ldt_selector = rldt();
427 printf("fs selector: %hx\n", fs_selector);
428 printf("gs selector: %hx\n", gs_selector);
429 printf("ldt selector: %hx\n", ldt_selector);
431 vcpu->host_gs_base = rdmsr(MSR_GSBASE);
432 printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);
434 print_tss_desc(tss_desc);
435 print_tss(tss_desc);
437 printf("VMCB save area:\n");
438 printf("fs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
439 vmcb->save.fs.selector,
440 vmcb->save.fs.attrib,
441 vmcb->save.fs.limit,
442 vmcb->save.fs.base);
443 printf("gs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
444 vmcb->save.gs.selector,
445 vmcb->save.gs.attrib,
446 vmcb->save.gs.limit,
447 vmcb->save.gs.base);
448 printf("tr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
449 vmcb->save.tr.selector,
450 vmcb->save.tr.attrib,
451 vmcb->save.tr.limit,
452 vmcb->save.tr.base);
453 printf("ldtr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
454 vmcb->save.ldtr.selector,
455 vmcb->save.ldtr.attrib,
456 vmcb->save.ldtr.limit,
457 vmcb->save.ldtr.base);
458 printf("kernel_gs_base: %" PRIx64 "\n", vmcb->save.kernel_gs_base);
459 printf("star: %" PRIx64 "\n", vmcb->save.star);
460 printf("lstar: %" PRIx64 "\n", vmcb->save.lstar);
461 printf("cstar: %" PRIx64 "\n", vmcb->save.cstar);
462 printf("sfmask: %" PRIx64 "\n", vmcb->save.sfmask);
463 printf("sysenter_cs: %" PRIx64 "\n", vmcb->save.sysenter_cs);
464 printf("sysenter_esp: %" PRIx64 "\n", vmcb->save.sysenter_esp);
465 printf("sysenter_eip: %" PRIx64 "\n", vmcb->save.sysenter_eip);
466 printf("\n\n");
468 // enable_intr();
470 /* meh: next_rip */
473 static void
474 _fkvm_init_seg(struct vmcb_seg *seg, uint16_t attrib)
476 seg->selector = 0;
477 seg->attrib = VMCB_SELECTOR_P_MASK | attrib;
478 seg->limit = 0xffff;
479 seg->base = 0;
482 static inline void
483 fkvm_init_seg(struct vmcb_seg *seg)
485 _fkvm_init_seg(seg, VMCB_SELECTOR_S_MASK | VMCB_SELECTOR_WRITE_MASK);
/*
 * Reset a VMCB system segment (used for ldtr and tr).  `attrib` is passed
 * straight through to _fkvm_init_seg(); unlike fkvm_init_seg() no
 * S or WRITE bit is added here.
 */
static inline void
fkvm_init_sys_seg(struct vmcb_seg *seg, uint16_t attrib)
{
	_fkvm_init_seg(seg, attrib);
}
494 static void
495 fkvm_iopm_init(void *iopm)
497 memset(iopm, 0xff, IOPM_SIZE); /* TODO: we may want to allow access to PC debug port */
500 static void
501 fkvm_msrpm_init(void *msrpm)
503 memset(msrpm, 0xff, MSRPM_SIZE); /* TODO: we may want to allow some MSR accesses */
506 static u_int64_t
507 fkvm_make_vm_map(void)
509 int rc;
510 struct vmspace *sp = NULL;
511 vm_object_t obj = NULL;
513 sp = vmspace_alloc(0, 0xffffffffffffffff);
514 if (sp == NULL) {
515 printf("vmspace_alloc failed\n");
516 goto fail;
519 obj = vm_object_allocate(OBJT_DEFAULT, 0xffffffffffffffff >> PAGE_SHIFT);
521 vm_object_reference(obj);
522 rc = vm_map_insert(&sp->vm_map,
523 obj,
524 0, 0, 0xffffffffffffffff >> PAGE_SHIFT,
525 VM_PROT_ALL, VM_PROT_ALL,
527 if (rc != KERN_SUCCESS) {
528 printf("vm_map_insert failed: %d\n", rc);
529 vm_object_deallocate(obj);
530 goto fail;
533 return vtophys(vmspace_pmap(sp)->pm_pml4);
535 fail:
536 if (obj != NULL) {
537 vm_object_deallocate(obj);
538 obj = NULL;
540 if (sp != NULL) {
541 vmspace_free(sp);
542 sp = NULL;
544 return 0;
549 static void
550 fkvm_vmcb_init(struct vmcb *vmcb)
552 struct vmcb_control_area *control = &vmcb->control;
553 struct vmcb_save_area *save = &vmcb->save;
555 control->intercept_cr_reads = INTERCEPT_CR4_MASK;
557 control->intercept_cr_writes = INTERCEPT_CR4_MASK |
558 INTERCEPT_CR8_MASK;
560 control->intercept_dr_reads = INTERCEPT_DR0_MASK |
561 INTERCEPT_DR1_MASK |
562 INTERCEPT_DR2_MASK |
563 INTERCEPT_DR3_MASK;
565 control->intercept_dr_writes = INTERCEPT_DR0_MASK |
566 INTERCEPT_DR1_MASK |
567 INTERCEPT_DR2_MASK |
568 INTERCEPT_DR3_MASK |
569 INTERCEPT_DR5_MASK |
570 INTERCEPT_DR7_MASK;
572 control->intercept_exceptions = (1 << IDT_UD) | // Invalid Opcode
573 (1 << IDT_MC); // Machine Check
575 control->intercepts = INTERCEPT_INTR |
576 INTERCEPT_NMI |
577 INTERCEPT_SMI |
578 INTERCEPT_CPUID |
579 INTERCEPT_INVD |
580 INTERCEPT_HLT |
581 INTERCEPT_INVLPGA |
582 INTERCEPT_IOIO_PROT |
583 INTERCEPT_MSR_PROT |
584 INTERCEPT_SHUTDOWN |
585 INTERCEPT_VMRUN |
586 INTERCEPT_VMMCALL |
587 INTERCEPT_VMLOAD |
588 INTERCEPT_VMSAVE |
589 INTERCEPT_STGI |
590 INTERCEPT_CLGI |
591 INTERCEPT_SKINIT |
592 INTERCEPT_WBINVD |
593 INTERCEPT_MONITOR |
594 INTERCEPT_MWAIT_UNCOND;
596 control->iopm_base_pa = vtophys(iopm);
597 control->msrpm_base_pa = vtophys(msrpm);
598 control->tsc_offset = 0;
600 /* TODO: remove this once we assign asid's to distinct VM's */
601 control->guest_asid = 1;
602 control->tlb_control = VMCB_TLB_CONTROL_FLUSH_ALL;
604 /* let v_tpr default to 0 */
605 /* let v_irq default to 0 */
606 /* let v_intr default to 0 */
608 control->v_intr_masking = 1;
610 /* let v_intr_vector default to 0 */
611 /* let intr_shadow default to 0 */
612 /* let exit_code, exit_info_1, exit_info_2, exit_int_info,
613 exit_int_info_err_code default to 0 */
615 control->nested_ctl = 1;
617 /* let event_inj default to 0 */
619 // (nested_cr3 is later)
621 /* let lbr_virt_enable default to 0 */
624 fkvm_init_seg(&save->es);
625 fkvm_init_seg(&save->ss);
626 fkvm_init_seg(&save->ds);
627 fkvm_init_seg(&save->fs);
628 fkvm_init_seg(&save->gs);
630 _fkvm_init_seg(&save->cs, VMCB_SELECTOR_READ_MASK | VMCB_SELECTOR_S_MASK |
631 VMCB_SELECTOR_CODE_MASK);
632 save->cs.selector = 0xf000;
633 save->cs.base = 0xffff0000;
635 save->gdtr.limit = 0xffff;
636 save->idtr.limit = 0xffff;
638 fkvm_init_sys_seg(&save->ldtr, SDT_SYSLDT);
639 fkvm_init_sys_seg(&save->tr, SDT_SYS286BSY);
641 save->g_pat = PAT_VALUE(PAT_WRITE_BACK, 0) | PAT_VALUE(PAT_WRITE_THROUGH, 1) |
642 PAT_VALUE(PAT_UNCACHED, 2) | PAT_VALUE(PAT_UNCACHEABLE, 3) |
643 PAT_VALUE(PAT_WRITE_BACK, 4) | PAT_VALUE(PAT_WRITE_THROUGH, 5) |
644 PAT_VALUE(PAT_UNCACHED, 6) | PAT_VALUE(PAT_UNCACHEABLE, 7);
646 /* CR0_ET is forced to 1 by processor */
647 save->cr0 = CR0_ET;
648 save->dr6 = 0xffff0ff0;
649 save->dr7 = 0x400;
650 //save->rflags = 2; /* It seems like bit 1 is reserved. This line makes no sense. */
651 save->rip = 0x0000fff0;
653 save->efer = 0x0000000000001000;
655 control->nested_cr3 = fkvm_make_vm_map();
656 printf("ncr3: %" PRIx64 "\n", control->nested_cr3);
660 struct vmspace *sp = NULL;
661 vm_object_t obj = NULL;
663 int fkvm_userpoke(void *data);
666 fkvm_userpoke(void *data)
668 printf("fkvm_userpoke\n");
670 /* VMRUN */
671 fkvm_vcpu_run(&vcpu, vmcb);
673 return 1;
676 static void
677 fkvm_load(void *unused)
679 u_int64_t efer;
681 printf("fkvm_load\n");
682 printf("sizeof(struct vmcb) = %" PRIx64 "\n", sizeof(struct vmcb));
684 /* TODO: check for the presense of extensions */
686 hsave_area = contigmalloc(PAGE_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
687 if(hsave_area == NULL)
688 return;
690 vmcb = (struct vmcb *)contigmalloc(PAGE_SIZE, M_DEVBUF, M_ZERO, 0, -1UL, PAGE_SIZE, 0);
691 if(vmcb == NULL)
692 goto errout0;
694 iopm = contigmalloc(IOPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
695 if(iopm == NULL)
696 goto errout1;
698 msrpm = contigmalloc(MSRPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
699 if(msrpm == NULL)
700 goto errout2;
702 /* Initialize iopm and msrpm */
703 fkvm_iopm_init(iopm);
704 fkvm_msrpm_init(msrpm);
706 /* Initialize VMCB */
707 fkvm_vmcb_init(vmcb);
709 /* Enable SVM in EFER */
710 efer = rdmsr(MSR_EFER);
711 printf("EFER = %" PRIx64 "\n", efer);
712 wrmsr(MSR_EFER, efer | EFER_SVME);
713 efer = rdmsr(MSR_EFER);
714 printf("new EFER = %" PRIx64 "\n", efer);
716 /* Write Host save address in MSR_VM_HSAVE_PA */
717 wrmsr(MSR_VM_HSAVE_PA, vtophys(hsave_area));
719 return;
721 errout2:
722 contigfree(iopm, IOPM_SIZE, M_DEVBUF);
723 iopm = NULL;
724 errout1:
725 contigfree(vmcb, PAGE_SIZE, M_DEVBUF);
726 vmcb = NULL;
727 errout0:
728 contigfree(hsave_area, PAGE_SIZE, M_DEVBUF);
729 hsave_area = NULL;
731 SYSINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_load, NULL);
733 static void
734 fkvm_unload(void *unused)
736 printf("fkvm_unload\n");
737 /* TODO */
739 if(msrpm != NULL)
740 contigfree(msrpm, MSRPM_SIZE, M_DEVBUF);
742 if(iopm != NULL)
743 contigfree(iopm, IOPM_SIZE, M_DEVBUF);
745 if(vmcb != NULL)
746 contigfree(vmcb, PAGE_SIZE, M_DEVBUF);
748 if(hsave_area != NULL)
749 contigfree(hsave_area, PAGE_SIZE, M_DEVBUF);
751 SYSUNINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_unload, NULL);