PROC_GET_GUESTVM: push NULL warning out to callers
[freebsd-src/fkvm-freebsd.git] / sys / kern / kern_fkvm.c
blob591e0ebc6ca4984c91363c46ef381bf2735528dd
1 /*-
2 * Copyright (c) 2008 The FreeBSD Project
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
27 #include <sys/fkvm.h>
28 #include <sys/cdefs.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/malloc.h>
33 #include <sys/sysproto.h>
34 #include <sys/file.h>
35 #include <sys/mman.h>
36 #include <sys/proc.h>
37 #include <sys/eventhandler.h>
38 #include <vm/vm.h>
39 #include <vm/pmap.h>
40 #include <vm/vm_extern.h>
41 #include <vm/vm_map.h>
42 #include <vm/vm_object.h>
43 #include <vm/vm_param.h>
44 #include <machine/_inttypes.h>
45 #include <machine/specialreg.h>
46 #include <machine/segments.h>
47 #include <machine/vmcb.h>
50 /* Definitions for Port IO */
51 #define PORT_SHIFT 16
52 #define ADDR_SHIFT 7
53 #define SIZE_SHIFT 4
54 #define REP_SHIFT 3
55 #define STR_SHIFT 2
56 #define TYPE_SHIFT 0
58 #define PORT_MASK 0xFFFF0000
59 #define ADDR_MASK (7 << ADDR_SHIFT)
60 #define SIZE_MASK (7 << SIZE_SHIFT)
61 #define REP_MASK (1 << REP_SHIFT)
62 #define STR_MASK (1 << STR_SHIFT)
63 #define TYPE_MASK (1 << TYPE_SHIFT)
64 /* End Definitions for Port IO */
66 #define PMIO_PAGE_OFFSET 1
68 #define IOPM_SIZE (8*1024 + 1) /* TODO: ensure that this need not be 12Kbtes, not just 8Kb+1 */
69 #define MSRPM_SIZE (8*1024)
71 /* fkvm data */
73 static int fkvm_loaded = 0;
75 static void *iopm = NULL; /* Should I allocate a vm_object_t instead? */
76 static void *msrpm = NULL; /* Should I allocate a vm_object_t instead? */
78 static void *hsave_area = NULL;
80 static eventhandler_tag exit_tag;
82 /* per-guest data */
/*
 * Indices into struct vcpu's regs[] array: one slot per guest
 * general-purpose register, plus RIP.  NR_VCPU_REGS is the array size.
 */
enum {
	VCPU_REGS_RAX = 0,
	VCPU_REGS_RCX = 1,
	VCPU_REGS_RDX = 2,
	VCPU_REGS_RBX = 3,
	VCPU_REGS_RSP = 4,
	VCPU_REGS_RBP = 5,
	VCPU_REGS_RSI = 6,
	VCPU_REGS_RDI = 7,
	VCPU_REGS_R8 = 8,
	VCPU_REGS_R9 = 9,
	VCPU_REGS_R10 = 10,
	VCPU_REGS_R11 = 11,
	VCPU_REGS_R12 = 12,
	VCPU_REGS_R13 = 13,
	VCPU_REGS_R14 = 14,
	VCPU_REGS_R15 = 15,
	VCPU_REGS_RIP,
	NR_VCPU_REGS
};
/* Per-virtual-CPU state. */
struct vcpu {
	/* VCPU data */
	struct vmcb *vmcb;		/* virtual machine control block (kernel VA) */
	unsigned long vmcb_pa;		/* physical address of vmcb, handed to VMRUN */

	unsigned long regs[NR_VCPU_REGS];	/* guest GPRs + RIP, indexed by VCPU_REGS_* */
	u_int64_t host_gs_base;		/* host MSR_GSBASE, saved across VMRUN */
	u_int64_t cr2;			/* guest CR2, copied to/from vmcb around VMRUN */
	u_int64_t cr3;			/* guest CR3 */

	struct guestvm *guest_vm;	/* back-pointer to the owning VM */
};
/* Per-guest-VM state. */
struct guestvm {
	struct vcpu *vcpus[MAX_VCPUS];	/* vcpus belonging to this VM */
	int nr_vcpus;			/* count of valid entries in vcpus[] */

	struct vmspace *sp;		/* guest-physical address space */
	u_int64_t nested_cr3;		/* nested-paging root, copied into each VMCB */
};
127 #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
128 #define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"
129 #define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb"
130 #define SVM_CLGI ".byte 0x0f, 0x01, 0xdd"
131 #define SVM_STGI ".byte 0x0f, 0x01, 0xdc"
132 #define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf"
134 static inline struct vcpu *
135 TD_GET_VCPU(struct thread *td)
137 struct vcpu *vcpu;
138 vcpu = td->vcpu;
139 if (vcpu == NULL)
140 printf("TD_GET_VCPU -> NULL\n");
141 return vcpu;
144 static inline void
145 TD_SET_VCPU(struct thread *td, struct vcpu *vcpu)
147 td->vcpu = vcpu;
150 static inline struct guestvm *
151 PROC_GET_GUESTVM(struct proc *proc)
153 struct guestvm *guestvm;
154 guestvm = proc->p_guestvm;
155 return guestvm;
158 static inline void
159 PROC_SET_GUESTVM(struct proc *proc, struct guestvm *guestvm)
161 proc->p_guestvm = guestvm; \
/* Dump one VMCB segment register (selector/attrib/limit/base) to the console. */
static void
print_vmcb_seg(struct vmcb_seg* vmcb_seg, const char* name)
{
	printf("%s Selector\n", name);
	printf("Selector : %" PRIx16 "\n", vmcb_seg->selector);
	printf("Attributes : %" PRIx16 "\n", vmcb_seg->attrib);
	printf("Limit : %" PRIx32 "\n", vmcb_seg->limit);
	printf("Base Address : %" PRIx64 "\n", vmcb_seg->base);
	printf("\n");
}
175 static void
176 print_vmcb(struct vmcb *vmcb)
178 printf("VMCB Control Area\n");
179 printf("Intercept CR Reads : %" PRIx16 "\n", vmcb->control.intercept_cr_reads);
180 printf("Intercept CR Writes : %" PRIx16 "\n", vmcb->control.intercept_cr_writes);
181 printf("Intercept DR Reads : %" PRIx16 "\n", vmcb->control.intercept_dr_reads);
182 printf("Intercept DR Writes : %" PRIx16 "\n", vmcb->control.intercept_dr_writes);
183 printf("Intercept Exceptions : %" PRIx32 "\n", vmcb->control.intercept_exceptions);
184 printf("Intercepts : %" PRIx64 "\n", vmcb->control.intercepts);
185 printf("Reserved 1: \n");
186 for(int i=0; i < 44; i++) {
187 printf("%" PRIx8 "", vmcb->control.reserved_1[i]); /* Should be Zero */
189 printf("\n");
190 printf("IOPM Base PA : %" PRIx64 "\n", vmcb->control.iopm_base_pa);
191 printf("MSRPM Base PA : %" PRIx64 "\n", vmcb->control.msrpm_base_pa);
192 printf("TSC Offset : %" PRIx64 "\n", vmcb->control.tsc_offset);
193 printf("Guest ASID : %" PRIx32 "\n", vmcb->control.guest_asid);
194 printf("TLB Control : %" PRIx8 "\n", vmcb->control.tlb_control);
195 printf("Reserved 2 : \n");
196 for(int i=0; i < 3; i++) {
197 printf("%" PRIx8 "", vmcb->control.reserved_1[i]); /* Should be Zero */
199 printf("\n");
200 printf("Virtual TPR : %" PRIx8 "\n", vmcb->control.v_tpr);
201 printf("Virtual IRQ : %" PRIx8 "\n", vmcb->control.v_irq);
202 printf("Virtual Interrupt : %" PRIx8 "\n", vmcb->control.v_intr);
203 printf("Virtual Interrupt Masking: %" PRIx8 "\n", vmcb->control.v_intr_masking);
204 printf("Virtual Interrupt Vector : %" PRIx8 "\n", vmcb->control.v_intr_vector);
205 printf("Reserved 6 : \n");
206 for(int i=0; i < 3; i++) {
207 printf("%" PRIx8 "", vmcb->control.reserved_6[i]); /* Should be Zero */
209 printf("\n");
210 printf("Interrupt Shadow : %" PRIx8 "\n", vmcb->control.intr_shadow);
211 printf("Reserved 7 : \n");
212 for(int i=0; i < 7; i++) {
213 printf("%" PRIx8 "", vmcb->control.reserved_7[i]); /* Should be Zero */
215 printf("\n");
216 printf("Exit Code : %" PRIx64 "\n", vmcb->control.exit_code);
217 printf("Exit Info 1 : %" PRIx64 "\n", vmcb->control.exit_info_1);
218 printf("Exit Info 2 : %" PRIx64 "\n", vmcb->control.exit_info_2);
219 printf("Exit Interrupt Info : %" PRIx32 "\n", vmcb->control.exit_int_info);
220 printf("Exit Interrupt Info Err Code: %" PRIx32 "\n", vmcb->control.exit_int_info_err_code);
221 printf("Nested Control : %" PRIx64 "\n", vmcb->control.nested_ctl);
222 printf("Reserved 8 : \n");
223 for(int i=0; i < 16; i++) {
224 printf("%" PRIx8 "", vmcb->control.reserved_8[i]); /* Should be Zero */
226 printf("\n");
227 printf("Event Injection : %" PRIx64 "\n", vmcb->control.event_inj);
228 printf("Nested CR3 : %" PRIx64 "\n", vmcb->control.nested_cr3);
229 printf("LBR Virtualization Enable: %" PRIx64 "\n", vmcb->control.lbr_virt_enable);
230 printf("Reserved 9 : \n");
231 for(int i=0; i < 832; i++) {
232 printf("%" PRIx8 "", vmcb->control.reserved_9[i]); /* Should be Zero */
234 printf("\n");
236 printf("\n");
238 printf("VMCB Save Area\n");
239 print_vmcb_seg(&(vmcb->save.es), "ES");
240 print_vmcb_seg(&(vmcb->save.es), "CS");
241 print_vmcb_seg(&(vmcb->save.es), "SS");
242 print_vmcb_seg(&(vmcb->save.es), "DS");
243 print_vmcb_seg(&(vmcb->save.es), "FS");
244 print_vmcb_seg(&(vmcb->save.es), "GS");
245 print_vmcb_seg(&(vmcb->save.es), "GDTR");
246 print_vmcb_seg(&(vmcb->save.es), "LDTR");
247 print_vmcb_seg(&(vmcb->save.es), "IDTR");
248 print_vmcb_seg(&(vmcb->save.es), "TR");
249 printf("Reserved 1 : \n");
250 for(int i=0; i < 43; i++) {
251 printf("%" PRIx8 "", vmcb->save.reserved_1[i]); /* Should be Zero */
253 printf("\n");
254 printf("Current Processor Level : %" PRIx8 "\n", vmcb->save.cpl);
255 printf("Reserved 2 : \n");
256 for(int i=0; i < 4; i++) {
257 printf("%" PRIx8 "", vmcb->save.reserved_2[i]); /* Should be Zero */
259 printf("\n");
260 printf("EFER : %" PRIx64 "\n", vmcb->save.efer);
261 printf("Reserved 3 : \n");
262 for(int i=0; i < 112; i++) {
263 printf("%" PRIx8 "", vmcb->save.reserved_3[i]); /* Should be Zero */
265 printf("\n");
266 printf("Control Register 4 : %" PRIx64 "\n", vmcb->save.cr4);
267 printf("Control Register 3 : %" PRIx64 "\n", vmcb->save.cr3);
268 printf("Control Register 0 : %" PRIx64 "\n", vmcb->save.cr0);
269 printf("Debug Register 7 : %" PRIx64 "\n", vmcb->save.dr7);
270 printf("Debug Register 6 : %" PRIx64 "\n", vmcb->save.dr6);
271 printf("RFlags : %" PRIx64 "\n", vmcb->save.rflags);
272 printf("RIP : %" PRIx64 "\n", vmcb->save.rip);
273 printf("Reserved 4 : \n");
274 for(int i=0; i < 88; i++) {
275 printf("%" PRIx8 "", vmcb->save.reserved_4[i]); /* Should be Zero */
277 printf("\n");
278 printf("RSP : %" PRIx64 "\n", vmcb->save.rsp);
279 printf("Reserved 5 : \n");
280 for(int i=0; i < 24; i++) {
281 printf("%" PRIx8 "", vmcb->save.reserved_5[i]); /* Should be Zero */
283 printf("\n");
284 printf("RAX : %" PRIx64 "\n", vmcb->save.rax);
285 printf("STAR : %" PRIx64 "\n", vmcb->save.star);
286 printf("LSTAR : %" PRIx64 "\n", vmcb->save.lstar);
287 printf("CSTAR : %" PRIx64 "\n", vmcb->save.cstar);
288 printf("SFMASK : %" PRIx64 "\n", vmcb->save.sfmask);
289 printf("Kernel GS Base : %" PRIx64 "\n", vmcb->save.kernel_gs_base);
290 printf("SYSENTER CS : %" PRIx64 "\n", vmcb->save.sysenter_cs);
291 printf("SYSENTER ESP : %" PRIx64 "\n", vmcb->save.sysenter_esp);
292 printf("SYSENTER EIP : %" PRIx64 "\n", vmcb->save.sysenter_eip);
293 printf("Control Register 2 : %" PRIx64 "\n", vmcb->save.cr2);
294 printf("Reserved 6 : \n");
295 for(int i=0; i < 32; i++) {
296 printf("%" PRIx8 "", vmcb->save.reserved_6[i]); /* Should be Zero */
298 printf("\n");
299 printf("Global PAT : %" PRIx64 "\n", vmcb->save.g_pat);
300 printf("Debug Control : %" PRIx64 "\n", vmcb->save.dbg_ctl);
301 printf("BR From : %" PRIx64 "\n", vmcb->save.br_from);
302 printf("BR To : %" PRIx64 "\n", vmcb->save.br_to);
303 printf("Last Exception From : %" PRIx64 "\n", vmcb->save.last_excp_from);
304 printf("Last Exception To : %" PRIx64 "\n", vmcb->save.last_excp_to);
306 printf("\n\n");
309 #if 0
310 static void
311 print_tss_desc(struct system_segment_descriptor *tss_desc)
313 printf("TSS desc @ %p:\n", tss_desc);
314 printf("sd_lolimit: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_lolimit);
315 printf("sd_lobase: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_lobase);
316 printf("sd_type: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_type);
317 printf("sd_dpl: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_dpl);
318 printf("sd_p: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_p);
319 printf("sd_hilimit: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_hilimit);
320 printf("sd_xx0: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx0);
321 printf("sd_gran: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_gran);
322 printf("sd_hibase: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_hibase);
323 printf("sd_xx1: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx1);
324 printf("sd_mbz: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_mbz);
325 printf("sd_xx2: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx2);
326 printf("\n\n");
329 static void
330 print_tss(struct system_segment_descriptor *tss_desc)
332 u_int32_t *base;
333 int limit;
334 int i;
336 base = (u_int32_t*) ((((u_int64_t) tss_desc->sd_hibase) << 24) | ((u_int64_t) tss_desc->sd_lobase));
337 limit = ((tss_desc->sd_hilimit << 16) | tss_desc->sd_lolimit) / 4;
339 printf("TSS: @ %p\n", base);
340 for (i = 0; i <= limit; i++)
341 printf("%x: 0x%" PRIx32 "\n", i, base[i]);
342 printf("\n\n");
344 #endif
/*
 * Dump a compact subset of the VMCB save area (the segment registers and
 * MSR shadows most relevant to debugging VMRUN/#VMEXIT transitions).
 */
static void
print_vmcb_save_area(struct vmcb *vmcb)
{
	printf("VMCB save area:\n");
	printf("  cs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
	       vmcb->save.cs.selector,
	       vmcb->save.cs.attrib,
	       vmcb->save.cs.limit,
	       vmcb->save.cs.base);
	printf("  fs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
	       vmcb->save.fs.selector,
	       vmcb->save.fs.attrib,
	       vmcb->save.fs.limit,
	       vmcb->save.fs.base);
	printf("  gs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
	       vmcb->save.gs.selector,
	       vmcb->save.gs.attrib,
	       vmcb->save.gs.limit,
	       vmcb->save.gs.base);
	printf("  tr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
	       vmcb->save.tr.selector,
	       vmcb->save.tr.attrib,
	       vmcb->save.tr.limit,
	       vmcb->save.tr.base);
	printf("  ldtr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
	       vmcb->save.ldtr.selector,
	       vmcb->save.ldtr.attrib,
	       vmcb->save.ldtr.limit,
	       vmcb->save.ldtr.base);
	printf("  rip: %" PRIx64 "\n", vmcb->save.rip);
	printf("  kernel_gs_base: %" PRIx64 "\n", vmcb->save.kernel_gs_base);
	printf("  star: %" PRIx64 "\n", vmcb->save.star);
	printf("  lstar: %" PRIx64 "\n", vmcb->save.lstar);
	printf("  cstar: %" PRIx64 "\n", vmcb->save.cstar);
	printf("  sfmask: %" PRIx64 "\n", vmcb->save.sfmask);
	printf("  sysenter_cs: %" PRIx64 "\n", vmcb->save.sysenter_cs);
	printf("  sysenter_esp: %" PRIx64 "\n", vmcb->save.sysenter_esp);
	printf("  sysenter_eip: %" PRIx64 "\n", vmcb->save.sysenter_eip);
	printf("\n\n");
}
387 static int
388 vmrun_assert(struct vmcb *vmcb)
390 #define A(cond) do { if ((cond)) { printf("Error: assertion not met on line %d\n", __LINE__); bad = 1; } } while (0)
392 int bad;
394 bad = 0;
396 // The following are illegal:
398 //EFER.SVME is zero.
399 A((vmcb->save.efer & 0x0000000000001000) == 0);
401 // CR0.CD is zero and CR0.NW is set
402 A( ((vmcb->save.cr0 & 0x0000000040000000) == 0) &&
403 ((vmcb->save.cr0 & 0x0000000020000000) != 0));
405 // CR0[63:32] are not zero.
406 A((vmcb->save.cr0 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
408 // Any MBZ bit of CR3 is set.
409 A((vmcb->save.cr3 & 0xFFF0000000000000) != 0);
411 // CR4[63:11] are not zero.
412 A((vmcb->save.cr4 & 0xFFFFFFFFFFFFF800) == 0xFFFFFFFFFFFFF800);
414 // DR6[63:32] are not zero.
415 A((vmcb->save.dr6 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
417 // DR7[63:32] are not zero.
418 A((vmcb->save.dr7 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
420 // EFER[63:15] are not zero.
421 A((vmcb->save.efer & 0xFFFFFFFFFFFF8000) == 0xFFFFFFFFFFF8000);
423 // EFER.LMA or EFER.LME is non-zero and this processor does not support long mode.
424 //// A((vmcb->save.efer & 0x0000000000000500) != 0);
426 // EFER.LME and CR0.PG are both set and CR4.PAE is zero.
427 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
428 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
429 ((vmcb->save.cr4 & 0x0000000000000020) != 0));
431 // EFER.LME and CR0.PG are both non-zero and CR0.PE is zero.
432 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
433 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
434 ((vmcb->save.cr0 & 0x0000000000000001) == 0));
436 // EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
437 // cs.attrib = concat 55-52 and 47-40 (p372 v2)
438 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
439 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
440 ((vmcb->save.cr4 & 0x0000000000000020) != 0) &&
441 ((vmcb->save.cs.attrib & 0x0200) != 0) &&
442 ((vmcb->save.cs.attrib & 0x0400) != 0));
444 // The VMRUN intercept bit is clear.
445 A((vmcb->control.intercepts & 0x0000000100000000) == 0);
447 // The MSR or IOIO intercept tables extend to a physical address that is
448 // greater than or equal to the maximum supported physical address.
450 // Illegal event injection (see Section 15.19 on page 391).
452 // ASID is equal to zero.
453 A(vmcb->control.guest_asid == 0);
455 // VMRUN can load a guest value of CR0 with PE = 0 but PG = 1, a
456 // combination that is otherwise illegal (see Section 15.18).
458 // In addition to consistency checks, VMRUN and #VMEXIT canonicalize (i.e.,
459 // sign-extend to 63 bits) all base addresses in the segment registers
460 // that have been loaded.
462 return bad;
464 #undef A
/*
 * Run one guest VCPU until the next #VMEXIT.
 *
 * Sequence: validate the VMCB, save host state the CPU does not save for
 * us (MSRs, segment selectors, CR2/DR6/DR7), disable global interrupts
 * (CLGI), VMLOAD/VMRUN/VMSAVE with the guest GPRs swapped in via inline
 * asm, then restore all host state and re-enable global interrupts (STGI).
 * On return, vmcb->control.exit_code tells the caller why the guest exited.
 */
static void
fkvm_vcpu_run(struct vcpu *vcpu)
{
	/* Host syscall MSRs, restored after VMRUN (VMSAVE does not cover them all). */
	u_int64_t lstar;
	u_int64_t cstar;
	u_int64_t star;
	u_int64_t sfmask;

	/* Host segment selectors to reload after the guest runs. */
	u_short fs_selector;
	u_short gs_selector;
	u_short ldt_selector;

	unsigned long host_cr2;
	unsigned long host_dr6;
	unsigned long host_dr7;

	struct system_segment_descriptor *tss_desc;
	u_int64_t sel;

	struct vmcb *vmcb;

	printf("begin fkvm_vcpu_run\n");

	vmcb = vcpu->vmcb;

	/* Refuse to VMRUN guest state the CPU would reject. */
	if (vmrun_assert(vmcb))
		return;

	tss_desc = (struct system_segment_descriptor*) (&gdt[GPROC0_SEL]);
	sel = GSEL(GPROC0_SEL, SEL_KPL);

	// printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64 "\n", sel);
	// print_tss_desc(tss_desc);
	// print_tss(tss_desc);

	print_vmcb_save_area(vmcb);
	printf("vcpu->regs[VCPU_REGS_RIP]: 0x%lx\n", vcpu->regs[VCPU_REGS_RIP]);
	// disable_intr();

	/* rax/rsp/rip live in the VMCB itself; the rest go via vcpu->regs. */
	vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX];
	vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP];
	vmcb->save.rip = vcpu->regs[VCPU_REGS_RIP];

	/* meh: kvm has pre_svm_run(svm); */

	vcpu->host_gs_base = rdmsr(MSR_GSBASE);
	// printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);

	fs_selector = rfs();
	gs_selector = rgs();
	ldt_selector = rldt();
	// printf("fs selector: %hx\n", fs_selector);
	// printf("gs selector: %hx\n", gs_selector);
	// printf("ldt selector: %hx\n", ldt_selector);

	host_cr2 = rcr2();

	host_dr6 = rdr6();
	host_dr7 = rdr7();

	vmcb->save.cr2 = vcpu->cr2;
	/* meh: cr3? */
	// TODO: something with apic_base?

	/* meh: dr7? db_regs? */

	// printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
	// printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
	// printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
	// printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));

	star = rdmsr(MSR_STAR);
	lstar = rdmsr(MSR_LSTAR);
	cstar = rdmsr(MSR_CSTAR);
	sfmask = rdmsr(MSR_SF_MASK);

	printf("CLGI...\n");

	__asm __volatile (SVM_CLGI);

	// enable_intr();

	/*
	 * Load the guest GPRs from vcpu->regs, VMLOAD/VMRUN/VMSAVE with
	 * rax = vmcb physical address, then store the guest GPRs back.
	 * rbp is saved/restored around the whole sequence.
	 */
	__asm __volatile (
		"push %%rbp; \n\t"
		"mov %c[rbx](%[svm]), %%rbx \n\t"
		"mov %c[rcx](%[svm]), %%rcx \n\t"
		"mov %c[rdx](%[svm]), %%rdx \n\t"
		"mov %c[rsi](%[svm]), %%rsi \n\t"
		"mov %c[rdi](%[svm]), %%rdi \n\t"
		"mov %c[rbp](%[svm]), %%rbp \n\t"
		"mov %c[r8](%[svm]), %%r8 \n\t"
		"mov %c[r9](%[svm]), %%r9 \n\t"
		"mov %c[r10](%[svm]), %%r10 \n\t"
		"mov %c[r11](%[svm]), %%r11 \n\t"
		"mov %c[r12](%[svm]), %%r12 \n\t"
		"mov %c[r13](%[svm]), %%r13 \n\t"
		"mov %c[r14](%[svm]), %%r14 \n\t"
		"mov %c[r15](%[svm]), %%r15 \n\t"

		/* Enter guest mode */
		"push %%rax \n\t"
		"mov %c[vmcb](%[svm]), %%rax \n\t"
		SVM_VMLOAD "\n\t"
		SVM_VMRUN "\n\t"
		SVM_VMSAVE "\n\t"
		"pop %%rax \n\t"

		/* Save guest registers, load host registers */
		"mov %%rbx, %c[rbx](%[svm]) \n\t"
		"mov %%rcx, %c[rcx](%[svm]) \n\t"
		"mov %%rdx, %c[rdx](%[svm]) \n\t"
		"mov %%rsi, %c[rsi](%[svm]) \n\t"
		"mov %%rdi, %c[rdi](%[svm]) \n\t"
		"mov %%rbp, %c[rbp](%[svm]) \n\t"
		"mov %%r8, %c[r8](%[svm]) \n\t"
		"mov %%r9, %c[r9](%[svm]) \n\t"
		"mov %%r10, %c[r10](%[svm]) \n\t"
		"mov %%r11, %c[r11](%[svm]) \n\t"
		"mov %%r12, %c[r12](%[svm]) \n\t"
		"mov %%r13, %c[r13](%[svm]) \n\t"
		"mov %%r14, %c[r14](%[svm]) \n\t"
		"mov %%r15, %c[r15](%[svm]) \n\t"
		"pop %%rbp"
		:
		: [svm]"a"(vcpu),
		  [vmcb]"i"(offsetof(struct vcpu, vmcb_pa)),
		  [rbx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBX])),
		  [rcx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RCX])),
		  [rdx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDX])),
		  [rsi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RSI])),
		  [rdi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDI])),
		  [rbp]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBP])),
		  [r8 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R8 ])),
		  [r9 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R9 ])),
		  [r10]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R10])),
		  [r11]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R11])),
		  [r12]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R12])),
		  [r13]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R13])),
		  [r14]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R14])),
		  [r15]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R15]))
		: "cc", "memory",
		  "rbx", "rcx", "rdx", "rsi", "rdi",
		  "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
	);

	/* meh: dr7? db_regs? */

	/* Copy the guest's exit state back out of the VMCB. */
	vcpu->cr2 = vmcb->save.cr2;

	vcpu->regs[VCPU_REGS_RAX] = vmcb->save.rax;
	vcpu->regs[VCPU_REGS_RSP] = vmcb->save.rsp;
	vcpu->regs[VCPU_REGS_RIP] = vmcb->save.rip;

	/* Restore host debug/fault state clobbered by the guest. */
	load_dr6(host_dr6);
	load_dr7(host_dr7);

	load_cr2(host_cr2);

	load_fs(fs_selector);
	load_gs(gs_selector);
	lldt(ldt_selector);

	wrmsr(MSR_GSBASE, vcpu->host_gs_base);

	/* VMRUN marks the host TSS busy; reset the type before ltr. */
	tss_desc->sd_type = SDT_SYSTSS;
	ltr(sel);

	wrmsr(MSR_STAR, star);
	wrmsr(MSR_LSTAR, lstar);
	wrmsr(MSR_CSTAR, cstar);
	wrmsr(MSR_SF_MASK, sfmask);

	// disable_intr();

	__asm __volatile (SVM_STGI);

	printf("STGI\n");

	printf("exit_code: %" PRIx64 "\n", vmcb->control.exit_code);

	// print_tss_desc(tss_desc);
	// print_tss(tss_desc);

	print_vmcb_save_area(vmcb);

	// enable_intr();

	/* meh: next_rip */
}
659 static void
660 _fkvm_init_seg(struct vmcb_seg *seg, uint16_t attrib)
662 seg->selector = 0;
663 seg->attrib = VMCB_SELECTOR_P_MASK | attrib;
664 seg->limit = 0xffff;
665 seg->base = 0;
668 static inline void
669 fkvm_init_seg(struct vmcb_seg *seg)
671 _fkvm_init_seg(seg, VMCB_SELECTOR_S_MASK | VMCB_SELECTOR_WRITE_MASK);
674 static inline void
675 fkvm_init_sys_seg(struct vmcb_seg *seg, uint16_t attrib)
677 _fkvm_init_seg(seg, attrib);
680 static void*
681 fkvm_iopm_alloc(void)
683 return contigmalloc(IOPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
686 static void
687 fkvm_iopm_init(void *iopm)
689 memset(iopm, 0xff, IOPM_SIZE); /* TODO: we may want to allow access to PC debug port */
692 static void
693 fkvm_iopm_free(void *iopm)
695 contigfree(iopm, IOPM_SIZE, M_DEVBUF);
698 static void*
699 fkvm_msrpm_alloc(void)
701 return contigmalloc(MSRPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
704 static void
705 fkvm_msrpm_init(void *msrpm)
707 memset(msrpm, 0xff, MSRPM_SIZE); /* TODO: we may want to allow some MSR accesses */
710 static void
711 fkvm_msrpm_free(void *msrpm)
713 contigfree(msrpm, MSRPM_SIZE, M_DEVBUF);
716 static void*
717 fkvm_hsave_area_alloc(void)
719 return contigmalloc(PAGE_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
/*
 * Intentionally empty: the hardware writes the host save area itself on
 * VMRUN, so no software initialization appears to be required — TODO confirm.
 */
static void
fkvm_hsave_area_init(void *hsave_area)
{
}
727 static void
728 fkvm_hsave_area_free(void *hsave_area)
730 contigfree(hsave_area, PAGE_SIZE, M_DEVBUF);
733 static struct vmspace*
734 fkvm_make_vmspace(void)
736 struct vmspace *sp;
738 sp = vmspace_alloc(0, 0xffffffffffffffff);
739 if (sp == NULL) {
740 printf("vmspace_alloc failed\n");
741 return NULL;
744 return sp;
/* Drop our reference on a guest vmspace. */
static void
fkvm_destroy_vmspace(struct vmspace* sp)
{
	vmspace_free(sp);
}
753 static struct vmcb*
754 fkvm_vmcb_alloc(void)
756 return contigmalloc(PAGE_SIZE, M_DEVBUF, M_ZERO, 0, -1UL,
757 PAGE_SIZE, 0);
/*
 * Initialize a fresh VMCB: set up the intercepts and permission bitmaps,
 * enable nested paging, and put the save area into x86 power-on-reset
 * state (CS:IP = F000:FFF0, CR0 = 6000_0010h, etc.).
 */
static void
fkvm_vmcb_init(struct vmcb *vmcb)
{
	struct vmcb_control_area *control = &vmcb->control;
	struct vmcb_save_area *save = &vmcb->save;

	control->intercept_cr_reads = INTERCEPT_CR4_MASK;

	control->intercept_cr_writes = INTERCEPT_CR4_MASK |
	                               INTERCEPT_CR8_MASK;

	control->intercept_dr_reads = INTERCEPT_DR0_MASK |
	                              INTERCEPT_DR1_MASK |
	                              INTERCEPT_DR2_MASK |
	                              INTERCEPT_DR3_MASK;

	control->intercept_dr_writes = INTERCEPT_DR0_MASK |
	                               INTERCEPT_DR1_MASK |
	                               INTERCEPT_DR2_MASK |
	                               INTERCEPT_DR3_MASK |
	                               INTERCEPT_DR5_MASK |
	                               INTERCEPT_DR7_MASK;

	control->intercept_exceptions = (1 << IDT_UD) |	// Invalid Opcode
	                                (1 << IDT_MC);	// Machine Check

	control->intercepts = INTERCEPT_INTR |
	                      INTERCEPT_NMI |
	                      INTERCEPT_SMI |
	                      INTERCEPT_CPUID |
	                      INTERCEPT_INVD |
	                      INTERCEPT_HLT |
	                      INTERCEPT_INVLPGA |
	                      INTERCEPT_IOIO_PROT |
	                      INTERCEPT_MSR_PROT |
	                      INTERCEPT_SHUTDOWN |
	                      INTERCEPT_VMRUN |
	                      INTERCEPT_VMMCALL |
	                      INTERCEPT_VMLOAD |
	                      INTERCEPT_VMSAVE |
	                      INTERCEPT_STGI |
	                      INTERCEPT_CLGI |
	                      INTERCEPT_SKINIT |
	                      INTERCEPT_WBINVD |
	                      INTERCEPT_MONITOR |
	                      INTERCEPT_MWAIT_UNCOND;

	/* Point the CPU at the module-global permission bitmaps. */
	control->iopm_base_pa = vtophys(iopm);
	control->msrpm_base_pa = vtophys(msrpm);
	control->tsc_offset = 0;

	/* TODO: remove this once we assign asid's to distinct VM's */
	control->guest_asid = 1;
	control->tlb_control = VMCB_TLB_CONTROL_FLUSH_ALL;

	/* let v_tpr default to 0 */
	/* let v_irq default to 0 */
	/* let v_intr default to 0 */

	control->v_intr_masking = 1;

	/* let v_intr_vector default to 0 */
	/* let intr_shadow default to 0 */
	/* let exit_code, exit_info_1, exit_info_2, exit_int_info,
	   exit_int_info_err_code default to 0 */

	/* Enable nested paging. */
	control->nested_ctl = 1;

	/* let event_inj default to 0 */

	// (nested_cr3 is later)

	/* let lbr_virt_enable default to 0 */

	fkvm_init_seg(&save->ds);
	fkvm_init_seg(&save->es);
	fkvm_init_seg(&save->fs);
	fkvm_init_seg(&save->gs);
	fkvm_init_seg(&save->ss);

	/* Reset-vector CS: executable, base FFFF0000h, selector F000h. */
	_fkvm_init_seg(&save->cs, VMCB_SELECTOR_READ_MASK | VMCB_SELECTOR_S_MASK |
	                          VMCB_SELECTOR_CODE_MASK);
	save->cs.selector = 0xf000;
	save->cs.base = 0xffff0000;

	save->gdtr.limit = 0xffff;
	save->idtr.limit = 0xffff;

	fkvm_init_sys_seg(&save->ldtr, SDT_SYSLDT);
	fkvm_init_sys_seg(&save->tr, SDT_SYS286BSY);

	save->g_pat = PAT_VALUE(PAT_WRITE_BACK, 0) | PAT_VALUE(PAT_WRITE_THROUGH, 1) |
	              PAT_VALUE(PAT_UNCACHED, 2) | PAT_VALUE(PAT_UNCACHEABLE, 3) |
	              PAT_VALUE(PAT_WRITE_BACK, 4) | PAT_VALUE(PAT_WRITE_THROUGH, 5) |
	              PAT_VALUE(PAT_UNCACHED, 6) | PAT_VALUE(PAT_UNCACHEABLE, 7);

	/* CR0 = 6000_0010h at boot */
	save->cr0 = CR0_ET | CR0_NW | CR0_CD;
	save->dr6 = 0xffff0ff0;
	save->dr7 = 0x400;
	save->rflags = 2;
	save->rip = 0x0000fff0;

	/* SVME must stay set while the guest runs under SVM. */
	save->efer = EFER_SVME;
}
867 static void
868 fkvm_vmcb_free(struct vmcb *vmcb)
870 contigfree(vmcb, PAGE_SIZE, M_DEVBUF);
873 static struct vcpu*
874 fkvm_vcpu_create(struct guestvm *guest_vm)
876 struct vcpu *vcpu;
877 vcpu = malloc(sizeof(struct vcpu), M_DEVBUF, M_WAITOK|M_ZERO);
879 vcpu->vmcb = fkvm_vmcb_alloc();
880 vcpu->vmcb_pa = vtophys(vcpu->vmcb);
881 printf("vmcb = 0x%p\n", vcpu->vmcb);
882 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu->vmcb_pa);
884 fkvm_vmcb_init(vcpu->vmcb);
885 vcpu->vmcb->control.nested_cr3 = guest_vm->nested_cr3;
886 vcpu->regs[VCPU_REGS_RIP] = vcpu->vmcb->save.rip;
888 vcpu->guest_vm = guest_vm;
890 return vcpu;
893 static void
894 fkvm_vcpu_destroy(struct vcpu *vcpu)
896 fkvm_vmcb_free(vcpu->vmcb);
897 free(vcpu, M_DEVBUF);
900 static struct guestvm*
901 fkvm_guestvm_alloc(void)
903 return malloc(sizeof(struct guestvm), M_DEVBUF, M_WAITOK|M_ZERO);
906 static void
907 fkvm_guestvm_free(struct guestvm* guest_vm)
909 free(guest_vm, M_DEVBUF);
912 static void
913 fkvm_guestvm_add_vcpu(struct guestvm *guest_vm, struct vcpu *vcpu)
915 guest_vm->vcpus[guest_vm->nr_vcpus] = vcpu;
916 guest_vm->nr_vcpus++; /* TODO: Probably not safe to increment */
917 /* How about a lock to protect all of this? */
/*
 * Userland entry point (syscall): currently unimplemented.
 * Returns ENODEV if the fkvm module is not loaded, ENOSYS otherwise.
 */
int
fkvm_userpoke(struct thread *td, struct fkvm_userpoke_args *uap)
{
	printf("fkvm_userpoke\n");

	if (!fkvm_loaded)
		return ENODEV;

	return ENOSYS;
}
/*
 * Return non-zero iff looking up 'vaddr' in 'vm_map' yields exactly
 * 'expected_entry' (i.e. the address falls in that same map entry).
 * Returns 0 if the lookup fails (vm_map_lookup leaves the map unlocked
 * on failure, so no lookup_done is needed on that path).
 */
static int
fkvm_mem_has_entry(vm_map_entry_t expected_entry, vm_map_t vm_map, vm_offset_t vaddr)
{
	vm_map_entry_t lookup_entry;
	vm_object_t throwaway_object;
	vm_pindex_t throwaway_pindex;
	vm_prot_t throwaway_prot;
	boolean_t throwaway_wired;
	int error;

	error = vm_map_lookup(&vm_map, /* IN/OUT */
			      vaddr,
			      VM_PROT_READ|VM_PROT_WRITE,
			      &lookup_entry, /* OUT */
			      &throwaway_object, /* OUT */
			      &throwaway_pindex, /* OUT */
			      &throwaway_prot, /* OUT */
			      &throwaway_wired); /* OUT */
	if (error != KERN_SUCCESS)
		return 0;
	/* Release the map lock taken by the successful lookup. */
	vm_map_lookup_done(vm_map, lookup_entry);
	return (lookup_entry == expected_entry);
}
/*
 * Check that [start, end] is fully covered by a single writable mapping in
 * the guest's vmspace.  Returns 0 on success, EFAULT otherwise.
 */
static int
fkvm_guest_check_range(struct guestvm *guest_vm, uint64_t start, uint64_t end)
{
	vm_map_t guest_vm_map;
	vm_map_entry_t lookup_entry;
	vm_object_t throwaway_object;
	vm_pindex_t throwaway_pindex;
	vm_prot_t throwaway_prot;
	boolean_t throwaway_wired;
	int ret;
	int error;

	guest_vm_map = &guest_vm->sp->vm_map;

	error = vm_map_lookup(&guest_vm_map, /* IN/OUT */
			      start,
			      VM_PROT_READ|VM_PROT_WRITE,
			      &lookup_entry, /* OUT */
			      &throwaway_object, /* OUT */
			      &throwaway_pindex, /* OUT */
			      &throwaway_prot, /* OUT */
			      &throwaway_wired); /* OUT */
	if (error != KERN_SUCCESS)
		return EFAULT;
	vm_map_lookup_done(guest_vm_map, lookup_entry);

	/*
	 * TODO: We can't actually nest the lookups:
	 *   panic: _sx_xlock_hard: recursed on non-recursive sx user map @ ../../../vm/vm_map.c:3115
	 * Therefore, I've moved the lookup_done above for now, but we really
	 * need a lock here.  NOTE(review): after lookup_done, lookup_entry is
	 * used below without the map lock — racy until that lock exists.
	 *
	 * Maybe it's better to use vm_map_lookup_entry directly.
	 */

	if (fkvm_mem_has_entry(lookup_entry, guest_vm_map, end))
		ret = 0;
	else
		ret = EFAULT;

	return ret;
}
999 static void
1000 fkvm_get_regs_regs(struct vcpu *vcpu, struct kvm_regs *out)
1002 out->rax = vcpu->regs[VCPU_REGS_RAX];
1003 out->rbx = vcpu->regs[VCPU_REGS_RBX];
1004 out->rcx = vcpu->regs[VCPU_REGS_RCX];
1005 out->rdx = vcpu->regs[VCPU_REGS_RDX];
1006 out->rsi = vcpu->regs[VCPU_REGS_RSI];
1007 out->rdi = vcpu->regs[VCPU_REGS_RDI];
1008 out->rsp = vcpu->regs[VCPU_REGS_RSP];
1009 out->rbp = vcpu->regs[VCPU_REGS_RBP];
1010 out->r8 = vcpu->regs[VCPU_REGS_R8];
1011 out->r9 = vcpu->regs[VCPU_REGS_R9];
1012 out->r10 = vcpu->regs[VCPU_REGS_R10];
1013 out->r11 = vcpu->regs[VCPU_REGS_R11];
1014 out->r12 = vcpu->regs[VCPU_REGS_R12];
1015 out->r13 = vcpu->regs[VCPU_REGS_R13];
1016 out->r14 = vcpu->regs[VCPU_REGS_R14];
1017 out->r15 = vcpu->regs[VCPU_REGS_R15];
1018 out->rip = vcpu->regs[VCPU_REGS_RIP];
1019 out->rflags = vcpu->vmcb->save.rflags;
1022 static void
1023 fkvm_set_regs_regs(struct vcpu *vcpu, const struct kvm_regs *in)
1025 vcpu->regs[VCPU_REGS_RAX] = in->rax;
1026 vcpu->regs[VCPU_REGS_RBX] = in->rbx;
1027 vcpu->regs[VCPU_REGS_RCX] = in->rcx;
1028 vcpu->regs[VCPU_REGS_RDX] = in->rdx;
1029 vcpu->regs[VCPU_REGS_RSI] = in->rsi;
1030 vcpu->regs[VCPU_REGS_RDI] = in->rdi;
1031 vcpu->regs[VCPU_REGS_RSP] = in->rsp;
1032 vcpu->regs[VCPU_REGS_RBP] = in->rbp;
1033 vcpu->regs[VCPU_REGS_R8] = in->r8;
1034 vcpu->regs[VCPU_REGS_R9] = in->r9;
1035 vcpu->regs[VCPU_REGS_R10] = in->r10;
1036 vcpu->regs[VCPU_REGS_R11] = in->r11;
1037 vcpu->regs[VCPU_REGS_R12] = in->r12;
1038 vcpu->regs[VCPU_REGS_R13] = in->r13;
1039 vcpu->regs[VCPU_REGS_R14] = in->r14;
1040 vcpu->regs[VCPU_REGS_R15] = in->r15;
1041 vcpu->regs[VCPU_REGS_RIP] = in->rip;
1042 vcpu->vmcb->save.rflags = in->rflags;
1045 static void
1046 fkvm_set_vmcb_dtable(struct vmcb_seg *vmcb_seg, struct kvm_dtable *fkvm_dtable)
1048 vmcb_seg->base = fkvm_dtable->base;
1049 vmcb_seg->limit = fkvm_dtable->limit;
/*
 * Translate a kvm_segment into VMCB format.  The VMCB attrib field packs
 * descriptor bits 55:52 and 47:40; an "unusable" segment gets attrib 0
 * (present bit clear).
 */
static void
fkvm_set_vmcb_seg(struct vmcb_seg *vmcb_seg, struct kvm_segment *fkvm_seg)
{
	vmcb_seg->base = fkvm_seg->base;
	vmcb_seg->limit = fkvm_seg->limit;
	vmcb_seg->selector = fkvm_seg->selector;
	if (fkvm_seg->unusable)
		vmcb_seg->attrib=0;
	else {
		vmcb_seg->attrib = (fkvm_seg->type & VMCB_SELECTOR_TYPE_MASK);
		vmcb_seg->attrib |= (fkvm_seg->s & 1) << VMCB_SELECTOR_S_SHIFT;
		vmcb_seg->attrib |= (fkvm_seg->dpl & 3) << VMCB_SELECTOR_DPL_SHIFT;
		vmcb_seg->attrib |= (fkvm_seg->present & 1) << VMCB_SELECTOR_P_SHIFT;
		vmcb_seg->attrib |= (fkvm_seg->avl & 1) << VMCB_SELECTOR_AVL_SHIFT;
		vmcb_seg->attrib |= (fkvm_seg->l & 1) << VMCB_SELECTOR_L_SHIFT;
		vmcb_seg->attrib |= (fkvm_seg->db & 1) << VMCB_SELECTOR_DB_SHIFT;
		vmcb_seg->attrib |= (fkvm_seg->g & 1) << VMCB_SELECTOR_G_SHIFT;
	}
}
static void
fkvm_set_cr8(struct vcpu *vcpu, uint64_t cr8)
{
	// TODO: if cr8 has reserved bits inject GP Fault, return

	/* CR8 is reflected into the VMCB virtual TPR field; only the
	 * low byte is kept by the truncating cast. */
	vcpu->vmcb->control.v_tpr = (uint8_t) cr8;
}
1080 static void
1081 fkvm_set_efer(struct vcpu *vcpu, uint64_t efer)
1083 struct vmcb *vmcb = vcpu->vmcb;
1084 //if efer has reserved bits set: inject GP Fault
1086 if (vmcb->save.cr0 & CR0_PG) { //If paging is enabled do not allow changes to LME
1087 if ((vmcb->save.efer & EFER_LME) != (efer & EFER_LME)) {
1088 printf("fkvm_set_efer: attempt to change LME while paging\n");
1089 //inject GP fault
1093 vmcb->save.efer = efer | EFER_SVME;
1096 static void
1097 fkvm_get_regs_sregs(struct vcpu *vcpu, struct kvm_sregs *out)
1099 /* XXX */
static void
fkvm_set_regs_sregs(struct vcpu *vcpu, struct kvm_sregs *in)
{
	struct vmcb *vmcb = vcpu->vmcb;

	/* Load all guest segment registers into the VMCB save area. */
	fkvm_set_vmcb_seg(&vmcb->save.cs, &in->cs);
	fkvm_set_vmcb_seg(&vmcb->save.ds, &in->ds);
	fkvm_set_vmcb_seg(&vmcb->save.es, &in->es);
	fkvm_set_vmcb_seg(&vmcb->save.fs, &in->fs);
	fkvm_set_vmcb_seg(&vmcb->save.gs, &in->gs);
	fkvm_set_vmcb_seg(&vmcb->save.ss, &in->ss);
	fkvm_set_vmcb_seg(&vmcb->save.tr, &in->tr);
	fkvm_set_vmcb_seg(&vmcb->save.ldtr, &in->ldt);

	/* The guest CPL is derived from the DPL bits of CS.attrib. */
	vmcb->save.cpl = (vmcb->save.cs.attrib >> VMCB_SELECTOR_DPL_SHIFT) & 3;

	fkvm_set_vmcb_dtable(&vmcb->save.idtr, &in->idt);
	fkvm_set_vmcb_dtable(&vmcb->save.gdtr, &in->gdt);

	/* cr2/cr3 are cached on the vcpu rather than the VMCB here. */
	vcpu->cr2 = in->cr2;
	vcpu->cr3 = in->cr3;

	fkvm_set_cr8(vcpu, in->cr8);
	fkvm_set_efer(vcpu, in->efer);
	/* TODO: apic_base */
	vmcb->save.cr0 = in->cr0;
	vmcb->save.cr4 = in->cr4;
	/* TODO: irq_pending, interrupt_bitmap, irq_summary */
}
static void
fkvm_get_regs_msrs(struct vcpu *vcpu, uint32_t nmsrs, struct kvm_msr_entry *entries) {
	/* XXX */
	/* Not implemented yet.  The caller (fkvm_get_regs) allocates
	 * 'entries' with M_ZERO, so userspace currently reads back
	 * all-zero MSR values rather than garbage. */
}
/*
 * Apply a single guest MSR write to the vcpu's VMCB.  MSRs that are
 * not yet virtualized only log; unknown indices are reported and
 * silently dropped.
 */
static void
fkvm_set_reg_msr(struct vcpu *vcpu, uint32_t index, uint64_t data) {
	struct vmcb *vmcb = vcpu->vmcb;

	switch(index) {

	case MSR_TSC: {
		uint64_t tsc;

		/* Guest TSC is virtualized as an offset from the host TSC. */
		tsc = rdtsc();
		vmcb->control.tsc_offset = data - tsc;
		break;
	}
	case MSR_STAR: {
		vmcb->save.star = data;
		break;
	}
	case MSR_LSTAR: {
		vmcb->save.lstar = data;
		break;
	}
	case MSR_CSTAR: {
		vmcb->save.cstar = data;
		break;
	}
	case MSR_GSBASE: {
		vmcb->save.kernel_gs_base = data;
		break;
	}
	case MSR_SF_MASK: {
		vmcb->save.sfmask = data;
		break;
	}
	case MSR_SYSENTER_CS_MSR: {
		vmcb->save.sysenter_cs = data;
		break;
	}
	case MSR_SYSENTER_EIP_MSR: {
		vmcb->save.sysenter_eip = data;
		break;
	}
	case MSR_SYSENTER_ESP_MSR: {
		vmcb->save.sysenter_esp = data;
		break;
	}
	case MSR_DEBUGCTLMSR: {
		printf("unimplemented at %d\n", __LINE__);
		break;
	}
	case MSR_PERFEVSEL0 ... MSR_PERFEVSEL3:
	case MSR_PERFCTR0 ... MSR_PERFCTR3: {
		printf("unimplemented at %d\n", __LINE__);
		break;
	}
	case MSR_EFER: {
		/* EFER writes go through the LME/SVME sanity checks. */
		fkvm_set_efer(vcpu, data);
		break;
	}
	case MSR_MC0_STATUS: {
		printf("unimplemented at %d\n", __LINE__);
		break;
	}
	case MSR_MCG_STATUS: {
		printf("unimplemented at %d\n", __LINE__);
		break;
	}
	case MSR_MCG_CTL: {
		printf("unimplemented at %d\n", __LINE__);
		break;
	}
	//TODO: MSR_IA32_UCODE_REV
	//TODO: MSR_IA32_UCODE_WRITE
	//TODO: 0x200 ... 0x2ff: set_msr_mtrr
	case MSR_APICBASE: {
		printf("unimplemented at %d\n", __LINE__);
		break;
	}
	case MSR_IA32_MISC_ENABLE: {
		printf("unimplemented at %d\n", __LINE__);
		break;
	}
	//TODO: MSR_KVM_WALL_CLOCK
	//TODO: MSR_KVM_SYSTEM_TIME
	default:
		printf("Did not set unimplemented msr: 0x%" PRIx32 "\n", index);
	}
}
1244 static void
1245 fkvm_set_regs_msrs(struct vcpu *vcpu, uint32_t nmsrs, struct kvm_msr_entry *entries) {
1246 int i;
1248 for (i = 0; i < nmsrs; i++) {
1249 fkvm_set_reg_msr(vcpu, entries[i].index, entries[i].data);
1253 /* System Calls */
1256 fkvm_get_regs(struct thread *td, struct fkvm_get_regs_args *uap)
1258 struct vcpu *vcpu;
1259 int error;
1261 if (!fkvm_loaded)
1262 return ENODEV;
1264 vcpu = TD_GET_VCPU(td);
1265 if (vcpu == NULL)
1266 return ENODEV;
1268 switch (uap->type) {
1270 case FKVM_REGS_TYPE_REGS: {
1271 struct kvm_regs out;
1272 fkvm_get_regs_regs(vcpu, &out);
1273 return copyout(&out, uap->regs, sizeof(out));
1276 case FKVM_REGS_TYPE_SREGS: {
1277 struct kvm_sregs out;
1278 fkvm_get_regs_sregs(vcpu, &out);
1279 return copyout(&out, uap->regs, sizeof(out));
1282 case FKVM_REGS_TYPE_MSRS: {
1283 struct kvm_msrs out;
1284 struct kvm_msrs *user_msrs;
1285 struct kvm_msr_entry *entries;
1286 int size;
1288 user_msrs = (struct kvm_msrs *)uap->regs;
1290 error = copyin(uap->regs, &out, sizeof(out));
1291 if (error != 0)
1292 return error;
1294 size = sizeof(*entries) * out.nmsrs;
1295 entries = malloc(size, M_DEVBUF, M_WAITOK|M_ZERO);
1296 if (entries == NULL)
1297 return ENOMEM;
1299 error = copyin(user_msrs->entries, entries, size);
1300 if (error != 0) {
1301 printf("FKVM_REGS_TYPE_MSRS: unable to copyin entries\n");
1302 free(entries, M_DEVBUF);
1303 return error;
1306 fkvm_get_regs_msrs(vcpu, out.nmsrs, entries);
1308 error = copyout(user_msrs->entries, entries, size);
1309 if (error != 0) {
1310 printf("FKVM_REGS_TYPE_MSRS: unable to copyout entries\n");
1313 free(entries, M_DEVBUF);
1314 return error;
1317 default:
1318 return EINVAL;
1323 fkvm_set_regs(struct thread *td, struct fkvm_set_regs_args *uap)
1325 struct vcpu *vcpu;
1326 int error = 0;
1328 vcpu = TD_GET_VCPU(td);
1329 if (vcpu == NULL)
1330 return ENODEV;
1332 switch (uap->type) {
1334 case FKVM_REGS_TYPE_REGS: {
1335 struct kvm_regs in;
1336 error = copyin(uap->regs, &in, sizeof(in));
1337 if (error != 0)
1338 return error;
1339 fkvm_set_regs_regs(vcpu, &in);
1340 return 0;
1343 case FKVM_REGS_TYPE_SREGS: {
1344 struct kvm_sregs in;
1345 error = copyin(uap->regs, &in, sizeof(in));
1346 if (error != 0)
1347 return error;
1348 fkvm_set_regs_sregs(vcpu, &in);
1349 return 0;
1352 case FKVM_REGS_TYPE_MSRS: {
1353 struct kvm_msrs in;
1354 struct kvm_msrs *user_msrs;
1355 struct kvm_msr_entry *entries;
1356 int size;
1358 user_msrs = (struct kvm_msrs *)uap->regs;
1360 error = copyin(uap->regs, &in, sizeof(in));
1361 if (error != 0)
1362 return error;
1364 size = sizeof(*entries) * in.nmsrs;
1365 entries = malloc(size, M_DEVBUF, M_WAITOK|M_ZERO);
1366 if (entries == NULL)
1367 return ENOMEM;
1369 error = copyin(user_msrs->entries, entries, size);
1370 if (error != 0) {
1371 printf("FKVM_REGS_TYPE_MSRS: unable to copyin entries\n");
1372 free(entries, M_DEVBUF);
1373 return error;
1376 fkvm_set_regs_msrs(vcpu, in.nmsrs, entries);
1378 free(entries, M_DEVBUF);
1379 return error;
1382 default:
1383 return EINVAL;
1387 /* This function can only be called with multiples of page sizes */
1388 /* vaddr as NULL overloads to fkvm_guest_check_range */
1390 fkvm_set_user_mem_region(struct thread *td, struct fkvm_set_user_mem_region_args *uap)
1392 struct guestvm *guest_vm;
1394 vm_offset_t start;
1395 vm_offset_t end;
1397 struct vmspace *user_vm_space;
1398 vm_map_t user_vm_map;
1400 vm_object_t vm_object;
1401 vm_pindex_t vm_object_pindex;
1402 vm_ooffset_t vm_object_offset;
1403 vm_prot_t throwaway_prot;
1404 boolean_t throwaway_wired;
1405 vm_map_entry_t lookup_entry;
1407 int error;
1409 guest_vm = PROC_GET_GUESTVM(td->td_proc);
1410 if (guest_vm == NULL) {
1411 printf("PROC_GET_GUESTVM -> NULL\n");
1412 return ENODEV;
1415 start = uap->guest_pa;
1416 end = uap->guest_pa + uap->size - 1;
1417 printf("start: 0x%" PRIx64 " bytes\n", start);
1418 printf("end: 0x%" PRIx64 " bytes\n", end);
1420 if (uap->vaddr == 0)
1421 return fkvm_guest_check_range(guest_vm, start, end);
1423 user_vm_space = td->td_proc->p_vmspace;
1424 user_vm_map = &user_vm_space->vm_map;
1425 printf("user vm space: %p\n", user_vm_space);
1426 printf("user vm map: %p\n", user_vm_map);
1428 error = vm_map_lookup(&user_vm_map, /* IN/OUT */
1429 uap->vaddr,
1430 VM_PROT_READ|VM_PROT_WRITE,
1431 &lookup_entry, /* OUT */
1432 &vm_object, /* OUT */
1433 &vm_object_pindex, /* OUT */
1434 &throwaway_prot, /* OUT */
1435 &throwaway_wired); /* OUT */
1436 if (error != KERN_SUCCESS) {
1437 printf("vm_map_lookup failed: %d\n", error);
1438 return EFAULT;
1441 /* TODO: Trust the user that the full region is valid.
1442 * This is very bad. See the note in fkvm_guest_check_range
1443 * on nesting vm lookups. */
1444 #if 0
1445 if (!fkvm_mem_has_entry(lookup_entry, user_vm_map, uap->vaddr + uap->size)) {
1446 printf("end of range not contained in same vm map entry as start\n");
1447 return EFAULT;
1449 #endif
1451 printf("vm object: %p\n", vm_object);
1452 printf(" size: %d pages\n", (int) vm_object->size);
1454 vm_object_offset = IDX_TO_OFF(vm_object_pindex);
1455 printf("vm_ooffset: 0x%" PRIx64 "\n", vm_object_offset);
1457 vm_object_reference(vm_object); // TODO: this might be a mem leak
1459 vm_map_lookup_done(user_vm_map, lookup_entry);
1461 error = vm_map_insert(&guest_vm->sp->vm_map,
1462 vm_object,
1463 vm_object_offset,
1464 start,
1465 end,
1466 VM_PROT_ALL, VM_PROT_ALL,
1468 if (error != KERN_SUCCESS) {
1469 printf("vm_map_insert failed: %d\n", error);
1470 switch (error) {
1471 case KERN_INVALID_ADDRESS:
1472 return EINVAL;
1473 case KERN_NO_SPACE:
1474 return ENOMEM;
1475 default:
1476 return 1;
1480 return 0;
1484 fkvm_unset_user_mem_region(struct thread *td, struct fkvm_unset_user_mem_region_args *uap)
1486 struct guestvm *guest_vm;
1488 if (!fkvm_loaded)
1489 return ENODEV;
1491 guest_vm = PROC_GET_GUESTVM(td->td_proc);
1492 if (guest_vm == NULL) {
1493 printf("PROC_GET_GUESTVM -> NULL\n");
1494 return ENODEV;
1497 vm_offset_t start;
1498 vm_offset_t end;
1500 vm_map_t guest_vm_map;
1502 int error;
1504 start = uap->guest_pa;
1505 end = uap->guest_pa + uap->size - 1;
1506 printf("start: 0x%" PRIx64 " bytes\n", start);
1507 printf("end: 0x%" PRIx64 " bytes\n", end);
1509 guest_vm_map = &guest_vm->sp->vm_map;
1511 error = vm_map_remove(guest_vm_map, start, end);
1512 if (error != KERN_SUCCESS)
1513 return -1;
1515 return 0;
1519 fkvm_create_vm(struct thread *td, struct fkvm_create_vm_args *uap)
1521 struct guestvm *guest_vm;
1523 printf("SYSCALL : fkvm_create_vm\n");
1525 if (!fkvm_loaded)
1526 return ENODEV;
1528 /* Allocate Guest VM */
1529 guest_vm = fkvm_guestvm_alloc();
1531 /* Set up the vm address space */
1532 guest_vm->sp = fkvm_make_vmspace();
1533 if (guest_vm->sp == NULL) {
1534 fkvm_guestvm_free(guest_vm);
1535 return ENOMEM;
1537 guest_vm->nested_cr3 = vtophys(vmspace_pmap(guest_vm->sp)->pm_pml4);
1539 printf("guest:\n");
1540 printf(" vm space: %p\n", guest_vm->sp);
1541 printf(" vm map: %p\n", &guest_vm->sp->vm_map);
1542 printf(" ncr3: 0x%" PRIx64 "\n", guest_vm->nested_cr3);
1544 PROC_SET_GUESTVM(td->td_proc, guest_vm);
1546 printf("fkvm_create_vm done\n");
1547 return 0;
1550 static void
1551 fkvm_destroy_vm(struct guestvm *guest_vm)
1553 /* Destroy the VCPUs */
1554 while (guest_vm->nr_vcpus > 0) {
1555 guest_vm->nr_vcpus--;
1556 fkvm_vcpu_destroy(guest_vm->vcpus[guest_vm->nr_vcpus]);
1557 guest_vm->vcpus[guest_vm->nr_vcpus] = NULL;
1560 /* Destroy the vmspace */
1561 if (guest_vm->sp != NULL)
1562 fkvm_destroy_vmspace(guest_vm->sp);
1564 /* Destroy the Guest VM itself */
1565 fkvm_guestvm_free(guest_vm);
/*
 * Handle a VMCB IOIO intercept: decode the exit information into the
 * kvm_run I/O request so userspace can emulate the port access.
 * Returns 0 on success, 1 for string (INS/OUTS) operations, which are
 * not implemented yet.
 */
static int
intercept_ioio(struct vcpu *vcpu, struct kvm_run *kvm_run, uint64_t ioio_info, uint64_t rip)
{
	struct vmcb *vmcb = vcpu->vmcb;
	uint8_t str;
	//uint8_t rep;

	str = (ioio_info & STR_MASK) >> STR_SHIFT;
	if (str) {
		printf("%s operation requested, not yet implemented, \n",
			kvm_run->u.io.in ? "INS" : "OUTS");
		return 1;
	}

	/* Decode port number, direction and operand size from EXITINFO1. */
	kvm_run->u.io.port = ioio_info >> PORT_SHIFT;
	kvm_run->u.io.in = ioio_info & TYPE_MASK;

	kvm_run->u.io.size = (ioio_info & SIZE_MASK) >> SIZE_SHIFT;

	kvm_run->u.io.data_offset = PAGE_SIZE;
	kvm_run->u.io.pio_data = vcpu->regs[VCPU_REGS_RAX];

	/* We need to remove the Interrupt Shadow Flag from the VMCB (see 15.20.5 in AMD_Vol2) */
	vmcb->control.intr_shadow = 0;

	//rep = (ioio_info & REP_MASK) >> REP_SHIFT;
	/* TODO: Research more into Direction Flag checked in KVM; DF bit in RFLAGS */

	/* set the next rip in the VMCB save area for now */
	/* TODO: Store rIP in vm_run structure until we absolutely need it */
	vcpu->regs[VCPU_REGS_RIP] = rip;

	return 0;
}
1603 static void
1604 intercept_shutdown(struct vcpu *vcpu)
1606 struct vmcb *vmcb = vcpu->vmcb;
1607 memset(vmcb, 0, PAGE_SIZE);
1608 fkvm_vmcb_init(vmcb);
1612 fkvm_vm_run(struct thread *td, struct fkvm_vm_run_args *uap)
1614 struct vcpu *vcpu;
1615 struct guestvm *guest_vm;
1616 struct vmcb *vmcb;
1617 int error;
1618 int ret = 0;
1619 struct kvm_run kvm_run;
1621 if (!fkvm_loaded)
1622 return ENODEV;
1624 vcpu = TD_GET_VCPU(td);
1625 if (vcpu == NULL)
1626 return ENODEV;
1628 guest_vm = vcpu->guest_vm;
1629 vmcb = vcpu->vmcb;
1631 error = copyin(uap->run, &kvm_run, sizeof(struct kvm_run));
1632 if (error != 0)
1633 return error;
1635 fkvm_set_cr8(vcpu, kvm_run.cr8);
1637 kvm_run.exit_reason = KVM_EXIT_CONTINUE;
1639 while(kvm_run.exit_reason == KVM_EXIT_CONTINUE) {
1640 fkvm_vcpu_run(vcpu);
1642 switch (vmcb->control.exit_code) {
1644 case VMCB_EXIT_EXCP_BASE ... (VMCB_EXIT_EXCP_BASE + 31): {
1645 int excp_vector;
1647 excp_vector = vmcb->control.exit_code - VMCB_EXIT_EXCP_BASE;
1649 printf("VMCB_EXIT_EXCP_BASE, exception vector: 0x%x\n",
1650 excp_vector);
1651 ret = ENOSYS;
1652 break;
1655 case VMCB_EXIT_INTR: {
1656 printf("VMCB_EXIT_INTR - nothing to do\n");
1657 /* Handled by host OS already */
1658 kvm_run.exit_reason = KVM_EXIT_CONTINUE;
1659 break;
1662 case VMCB_EXIT_NPF: {
1663 /* EXITINFO1 contains fault error code */
1664 /* EXITINFO2 contains the guest physical address causing the fault. */
1666 u_int64_t fault_code;
1667 u_int64_t fault_gpa;
1669 vm_prot_t fault_type;
1670 int fault_flags;
1671 int rc;
1673 fault_code = vmcb->control.exit_info_1;
1674 fault_gpa = vmcb->control.exit_info_2;
1675 kvm_run.exit_reason = KVM_EXIT_CONTINUE;
1677 printf("VMCB_EXIT_NPF:\n");
1678 printf("gpa=0x%" PRIx64 "\n", fault_gpa);
1679 printf("fault code=0x%" PRIx64 " [P=%x, R/W=%x, U/S=%x, I/D=%x]\n",
1680 fault_code,
1681 (fault_code & PGEX_P) != 0,
1682 (fault_code & PGEX_W) != 0,
1683 (fault_code & PGEX_U) != 0,
1684 (fault_code & PGEX_I) != 0);
1686 if (fault_code & PGEX_W)
1687 fault_type = VM_PROT_WRITE;
1688 else if (fault_code & PGEX_I)
1689 fault_type = VM_PROT_EXECUTE;
1690 else
1691 fault_type = VM_PROT_READ;
1693 fault_flags = 0; /* TODO: is that right? */
1694 rc = vm_fault(&guest_vm->sp->vm_map, (fault_gpa & (~PAGE_MASK)), fault_type, fault_flags);
1695 if (rc != KERN_SUCCESS) {
1696 printf("vm_fault failed: %d\n", rc);
1697 kvm_run.exit_reason = KVM_EXIT_UNKNOWN;
1700 break;
1702 case VMCB_EXIT_WRITE_CR8:
1703 kvm_run.exit_reason = KVM_EXIT_SET_TPR;
1704 break;
1705 case VMCB_EXIT_NMI:
1706 kvm_run.exit_reason = KVM_EXIT_NMI;
1707 break;
1708 case VMCB_EXIT_HLT:
1709 kvm_run.exit_reason = KVM_EXIT_HLT;
1710 break;
1711 case VMCB_EXIT_SHUTDOWN:
1712 intercept_shutdown(vcpu);
1713 kvm_run.exit_reason = KVM_EXIT_SHUTDOWN;
1714 break;
1715 case VMCB_EXIT_IOIO:
1716 intercept_ioio(vcpu, &kvm_run,
1717 vmcb->control.exit_info_1,
1718 vmcb->control.exit_info_2);
1719 kvm_run.exit_reason = KVM_EXIT_IO;
1720 break;
1721 case VMCB_EXIT_READ_CR0:
1722 case VMCB_EXIT_READ_CR3:
1723 case VMCB_EXIT_READ_CR4:
1724 case VMCB_EXIT_READ_CR8:
1725 case VMCB_EXIT_WRITE_CR0:
1726 case VMCB_EXIT_WRITE_CR3:
1727 case VMCB_EXIT_WRITE_CR4:
1728 case VMCB_EXIT_READ_DR0:
1729 case VMCB_EXIT_READ_DR1:
1730 case VMCB_EXIT_READ_DR2:
1731 case VMCB_EXIT_READ_DR3:
1732 case VMCB_EXIT_WRITE_DR0:
1733 case VMCB_EXIT_WRITE_DR1:
1734 case VMCB_EXIT_WRITE_DR2:
1735 case VMCB_EXIT_WRITE_DR3:
1736 case VMCB_EXIT_WRITE_DR5:
1737 case VMCB_EXIT_WRITE_DR7:
1738 case VMCB_EXIT_SMI:
1739 case VMCB_EXIT_INIT:
1740 case VMCB_EXIT_VINTR:
1741 case VMCB_EXIT_CR0_SEL_WRITE:
1742 case VMCB_EXIT_CPUID:
1743 case VMCB_EXIT_INVD:
1744 case VMCB_EXIT_INVLPG:
1745 case VMCB_EXIT_INVLPGA:
1746 case VMCB_EXIT_MSR:
1747 case VMCB_EXIT_TASK_SWITCH:
1748 case VMCB_EXIT_VMRUN:
1749 case VMCB_EXIT_VMMCALL:
1750 case VMCB_EXIT_VMLOAD:
1751 case VMCB_EXIT_VMSAVE:
1752 case VMCB_EXIT_STGI:
1753 case VMCB_EXIT_CLGI:
1754 case VMCB_EXIT_SKINIT:
1755 case VMCB_EXIT_WBINVD:
1756 case VMCB_EXIT_MONITOR:
1757 case VMCB_EXIT_MWAIT_UNCOND:
1758 default:
1759 printf("Unhandled vmexit:\n"
1760 " code: 0x%" PRIx64 "\n"
1761 " info1: 0x%" PRIx64 "\n"
1762 " info2: 0x%" PRIx64 "\n",
1763 vmcb->control.exit_code,
1764 vmcb->control.exit_info_1,
1765 vmcb->control.exit_info_2);
1766 print_vmcb(vmcb);
1767 ret = ENOSYS;
1768 kvm_run.exit_reason = KVM_EXIT_UNKNOWN;
1772 printf("\n\n");
1774 /* TODO: check copyout ret val */
1775 copyout(&kvm_run, uap->run, sizeof(struct kvm_run));
1776 printf("sizeof(struct kvm_run) = %" PRIu64 "\n", sizeof(struct kvm_run));
1778 return ret;
1782 fkvm_create_vcpu(struct thread *td, struct fkvm_create_vcpu_args *uap)
1784 struct guestvm *guest_vm;
1785 struct vcpu *vcpu;
1787 if (!fkvm_loaded)
1788 return ENODEV;
1790 guest_vm = PROC_GET_GUESTVM(td->td_proc);
1791 if (guest_vm == NULL) {
1792 printf("PROC_GET_GUESTVM -> NULL\n");
1793 return ENODEV;
1796 /* Allocate VCPU */
1797 printf("fkvm_create_vcpu: td = %p\n", td);
1798 vcpu = fkvm_vcpu_create(guest_vm);
1799 fkvm_guestvm_add_vcpu(guest_vm, vcpu);
1801 TD_SET_VCPU(td, vcpu);
1802 printf("fkvm_create_vcpu: vcpu = %p\n", vcpu);
1803 return 0;
1806 static int
1807 fkvm_check_cpu_extension(void)
1809 u_int cpu_exthigh;
1810 u_int regs[4];
1811 u_int64_t vmcr;
1813 printf("fkvm_check_cpu_extension\n");
1815 /* Assumption: the architecture supports the cpuid instruction */
1817 /* Check if CPUID extended function 8000_0001h is supported. */
1818 do_cpuid(0x80000000, regs);
1819 cpu_exthigh = regs[0];
1821 printf("cpu_exthigh = %u\n", cpu_exthigh);
1823 if(cpu_exthigh >= 0x80000001) {
1824 /* Execute CPUID extended function 8000_0001h */
1825 do_cpuid(0x80000001, regs);
1826 printf("EAX = %u\n", regs[0]);
1828 if((regs[0] & 0x2) == 0) { /* Check SVM bit */
1829 printf("SVM not available\n");
1830 goto fail; /* SVM not available */
1833 vmcr = rdmsr(0xc0010114); /* Read VM_CR MSR */
1834 if((vmcr & 0x8) == 0) { /* Check SVMDIS bit */
1835 printf("vmcr = %" PRIx64 "\n", vmcr);
1836 printf("SVM allowed\n");
1837 return KERN_SUCCESS; /* SVM allowed */
1840 /* Execute CPUID extended function 8000_000ah */
1841 do_cpuid(0x8000000a, regs);
1842 if((regs[3] & 0x2) == 0) { /* Check SVM_LOCK bit */
1843 /* SVM disabled at bios; not unlockable.
1844 * User must change a BIOS setting to enable SVM.
1846 printf("EDX = %u\n", regs[3]);
1847 printf("SVM disabled at bios\n");
1848 goto fail;
1849 } else {
1850 /* TODO:
1851 * SVM may be unlockable;
1852 * consult the BIOS or TPM to obtain the key.
1854 printf("EDX = %u\n", regs[3]);
1855 printf("SVM maybe unlockable\n");
1856 goto fail;
1859 fail:
1860 return KERN_FAILURE;
1863 static void
1864 fkvm_proc_exit(void *arg, struct proc *p)
1866 struct guestvm *guest_vm;
1868 guest_vm = PROC_GET_GUESTVM(p);
1869 if (guest_vm == NULL)
1870 return;
1872 fkvm_destroy_vm(guest_vm);
1873 PROC_SET_GUESTVM(p, NULL);
/*
 * Module load hook (SYSINIT): verify SVM support, register the
 * process-exit cleanup handler, allocate the host-save area and the
 * I/O / MSR permission bitmaps, then enable SVM via EFER.SVME and
 * publish the host-save area through MSR_VM_HSAVE_PA.
 */
static void
fkvm_load(void *unused)
{
	u_int64_t efer;
	int error;

	printf("fkvm_load\n");
	printf("sizeof(struct vmcb) = %" PRIx64 "\n", sizeof(struct vmcb));

	hsave_area = NULL;
	iopm = NULL;
	msrpm = NULL;

	/* check if SVM is supported */
	error = fkvm_check_cpu_extension();
	if(error != KERN_SUCCESS) {
		printf("ERROR: SVM extension not available\n");
		return;
	}

	/* Destroy a process's guest VM automatically when it exits. */
	exit_tag = EVENTHANDLER_REGISTER(process_exit, fkvm_proc_exit, NULL,
					 EVENTHANDLER_PRI_ANY);

	/* allocate structures */
	hsave_area = fkvm_hsave_area_alloc();
	iopm = fkvm_iopm_alloc();
	msrpm = fkvm_msrpm_alloc();

	/* Initialize structures */
	fkvm_hsave_area_init(hsave_area);
	fkvm_iopm_init(iopm);
	fkvm_msrpm_init(msrpm);

	/* Enable SVM in EFER */
	efer = rdmsr(MSR_EFER);
	printf("EFER = %" PRIx64 "\n", efer);
	wrmsr(MSR_EFER, efer | EFER_SVME);
	efer = rdmsr(MSR_EFER);
	printf("new EFER = %" PRIx64 "\n", efer);

	/* Write Host save address in MSR_VM_HSAVE_PA */
	wrmsr(MSR_VM_HSAVE_PA, vtophys(hsave_area));

	fkvm_loaded = 1;
}
SYSINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_load, NULL);
1923 static void
1924 fkvm_unload(void *unused)
1926 printf("fkvm_unload\n");
1928 if (!fkvm_loaded) {
1929 printf("fkvm_unload: fkvm not loaded");
1930 return;
1933 EVENTHANDLER_DEREGISTER(process_exit, exit_tag);
1935 if (msrpm != NULL) {
1936 fkvm_msrpm_free(iopm);
1937 msrpm = NULL;
1939 if (iopm != NULL) {
1940 fkvm_iopm_free(iopm);
1941 iopm = NULL;
1943 if (hsave_area != NULL) {
1944 fkvm_hsave_area_free(hsave_area);
1945 hsave_area = NULL;
1948 SYSUNINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_unload, NULL);