Fixes file headers
[freebsd-src/fkvm-freebsd.git] / sys / kern / kern_fkvm.c
blob909647e89d6d49e8e4f74db5dc685e61bd5efac0
1 /*-
2 * Copyright (c) 2008 Brent Stephens <brents@rice.edu>
3 * Copyright (c) 2008 Diego Ongaro <diego.ongaro@rice.edu>
4 * Copyright (c) 2008 Kaushik Kumar Ram <kaushik@rice.edu>
5 * Copyright (c) 2008 Oleg Pesok <olegpesok@gmail.com>
6 * All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
30 #include <sys/fkvm.h>
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/sysproto.h>
37 #include <sys/file.h>
38 #include <sys/mman.h>
39 #include <sys/proc.h>
40 #include <sys/eventhandler.h>
41 #include <vm/vm.h>
42 #include <vm/pmap.h>
43 #include <vm/vm_extern.h>
44 #include <vm/vm_map.h>
45 #include <vm/vm_object.h>
46 #include <vm/vm_param.h>
47 #include <machine/_inttypes.h>
48 #include <machine/specialreg.h>
49 #include <machine/segments.h>
50 #include <machine/vmcb.h>
/*
 * Definitions for Port IO.
 * These shift/mask pairs decode the fields packed into the VMCB EXITINFO1
 * word on an IOIO intercept (port, address size, operand size, REP/string
 * prefixes, and access type).
 */
#define PORT_SHIFT	16
#define ADDR_SHIFT	7
#define SIZE_SHIFT	4
#define REP_SHIFT	3
#define STR_SHIFT	2
#define TYPE_SHIFT	0

#define PORT_MASK	0xFFFF0000
#define ADDR_MASK	(7 << ADDR_SHIFT)
#define SIZE_MASK	(7 << SIZE_SHIFT)
#define REP_MASK	(1 << REP_SHIFT)
#define STR_MASK	(1 << STR_SHIFT)
#define TYPE_MASK	(1 << TYPE_SHIFT)
/* End Definitions for Port IO */

#define PMIO_PAGE_OFFSET 1

#define IOPM_SIZE	(8*1024 + 1)	/* TODO: ensure that this need not be 12Kbtes, not just 8Kb+1 */
#define MSRPM_SIZE	(8*1024)
74 /* fkvm data */
76 static int fkvm_loaded = 0;
78 static void *iopm = NULL; /* Should I allocate a vm_object_t instead? */
79 static void *msrpm = NULL; /* Should I allocate a vm_object_t instead? */
81 static void *hsave_area = NULL;
83 static eventhandler_tag exit_tag;
85 /* per-guest data */
87 enum {
88 VCPU_REGS_RAX = 0,
89 VCPU_REGS_RCX = 1,
90 VCPU_REGS_RDX = 2,
91 VCPU_REGS_RBX = 3,
92 VCPU_REGS_RSP = 4,
93 VCPU_REGS_RBP = 5,
94 VCPU_REGS_RSI = 6,
95 VCPU_REGS_RDI = 7,
96 VCPU_REGS_R8 = 8,
97 VCPU_REGS_R9 = 9,
98 VCPU_REGS_R10 = 10,
99 VCPU_REGS_R11 = 11,
100 VCPU_REGS_R12 = 12,
101 VCPU_REGS_R13 = 13,
102 VCPU_REGS_R14 = 14,
103 VCPU_REGS_R15 = 15,
104 VCPU_REGS_RIP,
105 NR_VCPU_REGS
108 struct vcpu {
109 /* VCPU data */
110 struct vmcb *vmcb;
111 unsigned long vmcb_pa;
113 unsigned long regs[NR_VCPU_REGS];
114 u_int64_t host_gs_base;
115 u_int64_t cr2;
116 u_int64_t cr3;
118 struct guestvm *guest_vm;
121 struct guestvm {
122 struct vcpu *vcpus[MAX_VCPUS];
123 int nr_vcpus;
125 struct vmspace *sp;
126 u_int64_t nested_cr3;
/* Raw opcode bytes for the SVM instructions (not known to older assemblers). */
#define SVM_VMLOAD	".byte 0x0f, 0x01, 0xda"
#define SVM_VMRUN	".byte 0x0f, 0x01, 0xd8"
#define SVM_VMSAVE	".byte 0x0f, 0x01, 0xdb"
#define SVM_CLGI	".byte 0x0f, 0x01, 0xdd"
#define SVM_STGI	".byte 0x0f, 0x01, 0xdc"
#define SVM_INVLPGA	".byte 0x0f, 0x01, 0xdf"
137 static inline struct vcpu *
138 TD_GET_VCPU(struct thread *td)
140 struct vcpu *vcpu;
141 vcpu = td->vcpu;
142 if (vcpu == NULL)
143 printf("TD_GET_VCPU -> NULL\n");
144 return vcpu;
147 static inline void
148 TD_SET_VCPU(struct thread *td, struct vcpu *vcpu)
150 td->vcpu = vcpu;
153 static inline struct guestvm *
154 PROC_GET_GUESTVM(struct proc *proc)
156 struct guestvm *guestvm;
157 guestvm = proc->p_guestvm;
158 return guestvm;
161 static inline void
162 PROC_SET_GUESTVM(struct proc *proc, struct guestvm *guestvm)
164 proc->p_guestvm = guestvm; \
167 static void
168 print_vmcb_seg(struct vmcb_seg* vmcb_seg, const char* name)
170 printf("%s Selector\n", name);
171 printf("Selector : %" PRIx16 "\n", vmcb_seg->selector);
172 printf("Attributes : %" PRIx16 "\n", vmcb_seg->attrib);
173 printf("Limit : %" PRIx32 "\n", vmcb_seg->limit);
174 printf("Base Address : %" PRIx64 "\n", vmcb_seg->base);
175 printf("\n");
178 static void
179 print_vmcb(struct vmcb *vmcb)
181 printf("VMCB Control Area\n");
182 printf("Intercept CR Reads : %" PRIx16 "\n", vmcb->control.intercept_cr_reads);
183 printf("Intercept CR Writes : %" PRIx16 "\n", vmcb->control.intercept_cr_writes);
184 printf("Intercept DR Reads : %" PRIx16 "\n", vmcb->control.intercept_dr_reads);
185 printf("Intercept DR Writes : %" PRIx16 "\n", vmcb->control.intercept_dr_writes);
186 printf("Intercept Exceptions : %" PRIx32 "\n", vmcb->control.intercept_exceptions);
187 printf("Intercepts : %" PRIx64 "\n", vmcb->control.intercepts);
188 printf("Reserved 1: \n");
189 for(int i=0; i < 44; i++) {
190 printf("%" PRIx8 "", vmcb->control.reserved_1[i]); /* Should be Zero */
192 printf("\n");
193 printf("IOPM Base PA : %" PRIx64 "\n", vmcb->control.iopm_base_pa);
194 printf("MSRPM Base PA : %" PRIx64 "\n", vmcb->control.msrpm_base_pa);
195 printf("TSC Offset : %" PRIx64 "\n", vmcb->control.tsc_offset);
196 printf("Guest ASID : %" PRIx32 "\n", vmcb->control.guest_asid);
197 printf("TLB Control : %" PRIx8 "\n", vmcb->control.tlb_control);
198 printf("Reserved 2 : \n");
199 for(int i=0; i < 3; i++) {
200 printf("%" PRIx8 "", vmcb->control.reserved_1[i]); /* Should be Zero */
202 printf("\n");
203 printf("Virtual TPR : %" PRIx8 "\n", vmcb->control.v_tpr);
204 printf("Virtual IRQ : %" PRIx8 "\n", vmcb->control.v_irq);
205 printf("Virtual Interrupt : %" PRIx8 "\n", vmcb->control.v_intr);
206 printf("Virtual Interrupt Masking: %" PRIx8 "\n", vmcb->control.v_intr_masking);
207 printf("Virtual Interrupt Vector : %" PRIx8 "\n", vmcb->control.v_intr_vector);
208 printf("Reserved 6 : \n");
209 for(int i=0; i < 3; i++) {
210 printf("%" PRIx8 "", vmcb->control.reserved_6[i]); /* Should be Zero */
212 printf("\n");
213 printf("Interrupt Shadow : %" PRIx8 "\n", vmcb->control.intr_shadow);
214 printf("Reserved 7 : \n");
215 for(int i=0; i < 7; i++) {
216 printf("%" PRIx8 "", vmcb->control.reserved_7[i]); /* Should be Zero */
218 printf("\n");
219 printf("Exit Code : %" PRIx64 "\n", vmcb->control.exit_code);
220 printf("Exit Info 1 : %" PRIx64 "\n", vmcb->control.exit_info_1);
221 printf("Exit Info 2 : %" PRIx64 "\n", vmcb->control.exit_info_2);
222 printf("Exit Interrupt Info : %" PRIx32 "\n", vmcb->control.exit_int_info);
223 printf("Exit Interrupt Info Err Code: %" PRIx32 "\n", vmcb->control.exit_int_info_err_code);
224 printf("Nested Control : %" PRIx64 "\n", vmcb->control.nested_ctl);
225 printf("Reserved 8 : \n");
226 for(int i=0; i < 16; i++) {
227 printf("%" PRIx8 "", vmcb->control.reserved_8[i]); /* Should be Zero */
229 printf("\n");
230 printf("Event Injection : %" PRIx64 "\n", vmcb->control.event_inj);
231 printf("Nested CR3 : %" PRIx64 "\n", vmcb->control.nested_cr3);
232 printf("LBR Virtualization Enable: %" PRIx64 "\n", vmcb->control.lbr_virt_enable);
233 printf("Reserved 9 : \n");
234 for(int i=0; i < 832; i++) {
235 printf("%" PRIx8 "", vmcb->control.reserved_9[i]); /* Should be Zero */
237 printf("\n");
239 printf("\n");
241 printf("VMCB Save Area\n");
242 print_vmcb_seg(&(vmcb->save.es), "ES");
243 print_vmcb_seg(&(vmcb->save.es), "CS");
244 print_vmcb_seg(&(vmcb->save.es), "SS");
245 print_vmcb_seg(&(vmcb->save.es), "DS");
246 print_vmcb_seg(&(vmcb->save.es), "FS");
247 print_vmcb_seg(&(vmcb->save.es), "GS");
248 print_vmcb_seg(&(vmcb->save.es), "GDTR");
249 print_vmcb_seg(&(vmcb->save.es), "LDTR");
250 print_vmcb_seg(&(vmcb->save.es), "IDTR");
251 print_vmcb_seg(&(vmcb->save.es), "TR");
252 printf("Reserved 1 : \n");
253 for(int i=0; i < 43; i++) {
254 printf("%" PRIx8 "", vmcb->save.reserved_1[i]); /* Should be Zero */
256 printf("\n");
257 printf("Current Processor Level : %" PRIx8 "\n", vmcb->save.cpl);
258 printf("Reserved 2 : \n");
259 for(int i=0; i < 4; i++) {
260 printf("%" PRIx8 "", vmcb->save.reserved_2[i]); /* Should be Zero */
262 printf("\n");
263 printf("EFER : %" PRIx64 "\n", vmcb->save.efer);
264 printf("Reserved 3 : \n");
265 for(int i=0; i < 112; i++) {
266 printf("%" PRIx8 "", vmcb->save.reserved_3[i]); /* Should be Zero */
268 printf("\n");
269 printf("Control Register 4 : %" PRIx64 "\n", vmcb->save.cr4);
270 printf("Control Register 3 : %" PRIx64 "\n", vmcb->save.cr3);
271 printf("Control Register 0 : %" PRIx64 "\n", vmcb->save.cr0);
272 printf("Debug Register 7 : %" PRIx64 "\n", vmcb->save.dr7);
273 printf("Debug Register 6 : %" PRIx64 "\n", vmcb->save.dr6);
274 printf("RFlags : %" PRIx64 "\n", vmcb->save.rflags);
275 printf("RIP : %" PRIx64 "\n", vmcb->save.rip);
276 printf("Reserved 4 : \n");
277 for(int i=0; i < 88; i++) {
278 printf("%" PRIx8 "", vmcb->save.reserved_4[i]); /* Should be Zero */
280 printf("\n");
281 printf("RSP : %" PRIx64 "\n", vmcb->save.rsp);
282 printf("Reserved 5 : \n");
283 for(int i=0; i < 24; i++) {
284 printf("%" PRIx8 "", vmcb->save.reserved_5[i]); /* Should be Zero */
286 printf("\n");
287 printf("RAX : %" PRIx64 "\n", vmcb->save.rax);
288 printf("STAR : %" PRIx64 "\n", vmcb->save.star);
289 printf("LSTAR : %" PRIx64 "\n", vmcb->save.lstar);
290 printf("CSTAR : %" PRIx64 "\n", vmcb->save.cstar);
291 printf("SFMASK : %" PRIx64 "\n", vmcb->save.sfmask);
292 printf("Kernel GS Base : %" PRIx64 "\n", vmcb->save.kernel_gs_base);
293 printf("SYSENTER CS : %" PRIx64 "\n", vmcb->save.sysenter_cs);
294 printf("SYSENTER ESP : %" PRIx64 "\n", vmcb->save.sysenter_esp);
295 printf("SYSENTER EIP : %" PRIx64 "\n", vmcb->save.sysenter_eip);
296 printf("Control Register 2 : %" PRIx64 "\n", vmcb->save.cr2);
297 printf("Reserved 6 : \n");
298 for(int i=0; i < 32; i++) {
299 printf("%" PRIx8 "", vmcb->save.reserved_6[i]); /* Should be Zero */
301 printf("\n");
302 printf("Global PAT : %" PRIx64 "\n", vmcb->save.g_pat);
303 printf("Debug Control : %" PRIx64 "\n", vmcb->save.dbg_ctl);
304 printf("BR From : %" PRIx64 "\n", vmcb->save.br_from);
305 printf("BR To : %" PRIx64 "\n", vmcb->save.br_to);
306 printf("Last Exception From : %" PRIx64 "\n", vmcb->save.last_excp_from);
307 printf("Last Exception To : %" PRIx64 "\n", vmcb->save.last_excp_to);
309 printf("\n\n");
#if 0
/* Debug dump of the raw fields of an amd64 TSS descriptor. */
static void
print_tss_desc(struct system_segment_descriptor *tss_desc)
{
	printf("TSS desc @ %p:\n", tss_desc);
	printf("sd_lolimit: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_lolimit);
	printf("sd_lobase: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_lobase);
	printf("sd_type: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_type);
	printf("sd_dpl: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_dpl);
	printf("sd_p: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_p);
	printf("sd_hilimit: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_hilimit);
	printf("sd_xx0: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx0);
	printf("sd_gran: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_gran);
	printf("sd_hibase: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_hibase);
	printf("sd_xx1: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx1);
	printf("sd_mbz: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_mbz);
	printf("sd_xx2: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx2);
	printf("\n\n");
}

/* Dump the TSS contents word-by-word, using the descriptor's base/limit. */
static void
print_tss(struct system_segment_descriptor *tss_desc)
{
	u_int32_t *base;
	int limit;
	int i;

	/* sd_lobase holds bits 23:0 of the base, sd_hibase the rest. */
	base = (u_int32_t*) ((((u_int64_t) tss_desc->sd_hibase) << 24) | ((u_int64_t) tss_desc->sd_lobase));
	limit = ((tss_desc->sd_hilimit << 16) | tss_desc->sd_lolimit) / 4;

	printf("TSS: @ %p\n", base);
	for (i = 0; i <= limit; i++)
		printf("%x: 0x%" PRIx32 "\n", i, base[i]);
	printf("\n\n");
}
#endif
349 static void
350 print_vmcb_save_area(struct vmcb *vmcb)
352 printf("VMCB save area:\n");
353 printf(" cs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
354 vmcb->save.cs.selector,
355 vmcb->save.cs.attrib,
356 vmcb->save.cs.limit,
357 vmcb->save.cs.base);
358 printf(" fs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
359 vmcb->save.fs.selector,
360 vmcb->save.fs.attrib,
361 vmcb->save.fs.limit,
362 vmcb->save.fs.base);
363 printf(" gs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
364 vmcb->save.gs.selector,
365 vmcb->save.gs.attrib,
366 vmcb->save.gs.limit,
367 vmcb->save.gs.base);
368 printf(" tr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
369 vmcb->save.tr.selector,
370 vmcb->save.tr.attrib,
371 vmcb->save.tr.limit,
372 vmcb->save.tr.base);
373 printf(" ldtr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
374 vmcb->save.ldtr.selector,
375 vmcb->save.ldtr.attrib,
376 vmcb->save.ldtr.limit,
377 vmcb->save.ldtr.base);
378 printf(" rip: %" PRIx64 "\n", vmcb->save.rip);
379 printf(" kernel_gs_base: %" PRIx64 "\n", vmcb->save.kernel_gs_base);
380 printf(" star: %" PRIx64 "\n", vmcb->save.star);
381 printf(" lstar: %" PRIx64 "\n", vmcb->save.lstar);
382 printf(" cstar: %" PRIx64 "\n", vmcb->save.cstar);
383 printf(" sfmask: %" PRIx64 "\n", vmcb->save.sfmask);
384 printf(" sysenter_cs: %" PRIx64 "\n", vmcb->save.sysenter_cs);
385 printf(" sysenter_esp: %" PRIx64 "\n", vmcb->save.sysenter_esp);
386 printf(" sysenter_eip: %" PRIx64 "\n", vmcb->save.sysenter_eip);
387 printf("\n\n");
390 static int
391 vmrun_assert(struct vmcb *vmcb)
393 #define A(cond) do { if ((cond)) { printf("Error: assertion not met on line %d\n", __LINE__); bad = 1; } } while (0)
395 int bad;
397 bad = 0;
399 // The following are illegal:
401 //EFER.SVME is zero.
402 A((vmcb->save.efer & 0x0000000000001000) == 0);
404 // CR0.CD is zero and CR0.NW is set
405 A( ((vmcb->save.cr0 & 0x0000000040000000) == 0) &&
406 ((vmcb->save.cr0 & 0x0000000020000000) != 0));
408 // CR0[63:32] are not zero.
409 A((vmcb->save.cr0 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
411 // Any MBZ bit of CR3 is set.
412 A((vmcb->save.cr3 & 0xFFF0000000000000) != 0);
414 // CR4[63:11] are not zero.
415 A((vmcb->save.cr4 & 0xFFFFFFFFFFFFF800) == 0xFFFFFFFFFFFFF800);
417 // DR6[63:32] are not zero.
418 A((vmcb->save.dr6 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
420 // DR7[63:32] are not zero.
421 A((vmcb->save.dr7 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
423 // EFER[63:15] are not zero.
424 A((vmcb->save.efer & 0xFFFFFFFFFFFF8000) == 0xFFFFFFFFFFF8000);
426 // EFER.LMA or EFER.LME is non-zero and this processor does not support long mode.
427 //// A((vmcb->save.efer & 0x0000000000000500) != 0);
429 // EFER.LME and CR0.PG are both set and CR4.PAE is zero.
430 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
431 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
432 ((vmcb->save.cr4 & 0x0000000000000020) != 0));
434 // EFER.LME and CR0.PG are both non-zero and CR0.PE is zero.
435 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
436 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
437 ((vmcb->save.cr0 & 0x0000000000000001) == 0));
439 // EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
440 // cs.attrib = concat 55-52 and 47-40 (p372 v2)
441 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
442 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
443 ((vmcb->save.cr4 & 0x0000000000000020) != 0) &&
444 ((vmcb->save.cs.attrib & 0x0200) != 0) &&
445 ((vmcb->save.cs.attrib & 0x0400) != 0));
447 // The VMRUN intercept bit is clear.
448 A((vmcb->control.intercepts & 0x0000000100000000) == 0);
450 // The MSR or IOIO intercept tables extend to a physical address that is
451 // greater than or equal to the maximum supported physical address.
453 // Illegal event injection (see Section 15.19 on page 391).
455 // ASID is equal to zero.
456 A(vmcb->control.guest_asid == 0);
458 // VMRUN can load a guest value of CR0 with PE = 0 but PG = 1, a
459 // combination that is otherwise illegal (see Section 15.18).
461 // In addition to consistency checks, VMRUN and #VMEXIT canonicalize (i.e.,
462 // sign-extend to 63 bits) all base addresses in the segment registers
463 // that have been loaded.
465 return bad;
467 #undef A
470 static void
471 fkvm_vcpu_run(struct vcpu *vcpu)
473 u_int64_t lstar;
474 u_int64_t cstar;
475 u_int64_t star;
476 u_int64_t sfmask;
478 u_short fs_selector;
479 u_short gs_selector;
480 u_short ldt_selector;
482 unsigned long host_cr2;
483 unsigned long host_dr6;
484 unsigned long host_dr7;
486 struct system_segment_descriptor *tss_desc;
487 u_int64_t sel;
489 struct vmcb *vmcb;
491 printf("begin fkvm_vcpu_run\n");
493 vmcb = vcpu->vmcb;
495 if (vmrun_assert(vmcb))
496 return;
498 tss_desc = (struct system_segment_descriptor*) (&gdt[GPROC0_SEL]);
499 sel = GSEL(GPROC0_SEL, SEL_KPL);
501 // printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64 "\n", sel);
502 // print_tss_desc(tss_desc);
503 // print_tss(tss_desc);
505 print_vmcb_save_area(vmcb);
506 printf("vcpu->regs[VCPU_REGS_RIP]: 0x%lx\n", vcpu->regs[VCPU_REGS_RIP]);
507 // disable_intr();
509 vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX];
510 vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP];
511 vmcb->save.rip = vcpu->regs[VCPU_REGS_RIP];
513 /* meh: kvm has pre_svm_run(svm); */
515 vcpu->host_gs_base = rdmsr(MSR_GSBASE);
516 // printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);
518 fs_selector = rfs();
519 gs_selector = rgs();
520 ldt_selector = rldt();
521 // printf("fs selector: %hx\n", fs_selector);
522 // printf("gs selector: %hx\n", gs_selector);
523 // printf("ldt selector: %hx\n", ldt_selector);
525 host_cr2 = rcr2();
527 host_dr6 = rdr6();
528 host_dr7 = rdr7();
530 vmcb->save.cr2 = vcpu->cr2;
531 /* meh: cr3? */
532 // TODO: something with apic_base?
534 /* meh: dr7? db_regs? */
536 // printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
537 // printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
538 // printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
539 // printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));
541 star = rdmsr(MSR_STAR);
542 lstar = rdmsr(MSR_LSTAR);
543 cstar = rdmsr(MSR_CSTAR);
544 sfmask = rdmsr(MSR_SF_MASK);
546 printf("CLGI...\n");
548 __asm __volatile (SVM_CLGI);
551 // enable_intr();
553 __asm __volatile (
554 "push %%rbp; \n\t"
555 "mov %c[rbx](%[svm]), %%rbx \n\t"
556 "mov %c[rcx](%[svm]), %%rcx \n\t"
557 "mov %c[rdx](%[svm]), %%rdx \n\t"
558 "mov %c[rsi](%[svm]), %%rsi \n\t"
559 "mov %c[rdi](%[svm]), %%rdi \n\t"
560 "mov %c[rbp](%[svm]), %%rbp \n\t"
561 "mov %c[r8](%[svm]), %%r8 \n\t"
562 "mov %c[r9](%[svm]), %%r9 \n\t"
563 "mov %c[r10](%[svm]), %%r10 \n\t"
564 "mov %c[r11](%[svm]), %%r11 \n\t"
565 "mov %c[r12](%[svm]), %%r12 \n\t"
566 "mov %c[r13](%[svm]), %%r13 \n\t"
567 "mov %c[r14](%[svm]), %%r14 \n\t"
568 "mov %c[r15](%[svm]), %%r15 \n\t"
570 /* Enter guest mode */
571 "push %%rax \n\t"
572 "mov %c[vmcb](%[svm]), %%rax \n\t"
573 SVM_VMLOAD "\n\t"
574 SVM_VMRUN "\n\t"
575 SVM_VMSAVE "\n\t"
576 "pop %%rax \n\t"
578 /* Save guest registers, load host registers */
579 "mov %%rbx, %c[rbx](%[svm]) \n\t"
580 "mov %%rcx, %c[rcx](%[svm]) \n\t"
581 "mov %%rdx, %c[rdx](%[svm]) \n\t"
582 "mov %%rsi, %c[rsi](%[svm]) \n\t"
583 "mov %%rdi, %c[rdi](%[svm]) \n\t"
584 "mov %%rbp, %c[rbp](%[svm]) \n\t"
585 "mov %%r8, %c[r8](%[svm]) \n\t"
586 "mov %%r9, %c[r9](%[svm]) \n\t"
587 "mov %%r10, %c[r10](%[svm]) \n\t"
588 "mov %%r11, %c[r11](%[svm]) \n\t"
589 "mov %%r12, %c[r12](%[svm]) \n\t"
590 "mov %%r13, %c[r13](%[svm]) \n\t"
591 "mov %%r14, %c[r14](%[svm]) \n\t"
592 "mov %%r15, %c[r15](%[svm]) \n\t"
593 "pop %%rbp"
595 : [svm]"a"(vcpu),
596 [vmcb]"i"(offsetof(struct vcpu, vmcb_pa)),
597 [rbx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBX])),
598 [rcx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RCX])),
599 [rdx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDX])),
600 [rsi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RSI])),
601 [rdi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDI])),
602 [rbp]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBP])),
603 [r8 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R8 ])),
604 [r9 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R9 ])),
605 [r10]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R10])),
606 [r11]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R11])),
607 [r12]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R12])),
608 [r13]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R13])),
609 [r14]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R14])),
610 [r15]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R15]))
611 : "cc", "memory",
612 "rbx", "rcx", "rdx", "rsi", "rdi",
613 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
617 /* meh: dr7? db_regs? */
619 vcpu->cr2 = vmcb->save.cr2;
621 vcpu->regs[VCPU_REGS_RAX] = vmcb->save.rax;
622 vcpu->regs[VCPU_REGS_RSP] = vmcb->save.rsp;
623 vcpu->regs[VCPU_REGS_RIP] = vmcb->save.rip;
625 load_dr6(host_dr6);
626 load_dr7(host_dr7);
628 load_cr2(host_cr2);
630 load_fs(fs_selector);
631 load_gs(gs_selector);
632 lldt(ldt_selector);
634 wrmsr(MSR_GSBASE, vcpu->host_gs_base);
636 tss_desc->sd_type = SDT_SYSTSS;
637 ltr(sel);
639 wrmsr(MSR_STAR, star);
640 wrmsr(MSR_LSTAR, lstar);
641 wrmsr(MSR_CSTAR, cstar);
642 wrmsr(MSR_SF_MASK, sfmask);
644 // disable_intr();
646 __asm __volatile (SVM_STGI);
648 printf("STGI\n");
650 printf("exit_code: %" PRIx64 "\n", vmcb->control.exit_code);
652 // print_tss_desc(tss_desc);
653 // print_tss(tss_desc);
655 print_vmcb_save_area(vmcb);
657 // enable_intr();
659 /* meh: next_rip */
662 static void
663 _fkvm_init_seg(struct vmcb_seg *seg, uint16_t attrib)
665 seg->selector = 0;
666 seg->attrib = VMCB_SELECTOR_P_MASK | attrib;
667 seg->limit = 0xffff;
668 seg->base = 0;
671 static inline void
672 fkvm_init_seg(struct vmcb_seg *seg)
674 _fkvm_init_seg(seg, VMCB_SELECTOR_S_MASK | VMCB_SELECTOR_WRITE_MASK);
677 static inline void
678 fkvm_init_sys_seg(struct vmcb_seg *seg, uint16_t attrib)
680 _fkvm_init_seg(seg, attrib);
683 static void*
684 fkvm_iopm_alloc(void)
686 return contigmalloc(IOPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
689 static void
690 fkvm_iopm_init(void *iopm)
692 memset(iopm, 0xff, IOPM_SIZE); /* TODO: we may want to allow access to PC debug port */
695 static void
696 fkvm_iopm_free(void *iopm)
698 contigfree(iopm, IOPM_SIZE, M_DEVBUF);
701 static void*
702 fkvm_msrpm_alloc(void)
704 return contigmalloc(MSRPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
707 static void
708 fkvm_msrpm_init(void *msrpm)
710 memset(msrpm, 0xff, MSRPM_SIZE); /* TODO: we may want to allow some MSR accesses */
713 static void
714 fkvm_msrpm_free(void *msrpm)
716 contigfree(msrpm, MSRPM_SIZE, M_DEVBUF);
719 static void*
720 fkvm_hsave_area_alloc(void)
722 return contigmalloc(PAGE_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
725 static void
726 fkvm_hsave_area_init(void *hsave_area)
730 static void
731 fkvm_hsave_area_free(void *hsave_area)
733 contigfree(hsave_area, PAGE_SIZE, M_DEVBUF);
736 static struct vmspace*
737 fkvm_make_vmspace(void)
739 struct vmspace *sp;
741 sp = vmspace_alloc(0, 0xffffffffffffffff);
742 if (sp == NULL) {
743 printf("vmspace_alloc failed\n");
744 return NULL;
747 return sp;
750 static void
751 fkvm_destroy_vmspace(struct vmspace* sp)
753 vmspace_free(sp);
756 static struct vmcb*
757 fkvm_vmcb_alloc(void)
759 return contigmalloc(PAGE_SIZE, M_DEVBUF, M_ZERO, 0, -1UL,
760 PAGE_SIZE, 0);
763 static void
764 fkvm_vmcb_init(struct vmcb *vmcb)
766 struct vmcb_control_area *control = &vmcb->control;
767 struct vmcb_save_area *save = &vmcb->save;
769 control->intercept_cr_reads = INTERCEPT_CR4_MASK;
771 control->intercept_cr_writes = INTERCEPT_CR4_MASK |
772 INTERCEPT_CR8_MASK;
774 control->intercept_dr_reads = INTERCEPT_DR0_MASK |
775 INTERCEPT_DR1_MASK |
776 INTERCEPT_DR2_MASK |
777 INTERCEPT_DR3_MASK;
779 control->intercept_dr_writes = INTERCEPT_DR0_MASK |
780 INTERCEPT_DR1_MASK |
781 INTERCEPT_DR2_MASK |
782 INTERCEPT_DR3_MASK |
783 INTERCEPT_DR5_MASK |
784 INTERCEPT_DR7_MASK;
786 control->intercept_exceptions = (1 << IDT_UD) | // Invalid Opcode
787 (1 << IDT_MC); // Machine Check
789 control->intercepts = INTERCEPT_INTR |
790 INTERCEPT_NMI |
791 INTERCEPT_SMI |
792 INTERCEPT_CPUID |
793 INTERCEPT_INVD |
794 INTERCEPT_HLT |
795 INTERCEPT_INVLPGA |
796 INTERCEPT_IOIO_PROT |
797 INTERCEPT_MSR_PROT |
798 INTERCEPT_SHUTDOWN |
799 INTERCEPT_VMRUN |
800 INTERCEPT_VMMCALL |
801 INTERCEPT_VMLOAD |
802 INTERCEPT_VMSAVE |
803 INTERCEPT_STGI |
804 INTERCEPT_CLGI |
805 INTERCEPT_SKINIT |
806 INTERCEPT_WBINVD |
807 INTERCEPT_MONITOR |
808 INTERCEPT_MWAIT_UNCOND;
810 control->iopm_base_pa = vtophys(iopm);
811 control->msrpm_base_pa = vtophys(msrpm);
812 control->tsc_offset = 0;
814 /* TODO: remove this once we assign asid's to distinct VM's */
815 control->guest_asid = 1;
816 control->tlb_control = VMCB_TLB_CONTROL_FLUSH_ALL;
818 /* let v_tpr default to 0 */
819 /* let v_irq default to 0 */
820 /* let v_intr default to 0 */
822 control->v_intr_masking = 1;
824 /* let v_intr_vector default to 0 */
825 /* let intr_shadow default to 0 */
826 /* let exit_code, exit_info_1, exit_info_2, exit_int_info,
827 exit_int_info_err_code default to 0 */
829 control->nested_ctl = 1;
831 /* let event_inj default to 0 */
833 // (nested_cr3 is later)
835 /* let lbr_virt_enable default to 0 */
838 fkvm_init_seg(&save->ds);
839 fkvm_init_seg(&save->es);
840 fkvm_init_seg(&save->fs);
841 fkvm_init_seg(&save->gs);
842 fkvm_init_seg(&save->ss);
844 _fkvm_init_seg(&save->cs, VMCB_SELECTOR_READ_MASK | VMCB_SELECTOR_S_MASK |
845 VMCB_SELECTOR_CODE_MASK);
846 save->cs.selector = 0xf000;
847 save->cs.base = 0xffff0000;
849 save->gdtr.limit = 0xffff;
850 save->idtr.limit = 0xffff;
852 fkvm_init_sys_seg(&save->ldtr, SDT_SYSLDT);
853 fkvm_init_sys_seg(&save->tr, SDT_SYS286BSY);
855 save->g_pat = PAT_VALUE(PAT_WRITE_BACK, 0) | PAT_VALUE(PAT_WRITE_THROUGH, 1) |
856 PAT_VALUE(PAT_UNCACHED, 2) | PAT_VALUE(PAT_UNCACHEABLE, 3) |
857 PAT_VALUE(PAT_WRITE_BACK, 4) | PAT_VALUE(PAT_WRITE_THROUGH, 5) |
858 PAT_VALUE(PAT_UNCACHED, 6) | PAT_VALUE(PAT_UNCACHEABLE, 7);
860 /* CR0 = 6000_0010h at boot */
861 save->cr0 = CR0_ET | CR0_NW | CR0_CD;
862 save->dr6 = 0xffff0ff0;
863 save->dr7 = 0x400;
864 save->rflags = 2;
865 save->rip = 0x0000fff0;
867 save->efer = EFER_SVME;
870 static void
871 fkvm_vmcb_free(struct vmcb *vmcb)
873 contigfree(vmcb, PAGE_SIZE, M_DEVBUF);
876 static struct vcpu*
877 fkvm_vcpu_create(struct guestvm *guest_vm)
879 struct vcpu *vcpu;
880 vcpu = malloc(sizeof(struct vcpu), M_DEVBUF, M_WAITOK|M_ZERO);
882 vcpu->vmcb = fkvm_vmcb_alloc();
883 vcpu->vmcb_pa = vtophys(vcpu->vmcb);
884 printf("vmcb = 0x%p\n", vcpu->vmcb);
885 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu->vmcb_pa);
887 fkvm_vmcb_init(vcpu->vmcb);
888 vcpu->vmcb->control.nested_cr3 = guest_vm->nested_cr3;
889 vcpu->regs[VCPU_REGS_RIP] = vcpu->vmcb->save.rip;
891 vcpu->guest_vm = guest_vm;
893 return vcpu;
896 static void
897 fkvm_vcpu_destroy(struct vcpu *vcpu)
899 fkvm_vmcb_free(vcpu->vmcb);
900 free(vcpu, M_DEVBUF);
903 static struct guestvm*
904 fkvm_guestvm_alloc(void)
906 return malloc(sizeof(struct guestvm), M_DEVBUF, M_WAITOK|M_ZERO);
909 static void
910 fkvm_guestvm_free(struct guestvm* guest_vm)
912 free(guest_vm, M_DEVBUF);
915 static void
916 fkvm_guestvm_add_vcpu(struct guestvm *guest_vm, struct vcpu *vcpu)
918 guest_vm->vcpus[guest_vm->nr_vcpus] = vcpu;
919 guest_vm->nr_vcpus++; /* TODO: Probably not safe to increment */
920 /* How about a lock to protect all of this? */
925 fkvm_userpoke(struct thread *td, struct fkvm_userpoke_args *uap)
927 printf("fkvm_userpoke\n");
929 if (!fkvm_loaded)
930 return ENODEV;
932 return ENOSYS;
935 static int
936 fkvm_mem_has_entry(vm_map_entry_t expected_entry, vm_map_t vm_map, vm_offset_t vaddr)
938 vm_map_entry_t lookup_entry;
939 vm_object_t throwaway_object;
940 vm_pindex_t throwaway_pindex;
941 vm_prot_t throwaway_prot;
942 boolean_t throwaway_wired;
943 int error;
945 error = vm_map_lookup(&vm_map, /* IN/OUT */
946 vaddr,
947 VM_PROT_READ|VM_PROT_WRITE,
948 &lookup_entry, /* OUT */
949 &throwaway_object, /* OUT */
950 &throwaway_pindex, /* OUT */
951 &throwaway_prot, /* OUT */
952 &throwaway_wired); /* OUT */
953 if (error != KERN_SUCCESS)
954 return 0;
955 vm_map_lookup_done(vm_map, lookup_entry);
956 return (lookup_entry == expected_entry);
959 static int
960 fkvm_guest_check_range(struct guestvm *guest_vm, uint64_t start, uint64_t end)
962 vm_map_t guest_vm_map;
963 vm_map_entry_t lookup_entry;
964 vm_object_t throwaway_object;
965 vm_pindex_t throwaway_pindex;
966 vm_prot_t throwaway_prot;
967 boolean_t throwaway_wired;
968 int ret;
969 int error;
971 guest_vm_map = &guest_vm->sp->vm_map;
973 error = vm_map_lookup(&guest_vm_map, /* IN/OUT */
974 start,
975 VM_PROT_READ|VM_PROT_WRITE,
976 &lookup_entry, /* OUT */
977 &throwaway_object, /* OUT */
978 &throwaway_pindex, /* OUT */
979 &throwaway_prot, /* OUT */
980 &throwaway_wired); /* OUT */
981 if (error != KERN_SUCCESS)
982 return EFAULT;
983 vm_map_lookup_done(guest_vm_map, lookup_entry);
986 TODO: We can't actually nest the lookups:
987 panic: _sx_xlock_hard: recursed on non-recursive sx user map @ ../../../vm/vm_map.c:3115
988 Therefore, I've moved the lookup_done above for now, but we really need a lock here.
990 Maybe it's better to use vm_map_lookup_entry directly.
994 if (fkvm_mem_has_entry(lookup_entry, guest_vm_map, end))
995 ret = 0;
996 else
997 ret = EFAULT;
999 return ret;
1002 static void
1003 fkvm_get_regs_regs(struct vcpu *vcpu, struct kvm_regs *out)
1005 out->rax = vcpu->regs[VCPU_REGS_RAX];
1006 out->rbx = vcpu->regs[VCPU_REGS_RBX];
1007 out->rcx = vcpu->regs[VCPU_REGS_RCX];
1008 out->rdx = vcpu->regs[VCPU_REGS_RDX];
1009 out->rsi = vcpu->regs[VCPU_REGS_RSI];
1010 out->rdi = vcpu->regs[VCPU_REGS_RDI];
1011 out->rsp = vcpu->regs[VCPU_REGS_RSP];
1012 out->rbp = vcpu->regs[VCPU_REGS_RBP];
1013 out->r8 = vcpu->regs[VCPU_REGS_R8];
1014 out->r9 = vcpu->regs[VCPU_REGS_R9];
1015 out->r10 = vcpu->regs[VCPU_REGS_R10];
1016 out->r11 = vcpu->regs[VCPU_REGS_R11];
1017 out->r12 = vcpu->regs[VCPU_REGS_R12];
1018 out->r13 = vcpu->regs[VCPU_REGS_R13];
1019 out->r14 = vcpu->regs[VCPU_REGS_R14];
1020 out->r15 = vcpu->regs[VCPU_REGS_R15];
1021 out->rip = vcpu->regs[VCPU_REGS_RIP];
1022 out->rflags = vcpu->vmcb->save.rflags;
1025 static void
1026 fkvm_set_regs_regs(struct vcpu *vcpu, const struct kvm_regs *in)
1028 vcpu->regs[VCPU_REGS_RAX] = in->rax;
1029 vcpu->regs[VCPU_REGS_RBX] = in->rbx;
1030 vcpu->regs[VCPU_REGS_RCX] = in->rcx;
1031 vcpu->regs[VCPU_REGS_RDX] = in->rdx;
1032 vcpu->regs[VCPU_REGS_RSI] = in->rsi;
1033 vcpu->regs[VCPU_REGS_RDI] = in->rdi;
1034 vcpu->regs[VCPU_REGS_RSP] = in->rsp;
1035 vcpu->regs[VCPU_REGS_RBP] = in->rbp;
1036 vcpu->regs[VCPU_REGS_R8] = in->r8;
1037 vcpu->regs[VCPU_REGS_R9] = in->r9;
1038 vcpu->regs[VCPU_REGS_R10] = in->r10;
1039 vcpu->regs[VCPU_REGS_R11] = in->r11;
1040 vcpu->regs[VCPU_REGS_R12] = in->r12;
1041 vcpu->regs[VCPU_REGS_R13] = in->r13;
1042 vcpu->regs[VCPU_REGS_R14] = in->r14;
1043 vcpu->regs[VCPU_REGS_R15] = in->r15;
1044 vcpu->regs[VCPU_REGS_RIP] = in->rip;
1045 vcpu->vmcb->save.rflags = in->rflags;
1048 static void
1049 fkvm_set_vmcb_dtable(struct vmcb_seg *vmcb_seg, struct kvm_dtable *fkvm_dtable)
1051 vmcb_seg->base = fkvm_dtable->base;
1052 vmcb_seg->limit = fkvm_dtable->limit;
1055 static void
1056 fkvm_set_vmcb_seg(struct vmcb_seg *vmcb_seg, struct kvm_segment *fkvm_seg)
1058 vmcb_seg->base = fkvm_seg->base;
1059 vmcb_seg->limit = fkvm_seg->limit;
1060 vmcb_seg->selector = fkvm_seg->selector;
1061 if (fkvm_seg->unusable)
1062 vmcb_seg->attrib=0;
1063 else {
1064 vmcb_seg->attrib = (fkvm_seg->type & VMCB_SELECTOR_TYPE_MASK);
1065 vmcb_seg->attrib |= (fkvm_seg->s & 1) << VMCB_SELECTOR_S_SHIFT;
1066 vmcb_seg->attrib |= (fkvm_seg->dpl & 3) << VMCB_SELECTOR_DPL_SHIFT;
1067 vmcb_seg->attrib |= (fkvm_seg->present & 1) << VMCB_SELECTOR_P_SHIFT;
1068 vmcb_seg->attrib |= (fkvm_seg->avl & 1) << VMCB_SELECTOR_AVL_SHIFT;
1069 vmcb_seg->attrib |= (fkvm_seg->l & 1) << VMCB_SELECTOR_L_SHIFT;
1070 vmcb_seg->attrib |= (fkvm_seg->db & 1) << VMCB_SELECTOR_DB_SHIFT;
1071 vmcb_seg->attrib |= (fkvm_seg->g & 1) << VMCB_SELECTOR_G_SHIFT;
1075 static void
1076 fkvm_set_cr8(struct vcpu *vcpu, uint64_t cr8)
1078 // TODO: if cr8 has reserved bits inject GP Fault, return
1080 vcpu->vmcb->control.v_tpr = (uint8_t) cr8;
/*
 * Update the guest's EFER in the VMCB save area.
 * EFER_SVME is forced on in the stored value: a guest-visible EFER with
 * SVME clear would disable SVM for this VMCB.  Toggling EFER_LME while
 * guest paging (CR0.PG) is on is illegal; that case is currently only
 * logged — the TODO is to inject #GP instead.
 */
1083 static void
1084 fkvm_set_efer(struct vcpu *vcpu, uint64_t efer)
1086 struct vmcb *vmcb = vcpu->vmcb;
1087 //if efer has reserved bits set: inject GP Fault
1089 if (vmcb->save.cr0 & CR0_PG) { //If paging is enabled do not allow changes to LME
1090 if ((vmcb->save.efer & EFER_LME) != (efer & EFER_LME)) {
1091 printf("fkvm_set_efer: attempt to change LME while paging\n");
1092 //inject GP fault
/* Keep SVME set regardless of what the caller supplied. */
1096 vmcb->save.efer = efer | EFER_SVME;
/*
 * Read the guest's system registers (segments, descriptor tables,
 * control registers) into 'out'.
 * Not yet implemented: the visible body leaves *out untouched, so
 * fkvm_get_regs(FKVM_REGS_TYPE_SREGS) currently copies garbage to
 * userland.  Counterpart of fkvm_set_regs_sregs() below.
 */
1099 static void
1100 fkvm_get_regs_sregs(struct vcpu *vcpu, struct kvm_sregs *out)
1102 /* XXX */
1105 static void
1106 fkvm_set_regs_sregs(struct vcpu *vcpu, struct kvm_sregs *in)
1108 struct vmcb *vmcb = vcpu->vmcb;
1110 fkvm_set_vmcb_seg(&vmcb->save.cs, &in->cs);
1111 fkvm_set_vmcb_seg(&vmcb->save.ds, &in->ds);
1112 fkvm_set_vmcb_seg(&vmcb->save.es, &in->es);
1113 fkvm_set_vmcb_seg(&vmcb->save.fs, &in->fs);
1114 fkvm_set_vmcb_seg(&vmcb->save.gs, &in->gs);
1115 fkvm_set_vmcb_seg(&vmcb->save.ss, &in->ss);
1116 fkvm_set_vmcb_seg(&vmcb->save.tr, &in->tr);
1117 fkvm_set_vmcb_seg(&vmcb->save.ldtr, &in->ldt);
1119 vmcb->save.cpl = (vmcb->save.cs.attrib >> VMCB_SELECTOR_DPL_SHIFT) & 3;
1121 fkvm_set_vmcb_dtable(&vmcb->save.idtr, &in->idt);
1122 fkvm_set_vmcb_dtable(&vmcb->save.gdtr, &in->gdt);
1124 vcpu->cr2 = in->cr2;
1125 vcpu->cr3 = in->cr3;
1127 fkvm_set_cr8(vcpu, in->cr8);
1128 fkvm_set_efer(vcpu, in->efer);
1129 /* TODO: apic_base */
1130 vmcb->save.cr0 = in->cr0;
1131 vmcb->save.cr4 = in->cr4;
1132 /* TODO: irq_pending, interrupt_bitmap, irq_summary */
/*
 * Read 'nmsrs' guest MSRs (selected by entries[i].index) into
 * entries[i].data.
 * Not yet implemented: the visible body leaves 'entries' untouched, so
 * fkvm_get_regs(FKVM_REGS_TYPE_MSRS) currently echoes back what the
 * caller passed in.  Counterpart of fkvm_set_regs_msrs() below.
 */
1135 static void
1136 fkvm_get_regs_msrs(struct vcpu *vcpu, uint32_t nmsrs, struct kvm_msr_entry *entries) {
1137 /* XXX */
/*
 * Write a single guest MSR value ('data' into MSR number 'index').
 * Most syscall/sysenter MSRs are simply stored in the VMCB save area and
 * loaded by hardware on VMRUN.  MSRs that need active emulation are
 * stubbed with "unimplemented" log lines; unknown MSR writes are logged
 * and dropped.
 */
1140 static void
1141 fkvm_set_reg_msr(struct vcpu *vcpu, uint32_t index, uint64_t data) {
1142 struct vmcb *vmcb = vcpu->vmcb;
1144 switch(index) {
/* Guest TSC is virtualized as an offset from the host TSC. */
1146 case MSR_TSC: {
1147 uint64_t tsc;
1149 tsc = rdtsc();
1150 vmcb->control.tsc_offset = data - tsc;
1151 break;
/* syscall/sysret and sysenter MSRs: hardware-loaded from the VMCB. */
1154 case MSR_STAR: {
1155 vmcb->save.star = data;
1156 break;
1159 case MSR_LSTAR: {
1160 vmcb->save.lstar = data;
1161 break;
1164 case MSR_CSTAR: {
1165 vmcb->save.cstar = data;
1166 break;
1169 case MSR_GSBASE: {
1170 vmcb->save.kernel_gs_base = data;
1171 break;
1174 case MSR_SF_MASK: {
1175 vmcb->save.sfmask = data;
1176 break;
1179 case MSR_SYSENTER_CS_MSR: {
1180 vmcb->save.sysenter_cs = data;
1181 break;
1184 case MSR_SYSENTER_EIP_MSR: {
1185 vmcb->save.sysenter_eip = data;
1186 break;
1189 case MSR_SYSENTER_ESP_MSR: {
1190 vmcb->save.sysenter_esp = data;
1191 break;
/* Debug / performance-monitoring MSRs: silently accepted, not emulated. */
1194 case MSR_DEBUGCTLMSR: {
1195 printf("unimplemented at %d\n", __LINE__);
1196 break;
1199 case MSR_PERFEVSEL0 ... MSR_PERFEVSEL3:
1200 case MSR_PERFCTR0 ... MSR_PERFCTR3: {
1201 printf("unimplemented at %d\n", __LINE__);
1202 break;
/* EFER writes go through the LME/SVME sanity checks. */
1205 case MSR_EFER: {
1206 fkvm_set_efer(vcpu, data);
1207 break;
/* Machine-check MSRs: not emulated yet. */
1210 case MSR_MC0_STATUS: {
1211 printf("unimplemented at %d\n", __LINE__);
1212 break;
1215 case MSR_MCG_STATUS: {
1216 printf("unimplemented at %d\n", __LINE__);
1217 break;
1220 case MSR_MCG_CTL: {
1221 printf("unimplemented at %d\n", __LINE__);
1222 break;
1225 //TODO: MSR_IA32_UCODE_REV
1226 //TODO: MSR_IA32_UCODE_WRITE
1227 //TODO: 0x200 ... 0x2ff: set_msr_mtrr
1229 case MSR_APICBASE: {
1230 printf("unimplemented at %d\n", __LINE__);
1231 break;
1234 case MSR_IA32_MISC_ENABLE: {
1235 printf("unimplemented at %d\n", __LINE__);
1236 break;
1239 //TODO: MSR_KVM_WALL_CLOCK
1240 //TODO: MSR_KVM_SYSTEM_TIME
/* Unknown MSR: log and ignore the write. */
1242 default:
1243 printf("Did not set unimplemented msr: 0x%" PRIx32 "\n", index);
1247 static void
1248 fkvm_set_regs_msrs(struct vcpu *vcpu, uint32_t nmsrs, struct kvm_msr_entry *entries) {
1249 int i;
1251 for (i = 0; i < nmsrs; i++) {
1252 fkvm_set_reg_msr(vcpu, entries[i].index, entries[i].data);
1256 /* System Calls */
1259 fkvm_get_regs(struct thread *td, struct fkvm_get_regs_args *uap)
1261 struct vcpu *vcpu;
1262 int error;
1264 if (!fkvm_loaded)
1265 return ENODEV;
1267 vcpu = TD_GET_VCPU(td);
1268 if (vcpu == NULL)
1269 return ENODEV;
1271 switch (uap->type) {
1273 case FKVM_REGS_TYPE_REGS: {
1274 struct kvm_regs out;
1275 fkvm_get_regs_regs(vcpu, &out);
1276 return copyout(&out, uap->regs, sizeof(out));
1279 case FKVM_REGS_TYPE_SREGS: {
1280 struct kvm_sregs out;
1281 fkvm_get_regs_sregs(vcpu, &out);
1282 return copyout(&out, uap->regs, sizeof(out));
1285 case FKVM_REGS_TYPE_MSRS: {
1286 struct kvm_msrs out;
1287 struct kvm_msrs *user_msrs;
1288 struct kvm_msr_entry *entries;
1289 int size;
1291 user_msrs = (struct kvm_msrs *)uap->regs;
1293 error = copyin(uap->regs, &out, sizeof(out));
1294 if (error != 0)
1295 return error;
1297 size = sizeof(*entries) * out.nmsrs;
1298 entries = malloc(size, M_DEVBUF, M_WAITOK|M_ZERO);
1299 if (entries == NULL)
1300 return ENOMEM;
1302 error = copyin(user_msrs->entries, entries, size);
1303 if (error != 0) {
1304 printf("FKVM_REGS_TYPE_MSRS: unable to copyin entries\n");
1305 free(entries, M_DEVBUF);
1306 return error;
1309 fkvm_get_regs_msrs(vcpu, out.nmsrs, entries);
1311 error = copyout(user_msrs->entries, entries, size);
1312 if (error != 0) {
1313 printf("FKVM_REGS_TYPE_MSRS: unable to copyout entries\n");
1316 free(entries, M_DEVBUF);
1317 return error;
1320 default:
1321 return EINVAL;
1326 fkvm_set_regs(struct thread *td, struct fkvm_set_regs_args *uap)
1328 struct vcpu *vcpu;
1329 int error = 0;
1331 vcpu = TD_GET_VCPU(td);
1332 if (vcpu == NULL)
1333 return ENODEV;
1335 switch (uap->type) {
1337 case FKVM_REGS_TYPE_REGS: {
1338 struct kvm_regs in;
1339 error = copyin(uap->regs, &in, sizeof(in));
1340 if (error != 0)
1341 return error;
1342 fkvm_set_regs_regs(vcpu, &in);
1343 return 0;
1346 case FKVM_REGS_TYPE_SREGS: {
1347 struct kvm_sregs in;
1348 error = copyin(uap->regs, &in, sizeof(in));
1349 if (error != 0)
1350 return error;
1351 fkvm_set_regs_sregs(vcpu, &in);
1352 return 0;
1355 case FKVM_REGS_TYPE_MSRS: {
1356 struct kvm_msrs in;
1357 struct kvm_msrs *user_msrs;
1358 struct kvm_msr_entry *entries;
1359 int size;
1361 user_msrs = (struct kvm_msrs *)uap->regs;
1363 error = copyin(uap->regs, &in, sizeof(in));
1364 if (error != 0)
1365 return error;
1367 size = sizeof(*entries) * in.nmsrs;
1368 entries = malloc(size, M_DEVBUF, M_WAITOK|M_ZERO);
1369 if (entries == NULL)
1370 return ENOMEM;
1372 error = copyin(user_msrs->entries, entries, size);
1373 if (error != 0) {
1374 printf("FKVM_REGS_TYPE_MSRS: unable to copyin entries\n");
1375 free(entries, M_DEVBUF);
1376 return error;
1379 fkvm_set_regs_msrs(vcpu, in.nmsrs, entries);
1381 free(entries, M_DEVBUF);
1382 return error;
1385 default:
1386 return EINVAL;
1390 /* This function can only be called with multiples of page sizes */
1391 /* vaddr as NULL overloads to fkvm_guest_check_range */
/*
 * System call: back the guest-physical range [guest_pa, guest_pa+size)
 * with the caller's userland memory at uap->vaddr, by looking up the VM
 * object behind vaddr and inserting it into the guest's vm_map.
 * Returns 0 or an errno value.
 * NOTE(review): unlike the sibling syscalls, there is no '!fkvm_loaded'
 * guard here — confirm whether one is needed.
 */
1393 fkvm_set_user_mem_region(struct thread *td, struct fkvm_set_user_mem_region_args *uap)
1395 struct guestvm *guest_vm;
1397 vm_offset_t start;
1398 vm_offset_t end;
1400 struct vmspace *user_vm_space;
1401 vm_map_t user_vm_map;
1403 vm_object_t vm_object;
1404 vm_pindex_t vm_object_pindex;
1405 vm_ooffset_t vm_object_offset;
1406 vm_prot_t throwaway_prot;
1407 boolean_t throwaway_wired;
1408 vm_map_entry_t lookup_entry;
1410 int error;
1412 guest_vm = PROC_GET_GUESTVM(td->td_proc);
1413 if (guest_vm == NULL) {
1414 printf("PROC_GET_GUESTVM -> NULL\n");
1415 return ENODEV;
/* 'end' is the inclusive last byte of the guest-physical range. */
1418 start = uap->guest_pa;
1419 end = uap->guest_pa + uap->size - 1;
1420 printf("start: 0x%" PRIx64 " bytes\n", start);
1421 printf("end: 0x%" PRIx64 " bytes\n", end);
/* vaddr == 0 means: only validate that the guest range is mapped. */
1423 if (uap->vaddr == 0)
1424 return fkvm_guest_check_range(guest_vm, start, end);
1426 user_vm_space = td->td_proc->p_vmspace;
1427 user_vm_map = &user_vm_space->vm_map;
1428 printf("user vm space: %p\n", user_vm_space);
1429 printf("user vm map: %p\n", user_vm_map);
/* Find the VM object and offset backing the user's vaddr. */
1431 error = vm_map_lookup(&user_vm_map, /* IN/OUT */
1432 uap->vaddr,
1433 VM_PROT_READ|VM_PROT_WRITE,
1434 &lookup_entry, /* OUT */
1435 &vm_object, /* OUT */
1436 &vm_object_pindex, /* OUT */
1437 &throwaway_prot, /* OUT */
1438 &throwaway_wired); /* OUT */
1439 if (error != KERN_SUCCESS) {
1440 printf("vm_map_lookup failed: %d\n", error);
1441 return EFAULT;
1444 /* TODO: Trust the user that the full region is valid.
1445 * This is very bad. See the note in fkvm_guest_check_range
1446 * on nesting vm lookups. */
1447 #if 0
1448 if (!fkvm_mem_has_entry(lookup_entry, user_vm_map, uap->vaddr + uap->size)) {
1449 printf("end of range not contained in same vm map entry as start\n");
1450 return EFAULT;
1452 #endif
1454 printf("vm object: %p\n", vm_object);
1455 printf(" size: %d pages\n", (int) vm_object->size);
1457 vm_object_offset = IDX_TO_OFF(vm_object_pindex);
1458 printf("vm_ooffset: 0x%" PRIx64 "\n", vm_object_offset);
/* Take a reference so the object survives vm_map_lookup_done(). */
1460 vm_object_reference(vm_object); // TODO: this might be a mem leak
1462 vm_map_lookup_done(user_vm_map, lookup_entry);
/* Map the same object into the guest-physical address space. */
1464 error = vm_map_insert(&guest_vm->sp->vm_map,
1465 vm_object,
1466 vm_object_offset,
1467 start,
1468 end,
1469 VM_PROT_ALL, VM_PROT_ALL,
/* Translate vm_map error codes into errno values for userland. */
1471 if (error != KERN_SUCCESS) {
1472 printf("vm_map_insert failed: %d\n", error);
1473 switch (error) {
1474 case KERN_INVALID_ADDRESS:
1475 return EINVAL;
1476 case KERN_NO_SPACE:
1477 return ENOMEM;
1478 default:
1479 return 1;
1483 return 0;
1487 fkvm_unset_user_mem_region(struct thread *td, struct fkvm_unset_user_mem_region_args *uap)
1489 struct guestvm *guest_vm;
1491 if (!fkvm_loaded)
1492 return ENODEV;
1494 guest_vm = PROC_GET_GUESTVM(td->td_proc);
1495 if (guest_vm == NULL) {
1496 printf("PROC_GET_GUESTVM -> NULL\n");
1497 return ENODEV;
1500 vm_offset_t start;
1501 vm_offset_t end;
1503 vm_map_t guest_vm_map;
1505 int error;
1507 start = uap->guest_pa;
1508 end = uap->guest_pa + uap->size - 1;
1509 printf("start: 0x%" PRIx64 " bytes\n", start);
1510 printf("end: 0x%" PRIx64 " bytes\n", end);
1512 guest_vm_map = &guest_vm->sp->vm_map;
1514 error = vm_map_remove(guest_vm_map, start, end);
1515 if (error != KERN_SUCCESS)
1516 return -1;
1518 return 0;
1522 fkvm_create_vm(struct thread *td, struct fkvm_create_vm_args *uap)
1524 struct guestvm *guest_vm;
1526 printf("SYSCALL : fkvm_create_vm\n");
1528 if (!fkvm_loaded)
1529 return ENODEV;
1531 /* Allocate Guest VM */
1532 guest_vm = fkvm_guestvm_alloc();
1534 /* Set up the vm address space */
1535 guest_vm->sp = fkvm_make_vmspace();
1536 if (guest_vm->sp == NULL) {
1537 fkvm_guestvm_free(guest_vm);
1538 return ENOMEM;
1540 guest_vm->nested_cr3 = vtophys(vmspace_pmap(guest_vm->sp)->pm_pml4);
1542 printf("guest:\n");
1543 printf(" vm space: %p\n", guest_vm->sp);
1544 printf(" vm map: %p\n", &guest_vm->sp->vm_map);
1545 printf(" ncr3: 0x%" PRIx64 "\n", guest_vm->nested_cr3);
1547 PROC_SET_GUESTVM(td->td_proc, guest_vm);
1549 printf("fkvm_create_vm done\n");
1550 return 0;
1553 static void
1554 fkvm_destroy_vm(struct guestvm *guest_vm)
1556 /* Destroy the VCPUs */
1557 while (guest_vm->nr_vcpus > 0) {
1558 guest_vm->nr_vcpus--;
1559 fkvm_vcpu_destroy(guest_vm->vcpus[guest_vm->nr_vcpus]);
1560 guest_vm->vcpus[guest_vm->nr_vcpus] = NULL;
1563 /* Destroy the vmspace */
1564 if (guest_vm->sp != NULL)
1565 fkvm_destroy_vmspace(guest_vm->sp);
1567 /* Destroy the Guest VM itself */
1568 fkvm_guestvm_free(guest_vm);
/*
 * IOIO intercept handler: decode the port-I/O exit information
 * (EXITINFO1) into kvm_run->u.io so userland can emulate the access.
 * Returns 0 on success, 1 for string operations (INS/OUTS), which are
 * not implemented yet.  'rip' is the next-instruction pointer supplied
 * by the intercept (EXITINFO2).
 */
1571 static int
1572 intercept_ioio(struct vcpu *vcpu, struct kvm_run *kvm_run, uint64_t ioio_info, uint64_t rip)
1574 struct vmcb *vmcb = vcpu->vmcb;
1575 uint8_t str;
1576 //uint8_t rep;
1578 str = (ioio_info & STR_MASK) >> STR_SHIFT;
1579 if (str) {
/* NOTE(review): u.io.in is only assigned below (line 1586), so this
 * INS/OUTS label reads a stale value — confirm and reorder if so. */
1580 printf("%s operation requested, not yet implemented, \n",
1581 kvm_run->u.io.in ? "INS" : "OUTS");
1582 return 1;
/* Decode port, direction and operand size from EXITINFO1. */
1585 kvm_run->u.io.port = ioio_info >> PORT_SHIFT;
1586 kvm_run->u.io.in = ioio_info & TYPE_MASK;
1588 kvm_run->u.io.size = (ioio_info & SIZE_MASK) >> SIZE_SHIFT;
1590 kvm_run->u.io.data_offset = PAGE_SIZE;
1591 kvm_run->u.io.pio_data = vcpu->regs[VCPU_REGS_RAX];
1593 /* We need to remove the Interrupt Shadow Flag from the VMCB (see 15.20.5 in AMD_Vol2) */
1594 vmcb->control.intr_shadow = 0;
1596 //rep = (ioio_info & REP_MASK) >> REP_SHIFT;
1597 /* TODO: Research more into Direction Flag checked in KVM; DF bit in RFLAGS */
1599 /* set the next rip in the VMCB save area for now */
1600 /* TODO: Store rIP in vm_run structure until we absolutely need it */
1601 vcpu->regs[VCPU_REGS_RIP] = rip;
1603 return 0;
1606 static void
1607 intercept_shutdown(struct vcpu *vcpu)
1609 struct vmcb *vmcb = vcpu->vmcb;
1610 memset(vmcb, 0, PAGE_SIZE);
1611 fkvm_vmcb_init(vmcb);
/*
 * System call: run the calling thread's VCPU until a VM exit occurs that
 * userland must handle.  The loop re-enters the guest for exits the
 * kernel can resolve itself (external interrupts, resolvable nested page
 * faults); all other exits set kvm_run.exit_reason and break out, and
 * the kvm_run structure is copied back to uap->run.
 * Returns 0, an errno value, or ENOSYS for unhandled exit codes.
 */
1615 fkvm_vm_run(struct thread *td, struct fkvm_vm_run_args *uap)
1617 struct vcpu *vcpu;
1618 struct guestvm *guest_vm;
1619 struct vmcb *vmcb;
1620 int error;
1621 int ret = 0;
1622 struct kvm_run kvm_run;
1624 if (!fkvm_loaded)
1625 return ENODEV;
1627 vcpu = TD_GET_VCPU(td);
1628 if (vcpu == NULL)
1629 return ENODEV;
1631 guest_vm = vcpu->guest_vm;
1632 vmcb = vcpu->vmcb;
1634 error = copyin(uap->run, &kvm_run, sizeof(struct kvm_run));
1635 if (error != 0)
1636 return error;
/* Userland may have updated the TPR since the last exit. */
1638 fkvm_set_cr8(vcpu, kvm_run.cr8);
1640 kvm_run.exit_reason = KVM_EXIT_CONTINUE;
/* Keep re-entering the guest while exits remain kernel-resolvable. */
1642 while(kvm_run.exit_reason == KVM_EXIT_CONTINUE) {
1643 fkvm_vcpu_run(vcpu);
1645 switch (vmcb->control.exit_code) {
/* Guest exception intercepts: not handled yet, bail with ENOSYS. */
1647 case VMCB_EXIT_EXCP_BASE ... (VMCB_EXIT_EXCP_BASE + 31): {
1648 int excp_vector;
1650 excp_vector = vmcb->control.exit_code - VMCB_EXIT_EXCP_BASE;
1652 printf("VMCB_EXIT_EXCP_BASE, exception vector: 0x%x\n",
1653 excp_vector);
1654 ret = ENOSYS;
1655 break;
1658 case VMCB_EXIT_INTR: {
1659 printf("VMCB_EXIT_INTR - nothing to do\n");
1660 /* Handled by host OS already */
1661 kvm_run.exit_reason = KVM_EXIT_CONTINUE;
1662 break;
1665 case VMCB_EXIT_NPF: {
1666 /* EXITINFO1 contains fault error code */
1667 /* EXITINFO2 contains the guest physical address causing the fault. */
1669 u_int64_t fault_code;
1670 u_int64_t fault_gpa;
1672 vm_prot_t fault_type;
1673 int fault_flags;
1674 int rc;
1676 fault_code = vmcb->control.exit_info_1;
1677 fault_gpa = vmcb->control.exit_info_2;
1678 kvm_run.exit_reason = KVM_EXIT_CONTINUE;
1680 printf("VMCB_EXIT_NPF:\n");
1681 printf("gpa=0x%" PRIx64 "\n", fault_gpa);
1682 printf("fault code=0x%" PRIx64 " [P=%x, R/W=%x, U/S=%x, I/D=%x]\n",
1683 fault_code,
1684 (fault_code & PGEX_P) != 0,
1685 (fault_code & PGEX_W) != 0,
1686 (fault_code & PGEX_U) != 0,
1687 (fault_code & PGEX_I) != 0);
/* Map the NPF error code onto a vm_fault() access type. */
1689 if (fault_code & PGEX_W)
1690 fault_type = VM_PROT_WRITE;
1691 else if (fault_code & PGEX_I)
1692 fault_type = VM_PROT_EXECUTE;
1693 else
1694 fault_type = VM_PROT_READ;
/* Resolve the fault against the guest-physical vm_map; on failure
 * the exit is reported to userland as KVM_EXIT_UNKNOWN. */
1696 fault_flags = 0; /* TODO: is that right? */
1697 rc = vm_fault(&guest_vm->sp->vm_map, (fault_gpa & (~PAGE_MASK)), fault_type, fault_flags);
1698 if (rc != KERN_SUCCESS) {
1699 printf("vm_fault failed: %d\n", rc);
1700 kvm_run.exit_reason = KVM_EXIT_UNKNOWN;
1703 break;
/* Exits delegated to userland: translate to a kvm_run exit reason. */
1705 case VMCB_EXIT_WRITE_CR8:
1706 kvm_run.exit_reason = KVM_EXIT_SET_TPR;
1707 break;
1708 case VMCB_EXIT_NMI:
1709 kvm_run.exit_reason = KVM_EXIT_NMI;
1710 break;
1711 case VMCB_EXIT_HLT:
1712 kvm_run.exit_reason = KVM_EXIT_HLT;
1713 break;
1714 case VMCB_EXIT_SHUTDOWN:
1715 intercept_shutdown(vcpu);
1716 kvm_run.exit_reason = KVM_EXIT_SHUTDOWN;
1717 break;
1718 case VMCB_EXIT_IOIO:
1719 intercept_ioio(vcpu, &kvm_run,
1720 vmcb->control.exit_info_1,
1721 vmcb->control.exit_info_2);
1722 kvm_run.exit_reason = KVM_EXIT_IO;
1723 break;
/* Everything below is intercepted but not yet implemented. */
1724 case VMCB_EXIT_READ_CR0:
1725 case VMCB_EXIT_READ_CR3:
1726 case VMCB_EXIT_READ_CR4:
1727 case VMCB_EXIT_READ_CR8:
1728 case VMCB_EXIT_WRITE_CR0:
1729 case VMCB_EXIT_WRITE_CR3:
1730 case VMCB_EXIT_WRITE_CR4:
1731 case VMCB_EXIT_READ_DR0:
1732 case VMCB_EXIT_READ_DR1:
1733 case VMCB_EXIT_READ_DR2:
1734 case VMCB_EXIT_READ_DR3:
1735 case VMCB_EXIT_WRITE_DR0:
1736 case VMCB_EXIT_WRITE_DR1:
1737 case VMCB_EXIT_WRITE_DR2:
1738 case VMCB_EXIT_WRITE_DR3:
1739 case VMCB_EXIT_WRITE_DR5:
1740 case VMCB_EXIT_WRITE_DR7:
1741 case VMCB_EXIT_SMI:
1742 case VMCB_EXIT_INIT:
1743 case VMCB_EXIT_VINTR:
1744 case VMCB_EXIT_CR0_SEL_WRITE:
1745 case VMCB_EXIT_CPUID:
1746 case VMCB_EXIT_INVD:
1747 case VMCB_EXIT_INVLPG:
1748 case VMCB_EXIT_INVLPGA:
1749 case VMCB_EXIT_MSR:
1750 case VMCB_EXIT_TASK_SWITCH:
1751 case VMCB_EXIT_VMRUN:
1752 case VMCB_EXIT_VMMCALL:
1753 case VMCB_EXIT_VMLOAD:
1754 case VMCB_EXIT_VMSAVE:
1755 case VMCB_EXIT_STGI:
1756 case VMCB_EXIT_CLGI:
1757 case VMCB_EXIT_SKINIT:
1758 case VMCB_EXIT_WBINVD:
1759 case VMCB_EXIT_MONITOR:
1760 case VMCB_EXIT_MWAIT_UNCOND:
1761 default:
1762 printf("Unhandled vmexit:\n"
1763 " code: 0x%" PRIx64 "\n"
1764 " info1: 0x%" PRIx64 "\n"
1765 " info2: 0x%" PRIx64 "\n",
1766 vmcb->control.exit_code,
1767 vmcb->control.exit_info_1,
1768 vmcb->control.exit_info_2);
1769 print_vmcb(vmcb);
1770 ret = ENOSYS;
1771 kvm_run.exit_reason = KVM_EXIT_UNKNOWN;
1775 printf("\n\n");
/* NOTE(review): a copyout failure here is silently dropped; consider
 * returning it when ret == 0 (the TODO below agrees). */
1777 /* TODO: check copyout ret val */
1778 copyout(&kvm_run, uap->run, sizeof(struct kvm_run));
1779 printf("sizeof(struct kvm_run) = %" PRIu64 "\n", sizeof(struct kvm_run));
1781 return ret;
/*
 * System call: create a VCPU in the calling process's guest VM and bind
 * it to the calling thread.  Returns 0 or ENODEV.
 * NOTE(review): the fkvm_vcpu_create() result is used unchecked —
 * confirm it cannot fail / return NULL.
 */
1785 fkvm_create_vcpu(struct thread *td, struct fkvm_create_vcpu_args *uap)
1787 struct guestvm *guest_vm;
1788 struct vcpu *vcpu;
1790 if (!fkvm_loaded)
1791 return ENODEV;
1793 guest_vm = PROC_GET_GUESTVM(td->td_proc);
1794 if (guest_vm == NULL) {
1795 printf("PROC_GET_GUESTVM -> NULL\n");
1796 return ENODEV;
1799 /* Allocate VCPU */
1800 printf("fkvm_create_vcpu: td = %p\n", td);
1801 vcpu = fkvm_vcpu_create(guest_vm);
1802 fkvm_guestvm_add_vcpu(guest_vm, vcpu);
/* Bind the new VCPU to this thread so later syscalls can find it. */
1804 TD_SET_VCPU(td, vcpu);
1805 printf("fkvm_create_vcpu: vcpu = %p\n", vcpu);
1806 return 0;
1809 static int
1810 fkvm_check_cpu_extension(void)
1812 u_int cpu_exthigh;
1813 u_int regs[4];
1814 u_int64_t vmcr;
1816 printf("fkvm_check_cpu_extension\n");
1818 /* Assumption: the architecture supports the cpuid instruction */
1820 /* Check if CPUID extended function 8000_0001h is supported. */
1821 do_cpuid(0x80000000, regs);
1822 cpu_exthigh = regs[0];
1824 printf("cpu_exthigh = %u\n", cpu_exthigh);
1826 if(cpu_exthigh >= 0x80000001) {
1827 /* Execute CPUID extended function 8000_0001h */
1828 do_cpuid(0x80000001, regs);
1829 printf("EAX = %u\n", regs[0]);
1831 if((regs[0] & 0x2) == 0) { /* Check SVM bit */
1832 printf("SVM not available\n");
1833 goto fail; /* SVM not available */
1836 vmcr = rdmsr(0xc0010114); /* Read VM_CR MSR */
1837 if((vmcr & 0x8) == 0) { /* Check SVMDIS bit */
1838 printf("vmcr = %" PRIx64 "\n", vmcr);
1839 printf("SVM allowed\n");
1840 return KERN_SUCCESS; /* SVM allowed */
1843 /* Execute CPUID extended function 8000_000ah */
1844 do_cpuid(0x8000000a, regs);
1845 if((regs[3] & 0x2) == 0) { /* Check SVM_LOCK bit */
1846 /* SVM disabled at bios; not unlockable.
1847 * User must change a BIOS setting to enable SVM.
1849 printf("EDX = %u\n", regs[3]);
1850 printf("SVM disabled at bios\n");
1851 goto fail;
1852 } else {
1853 /* TODO:
1854 * SVM may be unlockable;
1855 * consult the BIOS or TPM to obtain the key.
1857 printf("EDX = %u\n", regs[3]);
1858 printf("SVM maybe unlockable\n");
1859 goto fail;
1862 fail:
1863 return KERN_FAILURE;
1866 static void
1867 fkvm_proc_exit(void *arg, struct proc *p)
1869 struct guestvm *guest_vm;
1871 guest_vm = PROC_GET_GUESTVM(p);
1872 if (guest_vm == NULL)
1873 return;
1875 fkvm_destroy_vm(guest_vm);
1876 PROC_SET_GUESTVM(p, NULL);
/*
 * Module load hook (via SYSINIT below): probe for SVM, register the
 * process-exit cleanup handler, allocate the shared host-save area and
 * the I/O / MSR permission bitmaps, then enable SVM by setting
 * EFER.SVME and programming MSR_VM_HSAVE_PA.
 * On failure fkvm_loaded stays 0 and every syscall bails with ENODEV.
 */
1879 static void
1880 fkvm_load(void *unused)
1882 u_int64_t efer;
1883 int error;
1885 printf("fkvm_load\n");
1886 printf("sizeof(struct vmcb) = %" PRIx64 "\n", sizeof(struct vmcb));
1888 hsave_area = NULL;
1889 iopm = NULL;
1890 msrpm = NULL;
1892 /* check if SVM is supported */
1893 error = fkvm_check_cpu_extension();
1894 if(error != KERN_SUCCESS) {
1895 printf("ERROR: SVM extension not available\n");
1896 return;
/* Clean up guest VMs when their owning process exits. */
1899 exit_tag = EVENTHANDLER_REGISTER(process_exit, fkvm_proc_exit, NULL,
1900 EVENTHANDLER_PRI_ANY);
1902 /* allocate structures */
1903 hsave_area = fkvm_hsave_area_alloc();
1904 iopm = fkvm_iopm_alloc();
1905 msrpm = fkvm_msrpm_alloc();
1907 /* Initialize structures */
1908 fkvm_hsave_area_init(hsave_area);
1909 fkvm_iopm_init(iopm);
1910 fkvm_msrpm_init(msrpm);
1912 /* Enable SVM in EFER */
1913 efer = rdmsr(MSR_EFER);
1914 printf("EFER = %" PRIx64 "\n", efer);
1915 wrmsr(MSR_EFER, efer | EFER_SVME);
1916 efer = rdmsr(MSR_EFER);
1917 printf("new EFER = %" PRIx64 "\n", efer);
1919 /* Write Host save address in MSR_VM_HSAVE_PA */
1920 wrmsr(MSR_VM_HSAVE_PA, vtophys(hsave_area));
/* Only now do the fkvm syscalls become usable. */
1922 fkvm_loaded = 1;
/* Run at pseudo-device time during boot/module init. */
1924 SYSINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_load, NULL);
1926 static void
1927 fkvm_unload(void *unused)
1929 printf("fkvm_unload\n");
1931 if (!fkvm_loaded) {
1932 printf("fkvm_unload: fkvm not loaded");
1933 return;
1936 EVENTHANDLER_DEREGISTER(process_exit, exit_tag);
1938 if (msrpm != NULL) {
1939 fkvm_msrpm_free(iopm);
1940 msrpm = NULL;
1942 if (iopm != NULL) {
1943 fkvm_iopm_free(iopm);
1944 iopm = NULL;
1946 if (hsave_area != NULL) {
1947 fkvm_hsave_area_free(hsave_area);
1948 hsave_area = NULL;
/* Run fkvm_unload at the same init level, in reverse, on shutdown/unload. */
1951 SYSUNINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_unload, NULL);