turns printfs back on
[freebsd-src/fkvm-freebsd.git] / sys / kern / kern_fkvm.c
blob72d9aad0556c6d68938220262606bc236b8dcc20
1 /*-
2 * Copyright (c) 2008 Brent Stephens <brents@rice.edu>
3 * Copyright (c) 2008 Diego Ongaro <diego.ongaro@rice.edu>
4 * Copyright (c) 2008 Kaushik Kumar Ram <kaushik@rice.edu>
5 * Copyright (c) 2008 Oleg Pesok <olegpesok@gmail.com>
6 * All rights reserved.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
30 #include <sys/fkvm.h>
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/sysproto.h>
37 #include <sys/file.h>
38 #include <sys/mman.h>
39 #include <sys/proc.h>
40 #include <sys/eventhandler.h>
41 #include <vm/vm.h>
42 #include <vm/pmap.h>
43 #include <vm/vm_extern.h>
44 #include <vm/vm_map.h>
45 #include <vm/vm_object.h>
46 #include <vm/vm_param.h>
47 #include <machine/_inttypes.h>
48 #include <machine/specialreg.h>
49 #include <machine/segments.h>
50 #include <machine/vmcb.h>
53 /* Definitions for Port IO */
/*
 * Shift/mask pairs for decoding the fields packed into the IOIO
 * intercept's exit-information word (port number, address size,
 * operand size, REP prefix, string op, and in/out direction).
 * NOTE(review): presumably these decode EXITINFO1 of a VMEXIT_IOIO --
 * confirm against the AMD64 APM vol. 2 IOIO intercept layout.
 */
54 #define PORT_SHIFT 16
55 #define ADDR_SHIFT 7
56 #define SIZE_SHIFT 4
57 #define REP_SHIFT 3
58 #define STR_SHIFT 2
59 #define TYPE_SHIFT 0
61 #define PORT_MASK 0xFFFF0000
62 #define ADDR_MASK (7 << ADDR_SHIFT)
63 #define SIZE_MASK (7 << SIZE_SHIFT)
64 #define REP_MASK (1 << REP_SHIFT)
65 #define STR_MASK (1 << STR_SHIFT)
66 #define TYPE_MASK (1 << TYPE_SHIFT)
67 /* End Definitions for Port IO */
69 #define PMIO_PAGE_OFFSET 1
/* Sizes of the SVM I/O and MSR permission bitmaps handed to hardware. */
71 #define IOPM_SIZE (8*1024 + 1) /* TODO: ensure that this need not be 12Kbtes, not just 8Kb+1 */
72 #define MSRPM_SIZE (8*1024)
74 /* fkvm data */
/* Non-zero once the module is loaded and SVM has been enabled. */
76 static int fkvm_loaded = 0;
/* Shared, physically contiguous permission bitmaps used by every VMCB. */
78 static void *iopm = NULL; /* Should I allocate a vm_object_t instead? */
79 static void *msrpm = NULL; /* Should I allocate a vm_object_t instead? */
/* Page the CPU uses to stash host state across VMRUN (VM_HSAVE_PA). */
81 static void *hsave_area = NULL;
/* Handle for the process-exit event hook, so it can be deregistered. */
83 static eventhandler_tag exit_tag;
85 /* per-guest data */
/*
 * Indices into vcpu->regs[] for the guest's general-purpose registers.
 * The first 16 match the hardware register encoding (RAX=0 ... R15=15).
 */
87 enum {
88 VCPU_REGS_RAX = 0,
89 VCPU_REGS_RCX = 1,
90 VCPU_REGS_RDX = 2,
91 VCPU_REGS_RBX = 3,
92 VCPU_REGS_RSP = 4,
93 VCPU_REGS_RBP = 5,
94 VCPU_REGS_RSI = 6,
95 VCPU_REGS_RDI = 7,
96 VCPU_REGS_R8 = 8,
97 VCPU_REGS_R9 = 9,
98 VCPU_REGS_R10 = 10,
99 VCPU_REGS_R11 = 11,
100 VCPU_REGS_R12 = 12,
101 VCPU_REGS_R13 = 13,
102 VCPU_REGS_R14 = 14,
103 VCPU_REGS_R15 = 15,
104 VCPU_REGS_RIP,
105 NR_VCPU_REGS
/* Per-virtual-CPU state: the hardware VMCB plus software-held context. */
108 struct vcpu {
109 /* VCPU data */
110 struct vmcb *vmcb;
/* Physical address of the VMCB; VMRUN takes it in RAX. */
111 unsigned long vmcb_pa;
/* Guest GPRs not saved/restored by VMRUN itself (see fkvm_vcpu_run). */
113 unsigned long regs[NR_VCPU_REGS]
/* Host FS/GS base MSRs, stashed across the world switch. */
114 u_int64_t host_fs_base;
115 u_int64_t host_gs_base;
116 u_int64_t cr2;
117 u_int64_t cr3;
/* Shadowed guest MTRR state (fixed-range and variable-range). */
119 struct {
120 uint64_t default_type;
121 uint64_t mtrr64k[MTRR_N64K/8];
122 uint64_t mtrr16k[MTRR_N16K/8];
123 uint64_t mtrr4k [MTRR_N4K /8];
124 #define FKVM_MTRR_NVAR 8
125 uint64_t mtrrvar[FKVM_MTRR_NVAR *2];
126 } mtrrs;
128 struct guestvm *guest_vm;
/* Bitmap of pending virtual interrupt vectors (one bit per 0..255). */
130 unsigned long virqs[256 / (sizeof(unsigned long) * 8)];
/* Per-VM state: its vcpus, address space, and nested page table root. */
133 struct guestvm {
134 struct vcpu *vcpus[MAX_VCPUS];
135 int nr_vcpus;
137 struct vmspace *sp;
138 u_int64_t nested_cr3;
/*
 * Hand-encoded SVM instructions (0F 01 /x), emitted as raw bytes so the
 * code assembles even with toolchains that predate SVM mnemonics.
 */
142 #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
143 #define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"
144 #define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb"
145 #define SVM_CLGI ".byte 0x0f, 0x01, 0xdd"
146 #define SVM_STGI ".byte 0x0f, 0x01, 0xdc"
147 #define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf"
/* Forward declaration: fkvm_vcpu_run needs this before its definition. */
149 static void
150 fkvm_virq_dequeue(struct vcpu *vcpu);
152 static inline struct vcpu *
153 TD_GET_VCPU(struct thread *td)
155 struct vcpu *vcpu;
156 vcpu = td->vcpu;
157 if (vcpu == NULL)
158 printf("TD_GET_VCPU -> NULL\n");
159 return vcpu;
162 static inline void
163 TD_SET_VCPU(struct thread *td, struct vcpu *vcpu)
165 td->vcpu = vcpu;
168 static inline struct guestvm *
169 PROC_GET_GUESTVM(struct proc *proc)
171 struct guestvm *guestvm;
172 guestvm = proc->p_guestvm;
173 return guestvm;
176 static inline void
177 PROC_SET_GUESTVM(struct proc *proc, struct guestvm *guestvm)
179 proc->p_guestvm = guestvm; \
182 static void
183 print_vmcb_seg(struct vmcb_seg* vmcb_seg, const char* name)
185 printf("%s Selector\n", name);
186 printf("Selector : %" PRIx16 "\n", vmcb_seg->selector);
187 printf("Attributes : %" PRIx16 "\n", vmcb_seg->attrib);
188 printf("Limit : %" PRIx32 "\n", vmcb_seg->limit);
189 printf("Base Address : %" PRIx64 "\n", vmcb_seg->base);
190 printf("\n");
193 static void
194 print_vmcb(struct vmcb *vmcb)
196 printf("VMCB Control Area\n");
197 printf("Intercept CR Reads : %" PRIx16 "\n", vmcb->control.intercept_cr_reads);
198 printf("Intercept CR Writes : %" PRIx16 "\n", vmcb->control.intercept_cr_writes);
199 printf("Intercept DR Reads : %" PRIx16 "\n", vmcb->control.intercept_dr_reads);
200 printf("Intercept DR Writes : %" PRIx16 "\n", vmcb->control.intercept_dr_writes);
201 printf("Intercept Exceptions : %" PRIx32 "\n", vmcb->control.intercept_exceptions);
202 printf("Intercepts : %" PRIx64 "\n", vmcb->control.intercepts);
203 printf("Reserved 1: \n");
204 for(int i=0; i < 44; i++) {
205 printf("%" PRIx8 "", vmcb->control.reserved_1[i]); /* Should be Zero */
207 printf("\n");
208 printf("IOPM Base PA : %" PRIx64 "\n", vmcb->control.iopm_base_pa);
209 printf("MSRPM Base PA : %" PRIx64 "\n", vmcb->control.msrpm_base_pa);
210 printf("TSC Offset : %" PRIx64 "\n", vmcb->control.tsc_offset);
211 printf("Guest ASID : %" PRIx32 "\n", vmcb->control.guest_asid);
212 printf("TLB Control : %" PRIx8 "\n", vmcb->control.tlb_control);
213 printf("Reserved 2 : \n");
214 for(int i=0; i < 3; i++) {
215 printf("%" PRIx8 "", vmcb->control.reserved_1[i]); /* Should be Zero */
217 printf("\n");
218 printf("Virtual TPR : %" PRIx8 "\n", vmcb->control.v_tpr);
219 printf("Virtual IRQ Pending : %" PRIx8 "\n", vmcb->control.v_irq_pending);
220 printf("Virtual Interrupt : %" PRIx8 "\n", vmcb->control.v_intr);
221 printf("Virtual Interrupt Masking: %" PRIx8 "\n", vmcb->control.v_intr_masking);
222 printf("Virtual Interrupt Vector : %" PRIx8 "\n", vmcb->control.v_intr_vector);
223 printf("Reserved 6 : \n");
224 for(int i=0; i < 3; i++) {
225 printf("%" PRIx8 "", vmcb->control.reserved_6[i]); /* Should be Zero */
227 printf("\n");
228 printf("Interrupt Shadow : %" PRIx8 "\n", vmcb->control.intr_shadow);
229 printf("Reserved 7 : \n");
230 for(int i=0; i < 7; i++) {
231 printf("%" PRIx8 "", vmcb->control.reserved_7[i]); /* Should be Zero */
233 printf("\n");
234 printf("Exit Code : %" PRIx64 "\n", vmcb->control.exit_code);
235 printf("Exit Info 1 : %" PRIx64 "\n", vmcb->control.exit_info_1);
236 printf("Exit Info 2 : %" PRIx64 "\n", vmcb->control.exit_info_2);
237 printf("Exit Interrupt Info : %" PRIx32 "\n", vmcb->control.exit_int_info);
238 printf("Exit Interrupt Info Err Code: %" PRIx32 "\n", vmcb->control.exit_int_info_err_code);
239 printf("Nested Control : %" PRIx64 "\n", vmcb->control.nested_ctl);
240 printf("Reserved 8 : \n");
241 for(int i=0; i < 16; i++) {
242 printf("%" PRIx8 "", vmcb->control.reserved_8[i]); /* Should be Zero */
244 printf("\n");
245 printf("Event Injection : %" PRIx64 "\n", vmcb->control.event_inj);
246 printf("Nested CR3 : %" PRIx64 "\n", vmcb->control.nested_cr3);
247 printf("LBR Virtualization Enable: %" PRIx64 "\n", vmcb->control.lbr_virt_enable);
248 printf("Reserved 9 : \n");
249 for(int i=0; i < 832; i++) {
250 printf("%" PRIx8 "", vmcb->control.reserved_9[i]); /* Should be Zero */
252 printf("\n");
254 printf("\n");
256 printf("VMCB Save Area\n");
257 print_vmcb_seg(&(vmcb->save.es), "ES");
258 print_vmcb_seg(&(vmcb->save.cs), "CS");
259 print_vmcb_seg(&(vmcb->save.ss), "SS");
260 print_vmcb_seg(&(vmcb->save.ds), "DS");
261 print_vmcb_seg(&(vmcb->save.fs), "FS");
262 print_vmcb_seg(&(vmcb->save.gs), "GS");
263 print_vmcb_seg(&(vmcb->save.gdtr), "GDTR");
264 print_vmcb_seg(&(vmcb->save.ldtr), "LDTR");
265 print_vmcb_seg(&(vmcb->save.idtr), "IDTR");
266 print_vmcb_seg(&(vmcb->save.tr), "TR");
267 printf("Reserved 1 : \n");
268 for(int i=0; i < 43; i++) {
269 printf("%" PRIx8 "", vmcb->save.reserved_1[i]); /* Should be Zero */
271 printf("\n");
272 printf("Current Processor Level : %" PRIx8 "\n", vmcb->save.cpl);
273 printf("Reserved 2 : \n");
274 for(int i=0; i < 4; i++) {
275 printf("%" PRIx8 "", vmcb->save.reserved_2[i]); /* Should be Zero */
277 printf("\n");
278 printf("EFER : %" PRIx64 "\n", vmcb->save.efer);
279 printf("Reserved 3 : \n");
280 for(int i=0; i < 112; i++) {
281 printf("%" PRIx8 "", vmcb->save.reserved_3[i]); /* Should be Zero */
283 printf("\n");
284 printf("Control Register 4 : %" PRIx64 "\n", vmcb->save.cr4);
285 printf("Control Register 3 : %" PRIx64 "\n", vmcb->save.cr3);
286 printf("Control Register 0 : %" PRIx64 "\n", vmcb->save.cr0);
287 printf("Debug Register 7 : %" PRIx64 "\n", vmcb->save.dr7);
288 printf("Debug Register 6 : %" PRIx64 "\n", vmcb->save.dr6);
289 printf("RFlags : %" PRIx64 "\n", vmcb->save.rflags);
290 printf("RIP : %" PRIx64 "\n", vmcb->save.rip);
291 printf("Reserved 4 : \n");
292 for(int i=0; i < 88; i++) {
293 printf("%" PRIx8 "", vmcb->save.reserved_4[i]); /* Should be Zero */
295 printf("\n");
296 printf("RSP : %" PRIx64 "\n", vmcb->save.rsp);
297 printf("Reserved 5 : \n");
298 for(int i=0; i < 24; i++) {
299 printf("%" PRIx8 "", vmcb->save.reserved_5[i]); /* Should be Zero */
301 printf("\n");
302 printf("RAX : %" PRIx64 "\n", vmcb->save.rax);
303 printf("STAR : %" PRIx64 "\n", vmcb->save.star);
304 printf("LSTAR : %" PRIx64 "\n", vmcb->save.lstar);
305 printf("CSTAR : %" PRIx64 "\n", vmcb->save.cstar);
306 printf("SFMASK : %" PRIx64 "\n", vmcb->save.sfmask);
307 printf("Kernel GS Base : %" PRIx64 "\n", vmcb->save.kernel_gs_base);
308 printf("SYSENTER CS : %" PRIx64 "\n", vmcb->save.sysenter_cs);
309 printf("SYSENTER ESP : %" PRIx64 "\n", vmcb->save.sysenter_esp);
310 printf("SYSENTER EIP : %" PRIx64 "\n", vmcb->save.sysenter_eip);
311 printf("Control Register 2 : %" PRIx64 "\n", vmcb->save.cr2);
312 printf("Reserved 6 : \n");
313 for(int i=0; i < 32; i++) {
314 printf("%" PRIx8 "", vmcb->save.reserved_6[i]); /* Should be Zero */
316 printf("\n");
317 printf("Global PAT : %" PRIx64 "\n", vmcb->save.g_pat);
318 printf("Debug Control : %" PRIx64 "\n", vmcb->save.dbg_ctl);
319 printf("BR From : %" PRIx64 "\n", vmcb->save.br_from);
320 printf("BR To : %" PRIx64 "\n", vmcb->save.br_to);
321 printf("Last Exception From : %" PRIx64 "\n", vmcb->save.last_excp_from);
322 printf("Last Exception To : %" PRIx64 "\n", vmcb->save.last_excp_to);
324 printf("\n\n");
/*
 * Debug helpers for dumping the host TSS descriptor and TSS contents.
 * Compiled out (#if 0); kept for use while debugging the world switch.
 */
327 #if 0
328 static void
329 print_tss_desc(struct system_segment_descriptor *tss_desc)
331 printf("TSS desc @ %p:\n", tss_desc);
332 printf("sd_lolimit: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_lolimit);
333 printf("sd_lobase: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_lobase);
334 printf("sd_type: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_type);
335 printf("sd_dpl: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_dpl);
336 printf("sd_p: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_p);
337 printf("sd_hilimit: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_hilimit);
338 printf("sd_xx0: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx0);
339 printf("sd_gran: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_gran);
340 printf("sd_hibase: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_hibase);
341 printf("sd_xx1: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx1);
342 printf("sd_mbz: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_mbz);
343 printf("sd_xx2: 0x%" PRIx64 "\n", (u_int64_t) tss_desc->sd_xx2);
344 printf("\n\n");
347 static void
348 print_tss(struct system_segment_descriptor *tss_desc)
350 u_int32_t *base;
351 int limit;
352 int i;
/* Reassemble the split base/limit fields from the descriptor. */
354 base = (u_int32_t*) ((((u_int64_t) tss_desc->sd_hibase) << 24) | ((u_int64_t) tss_desc->sd_lobase));
355 limit = ((tss_desc->sd_hilimit << 16) | tss_desc->sd_lolimit) / 4;
357 printf("TSS: @ %p\n", base);
358 for (i = 0; i <= limit; i++)
359 printf("%x: 0x%" PRIx32 "\n", i, base[i]);
360 printf("\n\n");
362 #endif
364 static inline void
365 print_vmcb_save_area(struct vmcb *vmcb)
367 printf("VMCB save area:\n");
368 printf(" cs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
369 vmcb->save.cs.selector,
370 vmcb->save.cs.attrib,
371 vmcb->save.cs.limit,
372 vmcb->save.cs.base);
373 printf(" fs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
374 vmcb->save.fs.selector,
375 vmcb->save.fs.attrib,
376 vmcb->save.fs.limit,
377 vmcb->save.fs.base);
378 printf(" gs: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
379 vmcb->save.gs.selector,
380 vmcb->save.gs.attrib,
381 vmcb->save.gs.limit,
382 vmcb->save.gs.base);
383 printf(" tr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
384 vmcb->save.tr.selector,
385 vmcb->save.tr.attrib,
386 vmcb->save.tr.limit,
387 vmcb->save.tr.base);
388 printf(" ldtr: [selector %" PRIx16 ", attrib %" PRIx16 ", limit %" PRIx32 ", base %" PRIx64 "]\n",
389 vmcb->save.ldtr.selector,
390 vmcb->save.ldtr.attrib,
391 vmcb->save.ldtr.limit,
392 vmcb->save.ldtr.base);
393 printf(" rip: %" PRIx64 "\n", vmcb->save.rip);
394 printf(" kernel_gs_base: %" PRIx64 "\n", vmcb->save.kernel_gs_base);
395 printf(" star: %" PRIx64 "\n", vmcb->save.star);
396 printf(" lstar: %" PRIx64 "\n", vmcb->save.lstar);
397 printf(" cstar: %" PRIx64 "\n", vmcb->save.cstar);
398 printf(" sfmask: %" PRIx64 "\n", vmcb->save.sfmask);
399 printf(" sysenter_cs: %" PRIx64 "\n", vmcb->save.sysenter_cs);
400 printf(" sysenter_esp: %" PRIx64 "\n", vmcb->save.sysenter_esp);
401 printf(" sysenter_eip: %" PRIx64 "\n", vmcb->save.sysenter_eip);
402 printf("\n\n");
405 static int
406 vmrun_assert(struct vmcb *vmcb)
408 #define A(cond) do { if ((cond)) { printf("Error: assertion not met on line %d\n", __LINE__); bad = 1; } } while (0)
410 int bad;
412 bad = 0;
414 // The following are illegal:
416 //EFER.SVME is zero.
417 A((vmcb->save.efer & 0x0000000000001000) == 0);
419 // CR0.CD is zero and CR0.NW is set
420 A( ((vmcb->save.cr0 & 0x0000000040000000) == 0) &&
421 ((vmcb->save.cr0 & 0x0000000020000000) != 0));
423 // CR0[63:32] are not zero.
424 A((vmcb->save.cr0 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
426 // Any MBZ bit of CR3 is set.
427 A((vmcb->save.cr3 & 0xFFF0000000000000) != 0);
429 // CR4[63:11] are not zero.
430 A((vmcb->save.cr4 & 0xFFFFFFFFFFFFF800) == 0xFFFFFFFFFFFFF800);
432 // DR6[63:32] are not zero.
433 A((vmcb->save.dr6 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
435 // DR7[63:32] are not zero.
436 A((vmcb->save.dr7 & 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
438 // EFER[63:15] are not zero.
439 A((vmcb->save.efer & 0xFFFFFFFFFFFF8000) == 0xFFFFFFFFFFF8000);
441 // EFER.LMA or EFER.LME is non-zero and this processor does not support long mode.
442 //// A((vmcb->save.efer & 0x0000000000000500) != 0);
444 // EFER.LME and CR0.PG are both set and CR4.PAE is zero.
445 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
446 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
447 ((vmcb->save.cr4 & 0x0000000000000020) != 0));
449 // EFER.LME and CR0.PG are both non-zero and CR0.PE is zero.
450 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
451 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
452 ((vmcb->save.cr0 & 0x0000000000000001) == 0));
454 // EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
455 // cs.attrib = concat 55-52 and 47-40 (p372 v2)
456 A( ((vmcb->save.efer & 0x0000000000000100) != 0) &&
457 ((vmcb->save.cr0 & 0x0000000080000000) != 0) &&
458 ((vmcb->save.cr4 & 0x0000000000000020) != 0) &&
459 ((vmcb->save.cs.attrib & 0x0200) != 0) &&
460 ((vmcb->save.cs.attrib & 0x0400) != 0));
462 // The VMRUN intercept bit is clear.
463 A((vmcb->control.intercepts & 0x0000000100000000) == 0);
465 // The MSR or IOIO intercept tables extend to a physical address that is
466 // greater than or equal to the maximum supported physical address.
468 // Illegal event injection (see Section 15.19 on page 391).
470 // ASID is equal to zero.
471 A(vmcb->control.guest_asid == 0);
473 // VMRUN can load a guest value of CR0 with PE = 0 but PG = 1, a
474 // combination that is otherwise illegal (see Section 15.18).
476 // In addition to consistency checks, VMRUN and #VMEXIT canonicalize (i.e.,
477 // sign-extend to 63 bits) all base addresses in the segment registers
478 // that have been loaded.
480 return bad;
482 #undef A
/*
 * World switch: enter the guest with VMRUN and return after #VMEXIT.
 *
 * Sequence: deliver any queued virtual interrupt, sanity-check the VMCB
 * (abort silently on failure), copy the software-held RAX/RSP/RIP into
 * the VMCB, save host state that VMRUN does not preserve (FS/GS bases,
 * segment selectors, CR2, DR6/DR7, syscall MSRs), then CLGI ->
 * VMLOAD/VMRUN/VMSAVE with the remaining guest GPRs moved in and out by
 * hand -> restore host state, re-arm the (now busy-marked) host TSS,
 * and STGI.  Exit information is left in vmcb->control for the caller.
 */
485 static void
486 fkvm_vcpu_run(struct vcpu *vcpu)
488 u_int64_t lstar;
489 u_int64_t cstar;
490 u_int64_t star;
491 u_int64_t sfmask;
493 u_short fs_selector;
494 u_short gs_selector;
495 u_short ldt_selector;
497 unsigned long host_cr2;
498 unsigned long host_dr6;
499 unsigned long host_dr7;
501 struct system_segment_descriptor *tss_desc;
502 u_int64_t sel;
504 struct vmcb *vmcb;
506 //printf("begin fkvm_vcpu_run\n");
508 vmcb = vcpu->vmcb;
/* Move the highest queued virq (if any) into the VMCB for injection. */
510 fkvm_virq_dequeue(vcpu);
/* Refuse to VMRUN a VMCB that would cause VMEXIT_INVALID. */
512 if (vmrun_assert(vmcb))
513 return;
515 tss_desc = (struct system_segment_descriptor*) (&gdt[GPROC0_SEL]);
516 sel = GSEL(GPROC0_SEL, SEL_KPL);
518 // printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64 "\n", sel);
519 // print_tss_desc(tss_desc);
520 // print_tss(tss_desc);
522 // print_vmcb_save_area(vmcb);
523 // printf("vcpu->regs[VCPU_REGS_RIP]: 0x%lx\n", vcpu->regs[VCPU_REGS_RIP]);
524 // disable_intr();
/* RAX/RSP/RIP travel via the VMCB; the other GPRs via the asm below. */
526 vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX];
527 vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP];
528 vmcb->save.rip = vcpu->regs[VCPU_REGS_RIP];
530 /* meh: kvm has pre_svm_run(svm); */
/* Save host state that VMRUN/VMSAVE does not preserve for us. */
532 vcpu->host_fs_base = rdmsr(MSR_FSBASE);
533 vcpu->host_gs_base = rdmsr(MSR_GSBASE);
534 // printf("host_fs_base: 0x%" PRIx64 "\n", vcpu->host_fs_base);
535 // printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);
537 fs_selector = rfs();
538 gs_selector = rgs();
539 ldt_selector = rldt();
540 // printf("fs selector: %hx\n", fs_selector);
541 // printf("gs selector: %hx\n", gs_selector);
542 // printf("ldt selector: %hx\n", ldt_selector);
544 host_cr2 = rcr2();
546 host_dr6 = rdr6();
547 host_dr7 = rdr7();
/* Hand the guest its last page-fault address. */
549 vmcb->save.cr2 = vcpu->cr2;
550 /* meh: cr3? */
551 // TODO: something with apic_base?
553 /* meh: dr7? db_regs? */
555 // printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
556 // printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
557 // printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
558 // printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));
/* Syscall MSRs: saved here, restored after #VMEXIT below. */
560 star = rdmsr(MSR_STAR);
561 lstar = rdmsr(MSR_LSTAR);
562 cstar = rdmsr(MSR_CSTAR);
563 sfmask = rdmsr(MSR_SF_MASK);
565 // printf("CLGI...\n");
/* Block physical interrupts until STGI after the switch. */
567 __asm __volatile (SVM_CLGI);
570 // enable_intr();
/*
 * Load guest GPRs from vcpu->regs, VMLOAD/VMRUN/VMSAVE with RAX
 * pointing at the VMCB's physical address, then store the guest GPRs
 * back.  RBP is saved/restored on the stack around the whole sequence.
 */
572 __asm __volatile (
573 "push %%rbp; \n\t"
574 "mov %c[rbx](%[svm]), %%rbx \n\t"
575 "mov %c[rcx](%[svm]), %%rcx \n\t"
576 "mov %c[rdx](%[svm]), %%rdx \n\t"
577 "mov %c[rsi](%[svm]), %%rsi \n\t"
578 "mov %c[rdi](%[svm]), %%rdi \n\t"
579 "mov %c[rbp](%[svm]), %%rbp \n\t"
580 "mov %c[r8](%[svm]), %%r8 \n\t"
581 "mov %c[r9](%[svm]), %%r9 \n\t"
582 "mov %c[r10](%[svm]), %%r10 \n\t"
583 "mov %c[r11](%[svm]), %%r11 \n\t"
584 "mov %c[r12](%[svm]), %%r12 \n\t"
585 "mov %c[r13](%[svm]), %%r13 \n\t"
586 "mov %c[r14](%[svm]), %%r14 \n\t"
587 "mov %c[r15](%[svm]), %%r15 \n\t"
589 /* Enter guest mode */
590 "push %%rax \n\t"
591 "mov %c[vmcb](%[svm]), %%rax \n\t"
592 SVM_VMLOAD "\n\t"
593 SVM_VMRUN "\n\t"
594 SVM_VMSAVE "\n\t"
595 "pop %%rax \n\t"
597 /* Save guest registers, load host registers */
598 "mov %%rbx, %c[rbx](%[svm]) \n\t"
599 "mov %%rcx, %c[rcx](%[svm]) \n\t"
600 "mov %%rdx, %c[rdx](%[svm]) \n\t"
601 "mov %%rsi, %c[rsi](%[svm]) \n\t"
602 "mov %%rdi, %c[rdi](%[svm]) \n\t"
603 "mov %%rbp, %c[rbp](%[svm]) \n\t"
604 "mov %%r8, %c[r8](%[svm]) \n\t"
605 "mov %%r9, %c[r9](%[svm]) \n\t"
606 "mov %%r10, %c[r10](%[svm]) \n\t"
607 "mov %%r11, %c[r11](%[svm]) \n\t"
608 "mov %%r12, %c[r12](%[svm]) \n\t"
609 "mov %%r13, %c[r13](%[svm]) \n\t"
610 "mov %%r14, %c[r14](%[svm]) \n\t"
611 "mov %%r15, %c[r15](%[svm]) \n\t"
612 "pop %%rbp"
614 : [svm]"a"(vcpu),
615 [vmcb]"i"(offsetof(struct vcpu, vmcb_pa)),
616 [rbx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBX])),
617 [rcx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RCX])),
618 [rdx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDX])),
619 [rsi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RSI])),
620 [rdi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDI])),
621 [rbp]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBP])),
622 [r8 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R8 ])),
623 [r9 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R9 ])),
624 [r10]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R10])),
625 [r11]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R11])),
626 [r12]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R12])),
627 [r13]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R13])),
628 [r14]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R14])),
629 [r15]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R15]))
630 : "cc", "memory",
631 "rbx", "rcx", "rdx", "rsi", "rdi",
632 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
636 /* meh: dr7? db_regs? */
/* Pull the guest's exit-time CR2 and RAX/RSP/RIP out of the VMCB. */
638 vcpu->cr2 = vmcb->save.cr2;
640 vcpu->regs[VCPU_REGS_RAX] = vmcb->save.rax;
641 vcpu->regs[VCPU_REGS_RSP] = vmcb->save.rsp;
642 vcpu->regs[VCPU_REGS_RIP] = vmcb->save.rip;
/* Restore the host state saved above. */
644 load_dr6(host_dr6);
645 load_dr7(host_dr7);
647 load_cr2(host_cr2);
649 load_fs(fs_selector);
650 load_gs(gs_selector);
651 lldt(ldt_selector);
653 wrmsr(MSR_FSBASE, vcpu->host_fs_base);
654 wrmsr(MSR_GSBASE, vcpu->host_gs_base);
/*
 * Reload TR: clear the busy bit the previous LTR left in the GDT
 * descriptor, otherwise ltr() would fault.
 */
656 tss_desc->sd_type = SDT_SYSTSS;
657 ltr(sel);
659 wrmsr(MSR_STAR, star);
660 wrmsr(MSR_LSTAR, lstar);
661 wrmsr(MSR_CSTAR, cstar);
662 wrmsr(MSR_SF_MASK, sfmask);
664 // disable_intr();
/* Re-enable physical interrupt delivery. */
666 __asm __volatile (SVM_STGI);
668 // printf("STGI\n");
670 // print_tss_desc(tss_desc);
671 // print_tss(tss_desc);
673 // print_vmcb_save_area(vmcb);
675 // enable_intr();
677 /* meh: next_rip */
680 static void
681 _fkvm_init_seg(struct vmcb_seg *seg, uint16_t attrib)
683 seg->selector = 0;
684 seg->attrib = VMCB_SELECTOR_P_MASK | attrib;
685 seg->limit = 0xffff;
686 seg->base = 0;
689 static inline void
690 fkvm_init_seg(struct vmcb_seg *seg)
692 _fkvm_init_seg(seg, VMCB_SELECTOR_S_MASK | VMCB_SELECTOR_WRITE_MASK);
695 static inline void
696 fkvm_init_sys_seg(struct vmcb_seg *seg, uint16_t attrib)
698 _fkvm_init_seg(seg, attrib);
701 static void*
702 fkvm_iopm_alloc(void)
704 return contigmalloc(IOPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
707 static void
708 fkvm_iopm_init(void *iopm)
710 memset(iopm, 0xff, IOPM_SIZE); /* TODO: we may want to allow access to PC debug port */
713 static void
714 fkvm_iopm_free(void *iopm)
716 contigfree(iopm, IOPM_SIZE, M_DEVBUF);
719 static void*
720 fkvm_msrpm_alloc(void)
722 return contigmalloc(MSRPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
725 static void
726 fkvm_msrpm_init(void *msrpm)
728 memset(msrpm, 0xff, MSRPM_SIZE); /* TODO: we may want to allow some MSR accesses */
731 static void
732 fkvm_msrpm_free(void *msrpm)
734 contigfree(msrpm, MSRPM_SIZE, M_DEVBUF);
737 static void*
738 fkvm_hsave_area_alloc(void)
740 return contigmalloc(PAGE_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
743 static void
744 fkvm_hsave_area_init(void *hsave_area)
748 static void
749 fkvm_hsave_area_free(void *hsave_area)
751 contigfree(hsave_area, PAGE_SIZE, M_DEVBUF);
/*
 * Create the vmspace that backs a guest's physical address space.
 * Returns NULL (after logging) on allocation failure.
 */
static struct vmspace*
fkvm_make_vmspace(void)
{
        struct vmspace *sp = vmspace_alloc(0, 0xffffffffffffffff);

        if (sp == NULL)
                printf("vmspace_alloc failed\n");
        return (sp);
}

/* Drop our reference on a guest's vmspace. */
static void
fkvm_destroy_vmspace(struct vmspace* sp)
{
        vmspace_free(sp);
}
774 static struct vmcb*
775 fkvm_vmcb_alloc(void)
777 return contigmalloc(PAGE_SIZE, M_DEVBUF, M_ZERO, 0, -1UL,
778 PAGE_SIZE, 0);
781 static void
782 fkvm_vmcb_init(struct vmcb *vmcb)
784 struct vmcb_control_area *control = &vmcb->control;
785 struct vmcb_save_area *save = &vmcb->save;
787 control->intercept_cr_reads = INTERCEPT_CR4_MASK;
789 control->intercept_cr_writes = INTERCEPT_CR4_MASK |
790 INTERCEPT_CR8_MASK;
792 control->intercept_dr_reads = INTERCEPT_DR0_MASK |
793 INTERCEPT_DR1_MASK |
794 INTERCEPT_DR2_MASK |
795 INTERCEPT_DR3_MASK;
797 control->intercept_dr_writes = INTERCEPT_DR0_MASK |
798 INTERCEPT_DR1_MASK |
799 INTERCEPT_DR2_MASK |
800 INTERCEPT_DR3_MASK |
801 INTERCEPT_DR5_MASK |
802 INTERCEPT_DR7_MASK;
804 control->intercept_exceptions = (1 << IDT_UD) | // Invalid Opcode
805 (1 << IDT_MC); // Machine Check
807 control->intercepts = INTERCEPT_INTR |
808 INTERCEPT_NMI |
809 INTERCEPT_SMI |
810 INTERCEPT_CPUID |
811 INTERCEPT_INVD |
812 INTERCEPT_HLT |
813 INTERCEPT_INVLPGA |
814 INTERCEPT_IOIO_PROT |
815 INTERCEPT_MSR_PROT |
816 INTERCEPT_SHUTDOWN |
817 INTERCEPT_VMRUN |
818 INTERCEPT_VMMCALL |
819 INTERCEPT_VMLOAD |
820 INTERCEPT_VMSAVE |
821 INTERCEPT_STGI |
822 INTERCEPT_CLGI |
823 INTERCEPT_SKINIT |
824 INTERCEPT_WBINVD |
825 INTERCEPT_MONITOR |
826 INTERCEPT_MWAIT_UNCOND;
828 control->iopm_base_pa = vtophys(iopm);
829 control->msrpm_base_pa = vtophys(msrpm);
830 control->tsc_offset = 0;
832 /* TODO: remove this once we assign asid's to distinct VM's */
833 control->guest_asid = 1;
834 control->tlb_control = VMCB_TLB_CONTROL_FLUSH_ALL;
836 /* let v_tpr default to 0 */
837 /* let v_irq_pending default to 0 */
838 /* let v_intr default to 0 */
840 control->v_intr_masking = 1;
842 /* let v_intr_vector default to 0 */
843 /* let intr_shadow default to 0 */
844 /* let exit_code, exit_info_1, exit_info_2, exit_int_info,
845 exit_int_info_err_code default to 0 */
847 control->nested_ctl = 1;
849 /* let event_inj default to 0 */
851 // (nested_cr3 is later)
853 /* let lbr_virt_enable default to 0 */
856 fkvm_init_seg(&save->ds);
857 fkvm_init_seg(&save->es);
858 fkvm_init_seg(&save->fs);
859 fkvm_init_seg(&save->gs);
860 fkvm_init_seg(&save->ss);
862 _fkvm_init_seg(&save->cs, VMCB_SELECTOR_READ_MASK | VMCB_SELECTOR_S_MASK |
863 VMCB_SELECTOR_CODE_MASK);
864 save->cs.selector = 0xf000;
865 save->cs.base = 0xffff0000;
867 save->gdtr.limit = 0xffff;
868 save->idtr.limit = 0xffff;
870 fkvm_init_sys_seg(&save->ldtr, SDT_SYSLDT);
871 fkvm_init_sys_seg(&save->tr, SDT_SYS286BSY);
873 save->g_pat = PAT_VALUE(PAT_WRITE_BACK, 0) | PAT_VALUE(PAT_WRITE_THROUGH, 1) |
874 PAT_VALUE(PAT_UNCACHED, 2) | PAT_VALUE(PAT_UNCACHEABLE, 3) |
875 PAT_VALUE(PAT_WRITE_BACK, 4) | PAT_VALUE(PAT_WRITE_THROUGH, 5) |
876 PAT_VALUE(PAT_UNCACHED, 6) | PAT_VALUE(PAT_UNCACHEABLE, 7);
878 /* CR0 = 6000_0010h at boot */
879 save->cr0 = CR0_ET | CR0_NW | CR0_CD;
880 save->dr6 = 0xffff0ff0;
881 save->dr7 = 0x400;
882 save->rflags = 2;
883 save->rip = 0x0000fff0;
885 save->efer = EFER_SVME;
888 static void
889 fkvm_vmcb_free(struct vmcb *vmcb)
891 contigfree(vmcb, PAGE_SIZE, M_DEVBUF);
894 static void
895 fkvm_virq_set(struct vcpu *vcpu, int virq)
897 int i, j;
899 i = virq / (sizeof(vcpu->virqs[0]) * 8);
900 j = virq % (sizeof(vcpu->virqs[0]) * 8);
902 vcpu->virqs[i] |= 1UL << j;
905 #ifndef ARRAY_SIZE
906 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
907 #endif
909 static int
910 fkvm_virq_pop(struct vcpu *vcpu)
912 int i, j;
913 for (i = ARRAY_SIZE(vcpu->virqs) - 1; i >= 0; i--) {
914 j = flsl(vcpu->virqs[i]);
915 // virqs[i] == 0 => j = 0
916 // virqs[i] == (1 << 0) => j = 1
917 // ...
918 if (j > 0) {
919 vcpu->virqs[i] &= ~(1UL << (j - 1));
920 return i * sizeof(vcpu->virqs[0]) * 8 + (j - 1);
923 return -1;
/*
 * Self-test for the virq bitmap queue (set/pop ordering and emptiness).
 * Compiled out (#if 0); re-enable while modifying the virq code.
 */
926 #if 0
927 static void
928 fkvm_virq_test(struct vcpu *vcpu)
930 #define VIRQ_ASSERT(cond) do { \
931 if (!(cond)) { \
932 printf("irq test failed %d\n", __LINE__); \
934 } while (0)
936 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == -1);
938 fkvm_virq_set(vcpu, 0);
939 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 0);
940 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == -1);
942 fkvm_virq_set(vcpu, 1);
943 fkvm_virq_set(vcpu, 0);
944 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 1);
945 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 0);
946 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == -1);
948 fkvm_virq_set(vcpu, 0);
949 fkvm_virq_set(vcpu, 1);
950 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 1);
951 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 0);
952 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == -1);
954 fkvm_virq_set(vcpu, 255);
955 fkvm_virq_set(vcpu, 0);
956 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 255);
957 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 0);
958 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == -1);
960 fkvm_virq_set(vcpu, 0);
961 fkvm_virq_set(vcpu, 237);
962 fkvm_virq_set(vcpu, 65);
963 fkvm_virq_set(vcpu, 204);
964 fkvm_virq_set(vcpu, 26);
965 fkvm_virq_set(vcpu, 234);
966 fkvm_virq_set(vcpu, 38);
967 fkvm_virq_set(vcpu, 189);
968 fkvm_virq_set(vcpu, 152);
969 fkvm_virq_set(vcpu, 29);
970 fkvm_virq_set(vcpu, 78);
971 fkvm_virq_set(vcpu, 22);
972 fkvm_virq_set(vcpu, 238);
973 fkvm_virq_set(vcpu, 118);
974 fkvm_virq_set(vcpu, 87);
975 fkvm_virq_set(vcpu, 147);
976 fkvm_virq_set(vcpu, 188);
977 fkvm_virq_set(vcpu, 252);
978 fkvm_virq_set(vcpu, 154);
979 fkvm_virq_set(vcpu, 242);
980 fkvm_virq_set(vcpu, 246);
981 fkvm_virq_set(vcpu, 40);
982 fkvm_virq_set(vcpu, 238);
983 fkvm_virq_set(vcpu, 172);
984 fkvm_virq_set(vcpu, 61);
986 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 252);
987 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 246);
988 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 242);
989 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 238);
990 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 237);
991 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 234);
992 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 204);
993 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 189);
994 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 188);
995 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 172);
996 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 154);
997 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 152);
998 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 147);
999 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 118);
1000 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 87);
1001 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 78);
1002 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 65);
1003 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 61);
1004 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 40);
1005 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 38);
1006 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 29);
1007 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 26);
1008 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 22);
1009 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == 0);
1010 VIRQ_ASSERT(fkvm_virq_pop(vcpu) == -1);
1013 #endif
1015 static void
1016 _fkvm_vmcb_set_virq(struct vcpu *vcpu, int virq)
1018 struct vmcb_control_area *control = &vcpu->vmcb->control;
1020 control->v_intr_vector = virq;
1021 control->v_intr = 0xf;
1022 control->v_irq_pending = 1;
1025 /* call this when we have a new interrupt for the vcpu */
1026 static void
1027 fkvm_virq_enqueue(struct vcpu *vcpu, int virq)
1029 struct vmcb_control_area *control = &vcpu->vmcb->control;
1031 if (control->v_irq_pending) {
1032 if (virq < control->v_intr_vector)
1033 fkvm_virq_set(vcpu, virq);
1034 else {
1035 fkvm_virq_set(vcpu, control->v_intr_vector);
1036 _fkvm_vmcb_set_virq(vcpu, virq);
1039 else {
1040 _fkvm_vmcb_set_virq(vcpu, virq);
/* call this when the vcpu has finished handling an interrupt */
static void
fkvm_virq_dequeue(struct vcpu *vcpu)
{
	struct vmcb_control_area *control = &vcpu->vmcb->control;
	int virq;

	if (control->v_irq_pending)
		return; /* there's already an interrupt pending */

	/* Promote the highest-priority queued vector, if any, into the VMCB. */
	virq = fkvm_virq_pop(vcpu);
	if (virq < 0)
		return; /* no interrupts waiting */

	_fkvm_vmcb_set_virq(vcpu, virq);
}
1062 fkvm_inject_virq(struct thread *td, struct fkvm_inject_virq_args *uap)
1064 struct vcpu *vcpu = TD_GET_VCPU(td);
1066 if (uap->virq < 0 || uap->virq > 255)
1067 return EINVAL;
1069 fkvm_virq_enqueue(vcpu, uap->virq);
1071 return 0;
1074 static struct vcpu*
1075 fkvm_vcpu_create(struct guestvm *guest_vm)
1077 struct vcpu *vcpu;
1078 vcpu = malloc(sizeof(struct vcpu), M_DEVBUF, M_WAITOK|M_ZERO);
1080 vcpu->vmcb = fkvm_vmcb_alloc();
1081 vcpu->vmcb_pa = vtophys(vcpu->vmcb);
1082 printf("vmcb = 0x%p\n", vcpu->vmcb);
1083 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu->vmcb_pa);
1085 fkvm_vmcb_init(vcpu->vmcb);
1086 vcpu->vmcb->control.nested_cr3 = guest_vm->nested_cr3;
1087 vcpu->regs[VCPU_REGS_RIP] = vcpu->vmcb->save.rip;
1089 vcpu->guest_vm = guest_vm;
1091 return vcpu;
/* Release a VCPU created by fkvm_vcpu_create: its VMCB, then the vcpu. */
static void
fkvm_vcpu_destroy(struct vcpu *vcpu)
{
	fkvm_vmcb_free(vcpu->vmcb);
	free(vcpu, M_DEVBUF);
}
/* Allocate a zeroed guestvm container; pair with fkvm_guestvm_free. */
static struct guestvm*
fkvm_guestvm_alloc(void)
{
	return malloc(sizeof(struct guestvm), M_DEVBUF, M_WAITOK|M_ZERO);
}
/* Free the guestvm container only; VCPUs/vmspace are released elsewhere
 * (see fkvm_destroy_vm). */
static void
fkvm_guestvm_free(struct guestvm* guest_vm)
{
	free(guest_vm, M_DEVBUF);
}
/*
 * Append 'vcpu' to the guest's VCPU table.
 * NOTE(review): no bound check against the vcpus[] capacity and no
 * synchronization — confirm callers serialize and respect the limit.
 */
static void
fkvm_guestvm_add_vcpu(struct guestvm *guest_vm, struct vcpu *vcpu)
{
	guest_vm->vcpus[guest_vm->nr_vcpus] = vcpu;
	guest_vm->nr_vcpus++;  /* TODO: Probably not safe to increment */
	                       /* How about a lock to protect all of this? */
}
/* System call stub: not implemented yet — always ENOSYS once loaded. */
int
fkvm_userpoke(struct thread *td, struct fkvm_userpoke_args *uap)
{
	printf("fkvm_userpoke\n");

	if (!fkvm_loaded)
		return ENODEV;

	return ENOSYS;
}
/*
 * Return non-zero iff 'vaddr' resolves to the same vm_map entry as
 * 'expected_entry' — i.e. the address lies in that entry's range.
 * NOTE(review): the entry pointer is compared after vm_map_lookup_done
 * drops the map lock, so the entry could in principle be recycled;
 * only a pointer comparison is performed, never a dereference.
 */
static int
fkvm_mem_has_entry(vm_map_entry_t expected_entry, vm_map_t vm_map, vm_offset_t vaddr)
{
	vm_map_entry_t lookup_entry;
	vm_object_t throwaway_object;
	vm_pindex_t throwaway_pindex;
	vm_prot_t throwaway_prot;
	boolean_t throwaway_wired;
	int error;

	error = vm_map_lookup(&vm_map, /* IN/OUT */
			      vaddr,
			      VM_PROT_READ|VM_PROT_WRITE,
			      &lookup_entry,      /* OUT */
			      &throwaway_object,  /* OUT */
			      &throwaway_pindex,  /* OUT */
			      &throwaway_prot,    /* OUT */
			      &throwaway_wired);  /* OUT */
	if (error != KERN_SUCCESS)
		return 0;
	vm_map_lookup_done(vm_map, lookup_entry);
	return (lookup_entry == expected_entry);
}
/*
 * Check that guest-physical range [start, end] is fully backed by a
 * single mapping in the guest's vm_map.  Returns 0 if so, EFAULT
 * otherwise.
 */
static int
fkvm_guest_check_range(struct guestvm *guest_vm, uint64_t start, uint64_t end)
{
	vm_map_t guest_vm_map;
	vm_map_entry_t lookup_entry;
	vm_object_t throwaway_object;
	vm_pindex_t throwaway_pindex;
	vm_prot_t throwaway_prot;
	boolean_t throwaway_wired;
	int ret;
	int error;

	guest_vm_map = &guest_vm->sp->vm_map;

	error = vm_map_lookup(&guest_vm_map, /* IN/OUT */
			      start,
			      VM_PROT_READ|VM_PROT_WRITE,
			      &lookup_entry,      /* OUT */
			      &throwaway_object,  /* OUT */
			      &throwaway_pindex,  /* OUT */
			      &throwaway_prot,    /* OUT */
			      &throwaway_wired);  /* OUT */
	if (error != KERN_SUCCESS)
		return EFAULT;
	vm_map_lookup_done(guest_vm_map, lookup_entry);

	/*
	 * TODO: We can't actually nest the lookups:
	 * panic: _sx_xlock_hard: recursed on non-recursive sx user map @ ../../../vm/vm_map.c:3115
	 * Therefore, I've moved the lookup_done above for now, but we really need a lock here.
	 *
	 * Maybe it's better to use vm_map_lookup_entry directly.
	 */

	/* Racy: lookup_entry is only pointer-compared inside (see above). */
	if (fkvm_mem_has_entry(lookup_entry, guest_vm_map, end))
		ret = 0;
	else
		ret = EFAULT;

	return ret;
}
/*
 * Fill 'out' with the guest's general-purpose register state: GPRs and
 * RIP come from the software shadow file (vcpu->regs), RFLAGS from the
 * VMCB save area.
 */
static void
fkvm_get_regs_regs(struct vcpu *vcpu, struct kvm_regs *out)
{
	out->rax = vcpu->regs[VCPU_REGS_RAX];
	out->rbx = vcpu->regs[VCPU_REGS_RBX];
	out->rcx = vcpu->regs[VCPU_REGS_RCX];
	out->rdx = vcpu->regs[VCPU_REGS_RDX];
	out->rsi = vcpu->regs[VCPU_REGS_RSI];
	out->rdi = vcpu->regs[VCPU_REGS_RDI];
	out->rsp = vcpu->regs[VCPU_REGS_RSP];
	out->rbp = vcpu->regs[VCPU_REGS_RBP];
	out->r8  = vcpu->regs[VCPU_REGS_R8];
	out->r9  = vcpu->regs[VCPU_REGS_R9];
	out->r10 = vcpu->regs[VCPU_REGS_R10];
	out->r11 = vcpu->regs[VCPU_REGS_R11];
	out->r12 = vcpu->regs[VCPU_REGS_R12];
	out->r13 = vcpu->regs[VCPU_REGS_R13];
	out->r14 = vcpu->regs[VCPU_REGS_R14];
	out->r15 = vcpu->regs[VCPU_REGS_R15];
	out->rip = vcpu->regs[VCPU_REGS_RIP];
	out->rflags = vcpu->vmcb->save.rflags;
}
/*
 * Load the guest's general-purpose register state from 'in': GPRs and
 * RIP into the software shadow file, RFLAGS directly into the VMCB.
 * Inverse of fkvm_get_regs_regs.
 */
static void
fkvm_set_regs_regs(struct vcpu *vcpu, const struct kvm_regs *in)
{
	vcpu->regs[VCPU_REGS_RAX] = in->rax;
	vcpu->regs[VCPU_REGS_RBX] = in->rbx;
	vcpu->regs[VCPU_REGS_RCX] = in->rcx;
	vcpu->regs[VCPU_REGS_RDX] = in->rdx;
	vcpu->regs[VCPU_REGS_RSI] = in->rsi;
	vcpu->regs[VCPU_REGS_RDI] = in->rdi;
	vcpu->regs[VCPU_REGS_RSP] = in->rsp;
	vcpu->regs[VCPU_REGS_RBP] = in->rbp;
	vcpu->regs[VCPU_REGS_R8]  = in->r8;
	vcpu->regs[VCPU_REGS_R9]  = in->r9;
	vcpu->regs[VCPU_REGS_R10] = in->r10;
	vcpu->regs[VCPU_REGS_R11] = in->r11;
	vcpu->regs[VCPU_REGS_R12] = in->r12;
	vcpu->regs[VCPU_REGS_R13] = in->r13;
	vcpu->regs[VCPU_REGS_R14] = in->r14;
	vcpu->regs[VCPU_REGS_R15] = in->r15;
	vcpu->regs[VCPU_REGS_RIP] = in->rip;
	vcpu->vmcb->save.rflags = in->rflags;
}
/* Copy a descriptor-table register (GDTR/IDTR) out of the VMCB. */
static void
fkvm_get_vmcb_dtable(struct vmcb_seg *vmcb_seg, struct kvm_dtable *fkvm_dtable)
{
	fkvm_dtable->base = vmcb_seg->base;
	fkvm_dtable->limit = vmcb_seg->limit;
}
/* Copy a descriptor-table register (GDTR/IDTR) into the VMCB. */
static void
fkvm_set_vmcb_dtable(struct vmcb_seg *vmcb_seg, struct kvm_dtable *fkvm_dtable)
{
	vmcb_seg->base = fkvm_dtable->base;
	vmcb_seg->limit = fkvm_dtable->limit;
}
1260 static void
1261 fkvm_get_vmcb_seg(struct vmcb_seg *vmcb_seg, struct kvm_segment *fkvm_seg)
1263 fkvm_seg->base = vmcb_seg->base;
1264 fkvm_seg->limit = vmcb_seg->limit;
1265 fkvm_seg->selector = vmcb_seg->selector;
1267 if (vmcb_seg->attrib == 0)
1268 fkvm_seg->unusable = 1;
1269 else {
1270 fkvm_seg->type = (vmcb_seg->attrib & VMCB_SELECTOR_TYPE_MASK);
1271 fkvm_seg->s = (vmcb_seg->attrib & VMCB_SELECTOR_S_MASK) >> VMCB_SELECTOR_S_SHIFT;
1272 fkvm_seg->dpl = (vmcb_seg->attrib & VMCB_SELECTOR_DPL_MASK) >> VMCB_SELECTOR_DPL_SHIFT;
1273 fkvm_seg->present = (vmcb_seg->attrib & VMCB_SELECTOR_P_MASK) >> VMCB_SELECTOR_P_SHIFT;
1274 fkvm_seg->avl = (vmcb_seg->attrib & VMCB_SELECTOR_AVL_MASK) >> VMCB_SELECTOR_AVL_SHIFT;
1275 fkvm_seg->l = (vmcb_seg->attrib & VMCB_SELECTOR_L_MASK) >> VMCB_SELECTOR_L_SHIFT;
1276 fkvm_seg->db = (vmcb_seg->attrib & VMCB_SELECTOR_DB_MASK) >> VMCB_SELECTOR_DB_SHIFT;
1277 fkvm_seg->g = (vmcb_seg->attrib & VMCB_SELECTOR_G_MASK) >> VMCB_SELECTOR_G_SHIFT;
/*
 * Pack a flat kvm_segment back into the VMCB's compressed attribute
 * encoding (type | S | DPL | P | AVL | L | DB | G).  Inverse of
 * fkvm_get_vmcb_seg.
 */
static void
fkvm_set_vmcb_seg(struct vmcb_seg *vmcb_seg, struct kvm_segment *fkvm_seg)
{
	vmcb_seg->base = fkvm_seg->base;
	vmcb_seg->limit = fkvm_seg->limit;
	vmcb_seg->selector = fkvm_seg->selector;

	if (fkvm_seg->unusable)
		vmcb_seg->attrib=0;	/* attrib == 0 marks an unusable segment */
	else {
		vmcb_seg->attrib = (fkvm_seg->type & VMCB_SELECTOR_TYPE_MASK);
		vmcb_seg->attrib |= (fkvm_seg->s & 1) << VMCB_SELECTOR_S_SHIFT;
		vmcb_seg->attrib |= (fkvm_seg->dpl & 3) << VMCB_SELECTOR_DPL_SHIFT;
		vmcb_seg->attrib |= (fkvm_seg->present & 1) << VMCB_SELECTOR_P_SHIFT;
		vmcb_seg->attrib |= (fkvm_seg->avl & 1) << VMCB_SELECTOR_AVL_SHIFT;
		vmcb_seg->attrib |= (fkvm_seg->l & 1) << VMCB_SELECTOR_L_SHIFT;
		vmcb_seg->attrib |= (fkvm_seg->db & 1) << VMCB_SELECTOR_DB_SHIFT;
		vmcb_seg->attrib |= (fkvm_seg->g & 1) << VMCB_SELECTOR_G_SHIFT;
	}
}
/* Guest CR8 (task-priority) is virtualized via the VMCB's V_TPR field. */
static uint64_t
fkvm_get_cr8(struct vcpu *vcpu)
{
	// TODO: if cr8 has reserved bits inject GP Fault, return

	return (uint64_t) vcpu->vmcb->control.v_tpr;
}
/* Store guest CR8 into the VMCB's V_TPR field (low byte only). */
static void
fkvm_set_cr8(struct vcpu *vcpu, uint64_t cr8)
{
	// TODO: if cr8 has reserved bits inject GP Fault, return

	vcpu->vmcb->control.v_tpr = (uint8_t) cr8;
}
/*
 * Guest-visible EFER: the VMCB copy with EFER_SVME masked out, since
 * SVME is forced on by the hypervisor and must stay hidden.
 */
static uint64_t
fkvm_get_efer(struct vcpu *vcpu)
{
	struct vmcb *vmcb = vcpu->vmcb;

	return vmcb->save.efer & (~EFER_SVME);
}
1326 static void
1327 fkvm_set_efer(struct vcpu *vcpu, uint64_t efer)
1329 struct vmcb *vmcb = vcpu->vmcb;
1330 //TODO: if efer has reserved bits set: inject GP Fault
1332 if (vmcb->save.cr0 & CR0_PG) { //If paging is enabled do not allow changes to LME
1333 if ((vmcb->save.efer & EFER_LME) != (efer & EFER_LME)) {
1334 printf("fkvm_set_efer: attempt to change LME while paging\n");
1335 //TODO: inject GP fault
1339 vmcb->save.efer = efer | EFER_SVME;
/*
 * Fill 'out' with the guest's system register state: segment registers
 * and descriptor tables from the VMCB, CR2/CR3 from the software
 * shadows, CR8/EFER via their virtualizing accessors.
 */
static void
fkvm_get_regs_sregs(struct vcpu *vcpu, struct kvm_sregs *out)
{
	struct vmcb *vmcb = vcpu->vmcb;

	fkvm_get_vmcb_seg(&vmcb->save.cs, &out->cs);
	fkvm_get_vmcb_seg(&vmcb->save.ds, &out->ds);
	fkvm_get_vmcb_seg(&vmcb->save.es, &out->es);
	fkvm_get_vmcb_seg(&vmcb->save.fs, &out->fs);
	fkvm_get_vmcb_seg(&vmcb->save.gs, &out->gs);
	fkvm_get_vmcb_seg(&vmcb->save.ss, &out->ss);
	fkvm_get_vmcb_seg(&vmcb->save.tr, &out->tr);
	fkvm_get_vmcb_seg(&vmcb->save.ldtr, &out->ldt);

	fkvm_get_vmcb_dtable(&vmcb->save.idtr, &out->idt);
	fkvm_get_vmcb_dtable(&vmcb->save.gdtr, &out->gdt);

	/* CR2/CR3 are kept in software, not in the VMCB save area. */
	out->cr2 = vcpu->cr2;
	out->cr3 = vcpu->cr3;

	out->cr8 = fkvm_get_cr8(vcpu);
	out->efer = fkvm_get_efer(vcpu);
	/* TODO: apic_base */
	out->cr0 = vmcb->save.cr0;
	out->cr4 = vmcb->save.cr4;
	/* TODO: irq_pending, interrupt_bitmap, irq_summary */
}
/*
 * Load the guest's system register state from 'in'.  Inverse of
 * fkvm_get_regs_sregs; additionally derives CPL from the CS descriptor.
 */
static void
fkvm_set_regs_sregs(struct vcpu *vcpu, struct kvm_sregs *in)
{
	struct vmcb *vmcb = vcpu->vmcb;

	fkvm_set_vmcb_seg(&vmcb->save.cs, &in->cs);
	fkvm_set_vmcb_seg(&vmcb->save.ds, &in->ds);
	fkvm_set_vmcb_seg(&vmcb->save.es, &in->es);
	fkvm_set_vmcb_seg(&vmcb->save.fs, &in->fs);
	fkvm_set_vmcb_seg(&vmcb->save.gs, &in->gs);
	fkvm_set_vmcb_seg(&vmcb->save.ss, &in->ss);
	fkvm_set_vmcb_seg(&vmcb->save.tr, &in->tr);
	fkvm_set_vmcb_seg(&vmcb->save.ldtr, &in->ldt);

	/* CPL tracks the DPL of the freshly loaded CS descriptor. */
	vmcb->save.cpl = (vmcb->save.cs.attrib >> VMCB_SELECTOR_DPL_SHIFT) & 3;

	fkvm_set_vmcb_dtable(&vmcb->save.idtr, &in->idt);
	fkvm_set_vmcb_dtable(&vmcb->save.gdtr, &in->gdt);

	/* CR2/CR3 are kept in software, not in the VMCB save area. */
	vcpu->cr2 = in->cr2;
	vcpu->cr3 = in->cr3;

	fkvm_set_cr8(vcpu, in->cr8);
	fkvm_set_efer(vcpu, in->efer);
	/* TODO: apic_base */
	vmcb->save.cr0 = in->cr0;
	vmcb->save.cr4 = in->cr4;
	/* TODO: irq_pending, interrupt_bitmap, irq_summary */
}
/*
 * Read the guest-visible value of MSR 'index' into *data.  Values are
 * served from the VMCB save area or per-vcpu software state, never from
 * the physical MSR.  Returns 0 on success, ENOSYS for MSRs that are not
 * (yet) emulated.
 */
static int
fkvm_get_reg_msr(struct vcpu *vcpu, uint32_t index, uint64_t *data) {
	struct vmcb *vmcb = vcpu->vmcb;

	switch(index) {

	case MSR_TSC: {
		uint64_t tsc;

		/* Guest TSC = host TSC + the offset held in the VMCB. */
		tsc = rdtsc();
		*data = vmcb->control.tsc_offset + tsc;
		break;
	}
	case MSR_STAR: {
		*data = vmcb->save.star;
		break;
	}
	case MSR_LSTAR: {
		*data = vmcb->save.lstar;
		break;
	}
	case MSR_CSTAR: {
		*data = vmcb->save.cstar;
		break;
	}
	case MSR_GSBASE: {
		*data = vmcb->save.kernel_gs_base;
		break;
	}
	case MSR_SF_MASK: {
		*data = vmcb->save.sfmask;
		break;
	}
	case MSR_SYSENTER_CS_MSR: {
		*data = vmcb->save.sysenter_cs;
		break;
	}
	case MSR_SYSENTER_EIP_MSR: {
		*data = vmcb->save.sysenter_eip;
		break;
	}
	case MSR_SYSENTER_ESP_MSR: {
		*data = vmcb->save.sysenter_esp;
		break;
	}
	case MSR_DEBUGCTLMSR: {
		printf("unimplemented at %d\n", __LINE__);
		return ENOSYS;
		break;
	}
	case MSR_PERFEVSEL0 ... MSR_PERFEVSEL3:
	case MSR_PERFCTR0 ... MSR_PERFCTR3: {
		printf("unimplemented at %d\n", __LINE__);
		return ENOSYS;
		break;
	}
	case MSR_EFER: {
		/* fkvm_get_efer hides the forced EFER_SVME bit. */
		*data = fkvm_get_efer(vcpu);
		break;
	}
	case MSR_MC0_STATUS: {
		printf("unimplemented at %d\n", __LINE__);
		return ENOSYS;
		break;
	}
	case MSR_MCG_STATUS: {
		printf("unimplemented at %d\n", __LINE__);
		return ENOSYS;
		break;
	}
	case MSR_MCG_CTL: {
		printf("unimplemented at %d\n", __LINE__);
		return ENOSYS;
		break;
	}
	//TODO: MSR_IA32_UCODE_REV
	//TODO: MSR_IA32_UCODE_WRITE

	case MSR_MTRRcap: {
		/* Advertise WC + fixed ranges and FKVM_MTRR_NVAR variable
		 * ranges; MTRRcap itself is read-only. */
		*data = MTRR_CAP_WC | MTRR_CAP_FIXED | FKVM_MTRR_NVAR;
		break;
	}
	case MSR_MTRRdefType: {
		*data = vcpu->mtrrs.default_type;
		break;
	}
	case MSR_MTRR64kBase ... (MSR_MTRR64kBase + MTRR_N64K - 1): {
		*data = vcpu->mtrrs.mtrr64k[index - MSR_MTRR64kBase];
		break;
	}
	case MSR_MTRR16kBase ... (MSR_MTRR16kBase + MTRR_N16K - 1): {
		*data = vcpu->mtrrs.mtrr16k[index - MSR_MTRR16kBase];
		break;
	}
	case MSR_MTRR4kBase ... (MSR_MTRR4kBase + MTRR_N4K - 1): {
		*data = vcpu->mtrrs.mtrr4k[index - MSR_MTRR4kBase];
		break;
	}
	case MSR_MTRRVarBase ... (MSR_MTRRVarBase + FKVM_MTRR_NVAR * 2 - 1): {
		/* Variable MTRRs come in base/mask pairs, hence NVAR * 2. */
		*data = vcpu->mtrrs.mtrrvar[index - MSR_MTRRVarBase];
		break;
	}
	case MSR_APICBASE: {
		printf("unimplemented at %d\n", __LINE__);
		return ENOSYS;
		break;
	}
	case MSR_IA32_MISC_ENABLE: {
		printf("unimplemented at %d\n", __LINE__);
		return ENOSYS;
		break;
	}
	//TODO: MSR_KVM_WALL_CLOCK
	//TODO: MSR_KVM_SYSTEM_TIME

	default:
		printf("Did not get unimplemented msr: 0x%" PRIx32 "\n", index);
		return ENOSYS;
	}

	return 0;
}
/*
 * Read each requested MSR into its entry's 'data' field.
 * NOTE(review): per-MSR errors are ignored; unimplemented MSRs leave
 * 'data' as whatever the caller initialized it to (zero from M_ZERO).
 */
static void
fkvm_get_regs_msrs(struct vcpu *vcpu, uint32_t nmsrs, struct kvm_msr_entry *entries) {
	int i;

	for (i = 0; i < nmsrs; i++) {
		fkvm_get_reg_msr(vcpu, entries[i].index, &entries[i].data);
	}
}
/*
 * Write guest MSR 'index' with 'data'.  State lands in the VMCB save
 * area or per-vcpu software state; the physical MSR is never written.
 * Returns 0 on success, ENOSYS for unemulated MSRs.  Note MSR_MTRRcap
 * is intentionally absent here (read-only), so writes hit 'default'.
 */
static int
fkvm_set_reg_msr(struct vcpu *vcpu, uint32_t index, uint64_t data) {
	struct vmcb *vmcb = vcpu->vmcb;

	switch(index) {

	case MSR_TSC: {
		uint64_t tsc;

		/* Store the delta from the host TSC as the VMCB offset. */
		tsc = rdtsc();
		vmcb->control.tsc_offset = data - tsc;
		break;
	}
	case MSR_STAR: {
		vmcb->save.star = data;
		break;
	}
	case MSR_LSTAR: {
		vmcb->save.lstar = data;
		break;
	}
	case MSR_CSTAR: {
		vmcb->save.cstar = data;
		break;
	}
	case MSR_GSBASE: {
		vmcb->save.kernel_gs_base = data;
		break;
	}
	case MSR_SF_MASK: {
		vmcb->save.sfmask = data;
		break;
	}
	case MSR_SYSENTER_CS_MSR: {
		vmcb->save.sysenter_cs = data;
		break;
	}
	case MSR_SYSENTER_EIP_MSR: {
		vmcb->save.sysenter_eip = data;
		break;
	}
	case MSR_SYSENTER_ESP_MSR: {
		vmcb->save.sysenter_esp = data;
		break;
	}
	case MSR_DEBUGCTLMSR: {
		printf("unimplemented at %d\n", __LINE__);
		return ENOSYS;
		break;
	}
	case MSR_PERFEVSEL0 ... MSR_PERFEVSEL3:
	case MSR_PERFCTR0 ... MSR_PERFCTR3: {
		printf("unimplemented at %d\n", __LINE__);
		return ENOSYS;
		break;
	}
	case MSR_EFER: {
		/* Validated/SVME-forced inside fkvm_set_efer. */
		fkvm_set_efer(vcpu, data);
		break;
	}
	case MSR_MC0_STATUS: {
		printf("unimplemented at %d\n", __LINE__);
		return ENOSYS;
		break;
	}
	case MSR_MCG_STATUS: {
		printf("unimplemented at %d\n", __LINE__);
		return ENOSYS;
		break;
	}
	case MSR_MCG_CTL: {
		printf("unimplemented at %d\n", __LINE__);
		return ENOSYS;
		break;
	}
	//TODO: MSR_IA32_UCODE_REV
	//TODO: MSR_IA32_UCODE_WRITE

	case MSR_MTRRdefType: {
		vcpu->mtrrs.default_type = data;
		break;
	}
	case MSR_MTRR64kBase ... (MSR_MTRR64kBase + MTRR_N64K - 1): {
		vcpu->mtrrs.mtrr64k[index - MSR_MTRR64kBase] = data;
		break;
	}
	case MSR_MTRR16kBase ... (MSR_MTRR16kBase + MTRR_N16K - 1): {
		vcpu->mtrrs.mtrr16k[index - MSR_MTRR16kBase] = data;
		break;
	}
	case MSR_MTRR4kBase ... (MSR_MTRR4kBase + MTRR_N4K - 1): {
		vcpu->mtrrs.mtrr4k[index - MSR_MTRR4kBase] = data;
		break;
	}
	case MSR_MTRRVarBase ... (MSR_MTRRVarBase + FKVM_MTRR_NVAR * 2 - 1): {
		/* Variable MTRRs come in base/mask pairs, hence NVAR * 2. */
		vcpu->mtrrs.mtrrvar[index - MSR_MTRRVarBase] = data;
		break;
	}
	case MSR_APICBASE: {
		printf("unimplemented at %d\n", __LINE__);
		return ENOSYS;
		break;
	}
	case MSR_IA32_MISC_ENABLE: {
		printf("unimplemented at %d\n", __LINE__);
		return ENOSYS;
		break;
	}
	//TODO: MSR_KVM_WALL_CLOCK
	//TODO: MSR_KVM_SYSTEM_TIME

	default:
		printf("Did not set unimplemented msr: 0x%" PRIx32 "\n", index);
		return ENOSYS;
	}

	return 0;
}
/*
 * Write each entry's MSR.  NOTE(review): per-MSR errors are ignored,
 * so unemulated indices are silently skipped.
 */
static void
fkvm_set_regs_msrs(struct vcpu *vcpu, uint32_t nmsrs, struct kvm_msr_entry *entries) {
	int i;

	for (i = 0; i < nmsrs; i++) {
		fkvm_set_reg_msr(vcpu, entries[i].index, entries[i].data);
	}
}
1705 /* System Calls */
1708 fkvm_get_regs(struct thread *td, struct fkvm_get_regs_args *uap)
1710 struct vcpu *vcpu;
1711 int error;
1713 if (!fkvm_loaded)
1714 return ENODEV;
1716 vcpu = TD_GET_VCPU(td);
1717 if (vcpu == NULL)
1718 return ENODEV;
1720 switch (uap->type) {
1722 case FKVM_REGS_TYPE_REGS: {
1723 struct kvm_regs out;
1724 fkvm_get_regs_regs(vcpu, &out);
1725 return copyout(&out, uap->regs, sizeof(out));
1728 case FKVM_REGS_TYPE_SREGS: {
1729 struct kvm_sregs out;
1730 fkvm_get_regs_sregs(vcpu, &out);
1731 return copyout(&out, uap->regs, sizeof(out));
1734 case FKVM_REGS_TYPE_MSRS: {
1735 struct kvm_msr_entry *user_entries;
1736 struct kvm_msr_entry *entries;
1737 int size;
1739 user_entries = (struct kvm_msr_entry *)uap->regs;
1741 size = sizeof(*entries) * uap->n;
1742 entries = malloc(size, M_DEVBUF, M_WAITOK|M_ZERO);
1743 if (entries == NULL)
1744 return ENOMEM;
1746 error = copyin(user_entries, entries, size);
1747 if (error != 0) {
1748 printf("FKVM_REGS_TYPE_MSRS: unable to copyin entries\n");
1749 free(entries, M_DEVBUF);
1750 return error;
1753 fkvm_get_regs_msrs(vcpu, uap->n, entries);
1755 error = copyout(user_entries, entries, size);
1756 if (error != 0) {
1757 printf("FKVM_REGS_TYPE_MSRS: unable to copyout entries\n");
1760 free(entries, M_DEVBUF);
1761 return error;
1764 default:
1765 return EINVAL;
1770 fkvm_set_regs(struct thread *td, struct fkvm_set_regs_args *uap)
1772 struct vcpu *vcpu;
1773 int error = 0;
1775 vcpu = TD_GET_VCPU(td);
1776 if (vcpu == NULL)
1777 return ENODEV;
1779 switch (uap->type) {
1781 case FKVM_REGS_TYPE_REGS: {
1782 struct kvm_regs in;
1783 error = copyin(uap->regs, &in, sizeof(in));
1784 if (error != 0)
1785 return error;
1786 fkvm_set_regs_regs(vcpu, &in);
1787 return 0;
1790 case FKVM_REGS_TYPE_SREGS: {
1791 struct kvm_sregs in;
1792 error = copyin(uap->regs, &in, sizeof(in));
1793 if (error != 0)
1794 return error;
1795 fkvm_set_regs_sregs(vcpu, &in);
1796 return 0;
1799 case FKVM_REGS_TYPE_MSRS: {
1800 struct kvm_msr_entry *user_entries;
1801 struct kvm_msr_entry *entries;
1802 int size;
1804 user_entries = (struct kvm_msr_entry *)uap->regs;
1806 size = sizeof(*entries) * uap->n;
1807 entries = malloc(size, M_DEVBUF, M_WAITOK|M_ZERO);
1808 if (entries == NULL)
1809 return ENOMEM;
1811 error = copyin(user_entries, entries, size);
1812 if (error != 0) {
1813 printf("FKVM_REGS_TYPE_MSRS: unable to copyin entries\n");
1814 free(entries, M_DEVBUF);
1815 return error;
1818 fkvm_set_regs_msrs(vcpu, uap->n, entries);
1820 free(entries, M_DEVBUF);
1821 return error;
1824 default:
1825 return EINVAL;
/* This function can only be called with multiples of page sizes */
/* vaddr as NULL overloads to fkvm_guest_check_range */
/*
 * System call: map the userspace range [vaddr, vaddr+size) into the
 * guest's physical address space at guest_pa, by inserting the backing
 * VM object into the guest vmspace's map.
 */
int
fkvm_set_user_mem_region(struct thread *td, struct fkvm_set_user_mem_region_args *uap)
{
	struct guestvm *guest_vm;

	vm_offset_t start;
	vm_offset_t end;

	struct vmspace *user_vm_space;
	vm_map_t user_vm_map;

	vm_object_t vm_object;
	vm_pindex_t vm_object_pindex;
	vm_ooffset_t vm_object_offset;
	vm_prot_t throwaway_prot;
	boolean_t throwaway_wired;
	vm_map_entry_t lookup_entry;

	int error;

	guest_vm = PROC_GET_GUESTVM(td->td_proc);
	if (guest_vm == NULL) {
		printf("PROC_GET_GUESTVM -> NULL\n");
		return ENODEV;
	}

	/* Guest-physical range, inclusive of the last byte. */
	start = uap->guest_pa;
	end = uap->guest_pa + uap->size - 1;
	printf("start: 0x%" PRIx64 " bytes\n", start);
	printf("end: 0x%" PRIx64 " bytes\n", end);

	/* NULL vaddr: only validate that the range is already mapped. */
	if (uap->vaddr == 0)
		return fkvm_guest_check_range(guest_vm, start, end);

	user_vm_space = td->td_proc->p_vmspace;
	user_vm_map = &user_vm_space->vm_map;
	printf("user vm space: %p\n", user_vm_space);
	printf("user vm map: %p\n", user_vm_map);

	/* Find the VM object backing the user's address. */
	error = vm_map_lookup(&user_vm_map, /* IN/OUT */
			      uap->vaddr,
			      VM_PROT_READ|VM_PROT_WRITE,
			      &lookup_entry,     /* OUT */
			      &vm_object,        /* OUT */
			      &vm_object_pindex, /* OUT */
			      &throwaway_prot,   /* OUT */
			      &throwaway_wired); /* OUT */
	if (error != KERN_SUCCESS) {
		printf("vm_map_lookup failed: %d\n", error);
		return EFAULT;
	}

	/* TODO: Trust the user that the full region is valid.
	 * This is very bad. See the note in fkvm_guest_check_range
	 * on nesting vm lookups. */
#if 0
	if (!fkvm_mem_has_entry(lookup_entry, user_vm_map, uap->vaddr + uap->size)) {
		printf("end of range not contained in same vm map entry as start\n");
		return EFAULT;
	}
#endif

	printf("vm object: %p\n", vm_object);
	printf(" size: %d pages\n", (int) vm_object->size);

	vm_object_offset = IDX_TO_OFF(vm_object_pindex);
	printf("vm_ooffset: 0x%" PRIx64 "\n", vm_object_offset);

	vm_object_reference(vm_object); // TODO: this might be a mem leak

	vm_map_lookup_done(user_vm_map, lookup_entry);

	/* Insert the object into the guest map; final 0 = no COW flags.
	 * NOTE(review): on failure the reference taken above is not
	 * dropped — looks like an object reference leak; confirm. */
	error = vm_map_insert(&guest_vm->sp->vm_map,
			      vm_object,
			      vm_object_offset,
			      start,
			      end,
			      VM_PROT_ALL, VM_PROT_ALL,
			      0);
	if (error != KERN_SUCCESS) {
		printf("vm_map_insert failed: %d\n", error);
		switch (error) {
		case KERN_INVALID_ADDRESS:
			return EINVAL;
		case KERN_NO_SPACE:
			return ENOMEM;
		default:
			return 1;
		}
	}

	return 0;
}
1926 fkvm_unset_user_mem_region(struct thread *td, struct fkvm_unset_user_mem_region_args *uap)
1928 struct guestvm *guest_vm;
1930 if (!fkvm_loaded)
1931 return ENODEV;
1933 guest_vm = PROC_GET_GUESTVM(td->td_proc);
1934 if (guest_vm == NULL) {
1935 printf("PROC_GET_GUESTVM -> NULL\n");
1936 return ENODEV;
1939 vm_offset_t start;
1940 vm_offset_t end;
1942 vm_map_t guest_vm_map;
1944 int error;
1946 start = uap->guest_pa;
1947 end = uap->guest_pa + uap->size - 1;
1948 printf("start: 0x%" PRIx64 " bytes\n", start);
1949 printf("end: 0x%" PRIx64 " bytes\n", end);
1951 guest_vm_map = &guest_vm->sp->vm_map;
1953 error = vm_map_remove(guest_vm_map, start, end);
1954 if (error != KERN_SUCCESS)
1955 return -1;
1957 return 0;
1961 fkvm_create_vm(struct thread *td, struct fkvm_create_vm_args *uap)
1963 struct guestvm *guest_vm;
1965 printf("SYSCALL : fkvm_create_vm\n");
1967 if (!fkvm_loaded)
1968 return ENODEV;
1970 /* Allocate Guest VM */
1971 guest_vm = fkvm_guestvm_alloc();
1973 /* Set up the vm address space */
1974 guest_vm->sp = fkvm_make_vmspace();
1975 if (guest_vm->sp == NULL) {
1976 fkvm_guestvm_free(guest_vm);
1977 return ENOMEM;
1979 guest_vm->nested_cr3 = vtophys(vmspace_pmap(guest_vm->sp)->pm_pml4);
1981 printf("guest:\n");
1982 printf(" vm space: %p\n", guest_vm->sp);
1983 printf(" vm map: %p\n", &guest_vm->sp->vm_map);
1984 printf(" ncr3: 0x%" PRIx64 "\n", guest_vm->nested_cr3);
1986 PROC_SET_GUESTVM(td->td_proc, guest_vm);
1988 printf("fkvm_create_vm done\n");
1989 return 0;
/* Tear down a guest VM: all VCPUs, then the vmspace, then the container. */
static void
fkvm_destroy_vm(struct guestvm *guest_vm)
{
	/* Destroy the VCPUs */
	while (guest_vm->nr_vcpus > 0) {
		guest_vm->nr_vcpus--;
		fkvm_vcpu_destroy(guest_vm->vcpus[guest_vm->nr_vcpus]);
		guest_vm->vcpus[guest_vm->nr_vcpus] = NULL;
	}

	/* Destroy the vmspace */
	if (guest_vm->sp != NULL)
		fkvm_destroy_vmspace(guest_vm->sp);

	/* Destroy the Guest VM itself */
	fkvm_guestvm_free(guest_vm);
}
/*
 * Decode an IOIO intercept for userspace emulation: unpack EXITINFO1
 * ('ioio_info') into kvm_run->u.io and record the rIP of the next
 * instruction (EXITINFO2) so userspace can resume past the IN/OUT.
 * Always returns 0.
 */
static int
intercept_ioio(struct vcpu *vcpu, struct kvm_run *kvm_run,
               uint64_t ioio_info, uint64_t next_rip)
{
	struct vmcb *vmcb = vcpu->vmcb;

	kvm_run->u.io.string = (ioio_info & STR_MASK) >> STR_SHIFT;
	kvm_run->u.io.port = ioio_info >> PORT_SHIFT;
	kvm_run->u.io.in = ioio_info & TYPE_MASK;
	kvm_run->u.io.size = (ioio_info & SIZE_MASK) >> SIZE_SHIFT;

	/* We need to remove the Interrupt Shadow Flag from the VMCB (see 15.20.5 in AMD_Vol2) */
	vmcb->control.intr_shadow = 0;

	kvm_run->u.io.rep = (ioio_info & REP_MASK) >> REP_SHIFT;
	/* TODO: Research more into Direction Flag checked in KVM; DF bit in RFLAGS */

	kvm_run->u.io.next_rip = next_rip;

	return 0;
}
/*
 * Handle a SHUTDOWN intercept (triple fault): wipe the whole VMCB page
 * and reinitialize it, effectively resetting the virtual CPU.
 */
static void
intercept_shutdown(struct vcpu *vcpu)
{
	struct vmcb *vmcb = vcpu->vmcb;
	memset(vmcb, 0, PAGE_SIZE);
	fkvm_vmcb_init(vmcb);
}
/*
 * System call: run the calling thread's VCPU until an exit that needs
 * userspace attention (or 20 in-kernel iterations).  Exits the kernel
 * can satisfy itself (host interrupts, resolvable nested page faults,
 * MSR/WBINVD emulation) loop back via KVM_EXIT_CONTINUE; everything
 * else is encoded into kvm_run and copied back to userspace.
 */
int
fkvm_vm_run(struct thread *td, struct fkvm_vm_run_args *uap)
{
	struct vcpu *vcpu;
	struct guestvm *guest_vm;
	struct vmcb *vmcb;
	int error;
	int ret = 0;
	int num_runs = 0;
	struct kvm_run kvm_run;

	if (!fkvm_loaded)
		return ENODEV;

	vcpu = TD_GET_VCPU(td);
	if (vcpu == NULL)
		return ENODEV;

	guest_vm = vcpu->guest_vm;
	vmcb = vcpu->vmcb;

	error = copyin(uap->run, &kvm_run, sizeof(struct kvm_run));
	if (error != 0)
		return error;

	/* Userspace owns the virtual TPR between runs. */
	fkvm_set_cr8(vcpu, kvm_run.cr8);

	kvm_run.exit_reason = KVM_EXIT_CONTINUE;

	while(kvm_run.exit_reason == KVM_EXIT_CONTINUE) {
		fkvm_vcpu_run(vcpu);

		switch (vmcb->control.exit_code) {

		/* Guest exception intercepts: not handled yet. */
		case VMCB_EXIT_EXCP_BASE ... (VMCB_EXIT_EXCP_BASE + 31): {
			int excp_vector;

			excp_vector = vmcb->control.exit_code - VMCB_EXIT_EXCP_BASE;

			printf("VMCB_EXIT_EXCP_BASE, exception vector: 0x%x\n",
			       excp_vector);
			kvm_run.exit_reason = KVM_EXIT_UNKNOWN;
			ret = ENOSYS;
			break;
		}

		case VMCB_EXIT_INTR: {
			//printf("VMCB_EXIT_INTR - nothing to do\n");
			/* Handled by host OS already */
			kvm_run.exit_reason = KVM_EXIT_CONTINUE;
			break;
		}

		case VMCB_EXIT_NPF: {
			/* EXITINFO1 contains fault error code */
			/* EXITINFO2 contains the guest physical address causing the fault. */

			u_int64_t fault_code;
			u_int64_t fault_gpa;

			vm_prot_t fault_type;
			int fault_flags;
			int rc;

			fault_code = vmcb->control.exit_info_1;
			fault_gpa  = vmcb->control.exit_info_2;
			kvm_run.exit_reason = KVM_EXIT_CONTINUE;

#if 0
			printf("VMCB_EXIT_NPF:\n");
			printf("gpa=0x%" PRIx64 "\n", fault_gpa);
			printf("fault code=0x%" PRIx64 " [P=%x, R/W=%x, U/S=%x, I/D=%x]\n",
			       fault_code,
			       (fault_code & PGEX_P) != 0,
			       (fault_code & PGEX_W) != 0,
			       (fault_code & PGEX_U) != 0,
			       (fault_code & PGEX_I) != 0);
#endif
			/* Map the NPF error code onto a vm_fault type. */
			if (fault_code & PGEX_W)
				fault_type = VM_PROT_WRITE;
			else if (fault_code & PGEX_I)
				fault_type = VM_PROT_EXECUTE;
			else
				fault_type = VM_PROT_READ;

			fault_flags = 0; /* TODO: is that right? */
			/* Try to satisfy the fault from the guest's vmspace;
			 * if that fails, assume MMIO and punt to userspace. */
			rc = vm_fault(&guest_vm->sp->vm_map, (fault_gpa & (~PAGE_MASK)), fault_type, fault_flags);
			if (rc != KERN_SUCCESS) {
				printf("vm_fault failed: %d\n", rc);
				kvm_run.u.mmio.fault_gpa = fault_gpa;
				kvm_run.u.mmio.rip = vcpu->regs[VCPU_REGS_RIP];
				kvm_run.u.mmio.cs_base = vmcb->save.cs.base;
				kvm_run.exit_reason = KVM_EXIT_MMIO;
			}
			break;
		}
		case VMCB_EXIT_WRITE_CR8:
			kvm_run.exit_reason = KVM_EXIT_SET_TPR;
			break;
		case VMCB_EXIT_NMI:
			kvm_run.exit_reason = KVM_EXIT_NMI;
			break;
		case VMCB_EXIT_HLT:
			vcpu->regs[VCPU_REGS_RIP]++; /* skip HLT, opcode F4 */
			kvm_run.exit_reason = KVM_EXIT_HLT;
			break;
		case VMCB_EXIT_SHUTDOWN:
			/* Triple fault: reset the VMCB, tell userspace. */
			intercept_shutdown(vcpu);
			kvm_run.exit_reason = KVM_EXIT_SHUTDOWN;
			break;
		case VMCB_EXIT_IOIO:
			error = intercept_ioio(vcpu, &kvm_run,
					       vmcb->control.exit_info_1,
					       vmcb->control.exit_info_2);
			if (error)
				kvm_run.exit_reason = KVM_EXIT_UNKNOWN;
			else
				kvm_run.exit_reason = KVM_EXIT_IO;
			break;
		case VMCB_EXIT_MSR: {
			/* EXITINFO1: 0 = rdmsr, 1 = wrmsr; index in RCX,
			 * value split across EDX:EAX. */
			int wrmsr;
			uint32_t msr;
			union {
				struct {
					uint32_t low;
					uint32_t high;
				} split;
				uint64_t full;
			} value;

			wrmsr = vmcb->control.exit_info_1;
			msr = (uint32_t) vcpu->regs[VCPU_REGS_RCX];

			printf("VMCB_EXIT_MSR:\n"
			       " %s msr 0x%" PRIx64 "\n",
			       wrmsr ? "write to" : "read from",
			       vcpu->regs[VCPU_REGS_RCX]);

			if (!wrmsr) { /* rdmsr */
				error = fkvm_get_reg_msr(vcpu, msr, &value.full);
				if (error != 0) {
					ret = ENOSYS;
					kvm_run.exit_reason = KVM_EXIT_UNKNOWN;
					break;
				}

				vcpu->regs[VCPU_REGS_RDX] = (uint64_t) value.split.high;
				vcpu->regs[VCPU_REGS_RAX] = (uint64_t) value.split.low;
			}
			else { /* wrmsr */
				value.split.high = (uint32_t) vcpu->regs[VCPU_REGS_RDX];
				value.split.low  = (uint32_t) vcpu->regs[VCPU_REGS_RAX];

				error = fkvm_set_reg_msr(vcpu, msr, value.full);
				if (error != 0) {
					ret = ENOSYS;
					kvm_run.exit_reason = KVM_EXIT_UNKNOWN;
					break;
				}
			}

			vcpu->regs[VCPU_REGS_RIP] += 2; /* skip rdmsr/wrmsr, 2 bytes */
			break;
		}
		case VMCB_EXIT_CPUID: {
			/* Userspace computes the leaf; pass the function #. */
			kvm_run.u.cpuid.fn = (uint32_t) vcpu->regs[VCPU_REGS_RAX];
			kvm_run.exit_reason = KVM_EXIT_CPUID;
			break;
		}
		case VMCB_EXIT_WBINVD: {
			/* TODO: stop ignoring this intercept when we have more than 1-cpu guests */
			vcpu->regs[VCPU_REGS_RIP] += 2;	/* skip WBINVD, 2 bytes */
			break;
		}
		case VMCB_EXIT_READ_CR0:
		case VMCB_EXIT_READ_CR3:
		case VMCB_EXIT_READ_CR4:
		case VMCB_EXIT_READ_CR8:
		case VMCB_EXIT_WRITE_CR0:
		case VMCB_EXIT_WRITE_CR3:
		case VMCB_EXIT_WRITE_CR4:
		case VMCB_EXIT_READ_DR0:
		case VMCB_EXIT_READ_DR1:
		case VMCB_EXIT_READ_DR2:
		case VMCB_EXIT_READ_DR3:
		case VMCB_EXIT_WRITE_DR0:
		case VMCB_EXIT_WRITE_DR1:
		case VMCB_EXIT_WRITE_DR2:
		case VMCB_EXIT_WRITE_DR3:
		case VMCB_EXIT_WRITE_DR5:
		case VMCB_EXIT_WRITE_DR7:
		case VMCB_EXIT_SMI:
		case VMCB_EXIT_INIT:
		case VMCB_EXIT_VINTR:
		case VMCB_EXIT_CR0_SEL_WRITE:
		case VMCB_EXIT_INVD:
		case VMCB_EXIT_INVLPG:
		case VMCB_EXIT_INVLPGA:
		case VMCB_EXIT_TASK_SWITCH:
		case VMCB_EXIT_VMRUN:
		case VMCB_EXIT_VMMCALL:
		case VMCB_EXIT_VMLOAD:
		case VMCB_EXIT_VMSAVE:
		case VMCB_EXIT_STGI:
		case VMCB_EXIT_CLGI:
		case VMCB_EXIT_SKINIT:
		case VMCB_EXIT_MONITOR:
		case VMCB_EXIT_MWAIT_UNCOND:
		default:
			printf("Unhandled vmexit:\n"
			       "  code:  0x%" PRIx64 "\n"
			       "  info1: 0x%" PRIx64 "\n"
			       "  info2: 0x%" PRIx64 "\n",
			       vmcb->control.exit_code,
			       vmcb->control.exit_info_1,
			       vmcb->control.exit_info_2);
			print_vmcb(vmcb);
			ret = ENOSYS;
			kvm_run.exit_reason = KVM_EXIT_UNKNOWN;
		}

		/* Bound the in-kernel loop so userspace gets control back. */
		num_runs++;
		if (num_runs == 20) //TODO: make this a #define
			break;
	}

	// printf("\n\n");

	/* we're going up to userspace - set the out fields of kvm_run: */

#define IF_MASK 0x00000200
	kvm_run.if_flag = !!(vcpu->vmcb->save.rflags & IF_MASK);

	/* TODO: kvm adds a check to see if in-kernel interrupt queues are empty */
	kvm_run.ready_for_interrupt_injection = kvm_run.if_flag &&
	                                        !vcpu->vmcb->control.intr_shadow;

	/* TODO kvm_run.ready_for_nmi_injection = ...; */

	kvm_run.cr8 = fkvm_get_cr8(vcpu);

	/* TODO: check copyout ret val */
	copyout(&kvm_run, uap->run, sizeof(struct kvm_run));
	// printf("sizeof(struct kvm_run) = %" PRIu64 "\n", sizeof(struct kvm_run));

	return ret;
}
/*
 * System call: create a VCPU inside the calling process's guest VM and
 * bind it to the calling thread (one VCPU per thread).
 */
int
fkvm_create_vcpu(struct thread *td, struct fkvm_create_vcpu_args *uap)
{
	struct guestvm *guest_vm;
	struct vcpu *vcpu;

	if (!fkvm_loaded)
		return ENODEV;

	guest_vm = PROC_GET_GUESTVM(td->td_proc);
	if (guest_vm == NULL) {
		printf("PROC_GET_GUESTVM -> NULL\n");
		return ENODEV;
	}

	/* Allocate VCPU */
	printf("fkvm_create_vcpu: td = %p\n", td);
	vcpu = fkvm_vcpu_create(guest_vm);
	fkvm_guestvm_add_vcpu(guest_vm, vcpu);

	/* Later syscalls find the VCPU through the thread. */
	TD_SET_VCPU(td, vcpu);
	printf("fkvm_create_vcpu: vcpu = %p\n", vcpu);
	return 0;
}
2318 static int
2319 fkvm_check_cpu_extension(void)
2321 u_int cpu_exthigh;
2322 u_int regs[4];
2323 u_int64_t vmcr;
2325 printf("fkvm_check_cpu_extension\n");
2327 /* Assumption: the architecture supports the cpuid instruction */
2329 /* Check if CPUID extended function 8000_0001h is supported. */
2330 do_cpuid(0x80000000, regs);
2331 cpu_exthigh = regs[0];
2333 printf("cpu_exthigh = %u\n", cpu_exthigh);
2335 if(cpu_exthigh >= 0x80000001) {
2336 /* Execute CPUID extended function 8000_0001h */
2337 do_cpuid(0x80000001, regs);
2338 printf("EAX = %u\n", regs[0]);
2340 if((regs[0] & 0x2) == 0) { /* Check SVM bit */
2341 printf("SVM not available\n");
2342 goto fail; /* SVM not available */
2345 vmcr = rdmsr(0xc0010114); /* Read VM_CR MSR */
2346 if((vmcr & 0x8) == 0) { /* Check SVMDIS bit */
2347 printf("vmcr = %" PRIx64 "\n", vmcr);
2348 printf("SVM allowed\n");
2349 return KERN_SUCCESS; /* SVM allowed */
2352 /* Execute CPUID extended function 8000_000ah */
2353 do_cpuid(0x8000000a, regs);
2354 if((regs[3] & 0x2) == 0) { /* Check SVM_LOCK bit */
2355 /* SVM disabled at bios; not unlockable.
2356 * User must change a BIOS setting to enable SVM.
2358 printf("EDX = %u\n", regs[3]);
2359 printf("SVM disabled at bios\n");
2360 goto fail;
2361 } else {
2362 /* TODO:
2363 * SVM may be unlockable;
2364 * consult the BIOS or TPM to obtain the key.
2366 printf("EDX = %u\n", regs[3]);
2367 printf("SVM maybe unlockable\n");
2368 goto fail;
2371 fail:
2372 return KERN_FAILURE;
2375 static void
2376 fkvm_proc_exit(void *arg, struct proc *p)
2378 struct guestvm *guest_vm;
2380 guest_vm = PROC_GET_GUESTVM(p);
2381 if (guest_vm == NULL)
2382 return;
2384 fkvm_destroy_vm(guest_vm);
2385 PROC_SET_GUESTVM(p, NULL);
static void
/*
 * SYSINIT hook: probe for SVM, allocate and initialize the host save
 * area and the I/O / MSR permission bitmaps, then enable SVM by setting
 * EFER.SVME and pointing MSR_VM_HSAVE_PA at the host save area.
 * Sets fkvm_loaded on success; on a failed probe it simply returns,
 * leaving fkvm_loaded at 0 so the syscall entry points refuse service.
 */
fkvm_load(void *unused)
{
	u_int64_t efer;
	int error;

	printf("fkvm_load\n");
	/* NOTE(review): sizeof yields size_t; %zx would be the exact
	 * format, though %PRIx64 matches on amd64 — confirm target. */
	printf("sizeof(struct vmcb) = %" PRIx64 "\n", sizeof(struct vmcb));

	/* Start from a known state so fkvm_unload can free safely. */
	hsave_area = NULL;
	iopm = NULL;
	msrpm = NULL;

	/* check if SVM is supported */
	error = fkvm_check_cpu_extension();
	if(error != KERN_SUCCESS) {
		printf("ERROR: SVM extension not available\n");
		return;
	}

	/* Clean up per-process guest VMs when their owner exits. */
	exit_tag = EVENTHANDLER_REGISTER(process_exit, fkvm_proc_exit, NULL,
					 EVENTHANDLER_PRI_ANY);

	/* allocate structures */
	/* NOTE(review): allocator returns are not checked for NULL here —
	 * presumably they cannot fail at boot; verify. */
	hsave_area = fkvm_hsave_area_alloc();
	iopm = fkvm_iopm_alloc();
	msrpm = fkvm_msrpm_alloc();

	/* Initialize structures */
	fkvm_hsave_area_init(hsave_area);
	fkvm_iopm_init(iopm);
	fkvm_msrpm_init(msrpm);

	/* Enable SVM in EFER (must precede VMRUN and HSAVE_PA setup). */
	efer = rdmsr(MSR_EFER);
	printf("EFER = %" PRIx64 "\n", efer);
	wrmsr(MSR_EFER, efer | EFER_SVME);
	efer = rdmsr(MSR_EFER);
	printf("new EFER = %" PRIx64 "\n", efer);

	/* Write Host save address in MSR_VM_HSAVE_PA */
	wrmsr(MSR_VM_HSAVE_PA, vtophys(hsave_area));

	fkvm_loaded = 1;
}
/* Run fkvm_load at boot, in the pseudo-device initialization stage. */
SYSINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_load, NULL);
2435 static void
2436 fkvm_unload(void *unused)
2438 printf("fkvm_unload\n");
2440 if (!fkvm_loaded) {
2441 printf("fkvm_unload: fkvm not loaded");
2442 return;
2445 EVENTHANDLER_DEREGISTER(process_exit, exit_tag);
2447 if (msrpm != NULL) {
2448 fkvm_msrpm_free(iopm);
2449 msrpm = NULL;
2451 if (iopm != NULL) {
2452 fkvm_iopm_free(iopm);
2453 iopm = NULL;
2455 if (hsave_area != NULL) {
2456 fkvm_hsave_area_free(hsave_area);
2457 hsave_area = NULL;
/* Run fkvm_unload at shutdown, matching the SYSINIT stage above. */
SYSUNINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_unload, NULL);