2 * Copyright (c) 2008 Brent Stephens <brents@rice.edu>
3 * Copyright (c) 2008 Diego Ongaro <diego.ongaro@rice.edu>
4 * Copyright (c) 2008 Kaushik Kumar Ram <kaushik@rice.edu>
5 * Copyright (c) 2008 Oleg Pesok <olegpesok@gmail.com>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/sysproto.h>
40 #include <sys/eventhandler.h>
43 #include <vm/vm_extern.h>
44 #include <vm/vm_map.h>
45 #include <vm/vm_object.h>
46 #include <vm/vm_param.h>
47 #include <machine/_inttypes.h>
48 #include <machine/specialreg.h>
49 #include <machine/segments.h>
50 #include <machine/vmcb.h>
53 /* Definitions for Port IO */
61 #define PORT_MASK 0xFFFF0000
62 #define ADDR_MASK (7 << ADDR_SHIFT)
63 #define SIZE_MASK (7 << SIZE_SHIFT)
64 #define REP_MASK (1 << REP_SHIFT)
65 #define STR_MASK (1 << STR_SHIFT)
66 #define TYPE_MASK (1 << TYPE_SHIFT)
67 /* End Definitions for Port IO */
69 #define PMIO_PAGE_OFFSET 1
71 #define IOPM_SIZE (8*1024 + 1) /* TODO: ensure that this need not be 12Kbtes, not just 8Kb+1 */
72 #define MSRPM_SIZE (8*1024)
76 static int fkvm_loaded
= 0;
78 static void *iopm
= NULL
; /* Should I allocate a vm_object_t instead? */
79 static void *msrpm
= NULL
; /* Should I allocate a vm_object_t instead? */
81 static void *hsave_area
= NULL
;
83 static eventhandler_tag exit_tag
;
111 unsigned long vmcb_pa
;
113 unsigned long regs
[NR_VCPU_REGS
];
114 u_int64_t host_fs_base
;
115 u_int64_t host_gs_base
;
120 uint64_t default_type
;
121 uint64_t mtrr64k
[MTRR_N64K
/8];
122 uint64_t mtrr16k
[MTRR_N16K
/8];
123 uint64_t mtrr4k
[MTRR_N4K
/8];
124 #define FKVM_MTRR_NVAR 8
125 uint64_t mtrrvar
[FKVM_MTRR_NVAR
*2];
128 struct guestvm
*guest_vm
;
132 struct vcpu
*vcpus
[MAX_VCPUS
];
136 u_int64_t nested_cr3
;
140 #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
141 #define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"
142 #define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb"
143 #define SVM_CLGI ".byte 0x0f, 0x01, 0xdd"
144 #define SVM_STGI ".byte 0x0f, 0x01, 0xdc"
145 #define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf"
147 static inline struct vcpu
*
148 TD_GET_VCPU(struct thread
*td
)
153 printf("TD_GET_VCPU -> NULL\n");
158 TD_SET_VCPU(struct thread
*td
, struct vcpu
*vcpu
)
163 static inline struct guestvm
*
164 PROC_GET_GUESTVM(struct proc
*proc
)
166 struct guestvm
*guestvm
;
167 guestvm
= proc
->p_guestvm
;
172 PROC_SET_GUESTVM(struct proc
*proc
, struct guestvm
*guestvm
)
174 proc
->p_guestvm
= guestvm
; \
178 print_vmcb_seg(struct vmcb_seg
* vmcb_seg
, const char* name
)
180 printf("%s Selector\n", name
);
181 printf("Selector : %" PRIx16
"\n", vmcb_seg
->selector
);
182 printf("Attributes : %" PRIx16
"\n", vmcb_seg
->attrib
);
183 printf("Limit : %" PRIx32
"\n", vmcb_seg
->limit
);
184 printf("Base Address : %" PRIx64
"\n", vmcb_seg
->base
);
189 print_vmcb(struct vmcb
*vmcb
)
191 printf("VMCB Control Area\n");
192 printf("Intercept CR Reads : %" PRIx16
"\n", vmcb
->control
.intercept_cr_reads
);
193 printf("Intercept CR Writes : %" PRIx16
"\n", vmcb
->control
.intercept_cr_writes
);
194 printf("Intercept DR Reads : %" PRIx16
"\n", vmcb
->control
.intercept_dr_reads
);
195 printf("Intercept DR Writes : %" PRIx16
"\n", vmcb
->control
.intercept_dr_writes
);
196 printf("Intercept Exceptions : %" PRIx32
"\n", vmcb
->control
.intercept_exceptions
);
197 printf("Intercepts : %" PRIx64
"\n", vmcb
->control
.intercepts
);
198 printf("Reserved 1: \n");
199 for(int i
=0; i
< 44; i
++) {
200 printf("%" PRIx8
"", vmcb
->control
.reserved_1
[i
]); /* Should be Zero */
203 printf("IOPM Base PA : %" PRIx64
"\n", vmcb
->control
.iopm_base_pa
);
204 printf("MSRPM Base PA : %" PRIx64
"\n", vmcb
->control
.msrpm_base_pa
);
205 printf("TSC Offset : %" PRIx64
"\n", vmcb
->control
.tsc_offset
);
206 printf("Guest ASID : %" PRIx32
"\n", vmcb
->control
.guest_asid
);
207 printf("TLB Control : %" PRIx8
"\n", vmcb
->control
.tlb_control
);
208 printf("Reserved 2 : \n");
209 for(int i
=0; i
< 3; i
++) {
210 printf("%" PRIx8
"", vmcb
->control
.reserved_1
[i
]); /* Should be Zero */
213 printf("Virtual TPR : %" PRIx8
"\n", vmcb
->control
.v_tpr
);
214 printf("Virtual IRQ : %" PRIx8
"\n", vmcb
->control
.v_irq
);
215 printf("Virtual Interrupt : %" PRIx8
"\n", vmcb
->control
.v_intr
);
216 printf("Virtual Interrupt Masking: %" PRIx8
"\n", vmcb
->control
.v_intr_masking
);
217 printf("Virtual Interrupt Vector : %" PRIx8
"\n", vmcb
->control
.v_intr_vector
);
218 printf("Reserved 6 : \n");
219 for(int i
=0; i
< 3; i
++) {
220 printf("%" PRIx8
"", vmcb
->control
.reserved_6
[i
]); /* Should be Zero */
223 printf("Interrupt Shadow : %" PRIx8
"\n", vmcb
->control
.intr_shadow
);
224 printf("Reserved 7 : \n");
225 for(int i
=0; i
< 7; i
++) {
226 printf("%" PRIx8
"", vmcb
->control
.reserved_7
[i
]); /* Should be Zero */
229 printf("Exit Code : %" PRIx64
"\n", vmcb
->control
.exit_code
);
230 printf("Exit Info 1 : %" PRIx64
"\n", vmcb
->control
.exit_info_1
);
231 printf("Exit Info 2 : %" PRIx64
"\n", vmcb
->control
.exit_info_2
);
232 printf("Exit Interrupt Info : %" PRIx32
"\n", vmcb
->control
.exit_int_info
);
233 printf("Exit Interrupt Info Err Code: %" PRIx32
"\n", vmcb
->control
.exit_int_info_err_code
);
234 printf("Nested Control : %" PRIx64
"\n", vmcb
->control
.nested_ctl
);
235 printf("Reserved 8 : \n");
236 for(int i
=0; i
< 16; i
++) {
237 printf("%" PRIx8
"", vmcb
->control
.reserved_8
[i
]); /* Should be Zero */
240 printf("Event Injection : %" PRIx64
"\n", vmcb
->control
.event_inj
);
241 printf("Nested CR3 : %" PRIx64
"\n", vmcb
->control
.nested_cr3
);
242 printf("LBR Virtualization Enable: %" PRIx64
"\n", vmcb
->control
.lbr_virt_enable
);
243 printf("Reserved 9 : \n");
244 for(int i
=0; i
< 832; i
++) {
245 printf("%" PRIx8
"", vmcb
->control
.reserved_9
[i
]); /* Should be Zero */
251 printf("VMCB Save Area\n");
252 print_vmcb_seg(&(vmcb
->save
.es
), "ES");
253 print_vmcb_seg(&(vmcb
->save
.cs
), "CS");
254 print_vmcb_seg(&(vmcb
->save
.ss
), "SS");
255 print_vmcb_seg(&(vmcb
->save
.ds
), "DS");
256 print_vmcb_seg(&(vmcb
->save
.fs
), "FS");
257 print_vmcb_seg(&(vmcb
->save
.gs
), "GS");
258 print_vmcb_seg(&(vmcb
->save
.gdtr
), "GDTR");
259 print_vmcb_seg(&(vmcb
->save
.ldtr
), "LDTR");
260 print_vmcb_seg(&(vmcb
->save
.idtr
), "IDTR");
261 print_vmcb_seg(&(vmcb
->save
.tr
), "TR");
262 printf("Reserved 1 : \n");
263 for(int i
=0; i
< 43; i
++) {
264 printf("%" PRIx8
"", vmcb
->save
.reserved_1
[i
]); /* Should be Zero */
267 printf("Current Processor Level : %" PRIx8
"\n", vmcb
->save
.cpl
);
268 printf("Reserved 2 : \n");
269 for(int i
=0; i
< 4; i
++) {
270 printf("%" PRIx8
"", vmcb
->save
.reserved_2
[i
]); /* Should be Zero */
273 printf("EFER : %" PRIx64
"\n", vmcb
->save
.efer
);
274 printf("Reserved 3 : \n");
275 for(int i
=0; i
< 112; i
++) {
276 printf("%" PRIx8
"", vmcb
->save
.reserved_3
[i
]); /* Should be Zero */
279 printf("Control Register 4 : %" PRIx64
"\n", vmcb
->save
.cr4
);
280 printf("Control Register 3 : %" PRIx64
"\n", vmcb
->save
.cr3
);
281 printf("Control Register 0 : %" PRIx64
"\n", vmcb
->save
.cr0
);
282 printf("Debug Register 7 : %" PRIx64
"\n", vmcb
->save
.dr7
);
283 printf("Debug Register 6 : %" PRIx64
"\n", vmcb
->save
.dr6
);
284 printf("RFlags : %" PRIx64
"\n", vmcb
->save
.rflags
);
285 printf("RIP : %" PRIx64
"\n", vmcb
->save
.rip
);
286 printf("Reserved 4 : \n");
287 for(int i
=0; i
< 88; i
++) {
288 printf("%" PRIx8
"", vmcb
->save
.reserved_4
[i
]); /* Should be Zero */
291 printf("RSP : %" PRIx64
"\n", vmcb
->save
.rsp
);
292 printf("Reserved 5 : \n");
293 for(int i
=0; i
< 24; i
++) {
294 printf("%" PRIx8
"", vmcb
->save
.reserved_5
[i
]); /* Should be Zero */
297 printf("RAX : %" PRIx64
"\n", vmcb
->save
.rax
);
298 printf("STAR : %" PRIx64
"\n", vmcb
->save
.star
);
299 printf("LSTAR : %" PRIx64
"\n", vmcb
->save
.lstar
);
300 printf("CSTAR : %" PRIx64
"\n", vmcb
->save
.cstar
);
301 printf("SFMASK : %" PRIx64
"\n", vmcb
->save
.sfmask
);
302 printf("Kernel GS Base : %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
303 printf("SYSENTER CS : %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
304 printf("SYSENTER ESP : %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
305 printf("SYSENTER EIP : %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
306 printf("Control Register 2 : %" PRIx64
"\n", vmcb
->save
.cr2
);
307 printf("Reserved 6 : \n");
308 for(int i
=0; i
< 32; i
++) {
309 printf("%" PRIx8
"", vmcb
->save
.reserved_6
[i
]); /* Should be Zero */
312 printf("Global PAT : %" PRIx64
"\n", vmcb
->save
.g_pat
);
313 printf("Debug Control : %" PRIx64
"\n", vmcb
->save
.dbg_ctl
);
314 printf("BR From : %" PRIx64
"\n", vmcb
->save
.br_from
);
315 printf("BR To : %" PRIx64
"\n", vmcb
->save
.br_to
);
316 printf("Last Exception From : %" PRIx64
"\n", vmcb
->save
.last_excp_from
);
317 printf("Last Exception To : %" PRIx64
"\n", vmcb
->save
.last_excp_to
);
324 print_tss_desc(struct system_segment_descriptor
*tss_desc
)
326 printf("TSS desc @ %p:\n", tss_desc
);
327 printf("sd_lolimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lolimit
);
328 printf("sd_lobase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lobase
);
329 printf("sd_type: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_type
);
330 printf("sd_dpl: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_dpl
);
331 printf("sd_p: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_p
);
332 printf("sd_hilimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hilimit
);
333 printf("sd_xx0: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx0
);
334 printf("sd_gran: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_gran
);
335 printf("sd_hibase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hibase
);
336 printf("sd_xx1: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx1
);
337 printf("sd_mbz: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_mbz
);
338 printf("sd_xx2: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx2
);
343 print_tss(struct system_segment_descriptor
*tss_desc
)
349 base
= (u_int32_t
*) ((((u_int64_t
) tss_desc
->sd_hibase
) << 24) | ((u_int64_t
) tss_desc
->sd_lobase
));
350 limit
= ((tss_desc
->sd_hilimit
<< 16) | tss_desc
->sd_lolimit
) / 4;
352 printf("TSS: @ %p\n", base
);
353 for (i
= 0; i
<= limit
; i
++)
354 printf("%x: 0x%" PRIx32
"\n", i
, base
[i
]);
360 print_vmcb_save_area(struct vmcb
*vmcb
)
362 printf("VMCB save area:\n");
363 printf(" cs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
364 vmcb
->save
.cs
.selector
,
365 vmcb
->save
.cs
.attrib
,
368 printf(" fs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
369 vmcb
->save
.fs
.selector
,
370 vmcb
->save
.fs
.attrib
,
373 printf(" gs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
374 vmcb
->save
.gs
.selector
,
375 vmcb
->save
.gs
.attrib
,
378 printf(" tr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
379 vmcb
->save
.tr
.selector
,
380 vmcb
->save
.tr
.attrib
,
383 printf(" ldtr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
384 vmcb
->save
.ldtr
.selector
,
385 vmcb
->save
.ldtr
.attrib
,
386 vmcb
->save
.ldtr
.limit
,
387 vmcb
->save
.ldtr
.base
);
388 printf(" rip: %" PRIx64
"\n", vmcb
->save
.rip
);
389 printf(" kernel_gs_base: %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
390 printf(" star: %" PRIx64
"\n", vmcb
->save
.star
);
391 printf(" lstar: %" PRIx64
"\n", vmcb
->save
.lstar
);
392 printf(" cstar: %" PRIx64
"\n", vmcb
->save
.cstar
);
393 printf(" sfmask: %" PRIx64
"\n", vmcb
->save
.sfmask
);
394 printf(" sysenter_cs: %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
395 printf(" sysenter_esp: %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
396 printf(" sysenter_eip: %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
401 vmrun_assert(struct vmcb
*vmcb
)
403 #define A(cond) do { if ((cond)) { printf("Error: assertion not met on line %d\n", __LINE__); bad = 1; } } while (0)
409 // The following are illegal:
412 A((vmcb
->save
.efer
& 0x0000000000001000) == 0);
414 // CR0.CD is zero and CR0.NW is set
415 A( ((vmcb
->save
.cr0
& 0x0000000040000000) == 0) &&
416 ((vmcb
->save
.cr0
& 0x0000000020000000) != 0));
418 // CR0[63:32] are not zero.
419 A((vmcb
->save
.cr0
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
421 // Any MBZ bit of CR3 is set.
422 A((vmcb
->save
.cr3
& 0xFFF0000000000000) != 0);
424 // CR4[63:11] are not zero.
425 A((vmcb
->save
.cr4
& 0xFFFFFFFFFFFFF800) == 0xFFFFFFFFFFFFF800);
427 // DR6[63:32] are not zero.
428 A((vmcb
->save
.dr6
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
430 // DR7[63:32] are not zero.
431 A((vmcb
->save
.dr7
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
433 // EFER[63:15] are not zero.
434 A((vmcb
->save
.efer
& 0xFFFFFFFFFFFF8000) == 0xFFFFFFFFFFF8000);
436 // EFER.LMA or EFER.LME is non-zero and this processor does not support long mode.
437 //// A((vmcb->save.efer & 0x0000000000000500) != 0);
439 // EFER.LME and CR0.PG are both set and CR4.PAE is zero.
440 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
441 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
442 ((vmcb
->save
.cr4
& 0x0000000000000020) != 0));
444 // EFER.LME and CR0.PG are both non-zero and CR0.PE is zero.
445 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
446 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
447 ((vmcb
->save
.cr0
& 0x0000000000000001) == 0));
449 // EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
450 // cs.attrib = concat 55-52 and 47-40 (p372 v2)
451 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
452 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
453 ((vmcb
->save
.cr4
& 0x0000000000000020) != 0) &&
454 ((vmcb
->save
.cs
.attrib
& 0x0200) != 0) &&
455 ((vmcb
->save
.cs
.attrib
& 0x0400) != 0));
457 // The VMRUN intercept bit is clear.
458 A((vmcb
->control
.intercepts
& 0x0000000100000000) == 0);
460 // The MSR or IOIO intercept tables extend to a physical address that is
461 // greater than or equal to the maximum supported physical address.
463 // Illegal event injection (see Section 15.19 on page 391).
465 // ASID is equal to zero.
466 A(vmcb
->control
.guest_asid
== 0);
468 // VMRUN can load a guest value of CR0 with PE = 0 but PG = 1, a
469 // combination that is otherwise illegal (see Section 15.18).
471 // In addition to consistency checks, VMRUN and #VMEXIT canonicalize (i.e.,
472 // sign-extend to 63 bits) all base addresses in the segment registers
473 // that have been loaded.
481 fkvm_vcpu_run(struct vcpu
*vcpu
)
490 u_short ldt_selector
;
492 unsigned long host_cr2
;
493 unsigned long host_dr6
;
494 unsigned long host_dr7
;
496 struct system_segment_descriptor
*tss_desc
;
501 //printf("begin fkvm_vcpu_run\n");
505 if (vmrun_assert(vmcb
))
508 tss_desc
= (struct system_segment_descriptor
*) (&gdt
[GPROC0_SEL
]);
509 sel
= GSEL(GPROC0_SEL
, SEL_KPL
);
511 // printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64 "\n", sel);
512 // print_tss_desc(tss_desc);
513 // print_tss(tss_desc);
515 // print_vmcb_save_area(vmcb);
516 // printf("vcpu->regs[VCPU_REGS_RIP]: 0x%lx\n", vcpu->regs[VCPU_REGS_RIP]);
519 vmcb
->save
.rax
= vcpu
->regs
[VCPU_REGS_RAX
];
520 vmcb
->save
.rsp
= vcpu
->regs
[VCPU_REGS_RSP
];
521 vmcb
->save
.rip
= vcpu
->regs
[VCPU_REGS_RIP
];
523 /* meh: kvm has pre_svm_run(svm); */
525 vcpu
->host_fs_base
= rdmsr(MSR_FSBASE
);
526 vcpu
->host_gs_base
= rdmsr(MSR_GSBASE
);
527 // printf("host_fs_base: 0x%" PRIx64 "\n", vcpu->host_fs_base);
528 // printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);
532 ldt_selector
= rldt();
533 // printf("fs selector: %hx\n", fs_selector);
534 // printf("gs selector: %hx\n", gs_selector);
535 // printf("ldt selector: %hx\n", ldt_selector);
542 vmcb
->save
.cr2
= vcpu
->cr2
;
544 // TODO: something with apic_base?
546 /* meh: dr7? db_regs? */
548 // printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
549 // printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
550 // printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
551 // printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));
553 star
= rdmsr(MSR_STAR
);
554 lstar
= rdmsr(MSR_LSTAR
);
555 cstar
= rdmsr(MSR_CSTAR
);
556 sfmask
= rdmsr(MSR_SF_MASK
);
558 // printf("CLGI...\n");
560 __asm
__volatile (SVM_CLGI
);
567 "mov %c[rbx](%[svm]), %%rbx \n\t"
568 "mov %c[rcx](%[svm]), %%rcx \n\t"
569 "mov %c[rdx](%[svm]), %%rdx \n\t"
570 "mov %c[rsi](%[svm]), %%rsi \n\t"
571 "mov %c[rdi](%[svm]), %%rdi \n\t"
572 "mov %c[rbp](%[svm]), %%rbp \n\t"
573 "mov %c[r8](%[svm]), %%r8 \n\t"
574 "mov %c[r9](%[svm]), %%r9 \n\t"
575 "mov %c[r10](%[svm]), %%r10 \n\t"
576 "mov %c[r11](%[svm]), %%r11 \n\t"
577 "mov %c[r12](%[svm]), %%r12 \n\t"
578 "mov %c[r13](%[svm]), %%r13 \n\t"
579 "mov %c[r14](%[svm]), %%r14 \n\t"
580 "mov %c[r15](%[svm]), %%r15 \n\t"
582 /* Enter guest mode */
584 "mov %c[vmcb](%[svm]), %%rax \n\t"
590 /* Save guest registers, load host registers */
591 "mov %%rbx, %c[rbx](%[svm]) \n\t"
592 "mov %%rcx, %c[rcx](%[svm]) \n\t"
593 "mov %%rdx, %c[rdx](%[svm]) \n\t"
594 "mov %%rsi, %c[rsi](%[svm]) \n\t"
595 "mov %%rdi, %c[rdi](%[svm]) \n\t"
596 "mov %%rbp, %c[rbp](%[svm]) \n\t"
597 "mov %%r8, %c[r8](%[svm]) \n\t"
598 "mov %%r9, %c[r9](%[svm]) \n\t"
599 "mov %%r10, %c[r10](%[svm]) \n\t"
600 "mov %%r11, %c[r11](%[svm]) \n\t"
601 "mov %%r12, %c[r12](%[svm]) \n\t"
602 "mov %%r13, %c[r13](%[svm]) \n\t"
603 "mov %%r14, %c[r14](%[svm]) \n\t"
604 "mov %%r15, %c[r15](%[svm]) \n\t"
608 [vmcb
]"i"(offsetof(struct vcpu
, vmcb_pa
)),
609 [rbx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBX
])),
610 [rcx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RCX
])),
611 [rdx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDX
])),
612 [rsi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RSI
])),
613 [rdi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDI
])),
614 [rbp
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBP
])),
615 [r8
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R8
])),
616 [r9
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R9
])),
617 [r10
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R10
])),
618 [r11
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R11
])),
619 [r12
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R12
])),
620 [r13
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R13
])),
621 [r14
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R14
])),
622 [r15
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R15
]))
624 "rbx", "rcx", "rdx", "rsi", "rdi",
625 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
629 /* meh: dr7? db_regs? */
631 vcpu
->cr2
= vmcb
->save
.cr2
;
633 vcpu
->regs
[VCPU_REGS_RAX
] = vmcb
->save
.rax
;
634 vcpu
->regs
[VCPU_REGS_RSP
] = vmcb
->save
.rsp
;
635 vcpu
->regs
[VCPU_REGS_RIP
] = vmcb
->save
.rip
;
642 load_fs(fs_selector
);
643 load_gs(gs_selector
);
646 wrmsr(MSR_FSBASE
, vcpu
->host_fs_base
);
647 wrmsr(MSR_GSBASE
, vcpu
->host_gs_base
);
649 tss_desc
->sd_type
= SDT_SYSTSS
;
652 wrmsr(MSR_STAR
, star
);
653 wrmsr(MSR_LSTAR
, lstar
);
654 wrmsr(MSR_CSTAR
, cstar
);
655 wrmsr(MSR_SF_MASK
, sfmask
);
659 __asm
__volatile (SVM_STGI
);
663 // print_tss_desc(tss_desc);
664 // print_tss(tss_desc);
666 // print_vmcb_save_area(vmcb);
674 _fkvm_init_seg(struct vmcb_seg
*seg
, uint16_t attrib
)
677 seg
->attrib
= VMCB_SELECTOR_P_MASK
| attrib
;
683 fkvm_init_seg(struct vmcb_seg
*seg
)
685 _fkvm_init_seg(seg
, VMCB_SELECTOR_S_MASK
| VMCB_SELECTOR_WRITE_MASK
);
689 fkvm_init_sys_seg(struct vmcb_seg
*seg
, uint16_t attrib
)
691 _fkvm_init_seg(seg
, attrib
);
695 fkvm_iopm_alloc(void)
697 return contigmalloc(IOPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
701 fkvm_iopm_init(void *iopm
)
703 memset(iopm
, 0xff, IOPM_SIZE
); /* TODO: we may want to allow access to PC debug port */
707 fkvm_iopm_free(void *iopm
)
709 contigfree(iopm
, IOPM_SIZE
, M_DEVBUF
);
713 fkvm_msrpm_alloc(void)
715 return contigmalloc(MSRPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
719 fkvm_msrpm_init(void *msrpm
)
721 memset(msrpm
, 0xff, MSRPM_SIZE
); /* TODO: we may want to allow some MSR accesses */
725 fkvm_msrpm_free(void *msrpm
)
727 contigfree(msrpm
, MSRPM_SIZE
, M_DEVBUF
);
731 fkvm_hsave_area_alloc(void)
733 return contigmalloc(PAGE_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
737 fkvm_hsave_area_init(void *hsave_area
)
742 fkvm_hsave_area_free(void *hsave_area
)
744 contigfree(hsave_area
, PAGE_SIZE
, M_DEVBUF
);
747 static struct vmspace
*
748 fkvm_make_vmspace(void)
752 sp
= vmspace_alloc(0, 0xffffffffffffffff);
754 printf("vmspace_alloc failed\n");
762 fkvm_destroy_vmspace(struct vmspace
* sp
)
768 fkvm_vmcb_alloc(void)
770 return contigmalloc(PAGE_SIZE
, M_DEVBUF
, M_ZERO
, 0, -1UL,
775 fkvm_vmcb_init(struct vmcb
*vmcb
)
777 struct vmcb_control_area
*control
= &vmcb
->control
;
778 struct vmcb_save_area
*save
= &vmcb
->save
;
780 control
->intercept_cr_reads
= INTERCEPT_CR4_MASK
;
782 control
->intercept_cr_writes
= INTERCEPT_CR4_MASK
|
785 control
->intercept_dr_reads
= INTERCEPT_DR0_MASK
|
790 control
->intercept_dr_writes
= INTERCEPT_DR0_MASK
|
797 control
->intercept_exceptions
= (1 << IDT_UD
) | // Invalid Opcode
798 (1 << IDT_MC
); // Machine Check
800 control
->intercepts
= INTERCEPT_INTR
|
807 INTERCEPT_IOIO_PROT
|
819 INTERCEPT_MWAIT_UNCOND
;
821 control
->iopm_base_pa
= vtophys(iopm
);
822 control
->msrpm_base_pa
= vtophys(msrpm
);
823 control
->tsc_offset
= 0;
825 /* TODO: remove this once we assign asid's to distinct VM's */
826 control
->guest_asid
= 1;
827 control
->tlb_control
= VMCB_TLB_CONTROL_FLUSH_ALL
;
829 /* let v_tpr default to 0 */
830 /* let v_irq default to 0 */
831 /* let v_intr default to 0 */
833 control
->v_intr_masking
= 1;
835 /* let v_intr_vector default to 0 */
836 /* let intr_shadow default to 0 */
837 /* let exit_code, exit_info_1, exit_info_2, exit_int_info,
838 exit_int_info_err_code default to 0 */
840 control
->nested_ctl
= 1;
842 /* let event_inj default to 0 */
844 // (nested_cr3 is later)
846 /* let lbr_virt_enable default to 0 */
849 fkvm_init_seg(&save
->ds
);
850 fkvm_init_seg(&save
->es
);
851 fkvm_init_seg(&save
->fs
);
852 fkvm_init_seg(&save
->gs
);
853 fkvm_init_seg(&save
->ss
);
855 _fkvm_init_seg(&save
->cs
, VMCB_SELECTOR_READ_MASK
| VMCB_SELECTOR_S_MASK
|
856 VMCB_SELECTOR_CODE_MASK
);
857 save
->cs
.selector
= 0xf000;
858 save
->cs
.base
= 0xffff0000;
860 save
->gdtr
.limit
= 0xffff;
861 save
->idtr
.limit
= 0xffff;
863 fkvm_init_sys_seg(&save
->ldtr
, SDT_SYSLDT
);
864 fkvm_init_sys_seg(&save
->tr
, SDT_SYS286BSY
);
866 save
->g_pat
= PAT_VALUE(PAT_WRITE_BACK
, 0) | PAT_VALUE(PAT_WRITE_THROUGH
, 1) |
867 PAT_VALUE(PAT_UNCACHED
, 2) | PAT_VALUE(PAT_UNCACHEABLE
, 3) |
868 PAT_VALUE(PAT_WRITE_BACK
, 4) | PAT_VALUE(PAT_WRITE_THROUGH
, 5) |
869 PAT_VALUE(PAT_UNCACHED
, 6) | PAT_VALUE(PAT_UNCACHEABLE
, 7);
871 /* CR0 = 6000_0010h at boot */
872 save
->cr0
= CR0_ET
| CR0_NW
| CR0_CD
;
873 save
->dr6
= 0xffff0ff0;
876 save
->rip
= 0x0000fff0;
878 save
->efer
= EFER_SVME
;
882 fkvm_vmcb_free(struct vmcb
*vmcb
)
884 contigfree(vmcb
, PAGE_SIZE
, M_DEVBUF
);
888 fkvm_vcpu_create(struct guestvm
*guest_vm
)
891 vcpu
= malloc(sizeof(struct vcpu
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
893 vcpu
->vmcb
= fkvm_vmcb_alloc();
894 vcpu
->vmcb_pa
= vtophys(vcpu
->vmcb
);
895 printf("vmcb = 0x%p\n", vcpu
->vmcb
);
896 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu
->vmcb_pa
);
898 fkvm_vmcb_init(vcpu
->vmcb
);
899 vcpu
->vmcb
->control
.nested_cr3
= guest_vm
->nested_cr3
;
900 vcpu
->regs
[VCPU_REGS_RIP
] = vcpu
->vmcb
->save
.rip
;
902 vcpu
->guest_vm
= guest_vm
;
908 fkvm_vcpu_destroy(struct vcpu
*vcpu
)
910 fkvm_vmcb_free(vcpu
->vmcb
);
911 free(vcpu
, M_DEVBUF
);
914 static struct guestvm
*
915 fkvm_guestvm_alloc(void)
917 return malloc(sizeof(struct guestvm
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
921 fkvm_guestvm_free(struct guestvm
* guest_vm
)
923 free(guest_vm
, M_DEVBUF
);
927 fkvm_guestvm_add_vcpu(struct guestvm
*guest_vm
, struct vcpu
*vcpu
)
929 guest_vm
->vcpus
[guest_vm
->nr_vcpus
] = vcpu
;
930 guest_vm
->nr_vcpus
++; /* TODO: Probably not safe to increment */
931 /* How about a lock to protect all of this? */
936 fkvm_userpoke(struct thread
*td
, struct fkvm_userpoke_args
*uap
)
938 printf("fkvm_userpoke\n");
947 fkvm_mem_has_entry(vm_map_entry_t expected_entry
, vm_map_t vm_map
, vm_offset_t vaddr
)
949 vm_map_entry_t lookup_entry
;
950 vm_object_t throwaway_object
;
951 vm_pindex_t throwaway_pindex
;
952 vm_prot_t throwaway_prot
;
953 boolean_t throwaway_wired
;
956 error
= vm_map_lookup(&vm_map
, /* IN/OUT */
958 VM_PROT_READ
|VM_PROT_WRITE
,
959 &lookup_entry
, /* OUT */
960 &throwaway_object
, /* OUT */
961 &throwaway_pindex
, /* OUT */
962 &throwaway_prot
, /* OUT */
963 &throwaway_wired
); /* OUT */
964 if (error
!= KERN_SUCCESS
)
966 vm_map_lookup_done(vm_map
, lookup_entry
);
967 return (lookup_entry
== expected_entry
);
971 fkvm_guest_check_range(struct guestvm
*guest_vm
, uint64_t start
, uint64_t end
)
973 vm_map_t guest_vm_map
;
974 vm_map_entry_t lookup_entry
;
975 vm_object_t throwaway_object
;
976 vm_pindex_t throwaway_pindex
;
977 vm_prot_t throwaway_prot
;
978 boolean_t throwaway_wired
;
982 guest_vm_map
= &guest_vm
->sp
->vm_map
;
984 error
= vm_map_lookup(&guest_vm_map
, /* IN/OUT */
986 VM_PROT_READ
|VM_PROT_WRITE
,
987 &lookup_entry
, /* OUT */
988 &throwaway_object
, /* OUT */
989 &throwaway_pindex
, /* OUT */
990 &throwaway_prot
, /* OUT */
991 &throwaway_wired
); /* OUT */
992 if (error
!= KERN_SUCCESS
)
994 vm_map_lookup_done(guest_vm_map
, lookup_entry
);
997 TODO: We can't actually nest the lookups:
998 panic: _sx_xlock_hard: recursed on non-recursive sx user map @ ../../../vm/vm_map.c:3115
999 Therefore, I've moved the lookup_done above for now, but we really need a lock here.
1001 Maybe it's better to use vm_map_lookup_entry directly.
1005 if (fkvm_mem_has_entry(lookup_entry
, guest_vm_map
, end
))
1014 fkvm_get_regs_regs(struct vcpu
*vcpu
, struct kvm_regs
*out
)
1016 out
->rax
= vcpu
->regs
[VCPU_REGS_RAX
];
1017 out
->rbx
= vcpu
->regs
[VCPU_REGS_RBX
];
1018 out
->rcx
= vcpu
->regs
[VCPU_REGS_RCX
];
1019 out
->rdx
= vcpu
->regs
[VCPU_REGS_RDX
];
1020 out
->rsi
= vcpu
->regs
[VCPU_REGS_RSI
];
1021 out
->rdi
= vcpu
->regs
[VCPU_REGS_RDI
];
1022 out
->rsp
= vcpu
->regs
[VCPU_REGS_RSP
];
1023 out
->rbp
= vcpu
->regs
[VCPU_REGS_RBP
];
1024 out
->r8
= vcpu
->regs
[VCPU_REGS_R8
];
1025 out
->r9
= vcpu
->regs
[VCPU_REGS_R9
];
1026 out
->r10
= vcpu
->regs
[VCPU_REGS_R10
];
1027 out
->r11
= vcpu
->regs
[VCPU_REGS_R11
];
1028 out
->r12
= vcpu
->regs
[VCPU_REGS_R12
];
1029 out
->r13
= vcpu
->regs
[VCPU_REGS_R13
];
1030 out
->r14
= vcpu
->regs
[VCPU_REGS_R14
];
1031 out
->r15
= vcpu
->regs
[VCPU_REGS_R15
];
1032 out
->rip
= vcpu
->regs
[VCPU_REGS_RIP
];
1033 out
->rflags
= vcpu
->vmcb
->save
.rflags
;
1037 fkvm_set_regs_regs(struct vcpu
*vcpu
, const struct kvm_regs
*in
)
1039 vcpu
->regs
[VCPU_REGS_RAX
] = in
->rax
;
1040 vcpu
->regs
[VCPU_REGS_RBX
] = in
->rbx
;
1041 vcpu
->regs
[VCPU_REGS_RCX
] = in
->rcx
;
1042 vcpu
->regs
[VCPU_REGS_RDX
] = in
->rdx
;
1043 vcpu
->regs
[VCPU_REGS_RSI
] = in
->rsi
;
1044 vcpu
->regs
[VCPU_REGS_RDI
] = in
->rdi
;
1045 vcpu
->regs
[VCPU_REGS_RSP
] = in
->rsp
;
1046 vcpu
->regs
[VCPU_REGS_RBP
] = in
->rbp
;
1047 vcpu
->regs
[VCPU_REGS_R8
] = in
->r8
;
1048 vcpu
->regs
[VCPU_REGS_R9
] = in
->r9
;
1049 vcpu
->regs
[VCPU_REGS_R10
] = in
->r10
;
1050 vcpu
->regs
[VCPU_REGS_R11
] = in
->r11
;
1051 vcpu
->regs
[VCPU_REGS_R12
] = in
->r12
;
1052 vcpu
->regs
[VCPU_REGS_R13
] = in
->r13
;
1053 vcpu
->regs
[VCPU_REGS_R14
] = in
->r14
;
1054 vcpu
->regs
[VCPU_REGS_R15
] = in
->r15
;
1055 vcpu
->regs
[VCPU_REGS_RIP
] = in
->rip
;
1056 vcpu
->vmcb
->save
.rflags
= in
->rflags
;
1060 fkvm_get_vmcb_dtable(struct vmcb_seg
*vmcb_seg
, struct kvm_dtable
*fkvm_dtable
)
1062 fkvm_dtable
->base
= vmcb_seg
->base
;
1063 fkvm_dtable
->limit
= vmcb_seg
->limit
;
1067 fkvm_set_vmcb_dtable(struct vmcb_seg
*vmcb_seg
, struct kvm_dtable
*fkvm_dtable
)
1069 vmcb_seg
->base
= fkvm_dtable
->base
;
1070 vmcb_seg
->limit
= fkvm_dtable
->limit
;
1074 fkvm_get_vmcb_seg(struct vmcb_seg
*vmcb_seg
, struct kvm_segment
*fkvm_seg
)
1076 fkvm_seg
->base
= vmcb_seg
->base
;
1077 fkvm_seg
->limit
= vmcb_seg
->limit
;
1078 fkvm_seg
->selector
= vmcb_seg
->selector
;
1080 if (vmcb_seg
->attrib
== 0)
1081 fkvm_seg
->unusable
= 1;
1083 fkvm_seg
->type
= (vmcb_seg
->attrib
& VMCB_SELECTOR_TYPE_MASK
);
1084 fkvm_seg
->s
= (vmcb_seg
->attrib
& VMCB_SELECTOR_S_MASK
) >> VMCB_SELECTOR_S_SHIFT
;
1085 fkvm_seg
->dpl
= (vmcb_seg
->attrib
& VMCB_SELECTOR_DPL_MASK
) >> VMCB_SELECTOR_DPL_SHIFT
;
1086 fkvm_seg
->present
= (vmcb_seg
->attrib
& VMCB_SELECTOR_P_MASK
) >> VMCB_SELECTOR_P_SHIFT
;
1087 fkvm_seg
->avl
= (vmcb_seg
->attrib
& VMCB_SELECTOR_AVL_MASK
) >> VMCB_SELECTOR_AVL_SHIFT
;
1088 fkvm_seg
->l
= (vmcb_seg
->attrib
& VMCB_SELECTOR_L_MASK
) >> VMCB_SELECTOR_L_SHIFT
;
1089 fkvm_seg
->db
= (vmcb_seg
->attrib
& VMCB_SELECTOR_DB_MASK
) >> VMCB_SELECTOR_DB_SHIFT
;
1090 fkvm_seg
->g
= (vmcb_seg
->attrib
& VMCB_SELECTOR_G_MASK
) >> VMCB_SELECTOR_G_SHIFT
;
1095 fkvm_set_vmcb_seg(struct vmcb_seg
*vmcb_seg
, struct kvm_segment
*fkvm_seg
)
1097 vmcb_seg
->base
= fkvm_seg
->base
;
1098 vmcb_seg
->limit
= fkvm_seg
->limit
;
1099 vmcb_seg
->selector
= fkvm_seg
->selector
;
1101 if (fkvm_seg
->unusable
)
1104 vmcb_seg
->attrib
= (fkvm_seg
->type
& VMCB_SELECTOR_TYPE_MASK
);
1105 vmcb_seg
->attrib
|= (fkvm_seg
->s
& 1) << VMCB_SELECTOR_S_SHIFT
;
1106 vmcb_seg
->attrib
|= (fkvm_seg
->dpl
& 3) << VMCB_SELECTOR_DPL_SHIFT
;
1107 vmcb_seg
->attrib
|= (fkvm_seg
->present
& 1) << VMCB_SELECTOR_P_SHIFT
;
1108 vmcb_seg
->attrib
|= (fkvm_seg
->avl
& 1) << VMCB_SELECTOR_AVL_SHIFT
;
1109 vmcb_seg
->attrib
|= (fkvm_seg
->l
& 1) << VMCB_SELECTOR_L_SHIFT
;
1110 vmcb_seg
->attrib
|= (fkvm_seg
->db
& 1) << VMCB_SELECTOR_DB_SHIFT
;
1111 vmcb_seg
->attrib
|= (fkvm_seg
->g
& 1) << VMCB_SELECTOR_G_SHIFT
;
1116 fkvm_get_cr8(struct vcpu
*vcpu
)
1118 // TODO: if cr8 has reserved bits inject GP Fault, return
1120 return (uint64_t) vcpu
->vmcb
->control
.v_tpr
;
1124 fkvm_set_cr8(struct vcpu
*vcpu
, uint64_t cr8
)
1126 // TODO: if cr8 has reserved bits inject GP Fault, return
1128 vcpu
->vmcb
->control
.v_tpr
= (uint8_t) cr8
;
1132 fkvm_get_efer(struct vcpu
*vcpu
)
1134 struct vmcb
*vmcb
= vcpu
->vmcb
;
1136 return vmcb
->save
.efer
& (~EFER_SVME
);
1140 fkvm_set_efer(struct vcpu
*vcpu
, uint64_t efer
)
1142 struct vmcb
*vmcb
= vcpu
->vmcb
;
1143 //TODO: if efer has reserved bits set: inject GP Fault
1145 if (vmcb
->save
.cr0
& CR0_PG
) { //If paging is enabled do not allow changes to LME
1146 if ((vmcb
->save
.efer
& EFER_LME
) != (efer
& EFER_LME
)) {
1147 printf("fkvm_set_efer: attempt to change LME while paging\n");
1148 //TODO: inject GP fault
1152 vmcb
->save
.efer
= efer
| EFER_SVME
;
1156 fkvm_get_regs_sregs(struct vcpu
*vcpu
, struct kvm_sregs
*out
)
1158 struct vmcb
*vmcb
= vcpu
->vmcb
;
1160 fkvm_get_vmcb_seg(&vmcb
->save
.cs
, &out
->cs
);
1161 fkvm_get_vmcb_seg(&vmcb
->save
.ds
, &out
->ds
);
1162 fkvm_get_vmcb_seg(&vmcb
->save
.es
, &out
->es
);
1163 fkvm_get_vmcb_seg(&vmcb
->save
.fs
, &out
->fs
);
1164 fkvm_get_vmcb_seg(&vmcb
->save
.gs
, &out
->gs
);
1165 fkvm_get_vmcb_seg(&vmcb
->save
.ss
, &out
->ss
);
1166 fkvm_get_vmcb_seg(&vmcb
->save
.tr
, &out
->tr
);
1167 fkvm_get_vmcb_seg(&vmcb
->save
.ldtr
, &out
->ldt
);
1169 fkvm_get_vmcb_dtable(&vmcb
->save
.idtr
, &out
->idt
);
1170 fkvm_get_vmcb_dtable(&vmcb
->save
.gdtr
, &out
->gdt
);
1172 out
->cr2
= vcpu
->cr2
;
1173 out
->cr3
= vcpu
->cr3
;
1175 out
->cr8
= fkvm_get_cr8(vcpu
);
1176 out
->efer
= fkvm_get_efer(vcpu
);
1177 /* TODO: apic_base */
1178 out
->cr0
= vmcb
->save
.cr0
;
1179 out
->cr4
= vmcb
->save
.cr4
;
1180 /* TODO: irq_pending, interrupt_bitmap, irq_summary */
1184 fkvm_set_regs_sregs(struct vcpu
*vcpu
, struct kvm_sregs
*in
)
1186 struct vmcb
*vmcb
= vcpu
->vmcb
;
1188 fkvm_set_vmcb_seg(&vmcb
->save
.cs
, &in
->cs
);
1189 fkvm_set_vmcb_seg(&vmcb
->save
.ds
, &in
->ds
);
1190 fkvm_set_vmcb_seg(&vmcb
->save
.es
, &in
->es
);
1191 fkvm_set_vmcb_seg(&vmcb
->save
.fs
, &in
->fs
);
1192 fkvm_set_vmcb_seg(&vmcb
->save
.gs
, &in
->gs
);
1193 fkvm_set_vmcb_seg(&vmcb
->save
.ss
, &in
->ss
);
1194 fkvm_set_vmcb_seg(&vmcb
->save
.tr
, &in
->tr
);
1195 fkvm_set_vmcb_seg(&vmcb
->save
.ldtr
, &in
->ldt
);
1197 vmcb
->save
.cpl
= (vmcb
->save
.cs
.attrib
>> VMCB_SELECTOR_DPL_SHIFT
) & 3;
1199 fkvm_set_vmcb_dtable(&vmcb
->save
.idtr
, &in
->idt
);
1200 fkvm_set_vmcb_dtable(&vmcb
->save
.gdtr
, &in
->gdt
);
1202 vcpu
->cr2
= in
->cr2
;
1203 vcpu
->cr3
= in
->cr3
;
1205 fkvm_set_cr8(vcpu
, in
->cr8
);
1206 fkvm_set_efer(vcpu
, in
->efer
);
1207 /* TODO: apic_base */
1208 vmcb
->save
.cr0
= in
->cr0
;
1209 vmcb
->save
.cr4
= in
->cr4
;
1210 /* TODO: irq_pending, interrupt_bitmap, irq_summary */
/*
 * fkvm_get_reg_msr: read one virtualized MSR into *data, dispatching on
 * the MSR index. TSC is offset by the VMCB tsc_offset; the syscall/
 * sysenter and MTRR families come from the VMCB save area / per-vcpu
 * MTRR shadow; unimplemented MSRs just log. NOTE(review): this paste is
 * missing several original lines (case labels such as the TSC/STAR/
 * LSTAR/CSTAR/KGSBASE/SF_MASK/EFER/MTRRcap labels, the break/return
 * statements, and the switch header) — do not reflow without the full
 * source.
 */
1214 fkvm_get_reg_msr(struct vcpu
*vcpu
, uint32_t index
, uint64_t *data
) {
1215 struct vmcb
*vmcb
= vcpu
->vmcb
;
/* Guest TSC = host TSC plus the VMCB's tsc_offset. */
1223 *data
= vmcb
->control
.tsc_offset
+ tsc
;
1228 *data
= vmcb
->save
.star
;
1233 *data
= vmcb
->save
.lstar
;
1238 *data
= vmcb
->save
.cstar
;
1243 *data
= vmcb
->save
.kernel_gs_base
;
1248 *data
= vmcb
->save
.sfmask
;
1252 case MSR_SYSENTER_CS_MSR
: {
1253 *data
= vmcb
->save
.sysenter_cs
;
1257 case MSR_SYSENTER_EIP_MSR
: {
1258 *data
= vmcb
->save
.sysenter_eip
;
1262 case MSR_SYSENTER_ESP_MSR
: {
1263 *data
= vmcb
->save
.sysenter_esp
;
1267 case MSR_DEBUGCTLMSR
: {
1268 printf("unimplemented at %d\n", __LINE__
);
1273 case MSR_PERFEVSEL0
... MSR_PERFEVSEL3
:
1274 case MSR_PERFCTR0
... MSR_PERFCTR3
: {
1275 printf("unimplemented at %d\n", __LINE__
);
/* EFER reads go through fkvm_get_efer() so SVME stays hidden. */
1281 *data
= fkvm_get_efer(vcpu
);
1285 case MSR_MC0_STATUS
: {
1286 printf("unimplemented at %d\n", __LINE__
);
1291 case MSR_MCG_STATUS
: {
1292 printf("unimplemented at %d\n", __LINE__
);
1298 printf("unimplemented at %d\n", __LINE__
);
1303 //TODO: MSR_IA32_UCODE_REV
1304 //TODO: MSR_IA32_UCODE_WRITE
/* Synthetic MTRRcap: WC + fixed ranges + FKVM_MTRR_NVAR variable ranges. */
1307 *data
= MTRR_CAP_WC
| MTRR_CAP_FIXED
| FKVM_MTRR_NVAR
;
1311 case MSR_MTRRdefType
: {
1312 *data
= vcpu
->mtrrs
.default_type
;
1316 case MSR_MTRR64kBase
... (MSR_MTRR64kBase
+ MTRR_N64K
- 1): {
1317 *data
= vcpu
->mtrrs
.mtrr64k
[index
- MSR_MTRR64kBase
];
1321 case MSR_MTRR16kBase
... (MSR_MTRR16kBase
+ MTRR_N16K
- 1): {
1322 *data
= vcpu
->mtrrs
.mtrr16k
[index
- MSR_MTRR16kBase
];
1326 case MSR_MTRR4kBase
... (MSR_MTRR4kBase
+ MTRR_N4K
- 1): {
1327 *data
= vcpu
->mtrrs
.mtrr4k
[index
- MSR_MTRR4kBase
];
1331 case MSR_MTRRVarBase
... (MSR_MTRRVarBase
+ FKVM_MTRR_NVAR
* 2 - 1): {
1332 *data
= vcpu
->mtrrs
.mtrrvar
[index
- MSR_MTRRVarBase
];
1336 case MSR_APICBASE
: {
1337 printf("unimplemented at %d\n", __LINE__
);
1342 case MSR_IA32_MISC_ENABLE
: {
1343 printf("unimplemented at %d\n", __LINE__
);
1348 //TODO: MSR_KVM_WALL_CLOCK
1349 //TODO: MSR_KVM_SYSTEM_TIME
/* default: unknown MSR — log the index (in hex). */
1352 printf("Did not get unimplemented msr: 0x%" PRIx32
"\n", index
);
/*
 * fkvm_get_regs_msrs: fill each entry's .data by reading the MSR named
 * by its .index via fkvm_get_reg_msr. NOTE(review): per-entry errors
 * from fkvm_get_reg_msr are ignored here; the return-type line is not
 * visible in this paste.
 */
1360 fkvm_get_regs_msrs(struct vcpu
*vcpu
, uint32_t nmsrs
, struct kvm_msr_entry
*entries
) {
1363 for (i
= 0; i
< nmsrs
; i
++) {
1364 fkvm_get_reg_msr(vcpu
, entries
[i
].index
, &entries
[i
].data
);
/*
 * fkvm_set_reg_msr: write one virtualized MSR, dispatching on the MSR
 * index. Writing the TSC stores the delta (guest value minus host TSC)
 * as the VMCB tsc_offset; EFER writes are funneled through
 * fkvm_set_efer so SVME is preserved. NOTE(review): as with the getter,
 * many original lines (case labels, breaks, switch header, return) are
 * missing from this paste.
 */
1369 fkvm_set_reg_msr(struct vcpu
*vcpu
, uint32_t index
, uint64_t data
) {
1370 struct vmcb
*vmcb
= vcpu
->vmcb
;
/* tsc_offset = requested guest TSC - current host TSC. */
1378 vmcb
->control
.tsc_offset
= data
- tsc
;
1383 vmcb
->save
.star
= data
;
1388 vmcb
->save
.lstar
= data
;
1393 vmcb
->save
.cstar
= data
;
1398 vmcb
->save
.kernel_gs_base
= data
;
1403 vmcb
->save
.sfmask
= data
;
1407 case MSR_SYSENTER_CS_MSR
: {
1408 vmcb
->save
.sysenter_cs
= data
;
1412 case MSR_SYSENTER_EIP_MSR
: {
1413 vmcb
->save
.sysenter_eip
= data
;
1417 case MSR_SYSENTER_ESP_MSR
: {
1418 vmcb
->save
.sysenter_esp
= data
;
1422 case MSR_DEBUGCTLMSR
: {
1423 printf("unimplemented at %d\n", __LINE__
);
1428 case MSR_PERFEVSEL0
... MSR_PERFEVSEL3
:
1429 case MSR_PERFCTR0
... MSR_PERFCTR3
: {
1430 printf("unimplemented at %d\n", __LINE__
);
/* EFER writes preserve SVME via fkvm_set_efer(). */
1436 fkvm_set_efer(vcpu
, data
);
1440 case MSR_MC0_STATUS
: {
1441 printf("unimplemented at %d\n", __LINE__
);
1446 case MSR_MCG_STATUS
: {
1447 printf("unimplemented at %d\n", __LINE__
);
1453 printf("unimplemented at %d\n", __LINE__
);
1458 //TODO: MSR_IA32_UCODE_REV
1459 //TODO: MSR_IA32_UCODE_WRITE
1461 case MSR_MTRRdefType
: {
1462 vcpu
->mtrrs
.default_type
= data
;
1466 case MSR_MTRR64kBase
... (MSR_MTRR64kBase
+ MTRR_N64K
- 1): {
1467 vcpu
->mtrrs
.mtrr64k
[index
- MSR_MTRR64kBase
] = data
;
1471 case MSR_MTRR16kBase
... (MSR_MTRR16kBase
+ MTRR_N16K
- 1): {
1472 vcpu
->mtrrs
.mtrr16k
[index
- MSR_MTRR16kBase
] = data
;
1476 case MSR_MTRR4kBase
... (MSR_MTRR4kBase
+ MTRR_N4K
- 1): {
1477 vcpu
->mtrrs
.mtrr4k
[index
- MSR_MTRR4kBase
] = data
;
1481 case MSR_MTRRVarBase
... (MSR_MTRRVarBase
+ FKVM_MTRR_NVAR
* 2 - 1): {
1482 vcpu
->mtrrs
.mtrrvar
[index
- MSR_MTRRVarBase
] = data
;
1486 case MSR_APICBASE
: {
1487 printf("unimplemented at %d\n", __LINE__
);
1492 case MSR_IA32_MISC_ENABLE
: {
1493 printf("unimplemented at %d\n", __LINE__
);
1498 //TODO: MSR_KVM_WALL_CLOCK
1499 //TODO: MSR_KVM_SYSTEM_TIME
/* default: unknown MSR — log the index (in hex). */
1502 printf("Did not set unimplemented msr: 0x%" PRIx32
"\n", index
);
/*
 * fkvm_set_regs_msrs: write each entry's .data to the MSR named by its
 * .index via fkvm_set_reg_msr. NOTE(review): per-entry errors are
 * ignored, mirroring fkvm_get_regs_msrs.
 */
1510 fkvm_set_regs_msrs(struct vcpu
*vcpu
, uint32_t nmsrs
, struct kvm_msr_entry
*entries
) {
1513 for (i
= 0; i
< nmsrs
; i
++) {
1514 fkvm_set_reg_msr(vcpu
, entries
[i
].index
, entries
[i
].data
);
/*
 * fkvm_get_regs syscall: copy a register set (GP regs, sregs, or an MSR
 * list, selected by uap->type) from the calling thread's vCPU out to
 * userspace. For MSRs a kernel scratch array is allocated, filled, and
 * copied back to the caller's buffer.
 */
1521 fkvm_get_regs(struct thread
*td
, struct fkvm_get_regs_args
*uap
)
1529 vcpu
= TD_GET_VCPU(td
);
1533 switch (uap
->type
) {
1535 case FKVM_REGS_TYPE_REGS
: {
1536 struct kvm_regs out
;
1537 fkvm_get_regs_regs(vcpu
, &out
);
1538 return copyout(&out
, uap
->regs
, sizeof(out
));
1541 case FKVM_REGS_TYPE_SREGS
: {
1542 struct kvm_sregs out
;
1543 fkvm_get_regs_sregs(vcpu
, &out
);
1544 return copyout(&out
, uap
->regs
, sizeof(out
));
1547 case FKVM_REGS_TYPE_MSRS
: {
1548 struct kvm_msr_entry
*user_entries
;
1549 struct kvm_msr_entry
*entries
;
1552 user_entries
= (struct kvm_msr_entry
*)uap
->regs
;
/* NOTE(review): size = sizeof(entry) * uap->n with no bound on n —
 * a huge n from userspace can overflow or exhaust M_DEVBUF. */
1554 size
= sizeof(*entries
) * uap
->n
;
1555 entries
= malloc(size
, M_DEVBUF
, M_WAITOK
|M_ZERO
);
1556 if (entries
== NULL
)
1559 error
= copyin(user_entries
, entries
, size
);
1561 printf("FKVM_REGS_TYPE_MSRS: unable to copyin entries\n");
1562 free(entries
, M_DEVBUF
);
1566 fkvm_get_regs_msrs(vcpu
, uap
->n
, entries
);
/* NOTE(review): copyout(9) is copyout(kaddr, uaddr, len); these
 * arguments look swapped — the correct calls above pass the kernel
 * buffer first (copyout(&out, uap->regs, ...)). Should very likely
 * be copyout(entries, user_entries, size). Confirm and fix. */
1568 error
= copyout(user_entries
, entries
, size
);
1570 printf("FKVM_REGS_TYPE_MSRS: unable to copyout entries\n");
1573 free(entries
, M_DEVBUF
);
/*
 * fkvm_set_regs syscall: copy a register set (GP regs, sregs, or an MSR
 * list, selected by uap->type) in from userspace and load it into the
 * calling thread's vCPU. Mirror image of fkvm_get_regs above.
 */
1583 fkvm_set_regs(struct thread
*td
, struct fkvm_set_regs_args
*uap
)
1588 vcpu
= TD_GET_VCPU(td
);
1592 switch (uap
->type
) {
1594 case FKVM_REGS_TYPE_REGS
: {
1596 error
= copyin(uap
->regs
, &in
, sizeof(in
));
1599 fkvm_set_regs_regs(vcpu
, &in
);
1603 case FKVM_REGS_TYPE_SREGS
: {
1604 struct kvm_sregs in
;
1605 error
= copyin(uap
->regs
, &in
, sizeof(in
));
1608 fkvm_set_regs_sregs(vcpu
, &in
);
1612 case FKVM_REGS_TYPE_MSRS
: {
1613 struct kvm_msr_entry
*user_entries
;
1614 struct kvm_msr_entry
*entries
;
1617 user_entries
= (struct kvm_msr_entry
*)uap
->regs
;
/* NOTE(review): unbounded uap->n, same concern as fkvm_get_regs. */
1619 size
= sizeof(*entries
) * uap
->n
;
1620 entries
= malloc(size
, M_DEVBUF
, M_WAITOK
|M_ZERO
);
1621 if (entries
== NULL
)
1624 error
= copyin(user_entries
, entries
, size
);
1626 printf("FKVM_REGS_TYPE_MSRS: unable to copyin entries\n");
1627 free(entries
, M_DEVBUF
);
1631 fkvm_set_regs_msrs(vcpu
, uap
->n
, entries
);
1633 free(entries
, M_DEVBUF
);
1642 /* This function can only be called with multiples of page sizes */
1643 /* vaddr as NULL overloads to fkvm_guest_check_range */
/*
 * fkvm_set_user_mem_region syscall: map a range of the calling
 * process's address space into the guest's physical address space.
 * Looks up the backing VM object for uap->vaddr in the user map, takes
 * a reference, and inserts it into the guest vmspace at uap->guest_pa.
 */
1645 fkvm_set_user_mem_region(struct thread
*td
, struct fkvm_set_user_mem_region_args
*uap
)
1647 struct guestvm
*guest_vm
;
1652 struct vmspace
*user_vm_space
;
1653 vm_map_t user_vm_map
;
1655 vm_object_t vm_object
;
1656 vm_pindex_t vm_object_pindex
;
1657 vm_ooffset_t vm_object_offset
;
1658 vm_prot_t throwaway_prot
;
1659 boolean_t throwaway_wired
;
1660 vm_map_entry_t lookup_entry
;
1664 guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
1665 if (guest_vm
== NULL
) {
1666 printf("PROC_GET_GUESTVM -> NULL\n");
/* Guest-physical range covered by this mapping (end is inclusive). */
1670 start
= uap
->guest_pa
;
1671 end
= uap
->guest_pa
+ uap
->size
- 1;
1672 printf("start: 0x%" PRIx64
" bytes\n", start
);
1673 printf("end: 0x%" PRIx64
" bytes\n", end
);
/* vaddr == 0 means "just validate the guest range", no mapping. */
1675 if (uap
->vaddr
== 0)
1676 return fkvm_guest_check_range(guest_vm
, start
, end
);
1678 user_vm_space
= td
->td_proc
->p_vmspace
;
1679 user_vm_map
= &user_vm_space
->vm_map
;
1680 printf("user vm space: %p\n", user_vm_space
);
1681 printf("user vm map: %p\n", user_vm_map
);
/* Resolve the user VA to its backing object; also locks the map. */
1683 error
= vm_map_lookup(&user_vm_map
, /* IN/OUT */
1685 VM_PROT_READ
|VM_PROT_WRITE
,
1686 &lookup_entry
, /* OUT */
1687 &vm_object
, /* OUT */
1688 &vm_object_pindex
, /* OUT */
1689 &throwaway_prot
, /* OUT */
1690 &throwaway_wired
); /* OUT */
1691 if (error
!= KERN_SUCCESS
) {
1692 printf("vm_map_lookup failed: %d\n", error
);
1696 /* TODO: Trust the user that the full region is valid.
1697 * This is very bad. See the note in fkvm_guest_check_range
1698 * on nesting vm lookups. */
1700 if (!fkvm_mem_has_entry(lookup_entry
, user_vm_map
, uap
->vaddr
+ uap
->size
)) {
1701 printf("end of range not contained in same vm map entry as start\n");
1706 printf("vm object: %p\n", vm_object
);
1707 printf(" size: %d pages\n", (int) vm_object
->size
);
1709 vm_object_offset
= IDX_TO_OFF(vm_object_pindex
);
1710 printf("vm_ooffset: 0x%" PRIx64
"\n", vm_object_offset
);
/* Hold the object across the insert below. */
1712 vm_object_reference(vm_object
); // TODO: this might be a mem leak
1714 vm_map_lookup_done(user_vm_map
, lookup_entry
);
/* Insert the referenced object into the guest's address space. */
1716 error
= vm_map_insert(&guest_vm
->sp
->vm_map
,
1721 VM_PROT_ALL
, VM_PROT_ALL
,
1723 if (error
!= KERN_SUCCESS
) {
1724 printf("vm_map_insert failed: %d\n", error
);
1726 case KERN_INVALID_ADDRESS
:
/*
 * fkvm_unset_user_mem_region syscall: remove a previously mapped
 * guest-physical range [guest_pa, guest_pa + size) from the guest's
 * vmspace.
 */
1739 fkvm_unset_user_mem_region(struct thread
*td
, struct fkvm_unset_user_mem_region_args
*uap
)
1741 struct guestvm
*guest_vm
;
1746 guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
1747 if (guest_vm
== NULL
) {
1748 printf("PROC_GET_GUESTVM -> NULL\n");
1755 vm_map_t guest_vm_map
;
1759 start
= uap
->guest_pa
;
/* NOTE(review): end is computed inclusively (pa + size - 1) but
 * vm_map_remove(9) takes an exclusive end address — as written the
 * last byte/page of the range may survive removal. Verify. */
1760 end
= uap
->guest_pa
+ uap
->size
- 1;
1761 printf("start: 0x%" PRIx64
" bytes\n", start
);
1762 printf("end: 0x%" PRIx64
" bytes\n", end
);
1764 guest_vm_map
= &guest_vm
->sp
->vm_map
;
1766 error
= vm_map_remove(guest_vm_map
, start
, end
);
1767 if (error
!= KERN_SUCCESS
)
/*
 * fkvm_create_vm syscall: allocate a guestvm, create its nested-paging
 * vmspace, record the nested CR3 (physical address of the guest PML4),
 * and attach the guestvm to the calling process.
 */
1774 fkvm_create_vm(struct thread
*td
, struct fkvm_create_vm_args
*uap
)
1776 struct guestvm
*guest_vm
;
1778 printf("SYSCALL : fkvm_create_vm\n");
1783 /* Allocate Guest VM */
1784 guest_vm
= fkvm_guestvm_alloc();
1786 /* Set up the vm address space */
1787 guest_vm
->sp
= fkvm_make_vmspace();
1788 if (guest_vm
->sp
== NULL
) {
1789 fkvm_guestvm_free(guest_vm
);
/* nested_cr3 = physical address of the guest vmspace's top-level
 * page table (PML4), programmed into the VMCB for nested paging. */
1792 guest_vm
->nested_cr3
= vtophys(vmspace_pmap(guest_vm
->sp
)->pm_pml4
);
1795 printf(" vm space: %p\n", guest_vm
->sp
);
1796 printf(" vm map: %p\n", &guest_vm
->sp
->vm_map
);
1797 printf(" ncr3: 0x%" PRIx64
"\n", guest_vm
->nested_cr3
);
1799 PROC_SET_GUESTVM(td
->td_proc
, guest_vm
);
1801 printf("fkvm_create_vm done\n");
1806 fkvm_destroy_vm(struct guestvm
*guest_vm
)
1808 /* Destroy the VCPUs */
1809 while (guest_vm
->nr_vcpus
> 0) {
1810 guest_vm
->nr_vcpus
--;
1811 fkvm_vcpu_destroy(guest_vm
->vcpus
[guest_vm
->nr_vcpus
]);
1812 guest_vm
->vcpus
[guest_vm
->nr_vcpus
] = NULL
;
1815 /* Destroy the vmspace */
1816 if (guest_vm
->sp
!= NULL
)
1817 fkvm_destroy_vmspace(guest_vm
->sp
);
1819 /* Destroy the Guest VM itself */
1820 fkvm_guestvm_free(guest_vm
);
1824 intercept_ioio(struct vcpu
*vcpu
, struct kvm_run
*kvm_run
, uint64_t ioio_info
, uint64_t rip
)
1826 struct vmcb
*vmcb
= vcpu
->vmcb
;
1828 kvm_run
->u
.io
.string
= (ioio_info
& STR_MASK
) >> STR_SHIFT
;
1830 kvm_run
->u
.io
.port
= ioio_info
>> PORT_SHIFT
;
1831 kvm_run
->u
.io
.in
= ioio_info
& TYPE_MASK
;
1833 kvm_run
->u
.io
.size
= (ioio_info
& SIZE_MASK
) >> SIZE_SHIFT
;
1835 /* We need to remove the Interrupt Shadow Flag from the VMCB (see 15.20.5 in AMD_Vol2) */
1836 vmcb
->control
.intr_shadow
= 0;
1838 kvm_run
->u
.io
.rep
= (ioio_info
& REP_MASK
) >> REP_SHIFT
;
1839 /* TODO: Research more into Direction Flag checked in KVM; DF bit in RFLAGS */
1841 /* set the next rip in the VMCB save area for now */
1842 /* TODO: Store rIP in vm_run structure until we absolutely need it */
1843 vcpu
->regs
[VCPU_REGS_RIP
] = rip
;
1849 intercept_shutdown(struct vcpu
*vcpu
)
1851 struct vmcb
*vmcb
= vcpu
->vmcb
;
1852 memset(vmcb
, 0, PAGE_SIZE
);
1853 fkvm_vmcb_init(vmcb
);
/*
 * fkvm_vm_run syscall: the main guest-execution loop. Copies a struct
 * kvm_run in from userspace, repeatedly enters the guest via
 * fkvm_vcpu_run(), and dispatches on the VMCB exit code. Exits that the
 * kernel can fully handle (INTR, resolvable NPT faults) loop back
 * (KVM_EXIT_CONTINUE); everything else sets an exit_reason and falls
 * out to userspace, where the updated kvm_run is copied back.
 * NOTE(review): this paste is missing many original lines (variable
 * declarations, break statements, several case labels such as the NMI
 * and HLT exits, and the epilogue) — do not reflow without full source.
 */
1857 fkvm_vm_run(struct thread
*td
, struct fkvm_vm_run_args
*uap
)
1860 struct guestvm
*guest_vm
;
1865 struct kvm_run kvm_run
;
1870 vcpu
= TD_GET_VCPU(td
);
1874 guest_vm
= vcpu
->guest_vm
;
1877 error
= copyin(uap
->run
, &kvm_run
, sizeof(struct kvm_run
));
/* Userspace may have changed the TPR while we were out. */
1881 fkvm_set_cr8(vcpu
, kvm_run
.cr8
);
1883 kvm_run
.exit_reason
= KVM_EXIT_CONTINUE
;
/* Re-enter the guest until an exit needs userspace attention. */
1885 while(kvm_run
.exit_reason
== KVM_EXIT_CONTINUE
) {
1886 fkvm_vcpu_run(vcpu
);
1888 switch (vmcb
->control
.exit_code
) {
1890 case VMCB_EXIT_EXCP_BASE
... (VMCB_EXIT_EXCP_BASE
+ 31): {
1893 excp_vector
= vmcb
->control
.exit_code
- VMCB_EXIT_EXCP_BASE
;
1895 printf("VMCB_EXIT_EXCP_BASE, exception vector: 0x%x\n",
1897 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
1902 case VMCB_EXIT_INTR
: {
1903 printf("VMCB_EXIT_INTR - nothing to do\n");
1904 /* Handled by host OS already */
1905 kvm_run
.exit_reason
= KVM_EXIT_CONTINUE
;
1909 case VMCB_EXIT_NPF
: {
1910 /* EXITINFO1 contains fault error code */
1911 /* EXITINFO2 contains the guest physical address causing the fault. */
1913 u_int64_t fault_code
;
1914 u_int64_t fault_gpa
;
1916 vm_prot_t fault_type
;
1920 fault_code
= vmcb
->control
.exit_info_1
;
1921 fault_gpa
= vmcb
->control
.exit_info_2
;
1922 kvm_run
.exit_reason
= KVM_EXIT_CONTINUE
;
1925 printf("VMCB_EXIT_NPF:\n");
1926 printf("gpa=0x%" PRIx64
"\n", fault_gpa
);
1927 printf("fault code=0x%" PRIx64
" [P=%x, R/W=%x, U/S=%x, I/D=%x]\n",
1929 (fault_code
& PGEX_P
) != 0,
1930 (fault_code
& PGEX_W
) != 0,
1931 (fault_code
& PGEX_U
) != 0,
1932 (fault_code
& PGEX_I
) != 0);
/* Map the PGEX_* error bits to the vm_fault access type. */
1934 if (fault_code
& PGEX_W
)
1935 fault_type
= VM_PROT_WRITE
;
1936 else if (fault_code
& PGEX_I
)
1937 fault_type
= VM_PROT_EXECUTE
;
1939 fault_type
= VM_PROT_READ
;
1941 fault_flags
= 0; /* TODO: is that right? */
/* Try to satisfy the nested fault from the guest vmspace. */
1942 rc
= vm_fault(&guest_vm
->sp
->vm_map
, (fault_gpa
& (~PAGE_MASK
)), fault_type
, fault_flags
);
/* Unbacked GPA: treat as MMIO and hand it to userspace. */
1943 if (rc
!= KERN_SUCCESS
) {
1944 printf("vm_fault failed: %d\n", rc
);
1945 kvm_run
.u
.mmio
.fault_gpa
= fault_gpa
;
1946 kvm_run
.u
.mmio
.rip
= vcpu
->regs
[VCPU_REGS_RIP
];
1947 kvm_run
.u
.mmio
.cs_base
= vmcb
->save
.cs
.base
;
1948 kvm_run
.exit_reason
= KVM_EXIT_MMIO
;
1953 case VMCB_EXIT_WRITE_CR8
:
1954 kvm_run
.exit_reason
= KVM_EXIT_SET_TPR
;
1957 kvm_run
.exit_reason
= KVM_EXIT_NMI
;
1960 kvm_run
.exit_reason
= KVM_EXIT_HLT
;
1962 case VMCB_EXIT_SHUTDOWN
:
1963 intercept_shutdown(vcpu
);
1964 kvm_run
.exit_reason
= KVM_EXIT_SHUTDOWN
;
1966 case VMCB_EXIT_IOIO
:
1967 error
= intercept_ioio(vcpu
, &kvm_run
,
1968 vmcb
->control
.exit_info_1
,
1969 vmcb
->control
.exit_info_2
);
1971 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
1973 kvm_run
.exit_reason
= KVM_EXIT_IO
;
1975 case VMCB_EXIT_MSR
: {
/* EXITINFO1: 0 = RDMSR, 1 = WRMSR; ECX holds the MSR index. */
1986 wrmsr
= vmcb
->control
.exit_info_1
;
1987 msr
= (uint32_t) vcpu
->regs
[VCPU_REGS_RCX
];
1989 printf("VMCB_EXIT_MSR:\n"
1990 " %s msr 0x%" PRIx64
"\n",
1991 wrmsr
? "write to" : "read from",
1992 vcpu
->regs
[VCPU_REGS_RCX
]);
1994 if (!wrmsr
) { /* rdmsr */
1995 error
= fkvm_get_reg_msr(vcpu
, msr
, &value
.full
);
1998 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
/* RDMSR result is returned split across EDX:EAX. */
2002 vcpu
->regs
[VCPU_REGS_RDX
] = (uint64_t) value
.split
.high
;
2003 vcpu
->regs
[VCPU_REGS_RAX
] = (uint64_t) value
.split
.low
;
/* WRMSR operand arrives split across EDX:EAX. */
2006 value
.split
.high
= (uint32_t) vcpu
->regs
[VCPU_REGS_RDX
];
2007 value
.split
.low
= (uint32_t) vcpu
->regs
[VCPU_REGS_RAX
];
2009 error
= fkvm_set_reg_msr(vcpu
, msr
, value
.full
);
2012 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
/* Skip the 2-byte RDMSR/WRMSR instruction. */
2018 vcpu
->regs
[VCPU_REGS_RIP
] += 2;
2021 case VMCB_EXIT_CPUID
: {
2022 kvm_run
.u
.cpuid
.fn
= (uint32_t) vcpu
->regs
[VCPU_REGS_RAX
];
2023 kvm_run
.exit_reason
= KVM_EXIT_CPUID
;
2026 case VMCB_EXIT_WBINVD
: {
2027 /* TODO: stop ignoring this intercept when we have more than 1-cpu guests */
2028 vcpu
->regs
[VCPU_REGS_RIP
] += 2;
2031 case VMCB_EXIT_READ_CR0
:
2032 case VMCB_EXIT_READ_CR3
:
2033 case VMCB_EXIT_READ_CR4
:
2034 case VMCB_EXIT_READ_CR8
:
2035 case VMCB_EXIT_WRITE_CR0
:
2036 case VMCB_EXIT_WRITE_CR3
:
2037 case VMCB_EXIT_WRITE_CR4
:
2038 case VMCB_EXIT_READ_DR0
:
2039 case VMCB_EXIT_READ_DR1
:
2040 case VMCB_EXIT_READ_DR2
:
2041 case VMCB_EXIT_READ_DR3
:
2042 case VMCB_EXIT_WRITE_DR0
:
2043 case VMCB_EXIT_WRITE_DR1
:
2044 case VMCB_EXIT_WRITE_DR2
:
2045 case VMCB_EXIT_WRITE_DR3
:
2046 case VMCB_EXIT_WRITE_DR5
:
2047 case VMCB_EXIT_WRITE_DR7
:
2049 case VMCB_EXIT_INIT
:
2050 case VMCB_EXIT_VINTR
:
2051 case VMCB_EXIT_CR0_SEL_WRITE
:
2052 case VMCB_EXIT_INVD
:
2053 case VMCB_EXIT_INVLPG
:
2054 case VMCB_EXIT_INVLPGA
:
2055 case VMCB_EXIT_TASK_SWITCH
:
2056 case VMCB_EXIT_VMRUN
:
2057 case VMCB_EXIT_VMMCALL
:
2058 case VMCB_EXIT_VMLOAD
:
2059 case VMCB_EXIT_VMSAVE
:
2060 case VMCB_EXIT_STGI
:
2061 case VMCB_EXIT_CLGI
:
2062 case VMCB_EXIT_SKINIT
:
2063 case VMCB_EXIT_MONITOR
:
2064 case VMCB_EXIT_MWAIT_UNCOND
:
2066 printf("Unhandled vmexit:\n"
2067 " code: 0x%" PRIx64
"\n"
2068 " info1: 0x%" PRIx64
"\n"
2069 " info2: 0x%" PRIx64
"\n",
2070 vmcb
->control
.exit_code
,
2071 vmcb
->control
.exit_info_1
,
2072 vmcb
->control
.exit_info_2
);
2075 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
/* Cap consecutive in-kernel re-entries before yielding to userspace. */
2079 if (num_runs
== 20) //TODO: make this a #define
2085 /* we're going up to userspace - set the out fields of kvm_run: */
2087 #define IF_MASK 0x00000200
2088 kvm_run
.if_flag
= !!(vcpu
->vmcb
->save
.rflags
& IF_MASK
);
2090 /* TODO: kvm adds a check to see if in-kernel interrupt queues are empty */
2091 kvm_run
.ready_for_interrupt_injection
= kvm_run
.if_flag
&&
2092 !vcpu
->vmcb
->control
.intr_shadow
;
2094 /* TODO kvm_run.ready_for_nmi_injection = ...; */
2096 kvm_run
.cr8
= fkvm_get_cr8(vcpu
);
2099 /* TODO: check copyout ret val */
2100 copyout(&kvm_run
, uap
->run
, sizeof(struct kvm_run
));
2101 // printf("sizeof(struct kvm_run) = %" PRIu64 "\n", sizeof(struct kvm_run));
/*
 * fkvm_create_vcpu syscall: create a new vCPU in the calling process's
 * guest VM, register it with the guestvm, and bind it to the calling
 * thread so later syscalls (vm_run, get/set_regs) can find it.
 * NOTE(review): the error path after fkvm_vcpu_create is not visible in
 * this paste; a NULL vcpu would be registered as-is.
 */
2107 fkvm_create_vcpu(struct thread
*td
, struct fkvm_create_vcpu_args
*uap
)
2109 struct guestvm
*guest_vm
;
2115 guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
2116 if (guest_vm
== NULL
) {
2117 printf("PROC_GET_GUESTVM -> NULL\n");
2122 printf("fkvm_create_vcpu: td = %p\n", td
);
2123 vcpu
= fkvm_vcpu_create(guest_vm
);
2124 fkvm_guestvm_add_vcpu(guest_vm
, vcpu
);
/* Bind the vCPU to this thread: one vCPU per driving thread. */
2126 TD_SET_VCPU(td
, vcpu
);
2127 printf("fkvm_create_vcpu: vcpu = %p\n", vcpu
);
/*
 * Probe the CPU for AMD SVM support: check CPUID extended leaf range,
 * the SVM feature flag, the VM_CR MSR disable bit, and (if disabled)
 * whether the BIOS lock is unlockable. Returns KERN_SUCCESS when SVM
 * can be used, KERN_FAILURE otherwise.
 */
2132 fkvm_check_cpu_extension(void)
2138 printf("fkvm_check_cpu_extension\n");
2140 /* Assumption: the architecture supports the cpuid instruction */
2142 /* Check if CPUID extended function 8000_0001h is supported. */
2143 do_cpuid(0x80000000, regs
);
2144 cpu_exthigh
= regs
[0];
2146 printf("cpu_exthigh = %u\n", cpu_exthigh
);
2148 if(cpu_exthigh
>= 0x80000001) {
2149 /* Execute CPUID extended function 8000_0001h */
2150 do_cpuid(0x80000001, regs
);
2151 printf("EAX = %u\n", regs
[0]);
/* NOTE(review): per the AMD manual, SVM support is reported in
 * CPUID Fn8000_0001 ECX bit 2 (regs[2] & 0x4), not EAX bit 1 as
 * tested here — verify against hardware/docs before relying on it. */
2153 if((regs
[0] & 0x2) == 0) { /* Check SVM bit */
2154 printf("SVM not available\n");
2155 goto fail
; /* SVM not available */
2158 vmcr
= rdmsr(0xc0010114); /* Read VM_CR MSR */
/* NOTE(review): VM_CR.SVMDIS is bit 4 (0x10); bit 3 (0x8) is the
 * LOCK bit — this mask looks off by one bit, confirm. */
2159 if((vmcr
& 0x8) == 0) { /* Check SVMDIS bit */
2160 printf("vmcr = %" PRIx64
"\n", vmcr
);
2161 printf("SVM allowed\n");
2162 return KERN_SUCCESS
; /* SVM allowed */
2165 /* Execute CPUID extended function 8000_000ah */
2166 do_cpuid(0x8000000a, regs
);
2167 if((regs
[3] & 0x2) == 0) { /* Check SVM_LOCK bit */
2168 /* SVM disabled at bios; not unlockable.
2169 * User must change a BIOS setting to enable SVM.
2171 printf("EDX = %u\n", regs
[3]);
2172 printf("SVM disabled at bios\n");
2176 * SVM may be unlockable;
2177 * consult the BIOS or TPM to obtain the key.
2179 printf("EDX = %u\n", regs
[3]);
2180 printf("SVM maybe unlockable\n");
2185 return KERN_FAILURE
;
2189 fkvm_proc_exit(void *arg
, struct proc
*p
)
2191 struct guestvm
*guest_vm
;
2193 guest_vm
= PROC_GET_GUESTVM(p
);
2194 if (guest_vm
== NULL
)
2197 fkvm_destroy_vm(guest_vm
);
2198 PROC_SET_GUESTVM(p
, NULL
);
/*
 * Module load hook (SYSINIT): verify SVM support, register the
 * process-exit eventhandler, allocate and initialize the host-save
 * area and the I/O / MSR permission maps, enable EFER.SVME on the
 * host, and program MSR_VM_HSAVE_PA with the host save area's physical
 * address. NOTE(review): the allocation results are used without
 * visible NULL checks in this paste.
 */
2202 fkvm_load(void *unused
)
2207 printf("fkvm_load\n");
2208 printf("sizeof(struct vmcb) = %" PRIx64
"\n", sizeof(struct vmcb
));
2214 /* check if SVM is supported */
2215 error
= fkvm_check_cpu_extension();
2216 if(error
!= KERN_SUCCESS
) {
2217 printf("ERROR: SVM extension not available\n");
/* Clean up guest VMs when their owning process exits. */
2221 exit_tag
= EVENTHANDLER_REGISTER(process_exit
, fkvm_proc_exit
, NULL
,
2222 EVENTHANDLER_PRI_ANY
);
2224 /* allocate structures */
2225 hsave_area
= fkvm_hsave_area_alloc();
2226 iopm
= fkvm_iopm_alloc();
2227 msrpm
= fkvm_msrpm_alloc();
2229 /* Initialize structures */
2230 fkvm_hsave_area_init(hsave_area
);
2231 fkvm_iopm_init(iopm
);
2232 fkvm_msrpm_init(msrpm
);
2234 /* Enable SVM in EFER */
2235 efer
= rdmsr(MSR_EFER
);
2236 printf("EFER = %" PRIx64
"\n", efer
);
2237 wrmsr(MSR_EFER
, efer
| EFER_SVME
);
2238 efer
= rdmsr(MSR_EFER
);
2239 printf("new EFER = %" PRIx64
"\n", efer
);
2241 /* Write Host save address in MSR_VM_HSAVE_PA */
2242 wrmsr(MSR_VM_HSAVE_PA
, vtophys(hsave_area
));
2246 SYSINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_load
, NULL
);
2249 fkvm_unload(void *unused
)
2251 printf("fkvm_unload\n");
2254 printf("fkvm_unload: fkvm not loaded");
2258 EVENTHANDLER_DEREGISTER(process_exit
, exit_tag
);
2260 if (msrpm
!= NULL
) {
2261 fkvm_msrpm_free(iopm
);
2265 fkvm_iopm_free(iopm
);
2268 if (hsave_area
!= NULL
) {
2269 fkvm_hsave_area_free(hsave_area
);
2273 SYSUNINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_unload
, NULL
);