2 * Copyright (c) 2008 Brent Stephens <brents@rice.edu>
3 * Copyright (c) 2008 Diego Ongaro <diego.ongaro@rice.edu>
4 * Copyright (c) 2008 Kaushik Kumar Ram <kaushik@rice.edu>
5 * Copyright (c) 2008 Oleg Pesok <olegpesok@gmail.com>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/sysproto.h>
40 #include <sys/eventhandler.h>
43 #include <vm/vm_extern.h>
44 #include <vm/vm_map.h>
45 #include <vm/vm_object.h>
46 #include <vm/vm_param.h>
47 #include <machine/_inttypes.h>
48 #include <machine/specialreg.h>
49 #include <machine/segments.h>
50 #include <machine/vmcb.h>
53 /* Definitions for Port IO */
61 #define PORT_MASK 0xFFFF0000
62 #define ADDR_MASK (7 << ADDR_SHIFT)
63 #define SIZE_MASK (7 << SIZE_SHIFT)
64 #define REP_MASK (1 << REP_SHIFT)
65 #define STR_MASK (1 << STR_SHIFT)
66 #define TYPE_MASK (1 << TYPE_SHIFT)
67 /* End Definitions for Port IO */
69 #define PMIO_PAGE_OFFSET 1
71 #define IOPM_SIZE (8*1024 + 1) /* TODO: ensure that this need not be 12Kbtes, not just 8Kb+1 */
72 #define MSRPM_SIZE (8*1024)
76 static int fkvm_loaded
= 0;
78 static void *iopm
= NULL
; /* Should I allocate a vm_object_t instead? */
79 static void *msrpm
= NULL
; /* Should I allocate a vm_object_t instead? */
81 static void *hsave_area
= NULL
;
83 static eventhandler_tag exit_tag
;
111 unsigned long vmcb_pa
;
113 unsigned long regs
[NR_VCPU_REGS
];
114 u_int64_t host_gs_base
;
118 struct guestvm
*guest_vm
;
122 struct vcpu
*vcpus
[MAX_VCPUS
];
126 u_int64_t nested_cr3
;
130 #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
131 #define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"
132 #define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb"
133 #define SVM_CLGI ".byte 0x0f, 0x01, 0xdd"
134 #define SVM_STGI ".byte 0x0f, 0x01, 0xdc"
135 #define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf"
137 static inline struct vcpu
*
138 TD_GET_VCPU(struct thread
*td
)
143 printf("TD_GET_VCPU -> NULL\n");
148 TD_SET_VCPU(struct thread
*td
, struct vcpu
*vcpu
)
153 static inline struct guestvm
*
154 PROC_GET_GUESTVM(struct proc
*proc
)
156 struct guestvm
*guestvm
;
157 guestvm
= proc
->p_guestvm
;
162 PROC_SET_GUESTVM(struct proc
*proc
, struct guestvm
*guestvm
)
164 proc
->p_guestvm
= guestvm
; \
168 print_vmcb_seg(struct vmcb_seg
* vmcb_seg
, const char* name
)
170 printf("%s Selector\n", name
);
171 printf("Selector : %" PRIx16
"\n", vmcb_seg
->selector
);
172 printf("Attributes : %" PRIx16
"\n", vmcb_seg
->attrib
);
173 printf("Limit : %" PRIx32
"\n", vmcb_seg
->limit
);
174 printf("Base Address : %" PRIx64
"\n", vmcb_seg
->base
);
179 print_vmcb(struct vmcb
*vmcb
)
181 printf("VMCB Control Area\n");
182 printf("Intercept CR Reads : %" PRIx16
"\n", vmcb
->control
.intercept_cr_reads
);
183 printf("Intercept CR Writes : %" PRIx16
"\n", vmcb
->control
.intercept_cr_writes
);
184 printf("Intercept DR Reads : %" PRIx16
"\n", vmcb
->control
.intercept_dr_reads
);
185 printf("Intercept DR Writes : %" PRIx16
"\n", vmcb
->control
.intercept_dr_writes
);
186 printf("Intercept Exceptions : %" PRIx32
"\n", vmcb
->control
.intercept_exceptions
);
187 printf("Intercepts : %" PRIx64
"\n", vmcb
->control
.intercepts
);
188 printf("Reserved 1: \n");
189 for(int i
=0; i
< 44; i
++) {
190 printf("%" PRIx8
"", vmcb
->control
.reserved_1
[i
]); /* Should be Zero */
193 printf("IOPM Base PA : %" PRIx64
"\n", vmcb
->control
.iopm_base_pa
);
194 printf("MSRPM Base PA : %" PRIx64
"\n", vmcb
->control
.msrpm_base_pa
);
195 printf("TSC Offset : %" PRIx64
"\n", vmcb
->control
.tsc_offset
);
196 printf("Guest ASID : %" PRIx32
"\n", vmcb
->control
.guest_asid
);
197 printf("TLB Control : %" PRIx8
"\n", vmcb
->control
.tlb_control
);
198 printf("Reserved 2 : \n");
199 for(int i
=0; i
< 3; i
++) {
200 printf("%" PRIx8
"", vmcb
->control
.reserved_1
[i
]); /* Should be Zero */
203 printf("Virtual TPR : %" PRIx8
"\n", vmcb
->control
.v_tpr
);
204 printf("Virtual IRQ : %" PRIx8
"\n", vmcb
->control
.v_irq
);
205 printf("Virtual Interrupt : %" PRIx8
"\n", vmcb
->control
.v_intr
);
206 printf("Virtual Interrupt Masking: %" PRIx8
"\n", vmcb
->control
.v_intr_masking
);
207 printf("Virtual Interrupt Vector : %" PRIx8
"\n", vmcb
->control
.v_intr_vector
);
208 printf("Reserved 6 : \n");
209 for(int i
=0; i
< 3; i
++) {
210 printf("%" PRIx8
"", vmcb
->control
.reserved_6
[i
]); /* Should be Zero */
213 printf("Interrupt Shadow : %" PRIx8
"\n", vmcb
->control
.intr_shadow
);
214 printf("Reserved 7 : \n");
215 for(int i
=0; i
< 7; i
++) {
216 printf("%" PRIx8
"", vmcb
->control
.reserved_7
[i
]); /* Should be Zero */
219 printf("Exit Code : %" PRIx64
"\n", vmcb
->control
.exit_code
);
220 printf("Exit Info 1 : %" PRIx64
"\n", vmcb
->control
.exit_info_1
);
221 printf("Exit Info 2 : %" PRIx64
"\n", vmcb
->control
.exit_info_2
);
222 printf("Exit Interrupt Info : %" PRIx32
"\n", vmcb
->control
.exit_int_info
);
223 printf("Exit Interrupt Info Err Code: %" PRIx32
"\n", vmcb
->control
.exit_int_info_err_code
);
224 printf("Nested Control : %" PRIx64
"\n", vmcb
->control
.nested_ctl
);
225 printf("Reserved 8 : \n");
226 for(int i
=0; i
< 16; i
++) {
227 printf("%" PRIx8
"", vmcb
->control
.reserved_8
[i
]); /* Should be Zero */
230 printf("Event Injection : %" PRIx64
"\n", vmcb
->control
.event_inj
);
231 printf("Nested CR3 : %" PRIx64
"\n", vmcb
->control
.nested_cr3
);
232 printf("LBR Virtualization Enable: %" PRIx64
"\n", vmcb
->control
.lbr_virt_enable
);
233 printf("Reserved 9 : \n");
234 for(int i
=0; i
< 832; i
++) {
235 printf("%" PRIx8
"", vmcb
->control
.reserved_9
[i
]); /* Should be Zero */
241 printf("VMCB Save Area\n");
242 print_vmcb_seg(&(vmcb
->save
.es
), "ES");
243 print_vmcb_seg(&(vmcb
->save
.es
), "CS");
244 print_vmcb_seg(&(vmcb
->save
.es
), "SS");
245 print_vmcb_seg(&(vmcb
->save
.es
), "DS");
246 print_vmcb_seg(&(vmcb
->save
.es
), "FS");
247 print_vmcb_seg(&(vmcb
->save
.es
), "GS");
248 print_vmcb_seg(&(vmcb
->save
.es
), "GDTR");
249 print_vmcb_seg(&(vmcb
->save
.es
), "LDTR");
250 print_vmcb_seg(&(vmcb
->save
.es
), "IDTR");
251 print_vmcb_seg(&(vmcb
->save
.es
), "TR");
252 printf("Reserved 1 : \n");
253 for(int i
=0; i
< 43; i
++) {
254 printf("%" PRIx8
"", vmcb
->save
.reserved_1
[i
]); /* Should be Zero */
257 printf("Current Processor Level : %" PRIx8
"\n", vmcb
->save
.cpl
);
258 printf("Reserved 2 : \n");
259 for(int i
=0; i
< 4; i
++) {
260 printf("%" PRIx8
"", vmcb
->save
.reserved_2
[i
]); /* Should be Zero */
263 printf("EFER : %" PRIx64
"\n", vmcb
->save
.efer
);
264 printf("Reserved 3 : \n");
265 for(int i
=0; i
< 112; i
++) {
266 printf("%" PRIx8
"", vmcb
->save
.reserved_3
[i
]); /* Should be Zero */
269 printf("Control Register 4 : %" PRIx64
"\n", vmcb
->save
.cr4
);
270 printf("Control Register 3 : %" PRIx64
"\n", vmcb
->save
.cr3
);
271 printf("Control Register 0 : %" PRIx64
"\n", vmcb
->save
.cr0
);
272 printf("Debug Register 7 : %" PRIx64
"\n", vmcb
->save
.dr7
);
273 printf("Debug Register 6 : %" PRIx64
"\n", vmcb
->save
.dr6
);
274 printf("RFlags : %" PRIx64
"\n", vmcb
->save
.rflags
);
275 printf("RIP : %" PRIx64
"\n", vmcb
->save
.rip
);
276 printf("Reserved 4 : \n");
277 for(int i
=0; i
< 88; i
++) {
278 printf("%" PRIx8
"", vmcb
->save
.reserved_4
[i
]); /* Should be Zero */
281 printf("RSP : %" PRIx64
"\n", vmcb
->save
.rsp
);
282 printf("Reserved 5 : \n");
283 for(int i
=0; i
< 24; i
++) {
284 printf("%" PRIx8
"", vmcb
->save
.reserved_5
[i
]); /* Should be Zero */
287 printf("RAX : %" PRIx64
"\n", vmcb
->save
.rax
);
288 printf("STAR : %" PRIx64
"\n", vmcb
->save
.star
);
289 printf("LSTAR : %" PRIx64
"\n", vmcb
->save
.lstar
);
290 printf("CSTAR : %" PRIx64
"\n", vmcb
->save
.cstar
);
291 printf("SFMASK : %" PRIx64
"\n", vmcb
->save
.sfmask
);
292 printf("Kernel GS Base : %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
293 printf("SYSENTER CS : %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
294 printf("SYSENTER ESP : %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
295 printf("SYSENTER EIP : %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
296 printf("Control Register 2 : %" PRIx64
"\n", vmcb
->save
.cr2
);
297 printf("Reserved 6 : \n");
298 for(int i
=0; i
< 32; i
++) {
299 printf("%" PRIx8
"", vmcb
->save
.reserved_6
[i
]); /* Should be Zero */
302 printf("Global PAT : %" PRIx64
"\n", vmcb
->save
.g_pat
);
303 printf("Debug Control : %" PRIx64
"\n", vmcb
->save
.dbg_ctl
);
304 printf("BR From : %" PRIx64
"\n", vmcb
->save
.br_from
);
305 printf("BR To : %" PRIx64
"\n", vmcb
->save
.br_to
);
306 printf("Last Exception From : %" PRIx64
"\n", vmcb
->save
.last_excp_from
);
307 printf("Last Exception To : %" PRIx64
"\n", vmcb
->save
.last_excp_to
);
314 print_tss_desc(struct system_segment_descriptor
*tss_desc
)
316 printf("TSS desc @ %p:\n", tss_desc
);
317 printf("sd_lolimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lolimit
);
318 printf("sd_lobase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lobase
);
319 printf("sd_type: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_type
);
320 printf("sd_dpl: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_dpl
);
321 printf("sd_p: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_p
);
322 printf("sd_hilimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hilimit
);
323 printf("sd_xx0: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx0
);
324 printf("sd_gran: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_gran
);
325 printf("sd_hibase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hibase
);
326 printf("sd_xx1: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx1
);
327 printf("sd_mbz: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_mbz
);
328 printf("sd_xx2: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx2
);
333 print_tss(struct system_segment_descriptor
*tss_desc
)
339 base
= (u_int32_t
*) ((((u_int64_t
) tss_desc
->sd_hibase
) << 24) | ((u_int64_t
) tss_desc
->sd_lobase
));
340 limit
= ((tss_desc
->sd_hilimit
<< 16) | tss_desc
->sd_lolimit
) / 4;
342 printf("TSS: @ %p\n", base
);
343 for (i
= 0; i
<= limit
; i
++)
344 printf("%x: 0x%" PRIx32
"\n", i
, base
[i
]);
350 print_vmcb_save_area(struct vmcb
*vmcb
)
352 printf("VMCB save area:\n");
353 printf(" cs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
354 vmcb
->save
.cs
.selector
,
355 vmcb
->save
.cs
.attrib
,
358 printf(" fs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
359 vmcb
->save
.fs
.selector
,
360 vmcb
->save
.fs
.attrib
,
363 printf(" gs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
364 vmcb
->save
.gs
.selector
,
365 vmcb
->save
.gs
.attrib
,
368 printf(" tr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
369 vmcb
->save
.tr
.selector
,
370 vmcb
->save
.tr
.attrib
,
373 printf(" ldtr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
374 vmcb
->save
.ldtr
.selector
,
375 vmcb
->save
.ldtr
.attrib
,
376 vmcb
->save
.ldtr
.limit
,
377 vmcb
->save
.ldtr
.base
);
378 printf(" rip: %" PRIx64
"\n", vmcb
->save
.rip
);
379 printf(" kernel_gs_base: %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
380 printf(" star: %" PRIx64
"\n", vmcb
->save
.star
);
381 printf(" lstar: %" PRIx64
"\n", vmcb
->save
.lstar
);
382 printf(" cstar: %" PRIx64
"\n", vmcb
->save
.cstar
);
383 printf(" sfmask: %" PRIx64
"\n", vmcb
->save
.sfmask
);
384 printf(" sysenter_cs: %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
385 printf(" sysenter_esp: %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
386 printf(" sysenter_eip: %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
391 vmrun_assert(struct vmcb
*vmcb
)
393 #define A(cond) do { if ((cond)) { printf("Error: assertion not met on line %d\n", __LINE__); bad = 1; } } while (0)
399 // The following are illegal:
402 A((vmcb
->save
.efer
& 0x0000000000001000) == 0);
404 // CR0.CD is zero and CR0.NW is set
405 A( ((vmcb
->save
.cr0
& 0x0000000040000000) == 0) &&
406 ((vmcb
->save
.cr0
& 0x0000000020000000) != 0));
408 // CR0[63:32] are not zero.
409 A((vmcb
->save
.cr0
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
411 // Any MBZ bit of CR3 is set.
412 A((vmcb
->save
.cr3
& 0xFFF0000000000000) != 0);
414 // CR4[63:11] are not zero.
415 A((vmcb
->save
.cr4
& 0xFFFFFFFFFFFFF800) == 0xFFFFFFFFFFFFF800);
417 // DR6[63:32] are not zero.
418 A((vmcb
->save
.dr6
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
420 // DR7[63:32] are not zero.
421 A((vmcb
->save
.dr7
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
423 // EFER[63:15] are not zero.
424 A((vmcb
->save
.efer
& 0xFFFFFFFFFFFF8000) == 0xFFFFFFFFFFF8000);
426 // EFER.LMA or EFER.LME is non-zero and this processor does not support long mode.
427 //// A((vmcb->save.efer & 0x0000000000000500) != 0);
429 // EFER.LME and CR0.PG are both set and CR4.PAE is zero.
430 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
431 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
432 ((vmcb
->save
.cr4
& 0x0000000000000020) != 0));
434 // EFER.LME and CR0.PG are both non-zero and CR0.PE is zero.
435 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
436 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
437 ((vmcb
->save
.cr0
& 0x0000000000000001) == 0));
439 // EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
440 // cs.attrib = concat 55-52 and 47-40 (p372 v2)
441 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
442 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
443 ((vmcb
->save
.cr4
& 0x0000000000000020) != 0) &&
444 ((vmcb
->save
.cs
.attrib
& 0x0200) != 0) &&
445 ((vmcb
->save
.cs
.attrib
& 0x0400) != 0));
447 // The VMRUN intercept bit is clear.
448 A((vmcb
->control
.intercepts
& 0x0000000100000000) == 0);
450 // The MSR or IOIO intercept tables extend to a physical address that is
451 // greater than or equal to the maximum supported physical address.
453 // Illegal event injection (see Section 15.19 on page 391).
455 // ASID is equal to zero.
456 A(vmcb
->control
.guest_asid
== 0);
458 // VMRUN can load a guest value of CR0 with PE = 0 but PG = 1, a
459 // combination that is otherwise illegal (see Section 15.18).
461 // In addition to consistency checks, VMRUN and #VMEXIT canonicalize (i.e.,
462 // sign-extend to 63 bits) all base addresses in the segment registers
463 // that have been loaded.
471 fkvm_vcpu_run(struct vcpu
*vcpu
)
480 u_short ldt_selector
;
482 unsigned long host_cr2
;
483 unsigned long host_dr6
;
484 unsigned long host_dr7
;
486 struct system_segment_descriptor
*tss_desc
;
491 printf("begin fkvm_vcpu_run\n");
495 if (vmrun_assert(vmcb
))
498 tss_desc
= (struct system_segment_descriptor
*) (&gdt
[GPROC0_SEL
]);
499 sel
= GSEL(GPROC0_SEL
, SEL_KPL
);
501 // printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64 "\n", sel);
502 // print_tss_desc(tss_desc);
503 // print_tss(tss_desc);
505 print_vmcb_save_area(vmcb
);
506 printf("vcpu->regs[VCPU_REGS_RIP]: 0x%lx\n", vcpu
->regs
[VCPU_REGS_RIP
]);
509 vmcb
->save
.rax
= vcpu
->regs
[VCPU_REGS_RAX
];
510 vmcb
->save
.rsp
= vcpu
->regs
[VCPU_REGS_RSP
];
511 vmcb
->save
.rip
= vcpu
->regs
[VCPU_REGS_RIP
];
513 /* meh: kvm has pre_svm_run(svm); */
515 vcpu
->host_gs_base
= rdmsr(MSR_GSBASE
);
516 // printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);
520 ldt_selector
= rldt();
521 // printf("fs selector: %hx\n", fs_selector);
522 // printf("gs selector: %hx\n", gs_selector);
523 // printf("ldt selector: %hx\n", ldt_selector);
530 vmcb
->save
.cr2
= vcpu
->cr2
;
532 // TODO: something with apic_base?
534 /* meh: dr7? db_regs? */
536 // printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
537 // printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
538 // printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
539 // printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));
541 star
= rdmsr(MSR_STAR
);
542 lstar
= rdmsr(MSR_LSTAR
);
543 cstar
= rdmsr(MSR_CSTAR
);
544 sfmask
= rdmsr(MSR_SF_MASK
);
548 __asm
__volatile (SVM_CLGI
);
555 "mov %c[rbx](%[svm]), %%rbx \n\t"
556 "mov %c[rcx](%[svm]), %%rcx \n\t"
557 "mov %c[rdx](%[svm]), %%rdx \n\t"
558 "mov %c[rsi](%[svm]), %%rsi \n\t"
559 "mov %c[rdi](%[svm]), %%rdi \n\t"
560 "mov %c[rbp](%[svm]), %%rbp \n\t"
561 "mov %c[r8](%[svm]), %%r8 \n\t"
562 "mov %c[r9](%[svm]), %%r9 \n\t"
563 "mov %c[r10](%[svm]), %%r10 \n\t"
564 "mov %c[r11](%[svm]), %%r11 \n\t"
565 "mov %c[r12](%[svm]), %%r12 \n\t"
566 "mov %c[r13](%[svm]), %%r13 \n\t"
567 "mov %c[r14](%[svm]), %%r14 \n\t"
568 "mov %c[r15](%[svm]), %%r15 \n\t"
570 /* Enter guest mode */
572 "mov %c[vmcb](%[svm]), %%rax \n\t"
578 /* Save guest registers, load host registers */
579 "mov %%rbx, %c[rbx](%[svm]) \n\t"
580 "mov %%rcx, %c[rcx](%[svm]) \n\t"
581 "mov %%rdx, %c[rdx](%[svm]) \n\t"
582 "mov %%rsi, %c[rsi](%[svm]) \n\t"
583 "mov %%rdi, %c[rdi](%[svm]) \n\t"
584 "mov %%rbp, %c[rbp](%[svm]) \n\t"
585 "mov %%r8, %c[r8](%[svm]) \n\t"
586 "mov %%r9, %c[r9](%[svm]) \n\t"
587 "mov %%r10, %c[r10](%[svm]) \n\t"
588 "mov %%r11, %c[r11](%[svm]) \n\t"
589 "mov %%r12, %c[r12](%[svm]) \n\t"
590 "mov %%r13, %c[r13](%[svm]) \n\t"
591 "mov %%r14, %c[r14](%[svm]) \n\t"
592 "mov %%r15, %c[r15](%[svm]) \n\t"
596 [vmcb
]"i"(offsetof(struct vcpu
, vmcb_pa
)),
597 [rbx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBX
])),
598 [rcx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RCX
])),
599 [rdx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDX
])),
600 [rsi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RSI
])),
601 [rdi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDI
])),
602 [rbp
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBP
])),
603 [r8
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R8
])),
604 [r9
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R9
])),
605 [r10
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R10
])),
606 [r11
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R11
])),
607 [r12
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R12
])),
608 [r13
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R13
])),
609 [r14
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R14
])),
610 [r15
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R15
]))
612 "rbx", "rcx", "rdx", "rsi", "rdi",
613 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
617 /* meh: dr7? db_regs? */
619 vcpu
->cr2
= vmcb
->save
.cr2
;
621 vcpu
->regs
[VCPU_REGS_RAX
] = vmcb
->save
.rax
;
622 vcpu
->regs
[VCPU_REGS_RSP
] = vmcb
->save
.rsp
;
623 vcpu
->regs
[VCPU_REGS_RIP
] = vmcb
->save
.rip
;
630 load_fs(fs_selector
);
631 load_gs(gs_selector
);
634 wrmsr(MSR_GSBASE
, vcpu
->host_gs_base
);
636 tss_desc
->sd_type
= SDT_SYSTSS
;
639 wrmsr(MSR_STAR
, star
);
640 wrmsr(MSR_LSTAR
, lstar
);
641 wrmsr(MSR_CSTAR
, cstar
);
642 wrmsr(MSR_SF_MASK
, sfmask
);
646 __asm
__volatile (SVM_STGI
);
650 printf("exit_code: %" PRIx64
"\n", vmcb
->control
.exit_code
);
652 // print_tss_desc(tss_desc);
653 // print_tss(tss_desc);
655 print_vmcb_save_area(vmcb
);
663 _fkvm_init_seg(struct vmcb_seg
*seg
, uint16_t attrib
)
666 seg
->attrib
= VMCB_SELECTOR_P_MASK
| attrib
;
672 fkvm_init_seg(struct vmcb_seg
*seg
)
674 _fkvm_init_seg(seg
, VMCB_SELECTOR_S_MASK
| VMCB_SELECTOR_WRITE_MASK
);
678 fkvm_init_sys_seg(struct vmcb_seg
*seg
, uint16_t attrib
)
680 _fkvm_init_seg(seg
, attrib
);
684 fkvm_iopm_alloc(void)
686 return contigmalloc(IOPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
690 fkvm_iopm_init(void *iopm
)
692 memset(iopm
, 0xff, IOPM_SIZE
); /* TODO: we may want to allow access to PC debug port */
696 fkvm_iopm_free(void *iopm
)
698 contigfree(iopm
, IOPM_SIZE
, M_DEVBUF
);
702 fkvm_msrpm_alloc(void)
704 return contigmalloc(MSRPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
708 fkvm_msrpm_init(void *msrpm
)
710 memset(msrpm
, 0xff, MSRPM_SIZE
); /* TODO: we may want to allow some MSR accesses */
714 fkvm_msrpm_free(void *msrpm
)
716 contigfree(msrpm
, MSRPM_SIZE
, M_DEVBUF
);
720 fkvm_hsave_area_alloc(void)
722 return contigmalloc(PAGE_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
726 fkvm_hsave_area_init(void *hsave_area
)
731 fkvm_hsave_area_free(void *hsave_area
)
733 contigfree(hsave_area
, PAGE_SIZE
, M_DEVBUF
);
736 static struct vmspace
*
737 fkvm_make_vmspace(void)
741 sp
= vmspace_alloc(0, 0xffffffffffffffff);
743 printf("vmspace_alloc failed\n");
751 fkvm_destroy_vmspace(struct vmspace
* sp
)
757 fkvm_vmcb_alloc(void)
759 return contigmalloc(PAGE_SIZE
, M_DEVBUF
, M_ZERO
, 0, -1UL,
764 fkvm_vmcb_init(struct vmcb
*vmcb
)
766 struct vmcb_control_area
*control
= &vmcb
->control
;
767 struct vmcb_save_area
*save
= &vmcb
->save
;
769 control
->intercept_cr_reads
= INTERCEPT_CR4_MASK
;
771 control
->intercept_cr_writes
= INTERCEPT_CR4_MASK
|
774 control
->intercept_dr_reads
= INTERCEPT_DR0_MASK
|
779 control
->intercept_dr_writes
= INTERCEPT_DR0_MASK
|
786 control
->intercept_exceptions
= (1 << IDT_UD
) | // Invalid Opcode
787 (1 << IDT_MC
); // Machine Check
789 control
->intercepts
= INTERCEPT_INTR
|
796 INTERCEPT_IOIO_PROT
|
808 INTERCEPT_MWAIT_UNCOND
;
810 control
->iopm_base_pa
= vtophys(iopm
);
811 control
->msrpm_base_pa
= vtophys(msrpm
);
812 control
->tsc_offset
= 0;
814 /* TODO: remove this once we assign asid's to distinct VM's */
815 control
->guest_asid
= 1;
816 control
->tlb_control
= VMCB_TLB_CONTROL_FLUSH_ALL
;
818 /* let v_tpr default to 0 */
819 /* let v_irq default to 0 */
820 /* let v_intr default to 0 */
822 control
->v_intr_masking
= 1;
824 /* let v_intr_vector default to 0 */
825 /* let intr_shadow default to 0 */
826 /* let exit_code, exit_info_1, exit_info_2, exit_int_info,
827 exit_int_info_err_code default to 0 */
829 control
->nested_ctl
= 1;
831 /* let event_inj default to 0 */
833 // (nested_cr3 is later)
835 /* let lbr_virt_enable default to 0 */
838 fkvm_init_seg(&save
->ds
);
839 fkvm_init_seg(&save
->es
);
840 fkvm_init_seg(&save
->fs
);
841 fkvm_init_seg(&save
->gs
);
842 fkvm_init_seg(&save
->ss
);
844 _fkvm_init_seg(&save
->cs
, VMCB_SELECTOR_READ_MASK
| VMCB_SELECTOR_S_MASK
|
845 VMCB_SELECTOR_CODE_MASK
);
846 save
->cs
.selector
= 0xf000;
847 save
->cs
.base
= 0xffff0000;
849 save
->gdtr
.limit
= 0xffff;
850 save
->idtr
.limit
= 0xffff;
852 fkvm_init_sys_seg(&save
->ldtr
, SDT_SYSLDT
);
853 fkvm_init_sys_seg(&save
->tr
, SDT_SYS286BSY
);
855 save
->g_pat
= PAT_VALUE(PAT_WRITE_BACK
, 0) | PAT_VALUE(PAT_WRITE_THROUGH
, 1) |
856 PAT_VALUE(PAT_UNCACHED
, 2) | PAT_VALUE(PAT_UNCACHEABLE
, 3) |
857 PAT_VALUE(PAT_WRITE_BACK
, 4) | PAT_VALUE(PAT_WRITE_THROUGH
, 5) |
858 PAT_VALUE(PAT_UNCACHED
, 6) | PAT_VALUE(PAT_UNCACHEABLE
, 7);
860 /* CR0 = 6000_0010h at boot */
861 save
->cr0
= CR0_ET
| CR0_NW
| CR0_CD
;
862 save
->dr6
= 0xffff0ff0;
865 save
->rip
= 0x0000fff0;
867 save
->efer
= EFER_SVME
;
871 fkvm_vmcb_free(struct vmcb
*vmcb
)
873 contigfree(vmcb
, PAGE_SIZE
, M_DEVBUF
);
877 fkvm_vcpu_create(struct guestvm
*guest_vm
)
880 vcpu
= malloc(sizeof(struct vcpu
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
882 vcpu
->vmcb
= fkvm_vmcb_alloc();
883 vcpu
->vmcb_pa
= vtophys(vcpu
->vmcb
);
884 printf("vmcb = 0x%p\n", vcpu
->vmcb
);
885 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu
->vmcb_pa
);
887 fkvm_vmcb_init(vcpu
->vmcb
);
888 vcpu
->vmcb
->control
.nested_cr3
= guest_vm
->nested_cr3
;
889 vcpu
->regs
[VCPU_REGS_RIP
] = vcpu
->vmcb
->save
.rip
;
891 vcpu
->guest_vm
= guest_vm
;
897 fkvm_vcpu_destroy(struct vcpu
*vcpu
)
899 fkvm_vmcb_free(vcpu
->vmcb
);
900 free(vcpu
, M_DEVBUF
);
903 static struct guestvm
*
904 fkvm_guestvm_alloc(void)
906 return malloc(sizeof(struct guestvm
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
910 fkvm_guestvm_free(struct guestvm
* guest_vm
)
912 free(guest_vm
, M_DEVBUF
);
916 fkvm_guestvm_add_vcpu(struct guestvm
*guest_vm
, struct vcpu
*vcpu
)
918 guest_vm
->vcpus
[guest_vm
->nr_vcpus
] = vcpu
;
919 guest_vm
->nr_vcpus
++; /* TODO: Probably not safe to increment */
920 /* How about a lock to protect all of this? */
925 fkvm_userpoke(struct thread
*td
, struct fkvm_userpoke_args
*uap
)
927 printf("fkvm_userpoke\n");
936 fkvm_mem_has_entry(vm_map_entry_t expected_entry
, vm_map_t vm_map
, vm_offset_t vaddr
)
938 vm_map_entry_t lookup_entry
;
939 vm_object_t throwaway_object
;
940 vm_pindex_t throwaway_pindex
;
941 vm_prot_t throwaway_prot
;
942 boolean_t throwaway_wired
;
945 error
= vm_map_lookup(&vm_map
, /* IN/OUT */
947 VM_PROT_READ
|VM_PROT_WRITE
,
948 &lookup_entry
, /* OUT */
949 &throwaway_object
, /* OUT */
950 &throwaway_pindex
, /* OUT */
951 &throwaway_prot
, /* OUT */
952 &throwaway_wired
); /* OUT */
953 if (error
!= KERN_SUCCESS
)
955 vm_map_lookup_done(vm_map
, lookup_entry
);
956 return (lookup_entry
== expected_entry
);
960 fkvm_guest_check_range(struct guestvm
*guest_vm
, uint64_t start
, uint64_t end
)
962 vm_map_t guest_vm_map
;
963 vm_map_entry_t lookup_entry
;
964 vm_object_t throwaway_object
;
965 vm_pindex_t throwaway_pindex
;
966 vm_prot_t throwaway_prot
;
967 boolean_t throwaway_wired
;
971 guest_vm_map
= &guest_vm
->sp
->vm_map
;
973 error
= vm_map_lookup(&guest_vm_map
, /* IN/OUT */
975 VM_PROT_READ
|VM_PROT_WRITE
,
976 &lookup_entry
, /* OUT */
977 &throwaway_object
, /* OUT */
978 &throwaway_pindex
, /* OUT */
979 &throwaway_prot
, /* OUT */
980 &throwaway_wired
); /* OUT */
981 if (error
!= KERN_SUCCESS
)
983 vm_map_lookup_done(guest_vm_map
, lookup_entry
);
986 TODO: We can't actually nest the lookups:
987 panic: _sx_xlock_hard: recursed on non-recursive sx user map @ ../../../vm/vm_map.c:3115
988 Therefore, I've moved the lookup_done above for now, but we really need a lock here.
990 Maybe it's better to use vm_map_lookup_entry directly.
994 if (fkvm_mem_has_entry(lookup_entry
, guest_vm_map
, end
))
1003 fkvm_get_regs_regs(struct vcpu
*vcpu
, struct kvm_regs
*out
)
1005 out
->rax
= vcpu
->regs
[VCPU_REGS_RAX
];
1006 out
->rbx
= vcpu
->regs
[VCPU_REGS_RBX
];
1007 out
->rcx
= vcpu
->regs
[VCPU_REGS_RCX
];
1008 out
->rdx
= vcpu
->regs
[VCPU_REGS_RDX
];
1009 out
->rsi
= vcpu
->regs
[VCPU_REGS_RSI
];
1010 out
->rdi
= vcpu
->regs
[VCPU_REGS_RDI
];
1011 out
->rsp
= vcpu
->regs
[VCPU_REGS_RSP
];
1012 out
->rbp
= vcpu
->regs
[VCPU_REGS_RBP
];
1013 out
->r8
= vcpu
->regs
[VCPU_REGS_R8
];
1014 out
->r9
= vcpu
->regs
[VCPU_REGS_R9
];
1015 out
->r10
= vcpu
->regs
[VCPU_REGS_R10
];
1016 out
->r11
= vcpu
->regs
[VCPU_REGS_R11
];
1017 out
->r12
= vcpu
->regs
[VCPU_REGS_R12
];
1018 out
->r13
= vcpu
->regs
[VCPU_REGS_R13
];
1019 out
->r14
= vcpu
->regs
[VCPU_REGS_R14
];
1020 out
->r15
= vcpu
->regs
[VCPU_REGS_R15
];
1021 out
->rip
= vcpu
->regs
[VCPU_REGS_RIP
];
1022 out
->rflags
= vcpu
->vmcb
->save
.rflags
;
1026 fkvm_set_regs_regs(struct vcpu
*vcpu
, const struct kvm_regs
*in
)
1028 vcpu
->regs
[VCPU_REGS_RAX
] = in
->rax
;
1029 vcpu
->regs
[VCPU_REGS_RBX
] = in
->rbx
;
1030 vcpu
->regs
[VCPU_REGS_RCX
] = in
->rcx
;
1031 vcpu
->regs
[VCPU_REGS_RDX
] = in
->rdx
;
1032 vcpu
->regs
[VCPU_REGS_RSI
] = in
->rsi
;
1033 vcpu
->regs
[VCPU_REGS_RDI
] = in
->rdi
;
1034 vcpu
->regs
[VCPU_REGS_RSP
] = in
->rsp
;
1035 vcpu
->regs
[VCPU_REGS_RBP
] = in
->rbp
;
1036 vcpu
->regs
[VCPU_REGS_R8
] = in
->r8
;
1037 vcpu
->regs
[VCPU_REGS_R9
] = in
->r9
;
1038 vcpu
->regs
[VCPU_REGS_R10
] = in
->r10
;
1039 vcpu
->regs
[VCPU_REGS_R11
] = in
->r11
;
1040 vcpu
->regs
[VCPU_REGS_R12
] = in
->r12
;
1041 vcpu
->regs
[VCPU_REGS_R13
] = in
->r13
;
1042 vcpu
->regs
[VCPU_REGS_R14
] = in
->r14
;
1043 vcpu
->regs
[VCPU_REGS_R15
] = in
->r15
;
1044 vcpu
->regs
[VCPU_REGS_RIP
] = in
->rip
;
1045 vcpu
->vmcb
->save
.rflags
= in
->rflags
;
1049 fkvm_set_vmcb_dtable(struct vmcb_seg
*vmcb_seg
, struct kvm_dtable
*fkvm_dtable
)
1051 vmcb_seg
->base
= fkvm_dtable
->base
;
1052 vmcb_seg
->limit
= fkvm_dtable
->limit
;
1056 fkvm_set_vmcb_seg(struct vmcb_seg
*vmcb_seg
, struct kvm_segment
*fkvm_seg
)
1058 vmcb_seg
->base
= fkvm_seg
->base
;
1059 vmcb_seg
->limit
= fkvm_seg
->limit
;
1060 vmcb_seg
->selector
= fkvm_seg
->selector
;
1061 if (fkvm_seg
->unusable
)
1064 vmcb_seg
->attrib
= (fkvm_seg
->type
& VMCB_SELECTOR_TYPE_MASK
);
1065 vmcb_seg
->attrib
|= (fkvm_seg
->s
& 1) << VMCB_SELECTOR_S_SHIFT
;
1066 vmcb_seg
->attrib
|= (fkvm_seg
->dpl
& 3) << VMCB_SELECTOR_DPL_SHIFT
;
1067 vmcb_seg
->attrib
|= (fkvm_seg
->present
& 1) << VMCB_SELECTOR_P_SHIFT
;
1068 vmcb_seg
->attrib
|= (fkvm_seg
->avl
& 1) << VMCB_SELECTOR_AVL_SHIFT
;
1069 vmcb_seg
->attrib
|= (fkvm_seg
->l
& 1) << VMCB_SELECTOR_L_SHIFT
;
1070 vmcb_seg
->attrib
|= (fkvm_seg
->db
& 1) << VMCB_SELECTOR_DB_SHIFT
;
1071 vmcb_seg
->attrib
|= (fkvm_seg
->g
& 1) << VMCB_SELECTOR_G_SHIFT
;
1076 fkvm_set_cr8(struct vcpu
*vcpu
, uint64_t cr8
)
1078 // TODO: if cr8 has reserved bits inject GP Fault, return
1080 vcpu
->vmcb
->control
.v_tpr
= (uint8_t) cr8
;
1084 fkvm_set_efer(struct vcpu
*vcpu
, uint64_t efer
)
1086 struct vmcb
*vmcb
= vcpu
->vmcb
;
1087 //if efer has reserved bits set: inject GP Fault
1089 if (vmcb
->save
.cr0
& CR0_PG
) { //If paging is enabled do not allow changes to LME
1090 if ((vmcb
->save
.efer
& EFER_LME
) != (efer
& EFER_LME
)) {
1091 printf("fkvm_set_efer: attempt to change LME while paging\n");
1096 vmcb
->save
.efer
= efer
| EFER_SVME
;
1100 fkvm_get_regs_sregs(struct vcpu
*vcpu
, struct kvm_sregs
*out
)
1106 fkvm_set_regs_sregs(struct vcpu
*vcpu
, struct kvm_sregs
*in
)
1108 struct vmcb
*vmcb
= vcpu
->vmcb
;
1110 fkvm_set_vmcb_seg(&vmcb
->save
.cs
, &in
->cs
);
1111 fkvm_set_vmcb_seg(&vmcb
->save
.ds
, &in
->ds
);
1112 fkvm_set_vmcb_seg(&vmcb
->save
.es
, &in
->es
);
1113 fkvm_set_vmcb_seg(&vmcb
->save
.fs
, &in
->fs
);
1114 fkvm_set_vmcb_seg(&vmcb
->save
.gs
, &in
->gs
);
1115 fkvm_set_vmcb_seg(&vmcb
->save
.ss
, &in
->ss
);
1116 fkvm_set_vmcb_seg(&vmcb
->save
.tr
, &in
->tr
);
1117 fkvm_set_vmcb_seg(&vmcb
->save
.ldtr
, &in
->ldt
);
1119 vmcb
->save
.cpl
= (vmcb
->save
.cs
.attrib
>> VMCB_SELECTOR_DPL_SHIFT
) & 3;
1121 fkvm_set_vmcb_dtable(&vmcb
->save
.idtr
, &in
->idt
);
1122 fkvm_set_vmcb_dtable(&vmcb
->save
.gdtr
, &in
->gdt
);
1124 vcpu
->cr2
= in
->cr2
;
1125 vcpu
->cr3
= in
->cr3
;
1127 fkvm_set_cr8(vcpu
, in
->cr8
);
1128 fkvm_set_efer(vcpu
, in
->efer
);
1129 /* TODO: apic_base */
1130 vmcb
->save
.cr0
= in
->cr0
;
1131 vmcb
->save
.cr4
= in
->cr4
;
1132 /* TODO: irq_pending, interrupt_bitmap, irq_summary */
1136 fkvm_get_regs_msrs(struct vcpu
*vcpu
, uint32_t nmsrs
, struct kvm_msr_entry
*entries
) {
/*
 * fkvm_set_reg_msr: write one guest MSR (index, data) into the vCPU's
 * VMCB state. NOTE(review): this is the skeleton of a switch(index);
 * the switch header, several case labels, break statements, and the
 * declaration/read of the local `tsc` (orig lines 1143-1149 etc.) are
 * elided in this extraction.
 */
1141 fkvm_set_reg_msr(struct vcpu
*vcpu
, uint32_t index
, uint64_t data
) {
1142 struct vmcb
*vmcb
= vcpu
->vmcb
;
/* TSC write: store the guest/host delta as a VMCB offset rather than
 * writing the hardware TSC. `tsc` is presumably the current host TSC
 * read in an elided line -- confirm. */
1150 vmcb
->control
.tsc_offset
= data
- tsc
;
/* Syscall/sysret and swapgs MSRs are shadowed directly in the VMCB
 * save area (case labels elided). */
1155 vmcb
->save
.star
= data
;
1160 vmcb
->save
.lstar
= data
;
1165 vmcb
->save
.cstar
= data
;
1170 vmcb
->save
.kernel_gs_base
= data
;
1175 vmcb
->save
.sfmask
= data
;
/* SYSENTER MSRs. */
1179 case MSR_SYSENTER_CS_MSR
: {
1180 vmcb
->save
.sysenter_cs
= data
;
1184 case MSR_SYSENTER_EIP_MSR
: {
1185 vmcb
->save
.sysenter_eip
= data
;
1189 case MSR_SYSENTER_ESP_MSR
: {
1190 vmcb
->save
.sysenter_esp
= data
;
/* Debug-control and performance-counter MSRs: not implemented yet;
 * writes are logged and dropped. */
1194 case MSR_DEBUGCTLMSR
: {
1195 printf("unimplemented at %d\n", __LINE__
);
1199 case MSR_PERFEVSEL0
... MSR_PERFEVSEL3
:
1200 case MSR_PERFCTR0
... MSR_PERFCTR3
: {
1201 printf("unimplemented at %d\n", __LINE__
);
/* EFER goes through the helper (case label elided), which presumably
 * keeps EFER_SVME set -- confirm. */
1206 fkvm_set_efer(vcpu
, data
);
/* Machine-check and APIC/misc MSRs: logged, not implemented. */
1210 case MSR_MC0_STATUS
: {
1211 printf("unimplemented at %d\n", __LINE__
);
1215 case MSR_MCG_STATUS
: {
1216 printf("unimplemented at %d\n", __LINE__
);
1221 printf("unimplemented at %d\n", __LINE__
);
1225 //TODO: MSR_IA32_UCODE_REV
1226 //TODO: MSR_IA32_UCODE_WRITE
1227 //TODO: 0x200 ... 0x2ff: set_msr_mtrr
1229 case MSR_APICBASE
: {
1230 printf("unimplemented at %d\n", __LINE__
);
1234 case MSR_IA32_MISC_ENABLE
: {
1235 printf("unimplemented at %d\n", __LINE__
);
1239 //TODO: MSR_KVM_WALL_CLOCK
1240 //TODO: MSR_KVM_SYSTEM_TIME
/* default: unknown MSR index -- log and ignore. */
1243 printf("Did not set unimplemented msr: 0x%" PRIx32
"\n", index
);
/*
 * fkvm_set_regs_msrs: apply an array of nmsrs MSR writes to the vCPU,
 * one fkvm_set_reg_msr() call per entry. NOTE(review): declaration of
 * loop index `i` and the closing braces are elided in this extraction.
 */
1248 fkvm_set_regs_msrs(struct vcpu
*vcpu
, uint32_t nmsrs
, struct kvm_msr_entry
*entries
) {
1251 for (i
= 0; i
< nmsrs
; i
++) {
1252 fkvm_set_reg_msr(vcpu
, entries
[i
].index
, entries
[i
].data
);
/*
 * fkvm_get_regs: syscall entry point. Copies the current vCPU register
 * state (GP regs, sregs, or MSRs, selected by uap->type) out to the
 * user buffer uap->regs. NOTE(review): error-return paths and several
 * declarations are elided in this extraction.
 */
1259 fkvm_get_regs(struct thread
*td
, struct fkvm_get_regs_args
*uap
)
1267 vcpu
= TD_GET_VCPU(td
);
1271 switch (uap
->type
) {
1273 case FKVM_REGS_TYPE_REGS
: {
1274 struct kvm_regs out
;
1275 fkvm_get_regs_regs(vcpu
, &out
);
1276 return copyout(&out
, uap
->regs
, sizeof(out
));
1279 case FKVM_REGS_TYPE_SREGS
: {
1280 struct kvm_sregs out
;
1281 fkvm_get_regs_sregs(vcpu
, &out
);
1282 return copyout(&out
, uap
->regs
, sizeof(out
));
1285 case FKVM_REGS_TYPE_MSRS
: {
1286 struct kvm_msrs out
;
1287 struct kvm_msrs
*user_msrs
;
1288 struct kvm_msr_entry
*entries
;
/* Keep the user's pointer so we can reach its flexible `entries`
 * array after copying in the header. */
1291 user_msrs
= (struct kvm_msrs
*)uap
->regs
;
/* Copy in the kvm_msrs header (contains nmsrs). */
1293 error
= copyin(uap
->regs
, &out
, sizeof(out
));
/* NOTE(review): out.nmsrs comes straight from userland and sizes a
 * kernel allocation below with no upper bound -- potential DoS. */
1297 size
= sizeof(*entries
) * out
.nmsrs
;
/* NOTE(review): malloc(9) with M_WAITOK cannot return NULL, so the
 * NULL check below is dead code. */
1298 entries
= malloc(size
, M_DEVBUF
, M_WAITOK
|M_ZERO
);
1299 if (entries
== NULL
)
/* NOTE(review): user_msrs->entries dereferences a userspace struct
 * member directly from kernel mode -- should itself be copied in. */
1302 error
= copyin(user_msrs
->entries
, entries
, size
);
1304 printf("FKVM_REGS_TYPE_MSRS: unable to copyin entries\n");
1305 free(entries
, M_DEVBUF
);
1309 fkvm_get_regs_msrs(vcpu
, out
.nmsrs
, entries
);
/* BUG(review): copyout(9) is copyout(kaddr, udaddr, len); the
 * arguments here appear swapped -- should likely be
 * copyout(entries, user_msrs->entries, size). */
1311 error
= copyout(user_msrs
->entries
, entries
, size
);
1313 printf("FKVM_REGS_TYPE_MSRS: unable to copyout entries\n");
1316 free(entries
, M_DEVBUF
);
/*
 * fkvm_set_regs: syscall entry point. Copies register state (GP regs,
 * sregs, or MSRs, selected by uap->type) in from the user buffer
 * uap->regs and applies it to the current vCPU. NOTE(review): error
 * checks after the copyins and several declarations are elided in this
 * extraction.
 */
1326 fkvm_set_regs(struct thread
*td
, struct fkvm_set_regs_args
*uap
)
1331 vcpu
= TD_GET_VCPU(td
);
1335 switch (uap
->type
) {
1337 case FKVM_REGS_TYPE_REGS
: {
1339 error
= copyin(uap
->regs
, &in
, sizeof(in
));
1342 fkvm_set_regs_regs(vcpu
, &in
);
1346 case FKVM_REGS_TYPE_SREGS
: {
1347 struct kvm_sregs in
;
1348 error
= copyin(uap
->regs
, &in
, sizeof(in
));
1351 fkvm_set_regs_sregs(vcpu
, &in
);
1355 case FKVM_REGS_TYPE_MSRS
: {
1357 struct kvm_msrs
*user_msrs
;
1358 struct kvm_msr_entry
*entries
;
1361 user_msrs
= (struct kvm_msrs
*)uap
->regs
;
/* Copy in the kvm_msrs header (contains nmsrs). */
1363 error
= copyin(uap
->regs
, &in
, sizeof(in
));
/* NOTE(review): in.nmsrs is user-controlled and unbounded, and with
 * M_WAITOK malloc(9) never returns NULL (dead check below). */
1367 size
= sizeof(*entries
) * in
.nmsrs
;
1368 entries
= malloc(size
, M_DEVBUF
, M_WAITOK
|M_ZERO
);
1369 if (entries
== NULL
)
/* NOTE(review): user_msrs->entries dereferences a userspace struct
 * member directly from kernel mode -- confirm this is intended. */
1372 error
= copyin(user_msrs
->entries
, entries
, size
);
1374 printf("FKVM_REGS_TYPE_MSRS: unable to copyin entries\n");
1375 free(entries
, M_DEVBUF
);
1379 fkvm_set_regs_msrs(vcpu
, in
.nmsrs
, entries
);
1381 free(entries
, M_DEVBUF
);
1390 /* This function can only be called with multiples of page sizes */
1391 /* vaddr as NULL overloads to fkvm_guest_check_range */
/*
 * fkvm_set_user_mem_region: syscall entry point. Maps a range of the
 * calling process's address space (uap->vaddr, uap->size) into the
 * guest physical address space at uap->guest_pa, by looking up the
 * backing VM object in the user map and inserting it into the guest
 * vmspace. Per the comments above: sizes must be page multiples, and
 * vaddr == 0 degenerates to a pure range check.
 * NOTE(review): declarations of start/end/error and several
 * early-return paths are elided in this extraction.
 */
1393 fkvm_set_user_mem_region(struct thread
*td
, struct fkvm_set_user_mem_region_args
*uap
)
1395 struct guestvm
*guest_vm
;
1400 struct vmspace
*user_vm_space
;
1401 vm_map_t user_vm_map
;
1403 vm_object_t vm_object
;
1404 vm_pindex_t vm_object_pindex
;
1405 vm_ooffset_t vm_object_offset
;
1406 vm_prot_t throwaway_prot
;
1407 boolean_t throwaway_wired
;
1408 vm_map_entry_t lookup_entry
;
1412 guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
1413 if (guest_vm
== NULL
) {
1414 printf("PROC_GET_GUESTVM -> NULL\n");
/* Guest-physical range covered by this request (inclusive end). */
1418 start
= uap
->guest_pa
;
1419 end
= uap
->guest_pa
+ uap
->size
- 1;
1420 printf("start: 0x%" PRIx64
" bytes\n", start
);
1421 printf("end: 0x%" PRIx64
" bytes\n", end
);
/* vaddr == 0 is overloaded to mean "just validate the range". */
1423 if (uap
->vaddr
== 0)
1424 return fkvm_guest_check_range(guest_vm
, start
, end
);
1426 user_vm_space
= td
->td_proc
->p_vmspace
;
1427 user_vm_map
= &user_vm_space
->vm_map
;
1428 printf("user vm space: %p\n", user_vm_space
);
1429 printf("user vm map: %p\n", user_vm_map
);
/* Find the VM object backing the user's vaddr (the lookup address
 * argument, orig line 1432, is elided in this extraction). */
1431 error
= vm_map_lookup(&user_vm_map
, /* IN/OUT */
1433 VM_PROT_READ
|VM_PROT_WRITE
,
1434 &lookup_entry
, /* OUT */
1435 &vm_object
, /* OUT */
1436 &vm_object_pindex
, /* OUT */
1437 &throwaway_prot
, /* OUT */
1438 &throwaway_wired
); /* OUT */
1439 if (error
!= KERN_SUCCESS
) {
1440 printf("vm_map_lookup failed: %d\n", error
);
1444 /* TODO: Trust the user that the full region is valid.
1445 * This is very bad. See the note in fkvm_guest_check_range
1446 * on nesting vm lookups. */
/* Reject ranges that span more than one map entry (we can only
 * insert a single backing object below). */
1448 if (!fkvm_mem_has_entry(lookup_entry
, user_vm_map
, uap
->vaddr
+ uap
->size
)) {
1449 printf("end of range not contained in same vm map entry as start\n");
1454 printf("vm object: %p\n", vm_object
);
1455 printf(" size: %d pages\n", (int) vm_object
->size
);
1457 vm_object_offset
= IDX_TO_OFF(vm_object_pindex
);
1458 printf("vm_ooffset: 0x%" PRIx64
"\n", vm_object_offset
);
/* Take a reference so the object survives vm_map_lookup_done. */
1460 vm_object_reference(vm_object
); // TODO: this might be a mem leak
1462 vm_map_lookup_done(user_vm_map
, lookup_entry
);
/* Insert the object into the guest vmspace at [start, end] (the
 * offset/start/end arguments, orig lines 1465-1468, are elided). */
1464 error
= vm_map_insert(&guest_vm
->sp
->vm_map
,
1469 VM_PROT_ALL
, VM_PROT_ALL
,
1471 if (error
!= KERN_SUCCESS
) {
1472 printf("vm_map_insert failed: %d\n", error
);
1474 case KERN_INVALID_ADDRESS
:
/*
 * fkvm_unset_user_mem_region: syscall entry point. Removes the guest
 * physical range [guest_pa, guest_pa + size - 1] from the guest's
 * vmspace map. NOTE(review): declarations of start/end/error and the
 * final return paths are elided in this extraction.
 */
1487 fkvm_unset_user_mem_region(struct thread
*td
, struct fkvm_unset_user_mem_region_args
*uap
)
1489 struct guestvm
*guest_vm
;
1494 guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
1495 if (guest_vm
== NULL
) {
1496 printf("PROC_GET_GUESTVM -> NULL\n");
1503 vm_map_t guest_vm_map
;
1507 start
= uap
->guest_pa
;
1508 end
= uap
->guest_pa
+ uap
->size
- 1;
1509 printf("start: 0x%" PRIx64
" bytes\n", start
);
1510 printf("end: 0x%" PRIx64
" bytes\n", end
);
1512 guest_vm_map
= &guest_vm
->sp
->vm_map
;
/* NOTE(review): vm_map_remove takes an exclusive end address; passing
 * the inclusive `end` computed above may drop the last page -- confirm. */
1514 error
= vm_map_remove(guest_vm_map
, start
, end
);
1515 if (error
!= KERN_SUCCESS
)
/*
 * fkvm_create_vm: syscall entry point. Allocates a guestvm, builds its
 * nested-paging vmspace, records the nested CR3 (physical address of
 * the guest pmap's PML4), and attaches the guestvm to the calling
 * process. NOTE(review): return statements and error codes are elided
 * in this extraction.
 */
1522 fkvm_create_vm(struct thread
*td
, struct fkvm_create_vm_args
*uap
)
1524 struct guestvm
*guest_vm
;
1526 printf("SYSCALL : fkvm_create_vm\n");
1531 /* Allocate Guest VM */
1532 guest_vm
= fkvm_guestvm_alloc();
1534 /* Set up the vm address space */
1535 guest_vm
->sp
= fkvm_make_vmspace();
1536 if (guest_vm
->sp
== NULL
) {
1537 fkvm_guestvm_free(guest_vm
);
/* nested_cr3 = physical address of the guest's top-level page table;
 * programmed into the VMCB for nested paging. */
1540 guest_vm
->nested_cr3
= vtophys(vmspace_pmap(guest_vm
->sp
)->pm_pml4
);
1543 printf(" vm space: %p\n", guest_vm
->sp
);
1544 printf(" vm map: %p\n", &guest_vm
->sp
->vm_map
);
1545 printf(" ncr3: 0x%" PRIx64
"\n", guest_vm
->nested_cr3
);
1547 PROC_SET_GUESTVM(td
->td_proc
, guest_vm
);
1549 printf("fkvm_create_vm done\n");
/*
 * fkvm_destroy_vm: tear down a guest VM -- destroy every vCPU (in
 * reverse creation order), release the nested-paging vmspace, then free
 * the guestvm structure itself.
 */
1554 fkvm_destroy_vm(struct guestvm
*guest_vm
)
1556 /* Destroy the VCPUs */
1557 while (guest_vm
->nr_vcpus
> 0) {
1558 guest_vm
->nr_vcpus
--;
1559 fkvm_vcpu_destroy(guest_vm
->vcpus
[guest_vm
->nr_vcpus
]);
/* Clear the slot to avoid a dangling pointer. */
1560 guest_vm
->vcpus
[guest_vm
->nr_vcpus
] = NULL
;
1563 /* Destroy the vmspace */
1564 if (guest_vm
->sp
!= NULL
)
1565 fkvm_destroy_vmspace(guest_vm
->sp
);
1567 /* Destroy the Guest VM itself */
1568 fkvm_guestvm_free(guest_vm
);
/*
 * intercept_ioio: handle a VMEXIT caused by an IN/OUT instruction.
 * Decodes the IOIO exit-info word (ioio_info, from EXITINFO1) into the
 * kvm_run I/O fields so userland can emulate the port access, and
 * advances the guest RIP past the instruction (rip comes from
 * EXITINFO2, the "next rip"). String operations (INS/OUTS) are not
 * implemented yet. NOTE(review): the `str` declaration and some braces
 * are elided in this extraction.
 */
1572 intercept_ioio(struct vcpu
*vcpu
, struct kvm_run
*kvm_run
, uint64_t ioio_info
, uint64_t rip
)
1574 struct vmcb
*vmcb
= vcpu
->vmcb
;
/* Bit: string instruction (INS/OUTS)? */
1578 str
= (ioio_info
& STR_MASK
) >> STR_SHIFT
;
1580 printf("%s operation requested, not yet implemented, \n",
1581 kvm_run
->u
.io
.in
? "INS" : "OUTS");
/* Decode port number, direction, and operand size from the exit info. */
1585 kvm_run
->u
.io
.port
= ioio_info
>> PORT_SHIFT
;
1586 kvm_run
->u
.io
.in
= ioio_info
& TYPE_MASK
;
1588 kvm_run
->u
.io
.size
= (ioio_info
& SIZE_MASK
) >> SIZE_SHIFT
;
1590 kvm_run
->u
.io
.data_offset
= PAGE_SIZE
;
/* The I/O data for a non-string OUT lives in guest RAX. */
1591 kvm_run
->u
.io
.pio_data
= vcpu
->regs
[VCPU_REGS_RAX
];
1593 /* We need to remove the Interrupt Shadow Flag from the VMCB (see 15.20.5 in AMD_Vol2) */
1594 vmcb
->control
.intr_shadow
= 0;
1596 //rep = (ioio_info & REP_MASK) >> REP_SHIFT;
1597 /* TODO: Research more into Direction Flag checked in KVM; DF bit in RFLAGS */
1599 /* set the next rip in the VMCB save area for now */
1600 /* TODO: Store rIP in vm_run structure until we absolutely need it */
1601 vcpu
->regs
[VCPU_REGS_RIP
] = rip
;
/*
 * intercept_shutdown: handle a VMEXIT_SHUTDOWN (triple fault etc.) by
 * wiping the vCPU's VMCB page and re-initializing it to a pristine
 * state. Assumes the VMCB occupies exactly one page.
 */
1607 intercept_shutdown(struct vcpu
*vcpu
)
1609 struct vmcb
*vmcb
= vcpu
->vmcb
;
1610 memset(vmcb
, 0, PAGE_SIZE
);
1611 fkvm_vmcb_init(vmcb
);
/*
 * fkvm_vm_run: syscall entry point -- the main guest execution loop.
 * Copies in the kvm_run control block, then repeatedly enters the
 * guest (fkvm_vcpu_run) and dispatches on the VMCB exit code until an
 * exit reason other than KVM_EXIT_CONTINUE is produced, finally copying
 * the kvm_run block (with exit reason) back out to userland.
 * NOTE(review): the `vmcb` declaration, break statements, error checks
 * and several case labels are elided in this extraction.
 */
1615 fkvm_vm_run(struct thread
*td
, struct fkvm_vm_run_args
*uap
)
1618 struct guestvm
*guest_vm
;
1622 struct kvm_run kvm_run
;
1627 vcpu
= TD_GET_VCPU(td
);
1631 guest_vm
= vcpu
->guest_vm
;
1634 error
= copyin(uap
->run
, &kvm_run
, sizeof(struct kvm_run
));
/* Userland may have updated the TPR (cr8) since the last exit. */
1638 fkvm_set_cr8(vcpu
, kvm_run
.cr8
);
1640 kvm_run
.exit_reason
= KVM_EXIT_CONTINUE
;
/* Run the guest until an exit must be reported to userland. */
1642 while(kvm_run
.exit_reason
== KVM_EXIT_CONTINUE
) {
1643 fkvm_vcpu_run(vcpu
);
1645 switch (vmcb
->control
.exit_code
) {
/* Guest exception intercepts (vectors 0-31). */
1647 case VMCB_EXIT_EXCP_BASE
... (VMCB_EXIT_EXCP_BASE
+ 31): {
1650 excp_vector
= vmcb
->control
.exit_code
- VMCB_EXIT_EXCP_BASE
;
1652 printf("VMCB_EXIT_EXCP_BASE, exception vector: 0x%x\n",
1658 case VMCB_EXIT_INTR
: {
1659 printf("VMCB_EXIT_INTR - nothing to do\n");
1660 /* Handled by host OS already */
1661 kvm_run
.exit_reason
= KVM_EXIT_CONTINUE
;
/* Nested page fault: fault in the guest-physical page via the host
 * VM system, then resume the guest. */
1665 case VMCB_EXIT_NPF
: {
1666 /* EXITINFO1 contains fault error code */
1667 /* EXITINFO2 contains the guest physical address causing the fault. */
1669 u_int64_t fault_code
;
1670 u_int64_t fault_gpa
;
1672 vm_prot_t fault_type
;
1676 fault_code
= vmcb
->control
.exit_info_1
;
1677 fault_gpa
= vmcb
->control
.exit_info_2
;
1678 kvm_run
.exit_reason
= KVM_EXIT_CONTINUE
;
1680 printf("VMCB_EXIT_NPF:\n");
1681 printf("gpa=0x%" PRIx64
"\n", fault_gpa
);
1682 printf("fault code=0x%" PRIx64
" [P=%x, R/W=%x, U/S=%x, I/D=%x]\n",
1684 (fault_code
& PGEX_P
) != 0,
1685 (fault_code
& PGEX_W
) != 0,
1686 (fault_code
& PGEX_U
) != 0,
1687 (fault_code
& PGEX_I
) != 0);
/* Map the page-fault error code onto a vm_fault protection request:
 * write > instruction-fetch > read. */
1689 if (fault_code
& PGEX_W
)
1690 fault_type
= VM_PROT_WRITE
;
1691 else if (fault_code
& PGEX_I
)
1692 fault_type
= VM_PROT_EXECUTE
;
1694 fault_type
= VM_PROT_READ
;
1696 fault_flags
= 0; /* TODO: is that right? */
/* Page-align the faulting gpa and fault it into the guest vmspace. */
1697 rc
= vm_fault(&guest_vm
->sp
->vm_map
, (fault_gpa
& (~PAGE_MASK
)), fault_type
, fault_flags
);
1698 if (rc
!= KERN_SUCCESS
) {
1699 printf("vm_fault failed: %d\n", rc
);
1700 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
/* Simple exits that just report a reason to userland. */
1705 case VMCB_EXIT_WRITE_CR8
:
1706 kvm_run
.exit_reason
= KVM_EXIT_SET_TPR
;
1709 kvm_run
.exit_reason
= KVM_EXIT_NMI
;
1712 kvm_run
.exit_reason
= KVM_EXIT_HLT
;
1714 case VMCB_EXIT_SHUTDOWN
:
1715 intercept_shutdown(vcpu
);
1716 kvm_run
.exit_reason
= KVM_EXIT_SHUTDOWN
;
1718 case VMCB_EXIT_IOIO
:
1719 intercept_ioio(vcpu
, &kvm_run
,
1720 vmcb
->control
.exit_info_1
,
1721 vmcb
->control
.exit_info_2
);
1722 kvm_run
.exit_reason
= KVM_EXIT_IO
;
/* All remaining intercepts fall through to the "unhandled" report. */
1724 case VMCB_EXIT_READ_CR0
:
1725 case VMCB_EXIT_READ_CR3
:
1726 case VMCB_EXIT_READ_CR4
:
1727 case VMCB_EXIT_READ_CR8
:
1728 case VMCB_EXIT_WRITE_CR0
:
1729 case VMCB_EXIT_WRITE_CR3
:
1730 case VMCB_EXIT_WRITE_CR4
:
1731 case VMCB_EXIT_READ_DR0
:
1732 case VMCB_EXIT_READ_DR1
:
1733 case VMCB_EXIT_READ_DR2
:
1734 case VMCB_EXIT_READ_DR3
:
1735 case VMCB_EXIT_WRITE_DR0
:
1736 case VMCB_EXIT_WRITE_DR1
:
1737 case VMCB_EXIT_WRITE_DR2
:
1738 case VMCB_EXIT_WRITE_DR3
:
1739 case VMCB_EXIT_WRITE_DR5
:
1740 case VMCB_EXIT_WRITE_DR7
:
1742 case VMCB_EXIT_INIT
:
1743 case VMCB_EXIT_VINTR
:
1744 case VMCB_EXIT_CR0_SEL_WRITE
:
1745 case VMCB_EXIT_CPUID
:
1746 case VMCB_EXIT_INVD
:
1747 case VMCB_EXIT_INVLPG
:
1748 case VMCB_EXIT_INVLPGA
:
1750 case VMCB_EXIT_TASK_SWITCH
:
1751 case VMCB_EXIT_VMRUN
:
1752 case VMCB_EXIT_VMMCALL
:
1753 case VMCB_EXIT_VMLOAD
:
1754 case VMCB_EXIT_VMSAVE
:
1755 case VMCB_EXIT_STGI
:
1756 case VMCB_EXIT_CLGI
:
1757 case VMCB_EXIT_SKINIT
:
1758 case VMCB_EXIT_WBINVD
:
1759 case VMCB_EXIT_MONITOR
:
1760 case VMCB_EXIT_MWAIT_UNCOND
:
1762 printf("Unhandled vmexit:\n"
1763 " code: 0x%" PRIx64
"\n"
1764 " info1: 0x%" PRIx64
"\n"
1765 " info2: 0x%" PRIx64
"\n",
1766 vmcb
->control
.exit_code
,
1767 vmcb
->control
.exit_info_1
,
1768 vmcb
->control
.exit_info_2
);
1771 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
1777 /* TODO: check copyout ret val */
1778 copyout(&kvm_run
, uap
->run
, sizeof(struct kvm_run
));
/* NOTE(review): sizeof yields size_t; %zu would be the portable
 * format here rather than PRIu64. */
1779 printf("sizeof(struct kvm_run) = %" PRIu64
"\n", sizeof(struct kvm_run
));
/*
 * fkvm_create_vcpu: syscall entry point. Creates a new vCPU for the
 * calling process's guest VM, registers it with the guestvm, and binds
 * it to the calling thread. NOTE(review): return statements and error
 * codes are elided in this extraction; fkvm_vcpu_create's failure
 * behavior is not checked here -- confirm it cannot return NULL.
 */
1785 fkvm_create_vcpu(struct thread
*td
, struct fkvm_create_vcpu_args
*uap
)
1787 struct guestvm
*guest_vm
;
1793 guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
1794 if (guest_vm
== NULL
) {
1795 printf("PROC_GET_GUESTVM -> NULL\n");
1800 printf("fkvm_create_vcpu: td = %p\n", td
);
1801 vcpu
= fkvm_vcpu_create(guest_vm
);
1802 fkvm_guestvm_add_vcpu(guest_vm
, vcpu
);
1804 TD_SET_VCPU(td
, vcpu
);
1805 printf("fkvm_create_vcpu: vcpu = %p\n", vcpu
);
/*
 * fkvm_check_cpu_extension: probe the CPU for AMD SVM support via
 * CPUID and the VM_CR MSR. Returns KERN_SUCCESS when SVM is available
 * and enabled, KERN_FAILURE otherwise. NOTE(review): declarations of
 * regs[], cpu_exthigh and vmcr, plus some braces, are elided in this
 * extraction.
 */
1810 fkvm_check_cpu_extension(void)
1816 printf("fkvm_check_cpu_extension\n");
1818 /* Assumption: the architecture supports the cpuid instruction */
1820 /* Check if CPUID extended function 8000_0001h is supported. */
1821 do_cpuid(0x80000000, regs
);
1822 cpu_exthigh
= regs
[0];
1824 printf("cpu_exthigh = %u\n", cpu_exthigh
);
1826 if(cpu_exthigh
>= 0x80000001) {
1827 /* Execute CPUID extended function 8000_0001h */
1828 do_cpuid(0x80000001, regs
);
1829 printf("EAX = %u\n", regs
[0]);
/* NOTE(review): per AMD APM the SVM feature flag is CPUID
 * Fn8000_0001 ECX bit 2 (regs[2] & 0x4); this tests EAX bit 1 --
 * verify against the manual. */
1831 if((regs
[0] & 0x2) == 0) { /* Check SVM bit */
1832 printf("SVM not available\n");
1833 goto fail
; /* SVM not available */
1836 vmcr
= rdmsr(0xc0010114); /* Read VM_CR MSR */
/* NOTE(review): SVMDIS is VM_CR bit 4 (0x10); bit 3 (0x8) is the
 * LOCK bit -- verify the mask. */
1837 if((vmcr
& 0x8) == 0) { /* Check SVMDIS bit */
1838 printf("vmcr = %" PRIx64
"\n", vmcr
);
1839 printf("SVM allowed\n");
1840 return KERN_SUCCESS
; /* SVM allowed */
1843 /* Execute CPUID extended function 8000_000ah */
1844 do_cpuid(0x8000000a, regs
);
/* NOTE(review): SVM-lock (SVML) is Fn8000_000A EDX bit 2 (0x4);
 * this tests bit 1 -- verify. */
1845 if((regs
[3] & 0x2) == 0) { /* Check SVM_LOCK bit */
1846 /* SVM disabled at bios; not unlockable.
1847 * User must change a BIOS setting to enable SVM.
1849 printf("EDX = %u\n", regs
[3]);
1850 printf("SVM disabled at bios\n");
1854 * SVM may be unlockable;
1855 * consult the BIOS or TPM to obtain the key.
1857 printf("EDX = %u\n", regs
[3]);
1858 printf("SVM maybe unlockable\n");
1863 return KERN_FAILURE
;
/*
 * fkvm_proc_exit: process_exit eventhandler. When a process that owns
 * a guest VM exits, destroy the VM and detach it from the process so
 * nothing is leaked. No-op for processes without a guestvm.
 */
1867 fkvm_proc_exit(void *arg
, struct proc
*p
)
1869 struct guestvm
*guest_vm
;
1871 guest_vm
= PROC_GET_GUESTVM(p
);
1872 if (guest_vm
== NULL
)
1875 fkvm_destroy_vm(guest_vm
);
1876 PROC_SET_GUESTVM(p
, NULL
);
/*
 * fkvm_load: module initialization (run via SYSINIT below). Verifies
 * SVM support, registers the process-exit hook, allocates and
 * initializes the host-save area and the I/O / MSR permission bitmaps,
 * enables SVM by setting EFER.SVME, and programs the host-save-area
 * physical address into MSR_VM_HSAVE_PA. NOTE(review): local
 * declarations and allocation-failure handling are elided in this
 * extraction; this runs on the boot CPU only -- per-CPU EFER/HSAVE
 * setup is not visible here, confirm elsewhere.
 */
1880 fkvm_load(void *unused
)
1885 printf("fkvm_load\n");
1886 printf("sizeof(struct vmcb) = %" PRIx64
"\n", sizeof(struct vmcb
));
1892 /* check if SVM is supported */
1893 error
= fkvm_check_cpu_extension();
1894 if(error
!= KERN_SUCCESS
) {
1895 printf("ERROR: SVM extension not available\n");
/* Clean up guest VMs when their owning process exits. */
1899 exit_tag
= EVENTHANDLER_REGISTER(process_exit
, fkvm_proc_exit
, NULL
,
1900 EVENTHANDLER_PRI_ANY
);
1902 /* allocate structures */
1903 hsave_area
= fkvm_hsave_area_alloc();
1904 iopm
= fkvm_iopm_alloc();
1905 msrpm
= fkvm_msrpm_alloc();
1907 /* Initialize structures */
1908 fkvm_hsave_area_init(hsave_area
);
1909 fkvm_iopm_init(iopm
);
1910 fkvm_msrpm_init(msrpm
);
1912 /* Enable SVM in EFER */
1913 efer
= rdmsr(MSR_EFER
);
1914 printf("EFER = %" PRIx64
"\n", efer
);
1915 wrmsr(MSR_EFER
, efer
| EFER_SVME
);
/* Read back to confirm the write took effect. */
1916 efer
= rdmsr(MSR_EFER
);
1917 printf("new EFER = %" PRIx64
"\n", efer
);
1919 /* Write Host save address in MSR_VM_HSAVE_PA */
1920 wrmsr(MSR_VM_HSAVE_PA
, vtophys(hsave_area
));
1924 SYSINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_load
, NULL
);
/*
 * fkvm_unload: module teardown (run via SYSUNINIT below). Deregisters
 * the process-exit hook and frees the MSR/IO permission bitmaps and
 * the host-save area. NOTE(review): the loaded-check branch and the
 * iopm NULL-check (orig lines 1930-1935, 1941-1945) are elided in
 * this extraction.
 */
1927 fkvm_unload(void *unused
)
1929 printf("fkvm_unload\n");
1932 printf("fkvm_unload: fkvm not loaded");
1936 EVENTHANDLER_DEREGISTER(process_exit
, exit_tag
);
1938 if (msrpm
!= NULL
) {
/* BUG(review): this frees `iopm` under the `msrpm` check -- it should
 * almost certainly be fkvm_msrpm_free(msrpm); as written msrpm leaks
 * and iopm is freed twice (again below). */
1939 fkvm_msrpm_free(iopm
);
1943 fkvm_iopm_free(iopm
);
1946 if (hsave_area
!= NULL
) {
1947 fkvm_hsave_area_free(hsave_area
);
1951 SYSUNINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_unload
, NULL
);