2 * Copyright (c) 2008 The FreeBSD Project
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <sys/cdefs.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/kernel.h>
32 #include <sys/malloc.h>
33 #include <sys/sysproto.h>
37 #include <sys/eventhandler.h>
40 #include <vm/vm_extern.h>
41 #include <vm/vm_map.h>
42 #include <vm/vm_object.h>
43 #include <vm/vm_param.h>
44 #include <machine/_inttypes.h>
45 #include <machine/specialreg.h>
46 #include <machine/segments.h>
47 #include <machine/vmcb.h>
50 /* Definitions for Port IO */
58 #define PORT_MASK 0xFFFF0000
59 #define ADDR_MASK (7 << ADDR_SHIFT)
60 #define SIZE_MASK (7 << SIZE_SHIFT)
61 #define REP_MASK (1 << REP_SHIFT)
62 #define STR_MASK (1 << STR_SHIFT)
63 #define TYPE_MASK (1 << TYPE_SHIFT)
64 /* End Definitions for Port IO */
66 #define PMIO_PAGE_OFFSET 1
/*
 * Per AMD APM vol. 2 (sec. 15.10.1) the I/O permission map consumed by
 * VMRUN is 12 Kbytes of contiguous physical memory (64K port bits plus
 * one spill bit, rounded up to three 4K pages), not 8K+1 bytes.
 */
#define IOPM_SIZE (12*1024)
69 #define MSRPM_SIZE (8*1024)
73 static int fkvm_loaded
= 0;
75 static void *iopm
= NULL
; /* Should I allocate a vm_object_t instead? */
76 static void *msrpm
= NULL
; /* Should I allocate a vm_object_t instead? */
78 static void *hsave_area
= NULL
;
80 static eventhandler_tag exit_tag
;
108 unsigned long vmcb_pa
;
110 unsigned long regs
[NR_VCPU_REGS
];
111 u_int64_t host_gs_base
;
115 struct guestvm
*guest_vm
;
119 struct vcpu
*vcpus
[MAX_VCPUS
];
123 u_int64_t nested_cr3
;
127 #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
128 #define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"
129 #define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb"
130 #define SVM_CLGI ".byte 0x0f, 0x01, 0xdd"
131 #define SVM_STGI ".byte 0x0f, 0x01, 0xdc"
132 #define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf"
134 static inline struct vcpu
*
135 TD_GET_VCPU(struct thread
*td
)
140 printf("TD_GET_VCPU -> NULL\n");
145 TD_SET_VCPU(struct thread
*td
, struct vcpu
*vcpu
)
150 static inline struct guestvm
*
151 PROC_GET_GUESTVM(struct proc
*proc
)
153 struct guestvm
*guestvm
;
154 guestvm
= proc
->p_guestvm
;
159 PROC_SET_GUESTVM(struct proc
*proc
, struct guestvm
*guestvm
)
161 proc
->p_guestvm
= guestvm
; \
165 print_vmcb_seg(struct vmcb_seg
* vmcb_seg
, const char* name
)
167 printf("%s Selector\n", name
);
168 printf("Selector : %" PRIx16
"\n", vmcb_seg
->selector
);
169 printf("Attributes : %" PRIx16
"\n", vmcb_seg
->attrib
);
170 printf("Limit : %" PRIx32
"\n", vmcb_seg
->limit
);
171 printf("Base Address : %" PRIx64
"\n", vmcb_seg
->base
);
176 print_vmcb(struct vmcb
*vmcb
)
178 printf("VMCB Control Area\n");
179 printf("Intercept CR Reads : %" PRIx16
"\n", vmcb
->control
.intercept_cr_reads
);
180 printf("Intercept CR Writes : %" PRIx16
"\n", vmcb
->control
.intercept_cr_writes
);
181 printf("Intercept DR Reads : %" PRIx16
"\n", vmcb
->control
.intercept_dr_reads
);
182 printf("Intercept DR Writes : %" PRIx16
"\n", vmcb
->control
.intercept_dr_writes
);
183 printf("Intercept Exceptions : %" PRIx32
"\n", vmcb
->control
.intercept_exceptions
);
184 printf("Intercepts : %" PRIx64
"\n", vmcb
->control
.intercepts
);
185 printf("Reserved 1: \n");
186 for(int i
=0; i
< 44; i
++) {
187 printf("%" PRIx8
"", vmcb
->control
.reserved_1
[i
]); /* Should be Zero */
190 printf("IOPM Base PA : %" PRIx64
"\n", vmcb
->control
.iopm_base_pa
);
191 printf("MSRPM Base PA : %" PRIx64
"\n", vmcb
->control
.msrpm_base_pa
);
192 printf("TSC Offset : %" PRIx64
"\n", vmcb
->control
.tsc_offset
);
193 printf("Guest ASID : %" PRIx32
"\n", vmcb
->control
.guest_asid
);
194 printf("TLB Control : %" PRIx8
"\n", vmcb
->control
.tlb_control
);
195 printf("Reserved 2 : \n");
196 for(int i
=0; i
< 3; i
++) {
197 printf("%" PRIx8
"", vmcb
->control
.reserved_1
[i
]); /* Should be Zero */
200 printf("Virtual TPR : %" PRIx8
"\n", vmcb
->control
.v_tpr
);
201 printf("Virtual IRQ : %" PRIx8
"\n", vmcb
->control
.v_irq
);
202 printf("Virtual Interrupt : %" PRIx8
"\n", vmcb
->control
.v_intr
);
203 printf("Virtual Interrupt Masking: %" PRIx8
"\n", vmcb
->control
.v_intr_masking
);
204 printf("Virtual Interrupt Vector : %" PRIx8
"\n", vmcb
->control
.v_intr_vector
);
205 printf("Reserved 6 : \n");
206 for(int i
=0; i
< 3; i
++) {
207 printf("%" PRIx8
"", vmcb
->control
.reserved_6
[i
]); /* Should be Zero */
210 printf("Interrupt Shadow : %" PRIx8
"\n", vmcb
->control
.intr_shadow
);
211 printf("Reserved 7 : \n");
212 for(int i
=0; i
< 7; i
++) {
213 printf("%" PRIx8
"", vmcb
->control
.reserved_7
[i
]); /* Should be Zero */
216 printf("Exit Code : %" PRIx64
"\n", vmcb
->control
.exit_code
);
217 printf("Exit Info 1 : %" PRIx64
"\n", vmcb
->control
.exit_info_1
);
218 printf("Exit Info 2 : %" PRIx64
"\n", vmcb
->control
.exit_info_2
);
219 printf("Exit Interrupt Info : %" PRIx32
"\n", vmcb
->control
.exit_int_info
);
220 printf("Exit Interrupt Info Err Code: %" PRIx32
"\n", vmcb
->control
.exit_int_info_err_code
);
221 printf("Nested Control : %" PRIx64
"\n", vmcb
->control
.nested_ctl
);
222 printf("Reserved 8 : \n");
223 for(int i
=0; i
< 16; i
++) {
224 printf("%" PRIx8
"", vmcb
->control
.reserved_8
[i
]); /* Should be Zero */
227 printf("Event Injection : %" PRIx64
"\n", vmcb
->control
.event_inj
);
228 printf("Nested CR3 : %" PRIx64
"\n", vmcb
->control
.nested_cr3
);
229 printf("LBR Virtualization Enable: %" PRIx64
"\n", vmcb
->control
.lbr_virt_enable
);
230 printf("Reserved 9 : \n");
231 for(int i
=0; i
< 832; i
++) {
232 printf("%" PRIx8
"", vmcb
->control
.reserved_9
[i
]); /* Should be Zero */
238 printf("VMCB Save Area\n");
239 print_vmcb_seg(&(vmcb
->save
.es
), "ES");
240 print_vmcb_seg(&(vmcb
->save
.es
), "CS");
241 print_vmcb_seg(&(vmcb
->save
.es
), "SS");
242 print_vmcb_seg(&(vmcb
->save
.es
), "DS");
243 print_vmcb_seg(&(vmcb
->save
.es
), "FS");
244 print_vmcb_seg(&(vmcb
->save
.es
), "GS");
245 print_vmcb_seg(&(vmcb
->save
.es
), "GDTR");
246 print_vmcb_seg(&(vmcb
->save
.es
), "LDTR");
247 print_vmcb_seg(&(vmcb
->save
.es
), "IDTR");
248 print_vmcb_seg(&(vmcb
->save
.es
), "TR");
249 printf("Reserved 1 : \n");
250 for(int i
=0; i
< 43; i
++) {
251 printf("%" PRIx8
"", vmcb
->save
.reserved_1
[i
]); /* Should be Zero */
254 printf("Current Processor Level : %" PRIx8
"\n", vmcb
->save
.cpl
);
255 printf("Reserved 2 : \n");
256 for(int i
=0; i
< 4; i
++) {
257 printf("%" PRIx8
"", vmcb
->save
.reserved_2
[i
]); /* Should be Zero */
260 printf("EFER : %" PRIx64
"\n", vmcb
->save
.efer
);
261 printf("Reserved 3 : \n");
262 for(int i
=0; i
< 112; i
++) {
263 printf("%" PRIx8
"", vmcb
->save
.reserved_3
[i
]); /* Should be Zero */
266 printf("Control Register 4 : %" PRIx64
"\n", vmcb
->save
.cr4
);
267 printf("Control Register 3 : %" PRIx64
"\n", vmcb
->save
.cr3
);
268 printf("Control Register 0 : %" PRIx64
"\n", vmcb
->save
.cr0
);
269 printf("Debug Register 7 : %" PRIx64
"\n", vmcb
->save
.dr7
);
270 printf("Debug Register 6 : %" PRIx64
"\n", vmcb
->save
.dr6
);
271 printf("RFlags : %" PRIx64
"\n", vmcb
->save
.rflags
);
272 printf("RIP : %" PRIx64
"\n", vmcb
->save
.rip
);
273 printf("Reserved 4 : \n");
274 for(int i
=0; i
< 88; i
++) {
275 printf("%" PRIx8
"", vmcb
->save
.reserved_4
[i
]); /* Should be Zero */
278 printf("RSP : %" PRIx64
"\n", vmcb
->save
.rsp
);
279 printf("Reserved 5 : \n");
280 for(int i
=0; i
< 24; i
++) {
281 printf("%" PRIx8
"", vmcb
->save
.reserved_5
[i
]); /* Should be Zero */
284 printf("RAX : %" PRIx64
"\n", vmcb
->save
.rax
);
285 printf("STAR : %" PRIx64
"\n", vmcb
->save
.star
);
286 printf("LSTAR : %" PRIx64
"\n", vmcb
->save
.lstar
);
287 printf("CSTAR : %" PRIx64
"\n", vmcb
->save
.cstar
);
288 printf("SFMASK : %" PRIx64
"\n", vmcb
->save
.sfmask
);
289 printf("Kernel GS Base : %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
290 printf("SYSENTER CS : %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
291 printf("SYSENTER ESP : %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
292 printf("SYSENTER EIP : %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
293 printf("Control Register 2 : %" PRIx64
"\n", vmcb
->save
.cr2
);
294 printf("Reserved 6 : \n");
295 for(int i
=0; i
< 32; i
++) {
296 printf("%" PRIx8
"", vmcb
->save
.reserved_6
[i
]); /* Should be Zero */
299 printf("Global PAT : %" PRIx64
"\n", vmcb
->save
.g_pat
);
300 printf("Debug Control : %" PRIx64
"\n", vmcb
->save
.dbg_ctl
);
301 printf("BR From : %" PRIx64
"\n", vmcb
->save
.br_from
);
302 printf("BR To : %" PRIx64
"\n", vmcb
->save
.br_to
);
303 printf("Last Exception From : %" PRIx64
"\n", vmcb
->save
.last_excp_from
);
304 printf("Last Exception To : %" PRIx64
"\n", vmcb
->save
.last_excp_to
);
311 print_tss_desc(struct system_segment_descriptor
*tss_desc
)
313 printf("TSS desc @ %p:\n", tss_desc
);
314 printf("sd_lolimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lolimit
);
315 printf("sd_lobase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lobase
);
316 printf("sd_type: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_type
);
317 printf("sd_dpl: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_dpl
);
318 printf("sd_p: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_p
);
319 printf("sd_hilimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hilimit
);
320 printf("sd_xx0: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx0
);
321 printf("sd_gran: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_gran
);
322 printf("sd_hibase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hibase
);
323 printf("sd_xx1: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx1
);
324 printf("sd_mbz: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_mbz
);
325 printf("sd_xx2: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx2
);
330 print_tss(struct system_segment_descriptor
*tss_desc
)
336 base
= (u_int32_t
*) ((((u_int64_t
) tss_desc
->sd_hibase
) << 24) | ((u_int64_t
) tss_desc
->sd_lobase
));
337 limit
= ((tss_desc
->sd_hilimit
<< 16) | tss_desc
->sd_lolimit
) / 4;
339 printf("TSS: @ %p\n", base
);
340 for (i
= 0; i
<= limit
; i
++)
341 printf("%x: 0x%" PRIx32
"\n", i
, base
[i
]);
347 print_vmcb_save_area(struct vmcb
*vmcb
)
349 printf("VMCB save area:\n");
350 printf(" cs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
351 vmcb
->save
.cs
.selector
,
352 vmcb
->save
.cs
.attrib
,
355 printf(" fs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
356 vmcb
->save
.fs
.selector
,
357 vmcb
->save
.fs
.attrib
,
360 printf(" gs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
361 vmcb
->save
.gs
.selector
,
362 vmcb
->save
.gs
.attrib
,
365 printf(" tr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
366 vmcb
->save
.tr
.selector
,
367 vmcb
->save
.tr
.attrib
,
370 printf(" ldtr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
371 vmcb
->save
.ldtr
.selector
,
372 vmcb
->save
.ldtr
.attrib
,
373 vmcb
->save
.ldtr
.limit
,
374 vmcb
->save
.ldtr
.base
);
375 printf(" rip: %" PRIx64
"\n", vmcb
->save
.rip
);
376 printf(" kernel_gs_base: %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
377 printf(" star: %" PRIx64
"\n", vmcb
->save
.star
);
378 printf(" lstar: %" PRIx64
"\n", vmcb
->save
.lstar
);
379 printf(" cstar: %" PRIx64
"\n", vmcb
->save
.cstar
);
380 printf(" sfmask: %" PRIx64
"\n", vmcb
->save
.sfmask
);
381 printf(" sysenter_cs: %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
382 printf(" sysenter_esp: %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
383 printf(" sysenter_eip: %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
388 vmrun_assert(struct vmcb
*vmcb
)
390 #define A(cond) do { if ((cond)) { printf("Error: assertion not met on line %d\n", __LINE__); bad = 1; } } while (0)
396 // The following are illegal:
399 A((vmcb
->save
.efer
& 0x0000000000001000) == 0);
401 // CR0.CD is zero and CR0.NW is set
402 A( ((vmcb
->save
.cr0
& 0x0000000040000000) == 0) &&
403 ((vmcb
->save
.cr0
& 0x0000000020000000) != 0));
405 // CR0[63:32] are not zero.
406 A((vmcb
->save
.cr0
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
408 // Any MBZ bit of CR3 is set.
409 A((vmcb
->save
.cr3
& 0xFFF0000000000000) != 0);
411 // CR4[63:11] are not zero.
412 A((vmcb
->save
.cr4
& 0xFFFFFFFFFFFFF800) == 0xFFFFFFFFFFFFF800);
414 // DR6[63:32] are not zero.
415 A((vmcb
->save
.dr6
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
417 // DR7[63:32] are not zero.
418 A((vmcb
->save
.dr7
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
420 // EFER[63:15] are not zero.
421 A((vmcb
->save
.efer
& 0xFFFFFFFFFFFF8000) == 0xFFFFFFFFFFF8000);
423 // EFER.LMA or EFER.LME is non-zero and this processor does not support long mode.
424 //// A((vmcb->save.efer & 0x0000000000000500) != 0);
426 // EFER.LME and CR0.PG are both set and CR4.PAE is zero.
427 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
428 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
429 ((vmcb
->save
.cr4
& 0x0000000000000020) != 0));
431 // EFER.LME and CR0.PG are both non-zero and CR0.PE is zero.
432 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
433 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
434 ((vmcb
->save
.cr0
& 0x0000000000000001) == 0));
436 // EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
437 // cs.attrib = concat 55-52 and 47-40 (p372 v2)
438 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
439 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
440 ((vmcb
->save
.cr4
& 0x0000000000000020) != 0) &&
441 ((vmcb
->save
.cs
.attrib
& 0x0200) != 0) &&
442 ((vmcb
->save
.cs
.attrib
& 0x0400) != 0));
444 // The VMRUN intercept bit is clear.
445 A((vmcb
->control
.intercepts
& 0x0000000100000000) == 0);
447 // The MSR or IOIO intercept tables extend to a physical address that is
448 // greater than or equal to the maximum supported physical address.
450 // Illegal event injection (see Section 15.19 on page 391).
452 // ASID is equal to zero.
453 A(vmcb
->control
.guest_asid
== 0);
455 // VMRUN can load a guest value of CR0 with PE = 0 but PG = 1, a
456 // combination that is otherwise illegal (see Section 15.18).
458 // In addition to consistency checks, VMRUN and #VMEXIT canonicalize (i.e.,
459 // sign-extend to 63 bits) all base addresses in the segment registers
460 // that have been loaded.
468 fkvm_vcpu_run(struct vcpu
*vcpu
)
477 u_short ldt_selector
;
479 unsigned long host_cr2
;
480 unsigned long host_dr6
;
481 unsigned long host_dr7
;
483 struct system_segment_descriptor
*tss_desc
;
488 printf("begin fkvm_vcpu_run\n");
492 if (vmrun_assert(vmcb
))
495 tss_desc
= (struct system_segment_descriptor
*) (&gdt
[GPROC0_SEL
]);
496 sel
= GSEL(GPROC0_SEL
, SEL_KPL
);
498 // printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64 "\n", sel);
499 // print_tss_desc(tss_desc);
500 // print_tss(tss_desc);
502 print_vmcb_save_area(vmcb
);
503 printf("vcpu->regs[VCPU_REGS_RIP]: 0x%lx\n", vcpu
->regs
[VCPU_REGS_RIP
]);
506 vmcb
->save
.rax
= vcpu
->regs
[VCPU_REGS_RAX
];
507 vmcb
->save
.rsp
= vcpu
->regs
[VCPU_REGS_RSP
];
508 vmcb
->save
.rip
= vcpu
->regs
[VCPU_REGS_RIP
];
510 /* meh: kvm has pre_svm_run(svm); */
512 vcpu
->host_gs_base
= rdmsr(MSR_GSBASE
);
513 // printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);
517 ldt_selector
= rldt();
518 // printf("fs selector: %hx\n", fs_selector);
519 // printf("gs selector: %hx\n", gs_selector);
520 // printf("ldt selector: %hx\n", ldt_selector);
527 vmcb
->save
.cr2
= vcpu
->cr2
;
529 // TODO: something with apic_base?
531 /* meh: dr7? db_regs? */
533 // printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
534 // printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
535 // printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
536 // printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));
538 star
= rdmsr(MSR_STAR
);
539 lstar
= rdmsr(MSR_LSTAR
);
540 cstar
= rdmsr(MSR_CSTAR
);
541 sfmask
= rdmsr(MSR_SF_MASK
);
545 __asm
__volatile (SVM_CLGI
);
552 "mov %c[rbx](%[svm]), %%rbx \n\t"
553 "mov %c[rcx](%[svm]), %%rcx \n\t"
554 "mov %c[rdx](%[svm]), %%rdx \n\t"
555 "mov %c[rsi](%[svm]), %%rsi \n\t"
556 "mov %c[rdi](%[svm]), %%rdi \n\t"
557 "mov %c[rbp](%[svm]), %%rbp \n\t"
558 "mov %c[r8](%[svm]), %%r8 \n\t"
559 "mov %c[r9](%[svm]), %%r9 \n\t"
560 "mov %c[r10](%[svm]), %%r10 \n\t"
561 "mov %c[r11](%[svm]), %%r11 \n\t"
562 "mov %c[r12](%[svm]), %%r12 \n\t"
563 "mov %c[r13](%[svm]), %%r13 \n\t"
564 "mov %c[r14](%[svm]), %%r14 \n\t"
565 "mov %c[r15](%[svm]), %%r15 \n\t"
567 /* Enter guest mode */
569 "mov %c[vmcb](%[svm]), %%rax \n\t"
575 /* Save guest registers, load host registers */
576 "mov %%rbx, %c[rbx](%[svm]) \n\t"
577 "mov %%rcx, %c[rcx](%[svm]) \n\t"
578 "mov %%rdx, %c[rdx](%[svm]) \n\t"
579 "mov %%rsi, %c[rsi](%[svm]) \n\t"
580 "mov %%rdi, %c[rdi](%[svm]) \n\t"
581 "mov %%rbp, %c[rbp](%[svm]) \n\t"
582 "mov %%r8, %c[r8](%[svm]) \n\t"
583 "mov %%r9, %c[r9](%[svm]) \n\t"
584 "mov %%r10, %c[r10](%[svm]) \n\t"
585 "mov %%r11, %c[r11](%[svm]) \n\t"
586 "mov %%r12, %c[r12](%[svm]) \n\t"
587 "mov %%r13, %c[r13](%[svm]) \n\t"
588 "mov %%r14, %c[r14](%[svm]) \n\t"
589 "mov %%r15, %c[r15](%[svm]) \n\t"
593 [vmcb
]"i"(offsetof(struct vcpu
, vmcb_pa
)),
594 [rbx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBX
])),
595 [rcx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RCX
])),
596 [rdx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDX
])),
597 [rsi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RSI
])),
598 [rdi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDI
])),
599 [rbp
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBP
])),
600 [r8
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R8
])),
601 [r9
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R9
])),
602 [r10
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R10
])),
603 [r11
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R11
])),
604 [r12
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R12
])),
605 [r13
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R13
])),
606 [r14
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R14
])),
607 [r15
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R15
]))
609 "rbx", "rcx", "rdx", "rsi", "rdi",
610 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
614 /* meh: dr7? db_regs? */
616 vcpu
->cr2
= vmcb
->save
.cr2
;
618 vcpu
->regs
[VCPU_REGS_RAX
] = vmcb
->save
.rax
;
619 vcpu
->regs
[VCPU_REGS_RSP
] = vmcb
->save
.rsp
;
620 vcpu
->regs
[VCPU_REGS_RIP
] = vmcb
->save
.rip
;
627 load_fs(fs_selector
);
628 load_gs(gs_selector
);
631 wrmsr(MSR_GSBASE
, vcpu
->host_gs_base
);
633 tss_desc
->sd_type
= SDT_SYSTSS
;
636 wrmsr(MSR_STAR
, star
);
637 wrmsr(MSR_LSTAR
, lstar
);
638 wrmsr(MSR_CSTAR
, cstar
);
639 wrmsr(MSR_SF_MASK
, sfmask
);
643 __asm
__volatile (SVM_STGI
);
647 printf("exit_code: %" PRIx64
"\n", vmcb
->control
.exit_code
);
649 // print_tss_desc(tss_desc);
650 // print_tss(tss_desc);
652 print_vmcb_save_area(vmcb
);
660 _fkvm_init_seg(struct vmcb_seg
*seg
, uint16_t attrib
)
663 seg
->attrib
= VMCB_SELECTOR_P_MASK
| attrib
;
669 fkvm_init_seg(struct vmcb_seg
*seg
)
671 _fkvm_init_seg(seg
, VMCB_SELECTOR_S_MASK
| VMCB_SELECTOR_WRITE_MASK
);
675 fkvm_init_sys_seg(struct vmcb_seg
*seg
, uint16_t attrib
)
677 _fkvm_init_seg(seg
, attrib
);
681 fkvm_iopm_alloc(void)
683 return contigmalloc(IOPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
687 fkvm_iopm_init(void *iopm
)
689 memset(iopm
, 0xff, IOPM_SIZE
); /* TODO: we may want to allow access to PC debug port */
693 fkvm_iopm_free(void *iopm
)
695 contigfree(iopm
, IOPM_SIZE
, M_DEVBUF
);
699 fkvm_msrpm_alloc(void)
701 return contigmalloc(MSRPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
705 fkvm_msrpm_init(void *msrpm
)
707 memset(msrpm
, 0xff, MSRPM_SIZE
); /* TODO: we may want to allow some MSR accesses */
711 fkvm_msrpm_free(void *msrpm
)
713 contigfree(msrpm
, MSRPM_SIZE
, M_DEVBUF
);
717 fkvm_hsave_area_alloc(void)
719 return contigmalloc(PAGE_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
723 fkvm_hsave_area_init(void *hsave_area
)
728 fkvm_hsave_area_free(void *hsave_area
)
730 contigfree(hsave_area
, PAGE_SIZE
, M_DEVBUF
);
733 static struct vmspace
*
734 fkvm_make_vmspace(void)
738 sp
= vmspace_alloc(0, 0xffffffffffffffff);
740 printf("vmspace_alloc failed\n");
748 fkvm_destroy_vmspace(struct vmspace
* sp
)
754 fkvm_vmcb_alloc(void)
756 return contigmalloc(PAGE_SIZE
, M_DEVBUF
, M_ZERO
, 0, -1UL,
761 fkvm_vmcb_init(struct vmcb
*vmcb
)
763 struct vmcb_control_area
*control
= &vmcb
->control
;
764 struct vmcb_save_area
*save
= &vmcb
->save
;
766 control
->intercept_cr_reads
= INTERCEPT_CR4_MASK
;
768 control
->intercept_cr_writes
= INTERCEPT_CR4_MASK
|
771 control
->intercept_dr_reads
= INTERCEPT_DR0_MASK
|
776 control
->intercept_dr_writes
= INTERCEPT_DR0_MASK
|
783 control
->intercept_exceptions
= (1 << IDT_UD
) | // Invalid Opcode
784 (1 << IDT_MC
); // Machine Check
786 control
->intercepts
= INTERCEPT_INTR
|
793 INTERCEPT_IOIO_PROT
|
805 INTERCEPT_MWAIT_UNCOND
;
807 control
->iopm_base_pa
= vtophys(iopm
);
808 control
->msrpm_base_pa
= vtophys(msrpm
);
809 control
->tsc_offset
= 0;
811 /* TODO: remove this once we assign asid's to distinct VM's */
812 control
->guest_asid
= 1;
813 control
->tlb_control
= VMCB_TLB_CONTROL_FLUSH_ALL
;
815 /* let v_tpr default to 0 */
816 /* let v_irq default to 0 */
817 /* let v_intr default to 0 */
819 control
->v_intr_masking
= 1;
821 /* let v_intr_vector default to 0 */
822 /* let intr_shadow default to 0 */
823 /* let exit_code, exit_info_1, exit_info_2, exit_int_info,
824 exit_int_info_err_code default to 0 */
826 control
->nested_ctl
= 1;
828 /* let event_inj default to 0 */
830 // (nested_cr3 is later)
832 /* let lbr_virt_enable default to 0 */
835 fkvm_init_seg(&save
->ds
);
836 fkvm_init_seg(&save
->es
);
837 fkvm_init_seg(&save
->fs
);
838 fkvm_init_seg(&save
->gs
);
839 fkvm_init_seg(&save
->ss
);
841 _fkvm_init_seg(&save
->cs
, VMCB_SELECTOR_READ_MASK
| VMCB_SELECTOR_S_MASK
|
842 VMCB_SELECTOR_CODE_MASK
);
843 save
->cs
.selector
= 0xf000;
844 save
->cs
.base
= 0xffff0000;
846 save
->gdtr
.limit
= 0xffff;
847 save
->idtr
.limit
= 0xffff;
849 fkvm_init_sys_seg(&save
->ldtr
, SDT_SYSLDT
);
850 fkvm_init_sys_seg(&save
->tr
, SDT_SYS286BSY
);
852 save
->g_pat
= PAT_VALUE(PAT_WRITE_BACK
, 0) | PAT_VALUE(PAT_WRITE_THROUGH
, 1) |
853 PAT_VALUE(PAT_UNCACHED
, 2) | PAT_VALUE(PAT_UNCACHEABLE
, 3) |
854 PAT_VALUE(PAT_WRITE_BACK
, 4) | PAT_VALUE(PAT_WRITE_THROUGH
, 5) |
855 PAT_VALUE(PAT_UNCACHED
, 6) | PAT_VALUE(PAT_UNCACHEABLE
, 7);
857 /* CR0 = 6000_0010h at boot */
858 save
->cr0
= CR0_ET
| CR0_NW
| CR0_CD
;
859 save
->dr6
= 0xffff0ff0;
862 save
->rip
= 0x0000fff0;
864 save
->efer
= EFER_SVME
;
868 fkvm_vmcb_free(struct vmcb
*vmcb
)
870 contigfree(vmcb
, PAGE_SIZE
, M_DEVBUF
);
874 fkvm_vcpu_create(struct guestvm
*guest_vm
)
877 vcpu
= malloc(sizeof(struct vcpu
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
879 vcpu
->vmcb
= fkvm_vmcb_alloc();
880 vcpu
->vmcb_pa
= vtophys(vcpu
->vmcb
);
881 printf("vmcb = 0x%p\n", vcpu
->vmcb
);
882 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu
->vmcb_pa
);
884 fkvm_vmcb_init(vcpu
->vmcb
);
885 vcpu
->vmcb
->control
.nested_cr3
= guest_vm
->nested_cr3
;
886 vcpu
->regs
[VCPU_REGS_RIP
] = vcpu
->vmcb
->save
.rip
;
888 vcpu
->guest_vm
= guest_vm
;
894 fkvm_vcpu_destroy(struct vcpu
*vcpu
)
896 fkvm_vmcb_free(vcpu
->vmcb
);
897 free(vcpu
, M_DEVBUF
);
900 static struct guestvm
*
901 fkvm_guestvm_alloc(void)
903 return malloc(sizeof(struct guestvm
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
907 fkvm_guestvm_free(struct guestvm
* guest_vm
)
909 free(guest_vm
, M_DEVBUF
);
913 fkvm_guestvm_add_vcpu(struct guestvm
*guest_vm
, struct vcpu
*vcpu
)
915 guest_vm
->vcpus
[guest_vm
->nr_vcpus
] = vcpu
;
916 guest_vm
->nr_vcpus
++; /* TODO: Probably not safe to increment */
917 /* How about a lock to protect all of this? */
922 fkvm_userpoke(struct thread
*td
, struct fkvm_userpoke_args
*uap
)
924 printf("fkvm_userpoke\n");
933 fkvm_mem_has_entry(vm_map_entry_t expected_entry
, vm_map_t vm_map
, vm_offset_t vaddr
)
935 vm_map_entry_t lookup_entry
;
936 vm_object_t throwaway_object
;
937 vm_pindex_t throwaway_pindex
;
938 vm_prot_t throwaway_prot
;
939 boolean_t throwaway_wired
;
942 error
= vm_map_lookup(&vm_map
, /* IN/OUT */
944 VM_PROT_READ
|VM_PROT_WRITE
,
945 &lookup_entry
, /* OUT */
946 &throwaway_object
, /* OUT */
947 &throwaway_pindex
, /* OUT */
948 &throwaway_prot
, /* OUT */
949 &throwaway_wired
); /* OUT */
950 if (error
!= KERN_SUCCESS
)
952 vm_map_lookup_done(vm_map
, lookup_entry
);
953 return (lookup_entry
== expected_entry
);
957 fkvm_guest_check_range(struct guestvm
*guest_vm
, uint64_t start
, uint64_t end
)
959 vm_map_t guest_vm_map
;
960 vm_map_entry_t lookup_entry
;
961 vm_object_t throwaway_object
;
962 vm_pindex_t throwaway_pindex
;
963 vm_prot_t throwaway_prot
;
964 boolean_t throwaway_wired
;
968 guest_vm_map
= &guest_vm
->sp
->vm_map
;
970 error
= vm_map_lookup(&guest_vm_map
, /* IN/OUT */
972 VM_PROT_READ
|VM_PROT_WRITE
,
973 &lookup_entry
, /* OUT */
974 &throwaway_object
, /* OUT */
975 &throwaway_pindex
, /* OUT */
976 &throwaway_prot
, /* OUT */
977 &throwaway_wired
); /* OUT */
978 if (error
!= KERN_SUCCESS
)
980 vm_map_lookup_done(guest_vm_map
, lookup_entry
);
983 TODO: We can't actually nest the lookups:
984 panic: _sx_xlock_hard: recursed on non-recursive sx user map @ ../../../vm/vm_map.c:3115
985 Therefore, I've moved the lookup_done above for now, but we really need a lock here.
987 Maybe it's better to use vm_map_lookup_entry directly.
991 if (fkvm_mem_has_entry(lookup_entry
, guest_vm_map
, end
))
1000 fkvm_get_regs_regs(struct vcpu
*vcpu
, struct kvm_regs
*out
)
1002 out
->rax
= vcpu
->regs
[VCPU_REGS_RAX
];
1003 out
->rbx
= vcpu
->regs
[VCPU_REGS_RBX
];
1004 out
->rcx
= vcpu
->regs
[VCPU_REGS_RCX
];
1005 out
->rdx
= vcpu
->regs
[VCPU_REGS_RDX
];
1006 out
->rsi
= vcpu
->regs
[VCPU_REGS_RSI
];
1007 out
->rdi
= vcpu
->regs
[VCPU_REGS_RDI
];
1008 out
->rsp
= vcpu
->regs
[VCPU_REGS_RSP
];
1009 out
->rbp
= vcpu
->regs
[VCPU_REGS_RBP
];
1010 out
->r8
= vcpu
->regs
[VCPU_REGS_R8
];
1011 out
->r9
= vcpu
->regs
[VCPU_REGS_R9
];
1012 out
->r10
= vcpu
->regs
[VCPU_REGS_R10
];
1013 out
->r11
= vcpu
->regs
[VCPU_REGS_R11
];
1014 out
->r12
= vcpu
->regs
[VCPU_REGS_R12
];
1015 out
->r13
= vcpu
->regs
[VCPU_REGS_R13
];
1016 out
->r14
= vcpu
->regs
[VCPU_REGS_R14
];
1017 out
->r15
= vcpu
->regs
[VCPU_REGS_R15
];
1018 out
->rip
= vcpu
->regs
[VCPU_REGS_RIP
];
1019 out
->rflags
= vcpu
->vmcb
->save
.rflags
;
1023 fkvm_set_regs_regs(struct vcpu
*vcpu
, const struct kvm_regs
*in
)
1025 vcpu
->regs
[VCPU_REGS_RAX
] = in
->rax
;
1026 vcpu
->regs
[VCPU_REGS_RBX
] = in
->rbx
;
1027 vcpu
->regs
[VCPU_REGS_RCX
] = in
->rcx
;
1028 vcpu
->regs
[VCPU_REGS_RDX
] = in
->rdx
;
1029 vcpu
->regs
[VCPU_REGS_RSI
] = in
->rsi
;
1030 vcpu
->regs
[VCPU_REGS_RDI
] = in
->rdi
;
1031 vcpu
->regs
[VCPU_REGS_RSP
] = in
->rsp
;
1032 vcpu
->regs
[VCPU_REGS_RBP
] = in
->rbp
;
1033 vcpu
->regs
[VCPU_REGS_R8
] = in
->r8
;
1034 vcpu
->regs
[VCPU_REGS_R9
] = in
->r9
;
1035 vcpu
->regs
[VCPU_REGS_R10
] = in
->r10
;
1036 vcpu
->regs
[VCPU_REGS_R11
] = in
->r11
;
1037 vcpu
->regs
[VCPU_REGS_R12
] = in
->r12
;
1038 vcpu
->regs
[VCPU_REGS_R13
] = in
->r13
;
1039 vcpu
->regs
[VCPU_REGS_R14
] = in
->r14
;
1040 vcpu
->regs
[VCPU_REGS_R15
] = in
->r15
;
1041 vcpu
->regs
[VCPU_REGS_RIP
] = in
->rip
;
1042 vcpu
->vmcb
->save
.rflags
= in
->rflags
;
1046 fkvm_set_vmcb_dtable(struct vmcb_seg
*vmcb_seg
, struct kvm_dtable
*fkvm_dtable
)
1048 vmcb_seg
->base
= fkvm_dtable
->base
;
1049 vmcb_seg
->limit
= fkvm_dtable
->limit
;
1053 fkvm_set_vmcb_seg(struct vmcb_seg
*vmcb_seg
, struct kvm_segment
*fkvm_seg
)
1055 vmcb_seg
->base
= fkvm_seg
->base
;
1056 vmcb_seg
->limit
= fkvm_seg
->limit
;
1057 vmcb_seg
->selector
= fkvm_seg
->selector
;
1058 if (fkvm_seg
->unusable
)
1061 vmcb_seg
->attrib
= (fkvm_seg
->type
& VMCB_SELECTOR_TYPE_MASK
);
1062 vmcb_seg
->attrib
|= (fkvm_seg
->s
& 1) << VMCB_SELECTOR_S_SHIFT
;
1063 vmcb_seg
->attrib
|= (fkvm_seg
->dpl
& 3) << VMCB_SELECTOR_DPL_SHIFT
;
1064 vmcb_seg
->attrib
|= (fkvm_seg
->present
& 1) << VMCB_SELECTOR_P_SHIFT
;
1065 vmcb_seg
->attrib
|= (fkvm_seg
->avl
& 1) << VMCB_SELECTOR_AVL_SHIFT
;
1066 vmcb_seg
->attrib
|= (fkvm_seg
->l
& 1) << VMCB_SELECTOR_L_SHIFT
;
1067 vmcb_seg
->attrib
|= (fkvm_seg
->db
& 1) << VMCB_SELECTOR_DB_SHIFT
;
1068 vmcb_seg
->attrib
|= (fkvm_seg
->g
& 1) << VMCB_SELECTOR_G_SHIFT
;
1073 fkvm_set_cr8(struct vcpu
*vcpu
, uint64_t cr8
)
1075 // TODO: if cr8 has reserved bits inject GP Fault, return
1077 vcpu
->vmcb
->control
.v_tpr
= (uint8_t) cr8
;
1081 fkvm_set_efer(struct vcpu
*vcpu
, uint64_t efer
)
1083 struct vmcb
*vmcb
= vcpu
->vmcb
;
1084 //if efer has reserved bits set: inject GP Fault
1086 if (vmcb
->save
.cr0
& CR0_PG
) { //If paging is enabled do not allow changes to LME
1087 if ((vmcb
->save
.efer
& EFER_LME
) != (efer
& EFER_LME
)) {
1088 printf("fkvm_set_efer: attempt to change LME while paging\n");
1093 vmcb
->save
.efer
= efer
| EFER_SVME
;
1097 fkvm_get_regs_sregs(struct vcpu
*vcpu
, struct kvm_sregs
*out
)
1103 fkvm_set_regs_sregs(struct vcpu
*vcpu
, struct kvm_sregs
*in
)
1105 struct vmcb
*vmcb
= vcpu
->vmcb
;
1107 fkvm_set_vmcb_seg(&vmcb
->save
.cs
, &in
->cs
);
1108 fkvm_set_vmcb_seg(&vmcb
->save
.ds
, &in
->ds
);
1109 fkvm_set_vmcb_seg(&vmcb
->save
.es
, &in
->es
);
1110 fkvm_set_vmcb_seg(&vmcb
->save
.fs
, &in
->fs
);
1111 fkvm_set_vmcb_seg(&vmcb
->save
.gs
, &in
->gs
);
1112 fkvm_set_vmcb_seg(&vmcb
->save
.ss
, &in
->ss
);
1113 fkvm_set_vmcb_seg(&vmcb
->save
.tr
, &in
->tr
);
1114 fkvm_set_vmcb_seg(&vmcb
->save
.ldtr
, &in
->ldt
);
1116 vmcb
->save
.cpl
= (vmcb
->save
.cs
.attrib
>> VMCB_SELECTOR_DPL_SHIFT
) & 3;
1118 fkvm_set_vmcb_dtable(&vmcb
->save
.idtr
, &in
->idt
);
1119 fkvm_set_vmcb_dtable(&vmcb
->save
.gdtr
, &in
->gdt
);
1121 vcpu
->cr2
= in
->cr2
;
1122 vcpu
->cr3
= in
->cr3
;
1124 fkvm_set_cr8(vcpu
, in
->cr8
);
1125 fkvm_set_efer(vcpu
, in
->efer
);
1126 /* TODO: apic_base */
1127 vmcb
->save
.cr0
= in
->cr0
;
1128 vmcb
->save
.cr4
= in
->cr4
;
1129 /* TODO: irq_pending, interrupt_bitmap, irq_summary */
1133 fkvm_get_regs_msrs(struct vcpu
*vcpu
, uint32_t nmsrs
, struct kvm_msr_entry
*entries
) {
1138 fkvm_set_reg_msr(struct vcpu
*vcpu
, uint32_t index
, uint64_t data
) {
1139 struct vmcb
*vmcb
= vcpu
->vmcb
;
1147 vmcb
->control
.tsc_offset
= data
- tsc
;
1152 vmcb
->save
.star
= data
;
1157 vmcb
->save
.lstar
= data
;
1162 vmcb
->save
.cstar
= data
;
1167 vmcb
->save
.kernel_gs_base
= data
;
1172 vmcb
->save
.sfmask
= data
;
1176 case MSR_SYSENTER_CS_MSR
: {
1177 vmcb
->save
.sysenter_cs
= data
;
1181 case MSR_SYSENTER_EIP_MSR
: {
1182 vmcb
->save
.sysenter_eip
= data
;
1186 case MSR_SYSENTER_ESP_MSR
: {
1187 vmcb
->save
.sysenter_esp
= data
;
1191 case MSR_DEBUGCTLMSR
: {
1192 printf("unimplemented at %d\n", __LINE__
);
1196 case MSR_PERFEVSEL0
... MSR_PERFEVSEL3
:
1197 case MSR_PERFCTR0
... MSR_PERFCTR3
: {
1198 printf("unimplemented at %d\n", __LINE__
);
1203 fkvm_set_efer(vcpu
, data
);
1207 case MSR_MC0_STATUS
: {
1208 printf("unimplemented at %d\n", __LINE__
);
1212 case MSR_MCG_STATUS
: {
1213 printf("unimplemented at %d\n", __LINE__
);
1218 printf("unimplemented at %d\n", __LINE__
);
1222 //TODO: MSR_IA32_UCODE_REV
1223 //TODO: MSR_IA32_UCODE_WRITE
1224 //TODO: 0x200 ... 0x2ff: set_msr_mtrr
1226 case MSR_APICBASE
: {
1227 printf("unimplemented at %d\n", __LINE__
);
1231 case MSR_IA32_MISC_ENABLE
: {
1232 printf("unimplemented at %d\n", __LINE__
);
1236 //TODO: MSR_KVM_WALL_CLOCK
1237 //TODO: MSR_KVM_SYSTEM_TIME
1240 printf("Did not set unimplemented msr: 0x%" PRIx32
"\n", index
);
1245 fkvm_set_regs_msrs(struct vcpu
*vcpu
, uint32_t nmsrs
, struct kvm_msr_entry
*entries
) {
1248 for (i
= 0; i
< nmsrs
; i
++) {
1249 fkvm_set_reg_msr(vcpu
, entries
[i
].index
, entries
[i
].data
);
1256 fkvm_get_regs(struct thread
*td
, struct fkvm_get_regs_args
*uap
)
1264 vcpu
= TD_GET_VCPU(td
);
1268 switch (uap
->type
) {
1270 case FKVM_REGS_TYPE_REGS
: {
1271 struct kvm_regs out
;
1272 fkvm_get_regs_regs(vcpu
, &out
);
1273 return copyout(&out
, uap
->regs
, sizeof(out
));
1276 case FKVM_REGS_TYPE_SREGS
: {
1277 struct kvm_sregs out
;
1278 fkvm_get_regs_sregs(vcpu
, &out
);
1279 return copyout(&out
, uap
->regs
, sizeof(out
));
1282 case FKVM_REGS_TYPE_MSRS
: {
1283 struct kvm_msrs out
;
1284 struct kvm_msrs
*user_msrs
;
1285 struct kvm_msr_entry
*entries
;
1288 user_msrs
= (struct kvm_msrs
*)uap
->regs
;
1290 error
= copyin(uap
->regs
, &out
, sizeof(out
));
1294 size
= sizeof(*entries
) * out
.nmsrs
;
1295 entries
= malloc(size
, M_DEVBUF
, M_WAITOK
|M_ZERO
);
1296 if (entries
== NULL
)
1299 error
= copyin(user_msrs
->entries
, entries
, size
);
1301 printf("FKVM_REGS_TYPE_MSRS: unable to copyin entries\n");
1302 free(entries
, M_DEVBUF
);
1306 fkvm_get_regs_msrs(vcpu
, out
.nmsrs
, entries
);
1308 error
= copyout(user_msrs
->entries
, entries
, size
);
1310 printf("FKVM_REGS_TYPE_MSRS: unable to copyout entries\n");
1313 free(entries
, M_DEVBUF
);
1323 fkvm_set_regs(struct thread
*td
, struct fkvm_set_regs_args
*uap
)
1328 vcpu
= TD_GET_VCPU(td
);
1332 switch (uap
->type
) {
1334 case FKVM_REGS_TYPE_REGS
: {
1336 error
= copyin(uap
->regs
, &in
, sizeof(in
));
1339 fkvm_set_regs_regs(vcpu
, &in
);
1343 case FKVM_REGS_TYPE_SREGS
: {
1344 struct kvm_sregs in
;
1345 error
= copyin(uap
->regs
, &in
, sizeof(in
));
1348 fkvm_set_regs_sregs(vcpu
, &in
);
1352 case FKVM_REGS_TYPE_MSRS
: {
1354 struct kvm_msrs
*user_msrs
;
1355 struct kvm_msr_entry
*entries
;
1358 user_msrs
= (struct kvm_msrs
*)uap
->regs
;
1360 error
= copyin(uap
->regs
, &in
, sizeof(in
));
1364 size
= sizeof(*entries
) * in
.nmsrs
;
1365 entries
= malloc(size
, M_DEVBUF
, M_WAITOK
|M_ZERO
);
1366 if (entries
== NULL
)
1369 error
= copyin(user_msrs
->entries
, entries
, size
);
1371 printf("FKVM_REGS_TYPE_MSRS: unable to copyin entries\n");
1372 free(entries
, M_DEVBUF
);
1376 fkvm_set_regs_msrs(vcpu
, in
.nmsrs
, entries
);
1378 free(entries
, M_DEVBUF
);
1387 /* This function can only be called with multiples of page sizes */
1388 /* vaddr as NULL overloads to fkvm_guest_check_range */
1390 fkvm_set_user_mem_region(struct thread
*td
, struct fkvm_set_user_mem_region_args
*uap
)
1392 struct guestvm
*guest_vm
;
1397 struct vmspace
*user_vm_space
;
1398 vm_map_t user_vm_map
;
1400 vm_object_t vm_object
;
1401 vm_pindex_t vm_object_pindex
;
1402 vm_ooffset_t vm_object_offset
;
1403 vm_prot_t throwaway_prot
;
1404 boolean_t throwaway_wired
;
1405 vm_map_entry_t lookup_entry
;
1409 guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
1410 if (guest_vm
== NULL
) {
1411 printf("PROC_GET_GUESTVM -> NULL\n");
1415 start
= uap
->guest_pa
;
1416 end
= uap
->guest_pa
+ uap
->size
- 1;
1417 printf("start: 0x%" PRIx64
" bytes\n", start
);
1418 printf("end: 0x%" PRIx64
" bytes\n", end
);
1420 if (uap
->vaddr
== 0)
1421 return fkvm_guest_check_range(guest_vm
, start
, end
);
1423 user_vm_space
= td
->td_proc
->p_vmspace
;
1424 user_vm_map
= &user_vm_space
->vm_map
;
1425 printf("user vm space: %p\n", user_vm_space
);
1426 printf("user vm map: %p\n", user_vm_map
);
1428 error
= vm_map_lookup(&user_vm_map
, /* IN/OUT */
1430 VM_PROT_READ
|VM_PROT_WRITE
,
1431 &lookup_entry
, /* OUT */
1432 &vm_object
, /* OUT */
1433 &vm_object_pindex
, /* OUT */
1434 &throwaway_prot
, /* OUT */
1435 &throwaway_wired
); /* OUT */
1436 if (error
!= KERN_SUCCESS
) {
1437 printf("vm_map_lookup failed: %d\n", error
);
1441 /* TODO: Trust the user that the full region is valid.
1442 * This is very bad. See the note in fkvm_guest_check_range
1443 * on nesting vm lookups. */
1445 if (!fkvm_mem_has_entry(lookup_entry
, user_vm_map
, uap
->vaddr
+ uap
->size
)) {
1446 printf("end of range not contained in same vm map entry as start\n");
1451 printf("vm object: %p\n", vm_object
);
1452 printf(" size: %d pages\n", (int) vm_object
->size
);
1454 vm_object_offset
= IDX_TO_OFF(vm_object_pindex
);
1455 printf("vm_ooffset: 0x%" PRIx64
"\n", vm_object_offset
);
1457 vm_object_reference(vm_object
); // TODO: this might be a mem leak
1459 vm_map_lookup_done(user_vm_map
, lookup_entry
);
1461 error
= vm_map_insert(&guest_vm
->sp
->vm_map
,
1466 VM_PROT_ALL
, VM_PROT_ALL
,
1468 if (error
!= KERN_SUCCESS
) {
1469 printf("vm_map_insert failed: %d\n", error
);
1471 case KERN_INVALID_ADDRESS
:
1484 fkvm_unset_user_mem_region(struct thread
*td
, struct fkvm_unset_user_mem_region_args
*uap
)
1486 struct guestvm
*guest_vm
;
1491 guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
1492 if (guest_vm
== NULL
) {
1493 printf("PROC_GET_GUESTVM -> NULL\n");
1500 vm_map_t guest_vm_map
;
1504 start
= uap
->guest_pa
;
1505 end
= uap
->guest_pa
+ uap
->size
- 1;
1506 printf("start: 0x%" PRIx64
" bytes\n", start
);
1507 printf("end: 0x%" PRIx64
" bytes\n", end
);
1509 guest_vm_map
= &guest_vm
->sp
->vm_map
;
1511 error
= vm_map_remove(guest_vm_map
, start
, end
);
1512 if (error
!= KERN_SUCCESS
)
1519 fkvm_create_vm(struct thread
*td
, struct fkvm_create_vm_args
*uap
)
1521 struct guestvm
*guest_vm
;
1523 printf("SYSCALL : fkvm_create_vm\n");
1528 /* Allocate Guest VM */
1529 guest_vm
= fkvm_guestvm_alloc();
1531 /* Set up the vm address space */
1532 guest_vm
->sp
= fkvm_make_vmspace();
1533 if (guest_vm
->sp
== NULL
) {
1534 fkvm_guestvm_free(guest_vm
);
1537 guest_vm
->nested_cr3
= vtophys(vmspace_pmap(guest_vm
->sp
)->pm_pml4
);
1540 printf(" vm space: %p\n", guest_vm
->sp
);
1541 printf(" vm map: %p\n", &guest_vm
->sp
->vm_map
);
1542 printf(" ncr3: 0x%" PRIx64
"\n", guest_vm
->nested_cr3
);
1544 PROC_SET_GUESTVM(td
->td_proc
, guest_vm
);
1546 printf("fkvm_create_vm done\n");
1551 fkvm_destroy_vm(struct guestvm
*guest_vm
)
1553 /* Destroy the VCPUs */
1554 while (guest_vm
->nr_vcpus
> 0) {
1555 guest_vm
->nr_vcpus
--;
1556 fkvm_vcpu_destroy(guest_vm
->vcpus
[guest_vm
->nr_vcpus
]);
1557 guest_vm
->vcpus
[guest_vm
->nr_vcpus
] = NULL
;
1560 /* Destroy the vmspace */
1561 if (guest_vm
->sp
!= NULL
)
1562 fkvm_destroy_vmspace(guest_vm
->sp
);
1564 /* Destroy the Guest VM itself */
1565 fkvm_guestvm_free(guest_vm
);
1569 intercept_ioio(struct vcpu
*vcpu
, struct kvm_run
*kvm_run
, uint64_t ioio_info
, uint64_t rip
)
1571 struct vmcb
*vmcb
= vcpu
->vmcb
;
1575 str
= (ioio_info
& STR_MASK
) >> STR_SHIFT
;
1577 printf("%s operation requested, not yet implemented, \n",
1578 kvm_run
->u
.io
.in
? "INS" : "OUTS");
1582 kvm_run
->u
.io
.port
= ioio_info
>> PORT_SHIFT
;
1583 kvm_run
->u
.io
.in
= ioio_info
& TYPE_MASK
;
1585 kvm_run
->u
.io
.size
= (ioio_info
& SIZE_MASK
) >> SIZE_SHIFT
;
1587 kvm_run
->u
.io
.data_offset
= PAGE_SIZE
;
1588 kvm_run
->u
.io
.pio_data
= vcpu
->regs
[VCPU_REGS_RAX
];
1590 /* We need to remove the Interrupt Shadow Flag from the VMCB (see 15.20.5 in AMD_Vol2) */
1591 vmcb
->control
.intr_shadow
= 0;
1593 //rep = (ioio_info & REP_MASK) >> REP_SHIFT;
1594 /* TODO: Research more into Direction Flag checked in KVM; DF bit in RFLAGS */
1596 /* set the next rip in the VMCB save area for now */
1597 /* TODO: Store rIP in vm_run structure until we absolutely need it */
1598 vcpu
->regs
[VCPU_REGS_RIP
] = rip
;
1604 intercept_shutdown(struct vcpu
*vcpu
)
1606 struct vmcb
*vmcb
= vcpu
->vmcb
;
1607 memset(vmcb
, 0, PAGE_SIZE
);
1608 fkvm_vmcb_init(vmcb
);
1612 fkvm_vm_run(struct thread
*td
, struct fkvm_vm_run_args
*uap
)
1615 struct guestvm
*guest_vm
;
1619 struct kvm_run kvm_run
;
1624 vcpu
= TD_GET_VCPU(td
);
1628 guest_vm
= vcpu
->guest_vm
;
1631 error
= copyin(uap
->run
, &kvm_run
, sizeof(struct kvm_run
));
1635 fkvm_set_cr8(vcpu
, kvm_run
.cr8
);
1637 kvm_run
.exit_reason
= KVM_EXIT_CONTINUE
;
1639 while(kvm_run
.exit_reason
== KVM_EXIT_CONTINUE
) {
1640 fkvm_vcpu_run(vcpu
);
1642 switch (vmcb
->control
.exit_code
) {
1644 case VMCB_EXIT_EXCP_BASE
... (VMCB_EXIT_EXCP_BASE
+ 31): {
1647 excp_vector
= vmcb
->control
.exit_code
- VMCB_EXIT_EXCP_BASE
;
1649 printf("VMCB_EXIT_EXCP_BASE, exception vector: 0x%x\n",
1655 case VMCB_EXIT_INTR
: {
1656 printf("VMCB_EXIT_INTR - nothing to do\n");
1657 /* Handled by host OS already */
1658 kvm_run
.exit_reason
= KVM_EXIT_CONTINUE
;
1662 case VMCB_EXIT_NPF
: {
1663 /* EXITINFO1 contains fault error code */
1664 /* EXITINFO2 contains the guest physical address causing the fault. */
1666 u_int64_t fault_code
;
1667 u_int64_t fault_gpa
;
1669 vm_prot_t fault_type
;
1673 fault_code
= vmcb
->control
.exit_info_1
;
1674 fault_gpa
= vmcb
->control
.exit_info_2
;
1675 kvm_run
.exit_reason
= KVM_EXIT_CONTINUE
;
1677 printf("VMCB_EXIT_NPF:\n");
1678 printf("gpa=0x%" PRIx64
"\n", fault_gpa
);
1679 printf("fault code=0x%" PRIx64
" [P=%x, R/W=%x, U/S=%x, I/D=%x]\n",
1681 (fault_code
& PGEX_P
) != 0,
1682 (fault_code
& PGEX_W
) != 0,
1683 (fault_code
& PGEX_U
) != 0,
1684 (fault_code
& PGEX_I
) != 0);
1686 if (fault_code
& PGEX_W
)
1687 fault_type
= VM_PROT_WRITE
;
1688 else if (fault_code
& PGEX_I
)
1689 fault_type
= VM_PROT_EXECUTE
;
1691 fault_type
= VM_PROT_READ
;
1693 fault_flags
= 0; /* TODO: is that right? */
1694 rc
= vm_fault(&guest_vm
->sp
->vm_map
, (fault_gpa
& (~PAGE_MASK
)), fault_type
, fault_flags
);
1695 if (rc
!= KERN_SUCCESS
) {
1696 printf("vm_fault failed: %d\n", rc
);
1697 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
1702 case VMCB_EXIT_WRITE_CR8
:
1703 kvm_run
.exit_reason
= KVM_EXIT_SET_TPR
;
1706 kvm_run
.exit_reason
= KVM_EXIT_NMI
;
1709 kvm_run
.exit_reason
= KVM_EXIT_HLT
;
1711 case VMCB_EXIT_SHUTDOWN
:
1712 intercept_shutdown(vcpu
);
1713 kvm_run
.exit_reason
= KVM_EXIT_SHUTDOWN
;
1715 case VMCB_EXIT_IOIO
:
1716 intercept_ioio(vcpu
, &kvm_run
,
1717 vmcb
->control
.exit_info_1
,
1718 vmcb
->control
.exit_info_2
);
1719 kvm_run
.exit_reason
= KVM_EXIT_IO
;
1721 case VMCB_EXIT_READ_CR0
:
1722 case VMCB_EXIT_READ_CR3
:
1723 case VMCB_EXIT_READ_CR4
:
1724 case VMCB_EXIT_READ_CR8
:
1725 case VMCB_EXIT_WRITE_CR0
:
1726 case VMCB_EXIT_WRITE_CR3
:
1727 case VMCB_EXIT_WRITE_CR4
:
1728 case VMCB_EXIT_READ_DR0
:
1729 case VMCB_EXIT_READ_DR1
:
1730 case VMCB_EXIT_READ_DR2
:
1731 case VMCB_EXIT_READ_DR3
:
1732 case VMCB_EXIT_WRITE_DR0
:
1733 case VMCB_EXIT_WRITE_DR1
:
1734 case VMCB_EXIT_WRITE_DR2
:
1735 case VMCB_EXIT_WRITE_DR3
:
1736 case VMCB_EXIT_WRITE_DR5
:
1737 case VMCB_EXIT_WRITE_DR7
:
1739 case VMCB_EXIT_INIT
:
1740 case VMCB_EXIT_VINTR
:
1741 case VMCB_EXIT_CR0_SEL_WRITE
:
1742 case VMCB_EXIT_CPUID
:
1743 case VMCB_EXIT_INVD
:
1744 case VMCB_EXIT_INVLPG
:
1745 case VMCB_EXIT_INVLPGA
:
1747 case VMCB_EXIT_TASK_SWITCH
:
1748 case VMCB_EXIT_VMRUN
:
1749 case VMCB_EXIT_VMMCALL
:
1750 case VMCB_EXIT_VMLOAD
:
1751 case VMCB_EXIT_VMSAVE
:
1752 case VMCB_EXIT_STGI
:
1753 case VMCB_EXIT_CLGI
:
1754 case VMCB_EXIT_SKINIT
:
1755 case VMCB_EXIT_WBINVD
:
1756 case VMCB_EXIT_MONITOR
:
1757 case VMCB_EXIT_MWAIT_UNCOND
:
1759 printf("Unhandled vmexit:\n"
1760 " code: 0x%" PRIx64
"\n"
1761 " info1: 0x%" PRIx64
"\n"
1762 " info2: 0x%" PRIx64
"\n",
1763 vmcb
->control
.exit_code
,
1764 vmcb
->control
.exit_info_1
,
1765 vmcb
->control
.exit_info_2
);
1768 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
1774 /* TODO: check copyout ret val */
1775 copyout(&kvm_run
, uap
->run
, sizeof(struct kvm_run
));
1776 printf("sizeof(struct kvm_run) = %" PRIu64
"\n", sizeof(struct kvm_run
));
1782 fkvm_create_vcpu(struct thread
*td
, struct fkvm_create_vcpu_args
*uap
)
1784 struct guestvm
*guest_vm
;
1790 guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
1791 if (guest_vm
== NULL
) {
1792 printf("PROC_GET_GUESTVM -> NULL\n");
1797 printf("fkvm_create_vcpu: td = %p\n", td
);
1798 vcpu
= fkvm_vcpu_create(guest_vm
);
1799 fkvm_guestvm_add_vcpu(guest_vm
, vcpu
);
1801 TD_SET_VCPU(td
, vcpu
);
1802 printf("fkvm_create_vcpu: vcpu = %p\n", vcpu
);
1807 fkvm_check_cpu_extension(void)
1813 printf("fkvm_check_cpu_extension\n");
1815 /* Assumption: the architecture supports the cpuid instruction */
1817 /* Check if CPUID extended function 8000_0001h is supported. */
1818 do_cpuid(0x80000000, regs
);
1819 cpu_exthigh
= regs
[0];
1821 printf("cpu_exthigh = %u\n", cpu_exthigh
);
1823 if(cpu_exthigh
>= 0x80000001) {
1824 /* Execute CPUID extended function 8000_0001h */
1825 do_cpuid(0x80000001, regs
);
1826 printf("EAX = %u\n", regs
[0]);
1828 if((regs
[0] & 0x2) == 0) { /* Check SVM bit */
1829 printf("SVM not available\n");
1830 goto fail
; /* SVM not available */
1833 vmcr
= rdmsr(0xc0010114); /* Read VM_CR MSR */
1834 if((vmcr
& 0x8) == 0) { /* Check SVMDIS bit */
1835 printf("vmcr = %" PRIx64
"\n", vmcr
);
1836 printf("SVM allowed\n");
1837 return KERN_SUCCESS
; /* SVM allowed */
1840 /* Execute CPUID extended function 8000_000ah */
1841 do_cpuid(0x8000000a, regs
);
1842 if((regs
[3] & 0x2) == 0) { /* Check SVM_LOCK bit */
1843 /* SVM disabled at bios; not unlockable.
1844 * User must change a BIOS setting to enable SVM.
1846 printf("EDX = %u\n", regs
[3]);
1847 printf("SVM disabled at bios\n");
1851 * SVM may be unlockable;
1852 * consult the BIOS or TPM to obtain the key.
1854 printf("EDX = %u\n", regs
[3]);
1855 printf("SVM maybe unlockable\n");
1860 return KERN_FAILURE
;
1864 fkvm_proc_exit(void *arg
, struct proc
*p
)
1866 struct guestvm
*guest_vm
;
1868 guest_vm
= PROC_GET_GUESTVM(p
);
1869 if (guest_vm
== NULL
)
1872 fkvm_destroy_vm(guest_vm
);
1873 PROC_SET_GUESTVM(p
, NULL
);
1877 fkvm_load(void *unused
)
1882 printf("fkvm_load\n");
1883 printf("sizeof(struct vmcb) = %" PRIx64
"\n", sizeof(struct vmcb
));
1889 /* check if SVM is supported */
1890 error
= fkvm_check_cpu_extension();
1891 if(error
!= KERN_SUCCESS
) {
1892 printf("ERROR: SVM extension not available\n");
1896 exit_tag
= EVENTHANDLER_REGISTER(process_exit
, fkvm_proc_exit
, NULL
,
1897 EVENTHANDLER_PRI_ANY
);
1899 /* allocate structures */
1900 hsave_area
= fkvm_hsave_area_alloc();
1901 iopm
= fkvm_iopm_alloc();
1902 msrpm
= fkvm_msrpm_alloc();
1904 /* Initialize structures */
1905 fkvm_hsave_area_init(hsave_area
);
1906 fkvm_iopm_init(iopm
);
1907 fkvm_msrpm_init(msrpm
);
1909 /* Enable SVM in EFER */
1910 efer
= rdmsr(MSR_EFER
);
1911 printf("EFER = %" PRIx64
"\n", efer
);
1912 wrmsr(MSR_EFER
, efer
| EFER_SVME
);
1913 efer
= rdmsr(MSR_EFER
);
1914 printf("new EFER = %" PRIx64
"\n", efer
);
1916 /* Write Host save address in MSR_VM_HSAVE_PA */
1917 wrmsr(MSR_VM_HSAVE_PA
, vtophys(hsave_area
));
1921 SYSINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_load
, NULL
);
1924 fkvm_unload(void *unused
)
1926 printf("fkvm_unload\n");
1929 printf("fkvm_unload: fkvm not loaded");
1933 EVENTHANDLER_DEREGISTER(process_exit
, exit_tag
);
1935 if (msrpm
!= NULL
) {
1936 fkvm_msrpm_free(iopm
);
1940 fkvm_iopm_free(iopm
);
1943 if (hsave_area
!= NULL
) {
1944 fkvm_hsave_area_free(hsave_area
);
1948 SYSUNINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_unload
, NULL
);