2 * Copyright (c) 2008 Brent Stephens <brents@rice.edu>
3 * Copyright (c) 2008 Diego Ongaro <diego.ongaro@rice.edu>
4 * Copyright (c) 2008 Kaushik Kumar Ram <kaushik@rice.edu>
5 * Copyright (c) 2008 Oleg Pesok <olegpesok@gmail.com>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/sysproto.h>
40 #include <sys/eventhandler.h>
43 #include <vm/vm_extern.h>
44 #include <vm/vm_map.h>
45 #include <vm/vm_object.h>
46 #include <vm/vm_param.h>
47 #include <machine/_inttypes.h>
48 #include <machine/specialreg.h>
49 #include <machine/segments.h>
50 #include <machine/vmcb.h>
53 /* Definitions for Port IO */
61 #define PORT_MASK 0xFFFF0000
62 #define ADDR_MASK (7 << ADDR_SHIFT)
63 #define SIZE_MASK (7 << SIZE_SHIFT)
64 #define REP_MASK (1 << REP_SHIFT)
65 #define STR_MASK (1 << STR_SHIFT)
66 #define TYPE_MASK (1 << TYPE_SHIFT)
67 /* End Definitions for Port IO */
#define PMIO_PAGE_OFFSET 1

/*
 * Sizes, in bytes, of the SVM permission maps allocated by this file.
 *
 * The AMD64 Architecture Programmer's Manual describes the I/O permissions
 * map as occupying 12 Kbytes of contiguous physical memory (64K port bits
 * plus a few bits in a third page for accesses that run past port 0xFFFF),
 * so the whole 12 Kbytes are allocated and initialized rather than 8K+1.
 * The MSR permissions map is 8 Kbytes.
 */
#define IOPM_SIZE (12*1024)
#define MSRPM_SIZE (8*1024)
76 static int fkvm_loaded
= 0;
78 static void *iopm
= NULL
; /* Should I allocate a vm_object_t instead? */
79 static void *msrpm
= NULL
; /* Should I allocate a vm_object_t instead? */
81 static void *hsave_area
= NULL
;
83 static eventhandler_tag exit_tag
;
111 unsigned long vmcb_pa
;
113 unsigned long regs
[NR_VCPU_REGS
];
114 u_int64_t host_fs_base
;
115 u_int64_t host_gs_base
;
120 uint64_t default_type
;
121 uint64_t mtrr64k
[MTRR_N64K
/8];
122 uint64_t mtrr16k
[MTRR_N16K
/8];
123 uint64_t mtrr4k
[MTRR_N4K
/8];
124 #define FKVM_MTRR_NVAR 8
125 uint64_t mtrrvar
[FKVM_MTRR_NVAR
*2];
128 struct guestvm
*guest_vm
;
130 unsigned long virqs
[256 / (sizeof(unsigned long) * 8)];
134 struct vcpu
*vcpus
[MAX_VCPUS
];
138 u_int64_t nested_cr3
;
/*
 * Raw opcode bytes for the SVM instructions, emitted with .byte so the
 * assembler does not need to know the mnemonics.  Each string is meant to be
 * pasted into an inline asm statement.
 */
#define SVM_VMRUN	".byte 0x0f, 0x01, 0xd8"
#define SVM_VMLOAD	".byte 0x0f, 0x01, 0xda"
#define SVM_VMSAVE	".byte 0x0f, 0x01, 0xdb"
#define SVM_STGI	".byte 0x0f, 0x01, 0xdc"
#define SVM_CLGI	".byte 0x0f, 0x01, 0xdd"
#define SVM_INVLPGA	".byte 0x0f, 0x01, 0xdf"
150 fkvm_virq_dequeue(struct vcpu
*vcpu
);
152 static inline struct vcpu
*
153 TD_GET_VCPU(struct thread
*td
)
158 printf("TD_GET_VCPU -> NULL\n");
163 TD_SET_VCPU(struct thread
*td
, struct vcpu
*vcpu
)
168 static inline struct guestvm
*
169 PROC_GET_GUESTVM(struct proc
*proc
)
171 struct guestvm
*guestvm
;
172 guestvm
= proc
->p_guestvm
;
177 PROC_SET_GUESTVM(struct proc
*proc
, struct guestvm
*guestvm
)
179 proc
->p_guestvm
= guestvm
; \
183 print_vmcb_seg(struct vmcb_seg
* vmcb_seg
, const char* name
)
185 printf("%s Selector\n", name
);
186 printf("Selector : %" PRIx16
"\n", vmcb_seg
->selector
);
187 printf("Attributes : %" PRIx16
"\n", vmcb_seg
->attrib
);
188 printf("Limit : %" PRIx32
"\n", vmcb_seg
->limit
);
189 printf("Base Address : %" PRIx64
"\n", vmcb_seg
->base
);
194 print_vmcb(struct vmcb
*vmcb
)
196 printf("VMCB Control Area\n");
197 printf("Intercept CR Reads : %" PRIx16
"\n", vmcb
->control
.intercept_cr_reads
);
198 printf("Intercept CR Writes : %" PRIx16
"\n", vmcb
->control
.intercept_cr_writes
);
199 printf("Intercept DR Reads : %" PRIx16
"\n", vmcb
->control
.intercept_dr_reads
);
200 printf("Intercept DR Writes : %" PRIx16
"\n", vmcb
->control
.intercept_dr_writes
);
201 printf("Intercept Exceptions : %" PRIx32
"\n", vmcb
->control
.intercept_exceptions
);
202 printf("Intercepts : %" PRIx64
"\n", vmcb
->control
.intercepts
);
203 printf("Reserved 1: \n");
204 for(int i
=0; i
< 44; i
++) {
205 printf("%" PRIx8
"", vmcb
->control
.reserved_1
[i
]); /* Should be Zero */
208 printf("IOPM Base PA : %" PRIx64
"\n", vmcb
->control
.iopm_base_pa
);
209 printf("MSRPM Base PA : %" PRIx64
"\n", vmcb
->control
.msrpm_base_pa
);
210 printf("TSC Offset : %" PRIx64
"\n", vmcb
->control
.tsc_offset
);
211 printf("Guest ASID : %" PRIx32
"\n", vmcb
->control
.guest_asid
);
212 printf("TLB Control : %" PRIx8
"\n", vmcb
->control
.tlb_control
);
213 printf("Reserved 2 : \n");
214 for(int i
=0; i
< 3; i
++) {
215 printf("%" PRIx8
"", vmcb
->control
.reserved_1
[i
]); /* Should be Zero */
218 printf("Virtual TPR : %" PRIx8
"\n", vmcb
->control
.v_tpr
);
219 printf("Virtual IRQ Pending : %" PRIx8
"\n", vmcb
->control
.v_irq_pending
);
220 printf("Virtual Interrupt : %" PRIx8
"\n", vmcb
->control
.v_intr
);
221 printf("Virtual Interrupt Masking: %" PRIx8
"\n", vmcb
->control
.v_intr_masking
);
222 printf("Virtual Interrupt Vector : %" PRIx8
"\n", vmcb
->control
.v_intr_vector
);
223 printf("Reserved 6 : \n");
224 for(int i
=0; i
< 3; i
++) {
225 printf("%" PRIx8
"", vmcb
->control
.reserved_6
[i
]); /* Should be Zero */
228 printf("Interrupt Shadow : %" PRIx8
"\n", vmcb
->control
.intr_shadow
);
229 printf("Reserved 7 : \n");
230 for(int i
=0; i
< 7; i
++) {
231 printf("%" PRIx8
"", vmcb
->control
.reserved_7
[i
]); /* Should be Zero */
234 printf("Exit Code : %" PRIx64
"\n", vmcb
->control
.exit_code
);
235 printf("Exit Info 1 : %" PRIx64
"\n", vmcb
->control
.exit_info_1
);
236 printf("Exit Info 2 : %" PRIx64
"\n", vmcb
->control
.exit_info_2
);
237 printf("Exit Interrupt Info : %" PRIx32
"\n", vmcb
->control
.exit_int_info
);
238 printf("Exit Interrupt Info Err Code: %" PRIx32
"\n", vmcb
->control
.exit_int_info_err_code
);
239 printf("Nested Control : %" PRIx64
"\n", vmcb
->control
.nested_ctl
);
240 printf("Reserved 8 : \n");
241 for(int i
=0; i
< 16; i
++) {
242 printf("%" PRIx8
"", vmcb
->control
.reserved_8
[i
]); /* Should be Zero */
245 printf("Event Injection : %" PRIx64
"\n", vmcb
->control
.event_inj
);
246 printf("Nested CR3 : %" PRIx64
"\n", vmcb
->control
.nested_cr3
);
247 printf("LBR Virtualization Enable: %" PRIx64
"\n", vmcb
->control
.lbr_virt_enable
);
248 printf("Reserved 9 : \n");
249 for(int i
=0; i
< 832; i
++) {
250 printf("%" PRIx8
"", vmcb
->control
.reserved_9
[i
]); /* Should be Zero */
256 printf("VMCB Save Area\n");
257 print_vmcb_seg(&(vmcb
->save
.es
), "ES");
258 print_vmcb_seg(&(vmcb
->save
.cs
), "CS");
259 print_vmcb_seg(&(vmcb
->save
.ss
), "SS");
260 print_vmcb_seg(&(vmcb
->save
.ds
), "DS");
261 print_vmcb_seg(&(vmcb
->save
.fs
), "FS");
262 print_vmcb_seg(&(vmcb
->save
.gs
), "GS");
263 print_vmcb_seg(&(vmcb
->save
.gdtr
), "GDTR");
264 print_vmcb_seg(&(vmcb
->save
.ldtr
), "LDTR");
265 print_vmcb_seg(&(vmcb
->save
.idtr
), "IDTR");
266 print_vmcb_seg(&(vmcb
->save
.tr
), "TR");
267 printf("Reserved 1 : \n");
268 for(int i
=0; i
< 43; i
++) {
269 printf("%" PRIx8
"", vmcb
->save
.reserved_1
[i
]); /* Should be Zero */
272 printf("Current Processor Level : %" PRIx8
"\n", vmcb
->save
.cpl
);
273 printf("Reserved 2 : \n");
274 for(int i
=0; i
< 4; i
++) {
275 printf("%" PRIx8
"", vmcb
->save
.reserved_2
[i
]); /* Should be Zero */
278 printf("EFER : %" PRIx64
"\n", vmcb
->save
.efer
);
279 printf("Reserved 3 : \n");
280 for(int i
=0; i
< 112; i
++) {
281 printf("%" PRIx8
"", vmcb
->save
.reserved_3
[i
]); /* Should be Zero */
284 printf("Control Register 4 : %" PRIx64
"\n", vmcb
->save
.cr4
);
285 printf("Control Register 3 : %" PRIx64
"\n", vmcb
->save
.cr3
);
286 printf("Control Register 0 : %" PRIx64
"\n", vmcb
->save
.cr0
);
287 printf("Debug Register 7 : %" PRIx64
"\n", vmcb
->save
.dr7
);
288 printf("Debug Register 6 : %" PRIx64
"\n", vmcb
->save
.dr6
);
289 printf("RFlags : %" PRIx64
"\n", vmcb
->save
.rflags
);
290 printf("RIP : %" PRIx64
"\n", vmcb
->save
.rip
);
291 printf("Reserved 4 : \n");
292 for(int i
=0; i
< 88; i
++) {
293 printf("%" PRIx8
"", vmcb
->save
.reserved_4
[i
]); /* Should be Zero */
296 printf("RSP : %" PRIx64
"\n", vmcb
->save
.rsp
);
297 printf("Reserved 5 : \n");
298 for(int i
=0; i
< 24; i
++) {
299 printf("%" PRIx8
"", vmcb
->save
.reserved_5
[i
]); /* Should be Zero */
302 printf("RAX : %" PRIx64
"\n", vmcb
->save
.rax
);
303 printf("STAR : %" PRIx64
"\n", vmcb
->save
.star
);
304 printf("LSTAR : %" PRIx64
"\n", vmcb
->save
.lstar
);
305 printf("CSTAR : %" PRIx64
"\n", vmcb
->save
.cstar
);
306 printf("SFMASK : %" PRIx64
"\n", vmcb
->save
.sfmask
);
307 printf("Kernel GS Base : %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
308 printf("SYSENTER CS : %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
309 printf("SYSENTER ESP : %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
310 printf("SYSENTER EIP : %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
311 printf("Control Register 2 : %" PRIx64
"\n", vmcb
->save
.cr2
);
312 printf("Reserved 6 : \n");
313 for(int i
=0; i
< 32; i
++) {
314 printf("%" PRIx8
"", vmcb
->save
.reserved_6
[i
]); /* Should be Zero */
317 printf("Global PAT : %" PRIx64
"\n", vmcb
->save
.g_pat
);
318 printf("Debug Control : %" PRIx64
"\n", vmcb
->save
.dbg_ctl
);
319 printf("BR From : %" PRIx64
"\n", vmcb
->save
.br_from
);
320 printf("BR To : %" PRIx64
"\n", vmcb
->save
.br_to
);
321 printf("Last Exception From : %" PRIx64
"\n", vmcb
->save
.last_excp_from
);
322 printf("Last Exception To : %" PRIx64
"\n", vmcb
->save
.last_excp_to
);
329 print_tss_desc(struct system_segment_descriptor
*tss_desc
)
331 printf("TSS desc @ %p:\n", tss_desc
);
332 printf("sd_lolimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lolimit
);
333 printf("sd_lobase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lobase
);
334 printf("sd_type: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_type
);
335 printf("sd_dpl: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_dpl
);
336 printf("sd_p: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_p
);
337 printf("sd_hilimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hilimit
);
338 printf("sd_xx0: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx0
);
339 printf("sd_gran: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_gran
);
340 printf("sd_hibase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hibase
);
341 printf("sd_xx1: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx1
);
342 printf("sd_mbz: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_mbz
);
343 printf("sd_xx2: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx2
);
348 print_tss(struct system_segment_descriptor
*tss_desc
)
354 base
= (u_int32_t
*) ((((u_int64_t
) tss_desc
->sd_hibase
) << 24) | ((u_int64_t
) tss_desc
->sd_lobase
));
355 limit
= ((tss_desc
->sd_hilimit
<< 16) | tss_desc
->sd_lolimit
) / 4;
357 printf("TSS: @ %p\n", base
);
358 for (i
= 0; i
<= limit
; i
++)
359 printf("%x: 0x%" PRIx32
"\n", i
, base
[i
]);
365 print_vmcb_save_area(struct vmcb
*vmcb
)
367 printf("VMCB save area:\n");
368 printf(" cs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
369 vmcb
->save
.cs
.selector
,
370 vmcb
->save
.cs
.attrib
,
373 printf(" fs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
374 vmcb
->save
.fs
.selector
,
375 vmcb
->save
.fs
.attrib
,
378 printf(" gs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
379 vmcb
->save
.gs
.selector
,
380 vmcb
->save
.gs
.attrib
,
383 printf(" tr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
384 vmcb
->save
.tr
.selector
,
385 vmcb
->save
.tr
.attrib
,
388 printf(" ldtr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
389 vmcb
->save
.ldtr
.selector
,
390 vmcb
->save
.ldtr
.attrib
,
391 vmcb
->save
.ldtr
.limit
,
392 vmcb
->save
.ldtr
.base
);
393 printf(" rip: %" PRIx64
"\n", vmcb
->save
.rip
);
394 printf(" kernel_gs_base: %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
395 printf(" star: %" PRIx64
"\n", vmcb
->save
.star
);
396 printf(" lstar: %" PRIx64
"\n", vmcb
->save
.lstar
);
397 printf(" cstar: %" PRIx64
"\n", vmcb
->save
.cstar
);
398 printf(" sfmask: %" PRIx64
"\n", vmcb
->save
.sfmask
);
399 printf(" sysenter_cs: %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
400 printf(" sysenter_esp: %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
401 printf(" sysenter_eip: %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
406 vmrun_assert(struct vmcb
*vmcb
)
408 #define A(cond) do { if ((cond)) { printf("Error: assertion not met on line %d\n", __LINE__); bad = 1; } } while (0)
414 // The following are illegal:
417 A((vmcb
->save
.efer
& 0x0000000000001000) == 0);
419 // CR0.CD is zero and CR0.NW is set
420 A( ((vmcb
->save
.cr0
& 0x0000000040000000) == 0) &&
421 ((vmcb
->save
.cr0
& 0x0000000020000000) != 0));
423 // CR0[63:32] are not zero.
424 A((vmcb
->save
.cr0
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
426 // Any MBZ bit of CR3 is set.
427 A((vmcb
->save
.cr3
& 0xFFF0000000000000) != 0);
429 // CR4[63:11] are not zero.
430 A((vmcb
->save
.cr4
& 0xFFFFFFFFFFFFF800) == 0xFFFFFFFFFFFFF800);
432 // DR6[63:32] are not zero.
433 A((vmcb
->save
.dr6
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
435 // DR7[63:32] are not zero.
436 A((vmcb
->save
.dr7
& 0xFFFFFFFF00000000) == 0xFFFFFFFF00000000);
438 // EFER[63:15] are not zero.
439 A((vmcb
->save
.efer
& 0xFFFFFFFFFFFF8000) == 0xFFFFFFFFFFF8000);
441 // EFER.LMA or EFER.LME is non-zero and this processor does not support long mode.
442 //// A((vmcb->save.efer & 0x0000000000000500) != 0);
444 // EFER.LME and CR0.PG are both set and CR4.PAE is zero.
445 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
446 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
447 ((vmcb
->save
.cr4
& 0x0000000000000020) != 0));
449 // EFER.LME and CR0.PG are both non-zero and CR0.PE is zero.
450 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
451 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
452 ((vmcb
->save
.cr0
& 0x0000000000000001) == 0));
454 // EFER.LME, CR0.PG, CR4.PAE, CS.L, and CS.D are all non-zero.
455 // cs.attrib = concat 55-52 and 47-40 (p372 v2)
456 A( ((vmcb
->save
.efer
& 0x0000000000000100) != 0) &&
457 ((vmcb
->save
.cr0
& 0x0000000080000000) != 0) &&
458 ((vmcb
->save
.cr4
& 0x0000000000000020) != 0) &&
459 ((vmcb
->save
.cs
.attrib
& 0x0200) != 0) &&
460 ((vmcb
->save
.cs
.attrib
& 0x0400) != 0));
462 // The VMRUN intercept bit is clear.
463 A((vmcb
->control
.intercepts
& 0x0000000100000000) == 0);
465 // The MSR or IOIO intercept tables extend to a physical address that is
466 // greater than or equal to the maximum supported physical address.
468 // Illegal event injection (see Section 15.19 on page 391).
470 // ASID is equal to zero.
471 A(vmcb
->control
.guest_asid
== 0);
473 // VMRUN can load a guest value of CR0 with PE = 0 but PG = 1, a
474 // combination that is otherwise illegal (see Section 15.18).
476 // In addition to consistency checks, VMRUN and #VMEXIT canonicalize (i.e.,
477 // sign-extend to 63 bits) all base addresses in the segment registers
478 // that have been loaded.
486 fkvm_vcpu_run(struct vcpu
*vcpu
)
495 u_short ldt_selector
;
497 unsigned long host_cr2
;
498 unsigned long host_dr6
;
499 unsigned long host_dr7
;
501 struct system_segment_descriptor
*tss_desc
;
506 //printf("begin fkvm_vcpu_run\n");
510 fkvm_virq_dequeue(vcpu
);
512 if (vmrun_assert(vmcb
))
515 tss_desc
= (struct system_segment_descriptor
*) (&gdt
[GPROC0_SEL
]);
516 sel
= GSEL(GPROC0_SEL
, SEL_KPL
);
518 // printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64 "\n", sel);
519 // print_tss_desc(tss_desc);
520 // print_tss(tss_desc);
522 // print_vmcb_save_area(vmcb);
523 // printf("vcpu->regs[VCPU_REGS_RIP]: 0x%lx\n", vcpu->regs[VCPU_REGS_RIP]);
526 vmcb
->save
.rax
= vcpu
->regs
[VCPU_REGS_RAX
];
527 vmcb
->save
.rsp
= vcpu
->regs
[VCPU_REGS_RSP
];
528 vmcb
->save
.rip
= vcpu
->regs
[VCPU_REGS_RIP
];
530 /* meh: kvm has pre_svm_run(svm); */
532 vcpu
->host_fs_base
= rdmsr(MSR_FSBASE
);
533 vcpu
->host_gs_base
= rdmsr(MSR_GSBASE
);
534 // printf("host_fs_base: 0x%" PRIx64 "\n", vcpu->host_fs_base);
535 // printf("host_gs_base: 0x%" PRIx64 "\n", vcpu->host_gs_base);
539 ldt_selector
= rldt();
540 // printf("fs selector: %hx\n", fs_selector);
541 // printf("gs selector: %hx\n", gs_selector);
542 // printf("ldt selector: %hx\n", ldt_selector);
549 vmcb
->save
.cr2
= vcpu
->cr2
;
551 // TODO: something with apic_base?
553 /* meh: dr7? db_regs? */
555 // printf("MSR_STAR: %" PRIx64 "\n", rdmsr(MSR_STAR));
556 // printf("MSR_LSTAR: %" PRIx64 "\n", rdmsr(MSR_LSTAR));
557 // printf("MSR_CSTAR: %" PRIx64 "\n", rdmsr(MSR_CSTAR));
558 // printf("MSR_SF_MASK: %" PRIx64 "\n", rdmsr(MSR_SF_MASK));
560 star
= rdmsr(MSR_STAR
);
561 lstar
= rdmsr(MSR_LSTAR
);
562 cstar
= rdmsr(MSR_CSTAR
);
563 sfmask
= rdmsr(MSR_SF_MASK
);
565 // printf("CLGI...\n");
567 __asm
__volatile (SVM_CLGI
);
574 "mov %c[rbx](%[svm]), %%rbx \n\t"
575 "mov %c[rcx](%[svm]), %%rcx \n\t"
576 "mov %c[rdx](%[svm]), %%rdx \n\t"
577 "mov %c[rsi](%[svm]), %%rsi \n\t"
578 "mov %c[rdi](%[svm]), %%rdi \n\t"
579 "mov %c[rbp](%[svm]), %%rbp \n\t"
580 "mov %c[r8](%[svm]), %%r8 \n\t"
581 "mov %c[r9](%[svm]), %%r9 \n\t"
582 "mov %c[r10](%[svm]), %%r10 \n\t"
583 "mov %c[r11](%[svm]), %%r11 \n\t"
584 "mov %c[r12](%[svm]), %%r12 \n\t"
585 "mov %c[r13](%[svm]), %%r13 \n\t"
586 "mov %c[r14](%[svm]), %%r14 \n\t"
587 "mov %c[r15](%[svm]), %%r15 \n\t"
589 /* Enter guest mode */
591 "mov %c[vmcb](%[svm]), %%rax \n\t"
597 /* Save guest registers, load host registers */
598 "mov %%rbx, %c[rbx](%[svm]) \n\t"
599 "mov %%rcx, %c[rcx](%[svm]) \n\t"
600 "mov %%rdx, %c[rdx](%[svm]) \n\t"
601 "mov %%rsi, %c[rsi](%[svm]) \n\t"
602 "mov %%rdi, %c[rdi](%[svm]) \n\t"
603 "mov %%rbp, %c[rbp](%[svm]) \n\t"
604 "mov %%r8, %c[r8](%[svm]) \n\t"
605 "mov %%r9, %c[r9](%[svm]) \n\t"
606 "mov %%r10, %c[r10](%[svm]) \n\t"
607 "mov %%r11, %c[r11](%[svm]) \n\t"
608 "mov %%r12, %c[r12](%[svm]) \n\t"
609 "mov %%r13, %c[r13](%[svm]) \n\t"
610 "mov %%r14, %c[r14](%[svm]) \n\t"
611 "mov %%r15, %c[r15](%[svm]) \n\t"
615 [vmcb
]"i"(offsetof(struct vcpu
, vmcb_pa
)),
616 [rbx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBX
])),
617 [rcx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RCX
])),
618 [rdx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDX
])),
619 [rsi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RSI
])),
620 [rdi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDI
])),
621 [rbp
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBP
])),
622 [r8
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R8
])),
623 [r9
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R9
])),
624 [r10
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R10
])),
625 [r11
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R11
])),
626 [r12
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R12
])),
627 [r13
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R13
])),
628 [r14
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R14
])),
629 [r15
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R15
]))
631 "rbx", "rcx", "rdx", "rsi", "rdi",
632 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
636 /* meh: dr7? db_regs? */
638 vcpu
->cr2
= vmcb
->save
.cr2
;
640 vcpu
->regs
[VCPU_REGS_RAX
] = vmcb
->save
.rax
;
641 vcpu
->regs
[VCPU_REGS_RSP
] = vmcb
->save
.rsp
;
642 vcpu
->regs
[VCPU_REGS_RIP
] = vmcb
->save
.rip
;
649 load_fs(fs_selector
);
650 load_gs(gs_selector
);
653 wrmsr(MSR_FSBASE
, vcpu
->host_fs_base
);
654 wrmsr(MSR_GSBASE
, vcpu
->host_gs_base
);
656 tss_desc
->sd_type
= SDT_SYSTSS
;
659 wrmsr(MSR_STAR
, star
);
660 wrmsr(MSR_LSTAR
, lstar
);
661 wrmsr(MSR_CSTAR
, cstar
);
662 wrmsr(MSR_SF_MASK
, sfmask
);
666 __asm
__volatile (SVM_STGI
);
670 // print_tss_desc(tss_desc);
671 // print_tss(tss_desc);
673 // print_vmcb_save_area(vmcb);
681 _fkvm_init_seg(struct vmcb_seg
*seg
, uint16_t attrib
)
684 seg
->attrib
= VMCB_SELECTOR_P_MASK
| attrib
;
690 fkvm_init_seg(struct vmcb_seg
*seg
)
692 _fkvm_init_seg(seg
, VMCB_SELECTOR_S_MASK
| VMCB_SELECTOR_WRITE_MASK
);
/*
 * Initialize a system segment register (the callers in this file use it for
 * LDTR and TR): passes the caller's attribute bits to _fkvm_init_seg()
 * unchanged.
 */
static void
fkvm_init_sys_seg(struct vmcb_seg *seg, uint16_t attrib)
{
	_fkvm_init_seg(seg, attrib);
}
702 fkvm_iopm_alloc(void)
704 return contigmalloc(IOPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
708 fkvm_iopm_init(void *iopm
)
710 memset(iopm
, 0xff, IOPM_SIZE
); /* TODO: we may want to allow access to PC debug port */
714 fkvm_iopm_free(void *iopm
)
716 contigfree(iopm
, IOPM_SIZE
, M_DEVBUF
);
720 fkvm_msrpm_alloc(void)
722 return contigmalloc(MSRPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
726 fkvm_msrpm_init(void *msrpm
)
728 memset(msrpm
, 0xff, MSRPM_SIZE
); /* TODO: we may want to allow some MSR accesses */
732 fkvm_msrpm_free(void *msrpm
)
734 contigfree(msrpm
, MSRPM_SIZE
, M_DEVBUF
);
738 fkvm_hsave_area_alloc(void)
740 return contigmalloc(PAGE_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
744 fkvm_hsave_area_init(void *hsave_area
)
749 fkvm_hsave_area_free(void *hsave_area
)
751 contigfree(hsave_area
, PAGE_SIZE
, M_DEVBUF
);
754 static struct vmspace
*
755 fkvm_make_vmspace(void)
759 sp
= vmspace_alloc(0, 0xffffffffffffffff);
761 printf("vmspace_alloc failed\n");
769 fkvm_destroy_vmspace(struct vmspace
* sp
)
775 fkvm_vmcb_alloc(void)
777 return contigmalloc(PAGE_SIZE
, M_DEVBUF
, M_ZERO
, 0, -1UL,
782 fkvm_vmcb_init(struct vmcb
*vmcb
)
784 struct vmcb_control_area
*control
= &vmcb
->control
;
785 struct vmcb_save_area
*save
= &vmcb
->save
;
787 control
->intercept_cr_reads
= INTERCEPT_CR4_MASK
;
789 control
->intercept_cr_writes
= INTERCEPT_CR4_MASK
|
792 control
->intercept_dr_reads
= INTERCEPT_DR0_MASK
|
797 control
->intercept_dr_writes
= INTERCEPT_DR0_MASK
|
804 control
->intercept_exceptions
= (1 << IDT_UD
) | // Invalid Opcode
805 (1 << IDT_MC
); // Machine Check
807 control
->intercepts
= INTERCEPT_INTR
|
814 INTERCEPT_IOIO_PROT
|
826 INTERCEPT_MWAIT_UNCOND
;
828 control
->iopm_base_pa
= vtophys(iopm
);
829 control
->msrpm_base_pa
= vtophys(msrpm
);
830 control
->tsc_offset
= 0;
832 /* TODO: remove this once we assign asid's to distinct VM's */
833 control
->guest_asid
= 1;
834 control
->tlb_control
= VMCB_TLB_CONTROL_FLUSH_ALL
;
836 /* let v_tpr default to 0 */
837 /* let v_irq_pending default to 0 */
838 /* let v_intr default to 0 */
840 control
->v_intr_masking
= 1;
842 /* let v_intr_vector default to 0 */
843 /* let intr_shadow default to 0 */
844 /* let exit_code, exit_info_1, exit_info_2, exit_int_info,
845 exit_int_info_err_code default to 0 */
847 control
->nested_ctl
= 1;
849 /* let event_inj default to 0 */
851 // (nested_cr3 is later)
853 /* let lbr_virt_enable default to 0 */
856 fkvm_init_seg(&save
->ds
);
857 fkvm_init_seg(&save
->es
);
858 fkvm_init_seg(&save
->fs
);
859 fkvm_init_seg(&save
->gs
);
860 fkvm_init_seg(&save
->ss
);
862 _fkvm_init_seg(&save
->cs
, VMCB_SELECTOR_READ_MASK
| VMCB_SELECTOR_S_MASK
|
863 VMCB_SELECTOR_CODE_MASK
);
864 save
->cs
.selector
= 0xf000;
865 save
->cs
.base
= 0xffff0000;
867 save
->gdtr
.limit
= 0xffff;
868 save
->idtr
.limit
= 0xffff;
870 fkvm_init_sys_seg(&save
->ldtr
, SDT_SYSLDT
);
871 fkvm_init_sys_seg(&save
->tr
, SDT_SYS286BSY
);
873 save
->g_pat
= PAT_VALUE(PAT_WRITE_BACK
, 0) | PAT_VALUE(PAT_WRITE_THROUGH
, 1) |
874 PAT_VALUE(PAT_UNCACHED
, 2) | PAT_VALUE(PAT_UNCACHEABLE
, 3) |
875 PAT_VALUE(PAT_WRITE_BACK
, 4) | PAT_VALUE(PAT_WRITE_THROUGH
, 5) |
876 PAT_VALUE(PAT_UNCACHED
, 6) | PAT_VALUE(PAT_UNCACHEABLE
, 7);
878 /* CR0 = 6000_0010h at boot */
879 save
->cr0
= CR0_ET
| CR0_NW
| CR0_CD
;
880 save
->dr6
= 0xffff0ff0;
883 save
->rip
= 0x0000fff0;
885 save
->efer
= EFER_SVME
;
889 fkvm_vmcb_free(struct vmcb
*vmcb
)
891 contigfree(vmcb
, PAGE_SIZE
, M_DEVBUF
);
895 fkvm_virq_set(struct vcpu
*vcpu
, int virq
)
899 i
= virq
/ (sizeof(vcpu
->virqs
[0]) * 8);
900 j
= virq
% (sizeof(vcpu
->virqs
[0]) * 8);
902 vcpu
->virqs
[i
] |= 1UL << j
;
906 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
910 fkvm_virq_pop(struct vcpu
*vcpu
)
913 for (i
= ARRAY_SIZE(vcpu
->virqs
) - 1; i
>= 0; i
--) {
914 j
= flsl(vcpu
->virqs
[i
]);
915 // virqs[i] == 0 => j = 0
916 // virqs[i] == (1 << 0) => j = 1
919 vcpu
->virqs
[i
] &= ~(1UL << (j
- 1));
920 return i
* sizeof(vcpu
->virqs
[0]) * 8 + (j
- 1);
928 fkvm_virq_test(struct vcpu
*vcpu
)
930 #define VIRQ_ASSERT(cond) do { \
932 printf("irq test failed %d\n", __LINE__); \
936 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == -1);
938 fkvm_virq_set(vcpu
, 0);
939 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 0);
940 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == -1);
942 fkvm_virq_set(vcpu
, 1);
943 fkvm_virq_set(vcpu
, 0);
944 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 1);
945 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 0);
946 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == -1);
948 fkvm_virq_set(vcpu
, 0);
949 fkvm_virq_set(vcpu
, 1);
950 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 1);
951 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 0);
952 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == -1);
954 fkvm_virq_set(vcpu
, 255);
955 fkvm_virq_set(vcpu
, 0);
956 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 255);
957 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 0);
958 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == -1);
960 fkvm_virq_set(vcpu
, 0);
961 fkvm_virq_set(vcpu
, 237);
962 fkvm_virq_set(vcpu
, 65);
963 fkvm_virq_set(vcpu
, 204);
964 fkvm_virq_set(vcpu
, 26);
965 fkvm_virq_set(vcpu
, 234);
966 fkvm_virq_set(vcpu
, 38);
967 fkvm_virq_set(vcpu
, 189);
968 fkvm_virq_set(vcpu
, 152);
969 fkvm_virq_set(vcpu
, 29);
970 fkvm_virq_set(vcpu
, 78);
971 fkvm_virq_set(vcpu
, 22);
972 fkvm_virq_set(vcpu
, 238);
973 fkvm_virq_set(vcpu
, 118);
974 fkvm_virq_set(vcpu
, 87);
975 fkvm_virq_set(vcpu
, 147);
976 fkvm_virq_set(vcpu
, 188);
977 fkvm_virq_set(vcpu
, 252);
978 fkvm_virq_set(vcpu
, 154);
979 fkvm_virq_set(vcpu
, 242);
980 fkvm_virq_set(vcpu
, 246);
981 fkvm_virq_set(vcpu
, 40);
982 fkvm_virq_set(vcpu
, 238);
983 fkvm_virq_set(vcpu
, 172);
984 fkvm_virq_set(vcpu
, 61);
986 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 252);
987 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 246);
988 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 242);
989 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 238);
990 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 237);
991 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 234);
992 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 204);
993 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 189);
994 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 188);
995 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 172);
996 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 154);
997 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 152);
998 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 147);
999 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 118);
1000 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 87);
1001 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 78);
1002 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 65);
1003 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 61);
1004 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 40);
1005 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 38);
1006 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 29);
1007 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 26);
1008 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 22);
1009 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == 0);
1010 VIRQ_ASSERT(fkvm_virq_pop(vcpu
) == -1);
1016 _fkvm_vmcb_set_virq(struct vcpu
*vcpu
, int virq
)
1018 struct vmcb_control_area
*control
= &vcpu
->vmcb
->control
;
1020 control
->v_intr_vector
= virq
;
1021 control
->v_intr
= 0xf;
1022 control
->v_irq_pending
= 1;
1025 /* call this when we have a new interrupt for the vcpu */
1027 fkvm_virq_enqueue(struct vcpu
*vcpu
, int virq
)
1029 struct vmcb_control_area
*control
= &vcpu
->vmcb
->control
;
1031 if (control
->v_irq_pending
) {
1032 if (virq
< control
->v_intr_vector
)
1033 fkvm_virq_set(vcpu
, virq
);
1035 fkvm_virq_set(vcpu
, control
->v_intr_vector
);
1036 _fkvm_vmcb_set_virq(vcpu
, virq
);
1040 _fkvm_vmcb_set_virq(vcpu
, virq
);
1044 /* call this when the vcpu has finished handling an interrupt */
1046 fkvm_virq_dequeue(struct vcpu
*vcpu
)
1048 struct vmcb_control_area
*control
= &vcpu
->vmcb
->control
;
1051 if (control
->v_irq_pending
)
1052 return; /* there's already an interrupt pending */
1054 virq
= fkvm_virq_pop(vcpu
);
1056 return; /* no interrupts waiting */
1058 _fkvm_vmcb_set_virq(vcpu
, virq
);
1062 fkvm_inject_virq(struct thread
*td
, struct fkvm_inject_virq_args
*uap
)
1064 struct vcpu
*vcpu
= TD_GET_VCPU(td
);
1066 if (uap
->virq
< 0 || uap
->virq
> 255)
1069 fkvm_virq_enqueue(vcpu
, uap
->virq
);
1075 fkvm_vcpu_create(struct guestvm
*guest_vm
)
1078 vcpu
= malloc(sizeof(struct vcpu
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
1080 vcpu
->vmcb
= fkvm_vmcb_alloc();
1081 vcpu
->vmcb_pa
= vtophys(vcpu
->vmcb
);
1082 printf("vmcb = 0x%p\n", vcpu
->vmcb
);
1083 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu
->vmcb_pa
);
1085 fkvm_vmcb_init(vcpu
->vmcb
);
1086 vcpu
->vmcb
->control
.nested_cr3
= guest_vm
->nested_cr3
;
1087 vcpu
->regs
[VCPU_REGS_RIP
] = vcpu
->vmcb
->save
.rip
;
1089 vcpu
->guest_vm
= guest_vm
;
1095 fkvm_vcpu_destroy(struct vcpu
*vcpu
)
1097 fkvm_vmcb_free(vcpu
->vmcb
);
1098 free(vcpu
, M_DEVBUF
);
1101 static struct guestvm
*
1102 fkvm_guestvm_alloc(void)
1104 return malloc(sizeof(struct guestvm
), M_DEVBUF
, M_WAITOK
|M_ZERO
);
1108 fkvm_guestvm_free(struct guestvm
* guest_vm
)
1110 free(guest_vm
, M_DEVBUF
);
1114 fkvm_guestvm_add_vcpu(struct guestvm
*guest_vm
, struct vcpu
*vcpu
)
1116 guest_vm
->vcpus
[guest_vm
->nr_vcpus
] = vcpu
;
1117 guest_vm
->nr_vcpus
++; /* TODO: Probably not safe to increment */
1118 /* How about a lock to protect all of this? */
1123 fkvm_userpoke(struct thread
*td
, struct fkvm_userpoke_args
*uap
)
1125 printf("fkvm_userpoke\n");
1134 fkvm_mem_has_entry(vm_map_entry_t expected_entry
, vm_map_t vm_map
, vm_offset_t vaddr
)
1136 vm_map_entry_t lookup_entry
;
1137 vm_object_t throwaway_object
;
1138 vm_pindex_t throwaway_pindex
;
1139 vm_prot_t throwaway_prot
;
1140 boolean_t throwaway_wired
;
1143 error
= vm_map_lookup(&vm_map
, /* IN/OUT */
1145 VM_PROT_READ
|VM_PROT_WRITE
,
1146 &lookup_entry
, /* OUT */
1147 &throwaway_object
, /* OUT */
1148 &throwaway_pindex
, /* OUT */
1149 &throwaway_prot
, /* OUT */
1150 &throwaway_wired
); /* OUT */
1151 if (error
!= KERN_SUCCESS
)
1153 vm_map_lookup_done(vm_map
, lookup_entry
);
1154 return (lookup_entry
== expected_entry
);
1158 fkvm_guest_check_range(struct guestvm
*guest_vm
, uint64_t start
, uint64_t end
)
1160 vm_map_t guest_vm_map
;
1161 vm_map_entry_t lookup_entry
;
1162 vm_object_t throwaway_object
;
1163 vm_pindex_t throwaway_pindex
;
1164 vm_prot_t throwaway_prot
;
1165 boolean_t throwaway_wired
;
1169 guest_vm_map
= &guest_vm
->sp
->vm_map
;
1171 error
= vm_map_lookup(&guest_vm_map
, /* IN/OUT */
1173 VM_PROT_READ
|VM_PROT_WRITE
,
1174 &lookup_entry
, /* OUT */
1175 &throwaway_object
, /* OUT */
1176 &throwaway_pindex
, /* OUT */
1177 &throwaway_prot
, /* OUT */
1178 &throwaway_wired
); /* OUT */
1179 if (error
!= KERN_SUCCESS
)
1181 vm_map_lookup_done(guest_vm_map
, lookup_entry
);
1184 TODO: We can't actually nest the lookups:
1185 panic: _sx_xlock_hard: recursed on non-recursive sx user map @ ../../../vm/vm_map.c:3115
1186 Therefore, I've moved the lookup_done above for now, but we really need a lock here.
1188 Maybe it's better to use vm_map_lookup_entry directly.
1192 if (fkvm_mem_has_entry(lookup_entry
, guest_vm_map
, end
))
1201 fkvm_get_regs_regs(struct vcpu
*vcpu
, struct kvm_regs
*out
)
1203 out
->rax
= vcpu
->regs
[VCPU_REGS_RAX
];
1204 out
->rbx
= vcpu
->regs
[VCPU_REGS_RBX
];
1205 out
->rcx
= vcpu
->regs
[VCPU_REGS_RCX
];
1206 out
->rdx
= vcpu
->regs
[VCPU_REGS_RDX
];
1207 out
->rsi
= vcpu
->regs
[VCPU_REGS_RSI
];
1208 out
->rdi
= vcpu
->regs
[VCPU_REGS_RDI
];
1209 out
->rsp
= vcpu
->regs
[VCPU_REGS_RSP
];
1210 out
->rbp
= vcpu
->regs
[VCPU_REGS_RBP
];
1211 out
->r8
= vcpu
->regs
[VCPU_REGS_R8
];
1212 out
->r9
= vcpu
->regs
[VCPU_REGS_R9
];
1213 out
->r10
= vcpu
->regs
[VCPU_REGS_R10
];
1214 out
->r11
= vcpu
->regs
[VCPU_REGS_R11
];
1215 out
->r12
= vcpu
->regs
[VCPU_REGS_R12
];
1216 out
->r13
= vcpu
->regs
[VCPU_REGS_R13
];
1217 out
->r14
= vcpu
->regs
[VCPU_REGS_R14
];
1218 out
->r15
= vcpu
->regs
[VCPU_REGS_R15
];
1219 out
->rip
= vcpu
->regs
[VCPU_REGS_RIP
];
1220 out
->rflags
= vcpu
->vmcb
->save
.rflags
;
1224 fkvm_set_regs_regs(struct vcpu
*vcpu
, const struct kvm_regs
*in
)
1226 vcpu
->regs
[VCPU_REGS_RAX
] = in
->rax
;
1227 vcpu
->regs
[VCPU_REGS_RBX
] = in
->rbx
;
1228 vcpu
->regs
[VCPU_REGS_RCX
] = in
->rcx
;
1229 vcpu
->regs
[VCPU_REGS_RDX
] = in
->rdx
;
1230 vcpu
->regs
[VCPU_REGS_RSI
] = in
->rsi
;
1231 vcpu
->regs
[VCPU_REGS_RDI
] = in
->rdi
;
1232 vcpu
->regs
[VCPU_REGS_RSP
] = in
->rsp
;
1233 vcpu
->regs
[VCPU_REGS_RBP
] = in
->rbp
;
1234 vcpu
->regs
[VCPU_REGS_R8
] = in
->r8
;
1235 vcpu
->regs
[VCPU_REGS_R9
] = in
->r9
;
1236 vcpu
->regs
[VCPU_REGS_R10
] = in
->r10
;
1237 vcpu
->regs
[VCPU_REGS_R11
] = in
->r11
;
1238 vcpu
->regs
[VCPU_REGS_R12
] = in
->r12
;
1239 vcpu
->regs
[VCPU_REGS_R13
] = in
->r13
;
1240 vcpu
->regs
[VCPU_REGS_R14
] = in
->r14
;
1241 vcpu
->regs
[VCPU_REGS_R15
] = in
->r15
;
1242 vcpu
->regs
[VCPU_REGS_RIP
] = in
->rip
;
1243 vcpu
->vmcb
->save
.rflags
= in
->rflags
;
1247 fkvm_get_vmcb_dtable(struct vmcb_seg
*vmcb_seg
, struct kvm_dtable
*fkvm_dtable
)
1249 fkvm_dtable
->base
= vmcb_seg
->base
;
1250 fkvm_dtable
->limit
= vmcb_seg
->limit
;
1254 fkvm_set_vmcb_dtable(struct vmcb_seg
*vmcb_seg
, struct kvm_dtable
*fkvm_dtable
)
1256 vmcb_seg
->base
= fkvm_dtable
->base
;
1257 vmcb_seg
->limit
= fkvm_dtable
->limit
;
1261 fkvm_get_vmcb_seg(struct vmcb_seg
*vmcb_seg
, struct kvm_segment
*fkvm_seg
)
1263 fkvm_seg
->base
= vmcb_seg
->base
;
1264 fkvm_seg
->limit
= vmcb_seg
->limit
;
1265 fkvm_seg
->selector
= vmcb_seg
->selector
;
1267 if (vmcb_seg
->attrib
== 0)
1268 fkvm_seg
->unusable
= 1;
1270 fkvm_seg
->type
= (vmcb_seg
->attrib
& VMCB_SELECTOR_TYPE_MASK
);
1271 fkvm_seg
->s
= (vmcb_seg
->attrib
& VMCB_SELECTOR_S_MASK
) >> VMCB_SELECTOR_S_SHIFT
;
1272 fkvm_seg
->dpl
= (vmcb_seg
->attrib
& VMCB_SELECTOR_DPL_MASK
) >> VMCB_SELECTOR_DPL_SHIFT
;
1273 fkvm_seg
->present
= (vmcb_seg
->attrib
& VMCB_SELECTOR_P_MASK
) >> VMCB_SELECTOR_P_SHIFT
;
1274 fkvm_seg
->avl
= (vmcb_seg
->attrib
& VMCB_SELECTOR_AVL_MASK
) >> VMCB_SELECTOR_AVL_SHIFT
;
1275 fkvm_seg
->l
= (vmcb_seg
->attrib
& VMCB_SELECTOR_L_MASK
) >> VMCB_SELECTOR_L_SHIFT
;
1276 fkvm_seg
->db
= (vmcb_seg
->attrib
& VMCB_SELECTOR_DB_MASK
) >> VMCB_SELECTOR_DB_SHIFT
;
1277 fkvm_seg
->g
= (vmcb_seg
->attrib
& VMCB_SELECTOR_G_MASK
) >> VMCB_SELECTOR_G_SHIFT
;
1282 fkvm_set_vmcb_seg(struct vmcb_seg
*vmcb_seg
, struct kvm_segment
*fkvm_seg
)
1284 vmcb_seg
->base
= fkvm_seg
->base
;
1285 vmcb_seg
->limit
= fkvm_seg
->limit
;
1286 vmcb_seg
->selector
= fkvm_seg
->selector
;
1288 if (fkvm_seg
->unusable
)
1291 vmcb_seg
->attrib
= (fkvm_seg
->type
& VMCB_SELECTOR_TYPE_MASK
);
1292 vmcb_seg
->attrib
|= (fkvm_seg
->s
& 1) << VMCB_SELECTOR_S_SHIFT
;
1293 vmcb_seg
->attrib
|= (fkvm_seg
->dpl
& 3) << VMCB_SELECTOR_DPL_SHIFT
;
1294 vmcb_seg
->attrib
|= (fkvm_seg
->present
& 1) << VMCB_SELECTOR_P_SHIFT
;
1295 vmcb_seg
->attrib
|= (fkvm_seg
->avl
& 1) << VMCB_SELECTOR_AVL_SHIFT
;
1296 vmcb_seg
->attrib
|= (fkvm_seg
->l
& 1) << VMCB_SELECTOR_L_SHIFT
;
1297 vmcb_seg
->attrib
|= (fkvm_seg
->db
& 1) << VMCB_SELECTOR_DB_SHIFT
;
1298 vmcb_seg
->attrib
|= (fkvm_seg
->g
& 1) << VMCB_SELECTOR_G_SHIFT
;
1303 fkvm_get_cr8(struct vcpu
*vcpu
)
1305 // TODO: if cr8 has reserved bits inject GP Fault, return
1307 return (uint64_t) vcpu
->vmcb
->control
.v_tpr
;
1311 fkvm_set_cr8(struct vcpu
*vcpu
, uint64_t cr8
)
1313 // TODO: if cr8 has reserved bits inject GP Fault, return
1315 vcpu
->vmcb
->control
.v_tpr
= (uint8_t) cr8
;
1319 fkvm_get_efer(struct vcpu
*vcpu
)
1321 struct vmcb
*vmcb
= vcpu
->vmcb
;
1323 return vmcb
->save
.efer
& (~EFER_SVME
);
1327 fkvm_set_efer(struct vcpu
*vcpu
, uint64_t efer
)
1329 struct vmcb
*vmcb
= vcpu
->vmcb
;
1330 //TODO: if efer has reserved bits set: inject GP Fault
1332 if (vmcb
->save
.cr0
& CR0_PG
) { //If paging is enabled do not allow changes to LME
1333 if ((vmcb
->save
.efer
& EFER_LME
) != (efer
& EFER_LME
)) {
1334 printf("fkvm_set_efer: attempt to change LME while paging\n");
1335 //TODO: inject GP fault
1339 vmcb
->save
.efer
= efer
| EFER_SVME
;
1343 fkvm_get_regs_sregs(struct vcpu
*vcpu
, struct kvm_sregs
*out
)
1345 struct vmcb
*vmcb
= vcpu
->vmcb
;
1347 fkvm_get_vmcb_seg(&vmcb
->save
.cs
, &out
->cs
);
1348 fkvm_get_vmcb_seg(&vmcb
->save
.ds
, &out
->ds
);
1349 fkvm_get_vmcb_seg(&vmcb
->save
.es
, &out
->es
);
1350 fkvm_get_vmcb_seg(&vmcb
->save
.fs
, &out
->fs
);
1351 fkvm_get_vmcb_seg(&vmcb
->save
.gs
, &out
->gs
);
1352 fkvm_get_vmcb_seg(&vmcb
->save
.ss
, &out
->ss
);
1353 fkvm_get_vmcb_seg(&vmcb
->save
.tr
, &out
->tr
);
1354 fkvm_get_vmcb_seg(&vmcb
->save
.ldtr
, &out
->ldt
);
1356 fkvm_get_vmcb_dtable(&vmcb
->save
.idtr
, &out
->idt
);
1357 fkvm_get_vmcb_dtable(&vmcb
->save
.gdtr
, &out
->gdt
);
1359 out
->cr2
= vcpu
->cr2
;
1360 out
->cr3
= vcpu
->cr3
;
1362 out
->cr8
= fkvm_get_cr8(vcpu
);
1363 out
->efer
= fkvm_get_efer(vcpu
);
1364 /* TODO: apic_base */
1365 out
->cr0
= vmcb
->save
.cr0
;
1366 out
->cr4
= vmcb
->save
.cr4
;
1367 /* TODO: irq_pending, interrupt_bitmap, irq_summary */
1371 fkvm_set_regs_sregs(struct vcpu
*vcpu
, struct kvm_sregs
*in
)
1373 struct vmcb
*vmcb
= vcpu
->vmcb
;
1375 fkvm_set_vmcb_seg(&vmcb
->save
.cs
, &in
->cs
);
1376 fkvm_set_vmcb_seg(&vmcb
->save
.ds
, &in
->ds
);
1377 fkvm_set_vmcb_seg(&vmcb
->save
.es
, &in
->es
);
1378 fkvm_set_vmcb_seg(&vmcb
->save
.fs
, &in
->fs
);
1379 fkvm_set_vmcb_seg(&vmcb
->save
.gs
, &in
->gs
);
1380 fkvm_set_vmcb_seg(&vmcb
->save
.ss
, &in
->ss
);
1381 fkvm_set_vmcb_seg(&vmcb
->save
.tr
, &in
->tr
);
1382 fkvm_set_vmcb_seg(&vmcb
->save
.ldtr
, &in
->ldt
);
1384 vmcb
->save
.cpl
= (vmcb
->save
.cs
.attrib
>> VMCB_SELECTOR_DPL_SHIFT
) & 3;
1386 fkvm_set_vmcb_dtable(&vmcb
->save
.idtr
, &in
->idt
);
1387 fkvm_set_vmcb_dtable(&vmcb
->save
.gdtr
, &in
->gdt
);
1389 vcpu
->cr2
= in
->cr2
;
1390 vcpu
->cr3
= in
->cr3
;
1392 fkvm_set_cr8(vcpu
, in
->cr8
);
1393 fkvm_set_efer(vcpu
, in
->efer
);
1394 /* TODO: apic_base */
1395 vmcb
->save
.cr0
= in
->cr0
;
1396 vmcb
->save
.cr4
= in
->cr4
;
1397 /* TODO: irq_pending, interrupt_bitmap, irq_summary */
1401 fkvm_get_reg_msr(struct vcpu
*vcpu
, uint32_t index
, uint64_t *data
) {
1402 struct vmcb
*vmcb
= vcpu
->vmcb
;
1410 *data
= vmcb
->control
.tsc_offset
+ tsc
;
1415 *data
= vmcb
->save
.star
;
1420 *data
= vmcb
->save
.lstar
;
1425 *data
= vmcb
->save
.cstar
;
1430 *data
= vmcb
->save
.kernel_gs_base
;
1435 *data
= vmcb
->save
.sfmask
;
1439 case MSR_SYSENTER_CS_MSR
: {
1440 *data
= vmcb
->save
.sysenter_cs
;
1444 case MSR_SYSENTER_EIP_MSR
: {
1445 *data
= vmcb
->save
.sysenter_eip
;
1449 case MSR_SYSENTER_ESP_MSR
: {
1450 *data
= vmcb
->save
.sysenter_esp
;
1454 case MSR_DEBUGCTLMSR
: {
1455 printf("unimplemented at %d\n", __LINE__
);
1460 case MSR_PERFEVSEL0
... MSR_PERFEVSEL3
:
1461 case MSR_PERFCTR0
... MSR_PERFCTR3
: {
1462 printf("unimplemented at %d\n", __LINE__
);
1468 *data
= fkvm_get_efer(vcpu
);
1472 case MSR_MC0_STATUS
: {
1473 printf("unimplemented at %d\n", __LINE__
);
1478 case MSR_MCG_STATUS
: {
1479 printf("unimplemented at %d\n", __LINE__
);
1485 printf("unimplemented at %d\n", __LINE__
);
1490 //TODO: MSR_IA32_UCODE_REV
1491 //TODO: MSR_IA32_UCODE_WRITE
1494 *data
= MTRR_CAP_WC
| MTRR_CAP_FIXED
| FKVM_MTRR_NVAR
;
1498 case MSR_MTRRdefType
: {
1499 *data
= vcpu
->mtrrs
.default_type
;
1503 case MSR_MTRR64kBase
... (MSR_MTRR64kBase
+ MTRR_N64K
- 1): {
1504 *data
= vcpu
->mtrrs
.mtrr64k
[index
- MSR_MTRR64kBase
];
1508 case MSR_MTRR16kBase
... (MSR_MTRR16kBase
+ MTRR_N16K
- 1): {
1509 *data
= vcpu
->mtrrs
.mtrr16k
[index
- MSR_MTRR16kBase
];
1513 case MSR_MTRR4kBase
... (MSR_MTRR4kBase
+ MTRR_N4K
- 1): {
1514 *data
= vcpu
->mtrrs
.mtrr4k
[index
- MSR_MTRR4kBase
];
1518 case MSR_MTRRVarBase
... (MSR_MTRRVarBase
+ FKVM_MTRR_NVAR
* 2 - 1): {
1519 *data
= vcpu
->mtrrs
.mtrrvar
[index
- MSR_MTRRVarBase
];
1523 case MSR_APICBASE
: {
1524 printf("unimplemented at %d\n", __LINE__
);
1529 case MSR_IA32_MISC_ENABLE
: {
1530 printf("unimplemented at %d\n", __LINE__
);
1535 //TODO: MSR_KVM_WALL_CLOCK
1536 //TODO: MSR_KVM_SYSTEM_TIME
1539 printf("Did not get unimplemented msr: 0x%" PRIx32
"\n", index
);
1547 fkvm_get_regs_msrs(struct vcpu
*vcpu
, uint32_t nmsrs
, struct kvm_msr_entry
*entries
) {
1550 for (i
= 0; i
< nmsrs
; i
++) {
1551 fkvm_get_reg_msr(vcpu
, entries
[i
].index
, &entries
[i
].data
);
1556 fkvm_set_reg_msr(struct vcpu
*vcpu
, uint32_t index
, uint64_t data
) {
1557 struct vmcb
*vmcb
= vcpu
->vmcb
;
1565 vmcb
->control
.tsc_offset
= data
- tsc
;
1570 vmcb
->save
.star
= data
;
1575 vmcb
->save
.lstar
= data
;
1580 vmcb
->save
.cstar
= data
;
1585 vmcb
->save
.kernel_gs_base
= data
;
1590 vmcb
->save
.sfmask
= data
;
1594 case MSR_SYSENTER_CS_MSR
: {
1595 vmcb
->save
.sysenter_cs
= data
;
1599 case MSR_SYSENTER_EIP_MSR
: {
1600 vmcb
->save
.sysenter_eip
= data
;
1604 case MSR_SYSENTER_ESP_MSR
: {
1605 vmcb
->save
.sysenter_esp
= data
;
1609 case MSR_DEBUGCTLMSR
: {
1610 printf("unimplemented at %d\n", __LINE__
);
1615 case MSR_PERFEVSEL0
... MSR_PERFEVSEL3
:
1616 case MSR_PERFCTR0
... MSR_PERFCTR3
: {
1617 printf("unimplemented at %d\n", __LINE__
);
1623 fkvm_set_efer(vcpu
, data
);
1627 case MSR_MC0_STATUS
: {
1628 printf("unimplemented at %d\n", __LINE__
);
1633 case MSR_MCG_STATUS
: {
1634 printf("unimplemented at %d\n", __LINE__
);
1640 printf("unimplemented at %d\n", __LINE__
);
1645 //TODO: MSR_IA32_UCODE_REV
1646 //TODO: MSR_IA32_UCODE_WRITE
1648 case MSR_MTRRdefType
: {
1649 vcpu
->mtrrs
.default_type
= data
;
1653 case MSR_MTRR64kBase
... (MSR_MTRR64kBase
+ MTRR_N64K
- 1): {
1654 vcpu
->mtrrs
.mtrr64k
[index
- MSR_MTRR64kBase
] = data
;
1658 case MSR_MTRR16kBase
... (MSR_MTRR16kBase
+ MTRR_N16K
- 1): {
1659 vcpu
->mtrrs
.mtrr16k
[index
- MSR_MTRR16kBase
] = data
;
1663 case MSR_MTRR4kBase
... (MSR_MTRR4kBase
+ MTRR_N4K
- 1): {
1664 vcpu
->mtrrs
.mtrr4k
[index
- MSR_MTRR4kBase
] = data
;
1668 case MSR_MTRRVarBase
... (MSR_MTRRVarBase
+ FKVM_MTRR_NVAR
* 2 - 1): {
1669 vcpu
->mtrrs
.mtrrvar
[index
- MSR_MTRRVarBase
] = data
;
1673 case MSR_APICBASE
: {
1674 printf("unimplemented at %d\n", __LINE__
);
1679 case MSR_IA32_MISC_ENABLE
: {
1680 printf("unimplemented at %d\n", __LINE__
);
1685 //TODO: MSR_KVM_WALL_CLOCK
1686 //TODO: MSR_KVM_SYSTEM_TIME
1689 printf("Did not set unimplemented msr: 0x%" PRIx32
"\n", index
);
1697 fkvm_set_regs_msrs(struct vcpu
*vcpu
, uint32_t nmsrs
, struct kvm_msr_entry
*entries
) {
1700 for (i
= 0; i
< nmsrs
; i
++) {
1701 fkvm_set_reg_msr(vcpu
, entries
[i
].index
, entries
[i
].data
);
1708 fkvm_get_regs(struct thread
*td
, struct fkvm_get_regs_args
*uap
)
1716 vcpu
= TD_GET_VCPU(td
);
1720 switch (uap
->type
) {
1722 case FKVM_REGS_TYPE_REGS
: {
1723 struct kvm_regs out
;
1724 fkvm_get_regs_regs(vcpu
, &out
);
1725 return copyout(&out
, uap
->regs
, sizeof(out
));
1728 case FKVM_REGS_TYPE_SREGS
: {
1729 struct kvm_sregs out
;
1730 fkvm_get_regs_sregs(vcpu
, &out
);
1731 return copyout(&out
, uap
->regs
, sizeof(out
));
1734 case FKVM_REGS_TYPE_MSRS
: {
1735 struct kvm_msr_entry
*user_entries
;
1736 struct kvm_msr_entry
*entries
;
1739 user_entries
= (struct kvm_msr_entry
*)uap
->regs
;
1741 size
= sizeof(*entries
) * uap
->n
;
1742 entries
= malloc(size
, M_DEVBUF
, M_WAITOK
|M_ZERO
);
1743 if (entries
== NULL
)
1746 error
= copyin(user_entries
, entries
, size
);
1748 printf("FKVM_REGS_TYPE_MSRS: unable to copyin entries\n");
1749 free(entries
, M_DEVBUF
);
1753 fkvm_get_regs_msrs(vcpu
, uap
->n
, entries
);
1755 error
= copyout(user_entries
, entries
, size
);
1757 printf("FKVM_REGS_TYPE_MSRS: unable to copyout entries\n");
1760 free(entries
, M_DEVBUF
);
1770 fkvm_set_regs(struct thread
*td
, struct fkvm_set_regs_args
*uap
)
1775 vcpu
= TD_GET_VCPU(td
);
1779 switch (uap
->type
) {
1781 case FKVM_REGS_TYPE_REGS
: {
1783 error
= copyin(uap
->regs
, &in
, sizeof(in
));
1786 fkvm_set_regs_regs(vcpu
, &in
);
1790 case FKVM_REGS_TYPE_SREGS
: {
1791 struct kvm_sregs in
;
1792 error
= copyin(uap
->regs
, &in
, sizeof(in
));
1795 fkvm_set_regs_sregs(vcpu
, &in
);
1799 case FKVM_REGS_TYPE_MSRS
: {
1800 struct kvm_msr_entry
*user_entries
;
1801 struct kvm_msr_entry
*entries
;
1804 user_entries
= (struct kvm_msr_entry
*)uap
->regs
;
1806 size
= sizeof(*entries
) * uap
->n
;
1807 entries
= malloc(size
, M_DEVBUF
, M_WAITOK
|M_ZERO
);
1808 if (entries
== NULL
)
1811 error
= copyin(user_entries
, entries
, size
);
1813 printf("FKVM_REGS_TYPE_MSRS: unable to copyin entries\n");
1814 free(entries
, M_DEVBUF
);
1818 fkvm_set_regs_msrs(vcpu
, uap
->n
, entries
);
1820 free(entries
, M_DEVBUF
);
1829 /* This function can only be called with multiples of page sizes */
1830 /* vaddr as NULL overloads to fkvm_guest_check_range */
1832 fkvm_set_user_mem_region(struct thread
*td
, struct fkvm_set_user_mem_region_args
*uap
)
1834 struct guestvm
*guest_vm
;
1839 struct vmspace
*user_vm_space
;
1840 vm_map_t user_vm_map
;
1842 vm_object_t vm_object
;
1843 vm_pindex_t vm_object_pindex
;
1844 vm_ooffset_t vm_object_offset
;
1845 vm_prot_t throwaway_prot
;
1846 boolean_t throwaway_wired
;
1847 vm_map_entry_t lookup_entry
;
1851 guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
1852 if (guest_vm
== NULL
) {
1853 printf("PROC_GET_GUESTVM -> NULL\n");
1857 start
= uap
->guest_pa
;
1858 end
= uap
->guest_pa
+ uap
->size
- 1;
1859 printf("start: 0x%" PRIx64
" bytes\n", start
);
1860 printf("end: 0x%" PRIx64
" bytes\n", end
);
1862 if (uap
->vaddr
== 0)
1863 return fkvm_guest_check_range(guest_vm
, start
, end
);
1865 user_vm_space
= td
->td_proc
->p_vmspace
;
1866 user_vm_map
= &user_vm_space
->vm_map
;
1867 printf("user vm space: %p\n", user_vm_space
);
1868 printf("user vm map: %p\n", user_vm_map
);
1870 error
= vm_map_lookup(&user_vm_map
, /* IN/OUT */
1872 VM_PROT_READ
|VM_PROT_WRITE
,
1873 &lookup_entry
, /* OUT */
1874 &vm_object
, /* OUT */
1875 &vm_object_pindex
, /* OUT */
1876 &throwaway_prot
, /* OUT */
1877 &throwaway_wired
); /* OUT */
1878 if (error
!= KERN_SUCCESS
) {
1879 printf("vm_map_lookup failed: %d\n", error
);
1883 /* TODO: Trust the user that the full region is valid.
1884 * This is very bad. See the note in fkvm_guest_check_range
1885 * on nesting vm lookups. */
1887 if (!fkvm_mem_has_entry(lookup_entry
, user_vm_map
, uap
->vaddr
+ uap
->size
)) {
1888 printf("end of range not contained in same vm map entry as start\n");
1893 printf("vm object: %p\n", vm_object
);
1894 printf(" size: %d pages\n", (int) vm_object
->size
);
1896 vm_object_offset
= IDX_TO_OFF(vm_object_pindex
);
1897 printf("vm_ooffset: 0x%" PRIx64
"\n", vm_object_offset
);
1899 vm_object_reference(vm_object
); // TODO: this might be a mem leak
1901 vm_map_lookup_done(user_vm_map
, lookup_entry
);
1903 error
= vm_map_insert(&guest_vm
->sp
->vm_map
,
1908 VM_PROT_ALL
, VM_PROT_ALL
,
1910 if (error
!= KERN_SUCCESS
) {
1911 printf("vm_map_insert failed: %d\n", error
);
1913 case KERN_INVALID_ADDRESS
:
1926 fkvm_unset_user_mem_region(struct thread
*td
, struct fkvm_unset_user_mem_region_args
*uap
)
1928 struct guestvm
*guest_vm
;
1933 guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
1934 if (guest_vm
== NULL
) {
1935 printf("PROC_GET_GUESTVM -> NULL\n");
1942 vm_map_t guest_vm_map
;
1946 start
= uap
->guest_pa
;
1947 end
= uap
->guest_pa
+ uap
->size
- 1;
1948 printf("start: 0x%" PRIx64
" bytes\n", start
);
1949 printf("end: 0x%" PRIx64
" bytes\n", end
);
1951 guest_vm_map
= &guest_vm
->sp
->vm_map
;
1953 error
= vm_map_remove(guest_vm_map
, start
, end
);
1954 if (error
!= KERN_SUCCESS
)
1961 fkvm_create_vm(struct thread
*td
, struct fkvm_create_vm_args
*uap
)
1963 struct guestvm
*guest_vm
;
1965 printf("SYSCALL : fkvm_create_vm\n");
1970 /* Allocate Guest VM */
1971 guest_vm
= fkvm_guestvm_alloc();
1973 /* Set up the vm address space */
1974 guest_vm
->sp
= fkvm_make_vmspace();
1975 if (guest_vm
->sp
== NULL
) {
1976 fkvm_guestvm_free(guest_vm
);
1979 guest_vm
->nested_cr3
= vtophys(vmspace_pmap(guest_vm
->sp
)->pm_pml4
);
1982 printf(" vm space: %p\n", guest_vm
->sp
);
1983 printf(" vm map: %p\n", &guest_vm
->sp
->vm_map
);
1984 printf(" ncr3: 0x%" PRIx64
"\n", guest_vm
->nested_cr3
);
1986 PROC_SET_GUESTVM(td
->td_proc
, guest_vm
);
1988 printf("fkvm_create_vm done\n");
1993 fkvm_destroy_vm(struct guestvm
*guest_vm
)
1995 /* Destroy the VCPUs */
1996 while (guest_vm
->nr_vcpus
> 0) {
1997 guest_vm
->nr_vcpus
--;
1998 fkvm_vcpu_destroy(guest_vm
->vcpus
[guest_vm
->nr_vcpus
]);
1999 guest_vm
->vcpus
[guest_vm
->nr_vcpus
] = NULL
;
2002 /* Destroy the vmspace */
2003 if (guest_vm
->sp
!= NULL
)
2004 fkvm_destroy_vmspace(guest_vm
->sp
);
2006 /* Destroy the Guest VM itself */
2007 fkvm_guestvm_free(guest_vm
);
2011 intercept_ioio(struct vcpu
*vcpu
, struct kvm_run
*kvm_run
,
2012 uint64_t ioio_info
, uint64_t next_rip
)
2014 struct vmcb
*vmcb
= vcpu
->vmcb
;
2016 kvm_run
->u
.io
.string
= (ioio_info
& STR_MASK
) >> STR_SHIFT
;
2018 kvm_run
->u
.io
.port
= ioio_info
>> PORT_SHIFT
;
2019 kvm_run
->u
.io
.in
= ioio_info
& TYPE_MASK
;
2021 kvm_run
->u
.io
.size
= (ioio_info
& SIZE_MASK
) >> SIZE_SHIFT
;
2023 /* We need to remove the Interrupt Shadow Flag from the VMCB (see 15.20.5 in AMD_Vol2) */
2024 vmcb
->control
.intr_shadow
= 0;
2026 kvm_run
->u
.io
.rep
= (ioio_info
& REP_MASK
) >> REP_SHIFT
;
2027 /* TODO: Research more into Direction Flag checked in KVM; DF bit in RFLAGS */
2029 kvm_run
->u
.io
.next_rip
= next_rip
;
2035 intercept_shutdown(struct vcpu
*vcpu
)
2037 struct vmcb
*vmcb
= vcpu
->vmcb
;
2038 memset(vmcb
, 0, PAGE_SIZE
);
2039 fkvm_vmcb_init(vmcb
);
2043 fkvm_vm_run(struct thread
*td
, struct fkvm_vm_run_args
*uap
)
2046 struct guestvm
*guest_vm
;
2051 struct kvm_run kvm_run
;
2056 vcpu
= TD_GET_VCPU(td
);
2060 guest_vm
= vcpu
->guest_vm
;
2063 error
= copyin(uap
->run
, &kvm_run
, sizeof(struct kvm_run
));
2067 fkvm_set_cr8(vcpu
, kvm_run
.cr8
);
2069 kvm_run
.exit_reason
= KVM_EXIT_CONTINUE
;
2071 while(kvm_run
.exit_reason
== KVM_EXIT_CONTINUE
) {
2072 fkvm_vcpu_run(vcpu
);
2074 switch (vmcb
->control
.exit_code
) {
2076 case VMCB_EXIT_EXCP_BASE
... (VMCB_EXIT_EXCP_BASE
+ 31): {
2079 excp_vector
= vmcb
->control
.exit_code
- VMCB_EXIT_EXCP_BASE
;
2081 printf("VMCB_EXIT_EXCP_BASE, exception vector: 0x%x\n",
2083 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
2088 case VMCB_EXIT_INTR
: {
2089 //printf("VMCB_EXIT_INTR - nothing to do\n");
2090 /* Handled by host OS already */
2091 kvm_run
.exit_reason
= KVM_EXIT_CONTINUE
;
2095 case VMCB_EXIT_NPF
: {
2096 /* EXITINFO1 contains fault error code */
2097 /* EXITINFO2 contains the guest physical address causing the fault. */
2099 u_int64_t fault_code
;
2100 u_int64_t fault_gpa
;
2102 vm_prot_t fault_type
;
2106 fault_code
= vmcb
->control
.exit_info_1
;
2107 fault_gpa
= vmcb
->control
.exit_info_2
;
2108 kvm_run
.exit_reason
= KVM_EXIT_CONTINUE
;
2111 printf("VMCB_EXIT_NPF:\n");
2112 printf("gpa=0x%" PRIx64
"\n", fault_gpa
);
2113 printf("fault code=0x%" PRIx64
" [P=%x, R/W=%x, U/S=%x, I/D=%x]\n",
2115 (fault_code
& PGEX_P
) != 0,
2116 (fault_code
& PGEX_W
) != 0,
2117 (fault_code
& PGEX_U
) != 0,
2118 (fault_code
& PGEX_I
) != 0);
2120 if (fault_code
& PGEX_W
)
2121 fault_type
= VM_PROT_WRITE
;
2122 else if (fault_code
& PGEX_I
)
2123 fault_type
= VM_PROT_EXECUTE
;
2125 fault_type
= VM_PROT_READ
;
2127 fault_flags
= 0; /* TODO: is that right? */
2128 rc
= vm_fault(&guest_vm
->sp
->vm_map
, (fault_gpa
& (~PAGE_MASK
)), fault_type
, fault_flags
);
2129 if (rc
!= KERN_SUCCESS
) {
2130 printf("vm_fault failed: %d\n", rc
);
2131 kvm_run
.u
.mmio
.fault_gpa
= fault_gpa
;
2132 kvm_run
.u
.mmio
.rip
= vcpu
->regs
[VCPU_REGS_RIP
];
2133 kvm_run
.u
.mmio
.cs_base
= vmcb
->save
.cs
.base
;
2134 kvm_run
.exit_reason
= KVM_EXIT_MMIO
;
2139 case VMCB_EXIT_WRITE_CR8
:
2140 kvm_run
.exit_reason
= KVM_EXIT_SET_TPR
;
2143 kvm_run
.exit_reason
= KVM_EXIT_NMI
;
2146 vcpu
->regs
[VCPU_REGS_RIP
]++; /* skip HLT, opcode F4 */
2147 kvm_run
.exit_reason
= KVM_EXIT_HLT
;
2149 case VMCB_EXIT_SHUTDOWN
:
2150 intercept_shutdown(vcpu
);
2151 kvm_run
.exit_reason
= KVM_EXIT_SHUTDOWN
;
2153 case VMCB_EXIT_IOIO
:
2154 error
= intercept_ioio(vcpu
, &kvm_run
,
2155 vmcb
->control
.exit_info_1
,
2156 vmcb
->control
.exit_info_2
);
2158 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
2160 kvm_run
.exit_reason
= KVM_EXIT_IO
;
2162 case VMCB_EXIT_MSR
: {
2173 wrmsr
= vmcb
->control
.exit_info_1
;
2174 msr
= (uint32_t) vcpu
->regs
[VCPU_REGS_RCX
];
2176 printf("VMCB_EXIT_MSR:\n"
2177 " %s msr 0x%" PRIx64
"\n",
2178 wrmsr
? "write to" : "read from",
2179 vcpu
->regs
[VCPU_REGS_RCX
]);
2181 if (!wrmsr
) { /* rdmsr */
2182 error
= fkvm_get_reg_msr(vcpu
, msr
, &value
.full
);
2185 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
2189 vcpu
->regs
[VCPU_REGS_RDX
] = (uint64_t) value
.split
.high
;
2190 vcpu
->regs
[VCPU_REGS_RAX
] = (uint64_t) value
.split
.low
;
2193 value
.split
.high
= (uint32_t) vcpu
->regs
[VCPU_REGS_RDX
];
2194 value
.split
.low
= (uint32_t) vcpu
->regs
[VCPU_REGS_RAX
];
2196 error
= fkvm_set_reg_msr(vcpu
, msr
, value
.full
);
2199 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
2205 vcpu
->regs
[VCPU_REGS_RIP
] += 2;
2208 case VMCB_EXIT_CPUID
: {
2209 kvm_run
.u
.cpuid
.fn
= (uint32_t) vcpu
->regs
[VCPU_REGS_RAX
];
2210 kvm_run
.exit_reason
= KVM_EXIT_CPUID
;
2213 case VMCB_EXIT_WBINVD
: {
2214 /* TODO: stop ignoring this intercept when we have more than 1-cpu guests */
2215 vcpu
->regs
[VCPU_REGS_RIP
] += 2;
2218 case VMCB_EXIT_READ_CR0
:
2219 case VMCB_EXIT_READ_CR3
:
2220 case VMCB_EXIT_READ_CR4
:
2221 case VMCB_EXIT_READ_CR8
:
2222 case VMCB_EXIT_WRITE_CR0
:
2223 case VMCB_EXIT_WRITE_CR3
:
2224 case VMCB_EXIT_WRITE_CR4
:
2225 case VMCB_EXIT_READ_DR0
:
2226 case VMCB_EXIT_READ_DR1
:
2227 case VMCB_EXIT_READ_DR2
:
2228 case VMCB_EXIT_READ_DR3
:
2229 case VMCB_EXIT_WRITE_DR0
:
2230 case VMCB_EXIT_WRITE_DR1
:
2231 case VMCB_EXIT_WRITE_DR2
:
2232 case VMCB_EXIT_WRITE_DR3
:
2233 case VMCB_EXIT_WRITE_DR5
:
2234 case VMCB_EXIT_WRITE_DR7
:
2236 case VMCB_EXIT_INIT
:
2237 case VMCB_EXIT_VINTR
:
2238 case VMCB_EXIT_CR0_SEL_WRITE
:
2239 case VMCB_EXIT_INVD
:
2240 case VMCB_EXIT_INVLPG
:
2241 case VMCB_EXIT_INVLPGA
:
2242 case VMCB_EXIT_TASK_SWITCH
:
2243 case VMCB_EXIT_VMRUN
:
2244 case VMCB_EXIT_VMMCALL
:
2245 case VMCB_EXIT_VMLOAD
:
2246 case VMCB_EXIT_VMSAVE
:
2247 case VMCB_EXIT_STGI
:
2248 case VMCB_EXIT_CLGI
:
2249 case VMCB_EXIT_SKINIT
:
2250 case VMCB_EXIT_MONITOR
:
2251 case VMCB_EXIT_MWAIT_UNCOND
:
2253 printf("Unhandled vmexit:\n"
2254 " code: 0x%" PRIx64
"\n"
2255 " info1: 0x%" PRIx64
"\n"
2256 " info2: 0x%" PRIx64
"\n",
2257 vmcb
->control
.exit_code
,
2258 vmcb
->control
.exit_info_1
,
2259 vmcb
->control
.exit_info_2
);
2262 kvm_run
.exit_reason
= KVM_EXIT_UNKNOWN
;
2266 if (num_runs
== 20) //TODO: make this a #define
2272 /* we're going up to userspace - set the out fields of kvm_run: */
2274 #define IF_MASK 0x00000200
2275 kvm_run
.if_flag
= !!(vcpu
->vmcb
->save
.rflags
& IF_MASK
);
2277 /* TODO: kvm adds a check to see if in-kernel interrupt queues are empty */
2278 kvm_run
.ready_for_interrupt_injection
= kvm_run
.if_flag
&&
2279 !vcpu
->vmcb
->control
.intr_shadow
;
2281 /* TODO kvm_run.ready_for_nmi_injection = ...; */
2283 kvm_run
.cr8
= fkvm_get_cr8(vcpu
);
2286 /* TODO: check copyout ret val */
2287 copyout(&kvm_run
, uap
->run
, sizeof(struct kvm_run
));
2288 // printf("sizeof(struct kvm_run) = %" PRIu64 "\n", sizeof(struct kvm_run));
2294 fkvm_create_vcpu(struct thread
*td
, struct fkvm_create_vcpu_args
*uap
)
2296 struct guestvm
*guest_vm
;
2302 guest_vm
= PROC_GET_GUESTVM(td
->td_proc
);
2303 if (guest_vm
== NULL
) {
2304 printf("PROC_GET_GUESTVM -> NULL\n");
2309 printf("fkvm_create_vcpu: td = %p\n", td
);
2310 vcpu
= fkvm_vcpu_create(guest_vm
);
2311 fkvm_guestvm_add_vcpu(guest_vm
, vcpu
);
2313 TD_SET_VCPU(td
, vcpu
);
2314 printf("fkvm_create_vcpu: vcpu = %p\n", vcpu
);
2319 fkvm_check_cpu_extension(void)
2325 printf("fkvm_check_cpu_extension\n");
2327 /* Assumption: the architecture supports the cpuid instruction */
2329 /* Check if CPUID extended function 8000_0001h is supported. */
2330 do_cpuid(0x80000000, regs
);
2331 cpu_exthigh
= regs
[0];
2333 printf("cpu_exthigh = %u\n", cpu_exthigh
);
2335 if(cpu_exthigh
>= 0x80000001) {
2336 /* Execute CPUID extended function 8000_0001h */
2337 do_cpuid(0x80000001, regs
);
2338 printf("EAX = %u\n", regs
[0]);
2340 if((regs
[0] & 0x2) == 0) { /* Check SVM bit */
2341 printf("SVM not available\n");
2342 goto fail
; /* SVM not available */
2345 vmcr
= rdmsr(0xc0010114); /* Read VM_CR MSR */
2346 if((vmcr
& 0x8) == 0) { /* Check SVMDIS bit */
2347 printf("vmcr = %" PRIx64
"\n", vmcr
);
2348 printf("SVM allowed\n");
2349 return KERN_SUCCESS
; /* SVM allowed */
2352 /* Execute CPUID extended function 8000_000ah */
2353 do_cpuid(0x8000000a, regs
);
2354 if((regs
[3] & 0x2) == 0) { /* Check SVM_LOCK bit */
2355 /* SVM disabled at bios; not unlockable.
2356 * User must change a BIOS setting to enable SVM.
2358 printf("EDX = %u\n", regs
[3]);
2359 printf("SVM disabled at bios\n");
2363 * SVM may be unlockable;
2364 * consult the BIOS or TPM to obtain the key.
2366 printf("EDX = %u\n", regs
[3]);
2367 printf("SVM maybe unlockable\n");
2372 return KERN_FAILURE
;
2376 fkvm_proc_exit(void *arg
, struct proc
*p
)
2378 struct guestvm
*guest_vm
;
2380 guest_vm
= PROC_GET_GUESTVM(p
);
2381 if (guest_vm
== NULL
)
2384 fkvm_destroy_vm(guest_vm
);
2385 PROC_SET_GUESTVM(p
, NULL
);
2389 fkvm_load(void *unused
)
2394 printf("fkvm_load\n");
2395 printf("sizeof(struct vmcb) = %" PRIx64
"\n", sizeof(struct vmcb
));
2401 /* check if SVM is supported */
2402 error
= fkvm_check_cpu_extension();
2403 if(error
!= KERN_SUCCESS
) {
2404 printf("ERROR: SVM extension not available\n");
2408 exit_tag
= EVENTHANDLER_REGISTER(process_exit
, fkvm_proc_exit
, NULL
,
2409 EVENTHANDLER_PRI_ANY
);
2411 /* allocate structures */
2412 hsave_area
= fkvm_hsave_area_alloc();
2413 iopm
= fkvm_iopm_alloc();
2414 msrpm
= fkvm_msrpm_alloc();
2416 /* Initialize structures */
2417 fkvm_hsave_area_init(hsave_area
);
2418 fkvm_iopm_init(iopm
);
2419 fkvm_msrpm_init(msrpm
);
2421 /* Enable SVM in EFER */
2422 efer
= rdmsr(MSR_EFER
);
2423 printf("EFER = %" PRIx64
"\n", efer
);
2424 wrmsr(MSR_EFER
, efer
| EFER_SVME
);
2425 efer
= rdmsr(MSR_EFER
);
2426 printf("new EFER = %" PRIx64
"\n", efer
);
2428 /* Write Host save address in MSR_VM_HSAVE_PA */
2429 wrmsr(MSR_VM_HSAVE_PA
, vtophys(hsave_area
));
2433 SYSINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_load
, NULL
);
2436 fkvm_unload(void *unused
)
2438 printf("fkvm_unload\n");
2441 printf("fkvm_unload: fkvm not loaded");
2445 EVENTHANDLER_DEREGISTER(process_exit
, exit_tag
);
2447 if (msrpm
!= NULL
) {
/* Release the MSR permission map itself; iopm is freed separately below. */
fkvm_msrpm_free(msrpm);
2452 fkvm_iopm_free(iopm
);
2455 if (hsave_area
!= NULL
) {
2456 fkvm_hsave_area_free(hsave_area
);
2460 SYSUNINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_unload
, NULL
);