2 * Copyright (c) 2008 The FreeBSD Project
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/malloc.h>
34 #include <vm/vm_extern.h>
35 #include <vm/vm_map.h>
36 #include <vm/vm_object.h>
37 #include <vm/vm_param.h>
38 #include <machine/_inttypes.h>
39 #include <machine/specialreg.h>
40 #include <machine/segments.h>
41 #include <machine/vmcb.h>
/*
 * Sizes, in bytes, of the two SVM permission maps referenced from the VMCB
 * control area.
 *
 * The I/O permission map is consulted for 64K+3 bits, but the AMD64
 * Architecture Programmer's Manual specifies that it occupies 12 Kbytes
 * (three 4-Kbyte pages) of contiguous physical memory, so the full 12 Kbytes
 * are allocated, initialised and freed rather than just 8 Kbytes + 1 byte.
 * The MSR permission map occupies 8 Kbytes (two 4-Kbyte pages).
 */
#define IOPM_SIZE (12*1024)
#define MSRPM_SIZE (8*1024)
/*
 * File-scope SVM state.  Each pointer starts out NULL and is assigned the
 * result of a contigmalloc() call in fkvm_load().
 */
/* I/O permission map; fkvm_vmcb_init() stores its physical address in the VMCB. */
47 static void *iopm
= NULL
; /* Should I allocate a vm_object_t instead? */
/* MSR permission map; fkvm_vmcb_init() stores its physical address in the VMCB. */
48 static void * msrpm
= NULL
; /* Should I allocate a vm_object_t instead? */
/* One page whose physical address fkvm_load() writes to MSR_VM_HSAVE_PA. */
53 static void *hsave_area
= NULL
;
/* The single VMCB that fkvm_load() initialises and fkvm_userpoke() runs. */
54 static struct vmcb
*vmcb
= NULL
;
/*
 * Members that fkvm_vcpu_run() accesses through its struct vcpu pointer
 * (vcpu->vmcb_pa, vcpu->regs[], vcpu->host_gs_base).
 * NOTE(review): the enclosing struct header is not visible here - confirm
 * these belong to struct vcpu.
 */
/* Physical address of the VMCB, set from vtophys(vmcb) before each run. */
78 unsigned long vmcb_pa
;
/* Guest register values, indexed by the VCPU_REGS_* constants. */
79 unsigned long regs
[NR_VCPU_REGS
];
/* Host MSR_GSBASE value read before entering the guest and written back after. */
80 u_int64_t host_gs_base
;
/*
 * SVM instructions spelled out as raw opcode bytes for use inside inline
 * assembly strings (each expands to a ".byte" directive), so the code does
 * not depend on the assembler recognising the mnemonics.
 */
85 #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
86 #define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"
87 #define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb"
88 #define SVM_CLGI ".byte 0x0f, 0x01, 0xdd"
89 #define SVM_STGI ".byte 0x0f, 0x01, 0xdc"
90 #define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf"
/*
 * Debug helper: print the address of a system segment descriptor followed by
 * each of its fields, one per line, in hexadecimal.
 *
 * tss_desc: descriptor to dump; it is dereferenced unconditionally.
 *
 * Every field is cast to u_int64_t so that the same PRIx64 conversion can be
 * used regardless of the field's declared width.
 */
93 print_tss_desc(struct system_segment_descriptor
*tss_desc
)
95 printf("TSS desc @ %p:\n", tss_desc
);
96 printf("sd_lolimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lolimit
);
97 printf("sd_lobase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lobase
);
98 printf("sd_type: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_type
);
99 printf("sd_dpl: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_dpl
);
100 printf("sd_p: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_p
);
101 printf("sd_hilimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hilimit
);
102 printf("sd_xx0: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx0
);
103 printf("sd_gran: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_gran
);
104 printf("sd_hibase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hibase
);
105 printf("sd_xx1: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx1
);
106 printf("sd_mbz: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_mbz
);
107 printf("sd_xx2: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx2
);
/*
 * Debug helper: dump the memory that a system segment descriptor points at
 * as a sequence of 32-bit words.
 *
 * tss_desc: descriptor whose base and limit fields select the region.
 */
112 print_tss(struct system_segment_descriptor
*tss_desc
)
/* Reassemble the base address: sd_hibase supplies the bits from bit 24 up. */
118 base
= (u_int32_t
*) ((((u_int64_t
) tss_desc
->sd_hibase
) << 24) | ((u_int64_t
) tss_desc
->sd_lobase
));
/* Reassemble the limit (sd_hilimit above bit 16) and convert bytes to 32-bit words. */
119 limit
= ((tss_desc
->sd_hilimit
<< 16) | tss_desc
->sd_lolimit
) / 4;
121 printf("TSS: @ %p\n", base
);
/*
 * The loop bound is inclusive, so words 0 through limit are all printed.
 * NOTE(review): presumably the descriptor limit is the offset of the last
 * valid byte, which would make limit/4 the last valid word - confirm.
 */
122 for (i
= 0; i
<= limit
; i
++)
123 printf("%x: 0x%" PRIx32
"\n", i
, base
[i
]);
/*
 * Enter the guest described by vmcb once and return after it exits, printing
 * a large amount of diagnostic state before and after.
 *
 * vcpu: software register file; regs[], cr2, vmcb_pa and host_gs_base are
 *       read and/or written here.
 * vmcb: control block for the guest.  This parameter shadows the file-scope
 *       variable of the same name.
 *
 * Visible sequence: dump the TSS descriptor and the VMCB save area; record
 * the VMCB physical address; copy RAX/RSP/RIP and CR2 from vcpu into the
 * VMCB; save the host GS base; execute CLGI; load the remaining guest
 * registers from vcpu->regs[] in inline assembly; after the guest run, store
 * them back; copy CR2/RAX/RSP/RIP from the VMCB into vcpu; restore the host
 * FS/GS selectors and GS base; reset the TSS descriptor type; execute STGI;
 * print the exit code and dump state again.
 */
128 fkvm_vcpu_run(struct vcpu
*vcpu
, struct vmcb
*vmcb
)
132 u_short ldt_selector
;
133 unsigned long host_cr2
;
134 unsigned long host_dr6
;
135 unsigned long host_dr7
;
136 struct system_segment_descriptor
*tss_desc
;
139 printf("begin fkvm_vcpu_run\n");
/* Locate the GDT entry at index GPROC0_SEL, treated here as the host TSS descriptor. */
141 tss_desc
= (struct system_segment_descriptor
*) (&gdt
[GPROC0_SEL
]);
142 sel
= GSEL(GPROC0_SEL
, SEL_KPL
);
144 printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64
"\n", sel
);
145 print_tss_desc(tss_desc
);
/* Pre-run dump of the segment and MSR fields held in the VMCB save area. */
148 printf("VMCB save area:\n");
149 printf("fs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
150 vmcb
->save
.fs
.selector
,
151 vmcb
->save
.fs
.attrib
,
154 printf("gs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
155 vmcb
->save
.gs
.selector
,
156 vmcb
->save
.gs
.attrib
,
159 printf("tr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
160 vmcb
->save
.tr
.selector
,
161 vmcb
->save
.tr
.attrib
,
164 printf("ldtr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
165 vmcb
->save
.ldtr
.selector
,
166 vmcb
->save
.ldtr
.attrib
,
167 vmcb
->save
.ldtr
.limit
,
168 vmcb
->save
.ldtr
.base
);
169 printf("kernel_gs_base: %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
170 printf("star: %" PRIx64
"\n", vmcb
->save
.star
);
171 printf("lstar: %" PRIx64
"\n", vmcb
->save
.lstar
);
172 printf("cstar: %" PRIx64
"\n", vmcb
->save
.cstar
);
173 printf("sfmask: %" PRIx64
"\n", vmcb
->save
.sfmask
);
174 printf("sysenter_cs: %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
175 printf("sysenter_esp: %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
176 printf("sysenter_eip: %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
/* The assembly below fetches the VMCB address from vcpu->vmcb_pa, so record the physical address first. */
181 vcpu
->vmcb_pa
= vtophys(vmcb
);
182 printf("vmcb = 0x%p\n", vmcb
);
183 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu
->vmcb_pa
);
/* RAX, RSP and RIP travel through the VMCB save area rather than through the assembly register loads. */
185 vmcb
->save
.rax
= vcpu
->regs
[VCPU_REGS_RAX
];
186 vmcb
->save
.rsp
= vcpu
->regs
[VCPU_REGS_RSP
];
187 vmcb
->save
.rip
= vcpu
->regs
[VCPU_REGS_RIP
];
189 /* meh: kvm has pre_svm_run(svm); */
/* Remember the host GS base so it can be written back after the guest exits. */
191 vcpu
->host_gs_base
= rdmsr(MSR_GSBASE
);
192 printf("host_gs_base: 0x%" PRIx64
"\n", vcpu
->host_gs_base
);
196 ldt_selector
= rldt();
197 printf("fs selector: %hx\n", fs_selector
);
198 printf("gs selector: %hx\n", gs_selector
);
199 printf("ldt selector: %hx\n", ldt_selector
);
206 vmcb
->save
.cr2
= vcpu
->cr2
;
209 /* meh: dr7? db_regs? */
/* CLGI here is paired with the STGI executed after host state has been restored. */
213 __asm
__volatile (SVM_CLGI
);
/*
 * Load guest general-purpose registers from vcpu->regs[].  The %c[...]
 * operands are the offsetof(struct vcpu, ...) constants listed further down.
 * NOTE(review): the binding of %[svm] and the definition of the R register
 * prefix are not visible here - presumably %[svm] holds the vcpu pointer.
 */
221 "mov %c[rbx](%[svm]), %%"R
"bx \n\t"
222 "mov %c[rcx](%[svm]), %%"R
"cx \n\t"
223 "mov %c[rdx](%[svm]), %%"R
"dx \n\t"
224 "mov %c[rsi](%[svm]), %%"R
"si \n\t"
225 "mov %c[rdi](%[svm]), %%"R
"di \n\t"
226 "mov %c[rbp](%[svm]), %%"R
"bp \n\t"
227 "mov %c[r8](%[svm]), %%r8 \n\t"
228 "mov %c[r9](%[svm]), %%r9 \n\t"
229 "mov %c[r10](%[svm]), %%r10 \n\t"
230 "mov %c[r11](%[svm]), %%r11 \n\t"
231 "mov %c[r12](%[svm]), %%r12 \n\t"
232 "mov %c[r13](%[svm]), %%r13 \n\t"
233 "mov %c[r14](%[svm]), %%r14 \n\t"
234 "mov %c[r15](%[svm]), %%r15 \n\t"
236 /* Enter guest mode */
/* The accumulator receives the VMCB physical address stored in vcpu->vmcb_pa. */
238 "mov %c[vmcb](%[svm]), %%"R
"ax \n\t"
244 /* Save guest registers, load host registers */
245 "mov %%"R
"bx, %c[rbx](%[svm]) \n\t"
246 "mov %%"R
"cx, %c[rcx](%[svm]) \n\t"
247 "mov %%"R
"dx, %c[rdx](%[svm]) \n\t"
248 "mov %%"R
"si, %c[rsi](%[svm]) \n\t"
249 "mov %%"R
"di, %c[rdi](%[svm]) \n\t"
250 "mov %%"R
"bp, %c[rbp](%[svm]) \n\t"
251 "mov %%r8, %c[r8](%[svm]) \n\t"
252 "mov %%r9, %c[r9](%[svm]) \n\t"
253 "mov %%r10, %c[r10](%[svm]) \n\t"
254 "mov %%r11, %c[r11](%[svm]) \n\t"
255 "mov %%r12, %c[r12](%[svm]) \n\t"
256 "mov %%r13, %c[r13](%[svm]) \n\t"
257 "mov %%r14, %c[r14](%[svm]) \n\t"
258 "mov %%r15, %c[r15](%[svm]) \n\t"
/* Immediate operands: byte offsets of each slot inside struct vcpu. */
262 [vmcb
]"i"(offsetof(struct vcpu
, vmcb_pa
)),
263 [rbx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBX
])),
264 [rcx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RCX
])),
265 [rdx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDX
])),
266 [rsi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RSI
])),
267 [rdi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDI
])),
268 [rbp
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBP
])),
269 [r8
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R8
])),
270 [r9
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R9
])),
271 [r10
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R10
])),
272 [r11
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R11
])),
273 [r12
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R12
])),
274 [r13
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R13
])),
275 [r14
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R14
])),
276 [r15
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R15
]))
/* NOTE(review): this appears to be the clobber list for the registers overwritten above - confirm. */
278 R
"bx", R
"cx", R
"dx", R
"si", R
"di",
279 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
283 /* meh: dr7? db_regs? */
/* Copy the state that lives in the VMCB save area back into the vcpu structure. */
285 vcpu
->cr2
= vmcb
->save
.cr2
;
287 vcpu
->regs
[VCPU_REGS_RAX
] = vmcb
->save
.rax
;
288 vcpu
->regs
[VCPU_REGS_RSP
] = vmcb
->save
.rsp
;
289 vcpu
->regs
[VCPU_REGS_RIP
] = vmcb
->save
.rip
;
/* Restore host segment state: FS/GS selectors first, then the GS base MSR saved earlier. */
296 load_fs(fs_selector
);
297 load_gs(gs_selector
);
300 wrmsr(MSR_GSBASE
, vcpu
->host_gs_base
);
/*
 * Put the host TSS descriptor type back to SDT_SYSTSS.
 * NOTE(review): presumably this is done so the task register can be
 * reloaded; no reload is visible here - confirm.
 */
302 tss_desc
->sd_type
= SDT_SYSTSS
;
307 __asm
__volatile (SVM_STGI
);
311 printf("exit_code: %" PRIx64
"\n", vmcb
->control
.exit_code
);
315 ldt_selector
= rldt();
316 printf("fs selector: %hx\n", fs_selector
);
317 printf("gs selector: %hx\n", gs_selector
);
318 printf("ldt selector: %hx\n", ldt_selector
);
/* Re-read the MSR so the value printed is what the CPU holds after the restore. */
320 vcpu
->host_gs_base
= rdmsr(MSR_GSBASE
);
321 printf("host_gs_base: 0x%" PRIx64
"\n", vcpu
->host_gs_base
);
323 print_tss_desc(tss_desc
);
/* Post-run dump of the same VMCB save-area fields printed before entry. */
326 printf("VMCB save area:\n");
327 printf("fs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
328 vmcb
->save
.fs
.selector
,
329 vmcb
->save
.fs
.attrib
,
332 printf("gs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
333 vmcb
->save
.gs
.selector
,
334 vmcb
->save
.gs
.attrib
,
337 printf("tr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
338 vmcb
->save
.tr
.selector
,
339 vmcb
->save
.tr
.attrib
,
342 printf("ldtr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
343 vmcb
->save
.ldtr
.selector
,
344 vmcb
->save
.ldtr
.attrib
,
345 vmcb
->save
.ldtr
.limit
,
346 vmcb
->save
.ldtr
.base
);
347 printf("kernel_gs_base: %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
348 printf("star: %" PRIx64
"\n", vmcb
->save
.star
);
349 printf("lstar: %" PRIx64
"\n", vmcb
->save
.lstar
);
350 printf("cstar: %" PRIx64
"\n", vmcb
->save
.cstar
);
351 printf("sfmask: %" PRIx64
"\n", vmcb
->save
.sfmask
);
352 printf("sysenter_cs: %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
353 printf("sysenter_esp: %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
354 printf("sysenter_eip: %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
/*
 * Common VMCB segment initialiser used by fkvm_init_seg(),
 * fkvm_init_sys_seg() and, for CS, directly by fkvm_vmcb_init().
 *
 * seg:    segment register image inside the VMCB save area.
 * attrib: attribute bits supplied by the caller.
 */
363 _fkvm_init_seg(struct vmcb_seg
*seg
, uint16_t attrib
)
/* The present bit is always set in addition to whatever the caller requested. */
366 seg
->attrib
= VMCB_SELECTOR_P_MASK
| attrib
;
/*
 * Initialise a VMCB segment with the S and WRITE attribute bits via
 * _fkvm_init_seg().  fkvm_vmcb_init() applies this to ES, SS, DS, FS and GS.
 *
 * seg: segment register image inside the VMCB save area.
 */
372 fkvm_init_seg(struct vmcb_seg
*seg
)
374 _fkvm_init_seg(seg
, VMCB_SELECTOR_S_MASK
| VMCB_SELECTOR_WRITE_MASK
);
/*
 * Initialise a system segment in the VMCB.  The attribute value is passed
 * to _fkvm_init_seg() unchanged; fkvm_vmcb_init() calls this with
 * SDT_SYSLDT for LDTR and SDT_SYS286BSY for TR.
 *
 * seg:    segment register image inside the VMCB save area.
 * attrib: attribute value forwarded as-is.
 */
378 fkvm_init_sys_seg(struct vmcb_seg
*seg
, uint16_t attrib
)
380 _fkvm_init_seg(seg
, attrib
);
/*
 * Initialise an I/O permission map by setting every bit in its first
 * IOPM_SIZE bytes.
 *
 * iopm: start of the map buffer, which must be at least IOPM_SIZE bytes.
 *       This parameter shadows the file-scope variable of the same name.
 */
384 fkvm_iopm_init(void *iopm
)
386 memset(iopm
, 0xff, IOPM_SIZE
); /* TODO: we may want to allow access to PC debug port */
/*
 * Initialise an MSR permission map by setting every bit in its first
 * MSRPM_SIZE bytes.
 *
 * msrpm: start of the map buffer, which must be at least MSRPM_SIZE bytes.
 *        This parameter shadows the file-scope variable of the same name.
 */
390 fkvm_msrpm_init(void *msrpm
)
392 memset(msrpm
, 0xff, MSRPM_SIZE
); /* TODO: we may want to allow some MSR accesses */
/*
 * Build an address space for the guest and return the physical address of
 * its top-level page table (the pmap's pm_pml4).  fkvm_vmcb_init() stores
 * the result in control->nested_cr3.
 *
 * Takes no arguments.  Failures are reported with printf.
 * NOTE(review): the value returned on the failure paths is not visible here.
 */
396 fkvm_make_vm_map(void)
399 struct vmspace
*sp
= NULL
;
400 vm_object_t obj
= NULL
;
/* New vmspace covering the whole range 0 .. 0xffffffffffffffff. */
402 sp
= vmspace_alloc(0, 0xffffffffffffffff);
404 printf("vmspace_alloc failed\n");
/* Backing object; its size argument is expressed in pages. */
408 obj
= vm_object_allocate(OBJT_DEFAULT
, 0xffffffffffffffff >> PAGE_SHIFT
);
/* Extra reference taken before the object is handed to the map. */
410 vm_object_reference(obj
);
411 rc
= vm_map_insert(&sp
->vm_map
,
/*
 * NOTE(review): the last value on the next line is a page count
 * (shifted by PAGE_SHIFT); if it is vm_map_insert()'s end argument, that
 * is normally a byte address - confirm against vm_map_insert(9).
 */
413 0, 0, 0xffffffffffffffff >> PAGE_SHIFT
,
414 VM_PROT_ALL
, VM_PROT_ALL
,
416 if (rc
!= KERN_SUCCESS
) {
417 printf("vm_map_insert failed: %d\n", rc
);
418 vm_object_deallocate(obj
);
/* Success: hand back the physical address of the new pmap's PML4 table. */
422 return vtophys(vmspace_pmap(sp
)->pm_pml4
);
/* NOTE(review): presumably part of an error-exit path; its label is not visible here. */
426 vm_object_deallocate(obj
);
/*
 * Fill in a VMCB for a freshly created guest: intercept masks, permission
 * map addresses, ASID/TLB control, nested paging, initial segment state,
 * PAT, DR6 and RIP.
 *
 * vmcb: control block to initialise.  This parameter shadows the file-scope
 *       variable of the same name.
 *
 * Reads the file-scope iopm and msrpm pointers, so both must already be
 * allocated.  Calls fkvm_make_vm_map() to create the nested page table root.
 */
439 fkvm_vmcb_init(struct vmcb
*vmcb
)
441 struct vmcb_control_area
*control
= &vmcb
->control
;
442 struct vmcb_save_area
*save
= &vmcb
->save
;
/* Intercept configuration; several of the mask expressions continue on lines not visible here. */
444 control
->intercept_cr_reads
= INTERCEPT_CR4_MASK
;
446 control
->intercept_cr_writes
= INTERCEPT_CR4_MASK
|
449 control
->intercept_dr_reads
= INTERCEPT_DR0_MASK
|
454 control
->intercept_dr_writes
= INTERCEPT_DR0_MASK
|
461 control
->intercept_exceptions
= (1 << IDT_UD
) | // Invalid Opcode
462 (1 << IDT_MC
); // Machine Check
464 control
->intercepts
= INTERCEPT_INTR
|
471 INTERCEPT_IOIO_PROT
|
483 INTERCEPT_MWAIT_UNCOND
;
/* The VMCB holds physical addresses of the permission maps, hence vtophys(). */
485 control
->iopm_base_pa
= vtophys(iopm
);
486 control
->msrpm_base_pa
= vtophys(msrpm
);
487 control
->tsc_offset
= 0;
489 /* TODO: remove this once we assign asid's to distinct VM's */
490 control
->guest_asid
= 1;
491 control
->tlb_control
= VMCB_TLB_CONTROL_FLUSH_ALL
;
493 control
->v_intr_masking
= 1;
/* nested_ctl is set; the matching page table root is stored in nested_cr3 below. */
494 control
->nested_ctl
= 1;
/* Data segments all get the default attributes from fkvm_init_seg(). */
496 fkvm_init_seg(&save
->es
);
497 fkvm_init_seg(&save
->ss
);
498 fkvm_init_seg(&save
->ds
);
499 fkvm_init_seg(&save
->fs
);
500 fkvm_init_seg(&save
->gs
);
/*
 * CS is a readable code segment with selector 0xf000 and base 0xffff0000.
 * Combined with RIP = 0xfff0 below, the first fetch is at 0xfffffff0.
 * NOTE(review): presumably chosen to match the x86 reset vector - confirm.
 */
502 _fkvm_init_seg(&save
->cs
, VMCB_SELECTOR_READ_MASK
| VMCB_SELECTOR_S_MASK
|
503 VMCB_SELECTOR_CODE_MASK
);
504 save
->cs
.selector
= 0xf000;
505 save
->cs
.base
= 0xffff0000;
507 save
->gdtr
.limit
= 0xffff;
508 save
->idtr
.limit
= 0xffff;
510 fkvm_init_sys_seg(&save
->ldtr
, SDT_SYSLDT
);
511 fkvm_init_sys_seg(&save
->tr
, SDT_SYS286BSY
);
/* Guest PAT: entries 4-7 repeat the memory types assigned to entries 0-3. */
513 save
->g_pat
= PAT_VALUE(PAT_WRITE_BACK
, 0) | PAT_VALUE(PAT_WRITE_THROUGH
, 1) |
514 PAT_VALUE(PAT_UNCACHED
, 2) | PAT_VALUE(PAT_UNCACHEABLE
, 3) |
515 PAT_VALUE(PAT_WRITE_BACK
, 4) | PAT_VALUE(PAT_WRITE_THROUGH
, 5) |
516 PAT_VALUE(PAT_UNCACHED
, 6) | PAT_VALUE(PAT_UNCACHEABLE
, 7);
517 save
->dr6
= 0xffff0ff0;
519 //save->rflags = 2; /* It seems like bit 1 is reserved. This line makes no sense. */
520 save
->rip
= 0x0000fff0;
/* Root of the nested page tables: physical address returned by fkvm_make_vm_map(). */
522 control
->nested_cr3
= fkvm_make_vm_map();
523 printf("ncr3: %" PRIx64
"\n", control
->nested_cr3
);
/*
 * NOTE(review): these two declarations repeat the locals of
 * fkvm_make_vm_map(); the definition they belong to is not visible here -
 * confirm whether they are still needed.
 */
527 struct vmspace
*sp
= NULL
;
528 vm_object_t obj
= NULL
;
/* Prototype for the entry point defined immediately below. */
530 int fkvm_userpoke(void *data
);
/*
 * Entry point that runs the file-scope VMCB once via fkvm_vcpu_run() after
 * logging its own name.
 *
 * data: not referenced by any of the statements visible here.
 * Returns int per the prototype; the returned value is not visible here.
 */
533 fkvm_userpoke(void *data
)
535 printf("fkvm_userpoke\n");
/* NOTE(review): the declaration and initialisation of vcpu are not visible here. */
538 fkvm_vcpu_run(&vcpu
, vmcb
);
/*
 * Start-up hook, registered with SYSINIT below.  Allocates the host save
 * area, the VMCB and both permission maps with contigmalloc(), initialises
 * them, sets EFER.SVME and programs MSR_VM_HSAVE_PA with the host save
 * area's physical address.
 *
 * unused: SYSINIT argument (registered as NULL); not referenced by any
 *         statement visible here.
 *
 * The contigfree() calls at the end release earlier allocations in reverse
 * order of allocation.
 * NOTE(review): they are presumably reached only on allocation failure; the
 * labels and branches are not visible here - confirm.
 */
544 fkvm_load(void *unused
)
548 printf("fkvm_load\n");
/* NOTE(review): sizeof yields size_t but is printed with PRIx64 - confirm the types match on all targets. */
549 printf("sizeof(struct vmcb) = %" PRIx64
"\n", sizeof(struct vmcb
));
551 /* TODO: check for the presence of extensions */
/* Every buffer is requested with PAGE_SIZE alignment, anywhere in physical memory (0 .. -1UL). */
553 hsave_area
= contigmalloc(PAGE_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
554 if(hsave_area
== NULL
)
/* The VMCB is the only allocation requested zero-filled (M_ZERO). */
557 vmcb
= (struct vmcb
*)contigmalloc(PAGE_SIZE
, M_DEVBUF
, M_ZERO
, 0, -1UL, PAGE_SIZE
, 0);
561 iopm
= contigmalloc(IOPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
565 msrpm
= contigmalloc(MSRPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
569 /* Initialize iopm and msrpm */
570 fkvm_iopm_init(iopm
);
571 fkvm_msrpm_init(msrpm
);
573 /* Initialize VMCB */
574 fkvm_vmcb_init(vmcb
);
576 /* Enable SVM in EFER */
577 efer
= rdmsr(MSR_EFER
);
578 printf("EFER = %" PRIx64
"\n", efer
);
579 wrmsr(MSR_EFER
, efer
| EFER_SVME
);
/* Read EFER back so the log shows the value the CPU actually holds. */
580 efer
= rdmsr(MSR_EFER
);
581 printf("new EFER = %" PRIx64
"\n", efer
);
583 /* Write Host save address in MSR_VM_HSAVE_PA */
584 wrmsr(MSR_VM_HSAVE_PA
, vtophys(hsave_area
));
589 contigfree(iopm
, IOPM_SIZE
, M_DEVBUF
);
592 contigfree(vmcb
, PAGE_SIZE
, M_DEVBUF
);
595 contigfree(hsave_area
, PAGE_SIZE
, M_DEVBUF
);
/* Run fkvm_load(NULL) during boot at SI_SUB_PSEUDO / SI_ORDER_MIDDLE. */
598 SYSINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_load
, NULL
);
/*
 * Shutdown hook, registered with SYSUNINIT below.  Releases the buffers
 * allocated by fkvm_load() in reverse order of allocation, using the same
 * sizes and malloc type they were allocated with.
 *
 * unused: SYSUNINIT argument (registered as NULL); not referenced by any
 *         statement visible here.
 *
 * NOTE(review): only the hsave_area free is visibly guarded against NULL;
 * guards for the other three are not visible here - confirm.
 */
601 fkvm_unload(void *unused
)
603 printf("fkvm_unload\n");
607 contigfree(msrpm
, MSRPM_SIZE
, M_DEVBUF
);
610 contigfree(iopm
, IOPM_SIZE
, M_DEVBUF
);
613 contigfree(vmcb
, PAGE_SIZE
, M_DEVBUF
);
615 if(hsave_area
!= NULL
)
616 contigfree(hsave_area
, PAGE_SIZE
, M_DEVBUF
);
/* Run fkvm_unload(NULL) at shutdown, at the same subsystem and order as the load hook. */
618 SYSUNINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_unload
, NULL
);