2 * Copyright (c) 2008 The FreeBSD Project
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/malloc.h>
34 #include <vm/vm_extern.h>
35 #include <vm/vm_map.h>
36 #include <vm/vm_object.h>
37 #include <vm/vm_param.h>
38 #include <machine/_inttypes.h>
39 #include <machine/specialreg.h>
40 #include <machine/segments.h>
41 #include <machine/vmcb.h>
/* Byte length used when the I/O permission map is allocated, filled and freed in this file. */
43 #define IOPM_SIZE (8*1024 + 1) /* TODO: check whether this needs to be 12 Kbytes rather than just 8 Kbytes + 1 */
/* Byte length used when the MSR permission map is allocated, filled and freed in this file. */
44 #define MSRPM_SIZE (8*1024)
/*
 * File-scope buffers shared by fkvm_load(), fkvm_unload() and fkvm_vmcb_init().
 * All four start out NULL; fkvm_load() assigns each one from contigmalloc().
 */
/* I/O permission map; fkvm_vmcb_init() stores vtophys(iopm) in the VMCB control area. */
47 static void *iopm
= NULL
; /* Should I allocate a vm_object_t instead? */
/* MSR permission map; fkvm_vmcb_init() stores vtophys(msrpm) in the VMCB control area. */
48 static void * msrpm
= NULL
; /* Should I allocate a vm_object_t instead? */
/* Host save area; fkvm_load() writes vtophys(hsave_area) to MSR_VM_HSAVE_PA. */
53 static void *hsave_area
= NULL
;
/* The single VMCB, initialised by fkvm_vmcb_init() and passed to fkvm_vcpu_run(). */
54 static struct vmcb
*vmcb
= NULL
;
/* Physical address of the VMCB; set from vtophys(vmcb) in fkvm_vcpu_run() and read by its inline asm. */
78 unsigned long vmcb_pa
;
/* Guest register values, indexed by the VCPU_REGS_* constants. */
79 unsigned long regs
[NR_VCPU_REGS
];
/* MSR_GSBASE value read in fkvm_vcpu_run() before the register-load asm and written back after it. */
80 u_int64_t host_gs_base
;
/*
 * SVM instructions written as raw opcode bytes so they can be pasted into
 * inline asm strings. Every encoding starts with 0x0f, 0x01 and differs only
 * in the third byte.
 * NOTE(review): presumably spelled as .byte because the assembler in use did
 * not know the mnemonics -- confirm before replacing with mnemonics.
 */
85 #define SVM_VMLOAD ".byte 0x0f, 0x01, 0xda"
86 #define SVM_VMRUN ".byte 0x0f, 0x01, 0xd8"
87 #define SVM_VMSAVE ".byte 0x0f, 0x01, 0xdb"
88 #define SVM_CLGI ".byte 0x0f, 0x01, 0xdd"
89 #define SVM_STGI ".byte 0x0f, 0x01, 0xdc"
90 #define SVM_INVLPGA ".byte 0x0f, 0x01, 0xdf"
/*
 * Debug helper: print the address of a system segment descriptor followed by
 * each of its fields, one per line, in hexadecimal.
 *
 * tss_desc - descriptor to dump; it is dereferenced without a NULL check.
 *
 * Every field is cast to u_int64_t so that it matches the PRIx64 conversion
 * used in the format strings.
 */
93 print_tss_desc(struct system_segment_descriptor
*tss_desc
)
95 printf("TSS desc @ %p:\n", tss_desc
);
96 printf("sd_lolimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lolimit
);
97 printf("sd_lobase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_lobase
);
98 printf("sd_type: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_type
);
99 printf("sd_dpl: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_dpl
);
100 printf("sd_p: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_p
);
101 printf("sd_hilimit: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hilimit
);
102 printf("sd_xx0: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx0
);
103 printf("sd_gran: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_gran
);
104 printf("sd_hibase: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_hibase
);
105 printf("sd_xx1: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx1
);
106 printf("sd_mbz: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_mbz
);
107 printf("sd_xx2: 0x%" PRIx64
"\n", (u_int64_t
) tss_desc
->sd_xx2
);
/*
 * Debug helper: dump the memory that a system segment descriptor points at,
 * one 32-bit word per line.
 *
 * tss_desc - descriptor whose base and limit fields select the memory to dump;
 *            it is dereferenced without a NULL check.
 */
112 print_tss(struct system_segment_descriptor
*tss_desc
)
/* Rebuild the base address: sd_hibase is placed at bit 24 and above, sd_lobase fills the low bits. */
118 base
= (u_int32_t
*) ((((u_int64_t
) tss_desc
->sd_hibase
) << 24) | ((u_int64_t
) tss_desc
->sd_lobase
));
/* Join the two limit fields (sd_hilimit above bit 15), then divide by 4 to index the u_int32_t array. */
119 limit
= ((tss_desc
->sd_hilimit
<< 16) | tss_desc
->sd_lolimit
) / 4;
121 printf("TSS: @ %p\n", base
);
/* The bound is inclusive: words 0 through limit are printed. */
122 for (i
= 0; i
<= limit
; i
++)
123 printf("%x: 0x%" PRIx32
"\n", i
, base
[i
]);
/*
 * Enter the guest described by `vmcb` with the register state held in `vcpu`,
 * then copy the guest state back out. Segment registers and syscall/sysenter
 * MSRs from the VMCB save area are printed both before and after.
 *
 * vcpu - guest register values (regs[]), cr2, plus scratch fields written
 *        here (vmcb_pa, host_gs_base).
 * vmcb - control block; rax, rsp, rip and cr2 are copied into its save area
 *        before entry and read back from it afterwards.
 *
 * NOTE(review): the parameter `vmcb` shadows the file-scope `vmcb` pointer.
 */
128 fkvm_vcpu_run(struct vcpu
*vcpu
, struct vmcb
*vmcb
)
132 u_short ldt_selector
;
133 unsigned long host_cr2
;
134 unsigned long host_dr6
;
135 unsigned long host_dr7
;
136 struct system_segment_descriptor
*tss_desc
;
139 printf("begin fkvm_vcpu_run\n");
/* Locate the GDT entry for GPROC0_SEL and build the matching kernel-privilege selector. */
141 tss_desc
= (struct system_segment_descriptor
*) (&gdt
[GPROC0_SEL
]);
142 sel
= GSEL(GPROC0_SEL
, SEL_KPL
);
144 printf("GSEL(GPROC0_SEL, SEL_KPL)=0x%" PRIx64
"\n", sel
);
145 print_tss_desc(tss_desc
);
/* Pre-entry dump of the VMCB save area: segment registers first, then MSR-backed fields. */
148 printf("fs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
149 vmcb
->save
.fs
.selector
,
150 vmcb
->save
.fs
.attrib
,
153 printf("gs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
154 vmcb
->save
.gs
.selector
,
155 vmcb
->save
.gs
.attrib
,
158 printf("tr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
159 vmcb
->save
.tr
.selector
,
160 vmcb
->save
.tr
.attrib
,
163 printf("ldtr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
164 vmcb
->save
.ldtr
.selector
,
165 vmcb
->save
.ldtr
.attrib
,
166 vmcb
->save
.ldtr
.limit
,
167 vmcb
->save
.ldtr
.base
);
168 printf("kernel_gs_base: %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
169 printf("star: %" PRIx64
"\n", vmcb
->save
.star
);
170 printf("lstar: %" PRIx64
"\n", vmcb
->save
.lstar
);
171 printf("cstar: %" PRIx64
"\n", vmcb
->save
.cstar
);
172 printf("sfmask: %" PRIx64
"\n", vmcb
->save
.sfmask
);
173 printf("sysenter_cs: %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
174 printf("sysenter_esp: %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
175 printf("sysenter_eip: %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
/* The asm below fetches the VMCB physical address from vcpu->vmcb_pa, so record it first. */
179 vcpu
->vmcb_pa
= vtophys(vmcb
);
/* NOTE(review): if this printf's %p already emits a "0x" prefix, the output reads "0x0x..." -- confirm. */
180 printf("vmcb = 0x%p\n", vmcb
);
181 printf("vcpu->vmcb_pa = 0x%lx\n", vcpu
->vmcb_pa
);
/* rax, rsp and rip are handed over through the VMCB save area; the asm loads only the other registers. */
183 vmcb
->save
.rax
= vcpu
->regs
[VCPU_REGS_RAX
];
184 vmcb
->save
.rsp
= vcpu
->regs
[VCPU_REGS_RSP
];
185 vmcb
->save
.rip
= vcpu
->regs
[VCPU_REGS_RIP
];
187 /* meh: kvm has pre_svm_run(svm); */
/* Remember the host GS base; it is written back to MSR_GSBASE after the guest register save below. */
189 vcpu
->host_gs_base
= rdmsr(MSR_GSBASE
);
/* Capture the current LDT selector. */
193 ldt_selector
= rldt();
200 vmcb
->save
.cr2
= vcpu
->cr2
;
203 /* meh: dr7? db_regs? */
/* CLGI is issued before the guest registers are loaded; the matching STGI follows the host-state restore. */
207 __asm
__volatile (SVM_CLGI
);
/* Load guest general-purpose registers from vcpu->regs[] using the constant offsets declared as operands. */
215 "mov %c[rbx](%[svm]), %%"R
"bx \n\t"
216 "mov %c[rcx](%[svm]), %%"R
"cx \n\t"
217 "mov %c[rdx](%[svm]), %%"R
"dx \n\t"
218 "mov %c[rsi](%[svm]), %%"R
"si \n\t"
219 "mov %c[rdi](%[svm]), %%"R
"di \n\t"
220 "mov %c[rbp](%[svm]), %%"R
"bp \n\t"
221 "mov %c[r8](%[svm]), %%r8 \n\t"
222 "mov %c[r9](%[svm]), %%r9 \n\t"
223 "mov %c[r10](%[svm]), %%r10 \n\t"
224 "mov %c[r11](%[svm]), %%r11 \n\t"
225 "mov %c[r12](%[svm]), %%r12 \n\t"
226 "mov %c[r13](%[svm]), %%r13 \n\t"
227 "mov %c[r14](%[svm]), %%r14 \n\t"
228 "mov %c[r15](%[svm]), %%r15 \n\t"
230 /* Enter guest mode */
/* The ax register receives the VMCB physical address stored in vcpu->vmcb_pa. */
232 "mov %c[vmcb](%[svm]), %%"R
"ax \n\t"
238 /* Save guest registers, load host registers */
239 "mov %%"R
"bx, %c[rbx](%[svm]) \n\t"
240 "mov %%"R
"cx, %c[rcx](%[svm]) \n\t"
241 "mov %%"R
"dx, %c[rdx](%[svm]) \n\t"
242 "mov %%"R
"si, %c[rsi](%[svm]) \n\t"
243 "mov %%"R
"di, %c[rdi](%[svm]) \n\t"
244 "mov %%"R
"bp, %c[rbp](%[svm]) \n\t"
245 "mov %%r8, %c[r8](%[svm]) \n\t"
246 "mov %%r9, %c[r9](%[svm]) \n\t"
247 "mov %%r10, %c[r10](%[svm]) \n\t"
248 "mov %%r11, %c[r11](%[svm]) \n\t"
249 "mov %%r12, %c[r12](%[svm]) \n\t"
250 "mov %%r13, %c[r13](%[svm]) \n\t"
251 "mov %%r14, %c[r14](%[svm]) \n\t"
252 "mov %%r15, %c[r15](%[svm]) \n\t"
/* Immediate operands: byte offsets into struct vcpu consumed by the %c[...] references above. */
256 [vmcb
]"i"(offsetof(struct vcpu
, vmcb_pa
)),
257 [rbx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBX
])),
258 [rcx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RCX
])),
259 [rdx
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDX
])),
260 [rsi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RSI
])),
261 [rdi
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RDI
])),
262 [rbp
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_RBP
])),
263 [r8
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R8
])),
264 [r9
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R9
])),
265 [r10
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R10
])),
266 [r11
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R11
])),
267 [r12
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R12
])),
268 [r13
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R13
])),
269 [r14
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R14
])),
270 [r15
]"i"(offsetof(struct vcpu
, regs
[VCPU_REGS_R15
]))
272 R
"bx", R
"cx", R
"dx", R
"si", R
"di",
273 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
277 /* meh: dr7? db_regs? */
/* Copy cr2, rax, rsp and rip back out of the VMCB save area into vcpu. */
279 vcpu
->cr2
= vmcb
->save
.cr2
;
281 vcpu
->regs
[VCPU_REGS_RAX
] = vmcb
->save
.rax
;
282 vcpu
->regs
[VCPU_REGS_RSP
] = vmcb
->save
.rsp
;
283 vcpu
->regs
[VCPU_REGS_RIP
] = vmcb
->save
.rip
;
/* Reload FS and GS from fs_selector / gs_selector, then rewrite MSR_GSBASE with the value saved before entry. */
290 load_fs(fs_selector
);
291 load_gs(gs_selector
);
294 wrmsr(MSR_GSBASE
, vcpu
->host_gs_base
);
/* NOTE(review): presumably resets the descriptor type so the TSS can be loaded again -- confirm. */
296 tss_desc
->sd_type
= SDT_SYSTSS
;
301 __asm
__volatile (SVM_STGI
);
/* Report the exit code, then repeat the pre-entry dump so the two can be compared. */
305 printf("exit_code: %" PRIx64
"\n", vmcb
->control
.exit_code
);
307 print_tss_desc(tss_desc
);
310 printf("fs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
311 vmcb
->save
.fs
.selector
,
312 vmcb
->save
.fs
.attrib
,
315 printf("gs: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
316 vmcb
->save
.gs
.selector
,
317 vmcb
->save
.gs
.attrib
,
320 printf("tr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
321 vmcb
->save
.tr
.selector
,
322 vmcb
->save
.tr
.attrib
,
325 printf("ldtr: [selector %" PRIx16
", attrib %" PRIx16
", limit %" PRIx32
", base %" PRIx64
"]\n",
326 vmcb
->save
.ldtr
.selector
,
327 vmcb
->save
.ldtr
.attrib
,
328 vmcb
->save
.ldtr
.limit
,
329 vmcb
->save
.ldtr
.base
);
330 printf("kernel_gs_base: %" PRIx64
"\n", vmcb
->save
.kernel_gs_base
);
331 printf("star: %" PRIx64
"\n", vmcb
->save
.star
);
332 printf("lstar: %" PRIx64
"\n", vmcb
->save
.lstar
);
333 printf("cstar: %" PRIx64
"\n", vmcb
->save
.cstar
);
334 printf("sfmask: %" PRIx64
"\n", vmcb
->save
.sfmask
);
335 printf("sysenter_cs: %" PRIx64
"\n", vmcb
->save
.sysenter_cs
);
336 printf("sysenter_esp: %" PRIx64
"\n", vmcb
->save
.sysenter_esp
);
337 printf("sysenter_eip: %" PRIx64
"\n", vmcb
->save
.sysenter_eip
);
/*
 * Shared segment initialiser used by fkvm_init_seg(), fkvm_init_sys_seg()
 * and, for cs, directly by fkvm_vmcb_init().
 *
 * seg    - VMCB segment to initialise.
 * attrib - attribute bits chosen by the caller; VMCB_SELECTOR_P_MASK is
 *          always OR'ed in on top of them.
 */
345 _fkvm_init_seg(struct vmcb_seg
*seg
, uint16_t attrib
)
348 seg
->attrib
= VMCB_SELECTOR_P_MASK
| attrib
;
/*
 * Initialise a segment with the VMCB_SELECTOR_S_MASK and
 * VMCB_SELECTOR_WRITE_MASK attribute bits. fkvm_vmcb_init() applies this to
 * es, ss, ds, fs and gs.
 *
 * seg - VMCB segment to initialise.
 */
354 fkvm_init_seg(struct vmcb_seg
*seg
)
356 _fkvm_init_seg(seg
, VMCB_SELECTOR_S_MASK
| VMCB_SELECTOR_WRITE_MASK
);
/*
 * Initialise a system segment; the caller's attribute value is forwarded
 * unchanged to _fkvm_init_seg(). fkvm_vmcb_init() uses this for ldtr and tr.
 *
 * seg    - VMCB segment to initialise.
 * attrib - attribute bits (the callers in this file pass SDT_SYS* constants).
 */
360 fkvm_init_sys_seg(struct vmcb_seg
*seg
, uint16_t attrib
)
362 _fkvm_init_seg(seg
, attrib
);
/*
 * Fill the I/O permission map with 0xff bytes over its full IOPM_SIZE length.
 * Going by the TODO below, no port access is currently let through.
 *
 * iopm - buffer of at least IOPM_SIZE bytes.
 *
 * NOTE(review): the parameter shadows the file-scope `iopm` pointer.
 */
366 fkvm_iopm_init(void *iopm
)
368 memset(iopm
, 0xff, IOPM_SIZE
); /* TODO: we may want to allow access to PC debug port */
/*
 * Fill the MSR permission map with 0xff bytes over its full MSRPM_SIZE length.
 * Going by the TODO below, no MSR access is currently let through.
 *
 * msrpm - buffer of at least MSRPM_SIZE bytes.
 *
 * NOTE(review): the parameter shadows the file-scope `msrpm` pointer.
 */
372 fkvm_msrpm_init(void *msrpm
)
374 memset(msrpm
, 0xff, MSRPM_SIZE
); /* TODO: we may want to allow some MSR accesses */
/*
 * Create a new vmspace, insert one OBJT_DEFAULT VM object into its map, and
 * return the physical address of the vmspace's pm_pml4 page table.
 * fkvm_vmcb_init() stores the result in control->nested_cr3.
 */
378 fkvm_make_vm_map(void)
381 struct vmspace
*sp
= NULL
;
382 vm_object_t obj
= NULL
;
384 sp
= vmspace_alloc(0, 0xffffffffffffffff);
386 printf("vmspace_alloc failed\n");
/* The object size is given in pages, hence the shift by PAGE_SHIFT. */
390 obj
= vm_object_allocate(OBJT_DEFAULT
, 0xffffffffffffffff >> PAGE_SHIFT
);
/* Take an extra reference on obj before handing it to vm_map_insert(). */
392 vm_object_reference(obj
);
393 rc
= vm_map_insert(&sp
->vm_map
,
/* NOTE(review): the third value below looks like a page count; confirm vm_map_insert() does not expect an address here. */
395 0, 0, 0xffffffffffffffff >> PAGE_SHIFT
,
396 VM_PROT_ALL
, VM_PROT_ALL
,
/* Insertion failed: log the status code and drop a reference on obj. */
398 if (rc
!= KERN_SUCCESS
) {
399 printf("vm_map_insert failed: %d\n", rc
);
400 vm_object_deallocate(obj
);
/* Result: physical address of the new vmspace's top-level page table. */
404 return vtophys(vmspace_pmap(sp
)->pm_pml4
);
408 vm_object_deallocate(obj
);
/*
 * Populate a VMCB with its initial contents: intercept masks, permission-map
 * addresses, ASID/TLB settings and nested paging in the control area, and
 * segment registers, descriptor-table limits, PAT, dr6 and rip in the save
 * area.
 *
 * vmcb - control block to fill in; fkvm_load() allocates it with M_ZERO.
 *
 * Reads the file-scope `iopm` and `msrpm` pointers, so both must already be
 * allocated. NOTE(review): the parameter `vmcb` shadows the file-scope
 * `vmcb` pointer.
 */
421 fkvm_vmcb_init(struct vmcb
*vmcb
)
423 struct vmcb_control_area
*control
= &vmcb
->control
;
424 struct vmcb_save_area
*save
= &vmcb
->save
;
/* Intercept configuration: control-register and debug-register accesses, selected exceptions, instructions. */
426 control
->intercept_cr_reads
= INTERCEPT_CR4_MASK
;
428 control
->intercept_cr_writes
= INTERCEPT_CR4_MASK
|
431 control
->intercept_dr_reads
= INTERCEPT_DR0_MASK
|
436 control
->intercept_dr_writes
= INTERCEPT_DR0_MASK
|
443 control
->intercept_exceptions
= (1 << IDT_UD
) | // Invalid Opcode
444 (1 << IDT_MC
); // Machine Check
446 control
->intercepts
= INTERCEPT_INTR
|
453 INTERCEPT_IOIO_PROT
|
465 INTERCEPT_MWAIT_UNCOND
;
/* The control area takes the permission maps by physical address. */
467 control
->iopm_base_pa
= vtophys(iopm
);
468 control
->msrpm_base_pa
= vtophys(msrpm
);
469 control
->tsc_offset
= 0;
471 /* TODO: remove this once we assign asid's to distinct VM's */
472 control
->guest_asid
= 1;
473 control
->tlb_control
= VMCB_TLB_CONTROL_FLUSH_ALL
;
475 control
->v_intr_masking
= 1;
/* nested_ctl is switched on here; the matching nested_cr3 is filled in at the end of this function. */
476 control
->nested_ctl
= 1;
/* Data segments all share the attributes chosen by fkvm_init_seg(). */
478 fkvm_init_seg(&save
->es
);
479 fkvm_init_seg(&save
->ss
);
480 fkvm_init_seg(&save
->ds
);
481 fkvm_init_seg(&save
->fs
);
482 fkvm_init_seg(&save
->gs
);
/* cs gets read/code attribute bits of its own instead of the data-segment defaults. */
484 _fkvm_init_seg(&save
->cs
, VMCB_SELECTOR_READ_MASK
| VMCB_SELECTOR_S_MASK
|
485 VMCB_SELECTOR_CODE_MASK
);
/* With base 0xffff0000 and the rip set below (0xfff0), the first fetch is at 0xfffffff0. */
/* NOTE(review): this looks like the x86 reset-vector layout -- confirm. */
486 save
->cs
.selector
= 0xf000;
487 save
->cs
.base
= 0xffff0000;
489 save
->gdtr
.limit
= 0xffff;
490 save
->idtr
.limit
= 0xffff;
492 fkvm_init_sys_seg(&save
->ldtr
, SDT_SYSLDT
);
493 fkvm_init_sys_seg(&save
->tr
, SDT_SYS286BSY
);
/* Guest PAT: entries 4-7 repeat the memory types chosen for entries 0-3. */
495 save
->g_pat
= PAT_VALUE(PAT_WRITE_BACK
, 0) | PAT_VALUE(PAT_WRITE_THROUGH
, 1) |
496 PAT_VALUE(PAT_UNCACHED
, 2) | PAT_VALUE(PAT_UNCACHEABLE
, 3) |
497 PAT_VALUE(PAT_WRITE_BACK
, 4) | PAT_VALUE(PAT_WRITE_THROUGH
, 5) |
498 PAT_VALUE(PAT_UNCACHED
, 6) | PAT_VALUE(PAT_UNCACHEABLE
, 7);
499 save
->dr6
= 0xffff0ff0;
501 //save->rflags = 2; /* It seems like bit 1 is reserved. This line makes no sense. */
502 save
->rip
= 0x0000fff0;
/* nested_cr3 is the physical page-table address returned by fkvm_make_vm_map(). */
504 control
->nested_cr3
= fkvm_make_vm_map();
505 printf("ncr3: %" PRIx64
"\n", control
->nested_cr3
);
/*
 * vmspace and VM object handles, both initially NULL.
 * NOTE(review): these appear to sit at file scope and duplicate the names of
 * the locals in fkvm_make_vm_map(); no other use is visible here -- confirm
 * whether they are still needed.
 */
509 struct vmspace
*sp
= NULL
;
510 vm_object_t obj
= NULL
;
/*
 * Entry point that logs its own name and then runs the guest once through
 * fkvm_vcpu_run(), passing the file-scope `vmcb`.
 *
 * data - not referenced in the code shown here.
 */
512 int fkvm_userpoke(void *data
);
515 fkvm_userpoke(void *data
)
517 printf("fkvm_userpoke\n");
520 fkvm_vcpu_run(&vcpu
, vmcb
);
/*
 * Load-time setup: allocate the host save area, the VMCB and both permission
 * maps, initialise them, set EFER_SVME in MSR_EFER, and write the host save
 * area's physical address to MSR_VM_HSAVE_PA.
 *
 * unused - SYSINIT argument; not referenced in the code shown here.
 */
526 fkvm_load(void *unused
)
530 printf("fkvm_load\n");
/* NOTE(review): sizeof yields a size_t but is printed with PRIx64 -- confirm the two match on the target. */
531 printf("sizeof(struct vmcb) = %" PRIx64
"\n", sizeof(struct vmcb
));
533 /* TODO: check for the presence of extensions */
/* Each buffer is requested from contigmalloc() with PAGE_SIZE passed in the alignment position. */
535 hsave_area
= contigmalloc(PAGE_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
536 if(hsave_area
== NULL
)
/* The VMCB is the only allocation requested with M_ZERO. */
539 vmcb
= (struct vmcb
*)contigmalloc(PAGE_SIZE
, M_DEVBUF
, M_ZERO
, 0, -1UL, PAGE_SIZE
, 0);
543 iopm
= contigmalloc(IOPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
547 msrpm
= contigmalloc(MSRPM_SIZE
, M_DEVBUF
, 0, 0, -1UL, PAGE_SIZE
, 0);
551 /* Initialize iopm and msrpm */
552 fkvm_iopm_init(iopm
);
553 fkvm_msrpm_init(msrpm
);
555 /* Initialize VMCB */
556 fkvm_vmcb_init(vmcb
);
558 /* Enable SVM in EFER */
/* EFER is read back after the write so the printed value shows what the register now holds. */
559 efer
= rdmsr(MSR_EFER
);
560 printf("EFER = %" PRIx64
"\n", efer
);
561 wrmsr(MSR_EFER
, efer
| EFER_SVME
);
562 efer
= rdmsr(MSR_EFER
);
563 printf("new EFER = %" PRIx64
"\n", efer
);
565 /* Write Host save address in MSR_VM_HSAVE_PA */
566 wrmsr(MSR_VM_HSAVE_PA
, vtophys(hsave_area
));
/* NOTE(review): the frees below presumably belong to failure-unwind paths, releasing buffers in reverse allocation order -- confirm they are not reached on success. */
571 contigfree(iopm
, IOPM_SIZE
, M_DEVBUF
);
574 contigfree(vmcb
, PAGE_SIZE
, M_DEVBUF
);
577 contigfree(hsave_area
, PAGE_SIZE
, M_DEVBUF
);
/* Register fkvm_load to run at SI_SUB_PSEUDO, order SI_ORDER_MIDDLE, with a NULL argument. */
580 SYSINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_load
, NULL
);
/*
 * Unload-time teardown: release the buffers allocated by fkvm_load(), in the
 * reverse of their allocation order (msrpm, iopm, vmcb, hsave_area).
 *
 * unused - SYSUNINIT argument; not referenced in the code shown here.
 */
583 fkvm_unload(void *unused
)
585 printf("fkvm_unload\n");
/* Sizes passed to contigfree() match the sizes used at allocation time. */
589 contigfree(msrpm
, MSRPM_SIZE
, M_DEVBUF
);
592 contigfree(iopm
, IOPM_SIZE
, M_DEVBUF
);
595 contigfree(vmcb
, PAGE_SIZE
, M_DEVBUF
);
597 if(hsave_area
!= NULL
)
598 contigfree(hsave_area
, PAGE_SIZE
, M_DEVBUF
);
/* Register fkvm_unload as the teardown counterpart at the same subsystem and order as fkvm_load. */
600 SYSUNINIT(fkvm
, SI_SUB_PSEUDO
, SI_ORDER_MIDDLE
, fkvm_unload
, NULL
);