fkvm_vcpu_run(): really messy and largely copied from kvm
[freebsd-src/fkvm-freebsd.git] / sys / kern / kern_fkvm.c
blob6a94fa6a88dec7d32dd531491bc33ab83f9d5e5e
1 /*-
2 * Copyright (c) 2008 The FreeBSD Project
3 * All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
27 #include <sys/cdefs.h>
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/malloc.h>
32 #include <vm/vm.h>
33 #include <vm/pmap.h>
34 #include <machine/_inttypes.h>
35 #include <machine/specialreg.h>
36 #include <machine/segments.h>
37 #include <machine/vmcb.h>
/*
 * Sizes, in bytes, of the two SVM permission bitmaps that the VMCB points at.
 * NOTE(review): it is still open whether the I/O map has to be a full
 * 12 Kbytes rather than 8 Kbytes + 1 -- confirm against the AMD manual.
 */
#define	IOPM_SIZE	(8*1024 + 1)
#define	MSRPM_SIZE	(8*1024)

/*
 * fkvm data: the permission bitmaps shared by everything in this file.
 * Both stay NULL until fkvm_load() allocates them.
 * (Should these be vm_object_t allocations instead?)
 */
static void *iopm;
static void *msrpm;

/* per-guest data */

/* VCPU data: host save area and the single VMCB, also set up by fkvm_load() */
static void *hsave_area;
static struct vmcb *vmcb;
/*
 * Indices into vcpu.regs[].  The sixteen general purpose registers occupy
 * slots 0 through 15 in the order listed, RIP takes the next slot, and
 * NR_VCPU_REGS is the resulting array length.
 */
enum {
	VCPU_REGS_RAX,		/* 0 */
	VCPU_REGS_RCX,
	VCPU_REGS_RDX,
	VCPU_REGS_RBX,
	VCPU_REGS_RSP,
	VCPU_REGS_RBP,
	VCPU_REGS_RSI,
	VCPU_REGS_RDI,
	VCPU_REGS_R8,		/* 8 */
	VCPU_REGS_R9,
	VCPU_REGS_R10,
	VCPU_REGS_R11,
	VCPU_REGS_R12,
	VCPU_REGS_R13,
	VCPU_REGS_R14,
	VCPU_REGS_R15,		/* 15 */
	VCPU_REGS_RIP,
	NR_VCPU_REGS
};
73 static struct vcpu {
74 unsigned long vmcb_pa;
75 unsigned long regs[NR_VCPU_REGS];
76 u_int64_t host_gs_base;
77 u_int64_t cr2;
78 u_int64_t cr3;
79 } vcpu;
/*
 * Raw encodings of the SVM instructions, spliced into inline assembly as
 * .byte sequences (presumably so the assembler does not need to know the
 * mnemonics).  Listed in order of the final opcode byte.
 */
#define	SVM_VMRUN	".byte 0x0f, 0x01, 0xd8"
#define	SVM_VMLOAD	".byte 0x0f, 0x01, 0xda"
#define	SVM_VMSAVE	".byte 0x0f, 0x01, 0xdb"
#define	SVM_STGI	".byte 0x0f, 0x01, 0xdc"
#define	SVM_CLGI	".byte 0x0f, 0x01, 0xdd"
#define	SVM_INVLPGA	".byte 0x0f, 0x01, 0xdf"
89 static void
90 fkvm_vcpu_run(struct vcpu *vcpu, struct vmcb *vmcb)
92 u_short fs_selector;
93 u_short gs_selector;
94 u_short ldt_selector;
95 unsigned long host_cr2;
96 unsigned long host_dr6;
97 unsigned long host_dr7;
99 vcpu->vmcb_pa = vtophys(vmcb);
101 vmcb->save.rax = vcpu->regs[VCPU_REGS_RAX];
102 vmcb->save.rsp = vcpu->regs[VCPU_REGS_RSP];
103 vmcb->save.rip = vcpu->regs[VCPU_REGS_RIP];
105 /* meh: kvm has pre_svm_run(svm); */
107 vcpu->host_gs_base = rdmsr(MSR_GSBASE);
109 fs_selector = rfs();
110 gs_selector = rgs();
111 ldt_selector = rldt();
113 host_cr2 = rcr2();
115 host_dr6 = rdr6();
116 host_dr7 = rdr7();
118 vmcb->save.cr2 = vcpu->cr2;
119 /* meh: cr3? */
121 /* meh: dr7? db_regs? */
123 __asm __volatile (SVM_CLGI);
125 enable_intr();
127 #define R "r"
128 __asm __volatile (
129 "push %%"R"bp; \n\t"
130 "mov %c[rbx](%[svm]), %%"R"bx \n\t"
131 "mov %c[rcx](%[svm]), %%"R"cx \n\t"
132 "mov %c[rdx](%[svm]), %%"R"dx \n\t"
133 "mov %c[rsi](%[svm]), %%"R"si \n\t"
134 "mov %c[rdi](%[svm]), %%"R"di \n\t"
135 "mov %c[rbp](%[svm]), %%"R"bp \n\t"
136 "mov %c[r8](%[svm]), %%r8 \n\t"
137 "mov %c[r9](%[svm]), %%r9 \n\t"
138 "mov %c[r10](%[svm]), %%r10 \n\t"
139 "mov %c[r11](%[svm]), %%r11 \n\t"
140 "mov %c[r12](%[svm]), %%r12 \n\t"
141 "mov %c[r13](%[svm]), %%r13 \n\t"
142 "mov %c[r14](%[svm]), %%r14 \n\t"
143 "mov %c[r15](%[svm]), %%r15 \n\t"
145 /* Enter guest mode */
146 "push %%"R"ax \n\t"
147 "mov %c[vmcb](%[svm]), %%"R"ax \n\t"
148 SVM_VMLOAD "\n\t"
149 SVM_VMRUN "\n\t"
150 SVM_VMSAVE "\n\t"
151 "pop %%"R"ax \n\t"
153 /* Save guest registers, load host registers */
154 "mov %%"R"bx, %c[rbx](%[svm]) \n\t"
155 "mov %%"R"cx, %c[rcx](%[svm]) \n\t"
156 "mov %%"R"dx, %c[rdx](%[svm]) \n\t"
157 "mov %%"R"si, %c[rsi](%[svm]) \n\t"
158 "mov %%"R"di, %c[rdi](%[svm]) \n\t"
159 "mov %%"R"bp, %c[rbp](%[svm]) \n\t"
160 "mov %%r8, %c[r8](%[svm]) \n\t"
161 "mov %%r9, %c[r9](%[svm]) \n\t"
162 "mov %%r10, %c[r10](%[svm]) \n\t"
163 "mov %%r11, %c[r11](%[svm]) \n\t"
164 "mov %%r12, %c[r12](%[svm]) \n\t"
165 "mov %%r13, %c[r13](%[svm]) \n\t"
166 "mov %%r14, %c[r14](%[svm]) \n\t"
167 "mov %%r15, %c[r15](%[svm]) \n\t"
168 "pop %%"R"bp"
170 : [svm]"a"(vcpu),
171 [vmcb]"i"(offsetof(struct vcpu, vmcb_pa)),
172 [rbx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBX])),
173 [rcx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RCX])),
174 [rdx]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDX])),
175 [rsi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RSI])),
176 [rdi]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RDI])),
177 [rbp]"i"(offsetof(struct vcpu, regs[VCPU_REGS_RBP])),
178 [r8 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R8 ])),
179 [r9 ]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R9 ])),
180 [r10]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R10])),
181 [r11]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R11])),
182 [r12]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R12])),
183 [r13]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R13])),
184 [r14]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R14])),
185 [r15]"i"(offsetof(struct vcpu, regs[VCPU_REGS_R15]))
186 : "cc", "memory",
187 R"bx", R"cx", R"dx", R"si", R"di",
188 "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
192 /* meh: dr7? db_regs? */
194 vcpu->cr2 = vmcb->save.cr2;
196 vcpu->regs[VCPU_REGS_RAX] = vmcb->save.rax;
197 vcpu->regs[VCPU_REGS_RSP] = vmcb->save.rsp;
198 vcpu->regs[VCPU_REGS_RIP] = vmcb->save.rip;
200 load_dr6(host_dr6);
201 load_dr7(host_dr7);
203 load_cr2(host_cr2);
205 load_fs(fs_selector);
206 load_gs(gs_selector);
207 lldt(ldt_selector);
209 wrmsr(MSR_GSBASE, vcpu->host_gs_base);
211 /* TODO
212 tss_desc->type = 9;
213 ltr(GSEL(GPROC0_SEL, SEL_KPL));
216 disable_intr();
218 __asm __volatile (SVM_STGI);
220 /* meh: next_rip */
223 static void
224 _fkvm_init_seg(struct vmcb_seg *seg, uint16_t attrib)
226 seg->selector = 0;
227 seg->attrib = VMCB_SELECTOR_P_MASK | attrib;
228 seg->limit = 0xffff;
229 seg->base = 0;
232 static inline void
233 fkvm_init_seg(struct vmcb_seg *seg)
235 _fkvm_init_seg(seg, VMCB_SELECTOR_S_MASK | VMCB_SELECTOR_WRITE_MASK);
238 static inline void
239 fkvm_init_sys_seg(struct vmcb_seg *seg, uint16_t attrib)
241 _fkvm_init_seg(seg, attrib);
244 static void
245 fkvm_iopm_init(void *iopm)
247 memset(iopm, 0xff, IOPM_SIZE); /* TODO: we may want to allow access to PC debug port */
250 static void
251 fkvm_msrpm_init(void *msrpm)
253 memset(msrpm, 0xff, MSRPM_SIZE); /* TODO: we may want to allow some MSR accesses */
256 static void
257 fkvm_vmcb_init(struct vmcb *vmcb)
259 struct vmcb_control_area *control = &vmcb->control;
260 struct vmcb_save_area *save = &vmcb->save;
262 control->intercept_cr_reads = INTERCEPT_CR4_MASK;
264 control->intercept_cr_writes = INTERCEPT_CR4_MASK |
265 INTERCEPT_CR8_MASK;
267 control->intercept_dr_reads = INTERCEPT_DR0_MASK |
268 INTERCEPT_DR1_MASK |
269 INTERCEPT_DR2_MASK |
270 INTERCEPT_DR3_MASK;
272 control->intercept_dr_writes = INTERCEPT_DR0_MASK |
273 INTERCEPT_DR1_MASK |
274 INTERCEPT_DR2_MASK |
275 INTERCEPT_DR3_MASK |
276 INTERCEPT_DR5_MASK |
277 INTERCEPT_DR7_MASK;
279 control->intercept_exceptions = (1 << IDT_UD) | // Invalid Opcode
280 (1 << IDT_MC); // Machine Check
282 control->intercepts = INTERCEPT_INTR |
283 INTERCEPT_NMI |
284 INTERCEPT_SMI |
285 INTERCEPT_CPUID |
286 INTERCEPT_INVD |
287 INTERCEPT_HLT |
288 INTERCEPT_INVLPGA |
289 INTERCEPT_IOIO_PROT |
290 INTERCEPT_MSR_PROT |
291 INTERCEPT_SHUTDOWN |
292 INTERCEPT_VMRUN |
293 INTERCEPT_VMMCALL |
294 INTERCEPT_VMLOAD |
295 INTERCEPT_VMSAVE |
296 INTERCEPT_STGI |
297 INTERCEPT_CLGI |
298 INTERCEPT_SKINIT |
299 INTERCEPT_WBINVD |
300 INTERCEPT_MONITOR |
301 INTERCEPT_MWAIT_UNCOND;
303 control->iopm_base_pa = vtophys(iopm);
304 control->msrpm_base_pa = vtophys(msrpm);
305 control->tsc_offset = 0;
307 /* TODO: remove this once we assign asid's to distinct VM's */
308 control->guest_asid = 1;
309 control->tlb_control = VMCB_TLB_CONTROL_FLUSH_ALL;
311 control->v_intr_masking = 1;
312 control->nested_ctl = 1;
314 fkvm_init_seg(&save->es);
315 fkvm_init_seg(&save->ss);
316 fkvm_init_seg(&save->ds);
317 fkvm_init_seg(&save->fs);
318 fkvm_init_seg(&save->gs);
320 _fkvm_init_seg(&save->cs, VMCB_SELECTOR_READ_MASK | VMCB_SELECTOR_S_MASK |
321 VMCB_SELECTOR_CODE_MASK);
322 save->cs.selector = 0xf000;
323 save->cs.base = 0xffff0000;
325 save->gdtr.limit = 0xffff;
326 save->idtr.limit = 0xffff;
328 fkvm_init_sys_seg(&save->ldtr, SDT_SYSLDT);
329 fkvm_init_sys_seg(&save->tr, SDT_SYS286BSY);
331 save->g_pat = PAT_VALUE(PAT_WRITE_BACK, 0) | PAT_VALUE(PAT_WRITE_THROUGH, 1) |
332 PAT_VALUE(PAT_UNCACHED, 2) | PAT_VALUE(PAT_UNCACHEABLE, 3) |
333 PAT_VALUE(PAT_WRITE_BACK, 4) | PAT_VALUE(PAT_WRITE_THROUGH, 5) |
334 PAT_VALUE(PAT_UNCACHED, 6) | PAT_VALUE(PAT_UNCACHEABLE, 7);
335 save->dr6 = 0xffff0ff0;
336 save->dr7 = 0x400;
337 //save->rflags = 2; /* It seems like bit 1 is reserved. This line makes no sense. */
338 save->rip = 0x0000fff0;
340 fkvm_vcpu_run(&vcpu, vmcb);
343 static void
344 fkvm_load(void *unused)
346 u_int64_t efer;
348 printf("fkvm_load\n");
349 printf("sizeof(struct vmcb) = %" PRIx64 "\n", sizeof(struct vmcb));
351 /* TODO: check for the presense of extensions */
353 hsave_area = contigmalloc(PAGE_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
354 if(hsave_area == NULL)
355 return;
357 vmcb = (struct vmcb *)contigmalloc(PAGE_SIZE, M_DEVBUF, M_ZERO, 0, -1UL, PAGE_SIZE, 0);
358 if(vmcb == NULL)
359 goto errout0;
361 iopm = contigmalloc(IOPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
362 if(iopm == NULL)
363 goto errout1;
365 msrpm = contigmalloc(MSRPM_SIZE, M_DEVBUF, 0, 0, -1UL, PAGE_SIZE, 0);
366 if(msrpm == NULL)
367 goto errout2;
369 /* Initialize iopm and msrpm */
370 fkvm_iopm_init(iopm);
371 fkvm_msrpm_init(msrpm);
373 /* Initialize VMCB */
374 fkvm_vmcb_init(vmcb);
376 /* Enable SVM in EFER */
377 efer = rdmsr(MSR_EFER);
378 printf("EFER = %" PRIx64 "\n", efer);
379 wrmsr(MSR_EFER, efer | EFER_SVME);
380 efer = rdmsr(MSR_EFER);
381 printf("new EFER = %" PRIx64 "\n", efer);
383 /* Write Host save address in MSR_VM_HSAVE_PA */
384 wrmsr(MSR_VM_HSAVE_PA, vtophys(hsave_area));
386 /* VMRUN */
388 return;
390 errout2:
391 contigfree(iopm, IOPM_SIZE, M_DEVBUF);
392 iopm = NULL;
393 errout1:
394 contigfree(vmcb, PAGE_SIZE, M_DEVBUF);
395 vmcb = NULL;
396 errout0:
397 contigfree(hsave_area, PAGE_SIZE, M_DEVBUF);
398 vmcb = NULL;
400 SYSINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_load, NULL);
402 static void
403 fkvm_unload(void *unused)
405 printf("fkvm_unload\n");
406 /* TODO */
408 if(msrpm != NULL)
409 contigfree(msrpm, MSRPM_SIZE, M_DEVBUF);
411 if(iopm != NULL)
412 contigfree(iopm, IOPM_SIZE, M_DEVBUF);
414 if(vmcb != NULL)
415 contigfree(vmcb, PAGE_SIZE, M_DEVBUF);
417 if(hsave_area != NULL)
418 contigfree(hsave_area, PAGE_SIZE, M_DEVBUF);
420 SYSUNINIT(fkvm, SI_SUB_PSEUDO, SI_ORDER_MIDDLE, fkvm_unload, NULL);