kernel - Fix a system lockup with vmm
[dragonfly.git] / sys / platform / pc64 / vmm / ept.c
blob: 214b67bfd76621696ccf7c51046b59c9bb5cb432
/*
 * Copyright (c) 2003-2013 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Mihai Carabas <mihai.carabas@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/systm.h>
#include <sys/sfbuf.h>
#include <sys/proc.h>
#include <sys/thread.h>

#include <machine/pmap.h>
#include <machine/specialreg.h>
#include <machine/cpufunc.h>
#include <machine/vmm.h>

#include <vm/vm_extern.h>
#include <vm/vm_map.h>

#include "vmx.h"
#include "ept.h"
#include "vmm_utils.h"
#include "vmm.h"
static uint64_t pmap_bits_ept[PG_BITS_SIZE];
static pt_entry_t pmap_cache_bits_ept[PAT_INDEX_SIZE];
static int ept_protection_codes[PROTECTION_CODES_SIZE];
static pt_entry_t pmap_cache_mask_ept;

static int pmap_pm_flags_ept = PMAP_HVM;
static int eptp_bits;

extern uint64_t vmx_ept_vpid_cap;
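
/* Probe the CPU's EPT capabilities and build the EPT variants of the
 * pmap bit, cache and protection tables.  Returns EINVAL if the CPU
 * does not support a 4-level EPT page walk with write-back memory type.
 */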
int
vmx_ept_init(void)
{
	int prot;
	/* Chapter 28 VMX SUPPORT FOR ADDRESS TRANSLATION
	 * Intel Manual 3c, page 107
	 */
	vmx_ept_vpid_cap = rdmsr(IA32_VMX_EPT_VPID_CAP);

	if (!EPT_PWL4(vmx_ept_vpid_cap) ||
	    !EPT_MEMORY_TYPE_WB(vmx_ept_vpid_cap)) {
		return EINVAL;
	}

	eptp_bits |= EPTP_CACHE(PAT_WRITE_BACK) |
	    EPTP_PWLEN(EPT_PWLEVELS - 1);

	if (EPT_AD_BITS_SUPPORTED(vmx_ept_vpid_cap)) {
		eptp_bits |= EPTP_AD_ENABLE;
	} else {
		pmap_pm_flags_ept |= PMAP_EMULATE_AD_BITS;
	}

	/* Initialize EPT bits
	 * - for PG_V - set READ and EXECUTE to preserve compatibility
	 * - for PG_U and PG_G - set 0 to preserve compatibility
	 * - for PG_N - set the Uncacheable bit
	 */
	pmap_bits_ept[TYPE_IDX] = EPT_PMAP;
	pmap_bits_ept[PG_V_IDX] = EPT_PG_READ | EPT_PG_EXECUTE;
	pmap_bits_ept[PG_RW_IDX] = EPT_PG_WRITE;
	pmap_bits_ept[PG_PS_IDX] = EPT_PG_PS;
	pmap_bits_ept[PG_G_IDX] = 0;
	pmap_bits_ept[PG_U_IDX] = 0;
	pmap_bits_ept[PG_A_IDX] = EPT_PG_A;
	pmap_bits_ept[PG_M_IDX] = EPT_PG_M;
	pmap_bits_ept[PG_W_IDX] = EPT_PG_AVAIL1;
	pmap_bits_ept[PG_MANAGED_IDX] = EPT_PG_AVAIL2;
	pmap_bits_ept[PG_DEVICE_IDX] = EPT_PG_AVAIL3;
	pmap_bits_ept[PG_N_IDX] = EPT_IGNORE_PAT | EPT_MEM_TYPE_UC;

	pmap_cache_mask_ept = EPT_IGNORE_PAT | EPT_MEM_TYPE_MASK;

	pmap_cache_bits_ept[PAT_UNCACHEABLE] = EPT_IGNORE_PAT | EPT_MEM_TYPE_UC;
	pmap_cache_bits_ept[PAT_WRITE_COMBINING] = EPT_IGNORE_PAT | EPT_MEM_TYPE_WC;
	pmap_cache_bits_ept[PAT_WRITE_THROUGH] = EPT_IGNORE_PAT | EPT_MEM_TYPE_WT;
	pmap_cache_bits_ept[PAT_WRITE_PROTECTED] = EPT_IGNORE_PAT | EPT_MEM_TYPE_WP;
	pmap_cache_bits_ept[PAT_WRITE_BACK] = EPT_IGNORE_PAT | EPT_MEM_TYPE_WB;
	pmap_cache_bits_ept[PAT_UNCACHED] = EPT_IGNORE_PAT | EPT_MEM_TYPE_UC;

	for (prot = 0; prot < PROTECTION_CODES_SIZE; prot++) {
		switch (prot) {
		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
			ept_protection_codes[prot] = 0;
			break;
		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
			ept_protection_codes[prot] = pmap_bits_ept[PG_RW_IDX];
			break;
		}
	}

	return 0;
}
/* Build the VMCS_EPTP pointer
 * - the ept_address
 * - the EPTP bits indicating optional features
 */
uint64_t
vmx_eptp(uint64_t ept_address)
{
	return (ept_address | eptp_bits);
}
/* Copyin from guest VMM */
static int
ept_copyin(const void *udaddr, void *kaddr, size_t len)
{
	struct lwbuf *lwb;
	struct lwbuf lwb_cache;
	vm_page_t m;
	register_t gpa;
	size_t n;
	int err = 0;
	struct vmspace *vm = curproc->p_vmspace;
	struct vmx_thread_info *vti = curthread->td_vmm;
	register_t guest_cr3 = vti->guest_cr3;

	while (len) {
		/* Get the GPA by manually walking the GUEST page table */
		err = guest_phys_addr(vm, &gpa, guest_cr3, (vm_offset_t)udaddr);
		if (err) {
			kprintf("%s: could not get guest_phys_addr\n", __func__);
			break;
		}

		m = vm_fault_page(&vm->vm_map, trunc_page(gpa),
				  VM_PROT_READ, VM_FAULT_NORMAL, &err);
		if (err) {
			if (vmm_debug) {
				kprintf("%s: could not fault in vm map, gpa: %llx\n",
				    __func__, (unsigned long long)gpa);
			}
			break;
		}

		n = PAGE_SIZE - ((vm_offset_t)udaddr & PAGE_MASK);
		if (n > len)
			n = len;

		lwb = lwbuf_alloc(m, &lwb_cache);
		bcopy((char *)lwbuf_kva(lwb) +
		    ((vm_offset_t)udaddr & PAGE_MASK), kaddr, n);
		len -= n;
		udaddr = (const char *)udaddr + n;
		kaddr = (char *)kaddr + n;
		lwbuf_free(lwb);
		vm_page_unhold(m);
	}
	if (err)
		err = EFAULT;
	return (err);
}
/* Copyout from guest VMM */
static int
ept_copyout(const void *kaddr, void *udaddr, size_t len)
{
	struct lwbuf *lwb;
	struct lwbuf lwb_cache;
	vm_page_t m;
	register_t gpa;
	size_t n;
	int err = 0;
	struct vmspace *vm = curproc->p_vmspace;
	struct vmx_thread_info *vti = curthread->td_vmm;
	register_t guest_cr3 = vti->guest_cr3;

	while (len) {
		/* Get the GPA by manually walking the GUEST page table */
		err = guest_phys_addr(vm, &gpa, guest_cr3, (vm_offset_t)udaddr);
		if (err) {
			kprintf("%s: could not get guest_phys_addr\n", __func__);
			break;
		}

		m = vm_fault_page(&vm->vm_map, trunc_page(gpa),
				  VM_PROT_READ | VM_PROT_WRITE,
				  VM_FAULT_NORMAL, &err);
		if (err) {
			if (vmm_debug) {
				kprintf("%s: could not fault in vm map, gpa: %llx\n",
				    __func__, (unsigned long long)gpa);
			}
			break;
		}

		n = PAGE_SIZE - ((vm_offset_t)udaddr & PAGE_MASK);
		if (n > len)
			n = len;

		lwb = lwbuf_alloc(m, &lwb_cache);
		bcopy(kaddr, (char *)lwbuf_kva(lwb) +
		    ((vm_offset_t)udaddr & PAGE_MASK), n);

		len -= n;
		udaddr = (char *)udaddr + n;
		kaddr = (const char *)kaddr + n;
		vm_page_dirty(m);
#if 0
		/* should not be needed */
		cpu_invlpg((char *)lwbuf_kva(lwb) +
		    ((vm_offset_t)udaddr & PAGE_MASK));
#endif
		lwbuf_free(lwb);
		vm_page_unhold(m);
	}
	if (err)
		err = EFAULT;
	return (err);
}
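
/* Copy in a NUL-terminated string from the guest, at most len bytes,
 * working in chunks of up to 32 bytes.  If res is non-NULL it returns
 * the number of bytes copied, including the terminating NUL.  Returns
 * ENAMETOOLONG if no NUL is found within len bytes.
 */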
static int
ept_copyinstr(const void *udaddr, void *kaddr, size_t len, size_t *res)
{
	int error;
	size_t n;
	const char *uptr = udaddr;
	char *kptr = kaddr;

	if (res)
		*res = 0;
	while (len) {
		n = PAGE_SIZE - ((vm_offset_t)uptr & PAGE_MASK);
		if (n > 32)
			n = 32;
		if (n > len)
			n = len;
		if ((error = ept_copyin(uptr, kptr, n)) != 0)
			return(error);
		while (n) {
			if (res)
				++*res;
			if (*kptr == 0)
				return(0);
			++kptr;
			++uptr;
			--n;
			--len;
		}
	}
	return(ENAMETOOLONG);
}
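
/* Fetch a single byte from the guest address space.
 * Returns the byte on success, -1 on fault.
 */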
static int
ept_fubyte(const void *base)
{
	unsigned char c = 0;

	if (ept_copyin(base, &c, 1) == 0)
		return((int)c);
	return(-1);
}
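
/* Store a single byte to the guest address space.
 * Returns 0 on success, -1 on fault.
 */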
static int
ept_subyte(void *base, int byte)
{
	unsigned char c = byte;

	if (ept_copyout(&c, base, 1) == 0)
		return(0);
	return(-1);
}
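
/* Fetch a word (long) from the guest address space.
 * Returns the word on success, -1 on fault.
 */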
static long
ept_fuword(const void *base)
{
	long v;

	if (ept_copyin(base, &v, sizeof(v)) == 0)
		return(v);
	return(-1);
}
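
/* Store a word (long) to the guest address space.
 * Returns 0 on success, -1 on fault.
 */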
static int
ept_suword(void *base, long word)
{
	if (ept_copyout(&word, base, sizeof(word)) == 0)
		return(0);
	return(-1);
}
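
/* Store a 32-bit word to the guest address space.
 * Returns 0 on success, -1 on fault.
 */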
static int
ept_suword32(void *base, int word)
{
	if (ept_copyout(&word, base, sizeof(word)) == 0)
		return(0);
	return(-1);
}
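
/* Initialize a guest pmap for EPT operation: mark it as hardware
 * virtualized, install the EPT bit/cache/protection tables built by
 * vmx_ept_init(), and hook in the EPT-aware copyin/copyout helpers so
 * the kernel accesses guest memory through the guest page tables.
 */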
void
vmx_ept_pmap_pinit(pmap_t pmap)
{
	pmap->pm_flags |= pmap_pm_flags_ept;

	bcopy(pmap_bits_ept, pmap->pmap_bits, sizeof(pmap_bits_ept));
	bcopy(ept_protection_codes, pmap->protection_codes,
	    sizeof(ept_protection_codes));
	bcopy(pmap_cache_bits_ept, pmap->pmap_cache_bits,
	    sizeof(pmap_cache_bits_ept));
	pmap->pmap_cache_mask = pmap_cache_mask_ept;
	pmap->copyinstr = ept_copyinstr;
	pmap->copyin = ept_copyin;
	pmap->copyout = ept_copyout;
	pmap->fubyte = ept_fubyte;
	pmap->subyte = ept_subyte;
	pmap->fuword = ept_fuword;
	pmap->suword = ept_suword;
	pmap->suword32 = ept_suword32;
}