vkernel - Sync to recent API changes
sys/platform/vkernel64/platform/pmap_inval.c

/*
 * Copyright (c) 2003-2016 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/platform/vkernel/platform/pmap_inval.c,v 1.4 2007/07/02 02:22:58 dillon Exp $
 */

/*
 * pmap invalidation support code.  Certain hardware requirements must
 * be dealt with when manipulating page table entries and page directory
 * entries within a pmap.  In particular, we cannot safely manipulate
 * page tables which are in active use by another cpu (even if it is
 * running in userland) for two reasons: First, TLB writebacks will
 * race against our own modifications and tests.  Second, even if we
 * were to use bus-locked instructions we could still screw up the
 * target cpu's instruction pipeline due to Intel cpu errata.
 *
 * For our virtual page tables, the real kernel will handle SMP interactions
 * with pmaps that may be active on other cpus.  Even so, we have to be
 * careful about bit-setting races, particularly when we are trying to clean
 * a page and test the modified bit, to avoid races where the modified bit
 * might get set after our poll but before we clear the field.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
#include <sys/thread2.h>
#include <sys/cdefs.h>
#include <sys/mman.h>
#include <sys/vmspace.h>
#include <sys/vmm.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_object.h>

#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
#include <machine/smp.h>
#include <machine/globaldata.h>
#include <machine/pmap.h>
#include <machine/pmap_inval.h>

#include <unistd.h>
#include <pthread.h>

#include <vm/vm_page2.h>

extern int vmm_enabled;

/*
 * Invalidate the TLB on the current cpu.
 *
 * (VMM enabled only)
 */
static __inline
void
vmm_cpu_invltlb(void)
{
#if 0
        /* not directly supported */
        cpu_invltlb();
#else
        /* vmm_guest_sync_addr(NULL, NULL); */
        /* For VMM mode forces vmmexit/resume */
        uint64_t rax = -1;
        __asm __volatile("syscall;"
                        :
                        : "a" (rax)
                        :);
#endif
}
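
/*
 * Invalidate a single page on the current cpu (VMM enabled only).
 * A per-page invalidation is not available here, so simply fall
 * back to a full TLB invalidation.
 */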
static __inline
void
vmm_cpu_invlpg(void *addr __unused)
{
        vmm_cpu_invltlb();
}

/*
 * Invalidate va in the TLB on the current cpu.
 *
 * (VMM disabled only)
 */
static __inline
void
pmap_inval_cpu(struct pmap *pmap, vm_offset_t va, size_t bytes)
{
        if (pmap == &kernel_pmap) {
                madvise((void *)va, bytes, MADV_INVAL);
        } else {
                vmspace_mcontrol(pmap, (void *)va, bytes, MADV_INVAL, 0);
        }
}

/*
 * This is a bit of a mess because we don't know what virtual cpus are
 * mapped to real cpus.  Basically try to optimize the degenerate cases
 * (primarily related to user processes with only one thread or only one
 * running thread), and shunt all the rest to the host cpu.  The host cpu
 * will invalidate all real cpus the vkernel is running on.
 *
 * This can't optimize situations where a pmap is only mapped to some of
 * the virtual cpus, though shunting to the real host will still be faster
 * if the virtual kernel processes are running on fewer real-host cpus.
 * (And it will probably be faster anyway, since there is no round-trip
 * signaling overhead.)
 *
 * NOTE: The critical section protects against preemption while the pmap
 *       is locked, which could otherwise result in a deadlock.
 */
static __inline
void
guest_sync_addr(struct pmap *pmap, volatile vpte_t *ptep, vpte_t *srcv)
{
        globaldata_t gd = mycpu;
        cpulock_t olock;
        cpulock_t nlock;

        /*
         * Lock the pmap
         */
        crit_enter();
        for (;;) {
                olock = pmap->pm_active_lock;
                cpu_ccfence();
                if ((olock & CPULOCK_EXCL) == 0) {
                        nlock = olock | CPULOCK_EXCL;
                        if (atomic_cmpset_int(&pmap->pm_active_lock,
                                              olock, nlock)) {
                                break;
                        }
                }
                cpu_pause();
                lwkt_process_ipiq();
                pthread_yield();
        }

        /*
         * Update the pte and synchronize with other cpus.  If we can update
         * it trivially, do so.
         */
        if (CPUMASK_TESTZERO(pmap->pm_active) ||
            CPUMASK_CMPMASKEQ(pmap->pm_active, gd->gd_cpumask)) {
                if (ptep)
                        *srcv = atomic_swap_long(ptep, *srcv);
                vmm_cpu_invltlb();
        } else {
                vmm_guest_sync_addr(__DEVOLATILE(void *, ptep), srcv);
        }

        /*
         * Unlock the pmap
         */
        atomic_clear_int(&pmap->pm_active_lock, CPULOCK_EXCL);
        crit_exit();
}

/*
 * Invalidate a pte in a pmap and synchronize with target cpus
 * as required.  Throw away the modified and access bits.  Use
 * pmap_clean_pte() to do the same thing but also get an interlocked
 * modified/access status.
 *
 * Clearing the field first (basically clearing VPTE_V) prevents any
 * new races from occurring while we invalidate the TLB (i.e. the pmap
 * on the real cpu); we then clear it again to clean out any race that
 * might have occurred before the invalidation completed.
 */
void
pmap_inval_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
        vpte_t pte;

        if (vmm_enabled == 0) {
                atomic_swap_long(ptep, 0);
                pmap_inval_cpu(pmap, va, PAGE_SIZE);
        } else {
                pte = 0;
                guest_sync_addr(pmap, ptep, &pte);
        }
}

/*
 * Same as pmap_inval_pte() but only synchronize with the current
 * cpu.  For the moment it is the same as the non-quick version.
 */
void
pmap_inval_pte_quick(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
        atomic_swap_long(ptep, 0);
        if (vmm_enabled == 0)
                pmap_inval_cpu(pmap, va, PAGE_SIZE);
        else
                vmm_cpu_invltlb();
}

/*
 * Invalidate the TLB for a range of virtual addresses across all cpus
 * belonging to the pmap.
 */
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
        if (vmm_enabled == 0) {
                pmap_inval_cpu(pmap, sva, eva - sva);
        } else {
                guest_sync_addr(pmap, NULL, NULL);
        }
}

/*
 * Invalidating page directory entries requires some additional
 * sophistication.  The cachemask must be cleared so the kernel
 * resynchronizes its temporary page table mappings cache.
 */
void
pmap_inval_pde(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
        vpte_t pte;

        if (vmm_enabled == 0) {
                atomic_swap_long(ptep, 0);
                pmap_inval_cpu(pmap, va, SEG_SIZE);
        } else if (CPUMASK_TESTMASK(pmap->pm_active,
                                    mycpu->gd_other_cpus) == 0) {
                atomic_swap_long(ptep, 0);
                vmm_cpu_invltlb();
        } else {
                pte = 0;
                guest_sync_addr(pmap, ptep, &pte);
        }
}
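
/*
 * Quick version of pmap_inval_pde().  For the moment it simply
 * falls through to the full version.
 */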
void
pmap_inval_pde_quick(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va)
{
        pmap_inval_pde(ptep, pmap, va);
}

/*
 * This is really nasty.
 *
 * (1) The vkernel interlocks pte operations with the related vm_page_t
 *     spin-lock (and doesn't handle unmanaged page races).
 *
 * (2) The vkernel must also issue an invalidation to the real cpu.  It
 *     (nastily) does this while holding the spin-lock too.
 *
 * In addition, atomic ops must be used to properly interlock against
 * other cpus and the real kernel (which could be taking a fault on another
 * cpu and will adjust VPTE_M and VPTE_A appropriately).
 *
 * The atomic ops do a good job of interlocking against other cpus, but
 * we still need to lock the pte location (which we use the vm_page
 * spin-lock for) to avoid races against PG_WRITEABLE and other tests.
 *
 * Cleaning the pte involves clearing VPTE_M and VPTE_RW, synchronizing with
 * the real host, and updating the vm_page appropriately.
 *
 * If the caller passes a non-NULL (m), the caller holds the spin-lock,
 * otherwise we must acquire and release the spin-lock.  (m) is only
 * applicable to managed pages.
 */
vpte_t
pmap_clean_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va,
               vm_page_t m)
{
        vpte_t pte;
        int spin = 0;

        /*
         * Acquire (m) and spin-lock it.
         */
        while (m == NULL) {
                pte = *ptep;
                if ((pte & VPTE_V) == 0)
                        return pte;
                if ((pte & VPTE_MANAGED) == 0)
                        break;
                m = PHYS_TO_VM_PAGE(pte & VPTE_FRAME);
                vm_page_spin_lock(m);
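
                /*
                 * Re-check the pte now that (m) is spin-locked.  If it
                 * was invalidated, became unmanaged, or no longer points
                 * at (m), drop the lock and retry.
                 */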
                pte = *ptep;
                if ((pte & VPTE_V) == 0) {
                        vm_page_spin_unlock(m);
                        m = NULL;
                        continue;
                }
                if ((pte & VPTE_MANAGED) == 0) {
                        vm_page_spin_unlock(m);
                        m = NULL;
                        continue;
                }
                if (m != PHYS_TO_VM_PAGE(pte & VPTE_FRAME)) {
                        vm_page_spin_unlock(m);
                        m = NULL;
                        continue;
                }
                spin = 1;
                break;
        }
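
        /*
         * Clear VPTE_RW and VPTE_M.  Without VMM we loop on a cmpset so
         * the host-side invalidation only occurs if we actually
         * transitioned the pte; with VMM the update is pushed through
         * guest_sync_addr().
         */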
        if (vmm_enabled == 0) {
                for (;;) {
                        pte = *ptep;
                        cpu_ccfence();
                        if ((pte & VPTE_RW) == 0)
                                break;
                        if (atomic_cmpset_long(ptep,
                                               pte,
                                               pte & ~(VPTE_RW | VPTE_M))) {
                                pmap_inval_cpu(pmap, va, PAGE_SIZE);
                                break;
                        }
                }
        } else {
                pte = *ptep & ~(VPTE_RW | VPTE_M);
                guest_sync_addr(pmap, ptep, &pte);
        }

        if (m) {
                if (pte & VPTE_A) {
                        vm_page_flag_set(m, PG_REFERENCED);
                        atomic_clear_long(ptep, VPTE_A);
                }
                if (pte & VPTE_M) {
                        if (pmap_track_modified(pmap, va))
                                vm_page_dirty(m);
                }
                if (spin)
                        vm_page_spin_unlock(m);
        }
        return pte;
}

/*
 * This is a combination of pmap_inval_pte() and pmap_clean_pte().
 * First prevent races with the 'A' and 'M' bits, then clean out
 * the TLB (the real cpu's pmap), then incorporate any races that
 * may have occurred in the meantime, and finally zero out the pte.
 */
vpte_t
pmap_inval_loadandclear(volatile vpte_t *ptep, struct pmap *pmap,
                        vm_offset_t va)
{
        vpte_t pte;

        if (vmm_enabled == 0) {
                pte = atomic_swap_long(ptep, 0);
                pmap_inval_cpu(pmap, va, PAGE_SIZE);
        } else {
                pte = 0;
                guest_sync_addr(pmap, ptep, &pte);
        }
        return(pte);
}
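
/*
 * Invalidate a single page in the current cpu's TLB.  With VMM disabled
 * this asks the real host kernel to invalidate the mapping via
 * madvise(MADV_INVAL).
 */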
void
cpu_invlpg(void *addr)
{
        if (vmm_enabled)
                vmm_cpu_invlpg(addr);
        else
                madvise(addr, PAGE_SIZE, MADV_INVAL);
}
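
/*
 * Invalidate the current cpu's TLB for the entire kernel virtual
 * address range.
 */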
void
cpu_invltlb(void)
{
        if (vmm_enabled)
                vmm_cpu_invltlb();      /* For VMM mode forces vmmexit/resume */
        else
                madvise((void *)KvaStart, KvaEnd - KvaStart, MADV_INVAL);
}

/*
 * Invalidate the TLB on all cpus.  Instead of doing this, the vkernel
 * simply ignores VM_PROT_NOSYNC on pmap_enter() calls.
 */
void
smp_invltlb(void)
{
        /* do nothing */
}

void
smp_sniff(void)
{
        /* not implemented */
}

void
cpu_sniff(int dcpu __unused)
{
        /* not implemented */
}