From ccd67bf614ec9cd72ba3afc5d73e88ba8be3bc70 Mon Sep 17 00:00:00 2001 From: Matthew Dillon Date: Sat, 16 Jul 2016 19:16:02 -0700 Subject: [PATCH] kernel - Refactor Xinvltlb (3) * Rollup invalidation operations for numerous kernel-related pmap, reducing the number of IPIs needed (particularly for buffer cache operations). * Implement semi-synchronous command execution, where target cpus do not need to wait for the originating cpu to execute a command. This is used for the above rollups when the related kernel memory is known to be accessed concurrently with the pmap operations. * Support invalidation of VA ranges. * Support reduction of target cpu set for semi-synchronous commands, including invltlb's, by removing idle cpus from the set when possible. --- sys/cpu/x86_64/include/cpufunc.h | 26 +-- sys/kern/sys_vmm.c | 6 +- sys/kern/vfs_bio.c | 6 +- sys/platform/pc64/include/pmap.h | 2 - sys/platform/pc64/include/pmap_inval.h | 18 +- sys/platform/pc64/x86_64/mp_machdep.c | 112 +++++-------- sys/platform/pc64/x86_64/pmap.c | 177 ++++++++++++-------- sys/platform/pc64/x86_64/pmap_inval.c | 240 ++++++++++++++++++++++----- sys/platform/vkernel64/platform/pmap.c | 26 ++- sys/platform/vkernel64/platform/pmap_inval.c | 17 +- sys/vm/pmap.h | 4 + sys/vm/vm_contig.c | 7 +- 12 files changed, 427 insertions(+), 214 deletions(-) diff --git a/sys/cpu/x86_64/include/cpufunc.h b/sys/cpu/x86_64/include/cpufunc.h index 5252b1e14d..3dedf9c63c 100644 --- a/sys/cpu/x86_64/include/cpufunc.h +++ b/sys/cpu/x86_64/include/cpufunc.h @@ -416,10 +416,6 @@ invd(void) #if defined(_KERNEL) -void smp_invltlb(void); -void smp_invlpg(cpumask_t *cmdmask); -void smp_inval_intr(void); - #ifndef _CPU_INVLPG_DEFINED /* @@ -436,26 +432,6 @@ cpu_invlpg(void *addr) #endif -#if defined(_KERNEL) -struct smp_invlpg_range_cpusync_arg { - vm_offset_t sva; - vm_offset_t eva; -}; - -void -smp_invlpg_range_cpusync(void *arg); - -static __inline void -smp_invlpg_range(cpumask_t mask, vm_offset_t sva, vm_offset_t eva) -{ - struct smp_invlpg_range_cpusync_arg arg; - - arg.sva = sva; - arg.eva = eva; - lwkt_cpusync_simple(mask, smp_invlpg_range_cpusync, &arg); -} -#endif - static __inline void cpu_nop(void) { @@ -709,6 +685,8 @@ cpu_invltlb(void) #endif +extern void smp_invltlb(void); + static __inline u_short rfs(void) { diff --git a/sys/kern/sys_vmm.c b/sys/kern/sys_vmm.c index c80c6d60da..1c2f9ff6e6 100644 --- a/sys/kern/sys_vmm.c +++ b/sys/kern/sys_vmm.c @@ -180,8 +180,10 @@ sys_vmm_guest_sync_addr(struct vmm_guest_sync_addr_args *uap) /* * Make the requested modification, wakeup any waiters. 
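 * A NULL srcaddr skips the copyin/copyout; only the cpulock is cleared
 * and any waiters are woken up.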
*/ - copyin(uap->srcaddr, &val, sizeof(long)); - copyout(&val, uap->dstaddr, sizeof(long)); + if (uap->srcaddr) { + copyin(uap->srcaddr, &val, sizeof(long)); + copyout(&val, uap->dstaddr, sizeof(long)); + } atomic_clear_int(&p->p_vmm_cpulock, CPULOCK_EXCL); wakeup(&p->p_vmm_cpulock); diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index e0281d7fdb..1a54f4dada 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -4495,7 +4495,7 @@ vm_hold_load_pages(struct buf *bp, vm_offset_t from, vm_offset_t to) vm_page_wire(p); p->valid = VM_PAGE_BITS_ALL; vm_page_flag_clear(p, PG_ZERO); - pmap_kenter(pg, VM_PAGE_TO_PHYS(p)); + pmap_kenter_noinval(pg, VM_PAGE_TO_PHYS(p)); bp->b_xio.xio_pages[index] = p; vm_page_wakeup(p); @@ -4503,6 +4503,7 @@ vm_hold_load_pages(struct buf *bp, vm_offset_t from, vm_offset_t to) ++index; } } + pmap_invalidate_range(&kernel_pmap, from, to); bp->b_xio.xio_npages = index; } @@ -4624,12 +4625,13 @@ vm_hold_free_pages(struct buf *bp, vm_offset_t from, vm_offset_t to) (long long)bp->b_loffset); } bp->b_xio.xio_pages[index] = NULL; - pmap_kremove(pg); + pmap_kremove_noinval(pg); vm_page_busy_wait(p, FALSE, "vmhldpg"); vm_page_unwire(p, 0); vm_page_free(p); } } + pmap_invalidate_range(&kernel_pmap, from, to); bp->b_xio.xio_npages = newnpages; } diff --git a/sys/platform/pc64/include/pmap.h b/sys/platform/pc64/include/pmap.h index e2bdb771c8..e3486eb10c 100644 --- a/sys/platform/pc64/include/pmap.h +++ b/sys/platform/pc64/include/pmap.h @@ -367,8 +367,6 @@ void pmap_unmapdev (vm_offset_t, vm_size_t); struct vm_page *pmap_use_pt (pmap_t, vm_offset_t); void pmap_set_opt (void); void pmap_init_pat(void); -vm_paddr_t pmap_kextract(vm_offset_t); -void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); void pmap_invalidate_cache_pages(vm_page_t *pages, int count); void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva); diff --git a/sys/platform/pc64/include/pmap_inval.h b/sys/platform/pc64/include/pmap_inval.h index 9598a9df54..539929cf99 100644 --- a/sys/platform/pc64/include/pmap_inval.h +++ b/sys/platform/pc64/include/pmap_inval.h @@ -47,12 +47,28 @@ #include #endif -pt_entry_t pmap_inval_smp(pmap_t pmap, vm_offset_t va, +typedef struct pmap_inval_bulk { + pmap_t pmap; + vm_offset_t va_beg; + vm_offset_t va_end; + long count; +} pmap_inval_bulk_t; + +pt_entry_t pmap_inval_smp(pmap_t pmap, vm_offset_t va, int npgs, pt_entry_t *ptep, pt_entry_t npte); int pmap_inval_smp_cmpset(pmap_t pmap, vm_offset_t va, pt_entry_t *ptep, pt_entry_t opte, pt_entry_t npte); int pmap_inval_intr(cpumask_t *cpumask); +void pmap_inval_bulk_init(pmap_inval_bulk_t *bulk, struct pmap *pmap); +pt_entry_t pmap_inval_bulk(pmap_inval_bulk_t *bulk, vm_offset_t va, + pt_entry_t *ptep, pt_entry_t npte); +void pmap_inval_bulk_flush(pmap_inval_bulk_t *bulk); + +void smp_smurf_idleinvlclr(cpumask_t *mask); +void smp_invlpg(cpumask_t *cmdmask); +void smp_inval_intr(void); + #endif #endif diff --git a/sys/platform/pc64/x86_64/mp_machdep.c b/sys/platform/pc64/x86_64/mp_machdep.c index 82f6403cc8..de881023be 100644 --- a/sys/platform/pc64/x86_64/mp_machdep.c +++ b/sys/platform/pc64/x86_64/mp_machdep.c @@ -178,8 +178,6 @@ cpumask_t smp_active_mask = CPUMASK_INITIALIZER_ONLYONE; cpumask_t smp_finalize_mask = CPUMASK_INITIALIZER_ONLYONE; SYSCTL_INT(_machdep, OID_AUTO, smp_active, CTLFLAG_RD, &smp_active_mask, 0, ""); -static int invl_mfence = 0; -SYSCTL_INT(_machdep, OID_AUTO, invl_mfence, CTLFLAG_RW, &invl_mfence, 0, ""); static u_int bootMP_size; static u_int report_invlpg_src; 
SYSCTL_INT(_machdep, OID_AUTO, report_invlpg_src, CTLFLAG_RW, @@ -822,78 +820,66 @@ extern cpumask_t smp_idleinvl_mask; extern cpumask_t smp_idleinvl_reqs; /* - * Atomically OR bits in *mask to smp_smurf_mask. Return the prior - * contents of smp_smurf_mask (the caller can NAND against omask to - * obtain just the bits that changed from 0->1). - * - * Atomic ops which write the same value to the target memory as already - * exists in the target memory may cause relaxed synchronization between - * cpus. - * - * omask = smp_smurf_mask - * smp_smurf_mask |= mask + * Atomically OR bits in *mask to smp_smurf_mask. Adjust *mask to remove + * bits that do not need to be IPId. These bits are still part of the command, + * but the target cpus have already been signalled and do not need to be + * sigalled again. */ #include #include static __noinline void -smp_smurf_fetchset(cpumask_t *mask, cpumask_t *omask, int frompg) +smp_smurf_fetchset(cpumask_t *mask, int frompg) { - if (invl_mfence >= 0) { - int i; - __uint64_t obits; - __uint64_t nbits; - - if (invl_mfence) - cpu_mfence(); - i = 0; - while (i < CPUMASK_ELEMENTS) { - obits = smp_smurf_mask.ary[i]; - cpu_ccfence(); - nbits = obits | mask->ary[i]; - if (atomic_cmpset_long(&smp_smurf_mask.ary[i], obits, nbits)) { - omask->ary[i] = obits; - ++i; - } + cpumask_t omask; + int i; + __uint64_t obits; + __uint64_t nbits; + + i = 0; + while (i < CPUMASK_ELEMENTS) { + obits = smp_smurf_mask.ary[i]; + cpu_ccfence(); + nbits = obits | mask->ary[i]; + if (atomic_cmpset_long(&smp_smurf_mask.ary[i], obits, nbits)) { + omask.ary[i] = obits; + ++i; } - } else { - ATOMIC_CPUMASK_ORMASK(smp_smurf_mask, *mask); - CPUMASK_ASSZERO(*omask); } - cpu_mfence(); + CPUMASK_NANDMASK(*mask, omask); } /* - * Atomically set bits in smp_idleinvl_reqs based on bits set in mask. - * Return a cpumask (omask) representing which cpus are currently idle - * and will automatically cpu_invltlb() when they wake up. + * This is a mechanism which guarantees that cpu_invltlb() will be executed + * on idle cpus without having to signal or wake them up. The invltlb will be + * executed when they wake up, prior to any scheduling or interrupt thread. * - * Bits may be set in smp_idleinvl_reqs that are not idle. The caller - * must (mask NAND omask) to reduce the callers IPI list to those cpus - * it requested which are also idle. - * - * NOTE! If the cpu idle code does not support this function, it will - * leave its bits in smp_idleinvl_mask cleared and this function - * will effectively be a NOP. + * (*mask) is modified to remove the cpus we successfully negotiate this + * function with. This function may only be used with semi-synchronous + * commands (typically invltlb's or semi-synchronous invalidations which + * are usually associated only with kernel memory). */ -static void -smp_smurf_idleinvlclr(cpumask_t *mask, cpumask_t *omask) +smp_smurf_idleinvlclr(cpumask_t *mask) { ATOMIC_CPUMASK_ORMASK(smp_idleinvl_reqs, *mask); - ATOMIC_CPUMASK_COPY(*omask, smp_idleinvl_mask); + /* cpu_lfence() not needed */ + CPUMASK_NANDMASK(*mask, smp_idleinvl_mask); } /* * Issue cpu_invltlb() across all cpus except the current cpu. + * + * This function will arrange to avoid idle cpus, but still gurantee that + * invltlb is run on them when they wake up prior to any scheduling or + * nominal interrupt. 
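+ *
+ * The deferral works through smp_smurf_idleinvlclr(): the request is
+ * recorded in smp_idleinvl_reqs and currently-idle cpus are removed from
+ * the IPI set, with those cpus issuing the invltlb from the idle code
+ * when they resume.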
*/ void smp_invltlb(void) { struct mdglobaldata *md = mdcpu; cpumask_t mask; - cpumask_t omask; unsigned long rflags; #ifdef LOOPMASK int loops; @@ -917,8 +903,7 @@ smp_invltlb(void) */ mask = smp_active_mask; CPUMASK_NANDBIT(mask, md->mi.gd_cpuid); - smp_smurf_idleinvlclr(&mask, &omask); - CPUMASK_NANDMASK(mask, omask); + smp_smurf_idleinvlclr(&mask); rflags = read_rflags(); cpu_disable_intr(); @@ -936,8 +921,7 @@ smp_invltlb(void) * NOTE: We are not signalling ourselves, mask already does NOT * include our own cpu. */ - smp_smurf_fetchset(&mask, &omask, 0); - CPUMASK_NANDMASK(mask, omask); /* mask = only 0->1 trans */ + smp_smurf_fetchset(&mask, 0); /* * Issue the IPI. Note that the XINVLTLB IPI runs regardless of @@ -986,13 +970,15 @@ smp_invltlb(void) * Should only be called from pmap_inval.c, issues the XINVLTLB IPI which * causes callbacks to be made to pmap_inval_intr() on multiple cpus, as * specified by the cpumask. Used for interlocked page invalidations. + * + * NOTE: Caller has already called smp_smurf_idleinvlclr(&mask) if the + * command it setup was semi-synchronous-safe. */ void smp_invlpg(cpumask_t *cmdmask) { struct mdglobaldata *md = mdcpu; cpumask_t mask; - cpumask_t omask; unsigned long rflags; if (report_invlpg_src > 0) { @@ -1019,8 +1005,7 @@ smp_invlpg(cpumask_t *cmdmask) */ rflags = read_rflags(); cpu_disable_intr(); - smp_smurf_fetchset(&mask, &omask, 1); - CPUMASK_NANDMASK(mask, omask); /* mask = only 0->1 trans */ + smp_smurf_fetchset(&mask, 1); /* * Issue the IPI. Note that the XINVLTLB IPI runs regardless of @@ -1078,7 +1063,8 @@ smp_inval_intr(void) * * We are going to enable interrupts so make sure we are in a * critical section. This is necessary to avoid deadlocking - * the lapic. + * the lapic and to ensure that we execute our commands prior to + * any nominal interrupt or preemption. */ cpumask = smp_invmask; crit_enter_gd(&md->mi); @@ -1092,8 +1078,6 @@ loop: /* * Specific page request(s), and we can't return until all bits * are zero. - * - * Must reenable interrupts to prevent the apic from locking up. */ for (;;) { /* @@ -1135,7 +1119,7 @@ loop: * stop here to avoid deadlocking on the hardware * IPI (another IPI will occur). */ - smp_smurf_fetchset(&md->mi.gd_cpumask, &omask); + smp_smurf_fetchset(&md->mi.gd_cpumask XXX if (CPUMASK_TESTBIT(omask, md->mi.gd_cpuid)) { break; } @@ -1190,18 +1174,6 @@ cpu_wbinvd_on_all_cpus_callback(void *arg) wbinvd(); } -void -smp_invlpg_range_cpusync(void *arg) -{ - vm_offset_t eva, sva, addr; - sva = ((struct smp_invlpg_range_cpusync_arg *)arg)->sva; - eva = ((struct smp_invlpg_range_cpusync_arg *)arg)->eva; - - for (addr = sva; addr < eva; addr += PAGE_SIZE) { - cpu_invlpg((void *)addr); - } -} - /* * When called the executing CPU will send an IPI to all other CPUs * requesting that they halt execution. 
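The pmap.c changes below convert per-pte pmap_inval_smp() calls into the bulk
API declared in pmap_inval.h above. A minimal caller-side sketch of the
intended init/collect/flush pattern follows (the function and its ptep_of()
pte-lookup helper are hypothetical and for illustration only; pmap_remove_all()
below uses the same shape). For the kernel pmap the tlb work is rolled up and
issued once by the flush; for user pmaps each pmap_inval_bulk() call remains
fully synchronous.

static void
example_remove_run(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	pmap_inval_bulk_t bulk;
	pt_entry_t opte;
	pt_entry_t *ptep;
	vm_offset_t va;

	pmap_inval_bulk_init(&bulk, pmap);
	for (va = sva; va < eva; va += PAGE_SIZE) {
		ptep = ptep_of(pmap, va);	/* hypothetical pte lookup */
		opte = pmap_inval_bulk(&bulk, va, ptep, 0);
		if (opte & pmap->pmap_bits[PG_W_IDX])
			atomic_add_long(&pmap->pm_stats.wired_count, -1);
	}
	pmap_inval_bulk_flush(&bulk);	/* single rollup invalidation */
}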
diff --git a/sys/platform/pc64/x86_64/pmap.c b/sys/platform/pc64/x86_64/pmap.c index a33b818c94..7eb2099392 100644 --- a/sys/platform/pc64/x86_64/pmap.c +++ b/sys/platform/pc64/x86_64/pmap.c @@ -278,9 +278,11 @@ static pv_entry_t pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, pv_entry_t *pvpp); static pv_entry_t pmap_allocpte_seg(pmap_t pmap, vm_pindex_t ptepindex, pv_entry_t *pvpp, vm_map_entry_t entry, vm_offset_t va); -static void pmap_remove_pv_pte(pv_entry_t pv, pv_entry_t pvp, int smp); +static void pmap_remove_pv_pte(pv_entry_t pv, pv_entry_t pvp, + pmap_inval_bulk_t *bulk); static vm_page_t pmap_remove_pv_page(pv_entry_t pv); -static int pmap_release_pv(pv_entry_t pv, pv_entry_t pvp, int issmp); +static int pmap_release_pv(pv_entry_t pv, pv_entry_t pvp, + pmap_inval_bulk_t *bulk); struct pmap_scan_info; static void pmap_remove_callback(pmap_t pmap, struct pmap_scan_info *info, @@ -1340,7 +1342,7 @@ pmap_kenter(vm_offset_t va, vm_paddr_t pa) // pgeflag; ptep = vtopte(va); #if 1 - pmap_inval_smp(&kernel_pmap, va, ptep, npte); + pmap_inval_smp(&kernel_pmap, va, 1, ptep, npte); #else /* FUTURE */ if (*ptep) @@ -1380,6 +1382,35 @@ pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa) } /* + * Enter addresses into the kernel pmap but don't bother + * doing any tlb invalidations. Caller will do a rollup + * invalidation via pmap_rollup_inval(). + */ +int +pmap_kenter_noinval(vm_offset_t va, vm_paddr_t pa) +{ + pt_entry_t *ptep; + pt_entry_t npte; + int res; + + npte = pa | + kernel_pmap.pmap_bits[PG_RW_IDX] | + kernel_pmap.pmap_bits[PG_V_IDX]; +// pgeflag; + ptep = vtopte(va); +#if 1 + res = 1; +#else + /* FUTURE */ + res = (*ptep != 0); +#endif + *ptep = npte; + cpu_invlpg((void *)va); + + return res; +} + +/* * remove a page from the kernel pagetables */ void @@ -1388,7 +1419,7 @@ pmap_kremove(vm_offset_t va) pt_entry_t *ptep; ptep = vtopte(va); - pmap_inval_smp(&kernel_pmap, va, ptep, 0); + pmap_inval_smp(&kernel_pmap, va, 1, ptep, 0); } void @@ -1402,6 +1433,20 @@ pmap_kremove_quick(vm_offset_t va) } /* + * Remove addresses from the kernel pmap but don't bother + * doing any tlb invalidations. Caller will do a rollup + * invalidation via pmap_rollup_inval(). + */ +void +pmap_kremove_noinval(vm_offset_t va) +{ + pt_entry_t *ptep; + + ptep = vtopte(va); + (void)pte_load_clear(ptep); +} + +/* * XXX these need to be recoded. They are not used in any critical path. */ void @@ -1497,10 +1542,15 @@ pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva) cpu_wbinvd_on_all_cpus(); } } + +/* + * Invalidate the specified range of virtual memory on all cpus associated + * with the pmap. + */ void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - smp_invlpg_range(pmap->pm_active, sva, eva); + pmap_inval_smp(pmap, sva, (eva - sva) >> PAGE_SHIFT, NULL, 0); } /* @@ -1512,30 +1562,25 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) * over. The page *must* be wired. 
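 * The tlb invalidation is rolled up into a single pmap_invalidate_range()
 * call covering the whole mapping instead of per-page invlpg/invltlb.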
*/ void -pmap_qenter(vm_offset_t va, vm_page_t *m, int count) +pmap_qenter(vm_offset_t beg_va, vm_page_t *m, int count) { vm_offset_t end_va; - int do_smpinvltlb = 0; + vm_offset_t va; - end_va = va + count * PAGE_SIZE; - - while (va < end_va) { + end_va = beg_va + count * PAGE_SIZE; + + for (va = beg_va; va < end_va; va += PAGE_SIZE) { pt_entry_t *pte; pte = vtopte(va); - if (*pte) - do_smpinvltlb = 1; *pte = VM_PAGE_TO_PHYS(*m) | kernel_pmap.pmap_bits[PG_RW_IDX] | kernel_pmap.pmap_bits[PG_V_IDX] | kernel_pmap.pmap_cache_bits[(*m)->pat_mode]; // pgeflag; - cpu_invlpg((void *)va); - va += PAGE_SIZE; m++; } - if (do_smpinvltlb) - smp_invltlb(); + pmap_invalidate_range(&kernel_pmap, beg_va, end_va); } /* @@ -1963,7 +2008,7 @@ pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex, pv_entry_t *pvpp) panic("pmap_allocpte: unexpected pte %p/%d", pvp, (int)ptepindex); } - pte = pmap_inval_smp(pmap, (vm_offset_t)-1, ptep, 0); + pte = pmap_inval_smp(pmap, (vm_offset_t)-1, 1, ptep, 0); if (vm_page_unwire_quick( PHYS_TO_VM_PAGE(pte & PG_FRAME))) { panic("pmap_allocpte: shared pgtable " @@ -2169,6 +2214,8 @@ retry: * it is not optimized. */ if (proc_pt_pv) { + pmap_inval_bulk_t bulk; + if (proc_pt_pv->pv_m->wire_count != 1) { pv_put(proc_pd_pv); pv_put(proc_pt_pv); @@ -2183,7 +2230,9 @@ retry: /* * The release call will indirectly clean out *pt */ - pmap_release_pv(proc_pt_pv, proc_pd_pv, 1); + pmap_inval_bulk_init(&bulk, proc_pt_pv->pv_pmap); + pmap_release_pv(proc_pt_pv, proc_pd_pv, &bulk); + pmap_inval_bulk_flush(&bulk); proc_pt_pv = NULL; /* relookup */ pt = pv_pte_lookup(proc_pd_pv, pmap_pt_index(b)); @@ -2198,7 +2247,7 @@ retry: vm_page_wire_quick(proc_pd_pv->pv_m); atomic_add_long(&pmap->pm_stats.resident_count, 1); } else if (*pt != npte) { - opte = pmap_inval_smp(pmap, (vm_offset_t)-1, pt, npte); + opte = pmap_inval_smp(pmap, (vm_offset_t)-1, 1, pt, npte); #if 0 opte = pte_load_clear(pt); @@ -2315,7 +2364,7 @@ pmap_release_callback(pv_entry_t pv, void *data) info->retry = 1; return(-1); } - r = pmap_release_pv(pv, NULL, 0); + r = pmap_release_pv(pv, NULL, NULL); spin_lock(&pmap->pm_spin); return(r); } @@ -2329,7 +2378,7 @@ pmap_release_callback(pv_entry_t pv, void *data) * pass NULL for pvp. */ static int -pmap_release_pv(pv_entry_t pv, pv_entry_t pvp, int smp) +pmap_release_pv(pv_entry_t pv, pv_entry_t pvp, pmap_inval_bulk_t *bulk) { vm_page_t p; @@ -2341,7 +2390,7 @@ pmap_release_pv(pv_entry_t pv, pv_entry_t pvp, int smp) * This will clean out the pte at any level of the page table. * If smp != 0 all cpus are affected. */ - pmap_remove_pv_pte(pv, pvp, smp); + pmap_remove_pv_pte(pv, pvp, bulk); /* * Terminal pvs are unhooked from their vm_pages. 
Because @@ -2414,7 +2463,7 @@ skip: */ static void -pmap_remove_pv_pte(pv_entry_t pv, pv_entry_t pvp, int smp) +pmap_remove_pv_pte(pv_entry_t pv, pv_entry_t pvp, pmap_inval_bulk_t *bulk) { vm_pindex_t ptepindex = pv->pv_pindex; pmap_t pmap = pv->pv_pmap; @@ -2448,10 +2497,7 @@ pmap_remove_pv_pte(pv_entry_t pv, pv_entry_t pvp, int smp) pdp = &pmap->pm_pml4[pdp_index & ((1ul << NPML4EPGSHIFT) - 1)]; KKASSERT((*pdp & pmap->pmap_bits[PG_V_IDX]) != 0); p = PHYS_TO_VM_PAGE(*pdp & PG_FRAME); - if (smp) - pmap_inval_smp(pmap, (vm_offset_t)-1, pdp, 0); - else - *pdp = 0; + pmap_inval_bulk(bulk, (vm_offset_t)-1, pdp, 0); } else if (ptepindex >= pmap_pd_pindex(0)) { /* * Remove a PD page from the pdp @@ -2478,10 +2524,7 @@ pmap_remove_pv_pte(pv_entry_t pv, pv_entry_t pvp, int smp) ((1ul << NPDPEPGSHIFT) - 1)); KKASSERT((*pd & pmap->pmap_bits[PG_V_IDX]) != 0); p = PHYS_TO_VM_PAGE(*pd & PG_FRAME); - if (smp) - pmap_inval_smp(pmap, (vm_offset_t)-1, pd, 0); - else - *pd = 0; + pmap_inval_bulk(bulk, (vm_offset_t)-1, pd, 0); } else { KKASSERT(pmap->pm_flags & PMAP_FLAG_SIMPLE); p = pv->pv_m; /* degenerate test later */ @@ -2506,10 +2549,7 @@ pmap_remove_pv_pte(pv_entry_t pv, pv_entry_t pvp, int smp) pt = pv_pte_lookup(pvp, pt_index & ((1ul << NPDPEPGSHIFT) - 1)); KKASSERT((*pt & pmap->pmap_bits[PG_V_IDX]) != 0); p = PHYS_TO_VM_PAGE(*pt & PG_FRAME); - if (smp) - pmap_inval_smp(pmap, (vm_offset_t)-1, pt, 0); - else - *pt = 0; + pmap_inval_bulk(bulk, (vm_offset_t)-1, pt, 0); } else { /* * Remove a PTE from the PT page @@ -2543,12 +2583,9 @@ pmap_remove_pv_pte(pv_entry_t pv, pv_entry_t pvp, int smp) ptep = pv_pte_lookup(pvp, ptepindex & ((1ul << NPDPEPGSHIFT) - 1)); } - if (smp) { - pte = pmap_inval_smp(pmap, va, ptep, 0); - } else { - pte = pte_load_clear(ptep); - cpu_invlpg((void *)va); - } + pte = pmap_inval_bulk(bulk, va, ptep, 0); + if (bulk == NULL) /* XXX */ + cpu_invlpg((void *)va); /* XXX */ /* * Now update the vm_page_t @@ -3221,7 +3258,8 @@ struct pmap_scan_info { pv_entry_t, pv_entry_t, int, vm_offset_t, pt_entry_t *, void *); void *arg; - int dosmp; + pmap_inval_bulk_t bulk_core; + pmap_inval_bulk_t *bulk; int count; }; @@ -3229,7 +3267,7 @@ static int pmap_scan_cmp(pv_entry_t pv, void *data); static int pmap_scan_callback(pv_entry_t pv, void *data); static void -pmap_scan(struct pmap_scan_info *info) +pmap_scan(struct pmap_scan_info *info, int smp_inval) { struct pmap *pmap = info->pmap; pv_entry_t pd_pv; /* A page directory PV */ @@ -3242,6 +3280,12 @@ pmap_scan(struct pmap_scan_info *info) if (pmap == NULL) return; + if (smp_inval) { + info->bulk = &info->bulk_core; + pmap_inval_bulk_init(&info->bulk_core, pmap); + } else { + info->bulk = NULL; + } /* * Hold the token for stability; if the pmap is empty we have nothing @@ -3369,6 +3413,7 @@ again: if (pt_pv) pv_put(pt_pv); fast_skip: + pmap_inval_bulk_flush(info->bulk); lwkt_reltoken(&pmap->pm_token); return; } @@ -3405,6 +3450,7 @@ fast_skip: pmap_scan_cmp, pmap_scan_callback, info); spin_unlock(&pmap->pm_spin); } + pmap_inval_bulk_flush(info->bulk); lwkt_reltoken(&pmap->pm_token); } @@ -3739,8 +3785,7 @@ pmap_remove(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) info.eva = eva; info.func = pmap_remove_callback; info.arg = NULL; - info.dosmp = 1; /* normal remove requires pmap inval */ - pmap_scan(&info); + pmap_scan(&info, 1); } static void @@ -3753,8 +3798,7 @@ pmap_remove_noinval(struct pmap *pmap, vm_offset_t sva, vm_offset_t eva) info.eva = eva; info.func = pmap_remove_callback; info.arg = NULL; - info.dosmp = 0; /* do not 
synchronize w/other cpus */ - pmap_scan(&info); + pmap_scan(&info, 0); } static void @@ -3769,7 +3813,7 @@ pmap_remove_callback(pmap_t pmap, struct pmap_scan_info *info, * This will also drop pt_pv's wire_count. Note that * terminal pages are not wired based on mmu presence. */ - pmap_remove_pv_pte(pte_pv, pt_pv, info->dosmp); + pmap_remove_pv_pte(pte_pv, pt_pv, info->bulk); pmap_remove_pv_page(pte_pv); pv_free(pte_pv); } else if (sharept == 0) { @@ -3784,10 +3828,7 @@ pmap_remove_callback(pmap_t pmap, struct pmap_scan_info *info, * It is unclear how we can invalidate a segment so we * invalidate -1 which invlidates the tlb. */ - if (info->dosmp) - pte = pmap_inval_smp(pmap, (vm_offset_t)-1, ptep, 0); - else - pte = pte_load_clear(ptep); + pte = pmap_inval_bulk(info->bulk, (vm_offset_t)-1, ptep, 0); if (pte & pmap->pmap_bits[PG_W_IDX]) atomic_add_long(&pmap->pm_stats.wired_count, -1); atomic_add_long(&pmap->pm_stats.resident_count, -1); @@ -3807,10 +3848,7 @@ pmap_remove_callback(pmap_t pmap, struct pmap_scan_info *info, * It is unclear how we can invalidate a segment so we * invalidate -1 which invlidates the tlb. */ - if (info->dosmp) - pte = pmap_inval_smp(pmap, (vm_offset_t)-1, ptep, 0); - else - pte = pte_load_clear(ptep); + pte = pmap_inval_bulk(info->bulk, (vm_offset_t)-1, ptep, 0); atomic_add_long(&pmap->pm_stats.resident_count, -1); KKASSERT((pte & pmap->pmap_bits[PG_DEVICE_IDX]) == 0); if (vm_page_unwire_quick(PHYS_TO_VM_PAGE(pte & PG_FRAME))) @@ -3831,6 +3869,7 @@ void pmap_remove_all(vm_page_t m) { pv_entry_t pv; + pmap_inval_bulk_t bulk; if (!pmap_initialized /* || (m->flags & PG_FICTITIOUS)*/) return; @@ -3853,7 +3892,9 @@ pmap_remove_all(vm_page_t m) /* * Holding no spinlocks, pv is locked. */ - pmap_remove_pv_pte(pv, NULL, 1); + pmap_inval_bulk_init(&bulk, pv->pv_pmap); + pmap_remove_pv_pte(pv, NULL, &bulk); + pmap_inval_bulk_flush(&bulk); pmap_remove_pv_page(pv); pv_free(pv); vm_page_spin_lock(m); @@ -3891,8 +3932,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) info.eva = eva; info.func = pmap_protect_callback; info.arg = &prot; - info.dosmp = 1; - pmap_scan(&info); + pmap_scan(&info, 1); } static @@ -3944,7 +3984,7 @@ again: * OBJT_DEVICE or OBJT_MGTDEVICE (PG_FICTITIOUS) mappings * so PHYS_TO_VM_PAGE() should be safe here. */ - pte = pmap_inval_smp(pmap, (vm_offset_t)-1, ptep, 0); + pte = pmap_inval_smp(pmap, (vm_offset_t)-1, 1, ptep, 0); if (vm_page_unwire_quick(PHYS_TO_VM_PAGE(pte & PG_FRAME))) panic("pmap_protect: pgtable1 pg bad wirecount"); if (vm_page_unwire_quick(pt_pv->pv_m)) @@ -4135,10 +4175,15 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, */ if (pt_pv) vm_page_wire_quick(pt_pv->pv_m); - if (prot & VM_PROT_NOSYNC) - pmap_remove_pv_pte(pte_pv, pt_pv, 0); - else - pmap_remove_pv_pte(pte_pv, pt_pv, 1); + if (prot & VM_PROT_NOSYNC) { + pmap_remove_pv_pte(pte_pv, pt_pv, NULL); + } else { + pmap_inval_bulk_t bulk; + + pmap_inval_bulk_init(&bulk, pmap); + pmap_remove_pv_pte(pte_pv, pt_pv, &bulk); + pmap_inval_bulk_flush(&bulk); + } if (pte_pv->pv_m) pmap_remove_pv_page(pte_pv); } else if (prot & VM_PROT_NOSYNC) { @@ -4156,7 +4201,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, * * Leave wire count on PT page intact. */ - pmap_inval_smp(pmap, va, ptep, 0); + pmap_inval_smp(pmap, va, 1, ptep, 0); atomic_add_long(&pmap->pm_stats.resident_count, -1); } KKASSERT(*ptep == 0); @@ -4205,7 +4250,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, * get crashes. 
*/ if ((prot & VM_PROT_NOSYNC) == 0 && pt_pv == NULL) { - pmap_inval_smp(pmap, va, ptep, newpte); + pmap_inval_smp(pmap, va, 1, ptep, newpte); } else { *(volatile pt_entry_t *)ptep = newpte; if (pt_pv == NULL) diff --git a/sys/platform/pc64/x86_64/pmap_inval.c b/sys/platform/pc64/x86_64/pmap_inval.c index c8f434bfe9..238704bf63 100644 --- a/sys/platform/pc64/x86_64/pmap_inval.c +++ b/sys/platform/pc64/x86_64/pmap_inval.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include @@ -66,6 +67,8 @@ #define LOOPMASK (/* 32 * */ 16 * 128 * 1024 - 1) #endif +#define MAX_INVAL_PAGES 128 + struct pmap_inval_info { vm_offset_t va; pt_entry_t *ptep; @@ -73,6 +76,7 @@ struct pmap_inval_info { pt_entry_t npte; enum { INVDONE, INVSTORE, INVCMPSET } mode; int success; + int npgs; cpumask_t done; cpumask_t mask; #ifdef LOOPMASK @@ -92,6 +96,10 @@ extern cpumask_t smp_in_mask; #endif extern cpumask_t smp_smurf_mask; #endif +static long pmap_inval_bulk_count; + +SYSCTL_LONG(_machdep, OID_AUTO, pmap_inval_bulk_count, CTLFLAG_RW, + &pmap_inval_bulk_count, 0, ""); static void pmap_inval_init(pmap_t pmap) @@ -164,6 +172,8 @@ loopdebug(const char *msg, pmap_inval_info_t *info) kprintf("\n"); } +#ifdef CHECKSIG + #define CHECKSIGMASK(info) _checksigmask(info, __FILE__, __LINE__) static @@ -180,14 +190,28 @@ _checksigmask(pmap_inval_info_t *info, const char *file, int line) } } +#else + +#define CHECKSIGMASK(info) + +#endif +/* + * Invalidate the specified va across all cpus associated with the pmap. + * If va == (vm_offset_t)-1, we invltlb() instead of invlpg(). The operation + * will be done fully synchronously with storing npte into *ptep and returning + * opte. + * + * If ptep is NULL the operation will execute semi-synchronously. + * ptep must be NULL if npgs > 1 + */ pt_entry_t -pmap_inval_smp(pmap_t pmap, vm_offset_t va, pt_entry_t *ptep, - pt_entry_t npte) +pmap_inval_smp(pmap_t pmap, vm_offset_t va, int npgs, + pt_entry_t *ptep, pt_entry_t npte) { globaldata_t gd = mycpu; pmap_inval_info_t *info; - pt_entry_t opte; + pt_entry_t opte = 0; int cpu = gd->gd_cpuid; cpumask_t tmpmask; unsigned long rflags; @@ -199,19 +223,34 @@ pmap_inval_smp(pmap_t pmap, vm_offset_t va, pt_entry_t *ptep, pmap = &kernel_pmap; pmap_inval_init(pmap); if (CPUMASK_CMPMASKEQ(pmap->pm_active, gd->gd_cpumask)) { - for (;;) { - opte = *ptep; - cpu_ccfence(); - if (atomic_cmpset_long(ptep, opte, npte)) { - if (va == (vm_offset_t)-1) - cpu_invltlb(); - else - cpu_invlpg((void *)va); - pmap_inval_done(pmap); - return opte; + /* + * Convert to invltlb if there are too many pages to + * invlpg on. + */ + if (npgs > MAX_INVAL_PAGES) { + npgs = 0; + va = (vm_offset_t)-1; + } + + /* + * Invalidate the specified pages, handle invltlb if requested. 
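+	 * If ptep is NULL no pte is modified; only the tlb entries are
+	 * invalidated.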
+ */ + while (npgs) { + --npgs; + if (ptep) { + opte = atomic_swap_long(ptep, npte); + ++ptep; } - cpu_pause(); + if (va == (vm_offset_t)-1) + break; + cpu_invlpg((void *)va); + va += PAGE_SIZE; } + if (va == (vm_offset_t)-1) + cpu_invltlb(); + pmap_inval_done(pmap); + + return opte; } /* @@ -254,6 +293,7 @@ pmap_inval_smp(pmap_t pmap, vm_offset_t va, pt_entry_t *ptep, ATOMIC_CPUMASK_ORBIT(smp_invmask, cpu); info->va = va; + info->npgs = npgs; info->ptep = ptep; info->npte = npte; info->opte = 0; @@ -263,9 +303,18 @@ pmap_inval_smp(pmap_t pmap, vm_offset_t va, pt_entry_t *ptep, tmpmask = pmap->pm_active; /* volatile (bits may be cleared) */ cpu_ccfence(); CPUMASK_ANDMASK(tmpmask, smp_active_mask); + + /* + * If ptep is NULL the operation can be semi-synchronous, which means + * we can improve performance by flagging and removing idle cpus + * (see the idleinvlclr function in mp_machdep.c). + * + * Typically kernel page table operation is semi-synchronous. + */ + if (ptep == NULL) + smp_smurf_idleinvlclr(&tmpmask); CPUMASK_ORBIT(tmpmask, cpu); info->mode = INVSTORE; - cpu_ccfence(); /* * Command may start executing the moment 'done' is initialized, @@ -274,13 +323,13 @@ pmap_inval_smp(pmap_t pmap, vm_offset_t va, pt_entry_t *ptep, * cpu clears its mask bit, but other cpus CAN start clearing their * mask bits). */ - cpu_ccfence(); info->mask = tmpmask; #ifdef LOOPMASK info->sigmask = tmpmask; CHECKSIGMASK(info); #endif - info->done = tmpmask; + cpu_sfence(); + info->done = tmpmask; /* execute can begin here due to races */ /* * Pass our copy of the done bits (so they don't change out from @@ -303,8 +352,8 @@ pmap_inval_smp(pmap_t pmap, vm_offset_t va, pt_entry_t *ptep, } /* - * API function - invalidation the pte at (va) and replace *ptep with - * npte atomically only if *ptep equals opte, across the pmap's active cpus. + * API function - invalidate the pte at (va) and replace *ptep with npte + * atomically only if *ptep equals opte, across the pmap's active cpus. * * Returns 1 on success, 0 on failure (caller typically retries). */ @@ -379,6 +428,7 @@ pmap_inval_smp_cmpset(pmap_t pmap, vm_offset_t va, pt_entry_t *ptep, ATOMIC_CPUMASK_ORBIT(smp_invmask, cpu); info->va = va; + info->npgs = 1; /* unused */ info->ptep = ptep; info->npte = npte; info->opte = opte; @@ -425,6 +475,101 @@ pmap_inval_smp_cmpset(pmap_t pmap, vm_offset_t va, pt_entry_t *ptep, return success; } +void +pmap_inval_bulk_init(pmap_inval_bulk_t *bulk, struct pmap *pmap) +{ + bulk->pmap = pmap; + bulk->va_beg = 0; + bulk->va_end = 0; + bulk->count = 0; +} + +pt_entry_t +pmap_inval_bulk(pmap_inval_bulk_t *bulk, vm_offset_t va, + pt_entry_t *ptep, pt_entry_t npte) +{ + pt_entry_t pte; + + /* + * Degenerate case, localized or we don't care (e.g. because we + * are jacking the entire page table) or the pmap is not in-use + * by anyone. No invalidations are done on any cpu. + */ + if (bulk == NULL) { + pte = atomic_swap_long(ptep, npte); + return pte; + } + + /* + * If it isn't the kernel pmap we execute the operation synchronously + * on all cpus belonging to the pmap, which avoids concurrency bugs in + * the hw related to changing pte's out from under threads. + * + * Eventually I would like to implement streaming pmap invalidation + * for user pmaps to reduce mmap/munmap overheads for heavily-loaded + * threaded programs. + */ + if (bulk->pmap != &kernel_pmap) { + pte = pmap_inval_smp(bulk->pmap, va, 1, ptep, npte); + return pte; + } + + /* + * This is the kernel_pmap. 
All unmap operations presume that there + * are no other cpus accessing the addresses in question. Implement + * the bulking algorithm. collect the required information and + * synchronize once at the end. + */ + pte = atomic_swap_long(ptep, npte); + if (va == (vm_offset_t)-1) { + bulk->va_beg = va; + } else if (bulk->va_beg == bulk->va_end) { + bulk->va_beg = va; + bulk->va_end = va + PAGE_SIZE; + } else if (va == bulk->va_end) { + bulk->va_end = va + PAGE_SIZE; + } else { + bulk->va_beg = (vm_offset_t)-1; + bulk->va_end = 0; +#if 0 + pmap_inval_bulk_flush(bulk); + bulk->count = 1; + if (va == (vm_offset_t)-1) { + bulk->va_beg = va; + bulk->va_end = 0; + } else { + bulk->va_beg = va; + bulk->va_end = va + PAGE_SIZE; + } +#endif + } + ++bulk->count; + + return pte; +} + +void +pmap_inval_bulk_flush(pmap_inval_bulk_t *bulk) +{ + if (bulk == NULL) + return; + if (bulk->count > 0) + pmap_inval_bulk_count += (bulk->count - 1); + if (bulk->va_beg != bulk->va_end) { + if (bulk->va_beg == (vm_offset_t)-1) { + pmap_inval_smp(bulk->pmap, bulk->va_beg, 1, NULL, 0); + } else { + long n; + + n = (bulk->va_end - bulk->va_beg) >> PAGE_SHIFT; + pmap_inval_smp(bulk->pmap, bulk->va_beg, n, NULL, 0); + } + } + bulk->va_beg = 0; + bulk->va_end = 0; + bulk->count = 0; +} + /* * Called with interrupts hard-disabled. */ @@ -493,23 +638,34 @@ pmap_inval_intr(cpumask_t *cpumaskp) */ ATOMIC_CPUMASK_NANDBIT(info->mask, cpu); loopme = 1; - } else if (CPUMASK_TESTBIT(info->mask, n)) { + } else if (info->ptep && + CPUMASK_TESTBIT(info->mask, n)) { /* - * Other cpu waits for originator (n) to - * complete the command. + * Other cpu must wait for the originator (n) + * to complete its command if ptep is not NULL. */ loopme = 1; } else { /* * Other cpu detects that the originator has - * completed its command. Now that the page - * table entry has changed, we can follow up - * with our own invalidation. + * completed its command, or there was no + * command. + * + * Now that the page table entry has changed, + * we can follow up with our own invalidation. */ - if (info->va == (vm_offset_t)-1) + vm_offset_t va = info->va; + int npgs; + + if (va == (vm_offset_t)-1 || + info->npgs > MAX_INVAL_PAGES) { cpu_invltlb(); - else - cpu_invlpg((void *)info->va); + } else { + for (npgs = info->npgs; npgs; --npgs) { + cpu_invlpg((void *)va); + va += PAGE_SIZE; + } + } ATOMIC_CPUMASK_NANDBIT(info->done, cpu); /* info invalid now */ /* loopme left alone */ @@ -541,15 +697,11 @@ pmap_inval_intr(cpumask_t *cpumaskp) */ KKASSERT(info->mode != INVDONE); if (info->mode == INVSTORE) { - info->opte = *info->ptep; - cpu_ccfence(); - if (atomic_cmpset_long(info->ptep, - info->opte, info->npte)) { - CHECKSIGMASK(info); - ATOMIC_CPUMASK_NANDBIT(info->mask, cpu); - CHECKSIGMASK(info); - } - /* else will loop/retry */ + if (info->ptep) + info->opte = atomic_swap_long(info->ptep, info->npte); + CHECKSIGMASK(info); + ATOMIC_CPUMASK_NANDBIT(info->mask, cpu); + CHECKSIGMASK(info); } else { if (atomic_cmpset_long(info->ptep, info->opte, info->npte)) { @@ -571,10 +723,18 @@ pmap_inval_intr(cpumask_t *cpumaskp) * until the other cpus have cleared their done bits * (asynchronously). 
*/ - if (info->va == (vm_offset_t)-1) + vm_offset_t va = info->va; + int npgs; + + if (va == (vm_offset_t)-1 || + info->npgs > MAX_INVAL_PAGES) { cpu_invltlb(); - else - cpu_invlpg((void *)info->va); + } else { + for (npgs = info->npgs; npgs; --npgs) { + cpu_invlpg((void *)va); + va += PAGE_SIZE; + } + } #ifdef LOOPMASK info->xloops = 0; #endif diff --git a/sys/platform/vkernel64/platform/pmap.c b/sys/platform/vkernel64/platform/pmap.c index 344368ff85..f8bed2ccd4 100644 --- a/sys/platform/vkernel64/platform/pmap.c +++ b/sys/platform/vkernel64/platform/pmap.c @@ -842,6 +842,20 @@ pmap_kenter_quick(vm_offset_t va, vm_paddr_t pa) return res; } +void +pmap_kenter_noinval(vm_offset_t va, vm_paddr_t pa) +{ + pt_entry_t *pte; + pt_entry_t npte; + + KKASSERT(va >= KvaStart && va < KvaEnd); + + npte = (vpte_t)pa | VPTE_RW | VPTE_V | VPTE_U; + pte = vtopte(va); + + *pte = npte; +} + /* * Remove an unmanaged mapping created with pmap_kenter*(). */ @@ -879,6 +893,17 @@ pmap_kremove_quick(vm_offset_t va) *pte = 0; } +void +pmap_kremove_noinval(vm_offset_t va) +{ + pt_entry_t *pte; + + KKASSERT(va >= KvaStart && va < KvaEnd); + + pte = vtopte(va); + *pte = 0; +} + /* * Used to map a range of physical addresses into kernel * virtual address space. @@ -892,7 +917,6 @@ pmap_map(vm_offset_t *virtp, vm_paddr_t start, vm_paddr_t end, int prot) return PHYS_TO_DMAP(start); } - /* * Map a set of unmanaged VM pages into KVM. */ diff --git a/sys/platform/vkernel64/platform/pmap_inval.c b/sys/platform/vkernel64/platform/pmap_inval.c index 42f8fa6713..85a325c2df 100644 --- a/sys/platform/vkernel64/platform/pmap_inval.c +++ b/sys/platform/vkernel64/platform/pmap_inval.c @@ -154,7 +154,8 @@ guest_sync_addr(struct pmap *pmap, */ if (CPUMASK_TESTZERO(pmap->pm_active) || CPUMASK_CMPMASKEQ(pmap->pm_active, gd->gd_cpumask)) { - *dst_ptep = *src_ptep; + if (src_ptep) + *dst_ptep = *src_ptep; vmm_cpu_invltlb(); } else { vmm_guest_sync_addr(__DEVOLATILE(void *, dst_ptep), @@ -194,6 +195,20 @@ pmap_inval_pte(volatile vpte_t *ptep, struct pmap *pmap, vm_offset_t va) } /* + * Invalidate the tlb for a range of virtual addresses across all cpus + * belonging to the pmap. + */ +void +pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) +{ + if (vmm_enabled == 0) { + pmap_inval_cpu(pmap, sva, eva - sva); + } else { + guest_sync_addr(pmap, NULL, NULL); + } +} + +/* * Same as pmap_inval_pte() but only synchronize with the current * cpu. For the moment its the same as the non-quick version. 
*/ diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h index 8770fbdb79..a05a388d89 100644 --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -174,10 +174,12 @@ void pmap_qenter (vm_offset_t, struct vm_page **, int); void pmap_qremove (vm_offset_t, int); void pmap_kenter (vm_offset_t, vm_paddr_t); int pmap_kenter_quick (vm_offset_t, vm_paddr_t); +int pmap_kenter_noinval (vm_offset_t, vm_paddr_t); void pmap_kmodify_rw(vm_offset_t va); void pmap_kmodify_nc(vm_offset_t va); void pmap_kremove (vm_offset_t); void pmap_kremove_quick (vm_offset_t); +void pmap_kremove_noinval (vm_offset_t); void pmap_reference (pmap_t); void pmap_remove (pmap_t, vm_offset_t, vm_offset_t); void pmap_remove_pages (pmap_t, vm_offset_t, vm_offset_t); @@ -191,6 +193,8 @@ void pmap_init_proc (struct proc *); void pmap_init_thread (struct thread *td); void pmap_replacevm (struct proc *, struct vmspace *, int); void pmap_setlwpvm (struct lwp *, struct vmspace *); +vm_paddr_t pmap_kextract(vm_offset_t); +void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t); vm_offset_t pmap_addr_hint (vm_object_t obj, vm_offset_t addr, vm_size_t size); void *pmap_kenter_temporary (vm_paddr_t pa, long i); diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c index 3aab2baa75..62deb42160 100644 --- a/sys/vm/vm_contig.c +++ b/sys/vm/vm_contig.c @@ -479,19 +479,16 @@ vm_contig_pg_kmap(int start, u_long size, vm_map_t map, int flags) vm_paddr_t pa; vm_page_t pga = vm_page_array; u_long offset; - int dotlb; if (size == 0) panic("vm_contig_pg_kmap: size must not be 0"); size = round_page(size); addr = kmem_alloc_pageable(&kernel_map, size); if (addr) { - dotlb = 0; pa = VM_PAGE_TO_PHYS(&pga[start]); for (offset = 0; offset < size; offset += PAGE_SIZE) - dotlb += pmap_kenter_quick(addr + offset, pa + offset); - if (dotlb) - smp_invltlb(); + pmap_kenter_noinval(addr + offset, pa + offset); + pmap_invalidate_range(&kernel_pmap, addr, addr + size); if (flags & M_ZERO) bzero((void *)addr, size); } -- 2.11.4.GIT
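For reference, the caller-side rollup pattern this patch establishes for
kernel KVA (as in the vfs_bio.c and vm_contig.c hunks above) is: load or
clear the ptes with the _noinval variants, then issue one ranged
invalidation. A condensed sketch, with an illustrative function name and
page list:

static void
example_kmap_pages(vm_offset_t kva, vm_page_t *mlist, int count)
{
	vm_offset_t va = kva;
	int i;

	for (i = 0; i < count; ++i, va += PAGE_SIZE)
		pmap_kenter_noinval(va, VM_PAGE_TO_PHYS(mlist[i]));
	pmap_invalidate_range(&kernel_pmap, kva, kva + count * PAGE_SIZE);
}

On pc64, pmap_kenter_noinval() still issues a local cpu_invlpg(); only the
cross-cpu signalling is deferred to pmap_invalidate_range(), which uses the
semi-synchronous (ptep == NULL) path of pmap_inval_smp() and can therefore
drop idle cpus from the IPI set.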