kernel/vm/seg_map.c
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
30 * Portions of this source code were derived from Berkeley 4.3 BSD
31 * under license from the Regents of the University of California.
35 * VM - generic vnode mapping segment.
37 * The segmap driver is used only by the kernel to get faster (than seg_vn)
38 * mappings [lower routine overhead; more persistent cache] to random
39 * vnode/offsets. Note that the kernel may (and does) use seg_vn as well.
40 */
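/*
 * Editor's note: the sketch below is illustrative only and is not part of
 * this file. It shows, roughly, how a filesystem read path typically
 * consumes this interface; the variable names (uio, vp, mapon, n), the
 * forcefault value and the release flags are assumptions.
 *
 *	size_t mapon = uio->uio_loffset & MAXBOFFSET;
 *	size_t n = MIN(MAXBSIZE - mapon, uio->uio_resid);
 *	caddr_t base;
 *	int error;
 *
 *	base = segmap_getmapflt(segkmap, vp, uio->uio_loffset, n, 1, S_READ);
 *	error = uiomove(base + mapon, n, UIO_READ, uio);
 *	if (error == 0)
 *		error = segmap_release(segkmap, base, 0);
 *	else
 *		(void) segmap_release(segkmap, base, 0);
 */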
42 #include <sys/types.h>
43 #include <sys/t_lock.h>
44 #include <sys/param.h>
45 #include <sys/sysmacros.h>
46 #include <sys/buf.h>
47 #include <sys/systm.h>
48 #include <sys/vnode.h>
49 #include <sys/mman.h>
50 #include <sys/errno.h>
51 #include <sys/cred.h>
52 #include <sys/kmem.h>
53 #include <sys/vtrace.h>
54 #include <sys/cmn_err.h>
55 #include <sys/debug.h>
56 #include <sys/thread.h>
57 #include <sys/dumphdr.h>
58 #include <sys/bitmap.h>
59 #include <sys/lgrp.h>
61 #include <vm/seg_kmem.h>
62 #include <vm/hat.h>
63 #include <vm/as.h>
64 #include <vm/seg.h>
65 #include <vm/seg_kpm.h>
66 #include <vm/seg_map.h>
67 #include <vm/page.h>
68 #include <vm/pvn.h>
69 #include <vm/rm.h>
72 * Private seg op routines.
74 static void segmap_free(struct seg *seg);
75 faultcode_t segmap_fault(struct hat *hat, struct seg *seg, caddr_t addr,
76 size_t len, enum fault_type type, enum seg_rw rw);
77 static faultcode_t segmap_faulta(struct seg *seg, caddr_t addr);
78 static int segmap_checkprot(struct seg *seg, caddr_t addr, size_t len,
79 uint_t prot);
80 static int segmap_kluster(struct seg *seg, caddr_t addr, ssize_t);
81 static int segmap_getprot(struct seg *seg, caddr_t addr, size_t len,
82 uint_t *protv);
83 static uoff_t segmap_getoffset(struct seg *seg, caddr_t addr);
84 static int segmap_gettype(struct seg *seg, caddr_t addr);
85 static int segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
86 static void segmap_dump(struct seg *seg);
87 static int segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
88 struct page ***ppp, enum lock_type type,
89 enum seg_rw rw);
90 static void segmap_badop(void);
91 static int segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
93 /* segkpm support */
94 static caddr_t segmap_pagecreate_kpm(struct seg *, vnode_t *, uoff_t,
95 struct smap *, enum seg_rw);
96 struct smap *get_smap_kpm(caddr_t, page_t **);
98 #define SEGMAP_BADOP(t) (t(*)())segmap_badop
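/*
 * Editor's note: SEGMAP_BADOP() simply casts the single segmap_badop()
 * routine (defined below) to whatever function-pointer type a given
 * seg_ops slot needs, so any operation segmap does not support panics
 * if it is ever invoked.
 */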
100 static const struct seg_ops segmap_ops = {
101 .dup = SEGMAP_BADOP(int),
102 .unmap = SEGMAP_BADOP(int),
103 .free = segmap_free,
104 .fault = segmap_fault,
105 .faulta = segmap_faulta,
106 .setprot = SEGMAP_BADOP(int),
107 .checkprot = segmap_checkprot,
108 .kluster = segmap_kluster,
109 .sync = SEGMAP_BADOP(int),
110 .incore = SEGMAP_BADOP(size_t),
111 .lockop = SEGMAP_BADOP(int),
112 .getprot = segmap_getprot,
113 .getoffset = segmap_getoffset,
114 .gettype = segmap_gettype,
115 .getvp = segmap_getvp,
116 .advise = SEGMAP_BADOP(int),
117 .dump = segmap_dump,
118 .pagelock = segmap_pagelock,
119 .setpagesize = SEGMAP_BADOP(int),
120 .getmemid = segmap_getmemid,
124 * Private segmap routines.
126 static void segmap_unlock(struct hat *hat, struct seg *seg, caddr_t addr,
127 size_t len, enum seg_rw rw, struct smap *smp);
128 static void segmap_smapadd(struct smap *smp);
129 static struct smap *segmap_hashin(struct smap *smp, struct vnode *vp,
130 uoff_t off, int hashid);
131 static void segmap_hashout(struct smap *smp);
135 * Statistics for segmap operations.
137 * No explicit locking to protect these stats.
139 struct segmapcnt segmapcnt = {
140 { "fault", KSTAT_DATA_ULONG },
141 { "faulta", KSTAT_DATA_ULONG },
142 { "getmap", KSTAT_DATA_ULONG },
143 { "get_use", KSTAT_DATA_ULONG },
144 { "get_reclaim", KSTAT_DATA_ULONG },
145 { "get_reuse", KSTAT_DATA_ULONG },
146 { "get_unused", KSTAT_DATA_ULONG },
147 { "get_nofree", KSTAT_DATA_ULONG },
148 { "rel_async", KSTAT_DATA_ULONG },
149 { "rel_write", KSTAT_DATA_ULONG },
150 { "rel_free", KSTAT_DATA_ULONG },
151 { "rel_abort", KSTAT_DATA_ULONG },
152 { "rel_dontneed", KSTAT_DATA_ULONG },
153 { "release", KSTAT_DATA_ULONG },
154 { "pagecreate", KSTAT_DATA_ULONG },
155 { "free_notfree", KSTAT_DATA_ULONG },
156 { "free_dirty", KSTAT_DATA_ULONG },
157 { "free", KSTAT_DATA_ULONG },
158 { "stolen", KSTAT_DATA_ULONG },
159 { "get_nomtx", KSTAT_DATA_ULONG }
162 kstat_named_t *segmapcnt_ptr = (kstat_named_t *)&segmapcnt;
163 uint_t segmapcnt_ndata = sizeof (segmapcnt) / sizeof (kstat_named_t);
166 * Return number of map pages in segment.
168 #define MAP_PAGES(seg) ((seg)->s_size >> MAXBSHIFT)
171 * Translate addr into smap number within segment.
173 #define MAP_PAGE(seg, addr) (((addr) - (seg)->s_base) >> MAXBSHIFT)
176 * Translate addr in seg into struct smap pointer.
178 #define GET_SMAP(seg, addr) \
179 &(((struct segmap_data *)((seg)->s_data))->smd_sm[MAP_PAGE(seg, addr)])
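/*
 * Editor's note (illustrative): MAP_PAGE()/GET_SMAP() are the forward
 * mapping from a segkmap address to its smap slot. The inverse, used by
 * segmap_getmapflt() and grab_smp() below, is
 *
 *	baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
 *
 * so slot i always describes the MAXBSIZE window at s_base + i * MAXBSIZE.
 */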
182 * Bit in map (16 bit bitmap).
184 #define SMAP_BIT_MASK(bitindex) (1 << ((bitindex) & 0xf))
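/*
 * Editor's note (illustrative): sm_bitmap keeps one bit per PAGESIZE page
 * of an smap's MAXBSIZE window, so the 16-bit map assumes no more than 16
 * pages per window (see MAXPPB below). A set bit records that the page was
 * created locked by segmap_pagecreate() and still has to be unlocked via
 * segmap_fault(F_SOFTUNLOCK) or segmap_pageunlock(). For example, with an
 * 8K MAXBSIZE and 4K pages, only bits 0 and 1 are ever used.
 */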
186 static int smd_colormsk = 0;
187 static int smd_ncolor = 0;
188 static int smd_nfree = 0;
189 static int smd_freemsk = 0;
190 #ifdef DEBUG
191 static int *colors_used;
192 #endif
193 static struct smap *smd_smap;
194 static struct smaphash *smd_hash;
195 #ifdef SEGMAP_HASHSTATS
196 static unsigned int *smd_hash_len;
197 #endif
198 static struct smfree *smd_free;
199 static ulong_t smd_hashmsk = 0;
201 #define SEGMAP_MAXCOLOR 2
202 #define SEGMAP_CACHE_PAD 64
204 union segmap_cpu {
205 struct {
206 uint32_t scpu_free_ndx[SEGMAP_MAXCOLOR];
207 struct smap *scpu_last_smap;
208 ulong_t scpu_getmap;
209 ulong_t scpu_release;
210 ulong_t scpu_get_reclaim;
211 ulong_t scpu_fault;
212 ulong_t scpu_pagecreate;
213 ulong_t scpu_get_reuse;
214 } scpu;
215 char scpu_pad[SEGMAP_CACHE_PAD];
217 static union segmap_cpu *smd_cpu;
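/*
 * Editor's note: each CPU's hot counters and freelist rotor live in their
 * own SEGMAP_CACHE_PAD-byte union so that two CPUs never update the same
 * cache line (no false sharing). On a typical LP64 kernel the struct
 * happens to fill the 64 bytes exactly; something like
 *
 *	CTASSERT(sizeof (union segmap_cpu) == SEGMAP_CACHE_PAD);
 *
 * would document that assumption (illustrative, not present here).
 */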
220 * There are three locks in seg_map:
221 * - per freelist mutexes
222 * - per hashchain mutexes
223 * - per smap mutexes
225 * The lock ordering is to get the smap mutex to lock down the slot
226 * first then the hash lock (for hash in/out (vp, off) list) or the
227 * freelist lock to put the slot back on the free list.
229 * The hash search is done holding only the hashchain lock; when a wanted
230 * slot is found, we drop the hashchain lock and then lock the slot, so the
231 * hashchain and smap locks never overlap. After the slot is
232 * locked, we verify again if the slot is still what we are looking
233 * for.
235 * Allocation of a free slot is done by holding the freelist lock,
236 * then locking the smap slot at the head of the freelist. This is
237 * in reversed lock order so mutex_tryenter() is used.
239 * The smap lock protects all fields in smap structure except for
240 * the link fields for hash/free lists which are protected by
241 * hashchain and freelist locks.
242 */
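/*
 * Editor's note (illustrative): the "reverse order" allocation path looks
 * roughly like the fragment below, which mirrors get_free_smp(); because
 * the freelist lock is taken before the smap lock, mutex_tryenter() is
 * used and the slot is simply skipped when it cannot be acquired.
 *
 *	mutex_enter(&allocq->smq_mtx);
 *	if (mutex_tryenter(SMAPMTX(smp))) {
 *		... claim smp, unlink it from the freelist ...
 *	} else {
 *		... try the next smap, or the other queue ...
 *	}
 */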
244 #define SHASHMTX(hashid) (&smd_hash[hashid].sh_mtx)
246 #define SMP2SMF(smp) (&smd_free[(smp - smd_smap) & smd_freemsk])
247 #define SMP2SMF_NDX(smp) (ushort_t)((smp - smd_smap) & smd_freemsk)
249 #define SMAPMTX(smp) (&smp->sm_mtx)
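/*
 * Editor's note (illustrative): SMP2SMF() picks a freelist purely from the
 * smap's array index masked by smd_freemsk, so consecutive slots are dealt
 * round-robin across the smd_nfree (power of two) freelists; with
 * smd_nfree == 8, slots 0, 8, 16, ... all belong to freelist 0. The value
 * is cached in sm_free_ndx at init time.
 */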
251 #define SMAP_HASHFUNC(vp, off, hashid) \
252 { \
253 hashid = ((((uintptr_t)(vp) >> 6) + ((uintptr_t)(vp) >> 3) + \
254 ((off) >> MAXBSHIFT)) & smd_hashmsk); \
255 }
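/*
 * Editor's note: the hash mixes two shifted copies of the vnode pointer
 * (presumably to compensate for the limited variation in the low bits of
 * kmem-allocated vnode addresses) with the MAXBSIZE block number of the
 * offset, then masks with smd_hashmsk, which segmap_create() sets to a
 * power of two minus one.
 */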
258 * The most frequently updated kstat counters are kept in the
259 * per cpu array to avoid hot cache blocks. The update function
260 * sums the cpu local counters to update the global counters.
263 /* ARGSUSED */
265 segmap_kstat_update(kstat_t *ksp, int rw)
267 int i;
268 ulong_t getmap, release, get_reclaim;
269 ulong_t fault, pagecreate, get_reuse;
271 if (rw == KSTAT_WRITE)
272 return (EACCES);
273 getmap = release = get_reclaim = (ulong_t)0;
274 fault = pagecreate = get_reuse = (ulong_t)0;
275 for (i = 0; i < max_ncpus; i++) {
276 getmap += smd_cpu[i].scpu.scpu_getmap;
277 release += smd_cpu[i].scpu.scpu_release;
278 get_reclaim += smd_cpu[i].scpu.scpu_get_reclaim;
279 fault += smd_cpu[i].scpu.scpu_fault;
280 pagecreate += smd_cpu[i].scpu.scpu_pagecreate;
281 get_reuse += smd_cpu[i].scpu.scpu_get_reuse;
283 segmapcnt.smp_getmap.value.ul = getmap;
284 segmapcnt.smp_release.value.ul = release;
285 segmapcnt.smp_get_reclaim.value.ul = get_reclaim;
286 segmapcnt.smp_fault.value.ul = fault;
287 segmapcnt.smp_pagecreate.value.ul = pagecreate;
288 segmapcnt.smp_get_reuse.value.ul = get_reuse;
289 return (0);
293 segmap_create(struct seg *seg, void *argsp)
295 struct segmap_data *smd;
296 struct smap *smp;
297 struct smfree *sm;
298 struct segmap_crargs *a = (struct segmap_crargs *)argsp;
299 struct smaphash *shashp;
300 union segmap_cpu *scpu;
301 long i, npages;
302 size_t hashsz;
303 uint_t nfreelist;
304 extern void prefetch_smap_w(void *);
305 extern int max_ncpus;
307 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
309 if (((uintptr_t)seg->s_base | seg->s_size) & MAXBOFFSET) {
310 panic("segkmap not MAXBSIZE aligned");
311 /*NOTREACHED*/
314 smd = kmem_zalloc(sizeof (struct segmap_data), KM_SLEEP);
316 seg->s_data = (void *)smd;
317 seg->s_ops = &segmap_ops;
318 smd->smd_prot = a->prot;
321 * Scale the number of smap freelists to be
322 * proportional to max_ncpus * number of virtual colors.
323 * The caller can over-ride this scaling by providing
324 * a non-zero a->nfreelist argument.
326 nfreelist = a->nfreelist;
327 if (nfreelist == 0)
328 nfreelist = max_ncpus;
329 else if (nfreelist < 0 || nfreelist > 4 * max_ncpus) {
330 cmn_err(CE_WARN, "segmap_create: nfreelist out of range "
331 "%d, using %d", nfreelist, max_ncpus);
332 nfreelist = max_ncpus;
334 if (!ISP2(nfreelist)) {
335 /* round up nfreelist to the next power of two. */
336 nfreelist = 1 << (highbit(nfreelist));
340 * Get the number of virtual colors - must be a power of 2.
342 if (a->shmsize)
343 smd_ncolor = a->shmsize >> MAXBSHIFT;
344 else
345 smd_ncolor = 1;
346 ASSERT((smd_ncolor & (smd_ncolor - 1)) == 0);
347 ASSERT(smd_ncolor <= SEGMAP_MAXCOLOR);
348 smd_colormsk = smd_ncolor - 1;
349 smd->smd_nfree = smd_nfree = smd_ncolor * nfreelist;
350 smd_freemsk = smd_nfree - 1;
353 * Allocate and initialize the freelist headers.
354 * Note that sm_freeq[1] starts out as the release queue. This
355 * is known when the smap structures are initialized below.
357 smd_free = smd->smd_free =
358 kmem_zalloc(smd_nfree * sizeof (struct smfree), KM_SLEEP);
359 for (i = 0; i < smd_nfree; i++) {
360 sm = &smd->smd_free[i];
361 mutex_init(&sm->sm_freeq[0].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
362 mutex_init(&sm->sm_freeq[1].smq_mtx, NULL, MUTEX_DEFAULT, NULL);
363 sm->sm_allocq = &sm->sm_freeq[0];
364 sm->sm_releq = &sm->sm_freeq[1];
368 * Allocate and initialize the smap hash chain headers.
369 * Compute hash size rounding down to the next power of two.
371 npages = MAP_PAGES(seg);
372 smd->smd_npages = npages;
373 hashsz = npages / SMAP_HASHAVELEN;
374 hashsz = 1 << (highbit(hashsz)-1);
375 smd_hashmsk = hashsz - 1;
376 smd_hash = smd->smd_hash =
377 kmem_alloc(hashsz * sizeof (struct smaphash), KM_SLEEP);
378 #ifdef SEGMAP_HASHSTATS
379 smd_hash_len =
380 kmem_zalloc(hashsz * sizeof (unsigned int), KM_SLEEP);
381 #endif
382 for (i = 0, shashp = smd_hash; i < hashsz; i++, shashp++) {
383 shashp->sh_hash_list = NULL;
384 mutex_init(&shashp->sh_mtx, NULL, MUTEX_DEFAULT, NULL);
388 * Allocate and initialize the smap structures.
389 * Link all slots onto the appropriate freelist.
390 * The smap array is large enough to affect boot time
391 * on large systems, so use memory prefetching and only
392 * go through the array 1 time. Inline an optimized version
393 * of segmap_smapadd to add structures to freelists with
394 * knowledge that no locks are needed here.
396 smd_smap = smd->smd_sm =
397 kmem_alloc(sizeof (struct smap) * npages, KM_SLEEP);
399 for (smp = &smd->smd_sm[MAP_PAGES(seg) - 1];
400 smp >= smd->smd_sm; smp--) {
401 struct smap *smpfreelist;
402 struct sm_freeq *releq;
404 prefetch_smap_w((char *)smp);
406 smp->sm_vp = NULL;
407 smp->sm_hash = NULL;
408 smp->sm_off = 0;
409 smp->sm_bitmap = 0;
410 smp->sm_refcnt = 0;
411 mutex_init(&smp->sm_mtx, NULL, MUTEX_DEFAULT, NULL);
412 smp->sm_free_ndx = SMP2SMF_NDX(smp);
414 sm = SMP2SMF(smp);
415 releq = sm->sm_releq;
417 smpfreelist = releq->smq_free;
418 if (smpfreelist == 0) {
419 releq->smq_free = smp->sm_next = smp->sm_prev = smp;
420 } else {
421 smp->sm_next = smpfreelist;
422 smp->sm_prev = smpfreelist->sm_prev;
423 smpfreelist->sm_prev = smp;
424 smp->sm_prev->sm_next = smp;
425 releq->smq_free = smp->sm_next;
429 * sm_flag = 0 (no SM_QNDX_ZERO) implies smap on sm_freeq[1]
431 smp->sm_flags = 0;
433 #ifdef SEGKPM_SUPPORT
435 * Due to the fragile prefetch loop no
436 * separate function is used here.
438 smp->sm_kpme_next = NULL;
439 smp->sm_kpme_prev = NULL;
440 smp->sm_kpme_page = NULL;
441 #endif
445 * Allocate the per color indices that distribute allocation
446 * requests over the free lists. Each cpu will have a private
447 * rotor index to spread the allocations even across the available
448 * smap freelists. Init the scpu_last_smap field to the first
449 * smap element so there is no need to check for NULL.
451 smd_cpu =
452 kmem_zalloc(sizeof (union segmap_cpu) * max_ncpus, KM_SLEEP);
453 for (i = 0, scpu = smd_cpu; i < max_ncpus; i++, scpu++) {
454 int j;
455 for (j = 0; j < smd_ncolor; j++)
456 scpu->scpu.scpu_free_ndx[j] = j;
457 scpu->scpu.scpu_last_smap = smd_smap;
460 vpm_init();
462 #ifdef DEBUG
464 * Keep track of which colors are used more often.
466 colors_used = kmem_zalloc(smd_nfree * sizeof (int), KM_SLEEP);
467 #endif /* DEBUG */
469 return (0);
472 static void
473 segmap_free(seg)
474 struct seg *seg;
476 ASSERT(seg->s_as && RW_WRITE_HELD(&seg->s_as->a_lock));
480 * Do a F_SOFTUNLOCK call over the range requested.
481 * The range must have already been F_SOFTLOCK'ed.
483 static void
484 segmap_unlock(
485 struct hat *hat,
486 struct seg *seg,
487 caddr_t addr,
488 size_t len,
489 enum seg_rw rw,
490 struct smap *smp)
492 page_t *pp;
493 caddr_t adr;
494 uoff_t off;
495 struct vnode *vp;
496 kmutex_t *smtx;
498 ASSERT(smp->sm_refcnt > 0);
501 if (segmap_kpm && IS_KPM_ADDR(addr)) {
504 * We're called only from segmap_fault and this was a
505 * NOP in case of a kpm based smap, so dangerous things
506 * must have happened in the meantime. Pages are prefaulted
507 * and locked in segmap_getmapflt and they will not be
508 * unlocked until segmap_release.
510 panic("segmap_unlock: called with kpm addr %p", (void *)addr);
511 /*NOTREACHED*/
514 vp = smp->sm_vp;
515 off = smp->sm_off + (uoff_t)((uintptr_t)addr & MAXBOFFSET);
517 hat_unlock(hat, addr, P2ROUNDUP(len, PAGESIZE));
518 for (adr = addr; adr < addr + len; adr += PAGESIZE, off += PAGESIZE) {
519 ushort_t bitmask;
522 * Use page_find() instead of page_lookup() to
523 * find the page since we know that it has
524 * "shared" lock.
526 pp = page_find(&vp->v_object, off);
527 if (pp == NULL) {
528 panic("segmap_unlock: page not found");
529 /*NOTREACHED*/
532 if (rw == S_WRITE) {
533 hat_setrefmod(pp);
534 } else if (rw != S_OTHER) {
535 hat_setref(pp);
539 * Clear bitmap, if the bit corresponding to "off" is set,
540 * since the page and translation are being unlocked.
542 bitmask = SMAP_BIT_MASK((off - smp->sm_off) >> PAGESHIFT);
545 * Large Files: The following assertion verifies that the
546 * offset arithmetic above stays within (int) range.
548 ASSERT((uoff_t)(off - smp->sm_off) <= INT_MAX);
549 smtx = SMAPMTX(smp);
550 mutex_enter(smtx);
551 if (smp->sm_bitmap & bitmask) {
552 smp->sm_bitmap &= ~bitmask;
554 mutex_exit(smtx);
556 page_unlock(pp);
560 #define MAXPPB (MAXBSIZE/4096) /* assumes minimum page size of 4k */
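/*
 * Editor's note: MAXPPB sizes the pl[] page list used by segmap_fault()
 * and segmap_getmapflt() below: at most MAXBSIZE/PAGESIZE pages plus a
 * terminating NULL entry, computed with 4K as a conservative minimum
 * page size.
 */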
563 * This routine is called via a machine specific fault handling
564 * routine. It is also called by software routines wishing to
565 * lock or unlock a range of addresses.
567 * Note that this routine expects a page-aligned "addr".
569 faultcode_t
570 segmap_fault(
571 struct hat *hat,
572 struct seg *seg,
573 caddr_t addr,
574 size_t len,
575 enum fault_type type,
576 enum seg_rw rw)
578 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
579 struct smap *smp;
580 page_t *pp, **ppp;
581 struct vnode *vp;
582 uoff_t off;
583 page_t *pl[MAXPPB + 1];
584 uint_t prot;
585 uoff_t addroff;
586 caddr_t adr;
587 int err;
588 uoff_t sm_off;
589 int hat_flag;
591 if (segmap_kpm && IS_KPM_ADDR(addr)) {
592 int newpage;
593 kmutex_t *smtx;
596 * Pages are successfully prefaulted and locked in
597 * segmap_getmapflt and can't be unlocked until
598 * segmap_release. No hat mappings have to be locked
599 * and they also can't be unlocked as long as the
600 * caller owns an active kpm addr.
602 #ifndef DEBUG
603 if (type != F_SOFTUNLOCK)
604 return (0);
605 #endif
607 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
608 panic("segmap_fault: smap not found "
609 "for addr %p", (void *)addr);
610 /*NOTREACHED*/
613 smtx = SMAPMTX(smp);
614 #ifdef DEBUG
615 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
616 if (newpage) {
617 cmn_err(CE_WARN, "segmap_fault: newpage? smp %p",
618 (void *)smp);
621 if (type != F_SOFTUNLOCK) {
622 mutex_exit(smtx);
623 return (0);
625 #endif
626 mutex_exit(smtx);
627 vp = smp->sm_vp;
628 sm_off = smp->sm_off;
630 if (vp == NULL)
631 return (FC_MAKE_ERR(EIO));
633 ASSERT(smp->sm_refcnt > 0);
635 addroff = (uoff_t)((uintptr_t)addr & MAXBOFFSET);
636 if (addroff + len > MAXBSIZE)
637 panic("segmap_fault: endaddr %p exceeds MAXBSIZE chunk",
638 (void *)(addr + len));
640 off = sm_off + addroff;
642 pp = page_find(&vp->v_object, off);
644 if (pp == NULL)
645 panic("segmap_fault: softunlock page not found");
648 * Set ref bit also here in case of S_OTHER to avoid the
649 * overhead of supporting other cases than F_SOFTUNLOCK
650 * with segkpm. We can do this because the underlying
651 * pages are locked anyway.
653 if (rw == S_WRITE) {
654 hat_setrefmod(pp);
655 } else {
656 hat_setref(pp);
659 return (0);
662 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
663 smp = GET_SMAP(seg, addr);
664 vp = smp->sm_vp;
665 sm_off = smp->sm_off;
667 if (vp == NULL)
668 return (FC_MAKE_ERR(EIO));
670 ASSERT(smp->sm_refcnt > 0);
672 addroff = (uoff_t)((uintptr_t)addr & MAXBOFFSET);
673 if (addroff + len > MAXBSIZE) {
674 panic("segmap_fault: endaddr %p "
675 "exceeds MAXBSIZE chunk", (void *)(addr + len));
676 /*NOTREACHED*/
678 off = sm_off + addroff;
681 * First handle the easy stuff
683 if (type == F_SOFTUNLOCK) {
684 segmap_unlock(hat, seg, addr, len, rw, smp);
685 return (0);
688 err = fop_getpage(vp, (offset_t)off, len, &prot, pl, MAXBSIZE,
689 seg, addr, rw, CRED(), NULL);
691 if (err)
692 return (FC_MAKE_ERR(err));
694 prot &= smd->smd_prot;
697 * Handle all pages returned in the pl[] array.
698 * This loop is coded on the assumption that if
699 * there was no error from the fop_getpage routine,
700 * the page list returned will contain all the
701 * needed pages for the vp from [off..off + len].
703 ppp = pl;
704 while ((pp = *ppp++) != NULL) {
705 uoff_t poff;
706 VERIFY(pp->p_object == &vp->v_object);
707 ASSERT(pp->p_vnode == vp);
708 hat_flag = HAT_LOAD;
711 * Verify that the pages returned are within the range
712 * of this segmap region. Note that it is theoretically
713 * possible for pages outside this range to be returned,
714 * but it is not very likely. If we cannot use the
715 * page here, just release it and go on to the next one.
717 if (pp->p_offset < sm_off ||
718 pp->p_offset >= sm_off + MAXBSIZE) {
719 (void) page_release(pp, 1);
720 continue;
723 ASSERT(hat == kas.a_hat);
724 poff = pp->p_offset;
725 adr = addr + (poff - off);
726 if (adr >= addr && adr < addr + len) {
727 hat_setref(pp);
728 if (type == F_SOFTLOCK)
729 hat_flag = HAT_LOAD_LOCK;
733 * Deal with VMODSORT pages here. If we know this is a write
734 * do the setmod now and allow write protection.
735 * As long as it's modified or not S_OTHER, remove write
736 * protection. With S_OTHER it's up to the FS to deal with this.
738 if (IS_VMODSORT(vp)) {
739 if (rw == S_WRITE)
740 hat_setmod(pp);
741 else if (rw != S_OTHER && !hat_ismod(pp))
742 prot &= ~PROT_WRITE;
745 hat_memload(hat, adr, pp, prot, hat_flag);
746 if (hat_flag != HAT_LOAD_LOCK)
747 page_unlock(pp);
749 return (0);
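/*
 * Editor's note (illustrative): a caller that soft-locks a range through
 * this entry point is expected to unlock the same range afterwards, e.g.
 * (hypothetical caller, error handling omitted):
 *
 *	(void) segmap_fault(kas.a_hat, segkmap, addr, len, F_SOFTLOCK, rw);
 *	... access the mapped data ...
 *	(void) segmap_fault(kas.a_hat, segkmap, addr, len, F_SOFTUNLOCK, rw);
 */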
753 * This routine is used to start I/O on pages asynchronously.
755 static faultcode_t
756 segmap_faulta(struct seg *seg, caddr_t addr)
758 struct smap *smp;
759 struct vnode *vp;
760 uoff_t off;
761 int err;
763 if (segmap_kpm && IS_KPM_ADDR(addr)) {
764 int newpage;
765 kmutex_t *smtx;
768 * Pages are successfully prefaulted and locked in
769 * segmap_getmapflt and can't be unlocked until
770 * segmap_release. No hat mappings have to be locked
771 * and they also can't be unlocked as long as the
772 * caller owns an active kpm addr.
774 #ifdef DEBUG
775 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
776 panic("segmap_faulta: smap not found "
777 "for addr %p", (void *)addr);
778 /*NOTREACHED*/
781 smtx = SMAPMTX(smp);
782 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
783 mutex_exit(smtx);
784 if (newpage)
785 cmn_err(CE_WARN, "segmap_faulta: newpage? smp %p",
786 (void *)smp);
787 #endif
788 return (0);
791 segmapcnt.smp_faulta.value.ul++;
792 smp = GET_SMAP(seg, addr);
794 ASSERT(smp->sm_refcnt > 0);
796 vp = smp->sm_vp;
797 off = smp->sm_off;
799 if (vp == NULL) {
800 cmn_err(CE_WARN, "segmap_faulta - no vp");
801 return (FC_MAKE_ERR(EIO));
804 err = fop_getpage(vp, (offset_t)(off + ((offset_t)((uintptr_t)addr
805 & MAXBOFFSET))), PAGESIZE, (uint_t *)NULL, (page_t **)NULL, 0,
806 seg, addr, S_READ, CRED(), NULL);
808 if (err)
809 return (FC_MAKE_ERR(err));
810 return (0);
813 /*ARGSUSED*/
814 static int
815 segmap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
817 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
819 ASSERT(seg->s_as && RW_LOCK_HELD(&seg->s_as->a_lock));
822 * Need not acquire the segment lock since
823 * "smd_prot" is a read-only field.
825 return (((smd->smd_prot & prot) != prot) ? EACCES : 0);
828 static int
829 segmap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
831 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
832 size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
834 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
836 if (pgno != 0) {
837 do {
838 protv[--pgno] = smd->smd_prot;
839 } while (pgno != 0);
841 return (0);
844 static uoff_t
845 segmap_getoffset(struct seg *seg, caddr_t addr)
847 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
849 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
851 return ((uoff_t)smd->smd_sm->sm_off + (addr - seg->s_base));
854 /*ARGSUSED*/
855 static int
856 segmap_gettype(struct seg *seg, caddr_t addr)
858 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
860 return (MAP_SHARED);
863 /*ARGSUSED*/
864 static int
865 segmap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
867 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
869 ASSERT(seg->s_as && RW_READ_HELD(&seg->s_as->a_lock));
871 /* XXX - This doesn't make any sense */
872 *vpp = smd->smd_sm->sm_vp;
873 return (0);
877 * Check to see if it makes sense to do kluster/read ahead to
878 * addr + delta relative to the mapping at addr. We assume here
879 * that delta is a signed PAGESIZE'd multiple (which can be negative).
881 * For segmap we always "approve" of this action from our standpoint.
883 /*ARGSUSED*/
884 static int
885 segmap_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
887 return (0);
890 static void
891 segmap_badop()
893 panic("segmap_badop");
894 /*NOTREACHED*/
898 * Special private segmap operations
902 * Add smap to the appropriate free list.
904 static void
905 segmap_smapadd(struct smap *smp)
907 struct smfree *sm;
908 struct smap *smpfreelist;
909 struct sm_freeq *releq;
911 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
913 if (smp->sm_refcnt != 0) {
914 panic("segmap_smapadd");
915 /*NOTREACHED*/
918 sm = &smd_free[smp->sm_free_ndx];
920 * Add to the tail of the release queue
921 * Note that sm_releq and sm_allocq could toggle
922 * before we get the lock. This does not affect
923 * correctness as the 2 queues are only maintained
924 * to reduce lock pressure.
926 releq = sm->sm_releq;
927 if (releq == &sm->sm_freeq[0])
928 smp->sm_flags |= SM_QNDX_ZERO;
929 else
930 smp->sm_flags &= ~SM_QNDX_ZERO;
931 mutex_enter(&releq->smq_mtx);
932 smpfreelist = releq->smq_free;
933 if (smpfreelist == 0) {
934 int want;
936 releq->smq_free = smp->sm_next = smp->sm_prev = smp;
938 * Both queue mutexes held to set sm_want;
939 * snapshot the value before dropping releq mutex.
940 * If sm_want appears after the releq mutex is dropped,
941 * then the smap just freed is already gone.
943 want = sm->sm_want;
944 mutex_exit(&releq->smq_mtx);
946 * See if there was a waiter before dropping the releq mutex
947 * then recheck after obtaining sm_freeq[0] mutex as
948 * another thread may have already signaled.
950 if (want) {
951 mutex_enter(&sm->sm_freeq[0].smq_mtx);
952 if (sm->sm_want)
953 cv_signal(&sm->sm_free_cv);
954 mutex_exit(&sm->sm_freeq[0].smq_mtx);
956 } else {
957 smp->sm_next = smpfreelist;
958 smp->sm_prev = smpfreelist->sm_prev;
959 smpfreelist->sm_prev = smp;
960 smp->sm_prev->sm_next = smp;
961 mutex_exit(&releq->smq_mtx);
966 static struct smap *
967 segmap_hashin(struct smap *smp, struct vnode *vp, uoff_t off, int hashid)
969 struct smap **hpp;
970 struct smap *tmp;
971 kmutex_t *hmtx;
973 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
974 ASSERT(smp->sm_vp == NULL);
975 ASSERT(smp->sm_hash == NULL);
976 ASSERT(smp->sm_prev == NULL);
977 ASSERT(smp->sm_next == NULL);
978 ASSERT(hashid >= 0 && hashid <= smd_hashmsk);
980 hmtx = SHASHMTX(hashid);
982 mutex_enter(hmtx);
984 * First we need to verify that no one has created a smp
985 * with (vp,off) as its tag before us.
987 for (tmp = smd_hash[hashid].sh_hash_list;
988 tmp != NULL; tmp = tmp->sm_hash)
989 if (tmp->sm_vp == vp && tmp->sm_off == off)
990 break;
992 if (tmp == NULL) {
994 * No one created one yet.
996 * Funniness here - we don't increment the ref count on the
997 * vnode * even though we have another pointer to it here.
998 * The reason for this is that we don't want the fact that
999 * a seg_map entry somewhere refers to a vnode to prevent the
1000 * vnode * itself from going away. This is because this
1001 * reference to the vnode is a "soft one". In the case where
1002 * a mapping is being used by a rdwr [or directory routine?]
1003 * there already has to be a non-zero ref count on the vnode.
1004 * In the case where the vp has been freed and the smap
1005 * structure is on the free list, there are no pages in memory
1006 * that can refer to the vnode. Thus even if we reuse the same
1007 * vnode/smap structure for a vnode which has the same
1008 * address but represents a different object, we are ok.
1010 smp->sm_vp = vp;
1011 smp->sm_off = off;
1013 hpp = &smd_hash[hashid].sh_hash_list;
1014 smp->sm_hash = *hpp;
1015 *hpp = smp;
1016 #ifdef SEGMAP_HASHSTATS
1017 smd_hash_len[hashid]++;
1018 #endif
1020 mutex_exit(hmtx);
1022 return (tmp);
1025 static void
1026 segmap_hashout(struct smap *smp)
1028 struct smap **hpp, *hp;
1029 struct vnode *vp;
1030 kmutex_t *mtx;
1031 int hashid;
1032 uoff_t off;
1034 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
1036 vp = smp->sm_vp;
1037 off = smp->sm_off;
1039 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
1040 mtx = SHASHMTX(hashid);
1041 mutex_enter(mtx);
1043 hpp = &smd_hash[hashid].sh_hash_list;
1044 for (;;) {
1045 hp = *hpp;
1046 if (hp == NULL) {
1047 panic("segmap_hashout");
1048 /*NOTREACHED*/
1050 if (hp == smp)
1051 break;
1052 hpp = &hp->sm_hash;
1055 *hpp = smp->sm_hash;
1056 smp->sm_hash = NULL;
1057 #ifdef SEGMAP_HASHSTATS
1058 smd_hash_len[hashid]--;
1059 #endif
1060 mutex_exit(mtx);
1062 smp->sm_vp = NULL;
1063 smp->sm_off = 0;
1068 * Attempt to free unmodified, unmapped, and non locked segmap
1069 * pages.
1071 void
1072 segmap_pagefree(struct vnode *vp, uoff_t off)
1074 uoff_t pgoff;
1075 page_t *pp;
1077 for (pgoff = off; pgoff < off + MAXBSIZE; pgoff += PAGESIZE) {
1079 if ((pp = page_lookup_nowait(&vp->v_object, pgoff, SE_EXCL)) == NULL)
1080 continue;
1082 switch (page_release(pp, 1)) {
1083 case PGREL_NOTREL:
1084 segmapcnt.smp_free_notfree.value.ul++;
1085 break;
1086 case PGREL_MOD:
1087 segmapcnt.smp_free_dirty.value.ul++;
1088 break;
1089 case PGREL_CLEAN:
1090 segmapcnt.smp_free.value.ul++;
1091 break;
1097 * Locks held on entry: smap lock
1098 * Locks held on exit : smap lock.
1101 static void
1102 grab_smp(struct smap *smp, page_t *pp)
1104 ASSERT(MUTEX_HELD(SMAPMTX(smp)));
1105 ASSERT(smp->sm_refcnt == 0);
1107 if (smp->sm_vp != NULL) {
1108 struct vnode *vp = smp->sm_vp;
1109 uoff_t off = smp->sm_off;
1111 * Destroy old vnode association and
1112 * unload any hardware translations to
1113 * the old object.
1115 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reuse++;
1116 segmap_hashout(smp);
1119 * This node is off freelist and hashlist,
1120 * so there is no reason to drop/reacquire sm_mtx
1121 * across calls to hat_unload.
1123 if (segmap_kpm) {
1124 caddr_t vaddr;
1125 int hat_unload_needed = 0;
1128 * unload kpm mapping
1130 if (pp != NULL) {
1131 vaddr = hat_kpm_page2va(pp, 1);
1132 hat_kpm_mapout(pp, GET_KPME(smp), vaddr);
1133 page_unlock(pp);
1137 * Check if we have (also) the rare case of a
1138 * non kpm mapping.
1140 if (smp->sm_flags & SM_NOTKPM_RELEASED) {
1141 hat_unload_needed = 1;
1142 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
1145 if (hat_unload_needed) {
1146 hat_unload(kas.a_hat, segkmap->s_base +
1147 ((smp - smd_smap) * MAXBSIZE),
1148 MAXBSIZE, HAT_UNLOAD);
1151 } else {
1152 ASSERT(smp->sm_flags & SM_NOTKPM_RELEASED);
1153 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
1154 hat_unload(kas.a_hat, segkmap->s_base +
1155 ((smp - smd_smap) * MAXBSIZE),
1156 MAXBSIZE, HAT_UNLOAD);
1158 segmap_pagefree(vp, off);
1162 static struct smap *
1163 get_free_smp(int free_ndx)
1165 struct smfree *sm;
1166 kmutex_t *smtx;
1167 struct smap *smp, *first;
1168 struct sm_freeq *allocq, *releq;
1169 struct kpme *kpme;
1170 page_t *pp = NULL;
1171 int end_ndx, page_locked = 0;
1173 end_ndx = free_ndx;
1174 sm = &smd_free[free_ndx];
1176 retry_queue:
1177 allocq = sm->sm_allocq;
1178 mutex_enter(&allocq->smq_mtx);
1180 if ((smp = allocq->smq_free) == NULL) {
1182 skip_queue:
1184 * The alloc list is empty or this queue is being skipped;
1185 * first see if the allocq toggled.
1187 if (sm->sm_allocq != allocq) {
1188 /* queue changed */
1189 mutex_exit(&allocq->smq_mtx);
1190 goto retry_queue;
1192 releq = sm->sm_releq;
1193 if (!mutex_tryenter(&releq->smq_mtx)) {
1194 /* cannot get releq; a free smp may be there now */
1195 mutex_exit(&allocq->smq_mtx);
1198 * This loop could spin forever if this thread has
1199 * higher priority than the thread that is holding
1200 * releq->smq_mtx. In order to force the other thread
1201 * to run, we'll lock/unlock the mutex which is safe
1202 * since we just unlocked the allocq mutex.
1204 mutex_enter(&releq->smq_mtx);
1205 mutex_exit(&releq->smq_mtx);
1206 goto retry_queue;
1208 if (releq->smq_free == NULL) {
1210 * This freelist is empty.
1211 * This should not happen unless clients
1212 * are failing to release the segmap
1213 * window after accessing the data.
1214 * Before resorting to sleeping, try
1215 * the next list of the same color.
1217 free_ndx = (free_ndx + smd_ncolor) & smd_freemsk;
1218 if (free_ndx != end_ndx) {
1219 mutex_exit(&releq->smq_mtx);
1220 mutex_exit(&allocq->smq_mtx);
1221 sm = &smd_free[free_ndx];
1222 goto retry_queue;
1225 * Tried all freelists of the same color once,
1226 * wait on this list and hope something gets freed.
1228 segmapcnt.smp_get_nofree.value.ul++;
1229 sm->sm_want++;
1230 mutex_exit(&sm->sm_freeq[1].smq_mtx);
1231 cv_wait(&sm->sm_free_cv,
1232 &sm->sm_freeq[0].smq_mtx);
1233 sm->sm_want--;
1234 mutex_exit(&sm->sm_freeq[0].smq_mtx);
1235 sm = &smd_free[free_ndx];
1236 goto retry_queue;
1237 } else {
1239 * Something on the rele queue; flip the alloc
1240 * and rele queues and retry.
1242 sm->sm_allocq = releq;
1243 sm->sm_releq = allocq;
1244 mutex_exit(&allocq->smq_mtx);
1245 mutex_exit(&releq->smq_mtx);
1246 if (page_locked) {
1247 ddi_msleep(250);
1248 page_locked = 0;
1250 goto retry_queue;
1252 } else {
1254 * Fastpath the case we get the smap mutex
1255 * on the first try.
1257 first = smp;
1258 next_smap:
1259 smtx = SMAPMTX(smp);
1260 if (!mutex_tryenter(smtx)) {
1262 * Another thread is trying to reclaim this slot.
1263 * Skip to the next queue or smap.
1265 if ((smp = smp->sm_next) == first) {
1266 goto skip_queue;
1267 } else {
1268 goto next_smap;
1270 } else {
1272 * if kpme exists, get shared lock on the page
1274 if (segmap_kpm && smp->sm_vp != NULL) {
1276 kpme = GET_KPME(smp);
1277 pp = kpme->kpe_page;
1279 if (pp != NULL) {
1280 if (!page_trylock(pp, SE_SHARED)) {
1281 smp = smp->sm_next;
1282 mutex_exit(smtx);
1283 page_locked = 1;
1285 pp = NULL;
1287 if (smp == first) {
1288 goto skip_queue;
1289 } else {
1290 goto next_smap;
1292 } else {
1293 if (kpme->kpe_page == NULL) {
1294 page_unlock(pp);
1295 pp = NULL;
1302 * At this point, we've selected smp. Remove smp
1303 * from its freelist. If smp is the first one in
1304 * the freelist, update the head of the freelist.
1306 if (first == smp) {
1307 ASSERT(first == allocq->smq_free);
1308 allocq->smq_free = smp->sm_next;
1312 * if the head of the freelist still points to smp,
1313 * then there are no more free smaps in that list.
1315 if (allocq->smq_free == smp)
1317 * Took the last one
1319 allocq->smq_free = NULL;
1320 else {
1321 smp->sm_prev->sm_next = smp->sm_next;
1322 smp->sm_next->sm_prev = smp->sm_prev;
1324 mutex_exit(&allocq->smq_mtx);
1325 smp->sm_prev = smp->sm_next = NULL;
1328 * if pp != NULL, pp must have been locked;
1329 * grab_smp() unlocks pp.
1331 ASSERT((pp == NULL) || PAGE_LOCKED(pp));
1332 grab_smp(smp, pp);
1333 /* return smp locked. */
1334 ASSERT(SMAPMTX(smp) == smtx);
1335 ASSERT(MUTEX_HELD(smtx));
1336 return (smp);
1342 * Special public segmap operations
1346 * Create pages (without using fop_getpage) and load up translations to them.
1347 * If softlock is TRUE, then set things up so that it looks like a call
1348 * to segmap_fault with F_SOFTLOCK.
1350 * Returns 1, if a page is created by calling page_create_va(), or 0 otherwise.
1352 * All fields in the generic segment (struct seg) are considered to be
1353 * read-only for "segmap" even though the kernel address space (kas) may
1354 * not be locked, hence no lock is needed to access them.
1357 segmap_pagecreate(struct seg *seg, caddr_t addr, size_t len, int softlock)
1359 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
1360 page_t *pp;
1361 uoff_t off;
1362 struct smap *smp;
1363 struct vnode *vp;
1364 caddr_t eaddr;
1365 int newpage = 0;
1366 uint_t prot;
1367 kmutex_t *smtx;
1368 int hat_flag;
1370 ASSERT(seg->s_as == &kas);
1372 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1374 * Pages are successfully prefaulted and locked in
1375 * segmap_getmapflt and can't be unlocked until
1376 * segmap_release. The SM_KPM_NEWPAGE flag is set
1377 * in segmap_pagecreate_kpm when new pages are created,
1378 * and it is returned as the "newpage" indication here.
1380 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
1381 panic("segmap_pagecreate: smap not found "
1382 "for addr %p", (void *)addr);
1383 /*NOTREACHED*/
1386 smtx = SMAPMTX(smp);
1387 newpage = smp->sm_flags & SM_KPM_NEWPAGE;
1388 smp->sm_flags &= ~SM_KPM_NEWPAGE;
1389 mutex_exit(smtx);
1391 return (newpage);
1394 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
1396 eaddr = addr + len;
1397 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1399 smp = GET_SMAP(seg, addr);
1402 * We don't grab smp mutex here since we assume the smp
1403 * has a refcnt set already which prevents the slot from
1404 * changing its id.
1406 ASSERT(smp->sm_refcnt > 0);
1408 vp = smp->sm_vp;
1409 off = smp->sm_off + ((uoff_t)((uintptr_t)addr & MAXBOFFSET));
1410 prot = smd->smd_prot;
1412 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
1413 hat_flag = HAT_LOAD;
1414 pp = page_lookup(&vp->v_object, off, SE_SHARED);
1415 if (pp == NULL) {
1416 ushort_t bitindex;
1418 if ((pp = page_create_va(&vp->v_object, off,
1419 PAGESIZE, PG_WAIT, seg, addr)) == NULL) {
1420 panic("segmap_pagecreate: page_create failed");
1421 /*NOTREACHED*/
1423 newpage = 1;
1424 page_io_unlock(pp);
1427 * Since pages created here do not contain valid
1428 * data until the caller writes into them, the
1429 * "exclusive" lock will not be dropped to prevent
1430 * other users from accessing the page. We also
1431 * have to lock the translation to prevent a fault
1432 * from occurring when the virtual address mapped by
1433 * this page is written into. This is necessary to
1434 * avoid a deadlock since we haven't dropped the
1435 * "exclusive" lock.
1437 bitindex = (ushort_t)((off - smp->sm_off) >> PAGESHIFT);
1440 * Large Files: The following assertion is to
1441 * verify the cast above.
1443 ASSERT((uoff_t)(off - smp->sm_off) <= INT_MAX);
1444 smtx = SMAPMTX(smp);
1445 mutex_enter(smtx);
1446 smp->sm_bitmap |= SMAP_BIT_MASK(bitindex);
1447 mutex_exit(smtx);
1449 hat_flag = HAT_LOAD_LOCK;
1450 } else if (softlock) {
1451 hat_flag = HAT_LOAD_LOCK;
1454 if (IS_VMODSORT(pp->p_vnode) && (prot & PROT_WRITE))
1455 hat_setmod(pp);
1457 hat_memload(kas.a_hat, addr, pp, prot, hat_flag);
1459 if (hat_flag != HAT_LOAD_LOCK)
1460 page_unlock(pp);
1463 return (newpage);
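/*
 * Editor's note (illustrative): a filesystem write path that is about to
 * overwrite whole pages can skip reading them in by asking for the window
 * with SM_PAGECREATE and then calling segmap_pagecreate(); the fragment
 * below is a sketch only (names, flags and error handling are assumptions).
 *
 *	base = segmap_getmapflt(segkmap, vp, off, n, SM_PAGECREATE, S_WRITE);
 *	newpage = segmap_pagecreate(segkmap, base + pgoff, n, 0);
 *	error = uiomove(base + pgoff, n, UIO_WRITE, uio);
 *	if (newpage)
 *		segmap_pageunlock(segkmap, base + pgoff, n, S_WRITE);
 *	error = segmap_release(segkmap, base, error ? SM_INVAL : SM_WRITE);
 */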
1466 void
1467 segmap_pageunlock(struct seg *seg, caddr_t addr, size_t len, enum seg_rw rw)
1469 struct smap *smp;
1470 ushort_t bitmask;
1471 page_t *pp;
1472 struct vnode *vp;
1473 uoff_t off;
1474 caddr_t eaddr;
1475 kmutex_t *smtx;
1477 ASSERT(seg->s_as == &kas);
1479 eaddr = addr + len;
1480 addr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
1482 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1484 * Pages are successfully prefaulted and locked in
1485 * segmap_getmapflt and can't be unlocked until
1486 * segmap_release, so no pages or hat mappings have
1487 * to be unlocked at this point.
1489 #ifdef DEBUG
1490 if ((smp = get_smap_kpm(addr, NULL)) == NULL) {
1491 panic("segmap_pageunlock: smap not found "
1492 "for addr %p", (void *)addr);
1493 /*NOTREACHED*/
1496 ASSERT(smp->sm_refcnt > 0);
1497 mutex_exit(SMAPMTX(smp));
1498 #endif
1499 return;
1502 smp = GET_SMAP(seg, addr);
1503 smtx = SMAPMTX(smp);
1505 ASSERT(smp->sm_refcnt > 0);
1507 vp = smp->sm_vp;
1508 off = smp->sm_off + ((uoff_t)((uintptr_t)addr & MAXBOFFSET));
1510 for (; addr < eaddr; addr += PAGESIZE, off += PAGESIZE) {
1511 bitmask = SMAP_BIT_MASK((int)(off - smp->sm_off) >> PAGESHIFT);
1514 * Large Files: Following assertion is to verify
1515 * the correctness of the cast to (int) above.
1517 ASSERT((uoff_t)(off - smp->sm_off) <= INT_MAX);
1520 * If the bit corresponding to "off" is set,
1521 * clear this bit in the bitmap, unlock translations,
1522 * and release the "exclusive" lock on the page.
1524 if (smp->sm_bitmap & bitmask) {
1525 mutex_enter(smtx);
1526 smp->sm_bitmap &= ~bitmask;
1527 mutex_exit(smtx);
1529 hat_unlock(kas.a_hat, addr, PAGESIZE);
1532 * Use page_find() instead of page_lookup() to
1533 * find the page since we know that it has
1534 * "exclusive" lock.
1536 pp = page_find(&vp->v_object, off);
1537 if (pp == NULL) {
1538 panic("segmap_pageunlock: page not found");
1539 /*NOTREACHED*/
1541 if (rw == S_WRITE) {
1542 hat_setrefmod(pp);
1543 } else if (rw != S_OTHER) {
1544 hat_setref(pp);
1547 page_unlock(pp);
1552 caddr_t
1553 segmap_getmap(struct seg *seg, struct vnode *vp, uoff_t off)
1555 return (segmap_getmapflt(seg, vp, off, MAXBSIZE, 0, S_OTHER));
1559 * This is the magic virtual address that offset 0 of an ELF
1560 * file gets mapped to in user space. This is used to pick
1561 * the vac color on the freelist.
1563 #define ELF_OFFZERO_VA (0x10000)
1565 * segmap_getmap allocates a MAXBSIZE big slot to map the vnode vp
1566 * in the range <off, off + len). off doesn't need to be MAXBSIZE aligned.
1567 * The return address is always MAXBSIZE aligned.
1569 * If forcefault is nonzero and the MMU translations haven't yet been created,
1570 * segmap_getmap will call segmap_fault(..., F_INVAL, rw) to create them.
1572 caddr_t
1573 segmap_getmapflt(
1574 struct seg *seg,
1575 struct vnode *vp,
1576 uoff_t off,
1577 size_t len,
1578 int forcefault,
1579 enum seg_rw rw)
1581 struct smap *smp, *nsmp;
1582 extern struct vnode *common_specvp();
1583 caddr_t baseaddr; /* MAXBSIZE aligned */
1584 uoff_t baseoff;
1585 int newslot;
1586 caddr_t vaddr;
1587 int color, hashid;
1588 kmutex_t *hashmtx, *smapmtx;
1589 struct smfree *sm;
1590 page_t *pp;
1591 struct kpme *kpme;
1592 uint_t prot;
1593 caddr_t base;
1594 page_t *pl[MAXPPB + 1];
1595 int error;
1596 int is_kpm = 1;
1598 ASSERT(seg->s_as == &kas);
1599 ASSERT(seg == segkmap);
1601 baseoff = off & (offset_t)MAXBMASK;
1602 if (off + len > baseoff + MAXBSIZE) {
1603 panic("segmap_getmap bad len");
1604 /*NOTREACHED*/
1608 * If this is a block device we have to be sure to use the
1609 * "common" block device vnode for the mapping.
1611 if (vp->v_type == VBLK)
1612 vp = common_specvp(vp);
1614 smd_cpu[CPU->cpu_seqid].scpu.scpu_getmap++;
1616 if (segmap_kpm == 0 ||
1617 (forcefault == SM_PAGECREATE && rw != S_WRITE)) {
1618 is_kpm = 0;
1621 SMAP_HASHFUNC(vp, off, hashid); /* macro assigns hashid */
1622 hashmtx = SHASHMTX(hashid);
1624 retry_hash:
1625 mutex_enter(hashmtx);
1626 for (smp = smd_hash[hashid].sh_hash_list;
1627 smp != NULL; smp = smp->sm_hash)
1628 if (smp->sm_vp == vp && smp->sm_off == baseoff)
1629 break;
1630 mutex_exit(hashmtx);
1632 vrfy_smp:
1633 if (smp != NULL) {
1635 ASSERT(vp->v_count != 0);
1638 * Get smap lock and recheck its tag. The hash lock
1639 * is dropped since the hash is based on (vp, off)
1640 * and (vp, off) won't change when we have smap mtx.
1642 smapmtx = SMAPMTX(smp);
1643 mutex_enter(smapmtx);
1644 if (smp->sm_vp != vp || smp->sm_off != baseoff) {
1645 mutex_exit(smapmtx);
1646 goto retry_hash;
1649 if (smp->sm_refcnt == 0) {
1651 smd_cpu[CPU->cpu_seqid].scpu.scpu_get_reclaim++;
1654 * Could still be on the free list. However, this
1655 * could also be an smp that is transitioning from
1656 * the free list when we have too much contention
1657 * for the smapmtx's. In this case, we have an
1658 * unlocked smp that is not on the free list any
1659 * longer, but still has a 0 refcnt. The only way
1660 * to be sure is to check the freelist pointers.
1661 * Since we now have the smapmtx, we are guaranteed
1662 * that the (vp, off) won't change, so we are safe
1663 * to reclaim it. get_free_smp() knows that this
1664 * can happen, and it will check the refcnt.
1667 if ((smp->sm_next != NULL)) {
1668 struct sm_freeq *freeq;
1670 ASSERT(smp->sm_prev != NULL);
1671 sm = &smd_free[smp->sm_free_ndx];
1673 if (smp->sm_flags & SM_QNDX_ZERO)
1674 freeq = &sm->sm_freeq[0];
1675 else
1676 freeq = &sm->sm_freeq[1];
1678 mutex_enter(&freeq->smq_mtx);
1679 if (freeq->smq_free != smp) {
1681 * fastpath normal case
1683 smp->sm_prev->sm_next = smp->sm_next;
1684 smp->sm_next->sm_prev = smp->sm_prev;
1685 } else if (smp == smp->sm_next) {
1687 * Taking the last smap on freelist
1689 freeq->smq_free = NULL;
1690 } else {
1692 * Reclaiming 1st smap on list
1694 freeq->smq_free = smp->sm_next;
1695 smp->sm_prev->sm_next = smp->sm_next;
1696 smp->sm_next->sm_prev = smp->sm_prev;
1698 mutex_exit(&freeq->smq_mtx);
1699 smp->sm_prev = smp->sm_next = NULL;
1700 } else {
1701 ASSERT(smp->sm_prev == NULL);
1702 segmapcnt.smp_stolen.value.ul++;
1705 } else {
1706 segmapcnt.smp_get_use.value.ul++;
1708 smp->sm_refcnt++; /* another user */
1711 * We don't invoke segmap_fault via TLB miss, so we set ref
1712 * and mod bits in advance. For S_OTHER we set them in
1713 * segmap_fault F_SOFTUNLOCK.
1715 if (is_kpm) {
1716 if (rw == S_WRITE) {
1717 smp->sm_flags |= SM_WRITE_DATA;
1718 } else if (rw == S_READ) {
1719 smp->sm_flags |= SM_READ_DATA;
1722 mutex_exit(smapmtx);
1724 newslot = 0;
1725 } else {
1727 uint32_t free_ndx, *free_ndxp;
1728 union segmap_cpu *scpu;
1731 * On a PAC machine or a machine with anti-alias
1732 * hardware, smd_colormsk will be zero.
1734 * On a VAC machine- pick color by offset in the file
1735 * so we won't get VAC conflicts on elf files.
1736 * On data files, color does not matter but we
1737 * don't know what kind of file it is so we always
1738 * pick color by offset. This causes color
1739 * corresponding to file offset zero to be used more
1740 * heavily.
1742 color = (baseoff >> MAXBSHIFT) & smd_colormsk;
1743 scpu = smd_cpu+CPU->cpu_seqid;
1744 free_ndxp = &scpu->scpu.scpu_free_ndx[color];
1745 free_ndx = (*free_ndxp += smd_ncolor) & smd_freemsk;
1746 #ifdef DEBUG
1747 colors_used[free_ndx]++;
1748 #endif /* DEBUG */
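/*
 * Editor's note (illustrative): the per-cpu rotor keeps allocations within
 * one color class. With smd_ncolor == 2 and smd_nfree == 8, for example,
 * color-0 offsets walk freelists 0, 2, 4, 6, ... and color-1 offsets walk
 * 1, 3, 5, 7, ..., because free_ndx advances by smd_ncolor each time and
 * is masked by smd_freemsk.
 */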
1751 * Get a locked smp slot from the free list.
1753 smp = get_free_smp(free_ndx);
1754 smapmtx = SMAPMTX(smp);
1756 ASSERT(smp->sm_vp == NULL);
1758 if ((nsmp = segmap_hashin(smp, vp, baseoff, hashid)) != NULL) {
1760 * Failed to hashin, there exists one now.
1761 * Return the smp we just allocated.
1763 segmap_smapadd(smp);
1764 mutex_exit(smapmtx);
1766 smp = nsmp;
1767 goto vrfy_smp;
1769 smp->sm_refcnt++; /* another user */
1772 * We don't invoke segmap_fault via TLB miss, so we set ref
1773 * and mod bits in advance. For S_OTHER we set them in
1774 * segmap_fault F_SOFTUNLOCK.
1776 if (is_kpm) {
1777 if (rw == S_WRITE) {
1778 smp->sm_flags |= SM_WRITE_DATA;
1779 } else if (rw == S_READ) {
1780 smp->sm_flags |= SM_READ_DATA;
1783 mutex_exit(smapmtx);
1785 newslot = 1;
1788 if (!is_kpm)
1789 goto use_segmap_range;
1792 * Use segkpm
1794 /* Lint directive required until 6746211 is fixed */
1795 /*CONSTCOND*/
1796 ASSERT(PAGESIZE == MAXBSIZE);
1799 * remember the last smp faulted on this cpu.
1801 (smd_cpu+CPU->cpu_seqid)->scpu.scpu_last_smap = smp;
1803 if (forcefault == SM_PAGECREATE) {
1804 baseaddr = segmap_pagecreate_kpm(seg, vp, baseoff, smp, rw);
1805 return (baseaddr);
1808 if (newslot == 0 &&
1809 (pp = GET_KPME(smp)->kpe_page) != NULL) {
1811 /* fastpath */
1812 switch (rw) {
1813 case S_READ:
1814 case S_WRITE:
1815 if (page_trylock(pp, SE_SHARED)) {
1816 if (PP_ISFREE(pp) ||
1817 !(pp->p_vnode == vp &&
1818 pp->p_offset == baseoff)) {
1819 page_unlock(pp);
1820 pp = page_lookup(&vp->v_object,
1821 baseoff, SE_SHARED);
1823 } else {
1824 pp = page_lookup(&vp->v_object, baseoff,
1825 SE_SHARED);
1828 if (pp == NULL) {
1829 ASSERT(GET_KPME(smp)->kpe_page == NULL);
1830 break;
1833 if (rw == S_WRITE &&
1834 hat_page_getattr(pp, P_MOD | P_REF) !=
1835 (P_MOD | P_REF)) {
1836 page_unlock(pp);
1837 break;
1841 * We have the p_selock as reader, grab_smp
1842 * can't hit us, we have bumped the smap
1843 * refcnt and hat_pageunload needs the
1844 * p_selock exclusive.
1846 kpme = GET_KPME(smp);
1847 if (kpme->kpe_page == pp) {
1848 baseaddr = hat_kpm_page2va(pp, 0);
1849 } else if (kpme->kpe_page == NULL) {
1850 baseaddr = hat_kpm_mapin(pp, kpme);
1851 } else {
1852 panic("segmap_getmapflt: stale "
1853 "kpme page, kpme %p", (void *)kpme);
1854 /*NOTREACHED*/
1858 * We don't invoke segmap_fault via TLB miss,
1859 * so we set ref and mod bits in advance.
1860 * For S_OTHER we set them in segmap_fault
1861 * F_SOFTUNLOCK.
1863 if (rw == S_READ && !hat_isref(pp))
1864 hat_setref(pp);
1866 return (baseaddr);
1867 default:
1868 break;
1872 base = segkpm_create_va(baseoff);
1873 error = fop_getpage(vp, (offset_t)baseoff, len, &prot, pl, MAXBSIZE,
1874 seg, base, rw, CRED(), NULL);
1876 pp = pl[0];
1877 if (error || pp == NULL) {
1879 * Use segmap address slot and let segmap_fault deal
1880 * with the error cases. There is no error return
1881 * possible here.
1883 goto use_segmap_range;
1886 ASSERT(pl[1] == NULL);
1889 * When prot is not returned w/ PROT_ALL the returned pages
1890 * are not backed by fs blocks. For most of the segmap users
1891 * this is no problem, they don't write to the pages in the
1892 * same request and therefore don't rely on a following
1893 * trap driven segmap_fault. With SM_LOCKPROTO users it
1894 * is more secure to use segkmap addresses to allow
1895 * protection segmap_fault's.
1897 if (prot != PROT_ALL && forcefault == SM_LOCKPROTO) {
1899 * Use segmap address slot and let segmap_fault
1900 * do the error return.
1902 ASSERT(rw != S_WRITE);
1903 ASSERT(PAGE_LOCKED(pp));
1904 page_unlock(pp);
1905 forcefault = 0;
1906 goto use_segmap_range;
1910 * We have the p_selock as reader, grab_smp can't hit us, we
1911 * have bumped the smap refcnt and hat_pageunload needs the
1912 * p_selock exclusive.
1914 kpme = GET_KPME(smp);
1915 if (kpme->kpe_page == pp) {
1916 baseaddr = hat_kpm_page2va(pp, 0);
1917 } else if (kpme->kpe_page == NULL) {
1918 baseaddr = hat_kpm_mapin(pp, kpme);
1919 } else {
1920 panic("segmap_getmapflt: stale kpme page after "
1921 "fop_getpage, kpme %p", (void *)kpme);
1922 /*NOTREACHED*/
1925 smd_cpu[CPU->cpu_seqid].scpu.scpu_fault++;
1927 return (baseaddr);
1930 use_segmap_range:
1931 baseaddr = seg->s_base + ((smp - smd_smap) * MAXBSIZE);
1934 * Prefault the translations
1936 vaddr = baseaddr + (off - baseoff);
1937 if (forcefault && (newslot || !hat_probe(kas.a_hat, vaddr))) {
1939 caddr_t pgaddr = (caddr_t)((uintptr_t)vaddr &
1940 (uintptr_t)PAGEMASK);
1942 (void) segmap_fault(kas.a_hat, seg, pgaddr,
1943 (vaddr + len - pgaddr + PAGESIZE - 1) & (uintptr_t)PAGEMASK,
1944 F_INVAL, rw);
1947 return (baseaddr);
1951 segmap_release(struct seg *seg, caddr_t addr, uint_t flags)
1953 struct smap *smp;
1954 int error;
1955 int bflags = 0;
1956 struct vnode *vp;
1957 uoff_t offset;
1958 kmutex_t *smtx;
1959 int is_kpm = 0;
1960 page_t *pp;
1962 if (segmap_kpm && IS_KPM_ADDR(addr)) {
1964 if (((uintptr_t)addr & MAXBOFFSET) != 0) {
1965 panic("segmap_release: addr %p not "
1966 "MAXBSIZE aligned", (void *)addr);
1967 /*NOTREACHED*/
1970 if ((smp = get_smap_kpm(addr, &pp)) == NULL) {
1971 panic("segmap_release: smap not found "
1972 "for addr %p", (void *)addr);
1973 /*NOTREACHED*/
1976 smtx = SMAPMTX(smp);
1979 * For compatibility reasons segmap_pagecreate_kpm sets this
1980 * flag to allow a following segmap_pagecreate to return
1981 * this as "newpage" flag. When segmap_pagecreate is not
1982 * called at all we clear it now.
1984 smp->sm_flags &= ~SM_KPM_NEWPAGE;
1985 is_kpm = 1;
1986 if (smp->sm_flags & SM_WRITE_DATA) {
1987 hat_setrefmod(pp);
1988 } else if (smp->sm_flags & SM_READ_DATA) {
1989 hat_setref(pp);
1991 } else {
1992 if (addr < seg->s_base || addr >= seg->s_base + seg->s_size ||
1993 ((uintptr_t)addr & MAXBOFFSET) != 0) {
1994 panic("segmap_release: bad addr %p", (void *)addr);
1995 /*NOTREACHED*/
1997 smp = GET_SMAP(seg, addr);
1999 smtx = SMAPMTX(smp);
2000 mutex_enter(smtx);
2001 smp->sm_flags |= SM_NOTKPM_RELEASED;
2004 ASSERT(smp->sm_refcnt > 0);
2007 * Need to call fop_putpage() if any flags (except SM_DONTNEED)
2008 * are set.
2010 if ((flags & ~SM_DONTNEED) != 0) {
2011 if (flags & SM_WRITE)
2012 segmapcnt.smp_rel_write.value.ul++;
2013 if (flags & SM_ASYNC) {
2014 bflags |= B_ASYNC;
2015 segmapcnt.smp_rel_async.value.ul++;
2017 if (flags & SM_INVAL) {
2018 bflags |= B_INVAL;
2019 segmapcnt.smp_rel_abort.value.ul++;
2021 if (flags & SM_DESTROY) {
2022 bflags |= (B_INVAL|B_TRUNC);
2023 segmapcnt.smp_rel_abort.value.ul++;
2025 if (smp->sm_refcnt == 1) {
2027 * We only bother doing the FREE and DONTNEED flags
2028 * if no one else is still referencing this mapping.
2030 if (flags & SM_FREE) {
2031 bflags |= B_FREE;
2032 segmapcnt.smp_rel_free.value.ul++;
2034 if (flags & SM_DONTNEED) {
2035 bflags |= B_DONTNEED;
2036 segmapcnt.smp_rel_dontneed.value.ul++;
2039 } else {
2040 smd_cpu[CPU->cpu_seqid].scpu.scpu_release++;
2043 vp = smp->sm_vp;
2044 offset = smp->sm_off;
2046 if (--smp->sm_refcnt == 0) {
2048 smp->sm_flags &= ~(SM_WRITE_DATA | SM_READ_DATA);
2050 if (flags & (SM_INVAL|SM_DESTROY)) {
2051 segmap_hashout(smp); /* remove map info */
2052 if (is_kpm) {
2053 hat_kpm_mapout(pp, GET_KPME(smp), addr);
2054 if (smp->sm_flags & SM_NOTKPM_RELEASED) {
2055 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
2056 hat_unload(kas.a_hat, segkmap->s_base +
2057 ((smp - smd_smap) * MAXBSIZE),
2058 MAXBSIZE, HAT_UNLOAD);
2061 } else {
2062 if (segmap_kpm)
2063 segkpm_mapout_validkpme(GET_KPME(smp));
2065 smp->sm_flags &= ~SM_NOTKPM_RELEASED;
2066 hat_unload(kas.a_hat, addr, MAXBSIZE,
2067 HAT_UNLOAD);
2070 segmap_smapadd(smp); /* add to free list */
2073 mutex_exit(smtx);
2075 if (is_kpm)
2076 page_unlock(pp);
2078 * Now invoke fop_putpage() if any flags (except SM_DONTNEED)
2079 * are set.
2081 if ((flags & ~SM_DONTNEED) != 0) {
2082 error = fop_putpage(vp, offset, MAXBSIZE,
2083 bflags, CRED(), NULL);
2084 } else {
2085 error = 0;
2088 return (error);
2092 * Dump the pages belonging to this segmap segment.
2094 static void
2095 segmap_dump(struct seg *seg)
2097 struct segmap_data *smd;
2098 struct smap *smp, *smp_end;
2099 page_t *pp;
2100 pfn_t pfn;
2101 uoff_t off;
2102 caddr_t addr;
2104 smd = (struct segmap_data *)seg->s_data;
2105 addr = seg->s_base;
2106 for (smp = smd->smd_sm, smp_end = smp + smd->smd_npages;
2107 smp < smp_end; smp++) {
2109 if (smp->sm_refcnt) {
2110 for (off = 0; off < MAXBSIZE; off += PAGESIZE) {
2111 int we_own_it = 0;
2114 * If pp == NULL, the page either does
2115 * not exist or is exclusively locked.
2116 * So determine if it exists before
2117 * searching for it.
2119 if ((pp = page_lookup_nowait(&smp->sm_vp->v_object,
2120 smp->sm_off + off,
2121 SE_SHARED)))
2122 we_own_it = 1;
2123 else
2124 pp = page_exists(&smp->sm_vp->v_object,
2125 smp->sm_off + off);
2127 if (pp) {
2128 pfn = page_pptonum(pp);
2129 dump_addpage(seg->s_as,
2130 addr + off, pfn);
2131 if (we_own_it)
2132 page_unlock(pp);
2134 dump_timeleft = dump_timeout;
2137 addr += MAXBSIZE;
2141 /*ARGSUSED*/
2142 static int
2143 segmap_pagelock(struct seg *seg, caddr_t addr, size_t len,
2144 struct page ***ppp, enum lock_type type, enum seg_rw rw)
2146 return (ENOTSUP);
2149 static int
2150 segmap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
2152 struct segmap_data *smd = (struct segmap_data *)seg->s_data;
2154 memidp->val[0] = (uintptr_t)smd->smd_sm->sm_vp;
2155 memidp->val[1] = smd->smd_sm->sm_off + (uintptr_t)(addr - seg->s_base);
2156 return (0);
2160 #ifdef SEGKPM_SUPPORT
2163 * segkpm support routines
2166 static caddr_t
2167 segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, uoff_t off,
2168 struct smap *smp, enum seg_rw rw)
2170 caddr_t base;
2171 page_t *pp;
2172 int newpage = 0;
2173 struct kpme *kpme;
2175 ASSERT(smp->sm_refcnt > 0);
2177 if ((pp = page_lookup(&vp->v_object, off, SE_SHARED)) == NULL) {
2178 kmutex_t *smtx;
2180 base = segkpm_create_va(off);
2182 if ((pp = page_create_va(&vp->v_object, off, PAGESIZE, PG_WAIT,
2183 seg, base)) == NULL) {
2184 panic("segmap_pagecreate_kpm: "
2185 "page_create failed");
2186 /*NOTREACHED*/
2189 newpage = 1;
2190 page_io_unlock(pp);
2191 ASSERT((uoff_t)(off - smp->sm_off) <= INT_MAX);
2194 * Mark this here until the following segmap_pagecreate
2195 * or segmap_release.
2197 smtx = SMAPMTX(smp);
2198 mutex_enter(smtx);
2199 smp->sm_flags |= SM_KPM_NEWPAGE;
2200 mutex_exit(smtx);
2203 kpme = GET_KPME(smp);
2204 if (!newpage && kpme->kpe_page == pp)
2205 base = hat_kpm_page2va(pp, 0);
2206 else
2207 base = hat_kpm_mapin(pp, kpme);
2210 * FS code may decide not to call segmap_pagecreate and we
2211 * don't invoke segmap_fault via TLB miss, so we have to set
2212 * ref and mod bits in advance.
2214 if (rw == S_WRITE) {
2215 hat_setrefmod(pp);
2216 } else {
2217 ASSERT(rw == S_READ);
2218 hat_setref(pp);
2221 smd_cpu[CPU->cpu_seqid].scpu.scpu_pagecreate++;
2223 return (base);
2227 * Find the smap structure corresponding to the
2228 * KPM addr and return it locked.
2230 struct smap *
2231 get_smap_kpm(caddr_t addr, page_t **ppp)
2233 struct smap *smp;
2234 struct vnode *vp;
2235 uoff_t offset;
2236 caddr_t baseaddr = (caddr_t)((uintptr_t)addr & MAXBMASK);
2237 int hashid;
2238 kmutex_t *hashmtx;
2239 page_t *pp;
2240 union segmap_cpu *scpu;
2242 pp = hat_kpm_vaddr2page(baseaddr);
2244 ASSERT(pp && !PP_ISFREE(pp));
2245 ASSERT(PAGE_LOCKED(pp));
2246 ASSERT(((uintptr_t)pp->p_offset & MAXBOFFSET) == 0);
2248 vp = pp->p_vnode;
2249 offset = pp->p_offset;
2250 ASSERT(vp != NULL);
2253 * Assume the last smap used on this cpu is the one needed.
2255 scpu = smd_cpu+CPU->cpu_seqid;
2256 smp = scpu->scpu.scpu_last_smap;
2257 mutex_enter(&smp->sm_mtx);
2258 if (smp->sm_vp == vp && smp->sm_off == offset) {
2259 ASSERT(smp->sm_refcnt > 0);
2260 } else {
2262 * Assumption wrong, find the smap on the hash chain.
2264 mutex_exit(&smp->sm_mtx);
2265 SMAP_HASHFUNC(vp, offset, hashid); /* macro assigns hashid */
2266 hashmtx = SHASHMTX(hashid);
2268 mutex_enter(hashmtx);
2269 smp = smd_hash[hashid].sh_hash_list;
2270 for (; smp != NULL; smp = smp->sm_hash) {
2271 if (smp->sm_vp == vp && smp->sm_off == offset)
2272 break;
2274 mutex_exit(hashmtx);
2275 if (smp) {
2276 mutex_enter(&smp->sm_mtx);
2277 ASSERT(smp->sm_vp == vp && smp->sm_off == offset);
2281 if (ppp)
2282 *ppp = smp ? pp : NULL;
2284 return (smp);
2287 #else /* SEGKPM_SUPPORT */
2289 /* segkpm stubs */
2291 /*ARGSUSED*/
2292 static caddr_t
2293 segmap_pagecreate_kpm(struct seg *seg, vnode_t *vp, uoff_t off,
2294 struct smap *smp, enum seg_rw rw)
2296 return (NULL);
2299 /*ARGSUSED*/
2300 struct smap *
2301 get_smap_kpm(caddr_t addr, page_t **ppp)
2303 return (NULL);
2306 #endif /* SEGKPM_SUPPORT */