usr/src/uts/common/os/grow.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/
#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/var.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/vmparam.h>
#include <sys/fcntl.h>
#include <sys/lwpchan_impl.h>
#include <sys/nbmlock.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>
int use_brk_lpg = 1;
int use_stk_lpg = 1;

static int brk_lpg(caddr_t nva);
static int grow_lpg(caddr_t sp);
int
brk(caddr_t nva)
{
        int error;
        proc_t *p = curproc;

        /*
         * Serialize brk operations on an address space.
         * This also serves as the lock protecting p_brksize
         * and p_brkpageszc.
         */
        as_rangelock(p->p_as);
        if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
                error = brk_lpg(nva);
        } else {
                error = brk_internal(nva, p->p_brkpageszc);
        }
        as_rangeunlock(p->p_as);
        return ((error != 0 ? set_errno(error) : 0));
}
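
/*
 * For context: brk() here is the kernel side of the brk(2) system call.
 * A userland heap allocator typically reaches it via sbrk(3C)/brk(3C),
 * roughly along the lines of:
 *
 *      void *old = sbrk(0);                    // current break
 *      if (brk((caddr_t)old + 8192) == 0) {
 *              // 8K more heap is now addressable
 *      }
 *
 * The exact libc bookkeeping varies between releases; the snippet above is
 * only an illustrative sketch.
 */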
/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call brk_internal().
 * Returns 0 on success.
 */
static int
brk_lpg(caddr_t nva)
{
        struct proc *p = curproc;
        size_t pgsz, len;
        caddr_t addr, brkend;
        caddr_t bssbase = p->p_bssbase;
        caddr_t brkbase = p->p_brkbase;
        int oszc, szc;
        int err;

        oszc = p->p_brkpageszc;

        /*
         * If p_brkbase has not yet been set, the first call
         * to brk_internal() will initialize it.
         */
        if (brkbase == 0) {
                return (brk_internal(nva, oszc));
        }

        len = nva - bssbase;

        pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
        szc = page_szc(pgsz);

        /*
         * Covers two cases:
         * 1. page_szc() returns -1 for invalid page size, so we want to
         * ignore it in that case.
         * 2. By design we never decrease page size, as it is more stable.
         */
        if (szc <= oszc) {
                err = brk_internal(nva, oszc);
                /* If failed, back off to base page size. */
                if (err != 0 && oszc != 0) {
                        err = brk_internal(nva, 0);
                }
                return (err);
        }

        err = brk_internal(nva, szc);
        /* If using szc failed, map with base page size and return. */
        if (err != 0) {
                if (szc != 0) {
                        err = brk_internal(nva, 0);
                }
                return (err);
        }

        /*
         * Round up brk base to a large page boundary and remap
         * anything in the segment already faulted in beyond that
         * point.
         */
        addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
        brkend = brkbase + p->p_brksize;
        len = brkend - addr;
        /* Check that len is not negative. Update page size code for heap. */
        if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
                (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
                p->p_brkpageszc = szc;
        }

        ASSERT(err == 0);
        return (err);           /* should always be 0 */
}
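
/*
 * Note on the alignment macros used above: P2ROUNDUP()/P2ALIGN() round an
 * address up or down to a power-of-two boundary.  With a 4MB large page
 * (pgsz == 0x400000), for example, P2ROUNDUP(0x1234567, 0x400000) yields
 * 0x1400000 and P2ALIGN(0x1234567, 0x400000) yields 0x1000000.
 */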
/*
 * Returns 0 on success.
 */
int
brk_internal(caddr_t nva, uint_t brkszc)
{
        caddr_t ova;                    /* current break address */
        size_t size;
        int error;
        struct proc *p = curproc;
        struct as *as = p->p_as;
        size_t pgsz;
        uint_t szc;
        rctl_qty_t as_rctl;

        /*
         * extend heap to brkszc alignment but use current p->p_brkpageszc
         * for the newly created segment. This allows the new extension
         * segment to be concatenated successfully with the existing brk
         * segment.
         */
        if ((szc = brkszc) != 0) {
                pgsz = page_get_pagesize(szc);
                ASSERT(pgsz > PAGESIZE);
        } else {
                pgsz = PAGESIZE;
        }

        mutex_enter(&p->p_lock);
        as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
            p->p_rctls, p);
        mutex_exit(&p->p_lock);

        /*
         * If p_brkbase has not yet been set, the first call
         * to brk() will initialize it.
         */
        if (p->p_brkbase == 0)
                p->p_brkbase = nva;

        /*
         * Before multiple page size support existed p_brksize was the value
         * not rounded to the pagesize (i.e. it stored the exact user request
         * for heap size). If pgsz is greater than PAGESIZE calculate the
         * heap size as the real new heap size by rounding it up to pgsz.
         * This is useful since we may want to know where the heap ends
         * without knowing heap pagesize (e.g. some old code) and also if
         * heap pagesize changes we can update p_brkpageszc but delay adding
         * new mapping yet still know from p_brksize where the heap really
         * ends. The user requested heap end is stored in libc variable.
         */
        if (pgsz > PAGESIZE) {
                caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
                size = tnva - p->p_brkbase;
                if (tnva < p->p_brkbase || (size > p->p_brksize &&
                    size > (size_t)as_rctl)) {
                        szc = 0;
                        pgsz = PAGESIZE;
                        size = nva - p->p_brkbase;
                }
        } else {
                size = nva - p->p_brkbase;
        }

        /*
         * use PAGESIZE to roundup ova because we want to know the real value
         * of the current heap end in case p_brkpageszc changes since the last
         * p_brksize was computed.
         */
        nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
        ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
            PAGESIZE);

        if ((nva < p->p_brkbase) || (size > p->p_brksize &&
            size > as_rctl)) {
                mutex_enter(&p->p_lock);
                (void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
                    RCA_SAFE);
                mutex_exit(&p->p_lock);
                return (ENOMEM);
        }

        if (nva > ova) {
                struct segvn_crargs crargs =
                    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

                if (!(p->p_datprot & PROT_EXEC)) {
                        crargs.prot &= ~PROT_EXEC;
                }

                /*
                 * Add new zfod mapping to extend UNIX data segment
                 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
                 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
                 * page sizes if ova is not aligned to szc's pgsz.
                 */
                if (szc > 0) {
                        caddr_t rbss;

                        rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
                            pgsz);
                        if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
                                crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
                                    AS_MAP_NO_LPOOB;
                        } else if (ova == rbss) {
                                crargs.szc = szc;
                        } else {
                                crargs.szc = AS_MAP_HEAP;
                        }
                } else {
                        crargs.szc = AS_MAP_NO_LPOOB;
                }
                crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
                error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
                    &crargs);
                if (error) {
                        return (error);
                }

        } else if (nva < ova) {
                /*
                 * Release mapping to shrink UNIX data segment.
                 */
                (void) as_unmap(as, nva, (size_t)(ova - nva));
        }
        p->p_brksize = size;
        return (0);
}
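
/*
 * The size check in brk_internal() compares the requested heap size against
 * the enforced value of the legacy RLIMIT_DATA resource control; a request
 * that both grows the heap and exceeds that value fails with ENOMEM after
 * the configured rctl action is taken.  For example, with a data limit of
 * 16MB a brk() that would push the heap to 20MB is rejected here before any
 * mapping is changed.
 */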
/*
 * Grow the stack to include sp.  Return 1 if successful, 0 otherwise.
 * This routine assumes that the stack grows downward.
 */
int
grow(caddr_t sp)
{
        struct proc *p = curproc;
        struct as *as = p->p_as;
        size_t oldsize = p->p_stksize;
        size_t newsize;
        int err;

        /*
         * Serialize grow operations on an address space.
         * This also serves as the lock protecting p_stksize
         * and p_stkpageszc.
         */
        as_rangelock(as);
        if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
                err = grow_lpg(sp);
        } else {
                err = grow_internal(sp, p->p_stkpageszc);
        }
        as_rangeunlock(as);

        if (err == 0 && (newsize = p->p_stksize) > oldsize) {
                ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
                ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
                /*
                 * Set up translations so the process doesn't have to fault in
                 * the stack pages we just gave it.
                 */
                (void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
                    newsize - oldsize, F_INVAL, S_WRITE);
        }
        return ((err == 0 ? 1 : 0));
}
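
/*
 * grow() is normally reached from the pagefault path: when a fault address
 * falls below the current stack segment but within the stack limits, the
 * trap code calls grow() to extend the stack before retrying the access.
 * (How such faults are classified is architecture-specific; this note is
 * only a summary, not part of the original source commentary.)
 */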
/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call grow_internal().
 * Returns 0 on success.
 */
static int
grow_lpg(caddr_t sp)
{
        struct proc *p = curproc;
        size_t pgsz;
        size_t len, newsize;
        caddr_t addr, saddr;
        caddr_t growend;
        int oszc, szc;
        int err;

        newsize = p->p_usrstack - sp;

        oszc = p->p_stkpageszc;
        pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
        szc = page_szc(pgsz);

        /*
         * Covers two cases:
         * 1. page_szc() returns -1 for invalid page size, so we want to
         * ignore it in that case.
         * 2. By design we never decrease page size, as it is more stable.
         * This shouldn't happen as the stack never shrinks.
         */
        if (szc <= oszc) {
                err = grow_internal(sp, oszc);
                /* failed, fall back to base page size */
                if (err != 0 && oszc != 0) {
                        err = grow_internal(sp, 0);
                }
                return (err);
        }

        /*
         * We've grown sufficiently to switch to a new page size.
         * So we are going to remap the whole segment with the new page size.
         */
        err = grow_internal(sp, szc);
        /* The grow with szc failed, so fall back to base page size. */
        if (err != 0) {
                if (szc != 0) {
                        err = grow_internal(sp, 0);
                }
                return (err);
        }

        /*
         * Round up stack pointer to a large page boundary and remap
         * any pgsz pages in the segment already faulted in beyond that
         * point.
         */
        saddr = p->p_usrstack - p->p_stksize;
        addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
        growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
        len = growend - addr;
        /* Check that len is not negative. Update page size code for stack. */
        if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
                (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
                p->p_stkpageszc = szc;
        }

        ASSERT(err == 0);
        return (err);           /* should always be 0 */
}
/*
 * This routine assumes that the stack grows downward.
 * Returns 0 on success, errno on failure.
 */
int
grow_internal(caddr_t sp, uint_t growszc)
{
        struct proc *p = curproc;
        size_t newsize;
        size_t oldsize;
        int error;
        size_t pgsz;
        uint_t szc;
        struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

        ASSERT(sp < p->p_usrstack);
        sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);

        /*
         * grow to growszc alignment but use current p->p_stkpageszc for
         * the segvn_crargs szc passed to segvn_create. For memcntl to
         * increase the szc, this allows the new extension segment to be
         * concatenated successfully with the existing stack segment.
         */
        if ((szc = growszc) != 0) {
                pgsz = page_get_pagesize(szc);
                ASSERT(pgsz > PAGESIZE);
                newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
                if (newsize > (size_t)p->p_stk_ctl) {
                        szc = 0;
                        pgsz = PAGESIZE;
                        newsize = p->p_usrstack - sp;
                }
        } else {
                pgsz = PAGESIZE;
                newsize = p->p_usrstack - sp;
        }

        if (newsize > (size_t)p->p_stk_ctl) {
                (void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
                    RCA_UNSAFE_ALL);

                return (ENOMEM);
        }

        oldsize = p->p_stksize;
        ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);

        if (newsize <= oldsize) {       /* prevent the stack from shrinking */
                return (0);
        }

        if (!(p->p_stkprot & PROT_EXEC)) {
                crargs.prot &= ~PROT_EXEC;
        }
        /*
         * extend stack with the proposed new growszc, which is different
         * than p_stkpageszc only on a memcntl to increase the stack pagesize.
         * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
         * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
         * if not aligned to szc's pgsz.
         */
        if (szc > 0) {
                caddr_t oldsp = p->p_usrstack - oldsize;
                caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
                    pgsz);

                if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
                        crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
                            AS_MAP_NO_LPOOB;
                } else if (oldsp == austk) {
                        crargs.szc = szc;
                } else {
                        crargs.szc = AS_MAP_STACK;
                }
        } else {
                crargs.szc = AS_MAP_NO_LPOOB;
        }
        crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;

        if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
            segvn_create, &crargs)) != 0) {
                if (error == EAGAIN) {
                        cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
                            "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
                }
                return (error);
        }
        p->p_stksize = newsize;
        return (0);
}
/*
 * Find address for user to map.
 * If MAP_FIXED is not specified, we can pick any address we want, but we will
 * first try the value in *addrp if it is non-NULL.  Thus this is implementing
 * a way to try and get a preferred address.
 */
int
choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
    int vacalign, uint_t flags)
{
        caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
        size_t lenp = len;

        ASSERT(AS_ISCLAIMGAP(as));      /* searches should be serialized */
        if (flags & MAP_FIXED) {
                (void) as_unmap(as, *addrp, len);
                return (0);
        } else if (basep != NULL && ((flags & MAP_ALIGN) == 0) &&
            !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
                /* User supplied address was available */
                *addrp = basep;
        } else {
                /*
                 * No user supplied address or the address supplied was not
                 * available.
                 */
                map_addr(addrp, len, off, vacalign, flags);
        }
        if (*addrp == NULL)
                return (ENOMEM);
        return (0);
}
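
/*
 * Callers of choose_addr() are expected to hold the address-space range
 * lock across both the gap search and the subsequent mapping (hence the
 * AS_ISCLAIMGAP assertion), following the pattern used by zmap() and
 * smmap_common() below.  Roughly:
 *
 *      as_rangelock(as);
 *      error = choose_addr(as, &addr, len, off, ADDR_NOVACALIGN, flags);
 *      if (error == 0)
 *              error = as_map(as, addr, len, segvn_create, &crargs);
 *      as_rangeunlock(as);
 */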
/*
 * Used for MAP_ANON - fast way to get anonymous pages
 */
static int
zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
    offset_t pos)
{
        struct segvn_crargs vn_a;
        int error;

        if (((PROT_ALL & uprot) != uprot))
                return (EACCES);

        if ((flags & MAP_FIXED) != 0) {
                caddr_t userlimit;

                /*
                 * Use the user address.  First verify that
                 * the address to be used is page aligned.
                 * Then make some simple bounds checks.
                 */
                if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
                        return (EINVAL);

                userlimit = flags & _MAP_LOW32 ?
                    (caddr_t)USERLIMIT32 : as->a_userlimit;
                switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
                case RANGE_OKAY:
                        break;
                case RANGE_BADPROT:
                        return (ENOTSUP);
                case RANGE_BADADDR:
                default:
                        return (ENOMEM);
                }
        }
        /*
         * No need to worry about vac alignment for anonymous
         * pages since this is a "clone" object that doesn't
         * yet exist.
         */
        error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
        if (error != 0) {
                return (error);
        }

        /*
         * Use the seg_vn segment driver; passing in the NULL amp
         * gives the desired "cloning" effect.
         */
        vn_a.vp = NULL;
        vn_a.offset = 0;
        vn_a.type = flags & MAP_TYPE;
        vn_a.prot = uprot;
        vn_a.maxprot = PROT_ALL;
        vn_a.flags = flags & ~MAP_TYPE;
        vn_a.cred = CRED();
        vn_a.amp = NULL;
        vn_a.szc = 0;
        vn_a.lgrp_mem_policy_flags = 0;

        return (as_map(as, *addrp, len, segvn_create, &vn_a));
}
static int
smmap_common(caddr_t *addrp, size_t len,
    int prot, int flags, struct file *fp, offset_t pos)
{
        struct vnode *vp;
        struct as *as = curproc->p_as;
        uint_t uprot, maxprot, type;
        int error;
        int in_crit = 0;

        if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | _MAP_NEW |
            _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
            MAP_TEXT | MAP_INITDATA)) != 0) {
                /* | MAP_RENAME */      /* not implemented, let user know */
                return (EINVAL);
        }

        if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
                return (EINVAL);
        }

        if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
                return (EINVAL);
        }
#if defined(__sparc)
        /*
         * See if this is an "old mmap call".  If so, remember this
         * fact and convert the flags value given to mmap to indicate
         * the specified address in the system call must be used.
         * _MAP_NEW is set by all new uses of mmap.
         */
        if ((flags & _MAP_NEW) == 0)
                flags |= MAP_FIXED;
#endif
        flags &= ~_MAP_NEW;
        type = flags & MAP_TYPE;
        if (type != MAP_PRIVATE && type != MAP_SHARED)
                return (EINVAL);

        if (flags & MAP_ALIGN) {

                if (flags & MAP_FIXED)
                        return (EINVAL);

                /* alignment needs to be a power of 2 >= page size */
                if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
                    !ISP2((uintptr_t)*addrp))
                        return (EINVAL);
        }
        /*
         * Check for bad lengths and file position.
         * We let the VOP_MAP routine check for negative lengths
         * since on some vnode types this might be appropriate.
         */
        if (len == 0 || (pos & (u_offset_t)PAGEOFFSET) != 0)
                return (EINVAL);

        maxprot = PROT_ALL;             /* start out allowing all accesses */
        uprot = prot | PROT_USER;

        if (fp == NULL) {
                ASSERT(flags & MAP_ANON);
                /* discard lwpchan mappings, like munmap() */
                if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
                        lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
                as_rangelock(as);
                error = zmap(as, addrp, len, uprot, flags, pos);
                as_rangeunlock(as);
                /*
                 * Tell machine specific code that lwp has mapped shared memory
                 */
                if (error == 0 && (flags & MAP_SHARED)) {
                        /* EMPTY */
                        LWP_MMODEL_SHARED_AS(*addrp, len);
                }
                return (error);
        } else if ((flags & MAP_ANON) != 0)
                return (EINVAL);

        vp = fp->f_vnode;

        /* Can't execute code from "noexec" mounted filesystem. */
        if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
                maxprot &= ~PROT_EXEC;

        /*
         * These checks were added as part of large files.
         *
         * Return ENXIO if the initial position is negative; return EOVERFLOW
         * if (offset + len) would overflow the maximum allowed offset for the
         * type of file descriptor being used.
         */
        if (vp->v_type == VREG) {
                if (pos < 0)
                        return (ENXIO);
                if ((offset_t)len > (OFFSET_MAX(fp) - pos))
                        return (EOVERFLOW);
        }

        if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
                /* no write access allowed */
                maxprot &= ~PROT_WRITE;
        }

        /*
         * XXX - Do we also adjust maxprot based on protections
         * of the vnode?  E.g. if no execute permission is given
         * on the vnode for the current user, maxprot probably
         * should disallow PROT_EXEC also?  This is different
         * from the write access as this would be a per vnode
         * test as opposed to a per fd test for writability.
         */
        /*
         * Verify that the specified protections are not greater than
         * the maximum allowable protections.  Also test to make sure
         * that the file descriptor allows for read access, since
         * "write only" mappings are hard to do: normally we do
         * the read from the file before the page can be written.
         */
        if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
                return (EACCES);

        /*
         * If the user specified an address, do some simple checks here
         */
        if ((flags & MAP_FIXED) != 0) {
                caddr_t userlimit;

                /*
                 * Use the user address.  First verify that
                 * the address to be used is page aligned.
                 * Then make some simple bounds checks.
                 */
                if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
                        return (EINVAL);

                userlimit = flags & _MAP_LOW32 ?
                    (caddr_t)USERLIMIT32 : as->a_userlimit;
                switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
                case RANGE_OKAY:
                        break;
                case RANGE_BADPROT:
                        return (ENOTSUP);
                case RANGE_BADADDR:
                default:
                        return (ENOMEM);
                }
        }

        if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
            nbl_need_check(vp)) {
                int svmand;
                nbl_op_t nop;

                nbl_start_crit(vp, RW_READER);
                in_crit = 1;
                error = nbl_svmand(vp, fp->f_cred, &svmand);
                if (error != 0)
                        goto done;
                if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
                        if (prot & (PROT_READ | PROT_EXEC)) {
                                nop = NBL_READWRITE;
                        } else {
                                nop = NBL_WRITE;
                        }
                } else {
                        nop = NBL_READ;
                }
                if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
                        error = EACCES;
                        goto done;
                }
        }

        /* discard lwpchan mappings, like munmap() */
        if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
                lwpchan_delete_mapping(curproc, *addrp, *addrp + len);

        /*
         * Ok, now let the vnode map routine do its thing to set things up.
         */
        error = VOP_MAP(vp, pos, as,
            addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);

        if (error == 0) {
                /*
                 * Tell machine specific code that lwp has mapped shared memory
                 */
                if (flags & MAP_SHARED) {
                        /* EMPTY */
                        LWP_MMODEL_SHARED_AS(*addrp, len);
                }
                if (vp->v_type == VREG &&
                    (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
                        /*
                         * Mark this as an executable vnode
                         */
                        mutex_enter(&vp->v_lock);
                        vp->v_flag |= VVMEXEC;
                        mutex_exit(&vp->v_lock);
                }
        }

done:
        if (in_crit)
                nbl_end_crit(vp);
        return (error);
}
#ifdef _LP64
/*
 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
 *
 * The "large file" mmap routine mmap64(2) is also mapped to this routine
 * by the 64-bit version of libc.
 *
 * Eventually, this should be the only version, and have smmap_common()
 * folded back into it again.  Some day.
 */
caddr_t
smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
{
        struct file *fp;
        int error;

        if (flags & _MAP_LOW32)
                error = EINVAL;
        else if (fd == -1 && (flags & MAP_ANON) != 0)
                error = smmap_common(&addr, len, prot, flags,
                    NULL, (offset_t)pos);
        else if ((fp = getf(fd)) != NULL) {
                error = smmap_common(&addr, len, prot, flags,
                    fp, (offset_t)pos);
                releasef(fd);
        } else
                error = EBADF;

        return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
}
#endif  /* _LP64 */
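
/*
 * Illustrative sketch: on an LP64 kernel a userland call such as
 *
 *      void *p = mmap(NULL, 8192, PROT_READ | PROT_WRITE,
 *          MAP_PRIVATE | MAP_ANON, -1, 0);
 *
 * arrives here as smmap64(NULL, 8192, ...) with fd == -1, so the MAP_ANON
 * branch above passes a NULL file pointer to smmap_common().  The exact
 * libc syscall stub is an assumption made for illustration only.
 */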
#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
 */
caddr_t
smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
{
        struct file *fp;
        int error;
        caddr_t a = (caddr_t)(uintptr_t)addr;

        if (flags & _MAP_LOW32)
                error = EINVAL;
        else if (fd == -1 && (flags & MAP_ANON) != 0)
                error = smmap_common(&a, (size_t)len, prot,
                    flags | _MAP_LOW32, NULL, (offset_t)pos);
        else if ((fp = getf(fd)) != NULL) {
                error = smmap_common(&a, (size_t)len, prot,
                    flags | _MAP_LOW32, fp, (offset_t)pos);
                releasef(fd);
        } else
                error = EBADF;

        ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);

        return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
}
/*
 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
 *
 * Now things really get ugly because we can't use the C-style
 * calling convention for more than 6 args, and 64-bit parameter
 * passing on 32-bit systems is less than clean.
 */

struct mmaplf32a {
        caddr_t addr;
        size_t len;
#ifdef _LP64
        /*
         * 32-bit contents, 64-bit cells
         */
        uint64_t prot;
        uint64_t flags;
        uint64_t fd;
        uint64_t offhi;
        uint64_t offlo;
#else
        /*
         * 32-bit contents, 32-bit cells
         */
        uint32_t prot;
        uint32_t flags;
        uint32_t fd;
        uint32_t offhi;
        uint32_t offlo;
#endif
};

int
smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
{
        struct file *fp;
        int error;
        caddr_t a = uap->addr;
        int flags = (int)uap->flags;
        int fd = (int)uap->fd;
#ifdef _BIG_ENDIAN
        offset_t off = ((u_offset_t)uap->offhi << 32) | (u_offset_t)uap->offlo;
#else
        offset_t off = ((u_offset_t)uap->offlo << 32) | (u_offset_t)uap->offhi;
#endif

        if (flags & _MAP_LOW32)
                error = EINVAL;
        else if (fd == -1 && (flags & MAP_ANON) != 0)
                error = smmap_common(&a, uap->len, (int)uap->prot,
                    flags | _MAP_LOW32, NULL, off);
        else if ((fp = getf(fd)) != NULL) {
                error = smmap_common(&a, uap->len, (int)uap->prot,
                    flags | _MAP_LOW32, fp, off);
                releasef(fd);
        } else
                error = EBADF;

        if (error == 0)
                rvp->r_val1 = (uintptr_t)a;
        return (error);
}

#endif  /* _SYSCALL32_IMPL || _ILP32 */
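
/*
 * In smmaplf32() above, the 64-bit file offset from a 32-bit caller arrives
 * as two 32-bit cells and is recombined with a shift and an OR.  For
 * example, an offset of 0x280001000 arrives (on a big-endian kernel) as
 * offhi == 0x2 and offlo == 0x80001000, and
 * ((u_offset_t)0x2 << 32) | 0x80001000 reproduces the original value.
 * Which cell carries the high word depends on how the 32-bit argument
 * slots are laid out, hence the endian #ifdef.
 */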
int
munmap(caddr_t addr, size_t len)
{
        struct proc *p = curproc;
        struct as *as = p->p_as;

        if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
                return (set_errno(EINVAL));

        if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
                return (set_errno(EINVAL));

        /*
         * Discard lwpchan mappings.
         */
        if (p->p_lcp != NULL)
                lwpchan_delete_mapping(p, addr, addr + len);
        if (as_unmap(as, addr, len) != 0)
                return (set_errno(EINVAL));

        return (0);
}
int
mprotect(caddr_t addr, size_t len, int prot)
{
        struct as *as = curproc->p_as;
        uint_t uprot = prot | PROT_USER;
        int error;

        if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
                return (set_errno(EINVAL));

        switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
        case RANGE_OKAY:
                break;
        case RANGE_BADPROT:
                return (set_errno(ENOTSUP));
        case RANGE_BADADDR:
        default:
                return (set_errno(ENOMEM));
        }

        error = as_setprot(as, addr, len, uprot);
        if (error)
                return (set_errno(error));
        return (0);
}
#define	MC_CACHE	128			/* internal result buffer */
#define	MC_QUANTUM	(MC_CACHE * PAGESIZE)	/* addresses covered in loop */

int
mincore(caddr_t addr, size_t len, char *vecp)
{
        struct as *as = curproc->p_as;
        caddr_t ea;                     /* end address of loop */
        size_t rl;                      /* inner result length */
        char vec[MC_CACHE];             /* local vector cache */
        int error;
        model_t model;
        long llen;

        model = get_udatamodel();
        /*
         * Validate form of address parameters.
         */
        if (model == DATAMODEL_NATIVE) {
                llen = (long)len;
        } else {
                llen = (int32_t)(size32_t)len;
        }
        if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
                return (set_errno(EINVAL));

        if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
                return (set_errno(ENOMEM));

        /*
         * Loop over subranges of interval [addr : addr + len), recovering
         * results internally and then copying them out to caller.  Subrange
         * is based on the size of MC_CACHE, defined above.
         */
        for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
                error = as_incore(as, addr,
                    (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
                if (rl != 0) {
                        rl = (rl + PAGESIZE - 1) / PAGESIZE;
                        if (copyout(vec, vecp, rl) != 0)
                                return (set_errno(EFAULT));
                        vecp += rl;
                }
                if (error != 0)
                        return (set_errno(ENOMEM));
        }
        return (0);
}
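
/*
 * The mincore() result vector holds one byte per page.  With 4KB pages, a
 * 1MB range therefore produces 256 result bytes, copied out in MC_CACHE
 * (128-byte) chunks, i.e. two iterations of the loop above.  In userland
 * the call looks roughly like:
 *
 *      char vec[256];
 *      if (mincore(addr, 1024 * 1024, vec) == 0) {
 *              // vec[i] nonzero => page i is resident
 *      }
 *
 * Page size and the precise meaning of the vector bits are system
 * dependent; the snippet is a sketch, not the documented interface.
 */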