kernel/os/grow.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/* Copyright 2013 OmniTI Computer Consulting, Inc. All rights reserved. */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

#include <sys/types.h>
#include <sys/inttypes.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/systm.h>
#include <sys/signal.h>
#include <sys/user.h>
#include <sys/errno.h>
#include <sys/var.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/cred.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/vm.h>
#include <sys/file.h>
#include <sys/mman.h>
#include <sys/vmparam.h>
#include <sys/fcntl.h>
#include <sys/lwpchan_impl.h>
#include <sys/nbmlock.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_dev.h>
#include <vm/seg_vn.h>

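/*
 * Tunables: when non-zero (and the process has SAUTOLPG set), brk() and
 * stack growth try to use large pages via brk_lpg()/grow_lpg().
 */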
int use_brk_lpg = 1;
int use_stk_lpg = 1;

/*
 * If set, we will not randomize mappings where the 'addr' argument is
 * non-NULL and not an alignment.
 */
int aslr_respect_mmap_hint = 1;

static int brk_lpg(caddr_t nva);
static int grow_lpg(caddr_t sp);

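/*
 * brk(2) entry point.  When called with nva == 0 it returns the current
 * break (used by sbrk(3C)); otherwise it returns 0 on success or sets
 * errno on failure.
 */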
intptr_t
brk(caddr_t nva)
{
        int error;
        proc_t *p = curproc;

        /*
         * Serialize brk operations on an address space.
         * This also serves as the lock protecting p_brksize
         * and p_brkpageszc.
         */
        as_rangelock(p->p_as);

        /*
         * As a special case to aid the implementation of sbrk(3C), if given a
         * new brk of 0, return the current brk.  We'll hide this in brk(3C).
         */
        if (nva == 0) {
                intptr_t base = (intptr_t)(p->p_brkbase + p->p_brksize);
                as_rangeunlock(p->p_as);
                return (base);
        }

        if (use_brk_lpg && (p->p_flag & SAUTOLPG) != 0) {
                error = brk_lpg(nva);
        } else {
                error = brk_internal(nva, p->p_brkpageszc);
        }
        as_rangeunlock(p->p_as);
        return ((error != 0 ? set_errno(error) : 0));
}

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call brk_internal().
 * Returns 0 on success.
 */
static int
brk_lpg(caddr_t nva)
{
        struct proc *p = curproc;
        size_t pgsz, len;
        caddr_t addr, brkend;
        caddr_t bssbase = p->p_bssbase;
        caddr_t brkbase = p->p_brkbase;
        int oszc, szc;
        int err;

        oszc = p->p_brkpageszc;

        /*
         * If p_brkbase has not yet been set, the first call
         * to brk_internal() will initialize it.
         */
        if (brkbase == 0) {
                return (brk_internal(nva, oszc));
        }

        len = nva - bssbase;

        pgsz = map_pgsz(MAPPGSZ_HEAP, p, bssbase, len, 0);
        szc = page_szc(pgsz);

        /*
         * Covers two cases:
         * 1. page_szc() returns -1 for invalid page size, so we want to
         * ignore it in that case.
         * 2. By design we never decrease page size, as it is more stable.
         */
        if (szc <= oszc) {
                err = brk_internal(nva, oszc);
                /* If failed, back off to base page size. */
                if (err != 0 && oszc != 0) {
                        err = brk_internal(nva, 0);
                }
                return (err);
        }

        err = brk_internal(nva, szc);
        /* If using szc failed, map with base page size and return. */
        if (err != 0) {
                if (szc != 0) {
                        err = brk_internal(nva, 0);
                }
                return (err);
        }

        /*
         * Round up brk base to a large page boundary and remap
         * anything in the segment already faulted in beyond that
         * point.
         */
        addr = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase, pgsz);
        brkend = brkbase + p->p_brksize;
        len = brkend - addr;
        /* Check that len is not negative. Update page size code for heap. */
        if (addr >= p->p_bssbase && brkend > addr && IS_P2ALIGNED(len, pgsz)) {
                (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
                p->p_brkpageszc = szc;
        }

        ASSERT(err == 0);
        return (err);   /* should always be 0 */
}

/*
 * Returns 0 on success.
 */
int
brk_internal(caddr_t nva, uint_t brkszc)
{
        caddr_t ova;                    /* current break address */
        size_t size;
        int error;
        struct proc *p = curproc;
        struct as *as = p->p_as;
        size_t pgsz;
        uint_t szc;
        rctl_qty_t as_rctl;

        /*
         * extend heap to brkszc alignment but use current p->p_brkpageszc
         * for the newly created segment. This allows the new extension
         * segment to be concatenated successfully with the existing brk
         * segment.
         */
        if ((szc = brkszc) != 0) {
                pgsz = page_get_pagesize(szc);
                ASSERT(pgsz > PAGESIZE);
        } else {
                pgsz = PAGESIZE;
        }

        mutex_enter(&p->p_lock);
        as_rctl = rctl_enforced_value(rctlproc_legacy[RLIMIT_DATA],
            p->p_rctls, p);
        mutex_exit(&p->p_lock);

        /*
         * If p_brkbase has not yet been set, the first call
         * to brk() will initialize it.
         */
        if (p->p_brkbase == 0)
                p->p_brkbase = nva;

        /*
         * Before multiple page size support existed p_brksize was the value
         * not rounded to the pagesize (i.e. it stored the exact user request
         * for heap size). If pgsz is greater than PAGESIZE calculate the
         * heap size as the real new heap size by rounding it up to pgsz.
         * This is useful since we may want to know where the heap ends
         * without knowing heap pagesize (e.g. some old code) and also if
         * heap pagesize changes we can update p_brkpageszc but delay adding
         * new mapping yet still know from p_brksize where the heap really
         * ends. The user requested heap end is stored in libc variable.
         */
        if (pgsz > PAGESIZE) {
                caddr_t tnva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
                size = tnva - p->p_brkbase;
                if (tnva < p->p_brkbase || (size > p->p_brksize &&
                    size > (size_t)as_rctl)) {
                        szc = 0;
                        pgsz = PAGESIZE;
                        size = nva - p->p_brkbase;
                }
        } else {
                size = nva - p->p_brkbase;
        }

        /*
         * use PAGESIZE to roundup ova because we want to know the real value
         * of the current heap end in case p_brkpageszc changes since the last
         * p_brksize was computed.
         */
        nva = (caddr_t)P2ROUNDUP((uintptr_t)nva, pgsz);
        ova = (caddr_t)P2ROUNDUP((uintptr_t)(p->p_brkbase + p->p_brksize),
            PAGESIZE);

        if ((nva < p->p_brkbase) || (size > p->p_brksize &&
            size > as_rctl)) {
                mutex_enter(&p->p_lock);
                (void) rctl_action(rctlproc_legacy[RLIMIT_DATA], p->p_rctls, p,
                    RCA_SAFE);
                mutex_exit(&p->p_lock);
                return (ENOMEM);
        }

        if (nva > ova) {
                struct segvn_crargs crargs =
                    SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

                if (!(p->p_datprot & PROT_EXEC)) {
                        crargs.prot &= ~PROT_EXEC;
                }

                /*
                 * Add new zfod mapping to extend UNIX data segment
                 * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies
                 * via map_pgszcvec(). Use AS_MAP_HEAP to get intermediate
                 * page sizes if ova is not aligned to szc's pgsz.
                 */
                if (szc > 0) {
                        caddr_t rbss;

                        rbss = (caddr_t)P2ROUNDUP((uintptr_t)p->p_bssbase,
                            pgsz);
                        if (IS_P2ALIGNED(p->p_bssbase, pgsz) || ova > rbss) {
                                crargs.szc = p->p_brkpageszc ? p->p_brkpageszc :
                                    AS_MAP_NO_LPOOB;
                        } else if (ova == rbss) {
                                crargs.szc = szc;
                        } else {
                                crargs.szc = AS_MAP_HEAP;
                        }
                } else {
                        crargs.szc = AS_MAP_NO_LPOOB;
                }
                crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_UP;
                error = as_map(as, ova, (size_t)(nva - ova), segvn_create,
                    &crargs);
                if (error) {
                        return (error);
                }
        } else if (nva < ova) {
                /*
                 * Release mapping to shrink UNIX data segment.
                 */
                (void) as_unmap(as, nva, (size_t)(ova - nva));
        }
        p->p_brksize = size;
        return (0);
}

/*
 * Grow the stack to include sp.  Return 1 if successful, 0 otherwise.
 * This routine assumes that the stack grows downward.
 */
int
grow(caddr_t sp)
{
        struct proc *p = curproc;
        struct as *as = p->p_as;
        size_t oldsize = p->p_stksize;
        size_t newsize;
        int err;

        /*
         * Serialize grow operations on an address space.
         * This also serves as the lock protecting p_stksize
         * and p_stkpageszc.
         */
        as_rangelock(as);
        if (use_stk_lpg && (p->p_flag & SAUTOLPG) != 0) {
                err = grow_lpg(sp);
        } else {
                err = grow_internal(sp, p->p_stkpageszc);
        }
        as_rangeunlock(as);

        if (err == 0 && (newsize = p->p_stksize) > oldsize) {
                ASSERT(IS_P2ALIGNED(oldsize, PAGESIZE));
                ASSERT(IS_P2ALIGNED(newsize, PAGESIZE));
                /*
                 * Set up translations so the process doesn't have to fault in
                 * the stack pages we just gave it.
                 */
                (void) as_fault(as->a_hat, as, p->p_usrstack - newsize,
                    newsize - oldsize, F_INVAL, S_WRITE);
        }
        return ((err == 0 ? 1 : 0));
}

/*
 * Algorithm: call arch-specific map_pgsz to get best page size to use,
 * then call grow_internal().
 * Returns 0 on success.
 */
static int
grow_lpg(caddr_t sp)
{
        struct proc *p = curproc;
        size_t pgsz;
        size_t len, newsize;
        caddr_t addr, saddr;
        caddr_t growend;
        int oszc, szc;
        int err;

        newsize = p->p_usrstack - sp;

        oszc = p->p_stkpageszc;
        pgsz = map_pgsz(MAPPGSZ_STK, p, sp, newsize, 0);
        szc = page_szc(pgsz);

        /*
         * Covers two cases:
         * 1. page_szc() returns -1 for invalid page size, so we want to
         * ignore it in that case.
         * 2. By design we never decrease page size, as it is more stable.
         * This shouldn't happen as the stack never shrinks.
         */
        if (szc <= oszc) {
                err = grow_internal(sp, oszc);
                /* failed, fall back to base page size */
                if (err != 0 && oszc != 0) {
                        err = grow_internal(sp, 0);
                }
                return (err);
        }

        /*
         * We've grown sufficiently to switch to a new page size.
         * So we are going to remap the whole segment with the new page size.
         */
        err = grow_internal(sp, szc);
        /* The grow with szc failed, so fall back to base page size. */
        if (err != 0) {
                if (szc != 0) {
                        err = grow_internal(sp, 0);
                }
                return (err);
        }

        /*
         * Round up stack pointer to a large page boundary and remap
         * any pgsz pages in the segment already faulted in beyond that
         * point.
         */
        saddr = p->p_usrstack - p->p_stksize;
        addr = (caddr_t)P2ROUNDUP((uintptr_t)saddr, pgsz);
        growend = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack, pgsz);
        len = growend - addr;
        /* Check that len is not negative. Update page size code for stack. */
        if (addr >= saddr && growend > addr && IS_P2ALIGNED(len, pgsz)) {
                (void) as_setpagesize(p->p_as, addr, len, szc, B_FALSE);
                p->p_stkpageszc = szc;
        }

        ASSERT(err == 0);
        return (err);   /* should always be 0 */
}

/*
 * This routine assumes that the stack grows downward.
 * Returns 0 on success, errno on failure.
 */
int
grow_internal(caddr_t sp, uint_t growszc)
{
        struct proc *p = curproc;
        size_t newsize;
        size_t oldsize;
        int error;
        size_t pgsz;
        uint_t szc;
        struct segvn_crargs crargs = SEGVN_ZFOD_ARGS(PROT_ZFOD, PROT_ALL);

        ASSERT(sp < p->p_usrstack);
        sp = (caddr_t)P2ALIGN((uintptr_t)sp, PAGESIZE);

        /*
         * grow to growszc alignment but use current p->p_stkpageszc for
         * the segvn_crargs szc passed to segvn_create. For memcntl to
         * increase the szc, this allows the new extension segment to be
         * concatenated successfully with the existing stack segment.
         */
        if ((szc = growszc) != 0) {
                pgsz = page_get_pagesize(szc);
                ASSERT(pgsz > PAGESIZE);
                newsize = p->p_usrstack - (caddr_t)P2ALIGN((uintptr_t)sp, pgsz);
                if (newsize > (size_t)p->p_stk_ctl) {
                        szc = 0;
                        pgsz = PAGESIZE;
                        newsize = p->p_usrstack - sp;
                }
        } else {
                pgsz = PAGESIZE;
                newsize = p->p_usrstack - sp;
        }

        if (newsize > (size_t)p->p_stk_ctl) {
                (void) rctl_action(rctlproc_legacy[RLIMIT_STACK], p->p_rctls, p,
                    RCA_UNSAFE_ALL);

                return (ENOMEM);
        }

        oldsize = p->p_stksize;
        ASSERT(P2PHASE(oldsize, PAGESIZE) == 0);

        if (newsize <= oldsize) {       /* prevent the stack from shrinking */
                return (0);
        }

        if (!(p->p_stkprot & PROT_EXEC)) {
                crargs.prot &= ~PROT_EXEC;
        }
        /*
         * extend stack with the proposed new growszc, which is different
         * than p_stkpageszc only on a memcntl to increase the stack pagesize.
         * AS_MAP_NO_LPOOB means use 0, and don't reapply OOB policies via
         * map_pgszcvec(). Use AS_MAP_STACK to get intermediate page sizes
         * if not aligned to szc's pgsz.
         */
        if (szc > 0) {
                caddr_t oldsp = p->p_usrstack - oldsize;
                caddr_t austk = (caddr_t)P2ALIGN((uintptr_t)p->p_usrstack,
                    pgsz);

                if (IS_P2ALIGNED(p->p_usrstack, pgsz) || oldsp < austk) {
                        crargs.szc = p->p_stkpageszc ? p->p_stkpageszc :
                            AS_MAP_NO_LPOOB;
                } else if (oldsp == austk) {
                        crargs.szc = szc;
                } else {
                        crargs.szc = AS_MAP_STACK;
                }
        } else {
                crargs.szc = AS_MAP_NO_LPOOB;
        }
        crargs.lgrp_mem_policy_flags = LGRP_MP_FLAG_EXTEND_DOWN;

        if ((error = as_map(p->p_as, p->p_usrstack - newsize, newsize - oldsize,
            segvn_create, &crargs)) != 0) {
                if (error == EAGAIN) {
                        cmn_err(CE_WARN, "Sorry, no swap space to grow stack "
                            "for pid %d (%s)", p->p_pid, PTOU(p)->u_comm);
                }
                return (error);
        }
        p->p_stksize = newsize;
        return (0);
}

/*
 * Find address for user to map.  If MAP_FIXED is not specified, we can pick
 * any address we want, but we will first try the value in *addrp if it is
 * non-NULL and _MAP_RANDOMIZE is not set.  Thus this is implementing a way to
 * try and get a preferred address.
 */
int
choose_addr(struct as *as, caddr_t *addrp, size_t len, offset_t off,
    int vacalign, uint_t flags)
{
        caddr_t basep = (caddr_t)(uintptr_t)((uintptr_t)*addrp & PAGEMASK);
        size_t lenp = len;

        ASSERT(AS_ISCLAIMGAP(as));      /* searches should be serialized */
        if (flags & MAP_FIXED) {
                (void) as_unmap(as, *addrp, len);
                return (0);
        } else if (basep != NULL &&
            ((flags & (MAP_ALIGN | _MAP_RANDOMIZE)) == 0) &&
            !as_gap(as, len, &basep, &lenp, 0, *addrp)) {
                /* User supplied address was available */
                *addrp = basep;
        } else {
                /*
                 * No user supplied address or the address supplied was not
                 * available.
                 */
                map_addr(addrp, len, off, vacalign, flags);
        }
        if (*addrp == NULL)
                return (ENOMEM);
        return (0);
}

/*
 * Used for MAP_ANON - fast way to get anonymous pages
 */
static int
zmap(struct as *as, caddr_t *addrp, size_t len, uint_t uprot, int flags,
    offset_t pos)
{
        struct segvn_crargs vn_a;
        int error;

        if (((PROT_ALL & uprot) != uprot))
                return (EACCES);

        if ((flags & MAP_FIXED) != 0) {
                caddr_t userlimit;

                /*
                 * Use the user address.  First verify that
                 * the address to be used is page aligned.
                 * Then make some simple bounds checks.
                 */
                if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
                        return (EINVAL);

                userlimit = flags & _MAP_LOW32 ?
                    (caddr_t)USERLIMIT32 : as->a_userlimit;
                switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
                case RANGE_OKAY:
                        break;
                case RANGE_BADPROT:
                        return (ENOTSUP);
                case RANGE_BADADDR:
                default:
                        return (ENOMEM);
                }
        }
        /*
         * No need to worry about vac alignment for anonymous
         * pages since this is a "clone" object that doesn't
         * yet exist.
         */
        error = choose_addr(as, addrp, len, pos, ADDR_NOVACALIGN, flags);
        if (error != 0) {
                return (error);
        }

        /*
         * Use the seg_vn segment driver; passing in the NULL amp
         * gives the desired "cloning" effect.
         */
        vn_a.vp = NULL;
        vn_a.offset = 0;
        vn_a.type = flags & MAP_TYPE;
        vn_a.prot = uprot;
        vn_a.maxprot = PROT_ALL;
        vn_a.flags = flags & ~MAP_TYPE;
        vn_a.cred = CRED();
        vn_a.amp = NULL;
        vn_a.szc = 0;
        vn_a.lgrp_mem_policy_flags = 0;

        return (as_map(as, *addrp, len, segvn_create, &vn_a));
}

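/*
 * A mapping request may be randomized under ASLR unless it is MAP_FIXED, or
 * unless it passes a plain (non-MAP_ALIGN) address hint while
 * aslr_respect_mmap_hint is set.
 */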
#define RANDOMIZABLE_MAPPING(addr, flags) (((flags & MAP_FIXED) == 0) && \
        !(((flags & MAP_ALIGN) == 0) && (addr != 0) && aslr_respect_mmap_hint))

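/*
 * Common worker for the mmap(2) family (smmap64, smmap32, smmaplf32):
 * validates flags and protections, applies ASLR randomization when enabled,
 * then hands off to zmap() for anonymous mappings or fop_map() for
 * vnode-backed mappings.
 */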
static int
smmap_common(caddr_t *addrp, size_t len,
    int prot, int flags, struct file *fp, offset_t pos)
{
        struct vnode *vp;
        struct as *as = curproc->p_as;
        uint_t uprot, maxprot, type;
        int error;
        int in_crit = 0;

        if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED |
            _MAP_LOW32 | MAP_NORESERVE | MAP_ANON | MAP_ALIGN |
            MAP_TEXT | MAP_INITDATA)) != 0) {
                /* | MAP_RENAME */      /* not implemented, let user know */
                return (EINVAL);
        }

        if ((flags & MAP_TEXT) && !(prot & PROT_EXEC)) {
                return (EINVAL);
        }

        if ((flags & (MAP_TEXT | MAP_INITDATA)) == (MAP_TEXT | MAP_INITDATA)) {
                return (EINVAL);
        }

        if ((flags & (MAP_FIXED | _MAP_RANDOMIZE)) ==
            (MAP_FIXED | _MAP_RANDOMIZE)) {
                return (EINVAL);
        }

        /*
         * If it's not a fixed allocation and mmap ASLR is enabled, randomize
         * it.
         */
        if (RANDOMIZABLE_MAPPING(*addrp, flags) &&
            secflag_enabled(curproc, PROC_SEC_ASLR))
                flags |= _MAP_RANDOMIZE;

        type = flags & MAP_TYPE;
        if (type != MAP_PRIVATE && type != MAP_SHARED)
                return (EINVAL);

        if (flags & MAP_ALIGN) {
                if (flags & MAP_FIXED)
                        return (EINVAL);

                /* alignment needs to be a power of 2 >= page size */
                if (((uintptr_t)*addrp < PAGESIZE && (uintptr_t)*addrp != 0) ||
                    !ISP2((uintptr_t)*addrp))
                        return (EINVAL);
        }
        /*
         * Check for bad lengths and file position.
         * We let the fop_map routine check for negative lengths
         * since on some vnode types this might be appropriate.
         */
        if (len == 0 || (pos & (uoff_t)PAGEOFFSET) != 0)
                return (EINVAL);

        maxprot = PROT_ALL;             /* start out allowing all accesses */
        uprot = prot | PROT_USER;

        if (fp == NULL) {
                ASSERT(flags & MAP_ANON);
                /* discard lwpchan mappings, like munmap() */
                if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
                        lwpchan_delete_mapping(curproc, *addrp, *addrp + len);
                as_rangelock(as);
                error = zmap(as, addrp, len, uprot, flags, pos);
                as_rangeunlock(as);
                /*
                 * Tell machine specific code that lwp has mapped shared memory
                 */
                if (error == 0 && (flags & MAP_SHARED)) {
                        /* EMPTY */
                        LWP_MMODEL_SHARED_AS(*addrp, len);
                }
                return (error);
        } else if ((flags & MAP_ANON) != 0)
                return (EINVAL);

        vp = fp->f_vnode;

        /* Can't execute code from "noexec" mounted filesystem. */
        if ((vp->v_vfsp->vfs_flag & VFS_NOEXEC) != 0)
                maxprot &= ~PROT_EXEC;

        /*
         * These checks were added as part of large files.
         *
         * Return ENXIO if the initial position is negative; return EOVERFLOW
         * if (offset + len) would overflow the maximum allowed offset for the
         * type of file descriptor being used.
         */
        if (vp->v_type == VREG) {
                if (pos < 0)
                        return (ENXIO);
                if ((offset_t)len > (OFFSET_MAX(fp) - pos))
                        return (EOVERFLOW);
        }

        if (type == MAP_SHARED && (fp->f_flag & FWRITE) == 0) {
                /* no write access allowed */
                maxprot &= ~PROT_WRITE;
        }

        /*
         * XXX - Do we also adjust maxprot based on protections
         * of the vnode?  E.g. if no execute permission is given
         * on the vnode for the current user, maxprot probably
         * should disallow PROT_EXEC also?  This is different
         * from the write access as this would be a per vnode
         * test as opposed to a per fd test for writability.
         */

        /*
         * Verify that the specified protections are not greater than
         * the maximum allowable protections.  Also test to make sure
         * that the file descriptor does allows for read access since
         * "write only" mappings are hard to do since normally we do
         * the read from the file before the page can be written.
         */
        if (((maxprot & uprot) != uprot) || (fp->f_flag & FREAD) == 0)
                return (EACCES);

        /*
         * If the user specified an address, do some simple checks here
         */
        if ((flags & MAP_FIXED) != 0) {
                caddr_t userlimit;

                /*
                 * Use the user address.  First verify that
                 * the address to be used is page aligned.
                 * Then make some simple bounds checks.
                 */
                if (((uintptr_t)*addrp & PAGEOFFSET) != 0)
                        return (EINVAL);

                userlimit = flags & _MAP_LOW32 ?
                    (caddr_t)USERLIMIT32 : as->a_userlimit;
                switch (valid_usr_range(*addrp, len, uprot, as, userlimit)) {
                case RANGE_OKAY:
                        break;
                case RANGE_BADPROT:
                        return (ENOTSUP);
                case RANGE_BADADDR:
                default:
                        return (ENOMEM);
                }
        }

        if ((prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) &&
            nbl_need_check(vp)) {
                int svmand;
                nbl_op_t nop;

                nbl_start_crit(vp, RW_READER);
                in_crit = 1;
                error = nbl_svmand(vp, fp->f_cred, &svmand);
                if (error != 0)
                        goto done;
                if ((prot & PROT_WRITE) && (type == MAP_SHARED)) {
                        if (prot & (PROT_READ | PROT_EXEC)) {
                                nop = NBL_READWRITE;
                        } else {
                                nop = NBL_WRITE;
                        }
                } else {
                        nop = NBL_READ;
                }
                if (nbl_conflict(vp, nop, 0, LONG_MAX, svmand, NULL)) {
                        error = EACCES;
                        goto done;
                }
        }

        /* discard lwpchan mappings, like munmap() */
        if ((flags & MAP_FIXED) && curproc->p_lcp != NULL)
                lwpchan_delete_mapping(curproc, *addrp, *addrp + len);

        /*
         * Ok, now let the vnode map routine do its thing to set things up.
         */
        error = fop_map(vp, pos, as,
            addrp, len, uprot, maxprot, flags, fp->f_cred, NULL);

        if (error == 0) {
                /*
                 * Tell machine specific code that lwp has mapped shared memory
                 */
                if (flags & MAP_SHARED) {
                        /* EMPTY */
                        LWP_MMODEL_SHARED_AS(*addrp, len);
                }
                if (vp->v_type == VREG &&
                    (flags & (MAP_TEXT | MAP_INITDATA)) != 0) {
                        /*
                         * Mark this as an executable vnode
                         */
                        mutex_enter(&vp->v_lock);
                        vp->v_flag |= VVMEXEC;
                        mutex_exit(&vp->v_lock);
                }
        }

done:
        if (in_crit)
                nbl_end_crit(vp);
        return (error);
}

#ifdef _LP64
/*
 * LP64 mmap(2) system call: 64-bit offset, 64-bit address.
 *
 * The "large file" mmap routine mmap64(2) is also mapped to this routine
 * by the 64-bit version of libc.
 *
 * Eventually, this should be the only version, and have smmap_common()
 * folded back into it again.  Some day.
 */
caddr_t
smmap64(caddr_t addr, size_t len, int prot, int flags, int fd, off_t pos)
{
        struct file *fp;
        int error;

        if (fd == -1 && (flags & MAP_ANON) != 0)
                error = smmap_common(&addr, len, prot, flags,
                    NULL, (offset_t)pos);
        else if ((fp = getf(fd)) != NULL) {
                error = smmap_common(&addr, len, prot, flags,
                    fp, (offset_t)pos);
                releasef(fd);
        } else
                error = EBADF;

        return (error ? (caddr_t)(uintptr_t)set_errno(error) : addr);
}
#endif  /* _LP64 */

#if defined(_SYSCALL32_IMPL) || defined(_ILP32)

/*
 * ILP32 mmap(2) system call: 32-bit offset, 32-bit address.
 */
caddr_t
smmap32(caddr32_t addr, size32_t len, int prot, int flags, int fd, off32_t pos)
{
        struct file *fp;
        int error;
        caddr_t a = (caddr_t)(uintptr_t)addr;

        if (flags & _MAP_LOW32)
                error = EINVAL;
        else if (fd == -1 && (flags & MAP_ANON) != 0)
                error = smmap_common(&a, (size_t)len, prot,
                    flags | _MAP_LOW32, NULL, (offset_t)pos);
        else if ((fp = getf(fd)) != NULL) {
                error = smmap_common(&a, (size_t)len, prot,
                    flags | _MAP_LOW32, fp, (offset_t)pos);
                releasef(fd);
        } else
                error = EBADF;

        ASSERT(error != 0 || (uintptr_t)(a + len) < (uintptr_t)UINT32_MAX);

        return (error ? (caddr_t)(uintptr_t)set_errno(error) : a);
}

/*
 * ILP32 mmap64(2) system call: 64-bit offset, 32-bit address.
 *
 * Now things really get ugly because we can't use the C-style
 * calling convention for more than 6 args, and 64-bit parameter
 * passing on 32-bit systems is less than clean.
 */

struct mmaplf32a {
        caddr_t addr;
        size_t len;
#ifdef _LP64
        /*
         * 32-bit contents, 64-bit cells
         */
        uint64_t prot;
        uint64_t flags;
        uint64_t fd;
        uint64_t offhi;
        uint64_t offlo;
#else
        /*
         * 32-bit contents, 32-bit cells
         */
        uint32_t prot;
        uint32_t flags;
        uint32_t fd;
        uint32_t offhi;
        uint32_t offlo;
#endif
};

int
smmaplf32(struct mmaplf32a *uap, rval_t *rvp)
{
        struct file *fp;
        int error;
        caddr_t a = uap->addr;
        int flags = (int)uap->flags;
        int fd = (int)uap->fd;
#ifdef _BIG_ENDIAN
        offset_t off = ((uoff_t)uap->offhi << 32) | (uoff_t)uap->offlo;
#else
        offset_t off = ((uoff_t)uap->offlo << 32) | (uoff_t)uap->offhi;
#endif

        if (flags & _MAP_LOW32)
                error = EINVAL;
        else if (fd == -1 && (flags & MAP_ANON) != 0)
                error = smmap_common(&a, uap->len, (int)uap->prot,
                    flags | _MAP_LOW32, NULL, off);
        else if ((fp = getf(fd)) != NULL) {
                error = smmap_common(&a, uap->len, (int)uap->prot,
                    flags | _MAP_LOW32, fp, off);
                releasef(fd);
        } else
                error = EBADF;

        if (error == 0)
                rvp->r_val1 = (uintptr_t)a;
        return (error);
}

#endif  /* _SYSCALL32_IMPL || _ILP32 */

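/*
 * munmap(2) system call: unmap the range [addr, addr + len) from the
 * calling process's address space.
 */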
int
munmap(caddr_t addr, size_t len)
{
        struct proc *p = curproc;
        struct as *as = p->p_as;

        if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
                return (set_errno(EINVAL));

        if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
                return (set_errno(EINVAL));

        /*
         * Discard lwpchan mappings.
         */
        if (p->p_lcp != NULL)
                lwpchan_delete_mapping(p, addr, addr + len);
        if (as_unmap(as, addr, len) != 0)
                return (set_errno(EINVAL));

        return (0);
}

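/*
 * mprotect(2) system call: change the protections of the range
 * [addr, addr + len).
 */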
int
mprotect(caddr_t addr, size_t len, int prot)
{
        struct as *as = curproc->p_as;
        uint_t uprot = prot | PROT_USER;
        int error;

        if (((uintptr_t)addr & PAGEOFFSET) != 0 || len == 0)
                return (set_errno(EINVAL));

        switch (valid_usr_range(addr, len, prot, as, as->a_userlimit)) {
        case RANGE_OKAY:
                break;
        case RANGE_BADPROT:
                return (set_errno(ENOTSUP));
        case RANGE_BADADDR:
        default:
                return (set_errno(ENOMEM));
        }

        error = as_setprot(as, addr, len, uprot);
        if (error)
                return (set_errno(error));
        return (0);
}

#define MC_CACHE        128                     /* internal result buffer */
#define MC_QUANTUM      (MC_CACHE * PAGESIZE)   /* addresses covered in loop */

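/*
 * mincore(2) system call: report, one byte per page, whether the pages of
 * [addr, addr + len) are resident in memory.
 */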
int
mincore(caddr_t addr, size_t len, char *vecp)
{
        struct as *as = curproc->p_as;
        caddr_t ea;                     /* end address of loop */
        size_t rl;                      /* inner result length */
        char vec[MC_CACHE];             /* local vector cache */
        int error;
        model_t model;
        long llen;

        model = get_udatamodel();
        /*
         * Validate form of address parameters.
         */
        if (model == DATAMODEL_NATIVE) {
                llen = (long)len;
        } else {
                llen = (int32_t)(size32_t)len;
        }
        if (((uintptr_t)addr & PAGEOFFSET) != 0 || llen <= 0)
                return (set_errno(EINVAL));

        if (valid_usr_range(addr, len, 0, as, as->a_userlimit) != RANGE_OKAY)
                return (set_errno(ENOMEM));

        /*
         * Loop over subranges of interval [addr : addr + len), recovering
         * results internally and then copying them out to caller.  Subrange
         * is based on the size of MC_CACHE, defined above.
         */
        for (ea = addr + len; addr < ea; addr += MC_QUANTUM) {
                error = as_incore(as, addr,
                    (size_t)MIN(MC_QUANTUM, ea - addr), vec, &rl);
                if (rl != 0) {
                        rl = (rl + PAGESIZE - 1) / PAGESIZE;
                        if (copyout(vec, vecp, rl) != 0)
                                return (set_errno(EFAULT));
                        vecp += rl;
                }
                if (error != 0)
                        return (set_errno(ENOMEM));
        }
        return (0);
}