 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
/*
 * Inter-Process Communication Shared Memory Facility.
 *
 * See os/ipc.c for a description of common IPC functionality.
 *
 * Control:	zone.max-shm-ids (rc_zone_shmmni)
 * Description:	Maximum number of shared memory ids allowed a zone.
 *
 *   When shmget() is used to allocate a shared memory segment, one id
 *   is allocated.  If the id allocation doesn't succeed, shmget()
 *   fails and errno is set to ENOSPC.  Upon successful shmctl(,
 *   IPC_RMID) the id is deallocated.
 *
 * Control:	project.max-shm-ids (rc_project_shmmni)
 * Description:	Maximum number of shared memory ids allowed a project.
 *
 *   When shmget() is used to allocate a shared memory segment, one id
 *   is allocated.  If the id allocation doesn't succeed, shmget()
 *   fails and errno is set to ENOSPC.  Upon successful shmctl(,
 *   IPC_RMID) the id is deallocated.
 *
 * Control:	zone.max-shm-memory (rc_zone_shmmax)
 * Description:	Total amount of shared memory allowed a zone.
 *
 *   When shmget() is used to allocate a shared memory segment, the
 *   segment's size is allocated against this limit.  If the space
 *   allocation doesn't succeed, shmget() fails and errno is set to
 *   EINVAL.  The size will be deallocated once the last process has
 *   detached the segment and the segment has been successfully
 *   shmctl(, IPC_RMID)ed.
 *
 * Control:	project.max-shm-memory (rc_project_shmmax)
 * Description:	Total amount of shared memory allowed a project.
 *
 *   When shmget() is used to allocate a shared memory segment, the
 *   segment's size is allocated against this limit.  If the space
 *   allocation doesn't succeed, shmget() fails and errno is set to
 *   EINVAL.  The size will be deallocated once the last process has
 *   detached the segment and the segment has been successfully
 *   shmctl(, IPC_RMID)ed.
 */
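/*
 * Illustrative userland sketch (not part of this module): the controls
 * above are visible to applications only through the shmget(2) failure
 * modes described above.  The segment size here is an example value, and
 * EINVAL has other causes besides the memory limits, so the messages are
 * a simplification.
 *
 *	#include <sys/ipc.h>
 *	#include <sys/shm.h>
 *	#include <errno.h>
 *	#include <stdio.h>
 *
 *	int
 *	try_shmget(size_t size)
 *	{
 *		int id = shmget(IPC_PRIVATE, size, IPC_CREAT | 0600);
 *
 *		if (id == -1 && errno == ENOSPC)
 *			(void) fprintf(stderr, "id limit (max-shm-ids)\n");
 *		else if (id == -1 && errno == EINVAL)
 *			(void) fprintf(stderr, "size limit or bad size\n");
 *		return (id);
 *	}
 */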
#include <sys/types.h>
#include <sys/param.h>
#include <sys/errno.h>
#include <sys/systm.h>
#include <sys/prsystm.h>
#include <sys/sysmacros.h>
#include <sys/tuneable.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/lwpchan_impl.h>
#include <sys/modctl.h>
#include <sys/syscall.h>
#include <sys/task.h>
#include <sys/project.h>
#include <sys/policy.h>
#include <sys/zone.h>
#include <sys/rctl.h>
#include <sys/ipc_impl.h>
#include <sys/shm_impl.h>
#include <vm/seg_vn.h>
#include <vm/vpage.h>
#include <vm/seg_spt.h>
#include <c2/audit.h>
static int shmem_lock(kshmid_t *sp, struct anon_map *amp);
static void shmem_unlock(kshmid_t *sp, struct anon_map *amp);
static void sa_add(struct proc *pp, caddr_t addr, size_t len, ulong_t flags,
	kshmid_t *id);
static void shm_rm_amp(kshmid_t *sp);
static void shm_dtor(kipc_perm_t *);
static void shm_rmid(kipc_perm_t *);
static void shm_remove_zone(zoneid_t, void *);
/*
 * Semantics for share_page_table and ism_off:
 *
 * These are hooks in /etc/system - only for internal testing purposes.
 *
 * Setting share_page_table automatically turns on the SHM_SHARE_MMU (ISM) flag
 * in a call to shmat(2). In other words, with share_page_table set, you always
 * get ISM, even if, say, DISM is specified. It should really be called "ism_on".
 *
 * Setting ism_off turns off the SHM_SHARE_MMU flag from the flags passed to
 * shmat(2).
 *
 * If both share_page_table and ism_off are set, share_page_table prevails.
 *
 * Although these tunables should probably be removed, they do have some
 * external exposure; as long as they exist, they should at least work sensibly.
 */
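/*
 * Hedged example of how these hooks would be set on a test machine (values
 * are illustrative): add one of the following lines to /etc/system and
 * reboot, or poke the variable with mdb -kw.
 *
 *	set share_page_table = 1
 *	set ism_off = 1
 *
 * With share_page_table set, every shmat(2) behaves as if SHM_SHARE_MMU had
 * been passed; with ism_off set (and share_page_table clear), SHM_SHARE_MMU
 * is stripped from the caller's flags.
 */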
int share_page_table;
int ism_off;
/*
 * The following tunables are obsolete.  Though for compatibility we
 * still read and interpret shminfo_shmmax and shminfo_shmmni (see
 * os/project.c), the preferred mechanism for administrating the IPC
 * Shared Memory facility is through the resource controls described at
 * the top of this file.
 */
size_t	shminfo_shmmax = 0x800000;	/* (obsolete) */
int	shminfo_shmmni = 100;		/* (obsolete) */
size_t	shminfo_shmmin = 1;		/* (obsolete) */
int	shminfo_shmseg = 6;		/* (obsolete) */
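/*
 * Administration sketch (the project name "user.dbadmin" and the values are
 * hypothetical): rather than tuning the obsolete shminfo_* variables, set
 * the resource controls described at the top of this file, e.g. from a
 * shell:
 *
 *	prctl -n project.max-shm-ids -r -v 256 -i project user.dbadmin
 *	prctl -n project.max-shm-memory -r -v 8gb -i project user.dbadmin
 */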
extern rctl_hndl_t rc_zone_shmmax;
extern rctl_hndl_t rc_zone_shmmni;
extern rctl_hndl_t rc_project_shmmax;
extern rctl_hndl_t rc_project_shmmni;
static ipc_service_t *shm_svc;
static zone_key_t shm_zone_key;
/*
 * Module linkage information for the kernel.
 */
static uintptr_t shmsys(int, uintptr_t, uintptr_t, uintptr_t);

static struct sysent ipcshm_sysent = {
	4,
#ifdef	_SYSCALL32_IMPL
	SE_ARGC | SE_NOUNLOAD | SE_64RVAL,
#else	/* _SYSCALL32_IMPL */
	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
#endif	/* _SYSCALL32_IMPL */
	(int (*)())shmsys
};

#ifdef	_SYSCALL32_IMPL
static struct sysent ipcshm_sysent32 = {
	4,
	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
	(int (*)())shmsys
};
#endif	/* _SYSCALL32_IMPL */

static struct modlsys modlsys = {
	&mod_syscallops, "System V shared memory", &ipcshm_sysent
};

#ifdef	_SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32, "32-bit System V shared memory", &ipcshm_sysent32
};
#endif	/* _SYSCALL32_IMPL */

static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef	_SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
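/*
 * Once _init() below has run, the module is visible from userland; for
 * example (output shape is illustrative only):
 *
 *	# modinfo | grep "shared memory"
 *	 ... shmsys (System V shared memory)
 *
 * SE_NOUNLOAD in the sysent flags above keeps the module loaded once it has
 * been used.
 */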
int
_init(void)
{
	int result;

	shm_svc = ipcs_create("shmids", rc_project_shmmni, rc_zone_shmmni,
	    sizeof (kshmid_t), shm_dtor, shm_rmid, AT_IPC_SHM,
	    offsetof(ipc_rqty_t, ipcq_shmmni));
	zone_key_create(&shm_zone_key, NULL, shm_remove_zone, NULL);

	if ((result = mod_install(&modlinkage)) == 0)
		return (0);

	(void) zone_key_delete(shm_zone_key);
	ipcs_destroy(shm_svc);

	return (result);
}
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
/*
 * Shmat (attach shared segment) system call.
 */
static int
shmat(int shmid, caddr_t uaddr, int uflags, uintptr_t *rvp)
{
	kshmid_t *sp;	/* shared memory header ptr */
	size_t	size;
	int	error = 0;
	proc_t *pp = curproc;
	struct as *as = pp->p_as;
	struct segvn_crargs crargs;	/* segvn create arguments */
	kmutex_t *lock;
	struct seg *segspt = NULL;
	caddr_t addr = uaddr;
	int flags = (uflags & SHMAT_VALID_FLAGS_MASK);
	int useISM;
	uchar_t prot = PROT_ALL;
	int result;
	if ((lock = ipc_lookup(shm_svc, shmid, (kipc_perm_t **)&sp)) == NULL)
		return (EINVAL);
	if (error = ipcperm_access(&sp->shm_perm, SHM_R, CRED()))
		goto errret;
	if ((flags & SHM_RDONLY) == 0 &&
	    (error = ipcperm_access(&sp->shm_perm, SHM_W, CRED())))
		goto errret;
	if (spt_invalid(flags)) {
		error = EINVAL;
		goto errret;
	}
	if (ism_off)
		flags = flags & ~SHM_SHARE_MMU;
	if (share_page_table) {
		flags = flags & ~SHM_PAGEABLE;
		flags = flags | SHM_SHARE_MMU;
	}
	useISM = (spt_locked(flags) || spt_pageable(flags));
	if (useISM && (error = ipcperm_access(&sp->shm_perm, SHM_W, CRED())))
		goto errret;
	if (useISM && isspt(sp)) {
		uint_t newsptflags = flags | spt_flags(sp->shm_sptseg);
		/*
		 * If trying to change an existing {D}ISM segment from ISM
		 * to DISM or vice versa, return error.  Note that this
		 * validation of flags needs to be done after the effect of
		 * tunables such as ism_off and share_page_table, for
		 * semantics that are consistent with the tunables' settings.
		 */
		if (spt_invalid(newsptflags)) {
			error = EINVAL;
			goto errret;
		}
	}
	ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
	size = sp->shm_amp->size;
	ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);

	/* somewhere to record spt info for final detach */
	if (sp->shm_sptinfo == NULL)
		sp->shm_sptinfo = kmem_zalloc(sizeof (sptinfo_t), KM_SLEEP);
	if (useISM) {
		uint_t share_szc;
		size_t share_size;
		struct shm_data ssd;
		uintptr_t align_hint;

		/*
		 * Pick a share pagesize to use, if (!isspt(sp)).
		 * Otherwise use the already chosen page size.
		 *
		 * For the initial shmat (!isspt(sp)), where sptcreate is
		 * called, map_pgsz is called to recommend a [D]ISM pagesize,
		 * important for systems which offer more than one potential
		 * [D]ISM pagesize.
		 * If the shmat is just to attach to an already created
		 * [D]ISM segment, then use the previously selected page size.
		 */
		if (!isspt(sp)) {
			share_size = map_pgsz(MAPPGSZ_ISM, pp, addr, size, 0);
			if (share_size == 0) {
				error = EINVAL;
				goto errret;
			}
			share_szc = page_szc(share_size);
		} else {
			share_szc = sp->shm_sptseg->s_szc;
			share_size = page_get_pagesize(share_szc);
		}
		size = P2ROUNDUP(size, share_size);

		align_hint = share_size;
#if defined(__i386) || defined(__amd64)
		/*
		 * For x86, we want to share as much of the page table tree
		 * as possible. We use a large align_hint at first, but
		 * if that fails, then the code below retries with align_hint
		 * set to share_size.
		 *
		 * The explicit extern here is due to the difficulties
		 * of getting to platform dependent includes. When/if the
		 * platform dependent bits of this function are cleaned up,
		 * another way of doing this should be found.
		 */
		{
			extern uint_t ptes_per_table;

			while (size >= ptes_per_table * (uint64_t)align_hint)
				align_hint *= ptes_per_table;
		}
#endif /* __i386 || __amd64 */
#if defined(__sparcv9)
		if (addr == 0 &&
		    pp->p_model == DATAMODEL_LP64 && AS_TYPE_64BIT(as)) {
			/*
			 * If no address has been passed in, and this is a
			 * 64-bit process, we'll try to find an address
			 * in the predict-ISM zone.
			 */
			caddr_t predbase = (caddr_t)PREDISM_1T_BASE;
			size_t len = PREDISM_BOUND - PREDISM_1T_BASE;

			if (as_gap(as, size + share_size, &predbase, &len,
			    AH_LO, NULL) != -1) {
				/*
				 * We found an address which looks like a
				 * candidate.  We want to round it up, and
				 * then check that it's a valid user range.
				 * This assures that we won't fail below.
				 */
				addr = (caddr_t)P2ROUNDUP((uintptr_t)predbase,
				    share_size);

				if (valid_usr_range(addr, size, prot,
				    as, as->a_userlimit) != RANGE_OKAY) {
					addr = 0;
				}
			}
		}
#endif /* __sparcv9 */
		if (addr == 0) {
			for (;;) {
				addr = (caddr_t)align_hint;
				map_addr(&addr, size, 0ll, 1, MAP_ALIGN);
				if (addr != NULL || align_hint == share_size)
					break;
				align_hint = share_size;
			}
			if (addr == NULL) {
				error = ENOMEM;
				goto errret;
			}
			ASSERT(((uintptr_t)addr & (align_hint - 1)) == 0);
		} else {
			/* Use the user-supplied attach address */
			caddr_t base;
			size_t len;

			/*
			 * Check that the address range
			 *  1) is properly aligned
			 *  2) is correct in unix terms
			 *  3) is within an unmapped address segment
			 */
			base = addr;
			len = size;		/* use spt aligned size */
			/* XXX - in SunOS, is sp->shm_segsz */
			if ((uintptr_t)base & (share_size - 1)) {
				error = EINVAL;
				goto errret;
			}
			result = valid_usr_range(base, len, prot, as,
			    as->a_userlimit);
			if (result == RANGE_BADPROT) {
				/*
				 * We try to accommodate processors which
				 * may not support execute permissions on
				 * all ISM segments by trying the check
				 * again but without PROT_EXEC.
				 */
				prot &= ~PROT_EXEC;
				result = valid_usr_range(base, len, prot, as,
				    as->a_userlimit);
			}
			if (result != RANGE_OKAY ||
			    as_gap(as, len, &base, &len, AH_LO,
			    NULL) != 0) {
				error = EINVAL;
				goto errret;
			}
		}
		if (!isspt(sp)) {
			error = sptcreate(size, &segspt, sp->shm_amp, prot,
			    flags, share_szc);
			if (error)
				goto errret;
			sp->shm_sptinfo->sptas = segspt->s_as;
			sp->shm_sptseg = segspt;
			sp->shm_sptprot = prot;
		} else if ((prot & sp->shm_sptprot) != sp->shm_sptprot) {
			/*
			 * Ensure we're attaching to an ISM segment with
			 * fewer or equal permissions than what we're
			 * allowed.  Fail if the segment has more
			 * permissions than what we're allowed.
			 */
			error = EACCES;
			goto errret;
		}
		ssd.shm_sptseg = sp->shm_sptseg;
		ssd.shm_sptas = sp->shm_sptinfo->sptas;
		ssd.shm_amp = sp->shm_amp;
		error = as_map(as, addr, size, segspt_shmattach, &ssd);
		if (error == 0)
			sp->shm_ismattch++;	/* keep count of ISM attaches */
	} else {

		/* Normal case. */
		if (flags & SHM_RDONLY)
			prot &= ~PROT_WRITE;

		if (addr == 0) {
			/* Let the system pick the attach address */
			map_addr(&addr, size, 0ll, 1, 0);
			if (addr == NULL) {
				error = ENOMEM;
				goto errret;
			}
		} else {
			/* Use the user-supplied attach address */
			caddr_t base;
			size_t len;
			if (flags & SHM_RND)
				addr = (caddr_t)((uintptr_t)addr &
				    ~(SHMLBA - 1));
			/*
			 * Check that the address range
			 *  1) is properly aligned
			 *  2) is correct in unix terms
			 *  3) is within an unmapped address segment
			 */
			base = addr;
			len = size;		/* use aligned size */
			/* XXX - in SunOS, is sp->shm_segsz */
			if ((uintptr_t)base & PAGEOFFSET) {
				error = EINVAL;
				goto errret;
			}
			result = valid_usr_range(base, len, prot, as,
			    as->a_userlimit);
			if (result == RANGE_BADPROT) {
				prot &= ~PROT_EXEC;
				result = valid_usr_range(base, len, prot, as,
				    as->a_userlimit);
			}
			if (result != RANGE_OKAY ||
			    as_gap(as, len, &base, &len,
			    AH_LO, NULL) != 0) {
				error = EINVAL;
				goto errret;
			}
		}
		/* Initialize the create arguments and map the segment */
		crargs = *(struct segvn_crargs *)zfod_argsp;
		crargs.offset = 0;
		crargs.type = MAP_SHARED;
		crargs.amp = sp->shm_amp;
		crargs.prot = prot;
		crargs.maxprot = crargs.prot;
		crargs.flags = 0;

		error = as_map(as, addr, size, segvn_create, &crargs);
		if (error)
			goto errret;
	}

	/* record shmem range for the detach */
	sa_add(pp, addr, (size_t)size, useISM ? SHMSA_ISM : 0, sp);
	*rvp = (uintptr_t)addr;

	sp->shm_atime = gethrestime_sec();
	sp->shm_lpid = pp->p_pid;
	ipc_hold(shm_svc, (kipc_perm_t *)sp);

	/*
	 * Tell machine specific code that lwp has mapped shared memory
	 */
	LWP_MMODEL_SHARED_AS(addr, size);

errret:
	mutex_exit(lock);
	return (error);
}
static void
shm_dtor(kipc_perm_t *perm)
{
	kshmid_t *sp = (kshmid_t *)perm;
	uint_t cnt;
	size_t rsize;

	ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
	anonmap_purge(sp->shm_amp);
	ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);

	if (sp->shm_sptinfo) {
		if (isspt(sp)) {
			sptdestroy(sp->shm_sptinfo->sptas, sp->shm_amp);
			sp->shm_lkcnt = 0;
		}
		kmem_free(sp->shm_sptinfo, sizeof (sptinfo_t));
	}

	if (sp->shm_lkcnt > 0) {
		shmem_unlock(sp, sp->shm_amp);
		sp->shm_lkcnt = 0;
	}

	ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
	cnt = --sp->shm_amp->refcnt;
	ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
	ASSERT(cnt == 0);
	shm_rm_amp(sp);

	if (sp->shm_perm.ipc_id != IPC_ID_INVAL) {
		rsize = ptob(btopr(sp->shm_segsz));
		ipcs_lock(shm_svc);
		sp->shm_perm.ipc_proj->kpj_data.kpd_shmmax -= rsize;
		sp->shm_perm.ipc_zone_ref.zref_zone->zone_shmmax -= rsize;
		ipcs_unlock(shm_svc);
	}
}
/* ARGSUSED */
static void
shm_rmid(kipc_perm_t *perm)
{
	/* nothing to do */
}
/*
 * Shmctl system call.
 */
/* ARGSUSED */
static int
shmctl(int shmid, int cmd, void *arg)
{
	kshmid_t		*sp;	/* shared memory header ptr */
	STRUCT_DECL(shmid_ds, ds);	/* for SVR4 IPC_SET */
	int			error = 0;
	struct cred		*cr = CRED();
	kmutex_t		*lock;
	model_t			mdl = get_udatamodel();
	struct shmid_ds64	ds64;
	shmatt_t		nattch;

	STRUCT_INIT(ds, mdl);
	/*
	 * Perform pre- or non-lookup actions (e.g. copyins, RMID).
	 */
	switch (cmd) {
	case IPC_SET:
		if (copyin(arg, STRUCT_BUF(ds), STRUCT_SIZE(ds)))
			return (set_errno(EFAULT));
		break;

	case IPC_SET64:
		if (copyin(arg, &ds64, sizeof (struct shmid_ds64)))
			return (set_errno(EFAULT));
		break;

	case IPC_RMID:
		return (ipc_rmid(shm_svc, shmid, cr));
	}
	if ((lock = ipc_lookup(shm_svc, shmid, (kipc_perm_t **)&sp)) == NULL)
		return (set_errno(EINVAL));

	switch (cmd) {
	/* Set ownership and permissions. */
	case IPC_SET:
		if (error = ipcperm_set(shm_svc, cr, &sp->shm_perm,
		    &STRUCT_BUF(ds)->shm_perm, mdl))
			break;
		sp->shm_ctime = gethrestime_sec();
		break;
	case IPC_STAT:
		if (error = ipcperm_access(&sp->shm_perm, SHM_R, cr))
			break;

		nattch = sp->shm_perm.ipc_ref - 1;

		ipcperm_stat(&STRUCT_BUF(ds)->shm_perm, &sp->shm_perm, mdl);
		STRUCT_FSET(ds, shm_segsz, sp->shm_segsz);
		STRUCT_FSETP(ds, shm_amp, NULL);	/* kernel addr */
		STRUCT_FSET(ds, shm_lkcnt, sp->shm_lkcnt);
		STRUCT_FSET(ds, shm_lpid, sp->shm_lpid);
		STRUCT_FSET(ds, shm_cpid, sp->shm_cpid);
		STRUCT_FSET(ds, shm_nattch, nattch);
		STRUCT_FSET(ds, shm_cnattch, sp->shm_ismattch);
		STRUCT_FSET(ds, shm_atime, sp->shm_atime);
		STRUCT_FSET(ds, shm_dtime, sp->shm_dtime);
		STRUCT_FSET(ds, shm_ctime, sp->shm_ctime);

		mutex_exit(lock);
		if (copyout(STRUCT_BUF(ds), arg, STRUCT_SIZE(ds)))
			return (set_errno(EFAULT));

		return (0);
	case IPC_SET64:
		if (error = ipcperm_set64(shm_svc, cr,
		    &sp->shm_perm, &ds64.shmx_perm))
			break;
		sp->shm_ctime = gethrestime_sec();
		break;

	case IPC_STAT64:
		nattch = sp->shm_perm.ipc_ref - 1;

		ipcperm_stat64(&ds64.shmx_perm, &sp->shm_perm);
		ds64.shmx_segsz = sp->shm_segsz;
		ds64.shmx_lkcnt = sp->shm_lkcnt;
		ds64.shmx_lpid = sp->shm_lpid;
		ds64.shmx_cpid = sp->shm_cpid;
		ds64.shmx_nattch = nattch;
		ds64.shmx_cnattch = sp->shm_ismattch;
		ds64.shmx_atime = sp->shm_atime;
		ds64.shmx_dtime = sp->shm_dtime;
		ds64.shmx_ctime = sp->shm_ctime;

		mutex_exit(lock);
		if (copyout(&ds64, arg, sizeof (struct shmid_ds64)))
			return (set_errno(EFAULT));

		return (0);
	/* Lock segment in memory */
	case SHM_LOCK:
		if ((error = secpolicy_lock_memory(cr)) != 0)
			break;

		/* protect against overflow */
		if (sp->shm_lkcnt >= USHRT_MAX) {
			error = ENOMEM;
			break;
		}
		if (!isspt(sp) && (sp->shm_lkcnt++ == 0)) {
			if (error = shmem_lock(sp, sp->shm_amp)) {
				ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock,
				    RW_WRITER);
				cmn_err(CE_NOTE, "shmctl - couldn't lock %ld"
				    " pages into memory", sp->shm_amp->size);
				ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
				error = ENOMEM;
				sp->shm_lkcnt--;
			}
		}
		break;

	case SHM_UNLOCK:
		if ((error = secpolicy_lock_memory(cr)) != 0)
			break;

		if (sp->shm_lkcnt && (--sp->shm_lkcnt == 0)) {
			shmem_unlock(sp, sp->shm_amp);
		}
		break;

	default:
		error = EINVAL;
		break;
	}

	mutex_exit(lock);

	if (error)
		return (set_errno(error));

	return (0);
}
static void
shm_detach(proc_t *pp, segacct_t *sap)
{
	kshmid_t	*sp = sap->sa_id;
	size_t		len = sap->sa_len;
	caddr_t		addr = sap->sa_addr;

	/*
	 * Discard lwpchan mappings.
	 */
	if (pp->p_lcp != NULL)
		lwpchan_delete_mapping(pp, addr, addr + len);
	(void) as_unmap(pp->p_as, addr, len);

	/*
	 * Perform some detach-time accounting.
	 */
	(void) ipc_lock(shm_svc, sp->shm_perm.ipc_id);
	if (sap->sa_flags & SHMSA_ISM)
		sp->shm_ismattch--;
	sp->shm_dtime = gethrestime_sec();
	sp->shm_lpid = pp->p_pid;
	ipc_rele(shm_svc, (kipc_perm_t *)sp);	/* Drops lock */

	kmem_free(sap, sizeof (segacct_t));
}
static int
shmdt(caddr_t addr)
{
	proc_t *pp = curproc;
	segacct_t *sap, template;

	mutex_enter(&pp->p_lock);
	prbarrier(pp);			/* block /proc.  See shmgetid(). */

	template.sa_addr = addr;
	template.sa_len = 0;
	if ((pp->p_segacct == NULL) ||
	    ((sap = avl_find(pp->p_segacct, &template, NULL)) == NULL)) {
		mutex_exit(&pp->p_lock);
		return (EINVAL);
	}
	if (sap->sa_addr != addr) {
		mutex_exit(&pp->p_lock);
		return (EINVAL);
	}
	avl_remove(pp->p_segacct, sap);
	mutex_exit(&pp->p_lock);

	shm_detach(pp, sap);

	return (0);
}
/*
 * Remove all shared memory segments associated with a given zone.
 * Called by zone_shutdown when the zone is halted.
 */
/* ARGSUSED1 */
static void
shm_remove_zone(zoneid_t zoneid, void *arg)
{
	ipc_remove_zone(shm_svc, zoneid);
}
/*
 * Shmget (create new shmem) system call.
 */
static int
shmget(key_t key, size_t size, int shmflg, uintptr_t *rvp)
{
	proc_t		*pp = curproc;
	kshmid_t	*sp;
	kmutex_t	*lock;
	int		error;

top:
	if (error = ipc_get(shm_svc, key, shmflg, (kipc_perm_t **)&sp, &lock))
		return (set_errno(error));

	if (!IPC_FREE(&sp->shm_perm)) {
		/*
		 * A segment with the requested key exists.
		 */
		if (size > sp->shm_segsz) {
			mutex_exit(lock);
			return (set_errno(EINVAL));
		}
	} else {
		/*
		 * A new segment should be created.
		 */
		size_t npages = btopr(size);
		size_t rsize = ptob(npages);
		/*
		 * Check rsize and the per-project and per-zone limit on
		 * shared memory.  Checking rsize handles both the size == 0
		 * case and the size < ULONG_MAX & PAGEMASK case (i.e.
		 * rounding up wraps a size_t).
		 */
		mutex_enter(&pp->p_lock);
		if (rsize == 0 ||
		    (rctl_test(rc_project_shmmax,
		    pp->p_task->tk_proj->kpj_rctls, pp, rsize,
		    RCA_SAFE) & RCT_DENY) ||
		    (rctl_test(rc_zone_shmmax,
		    pp->p_zone->zone_rctls, pp, rsize,
		    RCA_SAFE) & RCT_DENY)) {
			mutex_exit(&pp->p_lock);
			mutex_exit(lock);
			ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
			return (set_errno(EINVAL));
		}
		mutex_exit(&pp->p_lock);
		mutex_exit(lock);

		if (anon_resv(rsize) == 0) {
			ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
			return (set_errno(ENOMEM));
		}
		/*
		 * If any new failure points are introduced between the
		 * above anon_resv() and the below ipc_commit_begin(),
		 * these failure points will need to unreserve the anon
		 * reserved using anon_unresv().
		 *
		 * Once ipc_commit_begin() is called, the anon reserved
		 * above will be automatically unreserved by future calls to
		 * ipcs_cleanup() -> shm_dtor() -> shm_rm_amp().  If
		 * ipc_commit_begin() fails, it internally calls shm_dtor(),
		 * unreserving the above anon, and freeing the below amp.
		 */

		sp->shm_amp = anonmap_alloc(rsize, rsize, ANON_SLEEP);
		sp->shm_amp->a_sp = sp;
		/*
		 * Store the original user's requested size, in bytes,
		 * rather than the page-aligned size.  The former is
		 * used for IPC_STAT and shmget() lookups.  The latter
		 * is saved in the anon_map structure and is used for
		 * calls to the vm layer.
		 */
		sp->shm_segsz = size;
		sp->shm_atime = sp->shm_dtime = 0;
		sp->shm_ctime = gethrestime_sec();
		sp->shm_lpid = (pid_t)0;
		sp->shm_cpid = curproc->p_pid;
		sp->shm_ismattch = 0;
		sp->shm_sptinfo = NULL;
		/*
		 * Check limits one last time, push id into global
		 * visibility, and update resource usage counts.
		 */
		if (error = ipc_commit_begin(shm_svc, key, shmflg,
		    (kipc_perm_t *)sp)) {
			if (error == EAGAIN)
				goto top;
			return (set_errno(error));
		}

		if ((rctl_test(rc_project_shmmax,
		    sp->shm_perm.ipc_proj->kpj_rctls, pp, rsize,
		    RCA_SAFE) & RCT_DENY) ||
		    (rctl_test(rc_zone_shmmax,
		    sp->shm_perm.ipc_zone_ref.zref_zone->zone_rctls, pp, rsize,
		    RCA_SAFE) & RCT_DENY)) {
			ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
			return (set_errno(EINVAL));
		}
		sp->shm_perm.ipc_proj->kpj_data.kpd_shmmax += rsize;
		sp->shm_perm.ipc_zone_ref.zref_zone->zone_shmmax += rsize;

		lock = ipc_commit_end(shm_svc, &sp->shm_perm);
	}

	if (AU_AUDITING())
		audit_ipcget(AT_IPC_SHM, (void *)sp);

	*rvp = (uintptr_t)(sp->shm_perm.ipc_id);

	mutex_exit(lock);
	return (0);
}
/*
 * shmids system call.
 */
static int
shmids(int *buf, uint_t nids, uint_t *pnids)
{
	return (ipc_ids(shm_svc, buf, nids, pnids));
}
/*
 * System entry point for shmat, shmctl, shmdt, and shmget system calls.
 */
static uintptr_t
shmsys(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2)
{
	int	error;
	uintptr_t r_val = 0;

	switch (opcode) {
	case SHMAT:
		error = shmat((int)a0, (caddr_t)a1, (int)a2, &r_val);
		break;
	case SHMCTL:
		error = shmctl((int)a0, (int)a1, (void *)a2);
		break;
	case SHMDT:
		error = shmdt((caddr_t)a0);
		break;
	case SHMGET:
		error = shmget((key_t)a0, (size_t)a1, (int)a2, &r_val);
		break;
	case SHMIDS:
		error = shmids((int *)a0, (uint_t)a1, (uint_t *)a2);
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error)
		return ((uintptr_t)set_errno(error));

	return (r_val);
}
/*
 * segacct_t comparator
 * This works as expected, with one minor change: the first of two real
 * segments with equal addresses is considered to be 'greater than' the
 * second.  We only return equal when searching using a template, in
 * which case we explicitly set the template segment's length to 0
 * (which is invalid for a real segment).
 */
static int
shm_sacompar(const void *x, const void *y)
{
	segacct_t *sa1 = (segacct_t *)x;
	segacct_t *sa2 = (segacct_t *)y;

	if (sa1->sa_addr < sa2->sa_addr) {
		return (-1);
	} else if (sa2->sa_len != 0) {
		if (sa1->sa_addr >= sa2->sa_addr + sa2->sa_len) {
			return (1);
		} else if (sa1->sa_len != 0) {
			return (1);
		} else {
			return (0);
		}
	} else if (sa1->sa_addr > sa2->sa_addr) {
		return (1);
	}

	return (0);
}
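/*
 * Worked example (addresses are illustrative): for a real segment seg with
 * seg.sa_addr == (caddr_t)0x1000 and seg.sa_len == 0x2000, and a template
 * with sa_len == 0:
 *
 *	template.sa_addr = (caddr_t)0x1800;
 *	shm_sacompar(&template, &seg);	returns 0 (addr falls inside seg)
 *	template.sa_addr = (caddr_t)0x3000;
 *	shm_sacompar(&template, &seg);	returns 1 (at or past the end of seg)
 *
 * Two real (nonzero-length) segments never compare equal; when their
 * addresses match, the first argument is considered 'greater than' the
 * second, as described above.
 */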
/*
 * add this record to the segacct list.
 */
static void
sa_add(struct proc *pp, caddr_t addr, size_t len, ulong_t flags, kshmid_t *id)
{
	segacct_t *nsap;
	avl_tree_t *tree = NULL;
	avl_index_t where;

	nsap = kmem_alloc(sizeof (segacct_t), KM_SLEEP);
	nsap->sa_addr = addr;
	nsap->sa_len = len;
	nsap->sa_flags = flags;
	nsap->sa_id = id;

	if (pp->p_segacct == NULL)
		tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);

	mutex_enter(&pp->p_lock);
	prbarrier(pp);			/* block /proc.  See shmgetid(). */

	if (pp->p_segacct == NULL) {
		avl_create(tree, shm_sacompar, sizeof (segacct_t),
		    offsetof(segacct_t, sa_tree));
		pp->p_segacct = tree;
	} else if (tree) {
		kmem_free(tree, sizeof (avl_tree_t));
	}

	/*
	 * We can ignore the result of avl_find, as the comparator will
	 * never return equal for segments with non-zero length.  This
	 * is a necessary hack to get around the fact that we do, in
	 * fact, have duplicate keys.
	 */
	(void) avl_find(pp->p_segacct, nsap, &where);
	avl_insert(pp->p_segacct, nsap, where);

	mutex_exit(&pp->p_lock);
}
/*
 * Duplicate parent's segacct records in child.
 */
void
shmfork(struct proc *ppp, struct proc *cpp)
{
	segacct_t *sap;
	kshmid_t *sp;
	kmutex_t *mp;

	ASSERT(ppp->p_segacct != NULL);

	/*
	 * We are the only lwp running in the parent so nobody can
	 * mess with our p_segacct list.  Thus it is safe to traverse
	 * the list without holding p_lock.  This is essential because
	 * we can't hold p_lock during a KM_SLEEP allocation.
	 */
	for (sap = (segacct_t *)avl_first(ppp->p_segacct); sap != NULL;
	    sap = (segacct_t *)AVL_NEXT(ppp->p_segacct, sap)) {
		sa_add(cpp, sap->sa_addr, sap->sa_len, sap->sa_flags,
		    sap->sa_id);
		sp = sap->sa_id;
		mp = ipc_lock(shm_svc, sp->shm_perm.ipc_id);
		if (sap->sa_flags & SHMSA_ISM)
			sp->shm_ismattch++;
		ipc_hold(shm_svc, (kipc_perm_t *)sp);
		mutex_exit(mp);
	}
}
/*
 * Detach shared memory segments from exiting process.
 */
void
shmexit(struct proc *pp)
{
	segacct_t *sap;
	avl_tree_t *tree;
	void *cookie = NULL;

	ASSERT(pp->p_segacct != NULL);

	mutex_enter(&pp->p_lock);
	prbarrier(pp);
	tree = pp->p_segacct;
	pp->p_segacct = NULL;
	mutex_exit(&pp->p_lock);

	while ((sap = avl_destroy_nodes(tree, &cookie)) != NULL)
		(void) shm_detach(pp, sap);

	avl_destroy(tree);
	kmem_free(tree, sizeof (avl_tree_t));
}
/*
 * At this time pages should be in memory, so just lock them.
 */
static void
lock_again(size_t npages, kshmid_t *sp, struct anon_map *amp)
{
	struct anon *ap;
	struct page *pp;
	struct vnode *vp;
	u_offset_t off;
	ulong_t anon_idx;
	anon_sync_obj_t cookie;

	mutex_enter(&sp->shm_mlock);
	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
	for (anon_idx = 0; npages != 0; anon_idx++, npages--) {

		anon_array_enter(amp, anon_idx, &cookie);
		ap = anon_get_ptr(amp->ahp, anon_idx);
		ASSERT(ap != NULL);
		swap_xlate(ap, &vp, &off);
		anon_array_exit(&cookie);

		pp = page_lookup(&vp->v_object, off, SE_SHARED);
		if (pp == NULL) {
			panic("lock_again: page not in the system");
			/*NOTREACHED*/
		}
		/* page should already be locked by caller */
		ASSERT(pp->p_lckcnt > 0);
		(void) page_pp_lock(pp, 0, 0);
		page_unlock(pp);
	}
	ANON_LOCK_EXIT(&amp->a_rwlock);
	mutex_exit(&sp->shm_mlock);
}
/*
 * Attach the shared memory segment to the process
 * address space and lock the pages.
 */
static int
shmem_lock(kshmid_t *sp, struct anon_map *amp)
{
	size_t npages = btopr(amp->size);
	struct as *as;
	struct segvn_crargs crargs;
	int error;

	/*
	 * A later ISM/DISM attach may increase the size of the amp, so
	 * cache the number of pages locked for the future shmem_unlock()
	 */
	sp->shm_lkpages = npages;

	as = as_alloc();
	/* Initialize the create arguments and map the segment */
	crargs = *(struct segvn_crargs *)zfod_argsp;	/* structure copy */
	crargs.offset = (u_offset_t)0;
	crargs.type = MAP_SHARED;
	crargs.amp = amp;
	crargs.prot = PROT_ALL;
	crargs.maxprot = crargs.prot;
	crargs.flags = 0;
	error = as_map(as, 0x0, amp->size, segvn_create, &crargs);
	if (!error) {
		if ((error = as_ctl(as, 0x0, amp->size, MC_LOCK, 0, 0,
		    NULL, 0)) == 0) {
			lock_again(npages, sp, amp);
		}
		(void) as_unmap(as, 0x0, amp->size);
	}
	as_free(as);
	return (error);
}
/*
 * Unlock shared memory
 */
static void
shmem_unlock(kshmid_t *sp, struct anon_map *amp)
{
	struct anon *ap;
	pgcnt_t npages = sp->shm_lkpages;
	struct vnode *vp;
	struct page *pp;
	u_offset_t off;
	ulong_t anon_idx;
	size_t unlocked_bytes = 0;
	kproject_t *proj;
	anon_sync_obj_t cookie;

	proj = sp->shm_perm.ipc_proj;
	mutex_enter(&sp->shm_mlock);
	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
	for (anon_idx = 0; anon_idx < npages; anon_idx++) {

		anon_array_enter(amp, anon_idx, &cookie);
		if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
			panic("shmem_unlock: null app");
			/*NOTREACHED*/
		}
		swap_xlate(ap, &vp, &off);
		anon_array_exit(&cookie);
		pp = page_lookup(&vp->v_object, off, SE_SHARED);
		if (pp == NULL) {
			panic("shmem_unlock: page not in the system");
			/*NOTREACHED*/
		}
		/*
		 * Page should have at least one lock from the previous
		 * shmem_lock().
		 */
		ASSERT(pp->p_lckcnt > 0);
		page_pp_unlock(pp, 0, 0);
		if (pp->p_lckcnt == 0)
			unlocked_bytes += PAGESIZE;

		page_unlock(pp);
	}

	if (unlocked_bytes > 0) {
		rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0);
	}

	ANON_LOCK_EXIT(&amp->a_rwlock);
	mutex_exit(&sp->shm_mlock);
}
/*
 * We call this routine when we have removed all references to this
 * amp.  This means all shmdt()s and the IPC_RMID have been done.
 */
static void
shm_rm_amp(kshmid_t *sp)
{
	struct anon_map *amp = sp->shm_amp;
	zone_t *zone;

	zone = sp->shm_perm.ipc_zone_ref.zref_zone;
	ASSERT(zone != NULL);

	/*
	 * Free up the anon_map.
	 */
	lgrp_shm_policy_fini(amp, NULL);
	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
	if (amp->a_szc != 0) {
		anon_shmap_free_pages(amp, 0, amp->size);
	} else {
		anon_free(amp->ahp, 0, amp->size);
	}
	ANON_LOCK_EXIT(&amp->a_rwlock);
	anon_unresv_zone(amp->swresv, zone);
	anonmap_free(amp);
}
/*
 * Return the shared memory id for the process's virtual address.
 * Return SHMID_NONE if addr is not within a SysV shared memory segment.
 * Return SHMID_FREE if addr's SysV shared memory segment's id has been freed.
 *
 * shmgetid() is called from code in /proc with the process locked but
 * with pp->p_lock not held.  The address space lock is held, so we
 * cannot grab pp->p_lock here due to lock-ordering constraints.
 * Because of all this, modifications to the p_segacct list must only
 * be made after calling prbarrier() to ensure the process is not locked.
 * See shmdt() and sa_add(), above.  shmgetid() may also be called on a
 * thread's own process without the process locked.
 */
int
shmgetid(proc_t *pp, caddr_t addr)
{
	segacct_t *sap, template;

	ASSERT(MUTEX_NOT_HELD(&pp->p_lock));
	ASSERT((pp->p_proc_flag & P_PR_LOCK) || pp == curproc);

	if (pp->p_segacct == NULL)
		return (SHMID_NONE);

	template.sa_addr = addr;
	template.sa_len = 0;
	if ((sap = avl_find(pp->p_segacct, &template, NULL)) == NULL)
		return (SHMID_NONE);

	if (IPC_FREE(&sap->sa_id->shm_perm))
		return (SHMID_FREE);

	return (sap->sa_id->shm_perm.ipc_id);
}
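/*
 * Hedged usage sketch (hypothetical /proc-style caller): per the rules
 * above, the caller must either hold the target's P_PR_LOCK or be asking
 * about curproc, and must not hold p_lock:
 *
 *	int id = shmgetid(pp, addr);
 *
 *	if (id == SHMID_NONE)
 *		... addr is not in a SysV shared memory segment ...
 *	else if (id == SHMID_FREE)
 *		... segment is still mapped but its id was removed ...
 *	else
 *		... id is the segment's IPC id ...
 */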