/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * University Copyright- Copyright (c) 1982, 1986, 1988
 * The Regents of the University of California
 * All Rights Reserved
 *
 * University Acknowledgment- Portions of this document are derived from
 * software developed by the University of California, Berkeley, and its
 * contributors.
 */
/*
 * Inter-Process Communication Shared Memory Facility.
 *
 * See os/ipc.c for a description of common IPC functionality.
 *
 * Resource controls
 * -----------------
 *
 * Control:	zone.max-shm-ids (rc_zone_shmmni)
 * Description:	Maximum number of shared memory ids allowed a zone.
 *
 *   When shmget() is used to allocate a shared memory segment, one id
 *   is allocated.  If the id allocation doesn't succeed, shmget()
 *   fails and errno is set to ENOSPC.  Upon successful shmctl(,
 *   IPC_RMID) the id is deallocated.
 *
 * Control:	project.max-shm-ids (rc_project_shmmni)
 * Description:	Maximum number of shared memory ids allowed a project.
 *
 *   When shmget() is used to allocate a shared memory segment, one id
 *   is allocated.  If the id allocation doesn't succeed, shmget()
 *   fails and errno is set to ENOSPC.  Upon successful shmctl(,
 *   IPC_RMID) the id is deallocated.
 *
 * Control:	zone.max-shm-memory (rc_zone_shmmax)
 * Description:	Total amount of shared memory allowed a zone.
 *
 *   When shmget() is used to allocate a shared memory segment, the
 *   segment's size is allocated against this limit.  If the space
 *   allocation doesn't succeed, shmget() fails and errno is set to
 *   EINVAL.  The size will be deallocated once the last process has
 *   detached the segment and the segment has been successfully
 *   shmctl(, IPC_RMID)ed.
 *
 * Control:	project.max-shm-memory (rc_project_shmmax)
 * Description:	Total amount of shared memory allowed a project.
 *
 *   When shmget() is used to allocate a shared memory segment, the
 *   segment's size is allocated against this limit.  If the space
 *   allocation doesn't succeed, shmget() fails and errno is set to
 *   EINVAL.  The size will be deallocated once the last process has
 *   detached the segment and the segment has been successfully
 *   shmctl(, IPC_RMID)ed.
 */
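/*
 * Illustrative only (not part of this module): a minimal user-level
 * sketch of how the limits above surface to applications.  A failed id
 * allocation reports ENOSPC, a failed size allocation reports EINVAL,
 * and a successful IPC_RMID releases both the id and the size.
 *
 *	#include <sys/ipc.h>
 *	#include <sys/shm.h>
 *	#include <errno.h>
 *	#include <stdio.h>
 *
 *	int
 *	main(void)
 *	{
 *		int id = shmget(IPC_PRIVATE, 1024 * 1024,
 *		    IPC_CREAT | 0600);
 *
 *		if (id == -1) {
 *			if (errno == ENOSPC)
 *				(void) fprintf(stderr, "shm id limit\n");
 *			else if (errno == EINVAL)
 *				(void) fprintf(stderr, "shm size limit\n");
 *			return (1);
 *		}
 *		(void) shmctl(id, IPC_RMID, NULL);
 *		return (0);
 *	}
 */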
#include <sys/types.h>
#include <sys/param.h>
#include <sys/cred.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/kmem.h>
#include <sys/user.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/prsystm.h>
#include <sys/sysmacros.h>
#include <sys/tuneable.h>
#include <sys/vm.h>
#include <sys/mman.h>
#include <sys/swap.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/lwpchan_impl.h>
#include <sys/avl.h>
#include <sys/modctl.h>
#include <sys/syscall.h>
#include <sys/task.h>
#include <sys/project.h>
#include <sys/policy.h>
#include <sys/zone.h>
#include <sys/rctl.h>

#include <sys/ipc.h>
#include <sys/ipc_impl.h>
#include <sys/shm.h>
#include <sys/shm_impl.h>

#include <vm/hat.h>
#include <vm/seg.h>
#include <vm/as.h>
#include <vm/seg_vn.h>
#include <vm/anon.h>
#include <vm/page.h>
#include <vm/vpage.h>
#include <vm/seg_spt.h>

#include <c2/audit.h>
static int shmem_lock(kshmid_t *sp, struct anon_map *amp);
static void shmem_unlock(kshmid_t *sp, struct anon_map *amp);
static void sa_add(struct proc *pp, caddr_t addr, size_t len, ulong_t flags,
	kshmid_t *id);
static void shm_rm_amp(kshmid_t *sp);
static void shm_dtor(kipc_perm_t *);
static void shm_rmid(kipc_perm_t *);
static void shm_remove_zone(zoneid_t, void *);
/*
 * Semantics for share_page_table and ism_off:
 *
 * These are hooks in /etc/system - only for internal testing purposes.
 *
 * Setting share_page_table automatically turns on the SHM_SHARE_MMU (ISM)
 * flag in a call to shmat(2).  In other words, with share_page_table set,
 * you always get ISM, even if, say, DISM is specified.  It should really
 * be called "ism_on".
 *
 * Setting ism_off turns off the SHM_SHARE_MMU flag from the flags passed
 * to shmat(2).
 *
 * If both share_page_table and ism_off are set, share_page_table prevails.
 *
 * Although these tunables should probably be removed, they do have some
 * external exposure; as long as they exist, they should at least work
 * sensibly.
 */
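/*
 * For reference, these hooks would be enabled from /etc/system with
 * entries of the form (module prefix assumed here; illustrative only):
 *
 *	set shmsys:share_page_table = 1
 *	set shmsys:ism_off = 1
 *
 * followed by a reboot.  Again, these are internal testing hooks, not a
 * supported tuning interface.
 */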
int share_page_table;
int ism_off;
/*
 * The following tunables are obsolete.  Though for compatibility we
 * still read and interpret shminfo_shmmax and shminfo_shmmni (see
 * os/project.c), the preferred mechanism for administering the IPC
 * Shared Memory facility is through the resource controls described at
 * the top of this file.
 */
size_t	shminfo_shmmax = 0x800000;	/* (obsolete) */
int	shminfo_shmmni = 100;		/* (obsolete) */
size_t	shminfo_shmmin = 1;		/* (obsolete) */
int	shminfo_shmseg = 6;		/* (obsolete) */
extern rctl_hndl_t rc_zone_shmmax;
extern rctl_hndl_t rc_zone_shmmni;
extern rctl_hndl_t rc_project_shmmax;
extern rctl_hndl_t rc_project_shmmni;
static ipc_service_t *shm_svc;
static zone_key_t shm_zone_key;
/*
 * Module linkage information for the kernel.
 */
static uintptr_t shmsys(int, uintptr_t, uintptr_t, uintptr_t);

static struct sysent ipcshm_sysent = {
	4,
#ifdef	_SYSCALL32_IMPL
	SE_ARGC | SE_NOUNLOAD | SE_64RVAL,
#else	/* _SYSCALL32_IMPL */
	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
#endif	/* _SYSCALL32_IMPL */
	(int (*)())shmsys
};

#ifdef	_SYSCALL32_IMPL
static struct sysent ipcshm_sysent32 = {
	4,
	SE_ARGC | SE_NOUNLOAD | SE_32RVAL1,
	(int (*)())shmsys
};
#endif	/* _SYSCALL32_IMPL */

static struct modlsys modlsys = {
	&mod_syscallops, "System V shared memory", &ipcshm_sysent
};

#ifdef	_SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32, "32-bit System V shared memory", &ipcshm_sysent32
};
#endif	/* _SYSCALL32_IMPL */

static struct modlinkage modlinkage = {
	MODREV_1, &modlsys,
#ifdef	_SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};


int
_init(void)
{
	int result;

	shm_svc = ipcs_create("shmids", rc_project_shmmni, rc_zone_shmmni,
	    sizeof (kshmid_t), shm_dtor, shm_rmid, AT_IPC_SHM,
	    offsetof(ipc_rqty_t, ipcq_shmmni));
	zone_key_create(&shm_zone_key, NULL, shm_remove_zone, NULL);

	if ((result = mod_install(&modlinkage)) == 0)
		return (0);

	(void) zone_key_delete(shm_zone_key);
	ipcs_destroy(shm_svc);

	return (result);
}

int
_fini(void)
{
	return (EBUSY);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
/*
 * Shmat (attach shared segment) system call.
 */
static int
shmat(int shmid, caddr_t uaddr, int uflags, uintptr_t *rvp)
{
	kshmid_t *sp;	/* shared memory header ptr */
	size_t	size;
	int	error = 0;
	proc_t *pp = curproc;
	struct as *as = pp->p_as;
	struct segvn_crargs crargs;	/* segvn create arguments */
	kmutex_t *lock;
	struct seg *segspt = NULL;
	caddr_t addr = uaddr;
	int flags = (uflags & SHMAT_VALID_FLAGS_MASK);
	int useISM;
	uchar_t prot = PROT_ALL;
	int result;

	if ((lock = ipc_lookup(shm_svc, shmid, (kipc_perm_t **)&sp)) == NULL)
		return (EINVAL);
	if (error = ipcperm_access(&sp->shm_perm, SHM_R, CRED()))
		goto errret;
	if ((flags & SHM_RDONLY) == 0 &&
	    (error = ipcperm_access(&sp->shm_perm, SHM_W, CRED())))
		goto errret;
	if (spt_invalid(flags)) {
		error = EINVAL;
		goto errret;
	}
	if (ism_off)
		flags = flags & ~SHM_SHARE_MMU;
	if (share_page_table) {
		flags = flags & ~SHM_PAGEABLE;
		flags = flags | SHM_SHARE_MMU;
	}
	useISM = (spt_locked(flags) || spt_pageable(flags));
	if (useISM && (error = ipcperm_access(&sp->shm_perm, SHM_W, CRED())))
		goto errret;
	if (useISM && isspt(sp)) {
		uint_t newsptflags = flags | spt_flags(sp->shm_sptseg);
		/*
		 * If trying to change an existing {D}ISM segment from ISM
		 * to DISM or vice versa, return error.  Note that this
		 * validation of flags needs to be done after the effect of
		 * tunables such as ism_off and share_page_table, for
		 * semantics that are consistent with the tunables' settings.
		 */
		if (spt_invalid(newsptflags)) {
			error = EINVAL;
			goto errret;
		}
	}
	ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
	size = sp->shm_amp->size;
	ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);

	/* somewhere to record spt info for final detach */
	if (sp->shm_sptinfo == NULL)
		sp->shm_sptinfo = kmem_zalloc(sizeof (sptinfo_t), KM_SLEEP);

	as_rangelock(as);

	if (useISM) {
		/*
		 * Handle ISM
		 */
		uint_t share_szc;
		size_t share_size;
		struct shm_data ssd;
		uintptr_t align_hint;

		/*
		 * Pick a share pagesize to use, if (!isspt(sp)).
		 * Otherwise use the already chosen page size.
		 *
		 * For the initial shmat (!isspt(sp)), where sptcreate is
		 * called, map_pgsz is called to recommend a [D]ISM pagesize,
		 * important for systems which offer more than one potential
		 * [D]ISM pagesize.
		 * If the shmat is just to attach to an already created
		 * [D]ISM segment, then use the previously selected page size.
		 */
		if (!isspt(sp)) {
			share_size = map_pgsz(MAPPGSZ_ISM, pp, addr, size, 0);
			if (share_size == 0) {
				as_rangeunlock(as);
				error = EINVAL;
				goto errret;
			}
			share_szc = page_szc(share_size);
		} else {
			share_szc = sp->shm_sptseg->s_szc;
			share_size = page_get_pagesize(share_szc);
		}
		size = P2ROUNDUP(size, share_size);

		align_hint = share_size;
#if defined(__i386) || defined(__amd64)
		/*
		 * For x86, we want to share as much of the page table tree
		 * as possible.  We use a large align_hint at first, but
		 * if that fails, then the code below retries with align_hint
		 * set to share_size.
		 *
		 * The explicit extern here is due to the difficulties
		 * of getting to platform dependent includes.  When/if the
		 * platform dependent bits of this function are cleaned up,
		 * another way of doing this should be found.
		 */
		{
			extern uint_t ptes_per_table;

			while (size >= ptes_per_table * (uint64_t)align_hint)
				align_hint *= ptes_per_table;
		}
#endif /* __i386 || __amd64 */

		if (addr == 0) {
			for (;;) {
				addr = (caddr_t)align_hint;
				map_addr(&addr, size, 0ll, 1, MAP_ALIGN);
				if (addr != NULL || align_hint == share_size)
					break;
				align_hint = share_size;
			}
			if (addr == NULL) {
				as_rangeunlock(as);
				error = ENOMEM;
				goto errret;
			}
			ASSERT(((uintptr_t)addr & (align_hint - 1)) == 0);
		} else {
			/* Use the user-supplied attach address */
			caddr_t base;
			size_t len;

			/*
			 * Check that the address range
			 *  1) is properly aligned
			 *  2) is correct in unix terms
			 *  3) is within an unmapped address segment
			 */
			base = addr;
			len = size;		/* use spt aligned size */
			/* XXX - in SunOS, is sp->shm_segsz */
			if ((uintptr_t)base & (share_size - 1)) {
				error = EINVAL;
				as_rangeunlock(as);
				goto errret;
			}
			result = valid_usr_range(base, len, prot, as,
			    as->a_userlimit);
			if (result == RANGE_BADPROT) {
				/*
				 * We try to accommodate processors which
				 * may not support execute permissions on
				 * all ISM segments by trying the check
				 * again but without PROT_EXEC.
				 */
				prot &= ~PROT_EXEC;
				result = valid_usr_range(base, len, prot, as,
				    as->a_userlimit);
			}
			as_purge(as);
			if (result != RANGE_OKAY ||
			    as_gap(as, len, &base, &len, AH_LO,
			    (caddr_t)NULL) != 0) {
				error = EINVAL;
				as_rangeunlock(as);
				goto errret;
			}
		}

		if (!isspt(sp)) {
			error = sptcreate(size, &segspt, sp->shm_amp, prot,
			    flags, share_szc);
			if (error) {
				as_rangeunlock(as);
				goto errret;
			}
			sp->shm_sptinfo->sptas = segspt->s_as;
			sp->shm_sptseg = segspt;
			sp->shm_sptprot = prot;
		} else if ((prot & sp->shm_sptprot) != sp->shm_sptprot) {
			/*
			 * Ensure we're attaching to an ISM segment with
			 * fewer or equal permissions than what we're
			 * allowed.  Fail if the segment has more
			 * permissions than what we're allowed.
			 */
			error = EACCES;
			as_rangeunlock(as);
			goto errret;
		}

		ssd.shm_sptseg = sp->shm_sptseg;
		ssd.shm_sptas = sp->shm_sptinfo->sptas;
		ssd.shm_amp = sp->shm_amp;
		error = as_map(as, addr, size, segspt_shmattach, &ssd);
		if (error == 0)
			sp->shm_ismattch++; /* keep count of ISM attaches */
	} else {

		/*
		 * Normal case.
		 */
		if (flags & SHM_RDONLY)
			prot &= ~PROT_WRITE;

		if (addr == 0) {
			/* Let the system pick the attach address */
			map_addr(&addr, size, 0ll, 1, 0);
			if (addr == NULL) {
				as_rangeunlock(as);
				error = ENOMEM;
				goto errret;
			}
		} else {
			/* Use the user-supplied attach address */
			caddr_t base;
			size_t len;

			if (flags & SHM_RND)
				addr = (caddr_t)((uintptr_t)addr &
				    ~(SHMLBA - 1));
			/*
			 * Check that the address range
			 *  1) is properly aligned
			 *  2) is correct in unix terms
			 *  3) is within an unmapped address segment
			 */
			base = addr;
			len = size;		/* use aligned size */
			/* XXX - in SunOS, is sp->shm_segsz */
			if ((uintptr_t)base & PAGEOFFSET) {
				error = EINVAL;
				as_rangeunlock(as);
				goto errret;
			}
			result = valid_usr_range(base, len, prot, as,
			    as->a_userlimit);
			if (result == RANGE_BADPROT) {
				prot &= ~PROT_EXEC;
				result = valid_usr_range(base, len, prot, as,
				    as->a_userlimit);
			}
			as_purge(as);
			if (result != RANGE_OKAY ||
			    as_gap(as, len, &base, &len,
			    AH_LO, (caddr_t)NULL) != 0) {
				error = EINVAL;
				as_rangeunlock(as);
				goto errret;
			}
		}

		/* Initialize the create arguments and map the segment */
		crargs = *(struct segvn_crargs *)zfod_argsp;
		crargs.offset = 0;
		crargs.type = MAP_SHARED;
		crargs.amp = sp->shm_amp;
		crargs.prot = prot;
		crargs.maxprot = crargs.prot;
		crargs.flags = 0;

		error = as_map(as, addr, size, segvn_create, &crargs);
	}

	as_rangeunlock(as);
	if (error)
		goto errret;

	/* record shmem range for the detach */
	sa_add(pp, addr, (size_t)size, useISM ? SHMSA_ISM : 0, sp);
	*rvp = (uintptr_t)addr;

	sp->shm_atime = gethrestime_sec();
	sp->shm_lpid = pp->p_pid;
	ipc_hold(shm_svc, (kipc_perm_t *)sp);

	/*
	 * Tell machine specific code that lwp has mapped shared memory
	 */
	LWP_MMODEL_SHARED_AS(addr, size);

errret:
	mutex_exit(lock);
	return (error);
}
static void
shm_dtor(kipc_perm_t *perm)
{
	kshmid_t *sp = (kshmid_t *)perm;
	uint_t cnt;
	size_t rsize;

	ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
	anonmap_purge(sp->shm_amp);
	ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);

	if (sp->shm_sptinfo) {
		if (isspt(sp)) {
			sptdestroy(sp->shm_sptinfo->sptas, sp->shm_amp);
			sp->shm_lkcnt = 0;
		}
		kmem_free(sp->shm_sptinfo, sizeof (sptinfo_t));
	}

	if (sp->shm_lkcnt > 0) {
		shmem_unlock(sp, sp->shm_amp);
		sp->shm_lkcnt = 0;
	}

	ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock, RW_WRITER);
	cnt = --sp->shm_amp->refcnt;
	ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
	ASSERT(cnt == 0);
	shm_rm_amp(sp);

	if (sp->shm_perm.ipc_id != IPC_ID_INVAL) {
		rsize = ptob(btopr(sp->shm_segsz));
		ipcs_lock(shm_svc);
		sp->shm_perm.ipc_proj->kpj_data.kpd_shmmax -= rsize;
		sp->shm_perm.ipc_zone_ref.zref_zone->zone_shmmax -= rsize;
		ipcs_unlock(shm_svc);
	}
}
/* ARGSUSED */
static void
shm_rmid(kipc_perm_t *perm)
{
	/* nothing to do */
}
/*
 * Shmctl system call.
 */
static int
shmctl(int shmid, int cmd, void *arg)
{
	kshmid_t		*sp;	/* shared memory header ptr */
	STRUCT_DECL(shmid_ds, ds);	/* for SVR4 IPC_SET */
	int			error = 0;
	struct cred		*cr = CRED();
	kmutex_t		*lock;
	model_t			mdl = get_udatamodel();
	struct shmid_ds64	ds64;
	shmatt_t		nattch;

	STRUCT_INIT(ds, mdl);

	/*
	 * Perform pre- or non-lookup actions (e.g. copyins, RMID).
	 */
	switch (cmd) {
	case IPC_SET:
		if (copyin(arg, STRUCT_BUF(ds), STRUCT_SIZE(ds)))
			return (EFAULT);
		break;

	case IPC_SET64:
		if (copyin(arg, &ds64, sizeof (struct shmid_ds64)))
			return (EFAULT);
		break;

	case IPC_RMID:
		return (ipc_rmid(shm_svc, shmid, cr));
	}

	if ((lock = ipc_lookup(shm_svc, shmid, (kipc_perm_t **)&sp)) == NULL)
		return (EINVAL);

	switch (cmd) {
	/* Set ownership and permissions. */
	case IPC_SET:
		if (error = ipcperm_set(shm_svc, cr, &sp->shm_perm,
		    &STRUCT_BUF(ds)->shm_perm, mdl))
			break;
		sp->shm_ctime = gethrestime_sec();
		break;

	case IPC_STAT:
		if (error = ipcperm_access(&sp->shm_perm, SHM_R, cr))
			break;

		nattch = sp->shm_perm.ipc_ref - 1;

		ipcperm_stat(&STRUCT_BUF(ds)->shm_perm, &sp->shm_perm, mdl);
		STRUCT_FSET(ds, shm_segsz, sp->shm_segsz);
		STRUCT_FSETP(ds, shm_amp, NULL);	/* kernel addr */
		STRUCT_FSET(ds, shm_lkcnt, sp->shm_lkcnt);
		STRUCT_FSET(ds, shm_lpid, sp->shm_lpid);
		STRUCT_FSET(ds, shm_cpid, sp->shm_cpid);
		STRUCT_FSET(ds, shm_nattch, nattch);
		STRUCT_FSET(ds, shm_cnattch, sp->shm_ismattch);
		STRUCT_FSET(ds, shm_atime, sp->shm_atime);
		STRUCT_FSET(ds, shm_dtime, sp->shm_dtime);
		STRUCT_FSET(ds, shm_ctime, sp->shm_ctime);

		mutex_exit(lock);
		if (copyout(STRUCT_BUF(ds), arg, STRUCT_SIZE(ds)))
			return (EFAULT);

		return (0);

	case IPC_SET64:
		if (error = ipcperm_set64(shm_svc, cr,
		    &sp->shm_perm, &ds64.shmx_perm))
			break;
		sp->shm_ctime = gethrestime_sec();
		break;

	case IPC_STAT64:
		nattch = sp->shm_perm.ipc_ref - 1;

		ipcperm_stat64(&ds64.shmx_perm, &sp->shm_perm);
		ds64.shmx_segsz = sp->shm_segsz;
		ds64.shmx_lkcnt = sp->shm_lkcnt;
		ds64.shmx_lpid = sp->shm_lpid;
		ds64.shmx_cpid = sp->shm_cpid;
		ds64.shmx_nattch = nattch;
		ds64.shmx_cnattch = sp->shm_ismattch;
		ds64.shmx_atime = sp->shm_atime;
		ds64.shmx_dtime = sp->shm_dtime;
		ds64.shmx_ctime = sp->shm_ctime;

		mutex_exit(lock);
		if (copyout(&ds64, arg, sizeof (struct shmid_ds64)))
			return (EFAULT);

		return (0);

	/* Lock segment in memory */
	case SHM_LOCK:
		if ((error = secpolicy_lock_memory(cr)) != 0)
			break;

		/* protect against overflow */
		if (sp->shm_lkcnt >= USHRT_MAX) {
			error = ENOMEM;
			break;
		}
		if (!isspt(sp) && (sp->shm_lkcnt++ == 0)) {
			if (error = shmem_lock(sp, sp->shm_amp)) {
				ANON_LOCK_ENTER(&sp->shm_amp->a_rwlock,
				    RW_WRITER);
				cmn_err(CE_NOTE, "shmctl - couldn't lock %ld"
				    " pages into memory", sp->shm_amp->size);
				ANON_LOCK_EXIT(&sp->shm_amp->a_rwlock);
				error = ENOMEM;
				sp->shm_lkcnt--;
			}
		}
		break;

	/* Unlock segment */
	case SHM_UNLOCK:
		if ((error = secpolicy_lock_memory(cr)) != 0)
			break;

		if (sp->shm_lkcnt && (--sp->shm_lkcnt == 0)) {
			shmem_unlock(sp, sp->shm_amp);
		}
		break;

	default:
		error = EINVAL;
		break;
	}

	mutex_exit(lock);
	return (error);
}
static void
shm_detach(proc_t *pp, segacct_t *sap)
{
	kshmid_t	*sp = sap->sa_id;
	size_t		len = sap->sa_len;
	caddr_t		addr = sap->sa_addr;

	/*
	 * Discard lwpchan mappings.
	 */
	if (pp->p_lcp != NULL)
		lwpchan_delete_mapping(pp, addr, addr + len);
	(void) as_unmap(pp->p_as, addr, len);

	/*
	 * Perform some detach-time accounting.
	 */
	(void) ipc_lock(shm_svc, sp->shm_perm.ipc_id);
	if (sap->sa_flags & SHMSA_ISM)
		sp->shm_ismattch--;
	sp->shm_dtime = gethrestime_sec();
	sp->shm_lpid = pp->p_pid;
	ipc_rele(shm_svc, (kipc_perm_t *)sp);	/* Drops lock */

	kmem_free(sap, sizeof (segacct_t));
}
/*
 * Shmdt (detach shared segment) system call.
 */
static int
shmdt(caddr_t addr)
{
	proc_t *pp = curproc;
	segacct_t *sap, template;

	mutex_enter(&pp->p_lock);
	prbarrier(pp);	/* block /proc.  See shmgetid(). */

	template.sa_addr = addr;
	template.sa_len = 0;
	if ((pp->p_segacct == NULL) ||
	    ((sap = avl_find(pp->p_segacct, &template, NULL)) == NULL)) {
		mutex_exit(&pp->p_lock);
		return (EINVAL);
	}
	if (sap->sa_addr != addr) {
		mutex_exit(&pp->p_lock);
		return (EINVAL);
	}

	avl_remove(pp->p_segacct, sap);
	mutex_exit(&pp->p_lock);

	shm_detach(pp, sap);

	return (0);
}
/*
 * Remove all shared memory segments associated with a given zone.
 * Called by zone_shutdown when the zone is halted.
 */
/* ARGSUSED1 */
static void
shm_remove_zone(zoneid_t zoneid, void *arg)
{
	ipc_remove_zone(shm_svc, zoneid);
}
/*
 * Shmget (create new shmem) system call.
 */
static int
shmget(key_t key, size_t size, int shmflg, uintptr_t *rvp)
{
	proc_t		*pp = curproc;
	kshmid_t	*sp;
	kmutex_t	*lock;
	int		error;

top:
	if (error = ipc_get(shm_svc, key, shmflg, (kipc_perm_t **)&sp, &lock))
		return (error);

	if (!IPC_FREE(&sp->shm_perm)) {
		/*
		 * A segment with the requested key exists.
		 */
		if (size > sp->shm_segsz) {
			mutex_exit(lock);
			return (EINVAL);
		}
	} else {
		/*
		 * A new segment should be created.
		 */
		size_t npages = btopr(size);
		size_t rsize = ptob(npages);

		/*
		 * Check rsize and the per-project and per-zone limit on
		 * shared memory.  Checking rsize handles both the size == 0
		 * case and the size < ULONG_MAX & PAGEMASK case (i.e.
		 * rounding up wraps a size_t).
		 */
		mutex_enter(&pp->p_lock);
		if (rsize == 0 ||
		    (rctl_test(rc_project_shmmax,
		    pp->p_task->tk_proj->kpj_rctls, pp, rsize,
		    RCA_SAFE) & RCT_DENY) ||
		    (rctl_test(rc_zone_shmmax,
		    pp->p_zone->zone_rctls, pp, rsize,
		    RCA_SAFE) & RCT_DENY)) {

			mutex_exit(&pp->p_lock);
			mutex_exit(lock);
			ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
			return (EINVAL);
		}
		mutex_exit(&pp->p_lock);
		mutex_exit(lock);

		if (anon_resv(rsize) == 0) {
			ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
			return (ENOMEM);
		}

		/*
		 * If any new failure points are introduced between the
		 * above anon_resv() and the below ipc_commit_begin(),
		 * these failure points will need to unreserve the anon
		 * reserved using anon_unresv().
		 *
		 * Once ipc_commit_begin() is called, the anon reserved
		 * above will be automatically unreserved by future calls to
		 * ipcs_cleanup() -> shm_dtor() -> shm_rm_amp().  If
		 * ipc_commit_begin() fails, it internally calls shm_dtor(),
		 * unreserving the above anon, and freeing the below amp.
		 */

		sp->shm_amp = anonmap_alloc(rsize, rsize, ANON_SLEEP);
		sp->shm_amp->a_sp = sp;
		/*
		 * Store the original user's requested size, in bytes,
		 * rather than the page-aligned size.  The former is
		 * used for IPC_STAT and shmget() lookups.  The latter
		 * is saved in the anon_map structure and is used for
		 * calls to the vm layer.
		 */
		sp->shm_segsz = size;
		sp->shm_atime = sp->shm_dtime = 0;
		sp->shm_ctime = gethrestime_sec();
		sp->shm_lpid = (pid_t)0;
		sp->shm_cpid = curproc->p_pid;
		sp->shm_ismattch = 0;
		sp->shm_sptinfo = NULL;
		/*
		 * Check limits one last time, push id into global
		 * visibility, and update resource usage counts.
		 */
		if (error = ipc_commit_begin(shm_svc, key, shmflg,
		    (kipc_perm_t *)sp)) {
			if (error == EAGAIN)
				goto top;
			return (error);
		}

		if ((rctl_test(rc_project_shmmax,
		    sp->shm_perm.ipc_proj->kpj_rctls, pp, rsize,
		    RCA_SAFE) & RCT_DENY) ||
		    (rctl_test(rc_zone_shmmax,
		    sp->shm_perm.ipc_zone_ref.zref_zone->zone_rctls, pp,
		    rsize, RCA_SAFE) & RCT_DENY)) {
			ipc_cleanup(shm_svc, (kipc_perm_t *)sp);
			return (EINVAL);
		}
		sp->shm_perm.ipc_proj->kpj_data.kpd_shmmax += rsize;
		sp->shm_perm.ipc_zone_ref.zref_zone->zone_shmmax += rsize;

		lock = ipc_commit_end(shm_svc, &sp->shm_perm);
	}

	if (AU_AUDITING())
		audit_ipcget(AT_IPC_SHM, (void *)sp);

	*rvp = (uintptr_t)(sp->shm_perm.ipc_id);

	mutex_exit(lock);
	return (0);
}
/*
 * shmids system call.
 */
static int
shmids(int *buf, uint_t nids, uint_t *pnids)
{
	return (ipc_ids(shm_svc, buf, nids, pnids));
}
/*
 * System entry point for shmat, shmctl, shmdt, and shmget system calls.
 */
static uintptr_t
shmsys(int opcode, uintptr_t a0, uintptr_t a1, uintptr_t a2)
{
	int	error;
	uintptr_t r_val = 0;

	switch (opcode) {
	case SHMAT:
		error = shmat((int)a0, (caddr_t)a1, (int)a2, &r_val);
		break;
	case SHMCTL:
		error = shmctl((int)a0, (int)a1, (void *)a2);
		break;
	case SHMDT:
		error = shmdt((caddr_t)a0);
		break;
	case SHMGET:
		error = shmget((key_t)a0, (size_t)a1, (int)a2, &r_val);
		break;
	case SHMIDS:
		error = shmids((int *)a0, (uint_t)a1, (uint_t *)a2);
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error)
		return ((uintptr_t)set_errno(error));

	return (r_val);
}
/*
 * segacct_t comparator
 * This works as expected, with one minor change: the first of two real
 * segments with equal addresses is considered to be 'greater than' the
 * second.  We only return equal when searching using a template, in
 * which case we explicitly set the template segment's length to 0
 * (which is invalid for a real segment).
 */
static int
shm_sacompar(const void *x, const void *y)
{
	segacct_t *sa1 = (segacct_t *)x;
	segacct_t *sa2 = (segacct_t *)y;

	if (sa1->sa_addr < sa2->sa_addr) {
		return (-1);
	} else if (sa2->sa_len != 0) {
		if (sa1->sa_addr >= sa2->sa_addr + sa2->sa_len) {
			return (1);
		} else if (sa1->sa_len != 0) {
			return (1);
		} else {
			return (0);
		}
	} else if (sa1->sa_addr > sa2->sa_addr) {
		return (1);
	}

	return (0);
}
/*
 * add this record to the segacct list.
 */
static void
sa_add(struct proc *pp, caddr_t addr, size_t len, ulong_t flags, kshmid_t *id)
{
	segacct_t *nsap;
	avl_tree_t *tree = NULL;
	avl_index_t where;

	nsap = kmem_alloc(sizeof (segacct_t), KM_SLEEP);
	nsap->sa_addr = addr;
	nsap->sa_len = len;
	nsap->sa_flags = flags;
	nsap->sa_id = id;

	if (pp->p_segacct == NULL)
		tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);

	mutex_enter(&pp->p_lock);
	prbarrier(pp);	/* block /proc.  See shmgetid(). */

	if (pp->p_segacct == NULL) {
		avl_create(tree, shm_sacompar, sizeof (segacct_t),
		    offsetof(segacct_t, sa_tree));
		pp->p_segacct = tree;
	} else if (tree != NULL) {
		kmem_free(tree, sizeof (avl_tree_t));
	}

	/*
	 * We can ignore the result of avl_find, as the comparator will
	 * never return equal for segments with non-zero length.  This
	 * is a necessary hack to get around the fact that we do, in
	 * fact, have duplicate keys.
	 */
	(void) avl_find(pp->p_segacct, nsap, &where);
	avl_insert(pp->p_segacct, nsap, where);

	mutex_exit(&pp->p_lock);
}
/*
 * Duplicate parent's segacct records in child.
 */
void
shmfork(struct proc *ppp, struct proc *cpp)
{
	segacct_t *sap;
	kshmid_t *sp;
	kmutex_t *mp;

	ASSERT(ppp->p_segacct != NULL);

	/*
	 * We are the only lwp running in the parent so nobody can
	 * mess with our p_segacct list.  Thus it is safe to traverse
	 * the list without holding p_lock.  This is essential because
	 * we can't hold p_lock during a KM_SLEEP allocation.
	 */
	for (sap = (segacct_t *)avl_first(ppp->p_segacct); sap != NULL;
	    sap = (segacct_t *)AVL_NEXT(ppp->p_segacct, sap)) {
		sa_add(cpp, sap->sa_addr, sap->sa_len, sap->sa_flags,
		    sap->sa_id);
		sp = sap->sa_id;
		mp = ipc_lock(shm_svc, sp->shm_perm.ipc_id);
		if (sap->sa_flags & SHMSA_ISM)
			sp->shm_ismattch++;
		ipc_hold(shm_svc, (kipc_perm_t *)sp);
		mutex_exit(mp);
	}
}
/*
 * Detach shared memory segments from exiting process.
 */
void
shmexit(struct proc *pp)
{
	segacct_t *sap;
	avl_tree_t *tree;
	void *cookie = NULL;

	ASSERT(pp->p_segacct != NULL);

	mutex_enter(&pp->p_lock);
	prbarrier(pp);
	tree = pp->p_segacct;
	pp->p_segacct = NULL;
	mutex_exit(&pp->p_lock);

	while ((sap = avl_destroy_nodes(tree, &cookie)) != NULL)
		(void) shm_detach(pp, sap);

	avl_destroy(tree);
	kmem_free(tree, sizeof (avl_tree_t));
}
/*
 * At this time pages should be in memory, so just lock them.
 */
static void
lock_again(size_t npages, kshmid_t *sp, struct anon_map *amp)
{
	struct anon *ap;
	struct page *pp;
	struct vnode *vp;
	u_offset_t off;
	ulong_t anon_idx;
	anon_sync_obj_t cookie;

	mutex_enter(&sp->shm_mlock);
	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
	for (anon_idx = 0; npages != 0; anon_idx++, npages--) {

		anon_array_enter(amp, anon_idx, &cookie);
		ap = anon_get_ptr(amp->ahp, anon_idx);
		ASSERT(ap != NULL);
		swap_xlate(ap, &vp, &off);
		anon_array_exit(&cookie);

		pp = page_lookup(&vp->v_object, off, SE_SHARED);
		if (pp == NULL) {
			panic("lock_again: page not in the system");
			/*NOTREACHED*/
		}
		/* page should already be locked by caller */
		ASSERT(pp->p_lckcnt > 0);
		(void) page_pp_lock(pp, 0, 0);
		page_unlock(pp);
	}
	ANON_LOCK_EXIT(&amp->a_rwlock);
	mutex_exit(&sp->shm_mlock);
}
/*
 * Attach the shared memory segment to the process
 * address space and lock the pages.
 */
static int
shmem_lock(kshmid_t *sp, struct anon_map *amp)
{
	size_t npages = btopr(amp->size);
	struct as *as;
	struct segvn_crargs crargs;
	int error;

	/*
	 * A later ISM/DISM attach may increase the size of the amp, so
	 * cache the number of pages locked for the future shmem_unlock()
	 */
	sp->shm_lkpages = npages;

	as = as_alloc();
	/* Initialize the create arguments and map the segment */
	crargs = *(struct segvn_crargs *)zfod_argsp;	/* structure copy */
	crargs.offset = (u_offset_t)0;
	crargs.type = MAP_SHARED;
	crargs.amp = amp;
	crargs.prot = PROT_ALL;
	crargs.maxprot = crargs.prot;
	crargs.flags = 0;
	error = as_map(as, 0x0, amp->size, segvn_create, &crargs);
	if (!error) {
		if ((error = as_ctl(as, 0x0, amp->size, MC_LOCK, 0, 0,
		    NULL, 0)) == 0) {
			lock_again(npages, sp, amp);
		}
		(void) as_unmap(as, 0x0, amp->size);
	}
	as_free(as);
	return (error);
}
/*
 * Unlock shared memory
 */
static void
shmem_unlock(kshmid_t *sp, struct anon_map *amp)
{
	struct anon *ap;
	pgcnt_t npages = sp->shm_lkpages;
	struct vnode *vp;
	struct page *pp;
	u_offset_t off;
	ulong_t anon_idx;
	size_t unlocked_bytes = 0;
	kproject_t *proj;
	anon_sync_obj_t cookie;

	proj = sp->shm_perm.ipc_proj;
	mutex_enter(&sp->shm_mlock);
	ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
	for (anon_idx = 0; anon_idx < npages; anon_idx++) {

		anon_array_enter(amp, anon_idx, &cookie);
		if ((ap = anon_get_ptr(amp->ahp, anon_idx)) == NULL) {
			panic("shmem_unlock: null app");
			/*NOTREACHED*/
		}
		swap_xlate(ap, &vp, &off);
		anon_array_exit(&cookie);
		pp = page_lookup(&vp->v_object, off, SE_SHARED);
		if (pp == NULL) {
			panic("shmem_unlock: page not in the system");
			/*NOTREACHED*/
		}
		/*
		 * Page should have at least one lock from the previous
		 * shmem_lock().
		 */
		ASSERT(pp->p_lckcnt > 0);
		page_pp_unlock(pp, 0, 0);
		if (pp->p_lckcnt == 0)
			unlocked_bytes += PAGESIZE;

		page_unlock(pp);
	}

	if (unlocked_bytes > 0) {
		rctl_decr_locked_mem(NULL, proj, unlocked_bytes, 0);
	}

	ANON_LOCK_EXIT(&amp->a_rwlock);
	mutex_exit(&sp->shm_mlock);
}
/*
 * We call this routine when we have removed all references to this
 * amp.  This means all shmdt()s and the IPC_RMID have been done.
 */
static void
shm_rm_amp(kshmid_t *sp)
{
	struct anon_map *amp = sp->shm_amp;
	zone_t *zone;

	zone = sp->shm_perm.ipc_zone_ref.zref_zone;
	ASSERT(zone != NULL);

	/*
	 * Free up the anon_map.
	 */
	lgrp_shm_policy_fini(amp, NULL);
	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
	if (amp->a_szc != 0) {
		anon_shmap_free_pages(amp, 0, amp->size);
	} else {
		anon_free(amp->ahp, 0, amp->size);
	}
	ANON_LOCK_EXIT(&amp->a_rwlock);
	anon_unresv_zone(amp->swresv, zone);
	anonmap_free(amp);
}
/*
 * Return the shared memory id for the process's virtual address.
 * Return SHMID_NONE if addr is not within a SysV shared memory segment.
 * Return SHMID_FREE if addr's SysV shared memory segment's id has been freed.
 *
 * shmgetid() is called from code in /proc with the process locked but
 * with pp->p_lock not held.  The address space lock is held, so we
 * cannot grab pp->p_lock here due to lock-ordering constraints.
 * Because of all this, modifications to the p_segacct list must only
 * be made after calling prbarrier() to ensure the process is not locked.
 * See shmdt() and sa_add(), above.  shmgetid() may also be called on a
 * thread's own process without the process locked.
 */
int
shmgetid(proc_t *pp, caddr_t addr)
{
	segacct_t *sap, template;

	ASSERT(MUTEX_NOT_HELD(&pp->p_lock));
	ASSERT((pp->p_proc_flag & P_PR_LOCK) || pp == curproc);

	if (pp->p_segacct == NULL)
		return (SHMID_NONE);

	template.sa_addr = addr;
	template.sa_len = 0;
	if ((sap = avl_find(pp->p_segacct, &template, NULL)) == NULL)
		return (SHMID_NONE);

	if (IPC_FREE(&sap->sa_id->shm_perm))
		return (SHMID_FREE);

	return (sap->sa_id->shm_perm.ipc_id);
}