4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
25 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
26 /* All Rights Reserved */
30 * Common Inter-Process Communication routines.
35 * The System V inter-process communication (IPC) facilities provide
36 * three services, message queues, semaphore arrays, and shared memory
37 * segments, which are managed using filesystem-like namespaces.
38 * Unlike a filesystem, these namespaces aren't mounted and accessible
39 * via a path -- a special API is used to interact with the different
40 * facilities (nothing precludes a VFS-based interface, but the
41 * standards require the special APIs). Furthermore, these special
42 * APIs don't use file descriptors, nor do they have an equivalent.
43 * This means that every operation which acts on an object needs to
44 * perform the equivalent of a lookup, which in turn means that every
45 * operation can fail if the specified object doesn't exist in the
46 * facility's namespace.
51 * Each object in a namespace has a unique ID, which is assigned by the
52 * system and is used to identify the object when performing operations
53 * on it. An object can also have a key, which is selected by the user
54 * at allocation time and is used as a primitive rendezvous mechanism.
55 * An object without a key is said to have a "private" key.
57 * To perform an operation on an object given its key, one must first
58 * perform a lookup and obtain its ID. The ID is then used to identify
59 * the object when performing the operation. If the object has a
60 * private key, the ID must be known or obtained by other means.
62 * Each object in the namespace has a creator uid and gid, as well as
63 * an owner uid and gid. Both are initialized with the ruid and rgid
64 * of the process which created the object. The creator or current
65 * owner has the ability to change the owner of the object.
67 * Each object in the namespace has a set of file-like permissions,
68 * which, in conjunction with the creator and owner uid and gid,
69 * control read and write access to the object (execute is ignored).
71 * Each object also has a creator project and zone, which are used to
72 * account for its resource usage.
77 * There are five operations which all three facilities have in
78 * common: GET, SET, STAT, RMID, and IDS.
80 * GET, like open, is used to allocate a new object or obtain an
81 * existing one (using its key). It takes a key, a set of flags and
82 * mode bits, and optionally facility-specific arguments. If the key
83 * is IPC_PRIVATE, a new object with the requested mode bits and
84 * facility-specific attributes is created. If the key isn't
85 * IPC_PRIVATE, the GET will attempt to look up the specified key and
86 * either return that or create a new key depending on the state of the
87 * IPC_CREAT and IPC_EXCL flags, much like open. If GET needs to
88 * allocate an object, it can fail if there is insufficient space in
89 * the namespace (the maximum number of ids for the facility has been
90 * exceeded) or if the facility-specific initialization fails. If GET
91 * finds an object it can return, it can still fail if that object's
92 * permissions or facility-specific attributes are less than those requested.
95 * SET is used to adjust facility-specific parameters of an object, in
96 * addition to the owner uid and gid, and mode bits. It can fail if
97 * the caller isn't the creator or owner.
99 * STAT is used to obtain information about an object including the
100 * general attributes described above as well as facility-specific
101 * information. It can fail if the caller doesn't have read permission.
104 * RMID removes an object from the namespace. Subsequent operations
105 * using the object's ID or key will fail (until another object is
106 * created with the same key or ID). Since an RMID may be performed
107 * asynchronously with other operations, it is possible that other
108 * threads and/or processes will have references to the object. While
109 * a facility may have actions which need to be performed at RMID time,
110 * only when all references are dropped can the object be destroyed.
111 * RMID will fail if the caller isn't the creator or owner.
113 * IDS obtains a list of all IDs in a facility's namespace. There are
114 * no facility-specific behaviors of IDS.
119 * Because some IPC facilities provide services whose operations must
120 * scale, a mechanism which allows fast, concurrent access to
121 * individual objects is needed. Of primary importance is object
122 * lookup based on ID (SET, STAT, others). Allocation (GET),
123 * deallocation (RMID), ID enumeration (IDS), and key lookups (GET) are
124 * lesser concerns, but should be implemented in such a way that ID
125 * lookup isn't affected (at least not in the common case).
127 * Starting from the bottom up, each object is represented by a
128 * structure, the first member of which must be a kipc_perm_t. The
129 * kipc_perm_t contains the information described above in "Objects", a
130 * reference count (since the object may continue to exist after it has
131 * been removed from the namespace), as well as some additional
132 * metadata used to manage data structure membership. These objects
133 * are dynamically allocated.
135 * Above the objects is a power-of-two sized table of ID slots. Each
136 * slot contains a pointer to an object, a sequence number, and a
137 * lock. An object's ID is a function of its slot's index in the table
138 * and its slot's sequence number. Every time a slot is released (via
139 * RMID) its sequence number is increased. Strictly speaking, the
140 * sequence number is unnecessary. However, checking the sequence
141 * number after a lookup provides a certain degree of robustness
142 * against the use of stale IDs (useful since nothing else does). When
143 * the table fills up, it is resized (see Locking, below).
145 * Of an ID's 31 bits (an ID is, as defined by the standards, a signed
146 * int) the top IPC_SEQ_BITS are used for the sequence number with the
147 * remainder holding the index into the table. The size of the table
148 * is therefore bounded at 2 ^ (31 - IPC_SEQ_BITS) slots.
150 * Managing this table is the ipc_service structure. It contains a
151 * pointer to the dynamically allocated ID table, a namespace-global
152 * lock, an id_space for managing the free space in the table, and
153 * sundry other metadata necessary for the maintenance of the
154 * namespace. An AVL tree of all keyed objects in the table (sorted by
155 * key) is used for key lookups. An unordered doubly linked list of
156 * all objects in the namespace (keyed or not) is maintained to
157 * facilitate ID enumeration.
159 * To help visualize these relationships, here's a picture of a
160 * namespace with a table of size 8 containing three objects
161 * (IPC_SEQ_BITS = 28):
166 * | keys *---+----------------------\
169 * +----------------+ || |
171 * /-------------------/| |
172 * | /---------------/ |
175 * | +-0------+-1------+-2------+-3------+-4--+---+-5------+-6------+-7------+
176 * | | Seq=3 | | | Seq=1 | : | | | Seq=6 |
177 * | | | | | | : | | | |
178 * | +-*------+--------+--------+-*------+----+---+--------+--------+-*------+
180 * | | /---/ | /----------------/
183 * | +-kipc_perm_t-+ +-kipc_perm_t-+ | +-kipc_perm_t-+
184 * | | id=0x30 | | id=0x13 | | | id=0x67 |
185 * | | key=0xfeed | | key=0xbeef | | | key=0xcafe |
186 * \->| [list] |<------>| [list] |<------>| [list] |
187 * /->| [avl left] x /--->| [avl left] x \--->| [avl left] *---\
188 * | | [avl right] x | | [avl right] x | [avl right] *---+-\
189 * | | | | | | | | | |
190 * | +-------------+ | +-------------+ +-------------+ | |
191 * | \---------------------------------------------/ |
192 * \--------------------------------------------------------------------/
197 * There are three locks (or sets of locks) which are used to ensure
198 * correctness: the slot locks, the namespace lock, and p_lock (needed
199 * when checking resource controls). Their ordering is
201 * namespace lock -> slot lock 0 -> ... -> slot lock t -> p_lock
203 * Generally speaking, the namespace lock is used to protect allocation
204 * and removal from the namespace, ID enumeration, and resizing the ID
205 * table. Specifically:
207 * - write access to all fields of the ipc_service structure
208 * - read access to all variable fields of ipc_service except
209 * ipcs_tabsz (table size) and ipcs_table (the table pointer)
210 * - read/write access to ipc_avl, ipc_list in visible objects'
211 * kipc_perm structures (i.e. objects which have been removed from
212 * the namespace don't have this restriction)
213 * - write access to ipct_seq and ipct_data in the table entries
215 * A slot lock by itself is meaningless (except when resizing). Of
216 * greater interest conceptually is the notion of an ID lock -- a
217 * "virtual lock" which refers to whichever slot lock an object's ID
218 * currently hashes to.
220 * An ID lock protects all objects with that ID. Normally there will
221 * only be one such object: the one pointed to by the locked slot.
222 * However, if an object is removed from the namespace but retains
223 * references (e.g. an attached shared memory segment which has been
224 * RMIDed), it continues to use the lock associated with its original
225 * ID. While this can result in increased contention, operations which
226 * require taking the ID lock of removed objects are infrequent.
228 * Specifically, an ID lock protects the contents of an object's
229 * structure, including the contents of the embedded kipc_perm
230 * structure (but excluding those fields protected by the namespace
231 * lock). It also protects the ipct_seq and ipct_data fields in its
232 * slot (it is really a slot lock, after all).
234 * Recall that the table is resizable. To avoid requiring every ID
235 * lookup to take a global lock, a scheme much like that employed for
236 * file descriptors (see the comment above UF_ENTER in user.h) is
237 * used. Note that the sequence number and data pointer are protected
238 * by both the namespace lock and their slot lock. When the table is
239 * resized, the following operations take place:
241 * 1) A new table is allocated.
242 * 2) The global lock is taken.
243 * 3) All old slots are locked, in order.
244 * 4) The first half of the new slots are locked.
245 * 5) All table entries are copied to the new table, and cleared from the old table.
247 * 6) The ipc_service structure is updated to point to the new table.
248 * 7) The ipc_service structure is updated with the new table size.
249 * 8) All slot locks (old and new) are dropped.
251 * Because the slot locks are embedded in the table, ID lookups and
252 * other operations which require taking a slot lock need to verify
253 * that the lock taken wasn't part of a stale table. This is
254 * accomplished by checking the table size before and after
255 * dereferencing the table pointer and taking the lock: if the size
256 * changes, the lock must be dropped and reacquired. It is this
257 * additional work which distinguishes an ID lock from a slot lock.
259 * Because we can't guarantee that threads aren't accessing the old
260 * tables' locks, they are never deallocated. To prevent spurious
261 * reports of memory leaks, a pointer to the discarded table is stored
262 * in the new one in step 5. (Theoretically ipcs_destroy will delete
263 * the discarded tables, but it is only ever called from a failed _init
264 * invocation; i.e. when there aren't any.)
269 * The following interfaces are provided by the ipc module for use by
270 * the individual IPC facilities:
274 * Given an object and a cred structure, determines if the requested
275 * access type is allowed.
277 * ipcperm_set, ipcperm_stat,
278 * ipcperm_set64, ipcperm_stat64
280 * Performs the common portion of a STAT or SET operation. All
281 * (except stat and stat64) can fail, so they should be called before
282 * any facility-specific non-reversible changes are made to an
283 * object. Similarly, the set operations have side effects, so they
284 * should only be called once the possibility of a facility-specific
285 * failure is eliminated.
289 * Creates an IPC namespace for use by an IPC facility.
293 * Destroys an IPC namespace.
295 * ipcs_lock, ipcs_unlock
297 * Takes the namespace lock. Ideally such access wouldn't be
298 * necessary, but there may be facility-specific data protected by
299 * this lock (e.g. project-wide resource consumption).
303 * Takes the lock associated with an ID. Can't fail.
307 * Like ipc_lock, but takes a pointer to a held lock. Drops the lock
308 * unless it is the one that would have been returned by ipc_lock.
309 * Used after calls to cv_wait.
313 * Performs an ID lookup, returns with the ID lock held. Fails if
314 * the ID doesn't exist in the namespace.
318 * Takes a reference on an object.
322 * Releases a reference on an object, and drops the object's lock.
323 * Calls the object's destructor if the last reference is being released.
328 * Releases a reference on an object. Doesn't drop lock, and may
329 * only be called when there is more than one reference to the object.
332 * ipc_get, ipc_commit_begin, ipc_commit_end, ipc_cleanup
334 * Components of a GET operation. ipc_get performs a key lookup,
335 * allocating an object if the key isn't found (returning with the
336 * namespace lock and p_lock held), and returning the existing object
337 * if it is (with the object lock held). ipc_get doesn't modify the namespace.
340 * ipc_commit_begin begins the process of inserting an object
341 * allocated by ipc_get into the namespace, and can fail. If
342 * successful, it returns with the namespace lock and p_lock held.
343 * ipc_commit_end completes the process of inserting an object into
344 * the namespace and can't fail. The facility can call ipc_cleanup
345 * at any time following a successful ipc_get and before
346 * ipc_commit_end or a failed ipc_commit_begin to fail the
347 * allocation. Pseudocode for the suggested GET implementation:
358 * if object meets criteria
359 * unlock object and return success
361 * unlock object and return failure
365 * perform resource control tests
366 * drop namespace lock, p_lock
370 * perform facility-specific initialization
372 * facility-specific cleanup
376 * ( At this point the object should be destructible using the
377 * destructor given to ipcs_create )
385 * perform facility-specific resource control tests/allocations
390 * perform any infallible post-creation actions, unlock, and return
396 * Performs the common portion of an RMID operation -- looks up an ID,
397 * removes it, and calls a facility-specific function to do
398 * RMID-time cleanup on the private portions of the object.
402 * Performs the common portion of an IDS operation.
406 #include <sys/types.h>
407 #include <sys/param.h>
408 #include <sys/cred.h>
409 #include <sys/policy.h>
410 #include <sys/proc.h>
411 #include <sys/user.h>
413 #include <sys/ipc_impl.h>
414 #include <sys/errno.h>
415 #include <sys/systm.h>
416 #include <sys/list.h>
417 #include <sys/atomic.h>
418 #include <sys/zone.h>
419 #include <sys/task.h>
420 #include <sys/modctl.h>
422 #include <c2/audit.h>
424 static struct modlmisc modlmisc
= {
429 static struct modlinkage modlinkage
= {
430 MODREV_1
, (void *)&modlmisc
, NULL
437 return (mod_install(&modlinkage
));
443 return (mod_remove(&modlinkage
));
447 _info(struct modinfo
*modinfop
)
449 return (mod_info(&modlinkage
, modinfop
));
454 * Check message, semaphore, or shared memory access permissions.
456 * This routine verifies the requested access permission for the current
457 * process. The zone ids are compared, and the appropriate bits are
458 * checked corresponding to owner, group (including the list of
459 * supplementary groups), or everyone. Zero is returned on success.
460 * On failure, the security policy is asked to check to override the
461 * permissions check; the policy will either return 0 for access granted
464 * Access to objects in other zones requires that the caller be in the
465 * global zone and have the appropriate IPC_DAC_* privilege, regardless
466 * of whether the uid or gid match those of the object. Note that
467 * cross-zone accesses will normally never get here since they'll
468 * fail in ipc_lookup or ipc_get.
470 * The arguments must be set up as follows:
471 * p - Pointer to permission structure to verify
472 * mode - Desired access permissions
475 ipcperm_access(kipc_perm_t
*p
, int mode
, cred_t
*cr
)
478 uid_t uid
= crgetuid(cr
);
479 zoneid_t zoneid
= getzoneid();
481 if (p
->ipc_zoneid
== zoneid
) {
482 if (uid
!= p
->ipc_uid
&& uid
!= p
->ipc_cuid
) {
484 if (!groupmember(p
->ipc_gid
, cr
) &&
485 !groupmember(p
->ipc_cgid
, cr
))
489 mode
&= ~(p
->ipc_mode
<< shifts
);
493 } else if (zoneid
!= GLOBAL_ZONEID
)
496 return (secpolicy_ipc_access(cr
, p
, mode
));
500 * There are two versions of the ipcperm_set/stat functions:
501 * ipcperm_??? - for use with IPC_SET/STAT
502 * ipcperm_???_64 - for use with IPC_SET64/STAT64
504 * These functions encapsulate the common portions (copying, permission
505 * checks, and auditing) of the set/stat operations. All, except for
506 * stat and stat_64 which are void, return 0 on success or a non-zero
507 * errno value on error.
511 ipcperm_set(ipc_service_t
*service
, struct cred
*cr
,
512 kipc_perm_t
*kperm
, struct ipc_perm
*perm
, model_t model
)
514 STRUCT_HANDLE(ipc_perm
, lperm
);
520 ASSERT(IPC_LOCKED(service
, kperm
));
522 STRUCT_SET_HANDLE(lperm
, model
, perm
);
523 uid
= STRUCT_FGET(lperm
, uid
);
524 gid
= STRUCT_FGET(lperm
, gid
);
525 mode
= STRUCT_FGET(lperm
, mode
);
527 if (secpolicy_ipc_owner(cr
, kperm
) != 0)
530 zone
= crgetzone(cr
);
531 if (!VALID_UID(uid
, zone
) || !VALID_GID(gid
, zone
))
534 kperm
->ipc_uid
= uid
;
535 kperm
->ipc_gid
= gid
;
536 kperm
->ipc_mode
= (mode
& 0777) | (kperm
->ipc_mode
& ~0777);
539 audit_ipcget(service
->ipcs_atype
, kperm
);
545 ipcperm_stat(struct ipc_perm
*perm
, kipc_perm_t
*kperm
, model_t model
)
547 STRUCT_HANDLE(ipc_perm
, lperm
);
549 STRUCT_SET_HANDLE(lperm
, model
, perm
);
550 STRUCT_FSET(lperm
, uid
, kperm
->ipc_uid
);
551 STRUCT_FSET(lperm
, gid
, kperm
->ipc_gid
);
552 STRUCT_FSET(lperm
, cuid
, kperm
->ipc_cuid
);
553 STRUCT_FSET(lperm
, cgid
, kperm
->ipc_cgid
);
554 STRUCT_FSET(lperm
, mode
, kperm
->ipc_mode
);
555 STRUCT_FSET(lperm
, seq
, 0);
556 STRUCT_FSET(lperm
, key
, kperm
->ipc_key
);
560 ipcperm_set64(ipc_service_t
*service
, struct cred
*cr
,
561 kipc_perm_t
*kperm
, ipc_perm64_t
*perm64
)
565 ASSERT(IPC_LOCKED(service
, kperm
));
567 if (secpolicy_ipc_owner(cr
, kperm
) != 0)
570 zone
= crgetzone(cr
);
571 if (!VALID_UID(perm64
->ipcx_uid
, zone
) ||
572 !VALID_GID(perm64
->ipcx_gid
, zone
))
575 kperm
->ipc_uid
= perm64
->ipcx_uid
;
576 kperm
->ipc_gid
= perm64
->ipcx_gid
;
577 kperm
->ipc_mode
= (perm64
->ipcx_mode
& 0777) |
578 (kperm
->ipc_mode
& ~0777);
581 audit_ipcget(service
->ipcs_atype
, kperm
);
587 ipcperm_stat64(ipc_perm64_t
*perm64
, kipc_perm_t
*kperm
)
589 perm64
->ipcx_uid
= kperm
->ipc_uid
;
590 perm64
->ipcx_gid
= kperm
->ipc_gid
;
591 perm64
->ipcx_cuid
= kperm
->ipc_cuid
;
592 perm64
->ipcx_cgid
= kperm
->ipc_cgid
;
593 perm64
->ipcx_mode
= kperm
->ipc_mode
;
594 perm64
->ipcx_key
= kperm
->ipc_key
;
595 perm64
->ipcx_projid
= kperm
->ipc_proj
->kpj_id
;
596 perm64
->ipcx_zoneid
= kperm
->ipc_zoneid
;
601 * ipc key comparator.
604 ipc_key_compar(const void *a
, const void *b
)
606 kipc_perm_t
*aperm
= (kipc_perm_t
*)a
;
607 kipc_perm_t
*bperm
= (kipc_perm_t
*)b
;
608 int ak
= aperm
->ipc_key
;
609 int bk
= bperm
->ipc_key
;
613 ASSERT(ak
!= IPC_PRIVATE
);
614 ASSERT(bk
!= IPC_PRIVATE
);
617 * Compare key first, then zoneid. This optimizes performance for
618 * systems with only one zone, since the zone checks will only be
619 * made when the keys match.
627 az
= aperm
->ipc_zoneid
;
628 bz
= bperm
->ipc_zoneid
;
637 * Create an ipc service.
640 ipcs_create(const char *name
, rctl_hndl_t proj_rctl
, rctl_hndl_t zone_rctl
,
641 size_t size
, ipc_func_t
*dtor
, ipc_func_t
*rmid
, int audit_type
,
644 ipc_service_t
*result
;
646 result
= kmem_alloc(sizeof (ipc_service_t
), KM_SLEEP
);
648 mutex_init(&result
->ipcs_lock
, NULL
, MUTEX_ADAPTIVE
, NULL
);
649 result
->ipcs_count
= 0;
650 avl_create(&result
->ipcs_keys
, ipc_key_compar
, size
, 0);
651 result
->ipcs_tabsz
= IPC_IDS_MIN
;
653 kmem_zalloc(IPC_IDS_MIN
* sizeof (ipc_slot_t
), KM_SLEEP
);
654 result
->ipcs_ssize
= size
;
655 result
->ipcs_ids
= id_space_create(name
, 0, IPC_IDS_MIN
);
656 result
->ipcs_dtor
= dtor
;
657 result
->ipcs_rmid
= rmid
;
658 result
->ipcs_proj_rctl
= proj_rctl
;
659 result
->ipcs_zone_rctl
= zone_rctl
;
660 result
->ipcs_atype
= audit_type
;
661 ASSERT(rctl_offset
< sizeof (ipc_rqty_t
));
662 result
->ipcs_rctlofs
= rctl_offset
;
663 list_create(&result
->ipcs_usedids
, sizeof (kipc_perm_t
),
664 offsetof(kipc_perm_t
, ipc_list
));
670 * Destroy an ipc service.
673 ipcs_destroy(ipc_service_t
*service
)
675 ipc_slot_t
*slot
, *next
;
677 mutex_enter(&service
->ipcs_lock
);
679 ASSERT(service
->ipcs_count
== 0);
680 avl_destroy(&service
->ipcs_keys
);
681 list_destroy(&service
->ipcs_usedids
);
682 id_space_destroy(service
->ipcs_ids
);
684 for (slot
= service
->ipcs_table
; slot
; slot
= next
) {
685 next
= slot
[0].ipct_chain
;
686 kmem_free(slot
, service
->ipcs_tabsz
* sizeof (ipc_slot_t
));
687 service
->ipcs_tabsz
>>= 1;
690 mutex_destroy(&service
->ipcs_lock
);
691 kmem_free(service
, sizeof (ipc_service_t
));
695 * Takes the service lock.
698 ipcs_lock(ipc_service_t
*service
)
700 mutex_enter(&service
->ipcs_lock
);
704 * Releases the service lock.
707 ipcs_unlock(ipc_service_t
*service
)
709 mutex_exit(&service
->ipcs_lock
);
714 * Locks the specified ID. Returns the ID's ID table index.
717 ipc_lock_internal(ipc_service_t
*service
, uint_t id
)
724 tabsz
= service
->ipcs_tabsz
;
726 index
= id
& (tabsz
- 1);
727 mutex
= &service
->ipcs_table
[index
].ipct_lock
;
729 if (tabsz
== service
->ipcs_tabsz
)
738 * Locks the specified ID. Returns a pointer to the ID's lock.
741 ipc_lock(ipc_service_t
*service
, int id
)
746 * These assertions don't reflect requirements of the code
747 * which follows, but they should never fail nonetheless.
750 ASSERT(IPC_INDEX(id
) < service
->ipcs_tabsz
);
751 index
= ipc_lock_internal(service
, id
);
753 return (&service
->ipcs_table
[index
].ipct_lock
);
757 * Checks to see if the held lock provided is the current lock for the
758 * specified id. If so, we return it instead of dropping it and
759 * returning the result of ipc_lock. This is intended to speed up cv
760 * wakeups where we are left holding a lock which could be stale, but
764 ipc_relock(ipc_service_t
*service
, int id
, kmutex_t
*lock
)
767 ASSERT(IPC_INDEX(id
) < service
->ipcs_tabsz
);
768 ASSERT(MUTEX_HELD(lock
));
770 if (&service
->ipcs_table
[IPC_INDEX(id
)].ipct_lock
== lock
)
774 return (ipc_lock(service
, id
));
778 * Performs an ID lookup. If the ID doesn't exist or has been removed,
779 * or isn't visible to the caller (because of zones), NULL is returned.
780 * Otherwise, a pointer to the ID's perm structure and held ID lock are returned.
784 ipc_lookup(ipc_service_t
*service
, int id
, kipc_perm_t
**perm
)
790 * There is no need to check to see if id is in-range (i.e.
791 * positive and fits into the table). If it is out-of-range,
792 * the id simply won't match the object's.
795 index
= ipc_lock_internal(service
, id
);
796 result
= service
->ipcs_table
[index
].ipct_data
;
797 if (result
== NULL
|| result
->ipc_id
!= (uint_t
)id
||
798 !HASZONEACCESS(curproc
, result
->ipc_zoneid
)) {
799 mutex_exit(&service
->ipcs_table
[index
].ipct_lock
);
803 ASSERT(IPC_SEQ(id
) == service
->ipcs_table
[index
].ipct_seq
);
807 audit_ipc(service
->ipcs_atype
, id
, result
);
809 return (&service
->ipcs_table
[index
].ipct_lock
);
813 * Increase the reference count on an ID.
817 ipc_hold(ipc_service_t
*s
, kipc_perm_t
*perm
)
819 ASSERT(IPC_INDEX(perm
->ipc_id
) < s
->ipcs_tabsz
);
820 ASSERT(IPC_LOCKED(s
, perm
));
825 * Decrease the reference count on an ID and drops the ID's lock.
826 * Destroys the ID if the new reference count is zero.
829 ipc_rele(ipc_service_t
*s
, kipc_perm_t
*perm
)
833 ASSERT(IPC_INDEX(perm
->ipc_id
) < s
->ipcs_tabsz
);
834 ASSERT(IPC_LOCKED(s
, perm
));
835 ASSERT(perm
->ipc_ref
> 0);
837 nref
= --perm
->ipc_ref
;
838 mutex_exit(&s
->ipcs_table
[IPC_INDEX(perm
->ipc_id
)].ipct_lock
);
841 ASSERT(IPC_FREE(perm
)); /* ipc_rmid clears IPC_ALLOC */
843 project_rele(perm
->ipc_proj
);
844 zone_rele_ref(&perm
->ipc_zone_ref
, ZONE_REF_IPC
);
845 kmem_free(perm
, s
->ipcs_ssize
);
850 * Decrease the reference count on an ID, but don't drop the ID lock.
851 * Used in cases where one thread needs to remove many references (on
852 * behalf of other parties).
855 ipc_rele_locked(ipc_service_t
*s
, kipc_perm_t
*perm
)
857 ASSERT(perm
->ipc_ref
> 1);
858 ASSERT(IPC_INDEX(perm
->ipc_id
) < s
->ipcs_tabsz
);
859 ASSERT(IPC_LOCKED(s
, perm
));
866 * Internal function to grow the service ID table.
869 ipc_grow(ipc_service_t
*service
)
871 ipc_slot_t
*new, *old
;
872 int i
, oldsize
, newsize
;
874 ASSERT(MUTEX_HELD(&service
->ipcs_lock
));
875 ASSERT(MUTEX_NOT_HELD(&curproc
->p_lock
));
877 if (service
->ipcs_tabsz
== IPC_IDS_MAX
)
880 oldsize
= service
->ipcs_tabsz
;
881 newsize
= oldsize
<< 1;
882 new = kmem_zalloc(newsize
* sizeof (ipc_slot_t
), KM_NOSLEEP
);
886 old
= service
->ipcs_table
;
887 for (i
= 0; i
< oldsize
; i
++) {
888 mutex_enter(&old
[i
].ipct_lock
);
889 mutex_enter(&new[i
].ipct_lock
);
891 new[i
].ipct_seq
= old
[i
].ipct_seq
;
892 new[i
].ipct_data
= old
[i
].ipct_data
;
893 old
[i
].ipct_data
= NULL
;
896 new[0].ipct_chain
= old
;
897 service
->ipcs_table
= new;
899 service
->ipcs_tabsz
= newsize
;
901 for (i
= 0; i
< oldsize
; i
++) {
902 mutex_exit(&old
[i
].ipct_lock
);
903 mutex_exit(&new[i
].ipct_lock
);
906 id_space_extend(service
->ipcs_ids
, oldsize
, service
->ipcs_tabsz
);
913 ipc_keylookup(ipc_service_t
*service
, key_t key
, int flag
, kipc_perm_t
**permp
)
915 kipc_perm_t
*perm
= NULL
;
917 kipc_perm_t
template;
919 ASSERT(MUTEX_HELD(&service
->ipcs_lock
));
921 template.ipc_key
= key
;
922 template.ipc_zoneid
= getzoneid();
923 if (perm
= avl_find(&service
->ipcs_keys
, &template, &where
)) {
924 ASSERT(!IPC_FREE(perm
));
925 if ((flag
& (IPC_CREAT
| IPC_EXCL
)) == (IPC_CREAT
| IPC_EXCL
))
927 if ((flag
& 0777) & ~perm
->ipc_mode
) {
929 audit_ipcget(0, (void *)perm
);
934 } else if (flag
& IPC_CREAT
) {
942 ipc_alloc_test(ipc_service_t
*service
, proc_t
*pp
)
944 ASSERT(MUTEX_HELD(&service
->ipcs_lock
));
947 * Resizing the table first would result in a cleaner code
948 * path, but would also allow a user to (permanently) double
949 * the id table size in cases where the allocation would be
950 * denied. Hence we test the rctl first.
953 mutex_enter(&pp
->p_lock
);
954 if ((rctl_test(service
->ipcs_proj_rctl
, pp
->p_task
->tk_proj
->kpj_rctls
,
955 pp
, 1, RCA_SAFE
) & RCT_DENY
) ||
956 (rctl_test(service
->ipcs_zone_rctl
, pp
->p_zone
->zone_rctls
,
957 pp
, 1, RCA_SAFE
) & RCT_DENY
)) {
958 mutex_exit(&pp
->p_lock
);
962 if (service
->ipcs_count
== service
->ipcs_tabsz
) {
965 mutex_exit(&pp
->p_lock
);
966 if (error
= ipc_grow(service
))
975 * Given a key, search for or create the associated identifier.
977 * If IPC_CREAT is specified and the key isn't found, or if the key is
978 * equal to IPC_PRIVATE, we return 0 and place a pointer to a newly
979 * allocated object structure in permp. A pointer to the held service
980 * lock is placed in lockp. ipc_mode's IPC_ALLOC bit is clear.
982 * If the key is found and no error conditions arise, we return 0 and
983 * place a pointer to the existing object structure in permp. A
984 * pointer to the held ID lock is placed in lockp. ipc_mode's
985 * IPC_ALLOC bit is set.
987 * Otherwise, a non-zero errno value is returned.
990 ipc_get(ipc_service_t
*service
, key_t key
, int flag
, kipc_perm_t
**permp
,
993 kipc_perm_t
*perm
= NULL
;
994 proc_t
*pp
= curproc
;
998 if (key
!= IPC_PRIVATE
) {
1000 mutex_enter(&service
->ipcs_lock
);
1001 error
= ipc_keylookup(service
, key
, flag
, &perm
);
1003 index
= ipc_lock_internal(service
, perm
->ipc_id
);
1004 mutex_exit(&service
->ipcs_lock
);
1007 ASSERT(perm
== NULL
);
1012 ASSERT(!IPC_FREE(perm
));
1014 *lockp
= &service
->ipcs_table
[index
].ipct_lock
;
1018 /* Key not found; fall through */
1021 perm
= kmem_zalloc(service
->ipcs_ssize
, KM_SLEEP
);
1023 mutex_enter(&service
->ipcs_lock
);
1024 if (error
= ipc_alloc_test(service
, pp
)) {
1025 mutex_exit(&service
->ipcs_lock
);
1026 kmem_free(perm
, service
->ipcs_ssize
);
1030 perm
->ipc_cuid
= perm
->ipc_uid
= crgetuid(cr
);
1031 perm
->ipc_cgid
= perm
->ipc_gid
= crgetgid(cr
);
1032 perm
->ipc_zoneid
= getzoneid();
1033 perm
->ipc_mode
= flag
& 0777;
1034 perm
->ipc_key
= key
;
1036 perm
->ipc_id
= IPC_ID_INVAL
;
1038 *lockp
= &service
->ipcs_lock
;
1044 * Attempts to add a newly created ID to the global namespace. If
1045 * creating it would cause an error, we return the error. If there is
1046 * the possibility that we could obtain the existing ID and return it
1047 * to the user, we return EAGAIN. Otherwise, we return 0 with p_lock
1048 * and the service lock held.
1050 * Since this should be only called after all initialization has been
1051 * completed, on failure we automatically invoke the destructor for the
1052 * object and deallocate the memory associated with it.
1055 ipc_commit_begin(ipc_service_t
*service
, key_t key
, int flag
,
1056 kipc_perm_t
*newperm
)
1060 proc_t
*pp
= curproc
;
1062 ASSERT(newperm
->ipc_ref
== 1);
1063 ASSERT(IPC_FREE(newperm
));
1066 * Set ipc_proj and ipc_zone_ref so that future calls to ipc_cleanup()
1067 * clean up the necessary state. This must be done before the
1068 * potential call to ipcs_dtor() below.
1070 newperm
->ipc_proj
= pp
->p_task
->tk_proj
;
1071 zone_init_ref(&newperm
->ipc_zone_ref
);
1072 zone_hold_ref(pp
->p_zone
, &newperm
->ipc_zone_ref
, ZONE_REF_IPC
);
1074 mutex_enter(&service
->ipcs_lock
);
1076 * Ensure that no-one has raced with us and created the key.
1078 if ((key
!= IPC_PRIVATE
) &&
1079 (((error
= ipc_keylookup(service
, key
, flag
, &perm
)) != 0) ||
1081 error
= error
? error
: EAGAIN
;
1086 * Ensure that no-one has raced with us and used the last of
1087 * the permissible ids, or the last of the free spaces in the
1090 if (error
= ipc_alloc_test(service
, pp
))
1093 ASSERT(MUTEX_HELD(&service
->ipcs_lock
));
1094 ASSERT(MUTEX_HELD(&pp
->p_lock
));
1098 mutex_exit(&service
->ipcs_lock
);
1099 service
->ipcs_dtor(newperm
);
1100 zone_rele_ref(&newperm
->ipc_zone_ref
, ZONE_REF_IPC
);
1101 kmem_free(newperm
, service
->ipcs_ssize
);
1106 * Commit the ID allocation transaction. Called with p_lock and the
1107 * service lock held, both of which are dropped. Returns the held ID
1108 * lock so the caller can extract the ID and perform ipcget auditing.
1111 ipc_commit_end(ipc_service_t
*service
, kipc_perm_t
*perm
)
1118 ASSERT(MUTEX_HELD(&service
->ipcs_lock
));
1119 ASSERT(MUTEX_HELD(&curproc
->p_lock
));
1121 (void) project_hold(perm
->ipc_proj
);
1122 mutex_exit(&curproc
->p_lock
);
1125 * Pick out our slot.
1127 service
->ipcs_count
++;
1128 index
= id_alloc(service
->ipcs_ids
);
1129 ASSERT(index
< service
->ipcs_tabsz
);
1130 slot
= &service
->ipcs_table
[index
];
1131 mutex_enter(&slot
->ipct_lock
);
1132 ASSERT(slot
->ipct_data
== NULL
);
1135 * Update the perm structure.
1137 perm
->ipc_mode
|= IPC_ALLOC
;
1138 perm
->ipc_id
= (slot
->ipct_seq
<< IPC_SEQ_SHIFT
) | index
;
1141 * Push into global visibility.
1143 slot
->ipct_data
= perm
;
1144 if (perm
->ipc_key
!= IPC_PRIVATE
) {
1145 loc
= avl_find(&service
->ipcs_keys
, perm
, &where
);
1146 ASSERT(loc
== NULL
);
1147 avl_insert(&service
->ipcs_keys
, perm
, where
);
1149 list_insert_head(&service
->ipcs_usedids
, perm
);
1152 * Update resource consumption.
1154 IPC_PROJ_USAGE(perm
, service
) += 1;
1155 IPC_ZONE_USAGE(perm
, service
) += 1;
1157 mutex_exit(&service
->ipcs_lock
);
1158 return (&slot
->ipct_lock
);
1162 * Clean up function, in case the allocation fails. If called between
1163 * ipc_lookup and ipc_commit_begin, perm->ipc_proj will be 0 and we
1164 * merely free the perm structure. If called after ipc_commit_begin,
1165 * we also drop locks and call the ID's destructor.
1168 ipc_cleanup(ipc_service_t
*service
, kipc_perm_t
*perm
)
1170 ASSERT(IPC_FREE(perm
));
1171 if (perm
->ipc_proj
) {
1172 mutex_exit(&curproc
->p_lock
);
1173 mutex_exit(&service
->ipcs_lock
);
1174 service
->ipcs_dtor(perm
);
1176 if (perm
->ipc_zone_ref
.zref_zone
!= NULL
)
1177 zone_rele_ref(&perm
->ipc_zone_ref
, ZONE_REF_IPC
);
1178 kmem_free(perm
, service
->ipcs_ssize
);
1183 * Common code to remove an IPC object. This should be called after
1184 * all permissions checks have been performed, and with the service
1185 * and ID locked. Note that this does not remove the object from
1186 * the ipcs_usedids list (this needs to be done by the caller before
1187 * dropping the service lock).
1190 ipc_remove(ipc_service_t
*service
, kipc_perm_t
*perm
)
1192 int id
= perm
->ipc_id
;
1195 ASSERT(MUTEX_HELD(&service
->ipcs_lock
));
1196 ASSERT(IPC_LOCKED(service
, perm
));
1198 index
= IPC_INDEX(id
);
1200 service
->ipcs_table
[index
].ipct_data
= NULL
;
1202 if (perm
->ipc_key
!= IPC_PRIVATE
)
1203 avl_remove(&service
->ipcs_keys
, perm
);
1204 list_remove(&service
->ipcs_usedids
, perm
);
1205 perm
->ipc_mode
&= ~IPC_ALLOC
;
1207 id_free(service
->ipcs_ids
, index
);
1209 if (service
->ipcs_table
[index
].ipct_seq
++ == IPC_SEQ_MASK
)
1210 service
->ipcs_table
[index
].ipct_seq
= 0;
1211 service
->ipcs_count
--;
1212 ASSERT(IPC_PROJ_USAGE(perm
, service
) > 0);
1213 ASSERT(IPC_ZONE_USAGE(perm
, service
) > 0);
1214 IPC_PROJ_USAGE(perm
, service
) -= 1;
1215 IPC_ZONE_USAGE(perm
, service
) -= 1;
1216 ASSERT(service
->ipcs_count
|| ((IPC_PROJ_USAGE(perm
, service
) == 0) &&
1217 (IPC_ZONE_USAGE(perm
, service
) == 0)));
1222 * Common code to perform an IPC_RMID. Returns an errno value on
1223 * failure, 0 on success.
1226 ipc_rmid(ipc_service_t
*service
, int id
, cred_t
*cr
)
1231 mutex_enter(&service
->ipcs_lock
);
1233 lock
= ipc_lookup(service
, id
, &perm
);
1235 mutex_exit(&service
->ipcs_lock
);
1239 ASSERT(service
->ipcs_count
> 0);
1241 if (secpolicy_ipc_owner(cr
, perm
) != 0) {
1243 mutex_exit(&service
->ipcs_lock
);
1248 * Nothing can fail from this point on.
1250 ipc_remove(service
, perm
);
1251 mutex_exit(&service
->ipcs_lock
);
1253 /* perform any per-service removal actions */
1254 service
->ipcs_rmid(perm
);
1256 ipc_rele(service
, perm
);
1262 * Implementation for shmids, semids, and msgids. buf is the address
1263 * of the user buffer, nids is the size, and pnids is a pointer to
1264 * where we write the actual number of ids that [would] have been
1268 ipc_ids(ipc_service_t
*service
, int *buf
, uint_t nids
, uint_t
*pnids
)
1276 zoneid_t zoneid
= getzoneid();
1277 int global
= INGLOBALZONE(curproc
);
1283 * Get an accurate count of the total number of ids, and allocate a
1284 * staging buffer. Since ipcs_count is always sane, we don't have
1285 * to take ipcs_lock for our first guess. If there are no ids, or
1286 * we're in the global zone and the number of ids is greater than
1287 * the size of the specified buffer, we shunt to the end. Otherwise,
1288 * we go through the id list looking for (and counting) what is
1289 * visible in the specified zone.
1291 idcount
= service
->ipcs_count
;
1293 if ((global
&& idcount
> nids
) || idcount
== 0) {
1299 idsize
= idcount
* sizeof (int);
1300 ids
= kmem_alloc(idsize
, KM_SLEEP
);
1302 mutex_enter(&service
->ipcs_lock
);
1303 if (idcount
>= service
->ipcs_count
)
1305 idcount
= service
->ipcs_count
;
1306 mutex_exit(&service
->ipcs_lock
);
1309 kmem_free(ids
, idsize
);
1314 for (perm
= list_head(&service
->ipcs_usedids
); perm
!= NULL
;
1315 perm
= list_next(&service
->ipcs_usedids
, perm
)) {
1316 ASSERT(!IPC_FREE(perm
));
1317 if (global
|| perm
->ipc_zoneid
== zoneid
)
1318 ids
[numids
++] = perm
->ipc_id
;
1320 mutex_exit(&service
->ipcs_lock
);
1323 * If there isn't enough space to hold all of the ids, just
1324 * return the number of ids without copying out any of them.
1330 if (suword32(pnids
, (uint32_t)numids
) ||
1331 (nids
!= 0 && copyout(ids
, buf
, numids
* sizeof (int))))
1334 kmem_free(ids
, idsize
);
1339 * Destroy IPC objects from the given service that are associated with
1342 * We can't hold on to the service lock when freeing objects, so we
1343 * first search the service and move all the objects to a private
1344 * list, then walk through and free them after dropping the lock.
1347 ipc_remove_zone(ipc_service_t
*service
, zoneid_t zoneid
)
1349 kipc_perm_t
*perm
, *next
;
1353 list_create(&rmlist
, sizeof (kipc_perm_t
),
1354 offsetof(kipc_perm_t
, ipc_list
));
1356 mutex_enter(&service
->ipcs_lock
);
1357 for (perm
= list_head(&service
->ipcs_usedids
); perm
!= NULL
;
1359 next
= list_next(&service
->ipcs_usedids
, perm
);
1360 if (perm
->ipc_zoneid
!= zoneid
)
1364 * Remove the object from the service, then put it on
1365 * the removal list so we can defer the call to
1366 * ipc_rele (which will actually free the structure).
1367 * We need to do this since the destructor may grab
1370 ASSERT(!IPC_FREE(perm
));
1371 lock
= ipc_lock(service
, perm
->ipc_id
);
1372 ipc_remove(service
, perm
);
1374 list_insert_tail(&rmlist
, perm
);
1376 mutex_exit(&service
->ipcs_lock
);
1379 * Now that we've dropped the service lock, loop through the
1380 * private list freeing removed objects.
1382 for (perm
= list_head(&rmlist
); perm
!= NULL
; perm
= next
) {
1383 next
= list_next(&rmlist
, perm
);
1384 list_remove(&rmlist
, perm
);
1386 (void) ipc_lock(service
, perm
->ipc_id
);
1388 /* perform any per-service removal actions */
1389 service
->ipcs_rmid(perm
);
1391 /* release reference */
1392 ipc_rele(service
, perm
);
1395 list_destroy(&rmlist
);