4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
27 /* All Rights Reserved */
30 * Inter-Process Communication Semaphore Facility.
32 * See os/ipc.c for a description of common IPC functionality.
37 * Control: zone.max-sem-ids (rc_zone_semmni)
38 * Description: Maximum number of semaphore ids allowed a zone.
40 * When semget() is used to allocate a semaphore set, one id is
41 * allocated. If the id allocation doesn't succeed, semget() fails
42 * and errno is set to ENOSPC. Upon successful semctl(, IPC_RMID)
43 * the id is deallocated.
45 * Control: project.max-sem-ids (rc_project_semmni)
46 * Description: Maximum number of semaphore ids allowed a project.
48 * When semget() is used to allocate a semaphore set, one id is
49 * allocated. If the id allocation doesn't succeed, semget() fails
50 * and errno is set to ENOSPC. Upon successful semctl(, IPC_RMID)
51 * the id is deallocated.
53 * Control: process.max-sem-nsems (rc_process_semmsl)
54 * Description: Maximum number of semaphores allowed per semaphore set.
56 * When semget() is used to allocate a semaphore set, the size of the
57 * set is compared with this limit. If the number of semaphores
58 * exceeds the limit, semget() fails and errno is set to EINVAL.
60 * Control: process.max-sem-ops (rc_process_semopm)
61 * Description: Maximum number of semaphore operations allowed per semop call.
64 * When semget() successfully allocates a semaphore set, the minimum
65 * enforced value of this limit is used to initialize the
66 * "system-imposed maximum" number of operations a semop() call for
67 * this set can perform.
72 * Removing the undo structure tunables involved a serious redesign of
73 * how they were implemented. There is now one undo structure for
74 * every process/semaphore array combination (lazily allocated, of
75 * course), and each is equal in size to the semaphore it corresponds
76 * to. To avoid scalability and performance problems, the undo
77 * structures are stored in two places: a per-process AVL tree sorted
78 * by ksemid pointer (p_semacct, protected by p_lock) and an unsorted
79 * per-semaphore linked list (sem_undos, protected by the semaphore's
80 * ID lock). The former is used by semop, where a lookup is performed
81 * once and cached if SEM_UNDO is specified for any of the operations,
82 * and at process exit where the undoable operations are rolled back.
83 * The latter is used when removing the semaphore, so the undo
84 * structures can be removed from the appropriate processes' trees.
86 * The undo structure itself contains pointers to the ksemid and proc
87 * to which it corresponds, a list node, an AVL node, and an array of
88 * adjust-on-exit (AOE) values. When an undo structure is allocated it
89 * is immediately added to both the process's tree and the semaphore's
90 * list. Lastly, the reference count on the semaphore is increased.
92 * Avoiding a lock ordering violation between p_lock and the ID lock,
93 * wont to occur when there is a race between a process exiting and the
94 * removal of a semaphore, mandates the delicate dance that exists
95 * between semexit and sem_rmid.
97 * sem_rmid, holding the ID lock, iterates through all undo structures
98 * and for each takes the appropriate process's p_lock and checks to
99 * see if p_semacct is NULL. If it is, it skips that undo structure
100 * and continues to the next. Otherwise, it removes the undo structure
101 * from both the AVL tree and the semaphore's list, and releases the
102 * hold that the undo structure had on the semaphore.
104 * The important other half of this is semexit, which will immediately
105 * take p_lock, obtain the AVL pointer, clear p_semacct, and drop
106 * p_lock. From this point on it is semexit's responsibility to clean
107 * up all undo structures found in the tree -- a coexecuting sem_rmid
108 * will see the NULL p_semacct and skip that undo structure. It walks
109 * the AVL tree (using avl_destroy_nodes) and for each undo structure
110 * takes the appropriate semaphore's ID lock (always legal since the
111 * undo structure has a hold on the semaphore), updates all semaphores
112 * with non-zero AOE values, and removes the structure from the
113 * semaphore's list. It then drops the structure's reference on the
114 * semaphore, drops the ID lock, and frees the undo structure.
117 #include <sys/types.h>
118 #include <sys/t_lock.h>
119 #include <sys/param.h>
120 #include <sys/systm.h>
121 #include <sys/sysmacros.h>
122 #include <sys/cred.h>
123 #include <sys/vmem.h>
124 #include <sys/kmem.h>
125 #include <sys/errno.h>
126 #include <sys/time.h>
128 #include <sys/ipc_impl.h>
130 #include <sys/sem_impl.h>
131 #include <sys/user.h>
132 #include <sys/proc.h>
133 #include <sys/cpuvar.h>
134 #include <sys/debug.h>
136 #include <sys/cmn_err.h>
137 #include <sys/modctl.h>
138 #include <sys/syscall.h>
140 #include <sys/list.h>
141 #include <sys/zone.h>
143 #include <c2/audit.h>
145 extern rctl_hndl_t rc_zone_semmni
;
146 extern rctl_hndl_t rc_project_semmni
;
147 extern rctl_hndl_t rc_process_semmsl
;
148 extern rctl_hndl_t rc_process_semopm
;
149 static ipc_service_t
*sem_svc
;
150 static zone_key_t sem_zone_key
;
153 * The following tunables are obsolete. Though for compatibility we
154 * still read and interpret seminfo_semmsl, seminfo_semopm and
155 * seminfo_semmni (see os/project.c and os/rctl_proc.c), the preferred
156 * mechanism for administrating the IPC Semaphore facility is through
157 * the resource controls described at the top of this file.
/*
 * Obsolete tunables.  They remain as global definitions so that existing
 * settings can still be read; the values below are unchanged.
 */
int seminfo_semaem = 16384;	/* (obsolete) */
int seminfo_semmap = 10;	/* (obsolete) */
int seminfo_semmni = 10;	/* (obsolete) */
int seminfo_semmns = 60;	/* (obsolete) */
int seminfo_semmnu = 30;	/* (obsolete) */
int seminfo_semmsl = 25;	/* (obsolete) */
int seminfo_semopm = 10;	/* (obsolete) */
int seminfo_semume = 10;	/* (obsolete) */
int seminfo_semusz = 96;	/* (obsolete) */
int seminfo_semvmx = 32767;	/* (obsolete) */
#define	SEM_MAXUCOPS	4096	/* max # of unchecked ops per semop call */

/*
 * Size in bytes of an undo structure for a set of n semaphores: the base
 * struct sem_undo plus room for (n - 1) further int entries.  The argument
 * is parenthesized so that an expression such as (a << b) expands correctly.
 */
#define	SEM_UNDOSZ(n)	(sizeof (struct sem_undo) + ((n) - 1) * sizeof (int))
173 static int semsys(int opcode
, uintptr_t a0
, uintptr_t a1
,
174 uintptr_t a2
, uintptr_t a3
);
175 static void sem_dtor(kipc_perm_t
*);
176 static void sem_rmid(kipc_perm_t
*);
177 static void sem_remove_zone(zoneid_t
, void *);
179 static struct sysent ipcsem_sysent
= {
181 SE_NOUNLOAD
| SE_ARGC
| SE_32RVAL1
,
186 * Module linkage information for the kernel.
188 static struct modlsys modlsys
= {
189 &mod_syscallops
, "System V semaphore facility", &ipcsem_sysent
192 #ifdef _SYSCALL32_IMPL
193 static struct modlsys modlsys32
= {
194 &mod_syscallops32
, "32-bit System V semaphore facility", &ipcsem_sysent
198 static struct modlinkage modlinkage
= {
201 #ifdef _SYSCALL32_IMPL
213 sem_svc
= ipcs_create("semids", rc_project_semmni
, rc_zone_semmni
,
214 sizeof (ksemid_t
), sem_dtor
, sem_rmid
, AT_IPC_SEM
,
215 offsetof(ipc_rqty_t
, ipcq_semmni
));
216 zone_key_create(&sem_zone_key
, NULL
, sem_remove_zone
, NULL
);
218 if ((result
= mod_install(&modlinkage
)) == 0)
221 (void) zone_key_delete(sem_zone_key
);
222 ipcs_destroy(sem_svc
);
234 _info(struct modinfo
*modinfop
)
236 return (mod_info(&modlinkage
, modinfop
));
240 sem_dtor(kipc_perm_t
*perm
)
242 ksemid_t
*sp
= (ksemid_t
*)perm
;
244 kmem_free(sp
->sem_base
,
245 P2ROUNDUP(sp
->sem_nsems
* sizeof (struct sem
), 64));
246 list_destroy(&sp
->sem_undos
);
250 * sem_undo_add - Create or update adjust on exit entry.
/*
 * sem_undo_add - fold one adjustment into an undo structure.
 *
 * val  - amount to subtract from the recorded adjust-on-exit value;
 *        sem_rollback passes the negated sem_op to take an adjustment back.
 * num  - index into undo->un_aoe.
 * undo - undo structure whose entry is updated.
 *
 * The candidate value is compared against +/- USHRT_MAX before it is
 * stored.  semop assigns the result of this function to its error
 * variable, so the result is used as an error code.
 * NOTE(review): the return type, the statement guarded by the range
 * check and the final result are not visible in this view of the file;
 * confirm them against the complete source.
 */
253 sem_undo_add(short val
, ushort_t num
, struct sem_undo
*undo
)
/* Compute the would-be entry in an int so the range test can see overflow. */
255 int newval
= undo
->un_aoe
[num
] - val
;
257 if (newval
> USHRT_MAX
|| newval
< -USHRT_MAX
)
/* Value accepted: record it. */
259 undo
->un_aoe
[num
] = newval
;
265 * sem_undo_clear - clears all undo entries for specified semaphores
267 * Used when semaphores are reset by SETVAL or SETALL.
/*
 * sem_undo_clear - visit the undo entries for semaphores low..high.
 *
 * sp   - semaphore set whose sem_undos list is walked.
 * low  - first semaphore index (inclusive).
 * high - last semaphore index (inclusive); asserted to be below
 *        sp->sem_nsems.
 *
 * Every undo structure on the list is visited and, for each, every index
 * in the range.  semctl calls this with a single index when one value is
 * set and with 0..sem_nsems - 1 when all values are set.
 * NOTE(review): the statement executed for each entry is not visible in
 * this view; per the description above this block it clears the entry.
 */
270 sem_undo_clear(ksemid_t
*sp
, ushort_t low
, ushort_t high
)
272 struct sem_undo
*undo
;
276 ASSERT(high
< sp
->sem_nsems
);
/* Outer loop: each undo structure attached to this set. */
278 for (undo
= list_head(&sp
->sem_undos
); undo
;
279 undo
= list_next(&sp
->sem_undos
, undo
))
/* Inner loop: each requested semaphore index. */
280 for (i
= low
; i
<= high
; i
++)
285 * sem_rollback - roll back work done so far if unable to complete operation
/*
 * sem_rollback - take back the first n operations of a semop vector.
 *
 * sp   - semaphore set the operations were applied to.
 * op   - start of the operation vector.
 * n    - number of operations, counted from the start, to revert.
 * undo - undo structure of the caller; asserted non-NULL whenever an
 *        operation carries SEM_UNDO.
 *
 * The vector is walked backwards from op[n - 1] to op[0].  For each
 * operation its sem_op is subtracted from the semaphore value, and for
 * SEM_UNDO operations sem_undo_add is called with the negated sem_op so
 * the adjust-on-exit entry is reverted as well; that result is
 * deliberately ignored.
 * NOTE(review): lines between the loop header and the first visible
 * statement are absent from this view.
 */
288 sem_rollback(ksemid_t
*sp
, struct sembuf
*op
, int n
, struct sem_undo
*undo
)
290 struct sem
*semp
; /* semaphore ptr */
/* Start at the last applied operation and step towards the first. */
292 for (op
+= n
- 1; n
--; op
--) {
295 semp
= &sp
->sem_base
[op
->sem_num
];
296 semp
->semval
-= op
->sem_op
;
297 if (op
->sem_flg
& SEM_UNDO
) {
298 ASSERT(undo
!= NULL
);
299 (void) sem_undo_add(-op
->sem_op
, op
->sem_num
, undo
);
/*
 * sem_rmid - removal callback for a semaphore set.
 *
 * Registered with ipcs_create() for the semaphore service.  perm is cast
 * to the ksemid_t being removed.
 *
 * First pass: each undo structure is taken off sp->sem_undos and the
 * owning process p_lock is acquired.  If that process has a NULL
 * p_semacct the lock is simply dropped (the file header explains that
 * semexit owns cleanup in that case).  Otherwise the structure is removed
 * from the process AVL tree, freed, and the hold it had on the set is
 * released with ipc_rele_locked.
 *
 * Second pass: every semaphore has semval and sempid reset to zero and
 * both of its condition variables are broadcast.
 * NOTE(review): several declarations and the statements that guard the
 * skip path and the broadcasts are not visible in this view.
 */
305 sem_rmid(kipc_perm_t
*perm
)
307 ksemid_t
*sp
= (ksemid_t
*)perm
;
309 struct sem_undo
*undo
;
/* All undo structures for this set share one size. */
310 size_t size
= SEM_UNDOSZ(sp
->sem_nsems
);
/* Drain the undo list from its head. */
314 while (undo
= list_head(&sp
->sem_undos
)) {
315 list_remove(&sp
->sem_undos
, undo
);
316 mutex_enter(&undo
->un_proc
->p_lock
);
/* A NULL tree pointer means the process is already in semexit. */
317 if (undo
->un_proc
->p_semacct
== NULL
) {
318 mutex_exit(&undo
->un_proc
->p_lock
);
321 avl_remove(undo
->un_proc
->p_semacct
, undo
);
322 mutex_exit(&undo
->un_proc
->p_lock
);
323 kmem_free(undo
, size
);
324 ipc_rele_locked(sem_svc
, (kipc_perm_t
*)sp
);
/* Reset every semaphore and wake anything waiting on it. */
327 for (i
= 0; i
< sp
->sem_nsems
; i
++) {
328 semp
= &sp
->sem_base
[i
];
329 semp
->semval
= semp
->sempid
= 0;
331 cv_broadcast(&semp
->semncnt_cv
);
335 cv_broadcast(&semp
->semzcnt_cv
);
342 * semctl - Semctl system call.
345 semctl(int semid
, uint_t semnum
, int cmd
, uintptr_t arg
)
347 ksemid_t
*sp
; /* ptr to semaphore header */
348 struct sem
*p
; /* ptr to semaphore */
349 unsigned int i
; /* loop control */
356 model_t mdl
= get_udatamodel();
357 STRUCT_DECL(semid_ds
, sid
);
358 struct semid_ds64 ds64
;
360 STRUCT_INIT(sid
, mdl
);
364 * Perform pre- or non-lookup actions (e.g. copyins, RMID).
368 if (copyin((void *)arg
, STRUCT_BUF(sid
), STRUCT_SIZE(sid
)))
369 return (set_errno(EFAULT
));
373 if (copyin((void *)arg
, &ds64
, sizeof (struct semid_ds64
)))
374 return (set_errno(EFAULT
));
378 if ((lock
= ipc_lookup(sem_svc
, semid
,
379 (kipc_perm_t
**)&sp
)) == NULL
)
380 return (set_errno(EINVAL
));
381 vsize
= sp
->sem_nsems
* sizeof (*vals
);
384 /* allocate space to hold all semaphore values */
385 vals
= kmem_alloc(vsize
, KM_SLEEP
);
387 if (copyin((void *)arg
, vals
, vsize
)) {
388 kmem_free(vals
, vsize
);
389 return (set_errno(EFAULT
));
394 if (error
= ipc_rmid(sem_svc
, semid
, cr
))
395 return (set_errno(error
));
399 if ((lock
= ipc_lookup(sem_svc
, semid
, (kipc_perm_t
**)&sp
)) == NULL
) {
401 kmem_free(vals
, vsize
);
402 return (set_errno(EINVAL
));
405 /* Set ownership and permissions. */
408 if (error
= ipcperm_set(sem_svc
, cr
, &sp
->sem_perm
,
409 &STRUCT_BUF(sid
)->sem_perm
, mdl
)) {
411 return (set_errno(error
));
413 sp
->sem_ctime
= gethrestime_sec();
417 /* Get semaphore data structure. */
420 if (error
= ipcperm_access(&sp
->sem_perm
, SEM_R
, cr
)) {
422 return (set_errno(error
));
425 ipcperm_stat(&STRUCT_BUF(sid
)->sem_perm
, &sp
->sem_perm
, mdl
);
426 STRUCT_FSETP(sid
, sem_base
, NULL
); /* kernel addr */
427 STRUCT_FSET(sid
, sem_nsems
, sp
->sem_nsems
);
428 STRUCT_FSET(sid
, sem_otime
, sp
->sem_otime
);
429 STRUCT_FSET(sid
, sem_ctime
, sp
->sem_ctime
);
430 STRUCT_FSET(sid
, sem_binary
, sp
->sem_binary
);
433 if (copyout(STRUCT_BUF(sid
), (void *)arg
, STRUCT_SIZE(sid
)))
434 return (set_errno(EFAULT
));
439 if (error
= ipcperm_set64(sem_svc
, cr
, &sp
->sem_perm
,
442 return (set_errno(error
));
444 sp
->sem_ctime
= gethrestime_sec();
450 ipcperm_stat64(&ds64
.semx_perm
, &sp
->sem_perm
);
451 ds64
.semx_nsems
= sp
->sem_nsems
;
452 ds64
.semx_otime
= sp
->sem_otime
;
453 ds64
.semx_ctime
= sp
->sem_ctime
;
456 if (copyout(&ds64
, (void *)arg
, sizeof (struct semid_ds64
)))
457 return (set_errno(EFAULT
));
461 /* Get # of processes sleeping for greater semval. */
463 if (error
= ipcperm_access(&sp
->sem_perm
, SEM_R
, cr
)) {
465 return (set_errno(error
));
467 if (semnum
>= sp
->sem_nsems
) {
469 return (set_errno(EINVAL
));
471 retval
= sp
->sem_base
[semnum
].semncnt
;
475 /* Get pid of last process to operate on semaphore. */
477 if (error
= ipcperm_access(&sp
->sem_perm
, SEM_R
, cr
)) {
479 return (set_errno(error
));
481 if (semnum
>= sp
->sem_nsems
) {
483 return (set_errno(EINVAL
));
485 retval
= sp
->sem_base
[semnum
].sempid
;
489 /* Get semval of one semaphore. */
491 if (error
= ipcperm_access(&sp
->sem_perm
, SEM_R
, cr
)) {
493 return (set_errno(error
));
495 if (semnum
>= sp
->sem_nsems
) {
497 return (set_errno(EINVAL
));
499 retval
= sp
->sem_base
[semnum
].semval
;
503 /* Get all semvals in set. */
505 if (error
= ipcperm_access(&sp
->sem_perm
, SEM_R
, cr
)) {
507 return (set_errno(error
));
510 /* allocate space to hold all semaphore values */
511 vsize
= sp
->sem_nsems
* sizeof (*vals
);
512 vals
= vp
= kmem_alloc(vsize
, KM_SLEEP
);
514 for (i
= sp
->sem_nsems
, p
= sp
->sem_base
; i
--; p
++, vp
++)
515 bcopy(&p
->semval
, vp
, sizeof (p
->semval
));
519 if (copyout((void *)vals
, (void *)arg
, vsize
)) {
520 kmem_free(vals
, vsize
);
521 return (set_errno(EFAULT
));
524 kmem_free(vals
, vsize
);
527 /* Get # of processes sleeping for semval to become zero. */
529 if (error
= ipcperm_access(&sp
->sem_perm
, SEM_R
, cr
)) {
531 return (set_errno(error
));
533 if (semnum
>= sp
->sem_nsems
) {
535 return (set_errno(EINVAL
));
537 retval
= sp
->sem_base
[semnum
].semzcnt
;
541 /* Set semval of one semaphore. */
543 if (error
= ipcperm_access(&sp
->sem_perm
, SEM_A
, cr
)) {
545 return (set_errno(error
));
547 if (semnum
>= sp
->sem_nsems
) {
549 return (set_errno(EINVAL
));
551 if ((uint_t
)arg
> USHRT_MAX
) {
553 return (set_errno(ERANGE
));
555 p
= &sp
->sem_base
[semnum
];
556 if ((p
->semval
= (ushort_t
)arg
) != 0) {
558 cv_broadcast(&p
->semncnt_cv
);
560 } else if (p
->semzcnt
) {
561 cv_broadcast(&p
->semzcnt_cv
);
563 p
->sempid
= curproc
->p_pid
;
564 sem_undo_clear(sp
, (ushort_t
)semnum
, (ushort_t
)semnum
);
568 /* Set semvals of all semaphores in set. */
570 /* Check if semaphore set has been deleted and reallocated. */
571 if (sp
->sem_nsems
* sizeof (*vals
) != vsize
) {
572 error
= set_errno(EINVAL
);
575 if (error
= ipcperm_access(&sp
->sem_perm
, SEM_A
, cr
)) {
576 error
= set_errno(error
);
579 sem_undo_clear(sp
, 0, sp
->sem_nsems
- 1);
580 for (i
= 0, p
= sp
->sem_base
; i
< sp
->sem_nsems
;
581 (p
++)->sempid
= curproc
->p_pid
) {
582 if ((p
->semval
= vals
[i
++]) != 0) {
584 cv_broadcast(&p
->semncnt_cv
);
586 } else if (p
->semzcnt
) {
587 cv_broadcast(&p
->semzcnt_cv
);
592 kmem_free(vals
, vsize
);
597 return (set_errno(EINVAL
));
604 * semexit - Called by exit() to clean up on process exit.
610 struct sem_undo
*undo
;
613 mutex_enter(&pp
->p_lock
);
614 tree
= pp
->p_semacct
;
615 pp
->p_semacct
= NULL
;
616 mutex_exit(&pp
->p_lock
);
618 while (undo
= avl_destroy_nodes(tree
, &cookie
)) {
619 ksemid_t
*sp
= undo
->un_sp
;
620 size_t size
= SEM_UNDOSZ(sp
->sem_nsems
);
623 (void) ipc_lock(sem_svc
, sp
->sem_perm
.ipc_id
);
624 if (!IPC_FREE(&sp
->sem_perm
)) {
625 for (i
= 0; i
< sp
->sem_nsems
; i
++) {
626 int adj
= undo
->un_aoe
[i
];
628 struct sem
*semp
= &sp
->sem_base
[i
];
629 int v
= (int)semp
->semval
+ adj
;
631 if (v
< 0 || v
> USHRT_MAX
)
633 semp
->semval
= (ushort_t
)v
;
634 if (v
== 0 && semp
->semzcnt
)
635 cv_broadcast(&semp
->semzcnt_cv
);
636 if (adj
> 0 && semp
->semncnt
)
637 cv_broadcast(&semp
->semncnt_cv
);
640 list_remove(&sp
->sem_undos
, undo
);
642 ipc_rele(sem_svc
, (kipc_perm_t
*)sp
);
643 kmem_free(undo
, size
);
647 kmem_free(tree
, sizeof (avl_tree_t
));
651 * Remove all semaphores associated with a given zone. Called by
652 * zone_shutdown when the zone is halted.
656 sem_remove_zone(zoneid_t zoneid
, void *arg
)
658 ipc_remove_zone(sem_svc
, zoneid
);
662 * semget - Semget system call.
/*
 * semget - look up or create a semaphore set.
 *
 * key    - IPC key handed to ipc_get() and ipc_commit_begin().
 * nsems  - number of semaphores requested.
 * semflg - flags handed to ipc_get() and ipc_commit_begin().
 *
 * Visible behaviour:
 *  - A failure from ipc_get() is returned through set_errno().
 *  - If the returned set is not free (it already exists), nsems must lie
 *    in 0..sp->sem_nsems, otherwise EINVAL.
 *  - For a new set, nsems must be positive and must pass the
 *    rc_process_semmsl resource control test; on failure pp->p_lock is
 *    dropped, ipc_cleanup() is called and EINVAL is returned.  p_lock is
 *    also dropped on the success path (where it is acquired is not
 *    visible in this view).
 *  - The semaphore array is zero-allocated with its size rounded up to a
 *    multiple of 64 bytes; sem_binary is set when nsems is 1; sem_nsems,
 *    sem_ctime and the undo list are initialized.
 *  - ipc_commit_begin() is called; the semopm enforced value is read and
 *    the semmsl control is tested a second time before ipc_commit_end().
 *  - audit_ipcget() is called and the id is taken from sem_perm.ipc_id.
 * NOTE(review): local declarations, closing braces, the targets of the
 * kmem_zalloc and rctl_enforced_value results, and the final return are
 * not visible in this view.
 */
665 semget(key_t key
, int nsems
, int semflg
)
670 proc_t
*pp
= curproc
;
673 if (error
= ipc_get(sem_svc
, key
, semflg
, (kipc_perm_t
**)&sp
, &lock
))
674 return (set_errno(error
));
676 if (!IPC_FREE(&sp
->sem_perm
)) {
678 * A semaphore with the requested key exists.
680 if (!((nsems
>= 0) && (nsems
<= sp
->sem_nsems
))) {
682 return (set_errno(EINVAL
));
686 * This is a new semaphore set. Finish initialization.
688 if (nsems
<= 0 || (rctl_test(rc_process_semmsl
, pp
->p_rctls
, pp
,
689 nsems
, RCA_SAFE
) & RCT_DENY
)) {
691 mutex_exit(&pp
->p_lock
);
692 ipc_cleanup(sem_svc
, (kipc_perm_t
*)sp
);
693 return (set_errno(EINVAL
));
696 mutex_exit(&pp
->p_lock
);
699 * We round the allocation up to coherency granularity
700 * so that multiple semaphore allocations won't result
701 * in the false sharing of their sem structures.
704 kmem_zalloc(P2ROUNDUP(nsems
* sizeof (struct sem
), 64),
706 sp
->sem_binary
= (nsems
== 1);
707 sp
->sem_nsems
= (ushort_t
)nsems
;
708 sp
->sem_ctime
= gethrestime_sec();
710 list_create(&sp
->sem_undos
, sizeof (struct sem_undo
),
711 offsetof(struct sem_undo
, un_list
));
713 if (error
= ipc_commit_begin(sem_svc
, key
, semflg
,
714 (kipc_perm_t
*)sp
)) {
717 return (set_errno(error
));
720 rctl_enforced_value(rc_process_semopm
, pp
->p_rctls
, pp
);
721 if (rctl_test(rc_process_semmsl
, pp
->p_rctls
, pp
, nsems
,
722 RCA_SAFE
) & RCT_DENY
) {
723 ipc_cleanup(sem_svc
, (kipc_perm_t
*)sp
);
724 return (set_errno(EINVAL
));
726 lock
= ipc_commit_end(sem_svc
, &sp
->sem_perm
);
730 audit_ipcget(AT_IPC_SEM
, (void *)sp
);
732 id
= sp
->sem_perm
.ipc_id
;
738 * semids system call.
/*
 * semids - thin wrapper around ipc_ids() for the semaphore service.
 *
 * buf, nids and pnids are forwarded unchanged to ipc_ids().  A non-zero
 * result from ipc_ids() is returned through set_errno().
 * NOTE(review): the return type, the local declaration and the success
 * return are not visible in this view.
 */
741 semids(int *buf
, uint_t nids
, uint_t
*pnids
)
745 if (error
= ipc_ids(sem_svc
, buf
, nids
, pnids
))
746 return (set_errno(error
));
753 * Helper function for semop - copies in the provided timespec and
754 * computes the absolute future time after which we must return.
/*
 * compute_timeout - read a user timespec and make it absolute.
 *
 * ts  - kernel buffer that receives the timespec.
 * now - time that is added to ts to form the absolute deadline.
 * tsp - passed by semop as the address of its timespec pointer; how it is
 *       set is not visible in this view.
 *
 * For the native data model the timespec is copied in directly; otherwise
 * a timespec32_t is copied in and converted with TIMESPEC32_TO_TIMESPEC.
 * The value is then checked with itimerspecfix() and now is added to it
 * with timespecadd().
 * NOTE(review): the remainder of the parameter list (the user pointer
 * named timeout), the ts32 declaration and every return statement are
 * absent from this view; semop treats the result as an error code.
 */
757 compute_timeout(timespec_t
**tsp
, timespec_t
*ts
, timespec_t
*now
,
760 model_t datamodel
= get_udatamodel();
/* Native callers supply a timespec_t of the kernel layout. */
762 if (datamodel
== DATAMODEL_NATIVE
) {
763 if (copyin(timeout
, ts
, sizeof (timespec_t
)))
/* Other callers supply the 32-bit layout, which is widened afterwards. */
768 if (copyin(timeout
, &ts32
, sizeof (timespec32_t
)))
770 TIMESPEC32_TO_TIMESPEC(ts
, &ts32
)
773 if (itimerspecfix(ts
))
777 * Convert the timespec value into absolute time.
779 timespecadd(ts
, now
);
786 * Undo structure comparator. We sort based on ksemid_t pointer.
/*
 * sem_undo_compar - ordering function given to avl_create() for the
 * per-process undo tree.
 *
 * x, y - the two struct sem_undo nodes to compare.
 *
 * The nodes are ordered by their un_sp pointer: one test for less-than
 * and one for greater-than.
 * NOTE(review): the return type and the values produced for each case
 * are not visible in this view.
 */
789 sem_undo_compar(const void *x
, const void *y
)
791 struct sem_undo
*undo1
= (struct sem_undo
*)x
;
792 struct sem_undo
*undo2
= (struct sem_undo
*)y
;
794 if (undo1
->un_sp
< undo2
->un_sp
)
796 if (undo1
->un_sp
> undo2
->un_sp
)
802 * Helper function for semop - creates an undo structure and adds it to
803 * the process's avl tree and the semaphore's list.
/*
 * sem_undo_alloc - create the undo structure for a process/set pair.
 *
 * pp       - process that will own the structure.
 * sp       - semaphore set it refers to.
 * lock     - out: receives the lock returned by ipc_lock() for sp.
 * template - search key used with avl_find() on the process tree.
 * un       - out: receives the structure found by avl_find(), if any.
 *
 * Visible sequence:
 *  1. A zeroed undo structure of SEM_UNDOSZ(sp->sem_nsems) bytes is
 *     allocated with KM_SLEEP, and an AVL tree is allocated as well when
 *     the process has no p_semacct yet.  Both happen before any lock is
 *     taken here.
 *  2. The set is locked with ipc_lock(); if it has been freed in the
 *     meantime the allocations are released.
 *  3. Under pp->p_lock the tree is installed with sem_undo_compar as its
 *     comparator if p_semacct is still NULL; there is also a path that
 *     frees the preallocated tree.
 *  4. If avl_find() locates an existing entry for the template, p_lock is
 *     dropped and the new structure is freed.  Otherwise the structure is
 *     inserted in the tree, p_lock is dropped, the structure is put at
 *     the head of sp->sem_undos and a hold is taken on the set.
 * NOTE(review): the return type, several declarations, the else branches,
 * the initialization of the new structure and all return statements are
 * not visible in this view.
 */
806 sem_undo_alloc(proc_t
*pp
, ksemid_t
*sp
, kmutex_t
**lock
,
807 struct sem_undo
*template, struct sem_undo
**un
)
810 struct sem_undo
*undo
;
811 avl_tree_t
*tree
= NULL
;
816 size
= SEM_UNDOSZ(sp
->sem_nsems
);
817 undo
= kmem_zalloc(size
, KM_SLEEP
);
/* Preallocate the tree while no lock is held. */
821 if (pp
->p_semacct
== NULL
)
822 tree
= kmem_alloc(sizeof (avl_tree_t
), KM_SLEEP
);
824 *lock
= ipc_lock(sem_svc
, sp
->sem_perm
.ipc_id
);
/* The set may have been removed while the allocations slept. */
825 if (IPC_FREE(&sp
->sem_perm
)) {
826 kmem_free(undo
, size
);
828 kmem_free(tree
, sizeof (avl_tree_t
));
832 mutex_enter(&pp
->p_lock
);
/* Re-check under p_lock before installing the tree. */
834 if (pp
->p_semacct
== NULL
) {
835 avl_create(tree
, sem_undo_compar
,
836 sizeof (struct sem_undo
),
837 offsetof(struct sem_undo
, un_avl
));
838 pp
->p_semacct
= tree
;
840 kmem_free(tree
, sizeof (avl_tree_t
));
/* An existing entry wins; the freshly allocated one is discarded. */
844 if (*un
= avl_find(pp
->p_semacct
, template, &where
)) {
845 mutex_exit(&pp
->p_lock
);
846 kmem_free(undo
, size
);
849 avl_insert(pp
->p_semacct
, undo
, where
);
850 mutex_exit(&pp
->p_lock
);
851 list_insert_head(&sp
->sem_undos
, undo
);
852 ipc_hold(sem_svc
, (kipc_perm_t
*)sp
);
860 * semop - Semop system call.
863 semop(int semid
, struct sembuf
*sops
, size_t nsops
, timespec_t
*timeout
)
867 struct sembuf
*op
; /* ptr to operation */
868 int i
; /* loop control */
869 struct sem
*semp
; /* ptr to semaphore */
871 struct sembuf
*uops
; /* ptr to copy of user ops */
872 struct sembuf x_sem
; /* avoid kmem_alloc's */
873 timespec_t now
, ts
, *tsp
= NULL
;
875 int cvres
, needundo
, mode
;
876 struct sem_undo
*undo
;
877 proc_t
*pp
= curproc
;
880 CPU_STATS_ADDQ(CPU
, sys
, sema
, 1); /* bump semaphore op count */
883 * To avoid the cost of copying in 'timeout' in the common
884 * case, we could only grab the time here and defer the copyin
885 * and associated computations until we are about to block.
887 * The down side to this is that we would then have to spin
888 * some goto top nonsense to avoid the copyin behind the semid
889 * lock. As a common use of timed semaphores is as an explicit
890 * blocking mechanism, this could incur a greater penalty.
892 * If we eventually decide that this would be a wise route to
893 * take, the deferrable functionality is completely contained
894 * in 'compute_timeout', and the interface is defined such that
895 * we can legally not validate 'timeout' if it is unused.
897 if (timeout
!= NULL
) {
898 timecheck
= timechanged
;
900 if (error
= compute_timeout(&tsp
, &ts
, &now
, timeout
))
901 return (set_errno(error
));
905 * Allocate space to hold the vector of semaphore ops. If
906 * there is only 1 operation we use a preallocated buffer on
907 * the stack for speed.
909 * Since we don't want to allow the user to allocate an
910 * arbitrary amount of kernel memory, we need to check against
911 * the number of operations allowed by the semaphore. We only
912 * bother doing this if the number of operations is larger than SEM_MAXUCOPS.
919 else if (nsops
<= SEM_MAXUCOPS
)
920 uops
= kmem_alloc(nsops
* sizeof (*uops
), KM_SLEEP
);
922 if (nsops
> SEM_MAXUCOPS
) {
923 if ((lock
= ipc_lookup(sem_svc
, semid
,
924 (kipc_perm_t
**)&sp
)) == NULL
)
925 return (set_errno(EFAULT
));
927 if (nsops
> sp
->sem_maxops
) {
929 return (set_errno(E2BIG
));
932 ipc_hold(sem_svc
, (kipc_perm_t
*)sp
);
935 uops
= kmem_alloc(nsops
* sizeof (*uops
), KM_SLEEP
);
936 if (copyin(sops
, uops
, nsops
* sizeof (*op
))) {
938 (void) ipc_lock(sem_svc
, sp
->sem_perm
.ipc_id
);
942 lock
= ipc_lock(sem_svc
, sp
->sem_perm
.ipc_id
);
943 if (IPC_FREE(&sp
->sem_perm
)) {
949 * This could be interleaved with the above code, but
950 * keeping them separate improves readability.
952 if (copyin(sops
, uops
, nsops
* sizeof (*op
))) {
954 goto semoperr_unlocked
;
957 if ((lock
= ipc_lookup(sem_svc
, semid
,
958 (kipc_perm_t
**)&sp
)) == NULL
) {
960 goto semoperr_unlocked
;
963 if (nsops
> sp
->sem_maxops
) {
970 * Scan all operations. Verify that sem #s are in range and
971 * this process is allowed the requested operations. If any
972 * operations are marked SEM_UNDO, find (or allocate) the undo
973 * structure for this process and semaphore.
977 for (i
= 0, op
= uops
; i
++ < nsops
; op
++) {
978 mode
|= op
->sem_op
? SEM_A
: SEM_R
;
979 if (op
->sem_num
>= sp
->sem_nsems
) {
983 if ((op
->sem_flg
& SEM_UNDO
) && op
->sem_op
)
986 if (error
= ipcperm_access(&sp
->sem_perm
, mode
, CRED()))
990 struct sem_undo
template;
993 mutex_enter(&pp
->p_lock
);
995 undo
= avl_find(pp
->p_semacct
, &template, NULL
);
998 mutex_exit(&pp
->p_lock
);
1002 ipc_hold(sem_svc
, (kipc_perm_t
*)sp
);
1004 if (error
= sem_undo_alloc(pp
, sp
, &lock
, &template,
1008 /* sem_undo_alloc unlocks the semaphore */
1009 if (error
= ipcperm_access(&sp
->sem_perm
, mode
, CRED()))
1016 * Loop waiting for the operations to be satisfied atomically.
1017 * Actually, do the operations and undo them if a wait is needed
1018 * or an error is detected.
1020 for (i
= 0; i
< nsops
; i
++) {
1022 semp
= &sp
->sem_base
[op
->sem_num
];
1025 * Raise the semaphore (i.e. sema_v)
1027 if (op
->sem_op
> 0) {
1028 if (op
->sem_op
+ (int)semp
->semval
> USHRT_MAX
||
1029 ((op
->sem_flg
& SEM_UNDO
) &&
1030 (error
= sem_undo_add(op
->sem_op
, op
->sem_num
,
1033 sem_rollback(sp
, uops
, i
, undo
);
1038 semp
->semval
+= op
->sem_op
;
1040 * If we are only incrementing the semaphore value
1041 * by one on a binary semaphore, we can cv_signal.
1043 if (semp
->semncnt
) {
1044 if (op
->sem_op
== 1 && sp
->sem_binary
)
1045 cv_signal(&semp
->semncnt_cv
);
1047 cv_broadcast(&semp
->semncnt_cv
);
1049 if (semp
->semzcnt
&& !semp
->semval
)
1050 cv_broadcast(&semp
->semzcnt_cv
);
1055 * Lower the semaphore (i.e. sema_p)
1057 if (op
->sem_op
< 0) {
1058 if (semp
->semval
>= (unsigned)(-op
->sem_op
)) {
1059 if ((op
->sem_flg
& SEM_UNDO
) &&
1060 (error
= sem_undo_add(op
->sem_op
,
1061 op
->sem_num
, undo
))) {
1063 sem_rollback(sp
, uops
, i
, undo
);
1066 semp
->semval
+= op
->sem_op
;
1067 if (semp
->semzcnt
&& !semp
->semval
)
1068 cv_broadcast(&semp
->semzcnt_cv
);
1072 sem_rollback(sp
, uops
, i
, undo
);
1073 if (op
->sem_flg
& IPC_NOWAIT
) {
1079 * Mark the semaphore set as not a binary type
1080 * if we are decrementing the value by more than 1.
1082 * V operations will resort to cv_broadcast
1083 * for this set because there are too many weird
1084 * cases that have to be caught.
1086 if (op
->sem_op
< -1)
1090 ipc_hold(sem_svc
, (kipc_perm_t
*)sp
);
1093 cvres
= cv_waituntil_sig(&semp
->semncnt_cv
, lock
,
1095 lock
= ipc_relock(sem_svc
, sp
->sem_perm
.ipc_id
, lock
);
1097 if (!IPC_FREE(&sp
->sem_perm
)) {
1098 ASSERT(semp
->semncnt
!= 0);
1100 if (cvres
> 0) /* normal wakeup */
1104 /* EINTR or EAGAIN overrides EIDRM */
1115 * Wait for zero value
1119 sem_rollback(sp
, uops
, i
, undo
);
1120 if (op
->sem_flg
& IPC_NOWAIT
) {
1127 ipc_hold(sem_svc
, (kipc_perm_t
*)sp
);
1130 cvres
= cv_waituntil_sig(&semp
->semzcnt_cv
, lock
,
1132 lock
= ipc_relock(sem_svc
, sp
->sem_perm
.ipc_id
, lock
);
1135 * Don't touch semp if the semaphores have been removed.
1137 if (!IPC_FREE(&sp
->sem_perm
)) {
1138 ASSERT(semp
->semzcnt
!= 0);
1140 if (cvres
> 0) /* normal wakeup */
1144 /* EINTR or EAGAIN overrides EIDRM */
1155 /* All operations succeeded. Update sempid for accessed semaphores. */
1156 for (i
= 0, op
= uops
; i
++ < nsops
;
1157 sp
->sem_base
[(op
++)->sem_num
].sempid
= pp
->p_pid
)
1159 sp
->sem_otime
= gethrestime_sec();
1161 ipc_rele(sem_svc
, (kipc_perm_t
*)sp
);
1165 /* Before leaving, deallocate the buffer that held the user semops */
1167 kmem_free(uops
, sizeof (*uops
) * nsops
);
1171 * Error return labels
1175 ipc_rele(sem_svc
, (kipc_perm_t
*)sp
);
1181 /* Before leaving, deallocate the buffer that held the user semops */
1183 kmem_free(uops
, sizeof (*uops
) * nsops
);
1184 return (set_errno(error
));
1188 * semsys - System entry point for semctl, semget, and semop system calls.
/*
 * semsys - common entry point that fans out to the semaphore calls.
 *
 * opcode - selects the call to make.
 * a1..a4 - raw arguments, cast to the types each target expects.
 *
 * Visible targets, in order: semctl(a1, a2, a3, a4), semget(a1, a2, a3),
 * semop(a1, a2, a3) with a zero timeout, semids(a1, a2, a3) and a second
 * semop call that takes a further argument.  The last assignment sets the
 * result to set_errno(EINVAL), presumably for an unrecognized opcode.
 * NOTE(review): the selector labels, the last argument of the second
 * semop call and the final return are not visible in this view.
 */
1191 semsys(int opcode
, uintptr_t a1
, uintptr_t a2
, uintptr_t a3
, uintptr_t a4
)
1197 error
= semctl((int)a1
, (uint_t
)a2
, (int)a3
, a4
);
1200 error
= semget((key_t
)a1
, (int)a2
, (int)a3
);
1203 error
= semop((int)a1
, (struct sembuf
*)a2
, (size_t)a3
, 0);
1206 error
= semids((int *)a1
, (uint_t
)a2
, (uint_t
*)a3
);
1209 error
= semop((int)a1
, (struct sembuf
*)a2
, (size_t)a3
,
1213 error
= set_errno(EINVAL
);