/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * IMPLEMENTATION NOTES ON CODE REWRITE (Eric Schenk, January 1995):
 * This code underwent a massive rewrite in order to solve some problems
 * with the original code. In particular the original code failed to
 * wake up processes that were waiting for semval to go to 0 if the
 * value went to 0 and was then incremented rapidly enough. In solving
 * this problem I have also modified the implementation so that it
 * processes pending operations in a FIFO manner, thus giving a guarantee
 * that processes waiting for a lock on the semaphore won't starve
 * unless another locking process fails to unlock.
 * In addition the following two changes in behavior have been introduced:
 * - The original implementation of semop returned the value of the
 *   last semaphore element examined on success. This does not
 *   match the manual page specification, and effectively
 *   allows the user to read the semaphore even if they do not
 *   have read permissions. The implementation now returns 0
 *   on success as stated in the manual page.
 * - There is some confusion over whether the set of undo adjustments
 *   to be performed at exit should be done in an atomic manner.
 *   That is, if we are attempting to decrement the semval, should we queue
 *   up and wait until we can do so legally?
 *   The original implementation attempted to do this.
 *   The current implementation does not do so. This is because I don't
 *   think it is the right thing (TM) to do, and because I couldn't
 *   see a clean way to get the old behavior with the new design.
 *   The POSIX standard and SVID should be consulted to determine
 *   what behavior is mandated.
 *
 * Further notes on refinement (Christoph Rohland, December 1998):
 * - The POSIX standard says that the undo adjustments should simply
 *   be redone. So the current implementation is OK.
 * - The previous code had two flaws:
 *   1) It actively gave the semaphore to the next waiting process
 *      sleeping on the semaphore. Since this process did not have the
 *      cpu, this led to many unnecessary context switches and bad
 *      performance. Now we only check which process should be able to
 *      get the semaphore, and if this process wants to reduce some
 *      semaphore value we simply wake it up without performing the
 *      operation, so it has to try to get the semaphore again later.
 *      Thus e.g. the running process may reacquire the semaphore during
 *      the current time slice. If the process only waits for zero or
 *      increases the semaphore, we perform the operation in advance
 *      and wake it up.
 *   2) It did not wake up all processes waiting for zero. We try to do
 *      better, but only get right those semops that exclusively wait
 *      for zero or increase. If the operations array contains decrement
 *      operations, we do the same as before.
 */
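/*
 * Illustrative user-space sketch (added for clarity; not part of the
 * kernel source). It shows the two behaviors documented above: semop()
 * returns 0 on success rather than a semaphore value, and SEM_UNDO
 * registers an adjustment that sem_exit() rolls back when the process
 * terminates. Error handling is minimal and the 0600 mode is an
 * arbitrary assumption.
 *
 *	#include <sys/types.h>
 *	#include <sys/ipc.h>
 *	#include <sys/sem.h>
 *
 *	int semop_example(void)
 *	{
 *		struct sembuf up = { 0, +1, SEM_UNDO };
 *		int id = semget(IPC_PRIVATE, 1, IPC_CREAT | 0600);
 *
 *		if (id < 0)
 *			return -1;
 *		return semop(id, &up, 1);	(0 on success, not semval)
 *	}
 *
 * If the process exits while still holding the +1, the kernel applies
 * the recorded semadj of -1, returning the semaphore to its old value.
 */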
#include <linux/malloc.h>
#include <linux/smp_lock.h>
#include <linux/init.h>

#include <asm/uaccess.h>
extern int ipcperms (struct ipc_perm *ipcp, short semflg);
static int newary (key_t, int, int);
static int findkey (key_t key);
static void freeary (int id);
static struct semid_ds *semary[SEMMNI];
static int used_sems = 0, used_semids = 0;
static struct wait_queue *sem_lock = NULL;
static int max_semid = 0;

static unsigned short sem_seq = 0;
void __init sem_init (void)
{
        int i;

        used_sems = used_semids = max_semid = sem_seq = 0;
        for (i = 0; i < SEMMNI; i++)
                semary[i] = (struct semid_ds *) IPC_UNUSED;
}
static int findkey (key_t key)
{
        int id;
        struct semid_ds *sma;

        for (id = 0; id <= max_semid; id++) {
                while ((sma = semary[id]) == IPC_NOID)
                        interruptible_sleep_on (&sem_lock);
                if (sma == IPC_UNUSED)
                        continue;
                if (key == sma->sem_perm.key)
                        return id;
        }
        return -1;
}
static int newary (key_t key, int nsems, int semflg)
{
        int id;
        int size;
        struct semid_ds *sma;
        struct ipc_perm *ipcp;

        if (!nsems)
                return -EINVAL;
        if (used_sems + nsems > SEMMNS)
                return -ENOSPC;
        for (id = 0; id < SEMMNI; id++)
                if (semary[id] == IPC_UNUSED) {
                        semary[id] = (struct semid_ds *) IPC_NOID;
                        goto found;
                }
        return -ENOSPC;
found:
        size = sizeof (*sma) + nsems * sizeof (struct sem);
        used_sems += nsems;
        sma = (struct semid_ds *) kmalloc (size, GFP_KERNEL);
        if (!sma) {
                semary[id] = (struct semid_ds *) IPC_UNUSED;
                used_sems -= nsems;
                wake_up (&sem_lock);
                return -ENOMEM;
        }
        memset (sma, 0, size);
        sma->sem_base = (struct sem *) &sma[1];
        ipcp = &sma->sem_perm;
        ipcp->mode = (semflg & S_IRWXUGO);
        ipcp->key = key;
        ipcp->cuid = ipcp->uid = current->euid;
        ipcp->gid = ipcp->cgid = current->egid;
        sma->sem_perm.seq = sem_seq;
        /* sma->sem_pending = NULL; */
        sma->sem_pending_last = &sma->sem_pending;
        /* sma->undo = NULL; */
        sma->sem_nsems = nsems;
        sma->sem_ctime = CURRENT_TIME;
        if (id > max_semid)
                max_semid = id;
        used_semids++;
        wake_up (&sem_lock);
        return (unsigned int) sma->sem_perm.seq * SEMMNI + id;
}
asmlinkage int sys_semget (key_t key, int nsems, int semflg)
{
        int id, err = -EINVAL;
        struct semid_ds *sma;

        lock_kernel();
        if (nsems < 0 || nsems > SEMMSL)
                goto out;
        if (key == IPC_PRIVATE) {
                err = newary(key, nsems, semflg);
        } else if ((id = findkey (key)) == -1) {  /* key not used */
                if (!(semflg & IPC_CREAT))
                        err = -ENOENT;
                else
                        err = newary(key, nsems, semflg);
        } else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
                err = -EEXIST;
        } else {
                sma = semary[id];
                if (nsems > sma->sem_nsems)
                        err = -EINVAL;
                else if (ipcperms(&sma->sem_perm, semflg))
                        err = -EACCES;
                else
                        err = (int) sma->sem_perm.seq * SEMMNI + id;
        }
out:
        unlock_kernel();
        return err;
}
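/*
 * Illustrative note (added for clarity): the value returned above encodes
 * both the slot in semary[] and the per-slot sequence number, so a stale
 * id left over from a destroyed set is rejected. Assuming the usual
 * SEMMNI of 128, a set in slot 5 with seq 3 yields:
 *
 *	semid = 3 * 128 + 5 = 389
 *	slot  = 389 % 128 = 5		(index into semary[])
 *	seq   = 389 / 128 = 3		(checked against sma->sem_perm.seq)
 *
 * sys_semctl and sys_semop below perform exactly this decoding.
 */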
/* Manage the doubly linked list sma->sem_pending as a FIFO:
 * insert new queue elements at the tail sma->sem_pending_last.
 */
static inline void append_to_queue (struct semid_ds * sma,
                                    struct sem_queue * q)
{
        *(q->prev = sma->sem_pending_last) = q;
        *(sma->sem_pending_last = &q->next) = NULL;
}

static inline void prepend_to_queue (struct semid_ds * sma,
                                     struct sem_queue * q)
{
        q->next = sma->sem_pending;
        *(q->prev = &sma->sem_pending) = q;
        if (q->next)
                q->next->prev = &q->next;
        else /* sma->sem_pending_last == &sma->sem_pending */
                sma->sem_pending_last = &q->next;
}

static inline void remove_from_queue (struct semid_ds * sma,
                                      struct sem_queue * q)
{
        *(q->prev) = q->next;
        if (q->next)
                q->next->prev = q->prev;
        else /* sma->sem_pending_last == &q->next */
                sma->sem_pending_last = q->prev;
        q->prev = NULL; /* mark as removed */
}
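/*
 * Clarifying sketch (added; derived from the code above): the FIFO keeps
 * a pointer to the last "next" field instead of a tail element, so append
 * needs no empty-list special case:
 *
 *	empty list:	sma->sem_pending == NULL
 *			sma->sem_pending_last == &sma->sem_pending
 *	one element q:	sma->sem_pending == q, q->next == NULL
 *			q->prev == &sma->sem_pending
 *			sma->sem_pending_last == &q->next
 *
 * q->prev always addresses the field that points at q, which is why
 * remove_from_queue can unlink with a single store through q->prev.
 */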
/* Determine whether a sequence of semaphore operations would succeed
 * all at once. Return 0 if yes, 1 if we need to sleep, else an error code.
 */

static int try_atomic_semop (struct semid_ds * sma, struct sembuf * sops,
                             int nsops, struct sem_undo *un, int pid,
                             int do_undo)
{
        int result, sem_op;
        struct sembuf *sop;
        struct sem * curr;

        for (sop = sops; sop < sops + nsops; sop++) {
                curr = sma->sem_base + sop->sem_num;
                sem_op = sop->sem_op;
                if (!sem_op && curr->semval)
                        goto would_block;

                curr->sempid = (curr->sempid << 16) | pid;
                curr->semval += sem_op;
                if (sop->sem_flg & SEM_UNDO)
                        un->semadj[sop->sem_num] -= sem_op;

                if (curr->semval < 0)
                        goto would_block;
                if (curr->semval > SEMVMX)
                        goto out_of_range;
        }

        if (do_undo) {
                sop--;
                result = 0;
                goto undo;
        }

        sma->sem_otime = CURRENT_TIME;
        return 0;

out_of_range:
        result = -ERANGE;
        goto undo;

would_block:
        if (sop->sem_flg & IPC_NOWAIT)
                result = -EAGAIN;
        else
                result = 1;

undo:
        while (sop >= sops) {
                curr = sma->sem_base + sop->sem_num;
                curr->semval -= sop->sem_op;
                curr->sempid >>= 16;

                if (sop->sem_flg & SEM_UNDO)
                        un->semadj[sop->sem_num] += sop->sem_op;
                sop--;
        }

        return result;
}
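/*
 * Worked example (added for clarity): with semval[0] == 1, semval[1] == 0
 * and the operations { {0, -1, 0}, {1, -1, 0} }, the first decrement is
 * applied in place (semval[0] becomes 0), the second would make
 * semval[1] negative, so control jumps to the undo loop, which walks
 * back and restores semval[0] to 1. The caller then sees 1 (sleep) or
 * -EAGAIN (IPC_NOWAIT): the sequence is applied entirely or not at all.
 */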
/* Go through the pending queue for the indicated semaphore
 * looking for tasks that can be completed.
 */
static void update_queue (struct semid_ds * sma)
{
        int error;
        struct sem_queue * q;

        for (q = sma->sem_pending; q; q = q->next) {
                if (q->status == 1)
                        return; /* wait for other process */

                error = try_atomic_semop(sma, q->sops, q->nsops,
                                         q->undo, q->pid, q->alter);

                /* Does q->sleeper still need to sleep? */
                if (error <= 0) {
                        /* Found one, wake it up */
                        wake_up_interruptible(&q->sleeper);
                        if (error == 0 && q->alter) {
                                /* if q->alter, let it try itself */
                                q->status = 1;
                                return;
                        }
                        q->status = error;
                        remove_from_queue(sma,q);
                }
        }
}
/* The following counts are associated with each semaphore:
 *   semncnt        number of tasks waiting on semval being nonzero
 *   semzcnt        number of tasks waiting on semval being zero
 * This model assumes that a task waits on exactly one semaphore.
 * Since semaphore operations are to be performed atomically, tasks actually
 * wait on a whole sequence of semaphores simultaneously.
 * The counts we return here are a rough approximation, but still
 * guarantee that semncnt+semzcnt>0 if the task is on the pending queue.
 */
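/*
 * Illustrative mapping (added for clarity): a queued operation
 * { sem_num = 2, sem_op = -1 } counts toward semncnt of semaphore 2,
 * while { sem_num = 2, sem_op = 0 } counts toward semzcnt. User space
 * reads these counts via semctl(id, 2, GETNCNT, arg) and
 * semctl(id, 2, GETZCNT, arg), handled in sys_semctl below.
 */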
static int count_semncnt (struct semid_ds * sma, ushort semnum)
{
        int semncnt;
        struct sem_queue * q;

        semncnt = 0;
        for (q = sma->sem_pending; q; q = q->next) {
                struct sembuf * sops = q->sops;
                int nsops = q->nsops;
                int i;
                for (i = 0; i < nsops; i++)
                        if (sops[i].sem_num == semnum
                            && (sops[i].sem_op < 0)
                            && !(sops[i].sem_flg & IPC_NOWAIT))
                                semncnt++;
        }
        return semncnt;
}
static int count_semzcnt (struct semid_ds * sma, ushort semnum)
{
        int semzcnt;
        struct sem_queue * q;

        semzcnt = 0;
        for (q = sma->sem_pending; q; q = q->next) {
                struct sembuf * sops = q->sops;
                int nsops = q->nsops;
                int i;
                for (i = 0; i < nsops; i++)
                        if (sops[i].sem_num == semnum
                            && (sops[i].sem_op == 0)
                            && !(sops[i].sem_flg & IPC_NOWAIT))
                                semzcnt++;
        }
        return semzcnt;
}
/* Free a semaphore set. */
static void freeary (int id)
{
        struct semid_ds *sma = semary[id];
        struct sem_undo *un;
        struct sem_queue *q;

        /* Invalidate this semaphore set */
        sma->sem_perm.seq++;
        sem_seq = (sem_seq+1) % ((unsigned)(1<<31)/SEMMNI); /* increment, but avoid overflow */
        used_sems -= sma->sem_nsems;
        if (id == max_semid)
                while (max_semid && (semary[--max_semid] == IPC_UNUSED));
        semary[id] = (struct semid_ds *) IPC_UNUSED;
        used_semids--;

        /* Invalidate the existing undo structures for this semaphore set.
         * (They will be freed without any further action in sem_exit().)
         */
        for (un = sma->undo; un; un = un->id_next)
                un->semid = -1;

        /* Wake up all pending processes and let them fail with EIDRM. */
        for (q = sma->sem_pending; q; q = q->next) {
                q->status = -EIDRM;
                q->prev = NULL;
                wake_up_interruptible(&q->sleeper); /* doesn't sleep! */
        }

        kfree(sma);
}
asmlinkage int sys_semctl (int semid, int semnum, int cmd, union semun arg)
{
        struct semid_ds *buf = NULL;
        struct semid_ds tbuf;
        int i, id, val = 0;
        struct semid_ds *sma;
        struct ipc_perm *ipcp;
        struct sem *curr = NULL;
        struct sem_undo *un;
        unsigned int nsems;
        ushort *array = NULL;
        ushort sem_io[SEMMSL];
        int err = -EINVAL;

        lock_kernel();
        if (semid < 0 || semnum < 0 || cmd < 0)
                goto out;

        switch (cmd) {
        case IPC_INFO:
        case SEM_INFO:
        {
                struct seminfo seminfo, *tmp = arg.__buf;
                seminfo.semmni = SEMMNI;
                seminfo.semmns = SEMMNS;
                seminfo.semmsl = SEMMSL;
                seminfo.semopm = SEMOPM;
                seminfo.semvmx = SEMVMX;
                seminfo.semmnu = SEMMNU;
                seminfo.semmap = SEMMAP;
                seminfo.semume = SEMUME;
                seminfo.semusz = SEMUSZ;
                seminfo.semaem = SEMAEM;
                if (cmd == SEM_INFO) {
                        seminfo.semusz = used_semids;
                        seminfo.semaem = used_sems;
                }
                err = -EFAULT;
                if (copy_to_user (tmp, &seminfo, sizeof(struct seminfo)))
                        goto out;
                err = max_semid;
                goto out;
        }

        case SEM_STAT:
                buf = arg.buf;
                err = -EINVAL;
                if (semid > max_semid)
                        goto out;
                sma = semary[semid];
                if (sma == IPC_UNUSED || sma == IPC_NOID)
                        goto out;
                err = -EACCES;
                if (ipcperms (&sma->sem_perm, S_IRUGO))
                        goto out;
                id = (unsigned int) sma->sem_perm.seq * SEMMNI + semid;
                tbuf.sem_perm  = sma->sem_perm;
                tbuf.sem_otime = sma->sem_otime;
                tbuf.sem_ctime = sma->sem_ctime;
                tbuf.sem_nsems = sma->sem_nsems;
                err = -EFAULT;
                if (copy_to_user (buf, &tbuf, sizeof(*buf)) == 0)
                        err = id;
                goto out;
        }

        id = (unsigned int) semid % SEMMNI;
        sma = semary[id];
        err = -EINVAL;
        if (sma == IPC_UNUSED || sma == IPC_NOID)
                goto out;
        ipcp = &sma->sem_perm;
        nsems = sma->sem_nsems;
        err = -EIDRM;
        if (sma->sem_perm.seq != (unsigned int) semid / SEMMNI)
                goto out;

        switch (cmd) {
        case GETVAL:
        case GETPID:
        case GETNCNT:
        case GETZCNT:
        case SETVAL:
                err = -EINVAL;
                if (semnum >= nsems)
                        goto out;
                curr = &sma->sem_base[semnum];
                break;
        }

        switch (cmd) {
        case GETVAL:
        case GETPID:
        case GETNCNT:
        case GETZCNT:
        case GETALL:
                err = -EACCES;
                if (ipcperms (ipcp, S_IRUGO))
                        goto out;
                switch (cmd) {
                case GETVAL : err = curr->semval; goto out;
                case GETPID : err = curr->sempid & 0xffff; goto out;
                case GETNCNT: err = count_semncnt(sma,semnum); goto out;
                case GETZCNT: err = count_semzcnt(sma,semnum); goto out;
                case GETALL : array = arg.array; break;
                }
                break;
        case SETVAL:
                val = arg.val;
                err = -ERANGE;
                if (val > SEMVMX || val < 0)
                        goto out;
                break;
        case IPC_RMID:
                if (current->euid == ipcp->cuid ||
                    current->euid == ipcp->uid || capable(CAP_SYS_ADMIN)) {
                        freeary (id);
                        err = 0;
                        goto out;
                }
                err = -EPERM;
                goto out;
        case SETALL: /* arg is a pointer to an array of ushort */
                array = arg.array;
                err = -EFAULT;
                if (copy_from_user (sem_io, array, nsems*sizeof(ushort)))
                        goto out;
                err = 0;
                for (i = 0; i < nsems; i++)
                        if (sem_io[i] > SEMVMX) {
                                err = -ERANGE;
                                goto out;
                        }
                break;
        case IPC_STAT:
                buf = arg.buf;
                break;
        case IPC_SET:
                buf = arg.buf;
                err = copy_from_user (&tbuf, buf, sizeof (*buf));
                if (err)
                        err = -EFAULT;
                break;
        }

        /* The user-space copies above may have slept, so recheck
         * that the semaphore set still exists.
         */
        err = -EIDRM;
        if (semary[id] == IPC_UNUSED || semary[id] == IPC_NOID)
                goto out;
        if (sma->sem_perm.seq != (unsigned int) semid / SEMMNI)
                goto out;

        switch (cmd) {
        case GETALL:
                err = -EACCES;
                if (ipcperms (ipcp, S_IRUGO))
                        goto out;
                for (i = 0; i < sma->sem_nsems; i++)
                        sem_io[i] = sma->sem_base[i].semval;
                err = -EFAULT;
                if (copy_to_user (array, sem_io, nsems*sizeof(ushort)))
                        goto out;
                err = 0;
                goto out;
        case SETVAL:
                err = -EACCES;
                if (ipcperms (ipcp, S_IWUGO))
                        goto out;
                for (un = sma->undo; un; un = un->id_next)
                        un->semadj[semnum] = 0;
                curr->semval = val;
                sma->sem_ctime = CURRENT_TIME;
                /* maybe some queued-up processes were waiting for this */
                update_queue(sma);
                err = 0;
                goto out;
        case IPC_SET:
                if (current->euid == ipcp->cuid ||
                    current->euid == ipcp->uid || capable(CAP_SYS_ADMIN)) {
                        ipcp->uid = tbuf.sem_perm.uid;
                        ipcp->gid = tbuf.sem_perm.gid;
                        ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
                                | (tbuf.sem_perm.mode & S_IRWXUGO);
                        sma->sem_ctime = CURRENT_TIME;
                        err = 0;
                        goto out;
                }
                err = -EPERM;
                goto out;
        case IPC_STAT:
                err = -EACCES;
                if (ipcperms (ipcp, S_IRUGO))
                        goto out;
                tbuf.sem_perm  = sma->sem_perm;
                tbuf.sem_otime = sma->sem_otime;
                tbuf.sem_ctime = sma->sem_ctime;
                tbuf.sem_nsems = sma->sem_nsems;
                err = -EFAULT;
                if (copy_to_user (buf, &tbuf, sizeof(*buf)))
                        goto out;
                err = 0;
                goto out;
        case SETALL:
                err = -EACCES;
                if (ipcperms (ipcp, S_IWUGO))
                        goto out;
                for (i = 0; i < nsems; i++)
                        sma->sem_base[i].semval = sem_io[i];
                for (un = sma->undo; un; un = un->id_next)
                        for (i = 0; i < nsems; i++)
                                un->semadj[i] = 0;
                sma->sem_ctime = CURRENT_TIME;
                /* maybe some queued-up processes were waiting for this */
                update_queue(sma);
                err = 0;
                goto out;
        default:
                err = -EINVAL;
                goto out;
        }

out:
        unlock_kernel();
        return err;
}
asmlinkage int sys_semop (int semid, struct sembuf *tsops, unsigned nsops)
{
        int id, size, error = -EINVAL;
        struct semid_ds *sma;
        struct sembuf sops[SEMOPM], *sop;
        struct sem_undo *un;
        int undos = 0, decrease = 0, alter = 0;
        struct sem_queue queue;

        lock_kernel();
        if (nsops < 1 || semid < 0)
                goto out;
        error = -E2BIG;
        if (nsops > SEMOPM)
                goto out;
        error = -EFAULT;
        if (copy_from_user (sops, tsops, nsops * sizeof(*tsops)))
                goto out;
        id = (unsigned int) semid % SEMMNI;
        error = -EINVAL;
        if ((sma = semary[id]) == IPC_UNUSED || sma == IPC_NOID)
                goto out;
        error = -EIDRM;
        if (sma->sem_perm.seq != (unsigned int) semid / SEMMNI)
                goto out;

        error = -EFBIG;
        for (sop = sops; sop < sops + nsops; sop++) {
                if (sop->sem_num >= sma->sem_nsems)
                        goto out;
                if (sop->sem_flg & SEM_UNDO)
                        undos++;
                if (sop->sem_op < 0)
                        decrease = 1;
                if (sop->sem_op > 0)
                        alter = 1;
        }
        alter |= decrease;

        error = -EACCES;
        if (ipcperms(&sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
                goto out;
        if (undos) {
                /* Make sure we have an undo structure
                 * for this process and this semaphore set.
                 */
                for (un = current->semundo; un; un = un->proc_next)
                        if (un->semid == semid)
                                break;
                if (!un) {
                        size = sizeof(struct sem_undo) + sizeof(short)*sma->sem_nsems;
                        un = (struct sem_undo *) kmalloc(size, GFP_ATOMIC);
                        if (!un) {
                                error = -ENOMEM;
                                goto out;
                        }
                        memset(un, 0, size);
                        un->semadj = (short *) &un[1];
                        un->semid = semid;
                        un->proc_next = current->semundo;
                        current->semundo = un;
                        un->id_next = sma->undo;
                        sma->undo = un;
                }
        } else
                un = NULL;

        error = try_atomic_semop (sma, sops, nsops, un, current->pid, 0);
        if (error <= 0)
                goto update;

        /* We need to sleep on this operation, so we put the current
         * task into the pending queue and go to sleep.
         */
        queue.sma = sma;
        queue.sops = sops;
        queue.nsops = nsops;
        queue.undo = un;
        queue.pid = current->pid;
        queue.alter = decrease;
        current->semsleeping = &queue;
        if (alter)
                append_to_queue(sma,&queue);
        else
                prepend_to_queue(sma,&queue);

        for (;;) {
                queue.status = -EINTR;
                queue.sleeper = NULL;
                interruptible_sleep_on(&queue.sleeper);

                /*
                 * If queue.status == 1 we were woken up and
                 * have to retry, else we simply return.
                 * If an interrupt occurred we have to clean up the
                 * queue.
                 */
                if (queue.status == 1) {
                        error = try_atomic_semop (sma, sops, nsops, un,
                                                  current->pid, 0);
                        if (error <= 0)
                                break;
                } else {
                        error = queue.status;
                        if (queue.prev) /* got interrupted */
                                break;
                        /* Everything done by update_queue */
                        current->semsleeping = NULL;
                        goto out;
                }
        }
        current->semsleeping = NULL;
        remove_from_queue(sma,&queue);
update:
        if (alter)
                update_queue (sma);
out:
        unlock_kernel();
        return error;
}
/*
 * add semadj values to semaphores, free undo structures.
 * undo structures are not freed when semaphore arrays are destroyed,
 * so some of them may be out of date.
 * IMPLEMENTATION NOTE: There is some confusion over whether the
 * set of adjustments that needs to be done should be done in an atomic
 * manner or not. That is, if we are attempting to decrement the semval,
 * should we queue up and wait until we can do so legally?
 * The original implementation attempted to do this (queue and wait).
 * The current implementation does not do so. The POSIX standard
 * and SVID should be consulted to determine what behavior is mandated.
 */
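/*
 * Worked example (added for clarity): a process performs semop with
 * sem_op = -2 and SEM_UNDO on semaphore 0. try_atomic_semop records
 * un->semadj[0] -= (-2), i.e. semadj[0] == +2. If the process exits
 * without releasing, the loop below applies semval += semadj[0] and
 * returns the two units the process still held.
 */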
void sem_exit (void)
{
        struct sem_queue *q;
        struct sem_undo *u, *un = NULL, **up, **unp;
        struct semid_ds *sma;
        int nsems, i;

        /* If the current process was sleeping for a semaphore,
         * remove it from the queue.
         */
        if ((q = current->semsleeping)) {
                if (q->prev)
                        remove_from_queue(q->sma,q);
                current->semsleeping = NULL;
        }

        for (up = &current->semundo; (u = *up); *up = u->proc_next, kfree(u)) {
                if (u->semid == -1)
                        continue;
                sma = semary[(unsigned int) u->semid % SEMMNI];
                if (sma == IPC_UNUSED || sma == IPC_NOID)
                        continue;
                if (sma->sem_perm.seq != (unsigned int) u->semid / SEMMNI)
                        continue;
                /* remove u from the sma->undo list */
                for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
                        if (u == un)
                                goto found;
                }
                printk ("sem_exit undo list error id=%d\n", u->semid);
                break;
found:
                *unp = un->id_next;
                /* perform adjustments registered in u */
                nsems = sma->sem_nsems;
                for (i = 0; i < nsems; i++) {
                        struct sem * sem = &sma->sem_base[i];
                        sem->semval += u->semadj[i];
                        if (sem->semval < 0)
                                sem->semval = 0; /* shouldn't happen */
                        sem->sempid = current->pid;
                }
                sma->sem_otime = CURRENT_TIME;
                /* maybe some queued-up processes were waiting for this */
                update_queue(sma);
        }
        current->semundo = NULL;
}