3 * Copyright (C) 1992 Krishna Balasubramanian
4 * Copyright (C) 1995 Eric Schenk, Bruno Haible
6 * IMPLEMENTATION NOTES ON CODE REWRITE (Eric Schenk, January 1995):
7 * This code underwent a massive rewrite in order to solve some problems
8 * with the original code. In particular the original code failed to
9 * wake up processes that were waiting for semval to go to 0 if the
10 * value went to 0 and was then incremented rapidly enough. In solving
11 * this problem I have also modified the implementation so that it
12 * processes pending operations in a FIFO manner, thus giving a guarantee
13 * that processes waiting for a lock on the semaphore won't starve
14 * unless another locking process fails to unlock.
15 * In addition the following two changes in behavior have been introduced:
16 * - The original implementation of semop returned the value
17 * last semaphore element examined on success. This does not
18 * match the manual page specifications, and effectively
19 * allows the user to read the semaphore even if they do not
20 * have read permissions. The implementation now returns 0
21 * on success as stated in the manual page.
22 * - There is some confusion over whether the set of undo adjustments
23 * to be performed at exit should be done in an atomic manner.
24 * That is, if we are attempting to decrement the semval should we queue
25 * up and wait until we can do so legally?
26 * The original implementation attempted to do this.
27 * The current implementation does not do so. This is because I don't
28 * think it is the right thing (TM) to do, and because I couldn't
29 * see a clean way to get the old behavior with the new design.
30 * The POSIX standard and SVID should be consulted to determine
31 * what behavior is mandated.
34 #include <linux/errno.h>
35 #include <linux/string.h>
36 #include <linux/sched.h>
37 #include <linux/sem.h>
38 #include <linux/ipc.h>
39 #include <linux/stat.h>
40 #include <linux/malloc.h>
41 #include <linux/smp.h>
42 #include <linux/smp_lock.h>
43 #include <linux/init.h>
45 #include <asm/uaccess.h>
47 extern int ipcperms (struct ipc_perm
*ipcp
, short semflg
);
48 static int newary (key_t
, int, int);
49 static int findkey (key_t key
);
50 static void freeary (int id
);
52 static struct semid_ds
*semary
[SEMMNI
];
53 static int used_sems
= 0, used_semids
= 0;
54 static struct wait_queue
*sem_lock
= NULL
;
55 static int max_semid
= 0;
57 static unsigned short sem_seq
= 0;
59 void __init
sem_init (void)
64 used_sems
= used_semids
= max_semid
= sem_seq
= 0;
65 for (i
= 0; i
< SEMMNI
; i
++)
66 semary
[i
] = (struct semid_ds
*) IPC_UNUSED
;
70 static int findkey (key_t key
)
75 for (id
= 0; id
<= max_semid
; id
++) {
76 while ((sma
= semary
[id
]) == IPC_NOID
)
77 interruptible_sleep_on (&sem_lock
);
78 if (sma
== IPC_UNUSED
)
80 if (key
== sma
->sem_perm
.key
)
/* Allocate and initialise a new semaphore set for `key` with `nsems`
 * semaphores and mode bits from `semflg`; returns the external semaphore
 * id (sequence number folded in) on success.
 * NOTE(review): the extraction dropped several original lines here
 * (declarations, error returns such as -ENOSPC/-ENOMEM, closing braces);
 * the surviving code below is kept byte-identical.
 */
86 static int newary (key_t key
, int nsems
, int semflg
)
90 struct ipc_perm
*ipcp
;
/* Refuse if the system-wide semaphore limit SEMMNS would be exceeded. */
95 if (used_sems
+ nsems
> SEMMNS
)
/* Find a free slot and reserve it with IPC_NOID while kmalloc may sleep. */
97 for (id
= 0; id
< SEMMNI
; id
++)
98 if (semary
[id
] == IPC_UNUSED
) {
99 semary
[id
] = (struct semid_ds
*) IPC_NOID
;
/* One allocation holds the semid_ds header plus the sem array. */
104 size
= sizeof (*sma
) + nsems
* sizeof (struct sem
);
106 sma
= (struct semid_ds
*) kmalloc (size
, GFP_KERNEL
);
/* (this line is the allocation-failure path: release the slot again) */
108 semary
[id
] = (struct semid_ds
*) IPC_UNUSED
;
113 memset (sma
, 0, size
);
/* The per-semaphore array lives immediately after the header. */
114 sma
->sem_base
= (struct sem
*) &sma
[1];
115 ipcp
= &sma
->sem_perm
;
116 ipcp
->mode
= (semflg
& S_IRWXUGO
);
/* Creator and owner are the caller's effective uid/gid. */
118 ipcp
->cuid
= ipcp
->uid
= current
->euid
;
119 ipcp
->gid
= ipcp
->cgid
= current
->egid
;
120 sma
->sem_perm
.seq
= sem_seq
;
121 /* sma->sem_pending = NULL; */
/* Empty pending FIFO: the tail pointer points at the (NULL) head. */
122 sma
->sem_pending_last
= &sma
->sem_pending
;
123 /* sma->undo = NULL; */
124 sma
->sem_nsems
= nsems
;
125 sma
->sem_ctime
= CURRENT_TIME
;
/* External id encodes the sequence number so stale ids are detectable. */
131 return (unsigned int) sma
->sem_perm
.seq
* SEMMNI
+ id
;
/* semget(2): get or create a semaphore set identified by `key`.
 * Returns the external semaphore id, or a negative errno.
 * NOTE(review): the extraction dropped the error-return lines
 * (-EEXIST for IPC_CREAT|IPC_EXCL on an existing key, etc.) and the
 * lock/unlock bracketing; surviving code is kept byte-identical.
 */
134 asmlinkage
int sys_semget (key_t key
, int nsems
, int semflg
)
136 int id
, err
= -EINVAL
;
137 struct semid_ds
*sma
;
/* Reject a per-set semaphore count outside [0, SEMMSL]. */
140 if (nsems
< 0 || nsems
> SEMMSL
)
/* IPC_PRIVATE always creates a fresh set. */
142 if (key
== IPC_PRIVATE
) {
143 err
= newary(key
, nsems
, semflg
);
144 } else if ((id
= findkey (key
)) == -1) { /* key not used */
145 if (!(semflg
& IPC_CREAT
))
148 err
= newary(key
, nsems
, semflg
);
/* Key exists and caller demanded exclusive creation: fails. */
149 } else if (semflg
& IPC_CREAT
&& semflg
& IPC_EXCL
) {
/* Existing set: check size and access permission, then build the id. */
153 if (nsems
> sma
->sem_nsems
)
155 else if (ipcperms(&sma
->sem_perm
, semflg
))
158 err
= (int) sma
->sem_perm
.seq
* SEMMNI
+ id
;
165 /* Manage the doubly linked list sma->sem_pending as a FIFO:
166 * insert new queue elements at the tail sma->sem_pending_last.
168 static inline void insert_into_queue (struct semid_ds
* sma
, struct sem_queue
* q
)
170 *(q
->prev
= sma
->sem_pending_last
) = q
;
171 *(sma
->sem_pending_last
= &q
->next
) = NULL
;
173 static inline void remove_from_queue (struct semid_ds
* sma
, struct sem_queue
* q
)
175 *(q
->prev
) = q
->next
;
177 q
->next
->prev
= q
->prev
;
178 else /* sma->sem_pending_last == &q->next */
179 sma
->sem_pending_last
= q
->prev
;
180 q
->prev
= NULL
; /* mark as removed */
183 /* Determine whether a sequence of semaphore operations would succeed
184 * all at once. Return 0 if yes, 1 if need to sleep, else return error code.
/* NOTE(review): the extraction dropped the loop headers, the error-return
 * lines (-ERANGE/-EAGAIN paths) and the braces; surviving code is kept
 * byte-identical. The trial applies each op to semval and the tail loop
 * rolls the changes back.
 */
186 static int try_semop (struct semid_ds
* sma
, struct sembuf
* sops
, int nsops
)
192 struct sembuf
* sop
= &sops
[i
];
193 struct sem
* curr
= &sma
->sem_base
[sop
->sem_num
];
/* Would the increment push semval past the SEMVMX ceiling? */
194 if (sop
->sem_op
+ curr
->semval
> SEMVMX
) {
/* sem_op == 0 means "wait for zero"; nonzero semval can't proceed. */
198 if (!sop
->sem_op
&& curr
->semval
) {
199 if (sop
->sem_flg
& IPC_NOWAIT
)
/* Tentatively apply the operation. */
206 curr
->semval
+= sop
->sem_op
;
/* A negative result means the decrement would block. */
207 if (curr
->semval
< 0) {
208 if (sop
->sem_flg
& IPC_NOWAIT
)
/* Rollback: undo the tentative updates made above. */
216 struct sembuf
* sop
= &sops
[i
];
217 struct sem
* curr
= &sma
->sem_base
[sop
->sem_num
];
218 curr
->semval
-= sop
->sem_op
;
223 /* Actually perform a sequence of semaphore operations. Atomically. */
224 /* This assumes that try_semop() already returned 0. */
/* NOTE(review): extraction dropped the `return -ERANGE/-EAGAIN`-style
 * lines after the three "race" printks, the sempid update, and braces;
 * surviving code is kept byte-identical. The "race" messages fire when
 * state changed between try_semop() and here, which should not happen.
 */
225 static int do_semop (struct semid_ds
* sma
, struct sembuf
* sops
, int nsops
,
226 struct sem_undo
* un
, int pid
)
230 for (i
= 0; i
< nsops
; i
++) {
231 struct sembuf
* sop
= &sops
[i
];
232 struct sem
* curr
= &sma
->sem_base
[sop
->sem_num
];
/* Re-check the SEMVMX ceiling; failing here indicates a race. */
233 if (sop
->sem_op
+ curr
->semval
> SEMVMX
) {
234 printk("do_semop: race\n");
239 printk("do_semop: race\n");
243 curr
->semval
+= sop
->sem_op
;
244 if (curr
->semval
< 0) {
245 printk("do_semop: race\n");
/* Record the inverse adjustment so exit-time undo can revert it. */
248 if (sop
->sem_flg
& SEM_UNDO
)
249 un
->semadj
[sop
->sem_num
] -= sop
->sem_op
;
/* Stamp the last-semop time on the set. */
253 sma
->sem_otime
= CURRENT_TIME
;
255 /* Previous implementation returned the last semaphore's semval.
256 * This is wrong because we may not have checked read permission,
257 * only write permission.
262 /* Go through the pending queue for the indicated semaphore
263 * looking for tasks that can be completed. Keep cycling through
264 * the queue until a pass is made in which no process is woken up.
/* NOTE(review): extraction dropped the branches that act on `error`
 * (skip still-sleeping entries, store the status into the queue entry)
 * and the braces; surviving code is kept byte-identical.
 */
266 static void update_queue (struct semid_ds
* sma
)
269 struct sem_queue
* q
;
273 for (q
= sma
->sem_pending
; q
; q
= q
->next
) {
/* Re-test whether this sleeper's whole op sequence can now run. */
274 error
= try_semop(sma
, q
->sops
, q
->nsops
);
275 /* Does q->sleeper still need to sleep? */
278 /* Perform the operations the sleeper was waiting for */
280 error
= do_semop(sma
, q
->sops
, q
->nsops
, q
->undo
, q
->pid
);
282 /* Remove it from the queue */
283 remove_from_queue(sma
,q
);
285 wake_up_interruptible(&q
->sleeper
); /* doesn't sleep! */
291 /* The following counts are associated to each semaphore:
292 * semncnt number of tasks waiting on semval being nonzero
293 * semzcnt number of tasks waiting on semval being zero
294 * This model assumes that a task waits on exactly one semaphore.
295 * Since semaphore operations are to be performed atomically, tasks actually
296 * wait on a whole sequence of semaphores simultaneously.
297 * The counts we return here are a rough approximation, but still
298 * warrant that semncnt+semzcnt>0 if the task is on the pending queue.
/* NOTE(review): extraction dropped the `semncnt` accumulator declaration,
 * its increment, the `return semncnt;` line and braces; surviving code is
 * kept byte-identical. Counts pending ops that decrement semnum without
 * IPC_NOWAIT.
 */
300 static int count_semncnt (struct semid_ds
* sma
, ushort semnum
)
303 struct sem_queue
* q
;
/* Walk every queued (sleeping) operation sequence on this set. */
306 for (q
= sma
->sem_pending
; q
; q
= q
->next
) {
307 struct sembuf
* sops
= q
->sops
;
308 int nsops
= q
->nsops
;
310 for (i
= 0; i
< nsops
; i
++)
/* A blocking decrement on this semaphore contributes to semncnt. */
311 if (sops
[i
].sem_num
== semnum
312 && (sops
[i
].sem_op
< 0)
313 && !(sops
[i
].sem_flg
& IPC_NOWAIT
))
/* Count tasks queued on this set that are waiting for semaphore
 * `semnum` to become zero (sem_op == 0, no IPC_NOWAIT).
 * NOTE(review): as with count_semncnt(), the accumulator declaration,
 * increment and `return` were lost in extraction; surviving code is
 * kept byte-identical.
 */
318 static int count_semzcnt (struct semid_ds
* sma
, ushort semnum
)
321 struct sem_queue
* q
;
/* Walk every queued (sleeping) operation sequence on this set. */
324 for (q
= sma
->sem_pending
; q
; q
= q
->next
) {
325 struct sembuf
* sops
= q
->sops
;
326 int nsops
= q
->nsops
;
328 for (i
= 0; i
< nsops
; i
++)
/* A blocking wait-for-zero on this semaphore contributes to semzcnt. */
329 if (sops
[i
].sem_num
== semnum
330 && (sops
[i
].sem_op
== 0)
331 && !(sops
[i
].sem_flg
& IPC_NOWAIT
))
337 /* Free a semaphore set. */
/* NOTE(review): extraction dropped the local declarations (`un`, `q`),
 * the statements inside the undo/wake loops (marking undo entries stale,
 * setting q->status to EIDRM), the kfree of the set, and braces;
 * surviving code is kept byte-identical.
 */
338 static void freeary (int id
)
340 struct semid_ds
*sma
= semary
[id
];
344 /* Invalidate this semaphore set */
/* Bump the sequence number so outstanding external ids go stale. */
346 sem_seq
= (sem_seq
+1) % ((unsigned)(1<<31)/SEMMNI
); /* increment, but avoid overflow */
347 used_sems
-= sma
->sem_nsems
;
/* Shrink max_semid past any trailing unused slots. */
349 while (max_semid
&& (semary
[--max_semid
] == IPC_UNUSED
));
350 semary
[id
] = (struct semid_ds
*) IPC_UNUSED
;
353 /* Invalidate the existing undo structures for this semaphore set.
354 * (They will be freed without any further action in sem_exit().)
356 for (un
= sma
->undo
; un
; un
= un
->id_next
)
359 /* Wake up all pending processes and let them fail with EIDRM. */
360 for (q
= sma
->sem_pending
; q
; q
= q
->next
) {
363 wake_up_interruptible(&q
->sleeper
); /* doesn't sleep! */
/* semctl(2): control operations on a semaphore set.
 * Handles IPC_INFO/SEM_INFO, SEM_STAT, GETVAL/GETPID/GETNCNT/GETZCNT,
 * GETALL/SETALL, SETVAL, IPC_STAT, IPC_SET and (elsewhere in the lost
 * lines) IPC_RMID. Returns a non-negative result or a negative errno.
 * NOTE(review): this extraction is missing a large number of original
 * lines — the switch statements themselves, error returns, and braces.
 * All surviving code is kept byte-identical; comments only are added.
 */
369 asmlinkage
int sys_semctl (int semid
, int semnum
, int cmd
, union semun arg
)
371 struct semid_ds
*buf
= NULL
;
372 struct semid_ds tbuf
;
374 struct semid_ds
*sma
;
375 struct ipc_perm
*ipcp
;
376 struct sem
*curr
= NULL
;
379 ushort
*array
= NULL
;
/* Kernel-side staging buffer for GETALL/SETALL (max SEMMSL values). */
380 ushort sem_io
[SEMMSL
];
384 if (semid
< 0 || semnum
< 0 || cmd
< 0)
/* --- IPC_INFO / SEM_INFO: report the compiled-in limits --- */
391 struct seminfo seminfo
, *tmp
= arg
.__buf
;
392 seminfo
.semmni
= SEMMNI
;
393 seminfo
.semmns
= SEMMNS
;
394 seminfo
.semmsl
= SEMMSL
;
395 seminfo
.semopm
= SEMOPM
;
396 seminfo
.semvmx
= SEMVMX
;
397 seminfo
.semmnu
= SEMMNU
;
398 seminfo
.semmap
= SEMMAP
;
399 seminfo
.semume
= SEMUME
;
400 seminfo
.semusz
= SEMUSZ
;
401 seminfo
.semaem
= SEMAEM
;
/* SEM_INFO reuses semusz/semaem for current usage counters. */
402 if (cmd
== SEM_INFO
) {
403 seminfo
.semusz
= used_semids
;
404 seminfo
.semaem
= used_sems
;
407 if (copy_to_user (tmp
, &seminfo
, sizeof(struct seminfo
)))
/* --- SEM_STAT: semid is a raw slot index here, not an external id --- */
416 if (semid
> max_semid
)
419 if (sma
== IPC_UNUSED
|| sma
== IPC_NOID
)
422 if (ipcperms (&sma
->sem_perm
, S_IRUGO
))
/* Build the external id from the slot's sequence number. */
424 id
= (unsigned int) sma
->sem_perm
.seq
* SEMMNI
+ semid
;
425 tbuf
.sem_perm
= sma
->sem_perm
;
426 tbuf
.sem_otime
= sma
->sem_otime
;
427 tbuf
.sem_ctime
= sma
->sem_ctime
;
428 tbuf
.sem_nsems
= sma
->sem_nsems
;
430 if (copy_to_user (buf
, &tbuf
, sizeof(*buf
)) == 0)
/* --- All remaining commands: decode external id into slot + seq --- */
435 id
= (unsigned int) semid
% SEMMNI
;
438 if (sma
== IPC_UNUSED
|| sma
== IPC_NOID
)
440 ipcp
= &sma
->sem_perm
;
441 nsems
= sma
->sem_nsems
;
/* Stale external id: sequence number no longer matches the slot. */
443 if (sma
->sem_perm
.seq
!= (unsigned int) semid
/ SEMMNI
)
/* Commands addressing a single semaphore select it here. */
455 curr
= &sma
->sem_base
[semnum
];
/* --- read-only queries: need read permission --- */
466 if (ipcperms (ipcp
, S_IRUGO
))
469 case GETVAL
: return curr
->semval
;
470 case GETPID
: return curr
->sempid
;
471 case GETNCNT
: return count_semncnt(sma
,semnum
);
472 case GETZCNT
: return count_semzcnt(sma
,semnum
);
/* --- SETVAL: new value must be within [0, SEMVMX] --- */
481 if (val
> SEMVMX
|| val
< 0)
/* IPC_RMID-style ownership check: creator, owner, or CAP_SYS_ADMIN. */
485 if (current
->euid
== ipcp
->cuid
||
486 current
->euid
== ipcp
->uid
|| capable(CAP_SYS_ADMIN
)) {
493 case SETALL
: /* arg is a pointer to an array of ushort */
496 if (copy_from_user (sem_io
, array
, nsems
*sizeof(ushort
)))
/* Validate every incoming value before touching the set. */
499 for (i
= 0; i
< nsems
; i
++)
500 if (sem_io
[i
] > SEMVMX
) {
/* IPC_STAT/IPC_SET: stage the user's semid_ds in kernel space. */
510 err
= copy_from_user (&tbuf
, buf
, sizeof (*buf
));
/* Re-validate the slot: it may have changed while we copied. */
517 if (semary
[id
] == IPC_UNUSED
|| semary
[id
] == IPC_NOID
)
519 if (sma
->sem_perm
.seq
!= (unsigned int) semid
/ SEMMNI
)
/* --- GETALL: copy all current semvals out to the user --- */
525 if (ipcperms (ipcp
, S_IRUGO
))
527 for (i
= 0; i
< sma
->sem_nsems
; i
++)
528 sem_io
[i
] = sma
->sem_base
[i
].semval
;
529 if (copy_to_user (array
, sem_io
, nsems
*sizeof(ushort
)))
/* --- SETVAL apply: write permission, then clear per-process undo --- */
534 if (ipcperms (ipcp
, S_IWUGO
))
536 for (un
= sma
->undo
; un
; un
= un
->id_next
)
537 un
->semadj
[semnum
] = 0;
539 sma
->sem_ctime
= CURRENT_TIME
;
540 /* maybe some queued-up processes were waiting for this */
/* --- IPC_SET: only creator/owner/CAP_SYS_ADMIN may change perms --- */
544 if (current
->euid
== ipcp
->cuid
||
545 current
->euid
== ipcp
->uid
|| capable(CAP_SYS_ADMIN
)) {
546 ipcp
->uid
= tbuf
.sem_perm
.uid
;
547 ipcp
->gid
= tbuf
.sem_perm
.gid
;
/* Only the rwx permission bits are replaced; other mode bits kept. */
548 ipcp
->mode
= (ipcp
->mode
& ~S_IRWXUGO
)
549 | (tbuf
.sem_perm
.mode
& S_IRWXUGO
);
550 sma
->sem_ctime
= CURRENT_TIME
;
/* --- IPC_STAT: copy the set's metadata out to the user --- */
558 if (ipcperms (ipcp
, S_IRUGO
))
560 tbuf
.sem_perm
= sma
->sem_perm
;
561 tbuf
.sem_otime
= sma
->sem_otime
;
562 tbuf
.sem_ctime
= sma
->sem_ctime
;
563 tbuf
.sem_nsems
= sma
->sem_nsems
;
564 if (copy_to_user (buf
, &tbuf
, sizeof(*buf
)))
/* --- SETALL apply: write all values, reset every undo adjustment --- */
569 if (ipcperms (ipcp
, S_IWUGO
))
571 for (i
= 0; i
< nsems
; i
++)
572 sma
->sem_base
[i
].semval
= sem_io
[i
];
573 for (un
= sma
->undo
; un
; un
= un
->id_next
)
574 for (i
= 0; i
< nsems
; i
++)
576 sma
->sem_ctime
= CURRENT_TIME
;
577 /* maybe some queued-up processes were waiting for this */
/* semop(2): perform an array of semaphore operations atomically,
 * sleeping on the set's pending queue if they cannot all complete now.
 * Returns 0 on success or a negative errno.
 * NOTE(review): extraction dropped many original lines (E2BIG/EFAULT/
 * EINVAL/EIDRM returns, the undos/alter classification inside the scan
 * loop, queue-entry field setup, braces); surviving code is kept
 * byte-identical.
 */
590 asmlinkage
int sys_semop (int semid
, struct sembuf
*tsops
, unsigned nsops
)
592 int i
, id
, size
, error
= -EINVAL
;
593 struct semid_ds
*sma
;
/* Kernel-side copy of the user's op array (at most SEMOPM ops). */
594 struct sembuf sops
[SEMOPM
], *sop
;
596 int undos
= 0, alter
= 0;
599 if (nsops
< 1 || semid
< 0)
607 if (copy_from_user (sops
, tsops
, nsops
* sizeof(*tsops
)))
/* Decode external id into slot index and validate the slot. */
609 id
= (unsigned int) semid
% SEMMNI
;
611 if ((sma
= semary
[id
]) == IPC_UNUSED
|| sma
== IPC_NOID
)
/* Stale id: the slot has been recycled since the caller got it. */
614 if (sma
->sem_perm
.seq
!= (unsigned int) semid
/ SEMMNI
)
/* Scan the ops: bounds-check sem_num, note SEM_UNDO usage. */
616 for (i
= 0; i
< nsops
; i
++) {
619 if (sop
->sem_num
>= sma
->sem_nsems
)
621 if (sop
->sem_flg
& SEM_UNDO
)
/* Write permission needed only if some op alters the set. */
627 if (ipcperms(&sma
->sem_perm
, alter
? S_IWUGO
: S_IRUGO
))
/* 0 = can run now, 1 = must sleep, <0 = hard error. */
629 error
= try_semop(sma
, sops
, nsops
);
633 /* Make sure we have an undo structure
634 * for this process and this semaphore set.
636 for (un
= current
->semundo
; un
; un
= un
->proc_next
)
637 if (un
->semid
== semid
)
/* Not found: allocate one (GFP_ATOMIC: we must not sleep here). */
640 size
= sizeof(struct sem_undo
) + sizeof(short)*sma
->sem_nsems
;
641 un
= (struct sem_undo
*) kmalloc(size
, GFP_ATOMIC
);
/* The semadj array lives immediately after the header. */
647 un
->semadj
= (short *) &un
[1];
/* Link into both the per-process and per-set undo lists. */
649 un
->proc_next
= current
->semundo
;
650 current
->semundo
= un
;
651 un
->id_next
= sma
->undo
;
657 /* the operations go through immediately */
658 error
= do_semop(sma
, sops
, nsops
, un
, current
->pid
);
659 /* maybe some queued-up processes were waiting for this */
663 /* We need to sleep on this operation, so we put the current
664 * task into the pending queue and go to sleep.
/* Queue entry lives on this kernel stack — must be unlinked
 * before we return (see remove_from_queue below and sem_exit()). */
666 struct sem_queue queue
;
672 queue
.pid
= current
->pid
;
674 insert_into_queue(sma
,&queue
);
675 queue
.sleeper
= NULL
;
/* Record where we sleep so sem_exit() can unlink us if we die. */
676 current
->semsleeping
= &queue
;
677 interruptible_sleep_on(&queue
.sleeper
);
678 current
->semsleeping
= NULL
;
679 /* When we wake up, either the operation is finished,
680 * or some kind of error happened.
683 /* operation is finished, update_queue() removed us */
684 error
= queue
.status
;
/* (this path: still queued — interrupted, so unlink ourselves) */
686 remove_from_queue(sma
,&queue
);
/* NOTE(review): this is the body of sem_exit() — the function signature
 * line itself (presumably `void sem_exit (void)`) was lost in extraction,
 * along with declarations of q/i/nsems, `continue` lines, and braces.
 * Surviving code is kept byte-identical. Called at process exit to apply
 * SEM_UNDO adjustments and free the process's undo structures.
 */
696 * add semadj values to semaphores, free undo structures.
697 * undo structures are not freed when semaphore arrays are destroyed
698 * so some of them may be out of date.
699 * IMPLEMENTATION NOTE: There is some confusion over whether the
700 * set of adjustments that needs to be done should be done in an atomic
701 * manner or not. That is, if we are attempting to decrement the semval
702 * should we queue up and wait until we can do so legally?
703 * The original implementation attempted to do this (queue and wait).
704 * The current implementation does not do so. The POSIX standard
705 * and SVID should be consulted to determine what behavior is mandated.
710 struct sem_undo
*u
, *un
= NULL
, **up
, **unp
;
711 struct semid_ds
*sma
;
714 /* If the current process was sleeping for a semaphore,
715 * remove it from the queue.
717 if ((q
= current
->semsleeping
)) {
719 remove_from_queue(q
->sma
,q
);
720 current
->semsleeping
= NULL
;
/* Walk and drain the per-process undo list, kfree()ing each entry
 * after processing it.
 * NOTE(review): "¤t" below is mojibake for "&current" (the HTML
 * entity &curren; swallowed the ampersand) — restore when re-encoding. */
723 for (up
= ¤t
->semundo
; (u
= *up
); *up
= u
->proc_next
, kfree(u
)) {
726 sma
= semary
[(unsigned int) u
->semid
% SEMMNI
];
/* Set gone or being created: this undo entry is stale, skip it. */
727 if (sma
== IPC_UNUSED
|| sma
== IPC_NOID
)
/* Sequence mismatch: set was recycled since the undo was recorded. */
729 if (sma
->sem_perm
.seq
!= (unsigned int) u
->semid
/ SEMMNI
)
731 /* remove u from the sma->undo list */
732 for (unp
= &sma
->undo
; (un
= *unp
); unp
= &un
->id_next
) {
/* (reached only if u was not found on the set's undo list) */
736 printk ("sem_exit undo list error id=%d\n", u
->semid
);
740 /* perform adjustments registered in u */
741 nsems
= sma
->sem_nsems
;
742 for (i
= 0; i
< nsems
; i
++) {
743 struct sem
* sem
= &sma
->sem_base
[i
];
744 sem
->semval
+= u
->semadj
[i
];
/* Clamp a negative result rather than leave an invalid semval. */
746 sem
->semval
= 0; /* shouldn't happen */
747 sem
->sempid
= current
->pid
;
749 sma
->sem_otime
= CURRENT_TIME
;
750 /* maybe some queued-up processes were waiting for this */
753 current
->semundo
= NULL
;