/*
 * linux/ipc/sem.c
 * Copyright (C) 1992 Krishna Balasubramanian
 * Copyright (C) 1995 Eric Schenk, Bruno Haible
 *
 * IMPLEMENTATION NOTES ON CODE REWRITE (Eric Schenk, January 1995):
 * This code underwent a massive rewrite in order to solve some problems
 * with the original code. In particular the original code failed to
 * wake up processes that were waiting for semval to go to 0 if the
 * value went to 0 and was then incremented rapidly enough. In solving
 * this problem I have also modified the implementation so that it
 * processes pending operations in a FIFO manner, thus giving a guarantee
 * that processes waiting for a lock on the semaphore won't starve
 * unless another locking process fails to unlock.
 * In addition the following two changes in behavior have been introduced:
 * - The original implementation of semop returned the value of the
 *   last semaphore element examined on success. This does not
 *   match the manual page specification, and effectively
 *   allows the user to read the semaphore even if they do not
 *   have read permission. The implementation now returns 0
 *   on success as stated in the manual page.
 * - There is some confusion over whether the set of undo adjustments
 *   to be performed at exit should be done in an atomic manner.
 *   That is, if we are attempting to decrement the semval, should we queue
 *   up and wait until we can do so legally?
 *   The original implementation attempted to do this.
 *   The current implementation does not. This is because I don't
 *   think it is the right thing (TM) to do, and because I couldn't
 *   see a clean way to get the old behavior with the new design.
 *   The POSIX standard and SVID should be consulted to determine
 *   what behavior is mandated.
 *
 * Further notes on refinement (Christoph Rohland, December 1998):
 * - The POSIX standard says that the undo adjustments should simply be
 *   applied. So the current implementation is OK.
 * - The previous code had two flaws:
 *   1) It actively gave the semaphore to the next waiting process
 *      sleeping on the semaphore. Since this process did not have the
 *      cpu, this led to many unnecessary context switches and bad
 *      performance. Now we only check which process should be able to
 *      get the semaphore, and if this process wants to reduce some
 *      semaphore value we simply wake it up without doing the
 *      operation. So it has to try to get it later. Thus e.g. the
 *      running process may reacquire the semaphore during the current
 *      time slice. If it only waits for zero or increases the semaphore,
 *      we do the operation in advance and wake it up.
 *   2) It did not wake up all zero-waiting processes. We try to do
 *      better, but only get the semops right which only wait for zero or
 *      increase. If there are decrement operations in the operations
 *      array, we do the same as before.
 *
 * /proc/sysvipc/sem support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * SMP-threaded, sysctl's added
 * (c) 1999 Manfred Spraul <manfreds@colorfullife.com>
 * Enforced range limit on SEM_UNDO
 * (c) 2001 Red Hat Inc <alan@redhat.com>
 */
#include <linux/config.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/smp_lock.h>
#include <linux/security.h>
#include <asm/uaccess.h>
#include "util.h"

#define sem_lock(id)	((struct sem_array*)ipc_lock(&sem_ids,id))
#define sem_unlock(sma)	ipc_unlock(&(sma)->sem_perm)
#define sem_rmid(id)	((struct sem_array*)ipc_rmid(&sem_ids,id))
#define sem_checkid(sma, semid)	\
	ipc_checkid(&sem_ids,&sma->sem_perm,semid)
#define sem_buildid(id, seq) \
	ipc_buildid(&sem_ids, id, seq)
static struct ipc_ids sem_ids;
static int newary (key_t, int, int);
static void freeary (int id);
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
#endif

#define SEMMSL_FAST	256 /* 512 bytes on stack */
#define SEMOPM_FAST	64  /* ~ 372 bytes on stack */

/*
 * linked list protection:
 *	sem_undo.id_next,
 *	sem_array.sem_pending{,last},
 *	sem_array.sem_undo: sem_lock() for read/write
 *	sem_undo.proc_next: only "current" is allowed to read/write that field.
 */

int sem_ctls[4] = {SEMMSL, SEMMNS, SEMOPM, SEMMNI};
#define sc_semmsl	(sem_ctls[0])
#define sc_semmns	(sem_ctls[1])
#define sc_semopm	(sem_ctls[2])
#define sc_semmni	(sem_ctls[3])

static int used_sems;
void __init sem_init (void)
{
	used_sems = 0;
	ipc_init_ids(&sem_ids, sc_semmni);

#ifdef CONFIG_PROC_FS
	create_proc_read_entry("sysvipc/sem", 0, 0, sysvipc_sem_read_proc, NULL);
#endif
}
static int newary (key_t key, int nsems, int semflg)
{
	int id;
	int retval;
	struct sem_array *sma;
	int size;

	if (!nsems)
		return -EINVAL;
	if (used_sems + nsems > sc_semmns)
		return -ENOSPC;

	size = sizeof (*sma) + nsems * sizeof (struct sem);
	sma = ipc_rcu_alloc(size);
	if (!sma) {
		return -ENOMEM;
	}
	memset (sma, 0, size);

	sma->sem_perm.mode = (semflg & S_IRWXUGO);
	sma->sem_perm.key = key;

	sma->sem_perm.security = NULL;
	retval = security_ops->sem_alloc_security(sma);
	if (retval) {
		ipc_rcu_free(sma, size);
		return retval;
	}
	id = ipc_addid(&sem_ids, &sma->sem_perm, sc_semmni);
	if(id == -1) {
		security_ops->sem_free_security(sma);
		ipc_rcu_free(sma, size);
		return -ENOSPC;
	}
	used_sems += nsems;

	sma->sem_base = (struct sem *) &sma[1];
	/* sma->sem_pending = NULL; */
	sma->sem_pending_last = &sma->sem_pending;
	/* sma->undo = NULL; */
	sma->sem_nsems = nsems;
	sma->sem_ctime = get_seconds();
	sem_unlock(sma);

	return sem_buildid(id, sma->sem_perm.seq);
}
asmlinkage long sys_semget (key_t key, int nsems, int semflg)
{
	int id, err = -EINVAL;
	struct sem_array *sma;

	if (nsems < 0 || nsems > sc_semmsl)
		return -EINVAL;
	down(&sem_ids.sem);

	if (key == IPC_PRIVATE) {
		err = newary(key, nsems, semflg);
	} else if ((id = ipc_findkey(&sem_ids, key)) == -1) { /* key not used */
		if (!(semflg & IPC_CREAT))
			err = -ENOENT;
		else
			err = newary(key, nsems, semflg);
	} else if (semflg & IPC_CREAT && semflg & IPC_EXCL) {
		err = -EEXIST;
	} else {
		sma = sem_lock(id);
		if(sma==NULL)
			BUG();
		if (nsems > sma->sem_nsems)
			err = -EINVAL;
		else if (ipcperms(&sma->sem_perm, semflg))
			err = -EACCES;
		else
			err = sem_buildid(id, sma->sem_perm.seq);
		sem_unlock(sma);
	}

	up(&sem_ids.sem);
	return err;
}
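
/*
 * Illustration (editorial, not part of the kernel build): a minimal,
 * hedged sketch of how userspace would obtain a set of three semaphores
 * through this syscall. The key and mode bits are arbitrary example values.
 *
 *	#include <sys/types.h>
 *	#include <sys/ipc.h>
 *	#include <sys/sem.h>
 *
 *	int semid = semget((key_t)0x1234, 3, IPC_CREAT | IPC_EXCL | 0600);
 *	if (semid < 0) {
 *		// errno is EEXIST if the key exists and IPC_EXCL was given,
 *		// EINVAL if nsems exceeds the existing set's size, ...
 *	}
 */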
/* doesn't acquire the sem_lock on error! */
static int sem_revalidate(int semid, struct sem_array* sma, int nsems, short flg)
{
	struct sem_array* smanew;

	smanew = sem_lock(semid);
	if(smanew==NULL)
		return -EIDRM;
	if(smanew != sma || sem_checkid(sma,semid) || sma->sem_nsems != nsems) {
		sem_unlock(smanew);
		return -EIDRM;
	}

	if (ipcperms(&sma->sem_perm, flg)) {
		sem_unlock(smanew);
		return -EACCES;
	}
	return 0;
}
/* Manage the doubly linked list sma->sem_pending as a FIFO:
 * insert new queue elements at the tail sma->sem_pending_last.
 */
static inline void append_to_queue (struct sem_array * sma,
				    struct sem_queue * q)
{
	*(q->prev = sma->sem_pending_last) = q;
	*(sma->sem_pending_last = &q->next) = NULL;
}

static inline void prepend_to_queue (struct sem_array * sma,
				     struct sem_queue * q)
{
	q->next = sma->sem_pending;
	*(q->prev = &sma->sem_pending) = q;
	if (q->next)
		q->next->prev = &q->next;
	else /* sma->sem_pending_last == &sma->sem_pending */
		sma->sem_pending_last = &q->next;
}

static inline void remove_from_queue (struct sem_array * sma,
				      struct sem_queue * q)
{
	*(q->prev) = q->next;
	if (q->next)
		q->next->prev = q->prev;
	else /* sma->sem_pending_last == &q->next */
		sma->sem_pending_last = q->prev;
	q->prev = NULL; /* mark as removed */
}
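
/*
 * Editorial note on the list representation above: q->prev is not a
 * pointer to the previous element but a pointer to the *link* that points
 * at q (either &sma->sem_pending or &prev_q->next). A hedged, self-contained
 * sketch of the same technique outside the kernel:
 *
 *	struct node { struct node *next, **prev; };
 *	struct node *head;
 *	struct node **tail = &head;
 *
 *	void append(struct node *n)
 *	{
 *		// hook n into the link currently held in tail,
 *		// then advance tail to n's own next link
 *		*(n->prev = tail) = n;
 *		*(tail = &n->next) = NULL;
 *	}
 *
 * Deletion then never needs to walk the list: *(q->prev) = q->next
 * unlinks q in O(1), even from the head.
 */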
/*
 * Determine whether a sequence of semaphore operations would succeed
 * all at once. Return 0 if yes, 1 if need to sleep, else return error code.
 */

static int try_atomic_semop (struct sem_array * sma, struct sembuf * sops,
			     int nsops, struct sem_undo *un, int pid,
			     int do_undo)
{
	int result, sem_op;
	struct sembuf *sop;
	struct sem * curr;

	for (sop = sops; sop < sops + nsops; sop++) {
		curr = sma->sem_base + sop->sem_num;
		sem_op = sop->sem_op;

		if (!sem_op && curr->semval)
			goto would_block;

		curr->sempid = (curr->sempid << 16) | pid;
		curr->semval += sem_op;
		if (sop->sem_flg & SEM_UNDO)
		{
			int undo = un->semadj[sop->sem_num] - sem_op;
			/*
			 * Exceeding the undo range is an error.
			 */
			if (undo < (-SEMAEM - 1) || undo > SEMAEM)
			{
				/* Don't undo the undo */
				sop->sem_flg &= ~SEM_UNDO;
				goto out_of_range;
			}
			un->semadj[sop->sem_num] = undo;
		}
		if (curr->semval < 0)
			goto would_block;
		if (curr->semval > SEMVMX)
			goto out_of_range;
	}

	if (do_undo)
	{
		sop--;
		result = 0;
		goto undo;
	}

	sma->sem_otime = get_seconds();
	return 0;

out_of_range:
	result = -ERANGE;
	goto undo;

would_block:
	if (sop->sem_flg & IPC_NOWAIT)
		result = -EAGAIN;
	else
		result = 1;

undo:
	while (sop >= sops) {
		curr = sma->sem_base + sop->sem_num;
		curr->semval -= sop->sem_op;
		curr->sempid >>= 16;

		if (sop->sem_flg & SEM_UNDO)
			un->semadj[sop->sem_num] += sop->sem_op;
		sop--;
	}

	return result;
}
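
/*
 * Worked example (editorial, hedged): given a two-element operation array
 * like the one below, try_atomic_semop() either applies both elements or
 * neither, which is what makes a multi-semaphore semop() atomic:
 *
 *	struct sembuf ops[2] = {
 *		{ .sem_num = 0, .sem_op = -1, .sem_flg = 0 },  // take sem 0
 *		{ .sem_num = 1, .sem_op = +1, .sem_flg = 0 },  // bump sem 1
 *	};
 *
 * If semaphore 0 is already 0, the first element "would block": every
 * change applied so far is rolled back by the undo loop, and the caller
 * gets 1 (go to sleep) or -EAGAIN (if IPC_NOWAIT was set).
 */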
/* Go through the pending queue for the indicated semaphore
 * looking for tasks that can be completed.
 */
static void update_queue (struct sem_array * sma)
{
	int error;
	struct sem_queue * q;

	for (q = sma->sem_pending; q; q = q->next) {

		if (q->status == 1)
			continue;	/* this one was woken up before */

		error = try_atomic_semop(sma, q->sops, q->nsops,
					 q->undo, q->pid, q->alter);

		/* Does q->sleeper still need to sleep? */
		if (error <= 0) {
			/* Found one, wake it up */
			wake_up_process(q->sleeper);
			if (error == 0 && q->alter) {
				/* if q->alter, let it retry the operation itself */
				q->status = 1;
				return;
			}
			q->status = error;
			remove_from_queue(sma,q);
		}
	}
}
/* The following counts are associated to each semaphore:
 *	semncnt		number of tasks waiting on semval being nonzero
 *	semzcnt		number of tasks waiting on semval being zero
 * This model assumes that a task waits on exactly one semaphore.
 * Since semaphore operations are to be performed atomically, tasks actually
 * wait on a whole sequence of semaphores simultaneously.
 * The counts we return here are a rough approximation, but still
 * guarantee that semncnt+semzcnt>0 if the task is on the pending queue.
 */
static int count_semncnt (struct sem_array * sma, ushort semnum)
{
	int semncnt;
	struct sem_queue * q;

	semncnt = 0;
	for (q = sma->sem_pending; q; q = q->next) {
		struct sembuf * sops = q->sops;
		int nsops = q->nsops;
		int i;
		for (i = 0; i < nsops; i++)
			if (sops[i].sem_num == semnum
			    && (sops[i].sem_op < 0)
			    && !(sops[i].sem_flg & IPC_NOWAIT))
				semncnt++;
	}
	return semncnt;
}

static int count_semzcnt (struct sem_array * sma, ushort semnum)
{
	int semzcnt;
	struct sem_queue * q;

	semzcnt = 0;
	for (q = sma->sem_pending; q; q = q->next) {
		struct sembuf * sops = q->sops;
		int nsops = q->nsops;
		int i;
		for (i = 0; i < nsops; i++)
			if (sops[i].sem_num == semnum
			    && (sops[i].sem_op == 0)
			    && !(sops[i].sem_flg & IPC_NOWAIT))
				semzcnt++;
	}
	return semzcnt;
}
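
/*
 * Illustration (editorial, hedged): these counts surface to userspace
 * through semctl(). A sketch of reading them for semaphore 0 of an
 * existing set:
 *
 *	int ncnt = semctl(semid, 0, GETNCNT);  // tasks waiting for increase
 *	int zcnt = semctl(semid, 0, GETZCNT);  // tasks waiting for zero
 *
 * Both are approximations under multi-sembuf operations, as the comment
 * above count_semncnt() explains.
 */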
/* Free a semaphore set. */
static void freeary (int id)
{
	struct sem_array *sma;
	struct sem_undo *un;
	struct sem_queue *q;
	int size;

	sma = sem_rmid(id);

	/* Invalidate the existing undo structures for this semaphore set.
	 * (They will be freed without any further action in sem_exit()
	 * or during the next semop.)
	 */
	for (un = sma->undo; un; un = un->id_next)
		un->semid = -1;

	/* Wake up all pending processes and let them fail with EIDRM. */
	for (q = sma->sem_pending; q; q = q->next) {
		q->status = -EIDRM;
		q->prev = NULL;
		wake_up_process(q->sleeper); /* doesn't sleep */
	}
	sem_unlock(sma);

	used_sems -= sma->sem_nsems;
	size = sizeof (*sma) + sma->sem_nsems * sizeof (struct sem);
	security_ops->sem_free_security(sma);
	ipc_rcu_free(sma, size);
}
static unsigned long copy_semid_to_user(void *buf, struct semid64_ds *in, int version)
{
	switch(version) {
	case IPC_64:
		return copy_to_user(buf, in, sizeof(*in));
	case IPC_OLD:
	    {
		struct semid_ds out;

		ipc64_perm_to_ipc_perm(&in->sem_perm, &out.sem_perm);

		out.sem_otime	= in->sem_otime;
		out.sem_ctime	= in->sem_ctime;
		out.sem_nsems	= in->sem_nsems;

		return copy_to_user(buf, &out, sizeof(out));
	    }
	default:
		return -EINVAL;
	}
}
static int semctl_nolock(int semid, int semnum, int cmd, int version, union semun arg)
{
	int err = -EINVAL;
	struct sem_array *sma;

	switch(cmd) {
	case IPC_INFO:
	case SEM_INFO:
	{
		struct seminfo seminfo;
		int max_id;

		memset(&seminfo,0,sizeof(seminfo));
		seminfo.semmni = sc_semmni;
		seminfo.semmns = sc_semmns;
		seminfo.semmsl = sc_semmsl;
		seminfo.semopm = sc_semopm;
		seminfo.semvmx = SEMVMX;
		seminfo.semmnu = SEMMNU;
		seminfo.semmap = SEMMAP;
		seminfo.semume = SEMUME;
		down(&sem_ids.sem);
		if (cmd == SEM_INFO) {
			seminfo.semusz = sem_ids.in_use;
			seminfo.semaem = used_sems;
		} else {
			seminfo.semusz = SEMUSZ;
			seminfo.semaem = SEMAEM;
		}
		max_id = sem_ids.max_id;
		up(&sem_ids.sem);
		if (copy_to_user (arg.__buf, &seminfo, sizeof(struct seminfo)))
			return -EFAULT;
		return (max_id < 0) ? 0: max_id;
	}
	case SEM_STAT:
	{
		struct semid64_ds tbuf;
		int id;

		if(semid >= sem_ids.size)
			return -EINVAL;

		memset(&tbuf,0,sizeof(tbuf));

		sma = sem_lock(semid);
		if(sma == NULL)
			return -EINVAL;

		err = -EACCES;
		if (ipcperms (&sma->sem_perm, S_IRUGO))
			goto out_unlock;
		id = sem_buildid(semid, sma->sem_perm.seq);

		kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
		tbuf.sem_otime	= sma->sem_otime;
		tbuf.sem_ctime	= sma->sem_ctime;
		tbuf.sem_nsems	= sma->sem_nsems;
		sem_unlock(sma);
		if (copy_semid_to_user (arg.buf, &tbuf, version))
			return -EFAULT;
		return id;
	}
	default:
		return -EINVAL;
	}
	return err;
out_unlock:
	sem_unlock(sma);
	return err;
}
static int semctl_main(int semid, int semnum, int cmd, int version, union semun arg)
{
	struct sem_array *sma;
	struct sem* curr;
	int err;
	ushort fast_sem_io[SEMMSL_FAST];
	ushort* sem_io = fast_sem_io;
	int nsems;

	sma = sem_lock(semid);
	if(sma==NULL)
		return -EINVAL;

	nsems = sma->sem_nsems;

	err=-EIDRM;
	if (sem_checkid(sma,semid))
		goto out_unlock;

	err = -EACCES;
	if (ipcperms (&sma->sem_perm, (cmd==SETVAL||cmd==SETALL)?S_IWUGO:S_IRUGO))
		goto out_unlock;

	switch (cmd) {
	case GETALL:
	{
		ushort *array = arg.array;
		int i;

		if(nsems > SEMMSL_FAST) {
			sem_unlock(sma);
			sem_io = ipc_alloc(sizeof(ushort)*nsems);
			if(sem_io == NULL)
				return -ENOMEM;
			err = sem_revalidate(semid, sma, nsems, S_IRUGO);
			if(err)
				goto out_free;
		}

		for (i = 0; i < sma->sem_nsems; i++)
			sem_io[i] = sma->sem_base[i].semval;
		sem_unlock(sma);
		err = 0;
		if(copy_to_user(array, sem_io, nsems*sizeof(ushort)))
			err = -EFAULT;
		goto out_free;
	}
	case SETALL:
	{
		int i;
		struct sem_undo *un;

		sem_unlock(sma);

		if(nsems > SEMMSL_FAST) {
			sem_io = ipc_alloc(sizeof(ushort)*nsems);
			if(sem_io == NULL)
				return -ENOMEM;
		}

		if (copy_from_user (sem_io, arg.array, nsems*sizeof(ushort))) {
			err = -EFAULT;
			goto out_free;
		}

		for (i = 0; i < nsems; i++) {
			if (sem_io[i] > SEMVMX) {
				err = -ERANGE;
				goto out_free;
			}
		}
		err = sem_revalidate(semid, sma, nsems, S_IWUGO);
		if(err)
			goto out_free;

		for (i = 0; i < nsems; i++)
			sma->sem_base[i].semval = sem_io[i];
		for (un = sma->undo; un; un = un->id_next)
			for (i = 0; i < nsems; i++)
				un->semadj[i] = 0;
		sma->sem_ctime = get_seconds();
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
		err = 0;
		goto out_unlock;
	}
	case IPC_STAT:
	{
		struct semid64_ds tbuf;
		memset(&tbuf,0,sizeof(tbuf));
		kernel_to_ipc64_perm(&sma->sem_perm, &tbuf.sem_perm);
		tbuf.sem_otime	= sma->sem_otime;
		tbuf.sem_ctime	= sma->sem_ctime;
		tbuf.sem_nsems	= sma->sem_nsems;
		sem_unlock(sma);
		if (copy_semid_to_user (arg.buf, &tbuf, version))
			return -EFAULT;
		return 0;
	}
	/* GETVAL, GETPID, GETNCNT, GETZCNT, SETVAL: fall-through */
	}
	err = -EINVAL;
	if(semnum < 0 || semnum >= nsems)
		goto out_unlock;

	curr = &sma->sem_base[semnum];

	switch (cmd) {
	case GETVAL:
		err = curr->semval;
		goto out_unlock;
	case GETPID:
		err = curr->sempid & 0xffff;
		goto out_unlock;
	case GETNCNT:
		err = count_semncnt(sma,semnum);
		goto out_unlock;
	case GETZCNT:
		err = count_semzcnt(sma,semnum);
		goto out_unlock;
	case SETVAL:
	{
		int val = arg.val;
		struct sem_undo *un;
		err = -ERANGE;
		if (val > SEMVMX || val < 0)
			goto out_unlock;

		for (un = sma->undo; un; un = un->id_next)
			un->semadj[semnum] = 0;
		curr->semval = val;
		curr->sempid = current->pid;
		sma->sem_ctime = get_seconds();
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
		err = 0;
		goto out_unlock;
	}
	}
out_unlock:
	sem_unlock(sma);
out_free:
	if(sem_io != fast_sem_io)
		ipc_free(sem_io, sizeof(ushort)*nsems);
	return err;
}
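
/*
 * Illustration (editorial, hedged): initializing a whole set with SETALL
 * and reading it back with GETALL from userspace. Note that the kernel
 * clears every undo (semadj) value on SETALL, as the code above shows.
 *
 *	union semun { int val; struct semid_ds *buf; unsigned short *array; } arg;
 *	unsigned short init[3] = { 1, 0, 0 };
 *	unsigned short snap[3];
 *
 *	arg.array = init;
 *	semctl(semid, 0, SETALL, arg);  // semnum is ignored for SETALL
 *	arg.array = snap;
 *	semctl(semid, 0, GETALL, arg);  // snapshot of all semvals
 */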
struct sem_setbuf {
	uid_t	uid;
	gid_t	gid;
	mode_t	mode;
};

static inline unsigned long copy_semid_from_user(struct sem_setbuf *out, void *buf, int version)
{
	switch(version) {
	case IPC_64:
	    {
		struct semid64_ds tbuf;

		if(copy_from_user(&tbuf, buf, sizeof(tbuf)))
			return -EFAULT;

		out->uid	= tbuf.sem_perm.uid;
		out->gid	= tbuf.sem_perm.gid;
		out->mode	= tbuf.sem_perm.mode;

		return 0;
	    }
	case IPC_OLD:
	    {
		struct semid_ds tbuf_old;

		if(copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
			return -EFAULT;

		out->uid	= tbuf_old.sem_perm.uid;
		out->gid	= tbuf_old.sem_perm.gid;
		out->mode	= tbuf_old.sem_perm.mode;

		return 0;
	    }
	default:
		return -EINVAL;
	}
}
static int semctl_down(int semid, int semnum, int cmd, int version, union semun arg)
{
	struct sem_array *sma;
	int err;
	struct sem_setbuf setbuf;
	struct kern_ipc_perm *ipcp;

	if(cmd == IPC_SET) {
		if(copy_semid_from_user (&setbuf, arg.buf, version))
			return -EFAULT;
	}
	sma = sem_lock(semid);
	if(sma==NULL)
		return -EINVAL;

	if (sem_checkid(sma,semid)) {
		err=-EIDRM;
		goto out_unlock;
	}
	ipcp = &sma->sem_perm;

	if (current->euid != ipcp->cuid &&
	    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN)) {
		err=-EPERM;
		goto out_unlock;
	}

	switch(cmd){
	case IPC_RMID:
		freeary(semid);
		err = 0;
		break;
	case IPC_SET:
		ipcp->uid = setbuf.uid;
		ipcp->gid = setbuf.gid;
		ipcp->mode = (ipcp->mode & ~S_IRWXUGO)
				| (setbuf.mode & S_IRWXUGO);
		sma->sem_ctime = get_seconds();
		sem_unlock(sma);
		err = 0;
		break;
	default:
		sem_unlock(sma);
		err = -EINVAL;
		break;
	}
	return err;

out_unlock:
	sem_unlock(sma);
	return err;
}
asmlinkage long sys_semctl (int semid, int semnum, int cmd, union semun arg)
{
	int err = -EINVAL;
	int version;

	if (semid < 0)
		return -EINVAL;

	version = ipc_parse_version(&cmd);

	switch(cmd) {
	case IPC_INFO:
	case SEM_INFO:
	case SEM_STAT:
		err = semctl_nolock(semid,semnum,cmd,version,arg);
		return err;
	case GETALL:
	case GETVAL:
	case GETPID:
	case GETNCNT:
	case GETZCNT:
	case IPC_STAT:
	case SETVAL:
	case SETALL:
		err = semctl_main(semid,semnum,cmd,version,arg);
		return err;
	case IPC_RMID:
	case IPC_SET:
		down(&sem_ids.sem);
		err = semctl_down(semid,semnum,cmd,version,arg);
		up(&sem_ids.sem);
		return err;
	default:
		return -EINVAL;
	}
}
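
/*
 * Illustration (editorial, hedged): the userspace side of this syscall.
 * X/OPEN requires the caller, not <sys/sem.h>, to define union semun, so
 * portable programs declare it themselves before calling semctl():
 *
 *	union semun {
 *		int val;                  // value for SETVAL
 *		struct semid_ds *buf;     // buffer for IPC_STAT, IPC_SET
 *		unsigned short *array;    // array for GETALL, SETALL
 *	};
 *
 *	union semun arg;
 *	arg.val = 1;
 *	semctl(semid, 0, SETVAL, arg);     // initialize semaphore 0 to 1
 *	int v = semctl(semid, 0, GETVAL);  // read it back
 */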
static inline void lock_semundo(void)
{
	struct sem_undo_list *undo_list;

	undo_list = current->sysvsem.undo_list;
	if ((undo_list != NULL) && (atomic_read(&undo_list->refcnt) != 1))
		spin_lock(&undo_list->lock);
}

/* This code has an interaction with copy_semundo().
 * Consider: two tasks are sharing the undo_list. task1
 * acquires the undo_list lock in lock_semundo(). If task2 now
 * exits before task1 releases the lock (by calling
 * unlock_semundo()), then task1 will never call spin_unlock().
 * This leaves the sem_undo_list in a locked state. If task1 now creates
 * task3 and once again shares the sem_undo_list, the sem_undo_list will
 * still be locked, and future SEM_UNDO operations will deadlock. This case
 * is dealt with in copy_semundo() by having it reinitialize the spin lock
 * when the refcnt goes from 1 to 2.
 */
static inline void unlock_semundo(void)
{
	struct sem_undo_list *undo_list;

	undo_list = current->sysvsem.undo_list;
	if ((undo_list != NULL) && (atomic_read(&undo_list->refcnt) != 1))
		spin_unlock(&undo_list->lock);
}
/* If the task doesn't already have an undo_list, then allocate one
 * here. We guarantee there is only one thread using this undo list,
 * and current is THE ONE.
 *
 * If this allocation and assignment succeeds, but later
 * portions of this code fail, there is no need to free the sem_undo_list.
 * Just let it stay associated with the task, and it'll be freed later
 * at exit time.
 *
 * This can block, so callers must hold no locks.
 */
static inline int get_undo_list(struct sem_undo_list **undo_listp)
{
	struct sem_undo_list *undo_list;
	int size;

	undo_list = current->sysvsem.undo_list;
	if (!undo_list) {
		size = sizeof(struct sem_undo_list);
		undo_list = (struct sem_undo_list *) kmalloc(size, GFP_KERNEL);
		if (undo_list == NULL)
			return -ENOMEM;
		memset(undo_list, 0, size);
		/* don't initialize undo_list->lock here. It's done
		 * in copy_semundo() instead.
		 */
		atomic_set(&undo_list->refcnt, 1);
		current->sysvsem.undo_list = undo_list;
	}
	*undo_listp = undo_list;
	return 0;
}
static struct sem_undo* freeundos(struct sem_undo* un)
{
	struct sem_undo* u;
	struct sem_undo** up;

	for(up = &current->sysvsem.undo_list->proc_list;(u=*up);up=&u->proc_next) {
		if(un==u) {
			un=u->proc_next;
			*up=un;
			kfree(u);
			return un;
		}
	}
	printk ("freeundos undo list error id=%d\n", un->semid);
	return un->proc_next;
}

static inline struct sem_undo *find_undo(int semid)
{
	struct sem_undo *un;

	un = NULL;
	if (current->sysvsem.undo_list != NULL) {
		un = current->sysvsem.undo_list->proc_list;
	}
	while(un != NULL) {
		if(un->semid==semid)
			break;
		if(un->semid==-1)
			un=freeundos(un);
		else
			un=un->proc_next;
	}
	return un;
}
/* returns without sem_lock and semundo list locks on error! */
static int alloc_undo(struct sem_array *sma, struct sem_undo** unp, int semid, int alter)
{
	int size, nsems, error;
	struct sem_undo *un, *new_un;
	struct sem_undo_list *undo_list;
	unsigned long saved_add_count;

	nsems = sma->sem_nsems;
	saved_add_count = 0;
	if (current->sysvsem.undo_list != NULL)
		saved_add_count = current->sysvsem.undo_list->add_count;
	sem_unlock(sma);
	unlock_semundo();

	error = get_undo_list(&undo_list);
	if (error)
		return error;

	size = sizeof(struct sem_undo) + sizeof(short)*nsems;
	un = (struct sem_undo *) kmalloc(size, GFP_KERNEL);
	if (!un)
		return -ENOMEM;

	memset(un, 0, size);
	lock_semundo();
	error = sem_revalidate(semid, sma, nsems, alter ? S_IWUGO : S_IRUGO);
	if(error) {
		unlock_semundo();
		kfree(un);
		return error;
	}

	/* alloc_undo has just released all locks and reacquired them.
	 * Another thread may have added the semundo we were looking for
	 * during that window, so we check for it again and only initialize
	 * and add the new one if we don't discover an existing one.
	 */
	new_un = NULL;
	if (current->sysvsem.undo_list->add_count != saved_add_count)
		new_un = find_undo(semid);

	if (new_un != NULL) {
		if (sma->undo != new_un)
			BUG();
		kfree(un);
		un = new_un;
	} else {
		current->sysvsem.undo_list->add_count++;
		un->semadj = (short *) &un[1];
		un->semid = semid;
		un->proc_next = undo_list->proc_list;
		undo_list->proc_list = un;
		un->id_next = sma->undo;
		sma->undo = un;
	}
	*unp = un;
	return 0;
}
asmlinkage long sys_semop (int semid, struct sembuf *tsops, unsigned nsops)
{
	int error = -EINVAL;
	struct sem_array *sma;
	struct sembuf fast_sops[SEMOPM_FAST];
	struct sembuf* sops = fast_sops, *sop;
	struct sem_undo *un;
	int undos = 0, decrease = 0, alter = 0;
	struct sem_queue queue;

	if (nsops < 1 || semid < 0)
		return -EINVAL;
	if (nsops > sc_semopm)
		return -E2BIG;
	if(nsops > SEMOPM_FAST) {
		sops = kmalloc(sizeof(*sops)*nsops,GFP_KERNEL);
		if(sops==NULL)
			return -ENOMEM;
	}
	if (copy_from_user (sops, tsops, nsops * sizeof(*tsops))) {
		error=-EFAULT;
		goto out_free;
	}
	lock_semundo();
	sma = sem_lock(semid);
	error=-EINVAL;
	if(sma==NULL)
		goto out_semundo_free;
	error = -EIDRM;
	if (sem_checkid(sma,semid))
		goto out_unlock_semundo_free;
	error = -EFBIG;
	for (sop = sops; sop < sops + nsops; sop++) {
		if (sop->sem_num >= sma->sem_nsems)
			goto out_unlock_semundo_free;
		if (sop->sem_flg & SEM_UNDO)
			undos++;
		if (sop->sem_op < 0)
			decrease = 1;
		if (sop->sem_op > 0)
			alter = 1;
	}
	alter |= decrease;

	error = -EACCES;
	if (ipcperms(&sma->sem_perm, alter ? S_IWUGO : S_IRUGO))
		goto out_unlock_semundo_free;
	if (undos) {
		/* Make sure we have an undo structure
		 * for this process and this semaphore set.
		 */
		un = find_undo(semid);
		if (!un) {
			error = alloc_undo(sma,&un,semid,alter);
			if (error)
				goto out_free;
		}
	} else
		un = NULL;

	error = try_atomic_semop (sma, sops, nsops, un, current->pid, 0);
	if (error <= 0)
		goto update;

	/* We need to sleep on this operation, so we put the current
	 * task into the pending queue and go to sleep.
	 */

	queue.sma = sma;
	queue.sops = sops;
	queue.nsops = nsops;
	queue.undo = un;
	queue.pid = current->pid;
	queue.alter = decrease;
	queue.id = semid;
	if (alter)
		append_to_queue(sma, &queue);
	else
		prepend_to_queue(sma, &queue);
	current->sysvsem.sleep_list = &queue;

	for (;;) {
		queue.status = -EINTR;
		queue.sleeper = current;
		current->state = TASK_INTERRUPTIBLE;
		sem_unlock(sma);
		unlock_semundo();

		schedule();

		lock_semundo();
		sma = sem_lock(semid);
		if(sma==NULL) {
			if(queue.prev != NULL)
				BUG();
			current->sysvsem.sleep_list = NULL;
			error = -EIDRM;
			goto out_semundo_free;
		}
		/*
		 * If queue.status == 1 we were woken up and
		 * have to retry, else we simply return.
		 * If an interrupt occurred we have to clean up the
		 * queue.
		 */
		if (queue.status == 1)
		{
			error = try_atomic_semop (sma, sops, nsops, un,
						  current->pid,0);
			if (error <= 0)
				break;
		} else {
			error = queue.status;
			if (queue.prev) /* got interrupt */
				break;
			/* Everything done by update_queue */
			current->sysvsem.sleep_list = NULL;
			goto out_unlock_semundo_free;
		}
	}
	current->sysvsem.sleep_list = NULL;
	remove_from_queue(sma,&queue);
update:
	if (alter)
		update_queue (sma);
out_unlock_semundo_free:
	sem_unlock(sma);
out_semundo_free:
	unlock_semundo();
out_free:
	if(sops != fast_sops)
		kfree(sops);
	return error;
}
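
/*
 * Illustration (editorial, hedged): a classic userspace mutex built on this
 * syscall. SEM_UNDO registers a per-process adjustment so the kernel
 * releases the lock in sem_exit() if the process dies while holding it;
 * adding IPC_NOWAIT to sem_flg turns the sleep above into an immediate
 * -EAGAIN instead.
 *
 *	struct sembuf lock   = { 0, -1, SEM_UNDO };
 *	struct sembuf unlock = { 0, +1, SEM_UNDO };
 *
 *	semop(semid, &lock, 1);     // may sleep on the pending queue
 *	// ... critical section ...
 *	semop(semid, &unlock, 1);
 */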
/* If CLONE_SYSVSEM is set, establish sharing of SEM_UNDO state between
 * parent and child tasks.
 *
 * See the notes above unlock_semundo() regarding the spin_lock_init()
 * in this code. Initialize the undo_list->lock here instead of in
 * get_undo_list() because of the reasoning in the comment above
 * unlock_semundo.
 */

int copy_semundo(unsigned long clone_flags, struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;
	int error;

	if (clone_flags & CLONE_SYSVSEM) {
		error = get_undo_list(&undo_list);
		if (error)
			return error;
		if (atomic_read(&undo_list->refcnt) == 1)
			spin_lock_init(&undo_list->lock);
		atomic_inc(&undo_list->refcnt);
		tsk->sysvsem.undo_list = undo_list;
	} else
		tsk->sysvsem.undo_list = NULL;

	return 0;
}
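
/*
 * Illustration (editorial, hedged): CLONE_SYSVSEM is visible to userspace
 * through clone(). Tasks created this way share one undo_list, so their
 * SEM_UNDO adjustments accumulate jointly and are applied once at exit.
 * child_fn and child_stack_top below are placeholders for a real thread
 * function and a properly allocated stack.
 *
 *	clone(child_fn, child_stack_top, CLONE_SYSVSEM | SIGCHLD, NULL);
 */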
static inline void __exit_semundo(struct task_struct *tsk)
{
	struct sem_undo_list *undo_list;

	undo_list = tsk->sysvsem.undo_list;
	/* Free the list only when the last reference is dropped;
	 * atomic_dec_and_test() returns true when the refcnt hits zero.
	 */
	if (atomic_dec_and_test(&undo_list->refcnt))
		kfree(undo_list);
}

void exit_semundo(struct task_struct *tsk)
{
	if (tsk->sysvsem.undo_list != NULL)
		__exit_semundo(tsk);
}
/*
 * add semadj values to semaphores, free undo structures.
 * undo structures are not freed when semaphore arrays are destroyed
 * so some of them may be out of date.
 * IMPLEMENTATION NOTE: There is some confusion over whether the
 * set of adjustments that needs to be done should be done in an atomic
 * manner or not. That is, if we are attempting to decrement the semval
 * should we queue up and wait until we can do so legally?
 * The original implementation attempted to do this (queue and wait).
 * The current implementation does not do so. The POSIX standard
 * and SVID should be consulted to determine what behavior is mandated.
 */
void sem_exit (void)
{
	struct sem_queue *q;
	struct sem_undo *u, *un = NULL, **up, **unp;
	struct sem_array *sma;
	struct sem_undo_list *undo_list;
	int nsems, i;

	lock_kernel();

	/* If the current process was sleeping for a semaphore,
	 * remove it from the queue.
	 */
	if ((q = current->sysvsem.sleep_list)) {
		int semid = q->id;
		sma = sem_lock(semid);
		current->sysvsem.sleep_list = NULL;

		if (q->prev) {
			if(sma==NULL)
				BUG();
			remove_from_queue(q->sma,q);
		}
		if(sma!=NULL)
			sem_unlock(sma);
	}

	undo_list = current->sysvsem.undo_list;
	if ((undo_list == NULL) || (atomic_read(&undo_list->refcnt) != 1)) {
		unlock_kernel();
		return;
	}

	/* There's no need to hold the semundo list lock, as current
	 * is the last task exiting for this undo list.
	 */
	for (up = &undo_list->proc_list; (u = *up); *up = u->proc_next, kfree(u)) {
		int semid = u->semid;
		if(semid == -1)
			continue;
		sma = sem_lock(semid);
		if (sma == NULL)
			continue;

		if (u->semid == -1)
			goto next_entry;

		if (sem_checkid(sma,u->semid))
			goto next_entry;

		/* remove u from the sma->undo list */
		for (unp = &sma->undo; (un = *unp); unp = &un->id_next) {
			if (u == un)
				goto found;
		}
		printk ("sem_exit undo list error id=%d\n", u->semid);
		goto next_entry;
found:
		*unp = un->id_next;
		/* perform adjustments registered in u */
		nsems = sma->sem_nsems;
		for (i = 0; i < nsems; i++) {
			struct sem * sem = &sma->sem_base[i];
			sem->semval += u->semadj[i];
			if (sem->semval < 0)
				sem->semval = 0; /* shouldn't happen */
			sem->sempid = current->pid;
		}
		sma->sem_otime = get_seconds();
		/* maybe some queued-up processes were waiting for this */
		update_queue(sma);
next_entry:
		sem_unlock(sma);
	}
	__exit_semundo(current);

	unlock_kernel();
}
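
/*
 * Worked example (editorial): a process runs semop() with sem_op = -1 and
 * SEM_UNDO on semaphore 0, so try_atomic_semop() records
 * semadj[0] = 0 - (-1) = +1. If the process then exits without unlocking,
 * the loop above applies semval += semadj[0], restoring the semaphore
 * exactly as if the process had released it.
 */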
#ifdef CONFIG_PROC_FS
static int sysvipc_sem_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
{
	off_t pos = 0;
	off_t begin = 0;
	int i, len = 0;

	len += sprintf(buffer, "       key      semid perms      nsems   uid   gid  cuid  cgid      otime      ctime\n");
	down(&sem_ids.sem);

	for(i = 0; i <= sem_ids.max_id; i++) {
		struct sem_array *sma;
		sma = sem_lock(i);
		if(sma) {
			len += sprintf(buffer + len, "%10d %10d  %4o %10lu %5u %5u %5u %5u %10lu %10lu\n",
				sma->sem_perm.key,
				sem_buildid(i,sma->sem_perm.seq),
				sma->sem_perm.mode,
				sma->sem_nsems,
				sma->sem_perm.uid,
				sma->sem_perm.gid,
				sma->sem_perm.cuid,
				sma->sem_perm.cgid,
				sma->sem_otime,
				sma->sem_ctime);
			sem_unlock(sma);

			pos += len;
			if(pos < offset) {
				len = 0;
				begin = pos;
			}
			if(pos > offset + length)
				goto done;
		}
	}
	*eof = 1;
done:
	up(&sem_ids.sem);
	*start = buffer + (offset - begin);
	len -= (offset - begin);
	if(len > length)
		len = length;
	if(len < 0)
		len = 0;
	return len;
}
#endif