/*
 * linux/ipc/msg.c
 * Copyright (C) 1992 Krishna Balasubramanian
 *
 * Removed all the remaining kerneld mess
 * Catch the -EFAULT stuff properly
 * Use GFP_KERNEL for messages as in 1.2
 * Fixed up the unchecked user space derefs
 * Copyright (C) 1998 Alan Cox & Andi Kleen
 *
 * /proc/sysvipc/msg support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
 *
 * mostly rewritten, threaded and wake-one semantics added
 * (c) 1999 Manfred Spraul <manfreds@colorfullife.com>
 */
#include <linux/config.h>
#include <linux/malloc.h>
#include <linux/msg.h>
#include <linux/spinlock.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/list.h>

#include <asm/uaccess.h>

#define USHRT_MAX 0xffff
/* one msg_receiver structure for each sleeping receiver */
struct msg_receiver {
        struct list_head r_list;
        struct task_struct* r_tsk;

        int r_mode;
        long r_msgtype;
        long r_maxsize;

        struct msg_msg* volatile r_msg;
};
/* one msg_msg structure for each message */
struct msg_msg {
        struct list_head m_list;
        long m_type;
        int m_ts;               /* message text size */
        /* the actual message follows immediately */
};
/* one msg_queue structure for each present queue on the system */
struct msg_queue {
        struct ipc_perm q_perm;
        __kernel_time_t q_stime;        /* last msgsnd time */
        __kernel_time_t q_rtime;        /* last msgrcv time */
        __kernel_time_t q_ctime;        /* last change time */
        unsigned int q_cbytes;          /* current number of bytes on queue */
        unsigned int q_qnum;            /* number of messages in queue */
        unsigned int q_qbytes;          /* max number of bytes on queue */
        __kernel_ipc_pid_t q_lspid;     /* pid of last msgsnd */
        __kernel_ipc_pid_t q_lrpid;     /* last receive pid */

        struct list_head q_messages;
        struct list_head q_receivers;
        wait_queue_head_t q_rwait;
};
/* one msg_array structure for each possible queue on the system */
struct msg_array {
        spinlock_t lock;
        struct msg_queue* q;
};
#define SEARCH_ANY              1
#define SEARCH_EQUAL            2
#define SEARCH_NOTEQUAL         3
#define SEARCH_LESSEQUAL        4
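/* Locking:
 * msg_lock serializes queue creation, removal and key lookup;
 * each msg_array slot has its own spinlock protecting the
 * msg_queue it points to (message list, receivers, counters).
 */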
static DECLARE_MUTEX(msg_lock);
static struct msg_array msg_que[MSGMNI];

static unsigned short msg_seq = 0;
static int msg_used_queues = 0;
static int msg_max_id = -1;

static atomic_t msg_bytes = ATOMIC_INIT(0);
static atomic_t msg_hdrs = ATOMIC_INIT(0);
static void freeque (int id);
static int newque (key_t key, int msgflg);
static int findkey (key_t key);
#ifdef CONFIG_PROC_FS
static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
#endif

/* implemented in ipc/util.c, thread-safe */
extern int ipcperms (struct ipc_perm *ipcp, short msgflg);
void __init msg_init (void)
{
        int id;

        for (id = 0; id < MSGMNI; id++) {
                msg_que[id].lock = SPIN_LOCK_UNLOCKED;
                msg_que[id].q = NULL;
        }
#ifdef CONFIG_PROC_FS
        create_proc_read_entry("sysvipc/msg", 0, 0, sysvipc_msg_read_proc, NULL);
#endif
}
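/* findkey() returns the array slot of the queue with the given key,
 * or -1 if none exists. The caller must hold msg_lock.
 */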
static int findkey (key_t key)
{
        int id;
        struct msg_queue *msq;

        for (id = 0; id <= msg_max_id; id++) {
                msq = msg_que[id].q;
                if (msq == NULL)
                        continue;
                if (key == msq->q_perm.key)
                        return id;
        }
        return -1;
}
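/* newque() allocates a queue in the first free slot. The returned
 * msqid encodes both the slot and the slot's sequence number
 * (seq * MSGMNI + id), so a stale identifier for a removed and
 * reused slot is caught by the seq checks in msgsnd/msgctl.
 */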
static int newque (key_t key, int msgflg)
{
        int id;
        struct msg_queue *msq;
        struct ipc_perm *ipcp;

        for (id = 0; id < MSGMNI; id++) {
                if (msg_que[id].q == NULL)
                        break;
        }
        if (id == MSGMNI)
                return -ENOSPC;

        msq = (struct msg_queue *) kmalloc (sizeof (*msq), GFP_KERNEL);
        if (!msq)
                return -ENOMEM;

        ipcp = &msq->q_perm;
        ipcp->mode = (msgflg & S_IRWXUGO);
        ipcp->key = key;
        ipcp->cuid = ipcp->uid = current->euid;
        ipcp->gid = ipcp->cgid = current->egid;

        /* ipcp->seq*MSGMNI must be a positive integer.
         * this limits MSGMNI to 32768
         */
        ipcp->seq = msg_seq++;

        msq->q_stime = msq->q_rtime = 0;
        msq->q_ctime = CURRENT_TIME;
        msq->q_cbytes = msq->q_qnum = 0;
        msq->q_qbytes = MSGMNB;
        msq->q_lspid = msq->q_lrpid = 0;
        INIT_LIST_HEAD(&msq->q_messages);
        INIT_LIST_HEAD(&msq->q_receivers);
        init_waitqueue_head(&msq->q_rwait);

        if (id > msg_max_id)
                msg_max_id = id;
        spin_lock(&msg_que[id].lock);
        msg_que[id].q = msq;
        spin_unlock(&msg_que[id].lock);
        msg_used_queues++;

        return (int)msq->q_perm.seq * MSGMNI + id;
}
static void expunge_all(struct msg_queue* msq, int res)
{
        struct list_head *tmp;

        tmp = msq->q_receivers.next;
        while (tmp != &msq->q_receivers) {
                struct msg_receiver* msr;

                msr = list_entry(tmp, struct msg_receiver, r_list);
                tmp = tmp->next;
                msr->r_msg = ERR_PTR(res);
                wake_up_process(msr->r_tsk);
        }
}
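/* freeque() must be called with msg_lock and the queue spinlock held.
 * It wakes all sleeping senders and repeatedly yields until they have
 * left the wait queue, since the queue memory is freed at the end.
 * The spinlock is dropped inside and is not held on return.
 */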
static void freeque (int id)
{
        struct msg_queue *msq;
        struct list_head *tmp;

        msq = msg_que[id].q;
        msg_que[id].q = NULL;
        if (id == msg_max_id) {
                while (msg_que[msg_max_id].q == NULL) {
                        if (msg_max_id-- == 0)
                                break;
                }
        }
        msg_used_queues--;

        expunge_all(msq, -EIDRM);

        while (waitqueue_active(&msq->q_rwait)) {
                wake_up(&msq->q_rwait);
                spin_unlock(&msg_que[id].lock);
                current->policy |= SCHED_YIELD;
                schedule();
                spin_lock(&msg_que[id].lock);
        }
        spin_unlock(&msg_que[id].lock);

        tmp = msq->q_messages.next;
        while (tmp != &msq->q_messages) {
                struct msg_msg* msg = list_entry(tmp, struct msg_msg, m_list);
                tmp = tmp->next;
                atomic_dec(&msg_hdrs);
                kfree(msg);
        }
        atomic_sub(msq->q_cbytes, &msg_bytes);
        kfree(msq);
}
asmlinkage long sys_msgget (key_t key, int msgflg)
{
        int id, ret = -EPERM;
        struct msg_queue *msq;

        down(&msg_lock);
        if (key == IPC_PRIVATE)
                ret = newque(key, msgflg);
        else if ((id = findkey (key)) == -1) { /* key not used */
                if (!(msgflg & IPC_CREAT))
                        ret = -ENOENT;
                else
                        ret = newque(key, msgflg);
        } else if (msgflg & IPC_CREAT && msgflg & IPC_EXCL) {
                ret = -EEXIST;
        } else {
                msq = msg_que[id].q;
                if (ipcperms(&msq->q_perm, msgflg))
                        ret = -EACCES;
                else
                        ret = (unsigned int) msq->q_perm.seq * MSGMNI + id;
        }
        up(&msg_lock);
        return ret;
}
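/* sys_msgctl: IPC_INFO/MSG_INFO and MSG_STAT/IPC_STAT only read, so
 * they take at most the per-queue spinlock; IPC_SET and IPC_RMID
 * modify the queue array and therefore take msg_lock as well.
 */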
asmlinkage long sys_msgctl (int msqid, int cmd, struct msqid_ds *buf)
{
        int id, err;
        struct msg_queue *msq;
        struct msqid_ds tbuf;
        struct ipc_perm *ipcp;

        if (msqid < 0 || cmd < 0)
                return -EINVAL;
        id = msqid % MSGMNI;
        switch (cmd) {
        case IPC_INFO:
        case MSG_INFO:
        {
                struct msginfo msginfo;
                if (!buf)
                        return -EFAULT;
                /* We must not return kernel stack data:
                 * due to variable alignment, setting all
                 * member fields is not enough.
                 */
                memset(&msginfo, 0, sizeof(msginfo));
                msginfo.msgmni = MSGMNI;
                msginfo.msgmax = MSGMAX;
                msginfo.msgmnb = MSGMNB;
                msginfo.msgmap = MSGMAP;
                msginfo.msgpool = MSGPOOL;
                msginfo.msgtql = MSGTQL;
                msginfo.msgssz = MSGSSZ;
                msginfo.msgseg = MSGSEG;
                if (cmd == MSG_INFO) {
                        msginfo.msgpool = msg_used_queues;
                        msginfo.msgmap = atomic_read(&msg_hdrs);
                        msginfo.msgtql = atomic_read(&msg_bytes);
                }

                if (copy_to_user (buf, &msginfo, sizeof(struct msginfo)))
                        return -EFAULT;
                return (msg_max_id < 0) ? 0 : msg_max_id;
        }
        case MSG_STAT:
        case IPC_STAT:
        {
                int success_return;
                if (!buf)
                        return -EFAULT;
                if (cmd == MSG_STAT && msqid > MSGMNI)
                        return -EINVAL;

                spin_lock(&msg_que[id].lock);
                msq = msg_que[id].q;
                err = -EINVAL;
                if (msq == NULL)
                        goto out_unlock;
                if (cmd == MSG_STAT) {
                        success_return = (unsigned int) msq->q_perm.seq * MSGMNI + msqid;
                } else {
                        err = -EIDRM;
                        if (msq->q_perm.seq != (unsigned int) msqid / MSGMNI)
                                goto out_unlock;
                        success_return = 0;
                }
                err = -EACCES;
                if (ipcperms (&msq->q_perm, S_IRUGO))
                        goto out_unlock;
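                /* The old msqid_ds counters are unsigned shorts: clamp
                 * large values to USHRT_MAX and report the full values
                 * in the long msg_lcbytes/msg_lqbytes fields.
                 */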
                memset(&tbuf, 0, sizeof(tbuf));
                tbuf.msg_perm = msq->q_perm;
                /* tbuf.msg_{first,last}: not reported. */
                tbuf.msg_stime = msq->q_stime;
                tbuf.msg_rtime = msq->q_rtime;
                tbuf.msg_ctime = msq->q_ctime;
                if (msq->q_cbytes > USHRT_MAX)
                        tbuf.msg_cbytes = USHRT_MAX;
                else
                        tbuf.msg_cbytes = msq->q_cbytes;
                tbuf.msg_lcbytes = msq->q_cbytes;

                if (msq->q_qnum > USHRT_MAX)
                        tbuf.msg_qnum = USHRT_MAX;
                else
                        tbuf.msg_qnum = msq->q_qnum;

                if (msq->q_qbytes > USHRT_MAX)
                        tbuf.msg_qbytes = USHRT_MAX;
                else
                        tbuf.msg_qbytes = msq->q_qbytes;
                tbuf.msg_lqbytes = msq->q_qbytes;

                tbuf.msg_lspid = msq->q_lspid;
                tbuf.msg_lrpid = msq->q_lrpid;
                spin_unlock(&msg_que[id].lock);
                if (copy_to_user (buf, &tbuf, sizeof(*buf)))
                        return -EFAULT;
                return success_return;
        }
        case IPC_SET:
                if (!buf)
                        return -EFAULT;
                if (copy_from_user (&tbuf, buf, sizeof (*buf)))
                        return -EFAULT;
                break;
        case IPC_RMID:
                break;
        default:
                return -EINVAL;
        }

        down(&msg_lock);
        spin_lock(&msg_que[id].lock);
        msq = msg_que[id].q;
        err = -EINVAL;
        if (msq == NULL)
                goto out_unlock_up;
        err = -EIDRM;
        if (msq->q_perm.seq != (unsigned int) msqid / MSGMNI)
                goto out_unlock_up;
        ipcp = &msq->q_perm;
        switch (cmd) {
        case IPC_SET:
        {
                int newqbytes;
                err = -EPERM;
                if (current->euid != ipcp->cuid &&
                    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN))
                        /* We _could_ check for CAP_CHOWN above, but we don't */
                        goto out_unlock_up;

                if (tbuf.msg_qbytes == 0)
                        newqbytes = tbuf.msg_lqbytes;
                else
                        newqbytes = tbuf.msg_qbytes;
                if (newqbytes > MSGMNB && !capable(CAP_SYS_RESOURCE))
                        goto out_unlock_up;
                msq->q_qbytes = newqbytes;

                ipcp->uid = tbuf.msg_perm.uid;
                ipcp->gid = tbuf.msg_perm.gid;
                ipcp->mode = (ipcp->mode & ~S_IRWXUGO) |
                        (S_IRWXUGO & tbuf.msg_perm.mode);
                msq->q_ctime = CURRENT_TIME;
                /* sleeping receivers might be excluded by
                 * stricter permissions.
                 */
                expunge_all(msq, -EAGAIN);
                /* sleeping senders might be able to send
                 * due to a larger queue size.
                 */
                wake_up(&msq->q_rwait);
                spin_unlock(&msg_que[id].lock);
                break;
        }
        case IPC_RMID:
                err = -EPERM;
                if (current->euid != ipcp->cuid &&
                    current->euid != ipcp->uid && !capable(CAP_SYS_ADMIN))
                        goto out_unlock;
                freeque (id);
                break;
        }
        err = 0;
out_up:
        up(&msg_lock);
        return err;
out_unlock_up:
        spin_unlock(&msg_que[id].lock);
        goto out_up;
out_unlock:
        spin_unlock(&msg_que[id].lock);
        return err;
}
static int testmsg(struct msg_msg* msg, long type, int mode)
{
        switch (mode)
        {
        case SEARCH_ANY:
                return 1;
        case SEARCH_LESSEQUAL:
                if (msg->m_type <= type)
                        return 1;
                break;
        case SEARCH_EQUAL:
                if (msg->m_type == type)
                        return 1;
                break;
        case SEARCH_NOTEQUAL:
                if (msg->m_type != type)
                        return 1;
                break;
        }
        return 0;
}
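/* pipelined_send() implements the wake-one handoff: if a sleeping
 * receiver accepts the message, it is stored directly in the
 * receiver's r_msg and the task is woken, without ever touching the
 * queue's message list. Returns 1 if the message was consumed.
 */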
inline int pipelined_send(struct msg_queue* msq, struct msg_msg* msg)
{
        struct list_head* tmp;

        tmp = msq->q_receivers.next;
        while (tmp != &msq->q_receivers) {
                struct msg_receiver* msr;
                msr = list_entry(tmp, struct msg_receiver, r_list);
                tmp = tmp->next;
                if (testmsg(msg, msr->r_msgtype, msr->r_mode)) {
                        list_del(&msr->r_list);
                        if (msr->r_maxsize < msg->m_ts) {
                                msr->r_msg = ERR_PTR(-E2BIG);
                                wake_up_process(msr->r_tsk);
                        } else {
                                msr->r_msg = msg;
                                msq->q_lrpid = msr->r_tsk->pid;
                                msq->q_rtime = CURRENT_TIME;
                                wake_up_process(msr->r_tsk);
                                return 1;
                        }
                }
        }
        return 0;
}
asmlinkage long sys_msgsnd (int msqid, struct msgbuf *msgp, size_t msgsz, int msgflg)
{
        int id;
        struct msg_queue *msq;
        struct msg_msg *msg;
        long mtype;
        int err;

        if (msgsz > MSGMAX || (long) msgsz < 0 || msqid < 0)
                return -EINVAL;
        if (get_user(mtype, &msgp->mtype))
                return -EFAULT;
        if (mtype < 1)
                return -EINVAL;

        msg = (struct msg_msg *) kmalloc (sizeof(*msg) + msgsz, GFP_KERNEL);
        if (msg == NULL)
                return -ENOMEM;

        if (copy_from_user(msg+1, msgp->mtext, msgsz)) {
                kfree(msg);
                return -EFAULT;
        }
        msg->m_type = mtype;
        msg->m_ts = msgsz;
        id = (unsigned int) msqid % MSGMNI;
        spin_lock(&msg_que[id].lock);
        err = -EINVAL;
retry:
        msq = msg_que[id].q;
        if (msq == NULL)
                goto out_free;

        err = -EIDRM;
        if (msq->q_perm.seq != (unsigned int) msqid / MSGMNI)
                goto out_free;

        err = -EACCES;
        if (ipcperms(&msq->q_perm, S_IWUGO))
                goto out_free;
        if (msgsz + msq->q_cbytes > msq->q_qbytes) {
                DECLARE_WAITQUEUE(wait, current);

                if (msgflg & IPC_NOWAIT) {
                        err = -EAGAIN;
                        goto out_free;
                }
                current->state = TASK_INTERRUPTIBLE;
                add_wait_queue(&msq->q_rwait, &wait);
                spin_unlock(&msg_que[id].lock);
                schedule();
                current->state = TASK_RUNNING;

                remove_wait_queue(&msq->q_rwait, &wait);
                if (signal_pending(current)) {
                        kfree(msg);
                        return -EINTR;
                }
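
                /* The queue may have been removed or recreated while we
                 * slept: the retry path revalidates the queue pointer and
                 * its sequence number, with err preset to -EIDRM.
                 */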
                spin_lock(&msg_que[id].lock);
                err = -EIDRM;
                goto retry;
        }
        if (!pipelined_send(msq, msg)) {
                /* no one is waiting for this message, enqueue it */
                list_add_tail(&msg->m_list, &msq->q_messages);
                msq->q_cbytes += msgsz;
                msq->q_qnum++;
                atomic_add(msgsz, &msg_bytes);
                atomic_inc(&msg_hdrs);
        }

        err = 0;
        msg = NULL;
        msq->q_lspid = current->pid;
        msq->q_stime = CURRENT_TIME;

out_free:
        if (msg != NULL)
                kfree(msg);
        spin_unlock(&msg_que[id].lock);
        return err;
}
inline int convert_mode(long* msgtyp, int msgflg)
{
        /*
         * find message of correct type.
         * msgtyp = 0 => get first.
         * msgtyp > 0 => get first message of matching type.
         * msgtyp < 0 => get message with least type that is <= abs(msgtyp).
         */
        if (*msgtyp == 0)
                return SEARCH_ANY;
        if (*msgtyp < 0) {
                *msgtyp = -(*msgtyp);
                return SEARCH_LESSEQUAL;
        }
        if (msgflg & MSG_EXCEPT)
                return SEARCH_NOTEQUAL;
        return SEARCH_EQUAL;
}
asmlinkage long sys_msgrcv (int msqid, struct msgbuf *msgp, size_t msgsz,
                long msgtyp, int msgflg)
{
        struct msg_queue *msq;
        struct msg_receiver msr_d;
        struct list_head* tmp;
        struct msg_msg* msg, *found_msg;
        int id;
        int err;
        int mode;

        if (msqid < 0 || (long) msgsz < 0)
                return -EINVAL;
        mode = convert_mode(&msgtyp, msgflg);
        id = (unsigned int) msqid % MSGMNI;
        spin_lock(&msg_que[id].lock);
retry:
        msq = msg_que[id].q;
        err = -EINVAL;
        if (msq == NULL)
                goto out_unlock;
        err = -EIDRM;
        if (msq->q_perm.seq != (unsigned int) msqid / MSGMNI)
                goto out_unlock;
        err = -EACCES;
        if (ipcperms (&msq->q_perm, S_IRUGO))
                goto out_unlock;

        tmp = msq->q_messages.next;
        found_msg = NULL;
        while (tmp != &msq->q_messages) {
                msg = list_entry(tmp, struct msg_msg, m_list);
                if (testmsg(msg, msgtyp, mode)) {
                        found_msg = msg;
                        if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
                                msgtyp = msg->m_type - 1;
                        } else {
                                break;
                        }
                }
                tmp = tmp->next;
        }
        if (found_msg) {
                msg = found_msg;
                if ((msgsz < msg->m_ts) && !(msgflg & MSG_NOERROR)) {
                        err = -E2BIG;
                        goto out_unlock;
                }
                list_del(&msg->m_list);
                msq->q_qnum--;
                msq->q_rtime = CURRENT_TIME;
                msq->q_lrpid = current->pid;
                msq->q_cbytes -= msg->m_ts;
                atomic_sub(msg->m_ts, &msg_bytes);
                atomic_dec(&msg_hdrs);
                if (waitqueue_active(&msq->q_rwait))
                        wake_up(&msq->q_rwait);
out_success_unlock:
                spin_unlock(&msg_que[id].lock);
out_success:
                msgsz = (msgsz > msg->m_ts) ? msg->m_ts : msgsz;
                if (put_user (msg->m_type, &msgp->mtype) ||
                    copy_to_user (msgp->mtext, msg+1, msgsz))
                {
                        msgsz = -EFAULT;
                }
                kfree(msg);
                return msgsz;
        } else
        {
                /* no message waiting. Prepare for pipelined
                 * receive.
                 */
                if (msgflg & IPC_NOWAIT) {
                        err = -ENOMSG;
                        goto out_unlock;
                }
                list_add_tail(&msr_d.r_list, &msq->q_receivers);
                msr_d.r_tsk = current;
                msr_d.r_msgtype = msgtyp;
                msr_d.r_mode = mode;
                if (msgflg & MSG_NOERROR)
                        msr_d.r_maxsize = MSGMAX;
                else
                        msr_d.r_maxsize = msgsz;
                msr_d.r_msg = ERR_PTR(-EAGAIN);
                current->state = TASK_INTERRUPTIBLE;
                spin_unlock(&msg_que[id].lock);
                schedule();
                current->state = TASK_RUNNING;

                msg = (struct msg_msg*) msr_d.r_msg;
                if (!IS_ERR(msg))
                        goto out_success;
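
                /* r_msg is written before wake_up_process(), so it can be
                 * checked without the lock first; recheck under the lock
                 * to close the race with a concurrent pipelined_send().
                 */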
                spin_lock(&msg_que[id].lock);
                msg = (struct msg_msg*) msr_d.r_msg;
                if (!IS_ERR(msg)) {
                        /* our message arrived while we waited for
                         * the spinlock. Process it.
                         */
                        goto out_success_unlock;
                }
                err = PTR_ERR(msg);
                if (err == -EAGAIN) {
                        list_del(&msr_d.r_list);
                        if (signal_pending(current))
                                err = -EINTR;
                        else
                                goto retry;
                }
        }
out_unlock:
        spin_unlock(&msg_que[id].lock);
        return err;
}
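/* /proc/sysvipc/msg: one header line, then one line per existing
 * queue. The offset/begin arithmetic implements the usual read_proc
 * windowing so that large listings can be read in chunks.
 */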
#ifdef CONFIG_PROC_FS
static int sysvipc_msg_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
{
        off_t pos = 0;
        off_t begin = 0;
        int i, len = 0;

        down(&msg_lock);
        len += sprintf(buffer, "       key      msqid perms cbytes  qnum lspid lrpid   uid   gid  cuid  cgid      stime      rtime      ctime\n");

        for (i = 0; i <= msg_max_id; i++) {
                spin_lock(&msg_que[i].lock);
                if (msg_que[i].q != NULL) {
                        len += sprintf(buffer + len, "%10d %10d %4o %5u %5u %5u %5u %5u %5u %5u %5u %10lu %10lu %10lu\n",
                                msg_que[i].q->q_perm.key,
                                msg_que[i].q->q_perm.seq * MSGMNI + i,
                                msg_que[i].q->q_perm.mode,
                                msg_que[i].q->q_cbytes,
                                msg_que[i].q->q_qnum,
                                msg_que[i].q->q_lspid,
                                msg_que[i].q->q_lrpid,
                                msg_que[i].q->q_perm.uid,
                                msg_que[i].q->q_perm.gid,
                                msg_que[i].q->q_perm.cuid,
                                msg_que[i].q->q_perm.cgid,
                                msg_que[i].q->q_stime,
                                msg_que[i].q->q_rtime,
                                msg_que[i].q->q_ctime);
                        spin_unlock(&msg_que[i].lock);

                        pos += len;
                        if (pos < offset) {
                                len = 0;
                                begin = pos;
                        }
                        if (pos > offset + length)
                                goto done;
                } else {
                        spin_unlock(&msg_que[i].lock);
                }
        }
        *eof = 1;
done:
        up(&msg_lock);
        *start = buffer + (offset - begin);
        len -= (offset - begin);
        if (len > length)
                len = length;
        if (len < 0)
                len = 0;
        return len;
}
#endif