kernel - kqueue - refactor kqueue_scan(), rename tick to ustick
[dragonfly.git] / sys / kern / sys_mqueue.c
blob08347d23e4791aa68b3ebaaad00004156eb89cbb
1 /* $NetBSD: sys_mqueue.c,v 1.16 2009/04/11 23:05:26 christos Exp $ */
3 /*
4 * Copyright (c) 2007, 2008 Mindaugas Rasiukevicius <rmind at NetBSD org>
5 * All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
30 * Implementation of POSIX message queues.
31 * Defined in the Base Definitions volume of IEEE Std 1003.1-2001.
33 * Locking
35 * Global list of message queues (mqueue_head) and proc_t::p_mqueue_cnt
36 * counter are protected by mqlist_mtx lock. The very message queue and
37 * its members are protected by mqueue::mq_mtx.
39 * Lock order:
40 * mqlist_mtx
41 * -> mqueue::mq_mtx
44 #include <stdbool.h>
45 #include <sys/param.h>
46 #include <sys/types.h>
47 #include <sys/errno.h>
48 #include <sys/fcntl.h>
49 #include <sys/file.h>
50 #include <sys/filedesc.h>
51 #include <sys/ucred.h>
52 #include <sys/priv.h>
53 #include <sys/kernel.h>
54 #include <sys/malloc.h>
55 #include <sys/mqueue.h>
56 #include <sys/objcache.h>
57 #include <sys/poll.h>
58 #include <sys/proc.h>
59 #include <sys/queue.h>
60 #include <sys/select.h>
61 #include <sys/serialize.h>
62 #include <sys/signal.h>
63 #include <sys/signalvar.h>
64 #include <sys/spinlock.h>
65 #include <sys/spinlock2.h>
66 #include <sys/stat.h>
67 #include <sys/sysctl.h>
68 #include <sys/sysproto.h>
69 #include <sys/systm.h>
70 #include <sys/lock.h>
71 #include <sys/unistd.h>
72 #include <sys/vnode.h>
74 /* System-wide limits. */
75 static u_int mq_open_max = MQ_OPEN_MAX;
76 static u_int mq_prio_max = MQ_PRIO_MAX;
77 static u_int mq_max_msgsize = 16 * MQ_DEF_MSGSIZE;
78 static u_int mq_def_maxmsg = 32;
80 struct lock mqlist_mtx;
81 static struct objcache * mqmsg_cache;
82 static LIST_HEAD(, mqueue) mqueue_head =
83 LIST_HEAD_INITIALIZER(mqueue_head);
typedef struct file file_t;	/* XXX: Should we put this in sys/types.h ? */

/* Handlers installed in the mqops file operations vector. */
static int	mq_poll_fop(file_t *fp, int events, struct ucred *cred);
static int	mq_stat_fop(file_t *fp, struct stat *st, struct ucred *cred);
static int	mq_close_fop(file_t *fp);

/* Time-related helpers used when converting timeouts to ticks. */
static int	itimespecfix(struct timespec *ts);
static int	tstohz(const struct timespec *ts);
96 /* File operations vector */
97 static struct fileops mqops = {
98 .fo_read = badfo_readwrite,
99 .fo_write = badfo_readwrite,
100 .fo_ioctl = badfo_ioctl,
101 .fo_poll = mq_poll_fop,
102 .fo_stat = mq_stat_fop,
103 .fo_close = mq_close_fop,
104 .fo_kqfilter = badfo_kqfilter,
105 .fo_shutdown = badfo_shutdown
108 /* Define a new malloc type for message queues */
109 MALLOC_DECLARE(M_MQBUF);
110 MALLOC_DEFINE(M_MQBUF, "mqueues", "Buffers to message queues");
112 /* Malloc arguments for object cache */
113 struct objcache_malloc_args mqueue_malloc_args = {
114 sizeof(struct mqueue), M_MQBUF };
/*
 * Spinlock around the process list; mq_send1() holds it while it
 * delivers the notification signal.
 */
extern struct spinlock allproc_spin;
120 * Initialize POSIX message queue subsystem.
122 void
123 mqueue_sysinit(void)
125 mqmsg_cache = objcache_create("mqmsg_cache",
126 0, /* infinite depot's capacity */
127 0, /* default magazine's capacity */
128 NULL, /* constructor */
129 NULL, /* deconstructor */
130 NULL,
131 objcache_malloc_alloc,
132 objcache_malloc_free,
133 &mqueue_malloc_args);
135 lockinit(&mqlist_mtx, "mqlist_mtx", 0, LK_CANRECURSE);
139 * Free the message.
141 static void
142 mqueue_freemsg(struct mq_msg *msg, const size_t size)
145 if (size > MQ_DEF_MSGSIZE)
146 kfree(msg, M_MQBUF);
147 else
148 objcache_put(mqmsg_cache, msg);
152 * Destroy the message queue.
154 static void
155 mqueue_destroy(struct mqueue *mq)
157 struct mq_msg *msg;
158 size_t msz;
159 u_int i;
161 /* Note MQ_PQSIZE + 1. */
162 for (i = 0; i < MQ_PQSIZE + 1; i++) {
163 while ((msg = TAILQ_FIRST(&mq->mq_head[i])) != NULL) {
164 TAILQ_REMOVE(&mq->mq_head[i], msg, msg_queue);
165 msz = sizeof(struct mq_msg) + msg->msg_len;
166 mqueue_freemsg(msg, msz);
169 lockuninit(&mq->mq_mtx);
170 kfree(mq, M_MQBUF);
174 * Lookup for file name in general list of message queues.
175 * => locks the message queue
177 static void *
178 mqueue_lookup(char *name)
180 struct mqueue *mq;
182 KKASSERT(lockstatus(&mqlist_mtx, curthread));
184 LIST_FOREACH(mq, &mqueue_head, mq_list) {
185 if (strncmp(mq->mq_name, name, MQ_NAMELEN) == 0) {
186 lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);
187 return mq;
191 return NULL;
195 * mqueue_get: get the mqueue from the descriptor.
196 * => locks the message queue, if found.
197 * => hold a reference on the file descriptor.
199 static int
200 mqueue_get(struct lwp *l, mqd_t mqd, file_t **fpr)
202 struct mqueue *mq;
203 file_t *fp;
205 fp = holdfp(curproc->p_fd, (int)mqd, -1); /* XXX: Why -1 ? */
206 if (__predict_false(fp == NULL))
207 return EBADF;
209 if (__predict_false(fp->f_type != DTYPE_MQUEUE)) {
210 fdrop(fp);
211 return EBADF;
213 mq = fp->f_data;
214 lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);
216 *fpr = fp;
217 return 0;
221 * mqueue_linear_insert: perform linear insert according to the message
222 * priority into the reserved queue (MQ_PQRESQ). Reserved queue is a
223 * sorted list used only when mq_prio_max is increased via sysctl.
225 static inline void
226 mqueue_linear_insert(struct mqueue *mq, struct mq_msg *msg)
228 struct mq_msg *mit;
230 TAILQ_FOREACH(mit, &mq->mq_head[MQ_PQRESQ], msg_queue) {
231 if (msg->msg_prio > mit->msg_prio)
232 break;
234 if (mit == NULL) {
235 TAILQ_INSERT_TAIL(&mq->mq_head[MQ_PQRESQ], msg, msg_queue);
236 } else {
237 TAILQ_INSERT_BEFORE(mit, msg, msg_queue);
242 * Validate input.
245 itimespecfix(struct timespec *ts)
247 if (ts->tv_sec < 0 || ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000)
248 return (EINVAL);
249 if (ts->tv_sec == 0 && ts->tv_nsec != 0 && ts->tv_nsec < nstick)
250 ts->tv_nsec = nstick;
251 return (0);
255 * Compute number of ticks in the specified amount of time.
258 tstohz(const struct timespec *ts)
260 struct timeval tv;
263 * usec has great enough resolution for hz, so convert to a
264 * timeval and use tvtohz() above.
266 TIMESPEC_TO_TIMEVAL(&tv, ts);
267 return tvtohz_high(&tv); /* XXX Why _high() and not _low() ? */
/*
 * Converter from struct timespec to the ticks.
 * Used by mq_timedreceive(), mq_timedsend().
 *
 * 'ts' is an absolute deadline and is left untouched, so that callers
 * may invoke this function again with the same deadline when they have
 * to go back to sleep.  (The two-argument timespecsub() used to be
 * applied to '*ts' directly, turning it into a relative value; a second
 * call then subtracted the current time once more and reported a bogus
 * ETIMEDOUT.)
 *
 * Returns 0 and stores a non-zero tick count in '*timo', EINVAL for a
 * tv_nsec outside [0, 1000000000), ETIMEDOUT if the deadline has
 * already passed, or the error from itimespecfix().
 */
int
abstimeout2timo(struct timespec *ts, int *timo)
{
	struct timespec now, rel;
	int error;

	/*
	 * Reject a malformed deadline before the subtraction below has a
	 * chance to normalise it into something that looks valid.
	 */
	if (ts->tv_nsec < 0 || ts->tv_nsec >= 1000000000) {
		return EINVAL;
	}

	getnanotime(&now);
	rel = *ts;
	timespecsub(&rel, &now);
	if (rel.tv_sec < 0 || (rel.tv_sec == 0 && rel.tv_nsec <= 0)) {
		return ETIMEDOUT;
	}
	error = itimespecfix(&rel);
	if (error) {
		return error;
	}
	*timo = tstohz(&rel);
	KKASSERT(*timo != 0);

	return 0;
}
295 static int
296 mq_stat_fop(file_t *fp, struct stat *st, struct ucred *cred)
298 struct mqueue *mq = fp->f_data;
300 (void)memset(st, 0, sizeof(*st));
302 lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);
303 st->st_mode = mq->mq_mode;
304 st->st_uid = mq->mq_euid;
305 st->st_gid = mq->mq_egid;
306 st->st_atimespec = mq->mq_atime;
307 st->st_mtimespec = mq->mq_mtime;
308 /*st->st_ctimespec = st->st_birthtimespec = mq->mq_btime;*/
309 st->st_uid = fp->f_cred->cr_uid;
310 st->st_gid = fp->f_cred->cr_svgid;
311 lockmgr(&mq->mq_mtx, LK_RELEASE);
313 return 0;
316 static int
317 mq_poll_fop(file_t *fp, int events, struct ucred *cred)
319 struct mqueue *mq = fp->f_data;
320 int revents = 0;
322 lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);
323 if (events & (POLLIN | POLLRDNORM)) {
324 /* Ready for receiving, if there are messages in the queue */
325 if (mq->mq_attrib.mq_curmsgs)
326 revents |= (POLLIN | POLLRDNORM);
327 else
328 selrecord(curthread, &mq->mq_rsel);
330 if (events & (POLLOUT | POLLWRNORM)) {
331 /* Ready for sending, if the message queue is not full */
332 if (mq->mq_attrib.mq_curmsgs < mq->mq_attrib.mq_maxmsg)
333 revents |= (POLLOUT | POLLWRNORM);
334 else
335 selrecord(curthread, &mq->mq_wsel);
337 lockmgr(&mq->mq_mtx, LK_RELEASE);
339 return revents;
342 static int
343 mq_close_fop(file_t *fp)
345 struct proc *p = curproc;
346 struct mqueue *mq = fp->f_data;
347 bool destroy;
349 lockmgr(&mqlist_mtx, LK_EXCLUSIVE);
350 lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);
352 /* Decrease the counters */
353 p->p_mqueue_cnt--;
354 mq->mq_refcnt--;
356 /* Remove notification if registered for this process */
357 if (mq->mq_notify_proc == p)
358 mq->mq_notify_proc = NULL;
361 * If this is the last reference and mqueue is marked for unlink,
362 * remove and later destroy the message queue.
364 if (mq->mq_refcnt == 0 && (mq->mq_attrib.mq_flags & MQ_UNLINK)) {
365 LIST_REMOVE(mq, mq_list);
366 destroy = true;
367 } else
368 destroy = false;
370 lockmgr(&mq->mq_mtx, LK_RELEASE);
371 lockmgr(&mqlist_mtx, LK_RELEASE);
373 if (destroy)
374 mqueue_destroy(mq);
376 return 0;
380 * General mqueue system calls.
384 sys_mq_open(struct mq_open_args *uap)
386 /* {
387 syscallarg(const char *) name;
388 syscallarg(int) oflag;
389 syscallarg(mode_t) mode;
390 syscallarg(struct mq_attr) attr;
391 } */
392 struct thread *td = curthread;
393 struct proc *p = td->td_proc;
394 struct filedesc *fdp = p->p_fd;
395 struct mqueue *mq, *mq_new = NULL;
396 file_t *fp;
397 char *name;
398 int mqd, error, oflag;
400 /* Check access mode flags */
401 oflag = SCARG(uap, oflag);
402 if ((oflag & O_ACCMODE) == (O_WRONLY | O_RDWR)) {
403 return EINVAL;
406 /* Get the name from the user-space */
407 name = kmalloc(MQ_NAMELEN, M_MQBUF, M_WAITOK | M_ZERO);
408 error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL);
409 if (error) {
410 kfree(name, M_MQBUF);
411 return error;
414 if (oflag & O_CREAT) {
415 struct mq_attr attr;
416 u_int i;
418 /* Check the limit */
419 if (p->p_mqueue_cnt == mq_open_max) {
420 kfree(name, M_MQBUF);
421 return EMFILE;
424 /* Empty name is invalid */
425 if (name[0] == '\0') {
426 kfree(name, M_MQBUF);
427 return EINVAL;
430 /* Check for mqueue attributes */
431 if (SCARG(uap, attr)) {
432 error = copyin(SCARG(uap, attr), &attr,
433 sizeof(struct mq_attr));
434 if (error) {
435 kfree(name, M_MQBUF);
436 return error;
438 if (attr.mq_maxmsg <= 0 || attr.mq_msgsize <= 0 ||
439 attr.mq_msgsize > mq_max_msgsize) {
440 kfree(name, M_MQBUF);
441 return EINVAL;
443 attr.mq_curmsgs = 0;
444 } else {
445 memset(&attr, 0, sizeof(struct mq_attr));
446 attr.mq_maxmsg = mq_def_maxmsg;
447 attr.mq_msgsize =
448 MQ_DEF_MSGSIZE - sizeof(struct mq_msg);
452 * Allocate new mqueue, initialize data structures,
453 * copy the name, attributes and set the flag.
455 mq_new = kmalloc(sizeof(struct mqueue), M_MQBUF, M_WAITOK | M_ZERO);
457 lockinit(&mq_new->mq_mtx, "mq_new->mq_mtx", 0, LK_CANRECURSE);
458 for (i = 0; i < (MQ_PQSIZE + 1); i++) {
459 TAILQ_INIT(&mq_new->mq_head[i]);
462 strlcpy(mq_new->mq_name, name, MQ_NAMELEN);
463 memcpy(&mq_new->mq_attrib, &attr, sizeof(struct mq_attr));
465 /*CTASSERT((O_MASK & (MQ_UNLINK | MQ_RECEIVE)) == 0);*/
466 /* mq_new->mq_attrib.mq_flags = (O_MASK & oflag); */
467 mq_new->mq_attrib.mq_flags = oflag;
469 /* Store mode and effective UID with GID */
470 mq_new->mq_mode = ((SCARG(uap, mode) &
471 ~p->p_fd->fd_cmask) & ALLPERMS) & ~S_ISTXT;
472 mq_new->mq_euid = td->td_ucred->cr_uid;
473 mq_new->mq_egid = td->td_ucred->cr_svgid;
476 /* Allocate file structure and descriptor */
477 error = falloc(td->td_lwp, &fp, &mqd);
478 if (error) {
479 if (mq_new)
480 mqueue_destroy(mq_new);
481 kfree(name, M_MQBUF);
482 return error;
484 fp->f_type = DTYPE_MQUEUE;
485 fp->f_flag = FFLAGS(oflag) & (FREAD | FWRITE);
486 fp->f_ops = &mqops;
488 /* Look up for mqueue with such name */
489 lockmgr(&mqlist_mtx, LK_EXCLUSIVE);
490 mq = mqueue_lookup(name);
491 if (mq) {
492 int acc_mode;
494 KKASSERT(lockstatus(&mq->mq_mtx, curthread));
496 /* Check if mqueue is not marked as unlinking */
497 if (mq->mq_attrib.mq_flags & MQ_UNLINK) {
498 error = EACCES;
499 goto exit;
501 /* Fail if O_EXCL is set, and mqueue already exists */
502 if ((oflag & O_CREAT) && (oflag & O_EXCL)) {
503 error = EEXIST;
504 goto exit;
508 * Check the permissions. Note the difference between
509 * VREAD/VWRITE and FREAD/FWRITE.
511 acc_mode = 0;
512 if (fp->f_flag & FREAD) {
513 acc_mode |= VREAD;
515 if (fp->f_flag & FWRITE) {
516 acc_mode |= VWRITE;
518 if (vaccess(VNON, mq->mq_mode, mq->mq_euid, mq->mq_egid,
519 acc_mode, td->td_ucred)) {
521 error = EACCES;
522 goto exit;
524 } else {
525 /* Fail if mqueue neither exists, nor we create it */
526 if ((oflag & O_CREAT) == 0) {
527 lockmgr(&mqlist_mtx, LK_RELEASE);
528 KKASSERT(mq_new == NULL);
529 fsetfd(fdp, NULL, mqd);
530 fp->f_ops = &badfileops;
531 fdrop(fp);
532 kfree(name, M_MQBUF);
533 return ENOENT;
536 /* Check the limit */
537 if (p->p_mqueue_cnt == mq_open_max) {
538 error = EMFILE;
539 goto exit;
542 /* Insert the queue to the list */
543 mq = mq_new;
544 lockmgr(&mq->mq_mtx, LK_EXCLUSIVE);
545 LIST_INSERT_HEAD(&mqueue_head, mq, mq_list);
546 mq_new = NULL;
547 getnanotime(&mq->mq_btime);
548 mq->mq_atime = mq->mq_mtime = mq->mq_btime;
551 /* Increase the counters, and make descriptor ready */
552 p->p_mqueue_cnt++;
553 mq->mq_refcnt++;
554 fp->f_data = mq;
555 exit:
556 lockmgr(&mq->mq_mtx, LK_RELEASE);
557 lockmgr(&mqlist_mtx, LK_RELEASE);
559 if (mq_new)
560 mqueue_destroy(mq_new);
561 if (error) {
562 fsetfd(fdp, NULL, mqd);
563 fp->f_ops = &badfileops;
564 } else {
565 fsetfd(fdp, fp, mqd);
566 uap->sysmsg_result = mqd;
568 fdrop(fp);
569 kfree(name, M_MQBUF);
571 return error;
/*
 * mq_close(2): a message queue descriptor is an ordinary file
 * descriptor, so the argument block is handed to sys_close() unchanged.
 */
int
sys_mq_close(struct mq_close_args *uap)
{
	void *close_args = (void *)uap;

	return sys_close(close_args);
}
581 * Primary mq_receive1() function.
584 mq_receive1(struct lwp *l, mqd_t mqdes, void *msg_ptr, size_t msg_len,
585 unsigned *msg_prio, struct timespec *ts, ssize_t *mlen)
587 file_t *fp = NULL;
588 struct mqueue *mq;
589 struct mq_msg *msg = NULL;
590 struct mq_attr *mqattr;
591 u_int idx;
592 int error;
594 /* Get the message queue */
595 error = mqueue_get(l, mqdes, &fp);
596 if (error) {
597 return error;
599 mq = fp->f_data;
600 if ((fp->f_flag & FREAD) == 0) {
601 error = EBADF;
602 goto error;
604 getnanotime(&mq->mq_atime);
605 mqattr = &mq->mq_attrib;
607 /* Check the message size limits */
608 if (msg_len < mqattr->mq_msgsize) {
609 error = EMSGSIZE;
610 goto error;
613 /* Check if queue is empty */
614 while (mqattr->mq_curmsgs == 0) {
615 int t;
617 if (mqattr->mq_flags & O_NONBLOCK) {
618 error = EAGAIN;
619 goto error;
621 error = abstimeout2timo(ts, &t);
622 if (error) {
623 goto error;
626 * Block until someone sends the message.
627 * While doing this, notification should not be sent.
629 mqattr->mq_flags |= MQ_RECEIVE;
630 error = tsleep(&mq->mq_send_cv, PCATCH, "mqsend", t);
631 mqattr->mq_flags &= ~MQ_RECEIVE;
632 if (error || (mqattr->mq_flags & MQ_UNLINK)) {
633 error = (error == EWOULDBLOCK) ? ETIMEDOUT : EINTR;
634 goto error;
640 * Find the highest priority message, and remove it from the queue.
641 * At first, reserved queue is checked, bitmap is next.
643 msg = TAILQ_FIRST(&mq->mq_head[MQ_PQRESQ]);
644 if (__predict_true(msg == NULL)) {
645 idx = ffs(mq->mq_bitmap);
646 msg = TAILQ_FIRST(&mq->mq_head[idx]);
647 KKASSERT(msg != NULL);
648 } else {
649 idx = MQ_PQRESQ;
651 TAILQ_REMOVE(&mq->mq_head[idx], msg, msg_queue);
653 /* Unmark the bit, if last message. */
654 if (__predict_true(idx) && TAILQ_EMPTY(&mq->mq_head[idx])) {
655 KKASSERT((MQ_PQSIZE - idx) == msg->msg_prio);
656 mq->mq_bitmap &= ~(1 << --idx);
659 /* Decrement the counter and signal waiter, if any */
660 mqattr->mq_curmsgs--;
661 wakeup_one(&mq->mq_recv_cv);
663 /* Ready for sending now */
664 selwakeup(&mq->mq_wsel);
665 error:
666 lockmgr(&mq->mq_mtx, LK_RELEASE);
667 fdrop(fp);
668 if (error)
669 return error;
672 * Copy the data to the user-space.
673 * Note: According to POSIX, no message should be removed from the
674 * queue in case of fail - this would be violated.
676 *mlen = msg->msg_len;
677 error = copyout(msg->msg_ptr, msg_ptr, msg->msg_len);
678 if (error == 0 && msg_prio)
679 error = copyout(&msg->msg_prio, msg_prio, sizeof(unsigned));
680 mqueue_freemsg(msg, sizeof(struct mq_msg) + msg->msg_len);
682 return error;
686 sys_mq_receive(struct mq_receive_args *uap)
688 /* {
689 syscallarg(mqd_t) mqdes;
690 syscallarg(char *) msg_ptr;
691 syscallarg(size_t) msg_len;
692 syscallarg(unsigned *) msg_prio;
693 } */
694 ssize_t mlen;
695 int error;
697 error = mq_receive1(curthread->td_lwp, SCARG(uap, mqdes), SCARG(uap, msg_ptr),
698 SCARG(uap, msg_len), SCARG(uap, msg_prio), 0, &mlen);
699 if (error == 0)
700 uap->sysmsg_result = mlen;
702 return error;
706 sys_mq_timedreceive(struct mq_timedreceive_args *uap)
708 /* {
709 syscallarg(mqd_t) mqdes;
710 syscallarg(char *) msg_ptr;
711 syscallarg(size_t) msg_len;
712 syscallarg(unsigned *) msg_prio;
713 syscallarg(const struct timespec *) abs_timeout;
714 } */
715 int error;
716 ssize_t mlen;
717 struct timespec ts, *tsp;
719 /* Get and convert time value */
720 if (SCARG(uap, abs_timeout)) {
721 error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts));
722 if (error)
723 return error;
724 tsp = &ts;
725 } else {
726 tsp = NULL;
729 error = mq_receive1(curthread->td_lwp, SCARG(uap, mqdes), SCARG(uap, msg_ptr),
730 SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp, &mlen);
731 if (error == 0)
732 uap->sysmsg_result = mlen;
734 return error;
738 * Primary mq_send1() function.
741 mq_send1(struct lwp *l, mqd_t mqdes, const char *msg_ptr, size_t msg_len,
742 unsigned msg_prio, struct timespec *ts)
744 file_t *fp = NULL;
745 struct mqueue *mq;
746 struct mq_msg *msg;
747 struct mq_attr *mqattr;
748 struct proc *notify = NULL;
749 /*ksiginfo_t ksi;*/
750 size_t size;
751 int error;
753 /* Check the priority range */
754 if (msg_prio >= mq_prio_max)
755 return EINVAL;
757 /* Allocate a new message */
758 size = sizeof(struct mq_msg) + msg_len;
759 if (size > mq_max_msgsize)
760 return EMSGSIZE;
762 if (size > MQ_DEF_MSGSIZE) {
763 msg = kmalloc(size, M_MQBUF, M_WAITOK);
764 } else {
765 msg = objcache_get(mqmsg_cache, M_WAITOK);
768 /* Get the data from user-space */
769 error = copyin(msg_ptr, msg->msg_ptr, msg_len);
770 if (error) {
771 mqueue_freemsg(msg, size);
772 return error;
774 msg->msg_len = msg_len;
775 msg->msg_prio = msg_prio;
777 /* Get the mqueue */
778 error = mqueue_get(l, mqdes, &fp);
779 if (error) {
780 mqueue_freemsg(msg, size);
781 return error;
783 mq = fp->f_data;
784 if ((fp->f_flag & FWRITE) == 0) {
785 error = EBADF;
786 goto error;
788 getnanotime(&mq->mq_mtime);
789 mqattr = &mq->mq_attrib;
791 /* Check the message size limit */
792 if (msg_len <= 0 || msg_len > mqattr->mq_msgsize) {
793 error = EMSGSIZE;
794 goto error;
797 /* Check if queue is full */
798 while (mqattr->mq_curmsgs >= mqattr->mq_maxmsg) {
799 int t;
801 if (mqattr->mq_flags & O_NONBLOCK) {
802 error = EAGAIN;
803 goto error;
805 error = abstimeout2timo(ts, &t);
806 if (error) {
807 goto error;
809 /* Block until queue becomes available */
810 error = tsleep(&mq->mq_recv_cv, PCATCH, "mqrecv", t);
811 if (error || (mqattr->mq_flags & MQ_UNLINK)) {
812 error = (error == EWOULDBLOCK) ? ETIMEDOUT : error;
813 goto error;
816 KKASSERT(mq->mq_attrib.mq_curmsgs < mq->mq_attrib.mq_maxmsg);
819 * Insert message into the queue, according to the priority.
820 * Note the difference between index and priority.
822 if (__predict_true(msg_prio < MQ_PQSIZE)) {
823 u_int idx = MQ_PQSIZE - msg_prio;
825 KKASSERT(idx != MQ_PQRESQ);
826 TAILQ_INSERT_TAIL(&mq->mq_head[idx], msg, msg_queue);
827 mq->mq_bitmap |= (1 << --idx);
828 } else {
829 mqueue_linear_insert(mq, msg);
832 /* Check for the notify */
833 if (mqattr->mq_curmsgs == 0 && mq->mq_notify_proc &&
834 (mqattr->mq_flags & MQ_RECEIVE) == 0) {
835 /* Initialize the signal */
836 /*KSI_INIT(&ksi);*/
837 /*ksi.ksi_signo = mq->mq_sig_notify.sigev_signo;*/
838 /*ksi.ksi_code = SI_MESGQ;*/
839 /*ksi.ksi_value = mq->mq_sig_notify.sigev_value;*/
840 /* Unregister the process */
841 notify = mq->mq_notify_proc;
842 mq->mq_notify_proc = NULL;
845 /* Increment the counter and signal waiter, if any */
846 mqattr->mq_curmsgs++;
847 wakeup_one(&mq->mq_send_cv);
849 /* Ready for receiving now */
850 selwakeup(&mq->mq_rsel);
851 error:
852 lockmgr(&mq->mq_mtx, LK_RELEASE);
853 fdrop(fp);
855 if (error) {
856 mqueue_freemsg(msg, size);
857 } else if (notify) {
858 /* Send the notify, if needed */
859 spin_lock_wr(&allproc_spin);
860 /*kpsignal(notify, &ksi, NULL);*/
861 ksignal(notify, mq->mq_sig_notify.sigev_signo);
862 spin_unlock_wr(&allproc_spin);
865 return error;
869 sys_mq_send(struct mq_send_args *uap)
871 /* {
872 syscallarg(mqd_t) mqdes;
873 syscallarg(const char *) msg_ptr;
874 syscallarg(size_t) msg_len;
875 syscallarg(unsigned) msg_prio;
876 } */
878 return mq_send1(curthread->td_lwp, SCARG(uap, mqdes), SCARG(uap, msg_ptr),
879 SCARG(uap, msg_len), SCARG(uap, msg_prio), 0);
883 sys_mq_timedsend(struct mq_timedsend_args *uap)
885 /* {
886 syscallarg(mqd_t) mqdes;
887 syscallarg(const char *) msg_ptr;
888 syscallarg(size_t) msg_len;
889 syscallarg(unsigned) msg_prio;
890 syscallarg(const struct timespec *) abs_timeout;
891 } */
892 struct timespec ts, *tsp;
893 int error;
895 /* Get and convert time value */
896 if (SCARG(uap, abs_timeout)) {
897 error = copyin(SCARG(uap, abs_timeout), &ts, sizeof(ts));
898 if (error)
899 return error;
900 tsp = &ts;
901 } else {
902 tsp = NULL;
905 return mq_send1(curthread->td_lwp, SCARG(uap, mqdes), SCARG(uap, msg_ptr),
906 SCARG(uap, msg_len), SCARG(uap, msg_prio), tsp);
910 sys_mq_notify(struct mq_notify_args *uap)
912 /* {
913 syscallarg(mqd_t) mqdes;
914 syscallarg(const struct sigevent *) notification;
915 } */
916 file_t *fp = NULL;
917 struct mqueue *mq;
918 struct sigevent sig;
919 int error;
921 if (SCARG(uap, notification)) {
922 /* Get the signal from user-space */
923 error = copyin(SCARG(uap, notification), &sig,
924 sizeof(struct sigevent));
925 if (error)
926 return error;
929 error = mqueue_get(curthread->td_lwp, SCARG(uap, mqdes), &fp);
930 if (error)
931 return error;
932 mq = fp->f_data;
934 if (SCARG(uap, notification)) {
935 /* Register notification: set the signal and target process */
936 if (mq->mq_notify_proc == NULL) {
937 memcpy(&mq->mq_sig_notify, &sig,
938 sizeof(struct sigevent));
939 mq->mq_notify_proc = curproc;
940 } else {
941 /* Fail if someone else already registered */
942 error = EBUSY;
944 } else {
945 /* Unregister the notification */
946 mq->mq_notify_proc = NULL;
948 lockmgr(&mq->mq_mtx, LK_RELEASE);
949 fdrop(fp);
951 return error;
955 sys_mq_getattr(struct mq_getattr_args *uap)
957 /* {
958 syscallarg(mqd_t) mqdes;
959 syscallarg(struct mq_attr *) mqstat;
960 } */
961 file_t *fp = NULL;
962 struct mqueue *mq;
963 struct mq_attr attr;
964 int error;
966 /* Get the message queue */
967 error = mqueue_get(curthread->td_lwp, SCARG(uap, mqdes), &fp);
968 if (error)
969 return error;
970 mq = fp->f_data;
971 memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr));
972 lockmgr(&mq->mq_mtx, LK_RELEASE);
973 fdrop(fp);
975 return copyout(&attr, SCARG(uap, mqstat), sizeof(struct mq_attr));
979 sys_mq_setattr(struct mq_setattr_args *uap)
981 /* {
982 syscallarg(mqd_t) mqdes;
983 syscallarg(const struct mq_attr *) mqstat;
984 syscallarg(struct mq_attr *) omqstat;
985 } */
986 file_t *fp = NULL;
987 struct mqueue *mq;
988 struct mq_attr attr;
989 int error, nonblock;
991 error = copyin(SCARG(uap, mqstat), &attr, sizeof(struct mq_attr));
992 if (error)
993 return error;
994 nonblock = (attr.mq_flags & O_NONBLOCK);
996 /* Get the message queue */
997 error = mqueue_get(curthread->td_lwp, SCARG(uap, mqdes), &fp);
998 if (error)
999 return error;
1000 mq = fp->f_data;
1002 /* Copy the old attributes, if needed */
1003 if (SCARG(uap, omqstat))
1004 memcpy(&attr, &mq->mq_attrib, sizeof(struct mq_attr));
1006 /* Ignore everything, except O_NONBLOCK */
1007 if (nonblock)
1008 mq->mq_attrib.mq_flags |= O_NONBLOCK;
1009 else
1010 mq->mq_attrib.mq_flags &= ~O_NONBLOCK;
1012 lockmgr(&mq->mq_mtx, LK_RELEASE);
1013 fdrop(fp);
1016 * Copy the data to the user-space.
1017 * Note: According to POSIX, the new attributes should not be set in
1018 * case of fail - this would be violated.
1020 if (SCARG(uap, omqstat))
1021 error = copyout(&attr, SCARG(uap, omqstat),
1022 sizeof(struct mq_attr));
1024 return error;
1028 sys_mq_unlink(struct mq_unlink_args *uap)
1030 /* {
1031 syscallarg(const char *) name;
1032 } */
1033 struct thread *td = curthread;
1034 struct mqueue *mq;
1035 char *name;
1036 int error, refcnt = 0;
1038 /* Get the name from the user-space */
1039 name = kmalloc(MQ_NAMELEN, M_MQBUF, M_WAITOK | M_ZERO);
1040 error = copyinstr(SCARG(uap, name), name, MQ_NAMELEN - 1, NULL);
1041 if (error) {
1042 kfree(name, M_MQBUF);
1043 return error;
1046 /* Lookup for this file */
1047 lockmgr(&mqlist_mtx, LK_EXCLUSIVE);
1048 mq = mqueue_lookup(name);
1049 if (mq == NULL) {
1050 error = ENOENT;
1051 goto error;
1054 /* Check the permissions */
1055 if (td->td_ucred->cr_uid != mq->mq_euid &&
1056 priv_check(td, PRIV_ROOT) != 0) {
1057 lockmgr(&mq->mq_mtx, LK_RELEASE);
1058 error = EACCES;
1059 goto error;
1062 /* Mark message queue as unlinking, before leaving the window */
1063 mq->mq_attrib.mq_flags |= MQ_UNLINK;
1065 /* Wake up all waiters, if there are such */
1066 wakeup(&mq->mq_send_cv);
1067 wakeup(&mq->mq_recv_cv);
1069 selwakeup(&mq->mq_rsel);
1070 selwakeup(&mq->mq_wsel);
1072 refcnt = mq->mq_refcnt;
1073 if (refcnt == 0)
1074 LIST_REMOVE(mq, mq_list);
1076 lockmgr(&mq->mq_mtx, LK_RELEASE);
1077 error:
1078 lockmgr(&mqlist_mtx, LK_RELEASE);
1081 * If there are no references - destroy the message
1082 * queue, otherwise, the last mq_close() will do that.
1084 if (error == 0 && refcnt == 0)
1085 mqueue_destroy(mq);
1087 kfree(name, M_MQBUF);
1088 return error;
1092 * SysCtl.
1094 SYSCTL_NODE(_kern, OID_AUTO, mqueue,
1095 CTLFLAG_RW, 0, "Message queue options");
1097 SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_open_max,
1098 CTLFLAG_RW, &mq_open_max, 0,
1099 "Maximal number of message queue descriptors per process");
1101 SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_prio_max,
1102 CTLFLAG_RW, &mq_prio_max, 0,
1103 "Maximal priority of the message");
1105 SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_max_msgsize,
1106 CTLFLAG_RW, &mq_max_msgsize, 0,
1107 "Maximal allowed size of the message");
1109 SYSCTL_INT(_kern_mqueue, OID_AUTO, mq_def_maxmsg,
1110 CTLFLAG_RW, &mq_def_maxmsg, 0,
1111 "Default maximal message count");
/* Run mqueue_sysinit() during boot, in the SI_SUB_PRE_DRIVERS stage. */
SYSINIT(sys_mqueue_init, SI_SUB_PRE_DRIVERS, SI_ORDER_ANY,
    mqueue_sysinit, NULL);