Linux-2.6.12-rc2
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / net / unix / af_unix.c
blobacc73fe686983d4f71333a636cadd1d3f17e743e
1 /*
2 * NET4: Implementation of BSD Unix domain sockets.
4 * Authors: Alan Cox, <alan.cox@linux.org>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
11 * Version: $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
13 * Fixes:
14 * Linus Torvalds : Assorted bug cures.
15 * Niibe Yutaka : async I/O support.
16 * Carsten Paeth : PF_UNIX check, address fixes.
17 * Alan Cox : Limit size of allocated blocks.
18 * Alan Cox : Fixed the stupid socketpair bug.
19 * Alan Cox : BSD compatibility fine tuning.
20 * Alan Cox : Fixed a bug in connect when interrupted.
21 * Alan Cox : Sorted out a proper draft version of
22 * file descriptor passing hacked up from
23 * Mike Shaver's work.
24 * Marty Leisner : Fixes to fd passing
25 * Nick Nevin : recvmsg bugfix.
26 * Alan Cox : Started proper garbage collector
27 * Heiko EiBfeldt : Missing verify_area check
28 * Alan Cox : Started POSIXisms
29 * Andreas Schwab : Replace inode by dentry for proper
30 * reference counting
31 * Kirk Petersen : Made this a module
32 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
33 * Lots of bug fixes.
34 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
35 * by above two patches.
36 * Andrea Arcangeli : If possible we block in connect(2)
37 * if the max backlog of the listen socket
38 * has been reached. This won't break
39 * old apps and it will avoid huge amount
40 * of socks hashed (this for unix_gc()
41 * performance reasons).
42 * Security fix that limits the max
43 * number of socks to 2*max_files and
44 * the number of skb queueable in the
45 * dgram receiver.
46 * Artur Skawina : Hash function optimizations
47 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
48 * Malcolm Beattie : Set peercred for socketpair
49 * Michal Ostrowski : Module initialization cleanup.
50 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
51 * the core infrastructure is doing that
52 * for all net proto families now (2.5.69+)
55 * Known differences from reference BSD that was tested:
57 * [TO FIX]
58 * ECONNREFUSED is not returned from one end of a connected() socket to the
59 * other the moment one end closes.
60 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
61 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
62 * [NOT TO FIX]
63 * accept() returns a path name even if the connecting socket has closed
64 * in the meantime (BSD loses the path and gives up).
65 * accept() returns 0 length path for an unbound connector. BSD returns 16
66 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
67 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
68 * BSD af_unix apparently has connect forgetting to block properly.
69 * (need to check this with the POSIX spec in detail)
71 * Differences from 2.0.0-11-... (ANK)
72 * Bug fixes and improvements.
73 * - client shutdown killed server socket.
74 * - removed all useless cli/sti pairs.
76 * Semantic changes/extensions.
77 * - generic control message passing.
78 * - SCM_CREDENTIALS control message.
79 * - "Abstract" (not FS based) socket bindings.
80 * Abstract names are sequences of bytes (not zero terminated)
81 * started by 0, so that this name space does not intersect
82 * with BSD names.
85 #include <linux/module.h>
86 #include <linux/config.h>
87 #include <linux/kernel.h>
88 #include <linux/major.h>
89 #include <linux/signal.h>
90 #include <linux/sched.h>
91 #include <linux/errno.h>
92 #include <linux/string.h>
93 #include <linux/stat.h>
94 #include <linux/dcache.h>
95 #include <linux/namei.h>
96 #include <linux/socket.h>
97 #include <linux/un.h>
98 #include <linux/fcntl.h>
99 #include <linux/termios.h>
100 #include <linux/sockios.h>
101 #include <linux/net.h>
102 #include <linux/in.h>
103 #include <linux/fs.h>
104 #include <linux/slab.h>
105 #include <asm/uaccess.h>
106 #include <linux/skbuff.h>
107 #include <linux/netdevice.h>
108 #include <net/sock.h>
109 #include <linux/tcp.h>
110 #include <net/af_unix.h>
111 #include <linux/proc_fs.h>
112 #include <linux/seq_file.h>
113 #include <net/scm.h>
114 #include <linux/init.h>
115 #include <linux/poll.h>
116 #include <linux/smp_lock.h>
117 #include <linux/rtnetlink.h>
118 #include <linux/mount.h>
119 #include <net/checksum.h>
120 #include <linux/security.h>
122 int sysctl_unix_max_dgram_qlen = 10;
124 struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
125 DEFINE_RWLOCK(unix_table_lock);
126 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
128 #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
130 #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
133 * SMP locking strategy:
134 * hash table is protected with rwlock unix_table_lock
135 * each socket state is protected by separate rwlock.
138 static inline unsigned unix_hash_fold(unsigned hash)
140 hash ^= hash>>16;
141 hash ^= hash>>8;
142 return hash&(UNIX_HASH_SIZE-1);
145 #define unix_peer(sk) (unix_sk(sk)->peer)
147 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
149 return unix_peer(osk) == sk;
152 static inline int unix_may_send(struct sock *sk, struct sock *osk)
154 return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
157 static struct sock *unix_peer_get(struct sock *s)
159 struct sock *peer;
161 unix_state_rlock(s);
162 peer = unix_peer(s);
163 if (peer)
164 sock_hold(peer);
165 unix_state_runlock(s);
166 return peer;
169 static inline void unix_release_addr(struct unix_address *addr)
171 if (atomic_dec_and_test(&addr->refcnt))
172 kfree(addr);
176 * Check unix socket name:
177 * - should be not zero length.
178 * - if started by not zero, should be NULL terminated (FS object)
179 * - if started by zero, it is abstract name.
182 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
184 if (len <= sizeof(short) || len > sizeof(*sunaddr))
185 return -EINVAL;
186 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
187 return -EINVAL;
188 if (sunaddr->sun_path[0]) {
190 * This may look like an off by one error but it is a bit more
191 * subtle. 108 is the longest valid AF_UNIX path for a binding.
192 * sun_path[108] doesnt as such exist. However in kernel space
193 * we are guaranteed that it is a valid memory location in our
194 * kernel address buffer.
196 ((char *)sunaddr)[len]=0;
197 len = strlen(sunaddr->sun_path)+1+sizeof(short);
198 return len;
201 *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
202 return len;
/*
 * Unhash @sk.  No locking is done here; unix_remove_socket() is the
 * variant that takes unix_table_lock itself.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
/*
 * Hash @sk onto @list.  The socket is expected to be unhashed on entry
 * (BUG_TRAP otherwise).  No locking is done here; unix_insert_socket()
 * is the variant that takes unix_table_lock itself.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	BUG_TRAP(sk_unhashed(sk));
	sk_add_node(sk, list);
}
216 static inline void unix_remove_socket(struct sock *sk)
218 write_lock(&unix_table_lock);
219 __unix_remove_socket(sk);
220 write_unlock(&unix_table_lock);
223 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
225 write_lock(&unix_table_lock);
226 __unix_insert_socket(list, sk);
227 write_unlock(&unix_table_lock);
230 static struct sock *__unix_find_socket_byname(struct sockaddr_un *sunname,
231 int len, int type, unsigned hash)
233 struct sock *s;
234 struct hlist_node *node;
236 sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
237 struct unix_sock *u = unix_sk(s);
239 if (u->addr->len == len &&
240 !memcmp(u->addr->name, sunname, len))
241 goto found;
243 s = NULL;
244 found:
245 return s;
248 static inline struct sock *unix_find_socket_byname(struct sockaddr_un *sunname,
249 int len, int type,
250 unsigned hash)
252 struct sock *s;
254 read_lock(&unix_table_lock);
255 s = __unix_find_socket_byname(sunname, len, type, hash);
256 if (s)
257 sock_hold(s);
258 read_unlock(&unix_table_lock);
259 return s;
262 static struct sock *unix_find_socket_byinode(struct inode *i)
264 struct sock *s;
265 struct hlist_node *node;
267 read_lock(&unix_table_lock);
268 sk_for_each(s, node,
269 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
270 struct dentry *dentry = unix_sk(s)->dentry;
272 if(dentry && dentry->d_inode == i)
274 sock_hold(s);
275 goto found;
278 s = NULL;
279 found:
280 read_unlock(&unix_table_lock);
281 return s;
284 static inline int unix_writable(struct sock *sk)
286 return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
289 static void unix_write_space(struct sock *sk)
291 read_lock(&sk->sk_callback_lock);
292 if (unix_writable(sk)) {
293 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
294 wake_up_interruptible(sk->sk_sleep);
295 sk_wake_async(sk, 2, POLL_OUT);
297 read_unlock(&sk->sk_callback_lock);
300 /* When dgram socket disconnects (or changes its peer), we clear its receive
301 * queue of packets arrived from previous peer. First, it allows to do
302 * flow control based only on wmem_alloc; second, sk connected to peer
303 * may receive messages only from that peer. */
304 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
306 if (skb_queue_len(&sk->sk_receive_queue)) {
307 skb_queue_purge(&sk->sk_receive_queue);
308 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
310 /* If one link of bidirectional dgram pipe is disconnected,
311 * we signal error. Messages are lost. Do not make this,
312 * when peer was not connected to us.
314 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
315 other->sk_err = ECONNRESET;
316 other->sk_error_report(other);
321 static void unix_sock_destructor(struct sock *sk)
323 struct unix_sock *u = unix_sk(sk);
325 skb_queue_purge(&sk->sk_receive_queue);
327 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
328 BUG_TRAP(sk_unhashed(sk));
329 BUG_TRAP(!sk->sk_socket);
330 if (!sock_flag(sk, SOCK_DEAD)) {
331 printk("Attempt to release alive unix socket: %p\n", sk);
332 return;
335 if (u->addr)
336 unix_release_addr(u->addr);
338 atomic_dec(&unix_nr_socks);
339 #ifdef UNIX_REFCNT_DEBUG
340 printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
341 #endif
344 static int unix_release_sock (struct sock *sk, int embrion)
346 struct unix_sock *u = unix_sk(sk);
347 struct dentry *dentry;
348 struct vfsmount *mnt;
349 struct sock *skpair;
350 struct sk_buff *skb;
351 int state;
353 unix_remove_socket(sk);
355 /* Clear state */
356 unix_state_wlock(sk);
357 sock_orphan(sk);
358 sk->sk_shutdown = SHUTDOWN_MASK;
359 dentry = u->dentry;
360 u->dentry = NULL;
361 mnt = u->mnt;
362 u->mnt = NULL;
363 state = sk->sk_state;
364 sk->sk_state = TCP_CLOSE;
365 unix_state_wunlock(sk);
367 wake_up_interruptible_all(&u->peer_wait);
369 skpair=unix_peer(sk);
371 if (skpair!=NULL) {
372 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
373 unix_state_wlock(skpair);
374 /* No more writes */
375 skpair->sk_shutdown = SHUTDOWN_MASK;
376 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
377 skpair->sk_err = ECONNRESET;
378 unix_state_wunlock(skpair);
379 skpair->sk_state_change(skpair);
380 read_lock(&skpair->sk_callback_lock);
381 sk_wake_async(skpair,1,POLL_HUP);
382 read_unlock(&skpair->sk_callback_lock);
384 sock_put(skpair); /* It may now die */
385 unix_peer(sk) = NULL;
388 /* Try to flush out this socket. Throw out buffers at least */
390 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
391 if (state==TCP_LISTEN)
392 unix_release_sock(skb->sk, 1);
393 /* passed fds are erased in the kfree_skb hook */
394 kfree_skb(skb);
397 if (dentry) {
398 dput(dentry);
399 mntput(mnt);
402 sock_put(sk);
404 /* ---- Socket is dead now and most probably destroyed ---- */
407 * Fixme: BSD difference: In BSD all sockets connected to use get
408 * ECONNRESET and we die on the spot. In Linux we behave
409 * like files and pipes do and wait for the last
410 * dereference.
412 * Can't we simply set sock->err?
414 * What the above comment does talk about? --ANK(980817)
417 if (atomic_read(&unix_tot_inflight))
418 unix_gc(); /* Garbage collect fds */
420 return 0;
423 static int unix_listen(struct socket *sock, int backlog)
425 int err;
426 struct sock *sk = sock->sk;
427 struct unix_sock *u = unix_sk(sk);
429 err = -EOPNOTSUPP;
430 if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
431 goto out; /* Only stream/seqpacket sockets accept */
432 err = -EINVAL;
433 if (!u->addr)
434 goto out; /* No listens on an unbound socket */
435 unix_state_wlock(sk);
436 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
437 goto out_unlock;
438 if (backlog > sk->sk_max_ack_backlog)
439 wake_up_interruptible_all(&u->peer_wait);
440 sk->sk_max_ack_backlog = backlog;
441 sk->sk_state = TCP_LISTEN;
442 /* set credentials so connect can copy them */
443 sk->sk_peercred.pid = current->tgid;
444 sk->sk_peercred.uid = current->euid;
445 sk->sk_peercred.gid = current->egid;
446 err = 0;
448 out_unlock:
449 unix_state_wunlock(sk);
450 out:
451 return err;
454 static int unix_release(struct socket *);
455 static int unix_bind(struct socket *, struct sockaddr *, int);
456 static int unix_stream_connect(struct socket *, struct sockaddr *,
457 int addr_len, int flags);
458 static int unix_socketpair(struct socket *, struct socket *);
459 static int unix_accept(struct socket *, struct socket *, int);
460 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
461 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
462 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
463 static int unix_shutdown(struct socket *, int);
464 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
465 struct msghdr *, size_t);
466 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
467 struct msghdr *, size_t, int);
468 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
469 struct msghdr *, size_t);
470 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
471 struct msghdr *, size_t, int);
472 static int unix_dgram_connect(struct socket *, struct sockaddr *,
473 int, int);
474 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
475 struct msghdr *, size_t);
477 static struct proto_ops unix_stream_ops = {
478 .family = PF_UNIX,
479 .owner = THIS_MODULE,
480 .release = unix_release,
481 .bind = unix_bind,
482 .connect = unix_stream_connect,
483 .socketpair = unix_socketpair,
484 .accept = unix_accept,
485 .getname = unix_getname,
486 .poll = unix_poll,
487 .ioctl = unix_ioctl,
488 .listen = unix_listen,
489 .shutdown = unix_shutdown,
490 .setsockopt = sock_no_setsockopt,
491 .getsockopt = sock_no_getsockopt,
492 .sendmsg = unix_stream_sendmsg,
493 .recvmsg = unix_stream_recvmsg,
494 .mmap = sock_no_mmap,
495 .sendpage = sock_no_sendpage,
498 static struct proto_ops unix_dgram_ops = {
499 .family = PF_UNIX,
500 .owner = THIS_MODULE,
501 .release = unix_release,
502 .bind = unix_bind,
503 .connect = unix_dgram_connect,
504 .socketpair = unix_socketpair,
505 .accept = sock_no_accept,
506 .getname = unix_getname,
507 .poll = datagram_poll,
508 .ioctl = unix_ioctl,
509 .listen = sock_no_listen,
510 .shutdown = unix_shutdown,
511 .setsockopt = sock_no_setsockopt,
512 .getsockopt = sock_no_getsockopt,
513 .sendmsg = unix_dgram_sendmsg,
514 .recvmsg = unix_dgram_recvmsg,
515 .mmap = sock_no_mmap,
516 .sendpage = sock_no_sendpage,
519 static struct proto_ops unix_seqpacket_ops = {
520 .family = PF_UNIX,
521 .owner = THIS_MODULE,
522 .release = unix_release,
523 .bind = unix_bind,
524 .connect = unix_stream_connect,
525 .socketpair = unix_socketpair,
526 .accept = unix_accept,
527 .getname = unix_getname,
528 .poll = datagram_poll,
529 .ioctl = unix_ioctl,
530 .listen = unix_listen,
531 .shutdown = unix_shutdown,
532 .setsockopt = sock_no_setsockopt,
533 .getsockopt = sock_no_getsockopt,
534 .sendmsg = unix_seqpacket_sendmsg,
535 .recvmsg = unix_dgram_recvmsg,
536 .mmap = sock_no_mmap,
537 .sendpage = sock_no_sendpage,
540 static struct proto unix_proto = {
541 .name = "UNIX",
542 .owner = THIS_MODULE,
543 .obj_size = sizeof(struct unix_sock),
546 static struct sock * unix_create1(struct socket *sock)
548 struct sock *sk = NULL;
549 struct unix_sock *u;
551 if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files)
552 goto out;
554 sk = sk_alloc(PF_UNIX, GFP_KERNEL, &unix_proto, 1);
555 if (!sk)
556 goto out;
558 atomic_inc(&unix_nr_socks);
560 sock_init_data(sock,sk);
562 sk->sk_write_space = unix_write_space;
563 sk->sk_max_ack_backlog = sysctl_unix_max_dgram_qlen;
564 sk->sk_destruct = unix_sock_destructor;
565 u = unix_sk(sk);
566 u->dentry = NULL;
567 u->mnt = NULL;
568 rwlock_init(&u->lock);
569 atomic_set(&u->inflight, sock ? 0 : -1);
570 init_MUTEX(&u->readsem); /* single task reading lock */
571 init_waitqueue_head(&u->peer_wait);
572 unix_insert_socket(unix_sockets_unbound, sk);
573 out:
574 return sk;
577 static int unix_create(struct socket *sock, int protocol)
579 if (protocol && protocol != PF_UNIX)
580 return -EPROTONOSUPPORT;
582 sock->state = SS_UNCONNECTED;
584 switch (sock->type) {
585 case SOCK_STREAM:
586 sock->ops = &unix_stream_ops;
587 break;
589 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
590 * nothing uses it.
592 case SOCK_RAW:
593 sock->type=SOCK_DGRAM;
594 case SOCK_DGRAM:
595 sock->ops = &unix_dgram_ops;
596 break;
597 case SOCK_SEQPACKET:
598 sock->ops = &unix_seqpacket_ops;
599 break;
600 default:
601 return -ESOCKTNOSUPPORT;
604 return unix_create1(sock) ? 0 : -ENOMEM;
607 static int unix_release(struct socket *sock)
609 struct sock *sk = sock->sk;
611 if (!sk)
612 return 0;
614 sock->sk = NULL;
616 return unix_release_sock (sk, 0);
619 static int unix_autobind(struct socket *sock)
621 struct sock *sk = sock->sk;
622 struct unix_sock *u = unix_sk(sk);
623 static u32 ordernum = 1;
624 struct unix_address * addr;
625 int err;
627 down(&u->readsem);
629 err = 0;
630 if (u->addr)
631 goto out;
633 err = -ENOMEM;
634 addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
635 if (!addr)
636 goto out;
638 memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
639 addr->name->sun_family = AF_UNIX;
640 atomic_set(&addr->refcnt, 1);
642 retry:
643 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
644 addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
646 write_lock(&unix_table_lock);
647 ordernum = (ordernum+1)&0xFFFFF;
649 if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
650 addr->hash)) {
651 write_unlock(&unix_table_lock);
652 /* Sanity yield. It is unusual case, but yet... */
653 if (!(ordernum&0xFF))
654 yield();
655 goto retry;
657 addr->hash ^= sk->sk_type;
659 __unix_remove_socket(sk);
660 u->addr = addr;
661 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
662 write_unlock(&unix_table_lock);
663 err = 0;
665 out: up(&u->readsem);
666 return err;
/*
 * Resolve a destination address to a socket of the requested @type.
 *
 * Filesystem names are looked up through the VFS (write permission on
 * the inode is required) and matched by inode; abstract names are looked
 * up in the hash table.  On success the socket is returned with a
 * reference held and *error is not written.  On failure NULL is returned
 * and *error holds the errno: the lookup/permission error,
 * -ECONNREFUSED when nothing is bound there, or -EPROTOTYPE when the
 * bound socket has a different type.
 */
static struct sock *unix_find_other(struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	struct sock *u;
	struct nameidata nd;
	int err = 0;

	if (sunname->sun_path[0]) {
		err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
		if (err)
			goto fail;
		err = permission(nd.dentry->d_inode, MAY_WRITE, &nd);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(nd.dentry->d_inode);
		if (u == NULL)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(nd.mnt, nd.dentry);

		path_release(&nd);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		struct dentry *dentry;

		err = -ECONNREFUSED;
		u = unix_find_socket_byname(sunname, len, type, hash);
		if (u == NULL)
			goto fail;

		dentry = unix_sk(u)->dentry;
		if (dentry)
			touch_atime(unix_sk(u)->mnt, dentry);
	}
	return u;

put_fail:
	path_release(&nd);
fail:
	*error = err;
	return NULL;
}
722 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
724 struct sock *sk = sock->sk;
725 struct unix_sock *u = unix_sk(sk);
726 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
727 struct dentry * dentry = NULL;
728 struct nameidata nd;
729 int err;
730 unsigned hash;
731 struct unix_address *addr;
732 struct hlist_head *list;
734 err = -EINVAL;
735 if (sunaddr->sun_family != AF_UNIX)
736 goto out;
738 if (addr_len==sizeof(short)) {
739 err = unix_autobind(sock);
740 goto out;
743 err = unix_mkname(sunaddr, addr_len, &hash);
744 if (err < 0)
745 goto out;
746 addr_len = err;
748 down(&u->readsem);
750 err = -EINVAL;
751 if (u->addr)
752 goto out_up;
754 err = -ENOMEM;
755 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
756 if (!addr)
757 goto out_up;
759 memcpy(addr->name, sunaddr, addr_len);
760 addr->len = addr_len;
761 addr->hash = hash ^ sk->sk_type;
762 atomic_set(&addr->refcnt, 1);
764 if (sunaddr->sun_path[0]) {
765 unsigned int mode;
766 err = 0;
768 * Get the parent directory, calculate the hash for last
769 * component.
771 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
772 if (err)
773 goto out_mknod_parent;
775 * Yucky last component or no last component at all?
776 * (foo/., foo/.., /////)
778 err = -EEXIST;
779 if (nd.last_type != LAST_NORM)
780 goto out_mknod;
782 * Lock the directory.
784 down(&nd.dentry->d_inode->i_sem);
786 * Do the final lookup.
788 dentry = lookup_hash(&nd.last, nd.dentry);
789 err = PTR_ERR(dentry);
790 if (IS_ERR(dentry))
791 goto out_mknod_unlock;
792 err = -ENOENT;
794 * Special case - lookup gave negative, but... we had foo/bar/
795 * From the vfs_mknod() POV we just have a negative dentry -
796 * all is fine. Let's be bastards - you had / on the end, you've
797 * been asking for (non-existent) directory. -ENOENT for you.
799 if (nd.last.name[nd.last.len] && !dentry->d_inode)
800 goto out_mknod_dput;
802 * All right, let's create it.
804 mode = S_IFSOCK |
805 (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
806 err = vfs_mknod(nd.dentry->d_inode, dentry, mode, 0);
807 if (err)
808 goto out_mknod_dput;
809 up(&nd.dentry->d_inode->i_sem);
810 dput(nd.dentry);
811 nd.dentry = dentry;
813 addr->hash = UNIX_HASH_SIZE;
816 write_lock(&unix_table_lock);
818 if (!sunaddr->sun_path[0]) {
819 err = -EADDRINUSE;
820 if (__unix_find_socket_byname(sunaddr, addr_len,
821 sk->sk_type, hash)) {
822 unix_release_addr(addr);
823 goto out_unlock;
826 list = &unix_socket_table[addr->hash];
827 } else {
828 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
829 u->dentry = nd.dentry;
830 u->mnt = nd.mnt;
833 err = 0;
834 __unix_remove_socket(sk);
835 u->addr = addr;
836 __unix_insert_socket(list, sk);
838 out_unlock:
839 write_unlock(&unix_table_lock);
840 out_up:
841 up(&u->readsem);
842 out:
843 return err;
845 out_mknod_dput:
846 dput(dentry);
847 out_mknod_unlock:
848 up(&nd.dentry->d_inode->i_sem);
849 out_mknod:
850 path_release(&nd);
851 out_mknod_parent:
852 if (err==-EEXIST)
853 err=-EADDRINUSE;
854 unix_release_addr(addr);
855 goto out_up;
858 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
859 int alen, int flags)
861 struct sock *sk = sock->sk;
862 struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
863 struct sock *other;
864 unsigned hash;
865 int err;
867 if (addr->sa_family != AF_UNSPEC) {
868 err = unix_mkname(sunaddr, alen, &hash);
869 if (err < 0)
870 goto out;
871 alen = err;
873 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
874 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
875 goto out;
877 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
878 if (!other)
879 goto out;
881 unix_state_wlock(sk);
883 err = -EPERM;
884 if (!unix_may_send(sk, other))
885 goto out_unlock;
887 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
888 if (err)
889 goto out_unlock;
891 } else {
893 * 1003.1g breaking connected state with AF_UNSPEC
895 other = NULL;
896 unix_state_wlock(sk);
900 * If it was connected, reconnect.
902 if (unix_peer(sk)) {
903 struct sock *old_peer = unix_peer(sk);
904 unix_peer(sk)=other;
905 unix_state_wunlock(sk);
907 if (other != old_peer)
908 unix_dgram_disconnected(sk, old_peer);
909 sock_put(old_peer);
910 } else {
911 unix_peer(sk)=other;
912 unix_state_wunlock(sk);
914 return 0;
916 out_unlock:
917 unix_state_wunlock(sk);
918 sock_put(other);
919 out:
920 return err;
923 static long unix_wait_for_peer(struct sock *other, long timeo)
925 struct unix_sock *u = unix_sk(other);
926 int sched;
927 DEFINE_WAIT(wait);
929 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
931 sched = !sock_flag(other, SOCK_DEAD) &&
932 !(other->sk_shutdown & RCV_SHUTDOWN) &&
933 (skb_queue_len(&other->sk_receive_queue) >
934 other->sk_max_ack_backlog);
936 unix_state_runlock(other);
938 if (sched)
939 timeo = schedule_timeout(timeo);
941 finish_wait(&u->peer_wait, &wait);
942 return timeo;
/*
 * connect() for stream and seqpacket sockets.
 *
 * A fresh sock (newsk) and a one-byte skb are allocated up front.  Once
 * a listening socket is found and has room in its backlog, newsk is made
 * the peer of @sock's sk, inherits the listener's address and
 * dentry/mnt, and is queued on the listener's receive queue inside the
 * skb for accept() to pick up.
 *
 * Returns 0 on success; -ENOMEM; -ECONNREFUSED when the target is not
 * listening; -EAGAIN when the backlog is full and the send timeout is
 * zero; -EISCONN / -EINVAL depending on this socket's state; the
 * sock_intr_errno() value when interrupted while waiting; or the error
 * from address parsing, autobind, lookup or the security hook.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int state;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags)
	    && !u->addr && (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_rlock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;

	if (skb_queue_len(&other->sk_receive_queue) >
	    other->sk_max_ack_backlog) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* Releases the read lock on other. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab write lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	state = sk->sk_state;

	switch (state) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_wlock(sk);

	if (sk->sk_state != state) {
		unix_state_wunlock(sk);
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sock, other->sk_socket, newsk);
	if (err) {
		unix_state_wunlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk) = sk;
	newsk->sk_state = TCP_ESTABLISHED;
	newsk->sk_type = sk->sk_type;
	newsk->sk_peercred.pid = current->tgid;
	newsk->sk_peercred.uid = current->euid;
	newsk->sk_peercred.gid = current->egid;
	newu = unix_sk(newsk);
	newsk->sk_sleep = &newu->peer_wait;
	otheru = unix_sk(other);

	/* copy address information from listening to new sock*/
	if (otheru->addr) {
		atomic_inc(&otheru->addr->refcnt);
		newu->addr = otheru->addr;
	}
	if (otheru->dentry) {
		newu->dentry = dget(otheru->dentry);
		newu->mnt = mntget(otheru->mnt);
	}

	/* Set credentials */
	sk->sk_peercred = other->sk_peercred;

	sock_hold(newsk);
	unix_peer(sk) = newsk;
	sock->state = SS_CONNECTED;
	sk->sk_state = TCP_ESTABLISHED;

	unix_state_wunlock(sk);

	/* take ten and and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	/* Undo artificially decreased inflight after embrion
	 * is installed to listening socket. */
	atomic_inc(&newu->inflight);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_runlock(other);
	other->sk_data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_runlock(other);

out:
	if (skb)
		kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1122 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1124 struct sock *ska=socka->sk, *skb = sockb->sk;
1126 /* Join our sockets back to back */
1127 sock_hold(ska);
1128 sock_hold(skb);
1129 unix_peer(ska)=skb;
1130 unix_peer(skb)=ska;
1131 ska->sk_peercred.pid = skb->sk_peercred.pid = current->tgid;
1132 ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1133 ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1135 if (ska->sk_type != SOCK_DGRAM) {
1136 ska->sk_state = TCP_ESTABLISHED;
1137 skb->sk_state = TCP_ESTABLISHED;
1138 socka->state = SS_CONNECTED;
1139 sockb->state = SS_CONNECTED;
1141 return 0;
1144 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1146 struct sock *sk = sock->sk;
1147 struct sock *tsk;
1148 struct sk_buff *skb;
1149 int err;
1151 err = -EOPNOTSUPP;
1152 if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1153 goto out;
1155 err = -EINVAL;
1156 if (sk->sk_state != TCP_LISTEN)
1157 goto out;
1159 /* If socket state is TCP_LISTEN it cannot change (for now...),
1160 * so that no locks are necessary.
1163 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1164 if (!skb) {
1165 /* This means receive shutdown. */
1166 if (err == 0)
1167 err = -EINVAL;
1168 goto out;
1171 tsk = skb->sk;
1172 skb_free_datagram(sk, skb);
1173 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1175 /* attach accepted sock to socket */
1176 unix_state_wlock(tsk);
1177 newsock->state = SS_CONNECTED;
1178 sock_graft(tsk, newsock);
1179 unix_state_wunlock(tsk);
1180 return 0;
1182 out:
1183 return err;
1187 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1189 struct sock *sk = sock->sk;
1190 struct unix_sock *u;
1191 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1192 int err = 0;
1194 if (peer) {
1195 sk = unix_peer_get(sk);
1197 err = -ENOTCONN;
1198 if (!sk)
1199 goto out;
1200 err = 0;
1201 } else {
1202 sock_hold(sk);
1205 u = unix_sk(sk);
1206 unix_state_rlock(sk);
1207 if (!u->addr) {
1208 sunaddr->sun_family = AF_UNIX;
1209 sunaddr->sun_path[0] = 0;
1210 *uaddr_len = sizeof(short);
1211 } else {
1212 struct unix_address *addr = u->addr;
1214 *uaddr_len = addr->len;
1215 memcpy(sunaddr, addr->name, *uaddr_len);
1217 unix_state_runlock(sk);
1218 sock_put(sk);
1219 out:
1220 return err;
1223 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1225 int i;
1227 scm->fp = UNIXCB(skb).fp;
1228 skb->destructor = sock_wfree;
1229 UNIXCB(skb).fp = NULL;
1231 for (i=scm->fp->count-1; i>=0; i--)
1232 unix_notinflight(scm->fp->fp[i]);
1235 static void unix_destruct_fds(struct sk_buff *skb)
1237 struct scm_cookie scm;
1238 memset(&scm, 0, sizeof(scm));
1239 unix_detach_fds(&scm, skb);
1241 /* Alas, it calls VFS */
1242 /* So fscking what? fput() had been SMP-safe since the last Summer */
1243 scm_destroy(&scm);
1244 sock_wfree(skb);
1247 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1249 int i;
1250 for (i=scm->fp->count-1; i>=0; i--)
1251 unix_inflight(scm->fp->fp[i]);
1252 UNIXCB(skb).fp = scm->fp;
1253 skb->destructor = unix_destruct_fds;
1254 scm->fp = NULL;
1258 * Send AF_UNIX data.
1261 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1262 struct msghdr *msg, size_t len)
1264 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1265 struct sock *sk = sock->sk;
1266 struct unix_sock *u = unix_sk(sk);
1267 struct sockaddr_un *sunaddr=msg->msg_name;
1268 struct sock *other = NULL;
1269 int namelen = 0; /* fake GCC */
1270 int err;
1271 unsigned hash;
1272 struct sk_buff *skb;
1273 long timeo;
1274 struct scm_cookie tmp_scm;
1276 if (NULL == siocb->scm)
1277 siocb->scm = &tmp_scm;
1278 err = scm_send(sock, msg, siocb->scm);
1279 if (err < 0)
1280 return err;
1282 err = -EOPNOTSUPP;
1283 if (msg->msg_flags&MSG_OOB)
1284 goto out;
1286 if (msg->msg_namelen) {
1287 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1288 if (err < 0)
1289 goto out;
1290 namelen = err;
1291 } else {
1292 sunaddr = NULL;
1293 err = -ENOTCONN;
1294 other = unix_peer_get(sk);
1295 if (!other)
1296 goto out;
1299 if (test_bit(SOCK_PASSCRED, &sock->flags)
1300 && !u->addr && (err = unix_autobind(sock)) != 0)
1301 goto out;
1303 err = -EMSGSIZE;
1304 if (len > sk->sk_sndbuf - 32)
1305 goto out;
1307 skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1308 if (skb==NULL)
1309 goto out;
1311 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1312 if (siocb->scm->fp)
1313 unix_attach_fds(siocb->scm, skb);
1315 skb->h.raw = skb->data;
1316 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1317 if (err)
1318 goto out_free;
1320 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1322 restart:
1323 if (!other) {
1324 err = -ECONNRESET;
1325 if (sunaddr == NULL)
1326 goto out_free;
1328 other = unix_find_other(sunaddr, namelen, sk->sk_type,
1329 hash, &err);
1330 if (other==NULL)
1331 goto out_free;
1334 unix_state_rlock(other);
1335 err = -EPERM;
1336 if (!unix_may_send(sk, other))
1337 goto out_unlock;
1339 if (sock_flag(other, SOCK_DEAD)) {
1341 * Check with 1003.1g - what should
1342 * datagram error
1344 unix_state_runlock(other);
1345 sock_put(other);
1347 err = 0;
1348 unix_state_wlock(sk);
1349 if (unix_peer(sk) == other) {
1350 unix_peer(sk)=NULL;
1351 unix_state_wunlock(sk);
1353 unix_dgram_disconnected(sk, other);
1354 sock_put(other);
1355 err = -ECONNREFUSED;
1356 } else {
1357 unix_state_wunlock(sk);
1360 other = NULL;
1361 if (err)
1362 goto out_free;
1363 goto restart;
1366 err = -EPIPE;
1367 if (other->sk_shutdown & RCV_SHUTDOWN)
1368 goto out_unlock;
1370 if (sk->sk_type != SOCK_SEQPACKET) {
1371 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1372 if (err)
1373 goto out_unlock;
1376 if (unix_peer(other) != sk &&
1377 (skb_queue_len(&other->sk_receive_queue) >
1378 other->sk_max_ack_backlog)) {
1379 if (!timeo) {
1380 err = -EAGAIN;
1381 goto out_unlock;
1384 timeo = unix_wait_for_peer(other, timeo);
1386 err = sock_intr_errno(timeo);
1387 if (signal_pending(current))
1388 goto out_free;
1390 goto restart;
1393 skb_queue_tail(&other->sk_receive_queue, skb);
1394 unix_state_runlock(other);
1395 other->sk_data_ready(other, len);
1396 sock_put(other);
1397 scm_destroy(siocb->scm);
1398 return len;
1400 out_unlock:
1401 unix_state_runlock(other);
1402 out_free:
1403 kfree_skb(skb);
1404 out:
1405 if (other)
1406 sock_put(other);
1407 scm_destroy(siocb->scm);
1408 return err;
1412 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1413 struct msghdr *msg, size_t len)
1415 struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1416 struct sock *sk = sock->sk;
1417 struct sock *other = NULL;
1418 struct sockaddr_un *sunaddr=msg->msg_name;
1419 int err,size;
1420 struct sk_buff *skb;
1421 int sent=0;
1422 struct scm_cookie tmp_scm;
1424 if (NULL == siocb->scm)
1425 siocb->scm = &tmp_scm;
1426 err = scm_send(sock, msg, siocb->scm);
1427 if (err < 0)
1428 return err;
1430 err = -EOPNOTSUPP;
1431 if (msg->msg_flags&MSG_OOB)
1432 goto out_err;
1434 if (msg->msg_namelen) {
1435 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1436 goto out_err;
1437 } else {
1438 sunaddr = NULL;
1439 err = -ENOTCONN;
1440 other = unix_peer_get(sk);
1441 if (!other)
1442 goto out_err;
1445 if (sk->sk_shutdown & SEND_SHUTDOWN)
1446 goto pipe_err;
1448 while(sent < len)
1451 * Optimisation for the fact that under 0.01% of X messages typically
1452 * need breaking up.
1455 size=len-sent;
1457 /* Keep two messages in the pipe so it schedules better */
1458 if (size > sk->sk_sndbuf / 2 - 64)
1459 size = sk->sk_sndbuf / 2 - 64;
1461 if (size > SKB_MAX_ALLOC)
1462 size = SKB_MAX_ALLOC;
1465 * Grab a buffer
1468 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1470 if (skb==NULL)
1471 goto out_err;
1474 * If you pass two values to the sock_alloc_send_skb
1475 * it tries to grab the large buffer with GFP_NOFS
1476 * (which can fail easily), and if it fails grab the
1477 * fallback size buffer which is under a page and will
1478 * succeed. [Alan]
1480 size = min_t(int, size, skb_tailroom(skb));
1482 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1483 if (siocb->scm->fp)
1484 unix_attach_fds(siocb->scm, skb);
1486 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1487 kfree_skb(skb);
1488 goto out_err;
1491 unix_state_rlock(other);
1493 if (sock_flag(other, SOCK_DEAD) ||
1494 (other->sk_shutdown & RCV_SHUTDOWN))
1495 goto pipe_err_free;
1497 skb_queue_tail(&other->sk_receive_queue, skb);
1498 unix_state_runlock(other);
1499 other->sk_data_ready(other, size);
1500 sent+=size;
1502 sock_put(other);
1504 scm_destroy(siocb->scm);
1505 siocb->scm = NULL;
1507 return sent;
1509 pipe_err_free:
1510 unix_state_runlock(other);
1511 kfree_skb(skb);
1512 pipe_err:
1513 if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1514 send_sig(SIGPIPE,current,0);
1515 err = -EPIPE;
1516 out_err:
1517 if (other)
1518 sock_put(other);
1519 scm_destroy(siocb->scm);
1520 siocb->scm = NULL;
1521 return sent ? : err;
1524 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1525 struct msghdr *msg, size_t len)
1527 int err;
1528 struct sock *sk = sock->sk;
1530 err = sock_error(sk);
1531 if (err)
1532 return err;
1534 if (sk->sk_state != TCP_ESTABLISHED)
1535 return -ENOTCONN;
1537 if (msg->msg_namelen)
1538 msg->msg_namelen = 0;
1540 return unix_dgram_sendmsg(kiocb, sock, msg, len);
1543 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1545 struct unix_sock *u = unix_sk(sk);
1547 msg->msg_namelen = 0;
1548 if (u->addr) {
1549 msg->msg_namelen = u->addr->len;
1550 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1554 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1555 struct msghdr *msg, size_t size,
1556 int flags)
1558 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1559 struct scm_cookie tmp_scm;
1560 struct sock *sk = sock->sk;
1561 struct unix_sock *u = unix_sk(sk);
1562 int noblock = flags & MSG_DONTWAIT;
1563 struct sk_buff *skb;
1564 int err;
1566 err = -EOPNOTSUPP;
1567 if (flags&MSG_OOB)
1568 goto out;
1570 msg->msg_namelen = 0;
1572 down(&u->readsem);
1574 skb = skb_recv_datagram(sk, flags, noblock, &err);
1575 if (!skb)
1576 goto out_unlock;
1578 wake_up_interruptible(&u->peer_wait);
1580 if (msg->msg_name)
1581 unix_copy_addr(msg, skb->sk);
1583 if (size > skb->len)
1584 size = skb->len;
1585 else if (size < skb->len)
1586 msg->msg_flags |= MSG_TRUNC;
1588 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1589 if (err)
1590 goto out_free;
1592 if (!siocb->scm) {
1593 siocb->scm = &tmp_scm;
1594 memset(&tmp_scm, 0, sizeof(tmp_scm));
1596 siocb->scm->creds = *UNIXCREDS(skb);
1598 if (!(flags & MSG_PEEK))
1600 if (UNIXCB(skb).fp)
1601 unix_detach_fds(siocb->scm, skb);
1603 else
1605 /* It is questionable: on PEEK we could:
1606 - do not return fds - good, but too simple 8)
1607 - return fds, and do not return them on read (old strategy,
1608 apparently wrong)
1609 - clone fds (I chose it for now, it is the most universal
1610 solution)
1612 POSIX 1003.1g does not actually define this clearly
1613 at all. POSIX 1003.1g doesn't define a lot of things
1614 clearly however!
1617 if (UNIXCB(skb).fp)
1618 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1620 err = size;
1622 scm_recv(sock, msg, siocb->scm, flags);
1624 out_free:
1625 skb_free_datagram(sk,skb);
1626 out_unlock:
1627 up(&u->readsem);
1628 out:
1629 return err;
1633 * Sleep until data has arrive. But check for races..
1636 static long unix_stream_data_wait(struct sock * sk, long timeo)
1638 DEFINE_WAIT(wait);
1640 unix_state_rlock(sk);
1642 for (;;) {
1643 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1645 if (skb_queue_len(&sk->sk_receive_queue) ||
1646 sk->sk_err ||
1647 (sk->sk_shutdown & RCV_SHUTDOWN) ||
1648 signal_pending(current) ||
1649 !timeo)
1650 break;
1652 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1653 unix_state_runlock(sk);
1654 timeo = schedule_timeout(timeo);
1655 unix_state_rlock(sk);
1656 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1659 finish_wait(sk->sk_sleep, &wait);
1660 unix_state_runlock(sk);
1661 return timeo;
1666 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1667 struct msghdr *msg, size_t size,
1668 int flags)
1670 struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1671 struct scm_cookie tmp_scm;
1672 struct sock *sk = sock->sk;
1673 struct unix_sock *u = unix_sk(sk);
1674 struct sockaddr_un *sunaddr=msg->msg_name;
1675 int copied = 0;
1676 int check_creds = 0;
1677 int target;
1678 int err = 0;
1679 long timeo;
1681 err = -EINVAL;
1682 if (sk->sk_state != TCP_ESTABLISHED)
1683 goto out;
1685 err = -EOPNOTSUPP;
1686 if (flags&MSG_OOB)
1687 goto out;
1689 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1690 timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1692 msg->msg_namelen = 0;
1694 /* Lock the socket to prevent queue disordering
1695 * while sleeps in memcpy_tomsg
1698 if (!siocb->scm) {
1699 siocb->scm = &tmp_scm;
1700 memset(&tmp_scm, 0, sizeof(tmp_scm));
1703 down(&u->readsem);
1707 int chunk;
1708 struct sk_buff *skb;
1710 skb = skb_dequeue(&sk->sk_receive_queue);
1711 if (skb==NULL)
1713 if (copied >= target)
1714 break;
1717 * POSIX 1003.1g mandates this order.
1720 if ((err = sock_error(sk)) != 0)
1721 break;
1722 if (sk->sk_shutdown & RCV_SHUTDOWN)
1723 break;
1724 err = -EAGAIN;
1725 if (!timeo)
1726 break;
1727 up(&u->readsem);
1729 timeo = unix_stream_data_wait(sk, timeo);
1731 if (signal_pending(current)) {
1732 err = sock_intr_errno(timeo);
1733 goto out;
1735 down(&u->readsem);
1736 continue;
1739 if (check_creds) {
1740 /* Never glue messages from different writers */
1741 if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1742 skb_queue_head(&sk->sk_receive_queue, skb);
1743 break;
1745 } else {
1746 /* Copy credentials */
1747 siocb->scm->creds = *UNIXCREDS(skb);
1748 check_creds = 1;
1751 /* Copy address just once */
1752 if (sunaddr)
1754 unix_copy_addr(msg, skb->sk);
1755 sunaddr = NULL;
1758 chunk = min_t(unsigned int, skb->len, size);
1759 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1760 skb_queue_head(&sk->sk_receive_queue, skb);
1761 if (copied == 0)
1762 copied = -EFAULT;
1763 break;
1765 copied += chunk;
1766 size -= chunk;
1768 /* Mark read part of skb as used */
1769 if (!(flags & MSG_PEEK))
1771 skb_pull(skb, chunk);
1773 if (UNIXCB(skb).fp)
1774 unix_detach_fds(siocb->scm, skb);
1776 /* put the skb back if we didn't use it up.. */
1777 if (skb->len)
1779 skb_queue_head(&sk->sk_receive_queue, skb);
1780 break;
1783 kfree_skb(skb);
1785 if (siocb->scm->fp)
1786 break;
1788 else
1790 /* It is questionable, see note in unix_dgram_recvmsg.
1792 if (UNIXCB(skb).fp)
1793 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1795 /* put message back and return */
1796 skb_queue_head(&sk->sk_receive_queue, skb);
1797 break;
1799 } while (size);
1801 up(&u->readsem);
1802 scm_recv(sock, msg, siocb->scm, flags);
1803 out:
1804 return copied ? : err;
1807 static int unix_shutdown(struct socket *sock, int mode)
1809 struct sock *sk = sock->sk;
1810 struct sock *other;
1812 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1814 if (mode) {
1815 unix_state_wlock(sk);
1816 sk->sk_shutdown |= mode;
1817 other=unix_peer(sk);
1818 if (other)
1819 sock_hold(other);
1820 unix_state_wunlock(sk);
1821 sk->sk_state_change(sk);
1823 if (other &&
1824 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1826 int peer_mode = 0;
1828 if (mode&RCV_SHUTDOWN)
1829 peer_mode |= SEND_SHUTDOWN;
1830 if (mode&SEND_SHUTDOWN)
1831 peer_mode |= RCV_SHUTDOWN;
1832 unix_state_wlock(other);
1833 other->sk_shutdown |= peer_mode;
1834 unix_state_wunlock(other);
1835 other->sk_state_change(other);
1836 read_lock(&other->sk_callback_lock);
1837 if (peer_mode == SHUTDOWN_MASK)
1838 sk_wake_async(other,1,POLL_HUP);
1839 else if (peer_mode & RCV_SHUTDOWN)
1840 sk_wake_async(other,1,POLL_IN);
1841 read_unlock(&other->sk_callback_lock);
1843 if (other)
1844 sock_put(other);
1846 return 0;
1849 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1851 struct sock *sk = sock->sk;
1852 long amount=0;
1853 int err;
1855 switch(cmd)
1857 case SIOCOUTQ:
1858 amount = atomic_read(&sk->sk_wmem_alloc);
1859 err = put_user(amount, (int __user *)arg);
1860 break;
1861 case SIOCINQ:
1863 struct sk_buff *skb;
1865 if (sk->sk_state == TCP_LISTEN) {
1866 err = -EINVAL;
1867 break;
1870 spin_lock(&sk->sk_receive_queue.lock);
1871 if (sk->sk_type == SOCK_STREAM ||
1872 sk->sk_type == SOCK_SEQPACKET) {
1873 skb_queue_walk(&sk->sk_receive_queue, skb)
1874 amount += skb->len;
1875 } else {
1876 skb = skb_peek(&sk->sk_receive_queue);
1877 if (skb)
1878 amount=skb->len;
1880 spin_unlock(&sk->sk_receive_queue.lock);
1881 err = put_user(amount, (int __user *)arg);
1882 break;
1885 default:
1886 err = dev_ioctl(cmd, (void __user *)arg);
1887 break;
1889 return err;
1892 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1894 struct sock *sk = sock->sk;
1895 unsigned int mask;
1897 poll_wait(file, sk->sk_sleep, wait);
1898 mask = 0;
1900 /* exceptional events? */
1901 if (sk->sk_err)
1902 mask |= POLLERR;
1903 if (sk->sk_shutdown == SHUTDOWN_MASK)
1904 mask |= POLLHUP;
1906 /* readable? */
1907 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1908 (sk->sk_shutdown & RCV_SHUTDOWN))
1909 mask |= POLLIN | POLLRDNORM;
1911 /* Connection-based need to check for termination and startup */
1912 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1913 mask |= POLLHUP;
1916 * we set writable also when the other side has shut down the
1917 * connection. This prevents stuck sockets.
1919 if (unix_writable(sk))
1920 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1922 return mask;
1926 #ifdef CONFIG_PROC_FS
1927 static struct sock *unix_seq_idx(int *iter, loff_t pos)
1929 loff_t off = 0;
1930 struct sock *s;
1932 for (s = first_unix_socket(iter); s; s = next_unix_socket(iter, s)) {
1933 if (off == pos)
1934 return s;
1935 ++off;
1937 return NULL;
1941 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
1943 read_lock(&unix_table_lock);
1944 return *pos ? unix_seq_idx(seq->private, *pos - 1) : ((void *) 1);
1947 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1949 ++*pos;
1951 if (v == (void *)1)
1952 return first_unix_socket(seq->private);
1953 return next_unix_socket(seq->private, v);
1956 static void unix_seq_stop(struct seq_file *seq, void *v)
1958 read_unlock(&unix_table_lock);
1961 static int unix_seq_show(struct seq_file *seq, void *v)
1964 if (v == (void *)1)
1965 seq_puts(seq, "Num RefCount Protocol Flags Type St "
1966 "Inode Path\n");
1967 else {
1968 struct sock *s = v;
1969 struct unix_sock *u = unix_sk(s);
1970 unix_state_rlock(s);
1972 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
1974 atomic_read(&s->sk_refcnt),
1976 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1977 s->sk_type,
1978 s->sk_socket ?
1979 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1980 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1981 sock_i_ino(s));
1983 if (u->addr) {
1984 int i, len;
1985 seq_putc(seq, ' ');
1987 i = 0;
1988 len = u->addr->len - sizeof(short);
1989 if (!UNIX_ABSTRACT(s))
1990 len--;
1991 else {
1992 seq_putc(seq, '@');
1993 i++;
1995 for ( ; i < len; i++)
1996 seq_putc(seq, u->addr->name->sun_path[i]);
1998 unix_state_runlock(s);
1999 seq_putc(seq, '\n');
2002 return 0;
2005 static struct seq_operations unix_seq_ops = {
2006 .start = unix_seq_start,
2007 .next = unix_seq_next,
2008 .stop = unix_seq_stop,
2009 .show = unix_seq_show,
2013 static int unix_seq_open(struct inode *inode, struct file *file)
2015 struct seq_file *seq;
2016 int rc = -ENOMEM;
2017 int *iter = kmalloc(sizeof(int), GFP_KERNEL);
2019 if (!iter)
2020 goto out;
2022 rc = seq_open(file, &unix_seq_ops);
2023 if (rc)
2024 goto out_kfree;
2026 seq = file->private_data;
2027 seq->private = iter;
2028 *iter = 0;
2029 out:
2030 return rc;
2031 out_kfree:
2032 kfree(iter);
2033 goto out;
2036 static struct file_operations unix_seq_fops = {
2037 .owner = THIS_MODULE,
2038 .open = unix_seq_open,
2039 .read = seq_read,
2040 .llseek = seq_lseek,
2041 .release = seq_release_private,
2044 #endif
2046 static struct net_proto_family unix_family_ops = {
2047 .family = PF_UNIX,
2048 .create = unix_create,
2049 .owner = THIS_MODULE,
/*
 * sysctl hooks: defined elsewhere when CONFIG_SYSCTL is set, empty
 * inline stubs otherwise, so callers need no #ifdef.
 */
#ifndef CONFIG_SYSCTL
static inline void unix_sysctl_register(void) {}
static inline void unix_sysctl_unregister(void) {}
#else
extern void unix_sysctl_register(void);
extern void unix_sysctl_unregister(void);
#endif
2060 static int __init af_unix_init(void)
2062 int rc = -1;
2063 struct sk_buff *dummy_skb;
2065 if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) {
2066 printk(KERN_CRIT "%s: panic\n", __FUNCTION__);
2067 goto out;
2070 rc = proto_register(&unix_proto, 1);
2071 if (rc != 0) {
2072 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2073 __FUNCTION__);
2074 goto out;
2077 sock_register(&unix_family_ops);
2078 #ifdef CONFIG_PROC_FS
2079 proc_net_fops_create("unix", 0, &unix_seq_fops);
2080 #endif
2081 unix_sysctl_register();
2082 out:
2083 return rc;
2086 static void __exit af_unix_exit(void)
2088 sock_unregister(PF_UNIX);
2089 unix_sysctl_unregister();
2090 proc_net_remove("unix");
2091 proto_unregister(&unix_proto);
2094 module_init(af_unix_init);
2095 module_exit(af_unix_exit);
2097 MODULE_LICENSE("GPL");
2098 MODULE_ALIAS_NETPROTO(PF_UNIX);