2 * NET4: Implementation of BSD Unix domain sockets.
4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko EiBfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
32 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
33 * by above two patches.
34 * Andrea Arcangeli : If possible we block in connect(2)
35 * if the max backlog of the listen socket
36 * has been reached. This won't break
37 * old apps and it will avoid huge amount
38 * of socks hashed (this for unix_gc()
39 * performances reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skb queueable in the
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
53 * Known differences from reference BSD that was tested:
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
59 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * started by 0, so that this name space does not intersect
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
118 static struct hlist_head unix_socket_table
[UNIX_HASH_SIZE
+ 1];
119 static DEFINE_SPINLOCK(unix_table_lock
);
120 static atomic_t unix_nr_socks
= ATOMIC_INIT(0);
122 #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
124 #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
#ifdef CONFIG_SECURITY_NETWORK
/* Copy the sender's LSM security id from the scm cookie into the skb. */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Recover the security id carried by the skb into the receiver's cookie. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* Security networking disabled: security ids are neither stored nor read. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}
#endif /* CONFIG_SECURITY_NETWORK */
145 * SMP locking strategy:
146 * hash table is protected with spinlock unix_table_lock
147 * each socket state is protected by separate spin lock.
150 static inline unsigned unix_hash_fold(__wsum n
)
152 unsigned hash
= (__force
unsigned)n
;
155 return hash
&(UNIX_HASH_SIZE
-1);
158 #define unix_peer(sk) (unix_sk(sk)->peer)
/* Return true if @osk currently names @sk as its peer. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}
165 static inline int unix_may_send(struct sock
*sk
, struct sock
*osk
)
167 return unix_peer(osk
) == NULL
|| unix_our_peer(sk
, osk
);
170 static inline int unix_recvq_full(struct sock
const *sk
)
172 return skb_queue_len(&sk
->sk_receive_queue
) > sk
->sk_max_ack_backlog
;
175 static struct sock
*unix_peer_get(struct sock
*s
)
183 unix_state_unlock(s
);
187 static inline void unix_release_addr(struct unix_address
*addr
)
189 if (atomic_dec_and_test(&addr
->refcnt
))
194 * Check unix socket name:
195 * - should be not zero length.
196 * - if started by not zero, should be NULL terminated (FS object)
197 * - if started by zero, it is abstract name.
200 static int unix_mkname(struct sockaddr_un
*sunaddr
, int len
, unsigned *hashp
)
202 if (len
<= sizeof(short) || len
> sizeof(*sunaddr
))
204 if (!sunaddr
|| sunaddr
->sun_family
!= AF_UNIX
)
206 if (sunaddr
->sun_path
[0]) {
208 * This may look like an off by one error but it is a bit more
209 * subtle. 108 is the longest valid AF_UNIX path for a binding.
210 * sun_path[108] doesn't as such exist. However in kernel space
211 * we are guaranteed that it is a valid memory location in our
212 * kernel address buffer.
214 ((char *)sunaddr
)[len
] = 0;
215 len
= strlen(sunaddr
->sun_path
)+1+sizeof(short);
219 *hashp
= unix_hash_fold(csum_partial(sunaddr
, len
, 0));
/* Unhash @sk from the socket table; caller holds unix_table_lock. */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
/*
 * Hash @sk onto @list; caller holds unix_table_lock.  The socket must
 * not already be hashed (double insertion would corrupt the chain).
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}
234 static inline void unix_remove_socket(struct sock
*sk
)
236 spin_lock(&unix_table_lock
);
237 __unix_remove_socket(sk
);
238 spin_unlock(&unix_table_lock
);
241 static inline void unix_insert_socket(struct hlist_head
*list
, struct sock
*sk
)
243 spin_lock(&unix_table_lock
);
244 __unix_insert_socket(list
, sk
);
245 spin_unlock(&unix_table_lock
);
248 static struct sock
*__unix_find_socket_byname(struct net
*net
,
249 struct sockaddr_un
*sunname
,
250 int len
, int type
, unsigned hash
)
253 struct hlist_node
*node
;
255 sk_for_each(s
, node
, &unix_socket_table
[hash
^ type
]) {
256 struct unix_sock
*u
= unix_sk(s
);
258 if (!net_eq(sock_net(s
), net
))
261 if (u
->addr
->len
== len
&&
262 !memcmp(u
->addr
->name
, sunname
, len
))
270 static inline struct sock
*unix_find_socket_byname(struct net
*net
,
271 struct sockaddr_un
*sunname
,
277 spin_lock(&unix_table_lock
);
278 s
= __unix_find_socket_byname(net
, sunname
, len
, type
, hash
);
281 spin_unlock(&unix_table_lock
);
285 static struct sock
*unix_find_socket_byinode(struct inode
*i
)
288 struct hlist_node
*node
;
290 spin_lock(&unix_table_lock
);
292 &unix_socket_table
[i
->i_ino
& (UNIX_HASH_SIZE
- 1)]) {
293 struct dentry
*dentry
= unix_sk(s
)->dentry
;
295 if (dentry
&& dentry
->d_inode
== i
) {
302 spin_unlock(&unix_table_lock
);
306 static inline int unix_writable(struct sock
*sk
)
308 return (atomic_read(&sk
->sk_wmem_alloc
) << 2) <= sk
->sk_sndbuf
;
311 static void unix_write_space(struct sock
*sk
)
313 struct socket_wq
*wq
;
316 if (unix_writable(sk
)) {
317 wq
= rcu_dereference(sk
->sk_wq
);
318 if (wq_has_sleeper(wq
))
319 wake_up_interruptible_sync(&wq
->wait
);
320 sk_wake_async(sk
, SOCK_WAKE_SPACE
, POLL_OUT
);
325 /* When dgram socket disconnects (or changes its peer), we clear its receive
326 * queue of packets arrived from previous peer. First, it allows to do
327 * flow control based only on wmem_alloc; second, sk connected to peer
328 * may receive messages only from that peer. */
329 static void unix_dgram_disconnected(struct sock
*sk
, struct sock
*other
)
331 if (!skb_queue_empty(&sk
->sk_receive_queue
)) {
332 skb_queue_purge(&sk
->sk_receive_queue
);
333 wake_up_interruptible_all(&unix_sk(sk
)->peer_wait
);
335 /* If one link of bidirectional dgram pipe is disconnected,
336 * we signal error. Messages are lost. Do not make this,
337 * when peer was not connected to us.
339 if (!sock_flag(other
, SOCK_DEAD
) && unix_peer(other
) == sk
) {
340 other
->sk_err
= ECONNRESET
;
341 other
->sk_error_report(other
);
346 static void unix_sock_destructor(struct sock
*sk
)
348 struct unix_sock
*u
= unix_sk(sk
);
350 skb_queue_purge(&sk
->sk_receive_queue
);
352 WARN_ON(atomic_read(&sk
->sk_wmem_alloc
));
353 WARN_ON(!sk_unhashed(sk
));
354 WARN_ON(sk
->sk_socket
);
355 if (!sock_flag(sk
, SOCK_DEAD
)) {
356 printk(KERN_INFO
"Attempt to release alive unix socket: %p\n", sk
);
361 unix_release_addr(u
->addr
);
363 atomic_dec(&unix_nr_socks
);
365 sock_prot_inuse_add(sock_net(sk
), sk
->sk_prot
, -1);
367 #ifdef UNIX_REFCNT_DEBUG
368 printk(KERN_DEBUG
"UNIX %p is destroyed, %d are still alive.\n", sk
,
369 atomic_read(&unix_nr_socks
));
373 static int unix_release_sock(struct sock
*sk
, int embrion
)
375 struct unix_sock
*u
= unix_sk(sk
);
376 struct dentry
*dentry
;
377 struct vfsmount
*mnt
;
382 unix_remove_socket(sk
);
387 sk
->sk_shutdown
= SHUTDOWN_MASK
;
392 state
= sk
->sk_state
;
393 sk
->sk_state
= TCP_CLOSE
;
394 unix_state_unlock(sk
);
396 wake_up_interruptible_all(&u
->peer_wait
);
398 skpair
= unix_peer(sk
);
400 if (skpair
!= NULL
) {
401 if (sk
->sk_type
== SOCK_STREAM
|| sk
->sk_type
== SOCK_SEQPACKET
) {
402 unix_state_lock(skpair
);
404 skpair
->sk_shutdown
= SHUTDOWN_MASK
;
405 if (!skb_queue_empty(&sk
->sk_receive_queue
) || embrion
)
406 skpair
->sk_err
= ECONNRESET
;
407 unix_state_unlock(skpair
);
408 skpair
->sk_state_change(skpair
);
409 sk_wake_async(skpair
, SOCK_WAKE_WAITD
, POLL_HUP
);
411 sock_put(skpair
); /* It may now die */
412 unix_peer(sk
) = NULL
;
415 /* Try to flush out this socket. Throw out buffers at least */
417 while ((skb
= skb_dequeue(&sk
->sk_receive_queue
)) != NULL
) {
418 if (state
== TCP_LISTEN
)
419 unix_release_sock(skb
->sk
, 1);
420 /* passed fds are erased in the kfree_skb hook */
431 /* ---- Socket is dead now and most probably destroyed ---- */
434 if (unix_tot_inflight
)
435 unix_gc(); /* Garbage collect fds */
440 static void init_peercred(struct sock
*sk
)
442 put_pid(sk
->sk_peer_pid
);
443 if (sk
->sk_peer_cred
)
444 put_cred(sk
->sk_peer_cred
);
445 sk
->sk_peer_pid
= get_pid(task_tgid(current
));
446 sk
->sk_peer_cred
= get_current_cred();
449 static void copy_peercred(struct sock
*sk
, struct sock
*peersk
)
451 put_pid(sk
->sk_peer_pid
);
452 if (sk
->sk_peer_cred
)
453 put_cred(sk
->sk_peer_cred
);
454 sk
->sk_peer_pid
= get_pid(peersk
->sk_peer_pid
);
455 sk
->sk_peer_cred
= get_cred(peersk
->sk_peer_cred
);
458 static int unix_listen(struct socket
*sock
, int backlog
)
461 struct sock
*sk
= sock
->sk
;
462 struct unix_sock
*u
= unix_sk(sk
);
463 struct pid
*old_pid
= NULL
;
464 const struct cred
*old_cred
= NULL
;
467 if (sock
->type
!= SOCK_STREAM
&& sock
->type
!= SOCK_SEQPACKET
)
468 goto out
; /* Only stream/seqpacket sockets accept */
471 goto out
; /* No listens on an unbound socket */
473 if (sk
->sk_state
!= TCP_CLOSE
&& sk
->sk_state
!= TCP_LISTEN
)
475 if (backlog
> sk
->sk_max_ack_backlog
)
476 wake_up_interruptible_all(&u
->peer_wait
);
477 sk
->sk_max_ack_backlog
= backlog
;
478 sk
->sk_state
= TCP_LISTEN
;
479 /* set credentials so connect can copy them */
484 unix_state_unlock(sk
);
492 static int unix_release(struct socket
*);
493 static int unix_bind(struct socket
*, struct sockaddr
*, int);
494 static int unix_stream_connect(struct socket
*, struct sockaddr
*,
495 int addr_len
, int flags
);
496 static int unix_socketpair(struct socket
*, struct socket
*);
497 static int unix_accept(struct socket
*, struct socket
*, int);
498 static int unix_getname(struct socket
*, struct sockaddr
*, int *, int);
499 static unsigned int unix_poll(struct file
*, struct socket
*, poll_table
*);
500 static unsigned int unix_dgram_poll(struct file
*, struct socket
*,
502 static int unix_ioctl(struct socket
*, unsigned int, unsigned long);
503 static int unix_shutdown(struct socket
*, int);
504 static int unix_stream_sendmsg(struct kiocb
*, struct socket
*,
505 struct msghdr
*, size_t);
506 static int unix_stream_recvmsg(struct kiocb
*, struct socket
*,
507 struct msghdr
*, size_t, int);
508 static int unix_dgram_sendmsg(struct kiocb
*, struct socket
*,
509 struct msghdr
*, size_t);
510 static int unix_dgram_recvmsg(struct kiocb
*, struct socket
*,
511 struct msghdr
*, size_t, int);
512 static int unix_dgram_connect(struct socket
*, struct sockaddr
*,
514 static int unix_seqpacket_sendmsg(struct kiocb
*, struct socket
*,
515 struct msghdr
*, size_t);
517 static const struct proto_ops unix_stream_ops
= {
519 .owner
= THIS_MODULE
,
520 .release
= unix_release
,
522 .connect
= unix_stream_connect
,
523 .socketpair
= unix_socketpair
,
524 .accept
= unix_accept
,
525 .getname
= unix_getname
,
528 .listen
= unix_listen
,
529 .shutdown
= unix_shutdown
,
530 .setsockopt
= sock_no_setsockopt
,
531 .getsockopt
= sock_no_getsockopt
,
532 .sendmsg
= unix_stream_sendmsg
,
533 .recvmsg
= unix_stream_recvmsg
,
534 .mmap
= sock_no_mmap
,
535 .sendpage
= sock_no_sendpage
,
538 static const struct proto_ops unix_dgram_ops
= {
540 .owner
= THIS_MODULE
,
541 .release
= unix_release
,
543 .connect
= unix_dgram_connect
,
544 .socketpair
= unix_socketpair
,
545 .accept
= sock_no_accept
,
546 .getname
= unix_getname
,
547 .poll
= unix_dgram_poll
,
549 .listen
= sock_no_listen
,
550 .shutdown
= unix_shutdown
,
551 .setsockopt
= sock_no_setsockopt
,
552 .getsockopt
= sock_no_getsockopt
,
553 .sendmsg
= unix_dgram_sendmsg
,
554 .recvmsg
= unix_dgram_recvmsg
,
555 .mmap
= sock_no_mmap
,
556 .sendpage
= sock_no_sendpage
,
559 static const struct proto_ops unix_seqpacket_ops
= {
561 .owner
= THIS_MODULE
,
562 .release
= unix_release
,
564 .connect
= unix_stream_connect
,
565 .socketpair
= unix_socketpair
,
566 .accept
= unix_accept
,
567 .getname
= unix_getname
,
568 .poll
= unix_dgram_poll
,
570 .listen
= unix_listen
,
571 .shutdown
= unix_shutdown
,
572 .setsockopt
= sock_no_setsockopt
,
573 .getsockopt
= sock_no_getsockopt
,
574 .sendmsg
= unix_seqpacket_sendmsg
,
575 .recvmsg
= unix_dgram_recvmsg
,
576 .mmap
= sock_no_mmap
,
577 .sendpage
= sock_no_sendpage
,
580 static struct proto unix_proto
= {
582 .owner
= THIS_MODULE
,
583 .obj_size
= sizeof(struct unix_sock
),
587 * AF_UNIX sockets do not interact with hardware, hence they
588 * dont trigger interrupts - so it's safe for them to have
589 * bh-unsafe locking for their sk_receive_queue.lock. Split off
590 * this special lock-class by reinitializing the spinlock key:
592 static struct lock_class_key af_unix_sk_receive_queue_lock_key
;
594 static struct sock
*unix_create1(struct net
*net
, struct socket
*sock
)
596 struct sock
*sk
= NULL
;
599 atomic_inc(&unix_nr_socks
);
600 if (atomic_read(&unix_nr_socks
) > 2 * get_max_files())
603 sk
= sk_alloc(net
, PF_UNIX
, GFP_KERNEL
, &unix_proto
);
607 sock_init_data(sock
, sk
);
608 lockdep_set_class(&sk
->sk_receive_queue
.lock
,
609 &af_unix_sk_receive_queue_lock_key
);
611 sk
->sk_write_space
= unix_write_space
;
612 sk
->sk_max_ack_backlog
= net
->unx
.sysctl_max_dgram_qlen
;
613 sk
->sk_destruct
= unix_sock_destructor
;
617 spin_lock_init(&u
->lock
);
618 atomic_long_set(&u
->inflight
, 0);
619 INIT_LIST_HEAD(&u
->link
);
620 mutex_init(&u
->readlock
); /* single task reading lock */
621 init_waitqueue_head(&u
->peer_wait
);
622 unix_insert_socket(unix_sockets_unbound
, sk
);
625 atomic_dec(&unix_nr_socks
);
628 sock_prot_inuse_add(sock_net(sk
), sk
->sk_prot
, 1);
634 static int unix_create(struct net
*net
, struct socket
*sock
, int protocol
,
637 if (protocol
&& protocol
!= PF_UNIX
)
638 return -EPROTONOSUPPORT
;
640 sock
->state
= SS_UNCONNECTED
;
642 switch (sock
->type
) {
644 sock
->ops
= &unix_stream_ops
;
647 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
651 sock
->type
= SOCK_DGRAM
;
653 sock
->ops
= &unix_dgram_ops
;
656 sock
->ops
= &unix_seqpacket_ops
;
659 return -ESOCKTNOSUPPORT
;
662 return unix_create1(net
, sock
) ? 0 : -ENOMEM
;
665 static int unix_release(struct socket
*sock
)
667 struct sock
*sk
= sock
->sk
;
674 return unix_release_sock(sk
, 0);
677 static int unix_autobind(struct socket
*sock
)
679 struct sock
*sk
= sock
->sk
;
680 struct net
*net
= sock_net(sk
);
681 struct unix_sock
*u
= unix_sk(sk
);
682 static u32 ordernum
= 1;
683 struct unix_address
*addr
;
685 unsigned int retries
= 0;
687 mutex_lock(&u
->readlock
);
694 addr
= kzalloc(sizeof(*addr
) + sizeof(short) + 16, GFP_KERNEL
);
698 addr
->name
->sun_family
= AF_UNIX
;
699 atomic_set(&addr
->refcnt
, 1);
702 addr
->len
= sprintf(addr
->name
->sun_path
+1, "%05x", ordernum
) + 1 + sizeof(short);
703 addr
->hash
= unix_hash_fold(csum_partial(addr
->name
, addr
->len
, 0));
705 spin_lock(&unix_table_lock
);
706 ordernum
= (ordernum
+1)&0xFFFFF;
708 if (__unix_find_socket_byname(net
, addr
->name
, addr
->len
, sock
->type
,
710 spin_unlock(&unix_table_lock
);
712 * __unix_find_socket_byname() may take long time if many names
713 * are already in use.
716 /* Give up if all names seems to be in use. */
717 if (retries
++ == 0xFFFFF) {
724 addr
->hash
^= sk
->sk_type
;
726 __unix_remove_socket(sk
);
728 __unix_insert_socket(&unix_socket_table
[addr
->hash
], sk
);
729 spin_unlock(&unix_table_lock
);
732 out
: mutex_unlock(&u
->readlock
);
736 static struct sock
*unix_find_other(struct net
*net
,
737 struct sockaddr_un
*sunname
, int len
,
738 int type
, unsigned hash
, int *error
)
744 if (sunname
->sun_path
[0]) {
746 err
= kern_path(sunname
->sun_path
, LOOKUP_FOLLOW
, &path
);
749 inode
= path
.dentry
->d_inode
;
750 err
= inode_permission(inode
, MAY_WRITE
);
755 if (!S_ISSOCK(inode
->i_mode
))
757 u
= unix_find_socket_byinode(inode
);
761 if (u
->sk_type
== type
)
762 touch_atime(path
.mnt
, path
.dentry
);
767 if (u
->sk_type
!= type
) {
773 u
= unix_find_socket_byname(net
, sunname
, len
, type
, hash
);
775 struct dentry
*dentry
;
776 dentry
= unix_sk(u
)->dentry
;
778 touch_atime(unix_sk(u
)->mnt
, dentry
);
792 static int unix_bind(struct socket
*sock
, struct sockaddr
*uaddr
, int addr_len
)
794 struct sock
*sk
= sock
->sk
;
795 struct net
*net
= sock_net(sk
);
796 struct unix_sock
*u
= unix_sk(sk
);
797 struct sockaddr_un
*sunaddr
= (struct sockaddr_un
*)uaddr
;
798 struct dentry
*dentry
= NULL
;
802 struct unix_address
*addr
;
803 struct hlist_head
*list
;
806 if (sunaddr
->sun_family
!= AF_UNIX
)
809 if (addr_len
== sizeof(short)) {
810 err
= unix_autobind(sock
);
814 err
= unix_mkname(sunaddr
, addr_len
, &hash
);
819 mutex_lock(&u
->readlock
);
826 addr
= kmalloc(sizeof(*addr
)+addr_len
, GFP_KERNEL
);
830 memcpy(addr
->name
, sunaddr
, addr_len
);
831 addr
->len
= addr_len
;
832 addr
->hash
= hash
^ sk
->sk_type
;
833 atomic_set(&addr
->refcnt
, 1);
835 if (sunaddr
->sun_path
[0]) {
839 * Get the parent directory, calculate the hash for last
842 err
= path_lookup(sunaddr
->sun_path
, LOOKUP_PARENT
, &nd
);
844 goto out_mknod_parent
;
846 dentry
= lookup_create(&nd
, 0);
847 err
= PTR_ERR(dentry
);
849 goto out_mknod_unlock
;
852 * All right, let's create it.
855 (SOCK_INODE(sock
)->i_mode
& ~current_umask());
856 err
= mnt_want_write(nd
.path
.mnt
);
859 err
= security_path_mknod(&nd
.path
, dentry
, mode
, 0);
861 goto out_mknod_drop_write
;
862 err
= vfs_mknod(nd
.path
.dentry
->d_inode
, dentry
, mode
, 0);
863 out_mknod_drop_write
:
864 mnt_drop_write(nd
.path
.mnt
);
867 mutex_unlock(&nd
.path
.dentry
->d_inode
->i_mutex
);
868 dput(nd
.path
.dentry
);
869 nd
.path
.dentry
= dentry
;
871 addr
->hash
= UNIX_HASH_SIZE
;
874 spin_lock(&unix_table_lock
);
876 if (!sunaddr
->sun_path
[0]) {
878 if (__unix_find_socket_byname(net
, sunaddr
, addr_len
,
879 sk
->sk_type
, hash
)) {
880 unix_release_addr(addr
);
884 list
= &unix_socket_table
[addr
->hash
];
886 list
= &unix_socket_table
[dentry
->d_inode
->i_ino
& (UNIX_HASH_SIZE
-1)];
887 u
->dentry
= nd
.path
.dentry
;
888 u
->mnt
= nd
.path
.mnt
;
892 __unix_remove_socket(sk
);
894 __unix_insert_socket(list
, sk
);
897 spin_unlock(&unix_table_lock
);
899 mutex_unlock(&u
->readlock
);
906 mutex_unlock(&nd
.path
.dentry
->d_inode
->i_mutex
);
911 unix_release_addr(addr
);
915 static void unix_state_double_lock(struct sock
*sk1
, struct sock
*sk2
)
917 if (unlikely(sk1
== sk2
) || !sk2
) {
918 unix_state_lock(sk1
);
922 unix_state_lock(sk1
);
923 unix_state_lock_nested(sk2
);
925 unix_state_lock(sk2
);
926 unix_state_lock_nested(sk1
);
930 static void unix_state_double_unlock(struct sock
*sk1
, struct sock
*sk2
)
932 if (unlikely(sk1
== sk2
) || !sk2
) {
933 unix_state_unlock(sk1
);
936 unix_state_unlock(sk1
);
937 unix_state_unlock(sk2
);
940 static int unix_dgram_connect(struct socket
*sock
, struct sockaddr
*addr
,
943 struct sock
*sk
= sock
->sk
;
944 struct net
*net
= sock_net(sk
);
945 struct sockaddr_un
*sunaddr
= (struct sockaddr_un
*)addr
;
950 if (addr
->sa_family
!= AF_UNSPEC
) {
951 err
= unix_mkname(sunaddr
, alen
, &hash
);
956 if (test_bit(SOCK_PASSCRED
, &sock
->flags
) &&
957 !unix_sk(sk
)->addr
&& (err
= unix_autobind(sock
)) != 0)
961 other
= unix_find_other(net
, sunaddr
, alen
, sock
->type
, hash
, &err
);
965 unix_state_double_lock(sk
, other
);
967 /* Apparently VFS overslept socket death. Retry. */
968 if (sock_flag(other
, SOCK_DEAD
)) {
969 unix_state_double_unlock(sk
, other
);
975 if (!unix_may_send(sk
, other
))
978 err
= security_unix_may_send(sk
->sk_socket
, other
->sk_socket
);
984 * 1003.1g breaking connected state with AF_UNSPEC
987 unix_state_double_lock(sk
, other
);
991 * If it was connected, reconnect.
994 struct sock
*old_peer
= unix_peer(sk
);
995 unix_peer(sk
) = other
;
996 unix_state_double_unlock(sk
, other
);
998 if (other
!= old_peer
)
999 unix_dgram_disconnected(sk
, old_peer
);
1002 unix_peer(sk
) = other
;
1003 unix_state_double_unlock(sk
, other
);
1008 unix_state_double_unlock(sk
, other
);
1014 static long unix_wait_for_peer(struct sock
*other
, long timeo
)
1016 struct unix_sock
*u
= unix_sk(other
);
1020 prepare_to_wait_exclusive(&u
->peer_wait
, &wait
, TASK_INTERRUPTIBLE
);
1022 sched
= !sock_flag(other
, SOCK_DEAD
) &&
1023 !(other
->sk_shutdown
& RCV_SHUTDOWN
) &&
1024 unix_recvq_full(other
);
1026 unix_state_unlock(other
);
1029 timeo
= schedule_timeout(timeo
);
1031 finish_wait(&u
->peer_wait
, &wait
);
1035 static int unix_stream_connect(struct socket
*sock
, struct sockaddr
*uaddr
,
1036 int addr_len
, int flags
)
1038 struct sockaddr_un
*sunaddr
= (struct sockaddr_un
*)uaddr
;
1039 struct sock
*sk
= sock
->sk
;
1040 struct net
*net
= sock_net(sk
);
1041 struct unix_sock
*u
= unix_sk(sk
), *newu
, *otheru
;
1042 struct sock
*newsk
= NULL
;
1043 struct sock
*other
= NULL
;
1044 struct sk_buff
*skb
= NULL
;
1050 err
= unix_mkname(sunaddr
, addr_len
, &hash
);
1055 if (test_bit(SOCK_PASSCRED
, &sock
->flags
) && !u
->addr
&&
1056 (err
= unix_autobind(sock
)) != 0)
1059 timeo
= sock_sndtimeo(sk
, flags
& O_NONBLOCK
);
1061 /* First of all allocate resources.
1062 If we will make it after state is locked,
1063 we will have to recheck all again in any case.
1068 /* create new sock for complete connection */
1069 newsk
= unix_create1(sock_net(sk
), NULL
);
1073 /* Allocate skb for sending to listening sock */
1074 skb
= sock_wmalloc(newsk
, 1, 0, GFP_KERNEL
);
1079 /* Find listening sock. */
1080 other
= unix_find_other(net
, sunaddr
, addr_len
, sk
->sk_type
, hash
, &err
);
1084 /* Latch state of peer */
1085 unix_state_lock(other
);
1087 /* Apparently VFS overslept socket death. Retry. */
1088 if (sock_flag(other
, SOCK_DEAD
)) {
1089 unix_state_unlock(other
);
1094 err
= -ECONNREFUSED
;
1095 if (other
->sk_state
!= TCP_LISTEN
)
1097 if (other
->sk_shutdown
& RCV_SHUTDOWN
)
1100 if (unix_recvq_full(other
)) {
1105 timeo
= unix_wait_for_peer(other
, timeo
);
1107 err
= sock_intr_errno(timeo
);
1108 if (signal_pending(current
))
1116 It is tricky place. We need to grab write lock and cannot
1117 drop lock on peer. It is dangerous because deadlock is
1118 possible. Connect to self case and simultaneous
1119 attempt to connect are eliminated by checking socket
1120 state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1121 check this before attempt to grab lock.
1123 Well, and we have to recheck the state after socket locked.
1129 /* This is ok... continue with connect */
1131 case TCP_ESTABLISHED
:
1132 /* Socket is already connected */
1140 unix_state_lock_nested(sk
);
1142 if (sk
->sk_state
!= st
) {
1143 unix_state_unlock(sk
);
1144 unix_state_unlock(other
);
1149 err
= security_unix_stream_connect(sock
, other
->sk_socket
, newsk
);
1151 unix_state_unlock(sk
);
1155 /* The way is open! Fastly set all the necessary fields... */
1158 unix_peer(newsk
) = sk
;
1159 newsk
->sk_state
= TCP_ESTABLISHED
;
1160 newsk
->sk_type
= sk
->sk_type
;
1161 init_peercred(newsk
);
1162 newu
= unix_sk(newsk
);
1163 newsk
->sk_wq
= &newu
->peer_wq
;
1164 otheru
= unix_sk(other
);
1166 /* copy address information from listening to new sock*/
1168 atomic_inc(&otheru
->addr
->refcnt
);
1169 newu
->addr
= otheru
->addr
;
1171 if (otheru
->dentry
) {
1172 newu
->dentry
= dget(otheru
->dentry
);
1173 newu
->mnt
= mntget(otheru
->mnt
);
1176 /* Set credentials */
1177 copy_peercred(sk
, other
);
1179 sock
->state
= SS_CONNECTED
;
1180 sk
->sk_state
= TCP_ESTABLISHED
;
1183 smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */
1184 unix_peer(sk
) = newsk
;
1186 unix_state_unlock(sk
);
1188 /* take ten and and send info to listening sock */
1189 spin_lock(&other
->sk_receive_queue
.lock
);
1190 __skb_queue_tail(&other
->sk_receive_queue
, skb
);
1191 spin_unlock(&other
->sk_receive_queue
.lock
);
1192 unix_state_unlock(other
);
1193 other
->sk_data_ready(other
, 0);
1199 unix_state_unlock(other
);
1204 unix_release_sock(newsk
, 0);
1210 static int unix_socketpair(struct socket
*socka
, struct socket
*sockb
)
1212 struct sock
*ska
= socka
->sk
, *skb
= sockb
->sk
;
1214 /* Join our sockets back to back */
1217 unix_peer(ska
) = skb
;
1218 unix_peer(skb
) = ska
;
1222 if (ska
->sk_type
!= SOCK_DGRAM
) {
1223 ska
->sk_state
= TCP_ESTABLISHED
;
1224 skb
->sk_state
= TCP_ESTABLISHED
;
1225 socka
->state
= SS_CONNECTED
;
1226 sockb
->state
= SS_CONNECTED
;
1231 static int unix_accept(struct socket
*sock
, struct socket
*newsock
, int flags
)
1233 struct sock
*sk
= sock
->sk
;
1235 struct sk_buff
*skb
;
1239 if (sock
->type
!= SOCK_STREAM
&& sock
->type
!= SOCK_SEQPACKET
)
1243 if (sk
->sk_state
!= TCP_LISTEN
)
1246 /* If socket state is TCP_LISTEN it cannot change (for now...),
1247 * so that no locks are necessary.
1250 skb
= skb_recv_datagram(sk
, 0, flags
&O_NONBLOCK
, &err
);
1252 /* This means receive shutdown. */
1259 skb_free_datagram(sk
, skb
);
1260 wake_up_interruptible(&unix_sk(sk
)->peer_wait
);
1262 /* attach accepted sock to socket */
1263 unix_state_lock(tsk
);
1264 newsock
->state
= SS_CONNECTED
;
1265 sock_graft(tsk
, newsock
);
1266 unix_state_unlock(tsk
);
1274 static int unix_getname(struct socket
*sock
, struct sockaddr
*uaddr
, int *uaddr_len
, int peer
)
1276 struct sock
*sk
= sock
->sk
;
1277 struct unix_sock
*u
;
1278 DECLARE_SOCKADDR(struct sockaddr_un
*, sunaddr
, uaddr
);
1282 sk
= unix_peer_get(sk
);
1293 unix_state_lock(sk
);
1295 sunaddr
->sun_family
= AF_UNIX
;
1296 sunaddr
->sun_path
[0] = 0;
1297 *uaddr_len
= sizeof(short);
1299 struct unix_address
*addr
= u
->addr
;
1301 *uaddr_len
= addr
->len
;
1302 memcpy(sunaddr
, addr
->name
, *uaddr_len
);
1304 unix_state_unlock(sk
);
1310 static void unix_detach_fds(struct scm_cookie
*scm
, struct sk_buff
*skb
)
1314 scm
->fp
= UNIXCB(skb
).fp
;
1315 UNIXCB(skb
).fp
= NULL
;
1317 for (i
= scm
->fp
->count
-1; i
>= 0; i
--)
1318 unix_notinflight(scm
->fp
->fp
[i
]);
1321 static void unix_destruct_scm(struct sk_buff
*skb
)
1323 struct scm_cookie scm
;
1324 memset(&scm
, 0, sizeof(scm
));
1325 scm
.pid
= UNIXCB(skb
).pid
;
1326 scm
.cred
= UNIXCB(skb
).cred
;
1328 unix_detach_fds(&scm
, skb
);
1330 /* Alas, it calls VFS */
1331 /* So fscking what? fput() had been SMP-safe since the last Summer */
1336 #define MAX_RECURSION_LEVEL 4
1338 static int unix_attach_fds(struct scm_cookie
*scm
, struct sk_buff
*skb
)
1341 unsigned char max_level
= 0;
1342 int unix_sock_count
= 0;
1344 for (i
= scm
->fp
->count
- 1; i
>= 0; i
--) {
1345 struct sock
*sk
= unix_get_socket(scm
->fp
->fp
[i
]);
1349 max_level
= max(max_level
,
1350 unix_sk(sk
)->recursion_level
);
1353 if (unlikely(max_level
> MAX_RECURSION_LEVEL
))
1354 return -ETOOMANYREFS
;
1357 * Need to duplicate file references for the sake of garbage
1358 * collection. Otherwise a socket in the fps might become a
1359 * candidate for GC while the skb is not yet queued.
1361 UNIXCB(skb
).fp
= scm_fp_dup(scm
->fp
);
1362 if (!UNIXCB(skb
).fp
)
1365 if (unix_sock_count
) {
1366 for (i
= scm
->fp
->count
- 1; i
>= 0; i
--)
1367 unix_inflight(scm
->fp
->fp
[i
]);
1372 static int unix_scm_to_skb(struct scm_cookie
*scm
, struct sk_buff
*skb
, bool send_fds
)
1375 UNIXCB(skb
).pid
= get_pid(scm
->pid
);
1376 UNIXCB(skb
).cred
= get_cred(scm
->cred
);
1377 UNIXCB(skb
).fp
= NULL
;
1378 if (scm
->fp
&& send_fds
)
1379 err
= unix_attach_fds(scm
, skb
);
1381 skb
->destructor
= unix_destruct_scm
;
/*
 * unix_dgram_sendmsg - sendmsg() for SOCK_DGRAM (and, via wrapper, SEQPACKET)
 * AF_UNIX sockets. Builds one skb for the whole datagram, attaches SCM
 * data (creds/fds/security label), resolves the destination either from
 * msg_name (unix_find_other) or the connected peer (unix_peer_get), then
 * queues the skb on the receiver's sk_receive_queue under the peer's
 * state lock and wakes the reader.
 * NOTE(review): fragmentary extract — many original lines (labels, error
 * paths, braces) are missing; do not build from this text as-is.
 */
1386 * Send AF_UNIX data.
1389 static int unix_dgram_sendmsg(struct kiocb
*kiocb
, struct socket
*sock
,
1390 struct msghdr
*msg
, size_t len
)
1392 struct sock_iocb
*siocb
= kiocb_to_siocb(kiocb
);
1393 struct sock
*sk
= sock
->sk
;
1394 struct net
*net
= sock_net(sk
);
1395 struct unix_sock
*u
= unix_sk(sk
);
1396 struct sockaddr_un
*sunaddr
= msg
->msg_name
;
1397 struct sock
*other
= NULL
;
1398 int namelen
= 0; /* fake GCC */
1401 struct sk_buff
*skb
;
1403 struct scm_cookie tmp_scm
;
/* Synchronous path: no scm cookie supplied, use one on the stack. */
1406 if (NULL
== siocb
->scm
)
1407 siocb
->scm
= &tmp_scm
;
1409 err
= scm_send(sock
, msg
, siocb
->scm
);
/* AF_UNIX datagrams have no out-of-band data. */
1414 if (msg
->msg_flags
&MSG_OOB
)
1417 if (msg
->msg_namelen
) {
1418 err
= unix_mkname(sunaddr
, msg
->msg_namelen
, &hash
);
1425 other
= unix_peer_get(sk
);
/* SOCK_PASSCRED requires a bound address; autobind if needed. */
1430 if (test_bit(SOCK_PASSCRED
, &sock
->flags
) && !u
->addr
1431 && (err
= unix_autobind(sock
)) != 0)
1435 if (len
> sk
->sk_sndbuf
- 32)
1438 skb
= sock_alloc_send_skb(sk
, len
, msg
->msg_flags
&MSG_DONTWAIT
, &err
);
1442 err
= unix_scm_to_skb(siocb
->scm
, skb
, true);
/* unix_scm_to_skb returns the fd-passing recursion depth on success. */
1445 max_level
= err
+ 1;
1446 unix_get_secdata(siocb
->scm
, skb
);
1448 skb_reset_transport_header(skb
);
1449 err
= memcpy_fromiovec(skb_put(skb
, len
), msg
->msg_iov
, len
);
1453 timeo
= sock_sndtimeo(sk
, msg
->msg_flags
& MSG_DONTWAIT
);
1458 if (sunaddr
== NULL
)
1461 other
= unix_find_other(net
, sunaddr
, namelen
, sk
->sk_type
,
1467 unix_state_lock(other
);
1469 if (!unix_may_send(sk
, other
))
/* Peer died: for a connected socket, break the link and report ECONNREFUSED. */
1472 if (sock_flag(other
, SOCK_DEAD
)) {
1474 * Check with 1003.1g - what should
1477 unix_state_unlock(other
);
1481 unix_state_lock(sk
);
1482 if (unix_peer(sk
) == other
) {
1483 unix_peer(sk
) = NULL
;
1484 unix_state_unlock(sk
);
1486 unix_dgram_disconnected(sk
, other
);
1488 err
= -ECONNREFUSED
;
1490 unix_state_unlock(sk
);
1500 if (other
->sk_shutdown
& RCV_SHUTDOWN
)
1503 if (sk
->sk_type
!= SOCK_SEQPACKET
) {
1504 err
= security_unix_may_send(sk
->sk_socket
, other
->sk_socket
);
/* Receiver queue full and we are not its peer: wait for room. */
1509 if (unix_peer(other
) != sk
&& unix_recvq_full(other
)) {
1515 timeo
= unix_wait_for_peer(other
, timeo
);
1517 err
= sock_intr_errno(timeo
);
1518 if (signal_pending(current
))
/* Deliver: queue on the peer and wake its reader. */
1524 skb_queue_tail(&other
->sk_receive_queue
, skb
);
1525 if (max_level
> unix_sk(other
)->recursion_level
)
1526 unix_sk(other
)->recursion_level
= max_level
;
1527 unix_state_unlock(other
);
1528 other
->sk_data_ready(other
, len
);
1530 scm_destroy(siocb
->scm
);
1534 unix_state_unlock(other
);
1540 scm_destroy(siocb
->scm
);
/*
 * unix_stream_sendmsg - sendmsg() for SOCK_STREAM AF_UNIX sockets.
 * Splits the user buffer into skbs sized against sk_sndbuf/SKB_MAX_ALLOC,
 * attaches SCM data (fds only on the first skb), and queues each skb on
 * the connected peer's receive queue under the peer's state lock.
 * Returns bytes sent, or a negative error if nothing was sent; raises
 * SIGPIPE on a broken pipe unless MSG_NOSIGNAL is set.
 * NOTE(review): fragmentary extract — loop braces, error labels and
 * several statements are missing from the visible text.
 */
1545 static int unix_stream_sendmsg(struct kiocb
*kiocb
, struct socket
*sock
,
1546 struct msghdr
*msg
, size_t len
)
1548 struct sock_iocb
*siocb
= kiocb_to_siocb(kiocb
);
1549 struct sock
*sk
= sock
->sk
;
1550 struct sock
*other
= NULL
;
1551 struct sockaddr_un
*sunaddr
= msg
->msg_name
;
1553 struct sk_buff
*skb
;
1555 struct scm_cookie tmp_scm
;
1556 bool fds_sent
= false;
1559 if (NULL
== siocb
->scm
)
1560 siocb
->scm
= &tmp_scm
;
1562 err
= scm_send(sock
, msg
, siocb
->scm
);
1567 if (msg
->msg_flags
&MSG_OOB
)
/* Stream sockets must already be connected; an address is an error. */
1570 if (msg
->msg_namelen
) {
1571 err
= sk
->sk_state
== TCP_ESTABLISHED
? -EISCONN
: -EOPNOTSUPP
;
1576 other
= unix_peer(sk
);
1581 if (sk
->sk_shutdown
& SEND_SHUTDOWN
)
1584 while (sent
< len
) {
1586 * Optimisation for the fact that under 0.01% of X
1587 * messages typically need breaking up.
1592 /* Keep two messages in the pipe so it schedules better */
1593 if (size
> ((sk
->sk_sndbuf
>> 1) - 64))
1594 size
= (sk
->sk_sndbuf
>> 1) - 64;
1596 if (size
> SKB_MAX_ALLOC
)
1597 size
= SKB_MAX_ALLOC
;
1603 skb
= sock_alloc_send_skb(sk
, size
, msg
->msg_flags
&MSG_DONTWAIT
,
1610 * If you pass two values to the sock_alloc_send_skb
1611 * it tries to grab the large buffer with GFP_NOFS
1612 * (which can fail easily), and if it fails grab the
1613 * fallback size buffer which is under a page and will
1616 size
= min_t(int, size
, skb_tailroom(skb
));
1619 /* Only send the fds in the first buffer */
1620 err
= unix_scm_to_skb(siocb
->scm
, skb
, !fds_sent
);
1625 max_level
= err
+ 1;
1628 err
= memcpy_fromiovec(skb_put(skb
, size
), msg
->msg_iov
, size
);
1634 unix_state_lock(other
);
/* Peer dead or shut down for receive: abort (pipe error path below). */
1636 if (sock_flag(other
, SOCK_DEAD
) ||
1637 (other
->sk_shutdown
& RCV_SHUTDOWN
))
1640 skb_queue_tail(&other
->sk_receive_queue
, skb
);
1641 if (max_level
> unix_sk(other
)->recursion_level
)
1642 unix_sk(other
)->recursion_level
= max_level
;
1643 unix_state_unlock(other
);
1644 other
->sk_data_ready(other
, size
);
1648 scm_destroy(siocb
->scm
);
1654 unix_state_unlock(other
);
/* Broken pipe with nothing sent: deliver SIGPIPE unless suppressed. */
1657 if (sent
== 0 && !(msg
->msg_flags
&MSG_NOSIGNAL
))
1658 send_sig(SIGPIPE
, current
, 0);
1661 scm_destroy(siocb
->scm
);
1663 return sent
? : err
;
/*
 * unix_seqpacket_sendmsg - sendmsg() for SOCK_SEQPACKET sockets.
 * Thin wrapper over unix_dgram_sendmsg: verifies the socket is connected
 * (TCP_ESTABLISHED) and has no pending error, then clears msg_namelen
 * (a seqpacket send ignores any supplied address) and delegates.
 * NOTE(review): fragmentary extract — braces and early-return lines are
 * missing from the visible text.
 */
1666 static int unix_seqpacket_sendmsg(struct kiocb
*kiocb
, struct socket
*sock
,
1667 struct msghdr
*msg
, size_t len
)
1670 struct sock
*sk
= sock
->sk
;
1672 err
= sock_error(sk
);
1676 if (sk
->sk_state
!= TCP_ESTABLISHED
)
1679 if (msg
->msg_namelen
)
1680 msg
->msg_namelen
= 0;
1682 return unix_dgram_sendmsg(kiocb
, sock
, msg
, len
);
/*
 * unix_copy_addr - copy the sender's bound address into msg_name.
 * Defaults msg_namelen to 0; when the sending socket has a bound address
 * (the copy at line 1691/1692 is presumably guarded by a u->addr check
 * on a line missing from this extract — verify against the original),
 * its length and sockaddr_un bytes are copied out to the caller.
 */
1685 static void unix_copy_addr(struct msghdr
*msg
, struct sock
*sk
)
1687 struct unix_sock
*u
= unix_sk(sk
);
1689 msg
->msg_namelen
= 0;
1691 msg
->msg_namelen
= u
->addr
->len
;
1692 memcpy(msg
->msg_name
, u
->addr
->name
, u
->addr
->len
);
/*
 * unix_dgram_recvmsg - recvmsg() for SOCK_DGRAM/SOCK_SEQPACKET sockets.
 * Dequeues one datagram under u->readlock, copies out the sender address
 * and payload (setting MSG_TRUNC on short reads), and hands credentials,
 * security data and any passed fds to userspace via scm_recv(). On
 * MSG_PEEK the fd list is duplicated instead of detached so the skb can
 * be delivered again.
 * NOTE(review): fragmentary extract — error labels, braces and several
 * statements are missing from the visible text.
 */
1696 static int unix_dgram_recvmsg(struct kiocb
*iocb
, struct socket
*sock
,
1697 struct msghdr
*msg
, size_t size
,
1700 struct sock_iocb
*siocb
= kiocb_to_siocb(iocb
);
1701 struct scm_cookie tmp_scm
;
1702 struct sock
*sk
= sock
->sk
;
1703 struct unix_sock
*u
= unix_sk(sk
);
1704 int noblock
= flags
& MSG_DONTWAIT
;
1705 struct sk_buff
*skb
;
1712 msg
->msg_namelen
= 0;
/* readlock serializes receivers so datagram order is preserved. */
1714 mutex_lock(&u
->readlock
);
1716 skb
= skb_recv_datagram(sk
, flags
, noblock
, &err
);
1718 unix_state_lock(sk
);
1719 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1720 if (sk
->sk_type
== SOCK_SEQPACKET
&& err
== -EAGAIN
&&
1721 (sk
->sk_shutdown
& RCV_SHUTDOWN
))
1723 unix_state_unlock(sk
);
/* A datagram was consumed: wake senders blocked on a full queue. */
1727 wake_up_interruptible_sync(&u
->peer_wait
);
1730 unix_copy_addr(msg
, skb
->sk
);
1732 if (size
> skb
->len
)
1734 else if (size
< skb
->len
)
1735 msg
->msg_flags
|= MSG_TRUNC
;
1737 err
= skb_copy_datagram_iovec(skb
, 0, msg
->msg_iov
, size
);
1742 siocb
->scm
= &tmp_scm
;
1743 memset(&tmp_scm
, 0, sizeof(tmp_scm
));
1745 scm_set_cred(siocb
->scm
, UNIXCB(skb
).pid
, UNIXCB(skb
).cred
);
1746 unix_set_secdata(siocb
->scm
, skb
);
1748 if (!(flags
& MSG_PEEK
)) {
1750 unix_detach_fds(siocb
->scm
, skb
);
1752 /* It is questionable: on PEEK we could:
1753 - do not return fds - good, but too simple 8)
1754 - return fds, and do not return them on read (old strategy,
1756 - clone fds (I chose it for now, it is the most universal
1759 POSIX 1003.1g does not actually define this clearly
1760 at all. POSIX 1003.1g doesn't define a lot of things
1765 siocb
->scm
->fp
= scm_fp_dup(UNIXCB(skb
).fp
);
1769 scm_recv(sock
, msg
, siocb
->scm
, flags
);
1772 skb_free_datagram(sk
, skb
);
1774 mutex_unlock(&u
->readlock
);
/*
 * unix_stream_data_wait - block a stream reader until data arrives.
 * Sleeps interruptibly (with SOCK_ASYNC_WAITDATA set) until the receive
 * queue is non-empty, the socket is shut down for receive, a signal is
 * pending, or the timeout expires; re-checks under unix_state_lock to
 * avoid lost-wakeup races. Returns the remaining timeout.
 * NOTE(review): fragmentary extract — the surrounding for(;;) loop,
 * braces and the return statement are missing from the visible text.
 */
1780 * Sleep until data has arrive. But check for races..
1783 static long unix_stream_data_wait(struct sock
*sk
, long timeo
)
1787 unix_state_lock(sk
);
1790 prepare_to_wait(sk_sleep(sk
), &wait
, TASK_INTERRUPTIBLE
);
1792 if (!skb_queue_empty(&sk
->sk_receive_queue
) ||
1794 (sk
->sk_shutdown
& RCV_SHUTDOWN
) ||
1795 signal_pending(current
) ||
1799 set_bit(SOCK_ASYNC_WAITDATA
, &sk
->sk_socket
->flags
);
/* Drop the state lock across the actual sleep. */
1800 unix_state_unlock(sk
);
1801 timeo
= schedule_timeout(timeo
);
1802 unix_state_lock(sk
);
1803 clear_bit(SOCK_ASYNC_WAITDATA
, &sk
->sk_socket
->flags
);
1806 finish_wait(sk_sleep(sk
), &wait
);
1807 unix_state_unlock(sk
);
/*
 * unix_stream_recvmsg - recvmsg() for SOCK_STREAM AF_UNIX sockets.
 * Copies up to @size bytes out of queued skbs under u->readlock,
 * sleeping via unix_stream_data_wait until at least the SO_RCVLOWAT
 * target is available. Skbs from a different writer (pid/cred change)
 * are never glued into one read; partially-consumed skbs are pulled and
 * requeued at the head. fds are detached on a real read, duplicated on
 * MSG_PEEK. Returns bytes copied, or a negative error if none.
 * NOTE(review): fragmentary extract — the do/while loop structure,
 * braces and error labels are missing from the visible text.
 */
1813 static int unix_stream_recvmsg(struct kiocb
*iocb
, struct socket
*sock
,
1814 struct msghdr
*msg
, size_t size
,
1817 struct sock_iocb
*siocb
= kiocb_to_siocb(iocb
);
1818 struct scm_cookie tmp_scm
;
1819 struct sock
*sk
= sock
->sk
;
1820 struct unix_sock
*u
= unix_sk(sk
);
1821 struct sockaddr_un
*sunaddr
= msg
->msg_name
;
1823 int check_creds
= 0;
1829 if (sk
->sk_state
!= TCP_ESTABLISHED
)
1836 target
= sock_rcvlowat(sk
, flags
&MSG_WAITALL
, size
);
1837 timeo
= sock_rcvtimeo(sk
, flags
&MSG_DONTWAIT
);
1839 msg
->msg_namelen
= 0;
1841 /* Lock the socket to prevent queue disordering
1842 * while sleeps in memcpy_tomsg
1846 siocb
->scm
= &tmp_scm
;
1847 memset(&tmp_scm
, 0, sizeof(tmp_scm
));
1850 mutex_lock(&u
->readlock
);
1854 struct sk_buff
*skb
;
1856 unix_state_lock(sk
);
1857 skb
= skb_dequeue(&sk
->sk_receive_queue
);
/* Queue drained: reset the fd-passing recursion level. */
1859 unix_sk(sk
)->recursion_level
= 0;
1860 if (copied
>= target
)
1864 * POSIX 1003.1g mandates this order.
1867 err
= sock_error(sk
);
1870 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
1873 unix_state_unlock(sk
);
/* Release readlock while sleeping so writers/other readers can progress. */
1877 mutex_unlock(&u
->readlock
);
1879 timeo
= unix_stream_data_wait(sk
, timeo
);
1881 if (signal_pending(current
)) {
1882 err
= sock_intr_errno(timeo
);
1885 mutex_lock(&u
->readlock
);
1888 unix_state_unlock(sk
);
1891 unix_state_unlock(sk
);
1894 /* Never glue messages from different writers */
1895 if ((UNIXCB(skb
).pid
!= siocb
->scm
->pid
) ||
1896 (UNIXCB(skb
).cred
!= siocb
->scm
->cred
)) {
1897 skb_queue_head(&sk
->sk_receive_queue
, skb
);
1901 /* Copy credentials */
1902 scm_set_cred(siocb
->scm
, UNIXCB(skb
).pid
, UNIXCB(skb
).cred
);
1906 /* Copy address just once */
1908 unix_copy_addr(msg
, skb
->sk
);
1912 chunk
= min_t(unsigned int, skb
->len
, size
);
/* Copy failure: put the skb back so no data is lost. */
1913 if (memcpy_toiovec(msg
->msg_iov
, skb
->data
, chunk
)) {
1914 skb_queue_head(&sk
->sk_receive_queue
, skb
);
1922 /* Mark read part of skb as used */
1923 if (!(flags
& MSG_PEEK
)) {
1924 skb_pull(skb
, chunk
);
1927 unix_detach_fds(siocb
->scm
, skb
);
1929 /* put the skb back if we didn't use it up.. */
1931 skb_queue_head(&sk
->sk_receive_queue
, skb
);
1940 /* It is questionable, see note in unix_dgram_recvmsg.
1943 siocb
->scm
->fp
= scm_fp_dup(UNIXCB(skb
).fp
);
1945 /* put message back and return */
1946 skb_queue_head(&sk
->sk_receive_queue
, skb
);
1951 mutex_unlock(&u
->readlock
);
1952 scm_recv(sock
, msg
, siocb
->scm
, flags
);
1954 return copied
? : err
;
/*
 * unix_shutdown - shutdown() for AF_UNIX sockets.
 * Maps the userspace mode (SHUT_RD=0/SHUT_WR=1/SHUT_RDWR=2) to the
 * kernel RCV_SHUTDOWN/SEND_SHUTDOWN bits via (mode+1), applies them to
 * this socket, and for connection-oriented types mirrors the opposite
 * bits onto the peer (our SEND shuts the peer's RCV side and vice
 * versa), waking the peer with POLL_HUP or POLL_IN as appropriate.
 * NOTE(review): fragmentary extract — braces, the peer-reference
 * handling and the return statement are missing from the visible text.
 */
1957 static int unix_shutdown(struct socket
*sock
, int mode
)
1959 struct sock
*sk
= sock
->sk
;
/* SHUT_* +1 conveniently yields the RCV/SEND bit mask. */
1962 mode
= (mode
+1)&(RCV_SHUTDOWN
|SEND_SHUTDOWN
);
1965 unix_state_lock(sk
);
1966 sk
->sk_shutdown
|= mode
;
1967 other
= unix_peer(sk
);
1970 unix_state_unlock(sk
);
1971 sk
->sk_state_change(sk
);
1974 (sk
->sk_type
== SOCK_STREAM
|| sk
->sk_type
== SOCK_SEQPACKET
)) {
/* Our send side closing means the peer's receive side closes, etc. */
1978 if (mode
&RCV_SHUTDOWN
)
1979 peer_mode
|= SEND_SHUTDOWN
;
1980 if (mode
&SEND_SHUTDOWN
)
1981 peer_mode
|= RCV_SHUTDOWN
;
1982 unix_state_lock(other
);
1983 other
->sk_shutdown
|= peer_mode
;
1984 unix_state_unlock(other
);
1985 other
->sk_state_change(other
);
1986 if (peer_mode
== SHUTDOWN_MASK
)
1987 sk_wake_async(other
, SOCK_WAKE_WAITD
, POLL_HUP
);
1988 else if (peer_mode
& RCV_SHUTDOWN
)
1989 sk_wake_async(other
, SOCK_WAKE_WAITD
, POLL_IN
);
/*
 * unix_ioctl - ioctl() for AF_UNIX sockets.
 * Visible cases handle queue-size queries (presumably SIOCOUTQ and
 * SIOCINQ — the switch/case lines are missing from this extract;
 * verify against the original): write-queue bytes via
 * sk_wmem_alloc_get(), and readable bytes computed under the receive
 * queue lock — summed over all skbs for stream/seqpacket, first-skb
 * length for datagram. Listening sockets reject the inq query.
 */
1997 static int unix_ioctl(struct socket
*sock
, unsigned int cmd
, unsigned long arg
)
1999 struct sock
*sk
= sock
->sk
;
2005 amount
= sk_wmem_alloc_get(sk
);
2006 err
= put_user(amount
, (int __user
*)arg
);
2010 struct sk_buff
*skb
;
2012 if (sk
->sk_state
== TCP_LISTEN
) {
/* Walk/peek under the queue lock so the count is consistent. */
2017 spin_lock(&sk
->sk_receive_queue
.lock
);
2018 if (sk
->sk_type
== SOCK_STREAM
||
2019 sk
->sk_type
== SOCK_SEQPACKET
) {
2020 skb_queue_walk(&sk
->sk_receive_queue
, skb
)
2023 skb
= skb_peek(&sk
->sk_receive_queue
);
2027 spin_unlock(&sk
->sk_receive_queue
.lock
);
2028 err
= put_user(amount
, (int __user
*)arg
);
/*
 * unix_poll - poll() for connection-oriented AF_UNIX sockets.
 * Registers on the socket's wait queue, then reports: hangup/rdhup on
 * shutdown, POLLIN|POLLRDNORM when the receive queue is non-empty or
 * receive is shut down, connection-termination state for stream and
 * seqpacket types, and POLLOUT when writable — including after the
 * peer shut down, to avoid stuck sockets.
 * NOTE(review): fragmentary extract — mask initialization, several
 * mask |= lines and the return are missing from the visible text.
 */
2039 static unsigned int unix_poll(struct file
*file
, struct socket
*sock
, poll_table
*wait
)
2041 struct sock
*sk
= sock
->sk
;
2044 sock_poll_wait(file
, sk_sleep(sk
), wait
);
2047 /* exceptional events? */
2050 if (sk
->sk_shutdown
== SHUTDOWN_MASK
)
2052 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
2056 if (!skb_queue_empty(&sk
->sk_receive_queue
) ||
2057 (sk
->sk_shutdown
& RCV_SHUTDOWN
))
2058 mask
|= POLLIN
| POLLRDNORM
;
2060 /* Connection-based need to check for termination and startup */
2061 if ((sk
->sk_type
== SOCK_STREAM
|| sk
->sk_type
== SOCK_SEQPACKET
) &&
2062 sk
->sk_state
== TCP_CLOSE
)
2066 * we set writable also when the other side has shut down the
2067 * connection. This prevents stuck sockets.
2069 if (unix_writable(sk
))
2070 mask
|= POLLOUT
| POLLWRNORM
| POLLWRBAND
;
/*
 * unix_dgram_poll - poll() for datagram/seqpacket AF_UNIX sockets.
 * Like unix_poll but additionally: reports errors from sk_error_queue,
 * handles seqpacket connection state, and for writability also waits on
 * the peer's peer_wait queue — a connected sender is only writable when
 * the peer's receive queue is not full; otherwise SOCK_ASYNC_NOSPACE is
 * set so the sender is woken when space appears.
 * NOTE(review): fragmentary extract — mask initialization, several
 * mask |= lines, the peer put and the return are missing.
 */
2075 static unsigned int unix_dgram_poll(struct file
*file
, struct socket
*sock
,
2078 struct sock
*sk
= sock
->sk
, *other
;
2079 unsigned int mask
, writable
;
2081 sock_poll_wait(file
, sk_sleep(sk
), wait
);
2084 /* exceptional events? */
2085 if (sk
->sk_err
|| !skb_queue_empty(&sk
->sk_error_queue
))
2087 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
2089 if (sk
->sk_shutdown
== SHUTDOWN_MASK
)
2093 if (!skb_queue_empty(&sk
->sk_receive_queue
) ||
2094 (sk
->sk_shutdown
& RCV_SHUTDOWN
))
2095 mask
|= POLLIN
| POLLRDNORM
;
2097 /* Connection-based need to check for termination and startup */
2098 if (sk
->sk_type
== SOCK_SEQPACKET
) {
2099 if (sk
->sk_state
== TCP_CLOSE
)
2101 /* connection hasn't started yet? */
2102 if (sk
->sk_state
== TCP_SYN_SENT
)
2107 writable
= unix_writable(sk
);
2109 other
= unix_peer_get(sk
);
/* Not the peer's peer: writability depends on its queue having room. */
2111 if (unix_peer(other
) != sk
) {
2112 sock_poll_wait(file
, &unix_sk(other
)->peer_wait
,
2114 if (unix_recvq_full(other
))
2123 mask
|= POLLOUT
| POLLWRNORM
| POLLWRBAND
;
2125 set_bit(SOCK_ASYNC_NOSPACE
, &sk
->sk_socket
->flags
);
/*
 * /proc/net/unix iteration helpers (CONFIG_PROC_FS only).
 * first_unix_socket/next_unix_socket walk the global unix_socket_table
 * hash (UNIX_HASH_SIZE+1 chains, caller holds unix_table_lock), moving
 * along a chain via sk_next and then on to the next non-empty chain.
 * unix_iter_state keeps the chain index; unix_seq_idx skips sockets
 * from other network namespaces to find the pos'th entry.
 * NOTE(review): fragmentary extract — return NULL fallthroughs, braces
 * and parts of unix_seq_idx are missing from the visible text.
 */
2130 #ifdef CONFIG_PROC_FS
2131 static struct sock
*first_unix_socket(int *i
)
2133 for (*i
= 0; *i
<= UNIX_HASH_SIZE
; (*i
)++) {
2134 if (!hlist_empty(&unix_socket_table
[*i
]))
2135 return __sk_head(&unix_socket_table
[*i
]);
2140 static struct sock
*next_unix_socket(int *i
, struct sock
*s
)
2142 struct sock
*next
= sk_next(s
);
2143 /* More in this chain? */
2146 /* Look for next non-empty chain. */
2147 for ((*i
)++; *i
<= UNIX_HASH_SIZE
; (*i
)++) {
2148 if (!hlist_empty(&unix_socket_table
[*i
]))
2149 return __sk_head(&unix_socket_table
[*i
]);
2154 struct unix_iter_state
{
2155 struct seq_net_private p
;
2159 static struct sock
*unix_seq_idx(struct seq_file
*seq
, loff_t pos
)
2161 struct unix_iter_state
*iter
= seq
->private;
/* Skip sockets belonging to other network namespaces. */
2165 for (s
= first_unix_socket(&iter
->i
); s
; s
= next_unix_socket(&iter
->i
, s
)) {
2166 if (sock_net(s
) != seq_file_net(seq
))
/*
 * seq_file callbacks for /proc/net/unix.
 * start: takes unix_table_lock (held across the whole traversal, see
 * __acquires/__releases annotations) and positions the iterator —
 * SEQ_START_TOKEN at pos 0 triggers the header line in show.
 * next: advances to the next socket in this namespace.
 * stop: drops unix_table_lock.
 * show: prints the header or one line per socket (pointer, refcount,
 * flags, type, state) followed by the bound sun_path; abstract names
 * are handled specially (leading NUL — the branch body is missing from
 * this extract).
 */
2175 static void *unix_seq_start(struct seq_file
*seq
, loff_t
*pos
)
2176 __acquires(unix_table_lock
)
2178 spin_lock(&unix_table_lock
);
2179 return *pos
? unix_seq_idx(seq
, *pos
- 1) : SEQ_START_TOKEN
;
2182 static void *unix_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
2184 struct unix_iter_state
*iter
= seq
->private;
2185 struct sock
*sk
= v
;
2188 if (v
== SEQ_START_TOKEN
)
2189 sk
= first_unix_socket(&iter
->i
);
2191 sk
= next_unix_socket(&iter
->i
, sk
);
2192 while (sk
&& (sock_net(sk
) != seq_file_net(seq
)))
2193 sk
= next_unix_socket(&iter
->i
, sk
);
2197 static void unix_seq_stop(struct seq_file
*seq
, void *v
)
2198 __releases(unix_table_lock
)
2200 spin_unlock(&unix_table_lock
);
2203 static int unix_seq_show(struct seq_file
*seq
, void *v
)
2206 if (v
== SEQ_START_TOKEN
)
2207 seq_puts(seq
, "Num RefCount Protocol Flags Type St "
2211 struct unix_sock
*u
= unix_sk(s
);
2214 seq_printf(seq
, "%p: %08X %08X %08X %04X %02X %5lu",
2216 atomic_read(&s
->sk_refcnt
),
2218 s
->sk_state
== TCP_LISTEN
? __SO_ACCEPTCON
: 0,
2221 (s
->sk_state
== TCP_ESTABLISHED
? SS_CONNECTED
: SS_UNCONNECTED
) :
2222 (s
->sk_state
== TCP_ESTABLISHED
? SS_CONNECTING
: SS_DISCONNECTING
),
2230 len
= u
->addr
->len
- sizeof(short);
2231 if (!UNIX_ABSTRACT(s
))
2237 for ( ; i
< len
; i
++)
2238 seq_putc(seq
, u
->addr
->name
->sun_path
[i
]);
2240 unix_state_unlock(s
);
2241 seq_putc(seq
, '\n');
/*
 * seq_file/proc plumbing and per-namespace init.
 * unix_seq_ops wires the four iterator callbacks; unix_seq_open binds
 * them to a per-net seq file with unix_iter_state private data;
 * unix_seq_fops is the /proc/net/unix file_operations; unix_family_ops
 * registers unix_create for PF_UNIX. unix_net_init sets the default
 * datagram backlog (10), registers the per-net sysctls, and creates
 * /proc/net/unix — unregistering the sysctls again on proc failure.
 * NOTE(review): fragmentary extract — closing braces, .read/.family
 * initializers and error returns are missing from the visible text.
 */
2247 static const struct seq_operations unix_seq_ops
= {
2248 .start
= unix_seq_start
,
2249 .next
= unix_seq_next
,
2250 .stop
= unix_seq_stop
,
2251 .show
= unix_seq_show
,
2254 static int unix_seq_open(struct inode
*inode
, struct file
*file
)
2256 return seq_open_net(inode
, file
, &unix_seq_ops
,
2257 sizeof(struct unix_iter_state
));
2260 static const struct file_operations unix_seq_fops
= {
2261 .owner
= THIS_MODULE
,
2262 .open
= unix_seq_open
,
2264 .llseek
= seq_lseek
,
2265 .release
= seq_release_net
,
2270 static const struct net_proto_family unix_family_ops
= {
2272 .create
= unix_create
,
2273 .owner
= THIS_MODULE
,
2277 static int __net_init
unix_net_init(struct net
*net
)
2279 int error
= -ENOMEM
;
/* Default max datagram queue length for this namespace. */
2281 net
->unx
.sysctl_max_dgram_qlen
= 10;
2282 if (unix_sysctl_register(net
))
2285 #ifdef CONFIG_PROC_FS
2286 if (!proc_net_fops_create(net
, "unix", 0, &unix_seq_fops
)) {
2287 unix_sysctl_unregister(net
);
/*
 * Module setup/teardown.
 * unix_net_exit undoes unix_net_init (sysctls + /proc/net/unix);
 * unix_net_ops registers both with the pernet machinery. af_unix_init
 * asserts unix_skb_parms fits in skb->cb, registers the proto, the
 * PF_UNIX family and the pernet ops; af_unix_exit unregisters them in
 * reverse. Registered via fs_initcall — see the comment block at the
 * end for the ordering rationale.
 * NOTE(review): fragmentary extract — braces, 'rc' declaration/returns
 * and the proto_register error branch body are missing.
 */
2296 static void __net_exit
unix_net_exit(struct net
*net
)
2298 unix_sysctl_unregister(net
);
2299 proc_net_remove(net
, "unix");
2302 static struct pernet_operations unix_net_ops
= {
2303 .init
= unix_net_init
,
2304 .exit
= unix_net_exit
,
2307 static int __init
af_unix_init(void)
2310 struct sk_buff
*dummy_skb
;
/* Compile-time check: our per-skb control block must fit in skb->cb. */
2312 BUILD_BUG_ON(sizeof(struct unix_skb_parms
) > sizeof(dummy_skb
->cb
));
2314 rc
= proto_register(&unix_proto
, 1);
2316 printk(KERN_CRIT
"%s: Cannot create unix_sock SLAB cache!\n",
2321 sock_register(&unix_family_ops
);
2322 register_pernet_subsys(&unix_net_ops
);
2327 static void __exit
af_unix_exit(void)
2329 sock_unregister(PF_UNIX
);
2330 proto_unregister(&unix_proto
);
2331 unregister_pernet_subsys(&unix_net_ops
);
2334 /* Earlier than device_initcall() so that other drivers invoking
2335 request_module() don't end up in a loop when modprobe tries
2336 to use a UNIX socket. But later than subsys_initcall() because
2337 we depend on stuff initialised there */
2338 fs_initcall(af_unix_init
);
2339 module_exit(af_unix_exit
);
2341 MODULE_LICENSE("GPL");
2342 MODULE_ALIAS_NETPROTO(PF_UNIX
);