Ok. I didn't make 2.4.0 in 2000. Tough. I tried, but we had some
[davej-history.git] / net / unix / af_unix.c
blobe48b8549a35bdba0bb511d04b79475702ddaa6c3
1 /*
2 * NET3: Implementation of BSD Unix domain sockets.
4 * Authors: Alan Cox, <alan.cox@linux.org>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
11 * Version: $Id: af_unix.c,v 1.108 2000/11/10 04:02:04 davem Exp $
13 * Fixes:
14 * Linus Torvalds : Assorted bug cures.
15 * Niibe Yutaka : async I/O support.
16 * Carsten Paeth : PF_UNIX check, address fixes.
17 * Alan Cox : Limit size of allocated blocks.
18 * Alan Cox : Fixed the stupid socketpair bug.
19 * Alan Cox : BSD compatibility fine tuning.
20 * Alan Cox : Fixed a bug in connect when interrupted.
21 * Alan Cox : Sorted out a proper draft version of
22 * file descriptor passing hacked up from
23 * Mike Shaver's work.
24 * Marty Leisner : Fixes to fd passing
25 * Nick Nevin : recvmsg bugfix.
26 * Alan Cox : Started proper garbage collector
27 * Heiko EiBfeldt : Missing verify_area check
28 * Alan Cox : Started POSIXisms
29 * Andreas Schwab : Replace inode by dentry for proper
30 * reference counting
31 * Kirk Petersen : Made this a module
32 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
33 * Lots of bug fixes.
34 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
35 * by above two patches.
36 * Andrea Arcangeli : If possible we block in connect(2)
37 * if the max backlog of the listen socket
38 * has been reached. This won't break
39 * old apps and it will avoid huge amount
40 * of socks hashed (this for unix_gc()
41 * performance reasons).
42 * Security fix that limits the max
43 * number of socks to 2*max_files and
44 * the number of skb queueable in the
45 * dgram receiver.
46 * Artur Skawina : Hash function optimizations
47 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
48 * Malcolm Beattie : Set peercred for socketpair
49 * Michal Ostrowski : Module initialization cleanup.
52 * Known differences from reference BSD that was tested:
54 * [TO FIX]
55 * ECONNREFUSED is not returned from one end of a connected() socket to the
56 * other the moment one end closes.
57 * fstat() doesn't return st_dev=NODEV, and give the blksize as high water mark
58 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
59 * [NOT TO FIX]
60 * accept() returns a path name even if the connecting socket has closed
61 * in the meantime (BSD loses the path and gives up).
62 * accept() returns 0 length path for an unbound connector. BSD returns 16
63 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
64 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
65 * BSD af_unix apparently has connect forgetting to block properly.
66 * (need to check this with the POSIX spec in detail)
68 * Differences from 2.0.0-11-... (ANK)
69 * Bug fixes and improvements.
70 * - client shutdown killed server socket.
71 * - removed all useless cli/sti pairs.
73 * Semantic changes/extensions.
74 * - generic control message passing.
75 * - SCM_CREDENTIALS control message.
76 * - "Abstract" (not FS based) socket bindings.
77 * Abstract names are sequences of bytes (not zero terminated)
78 * started by 0, so that this name space does not intersect
79 * with BSD names.
82 #include <linux/module.h>
83 #include <linux/config.h>
84 #include <linux/kernel.h>
85 #include <linux/major.h>
86 #include <linux/signal.h>
87 #include <linux/sched.h>
88 #include <linux/errno.h>
89 #include <linux/string.h>
90 #include <linux/stat.h>
91 #include <linux/socket.h>
92 #include <linux/un.h>
93 #include <linux/fcntl.h>
94 #include <linux/termios.h>
95 #include <linux/sockios.h>
96 #include <linux/net.h>
97 #include <linux/in.h>
98 #include <linux/fs.h>
99 #include <linux/malloc.h>
100 #include <asm/uaccess.h>
101 #include <linux/skbuff.h>
102 #include <linux/netdevice.h>
103 #include <net/sock.h>
104 #include <net/tcp.h>
105 #include <net/af_unix.h>
106 #include <linux/proc_fs.h>
107 #include <net/scm.h>
108 #include <linux/init.h>
109 #include <linux/poll.h>
110 #include <linux/smp_lock.h>
112 #include <asm/checksum.h>
114 #define min(a,b) (((a)<(b))?(a):(b))
116 int sysctl_unix_max_dgram_qlen = 10;
118 unix_socket *unix_socket_table[UNIX_HASH_SIZE+1];
119 rwlock_t unix_table_lock = RW_LOCK_UNLOCKED;
120 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
122 #define unix_sockets_unbound (unix_socket_table[UNIX_HASH_SIZE])
124 #define UNIX_ABSTRACT(sk) ((sk)->protinfo.af_unix.addr->hash!=UNIX_HASH_SIZE)
/*
 * SMP locking strategy:
 *  - the hash table is protected by the rwlock unix_table_lock;
 *  - each socket's state is protected by its own separate rwlock.
 */
133 extern __inline__ unsigned unix_hash_fold(unsigned hash)
135 hash ^= hash>>16;
136 hash ^= hash>>8;
137 return hash&(UNIX_HASH_SIZE-1);
140 #define unix_peer(sk) ((sk)->pair)
142 extern __inline__ int unix_our_peer(unix_socket *sk, unix_socket *osk)
144 return unix_peer(osk) == sk;
147 extern __inline__ int unix_may_send(unix_socket *sk, unix_socket *osk)
149 return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
152 static __inline__ unix_socket * unix_peer_get(unix_socket *s)
154 unix_socket *peer;
156 unix_state_rlock(s);
157 peer = unix_peer(s);
158 if (peer)
159 sock_hold(peer);
160 unix_state_runlock(s);
161 return peer;
164 extern __inline__ void unix_release_addr(struct unix_address *addr)
166 if (atomic_dec_and_test(&addr->refcnt))
167 kfree(addr);
171 * Check unix socket name:
172 * - should be not zero length.
173 * - if started by not zero, should be NULL terminated (FS object)
174 * - if started by zero, it is abstract name.
177 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
179 if (len <= sizeof(short) || len > sizeof(*sunaddr))
180 return -EINVAL;
181 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
182 return -EINVAL;
183 if (sunaddr->sun_path[0])
186 * This may look like an off by one error but it is
187 * a bit more subtle. 108 is the longest valid AF_UNIX
188 * path for a binding. sun_path[108] doesnt as such
189 * exist. However in kernel space we are guaranteed that
190 * it is a valid memory location in our kernel
191 * address buffer.
193 if (len > sizeof(*sunaddr))
194 len = sizeof(*sunaddr);
195 ((char *)sunaddr)[len]=0;
196 len = strlen(sunaddr->sun_path)+1+sizeof(short);
197 return len;
200 *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
201 return len;
204 static void __unix_remove_socket(unix_socket *sk)
206 unix_socket **list = sk->protinfo.af_unix.list;
207 if (list) {
208 if (sk->next)
209 sk->next->prev = sk->prev;
210 if (sk->prev)
211 sk->prev->next = sk->next;
212 if (*list == sk)
213 *list = sk->next;
214 sk->protinfo.af_unix.list = NULL;
215 sk->prev = NULL;
216 sk->next = NULL;
217 __sock_put(sk);
221 static void __unix_insert_socket(unix_socket **list, unix_socket *sk)
223 BUG_TRAP(sk->protinfo.af_unix.list==NULL);
225 sk->protinfo.af_unix.list = list;
226 sk->prev = NULL;
227 sk->next = *list;
228 if (*list)
229 (*list)->prev = sk;
230 *list=sk;
231 sock_hold(sk);
234 static __inline__ void unix_remove_socket(unix_socket *sk)
236 write_lock(&unix_table_lock);
237 __unix_remove_socket(sk);
238 write_unlock(&unix_table_lock);
241 static __inline__ void unix_insert_socket(unix_socket **list, unix_socket *sk)
243 write_lock(&unix_table_lock);
244 __unix_insert_socket(list, sk);
245 write_unlock(&unix_table_lock);
248 static unix_socket *__unix_find_socket_byname(struct sockaddr_un *sunname,
249 int len, int type, unsigned hash)
251 unix_socket *s;
253 for (s=unix_socket_table[hash^type]; s; s=s->next) {
254 if(s->protinfo.af_unix.addr->len==len &&
255 memcmp(s->protinfo.af_unix.addr->name, sunname, len) == 0)
256 return s;
258 return NULL;
261 static __inline__ unix_socket *
262 unix_find_socket_byname(struct sockaddr_un *sunname,
263 int len, int type, unsigned hash)
265 unix_socket *s;
267 read_lock(&unix_table_lock);
268 s = __unix_find_socket_byname(sunname, len, type, hash);
269 if (s)
270 sock_hold(s);
271 read_unlock(&unix_table_lock);
272 return s;
275 static unix_socket *unix_find_socket_byinode(struct inode *i)
277 unix_socket *s;
279 read_lock(&unix_table_lock);
280 for (s=unix_socket_table[i->i_ino & (UNIX_HASH_SIZE-1)]; s; s=s->next)
282 struct dentry *dentry = s->protinfo.af_unix.dentry;
284 if(dentry && dentry->d_inode == i)
286 sock_hold(s);
287 break;
290 read_unlock(&unix_table_lock);
291 return s;
294 static __inline__ int unix_writable(struct sock *sk)
296 return ((atomic_read(&sk->wmem_alloc)<<2) <= sk->sndbuf);
299 static void unix_write_space(struct sock *sk)
301 read_lock(&sk->callback_lock);
302 if (unix_writable(sk)) {
303 if (sk->sleep && waitqueue_active(sk->sleep))
304 wake_up_interruptible(sk->sleep);
305 sk_wake_async(sk, 2, POLL_OUT);
307 read_unlock(&sk->callback_lock);
310 /* When dgram socket disconnects (or changes its peer), we clear its receive
311 * queue of packets arrived from previous peer. First, it allows to do
312 * flow control based only on wmem_alloc; second, sk connected to peer
313 * may receive messages only from that peer. */
314 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
316 if (skb_queue_len(&sk->receive_queue)) {
317 skb_queue_purge(&sk->receive_queue);
318 wake_up_interruptible_all(&sk->protinfo.af_unix.peer_wait);
320 /* If one link of bidirectional dgram pipe is disconnected,
321 * we signal error. Messages are lost. Do not make this,
322 * when peer was not connected to us.
324 if (!other->dead && unix_peer(other) == sk) {
325 other->err = ECONNRESET;
326 other->error_report(other);
331 static void unix_sock_destructor(struct sock *sk)
333 skb_queue_purge(&sk->receive_queue);
335 BUG_TRAP(atomic_read(&sk->wmem_alloc) == 0);
336 BUG_TRAP(sk->protinfo.af_unix.list==NULL);
337 BUG_TRAP(sk->socket==NULL);
338 if (sk->dead==0) {
339 printk("Attempt to release alive unix socket: %p\n", sk);
340 return;
343 if (sk->protinfo.af_unix.addr)
344 unix_release_addr(sk->protinfo.af_unix.addr);
346 atomic_dec(&unix_nr_socks);
347 #ifdef UNIX_REFCNT_DEBUG
348 printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
349 #endif
350 MOD_DEC_USE_COUNT;
353 static int unix_release_sock (unix_socket *sk, int embrion)
355 struct dentry *dentry;
356 struct vfsmount *mnt;
357 unix_socket *skpair;
358 struct sk_buff *skb;
359 int state;
361 unix_remove_socket(sk);
363 /* Clear state */
364 unix_state_wlock(sk);
365 sock_orphan(sk);
366 sk->shutdown = SHUTDOWN_MASK;
367 dentry = sk->protinfo.af_unix.dentry;
368 sk->protinfo.af_unix.dentry=NULL;
369 mnt = sk->protinfo.af_unix.mnt;
370 sk->protinfo.af_unix.mnt=NULL;
371 state = sk->state;
372 sk->state = TCP_CLOSE;
373 unix_state_wunlock(sk);
375 wake_up_interruptible_all(&sk->protinfo.af_unix.peer_wait);
377 skpair=unix_peer(sk);
379 if (skpair!=NULL) {
380 if (sk->type==SOCK_STREAM) {
381 unix_state_wlock(skpair);
382 skpair->shutdown=SHUTDOWN_MASK; /* No more writes*/
383 if (!skb_queue_empty(&sk->receive_queue) || embrion)
384 skpair->err = ECONNRESET;
385 unix_state_wunlock(skpair);
386 skpair->state_change(skpair);
387 read_lock(&skpair->callback_lock);
388 sk_wake_async(skpair,1,POLL_HUP);
389 read_unlock(&skpair->callback_lock);
391 sock_put(skpair); /* It may now die */
392 unix_peer(sk) = NULL;
395 /* Try to flush out this socket. Throw out buffers at least */
397 while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
399 if (state==TCP_LISTEN)
400 unix_release_sock(skb->sk, 1);
401 /* passed fds are erased in the kfree_skb hook */
402 kfree_skb(skb);
405 if (dentry) {
406 dput(dentry);
407 mntput(mnt);
410 sock_put(sk);
412 /* ---- Socket is dead now and most probably destroyed ---- */
415 * Fixme: BSD difference: In BSD all sockets connected to use get
416 * ECONNRESET and we die on the spot. In Linux we behave
417 * like files and pipes do and wait for the last
418 * dereference.
420 * Can't we simply set sock->err?
422 * What the above comment does talk about? --ANK(980817)
425 if (atomic_read(&unix_tot_inflight))
426 unix_gc(); /* Garbage collect fds */
428 return 0;
431 static int unix_listen(struct socket *sock, int backlog)
433 int err;
434 struct sock *sk = sock->sk;
436 err = -EOPNOTSUPP;
437 if (sock->type!=SOCK_STREAM)
438 goto out; /* Only stream sockets accept */
439 err = -EINVAL;
440 if (!sk->protinfo.af_unix.addr)
441 goto out; /* No listens on an unbound socket */
442 unix_state_wlock(sk);
443 if (sk->state != TCP_CLOSE && sk->state != TCP_LISTEN)
444 goto out_unlock;
445 if (backlog > sk->max_ack_backlog)
446 wake_up_interruptible_all(&sk->protinfo.af_unix.peer_wait);
447 sk->max_ack_backlog=backlog;
448 sk->state=TCP_LISTEN;
449 /* set credentials so connect can copy them */
450 sk->peercred.pid = current->pid;
451 sk->peercred.uid = current->euid;
452 sk->peercred.gid = current->egid;
453 err = 0;
455 out_unlock:
456 unix_state_wunlock(sk);
457 out:
458 return err;
461 extern struct proto_ops unix_stream_ops;
462 extern struct proto_ops unix_dgram_ops;
464 static struct sock * unix_create1(struct socket *sock)
466 struct sock *sk;
468 if (atomic_read(&unix_nr_socks) >= 2*files_stat.max_files)
469 return NULL;
471 MOD_INC_USE_COUNT;
472 sk = sk_alloc(PF_UNIX, GFP_KERNEL, 1);
473 if (!sk) {
474 MOD_DEC_USE_COUNT;
475 return NULL;
478 atomic_inc(&unix_nr_socks);
480 sock_init_data(sock,sk);
482 sk->write_space = unix_write_space;
484 sk->max_ack_backlog = sysctl_unix_max_dgram_qlen;
485 sk->destruct = unix_sock_destructor;
486 sk->protinfo.af_unix.dentry=NULL;
487 sk->protinfo.af_unix.mnt=NULL;
488 sk->protinfo.af_unix.lock = RW_LOCK_UNLOCKED;
489 atomic_set(&sk->protinfo.af_unix.inflight, 0);
490 init_MUTEX(&sk->protinfo.af_unix.readsem);/* single task reading lock */
491 init_waitqueue_head(&sk->protinfo.af_unix.peer_wait);
492 sk->protinfo.af_unix.list=NULL;
493 unix_insert_socket(&unix_sockets_unbound, sk);
495 return sk;
498 static int unix_create(struct socket *sock, int protocol)
500 if (protocol && protocol != PF_UNIX)
501 return -EPROTONOSUPPORT;
503 sock->state = SS_UNCONNECTED;
505 switch (sock->type) {
506 case SOCK_STREAM:
507 sock->ops = &unix_stream_ops;
508 break;
510 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
511 * nothing uses it.
513 case SOCK_RAW:
514 sock->type=SOCK_DGRAM;
515 case SOCK_DGRAM:
516 sock->ops = &unix_dgram_ops;
517 break;
518 default:
519 return -ESOCKTNOSUPPORT;
522 return unix_create1(sock) ? 0 : -ENOMEM;
525 static int unix_release(struct socket *sock)
527 unix_socket *sk = sock->sk;
529 if (!sk)
530 return 0;
532 sock->sk = NULL;
534 return unix_release_sock (sk, 0);
537 static int unix_autobind(struct socket *sock)
539 struct sock *sk = sock->sk;
540 static u32 ordernum = 1;
541 struct unix_address * addr;
542 int err;
544 down(&sk->protinfo.af_unix.readsem);
546 err = 0;
547 if (sk->protinfo.af_unix.addr)
548 goto out;
550 err = -ENOMEM;
551 addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
552 if (!addr)
553 goto out;
555 memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
556 addr->name->sun_family = AF_UNIX;
557 atomic_set(&addr->refcnt, 1);
559 retry:
560 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
561 addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
563 write_lock(&unix_table_lock);
564 ordernum = (ordernum+1)&0xFFFFF;
566 if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
567 addr->hash)) {
568 write_unlock(&unix_table_lock);
569 /* Sanity yield. It is unusual case, but yet... */
570 if (!(ordernum&0xFF)) {
571 current->policy |= SCHED_YIELD;
572 schedule();
574 goto retry;
576 addr->hash ^= sk->type;
578 __unix_remove_socket(sk);
579 sk->protinfo.af_unix.addr = addr;
580 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
581 write_unlock(&unix_table_lock);
582 err = 0;
584 out:
585 up(&sk->protinfo.af_unix.readsem);
586 return err;
/*
 * Resolve a destination address to a socket.
 *
 * Filesystem names are looked up through the VFS, checked for write
 * permission and for being a socket inode, then matched by inode.
 * Abstract names are matched in the hash table.
 *
 * Returns the socket with a reference held, or NULL with *error set
 * (*error is not written on success).
 */
static unix_socket *unix_find_other(struct sockaddr_un *sunname, int len,
				    int type, unsigned hash, int *error)
{
	unix_socket *target;
	struct nameidata nd;
	int err = 0;

	if (!sunname->sun_path[0]) {
		/* Abstract name. */
		target = unix_find_socket_byname(sunname, len, type, hash);
		if (target)
			return target;
		err = -ECONNREFUSED;
		goto fail;
	}

	if (path_init(sunname->sun_path,
		      LOOKUP_POSITIVE|LOOKUP_FOLLOW, &nd))
		err = path_walk(sunname->sun_path, &nd);
	if (err)
		goto fail;

	err = permission(nd.dentry->d_inode, MAY_WRITE);
	if (err)
		goto put_fail;

	err = -ECONNREFUSED;
	if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
		goto put_fail;
	target = unix_find_socket_byinode(nd.dentry->d_inode);
	if (!target)
		goto put_fail;

	path_release(&nd);

	if (target->type != type) {
		err = -EPROTOTYPE;
		sock_put(target);
		goto fail;
	}
	return target;

put_fail:
	path_release(&nd);
fail:
	*error = err;
	return NULL;
}
636 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
638 struct sock *sk = sock->sk;
639 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
640 struct dentry * dentry = NULL;
641 struct nameidata nd;
642 int err;
643 unsigned hash;
644 struct unix_address *addr;
645 unix_socket **list;
647 err = -EINVAL;
648 if (sunaddr->sun_family != AF_UNIX)
649 goto out;
651 if (addr_len==sizeof(short)) {
652 err = unix_autobind(sock);
653 goto out;
656 err = unix_mkname(sunaddr, addr_len, &hash);
657 if (err < 0)
658 goto out;
659 addr_len = err;
661 down(&sk->protinfo.af_unix.readsem);
663 err = -EINVAL;
664 if (sk->protinfo.af_unix.addr)
665 goto out_up;
667 err = -ENOMEM;
668 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
669 if (!addr)
670 goto out_up;
672 memcpy(addr->name, sunaddr, addr_len);
673 addr->len = addr_len;
674 addr->hash = hash^sk->type;
675 atomic_set(&addr->refcnt, 1);
677 if (sunaddr->sun_path[0]) {
678 err = 0;
680 * Get the parent directory, calculate the hash for last
681 * component.
683 if (path_init(sunaddr->sun_path, LOOKUP_PARENT, &nd))
684 err = path_walk(sunaddr->sun_path, &nd);
685 if (err)
686 goto out_mknod_parent;
688 * Yucky last component or no last component at all?
689 * (foo/., foo/.., /////)
691 err = -EEXIST;
692 if (nd.last_type != LAST_NORM)
693 goto out_mknod;
695 * Lock the directory.
697 down(&nd.dentry->d_inode->i_sem);
699 * Do the final lookup.
701 dentry = lookup_hash(&nd.last, nd.dentry);
702 err = PTR_ERR(dentry);
703 if (IS_ERR(dentry))
704 goto out_mknod_unlock;
705 err = -ENOENT;
707 * Special case - lookup gave negative, but... we had foo/bar/
708 * From the vfs_mknod() POV we just have a negative dentry -
709 * all is fine. Let's be bastards - you had / on the end, you've
710 * been asking for (non-existent) directory. -ENOENT for you.
712 if (nd.last.name[nd.last.len] && !dentry->d_inode)
713 goto out_mknod_dput;
715 * All right, let's create it.
717 err = vfs_mknod(nd.dentry->d_inode, dentry,
718 S_IFSOCK|sock->inode->i_mode, 0);
719 if (err)
720 goto out_mknod_dput;
721 up(&nd.dentry->d_inode->i_sem);
722 dput(nd.dentry);
723 nd.dentry = dentry;
725 addr->hash = UNIX_HASH_SIZE;
728 write_lock(&unix_table_lock);
730 if (!sunaddr->sun_path[0]) {
731 err = -EADDRINUSE;
732 if (__unix_find_socket_byname(sunaddr, addr_len,
733 sk->type, hash)) {
734 unix_release_addr(addr);
735 goto out_unlock;
738 list = &unix_socket_table[addr->hash];
739 } else {
740 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
741 sk->protinfo.af_unix.dentry = nd.dentry;
742 sk->protinfo.af_unix.mnt = nd.mnt;
745 err = 0;
746 __unix_remove_socket(sk);
747 sk->protinfo.af_unix.addr = addr;
748 __unix_insert_socket(list, sk);
750 out_unlock:
751 write_unlock(&unix_table_lock);
752 out_up:
753 up(&sk->protinfo.af_unix.readsem);
754 out:
755 return err;
757 out_mknod_dput:
758 dput(dentry);
759 out_mknod_unlock:
760 up(&nd.dentry->d_inode->i_sem);
761 out_mknod:
762 path_release(&nd);
763 out_mknod_parent:
764 if (err==-EEXIST)
765 err=-EADDRINUSE;
766 unix_release_addr(addr);
767 goto out_up;
770 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
771 int alen, int flags)
773 struct sock *sk = sock->sk;
774 struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
775 struct sock *other;
776 unsigned hash;
777 int err;
779 if (addr->sa_family != AF_UNSPEC) {
780 err = unix_mkname(sunaddr, alen, &hash);
781 if (err < 0)
782 goto out;
783 alen = err;
785 if (sock->passcred && !sk->protinfo.af_unix.addr &&
786 (err = unix_autobind(sock)) != 0)
787 goto out;
789 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
790 if (!other)
791 goto out;
793 unix_state_wlock(sk);
795 err = -EPERM;
796 if (!unix_may_send(sk, other))
797 goto out_unlock;
798 } else {
800 * 1003.1g breaking connected state with AF_UNSPEC
802 other = NULL;
803 unix_state_wlock(sk);
807 * If it was connected, reconnect.
809 if (unix_peer(sk)) {
810 struct sock *old_peer = unix_peer(sk);
811 unix_peer(sk)=other;
812 unix_state_wunlock(sk);
814 if (other != old_peer)
815 unix_dgram_disconnected(sk, old_peer);
816 sock_put(old_peer);
817 } else {
818 unix_peer(sk)=other;
819 unix_state_wunlock(sk);
821 return 0;
823 out_unlock:
824 unix_state_wunlock(sk);
825 sock_put(other);
826 out:
827 return err;
830 static long unix_wait_for_peer(unix_socket *other, long timeo)
832 int sched;
833 DECLARE_WAITQUEUE(wait, current);
835 __set_current_state(TASK_INTERRUPTIBLE);
836 add_wait_queue_exclusive(&other->protinfo.af_unix.peer_wait, &wait);
838 sched = (!other->dead &&
839 !(other->shutdown&RCV_SHUTDOWN) &&
840 skb_queue_len(&other->receive_queue) > other->max_ack_backlog);
842 unix_state_runlock(other);
844 if (sched)
845 timeo = schedule_timeout(timeo);
847 __set_current_state(TASK_RUNNING);
848 remove_wait_queue(&other->protinfo.af_unix.peer_wait, &wait);
849 return timeo;
/*
 * connect(2) for stream sockets.
 *
 * A new sock (the future accepted end) and a one-byte skb are allocated
 * up front.  The listener is then looked up and its backlog checked
 * (sleeping if allowed).  The two ends are cross-linked and the skb,
 * owned by the new sock, is queued on the listener for accept() to
 * pick up.
 *
 * Returns 0 on success or a negative errno.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct sock *newsk = NULL;
	unix_socket *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sock->passcred && !sk->protinfo.af_unix.addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(NULL);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(sunaddr, addr_len, sk->type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_rlock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (other->dead) {
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->state != TCP_LISTEN)
		goto out_unlock;

	if (skb_queue_len(&other->receive_queue) > other->max_ack_backlog) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		/* Drops the read lock on other. */
		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab write lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->state;

	if (st == TCP_ESTABLISHED) {
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	}
	if (st != TCP_CLOSE) {
		err = -EINVAL;
		goto out_unlock;
	}
	/* TCP_CLOSE: this is ok... continue with connect */

	unix_state_wlock(sk);

	if (sk->state != st) {
		unix_state_wunlock(sk);
		unix_state_runlock(other);
		sock_put(other);
		goto restart;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk) = sk;
	newsk->state = TCP_ESTABLISHED;
	newsk->type = SOCK_STREAM;
	newsk->peercred.pid = current->pid;
	newsk->peercred.uid = current->euid;
	newsk->peercred.gid = current->egid;
	newsk->sleep = &newsk->protinfo.af_unix.peer_wait;

	/* copy address information from listening to new sock*/
	if (other->protinfo.af_unix.addr) {
		atomic_inc(&other->protinfo.af_unix.addr->refcnt);
		newsk->protinfo.af_unix.addr = other->protinfo.af_unix.addr;
	}
	if (other->protinfo.af_unix.dentry) {
		newsk->protinfo.af_unix.dentry = dget(other->protinfo.af_unix.dentry);
		newsk->protinfo.af_unix.mnt = mntget(other->protinfo.af_unix.mnt);
	}

	/* Set credentials */
	sk->peercred = other->peercred;

	sock_hold(newsk);
	unix_peer(sk) = newsk;
	sock->state = SS_CONNECTED;
	sk->state = TCP_ESTABLISHED;

	unix_state_wunlock(sk);

	/* take ten and and send info to listening sock */
	skb_queue_tail(&other->receive_queue, skb);
	unix_state_runlock(other);
	other->data_ready(other, 0);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_runlock(other);

out:
	if (skb)
		kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1015 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1017 struct sock *ska=socka->sk, *skb = sockb->sk;
1019 /* Join our sockets back to back */
1020 sock_hold(ska);
1021 sock_hold(skb);
1022 unix_peer(ska)=skb;
1023 unix_peer(skb)=ska;
1024 ska->peercred.pid = skb->peercred.pid = current->pid;
1025 ska->peercred.uid = skb->peercred.uid = current->euid;
1026 ska->peercred.gid = skb->peercred.gid = current->egid;
1028 if (ska->type != SOCK_DGRAM)
1030 ska->state=TCP_ESTABLISHED;
1031 skb->state=TCP_ESTABLISHED;
1032 socka->state=SS_CONNECTED;
1033 sockb->state=SS_CONNECTED;
1035 return 0;
1038 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1040 unix_socket *sk = sock->sk;
1041 unix_socket *tsk;
1042 struct sk_buff *skb;
1043 int err;
1045 err = -EOPNOTSUPP;
1046 if (sock->type!=SOCK_STREAM)
1047 goto out;
1049 err = -EINVAL;
1050 if (sk->state!=TCP_LISTEN)
1051 goto out;
1053 /* If socket state is TCP_LISTEN it cannot change (for now...),
1054 * so that no locks are necessary.
1057 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1058 if (!skb)
1059 goto out;
1061 tsk = skb->sk;
1062 skb_free_datagram(sk, skb);
1063 wake_up_interruptible(&sk->protinfo.af_unix.peer_wait);
1065 /* attach accepted sock to socket */
1066 unix_state_wlock(tsk);
1067 newsock->state = SS_CONNECTED;
1068 sock_graft(tsk, newsock);
1069 unix_state_wunlock(tsk);
1070 return 0;
1072 out:
1073 return err;
1077 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1079 struct sock *sk = sock->sk;
1080 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1081 int err = 0;
1083 if (peer) {
1084 sk = unix_peer_get(sk);
1086 err = -ENOTCONN;
1087 if (!sk)
1088 goto out;
1089 err = 0;
1090 } else {
1091 sock_hold(sk);
1094 unix_state_rlock(sk);
1095 if (!sk->protinfo.af_unix.addr) {
1096 sunaddr->sun_family = AF_UNIX;
1097 sunaddr->sun_path[0] = 0;
1098 *uaddr_len = sizeof(short);
1099 } else {
1100 struct unix_address *addr = sk->protinfo.af_unix.addr;
1102 *uaddr_len = addr->len;
1103 memcpy(sunaddr, addr->name, *uaddr_len);
1105 unix_state_runlock(sk);
1106 sock_put(sk);
1107 out:
1108 return err;
1111 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1113 int i;
1115 scm->fp = UNIXCB(skb).fp;
1116 skb->destructor = sock_wfree;
1117 UNIXCB(skb).fp = NULL;
1119 for (i=scm->fp->count-1; i>=0; i--)
1120 unix_notinflight(scm->fp->fp[i]);
1123 static void unix_destruct_fds(struct sk_buff *skb)
1125 struct scm_cookie scm;
1126 memset(&scm, 0, sizeof(scm));
1127 unix_detach_fds(&scm, skb);
1129 /* Alas, it calls VFS */
1130 /* So fscking what? fput() had been SMP-safe since the last Summer */
1131 scm_destroy(&scm);
1132 sock_wfree(skb);
1135 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1137 int i;
1138 for (i=scm->fp->count-1; i>=0; i--)
1139 unix_inflight(scm->fp->fp[i]);
1140 UNIXCB(skb).fp = scm->fp;
1141 skb->destructor = unix_destruct_fds;
1142 scm->fp = NULL;
1146 * Send AF_UNIX data.
1149 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, int len,
1150 struct scm_cookie *scm)
1152 struct sock *sk = sock->sk;
1153 struct sockaddr_un *sunaddr=msg->msg_name;
1154 unix_socket *other = NULL;
1155 int namelen = 0; /* fake GCC */
1156 int err;
1157 unsigned hash;
1158 struct sk_buff *skb;
1159 long timeo;
1161 err = -EOPNOTSUPP;
1162 if (msg->msg_flags&MSG_OOB)
1163 goto out;
1165 if (msg->msg_namelen) {
1166 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1167 if (err < 0)
1168 goto out;
1169 namelen = err;
1170 } else {
1171 sunaddr = NULL;
1172 err = -ENOTCONN;
1173 other = unix_peer_get(sk);
1174 if (!other)
1175 goto out;
1178 if (sock->passcred && !sk->protinfo.af_unix.addr &&
1179 (err = unix_autobind(sock)) != 0)
1180 goto out;
1182 err = -EMSGSIZE;
1183 if ((unsigned)len > sk->sndbuf - 32)
1184 goto out;
1186 skb = sock_alloc_send_skb(sk, len, 0, msg->msg_flags&MSG_DONTWAIT, &err);
1187 if (skb==NULL)
1188 goto out;
1190 memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred));
1191 if (scm->fp)
1192 unix_attach_fds(scm, skb);
1194 skb->h.raw = skb->data;
1195 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1196 if (err)
1197 goto out_free;
1199 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1201 restart:
1202 if (!other) {
1203 err = -ECONNRESET;
1204 if (sunaddr == NULL)
1205 goto out_free;
1207 other = unix_find_other(sunaddr, namelen, sk->type, hash, &err);
1208 if (other==NULL)
1209 goto out_free;
1212 unix_state_rlock(other);
1213 err = -EPERM;
1214 if (!unix_may_send(sk, other))
1215 goto out_unlock;
1217 if (other->dead) {
1219 * Check with 1003.1g - what should
1220 * datagram error
1222 unix_state_runlock(other);
1223 sock_put(other);
1225 err = 0;
1226 unix_state_wlock(sk);
1227 if (unix_peer(sk) == other) {
1228 unix_peer(sk)=NULL;
1229 unix_state_wunlock(sk);
1231 unix_dgram_disconnected(sk, other);
1232 sock_put(other);
1233 err = -ECONNREFUSED;
1234 } else {
1235 unix_state_wunlock(sk);
1238 other = NULL;
1239 if (err)
1240 goto out_free;
1241 goto restart;
1244 err = -EPIPE;
1245 if (other->shutdown&RCV_SHUTDOWN)
1246 goto out_unlock;
1248 if (unix_peer(other) != sk &&
1249 skb_queue_len(&other->receive_queue) > other->max_ack_backlog) {
1250 if (!timeo) {
1251 err = -EAGAIN;
1252 goto out_unlock;
1255 timeo = unix_wait_for_peer(other, timeo);
1257 err = sock_intr_errno(timeo);
1258 if (signal_pending(current))
1259 goto out_free;
1261 goto restart;
1264 skb_queue_tail(&other->receive_queue, skb);
1265 unix_state_runlock(other);
1266 other->data_ready(other, len);
1267 sock_put(other);
1268 return len;
1270 out_unlock:
1271 unix_state_runlock(other);
1272 out_free:
1273 kfree_skb(skb);
1274 out:
1275 if (other)
1276 sock_put(other);
1277 return err;
1281 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len,
1282 struct scm_cookie *scm)
1284 struct sock *sk = sock->sk;
1285 unix_socket *other = NULL;
1286 struct sockaddr_un *sunaddr=msg->msg_name;
1287 int err,size;
1288 struct sk_buff *skb;
1289 int limit=0;
1290 int sent=0;
1292 err = -EOPNOTSUPP;
1293 if (msg->msg_flags&MSG_OOB)
1294 goto out_err;
1296 if (msg->msg_namelen) {
1297 err = (sk->state==TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP);
1298 goto out_err;
1299 } else {
1300 sunaddr = NULL;
1301 err = -ENOTCONN;
1302 other = unix_peer_get(sk);
1303 if (!other)
1304 goto out_err;
1307 if (sk->shutdown&SEND_SHUTDOWN)
1308 goto pipe_err;
1310 while(sent < len)
1313 * Optimisation for the fact that under 0.01% of X messages typically
1314 * need breaking up.
1317 size=len-sent;
1319 /* Keep two messages in the pipe so it schedules better */
1320 if (size > sk->sndbuf/2 - 16)
1321 size = sk->sndbuf/2 - 16;
1324 * Keep to page sized kmalloc()'s as various people
1325 * have suggested. Big mallocs stress the vm too
1326 * much.
1329 if (size > PAGE_SIZE-16)
1330 limit = PAGE_SIZE-16; /* Fall back to a page if we can't grab a big buffer this instant */
1331 else
1332 limit = 0; /* Otherwise just grab and wait */
1335 * Grab a buffer
1338 skb=sock_alloc_send_skb(sk,size,limit,msg->msg_flags&MSG_DONTWAIT, &err);
1340 if (skb==NULL)
1341 goto out_err;
1344 * If you pass two values to the sock_alloc_send_skb
1345 * it tries to grab the large buffer with GFP_BUFFER
1346 * (which can fail easily), and if it fails grab the
1347 * fallback size buffer which is under a page and will
1348 * succeed. [Alan]
1350 size = min(size, skb_tailroom(skb));
1352 memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred));
1353 if (scm->fp)
1354 unix_attach_fds(scm, skb);
1356 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1357 kfree_skb(skb);
1358 goto out_err;
1361 unix_state_rlock(other);
1363 if (other->dead || (other->shutdown & RCV_SHUTDOWN))
1364 goto pipe_err_free;
1366 skb_queue_tail(&other->receive_queue, skb);
1367 unix_state_runlock(other);
1368 other->data_ready(other, size);
1369 sent+=size;
1371 sock_put(other);
1372 return sent;
1374 pipe_err_free:
1375 unix_state_runlock(other);
1376 kfree_skb(skb);
1377 pipe_err:
1378 if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1379 send_sig(SIGPIPE,current,0);
1380 err = -EPIPE;
1381 out_err:
1382 if (other)
1383 sock_put(other);
1384 return sent ? : err;
1387 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1389 msg->msg_namelen = sizeof(short);
1390 if (sk->protinfo.af_unix.addr) {
1391 msg->msg_namelen=sk->protinfo.af_unix.addr->len;
1392 memcpy(msg->msg_name,
1393 sk->protinfo.af_unix.addr->name,
1394 sk->protinfo.af_unix.addr->len);
1398 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int size,
1399 int flags, struct scm_cookie *scm)
1401 struct sock *sk = sock->sk;
1402 int noblock = flags & MSG_DONTWAIT;
1403 struct sk_buff *skb;
1404 int err;
1406 err = -EOPNOTSUPP;
1407 if (flags&MSG_OOB)
1408 goto out;
1410 msg->msg_namelen = 0;
1412 skb = skb_recv_datagram(sk, flags, noblock, &err);
1413 if (!skb)
1414 goto out;
1416 wake_up_interruptible(&sk->protinfo.af_unix.peer_wait);
1418 if (msg->msg_name)
1419 unix_copy_addr(msg, skb->sk);
1421 if (size > skb->len)
1422 size = skb->len;
1423 else if (size < skb->len)
1424 msg->msg_flags |= MSG_TRUNC;
1426 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1427 if (err)
1428 goto out_free;
1430 scm->creds = *UNIXCREDS(skb);
1432 if (!(flags & MSG_PEEK))
1434 if (UNIXCB(skb).fp)
1435 unix_detach_fds(scm, skb);
1437 else
1439 /* It is questionable: on PEEK we could:
1440 - do not return fds - good, but too simple 8)
1441 - return fds, and do not return them on read (old strategy,
1442 apparently wrong)
1443 - clone fds (I choosed it for now, it is the most universal
1444 solution)
1446 POSIX 1003.1g does not actually define this clearly
1447 at all. POSIX 1003.1g doesn't define a lot of things
1448 clearly however!
1451 if (UNIXCB(skb).fp)
1452 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1454 err = size;
1456 out_free:
1457 skb_free_datagram(sk,skb);
1458 out:
1459 return err;
1463 * Sleep until data has arrived. But check for races..
1466 static long unix_stream_data_wait(unix_socket * sk, long timeo)
1468 DECLARE_WAITQUEUE(wait, current);
1470 unix_state_rlock(sk);
1472 add_wait_queue(sk->sleep, &wait);
1474 for (;;) {
1475 set_current_state(TASK_INTERRUPTIBLE);
1477 if (skb_queue_len(&sk->receive_queue) ||
1478 sk->err ||
1479 (sk->shutdown & RCV_SHUTDOWN) ||
1480 signal_pending(current) ||
1481 !timeo)
1482 break;
1484 set_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
1485 unix_state_runlock(sk);
1486 timeo = schedule_timeout(timeo);
1487 unix_state_rlock(sk);
1488 clear_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
1491 __set_current_state(TASK_RUNNING);
1492 remove_wait_queue(sk->sleep, &wait);
1493 unix_state_runlock(sk);
1494 return timeo;
1499 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size,
1500 int flags, struct scm_cookie *scm)
1502 struct sock *sk = sock->sk;
1503 struct sockaddr_un *sunaddr=msg->msg_name;
1504 int copied = 0;
1505 int check_creds = 0;
1506 int target;
1507 int err = 0;
1508 long timeo;
1510 err = -EINVAL;
1511 if (sk->state != TCP_ESTABLISHED)
1512 goto out;
1514 err = -EOPNOTSUPP;
1515 if (flags&MSG_OOB)
1516 goto out;
1518 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1519 timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1521 msg->msg_namelen = 0;
1523 /* Lock the socket to prevent queue disordering
1524 * while sleeps in memcpy_tomsg
1527 down(&sk->protinfo.af_unix.readsem);
1531 int chunk;
1532 struct sk_buff *skb;
1534 skb=skb_dequeue(&sk->receive_queue);
1535 if (skb==NULL)
1537 if (copied >= target)
1538 break;
1541 * POSIX 1003.1g mandates this order.
1544 if ((err = sock_error(sk)) != 0)
1545 break;
1546 if (sk->shutdown & RCV_SHUTDOWN)
1547 break;
1548 err = -EAGAIN;
1549 if (!timeo)
1550 break;
1551 up(&sk->protinfo.af_unix.readsem);
1553 timeo = unix_stream_data_wait(sk, timeo);
1555 if (signal_pending(current)) {
1556 err = sock_intr_errno(timeo);
1557 goto out;
1559 down(&sk->protinfo.af_unix.readsem);
1560 continue;
1563 if (check_creds) {
1564 /* Never glue messages from different writers */
1565 if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0) {
1566 skb_queue_head(&sk->receive_queue, skb);
1567 break;
1569 } else {
1570 /* Copy credentials */
1571 scm->creds = *UNIXCREDS(skb);
1572 check_creds = 1;
1575 /* Copy address just once */
1576 if (sunaddr)
1578 unix_copy_addr(msg, skb->sk);
1579 sunaddr = NULL;
1582 chunk = min(skb->len, size);
1583 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1584 skb_queue_head(&sk->receive_queue, skb);
1585 if (copied == 0)
1586 copied = -EFAULT;
1587 break;
1589 copied += chunk;
1590 size -= chunk;
1592 /* Mark read part of skb as used */
1593 if (!(flags & MSG_PEEK))
1595 skb_pull(skb, chunk);
1597 if (UNIXCB(skb).fp)
1598 unix_detach_fds(scm, skb);
1600 /* put the skb back if we didn't use it up.. */
1601 if (skb->len)
1603 skb_queue_head(&sk->receive_queue, skb);
1604 break;
1607 kfree_skb(skb);
1609 if (scm->fp)
1610 break;
1612 else
1614 /* It is questionable, see note in unix_dgram_recvmsg.
1616 if (UNIXCB(skb).fp)
1617 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1619 /* put message back and return */
1620 skb_queue_head(&sk->receive_queue, skb);
1621 break;
1623 } while (size);
1625 up(&sk->protinfo.af_unix.readsem);
1626 out:
1627 return copied ? : err;
1630 static int unix_shutdown(struct socket *sock, int mode)
1632 struct sock *sk = sock->sk;
1633 unix_socket *other;
1635 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1637 if (mode) {
1638 unix_state_wlock(sk);
1639 sk->shutdown |= mode;
1640 other=unix_peer(sk);
1641 if (other)
1642 sock_hold(other);
1643 unix_state_wunlock(sk);
1644 sk->state_change(sk);
1646 if (other && sk->type == SOCK_STREAM) {
1647 int peer_mode = 0;
1649 if (mode&RCV_SHUTDOWN)
1650 peer_mode |= SEND_SHUTDOWN;
1651 if (mode&SEND_SHUTDOWN)
1652 peer_mode |= RCV_SHUTDOWN;
1653 unix_state_wlock(other);
1654 other->shutdown |= peer_mode;
1655 unix_state_wunlock(other);
1656 other->state_change(other);
1657 read_lock(&other->callback_lock);
1658 if (peer_mode == SHUTDOWN_MASK)
1659 sk_wake_async(other,1,POLL_HUP);
1660 else if (peer_mode & RCV_SHUTDOWN)
1661 sk_wake_async(other,1,POLL_IN);
1662 read_unlock(&other->callback_lock);
1664 if (other)
1665 sock_put(other);
1667 return 0;
1670 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1672 struct sock *sk = sock->sk;
1673 long amount=0;
1674 int err;
1676 switch(cmd)
1678 case SIOCOUTQ:
1679 amount = atomic_read(&sk->wmem_alloc);
1680 err = put_user(amount, (int *)arg);
1681 break;
1682 case SIOCINQ:
1684 struct sk_buff *skb;
1685 if (sk->state==TCP_LISTEN) {
1686 err = -EINVAL;
1687 break;
1690 spin_lock(&sk->receive_queue.lock);
1691 if((skb=skb_peek(&sk->receive_queue))!=NULL)
1692 amount=skb->len;
1693 spin_unlock(&sk->receive_queue.lock);
1694 err = put_user(amount, (int *)arg);
1695 break;
1698 default:
1699 err = dev_ioctl(cmd, (void *)arg);
1700 break;
1702 return err;
1705 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1707 struct sock *sk = sock->sk;
1708 unsigned int mask;
1710 poll_wait(file, sk->sleep, wait);
1711 mask = 0;
1713 /* exceptional events? */
1714 if (sk->err)
1715 mask |= POLLERR;
1716 if (sk->shutdown == SHUTDOWN_MASK)
1717 mask |= POLLHUP;
1719 /* readable? */
1720 if (!skb_queue_empty(&sk->receive_queue) || (sk->shutdown&RCV_SHUTDOWN))
1721 mask |= POLLIN | POLLRDNORM;
1723 /* Connection-based need to check for termination and startup */
1724 if (sk->type == SOCK_STREAM && sk->state==TCP_CLOSE)
1725 mask |= POLLHUP;
1728 * we set writable also when the other side has shut down the
1729 * connection. This prevents stuck sockets.
1731 if (unix_writable(sk))
1732 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1734 return mask;
1738 #ifdef CONFIG_PROC_FS
1739 static int unix_read_proc(char *buffer, char **start, off_t offset,
1740 int length, int *eof, void *data)
1742 off_t pos=0;
1743 off_t begin=0;
1744 int len=0;
1745 int i;
1746 unix_socket *s;
1748 len+= sprintf(buffer,"Num RefCount Protocol Flags Type St "
1749 "Inode Path\n");
1751 read_lock(&unix_table_lock);
1752 forall_unix_sockets (i,s)
1754 unix_state_rlock(s);
1756 len+=sprintf(buffer+len,"%p: %08X %08X %08X %04X %02X %5ld",
1758 atomic_read(&s->refcnt),
1760 s->state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
1761 s->type,
1762 s->socket ?
1763 (s->state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
1764 (s->state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
1765 sock_i_ino(s));
1767 if (s->protinfo.af_unix.addr)
1769 buffer[len++] = ' ';
1770 memcpy(buffer+len, s->protinfo.af_unix.addr->name->sun_path,
1771 s->protinfo.af_unix.addr->len-sizeof(short));
1772 if (!UNIX_ABSTRACT(s))
1773 len--;
1774 else
1775 buffer[len] = '@';
1776 len += s->protinfo.af_unix.addr->len - sizeof(short);
1778 unix_state_runlock(s);
1780 buffer[len++]='\n';
1782 pos = begin + len;
1783 if(pos<offset)
1785 len=0;
1786 begin=pos;
1788 if(pos>offset+length)
1789 goto done;
1791 *eof = 1;
1792 done:
1793 read_unlock(&unix_table_lock);
1794 *start=buffer+(offset-begin);
1795 len-=(offset-begin);
1796 if(len>length)
1797 len=length;
1798 if (len < 0)
1799 len = 0;
1800 return len;
1802 #endif
1804 struct proto_ops unix_stream_ops = {
1805 family: PF_UNIX,
1807 release: unix_release,
1808 bind: unix_bind,
1809 connect: unix_stream_connect,
1810 socketpair: unix_socketpair,
1811 accept: unix_accept,
1812 getname: unix_getname,
1813 poll: unix_poll,
1814 ioctl: unix_ioctl,
1815 listen: unix_listen,
1816 shutdown: unix_shutdown,
1817 setsockopt: sock_no_setsockopt,
1818 getsockopt: sock_no_getsockopt,
1819 sendmsg: unix_stream_sendmsg,
1820 recvmsg: unix_stream_recvmsg,
1821 mmap: sock_no_mmap,
1824 struct proto_ops unix_dgram_ops = {
1825 family: PF_UNIX,
1827 release: unix_release,
1828 bind: unix_bind,
1829 connect: unix_dgram_connect,
1830 socketpair: unix_socketpair,
1831 accept: sock_no_accept,
1832 getname: unix_getname,
1833 poll: datagram_poll,
1834 ioctl: unix_ioctl,
1835 listen: sock_no_listen,
1836 shutdown: unix_shutdown,
1837 setsockopt: sock_no_setsockopt,
1838 getsockopt: sock_no_getsockopt,
1839 sendmsg: unix_dgram_sendmsg,
1840 recvmsg: unix_dgram_recvmsg,
1841 mmap: sock_no_mmap,
1844 struct net_proto_family unix_family_ops = {
1845 PF_UNIX,
1846 unix_create
1849 #ifdef CONFIG_SYSCTL
1850 extern void unix_sysctl_register(void);
1851 extern void unix_sysctl_unregister(void);
1852 #endif
1854 static int __init af_unix_init(void)
1856 struct sk_buff *dummy_skb;
1858 printk(KERN_INFO "NET4: Unix domain sockets 1.0/SMP for Linux NET4.0.\n");
1859 if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb))
1861 printk(KERN_CRIT "unix_proto_init: panic\n");
1862 return -1;
1864 sock_register(&unix_family_ops);
1865 #ifdef CONFIG_PROC_FS
1866 create_proc_read_entry("net/unix", 0, 0, unix_read_proc, NULL);
1867 #endif
1869 #ifdef CONFIG_SYSCTL
1870 unix_sysctl_register();
1871 #endif
1873 return 0;
1876 static void __exit af_unix_exit(void)
1878 sock_unregister(PF_UNIX);
1879 #ifdef CONFIG_SYSCTL
1880 unix_sysctl_unregister();
1881 #endif
1882 #ifdef CONFIG_PROC_FS
1883 remove_proc_entry("net/unix", 0);
1884 #endif
/* Hook the init and exit routines up as the module entry points */
module_init(af_unix_init);
module_exit(af_unix_exit);
1891 * Local variables:
1892 * compile-command: "gcc -g -D__KERNEL__ -Wall -O6 -I/usr/src/linux/include -c af_unix.c"
1893 * End: