Import 2.4.0-test2pre7
[davej-history.git] / net / unix / af_unix.c
blob2f37380941a1304f8cf299fca0db72b6df7c3a7e
1 /*
2 * NET3: Implementation of BSD Unix domain sockets.
4 * Authors: Alan Cox, <alan.cox@linux.org>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
11 * Version: $Id: af_unix.c,v 1.98 2000/06/19 06:24:59 davem Exp $
13 * Fixes:
14 * Linus Torvalds : Assorted bug cures.
15 * Niibe Yutaka : async I/O support.
16 * Carsten Paeth : PF_UNIX check, address fixes.
17 * Alan Cox : Limit size of allocated blocks.
18 * Alan Cox : Fixed the stupid socketpair bug.
19 * Alan Cox : BSD compatibility fine tuning.
20 * Alan Cox : Fixed a bug in connect when interrupted.
21 * Alan Cox : Sorted out a proper draft version of
22 * file descriptor passing hacked up from
23 * Mike Shaver's work.
24 * Marty Leisner : Fixes to fd passing
25 * Nick Nevin : recvmsg bugfix.
26 * Alan Cox : Started proper garbage collector
27 * Heiko EiBfeldt : Missing verify_area check
28 * Alan Cox : Started POSIXisms
29 * Andreas Schwab : Replace inode by dentry for proper
30 * reference counting
31 * Kirk Petersen : Made this a module
32 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
33 * Lots of bug fixes.
34 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
35 * by above two patches.
36 * Andrea Arcangeli : If possible we block in connect(2)
37 * if the max backlog of the listen socket
38 * has been reached. This won't break
39 * old apps and it will avoid huge amount
40 * of socks hashed (this for unix_gc()
41 * performance reasons).
42 * Security fix that limits the max
43 * number of socks to 2*max_files and
44 * the number of skb queueable in the
45 * dgram receiver.
46 * Artur Skawina : Hash function optimizations
47 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
48 * Malcolm Beattie : Set peercred for socketpair
51 * Known differences from reference BSD that was tested:
53 * [TO FIX]
54 * ECONNREFUSED is not returned from one end of a connected() socket to the
55 * other the moment one end closes.
56 * fstat() doesn't return st_dev=NODEV, and give the blksize as high water mark
57 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
58 * [NOT TO FIX]
59 * accept() returns a path name even if the connecting socket has closed
60 * in the meantime (BSD loses the path and gives up).
61 * accept() returns 0 length path for an unbound connector. BSD returns 16
62 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
63 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
64 * BSD af_unix apparently has connect forgetting to block properly.
65 * (need to check this with the POSIX spec in detail)
67 * Differences from 2.0.0-11-... (ANK)
68 * Bug fixes and improvements.
69 * - client shutdown killed server socket.
70 * - removed all useless cli/sti pairs.
72 * Semantic changes/extensions.
73 * - generic control message passing.
74 * - SCM_CREDENTIALS control message.
75 * - "Abstract" (not FS based) socket bindings.
76 * Abstract names are sequences of bytes (not zero terminated)
77 * started by 0, so that this name space does not intersect
78 * with BSD names.
81 #include <linux/module.h>
82 #include <linux/config.h>
83 #include <linux/kernel.h>
84 #include <linux/major.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/socket.h>
91 #include <linux/un.h>
92 #include <linux/fcntl.h>
93 #include <linux/termios.h>
94 #include <linux/sockios.h>
95 #include <linux/net.h>
96 #include <linux/in.h>
97 #include <linux/fs.h>
98 #include <linux/malloc.h>
99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <net/sock.h>
103 #include <net/tcp.h>
104 #include <net/af_unix.h>
105 #include <linux/proc_fs.h>
106 #include <net/scm.h>
107 #include <linux/init.h>
108 #include <linux/poll.h>
109 #include <linux/smp_lock.h>
111 #include <asm/checksum.h>
/* Smaller of two values. Function-like macro: an argument may be evaluated twice. */
113 #define min(a,b) (((a)<(b))?(a):(b))
/* Initial max_ack_backlog given to every socket set up by unix_create1(). */
115 int sysctl_unix_max_dgram_qlen = 10;
/* Socket hash chains. The extra slot at index UNIX_HASH_SIZE is the chain named unix_sockets_unbound below. */
117 unix_socket *unix_socket_table[UNIX_HASH_SIZE+1];
/* Reader/writer lock taken around every walk or update of unix_socket_table in this file. */
118 rwlock_t unix_table_lock = RW_LOCK_UNLOCKED;
/* Number of existing unix socks: incremented in unix_create1(), decremented in unix_sock_destructor(). */
119 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
/* Chain on which unix_create1() places a freshly created socket. */
121 #define unix_sockets_unbound (unix_socket_table[UNIX_HASH_SIZE])
/* True when the address hash differs from UNIX_HASH_SIZE, the value unix_bind() stores for filesystem names. */
123 #define UNIX_ABSTRACT(sk) ((sk)->protinfo.af_unix.addr->hash!=UNIX_HASH_SIZE)
126 SMP locking strategy.
127 * hash table is protected with rwlock unix_table_lock
128 * each socket state is protected by separate rwlock.
132 extern __inline__ unsigned unix_hash_fold(unsigned hash)
134 hash ^= hash>>16;
135 hash ^= hash>>8;
136 return hash&(UNIX_HASH_SIZE-1);
/* Lvalue accessor for the pair pointer of a socket; used both to read and to assign the peer. */
139 #define unix_peer(sk) ((sk)->pair)
141 extern __inline__ int unix_our_peer(unix_socket *sk, unix_socket *osk)
143 return unix_peer(osk) == sk;
146 extern __inline__ int unix_may_send(unix_socket *sk, unix_socket *osk)
148 return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
151 static __inline__ unix_socket * unix_peer_get(unix_socket *s)
153 unix_socket *peer;
155 unix_state_rlock(s);
156 peer = unix_peer(s);
157 if (peer)
158 sock_hold(peer);
159 unix_state_runlock(s);
160 return peer;
163 extern __inline__ void unix_release_addr(struct unix_address *addr)
165 if (atomic_dec_and_test(&addr->refcnt))
166 kfree(addr);
170 * Check unix socket name:
171 * - should be not zero length.
172 * - if started by not zero, should be NULL terminated (FS object)
173 * - if started by zero, it is abstract name.
176 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
178 if (len <= sizeof(short) || len > sizeof(*sunaddr))
179 return -EINVAL;
180 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
181 return -EINVAL;
182 if (sunaddr->sun_path[0])
185 * This may look like an off by one error but it is
186 * a bit more subtle. 108 is the longest valid AF_UNIX
187 * path for a binding. sun_path[108] doesnt as such
188 * exist. However in kernel space we are guaranteed that
189 * it is a valid memory location in our kernel
190 * address buffer.
192 if (len > sizeof(*sunaddr))
193 len = sizeof(*sunaddr);
194 ((char *)sunaddr)[len]=0;
195 len = strlen(sunaddr->sun_path)+1+sizeof(short);
196 return len;
199 *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
200 return len;
203 static void __unix_remove_socket(unix_socket *sk)
205 unix_socket **list = sk->protinfo.af_unix.list;
206 if (list) {
207 if (sk->next)
208 sk->next->prev = sk->prev;
209 if (sk->prev)
210 sk->prev->next = sk->next;
211 if (*list == sk)
212 *list = sk->next;
213 sk->protinfo.af_unix.list = NULL;
214 sk->prev = NULL;
215 sk->next = NULL;
216 __sock_put(sk);
220 static void __unix_insert_socket(unix_socket **list, unix_socket *sk)
222 BUG_TRAP(sk->protinfo.af_unix.list==NULL);
224 sk->protinfo.af_unix.list = list;
225 sk->prev = NULL;
226 sk->next = *list;
227 if (*list)
228 (*list)->prev = sk;
229 *list=sk;
230 sock_hold(sk);
233 static __inline__ void unix_remove_socket(unix_socket *sk)
235 write_lock(&unix_table_lock);
236 __unix_remove_socket(sk);
237 write_unlock(&unix_table_lock);
240 static __inline__ void unix_insert_socket(unix_socket **list, unix_socket *sk)
242 write_lock(&unix_table_lock);
243 __unix_insert_socket(list, sk);
244 write_unlock(&unix_table_lock);
247 static unix_socket *__unix_find_socket_byname(struct sockaddr_un *sunname,
248 int len, int type, unsigned hash)
250 unix_socket *s;
252 for (s=unix_socket_table[hash^type]; s; s=s->next) {
253 if(s->protinfo.af_unix.addr->len==len &&
254 memcmp(s->protinfo.af_unix.addr->name, sunname, len) == 0)
255 return s;
257 return NULL;
260 static __inline__ unix_socket *
261 unix_find_socket_byname(struct sockaddr_un *sunname,
262 int len, int type, unsigned hash)
264 unix_socket *s;
266 read_lock(&unix_table_lock);
267 s = __unix_find_socket_byname(sunname, len, type, hash);
268 if (s)
269 sock_hold(s);
270 read_unlock(&unix_table_lock);
271 return s;
274 static unix_socket *unix_find_socket_byinode(struct inode *i)
276 unix_socket *s;
278 read_lock(&unix_table_lock);
279 for (s=unix_socket_table[i->i_ino & (UNIX_HASH_SIZE-1)]; s; s=s->next)
281 struct dentry *dentry = s->protinfo.af_unix.dentry;
283 if(dentry && dentry->d_inode == i)
285 sock_hold(s);
286 break;
289 read_unlock(&unix_table_lock);
290 return s;
293 static __inline__ int unix_writable(struct sock *sk)
295 return ((atomic_read(&sk->wmem_alloc)<<2) <= sk->sndbuf);
298 static void unix_write_space(struct sock *sk)
300 read_lock(&sk->callback_lock);
301 if (unix_writable(sk)) {
302 if (sk->sleep && waitqueue_active(sk->sleep))
303 wake_up_interruptible(sk->sleep);
304 sk_wake_async(sk, 2, POLL_OUT);
306 read_unlock(&sk->callback_lock);
309 static void unix_sock_destructor(struct sock *sk)
311 skb_queue_purge(&sk->receive_queue);
313 BUG_TRAP(atomic_read(&sk->wmem_alloc) == 0);
314 BUG_TRAP(sk->protinfo.af_unix.list==NULL);
315 BUG_TRAP(sk->socket==NULL);
316 if (sk->dead==0) {
317 printk("Attempt to release alive unix socket: %p\n", sk);
318 return;
321 if (sk->protinfo.af_unix.addr)
322 unix_release_addr(sk->protinfo.af_unix.addr);
324 atomic_dec(&unix_nr_socks);
325 #ifdef UNIX_REFCNT_DEBUG
326 printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
327 #endif
328 MOD_DEC_USE_COUNT;
/*
 * Tear down a unix sock. In order: unhash it, mark it shut down and closed
 * under its state lock, wake everybody on peer_wait, notify and drop the
 * peer, flush the receive queue, release the bound dentry and mount, and
 * finally drop one reference on sk. Always returns 0.
 *
 * embrion is passed as 1 by the recursive call below for socks still
 * sitting on the queue of a listener; non-zero forces ECONNRESET onto the
 * peer of a stream sock.
 */
331 static int unix_release_sock (unix_socket *sk, int embrion)
333 struct dentry *dentry;
334 struct vfsmount *mnt;
335 unix_socket *skpair;
336 struct sk_buff *skb;
337 int state;
339 unix_remove_socket(sk);
341 /* Clear state */
/* dentry, mnt and the old state are saved in locals so they can be used after the lock is dropped. */
342 unix_state_wlock(sk);
343 sock_orphan(sk);
344 sk->shutdown = SHUTDOWN_MASK;
345 dentry = sk->protinfo.af_unix.dentry;
346 sk->protinfo.af_unix.dentry=NULL;
347 mnt = sk->protinfo.af_unix.mnt;
348 sk->protinfo.af_unix.mnt=NULL;
349 state = sk->state;
350 sk->state = TCP_CLOSE;
351 unix_state_wunlock(sk);
353 wake_up_interruptible_all(&sk->protinfo.af_unix.peer_wait);
355 skpair=unix_peer(sk);
/* Only a stream peer is shut down and signalled; for any type the peer reference is dropped. */
357 if (skpair!=NULL) {
358 if (sk->type==SOCK_STREAM) {
359 unix_state_wlock(skpair);
360 skpair->shutdown=SHUTDOWN_MASK; /* No more writes*/
/* Unread data on our queue, or an embryo being destroyed, is reported to the peer as a reset. */
361 if (!skb_queue_empty(&sk->receive_queue) || embrion)
362 skpair->err = ECONNRESET;
363 unix_state_wunlock(skpair);
364 skpair->state_change(skpair);
365 read_lock(&skpair->callback_lock);
366 sk_wake_async(skpair,1,POLL_HUP);
367 read_unlock(&skpair->callback_lock);
369 sock_put(skpair); /* It may now die */
370 unix_peer(sk) = NULL;
373 /* Try to flush out this socket. Throw out buffers at least */
375 while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
/* On a listener each queued skb stands for a pending connection; its sock (skb->sk) is released recursively as an embryo. */
377 if (state==TCP_LISTEN)
378 unix_release_sock(skb->sk, 1);
379 /* passed fds are erased in the kfree_skb hook */
380 kfree_skb(skb);
/* The dentry and mount are released with the big kernel lock held. */
383 if (dentry) {
384 lock_kernel();
385 dput(dentry);
386 mntput(mnt);
387 unlock_kernel();
390 sock_put(sk);
392 /* ---- Socket is dead now and most probably destroyed ---- */
395 * Fixme: BSD difference: In BSD all sockets connected to use get
396 * ECONNRESET and we die on the spot. In Linux we behave
397 * like files and pipes do and wait for the last
398 * dereference.
400 * Can't we simply set sock->err?
402 * What the above comment does talk about? --ANK(980817)
405 if (atomic_read(&unix_tot_inflight))
406 unix_gc(); /* Garbage collect fds */
408 return 0;
411 static int unix_listen(struct socket *sock, int backlog)
413 int err;
414 struct sock *sk = sock->sk;
416 err = -EOPNOTSUPP;
417 if (sock->type!=SOCK_STREAM)
418 goto out; /* Only stream sockets accept */
419 err = -EINVAL;
420 if (!sk->protinfo.af_unix.addr)
421 goto out; /* No listens on an unbound socket */
422 unix_state_wlock(sk);
423 if (sk->state != TCP_CLOSE && sk->state != TCP_LISTEN)
424 goto out_unlock;
425 if (backlog > sk->max_ack_backlog)
426 wake_up_interruptible_all(&sk->protinfo.af_unix.peer_wait);
427 sk->max_ack_backlog=backlog;
428 sk->state=TCP_LISTEN;
429 /* set credentials so connect can copy them */
430 sk->peercred.pid = current->pid;
431 sk->peercred.uid = current->euid;
432 sk->peercred.gid = current->egid;
433 err = 0;
435 out_unlock:
436 unix_state_wunlock(sk);
437 out:
438 return err;
/* Operation tables installed into sock->ops by unix_create(); they are not defined in this part of the file. */
441 extern struct proto_ops unix_stream_ops;
442 extern struct proto_ops unix_dgram_ops;
444 static struct sock * unix_create1(struct socket *sock)
446 struct sock *sk;
448 if (atomic_read(&unix_nr_socks) >= 2*max_files)
449 return NULL;
451 MOD_INC_USE_COUNT;
452 sk = sk_alloc(PF_UNIX, GFP_KERNEL, 1);
453 if (!sk) {
454 MOD_DEC_USE_COUNT;
455 return NULL;
458 atomic_inc(&unix_nr_socks);
460 sock_init_data(sock,sk);
462 sk->write_space = unix_write_space;
464 sk->max_ack_backlog = sysctl_unix_max_dgram_qlen;
465 sk->destruct = unix_sock_destructor;
466 sk->protinfo.af_unix.dentry=NULL;
467 sk->protinfo.af_unix.mnt=NULL;
468 sk->protinfo.af_unix.lock = RW_LOCK_UNLOCKED;
469 atomic_set(&sk->protinfo.af_unix.inflight, 0);
470 init_MUTEX(&sk->protinfo.af_unix.readsem);/* single task reading lock */
471 init_waitqueue_head(&sk->protinfo.af_unix.peer_wait);
472 sk->protinfo.af_unix.list=NULL;
473 unix_insert_socket(&unix_sockets_unbound, sk);
475 return sk;
478 static int unix_create(struct socket *sock, int protocol)
480 if (protocol && protocol != PF_UNIX)
481 return -EPROTONOSUPPORT;
483 sock->state = SS_UNCONNECTED;
485 switch (sock->type) {
486 case SOCK_STREAM:
487 sock->ops = &unix_stream_ops;
488 break;
490 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
491 * nothing uses it.
493 case SOCK_RAW:
494 sock->type=SOCK_DGRAM;
495 case SOCK_DGRAM:
496 sock->ops = &unix_dgram_ops;
497 break;
498 default:
499 return -ESOCKTNOSUPPORT;
502 return unix_create1(sock) ? 0 : -ENOMEM;
505 static int unix_release(struct socket *sock)
507 unix_socket *sk = sock->sk;
509 if (!sk)
510 return 0;
512 sock->sk = NULL;
514 return unix_release_sock (sk, 0);
517 static int unix_autobind(struct socket *sock)
519 struct sock *sk = sock->sk;
520 static u32 ordernum = 1;
521 struct unix_address * addr;
522 int err;
524 down(&sk->protinfo.af_unix.readsem);
526 err = 0;
527 if (sk->protinfo.af_unix.addr)
528 goto out;
530 err = -ENOMEM;
531 addr = kmalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
532 if (!addr)
533 goto out;
535 memset(addr, 0, sizeof(*addr) + sizeof(short) + 16);
536 addr->name->sun_family = AF_UNIX;
537 atomic_set(&addr->refcnt, 1);
539 retry:
540 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
541 addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
543 write_lock(&unix_table_lock);
544 ordernum = (ordernum+1)&0xFFFFF;
546 if (__unix_find_socket_byname(addr->name, addr->len, sock->type,
547 addr->hash)) {
548 write_unlock(&unix_table_lock);
549 /* Sanity yield. It is unusual case, but yet... */
550 if (!(ordernum&0xFF)) {
551 current->policy |= SCHED_YIELD;
552 schedule();
554 goto retry;
556 addr->hash ^= sk->type;
558 __unix_remove_socket(sk);
559 sk->protinfo.af_unix.addr = addr;
560 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
561 write_unlock(&unix_table_lock);
562 err = 0;
564 out:
565 up(&sk->protinfo.af_unix.readsem);
566 return err;
/*
 * Find the socket that the address sunname refers to.
 *
 * A name whose path starts with a non-zero byte is resolved through the
 * VFS under the big kernel lock: the path is walked, write permission on
 * the inode is checked, the inode must be a socket, and the sock is then
 * looked up by inode; its type must equal type. Any other name is looked
 * up in the hash table by name, len, type and hash.
 *
 * Returns the sock with a reference held (taken by the lookup helpers),
 * or NULL with *error set to a negative errno.
 */
569 static unix_socket *unix_find_other(struct sockaddr_un *sunname, int len,
570 int type, unsigned hash, int *error)
572 unix_socket *u;
573 struct nameidata nd;
574 int err = 0;
576 if (sunname->sun_path[0]) {
577 /* Do not believe to VFS, grab kernel lock */
578 lock_kernel();
579 if (path_init(sunname->sun_path, LOOKUP_POSITIVE, &nd))
580 err = path_walk(sunname->sun_path, &nd);
581 if (err) {
582 unlock_kernel();
583 goto fail;
585 err = permission(nd.dentry->d_inode,MAY_WRITE);
586 if (err)
587 goto put_fail;
/* Both a non-socket inode and a socket inode with no sock attached yield ECONNREFUSED. */
589 err = -ECONNREFUSED;
590 if (!S_ISSOCK(nd.dentry->d_inode->i_mode))
591 goto put_fail;
592 u=unix_find_socket_byinode(nd.dentry->d_inode);
593 if (!u)
594 goto put_fail;
596 path_release(&nd);
597 unlock_kernel();
/* The path and kernel lock are already released here, so a type mismatch only drops the sock reference. */
599 err=-EPROTOTYPE;
600 if (u->type != type) {
601 sock_put(u);
602 goto fail;
604 } else {
605 err = -ECONNREFUSED;
606 u=unix_find_socket_byname(sunname, len, type, hash);
607 if (!u)
608 goto fail;
610 return u;
/* put_fail undoes the path lookup and the kernel lock; fail only reports the error. */
612 put_fail:
613 path_release(&nd);
614 unlock_kernel();
615 fail:
616 *error=err;
617 return NULL;
/*
 * bind() for unix sockets.
 *
 * An address consisting of the family field only (addr_len equal to
 * sizeof(short)) is handed to unix_autobind(). Otherwise the name is
 * validated by unix_mkname() and copied into a new unix_address.
 *   - Filesystem name (first path byte non-zero): a socket node is created
 *     with vfs_mknod() in the parent directory under the big kernel lock,
 *     and the sock is hashed by the inode number of the new node.
 *   - Abstract name: the name must not already be present in the hash
 *     table, and the sock is hashed by name hash xor type.
 *
 * Returns 0 on success or a negative errno: -EINVAL for a bad family, a
 * bad name or an already bound socket, -ENOMEM, -EADDRINUSE (also used in
 * place of -EEXIST from the filesystem path), or whatever the VFS calls
 * returned.
 */
621 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
623 struct sock *sk = sock->sk;
624 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
625 struct dentry * dentry = NULL;
626 struct nameidata nd;
627 int err;
628 unsigned hash;
629 struct unix_address *addr;
630 unix_socket **list;
632 err = -EINVAL;
633 if (sunaddr->sun_family != AF_UNIX)
634 goto out;
636 if (addr_len==sizeof(short)) {
637 err = unix_autobind(sock);
638 goto out;
/* unix_mkname() returns the effective address length; hash is only filled in for abstract names. */
641 err = unix_mkname(sunaddr, addr_len, &hash);
642 if (err < 0)
643 goto out;
644 addr_len = err;
/* readsem is held from here until out_up, covering the already-bound check and the hashing below. */
646 down(&sk->protinfo.af_unix.readsem);
648 err = -EINVAL;
649 if (sk->protinfo.af_unix.addr)
650 goto out_up;
652 err = -ENOMEM;
653 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
654 if (!addr)
655 goto out_up;
657 memcpy(addr->name, sunaddr, addr_len);
658 addr->len = addr_len;
659 addr->hash = hash^sk->type;
660 atomic_set(&addr->refcnt, 1);
662 if (sunaddr->sun_path[0]) {
663 lock_kernel();
664 err = 0;
666 * Get the parent directory, calculate the hash for last
667 * component.
669 if (path_init(sunaddr->sun_path, LOOKUP_PARENT, &nd))
670 err = path_walk(sunaddr->sun_path, &nd);
671 if (err)
672 goto out_mknod_parent;
674 * Yucky last component or no last component at all?
675 * (foo/., foo/.., /////)
677 err = -EEXIST;
678 if (nd.last_type != LAST_NORM)
679 goto out_mknod;
681 * Lock the directory.
683 down(&nd.dentry->d_inode->i_sem);
685 * Do the final lookup.
687 dentry = lookup_hash(&nd.last, nd.dentry);
688 err = PTR_ERR(dentry);
689 if (IS_ERR(dentry))
690 goto out_mknod_unlock;
691 err = -ENOENT;
693 * Special case - lookup gave negative, but... we had foo/bar/
694 * From the vfs_mknod() POV we just have a negative dentry -
695 * all is fine. Let's be bastards - you had / on the end, you've
696 * been asking for (non-existent) directory. -ENOENT for you.
698 if (nd.last.name[nd.last.len] && !dentry->d_inode)
699 goto out_mknod_dput;
701 * All right, let's create it.
703 err = vfs_mknod(nd.dentry->d_inode, dentry,
704 S_IFSOCK|sock->inode->i_mode, 0);
705 if (err)
706 goto out_mknod_dput;
/* Success: unlock the directory, drop the parent dentry and keep the new node in nd.dentry for the sock. */
707 up(&nd.dentry->d_inode->i_sem);
708 dput(nd.dentry);
709 nd.dentry = dentry;
710 unlock_kernel();
/* Filesystem names carry the hash value UNIX_HASH_SIZE, which UNIX_ABSTRACT() tests against. */
712 addr->hash = UNIX_HASH_SIZE;
715 write_lock(&unix_table_lock);
717 if (!sunaddr->sun_path[0]) {
718 err = -EADDRINUSE;
719 if (__unix_find_socket_byname(sunaddr, addr_len,
720 sk->type, hash)) {
721 unix_release_addr(addr);
722 goto out_unlock;
725 list = &unix_socket_table[addr->hash];
726 } else {
727 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
728 sk->protinfo.af_unix.dentry = nd.dentry;
729 sk->protinfo.af_unix.mnt = nd.mnt;
/* Rehash: take the sock off its current chain and insert it on the chain chosen above. */
732 err = 0;
733 __unix_remove_socket(sk);
734 sk->protinfo.af_unix.addr = addr;
735 __unix_insert_socket(list, sk);
737 out_unlock:
738 write_unlock(&unix_table_lock);
739 out_up:
740 up(&sk->protinfo.af_unix.readsem);
741 out:
742 return err;
/* Unwind labels for the filesystem path, in reverse order of acquisition; all of them free addr and rejoin out_up. */
744 out_mknod_dput:
745 dput(dentry);
746 out_mknod_unlock:
747 up(&nd.dentry->d_inode->i_sem);
748 out_mknod:
749 path_release(&nd);
750 out_mknod_parent:
751 unlock_kernel();
752 if (err==-EEXIST)
753 err=-EADDRINUSE;
754 unix_release_addr(addr);
755 goto out_up;
/*
 * connect() for datagram sockets: set, replace or clear the default peer.
 *
 * With a family other than AF_UNSPEC the target is validated and looked
 * up, the socket is autobound first when passcred is set and it has no
 * address, and the connection is refused with -EPERM unless
 * unix_may_send() allows it. With AF_UNSPEC the peer is cleared.
 *
 * Returns 0 on success or a negative errno from unix_mkname(),
 * unix_autobind() or unix_find_other(), or -EPERM.
 */
758 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
759 int alen, int flags)
761 struct sock *sk = sock->sk;
762 struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
763 struct sock *other;
764 unsigned hash;
765 int err;
767 if (addr->sa_family != AF_UNSPEC) {
768 err = unix_mkname(sunaddr, alen, &hash);
769 if (err < 0)
770 goto out;
771 alen = err;
773 if (sock->passcred && !sk->protinfo.af_unix.addr &&
774 (err = unix_autobind(sock)) != 0)
775 goto out;
/* The reference returned by unix_find_other() becomes the reference held through the peer pointer. */
777 other=unix_find_other(sunaddr, alen, sock->type, hash, &err);
778 if (!other)
779 goto out;
781 unix_state_wlock(sk);
783 err = -EPERM;
784 if (!unix_may_send(sk, other))
785 goto out_unlock;
786 } else {
788 * 1003.1g breaking connected state with AF_UNSPEC
790 other = NULL;
791 unix_state_wlock(sk);
795 * If it was connected, reconnect.
/* In both branches the state write lock taken above is released after the peer pointer is updated. */
797 if (unix_peer(sk)) {
798 struct sock *old_peer = unix_peer(sk);
799 unix_peer(sk)=other;
800 unix_state_wunlock(sk);
/* The reference on the previous peer is dropped only after the lock is released. */
802 sock_put(old_peer);
803 } else {
804 unix_peer(sk)=other;
805 unix_state_wunlock(sk);
807 return 0;
809 out_unlock:
810 unix_state_wunlock(sk);
811 sock_put(other);
812 out:
813 return err;
816 static long unix_wait_for_peer(unix_socket *other, long timeo)
818 int sched;
819 DECLARE_WAITQUEUE(wait, current);
821 __set_current_state(TASK_INTERRUPTIBLE|TASK_EXCLUSIVE);
822 add_wait_queue_exclusive(&other->protinfo.af_unix.peer_wait, &wait);
824 sched = (!other->dead &&
825 !(other->shutdown&RCV_SHUTDOWN) &&
826 skb_queue_len(&other->receive_queue) > other->max_ack_backlog);
828 unix_state_runlock(other);
830 if (sched)
831 timeo = schedule_timeout(timeo);
833 __set_current_state(TASK_RUNNING);
834 remove_wait_queue(&other->protinfo.af_unix.peer_wait, &wait);
835 return timeo;
/*
 * connect() for stream sockets.
 *
 * A new sock (newsk) and a one byte skb are allocated up front. The
 * listener is then looked up and checked under its state read lock; when
 * its queue is over max_ack_backlog the caller either fails with -EAGAIN
 * (no timeout left) or sleeps in unix_wait_for_peer() and restarts. Once
 * the way is open, newsk is wired up as the peer of sk, the skb allocated
 * against newsk is queued on the listener, and the listener is notified
 * through data_ready.
 *
 * Returns 0 on success or a negative errno (-ENOMEM, -ECONNREFUSED,
 * -EAGAIN, -EISCONN, -EINVAL, an interrupted-sleep error, or an error
 * from unix_mkname(), unix_autobind() or unix_find_other()).
 */
838 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
839 int addr_len, int flags)
841 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
842 struct sock *sk = sock->sk;
843 struct sock *newsk = NULL;
844 unix_socket *other = NULL;
845 struct sk_buff *skb = NULL;
846 unsigned hash;
847 int st;
848 int err;
849 long timeo;
851 err = unix_mkname(sunaddr, addr_len, &hash);
852 if (err < 0)
853 goto out;
854 addr_len = err;
856 if (sock->passcred && !sk->protinfo.af_unix.addr &&
857 (err = unix_autobind(sock)) != 0)
858 goto out;
860 timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
862 /* First of all allocate resources.
863 If we will make it after state is locked,
864 we will have to recheck all again in any case.
867 err = -ENOMEM;
869 /* create new sock for complete connection */
870 newsk = unix_create1(NULL);
871 if (newsk == NULL)
872 goto out;
874 /* Allocate skb for sending to listening sock */
875 skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
876 if (skb == NULL)
877 goto out;
879 restart:
880 /* Find listening sock. */
881 other=unix_find_other(sunaddr, addr_len, sk->type, hash, &err);
882 if (!other)
883 goto out;
885 /* Latch state of peer */
886 unix_state_rlock(other);
888 /* Apparently VFS overslept socket death. Retry. */
889 if (other->dead) {
890 unix_state_runlock(other);
891 sock_put(other);
892 goto restart;
895 err = -ECONNREFUSED;
896 if (other->state != TCP_LISTEN)
897 goto out_unlock;
899 if (skb_queue_len(&other->receive_queue) > other->max_ack_backlog) {
900 err = -EAGAIN;
901 if (!timeo)
902 goto out_unlock;
/* unix_wait_for_peer() releases the read lock on other, so the paths below go to out or restart without unlocking. */
904 timeo = unix_wait_for_peer(other, timeo);
906 err = sock_intr_errno(timeo);
907 if (signal_pending(current))
908 goto out;
909 sock_put(other);
910 goto restart;
913 /* Latch our state.
915 It is tricky place. We need to grab write lock and cannot
916 drop lock on peer. It is dangerous because deadlock is
917 possible. Connect to self case and simultaneous
918 attempt to connect are eliminated by checking socket
919 state. other is TCP_LISTEN, if sk is TCP_LISTEN we
920 check this before attempt to grab lock.
922 Well, and we have to recheck the state after socket locked.
924 st = sk->state;
926 switch (st) {
927 case TCP_CLOSE:
928 /* This is ok... continue with connect */
929 break;
930 case TCP_ESTABLISHED:
931 /* Socket is already connected */
932 err = -EISCONN;
933 goto out_unlock;
934 default:
935 err = -EINVAL;
936 goto out_unlock;
939 unix_state_wlock(sk);
/* The state was sampled without the lock above; if it changed meanwhile, drop everything and start over. */
941 if (sk->state != st) {
942 unix_state_wunlock(sk);
943 unix_state_runlock(other);
944 sock_put(other);
945 goto restart;
948 /* The way is open! Fastly set all the necessary fields... */
/* newsk holds a reference on sk through its peer pointer. */
950 sock_hold(sk);
951 unix_peer(newsk)=sk;
952 newsk->state=TCP_ESTABLISHED;
953 newsk->type=SOCK_STREAM;
954 newsk->peercred.pid = current->pid;
955 newsk->peercred.uid = current->euid;
956 newsk->peercred.gid = current->egid;
957 newsk->sleep = &newsk->protinfo.af_unix.peer_wait;
959 /* copy address information from listening to new sock*/
960 if (other->protinfo.af_unix.addr)
962 atomic_inc(&other->protinfo.af_unix.addr->refcnt);
963 newsk->protinfo.af_unix.addr=other->protinfo.af_unix.addr;
965 if (other->protinfo.af_unix.dentry) {
966 /* Damn, even dget is not SMP safe. It becomes ridiculous... */
967 lock_kernel();
968 newsk->protinfo.af_unix.dentry=dget(other->protinfo.af_unix.dentry);
969 newsk->protinfo.af_unix.mnt=mntget(other->protinfo.af_unix.mnt);
970 unlock_kernel();
973 /* Set credentials */
974 sk->peercred = other->peercred;
/* sk holds a reference on newsk through its peer pointer. */
976 sock_hold(newsk);
977 unix_peer(sk)=newsk;
978 sock->state=SS_CONNECTED;
979 sk->state=TCP_ESTABLISHED;
981 unix_state_wunlock(sk);
983 /* take ten and and send info to listening sock */
984 skb_queue_tail(&other->receive_queue,skb);
985 unix_state_runlock(other);
986 other->data_ready(other, 0);
987 sock_put(other);
988 return 0;
/* Error exits: out_unlock still holds the read lock on other; out frees whatever has been acquired so far. */
990 out_unlock:
991 if (other)
992 unix_state_runlock(other);
994 out:
995 if (skb)
996 kfree_skb(skb);
997 if (newsk)
998 unix_release_sock(newsk, 0);
999 if (other)
1000 sock_put(other);
1001 return err;
1004 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1006 struct sock *ska=socka->sk, *skb = sockb->sk;
1008 /* Join our sockets back to back */
1009 sock_hold(ska);
1010 sock_hold(skb);
1011 unix_peer(ska)=skb;
1012 unix_peer(skb)=ska;
1013 ska->peercred.pid = skb->peercred.pid = current->pid;
1014 ska->peercred.uid = skb->peercred.uid = current->euid;
1015 ska->peercred.gid = skb->peercred.gid = current->egid;
1017 if (ska->type != SOCK_DGRAM)
1019 ska->state=TCP_ESTABLISHED;
1020 skb->state=TCP_ESTABLISHED;
1021 socka->state=SS_CONNECTED;
1022 sockb->state=SS_CONNECTED;
1024 return 0;
1027 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1029 unix_socket *sk = sock->sk;
1030 unix_socket *tsk;
1031 struct sk_buff *skb;
1032 int err;
1034 err = -EOPNOTSUPP;
1035 if (sock->type!=SOCK_STREAM)
1036 goto out;
1038 err = -EINVAL;
1039 if (sk->state!=TCP_LISTEN)
1040 goto out;
1042 /* If socket state is TCP_LISTEN it cannot change (for now...),
1043 * so that no locks are necessary.
1046 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1047 if (!skb)
1048 goto out;
1050 tsk = skb->sk;
1051 skb_free_datagram(sk, skb);
1052 wake_up_interruptible(&sk->protinfo.af_unix.peer_wait);
1054 /* attach accepted sock to socket */
1055 unix_state_wlock(tsk);
1056 newsock->state = SS_CONNECTED;
1057 sock_graft(tsk, newsock);
1058 unix_state_wunlock(tsk);
1059 return 0;
1061 out:
1062 return err;
1066 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1068 struct sock *sk = sock->sk;
1069 struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1070 int err = 0;
1072 if (peer) {
1073 sk = unix_peer_get(sk);
1075 err = -ENOTCONN;
1076 if (!sk)
1077 goto out;
1078 err = 0;
1079 } else {
1080 sock_hold(sk);
1083 unix_state_rlock(sk);
1084 if (!sk->protinfo.af_unix.addr) {
1085 sunaddr->sun_family = AF_UNIX;
1086 sunaddr->sun_path[0] = 0;
1087 *uaddr_len = sizeof(short);
1088 } else {
1089 struct unix_address *addr = sk->protinfo.af_unix.addr;
1091 *uaddr_len = addr->len;
1092 memcpy(sunaddr, addr->name, *uaddr_len);
1094 unix_state_runlock(sk);
1095 sock_put(sk);
1096 out:
1097 return err;
1100 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1102 int i;
1104 scm->fp = UNIXCB(skb).fp;
1105 skb->destructor = sock_wfree;
1106 UNIXCB(skb).fp = NULL;
1108 for (i=scm->fp->count-1; i>=0; i--)
1109 unix_notinflight(scm->fp->fp[i]);
1112 static void unix_destruct_fds(struct sk_buff *skb)
1114 struct scm_cookie scm;
1115 memset(&scm, 0, sizeof(scm));
1116 unix_detach_fds(&scm, skb);
1118 /* Alas, it calls VFS */
1119 /* So fscking what? fput() had been SMP-safe since the last Summer */
1120 scm_destroy(&scm);
1121 sock_wfree(skb);
1124 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1126 int i;
1127 for (i=scm->fp->count-1; i>=0; i--)
1128 unix_inflight(scm->fp->fp[i]);
1129 UNIXCB(skb).fp = scm->fp;
1130 skb->destructor = unix_destruct_fds;
1131 scm->fp = NULL;
1135 * Send AF_UNIX data.
/*
 * sendmsg() for datagram sockets.
 *
 * The destination is either the address in msg_name (validated by
 * unix_mkname() and looked up by unix_find_other()) or, without a name,
 * the connected peer. The whole message is copied into a single skb which
 * is queued on the receiver. When the receiver queue is over
 * max_ack_backlog the sender fails with -EAGAIN (no timeout left) or
 * sleeps in unix_wait_for_peer() and retries.
 *
 * Returns len on success or a negative errno (-EOPNOTSUPP for MSG_OOB,
 * -ENOTCONN, -EMSGSIZE, -ECONNRESET, -ECONNREFUSED, -EPERM, -EPIPE,
 * -EAGAIN, an interrupted-sleep error, or an error from a helper).
 */
1138 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, int len,
1139 struct scm_cookie *scm)
1141 struct sock *sk = sock->sk;
1142 struct sockaddr_un *sunaddr=msg->msg_name;
1143 unix_socket *other = NULL;
1144 int namelen = 0; /* fake GCC */
1145 int err;
1146 unsigned hash;
1147 struct sk_buff *skb;
1148 long timeo;
1150 err = -EOPNOTSUPP;
1151 if (msg->msg_flags&MSG_OOB)
1152 goto out;
1154 if (msg->msg_namelen) {
1155 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1156 if (err < 0)
1157 goto out;
1158 namelen = err;
1159 } else {
/* No destination given: use the connected peer, with a reference held by unix_peer_get(). */
1160 sunaddr = NULL;
1161 err = -ENOTCONN;
1162 other = unix_peer_get(sk);
1163 if (!other)
1164 goto out;
1167 if (sock->passcred && !sk->protinfo.af_unix.addr &&
1168 (err = unix_autobind(sock)) != 0)
1169 goto out;
1171 err = -EMSGSIZE;
1172 if ((unsigned)len > sk->sndbuf - 32)
1173 goto out;
1175 skb = sock_alloc_send_skb(sk, len, 0, msg->msg_flags&MSG_DONTWAIT, &err);
1176 if (skb==NULL)
1177 goto out;
/* Sender credentials and any passed files travel with the skb. */
1179 memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred));
1180 if (scm->fp)
1181 unix_attach_fds(scm, skb);
1183 skb->h.raw = skb->data;
1184 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1185 if (err)
1186 goto out_free;
1188 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1190 restart:
/* other is NULL here on the first pass of a named send, or after a dead peer was dropped below. */
1191 if (!other) {
1192 err = -ECONNRESET;
1193 if (sunaddr == NULL)
1194 goto out_free;
1196 other = unix_find_other(sunaddr, namelen, sk->type, hash, &err);
1197 if (other==NULL)
1198 goto out_free;
1201 unix_state_rlock(other);
1202 err = -EPERM;
1203 if (!unix_may_send(sk, other))
1204 goto out_unlock;
1206 if (other->dead) {
1208 * Check with 1003.1g - what should
1209 * datagram error
/* Drop the lookup reference first; if other was also our connected peer, clear the peer pointer and drop that reference too. */
1211 unix_state_runlock(other);
1212 sock_put(other);
1214 err = 0;
1215 unix_state_wlock(sk);
1216 if (unix_peer(sk) == other) {
1217 unix_peer(sk)=NULL;
1218 unix_state_wunlock(sk);
1220 sock_put(other);
1221 err = -ECONNREFUSED;
1222 } else {
1223 unix_state_wunlock(sk);
1226 other = NULL;
1227 if (err)
1228 goto out_free;
1229 goto restart;
1232 err = -EPIPE;
1233 if (other->shutdown&RCV_SHUTDOWN)
1234 goto out_unlock;
1236 if (skb_queue_len(&other->receive_queue) > other->max_ack_backlog) {
1237 if (!timeo) {
1238 err = -EAGAIN;
1239 goto out_unlock;
/* unix_wait_for_peer() releases the read lock on other; the reference on other is kept across the restart. */
1242 timeo = unix_wait_for_peer(other, timeo);
1244 err = sock_intr_errno(timeo);
1245 if (signal_pending(current))
1246 goto out_free;
1248 goto restart;
1251 skb_queue_tail(&other->receive_queue, skb);
1252 unix_state_runlock(other);
1253 other->data_ready(other, len);
1254 sock_put(other);
1255 return len;
/* Error exits: out_unlock still holds the read lock on other; out_free owns the skb; out drops the reference on other if any. */
1257 out_unlock:
1258 unix_state_runlock(other);
1259 out_free:
1260 kfree_skb(skb);
1261 out:
1262 if (other)
1263 sock_put(other);
1264 return err;
/*
 * sendmsg() for stream sockets.
 *
 * The data is sent to the connected peer in chunks. Each chunk is at most
 * sndbuf/2 - 16 bytes and is further trimmed to the tailroom of the skb
 * actually obtained; it is copied into its own skb together with the
 * sender credentials and queued on the peer. Passed files, if any, are
 * attached to the first skb only, since unix_attach_fds() clears scm->fp.
 *
 * Returns the number of bytes queued when that is non-zero; otherwise a
 * negative errno: -EOPNOTSUPP for MSG_OOB, -EISCONN or -EOPNOTSUPP when a
 * destination name is supplied, -ENOTCONN without a peer, -EPIPE (with
 * SIGPIPE unless MSG_NOSIGNAL is set) when the send side is shut down or
 * the peer is dead or shut down for receive, or the error from skb
 * allocation or the user copy.
 */
1268 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len,
1269 struct scm_cookie *scm)
1271 struct sock *sk = sock->sk;
1272 unix_socket *other = NULL;
1273 struct sockaddr_un *sunaddr=msg->msg_name;
1274 int err,size;
1275 struct sk_buff *skb;
1276 int limit=0;
1277 int sent=0;
1279 err = -EOPNOTSUPP;
1280 if (msg->msg_flags&MSG_OOB)
1281 goto out_err;
1283 if (msg->msg_namelen) {
1284 err = (sk->state==TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP);
1285 goto out_err;
1286 } else {
1287 sunaddr = NULL;
1288 err = -ENOTCONN;
1289 other = unix_peer_get(sk);
1290 if (!other)
1291 goto out_err;
1294 if (sk->shutdown&SEND_SHUTDOWN)
1295 goto pipe_err;
1297 while(sent < len)
1300 * Optimisation for the fact that under 0.01% of X messages typically
1301 * need breaking up.
1304 size=len-sent;
1306 /* Keep two messages in the pipe so it schedules better */
1307 if (size > sk->sndbuf/2 - 16)
1308 size = sk->sndbuf/2 - 16;
1311 * Keep to page sized kmalloc()'s as various people
1312 * have suggested. Big mallocs stress the vm too
1313 * much.
1316 if (size > PAGE_SIZE-16)
1317 limit = PAGE_SIZE-16; /* Fall back to a page if we can't grab a big buffer this instant */
1318 else
1319 limit = 0; /* Otherwise just grab and wait */
1322 * Grab a buffer
1325 skb=sock_alloc_send_skb(sk,size,limit,msg->msg_flags&MSG_DONTWAIT, &err);
1327 if (skb==NULL)
1328 goto out_err;
1331 * If you pass two values to the sock_alloc_send_skb
1332 * it tries to grab the large buffer with GFP_BUFFER
1333 * (which can fail easily), and if it fails grab the
1334 * fallback size buffer which is under a page and will
1335 * succeed. [Alan]
1337 size = min(size, skb_tailroom(skb));
1339 memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred));
1340 if (scm->fp)
1341 unix_attach_fds(scm, skb);
1343 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1344 kfree_skb(skb);
1345 goto out_err;
/* The peer is re-checked under its state read lock for every chunk before queueing. */
1348 unix_state_rlock(other);
1350 if (other->dead || (other->shutdown & RCV_SHUTDOWN))
1351 goto pipe_err_free;
1353 skb_queue_tail(&other->receive_queue, skb);
1354 unix_state_runlock(other);
1355 other->data_ready(other, size);
1356 sent+=size;
1358 sock_put(other);
1359 return sent;
/* Error exits: pipe_err_free still holds the read lock on other and owns the skb. SIGPIPE is raised only if nothing was sent. */
1361 pipe_err_free:
1362 unix_state_runlock(other);
1363 kfree_skb(skb);
1364 pipe_err:
1365 if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1366 send_sig(SIGPIPE,current,0);
1367 err = -EPIPE;
1368 out_err:
1369 if (other)
1370 sock_put(other);
/* A partial send reports the byte count rather than the error. */
1371 return sent ? : err;
1374 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1376 msg->msg_namelen = sizeof(short);
1377 if (sk->protinfo.af_unix.addr) {
1378 msg->msg_namelen=sk->protinfo.af_unix.addr->len;
1379 memcpy(msg->msg_name,
1380 sk->protinfo.af_unix.addr->name,
1381 sk->protinfo.af_unix.addr->len);
1385 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, int size,
1386 int flags, struct scm_cookie *scm)
1388 struct sock *sk = sock->sk;
1389 int noblock = flags & MSG_DONTWAIT;
1390 struct sk_buff *skb;
1391 int err;
1393 err = -EOPNOTSUPP;
1394 if (flags&MSG_OOB)
1395 goto out;
1397 msg->msg_namelen = 0;
1399 skb = skb_recv_datagram(sk, flags, noblock, &err);
1400 if (!skb)
1401 goto out;
1403 wake_up_interruptible(&sk->protinfo.af_unix.peer_wait);
1405 if (msg->msg_name)
1406 unix_copy_addr(msg, skb->sk);
1408 if (size > skb->len)
1409 size = skb->len;
1410 else if (size < skb->len)
1411 msg->msg_flags |= MSG_TRUNC;
1413 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1414 if (err)
1415 goto out_free;
1417 scm->creds = *UNIXCREDS(skb);
1419 if (!(flags & MSG_PEEK))
1421 if (UNIXCB(skb).fp)
1422 unix_detach_fds(scm, skb);
1424 else
1426 /* It is questionable: on PEEK we could:
1427 - do not return fds - good, but too simple 8)
1428 - return fds, and do not return them on read (old strategy,
1429 apparently wrong)
1430 - clone fds (I choosed it for now, it is the most universal
1431 solution)
1433 POSIX 1003.1g does not actually define this clearly
1434 at all. POSIX 1003.1g doesn't define a lot of things
1435 clearly however!
1438 if (UNIXCB(skb).fp)
1439 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1441 err = size;
1443 out_free:
1444 skb_free_datagram(sk,skb);
1445 out:
1446 return err;
1450 * Sleep until data has arrive. But check for races..
1453 static long unix_stream_data_wait(unix_socket * sk, long timeo)
1455 DECLARE_WAITQUEUE(wait, current);
1457 unix_state_rlock(sk);
1459 add_wait_queue(sk->sleep, &wait);
1461 for (;;) {
1462 set_current_state(TASK_INTERRUPTIBLE);
1464 if (skb_queue_len(&sk->receive_queue) ||
1465 sk->err ||
1466 (sk->shutdown & RCV_SHUTDOWN) ||
1467 signal_pending(current) ||
1468 !timeo)
1469 break;
1471 set_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
1472 unix_state_runlock(sk);
1473 timeo = schedule_timeout(timeo);
1474 unix_state_rlock(sk);
1475 clear_bit(SOCK_ASYNC_WAITDATA, &sk->socket->flags);
1478 __set_current_state(TASK_RUNNING);
1479 remove_wait_queue(sk->sleep, &wait);
1480 unix_state_runlock(sk);
1481 return timeo;
1486 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size,
1487 int flags, struct scm_cookie *scm)
1489 struct sock *sk = sock->sk;
1490 struct sockaddr_un *sunaddr=msg->msg_name;
1491 int copied = 0;
1492 int check_creds = 0;
1493 int target;
1494 int err = 0;
1495 long timeo;
1497 err = -EINVAL;
1498 if (sk->state != TCP_ESTABLISHED)
1499 goto out;
1501 err = -EOPNOTSUPP;
1502 if (flags&MSG_OOB)
1503 goto out;
1505 target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1506 timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1508 msg->msg_namelen = 0;
1510 /* Lock the socket to prevent queue disordering
1511 * while sleeps in memcpy_tomsg
1514 down(&sk->protinfo.af_unix.readsem);
1518 int chunk;
1519 struct sk_buff *skb;
1521 skb=skb_dequeue(&sk->receive_queue);
1522 if (skb==NULL)
1524 if (copied >= target)
1525 break;
1528 * POSIX 1003.1g mandates this order.
1531 if ((err = sock_error(sk)) != 0)
1532 break;
1533 if (sk->shutdown & RCV_SHUTDOWN)
1534 break;
1535 err = -EAGAIN;
1536 if (!timeo)
1537 break;
1538 up(&sk->protinfo.af_unix.readsem);
1540 timeo = unix_stream_data_wait(sk, timeo);
1542 if (signal_pending(current)) {
1543 err = sock_intr_errno(timeo);
1544 goto out;
1546 down(&sk->protinfo.af_unix.readsem);
1547 continue;
1550 if (check_creds) {
1551 /* Never glue messages from different writers */
1552 if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0) {
1553 skb_queue_head(&sk->receive_queue, skb);
1554 break;
1556 } else {
1557 /* Copy credentials */
1558 scm->creds = *UNIXCREDS(skb);
1559 check_creds = 1;
1562 /* Copy address just once */
1563 if (sunaddr)
1565 unix_copy_addr(msg, skb->sk);
1566 sunaddr = NULL;
1569 chunk = min(skb->len, size);
1570 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1571 skb_queue_head(&sk->receive_queue, skb);
1572 if (copied == 0)
1573 copied = -EFAULT;
1574 break;
1576 copied += chunk;
1577 size -= chunk;
1579 /* Mark read part of skb as used */
1580 if (!(flags & MSG_PEEK))
1582 skb_pull(skb, chunk);
1584 if (UNIXCB(skb).fp)
1585 unix_detach_fds(scm, skb);
1587 /* put the skb back if we didn't use it up.. */
1588 if (skb->len)
1590 skb_queue_head(&sk->receive_queue, skb);
1591 break;
1594 kfree_skb(skb);
1596 if (scm->fp)
1597 break;
1599 else
1601 /* It is questionable, see note in unix_dgram_recvmsg.
1603 if (UNIXCB(skb).fp)
1604 scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1606 /* put message back and return */
1607 skb_queue_head(&sk->receive_queue, skb);
1608 break;
1610 } while (size);
1612 up(&sk->protinfo.af_unix.readsem);
1613 out:
1614 return copied ? : err;
1617 static int unix_shutdown(struct socket *sock, int mode)
1619 struct sock *sk = sock->sk;
1620 unix_socket *other;
1622 mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1624 if (mode) {
1625 unix_state_wlock(sk);
1626 sk->shutdown |= mode;
1627 other=unix_peer(sk);
1628 if (other)
1629 sock_hold(other);
1630 unix_state_wunlock(sk);
1631 sk->state_change(sk);
1633 if (other && sk->type == SOCK_STREAM) {
1634 int peer_mode = 0;
1636 if (mode&RCV_SHUTDOWN)
1637 peer_mode |= SEND_SHUTDOWN;
1638 if (mode&SEND_SHUTDOWN)
1639 peer_mode |= RCV_SHUTDOWN;
1640 unix_state_wlock(other);
1641 other->shutdown |= peer_mode;
1642 unix_state_wunlock(other);
1643 other->state_change(other);
1644 read_lock(&other->callback_lock);
1645 if (peer_mode == SHUTDOWN_MASK)
1646 sk_wake_async(other,1,POLL_HUP);
1647 else if (peer_mode & RCV_SHUTDOWN)
1648 sk_wake_async(other,1,POLL_IN);
1649 read_unlock(&other->callback_lock);
1651 if (other)
1652 sock_put(other);
1654 return 0;
1658 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1660 struct sock *sk = sock->sk;
1661 long amount=0;
1662 int err;
1664 switch(cmd)
1666 case SIOCOUTQ:
1667 amount = atomic_read(&sk->wmem_alloc);
1668 err = put_user(amount, (int *)arg);
1669 break;
1670 case SIOCINQ:
1672 struct sk_buff *skb;
1673 if (sk->state==TCP_LISTEN) {
1674 err = -EINVAL;
1675 break;
1678 spin_lock(&sk->receive_queue.lock);
1679 if((skb=skb_peek(&sk->receive_queue))!=NULL)
1680 amount=skb->len;
1681 spin_unlock(&sk->receive_queue.lock);
1682 err = put_user(amount, (int *)arg);
1683 break;
1686 default:
1687 err = -EINVAL;
1688 break;
1690 return err;
1693 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1695 struct sock *sk = sock->sk;
1696 unsigned int mask;
1698 poll_wait(file, sk->sleep, wait);
1699 mask = 0;
1701 /* exceptional events? */
1702 if (sk->err)
1703 mask |= POLLERR;
1704 if (sk->shutdown == SHUTDOWN_MASK)
1705 mask |= POLLHUP;
1707 /* readable? */
1708 if (!skb_queue_empty(&sk->receive_queue) || (sk->shutdown&RCV_SHUTDOWN))
1709 mask |= POLLIN | POLLRDNORM;
1711 /* Connection-based need to check for termination and startup */
1712 if (sk->type == SOCK_STREAM && sk->state==TCP_CLOSE)
1713 mask |= POLLHUP;
1716 * we set writable also when the other side has shut down the
1717 * connection. This prevents stuck sockets.
1719 if (unix_writable(sk))
1720 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1722 return mask;
#ifdef CONFIG_PROC_FS
/*
 *	read_proc handler: one header line, then one line per unix socket.
 *
 *	buffer	output buffer supplied by the caller
 *	start	set to the first byte inside buffer that belongs to the
 *		requested window
 *	offset	position in the virtual file the caller wants to read from
 *	length	number of bytes wanted
 *	eof	set to 1 when every socket has been listed
 *	data	unused
 *
 *	Returns the number of valid bytes at *start (never negative, never
 *	more than length).
 *
 *	The per-socket format string has seven conversions; the socket
 *	pointer (for %p) and a literal 0 (second %08X, the "Protocol"
 *	column) are passed explicitly so that every conversion has a
 *	matching argument.
 *
 *	NOTE(review): the column spacing inside the header literal may have
 *	been collapsed in transit - confirm against the reference copy.
 *	NOTE(review): nothing here bounds the writes into buffer; this
 *	assumes the caller's buffer is large enough - confirm.
 */
static int unix_read_proc(char *buffer, char **start, off_t offset,
			  int length, int *eof, void *data)
{
	off_t pos = 0;
	off_t begin = 0;
	int len = 0;
	int i;
	unix_socket *s;

	len += sprintf(buffer,"Num RefCount Protocol Flags Type St "
	    "Inode Path\n");

	read_lock(&unix_table_lock);
	forall_unix_sockets (i,s)
	{
		unix_state_rlock(s);

		len += sprintf(buffer+len,"%p: %08X %08X %08X %04X %02X %5ld",
			s,
			atomic_read(&s->refcnt),
			0,
			s->state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->type,
			s->socket ?
			(s->state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			s->socket ? s->socket->inode->i_ino : 0);

		if (s->protinfo.af_unix.addr)
		{
			/* Separator; overwritten below unless the name is abstract. */
			buffer[len++] = ' ';
			memcpy(buffer+len, s->protinfo.af_unix.addr->name->sun_path,
			       s->protinfo.af_unix.addr->len-sizeof(short));
			if (!UNIX_ABSTRACT(s))
				len--;			/* drop the last path byte from the count */
			else
				buffer[len] = '@';	/* replace the first byte of an abstract name */
			len += s->protinfo.af_unix.addr->len - sizeof(short);
		}
		unix_state_runlock(s);

		buffer[len++] = '\n';

		/* Discard output that lies entirely before the requested window. */
		pos = begin + len;
		if (pos < offset)
		{
			len = 0;
			begin = pos;
		}
		/* Stop once the window is full; *eof stays untouched. */
		if (pos > offset+length)
			goto done;
	}
	*eof = 1;
done:
	read_unlock(&unix_table_lock);
	*start = buffer+(offset-begin);
	len -= (offset-begin);
	if (len > length)
		len = length;
	if (len < 0)
		len = 0;
	return len;
}
#endif
1792 struct proto_ops unix_stream_ops = {
1793 family: PF_UNIX,
1795 release: unix_release,
1796 bind: unix_bind,
1797 connect: unix_stream_connect,
1798 socketpair: unix_socketpair,
1799 accept: unix_accept,
1800 getname: unix_getname,
1801 poll: unix_poll,
1802 ioctl: unix_ioctl,
1803 listen: unix_listen,
1804 shutdown: unix_shutdown,
1805 setsockopt: sock_no_setsockopt,
1806 getsockopt: sock_no_getsockopt,
1807 sendmsg: unix_stream_sendmsg,
1808 recvmsg: unix_stream_recvmsg,
1809 mmap: sock_no_mmap,
1812 struct proto_ops unix_dgram_ops = {
1813 family: PF_UNIX,
1815 release: unix_release,
1816 bind: unix_bind,
1817 connect: unix_dgram_connect,
1818 socketpair: unix_socketpair,
1819 accept: sock_no_accept,
1820 getname: unix_getname,
1821 poll: datagram_poll,
1822 ioctl: unix_ioctl,
1823 listen: sock_no_listen,
1824 shutdown: unix_shutdown,
1825 setsockopt: sock_no_setsockopt,
1826 getsockopt: sock_no_getsockopt,
1827 sendmsg: unix_dgram_sendmsg,
1828 recvmsg: unix_dgram_recvmsg,
1829 mmap: sock_no_mmap,
1832 struct net_proto_family unix_family_ops = {
1833 PF_UNIX,
1834 unix_create
1837 #ifdef MODULE
1838 #ifdef CONFIG_SYSCTL
1839 extern void unix_sysctl_register(void);
1840 extern void unix_sysctl_unregister(void);
1841 #endif
1843 int init_module(void)
1844 #else
1845 void __init unix_proto_init(struct net_proto *pro)
1846 #endif
1848 struct sk_buff *dummy_skb;
1850 printk(KERN_INFO "NET4: Unix domain sockets 1.0/SMP for Linux NET4.0.\n");
1851 if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb))
1853 printk(KERN_CRIT "unix_proto_init: panic\n");
1854 #ifdef MODULE
1855 return -1;
1856 #else
1857 return;
1858 #endif
1860 sock_register(&unix_family_ops);
1861 #ifdef CONFIG_PROC_FS
1862 create_proc_read_entry("net/unix", 0, 0, unix_read_proc, NULL);
1863 #endif
1865 #ifdef MODULE
1866 #ifdef CONFIG_SYSCTL
1867 unix_sysctl_register();
1868 #endif
1870 return 0;
1871 #endif
#ifdef MODULE
/*
 *	Module unload: unregister the PF_UNIX family, then the sysctl
 *	table (CONFIG_SYSCTL) and the "net/unix" proc entry
 *	(CONFIG_PROC_FS).
 */
void cleanup_module(void)
{
	sock_unregister(PF_UNIX);
#ifdef CONFIG_SYSCTL
	unix_sysctl_unregister();
#endif
#ifdef CONFIG_PROC_FS
	remove_proc_entry("net/unix", NULL);
#endif
}
#endif
/*
 * Local variables:
 *  compile-command: "gcc -g -D__KERNEL__ -Wall -O6 -I/usr/src/linux/include -c af_unix.c"
 * End:
 */