net/unix/af_unix.c

   1 /*
   2  * NET4:        Implementation of BSD Unix domain sockets.
   3  *
   4  * Authors:     Alan Cox, <alan.cox@linux.org>
   5  *
   6  *              This program is free software; you can redistribute it and/or
   7  *              modify it under the terms of the GNU General Public License
   8  *              as published by the Free Software Foundation; either version
   9  *              2 of the License, or (at your option) any later version.
  10  *
  11  * Version:     $Id: af_unix.c,v 1.133 2002/02/08 03:57:19 davem Exp $
  12  *
  13  * Fixes:
  14  *              Linus Torvalds  :       Assorted bug cures.
  15  *              Niibe Yutaka    :       async I/O support.
  16  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  17  *              Alan Cox        :       Limit size of allocated blocks.
  18  *              Alan Cox        :       Fixed the stupid socketpair bug.
  19  *              Alan Cox        :       BSD compatibility fine tuning.
  20  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  21  *              Alan Cox        :       Sorted out a proper draft version of
  22  *                                      file descriptor passing hacked up from
  23  *                                      Mike Shaver's work.
  24  *              Marty Leisner   :       Fixes to fd passing
  25  *              Nick Nevin      :       recvmsg bugfix.
  26  *              Alan Cox        :       Started proper garbage collector
  27  *              Heiko EiBfeldt  :       Missing verify_area check
  28  *              Alan Cox        :       Started POSIXisms
  29  *              Andreas Schwab  :       Replace inode by dentry for proper
  30  *                                      reference counting
  31  *              Kirk Petersen   :       Made this a module
  32  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  33  *                                      Lots of bug fixes.
  34  *           Alexey Kuznetosv   :       Repaired (I hope) bugs introduces
  35  *                                      by above two patches.
  36  *           Andrea Arcangeli   :       If possible we block in connect(2)
  37  *                                      if the max backlog of the listen socket
  38  *                                      is been reached. This won't break
  39  *                                      old apps and it will avoid huge amount
  40  *                                      of socks hashed (this for unix_gc()
  41  *                                      performances reasons).
  42  *                                      Security fix that limits the max
  43  *                                      number of socks to 2*max_files and
  44  *                                      the number of skb queueable in the
  45  *                                      dgram receiver.
  46  *              Artur Skawina   :       Hash function optimizations
  47  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  48  *            Malcolm Beattie   :       Set peercred for socketpair
  49  *           Michal Ostrowski   :       Module initialization cleanup.
  50  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  51  *                                      the core infrastructure is doing that
  52  *                                      for all net proto families now (2.5.69+)
  53  *
  54  *
  55  * Known differences from reference BSD that was tested:
  56  *
  57  *      [TO FIX]
  58  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  59  *              other the moment one end closes.
  60  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  61  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  62  *      [NOT TO FIX]
  63  *      accept() returns a path name even if the connecting socket has closed
  64  *              in the meantime (BSD loses the path and gives up).
  65  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  66  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  67  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  68  *      BSD af_unix apparently has connect forgetting to block properly.
  69  *              (need to check this with the POSIX spec in detail)
  70  *
  71  * Differences from 2.0.0-11-... (ANK)
  72  *      Bug fixes and improvements.
  73  *              - client shutdown killed server socket.
  74  *              - removed all useless cli/sti pairs.
  75  *
  76  *      Semantic changes/extensions.
  77  *              - generic control message passing.
  78  *              - SCM_CREDENTIALS control message.
  79  *              - "Abstract" (not FS based) socket bindings.
  80  *                Abstract names are sequences of bytes (not zero terminated)
  81  *                started by 0, so that this name space does not intersect
  82  *                with BSD names.
  83  */
  84
  85 #include <linux/module.h>
  86 #include <linux/kernel.h>
  87 #include <linux/signal.h>
  88 #include <linux/sched.h>
  89 #include <linux/errno.h>
  90 #include <linux/string.h>
  91 #include <linux/stat.h>
  92 #include <linux/dcache.h>
  93 #include <linux/namei.h>
  94 #include <linux/socket.h>
  95 #include <linux/un.h>
  96 #include <linux/fcntl.h>
  97 #include <linux/termios.h>
  98 #include <linux/sockios.h>
  99 #include <linux/net.h>
 100 #include <linux/in.h>
 101 #include <linux/fs.h>
 102 #include <linux/slab.h>
 103 #include <asm/uaccess.h>
 104 #include <linux/skbuff.h>
 105 #include <linux/netdevice.h>
 106 #include <net/net_namespace.h>
 107 #include <net/sock.h>
 108 #include <net/tcp_states.h>
 109 #include <net/af_unix.h>
 110 #include <linux/proc_fs.h>
 111 #include <linux/seq_file.h>
 112 #include <net/scm.h>
 113 #include <linux/init.h>
 114 #include <linux/poll.h>
 115 #include <linux/rtnetlink.h>
 116 #include <linux/mount.h>
 117 #include <net/checksum.h>
 118 #include <linux/security.h>
 119
 120 static struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1];
 121 static DEFINE_SPINLOCK(unix_table_lock);
 122 static atomic_t unix_nr_socks = ATOMIC_INIT(0);
 123
 124 #define unix_sockets_unbound    (&unix_socket_table[UNIX_HASH_SIZE])
 125
 126 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
 127
 128 #ifdef CONFIG_SECURITY_NETWORK
 129 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 130 {
 131         memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
 132 }
 133
 134 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 135 {
 136         scm->secid = *UNIXSID(skb);
 137 }
 138 #else
 139 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 140 { }
 141
 142 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
 143 { }
 144 #endif /* CONFIG_SECURITY_NETWORK */
 145
 146 /*
 147  *  SMP locking strategy:
 148  *    hash table is protected with spinlock unix_table_lock
 149  *    each socket state is protected by separate rwlock.
 150  */
 151
 152 static inline unsigned unix_hash_fold(__wsum n)
 153 {
 154         unsigned hash = (__force unsigned)n;
 155         hash ^= hash>>16;
 156         hash ^= hash>>8;
 157         return hash&(UNIX_HASH_SIZE-1);
 158 }
 159
 160 #define unix_peer(sk) (unix_sk(sk)->peer)
 161
 162 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 163 {
 164         return unix_peer(osk) == sk;
 165 }
 166
 167 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 168 {
 169         return (unix_peer(osk) == NULL || unix_our_peer(sk, osk));
 170 }
 171
 172 static struct sock *unix_peer_get(struct sock *s)
 173 {
 174         struct sock *peer;
 175
 176         unix_state_lock(s);
 177         peer = unix_peer(s);
 178         if (peer)
 179                 sock_hold(peer);
 180         unix_state_unlock(s);
 181         return peer;
 182 }
 183
 184 static inline void unix_release_addr(struct unix_address *addr)
 185 {
 186         if (atomic_dec_and_test(&addr->refcnt))
 187                 kfree(addr);
 188 }
 189
 190 /*
 191  *      Check unix socket name:
 192  *              - should be not zero length.
 193  *              - if started by not zero, should be NULL terminated (FS object)
 194  *              - if started by zero, it is abstract name.
 195  */
 196
 197 static int unix_mkname(struct sockaddr_un * sunaddr, int len, unsigned *hashp)
 198 {
 199         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 200                 return -EINVAL;
 201         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 202                 return -EINVAL;
 203         if (sunaddr->sun_path[0]) {
 204                 /*
 205                  * This may look like an off by one error but it is a bit more
 206                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 207                  * sun_path[108] doesnt as such exist.  However in kernel space
 208                  * we are guaranteed that it is a valid memory location in our
 209                  * kernel address buffer.
 210                  */
 211                 ((char *)sunaddr)[len]=0;
 212                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 213                 return len;
 214         }
 215
 216         *hashp = unix_hash_fold(csum_partial((char*)sunaddr, len, 0));
 217         return len;
 218 }
 219
 220 static void __unix_remove_socket(struct sock *sk)
 221 {
 222         sk_del_node_init(sk);
 223 }
 224
 225 static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
 226 {
 227         BUG_TRAP(sk_unhashed(sk));
 228         sk_add_node(sk, list);
 229 }
 230
 231 static inline void unix_remove_socket(struct sock *sk)
 232 {
 233         spin_lock(&unix_table_lock);
 234         __unix_remove_socket(sk);
 235         spin_unlock(&unix_table_lock);
 236 }
 237
 238 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
 239 {
 240         spin_lock(&unix_table_lock);
 241         __unix_insert_socket(list, sk);
 242         spin_unlock(&unix_table_lock);
 243 }
 244
 245 static struct sock *__unix_find_socket_byname(struct net *net,
 246                                               struct sockaddr_un *sunname,
 247                                               int len, int type, unsigned hash)
 248 {
 249         struct sock *s;
 250         struct hlist_node *node;
 251
 252         sk_for_each(s, node, &unix_socket_table[hash ^ type]) {
 253                 struct unix_sock *u = unix_sk(s);
 254
 255                 if (!net_eq(sock_net(s), net))
 256                         continue;
 257
 258                 if (u->addr->len == len &&
 259                     !memcmp(u->addr->name, sunname, len))
 260                         goto found;
 261         }
 262         s = NULL;
 263 found:
 264         return s;
 265 }
 266
 267 static inline struct sock *unix_find_socket_byname(struct net *net,
 268                                                    struct sockaddr_un *sunname,
 269                                                    int len, int type,
 270                                                    unsigned hash)
 271 {
 272         struct sock *s;
 273
 274         spin_lock(&unix_table_lock);
 275         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 276         if (s)
 277                 sock_hold(s);
 278         spin_unlock(&unix_table_lock);
 279         return s;
 280 }
 281
 282 static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i)
 283 {
 284         struct sock *s;
 285         struct hlist_node *node;
 286
 287         spin_lock(&unix_table_lock);
 288         sk_for_each(s, node,
 289                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 290                 struct dentry *dentry = unix_sk(s)->dentry;
 291
 292                 if (!net_eq(sock_net(s), net))
 293                         continue;
 294
 295                 if(dentry && dentry->d_inode == i)
 296                 {
 297                         sock_hold(s);
 298                         goto found;
 299                 }
 300         }
 301         s = NULL;
 302 found:
 303         spin_unlock(&unix_table_lock);
 304         return s;
 305 }
 306
 307 static inline int unix_writable(struct sock *sk)
 308 {
 309         return (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 310 }
 311
 312 static void unix_write_space(struct sock *sk)
 313 {
 314         read_lock(&sk->sk_callback_lock);
 315         if (unix_writable(sk)) {
 316                 if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
 317                         wake_up_interruptible_sync(sk->sk_sleep);
 318                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 319         }
 320         read_unlock(&sk->sk_callback_lock);
 321 }
 322
 323 /* When dgram socket disconnects (or changes its peer), we clear its receive
 324  * queue of packets arrived from previous peer. First, it allows to do
 325  * flow control based only on wmem_alloc; second, sk connected to peer
 326  * may receive messages only from that peer. */
 327 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 328 {
 329         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 330                 skb_queue_purge(&sk->sk_receive_queue);
 331                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 332
 333                 /* If one link of bidirectional dgram pipe is disconnected,
 334                  * we signal error. Messages are lost. Do not make this,
 335                  * when peer was not connected to us.
 336                  */
 337                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 338                         other->sk_err = ECONNRESET;
 339                         other->sk_error_report(other);
 340                 }
 341         }
 342 }
 343
 344 static void unix_sock_destructor(struct sock *sk)
 345 {
 346         struct unix_sock *u = unix_sk(sk);
 347
 348         skb_queue_purge(&sk->sk_receive_queue);
 349
 350         BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
 351         BUG_TRAP(sk_unhashed(sk));
 352         BUG_TRAP(!sk->sk_socket);
 353         if (!sock_flag(sk, SOCK_DEAD)) {
 354                 printk("Attempt to release alive unix socket: %p\n", sk);
 355                 return;
 356         }
 357
 358         if (u->addr)
 359                 unix_release_addr(u->addr);
 360
 361         atomic_dec(&unix_nr_socks);
 362 #ifdef UNIX_REFCNT_DEBUG
 363         printk(KERN_DEBUG "UNIX %p is destroyed, %d are still alive.\n", sk, atomic_read(&unix_nr_socks));
 364 #endif
 365 }
 366
 367 static int unix_release_sock (struct sock *sk, int embrion)
 368 {
 369         struct unix_sock *u = unix_sk(sk);
 370         struct dentry *dentry;
 371         struct vfsmount *mnt;
 372         struct sock *skpair;
 373         struct sk_buff *skb;
 374         int state;
 375
 376         unix_remove_socket(sk);
 377
 378         /* Clear state */
 379         unix_state_lock(sk);
 380         sock_orphan(sk);
 381         sk->sk_shutdown = SHUTDOWN_MASK;
 382         dentry       = u->dentry;
 383         u->dentry    = NULL;
 384         mnt          = u->mnt;
 385         u->mnt       = NULL;
 386         state = sk->sk_state;
 387         sk->sk_state = TCP_CLOSE;
 388         unix_state_unlock(sk);
 389
 390         wake_up_interruptible_all(&u->peer_wait);
 391
 392         skpair=unix_peer(sk);
 393
 394         if (skpair!=NULL) {
 395                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
 396                         unix_state_lock(skpair);
 397                         /* No more writes */
 398                         skpair->sk_shutdown = SHUTDOWN_MASK;
 399                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
 400                                 skpair->sk_err = ECONNRESET;
 401                         unix_state_unlock(skpair);
 402                         skpair->sk_state_change(skpair);
 403                         read_lock(&skpair->sk_callback_lock);
 404                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
 405                         read_unlock(&skpair->sk_callback_lock);
 406                 }
 407                 sock_put(skpair); /* It may now die */
 408                 unix_peer(sk) = NULL;
 409         }
 410
 411         /* Try to flush out this socket. Throw out buffers at least */
 412
 413         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
 414                 if (state==TCP_LISTEN)
 415                         unix_release_sock(skb->sk, 1);
 416                 /* passed fds are erased in the kfree_skb hook        */
 417                 kfree_skb(skb);
 418         }
 419
 420         if (dentry) {
 421                 dput(dentry);
 422                 mntput(mnt);
 423         }
 424
 425         sock_put(sk);
 426
 427         /* ---- Socket is dead now and most probably destroyed ---- */
 428
 429         /*
 430          * Fixme: BSD difference: In BSD all sockets connected to use get
 431          *        ECONNRESET and we die on the spot. In Linux we behave
 432          *        like files and pipes do and wait for the last
 433          *        dereference.
 434          *
 435          * Can't we simply set sock->err?
 436          *
 437          *        What the above comment does talk about? --ANK(980817)
 438          */
 439
 440         if (unix_tot_inflight)
 441                 unix_gc();              /* Garbage collect fds */
 442
 443         return 0;
 444 }
 445
 446 static int unix_listen(struct socket *sock, int backlog)
 447 {
 448         int err;
 449         struct sock *sk = sock->sk;
 450         struct unix_sock *u = unix_sk(sk);
 451
 452         err = -EOPNOTSUPP;
 453         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
 454                 goto out;                       /* Only stream/seqpacket sockets accept */
 455         err = -EINVAL;
 456         if (!u->addr)
 457                 goto out;                       /* No listens on an unbound socket */
 458         unix_state_lock(sk);
 459         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 460                 goto out_unlock;
 461         if (backlog > sk->sk_max_ack_backlog)
 462                 wake_up_interruptible_all(&u->peer_wait);
 463         sk->sk_max_ack_backlog  = backlog;
 464         sk->sk_state            = TCP_LISTEN;
 465         /* set credentials so connect can copy them */
 466         sk->sk_peercred.pid     = task_tgid_vnr(current);
 467         sk->sk_peercred.uid     = current->euid;
 468         sk->sk_peercred.gid     = current->egid;
 469         err = 0;
 470
 471 out_unlock:
 472         unix_state_unlock(sk);
 473 out:
 474         return err;
 475 }
 476
 477 static int unix_release(struct socket *);
 478 static int unix_bind(struct socket *, struct sockaddr *, int);
 479 static int unix_stream_connect(struct socket *, struct sockaddr *,
 480                                int addr_len, int flags);
 481 static int unix_socketpair(struct socket *, struct socket *);
 482 static int unix_accept(struct socket *, struct socket *, int);
 483 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
 484 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
 485 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
 486 static int unix_shutdown(struct socket *, int);
 487 static int unix_stream_sendmsg(struct kiocb *, struct socket *,
 488                                struct msghdr *, size_t);
 489 static int unix_stream_recvmsg(struct kiocb *, struct socket *,
 490                                struct msghdr *, size_t, int);
 491 static int unix_dgram_sendmsg(struct kiocb *, struct socket *,
 492                               struct msghdr *, size_t);
 493 static int unix_dgram_recvmsg(struct kiocb *, struct socket *,
 494                               struct msghdr *, size_t, int);
 495 static int unix_dgram_connect(struct socket *, struct sockaddr *,
 496                               int, int);
 497 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
 498                                   struct msghdr *, size_t);
 499
 500 static const struct proto_ops unix_stream_ops = {
 501         .family =       PF_UNIX,
 502         .owner =        THIS_MODULE,
 503         .release =      unix_release,
 504         .bind =         unix_bind,
 505         .connect =      unix_stream_connect,
 506         .socketpair =   unix_socketpair,
 507         .accept =       unix_accept,
 508         .getname =      unix_getname,
 509         .poll =         unix_poll,
 510         .ioctl =        unix_ioctl,
 511         .listen =       unix_listen,
 512         .shutdown =     unix_shutdown,
 513         .setsockopt =   sock_no_setsockopt,
 514         .getsockopt =   sock_no_getsockopt,
 515         .sendmsg =      unix_stream_sendmsg,
 516         .recvmsg =      unix_stream_recvmsg,
 517         .mmap =         sock_no_mmap,
 518         .sendpage =     sock_no_sendpage,
 519 };
 520
 521 static const struct proto_ops unix_dgram_ops = {
 522         .family =       PF_UNIX,
 523         .owner =        THIS_MODULE,
 524         .release =      unix_release,
 525         .bind =         unix_bind,
 526         .connect =      unix_dgram_connect,
 527         .socketpair =   unix_socketpair,
 528         .accept =       sock_no_accept,
 529         .getname =      unix_getname,
 530         .poll =         datagram_poll,
 531         .ioctl =        unix_ioctl,
 532         .listen =       sock_no_listen,
 533         .shutdown =     unix_shutdown,
 534         .setsockopt =   sock_no_setsockopt,
 535         .getsockopt =   sock_no_getsockopt,
 536         .sendmsg =      unix_dgram_sendmsg,
 537         .recvmsg =      unix_dgram_recvmsg,
 538         .mmap =         sock_no_mmap,
 539         .sendpage =     sock_no_sendpage,
 540 };
 541
 542 static const struct proto_ops unix_seqpacket_ops = {
 543         .family =       PF_UNIX,
 544         .owner =        THIS_MODULE,
 545         .release =      unix_release,
 546         .bind =         unix_bind,
 547         .connect =      unix_stream_connect,
 548         .socketpair =   unix_socketpair,
 549         .accept =       unix_accept,
 550         .getname =      unix_getname,
 551         .poll =         datagram_poll,
 552         .ioctl =        unix_ioctl,
 553         .listen =       unix_listen,
 554         .shutdown =     unix_shutdown,
 555         .setsockopt =   sock_no_setsockopt,
 556         .getsockopt =   sock_no_getsockopt,
 557         .sendmsg =      unix_seqpacket_sendmsg,
 558         .recvmsg =      unix_dgram_recvmsg,
 559         .mmap =         sock_no_mmap,
 560         .sendpage =     sock_no_sendpage,
 561 };
 562
 563 static struct proto unix_proto = {
 564         .name     = "UNIX",
 565         .owner    = THIS_MODULE,
 566         .obj_size = sizeof(struct unix_sock),
 567 };
 568
 569 /*
 570  * AF_UNIX sockets do not interact with hardware, hence they
 571  * dont trigger interrupts - so it's safe for them to have
 572  * bh-unsafe locking for their sk_receive_queue.lock. Split off
 573  * this special lock-class by reinitializing the spinlock key:
 574  */
 575 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
 576
 577 static struct sock * unix_create1(struct net *net, struct socket *sock)
 578 {
 579         struct sock *sk = NULL;
 580         struct unix_sock *u;
 581
 582         atomic_inc(&unix_nr_socks);
 583         if (atomic_read(&unix_nr_socks) > 2 * get_max_files())
 584                 goto out;
 585
 586         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto);
 587         if (!sk)
 588                 goto out;
 589
 590         sock_init_data(sock,sk);
 591         lockdep_set_class(&sk->sk_receive_queue.lock,
 592                                 &af_unix_sk_receive_queue_lock_key);
 593
 594         sk->sk_write_space      = unix_write_space;
 595         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 596         sk->sk_destruct         = unix_sock_destructor;
 597         u         = unix_sk(sk);
 598         u->dentry = NULL;
 599         u->mnt    = NULL;
 600         spin_lock_init(&u->lock);
 601         atomic_set(&u->inflight, 0);
 602         INIT_LIST_HEAD(&u->link);
 603         mutex_init(&u->readlock); /* single task reading lock */
 604         init_waitqueue_head(&u->peer_wait);
 605         unix_insert_socket(unix_sockets_unbound, sk);
 606 out:
 607         if (sk == NULL)
 608                 atomic_dec(&unix_nr_socks);
 609         return sk;
 610 }
 611
 612 static int unix_create(struct net *net, struct socket *sock, int protocol)
 613 {
 614         if (protocol && protocol != PF_UNIX)
 615                 return -EPROTONOSUPPORT;
 616
 617         sock->state = SS_UNCONNECTED;
 618
 619         switch (sock->type) {
 620         case SOCK_STREAM:
 621                 sock->ops = &unix_stream_ops;
 622                 break;
 623                 /*
 624                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 625                  *      nothing uses it.
 626                  */
 627         case SOCK_RAW:
 628                 sock->type=SOCK_DGRAM;
 629         case SOCK_DGRAM:
 630                 sock->ops = &unix_dgram_ops;
 631                 break;
 632         case SOCK_SEQPACKET:
 633                 sock->ops = &unix_seqpacket_ops;
 634                 break;
 635         default:
 636                 return -ESOCKTNOSUPPORT;
 637         }
 638
 639         return unix_create1(net, sock) ? 0 : -ENOMEM;
 640 }
 641
 642 static int unix_release(struct socket *sock)
 643 {
 644         struct sock *sk = sock->sk;
 645
 646         if (!sk)
 647                 return 0;
 648
 649         sock->sk = NULL;
 650
 651         return unix_release_sock (sk, 0);
 652 }
 653
 654 static int unix_autobind(struct socket *sock)
 655 {
 656         struct sock *sk = sock->sk;
 657         struct net *net = sock_net(sk);
 658         struct unix_sock *u = unix_sk(sk);
 659         static u32 ordernum = 1;
 660         struct unix_address * addr;
 661         int err;
 662
 663         mutex_lock(&u->readlock);
 664
 665         err = 0;
 666         if (u->addr)
 667                 goto out;
 668
 669         err = -ENOMEM;
 670         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 671         if (!addr)
 672                 goto out;
 673
 674         addr->name->sun_family = AF_UNIX;
 675         atomic_set(&addr->refcnt, 1);
 676
 677 retry:
 678         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 679         addr->hash = unix_hash_fold(csum_partial((void*)addr->name, addr->len, 0));
 680
 681         spin_lock(&unix_table_lock);
 682         ordernum = (ordernum+1)&0xFFFFF;
 683
 684         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 685                                       addr->hash)) {
 686                 spin_unlock(&unix_table_lock);
 687                 /* Sanity yield. It is unusual case, but yet... */
 688                 if (!(ordernum&0xFF))
 689                         yield();
 690                 goto retry;
 691         }
 692         addr->hash ^= sk->sk_type;
 693
 694         __unix_remove_socket(sk);
 695         u->addr = addr;
 696         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 697         spin_unlock(&unix_table_lock);
 698         err = 0;
 699
 700 out:    mutex_unlock(&u->readlock);
 701         return err;
 702 }
 703
 704 static struct sock *unix_find_other(struct net *net,
 705                                     struct sockaddr_un *sunname, int len,
 706                                     int type, unsigned hash, int *error)
 707 {
 708         struct sock *u;
 709         struct nameidata nd;
 710         int err = 0;
 711
 712         if (sunname->sun_path[0]) {
 713                 err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
 714                 if (err)
 715                         goto fail;
 716                 err = vfs_permission(&nd, MAY_WRITE);
 717                 if (err)
 718                         goto put_fail;
 719
 720                 err = -ECONNREFUSED;
 721                 if (!S_ISSOCK(nd.path.dentry->d_inode->i_mode))
 722                         goto put_fail;
 723                 u = unix_find_socket_byinode(net, nd.path.dentry->d_inode);
 724                 if (!u)
 725                         goto put_fail;
 726
 727                 if (u->sk_type == type)
 728                         touch_atime(nd.path.mnt, nd.path.dentry);
 729
 730                 path_put(&nd.path);
 731
 732                 err=-EPROTOTYPE;
 733                 if (u->sk_type != type) {
 734                         sock_put(u);
 735                         goto fail;
 736                 }
 737         } else {
 738                 err = -ECONNREFUSED;
 739                 u=unix_find_socket_byname(net, sunname, len, type, hash);
 740                 if (u) {
 741                         struct dentry *dentry;
 742                         dentry = unix_sk(u)->dentry;
 743                         if (dentry)
 744                                 touch_atime(unix_sk(u)->mnt, dentry);
 745                 } else
 746                         goto fail;
 747         }
 748         return u;
 749
 750 put_fail:
 751         path_put(&nd.path);
 752 fail:
 753         *error=err;
 754         return NULL;
 755 }
 756
 757
 758 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 759 {
 760         struct sock *sk = sock->sk;
 761         struct net *net = sock_net(sk);
 762         struct unix_sock *u = unix_sk(sk);
 763         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
 764         struct dentry * dentry = NULL;
 765         struct nameidata nd;
 766         int err;
 767         unsigned hash;
 768         struct unix_address *addr;
 769         struct hlist_head *list;
 770
 771         err = -EINVAL;
 772         if (sunaddr->sun_family != AF_UNIX)
 773                 goto out;
 774
 775         if (addr_len==sizeof(short)) {
 776                 err = unix_autobind(sock);
 777                 goto out;
 778         }
 779
 780         err = unix_mkname(sunaddr, addr_len, &hash);
 781         if (err < 0)
 782                 goto out;
 783         addr_len = err;
 784
 785         mutex_lock(&u->readlock);
 786
 787         err = -EINVAL;
 788         if (u->addr)
 789                 goto out_up;
 790
 791         err = -ENOMEM;
 792         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
 793         if (!addr)
 794                 goto out_up;
 795
 796         memcpy(addr->name, sunaddr, addr_len);
 797         addr->len = addr_len;
 798         addr->hash = hash ^ sk->sk_type;
 799         atomic_set(&addr->refcnt, 1);
 800
 801         if (sunaddr->sun_path[0]) {
 802                 unsigned int mode;
 803                 err = 0;
 804                 /*
 805                  * Get the parent directory, calculate the hash for last
 806                  * component.
 807                  */
 808                 err = path_lookup(sunaddr->sun_path, LOOKUP_PARENT, &nd);
 809                 if (err)
 810                         goto out_mknod_parent;
 811
 812                 dentry = lookup_create(&nd, 0);
 813                 err = PTR_ERR(dentry);
 814                 if (IS_ERR(dentry))
 815                         goto out_mknod_unlock;
 816
 817                 /*
 818                  * All right, let's create it.
 819                  */
 820                 mode = S_IFSOCK |
 821                        (SOCK_INODE(sock)->i_mode & ~current->fs->umask);
 822                 err = mnt_want_write(nd.path.mnt);
 823                 if (err)
 824                         goto out_mknod_dput;
 825                 err = vfs_mknod(nd.path.dentry->d_inode, dentry, mode, 0);
 826                 mnt_drop_write(nd.path.mnt);
 827                 if (err)
 828                         goto out_mknod_dput;
 829                 mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 830                 dput(nd.path.dentry);
 831                 nd.path.dentry = dentry;
 832
 833                 addr->hash = UNIX_HASH_SIZE;
 834         }
 835
 836         spin_lock(&unix_table_lock);
 837
 838         if (!sunaddr->sun_path[0]) {
 839                 err = -EADDRINUSE;
 840                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
 841                                               sk->sk_type, hash)) {
 842                         unix_release_addr(addr);
 843                         goto out_unlock;
 844                 }
 845
 846                 list = &unix_socket_table[addr->hash];
 847         } else {
 848                 list = &unix_socket_table[dentry->d_inode->i_ino & (UNIX_HASH_SIZE-1)];
 849                 u->dentry = nd.path.dentry;
 850                 u->mnt    = nd.path.mnt;
 851         }
 852
 853         err = 0;
 854         __unix_remove_socket(sk);
 855         u->addr = addr;
 856         __unix_insert_socket(list, sk);
 857
 858 out_unlock:
 859         spin_unlock(&unix_table_lock);
 860 out_up:
 861         mutex_unlock(&u->readlock);
 862 out:
 863         return err;
 864
 865 out_mknod_dput:
 866         dput(dentry);
 867 out_mknod_unlock:
 868         mutex_unlock(&nd.path.dentry->d_inode->i_mutex);
 869         path_put(&nd.path);
 870 out_mknod_parent:
 871         if (err==-EEXIST)
 872                 err=-EADDRINUSE;
 873         unix_release_addr(addr);
 874         goto out_up;
 875 }
 876
 877 static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
 878 {
 879         if (unlikely(sk1 == sk2) || !sk2) {
 880                 unix_state_lock(sk1);
 881                 return;
 882         }
 883         if (sk1 < sk2) {
 884                 unix_state_lock(sk1);
 885                 unix_state_lock_nested(sk2);
 886         } else {
 887                 unix_state_lock(sk2);
 888                 unix_state_lock_nested(sk1);
 889         }
 890 }
 891
 892 static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
 893 {
 894         if (unlikely(sk1 == sk2) || !sk2) {
 895                 unix_state_unlock(sk1);
 896                 return;
 897         }
 898         unix_state_unlock(sk1);
 899         unix_state_unlock(sk2);
 900 }
 901
 902 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
 903                               int alen, int flags)
 904 {
 905         struct sock *sk = sock->sk;
 906         struct net *net = sock_net(sk);
 907         struct sockaddr_un *sunaddr=(struct sockaddr_un*)addr;
 908         struct sock *other;
 909         unsigned hash;
 910         int err;
 911
 912         if (addr->sa_family != AF_UNSPEC) {
 913                 err = unix_mkname(sunaddr, alen, &hash);
 914                 if (err < 0)
 915                         goto out;
 916                 alen = err;
 917
 918                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
 919                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
 920                         goto out;
 921
 922 restart:
 923                 other=unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
 924                 if (!other)
 925                         goto out;
 926
 927                 unix_state_double_lock(sk, other);
 928
 929                 /* Apparently VFS overslept socket death. Retry. */
 930                 if (sock_flag(other, SOCK_DEAD)) {
 931                         unix_state_double_unlock(sk, other);
 932                         sock_put(other);
 933                         goto restart;
 934                 }
 935
 936                 err = -EPERM;
 937                 if (!unix_may_send(sk, other))
 938                         goto out_unlock;
 939
 940                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
 941                 if (err)
 942                         goto out_unlock;
 943
 944         } else {
 945                 /*
 946                  *      1003.1g breaking connected state with AF_UNSPEC
 947                  */
 948                 other = NULL;
 949                 unix_state_double_lock(sk, other);
 950         }
 951
 952         /*
 953          * If it was connected, reconnect.
 954          */
 955         if (unix_peer(sk)) {
 956                 struct sock *old_peer = unix_peer(sk);
 957                 unix_peer(sk)=other;
 958                 unix_state_double_unlock(sk, other);
 959
 960                 if (other != old_peer)
 961                         unix_dgram_disconnected(sk, old_peer);
 962                 sock_put(old_peer);
 963         } else {
 964                 unix_peer(sk)=other;
 965                 unix_state_double_unlock(sk, other);
 966         }
 967         return 0;
 968
 969 out_unlock:
 970         unix_state_double_unlock(sk, other);
 971         sock_put(other);
 972 out:
 973         return err;
 974 }
 975
 976 static long unix_wait_for_peer(struct sock *other, long timeo)
 977 {
 978         struct unix_sock *u = unix_sk(other);
 979         int sched;
 980         DEFINE_WAIT(wait);
 981
 982         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
 983
 984         sched = !sock_flag(other, SOCK_DEAD) &&
 985                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
 986                 (skb_queue_len(&other->sk_receive_queue) >
 987                  other->sk_max_ack_backlog);
 988
 989         unix_state_unlock(other);
 990
 991         if (sched)
 992                 timeo = schedule_timeout(timeo);
 993
 994         finish_wait(&u->peer_wait, &wait);
 995         return timeo;
 996 }
 997
 998 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
 999                                int addr_len, int flags)
1000 {
1001         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1002         struct sock *sk = sock->sk;
1003         struct net *net = sock_net(sk);
1004         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1005         struct sock *newsk = NULL;
1006         struct sock *other = NULL;
1007         struct sk_buff *skb = NULL;
1008         unsigned hash;
1009         int st;
1010         int err;
1011         long timeo;
1012
1013         err = unix_mkname(sunaddr, addr_len, &hash);
1014         if (err < 0)
1015                 goto out;
1016         addr_len = err;
1017
1018         if (test_bit(SOCK_PASSCRED, &sock->flags)
1019                 && !u->addr && (err = unix_autobind(sock)) != 0)
1020                 goto out;
1021
1022         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1023
1024         /* First of all allocate resources.
1025            If we will make it after state is locked,
1026            we will have to recheck all again in any case.
1027          */
1028
1029         err = -ENOMEM;
1030
1031         /* create new sock for complete connection */
1032         newsk = unix_create1(sock_net(sk), NULL);
1033         if (newsk == NULL)
1034                 goto out;
1035
1036         /* Allocate skb for sending to listening sock */
1037         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1038         if (skb == NULL)
1039                 goto out;
1040
1041 restart:
1042         /*  Find listening sock. */
1043         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1044         if (!other)
1045                 goto out;
1046
1047         /* Latch state of peer */
1048         unix_state_lock(other);
1049
1050         /* Apparently VFS overslept socket death. Retry. */
1051         if (sock_flag(other, SOCK_DEAD)) {
1052                 unix_state_unlock(other);
1053                 sock_put(other);
1054                 goto restart;
1055         }
1056
1057         err = -ECONNREFUSED;
1058         if (other->sk_state != TCP_LISTEN)
1059                 goto out_unlock;
1060
1061         if (skb_queue_len(&other->sk_receive_queue) >
1062             other->sk_max_ack_backlog) {
1063                 err = -EAGAIN;
1064                 if (!timeo)
1065                         goto out_unlock;
1066
1067                 timeo = unix_wait_for_peer(other, timeo);
1068
1069                 err = sock_intr_errno(timeo);
1070                 if (signal_pending(current))
1071                         goto out;
1072                 sock_put(other);
1073                 goto restart;
1074         }
1075
1076         /* Latch our state.
1077
1078            It is tricky place. We need to grab write lock and cannot
1079            drop lock on peer. It is dangerous because deadlock is
1080            possible. Connect to self case and simultaneous
1081            attempt to connect are eliminated by checking socket
1082            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1083            check this before attempt to grab lock.
1084
1085            Well, and we have to recheck the state after socket locked.
1086          */
1087         st = sk->sk_state;
1088
1089         switch (st) {
1090         case TCP_CLOSE:
1091                 /* This is ok... continue with connect */
1092                 break;
1093         case TCP_ESTABLISHED:
1094                 /* Socket is already connected */
1095                 err = -EISCONN;
1096                 goto out_unlock;
1097         default:
1098                 err = -EINVAL;
1099                 goto out_unlock;
1100         }
1101
1102         unix_state_lock_nested(sk);
1103
1104         if (sk->sk_state != st) {
1105                 unix_state_unlock(sk);
1106                 unix_state_unlock(other);
1107                 sock_put(other);
1108                 goto restart;
1109         }
1110
1111         err = security_unix_stream_connect(sock, other->sk_socket, newsk);
1112         if (err) {
1113                 unix_state_unlock(sk);
1114                 goto out_unlock;
1115         }
1116
1117         /* The way is open! Fastly set all the necessary fields... */
1118
1119         sock_hold(sk);
1120         unix_peer(newsk)        = sk;
1121         newsk->sk_state         = TCP_ESTABLISHED;
1122         newsk->sk_type          = sk->sk_type;
1123         newsk->sk_peercred.pid  = task_tgid_vnr(current);
1124         newsk->sk_peercred.uid  = current->euid;
1125         newsk->sk_peercred.gid  = current->egid;
1126         newu = unix_sk(newsk);
1127         newsk->sk_sleep         = &newu->peer_wait;
1128         otheru = unix_sk(other);
1129
1130         /* copy address information from listening to new sock*/
1131         if (otheru->addr) {
1132                 atomic_inc(&otheru->addr->refcnt);
1133                 newu->addr = otheru->addr;
1134         }
1135         if (otheru->dentry) {
1136                 newu->dentry    = dget(otheru->dentry);
1137                 newu->mnt       = mntget(otheru->mnt);
1138         }
1139
1140         /* Set credentials */
1141         sk->sk_peercred = other->sk_peercred;
1142
1143         sock->state     = SS_CONNECTED;
1144         sk->sk_state    = TCP_ESTABLISHED;
1145         sock_hold(newsk);
1146
1147         smp_mb__after_atomic_inc();     /* sock_hold() does an atomic_inc() */
1148         unix_peer(sk)   = newsk;
1149
1150         unix_state_unlock(sk);
1151
1152         /* take ten and and send info to listening sock */
1153         spin_lock(&other->sk_receive_queue.lock);
1154         __skb_queue_tail(&other->sk_receive_queue, skb);
1155         spin_unlock(&other->sk_receive_queue.lock);
1156         unix_state_unlock(other);
1157         other->sk_data_ready(other, 0);
1158         sock_put(other);
1159         return 0;
1160
1161 out_unlock:
1162         if (other)
1163                 unix_state_unlock(other);
1164
1165 out:
1166         if (skb)
1167                 kfree_skb(skb);
1168         if (newsk)
1169                 unix_release_sock(newsk, 0);
1170         if (other)
1171                 sock_put(other);
1172         return err;
1173 }
1174
1175 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1176 {
1177         struct sock *ska=socka->sk, *skb = sockb->sk;
1178
1179         /* Join our sockets back to back */
1180         sock_hold(ska);
1181         sock_hold(skb);
1182         unix_peer(ska)=skb;
1183         unix_peer(skb)=ska;
1184         ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current);
1185         ska->sk_peercred.uid = skb->sk_peercred.uid = current->euid;
1186         ska->sk_peercred.gid = skb->sk_peercred.gid = current->egid;
1187
1188         if (ska->sk_type != SOCK_DGRAM) {
1189                 ska->sk_state = TCP_ESTABLISHED;
1190                 skb->sk_state = TCP_ESTABLISHED;
1191                 socka->state  = SS_CONNECTED;
1192                 sockb->state  = SS_CONNECTED;
1193         }
1194         return 0;
1195 }
1196
1197 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1198 {
1199         struct sock *sk = sock->sk;
1200         struct sock *tsk;
1201         struct sk_buff *skb;
1202         int err;
1203
1204         err = -EOPNOTSUPP;
1205         if (sock->type!=SOCK_STREAM && sock->type!=SOCK_SEQPACKET)
1206                 goto out;
1207
1208         err = -EINVAL;
1209         if (sk->sk_state != TCP_LISTEN)
1210                 goto out;
1211
1212         /* If socket state is TCP_LISTEN it cannot change (for now...),
1213          * so that no locks are necessary.
1214          */
1215
1216         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1217         if (!skb) {
1218                 /* This means receive shutdown. */
1219                 if (err == 0)
1220                         err = -EINVAL;
1221                 goto out;
1222         }
1223
1224         tsk = skb->sk;
1225         skb_free_datagram(sk, skb);
1226         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1227
1228         /* attach accepted sock to socket */
1229         unix_state_lock(tsk);
1230         newsock->state = SS_CONNECTED;
1231         sock_graft(tsk, newsock);
1232         unix_state_unlock(tsk);
1233         return 0;
1234
1235 out:
1236         return err;
1237 }
1238
1239
1240 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1241 {
1242         struct sock *sk = sock->sk;
1243         struct unix_sock *u;
1244         struct sockaddr_un *sunaddr=(struct sockaddr_un *)uaddr;
1245         int err = 0;
1246
1247         if (peer) {
1248                 sk = unix_peer_get(sk);
1249
1250                 err = -ENOTCONN;
1251                 if (!sk)
1252                         goto out;
1253                 err = 0;
1254         } else {
1255                 sock_hold(sk);
1256         }
1257
1258         u = unix_sk(sk);
1259         unix_state_lock(sk);
1260         if (!u->addr) {
1261                 sunaddr->sun_family = AF_UNIX;
1262                 sunaddr->sun_path[0] = 0;
1263                 *uaddr_len = sizeof(short);
1264         } else {
1265                 struct unix_address *addr = u->addr;
1266
1267                 *uaddr_len = addr->len;
1268                 memcpy(sunaddr, addr->name, *uaddr_len);
1269         }
1270         unix_state_unlock(sk);
1271         sock_put(sk);
1272 out:
1273         return err;
1274 }
1275
1276 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1277 {
1278         int i;
1279
1280         scm->fp = UNIXCB(skb).fp;
1281         skb->destructor = sock_wfree;
1282         UNIXCB(skb).fp = NULL;
1283
1284         for (i=scm->fp->count-1; i>=0; i--)
1285                 unix_notinflight(scm->fp->fp[i]);
1286 }
1287
1288 static void unix_destruct_fds(struct sk_buff *skb)
1289 {
1290         struct scm_cookie scm;
1291         memset(&scm, 0, sizeof(scm));
1292         unix_detach_fds(&scm, skb);
1293
1294         /* Alas, it calls VFS */
1295         /* So fscking what? fput() had been SMP-safe since the last Summer */
1296         scm_destroy(&scm);
1297         sock_wfree(skb);
1298 }
1299
1300 static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1301 {
1302         int i;
1303         for (i=scm->fp->count-1; i>=0; i--)
1304                 unix_inflight(scm->fp->fp[i]);
1305         UNIXCB(skb).fp = scm->fp;
1306         skb->destructor = unix_destruct_fds;
1307         scm->fp = NULL;
1308 }
1309
1310 /*
1311  *      Send AF_UNIX data.
1312  */
1313
1314 static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock,
1315                               struct msghdr *msg, size_t len)
1316 {
1317         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1318         struct sock *sk = sock->sk;
1319         struct net *net = sock_net(sk);
1320         struct unix_sock *u = unix_sk(sk);
1321         struct sockaddr_un *sunaddr=msg->msg_name;
1322         struct sock *other = NULL;
1323         int namelen = 0; /* fake GCC */
1324         int err;
1325         unsigned hash;
1326         struct sk_buff *skb;
1327         long timeo;
1328         struct scm_cookie tmp_scm;
1329
1330         if (NULL == siocb->scm)
1331                 siocb->scm = &tmp_scm;
1332         err = scm_send(sock, msg, siocb->scm);
1333         if (err < 0)
1334                 return err;
1335
1336         err = -EOPNOTSUPP;
1337         if (msg->msg_flags&MSG_OOB)
1338                 goto out;
1339
1340         if (msg->msg_namelen) {
1341                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1342                 if (err < 0)
1343                         goto out;
1344                 namelen = err;
1345         } else {
1346                 sunaddr = NULL;
1347                 err = -ENOTCONN;
1348                 other = unix_peer_get(sk);
1349                 if (!other)
1350                         goto out;
1351         }
1352
1353         if (test_bit(SOCK_PASSCRED, &sock->flags)
1354                 && !u->addr && (err = unix_autobind(sock)) != 0)
1355                 goto out;
1356
1357         err = -EMSGSIZE;
1358         if (len > sk->sk_sndbuf - 32)
1359                 goto out;
1360
1361         skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
1362         if (skb==NULL)
1363                 goto out;
1364
1365         memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1366         if (siocb->scm->fp)
1367                 unix_attach_fds(siocb->scm, skb);
1368         unix_get_secdata(siocb->scm, skb);
1369
1370         skb_reset_transport_header(skb);
1371         err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
1372         if (err)
1373                 goto out_free;
1374
1375         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1376
1377 restart:
1378         if (!other) {
1379                 err = -ECONNRESET;
1380                 if (sunaddr == NULL)
1381                         goto out_free;
1382
1383                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1384                                         hash, &err);
1385                 if (other==NULL)
1386                         goto out_free;
1387         }
1388
1389         unix_state_lock(other);
1390         err = -EPERM;
1391         if (!unix_may_send(sk, other))
1392                 goto out_unlock;
1393
1394         if (sock_flag(other, SOCK_DEAD)) {
1395                 /*
1396                  *      Check with 1003.1g - what should
1397                  *      datagram error
1398                  */
1399                 unix_state_unlock(other);
1400                 sock_put(other);
1401
1402                 err = 0;
1403                 unix_state_lock(sk);
1404                 if (unix_peer(sk) == other) {
1405                         unix_peer(sk)=NULL;
1406                         unix_state_unlock(sk);
1407
1408                         unix_dgram_disconnected(sk, other);
1409                         sock_put(other);
1410                         err = -ECONNREFUSED;
1411                 } else {
1412                         unix_state_unlock(sk);
1413                 }
1414
1415                 other = NULL;
1416                 if (err)
1417                         goto out_free;
1418                 goto restart;
1419         }
1420
1421         err = -EPIPE;
1422         if (other->sk_shutdown & RCV_SHUTDOWN)
1423                 goto out_unlock;
1424
1425         if (sk->sk_type != SOCK_SEQPACKET) {
1426                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1427                 if (err)
1428                         goto out_unlock;
1429         }
1430
1431         if (unix_peer(other) != sk &&
1432             (skb_queue_len(&other->sk_receive_queue) >
1433              other->sk_max_ack_backlog)) {
1434                 if (!timeo) {
1435                         err = -EAGAIN;
1436                         goto out_unlock;
1437                 }
1438
1439                 timeo = unix_wait_for_peer(other, timeo);
1440
1441                 err = sock_intr_errno(timeo);
1442                 if (signal_pending(current))
1443                         goto out_free;
1444
1445                 goto restart;
1446         }
1447
1448         skb_queue_tail(&other->sk_receive_queue, skb);
1449         unix_state_unlock(other);
1450         other->sk_data_ready(other, len);
1451         sock_put(other);
1452         scm_destroy(siocb->scm);
1453         return len;
1454
1455 out_unlock:
1456         unix_state_unlock(other);
1457 out_free:
1458         kfree_skb(skb);
1459 out:
1460         if (other)
1461                 sock_put(other);
1462         scm_destroy(siocb->scm);
1463         return err;
1464 }
1465
1466
1467 static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
1468                                struct msghdr *msg, size_t len)
1469 {
1470         struct sock_iocb *siocb = kiocb_to_siocb(kiocb);
1471         struct sock *sk = sock->sk;
1472         struct sock *other = NULL;
1473         struct sockaddr_un *sunaddr=msg->msg_name;
1474         int err,size;
1475         struct sk_buff *skb;
1476         int sent=0;
1477         struct scm_cookie tmp_scm;
1478
1479         if (NULL == siocb->scm)
1480                 siocb->scm = &tmp_scm;
1481         err = scm_send(sock, msg, siocb->scm);
1482         if (err < 0)
1483                 return err;
1484
1485         err = -EOPNOTSUPP;
1486         if (msg->msg_flags&MSG_OOB)
1487                 goto out_err;
1488
1489         if (msg->msg_namelen) {
1490                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1491                 goto out_err;
1492         } else {
1493                 sunaddr = NULL;
1494                 err = -ENOTCONN;
1495                 other = unix_peer(sk);
1496                 if (!other)
1497                         goto out_err;
1498         }
1499
1500         if (sk->sk_shutdown & SEND_SHUTDOWN)
1501                 goto pipe_err;
1502
1503         while(sent < len)
1504         {
1505                 /*
1506                  *      Optimisation for the fact that under 0.01% of X
1507                  *      messages typically need breaking up.
1508                  */
1509
1510                 size = len-sent;
1511
1512                 /* Keep two messages in the pipe so it schedules better */
1513                 if (size > ((sk->sk_sndbuf >> 1) - 64))
1514                         size = (sk->sk_sndbuf >> 1) - 64;
1515
1516                 if (size > SKB_MAX_ALLOC)
1517                         size = SKB_MAX_ALLOC;
1518
1519                 /*
1520                  *      Grab a buffer
1521                  */
1522
1523                 skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
1524
1525                 if (skb==NULL)
1526                         goto out_err;
1527
1528                 /*
1529                  *      If you pass two values to the sock_alloc_send_skb
1530                  *      it tries to grab the large buffer with GFP_NOFS
1531                  *      (which can fail easily), and if it fails grab the
1532                  *      fallback size buffer which is under a page and will
1533                  *      succeed. [Alan]
1534                  */
1535                 size = min_t(int, size, skb_tailroom(skb));
1536
1537                 memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred));
1538                 if (siocb->scm->fp)
1539                         unix_attach_fds(siocb->scm, skb);
1540
1541                 if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
1542                         kfree_skb(skb);
1543                         goto out_err;
1544                 }
1545
1546                 unix_state_lock(other);
1547
1548                 if (sock_flag(other, SOCK_DEAD) ||
1549                     (other->sk_shutdown & RCV_SHUTDOWN))
1550                         goto pipe_err_free;
1551
1552                 skb_queue_tail(&other->sk_receive_queue, skb);
1553                 unix_state_unlock(other);
1554                 other->sk_data_ready(other, size);
1555                 sent+=size;
1556         }
1557
1558         scm_destroy(siocb->scm);
1559         siocb->scm = NULL;
1560
1561         return sent;
1562
1563 pipe_err_free:
1564         unix_state_unlock(other);
1565         kfree_skb(skb);
1566 pipe_err:
1567         if (sent==0 && !(msg->msg_flags&MSG_NOSIGNAL))
1568                 send_sig(SIGPIPE,current,0);
1569         err = -EPIPE;
1570 out_err:
1571         scm_destroy(siocb->scm);
1572         siocb->scm = NULL;
1573         return sent ? : err;
1574 }
1575
1576 static int unix_seqpacket_sendmsg(struct kiocb *kiocb, struct socket *sock,
1577                                   struct msghdr *msg, size_t len)
1578 {
1579         int err;
1580         struct sock *sk = sock->sk;
1581
1582         err = sock_error(sk);
1583         if (err)
1584                 return err;
1585
1586         if (sk->sk_state != TCP_ESTABLISHED)
1587                 return -ENOTCONN;
1588
1589         if (msg->msg_namelen)
1590                 msg->msg_namelen = 0;
1591
1592         return unix_dgram_sendmsg(kiocb, sock, msg, len);
1593 }
1594
1595 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
1596 {
1597         struct unix_sock *u = unix_sk(sk);
1598
1599         msg->msg_namelen = 0;
1600         if (u->addr) {
1601                 msg->msg_namelen = u->addr->len;
1602                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
1603         }
1604 }
1605
1606 static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock,
1607                               struct msghdr *msg, size_t size,
1608                               int flags)
1609 {
1610         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1611         struct scm_cookie tmp_scm;
1612         struct sock *sk = sock->sk;
1613         struct unix_sock *u = unix_sk(sk);
1614         int noblock = flags & MSG_DONTWAIT;
1615         struct sk_buff *skb;
1616         int err;
1617
1618         err = -EOPNOTSUPP;
1619         if (flags&MSG_OOB)
1620                 goto out;
1621
1622         msg->msg_namelen = 0;
1623
1624         mutex_lock(&u->readlock);
1625
1626         skb = skb_recv_datagram(sk, flags, noblock, &err);
1627         if (!skb) {
1628                 unix_state_lock(sk);
1629                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1630                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
1631                     (sk->sk_shutdown & RCV_SHUTDOWN))
1632                         err = 0;
1633                 unix_state_unlock(sk);
1634                 goto out_unlock;
1635         }
1636
1637         wake_up_interruptible_sync(&u->peer_wait);
1638
1639         if (msg->msg_name)
1640                 unix_copy_addr(msg, skb->sk);
1641
1642         if (size > skb->len)
1643                 size = skb->len;
1644         else if (size < skb->len)
1645                 msg->msg_flags |= MSG_TRUNC;
1646
1647         err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size);
1648         if (err)
1649                 goto out_free;
1650
1651         if (!siocb->scm) {
1652                 siocb->scm = &tmp_scm;
1653                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1654         }
1655         siocb->scm->creds = *UNIXCREDS(skb);
1656         unix_set_secdata(siocb->scm, skb);
1657
1658         if (!(flags & MSG_PEEK))
1659         {
1660                 if (UNIXCB(skb).fp)
1661                         unix_detach_fds(siocb->scm, skb);
1662         }
1663         else
1664         {
1665                 /* It is questionable: on PEEK we could:
1666                    - do not return fds - good, but too simple 8)
1667                    - return fds, and do not return them on read (old strategy,
1668                      apparently wrong)
1669                    - clone fds (I chose it for now, it is the most universal
1670                      solution)
1671
1672                    POSIX 1003.1g does not actually define this clearly
1673                    at all. POSIX 1003.1g doesn't define a lot of things
1674                    clearly however!
1675
1676                 */
1677                 if (UNIXCB(skb).fp)
1678                         siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1679         }
1680         err = size;
1681
1682         scm_recv(sock, msg, siocb->scm, flags);
1683
1684 out_free:
1685         skb_free_datagram(sk,skb);
1686 out_unlock:
1687         mutex_unlock(&u->readlock);
1688 out:
1689         return err;
1690 }
1691
/*
 *      Sleep until data has arrived, but check for races.
 */
1695
1696 static long unix_stream_data_wait(struct sock * sk, long timeo)
1697 {
1698         DEFINE_WAIT(wait);
1699
1700         unix_state_lock(sk);
1701
1702         for (;;) {
1703                 prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
1704
1705                 if (!skb_queue_empty(&sk->sk_receive_queue) ||
1706                     sk->sk_err ||
1707                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
1708                     signal_pending(current) ||
1709                     !timeo)
1710                         break;
1711
1712                 set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1713                 unix_state_unlock(sk);
1714                 timeo = schedule_timeout(timeo);
1715                 unix_state_lock(sk);
1716                 clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
1717         }
1718
1719         finish_wait(sk->sk_sleep, &wait);
1720         unix_state_unlock(sk);
1721         return timeo;
1722 }
1723
1724
1725
1726 static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
1727                                struct msghdr *msg, size_t size,
1728                                int flags)
1729 {
1730         struct sock_iocb *siocb = kiocb_to_siocb(iocb);
1731         struct scm_cookie tmp_scm;
1732         struct sock *sk = sock->sk;
1733         struct unix_sock *u = unix_sk(sk);
1734         struct sockaddr_un *sunaddr=msg->msg_name;
1735         int copied = 0;
1736         int check_creds = 0;
1737         int target;
1738         int err = 0;
1739         long timeo;
1740
1741         err = -EINVAL;
1742         if (sk->sk_state != TCP_ESTABLISHED)
1743                 goto out;
1744
1745         err = -EOPNOTSUPP;
1746         if (flags&MSG_OOB)
1747                 goto out;
1748
1749         target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
1750         timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
1751
1752         msg->msg_namelen = 0;
1753
1754         /* Lock the socket to prevent queue disordering
1755          * while sleeps in memcpy_tomsg
1756          */
1757
1758         if (!siocb->scm) {
1759                 siocb->scm = &tmp_scm;
1760                 memset(&tmp_scm, 0, sizeof(tmp_scm));
1761         }
1762
1763         mutex_lock(&u->readlock);
1764
1765         do
1766         {
1767                 int chunk;
1768                 struct sk_buff *skb;
1769
1770                 unix_state_lock(sk);
1771                 skb = skb_dequeue(&sk->sk_receive_queue);
1772                 if (skb==NULL)
1773                 {
1774                         if (copied >= target)
1775                                 goto unlock;
1776
1777                         /*
1778                          *      POSIX 1003.1g mandates this order.
1779                          */
1780
1781                         if ((err = sock_error(sk)) != 0)
1782                                 goto unlock;
1783                         if (sk->sk_shutdown & RCV_SHUTDOWN)
1784                                 goto unlock;
1785
1786                         unix_state_unlock(sk);
1787                         err = -EAGAIN;
1788                         if (!timeo)
1789                                 break;
1790                         mutex_unlock(&u->readlock);
1791
1792                         timeo = unix_stream_data_wait(sk, timeo);
1793
1794                         if (signal_pending(current)) {
1795                                 err = sock_intr_errno(timeo);
1796                                 goto out;
1797                         }
1798                         mutex_lock(&u->readlock);
1799                         continue;
1800  unlock:
1801                         unix_state_unlock(sk);
1802                         break;
1803                 }
1804                 unix_state_unlock(sk);
1805
1806                 if (check_creds) {
1807                         /* Never glue messages from different writers */
1808                         if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, sizeof(siocb->scm->creds)) != 0) {
1809                                 skb_queue_head(&sk->sk_receive_queue, skb);
1810                                 break;
1811                         }
1812                 } else {
1813                         /* Copy credentials */
1814                         siocb->scm->creds = *UNIXCREDS(skb);
1815                         check_creds = 1;
1816                 }
1817
1818                 /* Copy address just once */
1819                 if (sunaddr)
1820                 {
1821                         unix_copy_addr(msg, skb->sk);
1822                         sunaddr = NULL;
1823                 }
1824
1825                 chunk = min_t(unsigned int, skb->len, size);
1826                 if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
1827                         skb_queue_head(&sk->sk_receive_queue, skb);
1828                         if (copied == 0)
1829                                 copied = -EFAULT;
1830                         break;
1831                 }
1832                 copied += chunk;
1833                 size -= chunk;
1834
1835                 /* Mark read part of skb as used */
1836                 if (!(flags & MSG_PEEK))
1837                 {
1838                         skb_pull(skb, chunk);
1839
1840                         if (UNIXCB(skb).fp)
1841                                 unix_detach_fds(siocb->scm, skb);
1842
1843                         /* put the skb back if we didn't use it up.. */
1844                         if (skb->len)
1845                         {
1846                                 skb_queue_head(&sk->sk_receive_queue, skb);
1847                                 break;
1848                         }
1849
1850                         kfree_skb(skb);
1851
1852                         if (siocb->scm->fp)
1853                                 break;
1854                 }
1855                 else
1856                 {
1857                         /* It is questionable, see note in unix_dgram_recvmsg.
1858                          */
1859                         if (UNIXCB(skb).fp)
1860                                 siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp);
1861
1862                         /* put message back and return */
1863                         skb_queue_head(&sk->sk_receive_queue, skb);
1864                         break;
1865                 }
1866         } while (size);
1867
1868         mutex_unlock(&u->readlock);
1869         scm_recv(sock, msg, siocb->scm, flags);
1870 out:
1871         return copied ? : err;
1872 }
1873
1874 static int unix_shutdown(struct socket *sock, int mode)
1875 {
1876         struct sock *sk = sock->sk;
1877         struct sock *other;
1878
1879         mode = (mode+1)&(RCV_SHUTDOWN|SEND_SHUTDOWN);
1880
1881         if (mode) {
1882                 unix_state_lock(sk);
1883                 sk->sk_shutdown |= mode;
1884                 other=unix_peer(sk);
1885                 if (other)
1886                         sock_hold(other);
1887                 unix_state_unlock(sk);
1888                 sk->sk_state_change(sk);
1889
1890                 if (other &&
1891                         (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
1892
1893                         int peer_mode = 0;
1894
1895                         if (mode&RCV_SHUTDOWN)
1896                                 peer_mode |= SEND_SHUTDOWN;
1897                         if (mode&SEND_SHUTDOWN)
1898                                 peer_mode |= RCV_SHUTDOWN;
1899                         unix_state_lock(other);
1900                         other->sk_shutdown |= peer_mode;
1901                         unix_state_unlock(other);
1902                         other->sk_state_change(other);
1903                         read_lock(&other->sk_callback_lock);
1904                         if (peer_mode == SHUTDOWN_MASK)
1905                                 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
1906                         else if (peer_mode & RCV_SHUTDOWN)
1907                                 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
1908                         read_unlock(&other->sk_callback_lock);
1909                 }
1910                 if (other)
1911                         sock_put(other);
1912         }
1913         return 0;
1914 }
1915
1916 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
1917 {
1918         struct sock *sk = sock->sk;
1919         long amount=0;
1920         int err;
1921
1922         switch(cmd)
1923         {
1924                 case SIOCOUTQ:
1925                         amount = atomic_read(&sk->sk_wmem_alloc);
1926                         err = put_user(amount, (int __user *)arg);
1927                         break;
1928                 case SIOCINQ:
1929                 {
1930                         struct sk_buff *skb;
1931
1932                         if (sk->sk_state == TCP_LISTEN) {
1933                                 err = -EINVAL;
1934                                 break;
1935                         }
1936
1937                         spin_lock(&sk->sk_receive_queue.lock);
1938                         if (sk->sk_type == SOCK_STREAM ||
1939                             sk->sk_type == SOCK_SEQPACKET) {
1940                                 skb_queue_walk(&sk->sk_receive_queue, skb)
1941                                         amount += skb->len;
1942                         } else {
1943                                 skb = skb_peek(&sk->sk_receive_queue);
1944                                 if (skb)
1945                                         amount=skb->len;
1946                         }
1947                         spin_unlock(&sk->sk_receive_queue.lock);
1948                         err = put_user(amount, (int __user *)arg);
1949                         break;
1950                 }
1951
1952                 default:
1953                         err = -ENOIOCTLCMD;
1954                         break;
1955         }
1956         return err;
1957 }
1958
1959 static unsigned int unix_poll(struct file * file, struct socket *sock, poll_table *wait)
1960 {
1961         struct sock *sk = sock->sk;
1962         unsigned int mask;
1963
1964         poll_wait(file, sk->sk_sleep, wait);
1965         mask = 0;
1966
1967         /* exceptional events? */
1968         if (sk->sk_err)
1969                 mask |= POLLERR;
1970         if (sk->sk_shutdown == SHUTDOWN_MASK)
1971                 mask |= POLLHUP;
1972         if (sk->sk_shutdown & RCV_SHUTDOWN)
1973                 mask |= POLLRDHUP;
1974
1975         /* readable? */
1976         if (!skb_queue_empty(&sk->sk_receive_queue) ||
1977             (sk->sk_shutdown & RCV_SHUTDOWN))
1978                 mask |= POLLIN | POLLRDNORM;
1979
1980         /* Connection-based need to check for termination and startup */
1981         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) && sk->sk_state == TCP_CLOSE)
1982                 mask |= POLLHUP;
1983
1984         /*
1985          * we set writable also when the other side has shut down the
1986          * connection. This prevents stuck sockets.
1987          */
1988         if (unix_writable(sk))
1989                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
1990
1991         return mask;
1992 }
1993
1994
1995 #ifdef CONFIG_PROC_FS
1996 static struct sock *first_unix_socket(int *i)
1997 {
1998         for (*i = 0; *i <= UNIX_HASH_SIZE; (*i)++) {
1999                 if (!hlist_empty(&unix_socket_table[*i]))
2000                         return __sk_head(&unix_socket_table[*i]);
2001         }
2002         return NULL;
2003 }
2004
2005 static struct sock *next_unix_socket(int *i, struct sock *s)
2006 {
2007         struct sock *next = sk_next(s);
2008         /* More in this chain? */
2009         if (next)
2010                 return next;
2011         /* Look for next non-empty chain. */
2012         for ((*i)++; *i <= UNIX_HASH_SIZE; (*i)++) {
2013                 if (!hlist_empty(&unix_socket_table[*i]))
2014                         return __sk_head(&unix_socket_table[*i]);
2015         }
2016         return NULL;
2017 }
2018
2019 struct unix_iter_state {
2020         struct seq_net_private p;
2021         int i;
2022 };
2023 static struct sock *unix_seq_idx(struct seq_file *seq, loff_t pos)
2024 {
2025         struct unix_iter_state *iter = seq->private;
2026         loff_t off = 0;
2027         struct sock *s;
2028
2029         for (s = first_unix_socket(&iter->i); s; s = next_unix_socket(&iter->i, s)) {
2030                 if (sock_net(s) != seq_file_net(seq))
2031                         continue;
2032                 if (off == pos)
2033                         return s;
2034                 ++off;
2035         }
2036         return NULL;
2037 }
2038
2039
2040 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2041         __acquires(unix_table_lock)
2042 {
2043         spin_lock(&unix_table_lock);
2044         return *pos ? unix_seq_idx(seq, *pos - 1) : SEQ_START_TOKEN;
2045 }
2046
2047 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2048 {
2049         struct unix_iter_state *iter = seq->private;
2050         struct sock *sk = v;
2051         ++*pos;
2052
2053         if (v == SEQ_START_TOKEN)
2054                 sk = first_unix_socket(&iter->i);
2055         else
2056                 sk = next_unix_socket(&iter->i, sk);
2057         while (sk && (sock_net(sk) != seq_file_net(seq)))
2058                 sk = next_unix_socket(&iter->i, sk);
2059         return sk;
2060 }
2061
2062 static void unix_seq_stop(struct seq_file *seq, void *v)
2063         __releases(unix_table_lock)
2064 {
2065         spin_unlock(&unix_table_lock);
2066 }
2067
2068 static int unix_seq_show(struct seq_file *seq, void *v)
2069 {
2070
2071         if (v == SEQ_START_TOKEN)
2072                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2073                          "Inode Path\n");
2074         else {
2075                 struct sock *s = v;
2076                 struct unix_sock *u = unix_sk(s);
2077                 unix_state_lock(s);
2078
2079                 seq_printf(seq, "%p: %08X %08X %08X %04X %02X %5lu",
2080                         s,
2081                         atomic_read(&s->sk_refcnt),
2082                         0,
2083                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2084                         s->sk_type,
2085                         s->sk_socket ?
2086                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2087                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2088                         sock_i_ino(s));
2089
2090                 if (u->addr) {
2091                         int i, len;
2092                         seq_putc(seq, ' ');
2093
2094                         i = 0;
2095                         len = u->addr->len - sizeof(short);
2096                         if (!UNIX_ABSTRACT(s))
2097                                 len--;
2098                         else {
2099                                 seq_putc(seq, '@');
2100                                 i++;
2101                         }
2102                         for ( ; i < len; i++)
2103                                 seq_putc(seq, u->addr->name->sun_path[i]);
2104                 }
2105                 unix_state_unlock(s);
2106                 seq_putc(seq, '\n');
2107         }
2108
2109         return 0;
2110 }
2111
2112 static const struct seq_operations unix_seq_ops = {
2113         .start  = unix_seq_start,
2114         .next   = unix_seq_next,
2115         .stop   = unix_seq_stop,
2116         .show   = unix_seq_show,
2117 };
2118
2119
2120 static int unix_seq_open(struct inode *inode, struct file *file)
2121 {
2122         return seq_open_net(inode, file, &unix_seq_ops,
2123                             sizeof(struct unix_iter_state));
2124 }
2125
2126 static const struct file_operations unix_seq_fops = {
2127         .owner          = THIS_MODULE,
2128         .open           = unix_seq_open,
2129         .read           = seq_read,
2130         .llseek         = seq_lseek,
2131         .release        = seq_release_net,
2132 };
2133
2134 #endif
2135
2136 static struct net_proto_family unix_family_ops = {
2137         .family = PF_UNIX,
2138         .create = unix_create,
2139         .owner  = THIS_MODULE,
2140 };
2141
2142
2143 static int unix_net_init(struct net *net)
2144 {
2145         int error = -ENOMEM;
2146
2147         net->unx.sysctl_max_dgram_qlen = 10;
2148         if (unix_sysctl_register(net))
2149                 goto out;
2150
2151 #ifdef CONFIG_PROC_FS
2152         if (!proc_net_fops_create(net, "unix", 0, &unix_seq_fops)) {
2153                 unix_sysctl_unregister(net);
2154                 goto out;
2155         }
2156 #endif
2157         error = 0;
2158 out:
2159         return 0;
2160 }
2161
/*
 *      Per-network-namespace teardown: drop the sysctl table and the
 *      "unix" proc entry that unix_net_init() set up.
 */
static void unix_net_exit(struct net *net)
{
        unix_sysctl_unregister(net);

        proc_net_remove(net, "unix");
}
2167
2168 static struct pernet_operations unix_net_ops = {
2169         .init = unix_net_init,
2170         .exit = unix_net_exit,
2171 };
2172
2173 static int __init af_unix_init(void)
2174 {
2175         int rc = -1;
2176         struct sk_buff *dummy_skb;
2177
2178         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb));
2179
2180         rc = proto_register(&unix_proto, 1);
2181         if (rc != 0) {
2182                 printk(KERN_CRIT "%s: Cannot create unix_sock SLAB cache!\n",
2183                        __func__);
2184                 goto out;
2185         }
2186
2187         sock_register(&unix_family_ops);
2188         register_pernet_subsys(&unix_net_ops);
2189 out:
2190         return rc;
2191 }
2192
2193 static void __exit af_unix_exit(void)
2194 {
2195         sock_unregister(PF_UNIX);
2196         proto_unregister(&unix_proto);
2197         unregister_pernet_subsys(&unix_net_ops);
2198 }
2199
/*
 * Run earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries to use
 * a UNIX socket, but later than subsys_initcall() because we depend
 * on things initialised there.
 */
2204 fs_initcall(af_unix_init);
2205 module_exit(af_unix_exit);
2206
2207 MODULE_LICENSE("GPL");
2208 MODULE_ALIAS_NETPROTO(PF_UNIX);