2 * NET4: Implementation of BSD Unix domain sockets.
4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko EiBfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
32 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
33 * by above two patches.
34 * Andrea Arcangeli : If possible we block in connect(2)
35 * if the max backlog of the listen socket
36 * has been reached. This won't break
37 * old apps and it will avoid huge amount
38 * of socks hashed (this for unix_gc()
39 * performances reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skb queueable in the
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
53 * Known differences from reference BSD that was tested:
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
59 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * started by 0, so that this name space does not intersect
83 #include <linux/module.h>
84 #include <linux/kernel.h>
85 #include <linux/signal.h>
86 #include <linux/sched.h>
87 #include <linux/errno.h>
88 #include <linux/string.h>
89 #include <linux/stat.h>
90 #include <linux/dcache.h>
91 #include <linux/namei.h>
92 #include <linux/socket.h>
94 #include <linux/fcntl.h>
95 #include <linux/termios.h>
96 #include <linux/sockios.h>
97 #include <linux/net.h>
100 #include <linux/slab.h>
101 #include <asm/uaccess.h>
102 #include <linux/skbuff.h>
103 #include <linux/netdevice.h>
104 #include <net/net_namespace.h>
105 #include <net/sock.h>
106 #include <net/tcp_states.h>
107 #include <net/af_unix.h>
108 #include <linux/proc_fs.h>
109 #include <linux/seq_file.h>
111 #include <linux/init.h>
112 #include <linux/poll.h>
113 #include <linux/rtnetlink.h>
114 #include <linux/mount.h>
115 #include <net/checksum.h>
116 #include <linux/security.h>
118 static struct hlist_head unix_socket_table
[UNIX_HASH_SIZE
+ 1];
119 static DEFINE_SPINLOCK(unix_table_lock
);
120 static atomic_t unix_nr_socks
= ATOMIC_INIT(0);
122 #define unix_sockets_unbound (&unix_socket_table[UNIX_HASH_SIZE])
124 #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash != UNIX_HASH_SIZE)
#ifdef CONFIG_SECURITY_NETWORK
/* Copy the sender's LSM security id from the scm cookie into the skb. */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	memcpy(UNIXSID(skb), &scm->secid, sizeof(u32));
}

/* Recover the security id carried by the skb into the receiver's cookie. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = *UNIXSID(skb);
}
#else
/* Security networking disabled: security ids are neither stored nor read. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
}
#endif /* CONFIG_SECURITY_NETWORK */
145 * SMP locking strategy:
146 * hash table is protected with spinlock unix_table_lock
147 * each socket state is protected by separate spin lock.
150 static inline unsigned unix_hash_fold(__wsum n
)
152 unsigned hash
= (__force
unsigned)n
;
155 return hash
&(UNIX_HASH_SIZE
-1);
158 #define unix_peer(sk) (unix_sk(sk)->peer)
/* Return true if @osk currently names @sk as its peer. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}
165 static inline int unix_may_send(struct sock
*sk
, struct sock
*osk
)
167 return unix_peer(osk
) == NULL
|| unix_our_peer(sk
, osk
);
170 static inline int unix_recvq_full(struct sock
const *sk
)
172 return skb_queue_len(&sk
->sk_receive_queue
) > sk
->sk_max_ack_backlog
;
175 static struct sock
*unix_peer_get(struct sock
*s
)
183 unix_state_unlock(s
);
187 static inline void unix_release_addr(struct unix_address
*addr
)
189 if (atomic_dec_and_test(&addr
->refcnt
))
194 * Check unix socket name:
195 * - should be not zero length.
196 * - if started by not zero, should be NULL terminated (FS object)
197 * - if started by zero, it is abstract name.
200 static int unix_mkname(struct sockaddr_un
*sunaddr
, int len
, unsigned *hashp
)
202 if (len
<= sizeof(short) || len
> sizeof(*sunaddr
))
204 if (!sunaddr
|| sunaddr
->sun_family
!= AF_UNIX
)
206 if (sunaddr
->sun_path
[0]) {
208 * This may look like an off by one error but it is a bit more
209 * subtle. 108 is the longest valid AF_UNIX path for a binding.
210 * sun_path[108] doesn't as such exist. However in kernel space
211 * we are guaranteed that it is a valid memory location in our
212 * kernel address buffer.
214 ((char *)sunaddr
)[len
] = 0;
215 len
= strlen(sunaddr
->sun_path
)+1+sizeof(short);
219 *hashp
= unix_hash_fold(csum_partial(sunaddr
, len
, 0));
/* Unhash @sk from the socket table; caller holds unix_table_lock. */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
/*
 * Hash @sk onto @list; caller holds unix_table_lock.  The socket must
 * not already be hashed (double insertion would corrupt the chain).
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}
234 static inline void unix_remove_socket(struct sock
*sk
)
236 spin_lock(&unix_table_lock
);
237 __unix_remove_socket(sk
);
238 spin_unlock(&unix_table_lock
);
241 static inline void unix_insert_socket(struct hlist_head
*list
, struct sock
*sk
)
243 spin_lock(&unix_table_lock
);
244 __unix_insert_socket(list
, sk
);
245 spin_unlock(&unix_table_lock
);
248 static struct sock
*__unix_find_socket_byname(struct net
*net
,
249 struct sockaddr_un
*sunname
,
250 int len
, int type
, unsigned hash
)
253 struct hlist_node
*node
;
255 sk_for_each(s
, node
, &unix_socket_table
[hash
^ type
]) {
256 struct unix_sock
*u
= unix_sk(s
);
258 if (!net_eq(sock_net(s
), net
))
261 if (u
->addr
->len
== len
&&
262 !memcmp(u
->addr
->name
, sunname
, len
))
270 static inline struct sock
*unix_find_socket_byname(struct net
*net
,
271 struct sockaddr_un
*sunname
,
277 spin_lock(&unix_table_lock
);
278 s
= __unix_find_socket_byname(net
, sunname
, len
, type
, hash
);
281 spin_unlock(&unix_table_lock
);
285 static struct sock
*unix_find_socket_byinode(struct inode
*i
)
288 struct hlist_node
*node
;
290 spin_lock(&unix_table_lock
);
292 &unix_socket_table
[i
->i_ino
& (UNIX_HASH_SIZE
- 1)]) {
293 struct dentry
*dentry
= unix_sk(s
)->dentry
;
295 if (dentry
&& dentry
->d_inode
== i
) {
302 spin_unlock(&unix_table_lock
);
306 static inline int unix_writable(struct sock
*sk
)
308 return (atomic_read(&sk
->sk_wmem_alloc
) << 2) <= sk
->sk_sndbuf
;
311 static void unix_write_space(struct sock
*sk
)
313 struct socket_wq
*wq
;
316 if (unix_writable(sk
)) {
317 wq
= rcu_dereference(sk
->sk_wq
);
318 if (wq_has_sleeper(wq
))
319 wake_up_interruptible_sync(&wq
->wait
);
320 sk_wake_async(sk
, SOCK_WAKE_SPACE
, POLL_OUT
);
325 /* When dgram socket disconnects (or changes its peer), we clear its receive
326 * queue of packets arrived from previous peer. First, it allows to do
327 * flow control based only on wmem_alloc; second, sk connected to peer
328 * may receive messages only from that peer. */
329 static void unix_dgram_disconnected(struct sock
*sk
, struct sock
*other
)
331 if (!skb_queue_empty(&sk
->sk_receive_queue
)) {
332 skb_queue_purge(&sk
->sk_receive_queue
);
333 wake_up_interruptible_all(&unix_sk(sk
)->peer_wait
);
335 /* If one link of bidirectional dgram pipe is disconnected,
336 * we signal error. Messages are lost. Do not make this,
337 * when peer was not connected to us.
339 if (!sock_flag(other
, SOCK_DEAD
) && unix_peer(other
) == sk
) {
340 other
->sk_err
= ECONNRESET
;
341 other
->sk_error_report(other
);
346 static void unix_sock_destructor(struct sock
*sk
)
348 struct unix_sock
*u
= unix_sk(sk
);
350 skb_queue_purge(&sk
->sk_receive_queue
);
352 WARN_ON(atomic_read(&sk
->sk_wmem_alloc
));
353 WARN_ON(!sk_unhashed(sk
));
354 WARN_ON(sk
->sk_socket
);
355 if (!sock_flag(sk
, SOCK_DEAD
)) {
356 printk(KERN_INFO
"Attempt to release alive unix socket: %p\n", sk
);
361 unix_release_addr(u
->addr
);
363 atomic_dec(&unix_nr_socks
);
365 sock_prot_inuse_add(sock_net(sk
), sk
->sk_prot
, -1);
367 #ifdef UNIX_REFCNT_DEBUG
368 printk(KERN_DEBUG
"UNIX %p is destroyed, %d are still alive.\n", sk
,
369 atomic_read(&unix_nr_socks
));
373 static int unix_release_sock(struct sock
*sk
, int embrion
)
375 struct unix_sock
*u
= unix_sk(sk
);
376 struct dentry
*dentry
;
377 struct vfsmount
*mnt
;
382 unix_remove_socket(sk
);
387 sk
->sk_shutdown
= SHUTDOWN_MASK
;
392 state
= sk
->sk_state
;
393 sk
->sk_state
= TCP_CLOSE
;
394 unix_state_unlock(sk
);
396 wake_up_interruptible_all(&u
->peer_wait
);
398 skpair
= unix_peer(sk
);
400 if (skpair
!= NULL
) {
401 if (sk
->sk_type
== SOCK_STREAM
|| sk
->sk_type
== SOCK_SEQPACKET
) {
402 unix_state_lock(skpair
);
404 skpair
->sk_shutdown
= SHUTDOWN_MASK
;
405 if (!skb_queue_empty(&sk
->sk_receive_queue
) || embrion
)
406 skpair
->sk_err
= ECONNRESET
;
407 unix_state_unlock(skpair
);
408 skpair
->sk_state_change(skpair
);
409 sk_wake_async(skpair
, SOCK_WAKE_WAITD
, POLL_HUP
);
411 sock_put(skpair
); /* It may now die */
412 unix_peer(sk
) = NULL
;
415 /* Try to flush out this socket. Throw out buffers at least */
417 while ((skb
= skb_dequeue(&sk
->sk_receive_queue
)) != NULL
) {
418 if (state
== TCP_LISTEN
)
419 unix_release_sock(skb
->sk
, 1);
420 /* passed fds are erased in the kfree_skb hook */
431 /* ---- Socket is dead now and most probably destroyed ---- */
434 if (unix_tot_inflight
)
435 unix_gc(); /* Garbage collect fds */
440 static void init_peercred(struct sock
*sk
)
442 put_pid(sk
->sk_peer_pid
);
443 if (sk
->sk_peer_cred
)
444 put_cred(sk
->sk_peer_cred
);
445 sk
->sk_peer_pid
= get_pid(task_tgid(current
));
446 sk
->sk_peer_cred
= get_current_cred();
449 static void copy_peercred(struct sock
*sk
, struct sock
*peersk
)
451 put_pid(sk
->sk_peer_pid
);
452 if (sk
->sk_peer_cred
)
453 put_cred(sk
->sk_peer_cred
);
454 sk
->sk_peer_pid
= get_pid(peersk
->sk_peer_pid
);
455 sk
->sk_peer_cred
= get_cred(peersk
->sk_peer_cred
);
458 static int unix_listen(struct socket
*sock
, int backlog
)
461 struct sock
*sk
= sock
->sk
;
462 struct unix_sock
*u
= unix_sk(sk
);
463 struct pid
*old_pid
= NULL
;
464 const struct cred
*old_cred
= NULL
;
467 if (sock
->type
!= SOCK_STREAM
&& sock
->type
!= SOCK_SEQPACKET
)
468 goto out
; /* Only stream/seqpacket sockets accept */
471 goto out
; /* No listens on an unbound socket */
473 if (sk
->sk_state
!= TCP_CLOSE
&& sk
->sk_state
!= TCP_LISTEN
)
475 if (backlog
> sk
->sk_max_ack_backlog
)
476 wake_up_interruptible_all(&u
->peer_wait
);
477 sk
->sk_max_ack_backlog
= backlog
;
478 sk
->sk_state
= TCP_LISTEN
;
479 /* set credentials so connect can copy them */
484 unix_state_unlock(sk
);
492 static int unix_release(struct socket
*);
493 static int unix_bind(struct socket
*, struct sockaddr
*, int);
494 static int unix_stream_connect(struct socket
*, struct sockaddr
*,
495 int addr_len
, int flags
);
496 static int unix_socketpair(struct socket
*, struct socket
*);
497 static int unix_accept(struct socket
*, struct socket
*, int);
498 static int unix_getname(struct socket
*, struct sockaddr
*, int *, int);
499 static unsigned int unix_poll(struct file
*, struct socket
*, poll_table
*);
500 static unsigned int unix_dgram_poll(struct file
*, struct socket
*,
502 static int unix_ioctl(struct socket
*, unsigned int, unsigned long);
503 static int unix_shutdown(struct socket
*, int);
504 static int unix_stream_sendmsg(struct kiocb
*, struct socket
*,
505 struct msghdr
*, size_t);
506 static int unix_stream_recvmsg(struct kiocb
*, struct socket
*,
507 struct msghdr
*, size_t, int);
508 static int unix_dgram_sendmsg(struct kiocb
*, struct socket
*,
509 struct msghdr
*, size_t);
510 static int unix_dgram_recvmsg(struct kiocb
*, struct socket
*,
511 struct msghdr
*, size_t, int);
512 static int unix_dgram_connect(struct socket
*, struct sockaddr
*,
514 static int unix_seqpacket_sendmsg(struct kiocb
*, struct socket
*,
515 struct msghdr
*, size_t);
517 static const struct proto_ops unix_stream_ops
= {
519 .owner
= THIS_MODULE
,
520 .release
= unix_release
,
522 .connect
= unix_stream_connect
,
523 .socketpair
= unix_socketpair
,
524 .accept
= unix_accept
,
525 .getname
= unix_getname
,
528 .listen
= unix_listen
,
529 .shutdown
= unix_shutdown
,
530 .setsockopt
= sock_no_setsockopt
,
531 .getsockopt
= sock_no_getsockopt
,
532 .sendmsg
= unix_stream_sendmsg
,
533 .recvmsg
= unix_stream_recvmsg
,
534 .mmap
= sock_no_mmap
,
535 .sendpage
= sock_no_sendpage
,
538 static const struct proto_ops unix_dgram_ops
= {
540 .owner
= THIS_MODULE
,
541 .release
= unix_release
,
543 .connect
= unix_dgram_connect
,
544 .socketpair
= unix_socketpair
,
545 .accept
= sock_no_accept
,
546 .getname
= unix_getname
,
547 .poll
= unix_dgram_poll
,
549 .listen
= sock_no_listen
,
550 .shutdown
= unix_shutdown
,
551 .setsockopt
= sock_no_setsockopt
,
552 .getsockopt
= sock_no_getsockopt
,
553 .sendmsg
= unix_dgram_sendmsg
,
554 .recvmsg
= unix_dgram_recvmsg
,
555 .mmap
= sock_no_mmap
,
556 .sendpage
= sock_no_sendpage
,
559 static const struct proto_ops unix_seqpacket_ops
= {
561 .owner
= THIS_MODULE
,
562 .release
= unix_release
,
564 .connect
= unix_stream_connect
,
565 .socketpair
= unix_socketpair
,
566 .accept
= unix_accept
,
567 .getname
= unix_getname
,
568 .poll
= unix_dgram_poll
,
570 .listen
= unix_listen
,
571 .shutdown
= unix_shutdown
,
572 .setsockopt
= sock_no_setsockopt
,
573 .getsockopt
= sock_no_getsockopt
,
574 .sendmsg
= unix_seqpacket_sendmsg
,
575 .recvmsg
= unix_dgram_recvmsg
,
576 .mmap
= sock_no_mmap
,
577 .sendpage
= sock_no_sendpage
,
580 static struct proto unix_proto
= {
582 .owner
= THIS_MODULE
,
583 .obj_size
= sizeof(struct unix_sock
),
587 * AF_UNIX sockets do not interact with hardware, hence they
588 * dont trigger interrupts - so it's safe for them to have
589 * bh-unsafe locking for their sk_receive_queue.lock. Split off
590 * this special lock-class by reinitializing the spinlock key:
592 static struct lock_class_key af_unix_sk_receive_queue_lock_key
;
594 static struct sock
*unix_create1(struct net
*net
, struct socket
*sock
)
596 struct sock
*sk
= NULL
;
599 atomic_inc(&unix_nr_socks
);
600 if (atomic_read(&unix_nr_socks
) > 2 * get_max_files())
603 sk
= sk_alloc(net
, PF_UNIX
, GFP_KERNEL
, &unix_proto
);
607 sock_init_data(sock
, sk
);
608 lockdep_set_class(&sk
->sk_receive_queue
.lock
,
609 &af_unix_sk_receive_queue_lock_key
);
611 sk
->sk_write_space
= unix_write_space
;
612 sk
->sk_max_ack_backlog
= net
->unx
.sysctl_max_dgram_qlen
;
613 sk
->sk_destruct
= unix_sock_destructor
;
617 spin_lock_init(&u
->lock
);
618 atomic_long_set(&u
->inflight
, 0);
619 INIT_LIST_HEAD(&u
->link
);
620 mutex_init(&u
->readlock
); /* single task reading lock */
621 init_waitqueue_head(&u
->peer_wait
);
622 unix_insert_socket(unix_sockets_unbound
, sk
);
625 atomic_dec(&unix_nr_socks
);
628 sock_prot_inuse_add(sock_net(sk
), sk
->sk_prot
, 1);
634 static int unix_create(struct net
*net
, struct socket
*sock
, int protocol
,
637 if (protocol
&& protocol
!= PF_UNIX
)
638 return -EPROTONOSUPPORT
;
640 sock
->state
= SS_UNCONNECTED
;
642 switch (sock
->type
) {
644 sock
->ops
= &unix_stream_ops
;
647 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
651 sock
->type
= SOCK_DGRAM
;
653 sock
->ops
= &unix_dgram_ops
;
656 sock
->ops
= &unix_seqpacket_ops
;
659 return -ESOCKTNOSUPPORT
;
662 return unix_create1(net
, sock
) ? 0 : -ENOMEM
;
665 static int unix_release(struct socket
*sock
)
667 struct sock
*sk
= sock
->sk
;
674 return unix_release_sock(sk
, 0);
677 static int unix_autobind(struct socket
*sock
)
679 struct sock
*sk
= sock
->sk
;
680 struct net
*net
= sock_net(sk
);
681 struct unix_sock
*u
= unix_sk(sk
);
682 static u32 ordernum
= 1;
683 struct unix_address
*addr
;
685 unsigned int retries
= 0;
687 mutex_lock(&u
->readlock
);
694 addr
= kzalloc(sizeof(*addr
) + sizeof(short) + 16, GFP_KERNEL
);
698 addr
->name
->sun_family
= AF_UNIX
;
699 atomic_set(&addr
->refcnt
, 1);
702 addr
->len
= sprintf(addr
->name
->sun_path
+1, "%05x", ordernum
) + 1 + sizeof(short);
703 addr
->hash
= unix_hash_fold(csum_partial(addr
->name
, addr
->len
, 0));
705 spin_lock(&unix_table_lock
);
706 ordernum
= (ordernum
+1)&0xFFFFF;
708 if (__unix_find_socket_byname(net
, addr
->name
, addr
->len
, sock
->type
,
710 spin_unlock(&unix_table_lock
);
712 * __unix_find_socket_byname() may take long time if many names
713 * are already in use.
716 /* Give up if all names seems to be in use. */
717 if (retries
++ == 0xFFFFF) {
724 addr
->hash
^= sk
->sk_type
;
726 __unix_remove_socket(sk
);
728 __unix_insert_socket(&unix_socket_table
[addr
->hash
], sk
);
729 spin_unlock(&unix_table_lock
);
732 out
: mutex_unlock(&u
->readlock
);
736 static struct sock
*unix_find_other(struct net
*net
,
737 struct sockaddr_un
*sunname
, int len
,
738 int type
, unsigned hash
, int *error
)
744 if (sunname
->sun_path
[0]) {
746 err
= kern_path(sunname
->sun_path
, LOOKUP_FOLLOW
, &path
);
749 inode
= path
.dentry
->d_inode
;
750 err
= inode_permission(inode
, MAY_WRITE
);
755 if (!S_ISSOCK(inode
->i_mode
))
757 u
= unix_find_socket_byinode(inode
);
761 if (u
->sk_type
== type
)
762 touch_atime(path
.mnt
, path
.dentry
);
767 if (u
->sk_type
!= type
) {
773 u
= unix_find_socket_byname(net
, sunname
, len
, type
, hash
);
775 struct dentry
*dentry
;
776 dentry
= unix_sk(u
)->dentry
;
778 touch_atime(unix_sk(u
)->mnt
, dentry
);
792 static int unix_bind(struct socket
*sock
, struct sockaddr
*uaddr
, int addr_len
)
794 struct sock
*sk
= sock
->sk
;
795 struct net
*net
= sock_net(sk
);
796 struct unix_sock
*u
= unix_sk(sk
);
797 struct sockaddr_un
*sunaddr
= (struct sockaddr_un
*)uaddr
;
798 struct dentry
*dentry
= NULL
;
802 struct unix_address
*addr
;
803 struct hlist_head
*list
;
806 if (sunaddr
->sun_family
!= AF_UNIX
)
809 if (addr_len
== sizeof(short)) {
810 err
= unix_autobind(sock
);
814 err
= unix_mkname(sunaddr
, addr_len
, &hash
);
819 mutex_lock(&u
->readlock
);
826 addr
= kmalloc(sizeof(*addr
)+addr_len
, GFP_KERNEL
);
830 memcpy(addr
->name
, sunaddr
, addr_len
);
831 addr
->len
= addr_len
;
832 addr
->hash
= hash
^ sk
->sk_type
;
833 atomic_set(&addr
->refcnt
, 1);
835 if (sunaddr
->sun_path
[0]) {
839 * Get the parent directory, calculate the hash for last
842 err
= path_lookup(sunaddr
->sun_path
, LOOKUP_PARENT
, &nd
);
844 goto out_mknod_parent
;
846 dentry
= lookup_create(&nd
, 0);
847 err
= PTR_ERR(dentry
);
849 goto out_mknod_unlock
;
852 * All right, let's create it.
855 (SOCK_INODE(sock
)->i_mode
& ~current_umask());
856 err
= mnt_want_write(nd
.path
.mnt
);
859 err
= security_path_mknod(&nd
.path
, dentry
, mode
, 0);
861 goto out_mknod_drop_write
;
862 err
= vfs_mknod(nd
.path
.dentry
->d_inode
, dentry
, mode
, 0);
863 out_mknod_drop_write
:
864 mnt_drop_write(nd
.path
.mnt
);
867 mutex_unlock(&nd
.path
.dentry
->d_inode
->i_mutex
);
868 dput(nd
.path
.dentry
);
869 nd
.path
.dentry
= dentry
;
871 addr
->hash
= UNIX_HASH_SIZE
;
874 spin_lock(&unix_table_lock
);
876 if (!sunaddr
->sun_path
[0]) {
878 if (__unix_find_socket_byname(net
, sunaddr
, addr_len
,
879 sk
->sk_type
, hash
)) {
880 unix_release_addr(addr
);
884 list
= &unix_socket_table
[addr
->hash
];
886 list
= &unix_socket_table
[dentry
->d_inode
->i_ino
& (UNIX_HASH_SIZE
-1)];
887 u
->dentry
= nd
.path
.dentry
;
888 u
->mnt
= nd
.path
.mnt
;
892 __unix_remove_socket(sk
);
894 __unix_insert_socket(list
, sk
);
897 spin_unlock(&unix_table_lock
);
899 mutex_unlock(&u
->readlock
);
906 mutex_unlock(&nd
.path
.dentry
->d_inode
->i_mutex
);
911 unix_release_addr(addr
);
915 static void unix_state_double_lock(struct sock
*sk1
, struct sock
*sk2
)
917 if (unlikely(sk1
== sk2
) || !sk2
) {
918 unix_state_lock(sk1
);
922 unix_state_lock(sk1
);
923 unix_state_lock_nested(sk2
);
925 unix_state_lock(sk2
);
926 unix_state_lock_nested(sk1
);
930 static void unix_state_double_unlock(struct sock
*sk1
, struct sock
*sk2
)
932 if (unlikely(sk1
== sk2
) || !sk2
) {
933 unix_state_unlock(sk1
);
936 unix_state_unlock(sk1
);
937 unix_state_unlock(sk2
);
940 static int unix_dgram_connect(struct socket
*sock
, struct sockaddr
*addr
,
943 struct sock
*sk
= sock
->sk
;
944 struct net
*net
= sock_net(sk
);
945 struct sockaddr_un
*sunaddr
= (struct sockaddr_un
*)addr
;
950 if (addr
->sa_family
!= AF_UNSPEC
) {
951 err
= unix_mkname(sunaddr
, alen
, &hash
);
956 if (test_bit(SOCK_PASSCRED
, &sock
->flags
) &&
957 !unix_sk(sk
)->addr
&& (err
= unix_autobind(sock
)) != 0)
961 other
= unix_find_other(net
, sunaddr
, alen
, sock
->type
, hash
, &err
);
965 unix_state_double_lock(sk
, other
);
967 /* Apparently VFS overslept socket death. Retry. */
968 if (sock_flag(other
, SOCK_DEAD
)) {
969 unix_state_double_unlock(sk
, other
);
975 if (!unix_may_send(sk
, other
))
978 err
= security_unix_may_send(sk
->sk_socket
, other
->sk_socket
);
984 * 1003.1g breaking connected state with AF_UNSPEC
987 unix_state_double_lock(sk
, other
);
991 * If it was connected, reconnect.
994 struct sock
*old_peer
= unix_peer(sk
);
995 unix_peer(sk
) = other
;
996 unix_state_double_unlock(sk
, other
);
998 if (other
!= old_peer
)
999 unix_dgram_disconnected(sk
, old_peer
);
1002 unix_peer(sk
) = other
;
1003 unix_state_double_unlock(sk
, other
);
1008 unix_state_double_unlock(sk
, other
);
1014 static long unix_wait_for_peer(struct sock
*other
, long timeo
)
1016 struct unix_sock
*u
= unix_sk(other
);
1020 prepare_to_wait_exclusive(&u
->peer_wait
, &wait
, TASK_INTERRUPTIBLE
);
1022 sched
= !sock_flag(other
, SOCK_DEAD
) &&
1023 !(other
->sk_shutdown
& RCV_SHUTDOWN
) &&
1024 unix_recvq_full(other
);
1026 unix_state_unlock(other
);
1029 timeo
= schedule_timeout(timeo
);
1031 finish_wait(&u
->peer_wait
, &wait
);
1035 static int unix_stream_connect(struct socket
*sock
, struct sockaddr
*uaddr
,
1036 int addr_len
, int flags
)
1038 struct sockaddr_un
*sunaddr
= (struct sockaddr_un
*)uaddr
;
1039 struct sock
*sk
= sock
->sk
;
1040 struct net
*net
= sock_net(sk
);
1041 struct unix_sock
*u
= unix_sk(sk
), *newu
, *otheru
;
1042 struct sock
*newsk
= NULL
;
1043 struct sock
*other
= NULL
;
1044 struct sk_buff
*skb
= NULL
;
1050 err
= unix_mkname(sunaddr
, addr_len
, &hash
);
1055 if (test_bit(SOCK_PASSCRED
, &sock
->flags
) && !u
->addr
&&
1056 (err
= unix_autobind(sock
)) != 0)
1059 timeo
= sock_sndtimeo(sk
, flags
& O_NONBLOCK
);
1061 /* First of all allocate resources.
1062 If we will make it after state is locked,
1063 we will have to recheck all again in any case.
1068 /* create new sock for complete connection */
1069 newsk
= unix_create1(sock_net(sk
), NULL
);
1073 /* Allocate skb for sending to listening sock */
1074 skb
= sock_wmalloc(newsk
, 1, 0, GFP_KERNEL
);
1079 /* Find listening sock. */
1080 other
= unix_find_other(net
, sunaddr
, addr_len
, sk
->sk_type
, hash
, &err
);
1084 /* Latch state of peer */
1085 unix_state_lock(other
);
1087 /* Apparently VFS overslept socket death. Retry. */
1088 if (sock_flag(other
, SOCK_DEAD
)) {
1089 unix_state_unlock(other
);
1094 err
= -ECONNREFUSED
;
1095 if (other
->sk_state
!= TCP_LISTEN
)
1097 if (other
->sk_shutdown
& RCV_SHUTDOWN
)
1100 if (unix_recvq_full(other
)) {
1105 timeo
= unix_wait_for_peer(other
, timeo
);
1107 err
= sock_intr_errno(timeo
);
1108 if (signal_pending(current
))
1116 It is tricky place. We need to grab write lock and cannot
1117 drop lock on peer. It is dangerous because deadlock is
1118 possible. Connect to self case and simultaneous
1119 attempt to connect are eliminated by checking socket
1120 state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1121 check this before attempt to grab lock.
1123 Well, and we have to recheck the state after socket locked.
1129 /* This is ok... continue with connect */
1131 case TCP_ESTABLISHED
:
1132 /* Socket is already connected */
1140 unix_state_lock_nested(sk
);
1142 if (sk
->sk_state
!= st
) {
1143 unix_state_unlock(sk
);
1144 unix_state_unlock(other
);
1149 err
= security_unix_stream_connect(sock
, other
->sk_socket
, newsk
);
1151 unix_state_unlock(sk
);
1155 /* The way is open! Fastly set all the necessary fields... */
1158 unix_peer(newsk
) = sk
;
1159 newsk
->sk_state
= TCP_ESTABLISHED
;
1160 newsk
->sk_type
= sk
->sk_type
;
1161 init_peercred(newsk
);
1162 newu
= unix_sk(newsk
);
1163 newsk
->sk_wq
= &newu
->peer_wq
;
1164 otheru
= unix_sk(other
);
1166 /* copy address information from listening to new sock*/
1168 atomic_inc(&otheru
->addr
->refcnt
);
1169 newu
->addr
= otheru
->addr
;
1171 if (otheru
->dentry
) {
1172 newu
->dentry
= dget(otheru
->dentry
);
1173 newu
->mnt
= mntget(otheru
->mnt
);
1176 /* Set credentials */
1177 copy_peercred(sk
, other
);
1179 sock
->state
= SS_CONNECTED
;
1180 sk
->sk_state
= TCP_ESTABLISHED
;
1183 smp_mb__after_atomic_inc(); /* sock_hold() does an atomic_inc() */
1184 unix_peer(sk
) = newsk
;
1186 unix_state_unlock(sk
);
1188 /* take ten and and send info to listening sock */
1189 spin_lock(&other
->sk_receive_queue
.lock
);
1190 __skb_queue_tail(&other
->sk_receive_queue
, skb
);
1191 spin_unlock(&other
->sk_receive_queue
.lock
);
1192 unix_state_unlock(other
);
1193 other
->sk_data_ready(other
, 0);
1199 unix_state_unlock(other
);
1204 unix_release_sock(newsk
, 0);
1210 static int unix_socketpair(struct socket
*socka
, struct socket
*sockb
)
1212 struct sock
*ska
= socka
->sk
, *skb
= sockb
->sk
;
1214 /* Join our sockets back to back */
1217 unix_peer(ska
) = skb
;
1218 unix_peer(skb
) = ska
;
1222 if (ska
->sk_type
!= SOCK_DGRAM
) {
1223 ska
->sk_state
= TCP_ESTABLISHED
;
1224 skb
->sk_state
= TCP_ESTABLISHED
;
1225 socka
->state
= SS_CONNECTED
;
1226 sockb
->state
= SS_CONNECTED
;
1231 static int unix_accept(struct socket
*sock
, struct socket
*newsock
, int flags
)
1233 struct sock
*sk
= sock
->sk
;
1235 struct sk_buff
*skb
;
1239 if (sock
->type
!= SOCK_STREAM
&& sock
->type
!= SOCK_SEQPACKET
)
1243 if (sk
->sk_state
!= TCP_LISTEN
)
1246 /* If socket state is TCP_LISTEN it cannot change (for now...),
1247 * so that no locks are necessary.
1250 skb
= skb_recv_datagram(sk
, 0, flags
&O_NONBLOCK
, &err
);
1252 /* This means receive shutdown. */
1259 skb_free_datagram(sk
, skb
);
1260 wake_up_interruptible(&unix_sk(sk
)->peer_wait
);
1262 /* attach accepted sock to socket */
1263 unix_state_lock(tsk
);
1264 newsock
->state
= SS_CONNECTED
;
1265 sock_graft(tsk
, newsock
);
1266 unix_state_unlock(tsk
);
1274 static int unix_getname(struct socket
*sock
, struct sockaddr
*uaddr
, int *uaddr_len
, int peer
)
1276 struct sock
*sk
= sock
->sk
;
1277 struct unix_sock
*u
;
1278 DECLARE_SOCKADDR(struct sockaddr_un
*, sunaddr
, uaddr
);
1282 sk
= unix_peer_get(sk
);
1293 unix_state_lock(sk
);
1295 sunaddr
->sun_family
= AF_UNIX
;
1296 sunaddr
->sun_path
[0] = 0;
1297 *uaddr_len
= sizeof(short);
1299 struct unix_address
*addr
= u
->addr
;
1301 *uaddr_len
= addr
->len
;
1302 memcpy(sunaddr
, addr
->name
, *uaddr_len
);
1304 unix_state_unlock(sk
);
1310 static void unix_detach_fds(struct scm_cookie
*scm
, struct sk_buff
*skb
)
1314 scm
->fp
= UNIXCB(skb
).fp
;
1315 UNIXCB(skb
).fp
= NULL
;
1317 for (i
= scm
->fp
->count
-1; i
>= 0; i
--)
1318 unix_notinflight(scm
->fp
->fp
[i
]);
1321 static void unix_destruct_scm(struct sk_buff
*skb
)
1323 struct scm_cookie scm
;
1324 memset(&scm
, 0, sizeof(scm
));
1325 scm
.pid
= UNIXCB(skb
).pid
;
1326 scm
.cred
= UNIXCB(skb
).cred
;
1328 unix_detach_fds(&scm
, skb
);
1330 /* Alas, it calls VFS */
1331 /* So fscking what? fput() had been SMP-safe since the last Summer */
1336 #define MAX_RECURSION_LEVEL 4
1338 static int unix_attach_fds(struct scm_cookie
*scm
, struct sk_buff
*skb
)
1341 unsigned char max_level
= 0;
1342 int unix_sock_count
= 0;
1344 for (i
= scm
->fp
->count
- 1; i
>= 0; i
--) {
1345 struct sock
*sk
= unix_get_socket(scm
->fp
->fp
[i
]);
1349 max_level
= max(max_level
,
1350 unix_sk(sk
)->recursion_level
);
1353 if (unlikely(max_level
> MAX_RECURSION_LEVEL
))
1354 return -ETOOMANYREFS
;
1357 * Need to duplicate file references for the sake of garbage
1358 * collection. Otherwise a socket in the fps might become a
1359 * candidate for GC while the skb is not yet queued.
1361 UNIXCB(skb
).fp
= scm_fp_dup(scm
->fp
);
1362 if (!UNIXCB(skb
).fp
)
1365 if (unix_sock_count
) {
1366 for (i
= scm
->fp
->count
- 1; i
>= 0; i
--)
1367 unix_inflight(scm
->fp
->fp
[i
]);
1372 static int unix_scm_to_skb(struct scm_cookie
*scm
, struct sk_buff
*skb
, bool send_fds
)
1375 UNIXCB(skb
).pid
= get_pid(scm
->pid
);
1376 UNIXCB(skb
).cred
= get_cred(scm
->cred
);
1377 UNIXCB(skb
).fp
= NULL
;
1378 if (scm
->fp
&& send_fds
)
1379 err
= unix_attach_fds(scm
, skb
);
1381 skb
->destructor
= unix_destruct_scm
;
/*
 * unix_dgram_sendmsg - sendmsg() for SOCK_DGRAM (and, via wrapper, SEQPACKET)
 * AF_UNIX sockets. Builds one skb for the whole datagram, attaches SCM
 * data (creds/fds/security label), resolves the destination either from
 * msg_name (unix_find_other) or the connected peer (unix_peer_get), then
 * queues the skb on the receiver's sk_receive_queue under the peer's
 * state lock and wakes the reader.
 * NOTE(review): fragmentary extract — many original lines (labels, error
 * paths, braces) are missing; do not build from this text as-is.
 */
1386 * Send AF_UNIX data.
1389 static int unix_dgram_sendmsg(struct kiocb
*kiocb
, struct socket
*sock
,
1390 struct msghdr
*msg
, size_t len
)
1392 struct sock_iocb
*siocb
= kiocb_to_siocb(kiocb
);
1393 struct sock
*sk
= sock
->sk
;
1394 struct net
*net
= sock_net(sk
);
1395 struct unix_sock
*u
= unix_sk(sk
);
1396 struct sockaddr_un
*sunaddr
= msg
->msg_name
;
1397 struct sock
*other
= NULL
;
1398 int namelen
= 0; /* fake GCC */
1401 struct sk_buff
*skb
;
1403 struct scm_cookie tmp_scm
;
/* Synchronous path: no scm cookie supplied, use one on the stack. */
1406 if (NULL
== siocb
->scm
)
1407 siocb
->scm
= &tmp_scm
;
1409 err
= scm_send(sock
, msg
, siocb
->scm
);
/* AF_UNIX datagrams have no out-of-band data. */
1414 if (msg
->msg_flags
&MSG_OOB
)
1417 if (msg
->msg_namelen
) {
1418 err
= unix_mkname(sunaddr
, msg
->msg_namelen
, &hash
);
1425 other
= unix_peer_get(sk
);
/* SOCK_PASSCRED requires a bound address; autobind if needed. */
1430 if (test_bit(SOCK_PASSCRED
, &sock
->flags
) && !u
->addr
1431 && (err
= unix_autobind(sock
)) != 0)
1435 if (len
> sk
->sk_sndbuf
- 32)
1438 skb
= sock_alloc_send_skb(sk
, len
, msg
->msg_flags
&MSG_DONTWAIT
, &err
);
1442 err
= unix_scm_to_skb(siocb
->scm
, skb
, true);
/* unix_scm_to_skb returns the fd-passing recursion depth on success. */
1445 max_level
= err
+ 1;
1446 unix_get_secdata(siocb
->scm
, skb
);
1448 skb_reset_transport_header(skb
);
1449 err
= memcpy_fromiovec(skb_put(skb
, len
), msg
->msg_iov
, len
);
1453 timeo
= sock_sndtimeo(sk
, msg
->msg_flags
& MSG_DONTWAIT
);
1458 if (sunaddr
== NULL
)
1461 other
= unix_find_other(net
, sunaddr
, namelen
, sk
->sk_type
,
1467 unix_state_lock(other
);
1469 if (!unix_may_send(sk
, other
))
/* Peer died: for a connected socket, break the link and report ECONNREFUSED. */
1472 if (sock_flag(other
, SOCK_DEAD
)) {
1474 * Check with 1003.1g - what should
1477 unix_state_unlock(other
);
1481 unix_state_lock(sk
);
1482 if (unix_peer(sk
) == other
) {
1483 unix_peer(sk
) = NULL
;
1484 unix_state_unlock(sk
);
1486 unix_dgram_disconnected(sk
, other
);
1488 err
= -ECONNREFUSED
;
1490 unix_state_unlock(sk
);
1500 if (other
->sk_shutdown
& RCV_SHUTDOWN
)
1503 if (sk
->sk_type
!= SOCK_SEQPACKET
) {
1504 err
= security_unix_may_send(sk
->sk_socket
, other
->sk_socket
);
/* Receiver queue full and we are not its peer: wait for room. */
1509 if (unix_peer(other
) != sk
&& unix_recvq_full(other
)) {
1515 timeo
= unix_wait_for_peer(other
, timeo
);
1517 err
= sock_intr_errno(timeo
);
1518 if (signal_pending(current
))
/* Deliver: queue on the peer and wake its reader. */
1524 skb_queue_tail(&other
->sk_receive_queue
, skb
);
1525 if (max_level
> unix_sk(other
)->recursion_level
)
1526 unix_sk(other
)->recursion_level
= max_level
;
1527 unix_state_unlock(other
);
1528 other
->sk_data_ready(other
, len
);
1530 scm_destroy(siocb
->scm
);
1534 unix_state_unlock(other
);
1540 scm_destroy(siocb
->scm
);
/*
 * unix_stream_sendmsg - sendmsg() for SOCK_STREAM AF_UNIX sockets.
 * Splits the user buffer into skbs sized against sk_sndbuf/SKB_MAX_ALLOC,
 * attaches SCM data (fds only on the first skb), and queues each skb on
 * the connected peer's receive queue under the peer's state lock.
 * Returns bytes sent, or a negative error if nothing was sent; raises
 * SIGPIPE on a broken pipe unless MSG_NOSIGNAL is set.
 * NOTE(review): fragmentary extract — loop braces, error labels and
 * several statements are missing from the visible text.
 */
1545 static int unix_stream_sendmsg(struct kiocb
*kiocb
, struct socket
*sock
,
1546 struct msghdr
*msg
, size_t len
)
1548 struct sock_iocb
*siocb
= kiocb_to_siocb(kiocb
);
1549 struct sock
*sk
= sock
->sk
;
1550 struct sock
*other
= NULL
;
1551 struct sockaddr_un
*sunaddr
= msg
->msg_name
;
1553 struct sk_buff
*skb
;
1555 struct scm_cookie tmp_scm
;
1556 bool fds_sent
= false;
1559 if (NULL
== siocb
->scm
)
1560 siocb
->scm
= &tmp_scm
;
1562 err
= scm_send(sock
, msg
, siocb
->scm
);
1567 if (msg
->msg_flags
&MSG_OOB
)
/* Stream sockets must already be connected; an address is an error. */
1570 if (msg
->msg_namelen
) {
1571 err
= sk
->sk_state
== TCP_ESTABLISHED
? -EISCONN
: -EOPNOTSUPP
;
1576 other
= unix_peer(sk
);
1581 if (sk
->sk_shutdown
& SEND_SHUTDOWN
)
1584 while (sent
< len
) {
1586 * Optimisation for the fact that under 0.01% of X
1587 * messages typically need breaking up.
1592 /* Keep two messages in the pipe so it schedules better */
1593 if (size
> ((sk
->sk_sndbuf
>> 1) - 64))
1594 size
= (sk
->sk_sndbuf
>> 1) - 64;
1596 if (size
> SKB_MAX_ALLOC
)
1597 size
= SKB_MAX_ALLOC
;
1603 skb
= sock_alloc_send_skb(sk
, size
, msg
->msg_flags
&MSG_DONTWAIT
,
1610 * If you pass two values to the sock_alloc_send_skb
1611 * it tries to grab the large buffer with GFP_NOFS
1612 * (which can fail easily), and if it fails grab the
1613 * fallback size buffer which is under a page and will
1616 size
= min_t(int, size
, skb_tailroom(skb
));
1619 /* Only send the fds in the first buffer */
1620 err
= unix_scm_to_skb(siocb
->scm
, skb
, !fds_sent
);
1625 max_level
= err
+ 1;
1628 err
= memcpy_fromiovec(skb_put(skb
, size
), msg
->msg_iov
, size
);
1634 unix_state_lock(other
);
/* Peer dead or shut down for receive: abort (pipe error path below). */
1636 if (sock_flag(other
, SOCK_DEAD
) ||
1637 (other
->sk_shutdown
& RCV_SHUTDOWN
))
1640 skb_queue_tail(&other
->sk_receive_queue
, skb
);
1641 if (max_level
> unix_sk(other
)->recursion_level
)
1642 unix_sk(other
)->recursion_level
= max_level
;
1643 unix_state_unlock(other
);
1644 other
->sk_data_ready(other
, size
);
1648 scm_destroy(siocb
->scm
);
1654 unix_state_unlock(other
);
/* Broken pipe with nothing sent: deliver SIGPIPE unless suppressed. */
1657 if (sent
== 0 && !(msg
->msg_flags
&MSG_NOSIGNAL
))
1658 send_sig(SIGPIPE
, current
, 0);
1661 scm_destroy(siocb
->scm
);
1663 return sent
? : err
;
/*
 * unix_seqpacket_sendmsg - sendmsg() for SOCK_SEQPACKET sockets.
 * Thin wrapper over unix_dgram_sendmsg: verifies the socket is connected
 * (TCP_ESTABLISHED) and has no pending error, then clears msg_namelen
 * (a seqpacket send ignores any supplied address) and delegates.
 * NOTE(review): fragmentary extract — braces and early-return lines are
 * missing from the visible text.
 */
1666 static int unix_seqpacket_sendmsg(struct kiocb
*kiocb
, struct socket
*sock
,
1667 struct msghdr
*msg
, size_t len
)
1670 struct sock
*sk
= sock
->sk
;
1672 err
= sock_error(sk
);
1676 if (sk
->sk_state
!= TCP_ESTABLISHED
)
1679 if (msg
->msg_namelen
)
1680 msg
->msg_namelen
= 0;
1682 return unix_dgram_sendmsg(kiocb
, sock
, msg
, len
);
/*
 * unix_copy_addr - copy the sender's bound address into msg_name.
 * Defaults msg_namelen to 0; when the sending socket has a bound address
 * (the copy at line 1691/1692 is presumably guarded by a u->addr check
 * on a line missing from this extract — verify against the original),
 * its length and sockaddr_un bytes are copied out to the caller.
 */
1685 static void unix_copy_addr(struct msghdr
*msg
, struct sock
*sk
)
1687 struct unix_sock
*u
= unix_sk(sk
);
1689 msg
->msg_namelen
= 0;
1691 msg
->msg_namelen
= u
->addr
->len
;
1692 memcpy(msg
->msg_name
, u
->addr
->name
, u
->addr
->len
);
/*
 * unix_dgram_recvmsg - recvmsg() for SOCK_DGRAM/SOCK_SEQPACKET sockets.
 * Dequeues one datagram under u->readlock, copies out the sender address
 * and payload (setting MSG_TRUNC on short reads), and hands credentials,
 * security data and any passed fds to userspace via scm_recv(). On
 * MSG_PEEK the fd list is duplicated instead of detached so the skb can
 * be delivered again.
 * NOTE(review): fragmentary extract — error labels, braces and several
 * statements are missing from the visible text.
 */
1696 static int unix_dgram_recvmsg(struct kiocb
*iocb
, struct socket
*sock
,
1697 struct msghdr
*msg
, size_t size
,
1700 struct sock_iocb
*siocb
= kiocb_to_siocb(iocb
);
1701 struct scm_cookie tmp_scm
;
1702 struct sock
*sk
= sock
->sk
;
1703 struct unix_sock
*u
= unix_sk(sk
);
1704 int noblock
= flags
& MSG_DONTWAIT
;
1705 struct sk_buff
*skb
;
1712 msg
->msg_namelen
= 0;
/* readlock serializes receivers so datagram order is preserved. */
1714 mutex_lock(&u
->readlock
);
1716 skb
= skb_recv_datagram(sk
, flags
, noblock
, &err
);
1718 unix_state_lock(sk
);
1719 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
1720 if (sk
->sk_type
== SOCK_SEQPACKET
&& err
== -EAGAIN
&&
1721 (sk
->sk_shutdown
& RCV_SHUTDOWN
))
1723 unix_state_unlock(sk
);
/* A datagram was consumed: wake senders blocked on a full queue. */
1727 wake_up_interruptible_sync(&u
->peer_wait
);
1730 unix_copy_addr(msg
, skb
->sk
);
1732 if (size
> skb
->len
)
1734 else if (size
< skb
->len
)
1735 msg
->msg_flags
|= MSG_TRUNC
;
1737 err
= skb_copy_datagram_iovec(skb
, 0, msg
->msg_iov
, size
);
1742 siocb
->scm
= &tmp_scm
;
1743 memset(&tmp_scm
, 0, sizeof(tmp_scm
));
1745 scm_set_cred(siocb
->scm
, UNIXCB(skb
).pid
, UNIXCB(skb
).cred
);
1746 unix_set_secdata(siocb
->scm
, skb
);
1748 if (!(flags
& MSG_PEEK
)) {
1750 unix_detach_fds(siocb
->scm
, skb
);
1752 /* It is questionable: on PEEK we could:
1753 - do not return fds - good, but too simple 8)
1754 - return fds, and do not return them on read (old strategy,
1756 - clone fds (I chose it for now, it is the most universal
1759 POSIX 1003.1g does not actually define this clearly
1760 at all. POSIX 1003.1g doesn't define a lot of things
1765 siocb
->scm
->fp
= scm_fp_dup(UNIXCB(skb
).fp
);
1769 scm_recv(sock
, msg
, siocb
->scm
, flags
);
1772 skb_free_datagram(sk
, skb
);
1774 mutex_unlock(&u
->readlock
);
/*
 * unix_stream_data_wait - block a stream reader until data arrives.
 * Sleeps interruptibly (with SOCK_ASYNC_WAITDATA set) until the receive
 * queue is non-empty, the socket is shut down for receive, a signal is
 * pending, or the timeout expires; re-checks under unix_state_lock to
 * avoid lost-wakeup races. Returns the remaining timeout.
 * NOTE(review): fragmentary extract — the surrounding for(;;) loop,
 * braces and the return statement are missing from the visible text.
 */
1780 * Sleep until data has arrive. But check for races..
1783 static long unix_stream_data_wait(struct sock
*sk
, long timeo
)
1787 unix_state_lock(sk
);
1790 prepare_to_wait(sk_sleep(sk
), &wait
, TASK_INTERRUPTIBLE
);
1792 if (!skb_queue_empty(&sk
->sk_receive_queue
) ||
1794 (sk
->sk_shutdown
& RCV_SHUTDOWN
) ||
1795 signal_pending(current
) ||
1799 set_bit(SOCK_ASYNC_WAITDATA
, &sk
->sk_socket
->flags
);
/* Drop the state lock across the actual sleep. */
1800 unix_state_unlock(sk
);
1801 timeo
= schedule_timeout(timeo
);
1802 unix_state_lock(sk
);
1803 clear_bit(SOCK_ASYNC_WAITDATA
, &sk
->sk_socket
->flags
);
1806 finish_wait(sk_sleep(sk
), &wait
);
1807 unix_state_unlock(sk
);
/*
 * unix_stream_recvmsg - recvmsg() for SOCK_STREAM AF_UNIX sockets.
 * Copies up to @size bytes out of queued skbs under u->readlock,
 * sleeping via unix_stream_data_wait until at least the SO_RCVLOWAT
 * target is available. Skbs from a different writer (pid/cred change)
 * are never glued into one read; partially-consumed skbs are pulled and
 * requeued at the head. fds are detached on a real read, duplicated on
 * MSG_PEEK. Returns bytes copied, or a negative error if none.
 * NOTE(review): fragmentary extract — the do/while loop structure,
 * braces and error labels are missing from the visible text.
 */
1813 static int unix_stream_recvmsg(struct kiocb
*iocb
, struct socket
*sock
,
1814 struct msghdr
*msg
, size_t size
,
1817 struct sock_iocb
*siocb
= kiocb_to_siocb(iocb
);
1818 struct scm_cookie tmp_scm
;
1819 struct sock
*sk
= sock
->sk
;
1820 struct unix_sock
*u
= unix_sk(sk
);
1821 struct sockaddr_un
*sunaddr
= msg
->msg_name
;
1823 int check_creds
= 0;
1829 if (sk
->sk_state
!= TCP_ESTABLISHED
)
1836 target
= sock_rcvlowat(sk
, flags
&MSG_WAITALL
, size
);
1837 timeo
= sock_rcvtimeo(sk
, flags
&MSG_DONTWAIT
);
1839 msg
->msg_namelen
= 0;
1841 /* Lock the socket to prevent queue disordering
1842 * while sleeps in memcpy_tomsg
1846 siocb
->scm
= &tmp_scm
;
1847 memset(&tmp_scm
, 0, sizeof(tmp_scm
));
1850 mutex_lock(&u
->readlock
);
1854 struct sk_buff
*skb
;
1856 unix_state_lock(sk
);
1857 skb
= skb_dequeue(&sk
->sk_receive_queue
);
/* Queue drained: reset the fd-passing recursion level. */
1859 unix_sk(sk
)->recursion_level
= 0;
1860 if (copied
>= target
)
1864 * POSIX 1003.1g mandates this order.
1867 err
= sock_error(sk
);
1870 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
1873 unix_state_unlock(sk
);
/* Release readlock while sleeping so writers/other readers can progress. */
1877 mutex_unlock(&u
->readlock
);
1879 timeo
= unix_stream_data_wait(sk
, timeo
);
1881 if (signal_pending(current
)) {
1882 err
= sock_intr_errno(timeo
);
1885 mutex_lock(&u
->readlock
);
1888 unix_state_unlock(sk
);
1891 unix_state_unlock(sk
);
1894 /* Never glue messages from different writers */
1895 if ((UNIXCB(skb
).pid
!= siocb
->scm
->pid
) ||
1896 (UNIXCB(skb
).cred
!= siocb
->scm
->cred
)) {
1897 skb_queue_head(&sk
->sk_receive_queue
, skb
);
1901 /* Copy credentials */
1902 scm_set_cred(siocb
->scm
, UNIXCB(skb
).pid
, UNIXCB(skb
).cred
);
1906 /* Copy address just once */
1908 unix_copy_addr(msg
, skb
->sk
);
1912 chunk
= min_t(unsigned int, skb
->len
, size
);
/* Copy failure: put the skb back so no data is lost. */
1913 if (memcpy_toiovec(msg
->msg_iov
, skb
->data
, chunk
)) {
1914 skb_queue_head(&sk
->sk_receive_queue
, skb
);
1922 /* Mark read part of skb as used */
1923 if (!(flags
& MSG_PEEK
)) {
1924 skb_pull(skb
, chunk
);
1927 unix_detach_fds(siocb
->scm
, skb
);
1929 /* put the skb back if we didn't use it up.. */
1931 skb_queue_head(&sk
->sk_receive_queue
, skb
);
1940 /* It is questionable, see note in unix_dgram_recvmsg.
1943 siocb
->scm
->fp
= scm_fp_dup(UNIXCB(skb
).fp
);
1945 /* put message back and return */
1946 skb_queue_head(&sk
->sk_receive_queue
, skb
);
1951 mutex_unlock(&u
->readlock
);
1952 scm_recv(sock
, msg
, siocb
->scm
, flags
);
1954 return copied
? : err
;
/*
 * unix_shutdown - shutdown() for AF_UNIX sockets.
 * Maps the userspace mode (SHUT_RD=0/SHUT_WR=1/SHUT_RDWR=2) to the
 * kernel RCV_SHUTDOWN/SEND_SHUTDOWN bits via (mode+1), applies them to
 * this socket, and for connection-oriented types mirrors the opposite
 * bits onto the peer (our SEND shuts the peer's RCV side and vice
 * versa), waking the peer with POLL_HUP or POLL_IN as appropriate.
 * NOTE(review): fragmentary extract — braces, the peer-reference
 * handling and the return statement are missing from the visible text.
 */
1957 static int unix_shutdown(struct socket
*sock
, int mode
)
1959 struct sock
*sk
= sock
->sk
;
/* SHUT_* +1 conveniently yields the RCV/SEND bit mask. */
1962 mode
= (mode
+1)&(RCV_SHUTDOWN
|SEND_SHUTDOWN
);
1965 unix_state_lock(sk
);
1966 sk
->sk_shutdown
|= mode
;
1967 other
= unix_peer(sk
);
1970 unix_state_unlock(sk
);
1971 sk
->sk_state_change(sk
);
1974 (sk
->sk_type
== SOCK_STREAM
|| sk
->sk_type
== SOCK_SEQPACKET
)) {
/* Our send side closing means the peer's receive side closes, etc. */
1978 if (mode
&RCV_SHUTDOWN
)
1979 peer_mode
|= SEND_SHUTDOWN
;
1980 if (mode
&SEND_SHUTDOWN
)
1981 peer_mode
|= RCV_SHUTDOWN
;
1982 unix_state_lock(other
);
1983 other
->sk_shutdown
|= peer_mode
;
1984 unix_state_unlock(other
);
1985 other
->sk_state_change(other
);
1986 if (peer_mode
== SHUTDOWN_MASK
)
1987 sk_wake_async(other
, SOCK_WAKE_WAITD
, POLL_HUP
);
1988 else if (peer_mode
& RCV_SHUTDOWN
)
1989 sk_wake_async(other
, SOCK_WAKE_WAITD
, POLL_IN
);
/*
 * unix_ioctl - ioctl() for AF_UNIX sockets.
 * Visible cases handle queue-size queries (presumably SIOCOUTQ and
 * SIOCINQ — the switch/case lines are missing from this extract;
 * verify against the original): write-queue bytes via
 * sk_wmem_alloc_get(), and readable bytes computed under the receive
 * queue lock — summed over all skbs for stream/seqpacket, first-skb
 * length for datagram. Listening sockets reject the inq query.
 */
1997 static int unix_ioctl(struct socket
*sock
, unsigned int cmd
, unsigned long arg
)
1999 struct sock
*sk
= sock
->sk
;
2005 amount
= sk_wmem_alloc_get(sk
);
2006 err
= put_user(amount
, (int __user
*)arg
);
2010 struct sk_buff
*skb
;
2012 if (sk
->sk_state
== TCP_LISTEN
) {
/* Walk/peek under the queue lock so the count is consistent. */
2017 spin_lock(&sk
->sk_receive_queue
.lock
);
2018 if (sk
->sk_type
== SOCK_STREAM
||
2019 sk
->sk_type
== SOCK_SEQPACKET
) {
2020 skb_queue_walk(&sk
->sk_receive_queue
, skb
)
2023 skb
= skb_peek(&sk
->sk_receive_queue
);
2027 spin_unlock(&sk
->sk_receive_queue
.lock
);
2028 err
= put_user(amount
, (int __user
*)arg
);
/*
 * unix_poll - poll() for connection-oriented AF_UNIX sockets.
 * Registers on the socket's wait queue, then reports: hangup/rdhup on
 * shutdown, POLLIN|POLLRDNORM when the receive queue is non-empty or
 * receive is shut down, connection-termination state for stream and
 * seqpacket types, and POLLOUT when writable — including after the
 * peer shut down, to avoid stuck sockets.
 * NOTE(review): fragmentary extract — mask initialization, several
 * mask |= lines and the return are missing from the visible text.
 */
2039 static unsigned int unix_poll(struct file
*file
, struct socket
*sock
, poll_table
*wait
)
2041 struct sock
*sk
= sock
->sk
;
2044 sock_poll_wait(file
, sk_sleep(sk
), wait
);
2047 /* exceptional events? */
2050 if (sk
->sk_shutdown
== SHUTDOWN_MASK
)
2052 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
2056 if (!skb_queue_empty(&sk
->sk_receive_queue
) ||
2057 (sk
->sk_shutdown
& RCV_SHUTDOWN
))
2058 mask
|= POLLIN
| POLLRDNORM
;
2060 /* Connection-based need to check for termination and startup */
2061 if ((sk
->sk_type
== SOCK_STREAM
|| sk
->sk_type
== SOCK_SEQPACKET
) &&
2062 sk
->sk_state
== TCP_CLOSE
)
2066 * we set writable also when the other side has shut down the
2067 * connection. This prevents stuck sockets.
2069 if (unix_writable(sk
))
2070 mask
|= POLLOUT
| POLLWRNORM
| POLLWRBAND
;
/*
 * unix_dgram_poll - poll() for datagram/seqpacket AF_UNIX sockets.
 * Like unix_poll but additionally: reports errors from sk_error_queue,
 * handles seqpacket connection state, and for writability also waits on
 * the peer's peer_wait queue — a connected sender is only writable when
 * the peer's receive queue is not full; otherwise SOCK_ASYNC_NOSPACE is
 * set so the sender is woken when space appears.
 * NOTE(review): fragmentary extract — mask initialization, several
 * mask |= lines, the peer put and the return are missing.
 */
2075 static unsigned int unix_dgram_poll(struct file
*file
, struct socket
*sock
,
2078 struct sock
*sk
= sock
->sk
, *other
;
2079 unsigned int mask
, writable
;
2081 sock_poll_wait(file
, sk_sleep(sk
), wait
);
2084 /* exceptional events? */
2085 if (sk
->sk_err
|| !skb_queue_empty(&sk
->sk_error_queue
))
2087 if (sk
->sk_shutdown
& RCV_SHUTDOWN
)
2089 if (sk
->sk_shutdown
== SHUTDOWN_MASK
)
2093 if (!skb_queue_empty(&sk
->sk_receive_queue
) ||
2094 (sk
->sk_shutdown
& RCV_SHUTDOWN
))
2095 mask
|= POLLIN
| POLLRDNORM
;
2097 /* Connection-based need to check for termination and startup */
2098 if (sk
->sk_type
== SOCK_SEQPACKET
) {
2099 if (sk
->sk_state
== TCP_CLOSE
)
2101 /* connection hasn't started yet? */
2102 if (sk
->sk_state
== TCP_SYN_SENT
)
2107 writable
= unix_writable(sk
);
2109 other
= unix_peer_get(sk
);
/* Not the peer's peer: writability depends on its queue having room. */
2111 if (unix_peer(other
) != sk
) {
2112 sock_poll_wait(file
, &unix_sk(other
)->peer_wait
,
2114 if (unix_recvq_full(other
))
2123 mask
|= POLLOUT
| POLLWRNORM
| POLLWRBAND
;
2125 set_bit(SOCK_ASYNC_NOSPACE
, &sk
->sk_socket
->flags
);
/*
 * /proc/net/unix iteration helpers (CONFIG_PROC_FS only).
 * first_unix_socket/next_unix_socket walk the global unix_socket_table
 * hash (UNIX_HASH_SIZE+1 chains, caller holds unix_table_lock), moving
 * along a chain via sk_next and then on to the next non-empty chain.
 * unix_iter_state keeps the chain index; unix_seq_idx skips sockets
 * from other network namespaces to find the pos'th entry.
 * NOTE(review): fragmentary extract — return NULL fallthroughs, braces
 * and parts of unix_seq_idx are missing from the visible text.
 */
2130 #ifdef CONFIG_PROC_FS
2131 static struct sock
*first_unix_socket(int *i
)
2133 for (*i
= 0; *i
<= UNIX_HASH_SIZE
; (*i
)++) {
2134 if (!hlist_empty(&unix_socket_table
[*i
]))
2135 return __sk_head(&unix_socket_table
[*i
]);
2140 static struct sock
*next_unix_socket(int *i
, struct sock
*s
)
2142 struct sock
*next
= sk_next(s
);
2143 /* More in this chain? */
2146 /* Look for next non-empty chain. */
2147 for ((*i
)++; *i
<= UNIX_HASH_SIZE
; (*i
)++) {
2148 if (!hlist_empty(&unix_socket_table
[*i
]))
2149 return __sk_head(&unix_socket_table
[*i
]);
2154 struct unix_iter_state
{
2155 struct seq_net_private p
;
2159 static struct sock
*unix_seq_idx(struct seq_file
*seq
, loff_t pos
)
2161 struct unix_iter_state
*iter
= seq
->private;
/* Skip sockets belonging to other network namespaces. */
2165 for (s
= first_unix_socket(&iter
->i
); s
; s
= next_unix_socket(&iter
->i
, s
)) {
2166 if (sock_net(s
) != seq_file_net(seq
))
/*
 * seq_file callbacks for /proc/net/unix.
 * start: takes unix_table_lock (held across the whole traversal, see
 * __acquires/__releases annotations) and positions the iterator —
 * SEQ_START_TOKEN at pos 0 triggers the header line in show.
 * next: advances to the next socket in this namespace.
 * stop: drops unix_table_lock.
 * show: prints the header or one line per socket (pointer, refcount,
 * flags, type, state) followed by the bound sun_path; abstract names
 * are handled specially (leading NUL — the branch body is missing from
 * this extract).
 */
2175 static void *unix_seq_start(struct seq_file
*seq
, loff_t
*pos
)
2176 __acquires(unix_table_lock
)
2178 spin_lock(&unix_table_lock
);
2179 return *pos
? unix_seq_idx(seq
, *pos
- 1) : SEQ_START_TOKEN
;
2182 static void *unix_seq_next(struct seq_file
*seq
, void *v
, loff_t
*pos
)
2184 struct unix_iter_state
*iter
= seq
->private;
2185 struct sock
*sk
= v
;
2188 if (v
== SEQ_START_TOKEN
)
2189 sk
= first_unix_socket(&iter
->i
);
2191 sk
= next_unix_socket(&iter
->i
, sk
);
2192 while (sk
&& (sock_net(sk
) != seq_file_net(seq
)))
2193 sk
= next_unix_socket(&iter
->i
, sk
);
2197 static void unix_seq_stop(struct seq_file
*seq
, void *v
)
2198 __releases(unix_table_lock
)
2200 spin_unlock(&unix_table_lock
);
2203 static int unix_seq_show(struct seq_file
*seq
, void *v
)
2206 if (v
== SEQ_START_TOKEN
)
2207 seq_puts(seq
, "Num RefCount Protocol Flags Type St "
2211 struct unix_sock
*u
= unix_sk(s
);
2214 seq_printf(seq
, "%p: %08X %08X %08X %04X %02X %5lu",
2216 atomic_read(&s
->sk_refcnt
),
2218 s
->sk_state
== TCP_LISTEN
? __SO_ACCEPTCON
: 0,
2221 (s
->sk_state
== TCP_ESTABLISHED
? SS_CONNECTED
: SS_UNCONNECTED
) :
2222 (s
->sk_state
== TCP_ESTABLISHED
? SS_CONNECTING
: SS_DISCONNECTING
),
2230 len
= u
->addr
->len
- sizeof(short);
2231 if (!UNIX_ABSTRACT(s
))
2237 for ( ; i
< len
; i
++)
2238 seq_putc(seq
, u
->addr
->name
->sun_path
[i
]);
2240 unix_state_unlock(s
);
2241 seq_putc(seq
, '\n');
/*
 * seq_file/proc plumbing and per-namespace init.
 * unix_seq_ops wires the four iterator callbacks; unix_seq_open binds
 * them to a per-net seq file with unix_iter_state private data;
 * unix_seq_fops is the /proc/net/unix file_operations; unix_family_ops
 * registers unix_create for PF_UNIX. unix_net_init sets the default
 * datagram backlog (10), registers the per-net sysctls, and creates
 * /proc/net/unix — unregistering the sysctls again on proc failure.
 * NOTE(review): fragmentary extract — closing braces, .read/.family
 * initializers and error returns are missing from the visible text.
 */
2247 static const struct seq_operations unix_seq_ops
= {
2248 .start
= unix_seq_start
,
2249 .next
= unix_seq_next
,
2250 .stop
= unix_seq_stop
,
2251 .show
= unix_seq_show
,
2254 static int unix_seq_open(struct inode
*inode
, struct file
*file
)
2256 return seq_open_net(inode
, file
, &unix_seq_ops
,
2257 sizeof(struct unix_iter_state
));
2260 static const struct file_operations unix_seq_fops
= {
2261 .owner
= THIS_MODULE
,
2262 .open
= unix_seq_open
,
2264 .llseek
= seq_lseek
,
2265 .release
= seq_release_net
,
2270 static const struct net_proto_family unix_family_ops
= {
2272 .create
= unix_create
,
2273 .owner
= THIS_MODULE
,
2277 static int __net_init
unix_net_init(struct net
*net
)
2279 int error
= -ENOMEM
;
/* Default max datagram queue length for this namespace. */
2281 net
->unx
.sysctl_max_dgram_qlen
= 10;
2282 if (unix_sysctl_register(net
))
2285 #ifdef CONFIG_PROC_FS
2286 if (!proc_net_fops_create(net
, "unix", 0, &unix_seq_fops
)) {
2287 unix_sysctl_unregister(net
);
/*
 * Module setup/teardown.
 * unix_net_exit undoes unix_net_init (sysctls + /proc/net/unix);
 * unix_net_ops registers both with the pernet machinery. af_unix_init
 * asserts unix_skb_parms fits in skb->cb, registers the proto, the
 * PF_UNIX family and the pernet ops; af_unix_exit unregisters them in
 * reverse. Registered via fs_initcall — see the comment block at the
 * end for the ordering rationale.
 * NOTE(review): fragmentary extract — braces, 'rc' declaration/returns
 * and the proto_register error branch body are missing.
 */
2296 static void __net_exit
unix_net_exit(struct net
*net
)
2298 unix_sysctl_unregister(net
);
2299 proc_net_remove(net
, "unix");
2302 static struct pernet_operations unix_net_ops
= {
2303 .init
= unix_net_init
,
2304 .exit
= unix_net_exit
,
2307 static int __init
af_unix_init(void)
2310 struct sk_buff
*dummy_skb
;
/* Compile-time check: our per-skb control block must fit in skb->cb. */
2312 BUILD_BUG_ON(sizeof(struct unix_skb_parms
) > sizeof(dummy_skb
->cb
));
2314 rc
= proto_register(&unix_proto
, 1);
2316 printk(KERN_CRIT
"%s: Cannot create unix_sock SLAB cache!\n",
2321 sock_register(&unix_family_ops
);
2322 register_pernet_subsys(&unix_net_ops
);
2327 static void __exit
af_unix_exit(void)
2329 sock_unregister(PF_UNIX
);
2330 proto_unregister(&unix_proto
);
2331 unregister_pernet_subsys(&unix_net_ops
);
2334 /* Earlier than device_initcall() so that other drivers invoking
2335 request_module() don't end up in a loop when modprobe tries
2336 to use a UNIX socket. But later than subsys_initcall() because
2337 we depend on stuff initialised there */
2338 fs_initcall(af_unix_init
);
2339 module_exit(af_unix_exit
);
2341 MODULE_LICENSE("GPL");
2342 MODULE_ALIAS_NETPROTO(PF_UNIX
);