release/src/linux/linux/net/socket.c

   1 /*
   2  * NET          An implementation of the SOCKET network access protocol.
   3  *
   4  * Version:     @(#)socket.c    1.1.93  18/02/95
   5  *
   6  * Authors:     Orest Zborowski, <obz@Kodak.COM>
   7  *              Ross Biro, <bir7@leland.Stanford.Edu>
   8  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
   9  *
  10  * Fixes:
  11  *              Anonymous       :       NOTSOCK/BADF cleanup. Error fix in
  12  *                                      shutdown()
  13  *              Alan Cox        :       verify_area() fixes
  14  *              Alan Cox        :       Removed DDI
  15  *              Jonathan Kamens :       SOCK_DGRAM reconnect bug
  16  *              Alan Cox        :       Moved a load of checks to the very
  17  *                                      top level.
  18  *              Alan Cox        :       Move address structures to/from user
  19  *                                      mode above the protocol layers.
  20  *              Rob Janssen     :       Allow 0 length sends.
  21  *              Alan Cox        :       Asynchronous I/O support (cribbed from the
  22  *                                      tty drivers).
  23  *              Niibe Yutaka    :       Asynchronous I/O for writes (4.4BSD style)
  24  *              Jeff Uphoff     :       Made max number of sockets command-line
  25  *                                      configurable.
  26  *              Matti Aarnio    :       Made the number of sockets dynamic,
  27  *                                      to be allocated when needed, and mr.
  28  *                                      Uphoff's max is used as max to be
  29  *                                      allowed to allocate.
  30  *              Linus           :       Argh. removed all the socket allocation
  31  *                                      altogether: it's in the inode now.
  32  *              Alan Cox        :       Made sock_alloc()/sock_release() public
  33  *                                      for NetROM and future kernel nfsd type
  34  *                                      stuff.
  35  *              Alan Cox        :       sendmsg/recvmsg basics.
  36  *              Tom Dyas        :       Export net symbols.
  37  *              Marcin Dalecki  :       Fixed problems with CONFIG_NET="n".
  38  *              Alan Cox        :       Added thread locking to sys_* calls
  39  *                                      for sockets. May have errors at the
  40  *                                      moment.
  41  *              Kevin Buhr      :       Fixed the dumb errors in the above.
  42  *              Andi Kleen      :       Some small cleanups, optimizations,
  43  *                                      and fixed a copy_from_user() bug.
  44  *              Tigran Aivazian :       sys_send(args) calls sys_sendto(args, NULL, 0)
  45  *              Tigran Aivazian :       Made listen(2) backlog sanity checks
  46  *                                      protocol-independent
  47  *
  48  *
  49  *              This program is free software; you can redistribute it and/or
  50  *              modify it under the terms of the GNU General Public License
  51  *              as published by the Free Software Foundation; either version
  52  *              2 of the License, or (at your option) any later version.
  53  *
  54  *
  55  *      This module is effectively the top level interface to the BSD socket
  56  *      paradigm.
  57  *
  58  */
  59
  60 #include <linux/config.h>
  61 #include <linux/mm.h>
  62 #include <linux/smp_lock.h>
  63 #include <linux/socket.h>
  64 #include <linux/file.h>
  65 #include <linux/net.h>
  66 #include <linux/interrupt.h>
  67 #include <linux/netdevice.h>
  68 #include <linux/proc_fs.h>
  69 #include <linux/wanrouter.h>
  70 #include <linux/netlink.h>
  71 #include <linux/rtnetlink.h>
  72 #include <linux/init.h>
  73 #include <linux/poll.h>
  74 #include <linux/cache.h>
  75 #include <linux/module.h>
  76 #include <linux/highmem.h>
  77
  78 #if defined(CONFIG_KMOD) && defined(CONFIG_NET)
  79 #include <linux/kmod.h>
  80 #endif
  81
  82 #include <asm/uaccess.h>
  83
  84 #include <net/sock.h>
  85 #include <net/scm.h>
  86 #include <linux/netfilter.h>
  87
  88 static int sock_no_open(struct inode *irrelevant, struct file *dontcare);
  89 static ssize_t sock_read(struct file *file, char *buf,
  90                          size_t size, loff_t *ppos);
  91 static ssize_t sock_write(struct file *file, const char *buf,
  92                           size_t size, loff_t *ppos);
  93 static int sock_mmap(struct file *file, struct vm_area_struct * vma);
  94
  95 static int sock_close(struct inode *inode, struct file *file);
  96 static unsigned int sock_poll(struct file *file,
  97                               struct poll_table_struct *wait);
  98 static int sock_ioctl(struct inode *inode, struct file *file,
  99                       unsigned int cmd, unsigned long arg);
 100 static int sock_fasync(int fd, struct file *filp, int on);
 101 static ssize_t sock_readv(struct file *file, const struct iovec *vector,
 102                           unsigned long count, loff_t *ppos);
 103 static ssize_t sock_writev(struct file *file, const struct iovec *vector,
 104                           unsigned long count, loff_t *ppos);
 105 static ssize_t sock_sendpage(struct file *file, struct page *page,
 106                              int offset, size_t size, loff_t *ppos, int more);
 107
 108
 109 /*
 110  *      Socket files have a set of 'special' operations as well as the generic file ones. These don't appear
 111  *      in the operation structures but are done directly via the socketcall() multiplexor.
 112  */
 113
 114 static struct file_operations socket_file_ops = {
 115         llseek:         no_llseek,
 116         read:           sock_read,
 117         write:          sock_write,
 118         poll:           sock_poll,
 119         ioctl:          sock_ioctl,
 120         mmap:           sock_mmap,
 121         open:           sock_no_open,   /* special open code to disallow open via /proc */
 122         release:        sock_close,
 123         fasync:         sock_fasync,
 124         readv:          sock_readv,
 125         writev:         sock_writev,
 126         sendpage:       sock_sendpage
 127 };
 128
 129 /*
 130  *      The protocol list. Each protocol is registered in here.
 131  */
 132
 133 static struct net_proto_family *net_families[NPROTO];
 134
 135 #ifdef CONFIG_SMP
 136 static atomic_t net_family_lockct = ATOMIC_INIT(0);
 137 static spinlock_t net_family_lock = SPIN_LOCK_UNLOCKED;
 138
 139 /* The strategy is: modifications net_family vector are short, do not
 140    sleep and veeery rare, but read access should be free of any exclusive
 141    locks.
 142  */
 143
 144 static void net_family_write_lock(void)
 145 {
 146         spin_lock(&net_family_lock);
 147         while (atomic_read(&net_family_lockct) != 0) {
 148                 spin_unlock(&net_family_lock);
 149
 150                 yield();
 151
 152                 spin_lock(&net_family_lock);
 153         }
 154 }
 155
 156 static __inline__ void net_family_write_unlock(void)
 157 {
 158         spin_unlock(&net_family_lock);
 159 }
 160
 161 static __inline__ void net_family_read_lock(void)
 162 {
 163         atomic_inc(&net_family_lockct);
 164         spin_unlock_wait(&net_family_lock);
 165 }
 166
 167 static __inline__ void net_family_read_unlock(void)
 168 {
 169         atomic_dec(&net_family_lockct);
 170 }
 171
 172 #else
 173 #define net_family_write_lock() do { } while(0)
 174 #define net_family_write_unlock() do { } while(0)
 175 #define net_family_read_lock() do { } while(0)
 176 #define net_family_read_unlock() do { } while(0)
 177 #endif
 178
 179
/*
 *	Statistics counters of the socket lists.
 *
 *	The array has one slot per CPU (NR_CPUS entries).  sock_alloc()
 *	increments and sock_release() decrements the counter of the slot
 *	selected by smp_processor_id().  The __pad member makes every slot
 *	at least SMP_CACHE_BYTES in size.
 */

static union {
	int	counter;
	char	__pad[SMP_CACHE_BYTES];
} sockets_in_use[NR_CPUS] __cacheline_aligned = {{0}};
 188
 189 /*
 190  *      Support routines. Move socket addresses back and forth across the kernel/user
 191  *      divide and look after the messy bits.
 192  */
 193
/*
 * Largest socket address length accepted by move_addr_to_kernel() and
 * move_addr_to_user().
 *
 * Reference sizes: 108 for Unix domain, 16 for IP, 16 for IPX, 24 for IPv6,
 * about 80 for AX.25.  The value must be at least one bigger than the
 * AF_UNIX size (see net/unix/af_unix.c:unix_mkname()).
 */
#define MAX_SOCK_ADDR	128
 202
 203 /**
 204  *      move_addr_to_kernel     -       copy a socket address into kernel space
 205  *      @uaddr: Address in user space
 206  *      @kaddr: Address in kernel space
 207  *      @ulen: Length in user space
 208  *
 209  *      The address is copied into kernel space. If the provided address is
 210  *      too long an error code of -EINVAL is returned. If the copy gives
 211  *      invalid addresses -EFAULT is returned. On a success 0 is returned.
 212  */
 213
 214 int move_addr_to_kernel(void *uaddr, int ulen, void *kaddr)
 215 {
 216         if(ulen<0||ulen>MAX_SOCK_ADDR)
 217                 return -EINVAL;
 218         if(ulen==0)
 219                 return 0;
 220         if(copy_from_user(kaddr,uaddr,ulen))
 221                 return -EFAULT;
 222         return 0;
 223 }
 224
 225 /**
 226  *      move_addr_to_user       -       copy an address to user space
 227  *      @kaddr: kernel space address
 228  *      @klen: length of address in kernel
 229  *      @uaddr: user space address
 230  *      @ulen: pointer to user length field
 231  *
 232  *      The value pointed to by ulen on entry is the buffer length available.
 233  *      This is overwritten with the buffer space used. -EINVAL is returned
 234  *      if an overlong buffer is specified or a negative buffer size. -EFAULT
 235  *      is returned if either the buffer or the length field are not
 236  *      accessible.
 237  *      After copying the data up to the limit the user specifies, the true
 238  *      length of the data is written over the length limit the user
 239  *      specified. Zero is returned for a success.
 240  */
 241
 242 int move_addr_to_user(void *kaddr, int klen, void *uaddr, int *ulen)
 243 {
 244         int err;
 245         int len;
 246
 247         if((err=get_user(len, ulen)))
 248                 return err;
 249         if(len>klen)
 250                 len=klen;
 251         if(len<0 || len> MAX_SOCK_ADDR)
 252                 return -EINVAL;
 253         if(len)
 254         {
 255                 if(copy_to_user(uaddr,kaddr,len))
 256                         return -EFAULT;
 257         }
 258         /*
 259          *      "fromlen shall refer to the value before truncation.."
 260          *                      1003.1g
 261          */
 262         return __put_user(klen, ulen);
 263 }
 264
 265 #define SOCKFS_MAGIC 0x534F434B
 266 static int sockfs_statfs(struct super_block *sb, struct statfs *buf)
 267 {
 268         buf->f_type = SOCKFS_MAGIC;
 269         buf->f_bsize = 1024;
 270         buf->f_namelen = 255;
 271         return 0;
 272 }
 273
 274 static struct super_operations sockfs_ops = {
 275         statfs:         sockfs_statfs,
 276 };
 277
 278 static struct super_block * sockfs_read_super(struct super_block *sb, void *data, int silent)
 279 {
 280         struct inode *root = new_inode(sb);
 281         if (!root)
 282                 return NULL;
 283         root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
 284         root->i_uid = root->i_gid = 0;
 285         root->i_atime = root->i_mtime = root->i_ctime = CURRENT_TIME;
 286         sb->s_blocksize = 1024;
 287         sb->s_blocksize_bits = 10;
 288         sb->s_magic = SOCKFS_MAGIC;
 289         sb->s_op        = &sockfs_ops;
 290         sb->s_root = d_alloc(NULL, &(const struct qstr) { "socket:", 7, 0 });
 291         if (!sb->s_root) {
 292                 iput(root);
 293                 return NULL;
 294         }
 295         sb->s_root->d_sb = sb;
 296         sb->s_root->d_parent = sb->s_root;
 297         d_instantiate(sb->s_root, root);
 298         return sb;
 299 }
 300
/*
 * Mount of the sockfs filesystem.  sock_alloc() takes the superblock for
 * new socket inodes from it and sock_map_fd() allocates dentries below its
 * root.  Where it gets assigned is not visible in this part of the file.
 */
static struct vfsmount *sock_mnt;
/* Filesystem type "sockfs", set up by sockfs_read_super(), flagged FS_NOMOUNT. */
static DECLARE_FSTYPE(sock_fs_type, "sockfs", sockfs_read_super, FS_NOMOUNT);
/*
 * d_delete hook of socket dentries: unconditionally returns 1.
 * NOTE(review): presumably this tells the dcache not to keep unused socket
 * dentries around - confirm against the dcache d_delete contract.
 */
static int sockfs_delete_dentry(struct dentry *dentry)
{
	return 1;
}
/* Dentry operations installed on every socket dentry by sock_map_fd(). */
static struct dentry_operations sockfs_dentry_operations = {
	d_delete:	sockfs_delete_dentry,
};
 310
 311 /*
 312  *      Obtains the first available file descriptor and sets it up for use.
 313  *
 314  *      This functions creates file structure and maps it to fd space
 315  *      of current process. On success it returns file descriptor
 316  *      and file struct implicitly stored in sock->file.
 317  *      Note that another thread may close file descriptor before we return
 318  *      from this function. We use the fact that now we do not refer
 319  *      to socket after mapping. If one day we will need it, this
 320  *      function will inincrement ref. count on file by 1.
 321  *
 322  *      In any case returned fd MAY BE not valid!
 323  *      This race condition is inavoidable
 324  *      with shared fd spaces, we cannot solve is inside kernel,
 325  *      but we take care of internal coherence yet.
 326  */
 327
 328 static int sock_map_fd(struct socket *sock)
 329 {
 330         int fd;
 331         struct qstr this;
 332         char name[32];
 333
 334         /*
 335          *      Find a file descriptor suitable for return to the user.
 336          */
 337
 338         fd = get_unused_fd();
 339         if (fd >= 0) {
 340                 struct file *file = get_empty_filp();
 341
 342                 if (!file) {
 343                         put_unused_fd(fd);
 344                         fd = -ENFILE;
 345                         goto out;
 346                 }
 347
 348                 sprintf(name, "[%lu]", sock->inode->i_ino);
 349                 this.name = name;
 350                 this.len = strlen(name);
 351                 this.hash = sock->inode->i_ino;
 352
 353                 file->f_dentry = d_alloc(sock_mnt->mnt_sb->s_root, &this);
 354                 if (!file->f_dentry) {
 355                         put_filp(file);
 356                         put_unused_fd(fd);
 357                         fd = -ENOMEM;
 358                         goto out;
 359                 }
 360                 file->f_dentry->d_op = &sockfs_dentry_operations;
 361                 d_add(file->f_dentry, sock->inode);
 362                 file->f_vfsmnt = mntget(sock_mnt);
 363
 364                 sock->file = file;
 365                 file->f_op = sock->inode->i_fop = &socket_file_ops;
 366                 file->f_mode = 3;
 367                 file->f_flags = O_RDWR;
 368                 file->f_pos = 0;
 369                 fd_install(fd, file);
 370         }
 371
 372 out:
 373         return fd;
 374 }
 375
 376 extern __inline__ struct socket *socki_lookup(struct inode *inode)
 377 {
 378         return &inode->u.socket_i;
 379 }
 380
 381 /**
 382  *      sockfd_lookup   -       Go from a file number to its socket slot
 383  *      @fd: file handle
 384  *      @err: pointer to an error code return
 385  *
 386  *      The file handle passed in is locked and the socket it is bound
 387  *      too is returned. If an error occurs the err pointer is overwritten
 388  *      with a negative errno code and NULL is returned. The function checks
 389  *      for both invalid handles and passing a handle which is not a socket.
 390  *
 391  *      On a success the socket object pointer is returned.
 392  */
 393
 394 struct socket *sockfd_lookup(int fd, int *err)
 395 {
 396         struct file *file;
 397         struct inode *inode;
 398         struct socket *sock;
 399
 400         if (!(file = fget(fd)))
 401         {
 402                 *err = -EBADF;
 403                 return NULL;
 404         }
 405
 406         inode = file->f_dentry->d_inode;
 407         if (!inode->i_sock || !(sock = socki_lookup(inode)))
 408         {
 409                 *err = -ENOTSOCK;
 410                 fput(file);
 411                 return NULL;
 412         }
 413
 414         if (sock->file != file) {
 415                 printk(KERN_ERR "socki_lookup: socket file changed!\n");
 416                 sock->file = file;
 417         }
 418         return sock;
 419 }
 420
 421 extern __inline__ void sockfd_put(struct socket *sock)
 422 {
 423         fput(sock->file);
 424 }
 425
 426 /**
 427  *      sock_alloc      -       allocate a socket
 428  *
 429  *      Allocate a new inode and socket object. The two are bound together
 430  *      and initialised. The socket is then returned. If we are out of inodes
 431  *      NULL is returned.
 432  */
 433
 434 struct socket *sock_alloc(void)
 435 {
 436         struct inode * inode;
 437         struct socket * sock;
 438
 439         inode = get_empty_inode();
 440         if (!inode)
 441                 return NULL;
 442
 443         inode->i_sb = sock_mnt->mnt_sb;
 444         sock = socki_lookup(inode);
 445
 446         inode->i_mode = S_IFSOCK|S_IRWXUGO;
 447         inode->i_sock = 1;
 448         inode->i_uid = current->fsuid;
 449         inode->i_gid = current->fsgid;
 450
 451         sock->inode = inode;
 452         init_waitqueue_head(&sock->wait);
 453         sock->fasync_list = NULL;
 454         sock->state = SS_UNCONNECTED;
 455         sock->flags = 0;
 456         sock->ops = NULL;
 457         sock->sk = NULL;
 458         sock->file = NULL;
 459
 460         sockets_in_use[smp_processor_id()].counter++;
 461         return sock;
 462 }
 463
 464 /*
 465  *      In theory you can't get an open on this inode, but /proc provides
 466  *      a back door. Remember to keep it shut otherwise you'll let the
 467  *      creepy crawlies in.
 468  */
 469
 470 static int sock_no_open(struct inode *irrelevant, struct file *dontcare)
 471 {
 472         return -ENXIO;
 473 }
 474
 475 /**
 476  *      sock_release    -       close a socket
 477  *      @sock: socket to close
 478  *
 479  *      The socket is released from the protocol stack if it has a release
 480  *      callback, and the inode is then released if the socket is bound to
 481  *      an inode not a file.
 482  */
 483
 484 void sock_release(struct socket *sock)
 485 {
 486         if (sock->ops)
 487                 sock->ops->release(sock);
 488
 489         if (sock->fasync_list)
 490                 printk(KERN_ERR "sock_release: fasync list not empty!\n");
 491
 492         sockets_in_use[smp_processor_id()].counter--;
 493         if (!sock->file) {
 494                 iput(sock->inode);
 495                 return;
 496         }
 497         sock->file=NULL;
 498 }
 499
 500 int sock_sendmsg(struct socket *sock, struct msghdr *msg, int size)
 501 {
 502         int err;
 503         struct scm_cookie scm;
 504
 505         err = scm_send(sock, msg, &scm);
 506         if (err >= 0) {
 507                 err = sock->ops->sendmsg(sock, msg, size, &scm);
 508                 scm_destroy(&scm);
 509         }
 510         return err;
 511 }
 512
 513 int sock_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags)
 514 {
 515         struct scm_cookie scm;
 516
 517         memset(&scm, 0, sizeof(scm));
 518
 519         size = sock->ops->recvmsg(sock, msg, size, flags, &scm);
 520         if (size >= 0)
 521                 scm_recv(sock, msg, &scm, flags);
 522
 523         return size;
 524 }
 525
 526
 527 /*
 528  *      Read data from a socket. ubuf is a user mode pointer. We make sure the user
 529  *      area ubuf...ubuf+size-1 is writable before asking the protocol.
 530  */
 531
 532 static ssize_t sock_read(struct file *file, char *ubuf,
 533                          size_t size, loff_t *ppos)
 534 {
 535         struct socket *sock;
 536         struct iovec iov;
 537         struct msghdr msg;
 538         int flags;
 539
 540         if (ppos != &file->f_pos)
 541                 return -ESPIPE;
 542         if (size==0)            /* Match SYS5 behaviour */
 543                 return 0;
 544
 545         sock = socki_lookup(file->f_dentry->d_inode);
 546
 547         msg.msg_name=NULL;
 548         msg.msg_namelen=0;
 549         msg.msg_iov=&iov;
 550         msg.msg_iovlen=1;
 551         msg.msg_control=NULL;
 552         msg.msg_controllen=0;
 553         iov.iov_base=ubuf;
 554         iov.iov_len=size;
 555         flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 556
 557         return sock_recvmsg(sock, &msg, size, flags);
 558 }
 559
 560
 561 /*
 562  *      Write data to a socket. We verify that the user area ubuf..ubuf+size-1
 563  *      is readable by the user process.
 564  */
 565
 566 static ssize_t sock_write(struct file *file, const char *ubuf,
 567                           size_t size, loff_t *ppos)
 568 {
 569         struct socket *sock;
 570         struct msghdr msg;
 571         struct iovec iov;
 572
 573         if (ppos != &file->f_pos)
 574                 return -ESPIPE;
 575         if(size==0)             /* Match SYS5 behaviour */
 576                 return 0;
 577
 578         sock = socki_lookup(file->f_dentry->d_inode);
 579
 580         msg.msg_name=NULL;
 581         msg.msg_namelen=0;
 582         msg.msg_iov=&iov;
 583         msg.msg_iovlen=1;
 584         msg.msg_control=NULL;
 585         msg.msg_controllen=0;
 586         msg.msg_flags=!(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 587         if (sock->type == SOCK_SEQPACKET)
 588                 msg.msg_flags |= MSG_EOR;
 589         iov.iov_base=(void *)ubuf;
 590         iov.iov_len=size;
 591
 592         return sock_sendmsg(sock, &msg, size);
 593 }
 594
 595 ssize_t sock_sendpage(struct file *file, struct page *page,
 596                       int offset, size_t size, loff_t *ppos, int more)
 597 {
 598         struct socket *sock;
 599         int flags;
 600
 601         if (ppos != &file->f_pos)
 602                 return -ESPIPE;
 603
 604         sock = socki_lookup(file->f_dentry->d_inode);
 605
 606         flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT;
 607         if (more)
 608                 flags |= MSG_MORE;
 609
 610         if (!sock->ops->sendpage)
 611                 return sock_no_sendpage(sock, page, offset, size, flags);
 612
 613         return sock->ops->sendpage(sock, page, offset, size, flags);
 614 }
 615
 616 int sock_readv_writev(int type, struct inode * inode, struct file * file,
 617                       const struct iovec * iov, long count, long size)
 618 {
 619         struct msghdr msg;
 620         struct socket *sock;
 621
 622         sock = socki_lookup(inode);
 623
 624         msg.msg_name = NULL;
 625         msg.msg_namelen = 0;
 626         msg.msg_control = NULL;
 627         msg.msg_controllen = 0;
 628         msg.msg_iov = (struct iovec *) iov;
 629         msg.msg_iovlen = count;
 630         msg.msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0;
 631
 632         /* read() does a VERIFY_WRITE */
 633         if (type == VERIFY_WRITE)
 634                 return sock_recvmsg(sock, &msg, size, msg.msg_flags);
 635
 636         if (sock->type == SOCK_SEQPACKET)
 637                 msg.msg_flags |= MSG_EOR;
 638
 639         return sock_sendmsg(sock, &msg, size);
 640 }
 641
 642 static ssize_t sock_readv(struct file *file, const struct iovec *vector,
 643                           unsigned long count, loff_t *ppos)
 644 {
 645         size_t tot_len = 0;
 646         int i;
 647         for (i = 0 ; i < count ; i++)
 648                 tot_len += vector[i].iov_len;
 649         return sock_readv_writev(VERIFY_WRITE, file->f_dentry->d_inode,
 650                                  file, vector, count, tot_len);
 651 }
 652
 653 static ssize_t sock_writev(struct file *file, const struct iovec *vector,
 654                            unsigned long count, loff_t *ppos)
 655 {
 656         size_t tot_len = 0;
 657         int i;
 658         for (i = 0 ; i < count ; i++)
 659                 tot_len += vector[i].iov_len;
 660         return sock_readv_writev(VERIFY_READ, file->f_dentry->d_inode,
 661                                  file, vector, count, tot_len);
 662 }
 663
 664 /*
 665  *      With an ioctl arg may well be a user mode pointer, but we don't know what to do
 666  *      with it - that's up to the protocol still.
 667  */
 668
 669 int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
 670            unsigned long arg)
 671 {
 672         struct socket *sock;
 673         int err;
 674
 675         unlock_kernel();
 676         sock = socki_lookup(inode);
 677         err = sock->ops->ioctl(sock, cmd, arg);
 678         lock_kernel();
 679
 680         return err;
 681 }
 682
 683
 684 /* No kernel lock held - perfect */
 685 static unsigned int sock_poll(struct file *file, poll_table * wait)
 686 {
 687         struct socket *sock;
 688
 689         /*
 690          *      We can't return errors to poll, so it's either yes or no.
 691          */
 692         sock = socki_lookup(file->f_dentry->d_inode);
 693         return sock->ops->poll(file, sock, wait);
 694 }
 695
 696 static int sock_mmap(struct file * file, struct vm_area_struct * vma)
 697 {
 698         struct socket *sock = socki_lookup(file->f_dentry->d_inode);
 699
 700         return sock->ops->mmap(file, sock, vma);
 701 }
 702
 703 int sock_close(struct inode *inode, struct file *filp)
 704 {
 705         /*
 706          *      It was possible the inode is NULL we were
 707          *      closing an unfinished socket.
 708          */
 709
 710         if (!inode)
 711         {
 712                 printk(KERN_DEBUG "sock_close: NULL inode\n");
 713                 return 0;
 714         }
 715         sock_fasync(-1, filp, 0);
 716         sock_release(socki_lookup(inode));
 717         return 0;
 718 }
 719
 720 /*
 721  *      Update the socket async list
 722  *
 723  *      Fasync_list locking strategy.
 724  *
 725  *      1. fasync_list is modified only under process context socket lock
 726  *         i.e. under semaphore.
 727  *      2. fasync_list is used under read_lock(&sk->callback_lock)
 728  *         or under socket lock.
 729  *      3. fasync_list can be used from softirq context, so that
 730  *         modification under socket lock have to be enhanced with
 731  *         write_lock_bh(&sk->callback_lock).
 732  *                                                      --ANK (990710)
 733  */
 734
 735 static int sock_fasync(int fd, struct file *filp, int on)
 736 {
 737         struct fasync_struct *fa, *fna=NULL, **prev;
 738         struct socket *sock;
 739         struct sock *sk;
 740
 741         if (on)
 742         {
 743                 fna=(struct fasync_struct *)kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
 744                 if(fna==NULL)
 745                         return -ENOMEM;
 746         }
 747
 748         sock = socki_lookup(filp->f_dentry->d_inode);
 749
 750         if ((sk=sock->sk) == NULL) {
 751                 if (fna)
 752                         kfree(fna);
 753                 return -EINVAL;
 754         }
 755
 756         lock_sock(sk);
 757
 758         prev=&(sock->fasync_list);
 759
 760         for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev)
 761                 if (fa->fa_file==filp)
 762                         break;
 763
 764         if(on)
 765         {
 766                 if(fa!=NULL)
 767                 {
 768                         write_lock_bh(&sk->callback_lock);
 769                         fa->fa_fd=fd;
 770                         write_unlock_bh(&sk->callback_lock);
 771
 772                         kfree(fna);
 773                         goto out;
 774                 }
 775                 fna->fa_file=filp;
 776                 fna->fa_fd=fd;
 777                 fna->magic=FASYNC_MAGIC;
 778                 fna->fa_next=sock->fasync_list;
 779                 write_lock_bh(&sk->callback_lock);
 780                 sock->fasync_list=fna;
 781                 write_unlock_bh(&sk->callback_lock);
 782         }
 783         else
 784         {
 785                 if (fa!=NULL)
 786                 {
 787                         write_lock_bh(&sk->callback_lock);
 788                         *prev=fa->fa_next;
 789                         write_unlock_bh(&sk->callback_lock);
 790                         kfree(fa);
 791                 }
 792         }
 793
 794 out:
 795         release_sock(sock->sk);
 796         return 0;
 797 }
 798
 799 /* This function may be called only under socket lock or callback_lock */
 800
 801 int sock_wake_async(struct socket *sock, int how, int band)
 802 {
 803         if (!sock || !sock->fasync_list)
 804                 return -1;
 805         switch (how)
 806         {
 807         case 1:
 808
 809                 if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
 810                         break;
 811                 goto call_kill;
 812         case 2:
 813                 if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags))
 814                         break;
 815                 /* fall through */
 816         case 0:
 817         call_kill:
 818                 __kill_fasync(sock->fasync_list, SIGIO, band);
 819                 break;
 820         case 3:
 821                 __kill_fasync(sock->fasync_list, SIGURG, band);
 822         }
 823         return 0;
 824 }
 825
 826
 827 int sock_create(int family, int type, int protocol, struct socket **res)
 828 {
 829         int i;
 830         struct socket *sock;
 831
 832         /*
 833          *      Check protocol is in range
 834          */
 835         if (family < 0 || family >= NPROTO)
 836                 return -EAFNOSUPPORT;
 837         if (type < 0 || type >= SOCK_MAX)
 838                 return -EINVAL;
 839
 840         /* Compatibility.
 841
 842            This uglymoron is moved from INET layer to here to avoid
 843            deadlock in module load.
 844          */
 845         if (family == PF_INET && type == SOCK_PACKET) {
 846                 static int warned;
 847                 if (!warned) {
 848                         warned = 1;
 849                         printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm);
 850                 }
 851                 family = PF_PACKET;
 852         }
 853
 854 #if defined(CONFIG_KMOD) && defined(CONFIG_NET)
 855         /* Attempt to load a protocol module if the find failed.
 856          *
 857          * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user
 858          * requested real, full-featured networking support upon configuration.
 859          * Otherwise module support will break!
 860          */
 861         if (net_families[family]==NULL)
 862         {
 863                 char module_name[30];
 864                 sprintf(module_name,"net-pf-%d",family);
 865                 request_module(module_name);
 866         }
 867 #endif
 868
 869         net_family_read_lock();
 870         if (net_families[family] == NULL) {
 871                 i = -EAFNOSUPPORT;
 872                 goto out;
 873         }
 874
 875 /*
 876  *      Allocate the socket and allow the family to set things up. if
 877  *      the protocol is 0, the family is instructed to select an appropriate
 878  *      default.
 879  */
 880
 881         if (!(sock = sock_alloc()))
 882         {
 883                 printk(KERN_WARNING "socket: no more sockets\n");
 884                 i = -ENFILE;            /* Not exactly a match, but its the
 885                                            closest posix thing */
 886                 goto out;
 887         }
 888
 889         sock->type  = type;
 890
 891         if ((i = net_families[family]->create(sock, protocol)) < 0)
 892         {
 893                 sock_release(sock);
 894                 goto out;
 895         }
 896
 897         *res = sock;
 898
 899 out:
 900         net_family_read_unlock();
 901         return i;
 902 }
 903
 904 asmlinkage long sys_socket(int family, int type, int protocol)
 905 {
 906         int retval;
 907         struct socket *sock;
 908
 909         retval = sock_create(family, type, protocol, &sock);
 910         if (retval < 0)
 911                 goto out;
 912
 913         retval = sock_map_fd(sock);
 914         if (retval < 0)
 915                 goto out_release;
 916
 917 out:
 918         /* It may be already another descriptor 8) Not kernel problem. */
 919         return retval;
 920
 921 out_release:
 922         sock_release(sock);
 923         return retval;
 924 }
 925
 926 /*
 927  *      Create a pair of connected sockets.
 928  */
 929
 930 asmlinkage long sys_socketpair(int family, int type, int protocol, int usockvec[2])
 931 {
 932         struct socket *sock1, *sock2;
 933         int fd1, fd2, err;
 934
 935         /*
 936          * Obtain the first socket and check if the underlying protocol
 937          * supports the socketpair call.
 938          */
 939
 940         err = sock_create(family, type, protocol, &sock1);
 941         if (err < 0)
 942                 goto out;
 943
 944         err = sock_create(family, type, protocol, &sock2);
 945         if (err < 0)
 946                 goto out_release_1;
 947
 948         err = sock1->ops->socketpair(sock1, sock2);
 949         if (err < 0)
 950                 goto out_release_both;
 951
 952         fd1 = fd2 = -1;
 953
 954         err = sock_map_fd(sock1);
 955         if (err < 0)
 956                 goto out_release_both;
 957         fd1 = err;
 958
 959         err = sock_map_fd(sock2);
 960         if (err < 0)
 961                 goto out_close_1;
 962         fd2 = err;
 963
 964         /* fd1 and fd2 may be already another descriptors.
 965          * Not kernel problem.
 966          */
 967
 968         err = put_user(fd1, &usockvec[0]);
 969         if (!err)
 970                 err = put_user(fd2, &usockvec[1]);
 971         if (!err)
 972                 return 0;
 973
 974         sys_close(fd2);
 975         sys_close(fd1);
 976         return err;
 977
 978 out_close_1:
 979         sock_release(sock2);
 980         sys_close(fd1);
 981         return err;
 982
 983 out_release_both:
 984         sock_release(sock2);
 985 out_release_1:
 986         sock_release(sock1);
 987 out:
 988         return err;
 989 }
 990
 991
 992 /*
 993  *      Bind a name to a socket. Nothing much to do here since it's
 994  *      the protocol's responsibility to handle the local address.
 995  *
 996  *      We move the socket address to kernel space before we call
 997  *      the protocol layer (having also checked the address is ok).
 998  */
 999
1000 asmlinkage long sys_bind(int fd, struct sockaddr *umyaddr, int addrlen)
1001 {
1002         struct socket *sock;
1003         char address[MAX_SOCK_ADDR];
1004         int err;
1005
1006         if((sock = sockfd_lookup(fd,&err))!=NULL)
1007         {
1008                 if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0)
1009                         err = sock->ops->bind(sock, (struct sockaddr *)address, addrlen);
1010                 sockfd_put(sock);
1011         }
1012         return err;
1013 }
1014
1015
1016 /*
1017  *      Perform a listen. Basically, we allow the protocol to do anything
1018  *      necessary for a listen, and if that works, we mark the socket as
1019  *      ready for listening.
1020  */
1021
1022 asmlinkage long sys_listen(int fd, int backlog)
1023 {
1024         struct socket *sock;
1025         int err;
1026
1027         if ((sock = sockfd_lookup(fd, &err)) != NULL) {
1028                 if ((unsigned) backlog > SOMAXCONN)
1029                         backlog = SOMAXCONN;
1030                 err=sock->ops->listen(sock, backlog);
1031                 sockfd_put(sock);
1032         }
1033         return err;
1034 }
1035
1036
1037 /*
1038  *      For accept, we attempt to create a new socket, set up the link
1039  *      with the client, wake up the client, then return the new
1040  *      connected fd. We collect the address of the connector in kernel
1041  *      space and move it to user at the very end. This is unclean because
1042  *      we open the socket then return an error.
1043  *
1044  *      1003.1g adds the ability to recvmsg() to query connection pending
1045  *      status to recvmsg. We need to add that support in a way thats
1046  *      clean when we restucture accept also.
1047  */
1048
1049 asmlinkage long sys_accept(int fd, struct sockaddr *upeer_sockaddr, int *upeer_addrlen)
1050 {
1051         struct socket *sock, *newsock;
1052         int err, len;
1053         char address[MAX_SOCK_ADDR];
1054
1055         sock = sockfd_lookup(fd, &err);
1056         if (!sock)
1057                 goto out;
1058
1059         err = -EMFILE;
1060         if (!(newsock = sock_alloc()))
1061                 goto out_put;
1062
1063         newsock->type = sock->type;
1064         newsock->ops = sock->ops;
1065
1066         err = sock->ops->accept(sock, newsock, sock->file->f_flags);
1067         if (err < 0)
1068                 goto out_release;
1069
1070         if (upeer_sockaddr) {
1071                 if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) {
1072                         err = -ECONNABORTED;
1073                         goto out_release;
1074                 }
1075                 err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen);
1076                 if (err < 0)
1077                         goto out_release;
1078         }
1079
1080         /* File flags are not inherited via accept() unlike another OSes. */
1081
1082         if ((err = sock_map_fd(newsock)) < 0)
1083                 goto out_release;
1084
1085 out_put:
1086         sockfd_put(sock);
1087 out:
1088         return err;
1089
1090 out_release:
1091         sock_release(newsock);
1092         goto out_put;
1093 }
1094
1095
1096 /*
1097  *      Attempt to connect to a socket with the server address.  The address
1098  *      is in user space so we verify it is OK and move it to kernel space.
1099  *
1100  *      For 1003.1g we need to add clean support for a bind to AF_UNSPEC to
1101  *      break bindings
1102  *
1103  *      NOTE: 1003.1g draft 6.3 is broken with respect to AX.25/NetROM and
1104  *      other SEQPACKET protocols that take time to connect() as it doesn't
1105  *      include the -EINPROGRESS status for such sockets.
1106  */
1107
1108 asmlinkage long sys_connect(int fd, struct sockaddr *uservaddr, int addrlen)
1109 {
1110         struct socket *sock;
1111         char address[MAX_SOCK_ADDR];
1112         int err;
1113
1114         sock = sockfd_lookup(fd, &err);
1115         if (!sock)
1116                 goto out;
1117         err = move_addr_to_kernel(uservaddr, addrlen, address);
1118         if (err < 0)
1119                 goto out_put;
1120         err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen,
1121                                  sock->file->f_flags);
1122 out_put:
1123         sockfd_put(sock);
1124 out:
1125         return err;
1126 }
1127
1128 /*
1129  *      Get the local address ('name') of a socket object. Move the obtained
1130  *      name to user space.
1131  */
1132
1133 asmlinkage long sys_getsockname(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
1134 {
1135         struct socket *sock;
1136         char address[MAX_SOCK_ADDR];
1137         int len, err;
1138
1139         sock = sockfd_lookup(fd, &err);
1140         if (!sock)
1141                 goto out;
1142         err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 0);
1143         if (err)
1144                 goto out_put;
1145         err = move_addr_to_user(address, len, usockaddr, usockaddr_len);
1146
1147 out_put:
1148         sockfd_put(sock);
1149 out:
1150         return err;
1151 }
1152
1153 /*
1154  *      Get the remote address ('name') of a socket object. Move the obtained
1155  *      name to user space.
1156  */
1157
1158 asmlinkage long sys_getpeername(int fd, struct sockaddr *usockaddr, int *usockaddr_len)
1159 {
1160         struct socket *sock;
1161         char address[MAX_SOCK_ADDR];
1162         int len, err;
1163
1164         if ((sock = sockfd_lookup(fd, &err))!=NULL)
1165         {
1166                 err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1);
1167                 if (!err)
1168                         err=move_addr_to_user(address,len, usockaddr, usockaddr_len);
1169                 sockfd_put(sock);
1170         }
1171         return err;
1172 }
1173
1174 /*
1175  *      Send a datagram to a given address. We move the address into kernel
1176  *      space and check the user space data area is readable before invoking
1177  *      the protocol.
1178  */
1179
1180 asmlinkage long sys_sendto(int fd, void * buff, size_t len, unsigned flags,
1181                            struct sockaddr *addr, int addr_len)
1182 {
1183         struct socket *sock;
1184         char address[MAX_SOCK_ADDR];
1185         int err;
1186         struct msghdr msg;
1187         struct iovec iov;
1188
1189         sock = sockfd_lookup(fd, &err);
1190         if (!sock)
1191                 goto out;
1192         iov.iov_base=buff;
1193         iov.iov_len=len;
1194         msg.msg_name=NULL;
1195         msg.msg_iov=&iov;
1196         msg.msg_iovlen=1;
1197         msg.msg_control=NULL;
1198         msg.msg_controllen=0;
1199         msg.msg_namelen=0;
1200         if(addr)
1201         {
1202                 err = move_addr_to_kernel(addr, addr_len, address);
1203                 if (err < 0)
1204                         goto out_put;
1205                 msg.msg_name=address;
1206                 msg.msg_namelen=addr_len;
1207         }
1208         if (sock->file->f_flags & O_NONBLOCK)
1209                 flags |= MSG_DONTWAIT;
1210         msg.msg_flags = flags;
1211         err = sock_sendmsg(sock, &msg, len);
1212
1213 out_put:
1214         sockfd_put(sock);
1215 out:
1216         return err;
1217 }
1218
1219 /*
1220  *      Send a datagram down a socket.
1221  */
1222
1223 asmlinkage long sys_send(int fd, void * buff, size_t len, unsigned flags)
1224 {
1225         return sys_sendto(fd, buff, len, flags, NULL, 0);
1226 }
1227
1228 /*
1229  *      Receive a frame from the socket and optionally record the address of the
1230  *      sender. We verify the buffers are writable and if needed move the
1231  *      sender address from kernel to user space.
1232  */
1233
1234 asmlinkage long sys_recvfrom(int fd, void * ubuf, size_t size, unsigned flags,
1235                              struct sockaddr *addr, int *addr_len)
1236 {
1237         struct socket *sock;
1238         struct iovec iov;
1239         struct msghdr msg;
1240         char address[MAX_SOCK_ADDR];
1241         int err,err2;
1242
1243         sock = sockfd_lookup(fd, &err);
1244         if (!sock)
1245                 goto out;
1246
1247         msg.msg_control=NULL;
1248         msg.msg_controllen=0;
1249         msg.msg_iovlen=1;
1250         msg.msg_iov=&iov;
1251         iov.iov_len=size;
1252         iov.iov_base=ubuf;
1253         msg.msg_name=address;
1254         msg.msg_namelen=MAX_SOCK_ADDR;
1255         if (sock->file->f_flags & O_NONBLOCK)
1256                 flags |= MSG_DONTWAIT;
1257         err=sock_recvmsg(sock, &msg, size, flags);
1258
1259         if(err >= 0 && addr != NULL && msg.msg_namelen)
1260         {
1261                 err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len);
1262                 if(err2<0)
1263                         err=err2;
1264         }
1265         sockfd_put(sock);
1266 out:
1267         return err;
1268 }
1269
1270 /*
1271  *      Receive a datagram from a socket.
1272  */
1273
1274 asmlinkage long sys_recv(int fd, void * ubuf, size_t size, unsigned flags)
1275 {
1276         return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL);
1277 }
1278
1279 /*
1280  *      Set a socket option. Because we don't know the option lengths we have
1281  *      to pass the user mode parameter for the protocols to sort out.
1282  */
1283
1284 asmlinkage long sys_setsockopt(int fd, int level, int optname, char *optval, int optlen)
1285 {
1286         int err;
1287         struct socket *sock;
1288
1289         if (optlen < 0)
1290                 return -EINVAL;
1291
1292         if ((sock = sockfd_lookup(fd, &err))!=NULL)
1293         {
1294                 if (level == SOL_SOCKET)
1295                         err=sock_setsockopt(sock,level,optname,optval,optlen);
1296                 else
1297                         err=sock->ops->setsockopt(sock, level, optname, optval, optlen);
1298                 sockfd_put(sock);
1299         }
1300         return err;
1301 }
1302
1303 /*
1304  *      Get a socket option. Because we don't know the option lengths we have
1305  *      to pass a user mode parameter for the protocols to sort out.
1306  */
1307
1308 asmlinkage long sys_getsockopt(int fd, int level, int optname, char *optval, int *optlen)
1309 {
1310         int err;
1311         struct socket *sock;
1312
1313         if ((sock = sockfd_lookup(fd, &err))!=NULL)
1314         {
1315                 if (level == SOL_SOCKET)
1316                         err=sock_getsockopt(sock,level,optname,optval,optlen);
1317                 else
1318                         err=sock->ops->getsockopt(sock, level, optname, optval, optlen);
1319                 sockfd_put(sock);
1320         }
1321         return err;
1322 }
1323
1324
1325 /*
1326  *      Shutdown a socket.
1327  */
1328
1329 asmlinkage long sys_shutdown(int fd, int how)
1330 {
1331         int err;
1332         struct socket *sock;
1333
1334         if ((sock = sockfd_lookup(fd, &err))!=NULL)
1335         {
1336                 err=sock->ops->shutdown(sock, how);
1337                 sockfd_put(sock);
1338         }
1339         return err;
1340 }
1341
1342 /*
1343  *      BSD sendmsg interface
1344  */
1345
1346 asmlinkage long sys_sendmsg(int fd, struct msghdr *msg, unsigned flags)
1347 {
1348         struct socket *sock;
1349         char address[MAX_SOCK_ADDR];
1350         struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
1351         unsigned char ctl[sizeof(struct cmsghdr) + 20]; /* 20 is size of ipv6_pktinfo */
1352         unsigned char *ctl_buf = ctl;
1353         struct msghdr msg_sys;
1354         int err, ctl_len, iov_size, total_len;
1355
1356         err = -EFAULT;
1357         if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
1358                 goto out;
1359
1360         sock = sockfd_lookup(fd, &err);
1361         if (!sock)
1362                 goto out;
1363
1364         /* do not move before msg_sys is valid */
1365         err = -EINVAL;
1366         if (msg_sys.msg_iovlen > UIO_MAXIOV)
1367                 goto out_put;
1368
1369         /* Check whether to allocate the iovec area*/
1370         err = -ENOMEM;
1371         iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1372         if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1373                 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1374                 if (!iov)
1375                         goto out_put;
1376         }
1377
1378         /* This will also move the address data into kernel space */
1379         err = verify_iovec(&msg_sys, iov, address, VERIFY_READ);
1380         if (err < 0)
1381                 goto out_freeiov;
1382         total_len = err;
1383
1384         err = -ENOBUFS;
1385
1386         if (msg_sys.msg_controllen > INT_MAX)
1387                 goto out_freeiov;
1388         ctl_len = msg_sys.msg_controllen;
1389         if (ctl_len)
1390         {
1391                 if (ctl_len > sizeof(ctl))
1392                 {
1393                         ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL);
1394                         if (ctl_buf == NULL)
1395                                 goto out_freeiov;
1396                 }
1397                 err = -EFAULT;
1398                 if (copy_from_user(ctl_buf, msg_sys.msg_control, ctl_len))
1399                         goto out_freectl;
1400                 msg_sys.msg_control = ctl_buf;
1401         }
1402         msg_sys.msg_flags = flags;
1403
1404         if (sock->file->f_flags & O_NONBLOCK)
1405                 msg_sys.msg_flags |= MSG_DONTWAIT;
1406         err = sock_sendmsg(sock, &msg_sys, total_len);
1407
1408 out_freectl:
1409         if (ctl_buf != ctl)
1410                 sock_kfree_s(sock->sk, ctl_buf, ctl_len);
1411 out_freeiov:
1412         if (iov != iovstack)
1413                 sock_kfree_s(sock->sk, iov, iov_size);
1414 out_put:
1415         sockfd_put(sock);
1416 out:
1417         return err;
1418 }
1419
1420 /*
1421  *      BSD recvmsg interface
1422  */
1423
1424 asmlinkage long sys_recvmsg(int fd, struct msghdr *msg, unsigned int flags)
1425 {
1426         struct socket *sock;
1427         struct iovec iovstack[UIO_FASTIOV];
1428         struct iovec *iov=iovstack;
1429         struct msghdr msg_sys;
1430         unsigned long cmsg_ptr;
1431         int err, iov_size, total_len, len;
1432
1433         /* kernel mode address */
1434         char addr[MAX_SOCK_ADDR];
1435
1436         /* user mode address pointers */
1437         struct sockaddr *uaddr;
1438         int *uaddr_len;
1439
1440         err=-EFAULT;
1441         if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr)))
1442                 goto out;
1443
1444         sock = sockfd_lookup(fd, &err);
1445         if (!sock)
1446                 goto out;
1447
1448         err = -EINVAL;
1449         if (msg_sys.msg_iovlen > UIO_MAXIOV)
1450                 goto out_put;
1451
1452         /* Check whether to allocate the iovec area*/
1453         err = -ENOMEM;
1454         iov_size = msg_sys.msg_iovlen * sizeof(struct iovec);
1455         if (msg_sys.msg_iovlen > UIO_FASTIOV) {
1456                 iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL);
1457                 if (!iov)
1458                         goto out_put;
1459         }
1460
1461         /*
1462          *      Save the user-mode address (verify_iovec will change the
1463          *      kernel msghdr to use the kernel address space)
1464          */
1465
1466         uaddr = msg_sys.msg_name;
1467         uaddr_len = &msg->msg_namelen;
1468         err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE);
1469         if (err < 0)
1470                 goto out_freeiov;
1471         total_len=err;
1472
1473         cmsg_ptr = (unsigned long)msg_sys.msg_control;
1474         msg_sys.msg_flags = 0;
1475
1476         if (sock->file->f_flags & O_NONBLOCK)
1477                 flags |= MSG_DONTWAIT;
1478         err = sock_recvmsg(sock, &msg_sys, total_len, flags);
1479         if (err < 0)
1480                 goto out_freeiov;
1481         len = err;
1482
1483         if (uaddr != NULL && msg_sys.msg_namelen) {
1484                 err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len);
1485                 if (err < 0)
1486                         goto out_freeiov;
1487         }
1488         err = __put_user(msg_sys.msg_flags, &msg->msg_flags);
1489         if (err)
1490                 goto out_freeiov;
1491         err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr,
1492                                                          &msg->msg_controllen);
1493         if (err)
1494                 goto out_freeiov;
1495         err = len;
1496
1497 out_freeiov:
1498         if (iov != iovstack)
1499                 sock_kfree_s(sock->sk, iov, iov_size);
1500 out_put:
1501         sockfd_put(sock);
1502 out:
1503         return err;
1504 }
1505
1506
1507 /*
1508  *      Perform a file control on a socket file descriptor.
1509  *
1510  *      Doesn't acquire a fd lock, because no network fcntl
1511  *      function sleeps currently.
1512  */
1513
1514 int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg)
1515 {
1516         struct socket *sock;
1517
1518         sock = socki_lookup (filp->f_dentry->d_inode);
1519         if (sock && sock->ops)
1520                 return sock_no_fcntl(sock, cmd, arg);
1521         return(-EINVAL);
1522 }
1523
/*
 * Argument list sizes for sys_socketcall, in bytes, indexed by call
 * number. Entry 0 is unused (call numbers start at 1).
 * NOTE(review): the per-entry names follow the SYS_* numbering expected
 * from <linux/net.h>, which is not visible here - confirm.
 */
#define AL(x) ((x) * sizeof(unsigned long))
static unsigned char nargs[18] = {
        AL(0),          /*  0: unused */
        AL(3),          /*  1: socket */
        AL(3),          /*  2: bind */
        AL(3),          /*  3: connect */
        AL(2),          /*  4: listen */
        AL(3),          /*  5: accept */
        AL(3),          /*  6: getsockname */
        AL(3),          /*  7: getpeername */
        AL(4),          /*  8: socketpair */
        AL(4),          /*  9: send */
        AL(4),          /* 10: recv */
        AL(6),          /* 11: sendto */
        AL(6),          /* 12: recvfrom */
        AL(2),          /* 13: shutdown */
        AL(5),          /* 14: setsockopt */
        AL(5),          /* 15: getsockopt */
        AL(3),          /* 16: sendmsg */
        AL(3)           /* 17: recvmsg */
};
#undef AL
1530
1531 /*
1532  *      System call vectors.
1533  *
1534  *      Argument checking cleaned up. Saved 20% in size.
1535  *  This function doesn't need to set the kernel lock because
1536  *  it is set by the callees.
1537  */
1538
1539 asmlinkage long sys_socketcall(int call, unsigned long *args)
1540 {
1541         unsigned long a[6];
1542         unsigned long a0,a1;
1543         int err;
1544
1545         if(call<1||call>SYS_RECVMSG)
1546                 return -EINVAL;
1547
1548         /* copy_from_user should be SMP safe. */
1549         if (copy_from_user(a, args, nargs[call]))
1550                 return -EFAULT;
1551
1552         a0=a[0];
1553         a1=a[1];
1554
1555         switch(call)
1556         {
1557                 case SYS_SOCKET:
1558                         err = sys_socket(a0,a1,a[2]);
1559                         break;
1560                 case SYS_BIND:
1561                         err = sys_bind(a0,(struct sockaddr *)a1, a[2]);
1562                         break;
1563                 case SYS_CONNECT:
1564                         err = sys_connect(a0, (struct sockaddr *)a1, a[2]);
1565                         break;
1566                 case SYS_LISTEN:
1567                         err = sys_listen(a0,a1);
1568                         break;
1569                 case SYS_ACCEPT:
1570                         err = sys_accept(a0,(struct sockaddr *)a1, (int *)a[2]);
1571                         break;
1572                 case SYS_GETSOCKNAME:
1573                         err = sys_getsockname(a0,(struct sockaddr *)a1, (int *)a[2]);
1574                         break;
1575                 case SYS_GETPEERNAME:
1576                         err = sys_getpeername(a0, (struct sockaddr *)a1, (int *)a[2]);
1577                         break;
1578                 case SYS_SOCKETPAIR:
1579                         err = sys_socketpair(a0,a1, a[2], (int *)a[3]);
1580                         break;
1581                 case SYS_SEND:
1582                         err = sys_send(a0, (void *)a1, a[2], a[3]);
1583                         break;
1584                 case SYS_SENDTO:
1585                         err = sys_sendto(a0,(void *)a1, a[2], a[3],
1586                                          (struct sockaddr *)a[4], a[5]);
1587                         break;
1588                 case SYS_RECV:
1589                         err = sys_recv(a0, (void *)a1, a[2], a[3]);
1590                         break;
1591                 case SYS_RECVFROM:
1592                         err = sys_recvfrom(a0, (void *)a1, a[2], a[3],
1593                                            (struct sockaddr *)a[4], (int *)a[5]);
1594                         break;
1595                 case SYS_SHUTDOWN:
1596                         err = sys_shutdown(a0,a1);
1597                         break;
1598                 case SYS_SETSOCKOPT:
1599                         err = sys_setsockopt(a0, a1, a[2], (char *)a[3], a[4]);
1600                         break;
1601                 case SYS_GETSOCKOPT:
1602                         err = sys_getsockopt(a0, a1, a[2], (char *)a[3], (int *)a[4]);
1603                         break;
1604                 case SYS_SENDMSG:
1605                         err = sys_sendmsg(a0, (struct msghdr *) a1, a[2]);
1606                         break;
1607                 case SYS_RECVMSG:
1608                         err = sys_recvmsg(a0, (struct msghdr *) a1, a[2]);
1609                         break;
1610                 default:
1611                         err = -EINVAL;
1612                         break;
1613         }
1614         return err;
1615 }
1616
1617 /*
1618  *      This function is called by a protocol handler that wants to
1619  *      advertise its address family, and have it linked into the
1620  *      SOCKET module.
1621  */
1622
1623 int sock_register(struct net_proto_family *ops)
1624 {
1625         int err;
1626
1627         if (ops->family >= NPROTO) {
1628                 printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO);
1629                 return -ENOBUFS;
1630         }
1631         net_family_write_lock();
1632         err = -EEXIST;
1633         if (net_families[ops->family] == NULL) {
1634                 net_families[ops->family]=ops;
1635                 err = 0;
1636         }
1637         net_family_write_unlock();
1638         return err;
1639 }
1640
1641 /*
1642  *      This function is called by a protocol handler that wants to
1643  *      remove its address family, and have it unlinked from the
1644  *      SOCKET module.
1645  */
1646
1647 int sock_unregister(int family)
1648 {
1649         if (family < 0 || family >= NPROTO)
1650                 return -1;
1651
1652         net_family_write_lock();
1653         net_families[family]=NULL;
1654         net_family_write_unlock();
1655         return 0;
1656 }
1657
1658
1659 extern void sk_init(void);
1660
1661 #ifdef CONFIG_WAN_ROUTER
1662 extern void wanrouter_init(void);
1663 #endif
1664
1665 #ifdef CONFIG_BLUEZ
1666 extern void bluez_init(void);
1667 #endif
1668
1669 void __init sock_init(void)
1670 {
1671         int i;
1672
1673         printk(KERN_INFO "Linux NET4.0 for Linux 2.4\n");
1674         printk(KERN_INFO "Based upon Swansea University Computer Society NET3.039\n");
1675
1676         /*
1677          *      Initialize all address (protocol) families.
1678          */
1679
1680         for (i = 0; i < NPROTO; i++)
1681                 net_families[i] = NULL;
1682
1683         /*
1684          *      Initialize sock SLAB cache.
1685          */
1686
1687         sk_init();
1688
1689 #ifdef SLAB_SKB
1690         /*
1691          *      Initialize skbuff SLAB cache
1692          */
1693         skb_init();
1694 #endif
1695
1696         /*
1697          *      Wan router layer.
1698          */
1699
1700 #ifdef CONFIG_WAN_ROUTER
1701         wanrouter_init();
1702 #endif
1703
1704         /*
1705          *      Initialize the protocols module.
1706          */
1707
1708         register_filesystem(&sock_fs_type);
1709         sock_mnt = kern_mount(&sock_fs_type);
1710         /* The real protocol initialization is performed when
1711          *  do_initcalls is run.
1712          */
1713
1714
1715         /*
1716          * The netlink device handler may be needed early.
1717          */
1718
1719 #ifdef CONFIG_NET
1720         rtnetlink_init();
1721 #endif
1722 #ifdef CONFIG_NETLINK_DEV
1723         init_netlink();
1724 #endif
1725 #ifdef CONFIG_NETFILTER
1726         netfilter_init();
1727 #endif
1728
1729 #ifdef CONFIG_BLUEZ
1730         bluez_init();
1731 #endif
1732 }
1733
1734 int socket_get_info(char *buffer, char **start, off_t offset, int length)
1735 {
1736         int len, cpu;
1737         int counter = 0;
1738
1739         for (cpu=0; cpu<smp_num_cpus; cpu++)
1740                 counter += sockets_in_use[cpu_logical_map(cpu)].counter;
1741
1742         /* It can be negative, by the way. 8) */
1743         if (counter < 0)
1744                 counter = 0;
1745
1746         len = sprintf(buffer, "sockets: used %d\n", counter);
1747         if (offset >= len)
1748         {
1749                 *start = buffer;
1750                 return 0;
1751         }
1752         *start = buffer + offset;
1753         len -= offset;
1754         if (len > length)
1755                 len = length;
1756         if (len < 0)
1757                 len = 0;
1758         return len;
1759 }