4 * Copyright (C) 1991, 1992 Linus Torvalds
8 #include <linux/file.h>
9 #include <linux/smp_lock.h>
10 #include <linux/slab.h>
13 #include <asm/siginfo.h>
14 #include <asm/uaccess.h>
/*
 * Hook into the socket layer: sockets implement a few fcntl commands
 * (e.g. F_SETOWN, and anything unrecognised here) themselves.
 * Defined in the networking code; returns an error code / command result.
 */
extern int sock_fcntl(struct file *filp, unsigned int cmd, unsigned long arg);
19 * locate_fd finds a free file descriptor in the open_fds fdset,
20 * expanding the fd arrays if necessary. The files write lock will be
21 * held on exit to ensure that the fd can be entered atomically.
/*
 * NOTE(review): this extract is damaged — the original kernel line numbers
 * are fused into the text and several statements (local declarations,
 * labels, gotos and the return path) are missing between the visible
 * fragments.  The comments below annotate only the visible logic; confirm
 * against an intact copy of fs/fcntl.c before relying on details.
 */
24 static inline int locate_fd(struct files_struct
*files
,
25 struct file
*file
, int start
)
/* Take the files write lock; per the header comment above it is still
   held when we return, so the caller can install the fd atomically. */
30 write_lock(&files
->file_lock
);
/* Never allocate below the cached next-free hint. */
34 if (start
< files
->next_fd
)
35 start
= files
->next_fd
;
/* Requested start is past the current fdset size: grow the fd tables.
   (The error check / retry after expand_files is elided here.) */
36 if (start
>= files
->max_fdset
) {
38 error
= expand_files(files
, start
);
/* Scan the open-fd bitmap for the first clear bit at or after start. */
44 newfd
= find_next_zero_bit(files
->open_fds
->fds_bits
,
45 files
->max_fdset
, start
);
/* Enforce the per-process RLIMIT_NOFILE soft limit. */
48 if (newfd
>= current
->rlim
[RLIMIT_NOFILE
].rlim_cur
)
/* The free bit found may lie beyond the fdset; presumably this jumps
   back to the expand path above — the goto itself is elided. */
50 if (newfd
>= files
->max_fdset
)
/* Make sure the fd arrays (not just the bitmap) cover newfd too. */
53 error
= expand_files(files
, newfd
);
56 if (error
) /* If we might have blocked, try again. */
/* Advance the next-free hint past the slot we are handing out. */
59 if (start
<= files
->next_fd
)
60 files
->next_fd
= newfd
+ 1;
/*
 * Commit a descriptor found by locate_fd: mark it busy and release the
 * files write lock that locate_fd left held.
 */
68 static inline void allocate_fd(struct files_struct
*files
,
69 struct file
*file
, int fd
)
/* Mark fd as in use in the open-fd bitmap. */
71 FD_SET(fd
, files
->open_fds
);
/* A freshly allocated descriptor does not inherit close-on-exec. */
72 FD_CLR(fd
, files
->close_on_exec
);
/* Drop the lock taken by locate_fd. */
73 write_unlock(&files
->file_lock
);
/* NOTE(review): the step that actually stores `file` into the fd array
   appears to be elided from this extract — confirm against upstream. */
/*
 * Duplicate `file` into the lowest free descriptor >= start for the
 * current process.  (Locals, the error branch and the return statements
 * are elided in this extract.)
 */
77 static int dupfd(struct file
*file
, int start
)
79 struct files_struct
* files
= current
->files
;
/* locate_fd returns with files->file_lock write-held. */
82 ret
= locate_fd(files
, file
, start
);
/* Success path: publish the file at the slot we found; allocate_fd
   also releases the file lock. */
85 allocate_fd(files
, file
, ret
);
/* Error path: release the lock locate_fd left held.  (The goto label
   and the fput of `file` are elided in this extract.) */
89 write_unlock(&files
->file_lock
);
/*
 * dup2(2): duplicate oldfd onto newfd.  Locals, the newfd == oldfd
 * short-circuit, the close of whatever previously occupied newfd, and
 * the goto labels / returns are elided in this extract.
 */
94 asmlinkage
long sys_dup2(unsigned int oldfd
, unsigned int newfd
)
98 struct files_struct
* files
= current
->files
;
/* NOTE(review): "¤t" below is mojibake for "&current" — restore
   the original bytes when repairing this extract's encoding. */
100 write_lock(¤t
->files
->file_lock
);
/* oldfd must refer to an open file. */
101 if (!(file
= fcheck(oldfd
)))
/* Out-of-range target descriptor is an error per POSIX. */
107 if (newfd
>= NR_OPEN
)
108 goto out_unlock
; /* following POSIX.1 6.2.1 */
109 get_file(file
); /* We are now finished with oldfd */
/* Grow the fd tables so newfd is a valid slot. */
111 err
= expand_files(files
, newfd
);
/* Presumably the expand_files error path (condition elided). */
113 write_unlock(&files
->file_lock
);
118 /* To avoid races with open() and dup(), we will mark the fd as
119 * in-use in the open-file bitmap throughout the entire dup2()
120 * process. This is quite safe: do_close() uses the fd array
121 * entry, not the bitmap, to decide what work needs to be
123 FD_SET(newfd
, files
->open_fds
);
124 write_unlock(&files
->file_lock
);
/* (Closing of any file previously installed at newfd is elided.) */
128 write_lock(&files
->file_lock
);
129 allocate_fd(files
, file
, newfd
);
/* NOTE(review): "¤t" is mojibake for "&current" here as well. */
135 write_unlock(¤t
->files
->file_lock
);
/*
 * dup(2): duplicate fildes into the lowest available descriptor.
 * (The EBADF initialisation, the NULL check on `file` and the return
 * are elided in this extract.)
 */
139 asmlinkage
long sys_dup(unsigned int fildes
)
/* fget takes a reference; dupfd consumes it on both paths. */
142 struct file
* file
= fget(fildes
);
145 ret
= dupfd(file
, 0);
/* The only status flags userspace may change via F_SETFL. */
149 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC)
/*
 * Apply F_SETFL: validate the request, notify the driver of FASYNC
 * changes, then merge the user-settable bits into f_flags.  (Error
 * returns and the O_NDELAY normalisation body are elided here.)
 */
151 static int setfl(int fd
, struct file
* filp
, unsigned long arg
)
153 struct inode
* inode
= filp
->f_dentry
->d_inode
;
156 * In the case of an append-only file, O_APPEND
/* Refuse to clear O_APPEND on an append-only inode (error path elided). */
159 if (!(arg
& O_APPEND
) && IS_APPEND(inode
))
162 /* Did FASYNC state change? */
163 if ((arg
^ filp
->f_flags
) & FASYNC
) {
/* Tell the driver whether async notification is being switched on. */
164 if (filp
->f_op
&& filp
->f_op
->fasync
)
165 filp
->f_op
->fasync(fd
, filp
, (arg
& FASYNC
) != 0);
168 /* required for strict SunOS emulation */
/* On platforms where the two differ, O_NDELAY is presumably folded
   into O_NONBLOCK here — the branch body is elided. */
169 if (O_NONBLOCK
!= O_NDELAY
)
/* Merge the user-settable bits, preserving everything else. */
173 filp
->f_flags
= (arg
& SETFL_MASK
) | (filp
->f_flags
& ~SETFL_MASK
);
/*
 * fcntl(2) entry point.
 * NOTE(review): the switch(cmd) skeleton, its case labels, the
 * fget/fput bookkeeping and the error handling are all elided from
 * this extract — only the per-command bodies survive.  The comments
 * below name the command each fragment presumably belongs to; verify
 * against an intact copy before relying on the mapping.
 */
177 asmlinkage
long sys_fcntl(unsigned int fd
, unsigned int cmd
, unsigned long arg
)
/* Presumably F_DUPFD: dup to the lowest free descriptor >= arg. */
192 err
= dupfd(filp
, arg
);
/* Presumably F_GETFD: report the close-on-exec flag. */
196 err
= FD_ISSET(fd
, current
->files
->close_on_exec
);
/* Presumably F_SETFD: set or clear close-on-exec from arg's low bit. */
200 FD_SET(fd
, current
->files
->close_on_exec
);
202 FD_CLR(fd
, current
->files
->close_on_exec
);
/* Presumably F_SETFL: delegate flag validation/merging to setfl. */
208 err
= setfl(fd
, filp
, arg
);
/* Presumably F_GETLK / F_SETLK / F_SETLKW: POSIX record locks; arg is
   a userspace pointer to a struct flock. */
211 err
= fcntl_getlk(fd
, (struct flock
*) arg
);
214 err
= fcntl_setlk(fd
, cmd
, (struct flock
*) arg
);
217 err
= fcntl_setlk(fd
, cmd
, (struct flock
*) arg
);
/* Presumably F_GETOWN. */
221 * XXX If f_owner is a process group, the
222 * negative return value will get converted
223 * into an error. Oops. If we keep the
224 * current syscall conventions, the only way
225 * to fix this will be in libc.
227 err
= filp
->f_owner
.pid
;
/* Presumably F_SETOWN: record the target pid and the caller's
   credentials for the later permission check in send_sigio_to_task. */
230 filp
->f_owner
.pid
= arg
;
231 filp
->f_owner
.uid
= current
->uid
;
232 filp
->f_owner
.euid
= current
->euid
;
/* Sockets get a chance to do their own F_SETOWN processing. */
233 if (S_ISSOCK (filp
->f_dentry
->d_inode
->i_mode
))
234 err
= sock_fcntl (filp
, F_SETOWN
, arg
);
/* Presumably F_GETSIG. */
237 err
= filp
->f_owner
.signum
;
/* Presumably F_SETSIG. */
240 /* arg == 0 restores default behaviour. */
/* NOTE(review): arg is unsigned long, so `arg < 0` is always false —
   only the `arg > _NSIG` half of this range check can fire.  Confirm
   whether a signed check was intended. */
241 if (arg
< 0 || arg
> _NSIG
) {
246 filp
->f_owner
.signum
= arg
;
/* Presumably the default arm: unknown commands go to the socket layer. */
249 /* sockets need a few special fcntls. */
251 if (S_ISSOCK (filp
->f_dentry
->d_inode
->i_mode
))
252 err
= sock_fcntl (filp
, cmd
, arg
);
261 /* Table to convert sigio signal codes into poll band bitmaps */
263 static long band_table
[NSIGPOLL
] = {
264 POLLIN
| POLLRDNORM
, /* POLL_IN */
265 POLLOUT
| POLLWRNORM
| POLLWRBAND
, /* POLL_OUT */
266 POLLIN
| POLLRDNORM
| POLLMSG
, /* POLL_MSG */
267 POLLERR
, /* POLL_ERR */
268 POLLPRI
| POLLRDBAND
, /* POLL_PRI */
269 POLLHUP
| POLLERR
/* POLL_HUP */
/*
 * Deliver SIGIO (or the owner's queued signal) to one task.
 * NOTE(review): the final parameter line (presumably `int reason`),
 * the siginfo local, the switch case labels and the default/break
 * structure are elided from this extract.
 */
272 static void send_sigio_to_task(struct task_struct
*p
,
273 struct fown_struct
*fown
,
274 struct fasync_struct
*fa
,
/* Permission check: a non-root owner may only signal tasks whose real
   or saved uid matches the owner's uid or euid (XOR == 0 is a match;
   the send is refused only when all four comparisons differ). */
277 if ((fown
->euid
!= 0) &&
278 (fown
->euid
^ p
->suid
) && (fown
->euid
^ p
->uid
) &&
279 (fown
->uid
^ p
->suid
) && (fown
->uid
^ p
->uid
))
/* Dispatch on the signal chosen via F_SETSIG (0 presumably means the
   plain-SIGIO path below). */
281 switch (fown
->signum
) {
284 /* Queue a rt signal with the appropriate fd as its
285 value. We use SI_SIGIO as the source, not
286 SI_KERNEL, since kernel signals always get
287 delivered even if we can't queue. Failure to
288 queue in this case _should_ be reported; we fall
289 back to SIGIO in that case. --sct */
290 si
.si_signo
= fown
->signum
;
293 /* Make sure we are called with one of the POLL_*
294 reasons, otherwise we could leak kernel stack into
296 if ((reason
& __SI_MASK
) != __SI_POLL
)
/* NOTE(review): band_table has NSIGPOLL entries, so an index equal to
   NSIGPOLL is out of bounds — this bound check arguably should be
   `>=` rather than `>`.  Confirm against the fixed upstream code. */
298 if (reason
- POLL_IN
> NSIGPOLL
)
301 si
.si_band
= band_table
[reason
- POLL_IN
];
302 si
.si_fd
= fa
->fa_fd
;
/* Queued delivery succeeded — done; on failure fall through below. */
303 if (!send_sig_info(fown
->signum
, &si
, p
))
305 /* fall-through: fall back on the old plain SIGIO signal */
307 send_sig(SIGIO
, p
, 1);
/*
 * Fan SIGIO delivery out to the fd owner's target task(s).
 * NOTE(review): the `int band` parameter, the pid local (presumably
 * initialised from fown->pid) and the braces are elided here.
 */
311 static void send_sigio(struct fown_struct
*fown
, struct fasync_struct
*fa
,
314 struct task_struct
* p
;
/* Task lookup and traversal must happen under the tasklist lock. */
317 read_lock(&tasklist_lock
);
/* Positive pid: signal that single task. */
318 if ( (pid
> 0) && (p
= find_task_by_pid(pid
)) ) {
319 send_sigio_to_task(p
, fown
, fa
, band
);
/* (Else-branch elided: presumably iterates every task in process
   group -pid and signals each one in turn.) */
328 send_sigio_to_task(p
, fown
, fa
, band
);
331 read_unlock(&tasklist_lock
);
335 * fasync_helper() is used by some character device drivers (mainly mice)
336 * to set up the fasync queue. It returns negative on error, 0 if it did
337 * no changes and positive if it added/deleted the entry.
/* Guards every fasync list; writers take it irq-safe (write_lock_irq). */
339 static rwlock_t fasync_lock
= RW_LOCK_UNLOCKED
;
/*
 * NOTE(review): the found-entry update/removal, the allocation-failure
 * return and the final unlock/free/return sequence are elided from
 * this extract.
 */
340 int fasync_helper(int fd
, struct file
* filp
, int on
, struct fasync_struct
**fapp
)
342 struct fasync_struct
*fa
, **fp
;
343 struct fasync_struct
*new = NULL
;
/* Allocate before taking the lock (presumably only when `on`), since
   kmalloc(GFP_KERNEL) may sleep. */
347 new = kmalloc(sizeof(struct fasync_struct
), GFP_KERNEL
);
351 write_lock_irq(&fasync_lock
);
/* Walk the list looking for an existing entry for this file. */
352 for (fp
= fapp
; (fa
= *fp
) != NULL
; fp
= &fa
->fa_next
) {
353 if (fa
->fa_file
== filp
) {
/* (Handling of the found entry is elided here.) */
/* No existing entry: initialise the new one and link it at the head. */
367 new->magic
= FASYNC_MAGIC
;
370 new->fa_next
= *fapp
;
375 write_unlock_irq(&fasync_lock
);
/*
 * Signal every entry on a fasync list; callers hold fasync_lock.
 * NOTE(review): the list-walking loop header and the advance to
 * fa->fa_next are elided in this extract, as is part of the printk.
 */
379 void __kill_fasync(struct fasync_struct
*fa
, int sig
, int band
)
382 struct fown_struct
* fown
;
/* Sanity-check the entry before dereferencing through it. */
383 if (fa
->magic
!= FASYNC_MAGIC
) {
384 printk("kill_fasync: bad magic number in "
/* (Remainder of the message string and the early return are elided.) */
388 fown
= &fa
->fa_file
->f_owner
;
389 /* Don't send SIGURG to processes which have not set a
390 queued signum: SIGURG has its own default signalling
/* Skip entries with no owner pid set. */
392 if (fown
->pid
&& !(sig
== SIGURG
&& fown
->signum
== 0))
393 send_sigio(fown
, fa
, band
);
398 void kill_fasync(struct fasync_struct
**fp
, int sig
, int band
)
400 read_lock(&fasync_lock
);
401 __kill_fasync(*fp
, sig
, band
);
402 read_unlock(&fasync_lock
);