/*
 * linux/fs/fcntl.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 */

#include <linux/mm.h>
#include <linux/file.h>
#include <linux/smp_lock.h>
#include <linux/slab.h>

#include <asm/poll.h>
#include <asm/siginfo.h>
#include <asm/uaccess.h>

extern int sock_fcntl (struct file *, unsigned int cmd, unsigned long arg);
/* Expand files.  Return <0 on error; 0 nothing done; 1 files expanded,
 * we may have blocked.
 *
 * Should be called with the files->file_lock spinlock held for write.
 */
static int expand_files(struct files_struct *files, int nr)
{
	int err, expand = 0;
#ifdef FDSET_DEBUG
	printk (KERN_ERR __FUNCTION__ " %d: nr = %d\n", current->pid, nr);
#endif

	if (nr >= files->max_fdset) {
		expand = 1;
		if ((err = expand_fdset(files, nr)))
			goto out;
	}
	if (nr >= files->max_fds) {
		expand = 1;
		if ((err = expand_fd_array(files, nr)))
			goto out;
	}
	err = expand;
out:
#ifdef FDSET_DEBUG
	if (err)
		printk (KERN_ERR __FUNCTION__ " %d: return %d\n", current->pid, err);
#endif
	return err;
}
/*
 * locate_fd finds a free file descriptor in the open_fds fdset,
 * expanding the fd arrays if necessary.  The files write lock will be
 * held on exit to ensure that the fd can be entered atomically.
 */

static int locate_fd(struct files_struct *files,
		     struct file *file, int orig_start)
{
	unsigned int newfd;
	int error;
	int start;

	write_lock(&files->file_lock);

repeat:
	/*
	 * Someone might have closed fd's in the range
	 * orig_start..files->next_fd
	 */
	start = orig_start;
	if (start < files->next_fd)
		start = files->next_fd;

	newfd = start;
	if (start < files->max_fdset) {
		newfd = find_next_zero_bit(files->open_fds->fds_bits,
					   files->max_fdset, start);
	}

	error = -EMFILE;
	if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out;

	error = expand_files(files, newfd);
	if (error < 0)
		goto out;

	/*
	 * If we needed to expand the fs array we
	 * might have blocked - try again.
	 */
	if (error)
		goto repeat;

	if (start <= files->next_fd)
		files->next_fd = newfd + 1;

	error = newfd;

out:
	return error;
}
static inline void allocate_fd(struct files_struct *files,
			       struct file *file, int fd)
{
	FD_SET(fd, files->open_fds);
	FD_CLR(fd, files->close_on_exec);
	write_unlock(&files->file_lock);
	fd_install(fd, file);
}
static int dupfd(struct file *file, int start)
{
	struct files_struct * files = current->files;
	int ret;

	ret = locate_fd(files, file, start);
	if (ret < 0)
		goto out_putf;
	allocate_fd(files, file, ret);
	return ret;

out_putf:
	write_unlock(&files->file_lock);
	fput(file);
	return ret;
}
asmlinkage long sys_dup2(unsigned int oldfd, unsigned int newfd)
{
	int err = -EBADF;
	struct file * file, *tofree;
	struct files_struct * files = current->files;

	write_lock(&files->file_lock);
	if (!(file = fcheck(oldfd)))
		goto out_unlock;
	err = newfd;
	if (newfd == oldfd)
		goto out_unlock;
	err = -EBADF;
	if (newfd >= current->rlim[RLIMIT_NOFILE].rlim_cur)
		goto out_unlock;
	get_file(file);			/* We are now finished with oldfd */

	err = expand_files(files, newfd);
	if (err < 0)
		goto out_fput;

	/* To avoid races with open() and dup(), we will mark the fd as
	 * in-use in the open-file bitmap throughout the entire dup2()
	 * process.  This is quite safe: do_close() uses the fd array
	 * entry, not the bitmap, to decide what work needs to be
	 * done.  --sct */
	/* Doesn't work. open() might be there first. --AV */

	/* Yes. It's a race. In user space. Nothing sane to do */
	err = -EBUSY;
	tofree = files->fd[newfd];
	if (!tofree && FD_ISSET(newfd, files->open_fds))
		goto out_fput;

	files->fd[newfd] = file;
	FD_SET(newfd, files->open_fds);
	FD_CLR(newfd, files->close_on_exec);
	write_unlock(&files->file_lock);

	if (tofree)
		filp_close(tofree, files);
	err = newfd;
out:
	return err;

out_unlock:
	write_unlock(&files->file_lock);
	goto out;

out_fput:
	write_unlock(&files->file_lock);
	fput(file);
	goto out;
}
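/*
 * Illustrative user-space sketch (not part of this file; names are
 * hypothetical): the classic use of dup2() is redirecting a standard
 * descriptor, e.g. wiring a pipe to a child's stdout:
 *
 *	int redirect_stdout(int fd)
 *	{
 *		if (dup2(fd, STDOUT_FILENO) < 0)
 *			return -1;
 *		close(fd);
 *		return 0;
 *	}
 *
 * Note the race discussed above: if another thread is in the middle of
 * installing newfd via open(), dup2() returns -EBUSY rather than guess.
 */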
asmlinkage long sys_dup(unsigned int fildes)
{
	int ret = -EBADF;
	struct file * file = fget(fildes);

	if (file)
		ret = dupfd(file, 0);
	return ret;
}

#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC)

static int setfl(int fd, struct file * filp, unsigned long arg)
{
	struct inode * inode = filp->f_dentry->d_inode;

	/*
	 * In the case of an append-only file, O_APPEND
	 * cannot be cleared
	 */
	if (!(arg & O_APPEND) && IS_APPEND(inode))
		return -EPERM;

	/* Did FASYNC state change? */
	if ((arg ^ filp->f_flags) & FASYNC) {
		if (filp->f_op && filp->f_op->fasync)
			filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
	}

	/* required for strict SunOS emulation */
	if (O_NONBLOCK != O_NDELAY)
		if (arg & O_NDELAY)
			arg |= O_NONBLOCK;

	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
	return 0;
}
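/*
 * Illustrative user-space sketch of the F_SETFL path above: toggling
 * O_NONBLOCK is its most common use; flags outside SETFL_MASK are
 * silently preserved.  The helper name is hypothetical:
 *
 *	int set_nonblocking(int fd, int on)
 *	{
 *		int flags = fcntl(fd, F_GETFL, 0);
 *		if (flags < 0)
 *			return -1;
 *		flags = on ? (flags | O_NONBLOCK) : (flags & ~O_NONBLOCK);
 *		return fcntl(fd, F_SETFL, flags);
 *	}
 */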
static long do_fcntl(unsigned int fd, unsigned int cmd,
		     unsigned long arg, struct file * filp)
{
	long err = 0;

	switch (cmd) {
		case F_DUPFD:
			err = -EINVAL;
			if (arg < NR_OPEN) {
				get_file(filp);
				err = dupfd(filp, arg);
			}
			break;
		case F_GETFD:
			err = get_close_on_exec(fd);
			break;
		case F_SETFD:
			set_close_on_exec(fd, arg&1);
			break;
		case F_GETFL:
			err = filp->f_flags;
			break;
		case F_SETFL:
			err = setfl(fd, filp, arg);
			break;
		case F_GETLK:
			err = fcntl_getlk(fd, (struct flock *) arg);
			break;
		case F_SETLK:
			err = fcntl_setlk(fd, cmd, (struct flock *) arg);
			break;
		case F_SETLKW:
			err = fcntl_setlk(fd, cmd, (struct flock *) arg);
			break;
		case F_GETOWN:
			/*
			 * XXX If f_owner is a process group, the
			 * negative return value will get converted
			 * into an error.  Oops.  If we keep the
			 * current syscall conventions, the only way
			 * to fix this will be in libc.
			 */
			err = filp->f_owner.pid;
			break;
		case F_SETOWN:
			filp->f_owner.pid = arg;
			filp->f_owner.uid = current->uid;
			filp->f_owner.euid = current->euid;
			if (S_ISSOCK (filp->f_dentry->d_inode->i_mode))
				err = sock_fcntl (filp, F_SETOWN, arg);
			break;
		case F_GETSIG:
			err = filp->f_owner.signum;
			break;
		case F_SETSIG:
			/* arg == 0 restores default behaviour. */
			if (arg < 0 || arg > _NSIG) {
				err = -EINVAL;
				break;
			}
			err = 0;
			filp->f_owner.signum = arg;
			break;
		default:
			/* sockets need a few special fcntls. */
			err = -EINVAL;
			if (S_ISSOCK (filp->f_dentry->d_inode->i_mode))
				err = sock_fcntl (filp, cmd, arg);
			break;
	}

	return err;
}
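/*
 * Illustrative user-space sketch of the F_SETOWN/F_SETSIG commands
 * handled above: a process asks for a queued real-time signal (with the
 * ready fd in si_fd) instead of plain SIGIO.  Hypothetical example:
 *
 *	fcntl(fd, F_SETOWN, getpid());		// deliver signals to us
 *	fcntl(fd, F_SETSIG, SIGRTMIN + 1);	// rt signal; 0 = plain SIGIO
 *	flags = fcntl(fd, F_GETFL, 0);
 *	fcntl(fd, F_SETFL, flags | O_ASYNC);	// turn FASYNC on
 *
 * The signal handler can then read si_fd/si_band from the siginfo_t
 * filled in by send_sigio_to_task() below.
 */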
asmlinkage long sys_fcntl(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file * filp;
	long err = -EBADF;

	filp = fget(fd);
	if (!filp)
		goto out;

	lock_kernel();
	err = do_fcntl(fd, cmd, arg, filp);
	unlock_kernel();

	fput(filp);
out:
	return err;
}
#if BITS_PER_LONG == 32
asmlinkage long sys_fcntl64(unsigned int fd, unsigned int cmd, unsigned long arg)
{
	struct file * filp;
	long err;

	err = -EBADF;
	filp = fget(fd);
	if (!filp)
		goto out;

	lock_kernel();
	switch (cmd) {
		case F_GETLK64:
			err = fcntl_getlk64(fd, (struct flock64 *) arg);
			break;
		case F_SETLK64:
			err = fcntl_setlk64(fd, cmd, (struct flock64 *) arg);
			break;
		case F_SETLKW64:
			err = fcntl_setlk64(fd, cmd, (struct flock64 *) arg);
			break;
		default:
			err = do_fcntl(fd, cmd, arg, filp);
			break;
	}
	unlock_kernel();
	fput(filp);
out:
	return err;
}
#endif
/* Table to convert sigio signal codes into poll band bitmaps */

static long band_table[NSIGPOLL] = {
	POLLIN | POLLRDNORM,			/* POLL_IN */
	POLLOUT | POLLWRNORM | POLLWRBAND,	/* POLL_OUT */
	POLLIN | POLLRDNORM | POLLMSG,		/* POLL_MSG */
	POLLERR,				/* POLL_ERR */
	POLLPRI | POLLRDBAND,			/* POLL_PRI */
	POLLHUP | POLLERR			/* POLL_HUP */
};
static void send_sigio_to_task(struct task_struct *p,
			       struct fown_struct *fown,
			       struct fasync_struct *fa,
			       int reason)
{
	if ((fown->euid != 0) &&
	    (fown->euid ^ p->suid) && (fown->euid ^ p->uid) &&
	    (fown->uid ^ p->suid) && (fown->uid ^ p->uid))
		return;
	switch (fown->signum) {
		siginfo_t si;
		default:
			/* Queue a rt signal with the appropriate fd as its
			   value.  We use SI_SIGIO as the source, not
			   SI_KERNEL, since kernel signals always get
			   delivered even if we can't queue.  Failure to
			   queue in this case _should_ be reported; we fall
			   back to SIGIO in that case. --sct */
			si.si_signo = fown->signum;
			si.si_errno = 0;
			si.si_code  = reason;
			/* Make sure we are called with one of the POLL_*
			   reasons, otherwise we could leak kernel stack into
			   userspace.  */
			if ((reason & __SI_MASK) != __SI_POLL)
				BUG();
			if (reason - POLL_IN > NSIGPOLL)
				si.si_band = ~0L;
			else
				si.si_band = band_table[reason - POLL_IN];
			si.si_fd = fa->fa_fd;
			if (!send_sig_info(fown->signum, &si, p))
				break;
		/* fall-through: fall back on the old plain SIGIO signal */
		case 0:
			send_sig(SIGIO, p, 1);
	}
}
static void send_sigio(struct fown_struct *fown, struct fasync_struct *fa,
		       int band)
{
	struct task_struct * p;
	int pid = fown->pid;

	read_lock(&tasklist_lock);
	if ( (pid > 0) && (p = find_task_by_pid(pid)) ) {
		send_sigio_to_task(p, fown, fa, band);
		goto out;
	}
	for_each_task(p) {
		int match = p->pid;
		if (pid < 0)
			match = -p->pgrp;
		if (pid != match)
			continue;
		send_sigio_to_task(p, fown, fa, band);
	}
out:
	read_unlock(&tasklist_lock);
}
/*
 * fasync_helper() is used by some character device drivers (mainly mice)
 * to set up the fasync queue.  It returns negative on error, 0 if it did
 * no changes and positive if it added/deleted the entry.
 */
static rwlock_t fasync_lock = RW_LOCK_UNLOCKED;

int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
{
	struct fasync_struct *fa, **fp;
	struct fasync_struct *new = NULL;
	int result = 0;

	if (on) {
		new = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
		if (!new)
			return -ENOMEM;
	}
	write_lock_irq(&fasync_lock);
	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
		if (fa->fa_file == filp) {
			if (on) {
				fa->fa_fd = fd;
				kfree(new);
			} else {
				*fp = fa->fa_next;
				kfree(fa);
				result = 1;
			}
			goto out;
		}
	}

	if (on) {
		new->magic = FASYNC_MAGIC;
		new->fa_file = filp;
		new->fa_fd = fd;
		new->fa_next = *fapp;
		*fapp = new;
		result = 1;
	}
out:
	write_unlock_irq(&fasync_lock);
	return result;
}
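/*
 * Illustrative in-kernel sketch of the driver pattern described above:
 * a hypothetical character driver ("mydev") keeps one fasync queue and
 * forwards its fasync file operation straight to fasync_helper().
 *
 *	static struct fasync_struct *mydev_async_queue;
 *
 *	static int mydev_fasync(int fd, struct file *filp, int on)
 *	{
 *		return fasync_helper(fd, filp, on, &mydev_async_queue);
 *	}
 *
 * The driver's release() should call mydev_fasync(-1, filp, 0) so the
 * entry is removed before the file goes away.
 */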
void __kill_fasync(struct fasync_struct *fa, int sig, int band)
{
	while (fa) {
		struct fown_struct * fown;
		if (fa->magic != FASYNC_MAGIC) {
			printk("kill_fasync: bad magic number in "
			       "fasync_struct!\n");
			return;
		}
		fown = &fa->fa_file->f_owner;
		/* Don't send SIGURG to processes which have not set a
		   queued signum: SIGURG has its own default signalling
		   mechanism. */
		if (fown->pid && !(sig == SIGURG && fown->signum == 0))
			send_sigio(fown, fa, band);
		fa = fa->fa_next;
	}
}
void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	read_lock(&fasync_lock);
	__kill_fasync(*fp, sig, band);
	read_unlock(&fasync_lock);
}
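/*
 * Illustrative sketch, continuing the hypothetical "mydev" driver from
 * above: when new data arrives (e.g. in the interrupt handler), the
 * driver wakes any FASYNC subscribers with:
 *
 *	kill_fasync(&mydev_async_queue, SIGIO, POLL_IN);
 *
 * which ends up in send_sigio() and, for readers that used F_SETSIG, a
 * queued rt signal with si_band = POLLIN | POLLRDNORM per band_table.
 */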