usr/src/uts/common/syscall/fcntl.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright (c) 1994, 2010, Oracle and/or its affiliates. All rights reserved.
  24  * Copyright (c) 2013, OmniTI Computer Consulting, Inc. All rights reserved.
  25  * Copyright 2015, Joyent, Inc.
  26  */
  27
  28 /*      Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T     */
  29 /*        All Rights Reserved   */
  30
  31 /*
  32  * Portions of this source code were derived from Berkeley 4.3 BSD
  33  * under license from the Regents of the University of California.
  34  */
  35
  36
  37 #include <sys/param.h>
  38 #include <sys/isa_defs.h>
  39 #include <sys/types.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/systm.h>
  42 #include <sys/errno.h>
  43 #include <sys/fcntl.h>
  44 #include <sys/flock.h>
  45 #include <sys/vnode.h>
  46 #include <sys/file.h>
  47 #include <sys/mode.h>
  48 #include <sys/proc.h>
  49 #include <sys/filio.h>
  50 #include <sys/share.h>
  51 #include <sys/debug.h>
  52 #include <sys/rctl.h>
  53 #include <sys/nbmlock.h>
  54
  55 #include <sys/cmn_err.h>
  56
  57 static int flock_check(vnode_t *, flock64_t *, offset_t, offset_t);
  58 static int flock_get_start(vnode_t *, flock64_t *, offset_t, u_offset_t *);
  59 static void fd_too_big(proc_t *);
  60
  61 /*
  62  * File control.
  63  */
  64 int
  65 fcntl(int fdes, int cmd, intptr_t arg)
  66 {
  67         int iarg;
  68         int error = 0;
  69         int retval;
  70         proc_t *p;
  71         file_t *fp;
  72         vnode_t *vp;
  73         u_offset_t offset;
  74         u_offset_t start;
  75         struct vattr vattr;
  76         int in_crit;
  77         int flag;
  78         struct flock sbf;
  79         struct flock64 bf;
  80         struct o_flock obf;
  81         struct flock64_32 bf64_32;
  82         struct fshare fsh;
  83         struct shrlock shr;
  84         struct shr_locowner shr_own;
  85         offset_t maxoffset;
  86         model_t datamodel;
  87         int fdres;
  88
  89 #if defined(_ILP32) && !defined(lint) && defined(_SYSCALL32)
  90         ASSERT(sizeof (struct flock) == sizeof (struct flock32));
  91         ASSERT(sizeof (struct flock64) == sizeof (struct flock64_32));
  92 #endif
  93 #if defined(_LP64) && !defined(lint) && defined(_SYSCALL32)
  94         ASSERT(sizeof (struct flock) == sizeof (struct flock64_64));
  95         ASSERT(sizeof (struct flock64) == sizeof (struct flock64_64));
  96 #endif
  97
  98         /*
  99          * First, for speed, deal with the subset of cases
 100          * that do not require getf() / releasef().
 101          */
 102         switch (cmd) {
 103         case F_GETFD:
 104                 if ((error = f_getfd_error(fdes, &flag)) == 0)
 105                         retval = flag;
 106                 goto out;
 107
 108         case F_SETFD:
 109                 error = f_setfd_error(fdes, (int)arg);
 110                 retval = 0;
 111                 goto out;
 112
 113         case F_GETFL:
 114                 if ((error = f_getfl(fdes, &flag)) == 0) {
 115                         retval = (flag & (FMASK | FASYNC));
 116                         if ((flag & (FSEARCH | FEXEC)) == 0)
 117                                 retval += FOPEN;
 118                         else
 119                                 retval |= (flag & (FSEARCH | FEXEC));
 120                 }
 121                 goto out;
 122
 123         case F_GETXFL:
 124                 if ((error = f_getfl(fdes, &flag)) == 0) {
 125                         retval = flag;
 126                         if ((flag & (FSEARCH | FEXEC)) == 0)
 127                                 retval += FOPEN;
 128                 }
 129                 goto out;
 130
 131         case F_BADFD:
 132                 if ((error = f_badfd(fdes, &fdres, (int)arg)) == 0)
 133                         retval = fdres;
 134                 goto out;
 135         }
 136
 137         /*
 138          * Second, for speed, deal with the subset of cases that
 139          * require getf() / releasef() but do not require copyin.
 140          */
 141         if ((fp = getf(fdes)) == NULL) {
 142                 error = EBADF;
 143                 goto out;
 144         }
 145         iarg = (int)arg;
 146
 147         switch (cmd) {
 148         case F_DUPFD:
 149         case F_DUPFD_CLOEXEC:
 150                 p = curproc;
 151                 if ((uint_t)iarg >= p->p_fno_ctl) {
 152                         if (iarg >= 0)
 153                                 fd_too_big(p);
 154                         error = EINVAL;
 155                         goto done;
 156                 }
 157                 /*
 158                  * We need to increment the f_count reference counter
 159                  * before allocating a new file descriptor.
 160                  * Doing it the other way round opens a window for a race
 161                  * condition with closeandsetf() on the target file descriptor,
 162                  * which can close the file still referenced by the original
 163                  * file descriptor.
 164                  */
 165                 mutex_enter(&fp->f_tlock);
 166                 fp->f_count++;
 167                 mutex_exit(&fp->f_tlock);
 168                 if ((retval = ufalloc_file(iarg, fp)) == -1) {
 169                         /*
 170                          * New file descriptor can't be allocated.
 171                          * Revert the reference count.
 172                          */
 173                         mutex_enter(&fp->f_tlock);
 174                         fp->f_count--;
 175                         mutex_exit(&fp->f_tlock);
 176                         error = EMFILE;
 177                 } else {
 178                         if (cmd == F_DUPFD_CLOEXEC) {
 179                                 f_setfd(retval, FD_CLOEXEC);
 180                         }
 181                 }
 182                 goto done;
 183
 184         case F_DUP2FD_CLOEXEC:
 185                 if (fdes == iarg) {
 186                         error = EINVAL;
 187                         goto done;
 188                 }
 189
 190                 /*FALLTHROUGH*/
 191
 192         case F_DUP2FD:
 193                 p = curproc;
 194                 if (fdes == iarg) {
 195                         retval = iarg;
 196                 } else if ((uint_t)iarg >= p->p_fno_ctl) {
 197                         if (iarg >= 0)
 198                                 fd_too_big(p);
 199                         error = EBADF;
 200                 } else {
 201                         /*
 202                          * We can't hold our getf(fdes) across the call to
 203                          * closeandsetf() because it creates a window for
 204                          * deadlock: if one thread is doing dup2(a, b) while
 205                          * another is doing dup2(b, a), each one will block
 206                          * waiting for the other to call releasef().  The
 207                          * solution is to increment the file reference count
 208                          * (which we have to do anyway), then releasef(fdes),
 209                          * then closeandsetf().  Incrementing f_count ensures
 210                          * that fp won't disappear after we call releasef().
 211                          * When closeandsetf() fails, we try to avoid calling
 212                          * closef() because of all the side effects.
 213                          */
 214                         mutex_enter(&fp->f_tlock);
 215                         fp->f_count++;
 216                         mutex_exit(&fp->f_tlock);
 217                         releasef(fdes);
 218                         if ((error = closeandsetf(iarg, fp)) == 0) {
 219                                 if (cmd == F_DUP2FD_CLOEXEC) {
 220                                         f_setfd(iarg, FD_CLOEXEC);
 221                                 }
 222                                 retval = iarg;
 223                         } else {
 224                                 mutex_enter(&fp->f_tlock);
 225                                 if (fp->f_count > 1) {
 226                                         fp->f_count--;
 227                                         mutex_exit(&fp->f_tlock);
 228                                 } else {
 229                                         mutex_exit(&fp->f_tlock);
 230                                         (void) closef(fp);
 231                                 }
 232                         }
 233                         goto out;
 234                 }
 235                 goto done;
 236
 237         case F_SETFL:
 238                 vp = fp->f_vnode;
 239                 flag = fp->f_flag;
 240                 if ((iarg & (FNONBLOCK|FNDELAY)) == (FNONBLOCK|FNDELAY))
 241                         iarg &= ~FNDELAY;
 242                 if ((error = VOP_SETFL(vp, flag, iarg, fp->f_cred, NULL)) ==
 243                     0) {
 244                         iarg &= FMASK;
 245                         mutex_enter(&fp->f_tlock);
 246                         fp->f_flag &= ~FMASK | (FREAD|FWRITE);
 247                         fp->f_flag |= (iarg - FOPEN) & ~(FREAD|FWRITE);
 248                         mutex_exit(&fp->f_tlock);
 249                 }
 250                 retval = 0;
 251                 goto done;
 252         }
 253
 254         /*
 255          * Finally, deal with the expensive cases.
 256          */
 257         retval = 0;
 258         in_crit = 0;
 259         maxoffset = MAXOFF_T;
 260         datamodel = DATAMODEL_NATIVE;
 261 #if defined(_SYSCALL32_IMPL)
 262         if ((datamodel = get_udatamodel()) == DATAMODEL_ILP32)
 263                 maxoffset = MAXOFF32_T;
 264 #endif
 265
 266         vp = fp->f_vnode;
 267         flag = fp->f_flag;
 268         offset = fp->f_offset;
 269
 270         switch (cmd) {
 271         /*
 272          * The file system and vnode layers understand and implement
 273          * locking with flock64 structures. So here once we pass through
 274          * the test for compatibility as defined by LFS API, (for F_SETLK,
 275          * F_SETLKW, F_GETLK, F_GETLKW, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW,
 276          * F_FREESP) we transform the flock structure to a flock64 structure
 277          * and send it to the lower layers. Similarly in case of GETLK and
 278          * OFD_GETLK the returned flock64 structure is transformed to a flock
 279          * structure if everything fits in nicely, otherwise we return
 280          * EOVERFLOW.
 281          */
 282
 283         case F_GETLK:
 284         case F_O_GETLK:
 285         case F_SETLK:
 286         case F_SETLKW:
 287         case F_SETLK_NBMAND:
 288         case F_OFD_GETLK:
 289         case F_OFD_SETLK:
 290         case F_OFD_SETLKW:
 291         case F_FLOCK:
 292         case F_FLOCKW:
 293
 294                 /*
 295                  * Copy in input fields only.
 296                  */
 297
 298                 if (cmd == F_O_GETLK) {
 299                         if (datamodel != DATAMODEL_ILP32) {
 300                                 error = EINVAL;
 301                                 break;
 302                         }
 303
 304                         if (copyin((void *)arg, &obf, sizeof (obf))) {
 305                                 error = EFAULT;
 306                                 break;
 307                         }
 308                         bf.l_type = obf.l_type;
 309                         bf.l_whence = obf.l_whence;
 310                         bf.l_start = (off64_t)obf.l_start;
 311                         bf.l_len = (off64_t)obf.l_len;
 312                         bf.l_sysid = (int)obf.l_sysid;
 313                         bf.l_pid = obf.l_pid;
 314                 } else if (datamodel == DATAMODEL_NATIVE) {
 315                         if (copyin((void *)arg, &sbf, sizeof (sbf))) {
 316                                 error = EFAULT;
 317                                 break;
 318                         }
 319                         /*
 320                          * XXX  In an LP64 kernel with an LP64 application
 321                          *      there's no need to do a structure copy here
 322                          *      struct flock == struct flock64. However,
 323                          *      we did it this way to avoid more conditional
 324                          *      compilation.
 325                          */
 326                         bf.l_type = sbf.l_type;
 327                         bf.l_whence = sbf.l_whence;
 328                         bf.l_start = (off64_t)sbf.l_start;
 329                         bf.l_len = (off64_t)sbf.l_len;
 330                         bf.l_sysid = sbf.l_sysid;
 331                         bf.l_pid = sbf.l_pid;
 332                 }
 333 #if defined(_SYSCALL32_IMPL)
 334                 else {
 335                         struct flock32 sbf32;
 336                         if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
 337                                 error = EFAULT;
 338                                 break;
 339                         }
 340                         bf.l_type = sbf32.l_type;
 341                         bf.l_whence = sbf32.l_whence;
 342                         bf.l_start = (off64_t)sbf32.l_start;
 343                         bf.l_len = (off64_t)sbf32.l_len;
 344                         bf.l_sysid = sbf32.l_sysid;
 345                         bf.l_pid = sbf32.l_pid;
 346                 }
 347 #endif /* _SYSCALL32_IMPL */
 348
 349                 /*
 350                  * 64-bit support: check for overflow for 32-bit lock ops
 351                  */
 352                 if ((error = flock_check(vp, &bf, offset, maxoffset)) != 0)
 353                         break;
 354
 355                 if (cmd == F_FLOCK || cmd == F_FLOCKW) {
 356                         /* FLOCK* locking is always over the entire file. */
 357                         if (bf.l_whence != 0 || bf.l_start != 0 ||
 358                             bf.l_len != 0) {
 359                                 error = EINVAL;
 360                                 break;
 361                         }
 362                         if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
 363                                 error = EINVAL;
 364                                 break;
 365                         }
 366                 }
 367
 368                 if (cmd == F_OFD_GETLK || cmd == F_OFD_SETLK ||
 369                     cmd == F_OFD_SETLKW) {
 370                         /*
 371                          * TBD OFD-style locking is currently limited to
 372                          * covering the entire file.
 373                          */
 374                         if (bf.l_whence != 0 || bf.l_start != 0 ||
 375                             bf.l_len != 0) {
 376                                 error = EINVAL;
 377                                 break;
 378                         }
 379                 }
 380
 381                 /*
 382                  * Not all of the filesystems understand F_O_GETLK, and
 383                  * there's no need for them to know.  Map it to F_GETLK.
 384                  *
 385                  * The *_frlock functions in the various file systems basically
 386                  * do some validation and then funnel everything through the
 387                  * fs_frlock function. For OFD-style locks fs_frlock will do
 388                  * nothing so that once control returns here we can call the
 389                  * ofdlock function with the correct fp. For OFD-style locks
 390                  * the unsupported remote file systems, such as NFS, detect and
 391                  * reject the OFD-style cmd argument.
 392                  */
 393                 if ((error = VOP_FRLOCK(vp, (cmd == F_O_GETLK) ? F_GETLK : cmd,
 394                     &bf, flag, offset, NULL, fp->f_cred, NULL)) != 0)
 395                         break;
 396
 397                 if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
 398                     cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
 399                         /*
 400                          * This is an OFD-style lock so we need to handle it
 401                          * here. Because OFD-style locks are associated with
 402                          * the file_t we didn't have enough info down the
 403                          * VOP_FRLOCK path immediately above.
 404                          */
 405                         if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
 406                                 break;
 407                 }
 408
 409                 /*
 410                  * If command is GETLK and no lock is found, only
 411                  * the type field is changed.
 412                  */
 413                 if ((cmd == F_O_GETLK || cmd == F_GETLK ||
 414                     cmd == F_OFD_GETLK) && bf.l_type == F_UNLCK) {
 415                         /* l_type always first entry, always a short */
 416                         if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
 417                             sizeof (bf.l_type)))
 418                                 error = EFAULT;
 419                         break;
 420                 }
 421
 422                 if (cmd == F_O_GETLK) {
 423                         /*
 424                          * Return an SVR3 flock structure to the user.
 425                          */
 426                         obf.l_type = (int16_t)bf.l_type;
 427                         obf.l_whence = (int16_t)bf.l_whence;
 428                         obf.l_start = (int32_t)bf.l_start;
 429                         obf.l_len = (int32_t)bf.l_len;
 430                         if (bf.l_sysid > SHRT_MAX || bf.l_pid > SHRT_MAX) {
 431                                 /*
 432                                  * One or both values for the above fields
 433                                  * is too large to store in an SVR3 flock
 434                                  * structure.
 435                                  */
 436                                 error = EOVERFLOW;
 437                                 break;
 438                         }
 439                         obf.l_sysid = (int16_t)bf.l_sysid;
 440                         obf.l_pid = (int16_t)bf.l_pid;
 441                         if (copyout(&obf, (void *)arg, sizeof (obf)))
 442                                 error = EFAULT;
 443                 } else if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
 444                         /*
 445                          * Copy out SVR4 flock.
 446                          */
 447                         int i;
 448
 449                         if (bf.l_start > maxoffset || bf.l_len > maxoffset) {
 450                                 error = EOVERFLOW;
 451                                 break;
 452                         }
 453
 454                         if (datamodel == DATAMODEL_NATIVE) {
 455                                 for (i = 0; i < 4; i++)
 456                                         sbf.l_pad[i] = 0;
 457                                 /*
 458                                  * XXX  In an LP64 kernel with an LP64
 459                                  *      application there's no need to do a
 460                                  *      structure copy here as currently
 461                                  *      struct flock == struct flock64.
 462                                  *      We did it this way to avoid more
 463                                  *      conditional compilation.
 464                                  */
 465                                 sbf.l_type = bf.l_type;
 466                                 sbf.l_whence = bf.l_whence;
 467                                 sbf.l_start = (off_t)bf.l_start;
 468                                 sbf.l_len = (off_t)bf.l_len;
 469                                 sbf.l_sysid = bf.l_sysid;
 470                                 sbf.l_pid = bf.l_pid;
 471                                 if (copyout(&sbf, (void *)arg, sizeof (sbf)))
 472                                         error = EFAULT;
 473                         }
 474 #if defined(_SYSCALL32_IMPL)
 475                         else {
 476                                 struct flock32 sbf32;
 477                                 if (bf.l_start > MAXOFF32_T ||
 478                                     bf.l_len > MAXOFF32_T) {
 479                                         error = EOVERFLOW;
 480                                         break;
 481                                 }
 482                                 for (i = 0; i < 4; i++)
 483                                         sbf32.l_pad[i] = 0;
 484                                 sbf32.l_type = (int16_t)bf.l_type;
 485                                 sbf32.l_whence = (int16_t)bf.l_whence;
 486                                 sbf32.l_start = (off32_t)bf.l_start;
 487                                 sbf32.l_len = (off32_t)bf.l_len;
 488                                 sbf32.l_sysid = (int32_t)bf.l_sysid;
 489                                 sbf32.l_pid = (pid32_t)bf.l_pid;
 490                                 if (copyout(&sbf32,
 491                                     (void *)arg, sizeof (sbf32)))
 492                                         error = EFAULT;
 493                         }
 494 #endif
 495                 }
 496                 break;
 497
 498         case F_CHKFL:
 499                 /*
 500                  * This is for internal use only, to allow the vnode layer
 501                  * to validate a flags setting before applying it.  User
 502                  * programs can't issue it.
 503                  */
 504                 error = EINVAL;
 505                 break;
 506
 507         case F_ALLOCSP:
 508         case F_FREESP:
 509         case F_ALLOCSP64:
 510         case F_FREESP64:
 511                 /*
 512                  * Test for not-a-regular-file (and returning EINVAL)
 513                  * before testing for open-for-writing (and returning EBADF).
 514                  * This is relied upon by posix_fallocate() in libc.
 515                  */
 516                 if (vp->v_type != VREG) {
 517                         error = EINVAL;
 518                         break;
 519                 }
 520
 521                 if ((flag & FWRITE) == 0) {
 522                         error = EBADF;
 523                         break;
 524                 }
 525
 526                 if (datamodel != DATAMODEL_ILP32 &&
 527                     (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
 528                         error = EINVAL;
 529                         break;
 530                 }
 531
 532 #if defined(_ILP32) || defined(_SYSCALL32_IMPL)
 533                 if (datamodel == DATAMODEL_ILP32 &&
 534                     (cmd == F_ALLOCSP || cmd == F_FREESP)) {
 535                         struct flock32 sbf32;
 536                         /*
 537                          * For compatibility we overlay an SVR3 flock on an SVR4
 538                          * flock.  This works because the input field offsets
 539                          * in "struct flock" were preserved.
 540                          */
 541                         if (copyin((void *)arg, &sbf32, sizeof (sbf32))) {
 542                                 error = EFAULT;
 543                                 break;
 544                         } else {
 545                                 bf.l_type = sbf32.l_type;
 546                                 bf.l_whence = sbf32.l_whence;
 547                                 bf.l_start = (off64_t)sbf32.l_start;
 548                                 bf.l_len = (off64_t)sbf32.l_len;
 549                                 bf.l_sysid = sbf32.l_sysid;
 550                                 bf.l_pid = sbf32.l_pid;
 551                         }
 552                 }
 553 #endif /* _ILP32 || _SYSCALL32_IMPL */
 554
 555 #if defined(_LP64)
 556                 if (datamodel == DATAMODEL_LP64 &&
 557                     (cmd == F_ALLOCSP || cmd == F_FREESP)) {
 558                         if (copyin((void *)arg, &bf, sizeof (bf))) {
 559                                 error = EFAULT;
 560                                 break;
 561                         }
 562                 }
 563 #endif /* defined(_LP64) */
 564
 565 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
 566                 if (datamodel == DATAMODEL_ILP32 &&
 567                     (cmd == F_ALLOCSP64 || cmd == F_FREESP64)) {
 568                         if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
 569                                 error = EFAULT;
 570                                 break;
 571                         } else {
 572                                 /*
 573                                  * Note that the size of flock64 is different in
 574                                  * the ILP32 and LP64 models, due to the l_pad
 575                                  * field. We do not want to assume that the
 576                                  * flock64 structure is laid out the same in
 577                                  * ILP32 and LP64 environments, so we will
 578                                  * copy in the ILP32 version of flock64
 579                                  * explicitly and copy it to the native
 580                                  * flock64 structure.
 581                                  */
 582                                 bf.l_type = (short)bf64_32.l_type;
 583                                 bf.l_whence = (short)bf64_32.l_whence;
 584                                 bf.l_start = bf64_32.l_start;
 585                                 bf.l_len = bf64_32.l_len;
 586                                 bf.l_sysid = (int)bf64_32.l_sysid;
 587                                 bf.l_pid = (pid_t)bf64_32.l_pid;
 588                         }
 589                 }
 590 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
 591
 592                 if (cmd == F_ALLOCSP || cmd == F_FREESP)
 593                         error = flock_check(vp, &bf, offset, maxoffset);
 594                 else if (cmd == F_ALLOCSP64 || cmd == F_FREESP64)
 595                         error = flock_check(vp, &bf, offset, MAXOFFSET_T);
 596                 if (error)
 597                         break;
 598
 599                 if (vp->v_type == VREG && bf.l_len == 0 &&
 600                     bf.l_start > OFFSET_MAX(fp)) {
 601                         error = EFBIG;
 602                         break;
 603                 }
 604
 605                 /*
 606                  * Make sure that there are no conflicting non-blocking
 607                  * mandatory locks in the region being manipulated. If
 608                  * there are such locks then return EACCES.
 609                  */
 610                 if ((error = flock_get_start(vp, &bf, offset, &start)) != 0)
 611                         break;
 612
 613                 if (nbl_need_check(vp)) {
 614                         u_offset_t      begin;
 615                         ssize_t         length;
 616
 617                         nbl_start_crit(vp, RW_READER);
 618                         in_crit = 1;
 619                         vattr.va_mask = AT_SIZE;
 620                         if ((error = VOP_GETATTR(vp, &vattr, 0, CRED(), NULL))
 621                             != 0)
 622                                 break;
 623                         begin = start > vattr.va_size ? vattr.va_size : start;
 624                         length = vattr.va_size > start ? vattr.va_size - start :
 625                             start - vattr.va_size;
 626                         if (nbl_conflict(vp, NBL_WRITE, begin, length, 0,
 627                             NULL)) {
 628                                 error = EACCES;
 629                                 break;
 630                         }
 631                 }
 632
 633                 if (cmd == F_ALLOCSP64)
 634                         cmd = F_ALLOCSP;
 635                 else if (cmd == F_FREESP64)
 636                         cmd = F_FREESP;
 637
 638                 error = VOP_SPACE(vp, cmd, &bf, flag, offset, fp->f_cred, NULL);
 639
 640                 break;
 641
 642 #if !defined(_LP64) || defined(_SYSCALL32_IMPL)
 643         case F_GETLK64:
 644         case F_SETLK64:
 645         case F_SETLKW64:
 646         case F_SETLK64_NBMAND:
 647         case F_OFD_GETLK64:
 648         case F_OFD_SETLK64:
 649         case F_OFD_SETLKW64:
 650         case F_FLOCK64:
 651         case F_FLOCKW64:
 652                 /*
 653                  * Large Files: Here we set cmd as *LK and send it to
 654                  * lower layers. *LK64 is only for the user land.
 655                  * Most of the comments described above for F_SETLK
 656                  * apply here too.
 657                  * Large File support is only needed for ILP32 apps!
 658                  */
 659                 if (datamodel != DATAMODEL_ILP32) {
 660                         error = EINVAL;
 661                         break;
 662                 }
 663
 664                 if (cmd == F_GETLK64)
 665                         cmd = F_GETLK;
 666                 else if (cmd == F_SETLK64)
 667                         cmd = F_SETLK;
 668                 else if (cmd == F_SETLKW64)
 669                         cmd = F_SETLKW;
 670                 else if (cmd == F_SETLK64_NBMAND)
 671                         cmd = F_SETLK_NBMAND;
 672                 else if (cmd == F_OFD_GETLK64)
 673                         cmd = F_OFD_GETLK;
 674                 else if (cmd == F_OFD_SETLK64)
 675                         cmd = F_OFD_SETLK;
 676                 else if (cmd == F_OFD_SETLKW64)
 677                         cmd = F_OFD_SETLKW;
 678                 else if (cmd == F_FLOCK64)
 679                         cmd = F_FLOCK;
 680                 else if (cmd == F_FLOCKW64)
 681                         cmd = F_FLOCKW;
 682
 683                 /*
 684                  * Note that the size of flock64 is different in the ILP32
 685                  * and LP64 models, due to the l_pad field.
 686                  * We do not want to assume that the flock64 structure is
 687                  * laid out the same in ILP32 and LP64 environments, so
 688                  * we will copy in the ILP32 version of flock64 explicitly
 689                  * and copy it to the native flock64 structure.
 690                  */
 691
 692                 if (copyin((void *)arg, &bf64_32, sizeof (bf64_32))) {
 693                         error = EFAULT;
 694                         break;
 695                 }
 696
 697                 bf.l_type = (short)bf64_32.l_type;
 698                 bf.l_whence = (short)bf64_32.l_whence;
 699                 bf.l_start = bf64_32.l_start;
 700                 bf.l_len = bf64_32.l_len;
 701                 bf.l_sysid = (int)bf64_32.l_sysid;
 702                 bf.l_pid = (pid_t)bf64_32.l_pid;
 703
 704                 if ((error = flock_check(vp, &bf, offset, MAXOFFSET_T)) != 0)
 705                         break;
 706
 707                 if (cmd == F_FLOCK || cmd == F_FLOCKW) {
 708                         /* FLOCK* locking is always over the entire file. */
 709                         if (bf.l_whence != 0 || bf.l_start != 0 ||
 710                             bf.l_len != 0) {
 711                                 error = EINVAL;
 712                                 break;
 713                         }
 714                         if (bf.l_type < F_RDLCK || bf.l_type > F_UNLCK) {
 715                                 error = EINVAL;
 716                                 break;
 717                         }
 718                 }
 719
 720                 if (cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
 721                         /*
 722                          * TBD OFD-style locking is currently limited to
 723                          * covering the entire file.
 724                          */
 725                         if (bf.l_whence != 0 || bf.l_start != 0 ||
 726                             bf.l_len != 0) {
 727                                 error = EINVAL;
 728                                 break;
 729                         }
 730                 }
 731
 732                 /*
 733                  * The *_frlock functions in the various file systems basically
 734                  * do some validation and then funnel everything through the
 735                  * fs_frlock function. For OFD-style locks fs_frlock will do
 736                  * nothing so that once control returns here we can call the
 737                  * ofdlock function with the correct fp. For OFD-style locks
 738                  * the unsupported remote file systems, such as NFS, detect and
 739                  * reject the OFD-style cmd argument.
 740                  */
 741                 if ((error = VOP_FRLOCK(vp, cmd, &bf, flag, offset,
 742                     NULL, fp->f_cred, NULL)) != 0)
 743                         break;
 744
 745                 if (cmd == F_FLOCK || cmd == F_FLOCKW || cmd == F_OFD_GETLK ||
 746                     cmd == F_OFD_SETLK || cmd == F_OFD_SETLKW) {
 747                         /*
 748                          * This is an OFD-style lock so we need to handle it
 749                          * here. Because OFD-style locks are associated with
 750                          * the file_t we didn't have enough info down the
 751                          * VOP_FRLOCK path immediately above.
 752                          */
 753                         if ((error = ofdlock(fp, cmd, &bf, flag, offset)) != 0)
 754                                 break;
 755                 }
 756
 757                 if ((cmd == F_GETLK || cmd == F_OFD_GETLK) &&
 758                     bf.l_type == F_UNLCK) {
 759                         if (copyout(&bf.l_type, &((struct flock *)arg)->l_type,
 760                             sizeof (bf.l_type)))
 761                                 error = EFAULT;
 762                         break;
 763                 }
 764
 765                 if (cmd == F_GETLK || cmd == F_OFD_GETLK) {
 766                         int i;
 767
 768                         /*
 769                          * We do not want to assume that the flock64 structure
 770                          * is laid out the same way in ILP32 and LP64
 771                          * environments, so we will copy out the ILP32 version
 772                          * of flock64 explicitly after copying the native
 773                          * flock64 structure to it.
 774                          */
 775                         for (i = 0; i < 4; i++)
 776                                 bf64_32.l_pad[i] = 0;
 777                         bf64_32.l_type = (int16_t)bf.l_type;
 778                         bf64_32.l_whence = (int16_t)bf.l_whence;
 779                         bf64_32.l_start = bf.l_start;
 780                         bf64_32.l_len = bf.l_len;
 781                         bf64_32.l_sysid = (int32_t)bf.l_sysid;
 782                         bf64_32.l_pid = (pid32_t)bf.l_pid;
 783                         if (copyout(&bf64_32, (void *)arg, sizeof (bf64_32)))
 784                                 error = EFAULT;
 785                 }
 786                 break;
 787 #endif /* !defined(_LP64) || defined(_SYSCALL32_IMPL) */
 788
 789         case F_SHARE:
 790         case F_SHARE_NBMAND:
 791         case F_UNSHARE:
 792
 793                 /*
 794                  * Copy in input fields only.
 795                  */
 796                 if (copyin((void *)arg, &fsh, sizeof (fsh))) {
 797                         error = EFAULT;
 798                         break;
 799                 }
 800
 801                 /*
 802                  * Local share reservations always have this simple form
 803                  */
 804                 shr.s_access = fsh.f_access;
 805                 shr.s_deny = fsh.f_deny;
 806                 shr.s_sysid = 0;
 807                 shr.s_pid = ttoproc(curthread)->p_pid;
 808                 shr_own.sl_pid = shr.s_pid;
 809                 shr_own.sl_id = fsh.f_id;
 810                 shr.s_own_len = sizeof (shr_own);
 811                 shr.s_owner = (caddr_t)&shr_own;
 812                 error = VOP_SHRLOCK(vp, cmd, &shr, flag, fp->f_cred, NULL);
 813                 break;
 814
 815         default:
 816                 error = EINVAL;
 817                 break;
 818         }
 819
 820         if (in_crit)
 821                 nbl_end_crit(vp);
 822
 823 done:
 824         releasef(fdes);
 825 out:
 826         if (error)
 827                 return (set_errno(error));
 828         return (retval);
 829 }
 830
 831 /*
 832  * Validate the byte range described by *flp against the limit 'max'.
 833  *
 834  * The starting offset is resolved from l_whence/l_start ('offset' is the
 835  * base for SEEK_CUR; the file size from VOP_GETATTR() is the base for
 836  * SEEK_END) and the last byte of the range is derived from l_len.
 837  *
 838  * Returns 0 if the range is acceptable, EOVERFLOW if a relative start or
 839  * a positive length would run past 'max', EINVAL for an unknown l_whence,
 840  * a start beyond 'max' or an inverted range, or the VOP_GETATTR() error.
 841  *
 842  * Side effect: an F_UNLCK request with a positive length that ends exactly
 843  * at 'max' has its l_len reset to 0.
 844  */
 845 int
 846 flock_check(vnode_t *vp, flock64_t *flp, offset_t offset, offset_t max)
 847 {
 848         struct vattr    va;
 849         u_offset_t      first, last;
 850         offset_t        base;
 851         int             err;
 852
 853         /* Pick the base offset that l_start is relative to. */
 854         switch (flp->l_whence) {
 855         case 0:         /* SEEK_SET */
 856                 base = 0;
 857                 break;
 858         case 1:         /* SEEK_CUR */
 859                 base = offset;
 860                 break;
 861         case 2:         /* SEEK_END */
 862                 va.va_mask = AT_SIZE;
 863                 if ((err = VOP_GETATTR(vp, &va, 0, CRED(), NULL)) != 0)
 864                         return (err);
 865                 base = (offset_t)va.va_size;
 866                 break;
 867         default:
 868                 return (EINVAL);
 869         }
 870
 871         /* An absolute (SEEK_SET) start is only range checked below. */
 872         if (flp->l_whence != 0 && flp->l_start > (max - base))
 873                 return (EOVERFLOW);
 874         first = (u_offset_t)(flp->l_start + base);
 875         if (first > max)
 876                 return (EINVAL);
 877
 878         /* Work out the last byte covered by the request. */
 879         if ((offset_t)flp->l_len > 0) {
 880                 if (flp->l_len > (max - first + 1))
 881                         return (EOVERFLOW);
 882                 last = (u_offset_t)(first + (flp->l_len - 1));
 883                 ASSERT(last <= max);
 884         } else if (flp->l_len == 0) {
 885                 last = MAXEND;
 886         } else {
 887                 /* Negative length: |l_len| bytes ending at the start. */
 888                 last = first;
 889                 first += (u_offset_t)flp->l_len + 1;
 890                 if (first > max)
 891                         return (EINVAL);
 892                 ASSERT(last <= max);
 893         }
 894         ASSERT(first <= max);
 895
 896         if (flp->l_type == F_UNLCK && flp->l_len > 0 && last == (offset_t)max)
 897                 flp->l_len = 0;
 898         return ((first > last) ? EINVAL : 0);
 899 }
 900
 901 /*
 902  * Compute the absolute starting offset described by flp->l_whence and
 903  * flp->l_start and store it through 'start'.  No range or overflow checks
 904  * are made; the request is assumed to describe a valid starting point.
 905  * Returns 0 on success, EINVAL for an unrecognized l_whence, or the error
 906  * from VOP_GETATTR() when the file size is needed (SEEK_END) and the
 907  * lookup fails; 'start' is left untouched on failure.
 908  */
 909 static int
 910 flock_get_start(vnode_t *vp, flock64_t *flp, offset_t offset, u_offset_t *start)
 911 {
 912         struct vattr    va;
 913         int             err;
 914
 915         if (flp->l_whence == 0) {               /* SEEK_SET */
 916                 *start = (u_offset_t)flp->l_start;
 917         } else if (flp->l_whence == 1) {        /* SEEK_CUR */
 918                 *start = (u_offset_t)(flp->l_start + offset);
 919         } else if (flp->l_whence == 2) {        /* SEEK_END */
 920                 va.va_mask = AT_SIZE;
 921                 if ((err = VOP_GETATTR(vp, &va, 0, CRED(), NULL)) != 0)
 922                         return (err);
 923                 *start = (u_offset_t)(flp->l_start + (offset_t)va.va_size);
 924         } else {
 925                 return (EINVAL);
 926         }
 927
 928         return (0);
 929 }
 930
 931 /*
 932  * Descriptor request was too big: run the RLIMIT_NOFILE rctl action on p.
 933  */
 934 static void
 935 fd_too_big(proc_t *p)
 936 {
 937         mutex_enter(&p->p_lock);
 938         (void) rctl_action(rctlproc_legacy[RLIMIT_NOFILE], p->p_rctls, p,
 939             RCA_SAFE);
 940         mutex_exit(&p->p_lock);
 941 }