usr/src/uts/common/fs/nfs/nfs4_srv.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 /*
  25  * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
  26  */
  27
  28 /*
  29  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  30  *      All Rights Reserved
  31  */
  32
  33 #include <sys/param.h>
  34 #include <sys/types.h>
  35 #include <sys/systm.h>
  36 #include <sys/cred.h>
  37 #include <sys/buf.h>
  38 #include <sys/vfs.h>
  39 #include <sys/vfs_opreg.h>
  40 #include <sys/vnode.h>
  41 #include <sys/uio.h>
  42 #include <sys/errno.h>
  43 #include <sys/sysmacros.h>
  44 #include <sys/statvfs.h>
  45 #include <sys/kmem.h>
  46 #include <sys/dirent.h>
  47 #include <sys/cmn_err.h>
  48 #include <sys/debug.h>
  49 #include <sys/systeminfo.h>
  50 #include <sys/flock.h>
  51 #include <sys/pathname.h>
  52 #include <sys/nbmlock.h>
  53 #include <sys/share.h>
  54 #include <sys/atomic.h>
  55 #include <sys/policy.h>
  56 #include <sys/fem.h>
  57 #include <sys/sdt.h>
  58 #include <sys/ddi.h>
  59 #include <sys/zone.h>
  60
  61 #include <fs/fs_reparse.h>
  62
  63 #include <rpc/types.h>
  64 #include <rpc/auth.h>
  65 #include <rpc/rpcsec_gss.h>
  66 #include <rpc/svc.h>
  67
  68 #include <nfs/nfs.h>
  69 #include <nfs/export.h>
  70 #include <nfs/nfs_cmd.h>
  71 #include <nfs/lm.h>
  72 #include <nfs/nfs4.h>
  73
  74 #include <sys/strsubr.h>
  75 #include <sys/strsun.h>
  76
  77 #include <inet/common.h>
  78 #include <inet/ip.h>
  79 #include <inet/ip6.h>
  80
  81 #include <sys/tsol/label.h>
  82 #include <sys/tsol/tndb.h>
  83
  84 #define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
  85 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
  86 #define RFS4_LOCK_DELAY 10      /* Milliseconds */
  87 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
  88 extern struct svc_ops rdma_svc_ops;
  89 extern int nfs_loaned_buffers;
  90 /* End of Tunables */
  91
  92 static int rdma_setup_read_data4(READ4args *, READ4res *);
  93
  94 /*
  95  * Used to bump the stateid4.seqid value and show changes in the stateid
  96  */
  97 #define next_stateid(sp) (++(sp)->bits.chgseq)
  98
  99 /*
 100  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 101  *      This is used to return NFS4ERR_TOOSMALL when clients specify
 102  *      maxcount that isn't large enough to hold the smallest possible
 103  *      XDR encoded dirent.
 104  *
 105  *          sizeof cookie (8 bytes) +
 106  *          sizeof name_len (4 bytes) +
 107  *          sizeof smallest (padded) name (4 bytes) +
 108  *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 109  *          sizeof attrlist4_len (4 bytes) +
 110  *          sizeof next boolean (4 bytes)
 111  *
 112  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 113  * the smallest possible entry4 (assumes no attrs requested).
 114  *      sizeof nfsstat4 (4 bytes) +
 115  *      sizeof verifier4 (8 bytes) +
 116  *      sizeof entry4list bool (4 bytes) +
 117  *      sizeof entry4   (36 bytes) +
 118  *      sizeof eof bool  (4 bytes)
 119  *
 120  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 121  *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 122  *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 123  *      required for a given name length.  MAXNAMELEN is the maximum
 124  *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 125  *      macros are to allow for . and .. entries -- just a minor tweak to try
 126  *      and guarantee that buffer we give to VOP_READDIR will be large enough
 127  *      to hold ., .., and the largest possible solaris dirent64.
 128  */
 129 #define RFS4_MINLEN_ENTRY4 36
 130 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
 131 #define RFS4_MINLEN_RDDIR_BUF \
 132         (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 133
 134 /*
 135  * It would be better to pad to 4 bytes since that's what XDR would do,
 136  * but the dirents UFS gives us are already padded to 8, so just take
 137  * what we're given.  Dircount is only a hint anyway.  Currently the
 138  * solaris kernel is ASCII only, so there's no point in calling the
 139  * UTF8 functions.
 140  *
 141  * dirent64: name padded to provide 8 byte struct alignment
 142  *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 143  *
 144  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 145  *
 146  */
 147 #define DIRENT64_TO_DIRCOUNT(dp) \
 148         (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 149
 150 time_t rfs4_start_time;                 /* Initialized in rfs4_srvrinit */
 151
 152 static sysid_t lockt_sysid;             /* dummy sysid for all LOCKT calls */
 153
 154 u_longlong_t    nfs4_srv_caller_id;
 155 uint_t          nfs4_srv_vkey = 0;
 156
 157 verifier4       Write4verf;
 158 verifier4       Readdir4verf;
 159
 160 void    rfs4_init_compound_state(struct compound_state *);
 161
 162 static void     nullfree(caddr_t);
 163 static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 164                         struct compound_state *);
 165 static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 166                         struct compound_state *);
 167 static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 168                         struct compound_state *);
 169 static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 170                         struct compound_state *);
 171 static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 172                         struct compound_state *);
 173 static void     rfs4_op_create_free(nfs_resop4 *resop);
 174 static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 175                         struct svc_req *, struct compound_state *);
 176 static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 177                         struct svc_req *, struct compound_state *);
 178 static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 179                         struct compound_state *);
 180 static void     rfs4_op_getattr_free(nfs_resop4 *);
 181 static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 182                         struct compound_state *);
 183 static void     rfs4_op_getfh_free(nfs_resop4 *);
 184 static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 185                         struct compound_state *);
 186 static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 187                         struct compound_state *);
 188 static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 189                         struct compound_state *);
 190 static void     lock_denied_free(nfs_resop4 *);
 191 static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 192                         struct compound_state *);
 193 static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 194                         struct compound_state *);
 195 static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 196                         struct compound_state *);
 197 static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 198                         struct compound_state *);
 199 static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 200                                 struct svc_req *req, struct compound_state *cs);
 201 static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 202                         struct compound_state *);
 203 static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 204                         struct compound_state *);
 205 static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 206                         struct svc_req *, struct compound_state *);
 207 static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 208                         struct svc_req *, struct compound_state *);
 209 static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 210                         struct compound_state *);
 211 static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 212                         struct compound_state *);
 213 static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 214                         struct compound_state *);
 215 static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 216                         struct compound_state *);
 217 static void     rfs4_op_read_free(nfs_resop4 *);
 218 static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 219 static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 220                         struct compound_state *);
 221 static void     rfs4_op_readlink_free(nfs_resop4 *);
 222 static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 223                         struct svc_req *, struct compound_state *);
 224 static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 225                         struct compound_state *);
 226 static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 227                         struct compound_state *);
 228 static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 229                         struct compound_state *);
 230 static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 231                         struct compound_state *);
 232 static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 233                         struct compound_state *);
 234 static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 235                         struct compound_state *);
 236 static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 237                         struct compound_state *);
 238 static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 239                         struct compound_state *);
 240 static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 241                         struct svc_req *, struct compound_state *);
 242 static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 243                         struct svc_req *req, struct compound_state *);
 244 static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 245                         struct compound_state *);
 246 static void     rfs4_op_secinfo_free(nfs_resop4 *);
 247
 248 static nfsstat4 check_open_access(uint32_t,
 249                                 struct compound_state *, struct svc_req *);
 250 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 251 void rfs4_ss_clid(rfs4_client_t *);
 252
 253 /*
 254  * translation table for attrs
 255  */
 256 struct nfs4_ntov_table {
 257         union nfs4_attr_u *na;
 258         uint8_t amap[NFS4_MAXNUM_ATTRS];
 259         int attrcnt;
 260         bool_t vfsstat;
 261 };
 262
 263 static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 264 static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 265                                     struct nfs4_svgetit_arg *sargp);
 266
 267 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 268                     struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 269                     struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 270
 271 fem_t           *deleg_rdops;
 272 fem_t           *deleg_wrops;
 273
 274 rfs4_servinst_t *rfs4_cur_servinst = NULL;      /* current server instance */
 275 kmutex_t        rfs4_servinst_lock;     /* protects linked list */
 276 int             rfs4_seen_first_compound;       /* set first time we see one */
 277
 278 /*
 279  * NFS4 op dispatch table
 280  */
 281
 282 struct rfsv4disp {
 283         void    (*dis_proc)();          /* proc to call */
 284         void    (*dis_resfree)();       /* frees space allocated by proc */
 285         int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
 286 };
 287
 288 static struct rfsv4disp rfsv4disptab[] = {
 289         /*
 290          * NFS VERSION 4
 291          */
 292
 293         /* RFS_NULL = 0 */
 294         {rfs4_op_illegal, nullfree, 0},
 295
 296         /* UNUSED = 1 */
 297         {rfs4_op_illegal, nullfree, 0},
 298
 299         /* UNUSED = 2 */
 300         {rfs4_op_illegal, nullfree, 0},
 301
 302         /* OP_ACCESS = 3 */
 303         {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
 304
 305         /* OP_CLOSE = 4 */
 306         {rfs4_op_close, nullfree, 0},
 307
 308         /* OP_COMMIT = 5 */
 309         {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
 310
 311         /* OP_CREATE = 6 */
 312         {rfs4_op_create, nullfree, 0},
 313
 314         /* OP_DELEGPURGE = 7 */
 315         {rfs4_op_delegpurge, nullfree, 0},
 316
 317         /* OP_DELEGRETURN = 8 */
 318         {rfs4_op_delegreturn, nullfree, 0},
 319
 320         /* OP_GETATTR = 9 */
 321         {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
 322
 323         /* OP_GETFH = 10 */
 324         {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
 325
 326         /* OP_LINK = 11 */
 327         {rfs4_op_link, nullfree, 0},
 328
 329         /* OP_LOCK = 12 */
 330         {rfs4_op_lock, lock_denied_free, 0},
 331
 332         /* OP_LOCKT = 13 */
 333         {rfs4_op_lockt, lock_denied_free, 0},
 334
 335         /* OP_LOCKU = 14 */
 336         {rfs4_op_locku, nullfree, 0},
 337
 338         /* OP_LOOKUP = 15 */
 339         {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 340
 341         /* OP_LOOKUPP = 16 */
 342         {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 343
 344         /* OP_NVERIFY = 17 */
 345         {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
 346
 347         /* OP_OPEN = 18 */
 348         {rfs4_op_open, rfs4_free_reply, 0},
 349
 350         /* OP_OPENATTR = 19 */
 351         {rfs4_op_openattr, nullfree, 0},
 352
 353         /* OP_OPEN_CONFIRM = 20 */
 354         {rfs4_op_open_confirm, nullfree, 0},
 355
 356         /* OP_OPEN_DOWNGRADE = 21 */
 357         {rfs4_op_open_downgrade, nullfree, 0},
 358
 359         /* OP_OPEN_PUTFH = 22 */
 360         {rfs4_op_putfh, nullfree, RPC_ALL},
 361
 362         /* OP_PUTPUBFH = 23 */
 363         {rfs4_op_putpubfh, nullfree, RPC_ALL},
 364
 365         /* OP_PUTROOTFH = 24 */
 366         {rfs4_op_putrootfh, nullfree, RPC_ALL},
 367
 368         /* OP_READ = 25 */
 369         {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
 370
 371         /* OP_READDIR = 26 */
 372         {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
 373
 374         /* OP_READLINK = 27 */
 375         {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
 376
 377         /* OP_REMOVE = 28 */
 378         {rfs4_op_remove, nullfree, 0},
 379
 380         /* OP_RENAME = 29 */
 381         {rfs4_op_rename, nullfree, 0},
 382
 383         /* OP_RENEW = 30 */
 384         {rfs4_op_renew, nullfree, 0},
 385
 386         /* OP_RESTOREFH = 31 */
 387         {rfs4_op_restorefh, nullfree, RPC_ALL},
 388
 389         /* OP_SAVEFH = 32 */
 390         {rfs4_op_savefh, nullfree, RPC_ALL},
 391
 392         /* OP_SECINFO = 33 */
 393         {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
 394
 395         /* OP_SETATTR = 34 */
 396         {rfs4_op_setattr, nullfree, 0},
 397
 398         /* OP_SETCLIENTID = 35 */
 399         {rfs4_op_setclientid, nullfree, 0},
 400
 401         /* OP_SETCLIENTID_CONFIRM = 36 */
 402         {rfs4_op_setclientid_confirm, nullfree, 0},
 403
 404         /* OP_VERIFY = 37 */
 405         {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
 406
 407         /* OP_WRITE = 38 */
 408         {rfs4_op_write, nullfree, 0},
 409
 410         /* OP_RELEASE_LOCKOWNER = 39 */
 411         {rfs4_op_release_lockowner, nullfree, 0},
 412 };
 413
 414 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
 415
 416 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 417
 418 #ifdef DEBUG
 419
 420 int             rfs4_fillone_debug = 0;
 421 int             rfs4_no_stub_access = 1;
 422 int             rfs4_rddir_debug = 0;
 423
 424 static char    *rfs4_op_string[] = {
 425         "rfs4_op_null",
 426         "rfs4_op_1 unused",
 427         "rfs4_op_2 unused",
 428         "rfs4_op_access",
 429         "rfs4_op_close",
 430         "rfs4_op_commit",
 431         "rfs4_op_create",
 432         "rfs4_op_delegpurge",
 433         "rfs4_op_delegreturn",
 434         "rfs4_op_getattr",
 435         "rfs4_op_getfh",
 436         "rfs4_op_link",
 437         "rfs4_op_lock",
 438         "rfs4_op_lockt",
 439         "rfs4_op_locku",
 440         "rfs4_op_lookup",
 441         "rfs4_op_lookupp",
 442         "rfs4_op_nverify",
 443         "rfs4_op_open",
 444         "rfs4_op_openattr",
 445         "rfs4_op_open_confirm",
 446         "rfs4_op_open_downgrade",
 447         "rfs4_op_putfh",
 448         "rfs4_op_putpubfh",
 449         "rfs4_op_putrootfh",
 450         "rfs4_op_read",
 451         "rfs4_op_readdir",
 452         "rfs4_op_readlink",
 453         "rfs4_op_remove",
 454         "rfs4_op_rename",
 455         "rfs4_op_renew",
 456         "rfs4_op_restorefh",
 457         "rfs4_op_savefh",
 458         "rfs4_op_secinfo",
 459         "rfs4_op_setattr",
 460         "rfs4_op_setclientid",
 461         "rfs4_op_setclient_confirm",
 462         "rfs4_op_verify",
 463         "rfs4_op_write",
 464         "rfs4_op_release_lockowner",
 465         "rfs4_op_illegal"
 466 };
 467 #endif
 468
 469 void    rfs4_ss_chkclid(rfs4_client_t *);
 470
 471 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 472
 473 extern void     rfs4_free_fs_locations4(fs_locations4 *);
 474
 475 #ifdef  nextdp
 476 #undef nextdp
 477 #endif
 478 #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 479
 480 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
 481         VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
 482         VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
 483         VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
 484         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
 485         VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
 486         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
 487         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
 488         NULL,                   NULL
 489 };
 490 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
 491         VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
 492         VOPNAME_READ,           { .femop_read = deleg_wr_read },
 493         VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
 494         VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
 495         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
 496         VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
 497         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
 498         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
 499         NULL,                   NULL
 500 };
 501
 502 int
 503 rfs4_srvrinit(void)
 504 {
 505         timespec32_t verf;
 506         int error;
 507         extern void rfs4_attr_init();
 508         extern krwlock_t rfs4_deleg_policy_lock;
 509
 510         /*
 511          * The following algorithm attempts to find a unique verifier
 512          * to be used as the write verifier returned from the server
 513          * to the client.  It is important that this verifier change
 514          * whenever the server reboots.  Of secondary importance, it
 515          * is important for the verifier to be unique between two
 516          * different servers.
 517          *
 518          * Thus, an attempt is made to use the system hostid and the
 519          * current time in seconds when the nfssrv kernel module is
 520          * loaded.  It is assumed that an NFS server will not be able
 521          * to boot and then to reboot in less than a second.  If the
 522          * hostid has not been set, then the current high resolution
 523          * time is used.  This will ensure different verifiers each
 524          * time the server reboots and minimize the chances that two
 525          * different servers will have the same verifier.
 526          * XXX - this is broken on LP64 kernels.
 527          */
 528         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 529         if (verf.tv_sec != 0) {
 530                 verf.tv_nsec = gethrestime_sec();
 531         } else {
 532                 timespec_t tverf;
 533
 534                 gethrestime(&tverf);
 535                 verf.tv_sec = (time_t)tverf.tv_sec;
 536                 verf.tv_nsec = tverf.tv_nsec;
 537         }
 538
 539         Write4verf = *(uint64_t *)&verf;
 540
 541         rfs4_attr_init();
 542         mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 543
 544         /* Used to manage create/destroy of server state */
 545         mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
 546
 547         /* Used to manage access to server instance linked list */
 548         mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 549
 550         /* Used to manage access to rfs4_deleg_policy */
 551         rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 552
 553         error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
 554         if (error != 0) {
 555                 rfs4_disable_delegation();
 556         } else {
 557                 error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 558                     &deleg_wrops);
 559                 if (error != 0) {
 560                         rfs4_disable_delegation();
 561                         fem_free(deleg_rdops);
 562                 }
 563         }
 564
 565         nfs4_srv_caller_id = fs_new_caller_id();
 566
 567         lockt_sysid = lm_alloc_sysidt();
 568
 569         vsd_create(&nfs4_srv_vkey, NULL);
 570
 571         return (0);
 572 }
 573
 574 void
 575 rfs4_srvrfini(void)
 576 {
 577         extern krwlock_t rfs4_deleg_policy_lock;
 578
 579         if (lockt_sysid != LM_NOSYSID) {
 580                 lm_free_sysidt(lockt_sysid);
 581                 lockt_sysid = LM_NOSYSID;
 582         }
 583
 584         mutex_destroy(&rfs4_deleg_lock);
 585         mutex_destroy(&rfs4_state_lock);
 586         rw_destroy(&rfs4_deleg_policy_lock);
 587
 588         fem_free(deleg_rdops);
 589         fem_free(deleg_wrops);
 590 }
 591
 592 void
 593 rfs4_init_compound_state(struct compound_state *cs)
 594 {
 595         bzero(cs, sizeof (*cs));
 596         cs->cont = TRUE;
 597         cs->access = CS_ACCESS_DENIED;
 598         cs->deleg = FALSE;
 599         cs->mandlock = FALSE;
 600         cs->fh.nfs_fh4_val = cs->fhbuf;
 601 }
 602
 603 void
 604 rfs4_grace_start(rfs4_servinst_t *sip)
 605 {
 606         rw_enter(&sip->rwlock, RW_WRITER);
 607         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 608         sip->grace_period = rfs4_grace_period;
 609         rw_exit(&sip->rwlock);
 610 }
 611
 612 /*
 613  * returns true if the instance's grace period has never been started
 614  */
 615 int
 616 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 617 {
 618         time_t start_time;
 619
 620         rw_enter(&sip->rwlock, RW_READER);
 621         start_time = sip->start_time;
 622         rw_exit(&sip->rwlock);
 623
 624         return (start_time == 0);
 625 }
 626
 627 /*
 628  * Indicates if server instance is within the
 629  * grace period.
 630  */
 631 int
 632 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 633 {
 634         time_t grace_expiry;
 635
 636         rw_enter(&sip->rwlock, RW_READER);
 637         grace_expiry = sip->start_time + sip->grace_period;
 638         rw_exit(&sip->rwlock);
 639
 640         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 641 }
 642
 643 int
 644 rfs4_clnt_in_grace(rfs4_client_t *cp)
 645 {
 646         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 647
 648         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 649 }
 650
 651 /*
 652  * reset all currently active grace periods
 653  */
 654 void
 655 rfs4_grace_reset_all(void)
 656 {
 657         rfs4_servinst_t *sip;
 658
 659         mutex_enter(&rfs4_servinst_lock);
 660         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 661                 if (rfs4_servinst_in_grace(sip))
 662                         rfs4_grace_start(sip);
 663         mutex_exit(&rfs4_servinst_lock);
 664 }
 665
 666 /*
 667  * start any new instances' grace periods
 668  */
 669 void
 670 rfs4_grace_start_new(void)
 671 {
 672         rfs4_servinst_t *sip;
 673
 674         mutex_enter(&rfs4_servinst_lock);
 675         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 676                 if (rfs4_servinst_grace_new(sip))
 677                         rfs4_grace_start(sip);
 678         mutex_exit(&rfs4_servinst_lock);
 679 }
 680
 681 static rfs4_dss_path_t *
 682 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
 683 {
 684         size_t len;
 685         rfs4_dss_path_t *dss_path;
 686
 687         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 688
 689         /*
 690          * Take a copy of the string, since the original may be overwritten.
 691          * Sadly, no strdup() in the kernel.
 692          */
 693         /* allow for NUL */
 694         len = strlen(path) + 1;
 695         dss_path->path = kmem_alloc(len, KM_SLEEP);
 696         (void) strlcpy(dss_path->path, path, len);
 697
 698         /* associate with servinst */
 699         dss_path->sip = sip;
 700         dss_path->index = index;
 701
 702         /*
 703          * Add to list of served paths.
 704          * No locking required, as we're only ever called at startup.
 705          */
 706         if (rfs4_dss_pathlist == NULL) {
 707                 /* this is the first dss_path_t */
 708
 709                 /* needed for insque/remque */
 710                 dss_path->next = dss_path->prev = dss_path;
 711
 712                 rfs4_dss_pathlist = dss_path;
 713         } else {
 714                 insque(dss_path, rfs4_dss_pathlist);
 715         }
 716
 717         return (dss_path);
 718 }
 719
 720 /*
 721  * Create a new server instance, and make it the currently active instance.
 722  * Note that starting the grace period too early will reduce the clients'
 723  * recovery window.
 724  */
 725 void
 726 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
 727 {
 728         unsigned i;
 729         rfs4_servinst_t *sip;
 730         rfs4_oldstate_t *oldstate;
 731
 732         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 733         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 734
 735         sip->start_time = (time_t)0;
 736         sip->grace_period = (time_t)0;
 737         sip->next = NULL;
 738         sip->prev = NULL;
 739
 740         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 741         /*
 742          * This initial dummy entry is required to setup for insque/remque.
 743          * It must be skipped over whenever the list is traversed.
 744          */
 745         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 746         /* insque/remque require initial list entry to be self-terminated */
 747         oldstate->next = oldstate;
 748         oldstate->prev = oldstate;
 749         sip->oldstate = oldstate;
 750
 751
 752         sip->dss_npaths = dss_npaths;
 753         sip->dss_paths = kmem_alloc(dss_npaths *
 754             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 755
 756         for (i = 0; i < dss_npaths; i++) {
 757                 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
 758         }
 759
 760         mutex_enter(&rfs4_servinst_lock);
 761         if (rfs4_cur_servinst != NULL) {
 762                 /* add to linked list */
 763                 sip->prev = rfs4_cur_servinst;
 764                 rfs4_cur_servinst->next = sip;
 765         }
 766         if (start_grace)
 767                 rfs4_grace_start(sip);
 768         /* make the new instance "current" */
 769         rfs4_cur_servinst = sip;
 770
 771         mutex_exit(&rfs4_servinst_lock);
 772 }
 773
 774 /*
 775  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 776  * all instances directly.
 777  */
 778 void
 779 rfs4_servinst_destroy_all(void)
 780 {
 781         rfs4_servinst_t *sip, *prev, *current;
 782 #ifdef DEBUG
 783         int n = 0;
 784 #endif
 785
 786         mutex_enter(&rfs4_servinst_lock);
 787         ASSERT(rfs4_cur_servinst != NULL);
 788         current = rfs4_cur_servinst;
 789         rfs4_cur_servinst = NULL;
 790         for (sip = current; sip != NULL; sip = prev) {
 791                 prev = sip->prev;
 792                 rw_destroy(&sip->rwlock);
 793                 if (sip->oldstate)
 794                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 795                 if (sip->dss_paths)
 796                         kmem_free(sip->dss_paths,
 797                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 798                 kmem_free(sip, sizeof (rfs4_servinst_t));
 799 #ifdef DEBUG
 800                 n++;
 801 #endif
 802         }
 803         mutex_exit(&rfs4_servinst_lock);
 804 }
 805
 806 /*
 807  * Assign the current server instance to a client_t.
 808  * Should be called with cp->rc_dbe held.
 809  */
 810 void
 811 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
 812 {
 813         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 814
 815         /*
 816          * The lock ensures that if the current instance is in the process
 817          * of changing, we will see the new one.
 818          */
 819         mutex_enter(&rfs4_servinst_lock);
 820         cp->rc_server_instance = sip;
 821         mutex_exit(&rfs4_servinst_lock);
 822 }
 823
 824 rfs4_servinst_t *
 825 rfs4_servinst(rfs4_client_t *cp)
 826 {
 827         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 828
 829         return (cp->rc_server_instance);
 830 }
 831
 832 /* ARGSUSED */
 833 static void
 834 nullfree(caddr_t resop)
 835 {
 836 }
 837
 838 /*
 839  * This is a fall-through for invalid or not implemented (yet) ops
 840  */
 841 /* ARGSUSED */
 842 static void
 843 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 844         struct compound_state *cs)
 845 {
 846         *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 847 }
 848
 849 /*
 850  * Check if the security flavor, nfsnum, is in the flavor_list.
 851  */
 852 bool_t
 853 in_flavor_list(int nfsnum, int *flavor_list, int count)
 854 {
 855         int i;
 856
 857         for (i = 0; i < count; i++) {
 858                 if (nfsnum == flavor_list[i])
 859                         return (TRUE);
 860         }
 861         return (FALSE);
 862 }
 863
 864 /*
 865  * Used by rfs4_op_secinfo to get the security information from the
 866  * export structure associated with the component.
 867  */
 868 /* ARGSUSED */
 869 static nfsstat4
 870 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 871 {
 872         int error, different_export = 0;
 873         vnode_t *dvp, *vp, *tvp;
 874         struct exportinfo *exi = NULL;
 875         fid_t fid;
 876         uint_t count, i;
 877         secinfo4 *resok_val;
 878         struct secinfo *secp;
 879         seconfig_t *si;
 880         bool_t did_traverse = FALSE;
 881         int dotdot, walk;
 882
 883         dvp = cs->vp;
 884         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 885
 886         /*
 887          * If dotdotting, then need to check whether it's above the
 888          * root of a filesystem, or above an export point.
 889          */
 890         if (dotdot) {
 891
 892                 /*
 893                  * If dotdotting at the root of a filesystem, then
 894                  * need to traverse back to the mounted-on filesystem
 895                  * and do the dotdot lookup there.
 896                  */
 897                 if (cs->vp->v_flag & VROOT) {
 898
 899                         /*
 900                          * If at the system root, then can
 901                          * go up no further.
 902                          */
 903                         if (VN_CMP(dvp, rootdir))
 904                                 return (puterrno4(ENOENT));
 905
 906                         /*
 907                          * Traverse back to the mounted-on filesystem
 908                          */
 909                         dvp = untraverse(cs->vp);
 910
 911                         /*
 912                          * Set the different_export flag so we remember
 913                          * to pick up a new exportinfo entry for
 914                          * this new filesystem.
 915                          */
 916                         different_export = 1;
 917                 } else {
 918
 919                         /*
 920                          * If dotdotting above an export point then set
 921                          * the different_export to get new export info.
 922                          */
 923                         different_export = nfs_exported(cs->exi, cs->vp);
 924                 }
 925         }
 926
 927         /*
 928          * Get the vnode for the component "nm".
 929          */
 930         error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
 931             NULL, NULL, NULL);
 932         if (error)
 933                 return (puterrno4(error));
 934
 935         /*
 936          * If the vnode is in a pseudo filesystem, or if the security flavor
 937          * used in the request is valid but not an explicitly shared flavor,
 938          * or the access bit indicates that this is a limited access,
 939          * check whether this vnode is visible.
 940          */
 941         if (!different_export &&
 942             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
 943             cs->access & CS_ACCESS_LIMITED)) {
 944                 if (! nfs_visible(cs->exi, vp, &different_export)) {
 945                         VN_RELE(vp);
 946                         return (puterrno4(ENOENT));
 947                 }
 948         }
 949
 950         /*
 951          * If it's a mountpoint, then traverse it.
 952          */
 953         if (vn_ismntpt(vp)) {
 954                 tvp = vp;
 955                 if ((error = traverse(&tvp)) != 0) {
 956                         VN_RELE(vp);
 957                         return (puterrno4(error));
 958                 }
 959                 /* remember that we had to traverse mountpoint */
 960                 did_traverse = TRUE;
 961                 vp = tvp;
 962                 different_export = 1;
 963         } else if (vp->v_vfsp != dvp->v_vfsp) {
 964                 /*
 965                  * If vp isn't a mountpoint and the vfs ptrs aren't the same,
 966                  * then vp is probably an LOFS object.  We don't need the
 967                  * realvp, we just need to know that we might have crossed
 968                  * a server fs boundary and need to call checkexport4.
 969                  * (LOFS lookup hides server fs mountpoints, and actually calls
 970                  * traverse)
 971                  */
 972                 different_export = 1;
 973         }
 974
 975         /*
 976          * Get the export information for it.
 977          */
 978         if (different_export) {
 979
 980                 bzero(&fid, sizeof (fid));
 981                 fid.fid_len = MAXFIDSZ;
 982                 error = vop_fid_pseudo(vp, &fid);
 983                 if (error) {
 984                         VN_RELE(vp);
 985                         return (puterrno4(error));
 986                 }
 987
 988                 if (dotdot)
 989                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
 990                 else
 991                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
 992
 993                 if (exi == NULL) {
 994                         if (did_traverse == TRUE) {
 995                                 /*
 996                                  * If this vnode is a mounted-on vnode,
 997                                  * but the mounted-on file system is not
 998                                  * exported, send back the secinfo for
 999                                  * the exported node that the mounted-on
1000                                  * vnode lives in.
1001                                  */
1002                                 exi = cs->exi;
1003                         } else {
1004                                 VN_RELE(vp);
1005                                 return (puterrno4(EACCES));
1006                         }
1007                 }
1008         } else {
1009                 exi = cs->exi;
1010         }
1011         ASSERT(exi != NULL);
1012
1013
1014         /*
1015          * Create the secinfo result based on the security information
1016          * from the exportinfo structure (exi).
1017          *
1018          * Return all flavors for a pseudo node.
1019          * For a real export node, return the flavor that the client
1020          * has access with.
1021          */
1022         ASSERT(RW_LOCK_HELD(&exported_lock));
1023         if (PSEUDO(exi)) {
1024                 count = exi->exi_export.ex_seccnt; /* total sec count */
1025                 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1026                 secp = exi->exi_export.ex_secinfo;
1027
1028                 for (i = 0; i < count; i++) {
1029                         si = &secp[i].s_secinfo;
1030                         resok_val[i].flavor = si->sc_rpcnum;
1031                         if (resok_val[i].flavor == RPCSEC_GSS) {
1032                                 rpcsec_gss_info *info;
1033
1034                                 info = &resok_val[i].flavor_info;
1035                                 info->qop = si->sc_qop;
1036                                 info->service = (rpc_gss_svc_t)si->sc_service;
1037
1038                                 /* get oid opaque data */
1039                                 info->oid.sec_oid4_len =
1040                                     si->sc_gss_mech_type->length;
1041                                 info->oid.sec_oid4_val = kmem_alloc(
1042                                     si->sc_gss_mech_type->length, KM_SLEEP);
1043                                 bcopy(
1044                                     si->sc_gss_mech_type->elements,
1045                                     info->oid.sec_oid4_val,
1046                                     info->oid.sec_oid4_len);
1047                         }
1048                 }
1049                 resp->SECINFO4resok_len = count;
1050                 resp->SECINFO4resok_val = resok_val;
1051         } else {
1052                 int ret_cnt = 0, k = 0;
1053                 int *flavor_list;
1054
1055                 count = exi->exi_export.ex_seccnt; /* total sec count */
1056                 secp = exi->exi_export.ex_secinfo;
1057
1058                 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1059                 /* find out which flavors to return */
1060                 for (i = 0; i < count; i ++) {
1061                         int access, flavor, perm;
1062
1063                         flavor = secp[i].s_secinfo.sc_nfsnum;
1064                         perm = secp[i].s_flags;
1065
1066                         access = nfsauth4_secinfo_access(exi, cs->req,
1067                             flavor, perm);
1068
1069                         if (! (access & NFSAUTH_DENIED) &&
1070                             ! (access & NFSAUTH_WRONGSEC)) {
1071                                 flavor_list[ret_cnt] = flavor;
1072                                 ret_cnt++;
1073                         }
1074                 }
1075
1076                 /* Create the returning SECINFO value */
1077                 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1078
1079                 for (i = 0; i < count; i++) {
1080                         /*
1081                          * If the flavor is in the flavor list,
1082                          * fill in resok_val.
1083                          */
1084                         si = &secp[i].s_secinfo;
1085                         if (in_flavor_list(si->sc_nfsnum,
1086                             flavor_list, ret_cnt)) {
1087                                 resok_val[k].flavor = si->sc_rpcnum;
1088                                 if (resok_val[k].flavor == RPCSEC_GSS) {
1089                                         rpcsec_gss_info *info;
1090
1091                                         info = &resok_val[k].flavor_info;
1092                                         info->qop = si->sc_qop;
1093                                         info->service = (rpc_gss_svc_t)
1094                                             si->sc_service;
1095
1096                                         /* get oid opaque data */
1097                                         info->oid.sec_oid4_len =
1098                                             si->sc_gss_mech_type->length;
1099                                         info->oid.sec_oid4_val = kmem_alloc(
1100                                             si->sc_gss_mech_type->length,
1101                                             KM_SLEEP);
1102                                         bcopy(si->sc_gss_mech_type->elements,
1103                                             info->oid.sec_oid4_val,
1104                                             info->oid.sec_oid4_len);
1105                                 }
1106                                 k++;
1107                         }
1108                         if (k >= ret_cnt)
1109                                 break;
1110                 }
1111                 resp->SECINFO4resok_len = ret_cnt;
1112                 resp->SECINFO4resok_val = resok_val;
1113                 kmem_free(flavor_list, count * sizeof (int));
1114         }
1115
1116         VN_RELE(vp);
1117         return (NFS4_OK);
1118 }
1119
1120 /*
1121  * SECINFO (Operation 33): Obtain required security information on
1122  * the component name in the format of (security-mechanism-oid, qop, service)
1123  * triplets.
1124  */
1125 /* ARGSUSED */
1126 static void
1127 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1128     struct compound_state *cs)
1129 {
1130         SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1131         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1132         utf8string *utfnm = &args->name;
1133         uint_t len;
1134         char *nm;
1135         struct sockaddr *ca;
1136         char *name = NULL;
1137         nfsstat4 status = NFS4_OK;
1138
1139         DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1140             SECINFO4args *, args);
1141
1142         /*
1143          * Current file handle (cfh) should have been set before getting
1144          * into this function. If not, return error.
1145          */
1146         if (cs->vp == NULL) {
1147                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1148                 goto out;
1149         }
1150
1151         if (cs->vp->v_type != VDIR) {
1152                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1153                 goto out;
1154         }
1155
1156         /*
1157          * Verify the component name. If failed, error out, but
1158          * do not error out if the component name is a "..".
1159          * SECINFO will return its parents secinfo data for SECINFO "..".
1160          */
1161         status = utf8_dir_verify(utfnm);
1162         if (status != NFS4_OK) {
1163                 if (utfnm->utf8string_len != 2 ||
1164                     utfnm->utf8string_val[0] != '.' ||
1165                     utfnm->utf8string_val[1] != '.') {
1166                         *cs->statusp = resp->status = status;
1167                         goto out;
1168                 }
1169         }
1170
1171         nm = utf8_to_str(utfnm, &len, NULL);
1172         if (nm == NULL) {
1173                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1174                 goto out;
1175         }
1176
1177         if (len > MAXNAMELEN) {
1178                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1179                 kmem_free(nm, len);
1180                 goto out;
1181         }
1182
1183         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1184         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1185             MAXPATHLEN  + 1);
1186
1187         if (name == NULL) {
1188                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1189                 kmem_free(nm, len);
1190                 goto out;
1191         }
1192
1193
1194         *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1195
1196         if (name != nm)
1197                 kmem_free(name, MAXPATHLEN + 1);
1198         kmem_free(nm, len);
1199
1200 out:
1201         DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1202             SECINFO4res *, resp);
1203 }
1204
1205 /*
1206  * Free SECINFO result.
1207  */
1208 /* ARGSUSED */
1209 static void
1210 rfs4_op_secinfo_free(nfs_resop4 *resop)
1211 {
1212         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1213         int count, i;
1214         secinfo4 *resok_val;
1215
1216         /* If this is not an Ok result, nothing to free. */
1217         if (resp->status != NFS4_OK) {
1218                 return;
1219         }
1220
1221         count = resp->SECINFO4resok_len;
1222         resok_val = resp->SECINFO4resok_val;
1223
1224         for (i = 0; i < count; i++) {
1225                 if (resok_val[i].flavor == RPCSEC_GSS) {
1226                         rpcsec_gss_info *info;
1227
1228                         info = &resok_val[i].flavor_info;
1229                         kmem_free(info->oid.sec_oid4_val,
1230                             info->oid.sec_oid4_len);
1231                 }
1232         }
1233         kmem_free(resok_val, count * sizeof (secinfo4));
1234         resp->SECINFO4resok_len = 0;
1235         resp->SECINFO4resok_val = NULL;
1236 }
1237
1238 /* ARGSUSED */
1239 static void
1240 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1241     struct compound_state *cs)
1242 {
1243         ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1244         ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1245         int error;
1246         vnode_t *vp;
1247         struct vattr va;
1248         int checkwriteperm;
1249         cred_t *cr = cs->cr;
1250         bslabel_t *clabel, *slabel;
1251         ts_label_t *tslabel;
1252         boolean_t admin_low_client;
1253
1254         DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1255             ACCESS4args *, args);
1256
1257 #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1258         if (cs->access == CS_ACCESS_DENIED) {
1259                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1260                 goto out;
1261         }
1262 #endif
1263         if (cs->vp == NULL) {
1264                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1265                 goto out;
1266         }
1267
1268         ASSERT(cr != NULL);
1269
1270         vp = cs->vp;
1271
1272         /*
1273          * If the file system is exported read only, it is not appropriate
1274          * to check write permissions for regular files and directories.
1275          * Special files are interpreted by the client, so the underlying
1276          * permissions are sent back to the client for interpretation.
1277          */
1278         if (rdonly4(cs->exi, cs->vp, req) &&
1279             (vp->v_type == VREG || vp->v_type == VDIR))
1280                 checkwriteperm = 0;
1281         else
1282                 checkwriteperm = 1;
1283
1284         /*
1285          * XXX
1286          * We need the mode so that we can correctly determine access
1287          * permissions relative to a mandatory lock file.  Access to
1288          * mandatory lock files is denied on the server, so it might
1289          * as well be reflected to the server during the open.
1290          */
1291         va.va_mask = AT_MODE;
1292         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1293         if (error) {
1294                 *cs->statusp = resp->status = puterrno4(error);
1295                 goto out;
1296         }
1297         resp->access = 0;
1298         resp->supported = 0;
1299
1300         if (is_system_labeled()) {
1301                 ASSERT(req->rq_label != NULL);
1302                 clabel = req->rq_label;
1303                 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1304                     "got client label from request(1)",
1305                     struct svc_req *, req);
1306                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1307                         if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1308                                 *cs->statusp = resp->status = puterrno4(EACCES);
1309                                 goto out;
1310                         }
1311                         slabel = label2bslabel(tslabel);
1312                         DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1313                             char *, "got server label(1) for vp(2)",
1314                             bslabel_t *, slabel, vnode_t *, vp);
1315
1316                         admin_low_client = B_FALSE;
1317                 } else
1318                         admin_low_client = B_TRUE;
1319         }
1320
1321         if (args->access & ACCESS4_READ) {
1322                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1323                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1324                     (!is_system_labeled() || admin_low_client ||
1325                     bldominates(clabel, slabel)))
1326                         resp->access |= ACCESS4_READ;
1327                 resp->supported |= ACCESS4_READ;
1328         }
1329         if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1330                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1331                 if (!error && (!is_system_labeled() || admin_low_client ||
1332                     bldominates(clabel, slabel)))
1333                         resp->access |= ACCESS4_LOOKUP;
1334                 resp->supported |= ACCESS4_LOOKUP;
1335         }
1336         if (checkwriteperm &&
1337             (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1338                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1339                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1340                     (!is_system_labeled() || admin_low_client ||
1341                     blequal(clabel, slabel)))
1342                         resp->access |=
1343                             (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1344                 resp->supported |= (ACCESS4_MODIFY | ACCESS4_EXTEND);
1345         }
1346
1347         if (checkwriteperm &&
1348             (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1349                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1350                 if (!error && (!is_system_labeled() || admin_low_client ||
1351                     blequal(clabel, slabel)))
1352                         resp->access |= ACCESS4_DELETE;
1353                 resp->supported |= ACCESS4_DELETE;
1354         }
1355         if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1356                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1357                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1358                     (!is_system_labeled() || admin_low_client ||
1359                     bldominates(clabel, slabel)))
1360                         resp->access |= ACCESS4_EXECUTE;
1361                 resp->supported |= ACCESS4_EXECUTE;
1362         }
1363
1364         if (is_system_labeled() && !admin_low_client)
1365                 label_rele(tslabel);
1366
1367         *cs->statusp = resp->status = NFS4_OK;
1368 out:
1369         DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1370             ACCESS4res *, resp);
1371 }
1372
1373 /* ARGSUSED */
1374 static void
1375 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1376     struct compound_state *cs)
1377 {
1378         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1379         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1380         int error;
1381         vnode_t *vp = cs->vp;
1382         cred_t *cr = cs->cr;
1383         vattr_t va;
1384
1385         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1386             COMMIT4args *, args);
1387
1388         if (vp == NULL) {
1389                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1390                 goto out;
1391         }
1392         if (cs->access == CS_ACCESS_DENIED) {
1393                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1394                 goto out;
1395         }
1396
1397         if (args->offset + args->count < args->offset) {
1398                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1399                 goto out;
1400         }
1401
1402         va.va_mask = AT_UID;
1403         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1404
1405         /*
1406          * If we can't get the attributes, then we can't do the
1407          * right access checking.  So, we'll fail the request.
1408          */
1409         if (error) {
1410                 *cs->statusp = resp->status = puterrno4(error);
1411                 goto out;
1412         }
1413         if (rdonly4(cs->exi, cs->vp, req)) {
1414                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1415                 goto out;
1416         }
1417
1418         if (vp->v_type != VREG) {
1419                 if (vp->v_type == VDIR)
1420                         resp->status = NFS4ERR_ISDIR;
1421                 else
1422                         resp->status = NFS4ERR_INVAL;
1423                 *cs->statusp = resp->status;
1424                 goto out;
1425         }
1426
1427         if (crgetuid(cr) != va.va_uid &&
1428             (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1429                 *cs->statusp = resp->status = puterrno4(error);
1430                 goto out;
1431         }
1432
1433         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1434
1435         if (error) {
1436                 *cs->statusp = resp->status = puterrno4(error);
1437                 goto out;
1438         }
1439
1440         *cs->statusp = resp->status = NFS4_OK;
1441         resp->writeverf = Write4verf;
1442 out:
1443         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1444             COMMIT4res *, resp);
1445 }
1446
1447 /*
1448  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1449  * was completed. It does the nfsv4 create for special files.
1450  */
1451 /* ARGSUSED */
1452 static vnode_t *
1453 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1454     struct compound_state *cs, vattr_t *vap, char *nm)
1455 {
1456         int error;
1457         cred_t *cr = cs->cr;
1458         vnode_t *dvp = cs->vp;
1459         vnode_t *vp = NULL;
1460         int mode;
1461         enum vcexcl excl;
1462
1463         switch (args->type) {
1464         case NF4CHR:
1465         case NF4BLK:
1466                 if (secpolicy_sys_devices(cr) != 0) {
1467                         *cs->statusp = resp->status = NFS4ERR_PERM;
1468                         return (NULL);
1469                 }
1470                 if (args->type == NF4CHR)
1471                         vap->va_type = VCHR;
1472                 else
1473                         vap->va_type = VBLK;
1474                 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1475                     args->ftype4_u.devdata.specdata2);
1476                 vap->va_mask |= AT_RDEV;
1477                 break;
1478         case NF4SOCK:
1479                 vap->va_type = VSOCK;
1480                 break;
1481         case NF4FIFO:
1482                 vap->va_type = VFIFO;
1483                 break;
1484         default:
1485                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1486                 return (NULL);
1487         }
1488
1489         /*
1490          * Must specify the mode.
1491          */
1492         if (!(vap->va_mask & AT_MODE)) {
1493                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1494                 return (NULL);
1495         }
1496
1497         excl = EXCL;
1498
1499         mode = 0;
1500
1501         error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1502         if (error) {
1503                 *cs->statusp = resp->status = puterrno4(error);
1504                 return (NULL);
1505         }
1506         return (vp);
1507 }
1508
1509 /*
1510  * nfsv4 create is used to create non-regular files. For regular files,
1511  * use nfsv4 open.
1512  */
1513 /* ARGSUSED */
1514 static void
1515 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1516     struct compound_state *cs)
1517 {
1518         CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1519         CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1520         int error;
1521         struct vattr bva, iva, iva2, ava, *vap;
1522         cred_t *cr = cs->cr;
1523         vnode_t *dvp = cs->vp;
1524         vnode_t *vp = NULL;
1525         vnode_t *realvp;
1526         char *nm, *lnm;
1527         uint_t len, llen;
1528         int syncval = 0;
1529         struct nfs4_svgetit_arg sarg;
1530         struct nfs4_ntov_table ntov;
1531         struct statvfs64 sb;
1532         nfsstat4 status;
1533         struct sockaddr *ca;
1534         char *name = NULL;
1535         char *lname = NULL;
1536
1537         DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1538             CREATE4args *, args);
1539
1540         resp->attrset = 0;
1541
1542         if (dvp == NULL) {
1543                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1544                 goto out;
1545         }
1546
1547         /*
1548          * If there is an unshared filesystem mounted on this vnode,
1549          * do not allow to create an object in this directory.
1550          */
1551         if (vn_ismntpt(dvp)) {
1552                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1553                 goto out;
1554         }
1555
1556         /* Verify that type is correct */
1557         switch (args->type) {
1558         case NF4LNK:
1559         case NF4BLK:
1560         case NF4CHR:
1561         case NF4SOCK:
1562         case NF4FIFO:
1563         case NF4DIR:
1564                 break;
1565         default:
1566                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1567                 goto out;
1568         };
1569
1570         if (cs->access == CS_ACCESS_DENIED) {
1571                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1572                 goto out;
1573         }
1574         if (dvp->v_type != VDIR) {
1575                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1576                 goto out;
1577         }
1578         status = utf8_dir_verify(&args->objname);
1579         if (status != NFS4_OK) {
1580                 *cs->statusp = resp->status = status;
1581                 goto out;
1582         }
1583
1584         if (rdonly4(cs->exi, cs->vp, req)) {
1585                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1586                 goto out;
1587         }
1588
1589         /*
1590          * Name of newly created object
1591          */
1592         nm = utf8_to_fn(&args->objname, &len, NULL);
1593         if (nm == NULL) {
1594                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1595                 goto out;
1596         }
1597
1598         if (len > MAXNAMELEN) {
1599                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1600                 kmem_free(nm, len);
1601                 goto out;
1602         }
1603
1604         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1605         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1606             MAXPATHLEN  + 1);
1607
1608         if (name == NULL) {
1609                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1610                 kmem_free(nm, len);
1611                 goto out;
1612         }
1613
1614         resp->attrset = 0;
1615
1616         sarg.sbp = &sb;
1617         sarg.is_referral = B_FALSE;
1618         nfs4_ntov_table_init(&ntov);
1619
1620         status = do_rfs4_set_attrs(&resp->attrset,
1621             &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1622
1623         if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1624                 status = NFS4ERR_INVAL;
1625
1626         if (status != NFS4_OK) {
1627                 *cs->statusp = resp->status = status;
1628                 if (name != nm)
1629                         kmem_free(name, MAXPATHLEN + 1);
1630                 kmem_free(nm, len);
1631                 nfs4_ntov_table_free(&ntov, &sarg);
1632                 resp->attrset = 0;
1633                 goto out;
1634         }
1635
1636         /* Get "before" change value */
1637         bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1638         error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1639         if (error) {
1640                 *cs->statusp = resp->status = puterrno4(error);
1641                 if (name != nm)
1642                         kmem_free(name, MAXPATHLEN + 1);
1643                 kmem_free(nm, len);
1644                 nfs4_ntov_table_free(&ntov, &sarg);
1645                 resp->attrset = 0;
1646                 goto out;
1647         }
1648         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1649
1650         vap = sarg.vap;
1651
1652         /*
1653          * Set the default initial values for attributes when the parent
1654          * directory does not have the VSUID/VSGID bit set and they have
1655          * not been specified in createattrs.
1656          */
1657         if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1658                 vap->va_uid = crgetuid(cr);
1659                 vap->va_mask |= AT_UID;
1660         }
1661         if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1662                 vap->va_gid = crgetgid(cr);
1663                 vap->va_mask |= AT_GID;
1664         }
1665
1666         vap->va_mask |= AT_TYPE;
1667         switch (args->type) {
1668         case NF4DIR:
1669                 vap->va_type = VDIR;
1670                 if ((vap->va_mask & AT_MODE) == 0) {
1671                         vap->va_mode = 0700;    /* default: owner rwx only */
1672                         vap->va_mask |= AT_MODE;
1673                 }
1674                 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1675                 if (error)
1676                         break;
1677
1678                 /*
1679                  * Get the initial "after" sequence number, if it fails,
1680                  * set to zero
1681                  */
1682                 iva.va_mask = AT_SEQ;
1683                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1684                         iva.va_seq = 0;
1685                 break;
1686         case NF4LNK:
1687                 vap->va_type = VLNK;
1688                 if ((vap->va_mask & AT_MODE) == 0) {
1689                         vap->va_mode = 0700;    /* default: owner rwx only */
1690                         vap->va_mask |= AT_MODE;
1691                 }
1692
1693                 /*
1694                  * symlink names must be treated as data
1695                  */
1696                 lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL);
1697
1698                 if (lnm == NULL) {
1699                         *cs->statusp = resp->status = NFS4ERR_INVAL;
1700                         if (name != nm)
1701                                 kmem_free(name, MAXPATHLEN + 1);
1702                         kmem_free(nm, len);
1703                         nfs4_ntov_table_free(&ntov, &sarg);
1704                         resp->attrset = 0;
1705                         goto out;
1706                 }
1707
1708                 if (llen > MAXPATHLEN) {
1709                         *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1710                         if (name != nm)
1711                                 kmem_free(name, MAXPATHLEN + 1);
1712                         kmem_free(nm, len);
1713                         kmem_free(lnm, llen);
1714                         nfs4_ntov_table_free(&ntov, &sarg);
1715                         resp->attrset = 0;
1716                         goto out;
1717                 }
1718
1719                 lname = nfscmd_convname(ca, cs->exi, lnm,
1720                     NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1721
1722                 if (lname == NULL) {
1723                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1724                         if (name != nm)
1725                                 kmem_free(name, MAXPATHLEN + 1);
1726                         kmem_free(nm, len);
1727                         kmem_free(lnm, llen);
1728                         nfs4_ntov_table_free(&ntov, &sarg);
1729                         resp->attrset = 0;
1730                         goto out;
1731                 }
1732
1733                 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1734                 if (lname != lnm)
1735                         kmem_free(lname, MAXPATHLEN + 1);
1736                 kmem_free(lnm, llen);
1737                 if (error)
1738                         break;
1739
1740                 /*
1741                  * Get the initial "after" sequence number, if it fails,
1742                  * set to zero
1743                  */
1744                 iva.va_mask = AT_SEQ;
1745                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1746                         iva.va_seq = 0;
1747
1748                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1749                     NULL, NULL, NULL);
1750                 if (error)
1751                         break;
1752
1753                 /*
1754                  * va_seq is not safe over VOP calls, check it again
1755                  * if it has changed zero out iva to force atomic = FALSE.
1756                  */
1757                 iva2.va_mask = AT_SEQ;
1758                 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1759                     iva2.va_seq != iva.va_seq)
1760                         iva.va_seq = 0;
1761                 break;
1762         default:
1763                 /*
1764                  * probably a special file.
1765                  */
1766                 if ((vap->va_mask & AT_MODE) == 0) {
1767                         vap->va_mode = 0600;    /* default: owner rw only */
1768                         vap->va_mask |= AT_MODE;
1769                 }
1770                 syncval = FNODSYNC;
1771                 /*
1772                  * We know this will only generate one VOP call
1773                  */
1774                 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1775
1776                 if (vp == NULL) {
1777                         if (name != nm)
1778                                 kmem_free(name, MAXPATHLEN + 1);
1779                         kmem_free(nm, len);
1780                         nfs4_ntov_table_free(&ntov, &sarg);
1781                         resp->attrset = 0;
1782                         goto out;
1783                 }
1784
1785                 /*
1786                  * Get the initial "after" sequence number, if it fails,
1787                  * set to zero
1788                  */
1789                 iva.va_mask = AT_SEQ;
1790                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1791                         iva.va_seq = 0;
1792
1793                 break;
1794         }
1795         if (name != nm)
1796                 kmem_free(name, MAXPATHLEN + 1);
1797         kmem_free(nm, len);
1798
1799         if (error) {
1800                 *cs->statusp = resp->status = puterrno4(error);
1801         }
1802
1803         /*
1804          * Force modified data and metadata out to stable storage.
1805          */
1806         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1807
1808         if (resp->status != NFS4_OK) {
1809                 if (vp != NULL)
1810                         VN_RELE(vp);
1811                 nfs4_ntov_table_free(&ntov, &sarg);
1812                 resp->attrset = 0;
1813                 goto out;
1814         }
1815
1816         /*
1817          * Finish setup of cinfo response, "before" value already set.
1818          * Get "after" change value, if it fails, simply return the
1819          * before value.
1820          */
1821         ava.va_mask = AT_CTIME|AT_SEQ;
1822         if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1823                 ava.va_ctime = bva.va_ctime;
1824                 ava.va_seq = 0;
1825         }
1826         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1827
1828         /*
1829          * True verification that object was created with correct
1830          * attrs is impossible.  The attrs could have been changed
1831          * immediately after object creation.  If attributes did
1832          * not verify, the only recourse for the server is to
1833          * destroy the object.  Maybe if some attrs (like gid)
1834          * are set incorrectly, the object should be destroyed;
1835          * however, seems bad as a default policy.  Do we really
1836          * want to destroy an object over one of the times not
1837          * verifying correctly?  For these reasons, the server
1838          * currently sets bits in attrset for createattrs
1839          * that were set; however, no verification is done.
1840          *
1841          * vmask_to_nmask accounts for vattr bits set on create
1842          *      [do_rfs4_set_attrs() only sets resp bits for
1843          *       non-vattr/vfs bits.]
1844          * Mask off any bits set by default so as not to return
1845          * more attrset bits than were requested in createattrs
1846          */
1847         nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1848         resp->attrset &= args->createattrs.attrmask;
1849         nfs4_ntov_table_free(&ntov, &sarg);
1850
1851         error = makefh4(&cs->fh, vp, cs->exi);
1852         if (error) {
1853                 *cs->statusp = resp->status = puterrno4(error);
1854         }
1855
1856         /*
1857          * The cinfo.atomic = TRUE only if we got no errors, we have
1858          * non-zero va_seq's, and it has incremented by exactly one
1859          * during the creation and it didn't change during the VOP_LOOKUP
1860          * or VOP_FSYNC.
1861          */
1862         if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1863             iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1864                 resp->cinfo.atomic = TRUE;
1865         else
1866                 resp->cinfo.atomic = FALSE;
1867
1868         /*
1869          * Force modified metadata out to stable storage.
1870          *
1871          * if a underlying vp exists, pass it to VOP_FSYNC
1872          */
1873         if (VOP_REALVP(vp, &realvp, NULL) == 0)
1874                 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1875         else
1876                 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1877
1878         if (resp->status != NFS4_OK) {
1879                 VN_RELE(vp);
1880                 goto out;
1881         }
1882         if (cs->vp)
1883                 VN_RELE(cs->vp);
1884
1885         cs->vp = vp;
1886         *cs->statusp = resp->status = NFS4_OK;
1887 out:
1888         DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1889             CREATE4res *, resp);
1890 }
1891
1892 /*ARGSUSED*/
1893 static void
1894 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1895     struct compound_state *cs)
1896 {
1897         DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1898             DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1899
1900         rfs4_op_inval(argop, resop, req, cs);
1901
1902         DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1903             DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1904 }
1905
1906 /*ARGSUSED*/
1907 static void
1908 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1909     struct compound_state *cs)
1910 {
1911         DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1912         DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1913         rfs4_deleg_state_t *dsp;
1914         nfsstat4 status;
1915
1916         DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1917             DELEGRETURN4args *, args);
1918
1919         status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1920         resp->status = *cs->statusp = status;
1921         if (status != NFS4_OK)
1922                 goto out;
1923
1924         /* Ensure specified filehandle matches */
1925         if (cs->vp != dsp->rds_finfo->rf_vp) {
1926                 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1927         } else
1928                 rfs4_return_deleg(dsp, FALSE);
1929
1930         rfs4_update_lease(dsp->rds_client);
1931
1932         rfs4_deleg_state_rele(dsp);
1933 out:
1934         DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
1935             DELEGRETURN4res *, resp);
1936 }
1937
1938 /*
1939  * Check to see if a given "flavor" is an explicitly shared flavor.
1940  * The assumption of this routine is the "flavor" is already a valid
1941  * flavor in the secinfo list of "exi".
1942  *
1943  *      e.g.
1944  *              # share -o sec=flavor1 /export
1945  *              # share -o sec=flavor2 /export/home
1946  *
1947  *              flavor2 is not an explicitly shared flavor for /export,
1948  *              however it is in the secinfo list for /export thru the
1949  *              server namespace setup.
1950  */
1951 int
1952 is_exported_sec(int flavor, struct exportinfo *exi)
1953 {
1954         int     i;
1955         struct secinfo *sp;
1956
1957         sp = exi->exi_export.ex_secinfo;
1958         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1959                 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1960                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1961                         return (SEC_REF_EXPORTED(&sp[i]));
1962                 }
1963         }
1964
1965         /* Should not reach this point based on the assumption */
1966         return (0);
1967 }
1968
1969 /*
1970  * Check if the security flavor used in the request matches what is
1971  * required at the export point or at the root pseudo node (exi_root).
1972  *
1973  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1974  *
1975  */
1976 static int
1977 secinfo_match_or_authnone(struct compound_state *cs)
1978 {
1979         int     i;
1980         struct secinfo *sp;
1981
1982         /*
1983          * Check cs->nfsflavor (from the request) against
1984          * the current export data in cs->exi.
1985          */
1986         sp = cs->exi->exi_export.ex_secinfo;
1987         for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1988                 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1989                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1990                         return (1);
1991         }
1992
1993         return (0);
1994 }
1995
1996 /*
1997  * Check the access authority for the client and return the correct error.
1998  */
1999 nfsstat4
2000 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2001 {
2002         int     authres;
2003
2004         /*
2005          * First, check if the security flavor used in the request
2006          * are among the flavors set in the server namespace.
2007          */
2008         if (!secinfo_match_or_authnone(cs)) {
2009                 *cs->statusp = NFS4ERR_WRONGSEC;
2010                 return (*cs->statusp);
2011         }
2012
2013         authres = checkauth4(cs, req);
2014
2015         if (authres > 0) {
2016                 *cs->statusp = NFS4_OK;
2017                 if (! (cs->access & CS_ACCESS_LIMITED))
2018                         cs->access = CS_ACCESS_OK;
2019         } else if (authres == 0) {
2020                 *cs->statusp = NFS4ERR_ACCESS;
2021         } else if (authres == -2) {
2022                 *cs->statusp = NFS4ERR_WRONGSEC;
2023         } else {
2024                 *cs->statusp = NFS4ERR_DELAY;
2025         }
2026         return (*cs->statusp);
2027 }
2028
2029 /*
2030  * bitmap4_to_attrmask is called by getattr and readdir.
2031  * It sets up the vattr mask and determines whether vfsstat call is needed
2032  * based on the input bitmap.
2033  * Returns nfsv4 status.
2034  */
2035 static nfsstat4
2036 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2037 {
2038         int i;
2039         uint_t  va_mask;
2040         struct statvfs64 *sbp = sargp->sbp;
2041
2042         sargp->sbp = NULL;
2043         sargp->flag = 0;
2044         sargp->rdattr_error = NFS4_OK;
2045         sargp->mntdfid_set = FALSE;
2046         if (sargp->cs->vp)
2047                 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2048                     FH4_ATTRDIR | FH4_NAMEDATTR);
2049         else
2050                 sargp->xattr = 0;
2051
2052         /*
2053          * Set rdattr_error_req to true if return error per
2054          * failed entry rather than fail the readdir.
2055          */
2056         if (breq & FATTR4_RDATTR_ERROR_MASK)
2057                 sargp->rdattr_error_req = 1;
2058         else
2059                 sargp->rdattr_error_req = 0;
2060
2061         /*
2062          * generate the va_mask
2063          * Handle the easy cases first
2064          */
2065         switch (breq) {
2066         case NFS4_NTOV_ATTR_MASK:
2067                 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2068                 return (NFS4_OK);
2069
2070         case NFS4_FS_ATTR_MASK:
2071                 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2072                 sargp->sbp = sbp;
2073                 return (NFS4_OK);
2074
2075         case NFS4_NTOV_ATTR_CACHE_MASK:
2076                 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2077                 return (NFS4_OK);
2078
2079         case FATTR4_LEASE_TIME_MASK:
2080                 sargp->vap->va_mask = 0;
2081                 return (NFS4_OK);
2082
2083         default:
2084                 va_mask = 0;
2085                 for (i = 0; i < nfs4_ntov_map_size; i++) {
2086                         if ((breq & nfs4_ntov_map[i].fbit) &&
2087                             nfs4_ntov_map[i].vbit)
2088                                 va_mask |= nfs4_ntov_map[i].vbit;
2089                 }
2090
2091                 /*
2092                  * Check is vfsstat is needed
2093                  */
2094                 if (breq & NFS4_FS_ATTR_MASK)
2095                         sargp->sbp = sbp;
2096
2097                 sargp->vap->va_mask = va_mask;
2098                 return (NFS4_OK);
2099         }
2100         /* NOTREACHED */
2101 }
2102
2103 /*
2104  * bitmap4_get_sysattrs is called by getattr and readdir.
2105  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2106  * Returns nfsv4 status.
2107  */
2108 static nfsstat4
2109 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2110 {
2111         int error;
2112         struct compound_state *cs = sargp->cs;
2113         vnode_t *vp = cs->vp;
2114
2115         if (sargp->sbp != NULL) {
2116                 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2117                         sargp->sbp = NULL;      /* to identify error */
2118                         return (puterrno4(error));
2119                 }
2120         }
2121
2122         return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2123 }
2124
2125 static void
2126 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2127 {
2128         ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2129             KM_SLEEP);
2130         ntovp->attrcnt = 0;
2131         ntovp->vfsstat = FALSE;
2132 }
2133
2134 static void
2135 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2136     struct nfs4_svgetit_arg *sargp)
2137 {
2138         int i;
2139         union nfs4_attr_u *na;
2140         uint8_t *amap;
2141
2142         /*
2143          * XXX Should do the same checks for whether the bit is set
2144          */
2145         for (i = 0, na = ntovp->na, amap = ntovp->amap;
2146             i < ntovp->attrcnt; i++, na++, amap++) {
2147                 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2148                     NFS4ATTR_FREEIT, sargp, na);
2149         }
2150         if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2151                 /*
2152                  * xdr_free for getattr will be done later
2153                  */
2154                 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2155                     i < ntovp->attrcnt; i++, na++, amap++) {
2156                         xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2157                 }
2158         }
2159         kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2160 }
2161
2162 /*
2163  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
      *
      * breq   - bitmap of the attributes being requested.
      * fattrp - result: attrmask receives the bit of every attribute that
      *          was fetched successfully; attrlist4/attrlist4_len receive
      *          the XDR-encoded values (NULL/0 when nothing was encoded).
      * sargp  - getattr state; op is forced to NFS4ATTR_GETIT and flag is
      *          cleared here, and rdattr_error may be updated.
      *
      * Attributes whose sv_getit routine reports them as unsupported
      * (non-zero result for NFS4ATTR_SUPPORTED) are silently skipped.
      *
      * Returns NFS4_OK on success (including an empty request),
      * NFS4ERR_INVAL when a write-only time-set attribute is requested,
      * NFS4ERR_SERVERFAULT when XDR encoding of a value fails, or the
      * puterrno4() translation of an sv_getit error when per-entry error
      * reporting (rdattr_error_req) is not in effect.  On that last path
      * attrlist4 and attrlist4_len are not written by this function.
2164  */
2165 static nfsstat4
2166 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2167     struct nfs4_svgetit_arg *sargp)
2168 {
2169         int error = 0;
2170         int i, k;
2171         struct nfs4_ntov_table ntov;
2172         XDR xdr;
2173         ulong_t xdr_size;
2174         char *xdr_attrs;
2175         nfsstat4 status = NFS4_OK;
             /* remembered so a change made while fetching can be detected */
2176         nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2177         union nfs4_attr_u *na;
2178         uint8_t *amap;
2179
2180         sargp->op = NFS4ATTR_GETIT;
2181         sargp->flag = 0;
2182
2183         fattrp->attrmask = 0;
2184         /* if no bits requested, then return empty fattr4 */
2185         if (breq == 0) {
2186                 fattrp->attrlist4_len = 0;
2187                 fattrp->attrlist4 = NULL;
2188                 return (NFS4_OK);
2189         }
2190
2191         /*
2192          * return NFS4ERR_INVAL when client requests write-only attrs
2193          */
2194         if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2195                 return (NFS4ERR_INVAL);
2196
             /*
              * na walks the value slots and amap the parallel index array;
              * each used amap slot records the nval of the attribute stored
              * in the matching na slot, and is later used to index
              * nfs4_ntov_map.
              */
2197         nfs4_ntov_table_init(&ntov);
2198         na = ntov.na;
2199         amap = ntov.amap;
2200
2201         /*
2202          * Now loop to get or verify the attrs
2203          */
2204         for (i = 0; i < nfs4_ntov_map_size; i++) {
2205                 if (breq & nfs4_ntov_map[i].fbit) {
2206                         if ((*nfs4_ntov_map[i].sv_getit)(
2207                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2208
2209                                 error = (*nfs4_ntov_map[i].sv_getit)(
2210                                     NFS4ATTR_GETIT, sargp, na);
2211
2212                                 /*
2213                                  * Possible error values:
2214                                  * >0 if sv_getit failed to
2215                                  * get the attr; 0 if succeeded;
2216                                  * <0 if rdattr_error and the
2217                                  * attribute cannot be returned.
2218                                  */
2219                                 if (error && !(sargp->rdattr_error_req))
2220                                         goto done;
2221                                 /*
2222                                  * If error then just for entry
2223                                  */
2224                                 if (error == 0) {
2225                                         fattrp->attrmask |=
2226                                             nfs4_ntov_map[i].fbit;
2227                                         *amap++ =
2228                                             (uint8_t)nfs4_ntov_map[i].nval;
2229                                         na++;
2230                                         (ntov.attrcnt)++;
2231                                 } else if ((error > 0) &&
2232                                     (sargp->rdattr_error == NFS4_OK)) {
                                             /* only the first failure is kept */
2233                                         sargp->rdattr_error = puterrno4(error);
2234                                 }
                                     /*
                                      * The per-entry failure is not the overall
                                      * status; clear it so "done" does not
                                      * report it.
                                      */
2235                                 error = 0;
2236                         }
2237                 }
2238         }
2239
2240         /*
2241          * If rdattr_error was set after the return value for it was assigned,
2242          * update it.
2243          */
2244         if (prev_rdattr_error != sargp->rdattr_error) {
2245                 na = ntov.na;
2246                 amap = ntov.amap;
2247                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2248                         k = *amap;
                             /*
                              * Skip entries with a smaller map index.  The scan
                              * ends at the first entry whose index is at or
                              * beyond FATTR4_RDATTR_ERROR, re-fetching it only
                              * if it is the rdattr_error entry itself.
                              */
2249                         if (k < FATTR4_RDATTR_ERROR) {
2250                                 continue;
2251                         }
2252                         if ((k == FATTR4_RDATTR_ERROR) &&
2253                             ((*nfs4_ntov_map[k].sv_getit)(
2254                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2255
2256                                 (void) (*nfs4_ntov_map[k].sv_getit)(
2257                                     NFS4ATTR_GETIT, sargp, na);
2258                         }
2259                         break;
2260                 }
2261         }
2262
             /* First pass: total the encoded size of the collected values */
2263         xdr_size = 0;
2264         na = ntov.na;
2265         amap = ntov.amap;
2266         for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2267                 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2268         }
2269
2270         fattrp->attrlist4_len = xdr_size;
2271         if (xdr_size) {
2272                 /* freed by rfs4_op_getattr_free() */
2273                 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2274
2275                 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2276
                     /* Second pass: encode each value into the buffer */
2277                 na = ntov.na;
2278                 amap = ntov.amap;
2279                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2280                         if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2281                                 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2282                                     int, *amap);
2283                                 status = NFS4ERR_SERVERFAULT;
2284                                 break;
2285                         }
2286                 }
2287                 /* xdrmem_destroy(&xdrs); */    /* NO-OP */
2288         } else {
2289                 fattrp->attrlist4 = NULL;
2290         }
     /*
      * Reached by falling through after encoding, or directly from the fetch
      * loop with error != 0 when an sv_getit failure must fail the request.
      */
2291 done:
2292
2293         nfs4_ntov_table_free(&ntov, sargp);
2294
2295         if (error != 0)
2296                 status = puterrno4(error);
2297
2298         return (status);
2299 }
2300
2301 /* ARGSUSED */
2302 static void
2303 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2304     struct compound_state *cs)
2305 {
2306         GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2307         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2308         struct nfs4_svgetit_arg sarg;
2309         struct statvfs64 sb;
2310         nfsstat4 status;
2311
2312         DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2313             GETATTR4args *, args);
2314
2315         if (cs->vp == NULL) {
2316                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2317                 goto out;
2318         }
2319
2320         if (cs->access == CS_ACCESS_DENIED) {
2321                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2322                 goto out;
2323         }
2324
2325         sarg.sbp = &sb;
2326         sarg.cs = cs;
2327         sarg.is_referral = B_FALSE;
2328
2329         status = bitmap4_to_attrmask(args->attr_request, &sarg);
2330         if (status == NFS4_OK) {
2331
2332                 status = bitmap4_get_sysattrs(&sarg);
2333                 if (status == NFS4_OK) {
2334
2335                         /* Is this a referral? */
2336                         if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2337                                 /* Older V4 Solaris client sees a link */
2338                                 if (client_is_downrev(req))
2339                                         sarg.vap->va_type = VLNK;
2340                                 else
2341                                         sarg.is_referral = B_TRUE;
2342                         }
2343
2344                         status = do_rfs4_op_getattr(args->attr_request,
2345                             &resp->obj_attributes, &sarg);
2346                 }
2347         }
2348         *cs->statusp = resp->status = status;
2349 out:
2350         DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2351             GETATTR4res *, resp);
2352 }
2353
2354 static void
2355 rfs4_op_getattr_free(nfs_resop4 *resop)
2356 {
2357         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2358
2359         nfs4_fattr4_free(&resp->obj_attributes);
2360 }
2361
2362 /* ARGSUSED */
2363 static void
2364 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2365     struct compound_state *cs)
2366 {
2367         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2368
2369         DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2370
2371         if (cs->vp == NULL) {
2372                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2373                 goto out;
2374         }
2375         if (cs->access == CS_ACCESS_DENIED) {
2376                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2377                 goto out;
2378         }
2379
2380         /* check for reparse point at the share point */
2381         if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2382                 /* it's all bad */
2383                 cs->exi->exi_moved = 1;
2384                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2385                 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2386                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2387                 return;
2388         }
2389
2390         /* check for reparse point at vp */
2391         if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2392                 /* it's not all bad */
2393                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2394                 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2395                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2396                 return;
2397         }
2398
2399         resp->object.nfs_fh4_val =
2400             kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2401         nfs_fh4_copy(&cs->fh, &resp->object);
2402         *cs->statusp = resp->status = NFS4_OK;
2403 out:
2404         DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2405             GETFH4res *, resp);
2406 }
2407
2408 static void
2409 rfs4_op_getfh_free(nfs_resop4 *resop)
2410 {
2411         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2412
2413         if (resp->status == NFS4_OK &&
2414             resp->object.nfs_fh4_val != NULL) {
2415                 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2416                 resp->object.nfs_fh4_val = NULL;
2417                 resp->object.nfs_fh4_len = 0;
2418         }
2419 }
2420
2421 /*
2422  * illegal: args: void
2423  *          res : status (NFS4ERR_OP_ILLEGAL)
2424  */
2425 /* ARGSUSED */
2426 static void
2427 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2428     struct svc_req *req, struct compound_state *cs)
2429 {
2430         ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2431
2432         resop->resop = OP_ILLEGAL;
2433         *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2434 }
2435
2436 /*
2437  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2438  *       res: status. If success - CURRENT_FH unchanged, return change_info
2439  */
2440 /* ARGSUSED */
2441 static void
2442 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2443     struct compound_state *cs)
2444 {
2445         LINK4args *args = &argop->nfs_argop4_u.oplink;
2446         LINK4res *resp = &resop->nfs_resop4_u.oplink;
2447         int error;
2448         vnode_t *vp;
2449         vnode_t *dvp;
2450         struct vattr bdva, idva, adva;
2451         char *nm;
2452         uint_t  len;
2453         struct sockaddr *ca;
2454         char *name = NULL;
2455         nfsstat4 status;
2456
2457         DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2458             LINK4args *, args);
2459
2460         /* SAVED_FH: source object */
2461         vp = cs->saved_vp;
2462         if (vp == NULL) {
2463                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2464                 goto out;
2465         }
2466
2467         /* CURRENT_FH: target directory */
2468         dvp = cs->vp;
2469         if (dvp == NULL) {
2470                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2471                 goto out;
2472         }
2473
2474         /*
2475          * If there is a non-shared filesystem mounted on this vnode,
2476          * do not allow to link any file in this directory.
2477          */
2478         if (vn_ismntpt(dvp)) {
2479                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2480                 goto out;
2481         }
2482
2483         if (cs->access == CS_ACCESS_DENIED) {
2484                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2485                 goto out;
2486         }
2487
2488         /* Check source object's type validity */
2489         if (vp->v_type == VDIR) {
2490                 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2491                 goto out;
2492         }
2493
2494         /* Check target directory's type */
2495         if (dvp->v_type != VDIR) {
2496                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2497                 goto out;
2498         }
2499
2500         if (cs->saved_exi != cs->exi) {
2501                 *cs->statusp = resp->status = NFS4ERR_XDEV;
2502                 goto out;
2503         }
2504
2505         status = utf8_dir_verify(&args->newname);
2506         if (status != NFS4_OK) {
2507                 *cs->statusp = resp->status = status;
2508                 goto out;
2509         }
2510
2511         nm = utf8_to_fn(&args->newname, &len, NULL);
2512         if (nm == NULL) {
2513                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2514                 goto out;
2515         }
2516
2517         if (len > MAXNAMELEN) {
2518                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2519                 kmem_free(nm, len);
2520                 goto out;
2521         }
2522
2523         if (rdonly4(cs->exi, cs->vp, req)) {
2524                 *cs->statusp = resp->status = NFS4ERR_ROFS;
2525                 kmem_free(nm, len);
2526                 goto out;
2527         }
2528
2529         /* Get "before" change value */
2530         bdva.va_mask = AT_CTIME|AT_SEQ;
2531         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2532         if (error) {
2533                 *cs->statusp = resp->status = puterrno4(error);
2534                 kmem_free(nm, len);
2535                 goto out;
2536         }
2537
2538         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2539         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2540             MAXPATHLEN  + 1);
2541
2542         if (name == NULL) {
2543                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2544                 kmem_free(nm, len);
2545                 goto out;
2546         }
2547
2548         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2549
2550         error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2551
2552         if (nm != name)
2553                 kmem_free(name, MAXPATHLEN + 1);
2554         kmem_free(nm, len);
2555
2556         /*
2557          * Get the initial "after" sequence number, if it fails, set to zero
2558          */
2559         idva.va_mask = AT_SEQ;
2560         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2561                 idva.va_seq = 0;
2562
2563         /*
2564          * Force modified data and metadata out to stable storage.
2565          */
2566         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2567         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2568
2569         if (error) {
2570                 *cs->statusp = resp->status = puterrno4(error);
2571                 goto out;
2572         }
2573
2574         /*
2575          * Get "after" change value, if it fails, simply return the
2576          * before value.
2577          */
2578         adva.va_mask = AT_CTIME|AT_SEQ;
2579         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2580                 adva.va_ctime = bdva.va_ctime;
2581                 adva.va_seq = 0;
2582         }
2583
2584         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2585
2586         /*
2587          * The cinfo.atomic = TRUE only if we have
2588          * non-zero va_seq's, and it has incremented by exactly one
2589          * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2590          */
2591         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2592             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2593                 resp->cinfo.atomic = TRUE;
2594         else
2595                 resp->cinfo.atomic = FALSE;
2596
2597         *cs->statusp = resp->status = NFS4_OK;
2598 out:
2599         DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2600             LINK4res *, resp);
2601 }
2602
2603 /*
2604  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2605  */
2606
2607 /* ARGSUSED */
2608 static nfsstat4
2609 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2610 {
2611         int error;
2612         int different_export = 0;
2613         vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL;
2614         struct exportinfo *exi = NULL, *pre_exi = NULL;
2615         nfsstat4 stat;
2616         fid_t fid;
2617         int attrdir, dotdot, walk;
2618         bool_t is_newvp = FALSE;
2619
2620         if (cs->vp->v_flag & V_XATTRDIR) {
2621                 attrdir = 1;
2622                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2623         } else {
2624                 attrdir = 0;
2625                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2626         }
2627
2628         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2629
2630         /*
2631          * If dotdotting, then need to check whether it's
2632          * above the root of a filesystem, or above an
2633          * export point.
2634          */
2635         if (dotdot) {
2636
2637                 /*
2638                  * If dotdotting at the root of a filesystem, then
2639                  * need to traverse back to the mounted-on filesystem
2640                  * and do the dotdot lookup there.
2641                  */
2642                 if (cs->vp->v_flag & VROOT) {
2643
2644                         /*
2645                          * If at the system root, then can
2646                          * go up no further.
2647                          */
2648                         if (VN_CMP(cs->vp, rootdir))
2649                                 return (puterrno4(ENOENT));
2650
2651                         /*
2652                          * Traverse back to the mounted-on filesystem
2653                          */
2654                         cs->vp = untraverse(cs->vp);
2655
2656                         /*
2657                          * Set the different_export flag so we remember
2658                          * to pick up a new exportinfo entry for
2659                          * this new filesystem.
2660                          */
2661                         different_export = 1;
2662                 } else {
2663
2664                         /*
2665                          * If dotdotting above an export point then set
2666                          * the different_export to get new export info.
2667                          */
2668                         different_export = nfs_exported(cs->exi, cs->vp);
2669                 }
2670         }
2671
2672         error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2673             NULL, NULL, NULL);
2674         if (error)
2675                 return (puterrno4(error));
2676
2677         /*
2678          * If the vnode is in a pseudo filesystem, check whether it is visible.
2679          *
2680          * XXX if the vnode is a symlink and it is not visible in
2681          * a pseudo filesystem, return ENOENT (not following symlink).
2682          * V4 client can not mount such symlink. This is a regression
2683          * from V2/V3.
2684          *
2685          * In the same exported filesystem, if the security flavor used
2686          * is not an explicitly shared flavor, limit the view to the visible
2687          * list entries only. This is not a WRONGSEC case because it's already
2688          * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2689          */
2690         if (!different_export &&
2691             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2692             cs->access & CS_ACCESS_LIMITED)) {
2693                 if (! nfs_visible(cs->exi, vp, &different_export)) {
2694                         VN_RELE(vp);
2695                         return (puterrno4(ENOENT));
2696                 }
2697         }
2698
2699         /*
2700          * If it's a mountpoint, then traverse it.
2701          */
2702         if (vn_ismntpt(vp)) {
2703                 pre_exi = cs->exi;      /* save pre-traversed exportinfo */
2704                 pre_tvp = vp;           /* save pre-traversed vnode     */
2705
2706                 /*
2707                  * hold pre_tvp to counteract rele by traverse.  We will
2708                  * need pre_tvp below if checkexport4 fails
2709                  */
2710                 VN_HOLD(pre_tvp);
2711                 tvp = vp;
2712                 if ((error = traverse(&tvp)) != 0) {
2713                         VN_RELE(vp);
2714                         VN_RELE(pre_tvp);
2715                         return (puterrno4(error));
2716                 }
2717                 vp = tvp;
2718                 different_export = 1;
2719         } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2720                 /*
2721                  * The vfsp comparison is to handle the case where
2722                  * a LOFS mount is shared.  lo_lookup traverses mount points,
2723                  * and NFS is unaware of local fs transistions because
2724                  * v_vfsmountedhere isn't set.  For this special LOFS case,
2725                  * the dir and the obj returned by lookup will have different
2726                  * vfs ptrs.
2727                  */
2728                 different_export = 1;
2729         }
2730
2731         if (different_export) {
2732
2733                 bzero(&fid, sizeof (fid));
2734                 fid.fid_len = MAXFIDSZ;
2735                 error = vop_fid_pseudo(vp, &fid);
2736                 if (error) {
2737                         VN_RELE(vp);
2738                         if (pre_tvp)
2739                                 VN_RELE(pre_tvp);
2740                         return (puterrno4(error));
2741                 }
2742
2743                 if (dotdot)
2744                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2745                 else
2746                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2747
2748                 if (exi == NULL) {
2749                         if (pre_tvp) {
2750                                 /*
2751                                  * If this vnode is a mounted-on vnode,
2752                                  * but the mounted-on file system is not
2753                                  * exported, send back the filehandle for
2754                                  * the mounted-on vnode, not the root of
2755                                  * the mounted-on file system.
2756                                  */
2757                                 VN_RELE(vp);
2758                                 vp = pre_tvp;
2759                                 exi = pre_exi;
2760                         } else {
2761                                 VN_RELE(vp);
2762                                 return (puterrno4(EACCES));
2763                         }
2764                 } else if (pre_tvp) {
2765                         /* we're done with pre_tvp now. release extra hold */
2766                         VN_RELE(pre_tvp);
2767                 }
2768
2769                 cs->exi = exi;
2770
2771                 /*
2772                  * Now we do a checkauth4. The reason is that
2773                  * this client/user may not have access to the new
2774                  * exported file system, and if he does,
2775                  * the client/user may be mapped to a different uid.
2776                  *
2777                  * We start with a new cr, because the checkauth4 done
2778                  * in the PUT*FH operation over wrote the cred's uid,
2779                  * gid, etc, and we want the real thing before calling
2780                  * checkauth4()
2781                  */
2782                 crfree(cs->cr);
2783                 cs->cr = crdup(cs->basecr);
2784
2785                 oldvp = cs->vp;
2786                 cs->vp = vp;
2787                 is_newvp = TRUE;
2788
2789                 stat = call_checkauth4(cs, req);
2790                 if (stat != NFS4_OK) {
2791                         VN_RELE(cs->vp);
2792                         cs->vp = oldvp;
2793                         return (stat);
2794                 }
2795         }
2796
2797         /*
2798          * After various NFS checks, do a label check on the path
2799          * component. The label on this path should either be the
2800          * global zone's label or a zone's label. We are only
2801          * interested in the zone's label because exported files
2802          * in global zone is accessible (though read-only) to
2803          * clients. The exportability/visibility check is already
2804          * done before reaching this code.
2805          */
2806         if (is_system_labeled()) {
2807                 bslabel_t *clabel;
2808
2809                 ASSERT(req->rq_label != NULL);
2810                 clabel = req->rq_label;
2811                 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2812                     "got client label from request(1)", struct svc_req *, req);
2813
2814                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2815                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2816                             cs->exi)) {
2817                                 error = EACCES;
2818                                 goto err_out;
2819                         }
2820                 } else {
2821                         /*
2822                          * We grant access to admin_low label clients
2823                          * only if the client is trusted, i.e. also
2824                          * running Solaris Trusted Extension.
2825                          */
2826                         struct sockaddr *ca;
2827                         int             addr_type;
2828                         void            *ipaddr;
2829                         tsol_tpc_t      *tp;
2830
2831                         ca = (struct sockaddr *)svc_getrpccaller(
2832                             req->rq_xprt)->buf;
2833                         if (ca->sa_family == AF_INET) {
2834                                 addr_type = IPV4_VERSION;
2835                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2836                         } else if (ca->sa_family == AF_INET6) {
2837                                 addr_type = IPV6_VERSION;
2838                                 ipaddr = &((struct sockaddr_in6 *)
2839                                     ca)->sin6_addr;
2840                         }
2841                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
2842                         if (tp == NULL || tp->tpc_tp.tp_doi !=
2843                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2844                             SUN_CIPSO) {
2845                                 if (tp != NULL)
2846                                         TPC_RELE(tp);
2847                                 error = EACCES;
2848                                 goto err_out;
2849                         }
2850                         TPC_RELE(tp);
2851                 }
2852         }
2853
2854         error = makefh4(&cs->fh, vp, cs->exi);
2855
2856 err_out:
2857         if (error) {
2858                 if (is_newvp) {
2859                         VN_RELE(cs->vp);
2860                         cs->vp = oldvp;
2861                 } else
2862                         VN_RELE(vp);
2863                 return (puterrno4(error));
2864         }
2865
2866         if (!is_newvp) {
2867                 if (cs->vp)
2868                         VN_RELE(cs->vp);
2869                 cs->vp = vp;
2870         } else if (oldvp)
2871                 VN_RELE(oldvp);
2872
2873         /*
2874          * if did lookup on attrdir and didn't lookup .., set named
2875          * attr fh flag
2876          */
2877         if (attrdir && ! dotdot)
2878                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2879
2880         /* Assume false for now, open proc will set this */
2881         cs->mandlock = FALSE;
2882
2883         return (NFS4_OK);
2884 }
2885
2886 /* ARGSUSED */
2887 static void
2888 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2889     struct compound_state *cs)
2890 {
2891         LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2892         LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2893         char *nm;
2894         uint_t len;
2895         struct sockaddr *ca;
2896         char *name = NULL;
2897         nfsstat4 status;
2898
2899         DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2900             LOOKUP4args *, args);
2901
2902         if (cs->vp == NULL) {
2903                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2904                 goto out;
2905         }
2906
2907         if (cs->vp->v_type == VLNK) {
2908                 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2909                 goto out;
2910         }
2911
2912         if (cs->vp->v_type != VDIR) {
2913                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2914                 goto out;
2915         }
2916
2917         status = utf8_dir_verify(&args->objname);
2918         if (status != NFS4_OK) {
2919                 *cs->statusp = resp->status = status;
2920                 goto out;
2921         }
2922
2923         nm = utf8_to_str(&args->objname, &len, NULL);
2924         if (nm == NULL) {
2925                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2926                 goto out;
2927         }
2928
2929         if (len > MAXNAMELEN) {
2930                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2931                 kmem_free(nm, len);
2932                 goto out;
2933         }
2934
2935         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2936         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2937             MAXPATHLEN  + 1);
2938
2939         if (name == NULL) {
2940                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2941                 kmem_free(nm, len);
2942                 goto out;
2943         }
2944
2945         *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
2946
2947         if (name != nm)
2948                 kmem_free(name, MAXPATHLEN + 1);
2949         kmem_free(nm, len);
2950
2951 out:
2952         DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
2953             LOOKUP4res *, resp);
2954 }
2955
2956 /* ARGSUSED */
2957 static void
2958 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2959     struct compound_state *cs)
2960 {
2961         LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2962
2963         DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
2964
2965         if (cs->vp == NULL) {
2966                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2967                 goto out;
2968         }
2969
2970         if (cs->vp->v_type != VDIR) {
2971                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2972                 goto out;
2973         }
2974
2975         *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
2976
2977         /*
2978          * From NFSV4 Specification, LOOKUPP should not check for
2979          * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2980          */
2981         if (resp->status == NFS4ERR_WRONGSEC) {
2982                 *cs->statusp = resp->status = NFS4_OK;
2983         }
2984
2985 out:
2986         DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
2987             LOOKUPP4res *, resp);
2988 }
2989
2990
2991 /*ARGSUSED2*/
2992 static void
2993 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2994     struct compound_state *cs)
2995 {
2996         OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
2997         OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
2998         vnode_t         *avp = NULL;
2999         int             lookup_flags = LOOKUP_XATTR, error;
3000         int             exp_ro = 0;
3001
3002         DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3003             OPENATTR4args *, args);
3004
3005         if (cs->vp == NULL) {
3006                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3007                 goto out;
3008         }
3009
3010         if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3011             !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3012                 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3013                 goto out;
3014         }
3015
3016         /*
3017          * If file system supports passing ACE mask to VOP_ACCESS then
3018          * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3019          */
3020
3021         if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3022                 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3023                     V_ACE_MASK, cs->cr, NULL);
3024         else
3025                 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3026                     (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3027                     (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3028
3029         if (error) {
3030                 *cs->statusp = resp->status = puterrno4(EACCES);
3031                 goto out;
3032         }
3033
3034         /*
3035          * The CREATE_XATTR_DIR VOP flag cannot be specified if
3036          * the file system is exported read-only -- regardless of
3037          * createdir flag.  Otherwise the attrdir would be created
3038          * (assuming server fs isn't mounted readonly locally).  If
3039          * VOP_LOOKUP returns ENOENT in this case, the error will
3040          * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3041          * because specfs has no VOP_LOOKUP op, so the macro would
3042          * return ENOSYS.  EINVAL is returned by all (current)
3043          * Solaris file system implementations when any of their
3044          * restrictions are violated (xattr(dir) can't have xattrdir).
3045          * Returning NOTSUPP is more appropriate in this case
3046          * because the object will never be able to have an attrdir.
3047          */
3048         if (args->createdir && ! (exp_ro = rdonly4(cs->exi, cs->vp, req)))
3049                 lookup_flags |= CREATE_XATTR_DIR;
3050
3051         error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3052             NULL, NULL, NULL);
3053
3054         if (error) {
3055                 if (error == ENOENT && args->createdir && exp_ro)
3056                         *cs->statusp = resp->status = puterrno4(EROFS);
3057                 else if (error == EINVAL || error == ENOSYS)
3058                         *cs->statusp = resp->status = puterrno4(ENOTSUP);
3059                 else
3060                         *cs->statusp = resp->status = puterrno4(error);
3061                 goto out;
3062         }
3063
3064         ASSERT(avp->v_flag & V_XATTRDIR);
3065
3066         error = makefh4(&cs->fh, avp, cs->exi);
3067
3068         if (error) {
3069                 VN_RELE(avp);
3070                 *cs->statusp = resp->status = puterrno4(error);
3071                 goto out;
3072         }
3073
3074         VN_RELE(cs->vp);
3075         cs->vp = avp;
3076
3077         /*
3078          * There is no requirement for an attrdir fh flag
3079          * because the attrdir has a vnode flag to distinguish
3080          * it from regular (non-xattr) directories.  The
3081          * FH4_ATTRDIR flag is set for future sanity checks.
3082          */
3083         set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3084         *cs->statusp = resp->status = NFS4_OK;
3085
3086 out:
3087         DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3088             OPENATTR4res *, resp);
3089 }
3090
3091 static int
3092 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3093     caller_context_t *ct)
3094 {
3095         int error;
3096         int i;
3097         clock_t delaytime;
3098
3099         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3100
3101         /*
3102          * Don't block on mandatory locks. If this routine returns
3103          * EAGAIN, the caller should return NFS4ERR_LOCKED.
3104          */
3105         uio->uio_fmode = FNONBLOCK;
3106
3107         for (i = 0; i < rfs4_maxlock_tries; i++) {
3108
3109
3110                 if (direction == FREAD) {
3111                         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3112                         error = VOP_READ(vp, uio, ioflag, cred, ct);
3113                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3114                 } else {
3115                         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3116                         error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3117                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3118                 }
3119
3120                 if (error != EAGAIN)
3121                         break;
3122
3123                 if (i < rfs4_maxlock_tries - 1) {
3124                         delay(delaytime);
3125                         delaytime *= 2;
3126                 }
3127         }
3128
3129         return (error);
3130 }
3131
3132 /* ARGSUSED */
3133 static void
3134 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3135     struct compound_state *cs)
3136 {
3137         READ4args *args = &argop->nfs_argop4_u.opread;
3138         READ4res *resp = &resop->nfs_resop4_u.opread;
3139         int error;
3140         int verror;
3141         vnode_t *vp;
3142         struct vattr va;
3143         struct iovec iov;
3144         struct uio uio;
3145         u_offset_t offset;
3146         bool_t *deleg = &cs->deleg;
3147         nfsstat4 stat;
3148         int in_crit = 0;
3149         mblk_t *mp = NULL;
3150         int alloc_err = 0;
3151         int rdma_used = 0;
3152         int loaned_buffers;
3153         caller_context_t ct;
3154         struct uio *uiop;
3155
3156         DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3157             READ4args, args);
3158
3159         vp = cs->vp;
3160         if (vp == NULL) {
3161                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3162                 goto out;
3163         }
3164         if (cs->access == CS_ACCESS_DENIED) {
3165                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3166                 goto out;
3167         }
3168
3169         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3170             deleg, TRUE, &ct)) != NFS4_OK) {
3171                 *cs->statusp = resp->status = stat;
3172                 goto out;
3173         }
3174
3175         /*
3176          * Enter the critical region before calling VOP_RWLOCK
3177          * to avoid a deadlock with write requests.
3178          */
3179         if (nbl_need_check(vp)) {
3180                 nbl_start_crit(vp, RW_READER);
3181                 in_crit = 1;
3182                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3183                     &ct)) {
3184                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
3185                         goto out;
3186                 }
3187         }
3188
3189         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3190             deleg, TRUE, &ct)) != NFS4_OK) {
3191                 *cs->statusp = resp->status = stat;
3192                 goto out;
3193         }
3194
3195         if (args->wlist) {
3196                 if (args->count > clist_len(args->wlist)) {
3197                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3198                         goto out;
3199                 }
3200                 rdma_used = 1;
3201         }
3202
3203         /* use loaned buffers for TCP */
3204         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3205
3206         va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3207         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3208
3209         /*
3210          * If we can't get the attributes, then we can't do the
3211          * right access checking.  So, we'll fail the request.
3212          */
3213         if (verror) {
3214                 *cs->statusp = resp->status = puterrno4(verror);
3215                 goto out;
3216         }
3217
3218         if (vp->v_type != VREG) {
3219                 *cs->statusp = resp->status =
3220                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3221                 goto out;
3222         }
3223
3224         if (crgetuid(cs->cr) != va.va_uid &&
3225             (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3226             (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3227                 *cs->statusp = resp->status = puterrno4(error);
3228                 goto out;
3229         }
3230
3231         if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3232                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3233                 goto out;
3234         }
3235
3236         offset = args->offset;
3237         if (offset >= va.va_size) {
3238                 *cs->statusp = resp->status = NFS4_OK;
3239                 resp->eof = TRUE;
3240                 resp->data_len = 0;
3241                 resp->data_val = NULL;
3242                 resp->mblk = NULL;
3243                 /* RDMA */
3244                 resp->wlist = args->wlist;
3245                 resp->wlist_len = resp->data_len;
3246                 *cs->statusp = resp->status = NFS4_OK;
3247                 if (resp->wlist)
3248                         clist_zero_len(resp->wlist);
3249                 goto out;
3250         }
3251
3252         if (args->count == 0) {
3253                 *cs->statusp = resp->status = NFS4_OK;
3254                 resp->eof = FALSE;
3255                 resp->data_len = 0;
3256                 resp->data_val = NULL;
3257                 resp->mblk = NULL;
3258                 /* RDMA */
3259                 resp->wlist = args->wlist;
3260                 resp->wlist_len = resp->data_len;
3261                 if (resp->wlist)
3262                         clist_zero_len(resp->wlist);
3263                 goto out;
3264         }
3265
3266         /*
3267          * Do not allocate memory more than maximum allowed
3268          * transfer size
3269          */
3270         if (args->count > rfs4_tsize(req))
3271                 args->count = rfs4_tsize(req);
3272
3273         if (loaned_buffers) {
3274                 uiop = (uio_t *)rfs_setup_xuio(vp);
3275                 ASSERT(uiop != NULL);
3276                 uiop->uio_segflg = UIO_SYSSPACE;
3277                 uiop->uio_loffset = args->offset;
3278                 uiop->uio_resid = args->count;
3279
3280                 /* Jump to do the read if successful */
3281                 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3282                         /*
3283                          * Need to hold the vnode until after VOP_RETZCBUF()
3284                          * is called.
3285                          */
3286                         VN_HOLD(vp);
3287                         goto doio_read;
3288                 }
3289
3290                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3291                     uiop->uio_loffset, int, uiop->uio_resid);
3292
3293                 uiop->uio_extflg = 0;
3294
3295                 /* failure to setup for zero copy */
3296                 rfs_free_xuio((void *)uiop);
3297                 loaned_buffers = 0;
3298         }
3299
3300         /*
3301          * If returning data via RDMA Write, then grab the chunk list. If we
3302          * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3303          */
3304         if (rdma_used) {
3305                 mp = NULL;
3306                 (void) rdma_get_wchunk(req, &iov, args->wlist);
3307         } else {
3308                 /*
3309                  * mp will contain the data to be sent out in the read reply.
3310                  * It will be freed after the reply has been sent. Let's
3311                  * roundup the data to a BYTES_PER_XDR_UNIT multiple, so that
3312                  * the call to xdrmblk_putmblk() never fails. If the first
3313                  * alloc of the requested size fails, then decrease the size to
3314                  * something more reasonable and wait for the allocation to
3315                  * occur.
3316                  */
3317                 mp = allocb(RNDUP(args->count), BPRI_MED);
3318                 if (mp == NULL) {
3319                         if (args->count > MAXBSIZE)
3320                                 args->count = MAXBSIZE;
3321                         mp = allocb_wait(RNDUP(args->count), BPRI_MED,
3322                             STR_NOSIG, &alloc_err);
3323                 }
3324                 ASSERT(mp != NULL);
3325                 ASSERT(alloc_err == 0);
3326
3327                 iov.iov_base = (caddr_t)mp->b_datap->db_base;
3328                 iov.iov_len = args->count;
3329         }
3330
3331         uio.uio_iov = &iov;
3332         uio.uio_iovcnt = 1;
3333         uio.uio_segflg = UIO_SYSSPACE;
3334         uio.uio_extflg = UIO_COPY_CACHED;
3335         uio.uio_loffset = args->offset;
3336         uio.uio_resid = args->count;
3337         uiop = &uio;
3338
3339 doio_read:
3340         error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3341
3342         va.va_mask = AT_SIZE;
3343         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3344
3345         if (error) {
3346                 if (mp)
3347                         freemsg(mp);
3348                 *cs->statusp = resp->status = puterrno4(error);
3349                 goto out;
3350         }
3351
3352         /* make mblk using zc buffers */
3353         if (loaned_buffers) {
3354                 mp = uio_to_mblk(uiop);
3355                 ASSERT(mp != NULL);
3356         }
3357
3358         *cs->statusp = resp->status = NFS4_OK;
3359
3360         ASSERT(uiop->uio_resid >= 0);
3361         resp->data_len = args->count - uiop->uio_resid;
3362         if (mp) {
3363                 resp->data_val = (char *)mp->b_datap->db_base;
3364                 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3365         } else {
3366                 resp->data_val = (caddr_t)iov.iov_base;
3367         }
3368
3369         resp->mblk = mp;
3370
3371         if (!verror && offset + resp->data_len == va.va_size)
3372                 resp->eof = TRUE;
3373         else
3374                 resp->eof = FALSE;
3375
3376         if (rdma_used) {
3377                 if (!rdma_setup_read_data4(args, resp)) {
3378                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3379                 }
3380         } else {
3381                 resp->wlist = NULL;
3382         }
3383
3384 out:
3385         if (in_crit)
3386                 nbl_end_crit(vp);
3387
3388         DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3389             READ4res *, resp);
3390 }
3391
3392 static void
3393 rfs4_op_read_free(nfs_resop4 *resop)
3394 {
3395         READ4res        *resp = &resop->nfs_resop4_u.opread;
3396
3397         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3398                 freemsg(resp->mblk);
3399                 resp->mblk = NULL;
3400                 resp->data_val = NULL;
3401                 resp->data_len = 0;
3402         }
3403 }
3404
3405 static void
3406 rfs4_op_readdir_free(nfs_resop4 * resop)
3407 {
3408         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3409
3410         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3411                 freeb(resp->mblk);
3412                 resp->mblk = NULL;
3413                 resp->data_len = 0;
3414         }
3415 }
3416
3417
3418 /* ARGSUSED */
3419 static void
3420 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3421     struct compound_state *cs)
3422 {
3423         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3424         int             error;
3425         vnode_t         *vp;
3426         struct exportinfo *exi, *sav_exi;
3427         nfs_fh4_fmt_t   *fh_fmtp;
3428
3429         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3430
3431         if (cs->vp) {
3432                 VN_RELE(cs->vp);
3433                 cs->vp = NULL;
3434         }
3435
3436         if (cs->cr)
3437                 crfree(cs->cr);
3438
3439         cs->cr = crdup(cs->basecr);
3440
3441         vp = exi_public->exi_vp;
3442         if (vp == NULL) {
3443                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3444                 goto out;
3445         }
3446
3447         error = makefh4(&cs->fh, vp, exi_public);
3448         if (error != 0) {
3449                 *cs->statusp = resp->status = puterrno4(error);
3450                 goto out;
3451         }
3452         sav_exi = cs->exi;
3453         if (exi_public == exi_root) {
3454                 /*
3455                  * No filesystem is actually shared public, so we default
3456                  * to exi_root. In this case, we must check whether root
3457                  * is exported.
3458                  */
3459                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3460
3461                 /*
3462                  * if root filesystem is exported, the exportinfo struct that we
3463                  * should use is what checkexport4 returns, because root_exi is
3464                  * actually a mostly empty struct.
3465                  */
3466                 exi = checkexport4(&fh_fmtp->fh4_fsid,
3467                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3468                 cs->exi = ((exi != NULL) ? exi : exi_public);
3469         } else {
3470                 /*
3471                  * it's a properly shared filesystem
3472                  */
3473                 cs->exi = exi_public;
3474         }
3475
3476         if (is_system_labeled()) {
3477                 bslabel_t *clabel;
3478
3479                 ASSERT(req->rq_label != NULL);
3480                 clabel = req->rq_label;
3481                 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3482                     "got client label from request(1)",
3483                     struct svc_req *, req);
3484                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3485                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3486                             cs->exi)) {
3487                                 *cs->statusp = resp->status =
3488                                     NFS4ERR_SERVERFAULT;
3489                                 goto out;
3490                         }
3491                 }
3492         }
3493
3494         VN_HOLD(vp);
3495         cs->vp = vp;
3496
3497         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3498                 VN_RELE(cs->vp);
3499                 cs->vp = NULL;
3500                 cs->exi = sav_exi;
3501                 goto out;
3502         }
3503
3504         *cs->statusp = resp->status = NFS4_OK;
3505 out:
3506         DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3507             PUTPUBFH4res *, resp);
3508 }
3509
3510 /*
3511  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3512  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3513  * or joe have restrictive search permissions, then we shouldn't let
3514  * the client get a file handle. This is easy to enforce. However, we
3515  * don't know what security flavor should be used until we resolve the
3516  * path name. Another complication is uid mapping. If root is
3517  * the user, then it will be mapped to the anonymous user by default,
3518  * but we won't know that till we've resolved the path name. And we won't
3519  * know what the anonymous user is.
3520  * Luckily, SECINFO is specified to take a full filename.
3521  * So what we will have to in rfs4_op_lookup is check that flavor of
3522  * the target object matches that of the request, and if root was the
3523  * caller, check for the root= and anon= options, and if necessary,
3524  * repeat the lookup using the right cred_t. But that's not done yet.
3525  */
3526 /* ARGSUSED */
3527 static void
3528 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3529     struct compound_state *cs)
3530 {
3531         PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3532         PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3533         nfs_fh4_fmt_t *fh_fmtp;
3534
3535         DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3536             PUTFH4args *, args);
3537
3538         if (cs->vp) {
3539                 VN_RELE(cs->vp);
3540                 cs->vp = NULL;
3541         }
3542
3543         if (cs->cr) {
3544                 crfree(cs->cr);
3545                 cs->cr = NULL;
3546         }
3547
3548
3549         if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3550                 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3551                 goto out;
3552         }
3553
3554         fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3555         cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3556             NULL);
3557
3558         if (cs->exi == NULL) {
3559                 *cs->statusp = resp->status = NFS4ERR_STALE;
3560                 goto out;
3561         }
3562
3563         cs->cr = crdup(cs->basecr);
3564
3565         ASSERT(cs->cr != NULL);
3566
3567         if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3568                 *cs->statusp = resp->status;
3569                 goto out;
3570         }
3571
3572         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3573                 VN_RELE(cs->vp);
3574                 cs->vp = NULL;
3575                 goto out;
3576         }
3577
3578         nfs_fh4_copy(&args->object, &cs->fh);
3579         *cs->statusp = resp->status = NFS4_OK;
3580         cs->deleg = FALSE;
3581
3582 out:
3583         DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3584             PUTFH4res *, resp);
3585 }
3586
3587 /* ARGSUSED */
3588 static void
3589 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3590     struct compound_state *cs)
3591 {
3592         PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3593         int error;
3594         fid_t fid;
3595         struct exportinfo *exi, *sav_exi;
3596
3597         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3598
3599         if (cs->vp) {
3600                 VN_RELE(cs->vp);
3601                 cs->vp = NULL;
3602         }
3603
3604         if (cs->cr)
3605                 crfree(cs->cr);
3606
3607         cs->cr = crdup(cs->basecr);
3608
3609         /*
3610          * Using rootdir, the system root vnode,
3611          * get its fid.
3612          */
3613         bzero(&fid, sizeof (fid));
3614         fid.fid_len = MAXFIDSZ;
3615         error = vop_fid_pseudo(rootdir, &fid);
3616         if (error != 0) {
3617                 *cs->statusp = resp->status = puterrno4(error);
3618                 goto out;
3619         }
3620
3621         /*
3622          * Then use the root fsid & fid it to find out if it's exported
3623          *
3624          * If the server root isn't exported directly, then
3625          * it should at least be a pseudo export based on
3626          * one or more exports further down in the server's
3627          * file tree.
3628          */
3629         exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3630         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3631                 NFS4_DEBUG(rfs4_debug,
3632                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3633                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3634                 goto out;
3635         }
3636
3637         /*
3638          * Now make a filehandle based on the root
3639          * export and root vnode.
3640          */
3641         error = makefh4(&cs->fh, rootdir, exi);
3642         if (error != 0) {
3643                 *cs->statusp = resp->status = puterrno4(error);
3644                 goto out;
3645         }
3646
3647         sav_exi = cs->exi;
3648         cs->exi = exi;
3649
3650         VN_HOLD(rootdir);
3651         cs->vp = rootdir;
3652
3653         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3654                 VN_RELE(rootdir);
3655                 cs->vp = NULL;
3656                 cs->exi = sav_exi;
3657                 goto out;
3658         }
3659
3660         *cs->statusp = resp->status = NFS4_OK;
3661         cs->deleg = FALSE;
3662 out:
3663         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3664             PUTROOTFH4res *, resp);
3665 }
3666
3667 /*
3668  * A directory entry is a valid nfsv4 entry if
3669  * - it has a non-zero ino
3670  * - it is not a dot or dotdot name
3671  * - it is visible in a pseudo export or in a real export that can
3672  *   only have a limited view.
3673  */
3674 static bool_t
3675 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp,
3676     int *expseudo, int check_visible)
3677 {
3678         if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) {
3679                 *expseudo = 0;
3680                 return (FALSE);
3681         }
3682
3683         if (! check_visible) {
3684                 *expseudo = 0;
3685                 return (TRUE);
3686         }
3687
3688         return (nfs_visible_inode(exi, dp->d_ino, expseudo));
3689 }
3690
3691 /*
3692  * set_rdattr_params sets up the variables used to manage what information
3693  * to get for each directory entry.
3694  */
3695 static nfsstat4
3696 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3697     bitmap4 attrs, bool_t *need_to_lookup)
3698 {
3699         uint_t  va_mask;
3700         nfsstat4 status;
3701         bitmap4 objbits;
3702
3703         status = bitmap4_to_attrmask(attrs, sargp);
3704         if (status != NFS4_OK) {
3705                 /*
3706                  * could not even figure attr mask
3707                  */
3708                 return (status);
3709         }
3710         va_mask = sargp->vap->va_mask;
3711
3712         /*
3713          * dirent's d_ino is always correct value for mounted_on_fileid.
3714          * mntdfid_set is set once here, but mounted_on_fileid is
3715          * set in main dirent processing loop for each dirent.
3716          * The mntdfid_set is a simple optimization that lets the
3717          * server attr code avoid work when caller is readdir.
3718          */
3719         sargp->mntdfid_set = TRUE;
3720
3721         /*
3722          * Lookup entry only if client asked for any of the following:
3723          * a) vattr attrs
3724          * b) vfs attrs
3725          * c) attrs w/per-object scope requested (change, filehandle, etc)
3726          *    other than mounted_on_fileid (which we can take from dirent)
3727          */
3728         objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3729
3730         if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3731                 *need_to_lookup = TRUE;
3732         else
3733                 *need_to_lookup = FALSE;
3734
3735         if (sargp->sbp == NULL)
3736                 return (NFS4_OK);
3737
3738         /*
3739          * If filesystem attrs are requested, get them now from the
3740          * directory vp, as most entries will have same filesystem. The only
3741          * exception are mounted over entries but we handle
3742          * those as we go (XXX mounted over detection not yet implemented).
3743          */
3744         sargp->vap->va_mask = 0;        /* to avoid VOP_GETATTR */
3745         status = bitmap4_get_sysattrs(sargp);
3746         sargp->vap->va_mask = va_mask;
3747
3748         if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3749                 /*
3750                  * Failed to get filesystem attributes.
3751                  * Return a rdattr_error for each entry, but don't fail.
3752                  * However, don't get any obj-dependent attrs.
3753                  */
3754                 sargp->rdattr_error = status;   /* for rdattr_error */
3755                 *need_to_lookup = FALSE;
3756                 /*
3757                  * At least get fileid for regular readdir output
3758                  */
3759                 sargp->vap->va_mask &= AT_NODEID;
3760                 status = NFS4_OK;
3761         }
3762
3763         return (status);
3764 }
3765
3766 /*
3767  * readlink: args: CURRENT_FH.
3768  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3769  */
3770
3771 /* ARGSUSED */
3772 static void
3773 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3774     struct compound_state *cs)
3775 {
3776         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3777         int error;
3778         vnode_t *vp;
3779         struct iovec iov;
3780         struct vattr va;
3781         struct uio uio;
3782         char *data;
3783         struct sockaddr *ca;
3784         char *name = NULL;
3785         int is_referral;
3786
3787         DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3788
3789         /* CURRENT_FH: directory */
3790         vp = cs->vp;
3791         if (vp == NULL) {
3792                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3793                 goto out;
3794         }
3795
3796         if (cs->access == CS_ACCESS_DENIED) {
3797                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3798                 goto out;
3799         }
3800
3801         /* Is it a referral? */
3802         if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3803
3804                 is_referral = 1;
3805
3806         } else {
3807
3808                 is_referral = 0;
3809
3810                 if (vp->v_type == VDIR) {
3811                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
3812                         goto out;
3813                 }
3814
3815                 if (vp->v_type != VLNK) {
3816                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3817                         goto out;
3818                 }
3819
3820         }
3821
3822         va.va_mask = AT_MODE;
3823         error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3824         if (error) {
3825                 *cs->statusp = resp->status = puterrno4(error);
3826                 goto out;
3827         }
3828
3829         if (MANDLOCK(vp, va.va_mode)) {
3830                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3831                 goto out;
3832         }
3833
3834         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3835
3836         if (is_referral) {
3837                 char *s;
3838                 size_t strsz;
3839
3840                 /* Get an artificial symlink based on a referral */
3841                 s = build_symlink(vp, cs->cr, &strsz);
3842                 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3843                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3844                     vnode_t *, vp, char *, s);
3845                 if (s == NULL)
3846                         error = EINVAL;
3847                 else {
3848                         error = 0;
3849                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3850                         kmem_free(s, strsz);
3851                 }
3852
3853         } else {
3854
3855                 iov.iov_base = data;
3856                 iov.iov_len = MAXPATHLEN;
3857                 uio.uio_iov = &iov;
3858                 uio.uio_iovcnt = 1;
3859                 uio.uio_segflg = UIO_SYSSPACE;
3860                 uio.uio_extflg = UIO_COPY_CACHED;
3861                 uio.uio_loffset = 0;
3862                 uio.uio_resid = MAXPATHLEN;
3863
3864                 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3865
3866                 if (!error)
3867                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3868         }
3869
3870         if (error) {
3871                 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3872                 *cs->statusp = resp->status = puterrno4(error);
3873                 goto out;
3874         }
3875
3876         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3877         name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3878             MAXPATHLEN  + 1);
3879
3880         if (name == NULL) {
3881                 /*
3882                  * Even though the conversion failed, we return
3883                  * something. We just don't translate it.
3884                  */
3885                 name = data;
3886         }
3887
3888         /*
3889          * treat link name as data
3890          */
3891         (void) str_to_utf8(name, &resp->link);
3892
3893         if (name != data)
3894                 kmem_free(name, MAXPATHLEN + 1);
3895         kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3896         *cs->statusp = resp->status = NFS4_OK;
3897
3898 out:
3899         DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3900             READLINK4res *, resp);
3901 }
3902
3903 static void
3904 rfs4_op_readlink_free(nfs_resop4 *resop)
3905 {
3906         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3907         utf8string *symlink = &resp->link;
3908
3909         if (symlink->utf8string_val) {
3910                 UTF8STRING_FREE(*symlink)
3911         }
3912 }
3913
3914 /*
3915  * release_lockowner:
3916  *      Release any state associated with the supplied
3917  *      lockowner. Note if any lo_state is holding locks we will not
3918  *      rele that lo_state and thus the lockowner will not be destroyed.
3919  *      A client using lock after the lock owner stateid has been released
3920  *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3921  *      to reissue the lock with new_lock_owner set to TRUE.
3922  *      args: lock_owner
3923  *      res:  status
3924  */
3925 /* ARGSUSED */
3926 static void
3927 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3928     struct svc_req *req, struct compound_state *cs)
3929 {
3930         RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3931         RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3932         rfs4_lockowner_t *lo;
3933         rfs4_openowner_t *oo;
3934         rfs4_state_t *sp;
3935         rfs4_lo_state_t *lsp;
3936         rfs4_client_t *cp;
3937         bool_t create = FALSE;
3938         locklist_t *llist;
3939         sysid_t sysid;
3940
3941         DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3942             cs, RELEASE_LOCKOWNER4args *, ap);
3943
3944         /* Make sure there is a clientid around for this request */
3945         cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3946
3947         if (cp == NULL) {
3948                 *cs->statusp = resp->status =
3949                     rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3950                 goto out;
3951         }
3952         rfs4_client_rele(cp);
3953
3954         lo = rfs4_findlockowner(&ap->lock_owner, &create);
3955         if (lo == NULL) {
3956                 *cs->statusp = resp->status = NFS4_OK;
3957                 goto out;
3958         }
3959         ASSERT(lo->rl_client != NULL);
3960
3961         /*
3962          * Check for EXPIRED client. If so will reap state with in a lease
3963          * period or on next set_clientid_confirm step
3964          */
3965         if (rfs4_lease_expired(lo->rl_client)) {
3966                 rfs4_lockowner_rele(lo);
3967                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3968                 goto out;
3969         }
3970
3971         /*
3972          * If no sysid has been assigned, then no locks exist; just return.
3973          */
3974         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3975         if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3976                 rfs4_lockowner_rele(lo);
3977                 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3978                 goto out;
3979         }
3980
3981         sysid = lo->rl_client->rc_sysidt;
3982         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3983
3984         /*
3985          * Mark the lockowner invalid.
3986          */
3987         rfs4_dbe_hide(lo->rl_dbe);
3988
3989         /*
3990          * sysid-pid pair should now not be used since the lockowner is
3991          * invalid. If the client were to instantiate the lockowner again
3992          * it would be assigned a new pid. Thus we can get the list of
3993          * current locks.
3994          */
3995
3996         llist = flk_get_active_locks(sysid, lo->rl_pid);
3997         /* If we are still holding locks fail */
3998         if (llist != NULL) {
3999
4000                 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
4001
4002                 flk_free_locklist(llist);
4003                 /*
4004                  * We need to unhide the lockowner so the client can
4005                  * try it again. The bad thing here is if the client
4006                  * has a logic error that took it here in the first place
4007                  * he probably has lost accounting of the locks that it
4008                  * is holding. So we may have dangling state until the
4009                  * open owner state is reaped via close. One scenario
4010                  * that could possibly occur is that the client has
4011                  * sent the unlock request(s) in separate threads
4012                  * and has not waited for the replies before sending the
4013                  * RELEASE_LOCKOWNER request. Presumably, it would expect
4014                  * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4015                  * reissuing the request.
4016                  */
4017                 rfs4_dbe_unhide(lo->rl_dbe);
4018                 rfs4_lockowner_rele(lo);
4019                 goto out;
4020         }
4021
4022         /*
4023          * For the corresponding client we need to check each open
4024          * owner for any opens that have lockowner state associated
4025          * with this lockowner.
4026          */
4027
4028         rfs4_dbe_lock(lo->rl_client->rc_dbe);
4029         for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4030             oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4031
4032                 rfs4_dbe_lock(oo->ro_dbe);
4033                 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4034                     sp = list_next(&oo->ro_statelist, sp)) {
4035
4036                         rfs4_dbe_lock(sp->rs_dbe);
4037                         for (lsp = list_head(&sp->rs_lostatelist);
4038                             lsp != NULL;
4039                             lsp = list_next(&sp->rs_lostatelist, lsp)) {
4040                                 if (lsp->rls_locker == lo) {
4041                                         rfs4_dbe_lock(lsp->rls_dbe);
4042                                         rfs4_dbe_invalidate(lsp->rls_dbe);
4043                                         rfs4_dbe_unlock(lsp->rls_dbe);
4044                                 }
4045                         }
4046                         rfs4_dbe_unlock(sp->rs_dbe);
4047                 }
4048                 rfs4_dbe_unlock(oo->ro_dbe);
4049         }
4050         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4051
4052         rfs4_lockowner_rele(lo);
4053
4054         *cs->statusp = resp->status = NFS4_OK;
4055
4056 out:
4057         DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4058             cs, RELEASE_LOCKOWNER4res *, resp);
4059 }
4060
4061 /*
4062  * short utility function to lookup a file and recall the delegation
4063  */
4064 static rfs4_file_t *
4065 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4066     int *lkup_error, cred_t *cr)
4067 {
4068         vnode_t *vp;
4069         rfs4_file_t *fp = NULL;
4070         bool_t fcreate = FALSE;
4071         int error;
4072
4073         if (vpp)
4074                 *vpp = NULL;
4075
4076         if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4077             NULL)) == 0) {
4078                 if (vp->v_type == VREG)
4079                         fp = rfs4_findfile(vp, NULL, &fcreate);
4080                 if (vpp)
4081                         *vpp = vp;
4082                 else
4083                         VN_RELE(vp);
4084         }
4085
4086         if (lkup_error)
4087                 *lkup_error = error;
4088
4089         return (fp);
4090 }
4091
4092 /*
4093  * remove: args: CURRENT_FH: directory; name.
4094  *      res: status. If success - CURRENT_FH unchanged, return change_info
4095  *              for directory.
4096  */
4097 /* ARGSUSED */
4098 static void
4099 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4100     struct compound_state *cs)
4101 {
4102         REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4103         REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4104         int error;
4105         vnode_t *dvp, *vp;
4106         struct vattr bdva, idva, adva;
4107         char *nm;
4108         uint_t len;
4109         rfs4_file_t *fp;
4110         int in_crit = 0;
4111         bslabel_t *clabel;
4112         struct sockaddr *ca;
4113         char *name = NULL;
4114         nfsstat4 status;
4115
4116         DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4117             REMOVE4args *, args);
4118
4119         /* CURRENT_FH: directory */
4120         dvp = cs->vp;
4121         if (dvp == NULL) {
4122                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4123                 goto out;
4124         }
4125
4126         if (cs->access == CS_ACCESS_DENIED) {
4127                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4128                 goto out;
4129         }
4130
4131         /*
4132          * If there is an unshared filesystem mounted on this vnode,
4133          * Do not allow to remove anything in this directory.
4134          */
4135         if (vn_ismntpt(dvp)) {
4136                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4137                 goto out;
4138         }
4139
4140         if (dvp->v_type != VDIR) {
4141                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4142                 goto out;
4143         }
4144
4145         status = utf8_dir_verify(&args->target);
4146         if (status != NFS4_OK) {
4147                 *cs->statusp = resp->status = status;
4148                 goto out;
4149         }
4150
4151         /*
4152          * Lookup the file so that we can check if it's a directory
4153          */
4154         nm = utf8_to_fn(&args->target, &len, NULL);
4155         if (nm == NULL) {
4156                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4157                 goto out;
4158         }
4159
4160         if (len > MAXNAMELEN) {
4161                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4162                 kmem_free(nm, len);
4163                 goto out;
4164         }
4165
4166         if (rdonly4(cs->exi, cs->vp, req)) {
4167                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4168                 kmem_free(nm, len);
4169                 goto out;
4170         }
4171
4172         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4173         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4174             MAXPATHLEN  + 1);
4175
4176         if (name == NULL) {
4177                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4178                 kmem_free(nm, len);
4179                 goto out;
4180         }
4181
4182         /*
4183          * Lookup the file to determine type and while we are see if
4184          * there is a file struct around and check for delegation.
4185          * We don't need to acquire va_seq before this lookup, if
4186          * it causes an update, cinfo.before will not match, which will
4187          * trigger a cache flush even if atomic is TRUE.
4188          */
4189         if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4190                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4191                     NULL)) {
4192                         VN_RELE(vp);
4193                         rfs4_file_rele(fp);
4194                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4195                         if (nm != name)
4196                                 kmem_free(name, MAXPATHLEN + 1);
4197                         kmem_free(nm, len);
4198                         goto out;
4199                 }
4200         }
4201
4202         /* Didn't find anything to remove */
4203         if (vp == NULL) {
4204                 *cs->statusp = resp->status = error;
4205                 if (nm != name)
4206                         kmem_free(name, MAXPATHLEN + 1);
4207                 kmem_free(nm, len);
4208                 goto out;
4209         }
4210
4211         if (nbl_need_check(vp)) {
4212                 nbl_start_crit(vp, RW_READER);
4213                 in_crit = 1;
4214                 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4215                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4216                         if (nm != name)
4217                                 kmem_free(name, MAXPATHLEN + 1);
4218                         kmem_free(nm, len);
4219                         nbl_end_crit(vp);
4220                         VN_RELE(vp);
4221                         if (fp) {
4222                                 rfs4_clear_dont_grant(fp);
4223                                 rfs4_file_rele(fp);
4224                         }
4225                         goto out;
4226                 }
4227         }
4228
4229         /* check label before allowing removal */
4230         if (is_system_labeled()) {
4231                 ASSERT(req->rq_label != NULL);
4232                 clabel = req->rq_label;
4233                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4234                     "got client label from request(1)",
4235                     struct svc_req *, req);
4236                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4237                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4238                             cs->exi)) {
4239                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4240                                 if (name != nm)
4241                                         kmem_free(name, MAXPATHLEN + 1);
4242                                 kmem_free(nm, len);
4243                                 if (in_crit)
4244                                         nbl_end_crit(vp);
4245                                 VN_RELE(vp);
4246                                 if (fp) {
4247                                         rfs4_clear_dont_grant(fp);
4248                                         rfs4_file_rele(fp);
4249                                 }
4250                                 goto out;
4251                         }
4252                 }
4253         }
4254
4255         /* Get dir "before" change value */
4256         bdva.va_mask = AT_CTIME|AT_SEQ;
4257         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4258         if (error) {
4259                 *cs->statusp = resp->status = puterrno4(error);
4260                 if (nm != name)
4261                         kmem_free(name, MAXPATHLEN + 1);
4262                 kmem_free(nm, len);
4263                 if (in_crit)
4264                         nbl_end_crit(vp);
4265                 VN_RELE(vp);
4266                 if (fp) {
4267                         rfs4_clear_dont_grant(fp);
4268                         rfs4_file_rele(fp);
4269                 }
4270                 goto out;
4271         }
4272         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4273
4274         /* Actually do the REMOVE operation */
4275         if (vp->v_type == VDIR) {
4276                 /*
4277                  * Can't remove a directory that has a mounted-on filesystem.
4278                  */
4279                 if (vn_ismntpt(vp)) {
4280                         error = EACCES;
4281                 } else {
4282                         /*
4283                          * System V defines rmdir to return EEXIST,
4284                          * not ENOTEMPTY, if the directory is not
4285                          * empty.  A System V NFS server needs to map
4286                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4287                          * transmit over the wire.
4288                          */
4289                         if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
4290                             NULL, 0)) == EEXIST)
4291                                 error = ENOTEMPTY;
4292                 }
4293         } else {
4294                 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4295                     fp != NULL) {
4296                         struct vattr va;
4297                         vnode_t *tvp;
4298
4299                         rfs4_dbe_lock(fp->rf_dbe);
4300                         tvp = fp->rf_vp;
4301                         if (tvp)
4302                                 VN_HOLD(tvp);
4303                         rfs4_dbe_unlock(fp->rf_dbe);
4304
4305                         if (tvp) {
4306                                 /*
4307                                  * This is va_seq safe because we are not
4308                                  * manipulating dvp.
4309                                  */
4310                                 va.va_mask = AT_NLINK;
4311                                 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4312                                     va.va_nlink == 0) {
4313                                         /* Remove state on file remove */
4314                                         if (in_crit) {
4315                                                 nbl_end_crit(vp);
4316                                                 in_crit = 0;
4317                                         }
4318                                         rfs4_close_all_state(fp);
4319                                 }
4320                                 VN_RELE(tvp);
4321                         }
4322                 }
4323         }
4324
4325         if (in_crit)
4326                 nbl_end_crit(vp);
4327         VN_RELE(vp);
4328
4329         if (fp) {
4330                 rfs4_clear_dont_grant(fp);
4331                 rfs4_file_rele(fp);
4332         }
4333         if (nm != name)
4334                 kmem_free(name, MAXPATHLEN + 1);
4335         kmem_free(nm, len);
4336
4337         if (error) {
4338                 *cs->statusp = resp->status = puterrno4(error);
4339                 goto out;
4340         }
4341
4342         /*
4343          * Get the initial "after" sequence number, if it fails, set to zero
4344          */
4345         idva.va_mask = AT_SEQ;
4346         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4347                 idva.va_seq = 0;
4348
4349         /*
4350          * Force modified data and metadata out to stable storage.
4351          */
4352         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4353
4354         /*
4355          * Get "after" change value, if it fails, simply return the
4356          * before value.
4357          */
4358         adva.va_mask = AT_CTIME|AT_SEQ;
4359         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4360                 adva.va_ctime = bdva.va_ctime;
4361                 adva.va_seq = 0;
4362         }
4363
4364         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4365
4366         /*
4367          * The cinfo.atomic = TRUE only if we have
4368          * non-zero va_seq's, and it has incremented by exactly one
4369          * during the VOP_REMOVE/RMDIR and it didn't change during
4370          * the VOP_FSYNC.
4371          */
4372         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4373             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4374                 resp->cinfo.atomic = TRUE;
4375         else
4376                 resp->cinfo.atomic = FALSE;
4377
4378         *cs->statusp = resp->status = NFS4_OK;
4379
4380 out:
4381         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4382             REMOVE4res *, resp);
4383 }
4384
4385 /*
4386  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4387  *              oldname and newname.
4388  *      res: status. If success - CURRENT_FH unchanged, return change_info
4389  *              for both from and target directories.
4390  */
4391 /* ARGSUSED */
4392 static void
4393 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4394     struct compound_state *cs)
4395 {
4396         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4397         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4398         int error;
4399         vnode_t *odvp;
4400         vnode_t *ndvp;
4401         vnode_t *srcvp, *targvp;
4402         struct vattr obdva, oidva, oadva;
4403         struct vattr nbdva, nidva, nadva;
4404         char *onm, *nnm;
4405         uint_t olen, nlen;
4406         rfs4_file_t *fp, *sfp;
4407         int in_crit_src, in_crit_targ;
4408         int fp_rele_grant_hold, sfp_rele_grant_hold;
4409         bslabel_t *clabel;
4410         struct sockaddr *ca;
4411         char *converted_onm = NULL;
4412         char *converted_nnm = NULL;
4413         nfsstat4 status;
4414
4415         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4416             RENAME4args *, args);
4417
4418         fp = sfp = NULL;
4419         srcvp = targvp = NULL;
4420         in_crit_src = in_crit_targ = 0;
4421         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4422
4423         /* CURRENT_FH: target directory */
4424         ndvp = cs->vp;
4425         if (ndvp == NULL) {
4426                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4427                 goto out;
4428         }
4429
4430         /* SAVED_FH: from directory */
4431         odvp = cs->saved_vp;
4432         if (odvp == NULL) {
4433                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4434                 goto out;
4435         }
4436
4437         if (cs->access == CS_ACCESS_DENIED) {
4438                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4439                 goto out;
4440         }
4441
4442         /*
4443          * If there is an unshared filesystem mounted on this vnode,
4444          * do not allow to rename objects in this directory.
4445          */
4446         if (vn_ismntpt(odvp)) {
4447                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4448                 goto out;
4449         }
4450
4451         /*
4452          * If there is an unshared filesystem mounted on this vnode,
4453          * do not allow to rename to this directory.
4454          */
4455         if (vn_ismntpt(ndvp)) {
4456                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4457                 goto out;
4458         }
4459
4460         if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4461                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4462                 goto out;
4463         }
4464
4465         if (cs->saved_exi != cs->exi) {
4466                 *cs->statusp = resp->status = NFS4ERR_XDEV;
4467                 goto out;
4468         }
4469
4470         status = utf8_dir_verify(&args->oldname);
4471         if (status != NFS4_OK) {
4472                 *cs->statusp = resp->status = status;
4473                 goto out;
4474         }
4475
4476         status = utf8_dir_verify(&args->newname);
4477         if (status != NFS4_OK) {
4478                 *cs->statusp = resp->status = status;
4479                 goto out;
4480         }
4481
4482         onm = utf8_to_fn(&args->oldname, &olen, NULL);
4483         if (onm == NULL) {
4484                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4485                 goto out;
4486         }
4487         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4488         nlen = MAXPATHLEN + 1;
4489         converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4490             nlen);
4491
4492         if (converted_onm == NULL) {
4493                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4494                 kmem_free(onm, olen);
4495                 goto out;
4496         }
4497
4498         nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4499         if (nnm == NULL) {
4500                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4501                 if (onm != converted_onm)
4502                         kmem_free(converted_onm, MAXPATHLEN + 1);
4503                 kmem_free(onm, olen);
4504                 goto out;
4505         }
4506         converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4507             MAXPATHLEN  + 1);
4508
4509         if (converted_nnm == NULL) {
4510                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4511                 kmem_free(nnm, nlen);
4512                 nnm = NULL;
4513                 if (onm != converted_onm)
4514                         kmem_free(converted_onm, MAXPATHLEN + 1);
4515                 kmem_free(onm, olen);
4516                 goto out;
4517         }
4518
4519
4520         if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4521                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4522                 kmem_free(onm, olen);
4523                 kmem_free(nnm, nlen);
4524                 goto out;
4525         }
4526
4527
4528         if (rdonly4(cs->exi, cs->vp, req)) {
4529                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4530                 if (onm != converted_onm)
4531                         kmem_free(converted_onm, MAXPATHLEN + 1);
4532                 kmem_free(onm, olen);
4533                 if (nnm != converted_nnm)
4534                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4535                 kmem_free(nnm, nlen);
4536                 goto out;
4537         }
4538
4539         /* check label of the target dir */
4540         if (is_system_labeled()) {
4541                 ASSERT(req->rq_label != NULL);
4542                 clabel = req->rq_label;
4543                 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4544                     "got client label from request(1)",
4545                     struct svc_req *, req);
4546                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4547                         if (!do_rfs_label_check(clabel, ndvp,
4548                             EQUALITY_CHECK, cs->exi)) {
4549                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4550                                 goto err_out;
4551                         }
4552                 }
4553         }
4554
4555         /*
4556          * Is the source a file and have a delegation?
4557          * We don't need to acquire va_seq before these lookups, if
4558          * it causes an update, cinfo.before will not match, which will
4559          * trigger a cache flush even if atomic is TRUE.
4560          */
4561         if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4562             &error, cs->cr)) {
4563                 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4564                     NULL)) {
4565                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4566                         goto err_out;
4567                 }
4568         }
4569
4570         if (srcvp == NULL) {
4571                 *cs->statusp = resp->status = puterrno4(error);
4572                 if (onm != converted_onm)
4573                         kmem_free(converted_onm, MAXPATHLEN + 1);
4574                 kmem_free(onm, olen);
4575                 if (nnm != converted_nnm)
4576                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4577                 kmem_free(nnm, nlen);
4578                 goto out;
4579         }
4580
4581         sfp_rele_grant_hold = 1;
4582
4583         /* Does the destination exist and a file and have a delegation? */
4584         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4585             NULL, cs->cr)) {
4586                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4587                     NULL)) {
4588                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4589                         goto err_out;
4590                 }
4591         }
4592         fp_rele_grant_hold = 1;
4593
4594
4595         /* Check for NBMAND lock on both source and target */
4596         if (nbl_need_check(srcvp)) {
4597                 nbl_start_crit(srcvp, RW_READER);
4598                 in_crit_src = 1;
4599                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4600                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4601                         goto err_out;
4602                 }
4603         }
4604
4605         if (targvp && nbl_need_check(targvp)) {
4606                 nbl_start_crit(targvp, RW_READER);
4607                 in_crit_targ = 1;
4608                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4609                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4610                         goto err_out;
4611                 }
4612         }
4613
4614         /* Get source "before" change value */
4615         obdva.va_mask = AT_CTIME|AT_SEQ;
4616         error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4617         if (!error) {
4618                 nbdva.va_mask = AT_CTIME|AT_SEQ;
4619                 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4620         }
4621         if (error) {
4622                 *cs->statusp = resp->status = puterrno4(error);
4623                 goto err_out;
4624         }
4625
4626         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4627         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4628
4629         if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4630             cs->cr, NULL, 0)) == 0 && fp != NULL) {
4631                 struct vattr va;
4632                 vnode_t *tvp;
4633
4634                 rfs4_dbe_lock(fp->rf_dbe);
4635                 tvp = fp->rf_vp;
4636                 if (tvp)
4637                         VN_HOLD(tvp);
4638                 rfs4_dbe_unlock(fp->rf_dbe);
4639
4640                 if (tvp) {
4641                         va.va_mask = AT_NLINK;
4642                         if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4643                             va.va_nlink == 0) {
4644                                 /* The file is gone and so should the state */
4645                                 if (in_crit_targ) {
4646                                         nbl_end_crit(targvp);
4647                                         in_crit_targ = 0;
4648                                 }
4649                                 rfs4_close_all_state(fp);
4650                         }
4651                         VN_RELE(tvp);
4652                 }
4653         }
4654         if (error == 0)
4655                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4656
4657         if (in_crit_src)
4658                 nbl_end_crit(srcvp);
4659         if (srcvp)
4660                 VN_RELE(srcvp);
4661         if (in_crit_targ)
4662                 nbl_end_crit(targvp);
4663         if (targvp)
4664                 VN_RELE(targvp);
4665
4666         if (sfp) {
4667                 rfs4_clear_dont_grant(sfp);
4668                 rfs4_file_rele(sfp);
4669         }
4670         if (fp) {
4671                 rfs4_clear_dont_grant(fp);
4672                 rfs4_file_rele(fp);
4673         }
4674
4675         if (converted_onm != onm)
4676                 kmem_free(converted_onm, MAXPATHLEN + 1);
4677         kmem_free(onm, olen);
4678         if (converted_nnm != nnm)
4679                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4680         kmem_free(nnm, nlen);
4681
4682         /*
4683          * Get the initial "after" sequence number, if it fails, set to zero
4684          */
4685         oidva.va_mask = AT_SEQ;
4686         if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4687                 oidva.va_seq = 0;
4688
4689         nidva.va_mask = AT_SEQ;
4690         if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4691                 nidva.va_seq = 0;
4692
4693         /*
4694          * Force modified data and metadata out to stable storage.
4695          */
4696         (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4697         (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4698
4699         if (error) {
4700                 *cs->statusp = resp->status = puterrno4(error);
4701                 goto out;
4702         }
4703
4704         /*
4705          * Get "after" change values, if it fails, simply return the
4706          * before value.
4707          */
4708         oadva.va_mask = AT_CTIME|AT_SEQ;
4709         if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4710                 oadva.va_ctime = obdva.va_ctime;
4711                 oadva.va_seq = 0;
4712         }
4713
4714         nadva.va_mask = AT_CTIME|AT_SEQ;
4715         if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4716                 nadva.va_ctime = nbdva.va_ctime;
4717                 nadva.va_seq = 0;
4718         }
4719
4720         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4721         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4722
4723         /*
4724          * The cinfo.atomic = TRUE only if we have
4725          * non-zero va_seq's, and it has incremented by exactly one
4726          * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4727          */
4728         if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4729             oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4730                 resp->source_cinfo.atomic = TRUE;
4731         else
4732                 resp->source_cinfo.atomic = FALSE;
4733
4734         if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4735             nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4736                 resp->target_cinfo.atomic = TRUE;
4737         else
4738                 resp->target_cinfo.atomic = FALSE;
4739
4740 #ifdef  VOLATILE_FH_TEST
4741         {
4742         extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4743
4744         /*
4745          * Add the renamed file handle to the volatile rename list
4746          */
4747         if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4748                 /* file handles may expire on rename */
4749                 vnode_t *vp;
4750
4751                 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4752                 /*
4753                  * Already know that nnm will be a valid string
4754                  */
4755                 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4756                     NULL, NULL, NULL);
4757                 kmem_free(nnm, nlen);
4758                 if (!error) {
4759                         add_volrnm_fh(cs->exi, vp);
4760                         VN_RELE(vp);
4761                 }
4762         }
4763         }
4764 #endif  /* VOLATILE_FH_TEST */
4765
4766         *cs->statusp = resp->status = NFS4_OK;
4767 out:
4768         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4769             RENAME4res *, resp);
4770         return;
4771
4772 err_out:
4773         if (onm != converted_onm)
4774                 kmem_free(converted_onm, MAXPATHLEN + 1);
4775         if (onm != NULL)
4776                 kmem_free(onm, olen);
4777         if (nnm != converted_nnm)
4778                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4779         if (nnm != NULL)
4780                 kmem_free(nnm, nlen);
4781
4782         if (in_crit_src) nbl_end_crit(srcvp);
4783         if (in_crit_targ) nbl_end_crit(targvp);
4784         if (targvp) VN_RELE(targvp);
4785         if (srcvp) VN_RELE(srcvp);
4786         if (sfp) {
4787                 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4788                 rfs4_file_rele(sfp);
4789         }
4790         if (fp) {
4791                 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4792                 rfs4_file_rele(fp);
4793         }
4794
4795         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4796             RENAME4res *, resp);
4797 }
4798
4799 /* ARGSUSED */
4800 static void
4801 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4802     struct compound_state *cs)
4803 {
4804         RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4805         RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4806         rfs4_client_t *cp;
4807
4808         DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4809             RENEW4args *, args);
4810
4811         if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4812                 *cs->statusp = resp->status =
4813                     rfs4_check_clientid(&args->clientid, 0);
4814                 goto out;
4815         }
4816
4817         if (rfs4_lease_expired(cp)) {
4818                 rfs4_client_rele(cp);
4819                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4820                 goto out;
4821         }
4822
4823         rfs4_update_lease(cp);
4824
4825         mutex_enter(cp->rc_cbinfo.cb_lock);
4826         if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4827                 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4828                 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4829         } else {
4830                 *cs->statusp = resp->status = NFS4_OK;
4831         }
4832         mutex_exit(cp->rc_cbinfo.cb_lock);
4833
4834         rfs4_client_rele(cp);
4835
4836 out:
4837         DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4838             RENEW4res *, resp);
4839 }
4840
4841 /* ARGSUSED */
4842 static void
4843 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4844     struct compound_state *cs)
4845 {
4846         RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4847
4848         DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4849
4850         /* No need to check cs->access - we are not accessing any object */
4851         if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4852                 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4853                 goto out;
4854         }
4855         if (cs->vp != NULL) {
4856                 VN_RELE(cs->vp);
4857         }
4858         cs->vp = cs->saved_vp;
4859         cs->saved_vp = NULL;
4860         cs->exi = cs->saved_exi;
4861         nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4862         *cs->statusp = resp->status = NFS4_OK;
4863         cs->deleg = FALSE;
4864
4865 out:
4866         DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4867             RESTOREFH4res *, resp);
4868 }
4869
4870 /* ARGSUSED */
4871 static void
4872 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4873     struct compound_state *cs)
4874 {
4875         SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4876
4877         DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4878
4879         /* No need to check cs->access - we are not accessing any object */
4880         if (cs->vp == NULL) {
4881                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4882                 goto out;
4883         }
4884         if (cs->saved_vp != NULL) {
4885                 VN_RELE(cs->saved_vp);
4886         }
4887         cs->saved_vp = cs->vp;
4888         VN_HOLD(cs->saved_vp);
4889         cs->saved_exi = cs->exi;
4890         /*
4891          * since SAVEFH is fairly rare, don't alloc space for its fh
4892          * unless necessary.
4893          */
4894         if (cs->saved_fh.nfs_fh4_val == NULL) {
4895                 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4896         }
4897         nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4898         *cs->statusp = resp->status = NFS4_OK;
4899
4900 out:
4901         DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4902             SAVEFH4res *, resp);
4903 }
4904
4905 /*
4906  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4907  * return the bitmap of attrs that were set successfully. It is also
4908  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4909  * always be called only after rfs4_do_set_attrs().
4910  *
4911  * Verify that the attributes are same as the expected ones. sargp->vap
4912  * and sargp->sbp contain the input attributes as translated from fattr4.
4913  *
4914  * This function verifies only the attrs that correspond to a vattr or
4915  * vfsstat struct. That is because of the extra step needed to get the
4916  * corresponding system structs. Other attributes have already been set or
4917  * verified by do_rfs4_set_attrs.
4918  *
4919  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4920  */
4921 static int
4922 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4923     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4924 {
4925         int error, ret_error = 0;
4926         int i, k;
4927         uint_t sva_mask = sargp->vap->va_mask;
4928         uint_t vbit;
4929         union nfs4_attr_u *na;
4930         uint8_t *amap;
4931         bool_t getsb = ntovp->vfsstat;
4932
4933         if (sva_mask != 0) {
4934                 /*
4935                  * Okay to overwrite sargp->vap because we verify based
4936                  * on the incoming values.
4937                  */
4938                 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4939                     sargp->cs->cr, NULL);
4940                 if (ret_error) {
4941                         if (resp == NULL)
4942                                 return (ret_error);
4943                         /*
4944                          * Must return bitmap of successful attrs
4945                          */
4946                         sva_mask = 0;   /* to prevent checking vap later */
4947                 } else {
4948                         /*
4949                          * Some file systems clobber va_mask. it is probably
4950                          * wrong of them to do so, nonethless we practice
4951                          * defensive coding.
4952                          * See bug id 4276830.
4953                          */
4954                         sargp->vap->va_mask = sva_mask;
4955                 }
4956         }
4957
4958         if (getsb) {
4959                 /*
4960                  * Now get the superblock and loop on the bitmap, as there is
4961                  * no simple way of translating from superblock to bitmap4.
4962                  */
4963                 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4964                 if (ret_error) {
4965                         if (resp == NULL)
4966                                 goto errout;
4967                         getsb = FALSE;
4968                 }
4969         }
4970
4971         /*
4972          * Now loop and verify each attribute which getattr returned
4973          * whether it's the same as the input.
4974          */
4975         if (resp == NULL && !getsb && (sva_mask == 0))
4976                 goto errout;
4977
4978         na = ntovp->na;
4979         amap = ntovp->amap;
4980         k = 0;
4981         for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4982                 k = *amap;
4983                 ASSERT(nfs4_ntov_map[k].nval == k);
4984                 vbit = nfs4_ntov_map[k].vbit;
4985
4986                 /*
4987                  * If vattr attribute but VOP_GETATTR failed, or it's
4988                  * superblock attribute but VFS_STATVFS failed, skip
4989                  */
4990                 if (vbit) {
4991                         if ((vbit & sva_mask) == 0)
4992                                 continue;
4993                 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4994                         continue;
4995                 }
4996                 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4997                 if (resp != NULL) {
4998                         if (error)
4999                                 ret_error = -1; /* not all match */
5000                         else    /* update response bitmap */
5001                                 *resp |= nfs4_ntov_map[k].fbit;
5002                         continue;
5003                 }
5004                 if (error) {
5005                         ret_error = -1; /* not all match */
5006                         break;
5007                 }
5008         }
5009 errout:
5010         return (ret_error);
5011 }
5012
5013 /*
5014  * Decode the attribute to be set/verified. If the attr requires a sys op
5015  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5016  * call the sv_getit function for it, because the sys op hasn't yet been done.
5017  * Return 0 for success, error code if failed.
5018  *
5019  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5020  */
5021 static int
5022 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5023     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5024 {
5025         int error = 0;
5026         bool_t set_later;
5027
5028         sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5029
5030         if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5031                 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5032                 /*
5033                  * don't verify yet if a vattr or sb dependent attr,
5034                  * because we don't have their sys values yet.
5035                  * Will be done later.
5036                  */
5037                 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5038                         /*
5039                          * ACLs are a special case, since setting the MODE
5040                          * conflicts with setting the ACL.  We delay setting
5041                          * the ACL until all other attributes have been set.
5042                          * The ACL gets set in do_rfs4_op_setattr().
5043                          */
5044                         if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5045                                 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5046                                     sargp, nap);
5047                                 if (error) {
5048                                         xdr_free(nfs4_ntov_map[k].xfunc,
5049                                             (caddr_t)nap);
5050                                 }
5051                         }
5052                 }
5053         } else {
5054 #ifdef  DEBUG
5055                 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5056                     "decoding attribute %d\n", k);
5057 #endif
5058                 error = EINVAL;
5059         }
5060         if (!error && resp_bval && !set_later) {
5061                 *resp_bval |= nfs4_ntov_map[k].fbit;
5062         }
5063
5064         return (error);
5065 }
5066
5067 /*
5068  * Set vattr based on incoming fattr4 attrs - used by setattr.
5069  * Set response mask. Ignore any values that are not writable vattr attrs.
5070  */
5071 static nfsstat4
5072 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5073     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5074     nfs4_attr_cmd_t cmd)
5075 {
5076         int error = 0;
5077         int i;
5078         char *attrs = fattrp->attrlist4;
5079         uint32_t attrslen = fattrp->attrlist4_len;
5080         XDR xdr;
5081         nfsstat4 status = NFS4_OK;
5082         vnode_t *vp = cs->vp;
5083         union nfs4_attr_u *na;
5084         uint8_t *amap;
5085
5086 #ifndef lint
5087         /*
5088          * Make sure that maximum attribute number can be expressed as an
5089          * 8 bit quantity.
5090          */
5091         ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5092 #endif
5093
5094         if (vp == NULL) {
5095                 if (resp)
5096                         *resp = 0;
5097                 return (NFS4ERR_NOFILEHANDLE);
5098         }
5099         if (cs->access == CS_ACCESS_DENIED) {
5100                 if (resp)
5101                         *resp = 0;
5102                 return (NFS4ERR_ACCESS);
5103         }
5104
5105         sargp->op = cmd;
5106         sargp->cs = cs;
5107         sargp->flag = 0;        /* may be set later */
5108         sargp->vap->va_mask = 0;
5109         sargp->rdattr_error = NFS4_OK;
5110         sargp->rdattr_error_req = FALSE;
5111         /* sargp->sbp is set by the caller */
5112
5113         xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5114
5115         na = ntovp->na;
5116         amap = ntovp->amap;
5117
5118         /*
5119          * The following loop iterates on the nfs4_ntov_map checking
5120          * if the fbit is set in the requested bitmap.
5121          * If set then we process the arguments using the
5122          * rfs4_fattr4 conversion functions to populate the setattr
5123          * vattr and va_mask. Any settable attrs that are not using vattr
5124          * will be set in this loop.
5125          */
5126         for (i = 0; i < nfs4_ntov_map_size; i++) {
5127                 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5128                         continue;
5129                 }
5130                 /*
5131                  * If setattr, must be a writable attr.
5132                  * If verify/nverify, must be a readable attr.
5133                  */
5134                 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5135                     NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5136                         /*
5137                          * Client tries to set/verify an
5138                          * unsupported attribute, tries to set
5139                          * a read only attr or verify a write
5140                          * only one - error!
5141                          */
5142                         break;
5143                 }
5144                 /*
5145                  * Decode the attribute to set/verify
5146                  */
5147                 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5148                     &xdr, resp ? resp : NULL, na);
5149                 if (error)
5150                         break;
5151                 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5152                 na++;
5153                 (ntovp->attrcnt)++;
5154                 if (nfs4_ntov_map[i].vfsstat)
5155                         ntovp->vfsstat = TRUE;
5156         }
5157
5158         if (error != 0)
5159                 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5160                     puterrno4(error));
5161         /* xdrmem_destroy(&xdrs); */    /* NO-OP */
5162         return (status);
5163 }
5164
5165 static nfsstat4
5166 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5167     stateid4 *stateid)
5168 {
5169         int error = 0;
5170         struct nfs4_svgetit_arg sarg;
5171         bool_t trunc;
5172
5173         nfsstat4 status = NFS4_OK;
5174         cred_t *cr = cs->cr;
5175         vnode_t *vp = cs->vp;
5176         struct nfs4_ntov_table ntov;
5177         struct statvfs64 sb;
5178         struct vattr bva;
5179         struct flock64 bf;
5180         int in_crit = 0;
5181         uint_t saved_mask = 0;
5182         caller_context_t ct;
5183
5184         *resp = 0;
5185         sarg.sbp = &sb;
5186         sarg.is_referral = B_FALSE;
5187         nfs4_ntov_table_init(&ntov);
5188         status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5189             NFS4ATTR_SETIT);
5190         if (status != NFS4_OK) {
5191                 /*
5192                  * failed set attrs
5193                  */
5194                 goto done;
5195         }
5196         if ((sarg.vap->va_mask == 0) &&
5197             (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5198                 /*
5199                  * no further work to be done
5200                  */
5201                 goto done;
5202         }
5203
5204         /*
5205          * If we got a request to set the ACL and the MODE, only
5206          * allow changing VSUID, VSGID, and VSVTX.  Attempting
5207          * to change any other bits, along with setting an ACL,
5208          * gives NFS4ERR_INVAL.
5209          */
5210         if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5211             (fattrp->attrmask & FATTR4_MODE_MASK)) {
5212                 vattr_t va;
5213
5214                 va.va_mask = AT_MODE;
5215                 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5216                 if (error) {
5217                         status = puterrno4(error);
5218                         goto done;
5219                 }
5220                 if ((sarg.vap->va_mode ^ va.va_mode) &
5221                     ~(VSUID | VSGID | VSVTX)) {
5222                         status = NFS4ERR_INVAL;
5223                         goto done;
5224                 }
5225         }
5226
5227         /* Check stateid only if size has been set */
5228         if (sarg.vap->va_mask & AT_SIZE) {
5229                 trunc = (sarg.vap->va_size == 0);
5230                 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5231                     trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5232                 if (status != NFS4_OK)
5233                         goto done;
5234         } else {
5235                 ct.cc_sysid = 0;
5236                 ct.cc_pid = 0;
5237                 ct.cc_caller_id = nfs4_srv_caller_id;
5238                 ct.cc_flags = CC_DONTBLOCK;
5239         }
5240
5241         /* XXX start of possible race with delegations */
5242
5243         /*
5244          * We need to specially handle size changes because it is
5245          * possible for the client to create a file with read-only
5246          * modes, but with the file opened for writing. If the client
5247          * then tries to set the file size, e.g. ftruncate(3C),
5248          * fcntl(F_FREESP), the normal access checking done in
5249          * VOP_SETATTR would prevent the client from doing it even though
5250          * it should be allowed to do so.  To get around this, we do the
5251          * access checking for ourselves and use VOP_SPACE which doesn't
5252          * do the access checking.
5253          * Also the client should not be allowed to change the file
5254          * size if there is a conflicting non-blocking mandatory lock in
5255          * the region of the change.
5256          */
5257         if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5258                 u_offset_t offset;
5259                 ssize_t length;
5260
5261                 /*
5262                  * ufs_setattr clears AT_SIZE from vap->va_mask, but
5263                  * before returning, sarg.vap->va_mask is used to
5264                  * generate the setattr reply bitmap.  We also clear
5265                  * AT_SIZE below before calling VOP_SPACE.  For both
5266                  * of these cases, the va_mask needs to be saved here
5267                  * and restored after calling VOP_SETATTR.
5268                  */
5269                 saved_mask = sarg.vap->va_mask;
5270
5271                 /*
5272                  * Check any possible conflict due to NBMAND locks.
5273                  * Get into critical region before VOP_GETATTR, so the
5274                  * size attribute is valid when checking conflicts.
5275                  */
5276                 if (nbl_need_check(vp)) {
5277                         nbl_start_crit(vp, RW_READER);
5278                         in_crit = 1;
5279                 }
5280
5281                 bva.va_mask = AT_UID|AT_SIZE;
5282                 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5283                         status = puterrno4(error);
5284                         goto done;
5285                 }
5286
5287                 if (in_crit) {
5288                         if (sarg.vap->va_size < bva.va_size) {
5289                                 offset = sarg.vap->va_size;
5290                                 length = bva.va_size - sarg.vap->va_size;
5291                         } else {
5292                                 offset = bva.va_size;
5293                                 length = sarg.vap->va_size - bva.va_size;
5294                         }
5295                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5296                             &ct)) {
5297                                 status = NFS4ERR_LOCKED;
5298                                 goto done;
5299                         }
5300                 }
5301
5302                 if (crgetuid(cr) == bva.va_uid) {
5303                         sarg.vap->va_mask &= ~AT_SIZE;
5304                         bf.l_type = F_WRLCK;
5305                         bf.l_whence = 0;
5306                         bf.l_start = (off64_t)sarg.vap->va_size;
5307                         bf.l_len = 0;
5308                         bf.l_sysid = 0;
5309                         bf.l_pid = 0;
5310                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5311                             (offset_t)sarg.vap->va_size, cr, &ct);
5312                 }
5313         }
5314
5315         if (!error && sarg.vap->va_mask != 0)
5316                 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5317
5318         /* restore va_mask -- ufs_setattr clears AT_SIZE */
5319         if (saved_mask & AT_SIZE)
5320                 sarg.vap->va_mask |= AT_SIZE;
5321
5322         /*
5323          * If an ACL was being set, it has been delayed until now,
5324          * in order to set the mode (via the VOP_SETATTR() above) first.
5325          */
5326         if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5327                 int i;
5328
5329                 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5330                         if (ntov.amap[i] == FATTR4_ACL)
5331                                 break;
5332                 if (i < NFS4_MAXNUM_ATTRS) {
5333                         error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5334                             NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5335                         if (error == 0) {
5336                                 *resp |= FATTR4_ACL_MASK;
5337                         } else if (error == ENOTSUP) {
5338                                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5339                                 status = NFS4ERR_ATTRNOTSUPP;
5340                                 goto done;
5341                         }
5342                 } else {
5343                         NFS4_DEBUG(rfs4_debug,
5344                             (CE_NOTE, "do_rfs4_op_setattr: "
5345                             "unable to find ACL in fattr4"));
5346                         error = EINVAL;
5347                 }
5348         }
5349
5350         if (error) {
5351                 /* check if a monitor detected a delegation conflict */
5352                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5353                         status = NFS4ERR_DELAY;
5354                 else
5355                         status = puterrno4(error);
5356
5357                 /*
5358                  * Set the response bitmap when setattr failed.
5359                  * If VOP_SETATTR partially succeeded, test by doing a
5360                  * VOP_GETATTR on the object and comparing the data
5361                  * to the setattr arguments.
5362                  */
5363                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5364         } else {
5365                 /*
5366                  * Force modified metadata out to stable storage.
5367                  */
5368                 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5369                 /*
5370                  * Set response bitmap
5371                  */
5372                 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5373         }
5374
5375 /* Return early and already have a NFSv4 error */
5376 done:
5377         /*
5378          * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5379          * conversion sets both readable and writeable NFS4 attrs
5380          * for AT_MTIME and AT_ATIME.  The line below masks out
5381          * unrequested attrs from the setattr result bitmap.  This
5382          * is placed after the done: label to catch the ATTRNOTSUP
5383          * case.
5384          */
5385         *resp &= fattrp->attrmask;
5386
5387         if (in_crit)
5388                 nbl_end_crit(vp);
5389
5390         nfs4_ntov_table_free(&ntov, &sarg);
5391
5392         return (status);
5393 }
5394
5395 /* ARGSUSED */
5396 static void
5397 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5398     struct compound_state *cs)
5399 {
5400         SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5401         SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5402         bslabel_t *clabel;
5403
5404         DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5405             SETATTR4args *, args);
5406
5407         if (cs->vp == NULL) {
5408                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5409                 goto out;
5410         }
5411
5412         /*
5413          * If there is an unshared filesystem mounted on this vnode,
5414          * do not allow to setattr on this vnode.
5415          */
5416         if (vn_ismntpt(cs->vp)) {
5417                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5418                 goto out;
5419         }
5420
5421         resp->attrsset = 0;
5422
5423         if (rdonly4(cs->exi, cs->vp, req)) {
5424                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5425                 goto out;
5426         }
5427
5428         /* check label before setting attributes */
5429         if (is_system_labeled()) {
5430                 ASSERT(req->rq_label != NULL);
5431                 clabel = req->rq_label;
5432                 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5433                     "got client label from request(1)",
5434                     struct svc_req *, req);
5435                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5436                         if (!do_rfs_label_check(clabel, cs->vp,
5437                             EQUALITY_CHECK, cs->exi)) {
5438                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5439                                 goto out;
5440                         }
5441                 }
5442         }
5443
5444         *cs->statusp = resp->status =
5445             do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5446             &args->stateid);
5447
5448 out:
5449         DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5450             SETATTR4res *, resp);
5451 }
5452
5453 /* ARGSUSED */
5454 static void
5455 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5456     struct compound_state *cs)
5457 {
5458         /*
5459          * verify and nverify are exactly the same, except that nverify
5460          * succeeds when some argument changed, and verify succeeds when
5461          * when none changed.
5462          */
5463
5464         VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5465         VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5466
5467         int error;
5468         struct nfs4_svgetit_arg sarg;
5469         struct statvfs64 sb;
5470         struct nfs4_ntov_table ntov;
5471
5472         DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5473             VERIFY4args *, args);
5474
5475         if (cs->vp == NULL) {
5476                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5477                 goto out;
5478         }
5479
5480         sarg.sbp = &sb;
5481         sarg.is_referral = B_FALSE;
5482         nfs4_ntov_table_init(&ntov);
5483         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5484             &sarg, &ntov, NFS4ATTR_VERIT);
5485         if (resp->status != NFS4_OK) {
5486                 /*
5487                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5488                  * so could return -1 for "no match".
5489                  */
5490                 if (resp->status == -1)
5491                         resp->status = NFS4ERR_NOT_SAME;
5492                 goto done;
5493         }
5494         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5495         switch (error) {
5496         case 0:
5497                 resp->status = NFS4_OK;
5498                 break;
5499         case -1:
5500                 resp->status = NFS4ERR_NOT_SAME;
5501                 break;
5502         default:
5503                 resp->status = puterrno4(error);
5504                 break;
5505         }
5506 done:
5507         *cs->statusp = resp->status;
5508         nfs4_ntov_table_free(&ntov, &sarg);
5509 out:
5510         DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5511             VERIFY4res *, resp);
5512 }
5513
5514 /* ARGSUSED */
5515 static void
5516 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5517     struct compound_state *cs)
5518 {
5519         /*
5520          * verify and nverify are exactly the same, except that nverify
5521          * succeeds when some argument changed, and verify succeeds when
5522          * when none changed.
5523          */
5524
5525         NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5526         NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5527
5528         int error;
5529         struct nfs4_svgetit_arg sarg;
5530         struct statvfs64 sb;
5531         struct nfs4_ntov_table ntov;
5532
5533         DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5534             NVERIFY4args *, args);
5535
5536         if (cs->vp == NULL) {
5537                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5538                 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5539                     NVERIFY4res *, resp);
5540                 return;
5541         }
5542         sarg.sbp = &sb;
5543         sarg.is_referral = B_FALSE;
5544         nfs4_ntov_table_init(&ntov);
5545         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5546             &sarg, &ntov, NFS4ATTR_VERIT);
5547         if (resp->status != NFS4_OK) {
5548                 /*
5549                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5550                  * so could return -1 for "no match".
5551                  */
5552                 if (resp->status == -1)
5553                         resp->status = NFS4_OK;
5554                 goto done;
5555         }
5556         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5557         switch (error) {
5558         case 0:
5559                 resp->status = NFS4ERR_SAME;
5560                 break;
5561         case -1:
5562                 resp->status = NFS4_OK;
5563                 break;
5564         default:
5565                 resp->status = puterrno4(error);
5566                 break;
5567         }
5568 done:
5569         *cs->statusp = resp->status;
5570         nfs4_ntov_table_free(&ntov, &sarg);
5571
5572         DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5573             NVERIFY4res *, resp);
5574 }
5575
/*
 * Number of iovec entries rfs4_op_write() keeps in its on-stack array.
 * A write whose mblk chain needs more entries than this gets a temporary
 * array from kmem_alloc() instead.
 *
 * XXX - This should live in an NFS header file.
 */
#define MAX_IOVECS      12

5580
/*
 * WRITE operation handler.
 *
 * Writes args->data_len bytes at args->offset to the current filehandle
 * (cs->vp), which must be a regular file.  The data comes from an mblk
 * chain (args->mblk), from args->rlist, or from a flat buffer
 * (args->data_val), in that order of preference.
 *
 * The status is stored in resp->status and mirrored into *cs->statusp.
 * On success resp->count, resp->committed and resp->writeverf are filled in.
 */
/* ARGSUSED */
static void
rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
    struct compound_state *cs)
{
        WRITE4args *args = &argop->nfs_argop4_u.opwrite;
        WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
        int error;
        vnode_t *vp;
        struct vattr bva;
        u_offset_t rlimit;
        struct uio uio;
        struct iovec iov[MAX_IOVECS];   /* used when the chain is short */
        struct iovec *iovp;             /* iov, or a kmem_alloc'ed array */
        int iovcnt;
        int ioflag;
        cred_t *savecred, *cr;
        bool_t *deleg = &cs->deleg;
        nfsstat4 stat;
        int in_crit = 0;        /* set once nbl_start_crit() was called */
        caller_context_t ct;

        DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
            WRITE4args *, args);

        vp = cs->vp;
        if (vp == NULL) {
                *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
                goto out;
        }
        if (cs->access == CS_ACCESS_DENIED) {
                *cs->statusp = resp->status = NFS4ERR_ACCESS;
                goto out;
        }

        cr = cs->cr;

        /*
         * NOTE(review): ct is used below without being set here, so
         * rfs4_check_stateid() presumably fills it in -- confirm.
         */
        if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
            deleg, TRUE, &ct)) != NFS4_OK) {
                *cs->statusp = resp->status = stat;
                goto out;
        }

        /*
         * We have to enter the critical region before calling VOP_RWLOCK
         * to avoid a deadlock with ufs.
         */
        if (nbl_need_check(vp)) {
                nbl_start_crit(vp, RW_READER);
                in_crit = 1;
                if (nbl_conflict(vp, NBL_WRITE,
                    args->offset, args->data_len, 0, &ct)) {
                        *cs->statusp = resp->status = NFS4ERR_LOCKED;
                        goto out;
                }
        }

        /* Mode and owner are needed for the access checks below. */
        bva.va_mask = AT_MODE | AT_UID;
        error = VOP_GETATTR(vp, &bva, 0, cr, &ct);

        /*
         * If we can't get the attributes, then we can't do the
         * right access checking.  So, we'll fail the request.
         */
        if (error) {
                *cs->statusp = resp->status = puterrno4(error);
                goto out;
        }

        if (rdonly4(cs->exi, cs->vp, req)) {
                *cs->statusp = resp->status = NFS4ERR_ROFS;
                goto out;
        }

        if (vp->v_type != VREG) {
                *cs->statusp = resp->status =
                    ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
                goto out;
        }

        /* The file owner is not subjected to the VOP_ACCESS check. */
        if (crgetuid(cr) != bva.va_uid &&
            (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
                *cs->statusp = resp->status = puterrno4(error);
                goto out;
        }

        if (MANDLOCK(vp, bva.va_mode)) {
                *cs->statusp = resp->status = NFS4ERR_ACCESS;
                goto out;
        }

        /*
         * A zero length write succeeds without doing any I/O; the
         * requested stability level is simply echoed back.
         */
        if (args->data_len == 0) {
                *cs->statusp = resp->status = NFS4_OK;
                resp->count = 0;
                resp->committed = args->stable;
                resp->writeverf = Write4verf;
                goto out;
        }

        if (args->mblk != NULL) {
                mblk_t *m;
                uint_t bytes, round_len;

                /*
                 * Count the mblks that hold the data, rounded up to an
                 * XDR unit, to learn how many iovec entries are needed.
                 */
                iovcnt = 0;
                bytes = 0;
                round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
                for (m = args->mblk;
                    m != NULL && bytes < round_len;
                    m = m->b_cont) {
                        iovcnt++;
                        bytes += MBLKL(m);
                }
#ifdef DEBUG
                /* should have ended on an mblk boundary */
                if (bytes != round_len) {
                        printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
                            bytes, round_len, args->data_len);
                        printf("args=%p, args->mblk=%p, m=%p", (void *)args,
                            (void *)args->mblk, (void *)m);
                        ASSERT(bytes == round_len);
                }
#endif
                /* Use the on-stack array when it is large enough. */
                if (iovcnt <= MAX_IOVECS) {
                        iovp = iov;
                } else {
                        iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
                }
                mblk_to_iov(args->mblk, iovcnt, iovp);
        } else if (args->rlist != NULL) {
                /* Data is described by args->rlist: a single iovec. */
                iovcnt = 1;
                iovp = iov;
                iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
                iovp->iov_len = args->data_len;
        } else {
                /* Data is in one flat buffer: a single iovec. */
                iovcnt = 1;
                iovp = iov;
                iovp->iov_base = args->data_val;
                iovp->iov_len = args->data_len;
        }

        uio.uio_iov = iovp;
        uio.uio_iovcnt = iovcnt;

        uio.uio_segflg = UIO_SYSSPACE;
        uio.uio_extflg = UIO_COPY_DEFAULT;
        uio.uio_loffset = args->offset;
        uio.uio_resid = args->data_len;
        uio.uio_llimit = curproc->p_fsz_ctl;
        /* Trim the request so it does not run past uio_llimit. */
        rlimit = uio.uio_llimit - args->offset;
        if (rlimit < (u_offset_t)uio.uio_resid)
                uio.uio_resid = (int)rlimit;

        /* Translate the requested stability level into an I/O flag. */
        if (args->stable == UNSTABLE4)
                ioflag = 0;
        else if (args->stable == FILE_SYNC4)
                ioflag = FSYNC;
        else if (args->stable == DATA_SYNC4)
                ioflag = FDSYNC;
        else {
                /* Unknown stability level; release a heap iovec array. */
                if (iovp != iov)
                        kmem_free(iovp, sizeof (*iovp) * iovcnt);
                *cs->statusp = resp->status = NFS4ERR_INVAL;
                goto out;
        }

        /*
         * We're changing creds because VM may fault and we need
         * the cred of the current thread to be used if quota
         * checking is enabled.
         */
        savecred = curthread->t_cred;
        curthread->t_cred = cr;
        error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
        curthread->t_cred = savecred;

        if (iovp != iov)
                kmem_free(iovp, sizeof (*iovp) * iovcnt);

        if (error) {
                *cs->statusp = resp->status = puterrno4(error);
                goto out;
        }

        *cs->statusp = resp->status = NFS4_OK;
        /* Bytes written: what was requested minus what is left over. */
        resp->count = args->data_len - uio.uio_resid;

        /* Any synchronous request is reported as FILE_SYNC4. */
        if (ioflag == 0)
                resp->committed = UNSTABLE4;
        else
                resp->committed = FILE_SYNC4;

        resp->writeverf = Write4verf;

out:
        if (in_crit)
                nbl_end_crit(vp);

        DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
            WRITE4res *, resp);
}
5781
5782
5783 /* XXX put in a header file */
5784 extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5785
5786 void
5787 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5788     struct svc_req *req, cred_t *cr, int *rv)
5789 {
5790         uint_t i;
5791         struct compound_state cs;
5792
5793         if (rv != NULL)
5794                 *rv = 0;
5795         rfs4_init_compound_state(&cs);
5796         /*
5797          * Form a reply tag by copying over the reqeuest tag.
5798          */
5799         resp->tag.utf8string_val =
5800             kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5801         resp->tag.utf8string_len = args->tag.utf8string_len;
5802         bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5803             resp->tag.utf8string_len);
5804
5805         cs.statusp = &resp->status;
5806         cs.req = req;
5807
5808         /*
5809          * XXX for now, minorversion should be zero
5810          */
5811         if (args->minorversion != NFS4_MINORVERSION) {
5812                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5813                     &cs, COMPOUND4args *, args);
5814                 resp->array_len = 0;
5815                 resp->array = NULL;
5816                 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5817                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5818                     &cs, COMPOUND4res *, resp);
5819                 return;
5820         }
5821
5822         ASSERT(exi == NULL);
5823         ASSERT(cr == NULL);
5824
5825         cr = crget();
5826         ASSERT(cr != NULL);
5827
5828         if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5829                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5830                     &cs, COMPOUND4args *, args);
5831                 crfree(cr);
5832                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5833                     &cs, COMPOUND4res *, resp);
5834                 svcerr_badcred(req->rq_xprt);
5835                 if (rv != NULL)
5836                         *rv = 1;
5837                 return;
5838         }
5839         resp->array_len = args->array_len;
5840         resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5841             KM_SLEEP);
5842
5843         cs.basecr = cr;
5844
5845         DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5846             COMPOUND4args *, args);
5847
5848         /*
5849          * For now, NFS4 compound processing must be protected by
5850          * exported_lock because it can access more than one exportinfo
5851          * per compound and share/unshare can now change multiple
5852          * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5853          * per proc (excluding public exinfo), and exi_count design
5854          * is sufficient to protect concurrent execution of NFS2/3
5855          * ops along with unexport.  This lock will be removed as
5856          * part of the NFSv4 phase 2 namespace redesign work.
5857          */
5858         rw_enter(&exported_lock, RW_READER);
5859
5860         /*
5861          * If this is the first compound we've seen, we need to start all
5862          * new instances' grace periods.
5863          */
5864         if (rfs4_seen_first_compound == 0) {
5865                 rfs4_grace_start_new();
5866                 /*
5867                  * This must be set after rfs4_grace_start_new(), otherwise
5868                  * another thread could proceed past here before the former
5869                  * is finished.
5870                  */
5871                 rfs4_seen_first_compound = 1;
5872         }
5873
5874         for (i = 0; i < args->array_len && cs.cont; i++) {
5875                 nfs_argop4 *argop;
5876                 nfs_resop4 *resop;
5877                 uint_t op;
5878
5879                 argop = &args->array[i];
5880                 resop = &resp->array[i];
5881                 resop->resop = argop->argop;
5882                 op = (uint_t)resop->resop;
5883
5884                 if (op < rfsv4disp_cnt) {
5885                         /*
5886                          * Count the individual ops here; NULL and COMPOUND
5887                          * are counted in common_dispatch()
5888                          */
5889                         rfsproccnt_v4_ptr[op].value.ui64++;
5890
5891                         NFS4_DEBUG(rfs4_debug > 1,
5892                             (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5893                         (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5894                         NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5895                             rfs4_op_string[op], *cs.statusp));
5896                         if (*cs.statusp != NFS4_OK)
5897                                 cs.cont = FALSE;
5898                 } else {
5899                         /*
5900                          * This is effectively dead code since XDR code
5901                          * will have already returned BADXDR if op doesn't
5902                          * decode to legal value.  This only done for a
5903                          * day when XDR code doesn't verify v4 opcodes.
5904                          */
5905                         op = OP_ILLEGAL;
5906                         rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5907
5908                         rfs4_op_illegal(argop, resop, req, &cs);
5909                         cs.cont = FALSE;
5910                 }
5911
5912                 /*
5913                  * If not at last op, and if we are to stop, then
5914                  * compact the results array.
5915                  */
5916                 if ((i + 1) < args->array_len && !cs.cont) {
5917                         nfs_resop4 *new_res = kmem_alloc(
5918                             (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5919                         bcopy(resp->array,
5920                             new_res, (i+1) * sizeof (nfs_resop4));
5921                         kmem_free(resp->array,
5922                             args->array_len * sizeof (nfs_resop4));
5923
5924                         resp->array_len =  i + 1;
5925                         resp->array = new_res;
5926                 }
5927         }
5928
5929         rw_exit(&exported_lock);
5930
5931         DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5932             COMPOUND4res *, resp);
5933
5934         if (cs.vp)
5935                 VN_RELE(cs.vp);
5936         if (cs.saved_vp)
5937                 VN_RELE(cs.saved_vp);
5938         if (cs.saved_fh.nfs_fh4_val)
5939                 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5940
5941         if (cs.basecr)
5942                 crfree(cs.basecr);
5943         if (cs.cr)
5944                 crfree(cs.cr);
5945         /*
5946          * done with this compound request, free the label
5947          */
5948
5949         if (req->rq_label != NULL) {
5950                 kmem_free(req->rq_label, sizeof (bslabel_t));
5951                 req->rq_label = NULL;
5952         }
5953 }
5954
5955 /*
5956  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5957  * XXX zero out the tag and array values. Need to investigate why the
5958  * XXX calls occur, but at least prevent the panic for now.
5959  */
5960 void
5961 rfs4_compound_free(COMPOUND4res *resp)
5962 {
5963         uint_t i;
5964
5965         if (resp->tag.utf8string_val) {
5966                 UTF8STRING_FREE(resp->tag)
5967         }
5968
5969         for (i = 0; i < resp->array_len; i++) {
5970                 nfs_resop4 *resop;
5971                 uint_t op;
5972
5973                 resop = &resp->array[i];
5974                 op = (uint_t)resop->resop;
5975                 if (op < rfsv4disp_cnt) {
5976                         (*rfsv4disptab[op].dis_resfree)(resop);
5977                 }
5978         }
5979         if (resp->array != NULL) {
5980                 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5981         }
5982 }
5983
5984 /*
5985  * Process the value of the compound request rpc flags, as a bit-AND
5986  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5987  */
5988 void
5989 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5990 {
5991         int i;
5992         int flag = RPC_ALL;
5993
5994         for (i = 0; flag && i < args->array_len; i++) {
5995                 uint_t op;
5996
5997                 op = (uint_t)args->array[i].argop;
5998
5999                 if (op < rfsv4disp_cnt)
6000                         flag &= rfsv4disptab[op].dis_flags;
6001                 else
6002                         flag = 0;
6003         }
6004         *flagp = flag;
6005 }
6006
6007 nfsstat4
6008 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6009 {
6010         nfsstat4 e;
6011
6012         rfs4_dbe_lock(cp->rc_dbe);
6013
6014         if (cp->rc_sysidt != LM_NOSYSID) {
6015                 *sp = cp->rc_sysidt;
6016                 e = NFS4_OK;
6017
6018         } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6019                 *sp = cp->rc_sysidt;
6020                 e = NFS4_OK;
6021
6022                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6023                     "rfs4_client_sysid: allocated 0x%x\n", *sp));
6024         } else
6025                 e = NFS4ERR_DELAY;
6026
6027         rfs4_dbe_unlock(cp->rc_dbe);
6028         return (e);
6029 }
6030
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug-only helper: log one line through cmn_err() describing a lock
 * request - the caller's label str, the fcntl-style operation, and the
 * type, start, length and pid taken from flk.  A zero l_len is printed
 * as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *opname;
	char *lktype;
	longlong_t lklen;

	/* Name of the requested operation. */
	if (operation == F_GETLK)
		opname = "F_GETLK";
	else if (operation == F_SETLK)
		opname = "F_SETLK";
	else if (operation == F_SETLK_NBMAND)
		opname = "F_SETLK_NBMAND";
	else
		opname = "F_UNKNOWN";

	/* Name of the lock type. */
	if (flk->l_type == F_UNLCK)
		lktype = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		lktype = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		lktype = "F_WRLCK";
	else
		lktype = "F_UNKNOWN";

	ASSERT(flk->l_whence == 0);

	lklen = flk->l_len ? (longlong_t)flk->l_len : ~0LL;
	cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
	    str, opname, lktype, (longlong_t)flk->l_start, lklen, flk->l_pid);
}

/* Call lock_print() only when the debug condition d is true. */
#define LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
/* Non-DEBUG (or lint) builds: LOCK_PRINT expands to nothing. */
#define LOCK_PRINT(d, s, t, f)
#endif
6067
6068 /*ARGSUSED*/
6069 static bool_t
6070 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6071 {
6072         return (TRUE);
6073 }
6074
6075 /*
6076  * Look up the pathname using the vp in cs as the directory vnode.
6077  * cs->vp will be the vnode for the file on success
6078  */
6079
6080 static nfsstat4
6081 rfs4_lookup(component4 *component, struct svc_req *req,
6082     struct compound_state *cs)
6083 {
6084         char *nm;
6085         uint32_t len;
6086         nfsstat4 status;
6087         struct sockaddr *ca;
6088         char *name;
6089
6090         if (cs->vp == NULL) {
6091                 return (NFS4ERR_NOFILEHANDLE);
6092         }
6093         if (cs->vp->v_type != VDIR) {
6094                 return (NFS4ERR_NOTDIR);
6095         }
6096
6097         status = utf8_dir_verify(component);
6098         if (status != NFS4_OK)
6099                 return (status);
6100
6101         nm = utf8_to_fn(component, &len, NULL);
6102         if (nm == NULL) {
6103                 return (NFS4ERR_INVAL);
6104         }
6105
6106         if (len > MAXNAMELEN) {
6107                 kmem_free(nm, len);
6108                 return (NFS4ERR_NAMETOOLONG);
6109         }
6110
6111         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6112         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6113             MAXPATHLEN + 1);
6114
6115         if (name == NULL) {
6116                 kmem_free(nm, len);
6117                 return (NFS4ERR_INVAL);
6118         }
6119
6120         status = do_rfs4_op_lookup(name, req, cs);
6121
6122         if (name != nm)
6123                 kmem_free(name, MAXPATHLEN + 1);
6124
6125         kmem_free(nm, len);
6126
6127         return (status);
6128 }
6129
6130 static nfsstat4
6131 rfs4_lookupfile(component4 *component, struct svc_req *req,
6132     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6133 {
6134         nfsstat4 status;
6135         vnode_t *dvp = cs->vp;
6136         vattr_t bva, ava, fva;
6137         int error;
6138
6139         /* Get "before" change value */
6140         bva.va_mask = AT_CTIME|AT_SEQ;
6141         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6142         if (error)
6143                 return (puterrno4(error));
6144
6145         /* rfs4_lookup may VN_RELE directory */
6146         VN_HOLD(dvp);
6147
6148         status = rfs4_lookup(component, req, cs);
6149         if (status != NFS4_OK) {
6150                 VN_RELE(dvp);
6151                 return (status);
6152         }
6153
6154         /*
6155          * Get "after" change value, if it fails, simply return the
6156          * before value.
6157          */
6158         ava.va_mask = AT_CTIME|AT_SEQ;
6159         if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6160                 ava.va_ctime = bva.va_ctime;
6161                 ava.va_seq = 0;
6162         }
6163         VN_RELE(dvp);
6164
6165         /*
6166          * Validate the file is a file
6167          */
6168         fva.va_mask = AT_TYPE|AT_MODE;
6169         error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6170         if (error)
6171                 return (puterrno4(error));
6172
6173         if (fva.va_type != VREG) {
6174                 if (fva.va_type == VDIR)
6175                         return (NFS4ERR_ISDIR);
6176                 if (fva.va_type == VLNK)
6177                         return (NFS4ERR_SYMLINK);
6178                 return (NFS4ERR_INVAL);
6179         }
6180
6181         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6182         NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6183
6184         /*
6185          * It is undefined if VOP_LOOKUP will change va_seq, so
6186          * cinfo.atomic = TRUE only if we have
6187          * non-zero va_seq's, and they have not changed.
6188          */
6189         if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6190                 cinfo->atomic = TRUE;
6191         else
6192                 cinfo->atomic = FALSE;
6193
6194         /* Check for mandatory locking */
6195         cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6196         return (check_open_access(access, cs, req));
6197 }
6198
6199 static nfsstat4
6200 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6201     timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
6202 {
6203         int error;
6204         nfsstat4 status = NFS4_OK;
6205         vattr_t va;
6206
6207 tryagain:
6208
6209         /*
6210          * The file open mode used is VWRITE.  If the client needs
6211          * some other semantic, then it should do the access checking
6212          * itself.  It would have been nice to have the file open mode
6213          * passed as part of the arguments.
6214          */
6215
6216         *created = TRUE;
6217         error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6218
6219         if (error) {
6220                 *created = FALSE;
6221
6222                 /*
6223                  * If we got something other than file already exists
6224                  * then just return this error.  Otherwise, we got
6225                  * EEXIST.  If we were doing a GUARDED create, then
6226                  * just return this error.  Otherwise, we need to
6227                  * make sure that this wasn't a duplicate of an
6228                  * exclusive create request.
6229                  *
6230                  * The assumption is made that a non-exclusive create
6231                  * request will never return EEXIST.
6232                  */
6233
6234                 if (error != EEXIST || mode == GUARDED4) {
6235                         status = puterrno4(error);
6236                         return (status);
6237                 }
6238                 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6239                     NULL, NULL, NULL);
6240
6241                 if (error) {
6242                         /*
6243                          * We couldn't find the file that we thought that
6244                          * we just created.  So, we'll just try creating
6245                          * it again.
6246                          */
6247                         if (error == ENOENT)
6248                                 goto tryagain;
6249
6250                         status = puterrno4(error);
6251                         return (status);
6252                 }
6253
6254                 if (mode == UNCHECKED4) {
6255                         /* existing object must be regular file */
6256                         if ((*vpp)->v_type != VREG) {
6257                                 if ((*vpp)->v_type == VDIR)
6258                                         status = NFS4ERR_ISDIR;
6259                                 else if ((*vpp)->v_type == VLNK)
6260                                         status = NFS4ERR_SYMLINK;
6261                                 else
6262                                         status = NFS4ERR_INVAL;
6263                                 VN_RELE(*vpp);
6264                                 return (status);
6265                         }
6266
6267                         return (NFS4_OK);
6268                 }
6269
6270                 /* Check for duplicate request */
6271                 ASSERT(mtime != 0);
6272                 va.va_mask = AT_MTIME;
6273                 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6274                 if (!error) {
6275                         /* We found the file */
6276                         if (va.va_mtime.tv_sec != mtime->tv_sec ||
6277                             va.va_mtime.tv_nsec != mtime->tv_nsec) {
6278                                 /* but its not our creation */
6279                                 VN_RELE(*vpp);
6280                                 return (NFS4ERR_EXIST);
6281                         }
6282                         *created = TRUE; /* retrans of create == created */
6283                         return (NFS4_OK);
6284                 }
6285                 VN_RELE(*vpp);
6286                 return (NFS4ERR_EXIST);
6287         }
6288
6289         return (NFS4_OK);
6290 }
6291
6292 static nfsstat4
6293 check_open_access(uint32_t access, struct compound_state *cs,
6294     struct svc_req *req)
6295 {
6296         int error;
6297         vnode_t *vp;
6298         bool_t readonly;
6299         cred_t *cr = cs->cr;
6300
6301         /* For now we don't allow mandatory locking as per V2/V3 */
6302         if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6303                 return (NFS4ERR_ACCESS);
6304         }
6305
6306         vp = cs->vp;
6307         ASSERT(cr != NULL && vp->v_type == VREG);
6308
6309         /*
6310          * If the file system is exported read only and we are trying
6311          * to open for write, then return NFS4ERR_ROFS
6312          */
6313
6314         readonly = rdonly4(cs->exi, cs->vp, req);
6315
6316         if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6317                 return (NFS4ERR_ROFS);
6318
6319         if (access & OPEN4_SHARE_ACCESS_READ) {
6320                 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6321                     (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6322                         return (NFS4ERR_ACCESS);
6323                 }
6324         }
6325
6326         if (access & OPEN4_SHARE_ACCESS_WRITE) {
6327                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6328                 if (error)
6329                         return (NFS4ERR_ACCESS);
6330         }
6331
6332         return (NFS4_OK);
6333 }
6334
6335 static nfsstat4
6336 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6337     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6338 {
6339         struct nfs4_svgetit_arg sarg;
6340         struct nfs4_ntov_table ntov;
6341
6342         bool_t ntov_table_init = FALSE;
6343         struct statvfs64 sb;
6344         nfsstat4 status;
6345         vnode_t *vp;
6346         vattr_t bva, ava, iva, cva, *vap;
6347         vnode_t *dvp;
6348         timespec32_t *mtime;
6349         char *nm = NULL;
6350         uint_t buflen;
6351         bool_t created;
6352         bool_t setsize = FALSE;
6353         len_t reqsize;
6354         int error;
6355         bool_t trunc;
6356         caller_context_t ct;
6357         component4 *component;
6358         bslabel_t *clabel;
6359         struct sockaddr *ca;
6360         char *name = NULL;
6361
6362         sarg.sbp = &sb;
6363         sarg.is_referral = B_FALSE;
6364
6365         dvp = cs->vp;
6366
6367         /* Check if the file system is read only */
6368         if (rdonly4(cs->exi, dvp, req))
6369                 return (NFS4ERR_ROFS);
6370
6371         /* check the label of including directory */
6372         if (is_system_labeled()) {
6373                 ASSERT(req->rq_label != NULL);
6374                 clabel = req->rq_label;
6375                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6376                     "got client label from request(1)",
6377                     struct svc_req *, req);
6378                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6379                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6380                             cs->exi)) {
6381                                 return (NFS4ERR_ACCESS);
6382                         }
6383                 }
6384         }
6385
6386         /*
6387          * Get the last component of path name in nm. cs will reference
6388          * the including directory on success.
6389          */
6390         component = &args->open_claim4_u.file;
6391         status = utf8_dir_verify(component);
6392         if (status != NFS4_OK)
6393                 return (status);
6394
6395         nm = utf8_to_fn(component, &buflen, NULL);
6396
6397         if (nm == NULL)
6398                 return (NFS4ERR_RESOURCE);
6399
6400         if (buflen > MAXNAMELEN) {
6401                 kmem_free(nm, buflen);
6402                 return (NFS4ERR_NAMETOOLONG);
6403         }
6404
6405         bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6406         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6407         if (error) {
6408                 kmem_free(nm, buflen);
6409                 return (puterrno4(error));
6410         }
6411
6412         if (bva.va_type != VDIR) {
6413                 kmem_free(nm, buflen);
6414                 return (NFS4ERR_NOTDIR);
6415         }
6416
6417         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6418
6419         switch (args->mode) {
6420         case GUARDED4:
6421                 /*FALLTHROUGH*/
6422         case UNCHECKED4:
6423                 nfs4_ntov_table_init(&ntov);
6424                 ntov_table_init = TRUE;
6425
6426                 *attrset = 0;
6427                 status = do_rfs4_set_attrs(attrset,
6428                     &args->createhow4_u.createattrs,
6429                     cs, &sarg, &ntov, NFS4ATTR_SETIT);
6430
6431                 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6432                     sarg.vap->va_type != VREG) {
6433                         if (sarg.vap->va_type == VDIR)
6434                                 status = NFS4ERR_ISDIR;
6435                         else if (sarg.vap->va_type == VLNK)
6436                                 status = NFS4ERR_SYMLINK;
6437                         else
6438                                 status = NFS4ERR_INVAL;
6439                 }
6440
6441                 if (status != NFS4_OK) {
6442                         kmem_free(nm, buflen);
6443                         nfs4_ntov_table_free(&ntov, &sarg);
6444                         *attrset = 0;
6445                         return (status);
6446                 }
6447
6448                 vap = sarg.vap;
6449                 vap->va_type = VREG;
6450                 vap->va_mask |= AT_TYPE;
6451
6452                 if ((vap->va_mask & AT_MODE) == 0) {
6453                         vap->va_mask |= AT_MODE;
6454                         vap->va_mode = (mode_t)0600;
6455                 }
6456
6457                 if (vap->va_mask & AT_SIZE) {
6458
6459                         /* Disallow create with a non-zero size */
6460
6461                         if ((reqsize = sarg.vap->va_size) != 0) {
6462                                 kmem_free(nm, buflen);
6463                                 nfs4_ntov_table_free(&ntov, &sarg);
6464                                 *attrset = 0;
6465                                 return (NFS4ERR_INVAL);
6466                         }
6467                         setsize = TRUE;
6468                 }
6469                 break;
6470
6471         case EXCLUSIVE4:
6472                 /* prohibit EXCL create of named attributes */
6473                 if (dvp->v_flag & V_XATTRDIR) {
6474                         kmem_free(nm, buflen);
6475                         *attrset = 0;
6476                         return (NFS4ERR_INVAL);
6477                 }
6478
6479                 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6480                 cva.va_type = VREG;
6481                 /*
6482                  * Ensure no time overflows. Assumes underlying
6483                  * filesystem supports at least 32 bits.
6484                  * Truncate nsec to usec resolution to allow valid
6485                  * compares even if the underlying filesystem truncates.
6486                  */
6487                 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6488                 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6489                 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6490                 cva.va_mode = (mode_t)0;
6491                 vap = &cva;
6492
6493                 /*
6494                  * For EXCL create, attrset is set to the server attr
6495                  * used to cache the client's verifier.
6496                  */
6497                 *attrset = FATTR4_TIME_MODIFY_MASK;
6498                 break;
6499         }
6500
6501         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6502         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6503             MAXPATHLEN  + 1);
6504
6505         if (name == NULL) {
6506                 kmem_free(nm, buflen);
6507                 return (NFS4ERR_SERVERFAULT);
6508         }
6509
6510         status = create_vnode(dvp, name, vap, args->mode, mtime,
6511             cs->cr, &vp, &created);
6512         if (nm != name)
6513                 kmem_free(name, MAXPATHLEN + 1);
6514         kmem_free(nm, buflen);
6515
6516         if (status != NFS4_OK) {
6517                 if (ntov_table_init)
6518                         nfs4_ntov_table_free(&ntov, &sarg);
6519                 *attrset = 0;
6520                 return (status);
6521         }
6522
6523         trunc = (setsize && !created);
6524
6525         if (args->mode != EXCLUSIVE4) {
6526                 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6527
6528                 /*
6529                  * True verification that object was created with correct
6530                  * attrs is impossible.  The attrs could have been changed
6531                  * immediately after object creation.  If attributes did
6532                  * not verify, the only recourse for the server is to
6533                  * destroy the object.  Maybe if some attrs (like gid)
6534                  * are set incorrectly, the object should be destroyed;
6535                  * however, seems bad as a default policy.  Do we really
6536                  * want to destroy an object over one of the times not
6537                  * verifying correctly?  For these reasons, the server
6538                  * currently sets bits in attrset for createattrs
6539                  * that were set; however, no verification is done.
6540                  *
6541                  * vmask_to_nmask accounts for vattr bits set on create
6542                  *      [do_rfs4_set_attrs() only sets resp bits for
6543                  *       non-vattr/vfs bits.]
6544                  * Mask off any bits we set by default so as not to return
6545                  * more attrset bits than were requested in createattrs
6546                  */
6547                 if (created) {
6548                         nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6549                         *attrset &= createmask;
6550                 } else {
6551                         /*
6552                          * We did not create the vnode (we tried but it
6553                          * already existed).  In this case, the only createattr
6554                          * that the spec allows the server to set is size,
6555                          * and even then, it can only be set if it is 0.
6556                          */
6557                         *attrset = 0;
6558                         if (trunc)
6559                                 *attrset = FATTR4_SIZE_MASK;
6560                 }
6561         }
6562         if (ntov_table_init)
6563                 nfs4_ntov_table_free(&ntov, &sarg);
6564
6565         /*
6566          * Get the initial "after" sequence number, if it fails,
6567          * set to zero, time to before.
6568          */
6569         iva.va_mask = AT_CTIME|AT_SEQ;
6570         if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6571                 iva.va_seq = 0;
6572                 iva.va_ctime = bva.va_ctime;
6573         }
6574
6575         /*
6576          * create_vnode attempts to create the file exclusive,
6577          * if it already exists the VOP_CREATE will fail and
6578          * may not increase va_seq. It is atomic if
6579          * we haven't changed the directory, but if it has changed
6580          * we don't know what changed it.
6581          */
6582         if (!created) {
6583                 if (bva.va_seq && iva.va_seq &&
6584                     bva.va_seq == iva.va_seq)
6585                         cinfo->atomic = TRUE;
6586                 else
6587                         cinfo->atomic = FALSE;
6588                 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6589         } else {
6590                 /*
6591                  * The entry was created, we need to sync the
6592                  * directory metadata.
6593                  */
6594                 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6595
6596                 /*
6597                  * Get "after" change value, if it fails, simply return the
6598                  * before value.
6599                  */
6600                 ava.va_mask = AT_CTIME|AT_SEQ;
6601                 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6602                         ava.va_ctime = bva.va_ctime;
6603                         ava.va_seq = 0;
6604                 }
6605
6606                 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6607
6608                 /*
6609                  * The cinfo->atomic = TRUE only if we have
6610                  * non-zero va_seq's, and it has incremented by exactly one
6611                  * during the create_vnode and it didn't
6612                  * change during the VOP_FSYNC.
6613                  */
6614                 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6615                     iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6616                         cinfo->atomic = TRUE;
6617                 else
6618                         cinfo->atomic = FALSE;
6619         }
6620
6621         /* Check for mandatory locking and that the size gets set. */
6622         cva.va_mask = AT_MODE;
6623         if (setsize)
6624                 cva.va_mask |= AT_SIZE;
6625
6626         /* Assume the worst */
6627         cs->mandlock = TRUE;
6628
6629         if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6630                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6631
6632                 /*
6633                  * Truncate the file if necessary; this would be
6634                  * the case for create over an existing file.
6635                  */
6636
6637                 if (trunc) {
6638                         int in_crit = 0;
6639                         rfs4_file_t *fp;
6640                         bool_t create = FALSE;
6641
6642                         /*
6643                          * We are writing over an existing file.
6644                          * Check to see if we need to recall a delegation.
6645                          */
6646                         rfs4_hold_deleg_policy();
6647                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6648                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6649                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6650                                         rfs4_file_rele(fp);
6651                                         rfs4_rele_deleg_policy();
6652                                         VN_RELE(vp);
6653                                         *attrset = 0;
6654                                         return (NFS4ERR_DELAY);
6655                                 }
6656                                 rfs4_file_rele(fp);
6657                         }
6658                         rfs4_rele_deleg_policy();
6659
6660                         if (nbl_need_check(vp)) {
6661                                 in_crit = 1;
6662
6663                                 ASSERT(reqsize == 0);
6664
6665                                 nbl_start_crit(vp, RW_READER);
6666                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6667                                     cva.va_size, 0, NULL)) {
6668                                         in_crit = 0;
6669                                         nbl_end_crit(vp);
6670                                         VN_RELE(vp);
6671                                         *attrset = 0;
6672                                         return (NFS4ERR_ACCESS);
6673                                 }
6674                         }
6675                         ct.cc_sysid = 0;
6676                         ct.cc_pid = 0;
6677                         ct.cc_caller_id = nfs4_srv_caller_id;
6678                         ct.cc_flags = CC_DONTBLOCK;
6679
6680                         cva.va_mask = AT_SIZE;
6681                         cva.va_size = reqsize;
6682                         (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6683                         if (in_crit)
6684                                 nbl_end_crit(vp);
6685                 }
6686         }
6687
6688         error = makefh4(&cs->fh, vp, cs->exi);
6689
6690         /*
6691          * Force modified data and metadata out to stable storage.
6692          */
6693         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6694
6695         if (error) {
6696                 VN_RELE(vp);
6697                 *attrset = 0;
6698                 return (puterrno4(error));
6699         }
6700
6701         /* if parent dir is attrdir, set namedattr fh flag */
6702         if (dvp->v_flag & V_XATTRDIR)
6703                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6704
6705         if (cs->vp)
6706                 VN_RELE(cs->vp);
6707
6708         cs->vp = vp;
6709
6710         /*
6711          * if we did not create the file, we will need to check
6712          * the access bits on the file
6713          */
6714
6715         if (!created) {
6716                 if (setsize)
6717                         args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6718                 status = check_open_access(args->share_access, cs, req);
6719                 if (status != NFS4_OK)
6720                         *attrset = 0;
6721         }
6722         return (status);
6723 }
6724
6725 /*ARGSUSED*/
6726 static void
6727 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6728     rfs4_openowner_t *oo, delegreq_t deleg,
6729     uint32_t access, uint32_t deny,
6730     OPEN4res *resp, int deleg_cur)
6731 {
6732         /* XXX Currently not using req  */
6733         rfs4_state_t *sp;
6734         rfs4_file_t *fp;
6735         bool_t screate = TRUE;
6736         bool_t fcreate = TRUE;
6737         uint32_t open_a, share_a;
6738         uint32_t open_d, share_d;
6739         rfs4_deleg_state_t *dsp;
6740         sysid_t sysid;
6741         nfsstat4 status;
6742         caller_context_t ct;
6743         int fflags = 0;
6744         int recall = 0;
6745         int err;
6746         int first_open;
6747
6748         /* get the file struct and hold a lock on it during initial open */
6749         fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6750         if (fp == NULL) {
6751                 resp->status = NFS4ERR_RESOURCE;
6752                 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6753                 return;
6754         }
6755
6756         sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6757         if (sp == NULL) {
6758                 resp->status = NFS4ERR_RESOURCE;
6759                 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6760                 /* No need to keep any reference */
6761                 rw_exit(&fp->rf_file_rwlock);
6762                 rfs4_file_rele(fp);
6763                 return;
6764         }
6765
6766         /* try to get the sysid before continuing */
6767         if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6768                 resp->status = status;
6769                 rfs4_file_rele(fp);
6770                 /* Not a fully formed open; "close" it */
6771                 if (screate == TRUE)
6772                         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6773                 rfs4_state_rele(sp);
6774                 return;
6775         }
6776
6777         /* Calculate the fflags for this OPEN. */
6778         if (access & OPEN4_SHARE_ACCESS_READ)
6779                 fflags |= FREAD;
6780         if (access & OPEN4_SHARE_ACCESS_WRITE)
6781                 fflags |= FWRITE;
6782
6783         rfs4_dbe_lock(sp->rs_dbe);
6784
6785         /*
6786          * Calculate the new deny and access mode that this open is adding to
6787          * the file for this open owner;
6788          */
6789         open_d = (deny & ~sp->rs_open_deny);
6790         open_a = (access & ~sp->rs_open_access);
6791
6792         /*
6793          * Calculate the new share access and share deny modes that this open
6794          * is adding to the file for this open owner;
6795          */
6796         share_a = (access & ~sp->rs_share_access);
6797         share_d = (deny & ~sp->rs_share_deny);
6798
6799         first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6800
6801         /*
6802          * Check to see the client has already sent an open for this
6803          * open owner on this file with the same share/deny modes.
6804          * If so, we don't need to check for a conflict and we don't
6805          * need to add another shrlock.  If not, then we need to
6806          * check for conflicts in deny and access before checking for
6807          * conflicts in delegation.  We don't want to recall a
6808          * delegation based on an open that will eventually fail based
6809          * on shares modes.
6810          */
6811
6812         if (share_a || share_d) {
6813                 if ((err = rfs4_share(sp, access, deny)) != 0) {
6814                         rfs4_dbe_unlock(sp->rs_dbe);
6815                         resp->status = err;
6816
6817                         rfs4_file_rele(fp);
6818                         /* Not a fully formed open; "close" it */
6819                         if (screate == TRUE)
6820                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6821                         rfs4_state_rele(sp);
6822                         return;
6823                 }
6824         }
6825
6826         rfs4_dbe_lock(fp->rf_dbe);
6827
6828         /*
6829          * Check to see if this file is delegated and if so, if a
6830          * recall needs to be done.
6831          */
6832         if (rfs4_check_recall(sp, access)) {
6833                 rfs4_dbe_unlock(fp->rf_dbe);
6834                 rfs4_dbe_unlock(sp->rs_dbe);
6835                 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6836                 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6837                 rfs4_dbe_lock(sp->rs_dbe);
6838
6839                 /* if state closed while lock was dropped */
6840                 if (sp->rs_closed) {
6841                         if (share_a || share_d)
6842                                 (void) rfs4_unshare(sp);
6843                         rfs4_dbe_unlock(sp->rs_dbe);
6844                         rfs4_file_rele(fp);
6845                         /* Not a fully formed open; "close" it */
6846                         if (screate == TRUE)
6847                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6848                         rfs4_state_rele(sp);
6849                         resp->status = NFS4ERR_OLD_STATEID;
6850                         return;
6851                 }
6852
6853                 rfs4_dbe_lock(fp->rf_dbe);
6854                 /* Let's see if the delegation was returned */
6855                 if (rfs4_check_recall(sp, access)) {
6856                         rfs4_dbe_unlock(fp->rf_dbe);
6857                         if (share_a || share_d)
6858                                 (void) rfs4_unshare(sp);
6859                         rfs4_dbe_unlock(sp->rs_dbe);
6860                         rfs4_file_rele(fp);
6861                         rfs4_update_lease(sp->rs_owner->ro_client);
6862
6863                         /* Not a fully formed open; "close" it */
6864                         if (screate == TRUE)
6865                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6866                         rfs4_state_rele(sp);
6867                         resp->status = NFS4ERR_DELAY;
6868                         return;
6869                 }
6870         }
6871         /*
6872          * the share check passed and any delegation conflict has been
6873          * taken care of, now call vop_open.
6874          * if this is the first open then call vop_open with fflags.
6875          * if not, call vn_open_upgrade with just the upgrade flags.
6876          *
6877          * if the file has been opened already, it will have the current
6878          * access mode in the state struct.  if it has no share access, then
6879          * this is a new open.
6880          *
6881          * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6882          * call VOP_OPEN(), just do the open upgrade.
6883          */
6884         if (first_open && !deleg_cur) {
6885                 ct.cc_sysid = sysid;
6886                 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6887                 ct.cc_caller_id = nfs4_srv_caller_id;
6888                 ct.cc_flags = CC_DONTBLOCK;
6889                 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6890                 if (err) {
6891                         rfs4_dbe_unlock(fp->rf_dbe);
6892                         if (share_a || share_d)
6893                                 (void) rfs4_unshare(sp);
6894                         rfs4_dbe_unlock(sp->rs_dbe);
6895                         rfs4_file_rele(fp);
6896
6897                         /* Not a fully formed open; "close" it */
6898                         if (screate == TRUE)
6899                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6900                         rfs4_state_rele(sp);
6901                         /* check if a monitor detected a delegation conflict */
6902                         if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6903                                 resp->status = NFS4ERR_DELAY;
6904                         else
6905                                 resp->status = NFS4ERR_SERVERFAULT;
6906                         return;
6907                 }
6908         } else { /* open upgrade */
6909                 /*
6910                  * calculate the fflags for the new mode that is being added
6911                  * by this upgrade.
6912                  */
6913                 fflags = 0;
6914                 if (open_a & OPEN4_SHARE_ACCESS_READ)
6915                         fflags |= FREAD;
6916                 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6917                         fflags |= FWRITE;
6918                 vn_open_upgrade(cs->vp, fflags);
6919         }
6920         sp->rs_open_access |= access;
6921         sp->rs_open_deny |= deny;
6922
6923         if (open_d & OPEN4_SHARE_DENY_READ)
6924                 fp->rf_deny_read++;
6925         if (open_d & OPEN4_SHARE_DENY_WRITE)
6926                 fp->rf_deny_write++;
6927         fp->rf_share_deny |= deny;
6928
6929         if (open_a & OPEN4_SHARE_ACCESS_READ)
6930                 fp->rf_access_read++;
6931         if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6932                 fp->rf_access_write++;
6933         fp->rf_share_access |= access;
6934
6935         /*
6936          * Check for delegation here. if the deleg argument is not
6937          * DELEG_ANY, then this is a reclaim from a client and
6938          * we must honor the delegation requested. If necessary we can
6939          * set the recall flag.
6940          */
6941
6942         dsp = rfs4_grant_delegation(deleg, sp, &recall);
6943
6944         cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6945
6946         next_stateid(&sp->rs_stateid);
6947
6948         resp->stateid = sp->rs_stateid.stateid;
6949
6950         rfs4_dbe_unlock(fp->rf_dbe);
6951         rfs4_dbe_unlock(sp->rs_dbe);
6952
6953         if (dsp) {
6954                 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6955                 rfs4_deleg_state_rele(dsp);
6956         }
6957
6958         rfs4_file_rele(fp);
6959         rfs4_state_rele(sp);
6960
6961         resp->status = NFS4_OK;
6962 }
6963
6964 /*ARGSUSED*/
6965 static void
6966 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6967     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6968 {
6969         change_info4 *cinfo = &resp->cinfo;
6970         bitmap4 *attrset = &resp->attrset;
6971
6972         if (args->opentype == OPEN4_NOCREATE)
6973                 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6974                     req, cs, args->share_access, cinfo);
6975         else {
6976                 /* inhibit delegation grants during exclusive create */
6977
6978                 if (args->mode == EXCLUSIVE4)
6979                         rfs4_disable_delegation();
6980
6981                 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6982                     oo->ro_client->rc_clientid);
6983         }
6984
6985         if (resp->status == NFS4_OK) {
6986
6987                 /* cs->vp cs->fh now reference the desired file */
6988
6989                 rfs4_do_open(cs, req, oo,
6990                     oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6991                     args->share_access, args->share_deny, resp, 0);
6992
6993                 /*
6994                  * If rfs4_createfile set attrset, we must
6995                  * clear this attrset before the response is copied.
6996                  */
6997                 if (resp->status != NFS4_OK && resp->attrset) {
6998                         resp->attrset = 0;
6999                 }
7000         }
7001         else
7002                 *cs->statusp = resp->status;
7003
7004         if (args->mode == EXCLUSIVE4)
7005                 rfs4_enable_delegation();
7006 }
7007
7008 /*ARGSUSED*/
7009 static void
7010 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7011     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7012 {
7013         change_info4 *cinfo = &resp->cinfo;
7014         vattr_t va;
7015         vtype_t v_type = cs->vp->v_type;
7016         int error = 0;
7017
7018         /* Verify that we have a regular file */
7019         if (v_type != VREG) {
7020                 if (v_type == VDIR)
7021                         resp->status = NFS4ERR_ISDIR;
7022                 else if (v_type == VLNK)
7023                         resp->status = NFS4ERR_SYMLINK;
7024                 else
7025                         resp->status = NFS4ERR_INVAL;
7026                 return;
7027         }
7028
7029         va.va_mask = AT_MODE|AT_UID;
7030         error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7031         if (error) {
7032                 resp->status = puterrno4(error);
7033                 return;
7034         }
7035
7036         cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7037
7038         /*
7039          * Check if we have access to the file, Note the the file
7040          * could have originally been open UNCHECKED or GUARDED
7041          * with mode bits that will now fail, but there is nothing
7042          * we can really do about that except in the case that the
7043          * owner of the file is the one requesting the open.
7044          */
7045         if (crgetuid(cs->cr) != va.va_uid) {
7046                 resp->status = check_open_access(args->share_access, cs, req);
7047                 if (resp->status != NFS4_OK) {
7048                         return;
7049                 }
7050         }
7051
7052         /*
7053          * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7054          */
7055         cinfo->before = 0;
7056         cinfo->after = 0;
7057         cinfo->atomic = FALSE;
7058
7059         rfs4_do_open(cs, req, oo,
7060             NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7061             args->share_access, args->share_deny, resp, 0);
7062 }
7063
7064 static void
7065 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7066     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7067 {
7068         int error;
7069         nfsstat4 status;
7070         stateid4 stateid =
7071             args->open_claim4_u.delegate_cur_info.delegate_stateid;
7072         rfs4_deleg_state_t *dsp;
7073
7074         /*
7075          * Find the state info from the stateid and confirm that the
7076          * file is delegated.  If the state openowner is the same as
7077          * the supplied openowner we're done. If not, get the file
7078          * info from the found state info. Use that file info to
7079          * create the state for this lock owner. Note solaris doen't
7080          * really need the pathname to find the file. We may want to
7081          * lookup the pathname and make sure that the vp exist and
7082          * matches the vp in the file structure. However it is
7083          * possible that the pathname nolonger exists (local process
7084          * unlinks the file), so this may not be that useful.
7085          */
7086
7087         status = rfs4_get_deleg_state(&stateid, &dsp);
7088         if (status != NFS4_OK) {
7089                 resp->status = status;
7090                 return;
7091         }
7092
7093         ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7094
7095         /*
7096          * New lock owner, create state. Since this was probably called
7097          * in response to a CB_RECALL we set deleg to DELEG_NONE
7098          */
7099
7100         ASSERT(cs->vp != NULL);
7101         VN_RELE(cs->vp);
7102         VN_HOLD(dsp->rds_finfo->rf_vp);
7103         cs->vp = dsp->rds_finfo->rf_vp;
7104
7105         if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7106                 rfs4_deleg_state_rele(dsp);
7107                 *cs->statusp = resp->status = puterrno4(error);
7108                 return;
7109         }
7110
7111         /* Mark progress for delegation returns */
7112         dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7113         rfs4_deleg_state_rele(dsp);
7114         rfs4_do_open(cs, req, oo, DELEG_NONE,
7115             args->share_access, args->share_deny, resp, 1);
7116 }
7117
7118 /*ARGSUSED*/
7119 static void
7120 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7121     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7122 {
7123         /*
7124          * Lookup the pathname, it must already exist since this file
7125          * was delegated.
7126          *
7127          * Find the file and state info for this vp and open owner pair.
7128          *      check that they are in fact delegated.
7129          *      check that the state access and deny modes are the same.
7130          *
7131          * Return the delgation possibly seting the recall flag.
7132          */
7133         rfs4_file_t *fp;
7134         rfs4_state_t *sp;
7135         bool_t create = FALSE;
7136         bool_t dcreate = FALSE;
7137         rfs4_deleg_state_t *dsp;
7138         nfsace4 *ace;
7139
7140         /* Note we ignore oflags */
7141         resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7142             req, cs, args->share_access, &resp->cinfo);
7143
7144         if (resp->status != NFS4_OK) {
7145                 return;
7146         }
7147
7148         /* get the file struct and hold a lock on it during initial open */
7149         fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7150         if (fp == NULL) {
7151                 resp->status = NFS4ERR_RESOURCE;
7152                 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7153                 return;
7154         }
7155
7156         sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7157         if (sp == NULL) {
7158                 resp->status = NFS4ERR_SERVERFAULT;
7159                 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7160                 rw_exit(&fp->rf_file_rwlock);
7161                 rfs4_file_rele(fp);
7162                 return;
7163         }
7164
7165         rfs4_dbe_lock(sp->rs_dbe);
7166         rfs4_dbe_lock(fp->rf_dbe);
7167         if (args->share_access != sp->rs_share_access ||
7168             args->share_deny != sp->rs_share_deny ||
7169             sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7170                 NFS4_DEBUG(rfs4_debug,
7171                     (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7172                 rfs4_dbe_unlock(fp->rf_dbe);
7173                 rfs4_dbe_unlock(sp->rs_dbe);
7174                 rfs4_file_rele(fp);
7175                 rfs4_state_rele(sp);
7176                 resp->status = NFS4ERR_SERVERFAULT;
7177                 return;
7178         }
7179         rfs4_dbe_unlock(fp->rf_dbe);
7180         rfs4_dbe_unlock(sp->rs_dbe);
7181
7182         dsp = rfs4_finddeleg(sp, &dcreate);
7183         if (dsp == NULL) {
7184                 rfs4_state_rele(sp);
7185                 rfs4_file_rele(fp);
7186                 resp->status = NFS4ERR_SERVERFAULT;
7187                 return;
7188         }
7189
7190         next_stateid(&sp->rs_stateid);
7191
7192         resp->stateid = sp->rs_stateid.stateid;
7193
7194         resp->delegation.delegation_type = dsp->rds_dtype;
7195
7196         if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7197                 open_read_delegation4 *rv =
7198                     &resp->delegation.open_delegation4_u.read;
7199
7200                 rv->stateid = dsp->rds_delegid.stateid;
7201                 rv->recall = FALSE; /* no policy in place to set to TRUE */
7202                 ace = &rv->permissions;
7203         } else {
7204                 open_write_delegation4 *rv =
7205                     &resp->delegation.open_delegation4_u.write;
7206
7207                 rv->stateid = dsp->rds_delegid.stateid;
7208                 rv->recall = FALSE;  /* no policy in place to set to TRUE */
7209                 ace = &rv->permissions;
7210                 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7211                 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7212         }
7213
7214         /* XXX For now */
7215         ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7216         ace->flag = 0;
7217         ace->access_mask = 0;
7218         ace->who.utf8string_len = 0;
7219         ace->who.utf8string_val = 0;
7220
7221         rfs4_deleg_state_rele(dsp);
7222         rfs4_state_rele(sp);
7223         rfs4_file_rele(fp);
7224 }
7225
/*
 * Verdict of a sequence id check (see rfs4_check_seqid()).
 */
typedef enum {
        NFS4_CHKSEQ_OKAY,       /* 0: request carries the next expected seqid */
        NFS4_CHKSEQ_REPLAY,     /* 1: same seqid and same op as the last one */
        NFS4_CHKSEQ_BAD         /* 2: anything else; seqid is out of order */
} rfs4_chkseq_t;
7231
7232 /*
7233  * Generic function for sequence number checks.
7234  */
7235 static rfs4_chkseq_t
7236 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7237     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7238 {
7239         /* Same sequence ids and matching operations? */
7240         if (seqid == rqst_seq && resop->resop == lastop->resop) {
7241                 if (copyres == TRUE) {
7242                         rfs4_free_reply(resop);
7243                         rfs4_copy_reply(resop, lastop);
7244                 }
7245                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7246                     "Replayed SEQID %d\n", seqid));
7247                 return (NFS4_CHKSEQ_REPLAY);
7248         }
7249
7250         /* If the incoming sequence is not the next expected then it is bad */
7251         if (rqst_seq != seqid + 1) {
7252                 if (rqst_seq == seqid) {
7253                         NFS4_DEBUG(rfs4_debug,
7254                             (CE_NOTE, "BAD SEQID: Replayed sequence id "
7255                             "but last op was %d current op is %d\n",
7256                             lastop->resop, resop->resop));
7257                         return (NFS4_CHKSEQ_BAD);
7258                 }
7259                 NFS4_DEBUG(rfs4_debug,
7260                     (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7261                     rqst_seq, seqid));
7262                 return (NFS4_CHKSEQ_BAD);
7263         }
7264
7265         /* Everything okay -- next expected */
7266         return (NFS4_CHKSEQ_OKAY);
7267 }
7268
7269
7270 static rfs4_chkseq_t
7271 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7272 {
7273         rfs4_chkseq_t rc;
7274
7275         rfs4_dbe_lock(op->ro_dbe);
7276         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7277             TRUE);
7278         rfs4_dbe_unlock(op->ro_dbe);
7279
7280         if (rc == NFS4_CHKSEQ_OKAY)
7281                 rfs4_update_lease(op->ro_client);
7282
7283         return (rc);
7284 }
7285
7286 static rfs4_chkseq_t
7287 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7288 {
7289         rfs4_chkseq_t rc;
7290
7291         rfs4_dbe_lock(op->ro_dbe);
7292         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7293             olo_seqid, resop, FALSE);
7294         rfs4_dbe_unlock(op->ro_dbe);
7295
7296         return (rc);
7297 }
7298
7299 static rfs4_chkseq_t
7300 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7301 {
7302         rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7303
7304         rfs4_dbe_lock(lsp->rls_dbe);
7305         if (!lsp->rls_skip_seqid_check)
7306                 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7307                     resop, TRUE);
7308         rfs4_dbe_unlock(lsp->rls_dbe);
7309
7310         return (rc);
7311 }
7312
7313 static void
7314 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7315     struct svc_req *req, struct compound_state *cs)
7316 {
7317         OPEN4args *args = &argop->nfs_argop4_u.opopen;
7318         OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7319         open_owner4 *owner = &args->owner;
7320         open_claim_type4 claim = args->claim;
7321         rfs4_client_t *cp;
7322         rfs4_openowner_t *oo;
7323         bool_t create;
7324         bool_t replay = FALSE;
7325         int can_reclaim;
7326
7327         DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7328             OPEN4args *, args);
7329
7330         if (cs->vp == NULL) {
7331                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7332                 goto end;
7333         }
7334
7335         /*
7336          * Need to check clientid and lease expiration first based on
7337          * error ordering and incrementing sequence id.
7338          */
7339         cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7340         if (cp == NULL) {
7341                 *cs->statusp = resp->status =
7342                     rfs4_check_clientid(&owner->clientid, 0);
7343                 goto end;
7344         }
7345
7346         if (rfs4_lease_expired(cp)) {
7347                 rfs4_client_close(cp);
7348                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7349                 goto end;
7350         }
7351         can_reclaim = cp->rc_can_reclaim;
7352
7353         /*
7354          * Find the open_owner for use from this point forward.  Take
7355          * care in updating the sequence id based on the type of error
7356          * being returned.
7357          */
7358 retry:
7359         create = TRUE;
7360         oo = rfs4_findopenowner(owner, &create, args->seqid);
7361         if (oo == NULL) {
7362                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
7363                 rfs4_client_rele(cp);
7364                 goto end;
7365         }
7366
7367         /* Hold off access to the sequence space while the open is done */
7368         rfs4_sw_enter(&oo->ro_sw);
7369
7370         /*
7371          * If the open_owner existed before at the server, then check
7372          * the sequence id.
7373          */
7374         if (!create && !oo->ro_postpone_confirm) {
7375                 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7376                 case NFS4_CHKSEQ_BAD:
7377                         if ((args->seqid > oo->ro_open_seqid) &&
7378                             oo->ro_need_confirm) {
7379                                 rfs4_free_opens(oo, TRUE, FALSE);
7380                                 rfs4_sw_exit(&oo->ro_sw);
7381                                 rfs4_openowner_rele(oo);
7382                                 goto retry;
7383                         }
7384                         resp->status = NFS4ERR_BAD_SEQID;
7385                         goto out;
7386                 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7387                         replay = TRUE;
7388                         goto out;
7389                 default:
7390                         break;
7391                 }
7392
7393                 /*
7394                  * Sequence was ok and open owner exists
7395                  * check to see if we have yet to see an
7396                  * open_confirm.
7397                  */
7398                 if (oo->ro_need_confirm) {
7399                         rfs4_free_opens(oo, TRUE, FALSE);
7400                         rfs4_sw_exit(&oo->ro_sw);
7401                         rfs4_openowner_rele(oo);
7402                         goto retry;
7403                 }
7404         }
7405         /* Grace only applies to regular-type OPENs */
7406         if (rfs4_clnt_in_grace(cp) &&
7407             (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7408                 *cs->statusp = resp->status = NFS4ERR_GRACE;
7409                 goto out;
7410         }
7411
7412         /*
7413          * If previous state at the server existed then can_reclaim
7414          * will be set. If not reply NFS4ERR_NO_GRACE to the
7415          * client.
7416          */
7417         if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7418                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7419                 goto out;
7420         }
7421
7422
7423         /*
7424          * Reject the open if the client has missed the grace period
7425          */
7426         if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7427                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7428                 goto out;
7429         }
7430
7431         /* Couple of up-front bookkeeping items */
7432         if (oo->ro_need_confirm) {
7433                 /*
7434                  * If this is a reclaim OPEN then we should not ask
7435                  * for a confirmation of the open_owner per the
7436                  * protocol specification.
7437                  */
7438                 if (claim == CLAIM_PREVIOUS)
7439                         oo->ro_need_confirm = FALSE;
7440                 else
7441                         resp->rflags |= OPEN4_RESULT_CONFIRM;
7442         }
7443         resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7444
7445         /*
7446          * If there is an unshared filesystem mounted on this vnode,
7447          * do not allow to open/create in this directory.
7448          */
7449         if (vn_ismntpt(cs->vp)) {
7450                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7451                 goto out;
7452         }
7453
7454         /*
7455          * access must READ, WRITE, or BOTH.  No access is invalid.
7456          * deny can be READ, WRITE, BOTH, or NONE.
7457          * bits not defined for access/deny are invalid.
7458          */
7459         if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7460             (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7461             (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7462                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7463                 goto out;
7464         }
7465
7466
7467         /*
7468          * make sure attrset is zero before response is built.
7469          */
7470         resp->attrset = 0;
7471
7472         switch (claim) {
7473         case CLAIM_NULL:
7474                 rfs4_do_opennull(cs, req, args, oo, resp);
7475                 break;
7476         case CLAIM_PREVIOUS:
7477                 rfs4_do_openprev(cs, req, args, oo, resp);
7478                 break;
7479         case CLAIM_DELEGATE_CUR:
7480                 rfs4_do_opendelcur(cs, req, args, oo, resp);
7481                 break;
7482         case CLAIM_DELEGATE_PREV:
7483                 rfs4_do_opendelprev(cs, req, args, oo, resp);
7484                 break;
7485         default:
7486                 resp->status = NFS4ERR_INVAL;
7487                 break;
7488         }
7489
7490 out:
7491         rfs4_client_rele(cp);
7492
7493         /* Catch sequence id handling here to make it a little easier */
7494         switch (resp->status) {
7495         case NFS4ERR_BADXDR:
7496         case NFS4ERR_BAD_SEQID:
7497         case NFS4ERR_BAD_STATEID:
7498         case NFS4ERR_NOFILEHANDLE:
7499         case NFS4ERR_RESOURCE:
7500         case NFS4ERR_STALE_CLIENTID:
7501         case NFS4ERR_STALE_STATEID:
7502                 /*
7503                  * The protocol states that if any of these errors are
7504                  * being returned, the sequence id should not be
7505                  * incremented.  Any other return requires an
7506                  * increment.
7507                  */
7508                 break;
7509         default:
7510                 /* Always update the lease in this case */
7511                 rfs4_update_lease(oo->ro_client);
7512
7513                 /* Regular response - copy the result */
7514                 if (!replay)
7515                         rfs4_update_open_resp(oo, resop, &cs->fh);
7516
7517                 /*
7518                  * REPLAY case: Only if the previous response was OK
7519                  * do we copy the filehandle.  If not OK, no
7520                  * filehandle to copy.
7521                  */
7522                 if (replay == TRUE &&
7523                     resp->status == NFS4_OK &&
7524                     oo->ro_reply_fh.nfs_fh4_val) {
7525                         /*
7526                          * If this is a replay, we must restore the
7527                          * current filehandle/vp to that of what was
7528                          * returned originally.  Try our best to do
7529                          * it.
7530                          */
7531                         nfs_fh4_fmt_t *fh_fmtp =
7532                             (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7533
7534                         cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7535                             (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7536
7537                         if (cs->exi == NULL) {
7538                                 resp->status = NFS4ERR_STALE;
7539                                 goto finish;
7540                         }
7541
7542                         VN_RELE(cs->vp);
7543
7544                         cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7545                             &resp->status);
7546
7547                         if (cs->vp == NULL)
7548                                 goto finish;
7549
7550                         nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7551                 }
7552
7553                 /*
7554                  * If this was a replay, no need to update the
7555                  * sequence id. If the open_owner was not created on
7556                  * this pass, then update.  The first use of an
7557                  * open_owner will not bump the sequence id.
7558                  */
7559                 if (replay == FALSE && !create)
7560                         rfs4_update_open_sequence(oo);
7561                 /*
7562                  * If the client is receiving an error and the
7563                  * open_owner needs to be confirmed, there is no way
7564                  * to notify the client of this fact ignoring the fact
7565                  * that the server has no method of returning a
7566                  * stateid to confirm.  Therefore, the server needs to
7567                  * mark this open_owner in a way as to avoid the
7568                  * sequence id checking the next time the client uses
7569                  * this open_owner.
7570                  */
7571                 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7572                         oo->ro_postpone_confirm = TRUE;
7573                 /*
7574                  * If OK response then clear the postpone flag and
7575                  * reset the sequence id to keep in sync with the
7576                  * client.
7577                  */
7578                 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7579                         oo->ro_postpone_confirm = FALSE;
7580                         oo->ro_open_seqid = args->seqid;
7581                 }
7582                 break;
7583         }
7584
7585 finish:
7586         *cs->statusp = resp->status;
7587
7588         rfs4_sw_exit(&oo->ro_sw);
7589         rfs4_openowner_rele(oo);
7590
7591 end:
7592         DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7593             OPEN4res *, resp);
7594 }
7595
7596 /*ARGSUSED*/
7597 void
7598 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7599     struct svc_req *req, struct compound_state *cs)
7600 {
7601         OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7602         OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7603         rfs4_state_t *sp;
7604         nfsstat4 status;
7605
7606         DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7607             OPEN_CONFIRM4args *, args);
7608
7609         if (cs->vp == NULL) {
7610                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7611                 goto out;
7612         }
7613
7614         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7615         if (status != NFS4_OK) {
7616                 *cs->statusp = resp->status = status;
7617                 goto out;
7618         }
7619
7620         /* Ensure specified filehandle matches */
7621         if (cs->vp != sp->rs_finfo->rf_vp) {
7622                 rfs4_state_rele(sp);
7623                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7624                 goto out;
7625         }
7626
7627         /* hold off other access to open_owner while we tinker */
7628         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7629
7630         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7631         case NFS4_CHECK_STATEID_OKAY:
7632                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7633                     resop) != 0) {
7634                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7635                         break;
7636                 }
7637                 /*
7638                  * If it is the appropriate stateid and determined to
7639                  * be "OKAY" then this means that the stateid does not
7640                  * need to be confirmed and the client is in error for
7641                  * sending an OPEN_CONFIRM.
7642                  */
7643                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7644                 break;
7645         case NFS4_CHECK_STATEID_OLD:
7646                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7647                 break;
7648         case NFS4_CHECK_STATEID_BAD:
7649                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7650                 break;
7651         case NFS4_CHECK_STATEID_EXPIRED:
7652                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7653                 break;
7654         case NFS4_CHECK_STATEID_CLOSED:
7655                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7656                 break;
7657         case NFS4_CHECK_STATEID_REPLAY:
7658                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7659                     resop)) {
7660                 case NFS4_CHKSEQ_OKAY:
7661                         /*
7662                          * This is replayed stateid; if seqid matches
7663                          * next expected, then client is using wrong seqid.
7664                          */
7665                         /* fall through */
7666                 case NFS4_CHKSEQ_BAD:
7667                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7668                         break;
7669                 case NFS4_CHKSEQ_REPLAY:
7670                         /*
7671                          * Note this case is the duplicate case so
7672                          * resp->status is already set.
7673                          */
7674                         *cs->statusp = resp->status;
7675                         rfs4_update_lease(sp->rs_owner->ro_client);
7676                         break;
7677                 }
7678                 break;
7679         case NFS4_CHECK_STATEID_UNCONFIRMED:
7680                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7681                     resop) != NFS4_CHKSEQ_OKAY) {
7682                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7683                         break;
7684                 }
7685                 *cs->statusp = resp->status = NFS4_OK;
7686
7687                 next_stateid(&sp->rs_stateid);
7688                 resp->open_stateid = sp->rs_stateid.stateid;
7689                 sp->rs_owner->ro_need_confirm = FALSE;
7690                 rfs4_update_lease(sp->rs_owner->ro_client);
7691                 rfs4_update_open_sequence(sp->rs_owner);
7692                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7693                 break;
7694         default:
7695                 ASSERT(FALSE);
7696                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7697                 break;
7698         }
7699         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7700         rfs4_state_rele(sp);
7701
7702 out:
7703         DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7704             OPEN_CONFIRM4res *, resp);
7705 }
7706
7707 /*ARGSUSED*/
7708 void
7709 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7710     struct svc_req *req, struct compound_state *cs)
7711 {
7712         OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7713         OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7714         uint32_t access = args->share_access;
7715         uint32_t deny = args->share_deny;
7716         nfsstat4 status;
7717         rfs4_state_t *sp;
7718         rfs4_file_t *fp;
7719         int fflags = 0;
7720
7721         DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7722             OPEN_DOWNGRADE4args *, args);
7723
7724         if (cs->vp == NULL) {
7725                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7726                 goto out;
7727         }
7728
7729         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7730         if (status != NFS4_OK) {
7731                 *cs->statusp = resp->status = status;
7732                 goto out;
7733         }
7734
7735         /* Ensure specified filehandle matches */
7736         if (cs->vp != sp->rs_finfo->rf_vp) {
7737                 rfs4_state_rele(sp);
7738                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7739                 goto out;
7740         }
7741
7742         /* hold off other access to open_owner while we tinker */
7743         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7744
7745         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7746         case NFS4_CHECK_STATEID_OKAY:
7747                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7748                     resop) != NFS4_CHKSEQ_OKAY) {
7749                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7750                         goto end;
7751                 }
7752                 break;
7753         case NFS4_CHECK_STATEID_OLD:
7754                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7755                 goto end;
7756         case NFS4_CHECK_STATEID_BAD:
7757                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7758                 goto end;
7759         case NFS4_CHECK_STATEID_EXPIRED:
7760                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7761                 goto end;
7762         case NFS4_CHECK_STATEID_CLOSED:
7763                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7764                 goto end;
7765         case NFS4_CHECK_STATEID_UNCONFIRMED:
7766                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7767                 goto end;
7768         case NFS4_CHECK_STATEID_REPLAY:
7769                 /* Check the sequence id for the open owner */
7770                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7771                     resop)) {
7772                 case NFS4_CHKSEQ_OKAY:
7773                         /*
7774                          * This is replayed stateid; if seqid matches
7775                          * next expected, then client is using wrong seqid.
7776                          */
7777                         /* fall through */
7778                 case NFS4_CHKSEQ_BAD:
7779                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7780                         goto end;
7781                 case NFS4_CHKSEQ_REPLAY:
7782                         /*
7783                          * Note this case is the duplicate case so
7784                          * resp->status is already set.
7785                          */
7786                         *cs->statusp = resp->status;
7787                         rfs4_update_lease(sp->rs_owner->ro_client);
7788                         goto end;
7789                 }
7790                 break;
7791         default:
7792                 ASSERT(FALSE);
7793                 break;
7794         }
7795
7796         rfs4_dbe_lock(sp->rs_dbe);
7797         /*
7798          * Check that the new access modes and deny modes are valid.
7799          * Check that no invalid bits are set.
7800          */
7801         if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7802             (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7803                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7804                 rfs4_update_open_sequence(sp->rs_owner);
7805                 rfs4_dbe_unlock(sp->rs_dbe);
7806                 goto end;
7807         }
7808
7809         /*
7810          * The new modes must be a subset of the current modes and
7811          * the access must specify at least one mode. To test that
7812          * the new mode is a subset of the current modes we bitwise
7813          * AND them together and check that the result equals the new
7814          * mode. For example:
7815          * New mode, access == R and current mode, sp->rs_open_access  == RW
7816          * access & sp->rs_open_access == R == access, so the new access mode
7817          * is valid. Consider access == RW, sp->rs_open_access = R
7818          * access & sp->rs_open_access == R != access, so the new access mode
7819          * is invalid.
7820          */
7821         if ((access & sp->rs_open_access) != access ||
7822             (deny & sp->rs_open_deny) != deny ||
7823             (access &
7824             (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7825                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7826                 rfs4_update_open_sequence(sp->rs_owner);
7827                 rfs4_dbe_unlock(sp->rs_dbe);
7828                 goto end;
7829         }
7830
7831         /*
7832          * Release any share locks associated with this stateID.
7833          * Strictly speaking, this violates the spec because the
7834          * spec effectively requires that open downgrade be atomic.
7835          * At present, fs_shrlock does not have this capability.
7836          */
7837         (void) rfs4_unshare(sp);
7838
7839         status = rfs4_share(sp, access, deny);
7840         if (status != NFS4_OK) {
7841                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7842                 rfs4_update_open_sequence(sp->rs_owner);
7843                 rfs4_dbe_unlock(sp->rs_dbe);
7844                 goto end;
7845         }
7846
7847         fp = sp->rs_finfo;
7848         rfs4_dbe_lock(fp->rf_dbe);
7849
7850         /*
7851          * If the current mode has deny read and the new mode
7852          * does not, decrement the number of deny read mode bits
7853          * and if it goes to zero turn off the deny read bit
7854          * on the file.
7855          */
7856         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7857             (deny & OPEN4_SHARE_DENY_READ) == 0) {
7858                 fp->rf_deny_read--;
7859                 if (fp->rf_deny_read == 0)
7860                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7861         }
7862
7863         /*
7864          * If the current mode has deny write and the new mode
7865          * does not, decrement the number of deny write mode bits
7866          * and if it goes to zero turn off the deny write bit
7867          * on the file.
7868          */
7869         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7870             (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7871                 fp->rf_deny_write--;
7872                 if (fp->rf_deny_write == 0)
7873                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7874         }
7875
7876         /*
7877          * If the current mode has access read and the new mode
7878          * does not, decrement the number of access read mode bits
7879          * and if it goes to zero turn off the access read bit
7880          * on the file.  set fflags to FREAD for the call to
7881          * vn_open_downgrade().
7882          */
7883         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7884             (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7885                 fp->rf_access_read--;
7886                 if (fp->rf_access_read == 0)
7887                         fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7888                 fflags |= FREAD;
7889         }
7890
7891         /*
7892          * If the current mode has access write and the new mode
7893          * does not, decrement the number of access write mode bits
7894          * and if it goes to zero turn off the access write bit
7895          * on the file.  set fflags to FWRITE for the call to
7896          * vn_open_downgrade().
7897          */
7898         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7899             (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7900                 fp->rf_access_write--;
7901                 if (fp->rf_access_write == 0)
7902                         fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7903                 fflags |= FWRITE;
7904         }
7905
7906         /* Check that the file is still accessible */
7907         ASSERT(fp->rf_share_access);
7908
7909         rfs4_dbe_unlock(fp->rf_dbe);
7910
7911         /* now set the new open access and deny modes */
7912         sp->rs_open_access = access;
7913         sp->rs_open_deny = deny;
7914
7915         /*
7916          * we successfully downgraded the share lock, now we need to downgrade
7917          * the open. it is possible that the downgrade was only for a deny
7918          * mode and we have nothing else to do.
7919          */
7920         if ((fflags & (FREAD|FWRITE)) != 0)
7921                 vn_open_downgrade(cs->vp, fflags);
7922
7923         /* Update the stateid */
7924         next_stateid(&sp->rs_stateid);
7925         resp->open_stateid = sp->rs_stateid.stateid;
7926
7927         rfs4_dbe_unlock(sp->rs_dbe);
7928
7929         *cs->statusp = resp->status = NFS4_OK;
7930         /* Update the lease */
7931         rfs4_update_lease(sp->rs_owner->ro_client);
7932         /* And the sequence */
7933         rfs4_update_open_sequence(sp->rs_owner);
7934         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7935
7936 end:
7937         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7938         rfs4_state_rele(sp);
7939 out:
7940         DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7941             OPEN_DOWNGRADE4res *, resp);
7942 }
7943
7944 /*
7945  * The logic behind this function is detailed in the NFSv4 RFC in the
7946  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7947  * that section for explicit guidance to server behavior for
7948  * SETCLIENTID.
7949  */
7950 void
7951 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7952     struct svc_req *req, struct compound_state *cs)
7953 {
7954         SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7955         SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7956         rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7957         rfs4_clntip_t *ci;
7958         bool_t create;
7959         char *addr, *netid;
7960         int len;
7961
7962         DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7963             SETCLIENTID4args *, args);
7964 retry:
7965         newcp = cp_confirmed = cp_unconfirmed = NULL;
7966
7967         /*
7968          * Save the caller's IP address
7969          */
7970         args->client.cl_addr =
7971             (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7972
7973         /*
7974          * Record if it is a Solaris client that cannot handle referrals.
7975          */
7976         if (strstr(args->client.id_val, "Solaris") &&
7977             !strstr(args->client.id_val, "+referrals")) {
7978                 /* Add a "yes, it's downrev" record */
7979                 create = TRUE;
7980                 ci = rfs4_find_clntip(args->client.cl_addr, &create);
7981                 ASSERT(ci != NULL);
7982                 rfs4_dbe_rele(ci->ri_dbe);
7983         } else {
7984                 /* Remove any previous record */
7985                 rfs4_invalidate_clntip(args->client.cl_addr);
7986         }
7987
7988         /*
7989          * In search of an EXISTING client matching the incoming
7990          * request to establish a new client identifier at the server
7991          */
7992         create = TRUE;
7993         cp = rfs4_findclient(&args->client, &create, NULL);
7994
7995         /* Should never happen */
7996         ASSERT(cp != NULL);
7997
7998         if (cp == NULL) {
7999                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8000                 goto out;
8001         }
8002
8003         /*
8004          * Easiest case. Client identifier is newly created and is
8005          * unconfirmed.  Also note that for this case, no other
8006          * entries exist for the client identifier.  Nothing else to
8007          * check.  Just setup the response and respond.
8008          */
8009         if (create) {
8010                 *cs->statusp = res->status = NFS4_OK;
8011                 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8012                 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8013                     cp->rc_confirm_verf;
8014                 /* Setup callback information; CB_NULL confirmation later */
8015                 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8016
8017                 rfs4_client_rele(cp);
8018                 goto out;
8019         }
8020
8021         /*
8022          * An existing, confirmed client may exist but it may not have
8023          * been active for at least one lease period.  If so, then
8024          * "close" the client and create a new client identifier
8025          */
8026         if (rfs4_lease_expired(cp)) {
8027                 rfs4_client_close(cp);
8028                 goto retry;
8029         }
8030
8031         if (cp->rc_need_confirm == TRUE)
8032                 cp_unconfirmed = cp;
8033         else
8034                 cp_confirmed = cp;
8035
8036         cp = NULL;
8037
8038         /*
8039          * We have a confirmed client, now check for an
8040          * unconfimred entry
8041          */
8042         if (cp_confirmed) {
8043                 /* If creds don't match then client identifier is inuse */
8044                 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8045                         rfs4_cbinfo_t *cbp;
8046                         /*
8047                          * Some one else has established this client
8048                          * id. Try and say * who they are. We will use
8049                          * the call back address supplied by * the
8050                          * first client.
8051                          */
8052                         *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8053
8054                         addr = netid = NULL;
8055
8056                         cbp = &cp_confirmed->rc_cbinfo;
8057                         if (cbp->cb_callback.cb_location.r_addr &&
8058                             cbp->cb_callback.cb_location.r_netid) {
8059                                 cb_client4 *cbcp = &cbp->cb_callback;
8060
8061                                 len = strlen(cbcp->cb_location.r_addr)+1;
8062                                 addr = kmem_alloc(len, KM_SLEEP);
8063                                 bcopy(cbcp->cb_location.r_addr, addr, len);
8064                                 len = strlen(cbcp->cb_location.r_netid)+1;
8065                                 netid = kmem_alloc(len, KM_SLEEP);
8066                                 bcopy(cbcp->cb_location.r_netid, netid, len);
8067                         }
8068
8069                         res->SETCLIENTID4res_u.client_using.r_addr = addr;
8070                         res->SETCLIENTID4res_u.client_using.r_netid = netid;
8071
8072                         rfs4_client_rele(cp_confirmed);
8073                 }
8074
8075                 /*
8076                  * Confirmed, creds match, and verifier matches; must
8077                  * be an update of the callback info
8078                  */
8079                 if (cp_confirmed->rc_nfs_client.verifier ==
8080                     args->client.verifier) {
8081                         /* Setup callback information */
8082                         rfs4_client_setcb(cp_confirmed, &args->callback,
8083                             args->callback_ident);
8084
8085                         /* everything okay -- move ahead */
8086                         *cs->statusp = res->status = NFS4_OK;
8087                         res->SETCLIENTID4res_u.resok4.clientid =
8088                             cp_confirmed->rc_clientid;
8089
8090                         /* update the confirm_verifier and return it */
8091                         rfs4_client_scv_next(cp_confirmed);
8092                         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8093                             cp_confirmed->rc_confirm_verf;
8094
8095                         rfs4_client_rele(cp_confirmed);
8096                         goto out;
8097                 }
8098
8099                 /*
8100                  * Creds match but the verifier doesn't.  Must search
8101                  * for an unconfirmed client that would be replaced by
8102                  * this request.
8103                  */
8104                 create = FALSE;
8105                 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8106                     cp_confirmed);
8107         }
8108
8109         /*
8110          * At this point, we have taken care of the brand new client
8111          * struct, INUSE case, update of an existing, and confirmed
8112          * client struct.
8113          */
8114
8115         /*
8116          * check to see if things have changed while we originally
8117          * picked up the client struct.  If they have, then return and
8118          * retry the processing of this SETCLIENTID request.
8119          */
8120         if (cp_unconfirmed) {
8121                 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8122                 if (!cp_unconfirmed->rc_need_confirm) {
8123                         rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8124                         rfs4_client_rele(cp_unconfirmed);
8125                         if (cp_confirmed)
8126                                 rfs4_client_rele(cp_confirmed);
8127                         goto retry;
8128                 }
8129                 /* do away with the old unconfirmed one */
8130                 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8131                 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8132                 rfs4_client_rele(cp_unconfirmed);
8133                 cp_unconfirmed = NULL;
8134         }
8135
8136         /*
8137          * This search will temporarily hide the confirmed client
8138          * struct while a new client struct is created as the
8139          * unconfirmed one.
8140          */
8141         create = TRUE;
8142         newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8143
8144         ASSERT(newcp != NULL);
8145
8146         if (newcp == NULL) {
8147                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8148                 rfs4_client_rele(cp_confirmed);
8149                 goto out;
8150         }
8151
8152         /*
8153          * If one was not created, then a similar request must be in
8154          * process so release and start over with this one
8155          */
8156         if (create != TRUE) {
8157                 rfs4_client_rele(newcp);
8158                 if (cp_confirmed)
8159                         rfs4_client_rele(cp_confirmed);
8160                 goto retry;
8161         }
8162
8163         *cs->statusp = res->status = NFS4_OK;
8164         res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8165         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8166             newcp->rc_confirm_verf;
8167         /* Setup callback information; CB_NULL confirmation later */
8168         rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8169
8170         newcp->rc_cp_confirmed = cp_confirmed;
8171
8172         rfs4_client_rele(newcp);
8173
8174 out:
8175         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8176             SETCLIENTID4res *, res);
8177 }
8178
8179 /*ARGSUSED*/
8180 void
8181 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8182     struct svc_req *req, struct compound_state *cs)
8183 {
8184         SETCLIENTID_CONFIRM4args *args =
8185             &argop->nfs_argop4_u.opsetclientid_confirm;
8186         SETCLIENTID_CONFIRM4res *res =
8187             &resop->nfs_resop4_u.opsetclientid_confirm;
8188         rfs4_client_t *cp, *cptoclose = NULL;
8189
8190         DTRACE_NFSV4_2(op__setclientid__confirm__start,
8191             struct compound_state *, cs,
8192             SETCLIENTID_CONFIRM4args *, args);
8193
8194         *cs->statusp = res->status = NFS4_OK;
8195
8196         cp = rfs4_findclient_by_id(args->clientid, TRUE);
8197
8198         if (cp == NULL) {
8199                 *cs->statusp = res->status =
8200                     rfs4_check_clientid(&args->clientid, 1);
8201                 goto out;
8202         }
8203
8204         if (!creds_ok(cp, req, cs)) {
8205                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8206                 rfs4_client_rele(cp);
8207                 goto out;
8208         }
8209
8210         /* If the verifier doesn't match, the record doesn't match */
8211         if (cp->rc_confirm_verf != args->setclientid_confirm) {
8212                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8213                 rfs4_client_rele(cp);
8214                 goto out;
8215         }
8216
8217         rfs4_dbe_lock(cp->rc_dbe);
8218         cp->rc_need_confirm = FALSE;
8219         if (cp->rc_cp_confirmed) {
8220                 cptoclose = cp->rc_cp_confirmed;
8221                 cptoclose->rc_ss_remove = 1;
8222                 cp->rc_cp_confirmed = NULL;
8223         }
8224
8225         /*
8226          * Update the client's associated server instance, if it's changed
8227          * since the client was created.
8228          */
8229         if (rfs4_servinst(cp) != rfs4_cur_servinst)
8230                 rfs4_servinst_assign(cp, rfs4_cur_servinst);
8231
8232         /*
8233          * Record clientid in stable storage.
8234          * Must be done after server instance has been assigned.
8235          */
8236         rfs4_ss_clid(cp);
8237
8238         rfs4_dbe_unlock(cp->rc_dbe);
8239
8240         if (cptoclose)
8241                 /* don't need to rele, client_close does it */
8242                 rfs4_client_close(cptoclose);
8243
8244         /* If needed, initiate CB_NULL call for callback path */
8245         rfs4_deleg_cb_check(cp);
8246         rfs4_update_lease(cp);
8247
8248         /*
8249          * Check to see if client can perform reclaims
8250          */
8251         rfs4_ss_chkclid(cp);
8252
8253         rfs4_client_rele(cp);
8254
8255 out:
8256         DTRACE_NFSV4_2(op__setclientid__confirm__done,
8257             struct compound_state *, cs,
8258             SETCLIENTID_CONFIRM4 *, res);
8259 }
8260
8261
8262 /*ARGSUSED*/
8263 void
8264 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8265     struct svc_req *req, struct compound_state *cs)
8266 {
8267         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8268         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8269         rfs4_state_t *sp;
8270         nfsstat4 status;
8271
8272         DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8273             CLOSE4args *, args);
8274
8275         if (cs->vp == NULL) {
8276                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8277                 goto out;
8278         }
8279
8280         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8281         if (status != NFS4_OK) {
8282                 *cs->statusp = resp->status = status;
8283                 goto out;
8284         }
8285
8286         /* Ensure specified filehandle matches */
8287         if (cs->vp != sp->rs_finfo->rf_vp) {
8288                 rfs4_state_rele(sp);
8289                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8290                 goto out;
8291         }
8292
8293         /* hold off other access to open_owner while we tinker */
8294         rfs4_sw_enter(&sp->rs_owner->ro_sw);
8295
8296         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8297         case NFS4_CHECK_STATEID_OKAY:
8298                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8299                     resop) != NFS4_CHKSEQ_OKAY) {
8300                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8301                         goto end;
8302                 }
8303                 break;
8304         case NFS4_CHECK_STATEID_OLD:
8305                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8306                 goto end;
8307         case NFS4_CHECK_STATEID_BAD:
8308                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8309                 goto end;
8310         case NFS4_CHECK_STATEID_EXPIRED:
8311                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8312                 goto end;
8313         case NFS4_CHECK_STATEID_CLOSED:
8314                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8315                 goto end;
8316         case NFS4_CHECK_STATEID_UNCONFIRMED:
8317                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8318                 goto end;
8319         case NFS4_CHECK_STATEID_REPLAY:
8320                 /* Check the sequence id for the open owner */
8321                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8322                     resop)) {
8323                 case NFS4_CHKSEQ_OKAY:
8324                         /*
8325                          * This is replayed stateid; if seqid matches
8326                          * next expected, then client is using wrong seqid.
8327                          */
8328                         /* FALL THROUGH */
8329                 case NFS4_CHKSEQ_BAD:
8330                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8331                         goto end;
8332                 case NFS4_CHKSEQ_REPLAY:
8333                         /*
8334                          * Note this case is the duplicate case so
8335                          * resp->status is already set.
8336                          */
8337                         *cs->statusp = resp->status;
8338                         rfs4_update_lease(sp->rs_owner->ro_client);
8339                         goto end;
8340                 }
8341                 break;
8342         default:
8343                 ASSERT(FALSE);
8344                 break;
8345         }
8346
8347         rfs4_dbe_lock(sp->rs_dbe);
8348
8349         /* Update the stateid. */
8350         next_stateid(&sp->rs_stateid);
8351         resp->open_stateid = sp->rs_stateid.stateid;
8352
8353         rfs4_dbe_unlock(sp->rs_dbe);
8354
8355         rfs4_update_lease(sp->rs_owner->ro_client);
8356         rfs4_update_open_sequence(sp->rs_owner);
8357         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8358
8359         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8360
8361         *cs->statusp = resp->status = status;
8362
8363 end:
8364         rfs4_sw_exit(&sp->rs_owner->ro_sw);
8365         rfs4_state_rele(sp);
8366 out:
8367         DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8368             CLOSE4res *, resp);
8369 }
8370
8371 /*
8372  * Manage the counts on the file struct and close all file locks
8373  */
8374 /*ARGSUSED*/
8375 void
8376 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8377     bool_t close_of_client)
8378 {
8379         rfs4_file_t *fp = sp->rs_finfo;
8380         rfs4_lo_state_t *lsp;
8381         int fflags = 0;
8382
8383         /*
8384          * If this call is part of the larger closing down of client
8385          * state then it is just easier to release all locks
8386          * associated with this client instead of going through each
8387          * individual file and cleaning locks there.
8388          */
8389         if (close_of_client) {
8390                 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8391                     !list_is_empty(&sp->rs_lostatelist) &&
8392                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8393                         /* Is the PxFS kernel module loaded? */
8394                         if (lm_remove_file_locks != NULL) {
8395                                 int new_sysid;
8396
8397                                 /* Encode the cluster nodeid in new sysid */
8398                                 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8399                                 lm_set_nlmid_flk(&new_sysid);
8400
8401                                 /*
8402                                  * This PxFS routine removes file locks for a
8403                                  * client over all nodes of a cluster.
8404                                  */
8405                                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8406                                     "lm_remove_file_locks(sysid=0x%x)\n",
8407                                     new_sysid));
8408                                 (*lm_remove_file_locks)(new_sysid);
8409                         } else {
8410                                 struct flock64 flk;
8411
8412                                 /* Release all locks for this client */
8413                                 flk.l_type = F_UNLKSYS;
8414                                 flk.l_whence = 0;
8415                                 flk.l_start = 0;
8416                                 flk.l_len = 0;
8417                                 flk.l_sysid =
8418                                     sp->rs_owner->ro_client->rc_sysidt;
8419                                 flk.l_pid = 0;
8420                                 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8421                                     &flk, F_REMOTELOCK | FREAD | FWRITE,
8422                                     (u_offset_t)0, NULL, CRED(), NULL);
8423                         }
8424
8425                         sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8426                 }
8427         }
8428
8429         /*
8430          * Release all locks on this file by this lock owner or at
8431          * least mark the locks as having been released
8432          */
8433         for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8434             lsp = list_next(&sp->rs_lostatelist, lsp)) {
8435                 lsp->rls_locks_cleaned = TRUE;
8436
8437                 /* Was this already taken care of above? */
8438                 if (!close_of_client &&
8439                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8440                         (void) cleanlocks(sp->rs_finfo->rf_vp,
8441                             lsp->rls_locker->rl_pid,
8442                             lsp->rls_locker->rl_client->rc_sysidt);
8443         }
8444
8445         /*
8446          * Release any shrlocks associated with this open state ID.
8447          * This must be done before the rfs4_state gets marked closed.
8448          */
8449         if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8450                 (void) rfs4_unshare(sp);
8451
8452         if (sp->rs_open_access) {
8453                 rfs4_dbe_lock(fp->rf_dbe);
8454
8455                 /*
8456                  * Decrement the count for each access and deny bit that this
8457                  * state has contributed to the file.
8458                  * If the file counts go to zero
8459                  * clear the appropriate bit in the appropriate mask.
8460                  */
8461                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8462                         fp->rf_access_read--;
8463                         fflags |= FREAD;
8464                         if (fp->rf_access_read == 0)
8465                                 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8466                 }
8467                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8468                         fp->rf_access_write--;
8469                         fflags |= FWRITE;
8470                         if (fp->rf_access_write == 0)
8471                                 fp->rf_share_access &=
8472                                     ~OPEN4_SHARE_ACCESS_WRITE;
8473                 }
8474                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8475                         fp->rf_deny_read--;
8476                         if (fp->rf_deny_read == 0)
8477                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8478                 }
8479                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8480                         fp->rf_deny_write--;
8481                         if (fp->rf_deny_write == 0)
8482                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8483                 }
8484
8485                 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8486
8487                 rfs4_dbe_unlock(fp->rf_dbe);
8488
8489                 sp->rs_open_access = 0;
8490                 sp->rs_open_deny = 0;
8491         }
8492 }
8493
8494 /*
8495  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8496  */
8497 static nfsstat4
8498 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8499 {
8500         rfs4_lockowner_t *lo;
8501         rfs4_client_t *cp;
8502         uint32_t len;
8503
8504         lo = rfs4_findlockowner_by_pid(flk->l_pid);
8505         if (lo != NULL) {
8506                 cp = lo->rl_client;
8507                 if (rfs4_lease_expired(cp)) {
8508                         rfs4_lockowner_rele(lo);
8509                         rfs4_dbe_hold(cp->rc_dbe);
8510                         rfs4_client_close(cp);
8511                         return (NFS4ERR_EXPIRED);
8512                 }
8513                 dp->owner.clientid = lo->rl_owner.clientid;
8514                 len = lo->rl_owner.owner_len;
8515                 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8516                 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8517                 dp->owner.owner_len = len;
8518                 rfs4_lockowner_rele(lo);
8519                 goto finish;
8520         }
8521
8522         /*
8523          * Its not a NFS4 lock. We take advantage that the upper 32 bits
8524          * of the client id contain the boot time for a NFS4 lock. So we
8525          * fabricate and identity by setting clientid to the sysid, and
8526          * the lock owner to the pid.
8527          */
8528         dp->owner.clientid = flk->l_sysid;
8529         len = sizeof (pid_t);
8530         dp->owner.owner_len = len;
8531         dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8532         bcopy(&flk->l_pid, dp->owner.owner_val, len);
8533 finish:
8534         dp->offset = flk->l_start;
8535         dp->length = flk->l_len;
8536
8537         if (flk->l_type == F_RDLCK)
8538                 dp->locktype = READ_LT;
8539         else if (flk->l_type == F_WRLCK)
8540                 dp->locktype = WRITE_LT;
8541         else
8542                 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8543
8544         return (NFS4_OK);
8545 }
8546
8547 static int
8548 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8549 {
8550         int error;
8551         struct flock64 flk;
8552         int i;
8553         clock_t delaytime;
8554         int cmd;
8555
8556         cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8557 retry:
8558         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8559
8560         for (i = 0; i < rfs4_maxlock_tries; i++) {
8561                 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8562                 error = VOP_FRLOCK(vp, cmd,
8563                     flock, flag, (u_offset_t)0, NULL, cred, NULL);
8564
8565                 if (error != EAGAIN && error != EACCES)
8566                         break;
8567
8568                 if (i < rfs4_maxlock_tries - 1) {
8569                         delay(delaytime);
8570                         delaytime *= 2;
8571                 }
8572         }
8573
8574         if (error == EAGAIN || error == EACCES) {
8575                 /* Get the owner of the lock */
8576                 flk = *flock;
8577                 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8578                 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag,
8579                     (u_offset_t)0, NULL, cred, NULL) == 0) {
8580                         if (flk.l_type == F_UNLCK) {
8581                                 /* No longer locked, retry */
8582                                 goto retry;
8583                         }
8584                         *flock = flk;
8585                         LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8586                             F_GETLK, &flk);
8587                 }
8588         }
8589
8590         return (error);
8591 }
8592
8593 /*ARGSUSED*/
8594 static nfsstat4
8595 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8596     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8597 {
8598         nfsstat4 status;
8599         rfs4_lockowner_t *lo = lsp->rls_locker;
8600         rfs4_state_t *sp = lsp->rls_state;
8601         struct flock64 flock;
8602         int16_t ltype;
8603         int flag;
8604         int error;
8605         sysid_t sysid;
8606         LOCK4res *lres;
8607
8608         if (rfs4_lease_expired(lo->rl_client)) {
8609                 return (NFS4ERR_EXPIRED);
8610         }
8611
8612         if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8613                 return (status);
8614
8615         /* Check for zero length. To lock to end of file use all ones for V4 */
8616         if (length == 0)
8617                 return (NFS4ERR_INVAL);
8618         else if (length == (length4)(~0))
8619                 length = 0;             /* Posix to end of file  */
8620
8621 retry:
8622         rfs4_dbe_lock(sp->rs_dbe);
8623         if (sp->rs_closed) {
8624                 rfs4_dbe_unlock(sp->rs_dbe);
8625                 return (NFS4ERR_OLD_STATEID);
8626         }
8627
8628         if (resop->resop != OP_LOCKU) {
8629                 switch (locktype) {
8630                 case READ_LT:
8631                 case READW_LT:
8632                         if ((sp->rs_share_access
8633                             & OPEN4_SHARE_ACCESS_READ) == 0) {
8634                                 rfs4_dbe_unlock(sp->rs_dbe);
8635
8636                                 return (NFS4ERR_OPENMODE);
8637                         }
8638                         ltype = F_RDLCK;
8639                         break;
8640                 case WRITE_LT:
8641                 case WRITEW_LT:
8642                         if ((sp->rs_share_access
8643                             & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8644                                 rfs4_dbe_unlock(sp->rs_dbe);
8645
8646                                 return (NFS4ERR_OPENMODE);
8647                         }
8648                         ltype = F_WRLCK;
8649                         break;
8650                 }
8651         } else
8652                 ltype = F_UNLCK;
8653
8654         flock.l_type = ltype;
8655         flock.l_whence = 0;             /* SEEK_SET */
8656         flock.l_start = offset;
8657         flock.l_len = length;
8658         flock.l_sysid = sysid;
8659         flock.l_pid = lsp->rls_locker->rl_pid;
8660
8661         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8662         if (flock.l_len < 0 || flock.l_start < 0) {
8663                 rfs4_dbe_unlock(sp->rs_dbe);
8664                 return (NFS4ERR_INVAL);
8665         }
8666
8667         /*
8668          * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8669          * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8670          */
8671         flag = (int)sp->rs_share_access | F_REMOTELOCK;
8672
8673         error = setlock(sp->rs_finfo->rf_vp, &flock, flag, cred);
8674         if (error == 0) {
8675                 rfs4_dbe_lock(lsp->rls_dbe);
8676                 next_stateid(&lsp->rls_lockid);
8677                 rfs4_dbe_unlock(lsp->rls_dbe);
8678         }
8679
8680         rfs4_dbe_unlock(sp->rs_dbe);
8681
8682         /*
8683          * N.B. We map error values to nfsv4 errors. This is differrent
8684          * than puterrno4 routine.
8685          */
8686         switch (error) {
8687         case 0:
8688                 status = NFS4_OK;
8689                 break;
8690         case EAGAIN:
8691         case EACCES:            /* Old value */
8692                 /* Can only get here if op is OP_LOCK */
8693                 ASSERT(resop->resop == OP_LOCK);
8694                 lres = &resop->nfs_resop4_u.oplock;
8695                 status = NFS4ERR_DENIED;
8696                 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8697                     == NFS4ERR_EXPIRED)
8698                         goto retry;
8699                 break;
8700         case ENOLCK:
8701                 status = NFS4ERR_DELAY;
8702                 break;
8703         case EOVERFLOW:
8704                 status = NFS4ERR_INVAL;
8705                 break;
8706         case EINVAL:
8707                 status = NFS4ERR_NOTSUPP;
8708                 break;
8709         default:
8710                 status = NFS4ERR_SERVERFAULT;
8711                 break;
8712         }
8713
8714         return (status);
8715 }
8716
8717 /*ARGSUSED*/
8718 void
8719 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8720     struct svc_req *req, struct compound_state *cs)
8721 {
8722         LOCK4args *args = &argop->nfs_argop4_u.oplock;
8723         LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8724         nfsstat4 status;
8725         stateid4 *stateid;
8726         rfs4_lockowner_t *lo;
8727         rfs4_client_t *cp;
8728         rfs4_state_t *sp = NULL;
8729         rfs4_lo_state_t *lsp = NULL;
8730         bool_t ls_sw_held = FALSE;
8731         bool_t create = TRUE;
8732         bool_t lcreate = TRUE;
8733         bool_t dup_lock = FALSE;
8734         int rc;
8735
8736         DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8737             LOCK4args *, args);
8738
8739         if (cs->vp == NULL) {
8740                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8741                 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8742                     cs, LOCK4res *, resp);
8743                 return;
8744         }
8745
8746         if (args->locker.new_lock_owner) {
8747                 /* Create a new lockowner for this instance */
8748                 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8749
8750                 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8751
8752                 stateid = &olo->open_stateid;
8753                 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8754                 if (status != NFS4_OK) {
8755                         NFS4_DEBUG(rfs4_debug,
8756                             (CE_NOTE, "Get state failed in lock %d", status));
8757                         *cs->statusp = resp->status = status;
8758                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8759                             cs, LOCK4res *, resp);
8760                         return;
8761                 }
8762
8763                 /* Ensure specified filehandle matches */
8764                 if (cs->vp != sp->rs_finfo->rf_vp) {
8765                         rfs4_state_rele(sp);
8766                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8767                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8768                             cs, LOCK4res *, resp);
8769                         return;
8770                 }
8771
8772                 /* hold off other access to open_owner while we tinker */
8773                 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8774
8775                 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8776                 case NFS4_CHECK_STATEID_OLD:
8777                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8778                         goto end;
8779                 case NFS4_CHECK_STATEID_BAD:
8780                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8781                         goto end;
8782                 case NFS4_CHECK_STATEID_EXPIRED:
8783                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8784                         goto end;
8785                 case NFS4_CHECK_STATEID_UNCONFIRMED:
8786                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8787                         goto end;
8788                 case NFS4_CHECK_STATEID_CLOSED:
8789                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8790                         goto end;
8791                 case NFS4_CHECK_STATEID_OKAY:
8792                 case NFS4_CHECK_STATEID_REPLAY:
8793                         switch (rfs4_check_olo_seqid(olo->open_seqid,
8794                             sp->rs_owner, resop)) {
8795                         case NFS4_CHKSEQ_OKAY:
8796                                 if (rc == NFS4_CHECK_STATEID_OKAY)
8797                                         break;
8798                                 /*
8799                                  * This is replayed stateid; if seqid
8800                                  * matches next expected, then client
8801                                  * is using wrong seqid.
8802                                  */
8803                                 /* FALLTHROUGH */
8804                         case NFS4_CHKSEQ_BAD:
8805                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8806                                 goto end;
8807                         case NFS4_CHKSEQ_REPLAY:
8808                                 /* This is a duplicate LOCK request */
8809                                 dup_lock = TRUE;
8810
8811                                 /*
8812                                  * For a duplicate we do not want to
8813                                  * create a new lockowner as it should
8814                                  * already exist.
8815                                  * Turn off the lockowner create flag.
8816                                  */
8817                                 lcreate = FALSE;
8818                         }
8819                         break;
8820                 }
8821
8822                 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8823                 if (lo == NULL) {
8824                         NFS4_DEBUG(rfs4_debug,
8825                             (CE_NOTE, "rfs4_op_lock: no lock owner"));
8826                         *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8827                         goto end;
8828                 }
8829
8830                 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8831                 if (lsp == NULL) {
8832                         rfs4_update_lease(sp->rs_owner->ro_client);
8833                         /*
8834                          * Only update theh open_seqid if this is not
8835                          * a duplicate request
8836                          */
8837                         if (dup_lock == FALSE) {
8838                                 rfs4_update_open_sequence(sp->rs_owner);
8839                         }
8840
8841                         NFS4_DEBUG(rfs4_debug,
8842                             (CE_NOTE, "rfs4_op_lock: no state"));
8843                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8844                         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8845                         rfs4_lockowner_rele(lo);
8846                         goto end;
8847                 }
8848
8849                 /*
8850                  * This is the new_lock_owner branch and the client is
8851                  * supposed to be associating a new lock_owner with
8852                  * the open file at this point.  If we find that a
8853                  * lock_owner/state association already exists and a
8854                  * successful LOCK request was returned to the client,
8855                  * an error is returned to the client since this is
8856                  * not appropriate.  The client should be using the
8857                  * existing lock_owner branch.
8858                  */
8859                 if (dup_lock == FALSE && create == FALSE) {
8860                         if (lsp->rls_lock_completed == TRUE) {
8861                                 *cs->statusp =
8862                                     resp->status = NFS4ERR_BAD_SEQID;
8863                                 rfs4_lockowner_rele(lo);
8864                                 goto end;
8865                         }
8866                 }
8867
8868                 rfs4_update_lease(sp->rs_owner->ro_client);
8869
8870                 /*
8871                  * Only update theh open_seqid if this is not
8872                  * a duplicate request
8873                  */
8874                 if (dup_lock == FALSE) {
8875                         rfs4_update_open_sequence(sp->rs_owner);
8876                 }
8877
8878                 /*
8879                  * If this is a duplicate lock request, just copy the
8880                  * previously saved reply and return.
8881                  */
8882                 if (dup_lock == TRUE) {
8883                         /* verify that lock_seqid's match */
8884                         if (lsp->rls_seqid != olo->lock_seqid) {
8885                                 NFS4_DEBUG(rfs4_debug,
8886                                     (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8887                                     "lsp->seqid=%d old->seqid=%d",
8888                                     lsp->rls_seqid, olo->lock_seqid));
8889                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8890                         } else {
8891                                 rfs4_copy_reply(resop, &lsp->rls_reply);
8892                                 /*
8893                                  * Make sure to copy the just
8894                                  * retrieved reply status into the
8895                                  * overall compound status
8896                                  */
8897                                 *cs->statusp = resp->status;
8898                         }
8899                         rfs4_lockowner_rele(lo);
8900                         goto end;
8901                 }
8902
8903                 rfs4_dbe_lock(lsp->rls_dbe);
8904
8905                 /* Make sure to update the lock sequence id */
8906                 lsp->rls_seqid = olo->lock_seqid;
8907
8908                 NFS4_DEBUG(rfs4_debug,
8909                     (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
8910
8911                 /*
8912                  * This is used to signify the newly created lockowner
8913                  * stateid and its sequence number.  The checks for
8914                  * sequence number and increment don't occur on the
8915                  * very first lock request for a lockowner.
8916                  */
8917                 lsp->rls_skip_seqid_check = TRUE;
8918
8919                 /* hold off other access to lsp while we tinker */
8920                 rfs4_sw_enter(&lsp->rls_sw);
8921                 ls_sw_held = TRUE;
8922
8923                 rfs4_dbe_unlock(lsp->rls_dbe);
8924
8925                 rfs4_lockowner_rele(lo);
8926         } else {
8927                 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
8928                 /* get lsp and hold the lock on the underlying file struct */
8929                 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
8930                     != NFS4_OK) {
8931                         *cs->statusp = resp->status = status;
8932                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8933                             cs, LOCK4res *, resp);
8934                         return;
8935                 }
8936                 create = FALSE; /* We didn't create lsp */
8937
8938                 /* Ensure specified filehandle matches */
8939                 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
8940                         rfs4_lo_state_rele(lsp, TRUE);
8941                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8942                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8943                             cs, LOCK4res *, resp);
8944                         return;
8945                 }
8946
8947                 /* hold off other access to lsp while we tinker */
8948                 rfs4_sw_enter(&lsp->rls_sw);
8949                 ls_sw_held = TRUE;
8950
8951                 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
8952                 /*
8953                  * The stateid looks like it was okay (expected to be
8954                  * the next one)
8955                  */
8956                 case NFS4_CHECK_STATEID_OKAY:
8957                         /*
8958                          * The sequence id is now checked.  Determine
8959                          * if this is a replay or if it is in the
8960                          * expected (next) sequence.  In the case of a
8961                          * replay, there are two replay conditions
8962                          * that may occur.  The first is the normal
8963                          * condition where a LOCK is done with a
8964                          * NFS4_OK response and the stateid is
8965                          * updated.  That case is handled below when
8966                          * the stateid is identified as a REPLAY.  The
8967                          * second is the case where an error is
8968                          * returned, like NFS4ERR_DENIED, and the
8969                          * sequence number is updated but the stateid
8970                          * is not updated.  This second case is dealt
8971                          * with here.  So it may seem odd that the
8972                          * stateid is okay but the sequence id is a
8973                          * replay but it is okay.
8974                          */
8975                         switch (rfs4_check_lock_seqid(
8976                             args->locker.locker4_u.lock_owner.lock_seqid,
8977                             lsp, resop)) {
8978                         case NFS4_CHKSEQ_REPLAY:
8979                                 if (resp->status != NFS4_OK) {
8980                                         /*
8981                                          * Here is our replay and need
8982                                          * to verify that the last
8983                                          * response was an error.
8984                                          */
8985                                         *cs->statusp = resp->status;
8986                                         goto end;
8987                                 }
8988                                 /*
8989                                  * This is done since the sequence id
8990                                  * looked like a replay but it didn't
8991                                  * pass our check so a BAD_SEQID is
8992                                  * returned as a result.
8993                                  */
8994                                 /*FALLTHROUGH*/
8995                         case NFS4_CHKSEQ_BAD:
8996                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8997                                 goto end;
8998                         case NFS4_CHKSEQ_OKAY:
8999                                 /* Everything looks okay move ahead */
9000                                 break;
9001                         }
9002                         break;
9003                 case NFS4_CHECK_STATEID_OLD:
9004                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9005                         goto end;
9006                 case NFS4_CHECK_STATEID_BAD:
9007                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9008                         goto end;
9009                 case NFS4_CHECK_STATEID_EXPIRED:
9010                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9011                         goto end;
9012                 case NFS4_CHECK_STATEID_CLOSED:
9013                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9014                         goto end;
9015                 case NFS4_CHECK_STATEID_REPLAY:
9016                         switch (rfs4_check_lock_seqid(
9017                             args->locker.locker4_u.lock_owner.lock_seqid,
9018                             lsp, resop)) {
9019                         case NFS4_CHKSEQ_OKAY:
9020                                 /*
9021                                  * This is a replayed stateid; if
9022                                  * seqid matches the next expected,
9023                                  * then client is using wrong seqid.
9024                                  */
9025                         case NFS4_CHKSEQ_BAD:
9026                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9027                                 goto end;
9028                         case NFS4_CHKSEQ_REPLAY:
9029                                 rfs4_update_lease(lsp->rls_locker->rl_client);
9030                                 *cs->statusp = status = resp->status;
9031                                 goto end;
9032                         }
9033                         break;
9034                 default:
9035                         ASSERT(FALSE);
9036                         break;
9037                 }
9038
9039                 rfs4_update_lock_sequence(lsp);
9040                 rfs4_update_lease(lsp->rls_locker->rl_client);
9041         }
9042
9043         /*
9044          * NFS4 only allows locking on regular files, so
9045          * verify type of object.
9046          */
9047         if (cs->vp->v_type != VREG) {
9048                 if (cs->vp->v_type == VDIR)
9049                         status = NFS4ERR_ISDIR;
9050                 else
9051                         status = NFS4ERR_INVAL;
9052                 goto out;
9053         }
9054
9055         cp = lsp->rls_state->rs_owner->ro_client;
9056
9057         if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9058                 status = NFS4ERR_GRACE;
9059                 goto out;
9060         }
9061
9062         if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9063                 status = NFS4ERR_NO_GRACE;
9064                 goto out;
9065         }
9066
9067         if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9068                 status = NFS4ERR_NO_GRACE;
9069                 goto out;
9070         }
9071
9072         if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9073                 cs->deleg = TRUE;
9074
9075         status = rfs4_do_lock(lsp, args->locktype,
9076             args->offset, args->length, cs->cr, resop);
9077
9078 out:
9079         lsp->rls_skip_seqid_check = FALSE;
9080
9081         *cs->statusp = resp->status = status;
9082
9083         if (status == NFS4_OK) {
9084                 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9085                 lsp->rls_lock_completed = TRUE;
9086         }
9087         /*
9088          * Only update the "OPEN" response here if this was a new
9089          * lock_owner
9090          */
9091         if (sp)
9092                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9093
9094         rfs4_update_lock_resp(lsp, resop);
9095
9096 end:
9097         if (lsp) {
9098                 if (ls_sw_held)
9099                         rfs4_sw_exit(&lsp->rls_sw);
9100                 /*
9101                  * If an sp obtained, then the lsp does not represent
9102                  * a lock on the file struct.
9103                  */
9104                 if (sp != NULL)
9105                         rfs4_lo_state_rele(lsp, FALSE);
9106                 else
9107                         rfs4_lo_state_rele(lsp, TRUE);
9108         }
9109         if (sp) {
9110                 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9111                 rfs4_state_rele(sp);
9112         }
9113
9114         DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9115             LOCK4res *, resp);
9116 }
9117
9118 /* free function for LOCK/LOCKT */
9119 static void
9120 lock_denied_free(nfs_resop4 *resop)
9121 {
9122         LOCK4denied *dp = NULL;
9123
9124         switch (resop->resop) {
9125         case OP_LOCK:
9126                 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9127                         dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9128                 break;
9129         case OP_LOCKT:
9130                 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9131                         dp = &resop->nfs_resop4_u.oplockt.denied;
9132                 break;
9133         default:
9134                 break;
9135         }
9136
9137         if (dp)
9138                 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9139 }
9140
9141 /*ARGSUSED*/
9142 void
9143 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9144     struct svc_req *req, struct compound_state *cs)
9145 {
9146         LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9147         LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9148         nfsstat4 status;
9149         stateid4 *stateid = &args->lock_stateid;
9150         rfs4_lo_state_t *lsp;
9151
9152         DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9153             LOCKU4args *, args);
9154
9155         if (cs->vp == NULL) {
9156                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9157                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9158                     LOCKU4res *, resp);
9159                 return;
9160         }
9161
9162         if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9163                 *cs->statusp = resp->status = status;
9164                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9165                     LOCKU4res *, resp);
9166                 return;
9167         }
9168
9169         /* Ensure specified filehandle matches */
9170         if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9171                 rfs4_lo_state_rele(lsp, TRUE);
9172                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9173                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9174                     LOCKU4res *, resp);
9175                 return;
9176         }
9177
9178         /* hold off other access to lsp while we tinker */
9179         rfs4_sw_enter(&lsp->rls_sw);
9180
9181         switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9182         case NFS4_CHECK_STATEID_OKAY:
9183                 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9184                     != NFS4_CHKSEQ_OKAY) {
9185                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9186                         goto end;
9187                 }
9188                 break;
9189         case NFS4_CHECK_STATEID_OLD:
9190                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9191                 goto end;
9192         case NFS4_CHECK_STATEID_BAD:
9193                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9194                 goto end;
9195         case NFS4_CHECK_STATEID_EXPIRED:
9196                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9197                 goto end;
9198         case NFS4_CHECK_STATEID_CLOSED:
9199                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9200                 goto end;
9201         case NFS4_CHECK_STATEID_REPLAY:
9202                 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9203                 case NFS4_CHKSEQ_OKAY:
9204                                 /*
9205                                  * This is a replayed stateid; if
9206                                  * seqid matches the next expected,
9207                                  * then client is using wrong seqid.
9208                                  */
9209                 case NFS4_CHKSEQ_BAD:
9210                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9211                         goto end;
9212                 case NFS4_CHKSEQ_REPLAY:
9213                         rfs4_update_lease(lsp->rls_locker->rl_client);
9214                         *cs->statusp = status = resp->status;
9215                         goto end;
9216                 }
9217                 break;
9218         default:
9219                 ASSERT(FALSE);
9220                 break;
9221         }
9222
9223         rfs4_update_lock_sequence(lsp);
9224         rfs4_update_lease(lsp->rls_locker->rl_client);
9225
9226         /*
9227          * NFS4 only allows locking on regular files, so
9228          * verify type of object.
9229          */
9230         if (cs->vp->v_type != VREG) {
9231                 if (cs->vp->v_type == VDIR)
9232                         status = NFS4ERR_ISDIR;
9233                 else
9234                         status = NFS4ERR_INVAL;
9235                 goto out;
9236         }
9237
9238         if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9239                 status = NFS4ERR_GRACE;
9240                 goto out;
9241         }
9242
9243         status = rfs4_do_lock(lsp, args->locktype,
9244             args->offset, args->length, cs->cr, resop);
9245
9246 out:
9247         *cs->statusp = resp->status = status;
9248
9249         if (status == NFS4_OK)
9250                 resp->lock_stateid = lsp->rls_lockid.stateid;
9251
9252         rfs4_update_lock_resp(lsp, resop);
9253
9254 end:
9255         rfs4_sw_exit(&lsp->rls_sw);
9256         rfs4_lo_state_rele(lsp, TRUE);
9257
9258         DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9259             LOCKU4res *, resp);
9260 }
9261
9262 /*
9263  * LOCKT is a best effort routine, the client can not be guaranteed that
9264  * the status return is still in effect by the time the reply is received.
9265  * They are numerous race conditions in this routine, but we are not required
9266  * and can not be accurate.
9267  */
9268 /*ARGSUSED*/
9269 void
9270 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9271     struct svc_req *req, struct compound_state *cs)
9272 {
9273         LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9274         LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9275         rfs4_lockowner_t *lo;
9276         rfs4_client_t *cp;
9277         bool_t create = FALSE;
9278         struct flock64 flk;
9279         int error;
9280         int flag = FREAD | FWRITE;
9281         int ltype;
9282         length4 posix_length;
9283         sysid_t sysid;
9284         pid_t pid;
9285
9286         DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9287             LOCKT4args *, args);
9288
9289         if (cs->vp == NULL) {
9290                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9291                 goto out;
9292         }
9293
9294         /*
9295          * NFS4 only allows locking on regular files, so
9296          * verify type of object.
9297          */
9298         if (cs->vp->v_type != VREG) {
9299                 if (cs->vp->v_type == VDIR)
9300                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
9301                 else
9302                         *cs->statusp = resp->status =  NFS4ERR_INVAL;
9303                 goto out;
9304         }
9305
9306         /*
9307          * Check out the clientid to ensure the server knows about it
9308          * so that we correctly inform the client of a server reboot.
9309          */
9310         if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9311             == NULL) {
9312                 *cs->statusp = resp->status =
9313                     rfs4_check_clientid(&args->owner.clientid, 0);
9314                 goto out;
9315         }
9316         if (rfs4_lease_expired(cp)) {
9317                 rfs4_client_close(cp);
9318                 /*
9319                  * Protocol doesn't allow returning NFS4ERR_STALE as
9320                  * other operations do on this check so STALE_CLIENTID
9321                  * is returned instead
9322                  */
9323                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9324                 goto out;
9325         }
9326
9327         if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9328                 *cs->statusp = resp->status = NFS4ERR_GRACE;
9329                 rfs4_client_rele(cp);
9330                 goto out;
9331         }
9332         rfs4_client_rele(cp);
9333
9334         resp->status = NFS4_OK;
9335
9336         switch (args->locktype) {
9337         case READ_LT:
9338         case READW_LT:
9339                 ltype = F_RDLCK;
9340                 break;
9341         case WRITE_LT:
9342         case WRITEW_LT:
9343                 ltype = F_WRLCK;
9344                 break;
9345         }
9346
9347         posix_length = args->length;
9348         /* Check for zero length. To lock to end of file use all ones for V4 */
9349         if (posix_length == 0) {
9350                 *cs->statusp = resp->status = NFS4ERR_INVAL;
9351                 goto out;
9352         } else if (posix_length == (length4)(~0)) {
9353                 posix_length = 0;       /* Posix to end of file  */
9354         }
9355
9356         /* Find or create a lockowner */
9357         lo = rfs4_findlockowner(&args->owner, &create);
9358
9359         if (lo) {
9360                 pid = lo->rl_pid;
9361                 if ((resp->status =
9362                     rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9363                         goto err;
9364         } else {
9365                 pid = 0;
9366                 sysid = lockt_sysid;
9367         }
9368 retry:
9369         flk.l_type = ltype;
9370         flk.l_whence = 0;               /* SEEK_SET */
9371         flk.l_start = args->offset;
9372         flk.l_len = posix_length;
9373         flk.l_sysid = sysid;
9374         flk.l_pid = pid;
9375         flag |= F_REMOTELOCK;
9376
9377         LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9378
9379         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9380         if (flk.l_len < 0 || flk.l_start < 0) {
9381                 resp->status = NFS4ERR_INVAL;
9382                 goto err;
9383         }
9384         error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9385             NULL, cs->cr, NULL);
9386
9387         /*
9388          * N.B. We map error values to nfsv4 errors. This is differrent
9389          * than puterrno4 routine.
9390          */
9391         switch (error) {
9392         case 0:
9393                 if (flk.l_type == F_UNLCK)
9394                         resp->status = NFS4_OK;
9395                 else {
9396                         if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9397                                 goto retry;
9398                         resp->status = NFS4ERR_DENIED;
9399                 }
9400                 break;
9401         case EOVERFLOW:
9402                 resp->status = NFS4ERR_INVAL;
9403                 break;
9404         case EINVAL:
9405                 resp->status = NFS4ERR_NOTSUPP;
9406                 break;
9407         default:
9408                 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9409                     error);
9410                 resp->status = NFS4ERR_SERVERFAULT;
9411                 break;
9412         }
9413
9414 err:
9415         if (lo)
9416                 rfs4_lockowner_rele(lo);
9417         *cs->statusp = resp->status;
9418 out:
9419         DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9420             LOCKT4res *, resp);
9421 }
9422
9423 int
9424 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9425 {
9426         int err;
9427         int cmd;
9428         vnode_t *vp;
9429         struct shrlock shr;
9430         struct shr_locowner shr_loco;
9431         int fflags = 0;
9432
9433         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9434         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9435
9436         if (sp->rs_closed)
9437                 return (NFS4ERR_OLD_STATEID);
9438
9439         vp = sp->rs_finfo->rf_vp;
9440         ASSERT(vp);
9441
9442         shr.s_access = shr.s_deny = 0;
9443
9444         if (access & OPEN4_SHARE_ACCESS_READ) {
9445                 fflags |= FREAD;
9446                 shr.s_access |= F_RDACC;
9447         }
9448         if (access & OPEN4_SHARE_ACCESS_WRITE) {
9449                 fflags |= FWRITE;
9450                 shr.s_access |= F_WRACC;
9451         }
9452         ASSERT(shr.s_access);
9453
9454         if (deny & OPEN4_SHARE_DENY_READ)
9455                 shr.s_deny |= F_RDDNY;
9456         if (deny & OPEN4_SHARE_DENY_WRITE)
9457                 shr.s_deny |= F_WRDNY;
9458
9459         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9460         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9461         shr_loco.sl_pid = shr.s_pid;
9462         shr_loco.sl_id = shr.s_sysid;
9463         shr.s_owner = (caddr_t)&shr_loco;
9464         shr.s_own_len = sizeof (shr_loco);
9465
9466         cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9467
9468         err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9469         if (err != 0) {
9470                 if (err == EAGAIN)
9471                         err = NFS4ERR_SHARE_DENIED;
9472                 else
9473                         err = puterrno4(err);
9474                 return (err);
9475         }
9476
9477         sp->rs_share_access |= access;
9478         sp->rs_share_deny |= deny;
9479
9480         return (0);
9481 }
9482
9483 int
9484 rfs4_unshare(rfs4_state_t *sp)
9485 {
9486         int err;
9487         struct shrlock shr;
9488         struct shr_locowner shr_loco;
9489
9490         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9491
9492         if (sp->rs_closed || sp->rs_share_access == 0)
9493                 return (0);
9494
9495         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9496         ASSERT(sp->rs_finfo->rf_vp);
9497
9498         shr.s_access = shr.s_deny = 0;
9499         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9500         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9501         shr_loco.sl_pid = shr.s_pid;
9502         shr_loco.sl_id = shr.s_sysid;
9503         shr.s_owner = (caddr_t)&shr_loco;
9504         shr.s_own_len = sizeof (shr_loco);
9505
9506         err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9507             NULL);
9508         if (err != 0) {
9509                 err = puterrno4(err);
9510                 return (err);
9511         }
9512
9513         sp->rs_share_access = 0;
9514         sp->rs_share_deny = 0;
9515
9516         return (0);
9517
9518 }
9519
9520 static int
9521 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9522 {
9523         struct clist    *wcl;
9524         count4          count = rok->data_len;
9525         int             wlist_len;
9526
9527         wcl = args->wlist;
9528         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9529                 return (FALSE);
9530         }
9531         wcl = args->wlist;
9532         rok->wlist_len = wlist_len;
9533         rok->wlist = wcl;
9534         return (TRUE);
9535 }
9536
/*
 * Tunable to disable server referrals: while this is non-zero,
 * vn_is_nfs_reparse() returns B_FALSE without examining the vnode.
 */
int rfs4_no_referrals = 0;
9539
9540 /*
9541  * Find an NFS record in reparse point data.
9542  * Returns 0 for success and <0 or an errno value on failure.
9543  */
9544 int
9545 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9546 {
9547         int err;
9548         char *stype, *val;
9549         nvlist_t *nvl;
9550         nvpair_t *curr;
9551
9552         if ((nvl = reparse_init()) == NULL)
9553                 return (-1);
9554
9555         if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9556                 reparse_free(nvl);
9557                 return (err);
9558         }
9559
9560         curr = NULL;
9561         while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9562                 if ((stype = nvpair_name(curr)) == NULL) {
9563                         reparse_free(nvl);
9564                         return (-2);
9565                 }
9566                 if (strncasecmp(stype, "NFS", 3) == 0)
9567                         break;
9568         }
9569
9570         if ((curr == NULL) ||
9571             (nvpair_value_string(curr, &val))) {
9572                 reparse_free(nvl);
9573                 return (-3);
9574         }
9575         *nvlp = nvl;
9576         *svcp = stype;
9577         *datap = val;
9578         return (0);
9579 }
9580
9581 int
9582 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9583 {
9584         nvlist_t *nvl;
9585         char *s, *d;
9586
9587         if (rfs4_no_referrals != 0)
9588                 return (B_FALSE);
9589
9590         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9591                 return (B_FALSE);
9592
9593         if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9594                 return (B_FALSE);
9595
9596         reparse_free(nvl);
9597
9598         return (B_TRUE);
9599 }
9600
9601 /*
9602  * There is a user-level copy of this routine in ref_subr.c.
9603  * Changes should be kept in sync.
9604  */
9605 static int
9606 nfs4_create_components(char *path, component4 *comp4)
9607 {
9608         int slen, plen, ncomp;
9609         char *ori_path, *nxtc, buf[MAXNAMELEN];
9610
9611         if (path == NULL)
9612                 return (0);
9613
9614         plen = strlen(path) + 1;        /* include the terminator */
9615         ori_path = path;
9616         ncomp = 0;
9617
9618         /* count number of components in the path */
9619         for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9620                 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9621                         if ((slen = nxtc - path) == 0) {
9622                                 path = nxtc + 1;
9623                                 continue;
9624                         }
9625
9626                         if (comp4 != NULL) {
9627                                 bcopy(path, buf, slen);
9628                                 buf[slen] = '\0';
9629                                 (void) str_to_utf8(buf, &comp4[ncomp]);
9630                         }
9631
9632                         ncomp++;        /* 1 valid component */
9633                         path = nxtc + 1;
9634                 }
9635                 if (*nxtc == '\0' || *nxtc == '\n')
9636                         break;
9637         }
9638
9639         return (ncomp);
9640 }
9641
9642 /*
9643  * There is a user-level copy of this routine in ref_subr.c.
9644  * Changes should be kept in sync.
9645  */
9646 static int
9647 make_pathname4(char *path, pathname4 *pathname)
9648 {
9649         int ncomp;
9650         component4 *comp4;
9651
9652         if (pathname == NULL)
9653                 return (0);
9654
9655         if (path == NULL) {
9656                 pathname->pathname4_val = NULL;
9657                 pathname->pathname4_len = 0;
9658                 return (0);
9659         }
9660
9661         /* count number of components to alloc buffer */
9662         if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9663                 pathname->pathname4_val = NULL;
9664                 pathname->pathname4_len = 0;
9665                 return (0);
9666         }
9667         comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9668
9669         /* copy components into allocated buffer */
9670         ncomp = nfs4_create_components(path, comp4);
9671
9672         pathname->pathname4_val = comp4;
9673         pathname->pathname4_len = ncomp;
9674
9675         return (ncomp);
9676 }
9677
9678 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9679
9680 fs_locations4 *
9681 fetch_referral(vnode_t *vp, cred_t *cr)
9682 {
9683         nvlist_t *nvl;
9684         char *stype, *sdata;
9685         fs_locations4 *result;
9686         char buf[1024];
9687         size_t bufsize;
9688         XDR xdr;
9689         int err;
9690
9691         /*
9692          * Check attrs to ensure it's a reparse point
9693          */
9694         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9695                 return (NULL);
9696
9697         /*
9698          * Look for an NFS record and get the type and data
9699          */
9700         if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9701                 return (NULL);
9702
9703         /*
9704          * With the type and data, upcall to get the referral
9705          */
9706         bufsize = sizeof (buf);
9707         bzero(buf, sizeof (buf));
9708         err = reparse_kderef((const char *)stype, (const char *)sdata,
9709             buf, &bufsize);
9710         reparse_free(nvl);
9711
9712         DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9713             char *, stype, char *, sdata, char *, buf, int, err);
9714         if (err) {
9715                 cmn_err(CE_NOTE,
9716                     "reparsed daemon not running: unable to get referral (%d)",
9717                     err);
9718                 return (NULL);
9719         }
9720
9721         /*
9722          * We get an XDR'ed record back from the kderef call
9723          */
9724         xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9725         result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9726         err = xdr_fs_locations4(&xdr, result);
9727         XDR_DESTROY(&xdr);
9728         if (err != TRUE) {
9729                 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9730                     int, err);
9731                 return (NULL);
9732         }
9733
9734         /*
9735          * Look at path to recover fs_root, ignoring the leading '/'
9736          */
9737         (void) make_pathname4(vp->v_path, &result->fs_root);
9738
9739         return (result);
9740 }
9741
9742 char *
9743 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9744 {
9745         fs_locations4 *fsl;
9746         fs_location4 *fs;
9747         char *server, *path, *symbuf;
9748         static char *prefix = "/net/";
9749         int i, size, npaths;
9750         uint_t len;
9751
9752         /* Get the referral */
9753         if ((fsl = fetch_referral(vp, cr)) == NULL)
9754                 return (NULL);
9755
9756         /* Deal with only the first location and first server */
9757         fs = &fsl->locations_val[0];
9758         server = utf8_to_str(&fs->server_val[0], &len, NULL);
9759         if (server == NULL) {
9760                 rfs4_free_fs_locations4(fsl);
9761                 kmem_free(fsl, sizeof (fs_locations4));
9762                 return (NULL);
9763         }
9764
9765         /* Figure out size for "/net/" + host + /path/path/path + NULL */
9766         size = strlen(prefix) + len;
9767         for (i = 0; i < fs->rootpath.pathname4_len; i++)
9768                 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9769
9770         /* Allocate the symlink buffer and fill it */
9771         symbuf = kmem_zalloc(size, KM_SLEEP);
9772         (void) strcat(symbuf, prefix);
9773         (void) strcat(symbuf, server);
9774         kmem_free(server, len);
9775
9776         npaths = 0;
9777         for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9778                 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9779                 if (path == NULL)
9780                         continue;
9781                 (void) strcat(symbuf, "/");
9782                 (void) strcat(symbuf, path);
9783                 npaths++;
9784                 kmem_free(path, len);
9785         }
9786
9787         rfs4_free_fs_locations4(fsl);
9788         kmem_free(fsl, sizeof (fs_locations4));
9789
9790         if (strsz != NULL)
9791                 *strsz = size;
9792         return (symbuf);
9793 }
9794
9795 /*
9796  * Check to see if we have a downrev Solaris client, so that we
9797  * can send it a symlink instead of a referral.
9798  */
9799 int
9800 client_is_downrev(struct svc_req *req)
9801 {
9802         struct sockaddr *ca;
9803         rfs4_clntip_t *ci;
9804         bool_t create = FALSE;
9805         int is_downrev;
9806
9807         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9808         ASSERT(ca);
9809         ci = rfs4_find_clntip(ca, &create);
9810         if (ci == NULL)
9811                 return (0);
9812         is_downrev = ci->ri_no_referrals;
9813         rfs4_dbe_rele(ci->ri_dbe);
9814         return (is_downrev);
9815 }