usr/src/uts/common/fs/nfs/nfs4_srv.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24
  25 /*
  26  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  27  *      All Rights Reserved
  28  */
  29
  30 #include <sys/param.h>
  31 #include <sys/types.h>
  32 #include <sys/systm.h>
  33 #include <sys/cred.h>
  34 #include <sys/buf.h>
  35 #include <sys/vfs.h>
  36 #include <sys/vfs_opreg.h>
  37 #include <sys/vnode.h>
  38 #include <sys/uio.h>
  39 #include <sys/errno.h>
  40 #include <sys/sysmacros.h>
  41 #include <sys/statvfs.h>
  42 #include <sys/kmem.h>
  43 #include <sys/dirent.h>
  44 #include <sys/cmn_err.h>
  45 #include <sys/debug.h>
  46 #include <sys/systeminfo.h>
  47 #include <sys/flock.h>
  48 #include <sys/pathname.h>
  49 #include <sys/nbmlock.h>
  50 #include <sys/share.h>
  51 #include <sys/atomic.h>
  52 #include <sys/policy.h>
  53 #include <sys/fem.h>
  54 #include <sys/sdt.h>
  55 #include <sys/ddi.h>
  56 #include <sys/zone.h>
  57
  58 #include <fs/fs_reparse.h>
  59
  60 #include <rpc/types.h>
  61 #include <rpc/auth.h>
  62 #include <rpc/rpcsec_gss.h>
  63 #include <rpc/svc.h>
  64
  65 #include <nfs/nfs.h>
  66 #include <nfs/export.h>
  67 #include <nfs/nfs_cmd.h>
  68 #include <nfs/lm.h>
  69 #include <nfs/nfs4.h>
  70
  71 #include <sys/strsubr.h>
  72 #include <sys/strsun.h>
  73
  74 #include <inet/common.h>
  75 #include <inet/ip.h>
  76 #include <inet/ip6.h>
  77
  78 #include <sys/tsol/label.h>
  79 #include <sys/tsol/tndb.h>
  80
     /*
      * Tunables.  The two static variables below are seeded from the
      * RFS4_MAXLOCK_TRIES and RFS4_LOCK_DELAY defaults; rdma_svc_ops and
      * nfs_loaned_buffers are defined outside this file.
      */
  81 #define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
  82 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
  83 #define RFS4_LOCK_DELAY 10      /* Milliseconds */
  84 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
  85 extern struct svc_ops rdma_svc_ops;
  86 extern int nfs_loaned_buffers;
  87 /* End of Tunables */
  88
     /* Forward declaration; no definition appears in this part of the file. */
  89 static int rdma_setup_read_data4(READ4args *, READ4res *);
  90
  91 /*
  92  * Used to bump the stateid4.seqid value and show changes in the stateid.
      * Pre-increments (sp)->bits.chgseq in place and evaluates to the new
      * value, so the argument must be a modifiable stateid.
  93  */
  94 #define next_stateid(sp) (++(sp)->bits.chgseq)
  95
  96 /*
  97  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
  98  *      This is used to return NFS4ERR_TOOSMALL when clients specify
  99  *      maxcount that isn't large enough to hold the smallest possible
 100  *      XDR encoded dirent.
 101  *
 102  *          sizeof cookie (8 bytes) +
 103  *          sizeof name_len (4 bytes) +
 104  *          sizeof smallest (padded) name (4 bytes) +
 105  *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 106  *          sizeof attrlist4_len (4 bytes) +
 107  *          sizeof next boolean (4 bytes)
 108  *
 109  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 110  * the smallest possible entry4 (assumes no attrs requested).
 111  *      sizeof nfsstat4 (4 bytes) +
 112  *      sizeof verifier4 (8 bytes) +
 113  *      sizeof entry4list bool (4 bytes) +
 114  *      sizeof entry4   (36 bytes) +
 115  *      sizeof eof bool  (4 bytes)
 116  *
 117  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 118  *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 119  *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 120  *      required for a given name length.  MAXNAMELEN is the maximum
 121  *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 122  *      macros are to allow for . and .. entries -- just a minor tweak to try
 123  *      and guarantee that buffer we give to VOP_READDIR will be large enough
 124  *      to hold ., .., and the largest possible solaris dirent64.
 125  */
 126 #define RFS4_MINLEN_ENTRY4 36   /* 8 + 4 + 4 + 12 + 4 + 4, itemized above */
     /* status + verifier + entry-list bool + one minimal entry4 + eof bool */
 127 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
     /* room for ".", ".." and one dirent64 carrying a MAXNAMELEN-sized name */
 128 #define RFS4_MINLEN_RDDIR_BUF \
 129         (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 130
 131 /*
 132  * It would be better to pad to 4 bytes since that's what XDR would do,
 133  * but the dirents UFS gives us are already padded to 8, so just take
 134  * what we're given.  Dircount is only a hint anyway.  Currently the
 135  * solaris kernel is ASCII only, so there's no point in calling the
 136  * UTF8 functions.
 137  *
 138  * dirent64: name padded to provide 8 byte struct alignment
 139  *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 140  *
 141  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 142  *
      * DIRENT64_TO_DIRCOUNT(dp) is the dircount contribution charged for one
      * dirent64: three XDR units for the cookie and name length, plus the
      * (already padded) name length that DIRENT64_NAMELEN derives from
      * dp->d_reclen.
 143  */
 144 #define DIRENT64_TO_DIRCOUNT(dp) \
 145         (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 146
 147 time_t rfs4_start_time;
     /*
      * NOTE(review): rfs4_srvrinit() in this file never assigns
      * rfs4_start_time -- confirm where it is actually initialized.
      */
 148
     /* allocated in rfs4_srvrinit(), released in rfs4_srvrfini() */
 149 static sysid_t lockt_sysid;             /* dummy sysid for all LOCKT calls */
 150
     /* set from fs_new_caller_id() in rfs4_srvrinit() */
 151 u_longlong_t    nfs4_srv_caller_id;
     /* key handed to vsd_create() in rfs4_srvrinit() */
 152 uint_t          nfs4_srv_vkey = 0;
 153
     /* write verifier; built from hostid/time in rfs4_srvrinit() */
 154 verifier4       Write4verf;
     /* not assigned anywhere in this part of the file */
 155 verifier4       Readdir4verf;
 156
 157 void    rfs4_init_compound_state(struct compound_state *);
 158
     /*
      * Forward declarations for the per-operation handlers and the matching
      * result-free routines referenced by the rfsv4disptab dispatch table
      * below.  Every handler shares the same four-argument signature:
      * operation arguments, operation result, the RPC request, and the
      * per-compound state.
      */
 159 static void     nullfree(caddr_t);
 160 static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 161                         struct compound_state *);
 162 static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 163                         struct compound_state *);
 164 static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 165                         struct compound_state *);
 166 static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 167                         struct compound_state *);
 168 static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 169                         struct compound_state *);
 170 static void     rfs4_op_create_free(nfs_resop4 *resop);
 171 static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 172                         struct svc_req *, struct compound_state *);
 173 static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 174                         struct svc_req *, struct compound_state *);
 175 static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 176                         struct compound_state *);
 177 static void     rfs4_op_getattr_free(nfs_resop4 *);
 178 static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 179                         struct compound_state *);
 180 static void     rfs4_op_getfh_free(nfs_resop4 *);
 181 static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 182                         struct compound_state *);
 183 static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 184                         struct compound_state *);
 185 static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 186                         struct compound_state *);
 187 static void     lock_denied_free(nfs_resop4 *);
 188 static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 189                         struct compound_state *);
 190 static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 191                         struct compound_state *);
 192 static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 193                         struct compound_state *);
 194 static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 195                         struct compound_state *);
 196 static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 197                                 struct svc_req *req, struct compound_state *cs);
 198 static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 199                         struct compound_state *);
 200 static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 201                         struct compound_state *);
 202 static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 203                         struct svc_req *, struct compound_state *);
 204 static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 205                         struct svc_req *, struct compound_state *);
 206 static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 207                         struct compound_state *);
 208 static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 209                         struct compound_state *);
 210 static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 211                         struct compound_state *);
 212 static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 213                         struct compound_state *);
 214 static void     rfs4_op_read_free(nfs_resop4 *);
 215 static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 216 static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 217                         struct compound_state *);
 218 static void     rfs4_op_readlink_free(nfs_resop4 *);
 219 static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 220                         struct svc_req *, struct compound_state *);
 221 static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 222                         struct compound_state *);
 223 static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 224                         struct compound_state *);
 225 static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 226                         struct compound_state *);
 227 static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 228                         struct compound_state *);
 229 static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 230                         struct compound_state *);
 231 static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 232                         struct compound_state *);
 233 static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 234                         struct compound_state *);
 235 static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 236                         struct compound_state *);
 237 static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 238                         struct svc_req *, struct compound_state *);
 239 static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 240                         struct svc_req *req, struct compound_state *);
 241 static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 242                         struct compound_state *);
 243 static void     rfs4_op_secinfo_free(nfs_resop4 *);
 244
     /* helpers that are not dispatch-table entries */
 245 static nfsstat4 check_open_access(uint32_t,
 246                                 struct compound_state *, struct svc_req *);
 247 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 248 void rfs4_ss_clid(rfs4_client_t *);
 249
 250 /*
 251  * translation table for attrs
      *
      * Set up and torn down by nfs4_ntov_table_init() and
      * nfs4_ntov_table_free(), and passed to do_rfs4_set_attrs().
      * NOTE(review): the per-field remarks below are inferred from the field
      * names and types only -- confirm against the code that fills the table.
 252  */
 253 struct nfs4_ntov_table {
 254         union nfs4_attr_u *na;          /* presumably the attr value array */
 255         uint8_t amap[NFS4_MAXNUM_ATTRS]; /* one byte per possible attribute */
 256         int attrcnt;                    /* presumably entries in use */
 257         bool_t vfsstat;                 /* TODO confirm meaning */
 258 };
 259
 260 static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 261 static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 262                                     struct nfs4_svgetit_arg *sargp);
 263
     /* Forward declaration; defined outside this part of the file. */
 264 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 265                     struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 266                     struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 267
     /*
      * Delegation operation vectors.  rfs4_srvrinit() builds them with
      * fem_create() from nfs4_rd_deleg_tmpl and nfs4_wr_deleg_tmpl;
      * rfs4_srvrfini() releases them with fem_free().
      */
 268 fem_t           *deleg_rdops;
 269 fem_t           *deleg_wrops;
 270
     /* newest instance; older ones are reached through its prev pointers */
 271 rfs4_servinst_t *rfs4_cur_servinst = NULL;      /* current server instance */
 272 kmutex_t        rfs4_servinst_lock;     /* protects linked list */
 273 int             rfs4_seen_first_compound;       /* set first time we see one */
 274
 275 /*
 276  * NFS4 op dispatch table
      *
      * One rfsv4disp entry describes one operation: the handler to run, the
      * routine that releases whatever the handler allocated for its result,
      * and a set of RPC_* flag bits.
 277  */
 278
 279 struct rfsv4disp {
 280         void    (*dis_proc)();          /* proc to call */
 281         void    (*dis_resfree)();       /* frees space allocated by proc */
 282         int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
 283 };
 284
 285 static struct rfsv4disp rfsv4disptab[] = {
 286         /*
 287          * NFS VERSION 4
              *
              * Each entry's position is its operation number (given in the
              * comment above the entry), so the order must not change.
              * Slots 0-2 are not valid operations and route to
              * rfs4_op_illegal.
 288          */
 289
 290         /* RFS_NULL = 0 */
 291         {rfs4_op_illegal, nullfree, 0},
 292
 293         /* UNUSED = 1 */
 294         {rfs4_op_illegal, nullfree, 0},
 295
 296         /* UNUSED = 2 */
 297         {rfs4_op_illegal, nullfree, 0},
 298
 299         /* OP_ACCESS = 3 */
 300         {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
 301
 302         /* OP_CLOSE = 4 */
 303         {rfs4_op_close, nullfree, 0},
 304
 305         /* OP_COMMIT = 5 */
 306         {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
 307
 308         /* OP_CREATE = 6 */
 309         {rfs4_op_create, nullfree, 0},
 310
 311         /* OP_DELEGPURGE = 7 */
 312         {rfs4_op_delegpurge, nullfree, 0},
 313
 314         /* OP_DELEGRETURN = 8 */
 315         {rfs4_op_delegreturn, nullfree, 0},
 316
 317         /* OP_GETATTR = 9 */
 318         {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
 319
 320         /* OP_GETFH = 10 */
 321         {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
 322
 323         /* OP_LINK = 11 */
 324         {rfs4_op_link, nullfree, 0},
 325
 326         /* OP_LOCK = 12 */
 327         {rfs4_op_lock, lock_denied_free, 0},
 328
 329         /* OP_LOCKT = 13 */
 330         {rfs4_op_lockt, lock_denied_free, 0},
 331
 332         /* OP_LOCKU = 14 */
 333         {rfs4_op_locku, nullfree, 0},
 334
 335         /* OP_LOOKUP = 15 */
 336         {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 337
 338         /* OP_LOOKUPP = 16 */
 339         {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 340
 341         /* OP_NVERIFY = 17 */
 342         {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
 343
 344         /* OP_OPEN = 18 */
 345         {rfs4_op_open, rfs4_free_reply, 0},
 346
 347         /* OP_OPENATTR = 19 */
 348         {rfs4_op_openattr, nullfree, 0},
 349
 350         /* OP_OPEN_CONFIRM = 20 */
 351         {rfs4_op_open_confirm, nullfree, 0},
 352
 353         /* OP_OPEN_DOWNGRADE = 21 */
 354         {rfs4_op_open_downgrade, nullfree, 0},
 355
 356         /* OP_PUTFH = 22 */
 357         {rfs4_op_putfh, nullfree, RPC_ALL},
 358
 359         /* OP_PUTPUBFH = 23 */
 360         {rfs4_op_putpubfh, nullfree, RPC_ALL},
 361
 362         /* OP_PUTROOTFH = 24 */
 363         {rfs4_op_putrootfh, nullfree, RPC_ALL},
 364
 365         /* OP_READ = 25 */
 366         {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
 367
 368         /* OP_READDIR = 26 */
 369         {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
 370
 371         /* OP_READLINK = 27 */
 372         {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
 373
 374         /* OP_REMOVE = 28 */
 375         {rfs4_op_remove, nullfree, 0},
 376
 377         /* OP_RENAME = 29 */
 378         {rfs4_op_rename, nullfree, 0},
 379
 380         /* OP_RENEW = 30 */
 381         {rfs4_op_renew, nullfree, 0},
 382
 383         /* OP_RESTOREFH = 31 */
 384         {rfs4_op_restorefh, nullfree, RPC_ALL},
 385
 386         /* OP_SAVEFH = 32 */
 387         {rfs4_op_savefh, nullfree, RPC_ALL},
 388
 389         /* OP_SECINFO = 33 */
 390         {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
 391
 392         /* OP_SETATTR = 34 */
 393         {rfs4_op_setattr, nullfree, 0},
 394
 395         /* OP_SETCLIENTID = 35 */
 396         {rfs4_op_setclientid, nullfree, 0},
 397
 398         /* OP_SETCLIENTID_CONFIRM = 36 */
 399         {rfs4_op_setclientid_confirm, nullfree, 0},
 400
 401         /* OP_VERIFY = 37 */
 402         {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
 403
 404         /* OP_WRITE = 38 */
 405         {rfs4_op_write, nullfree, 0},
 406
 407         /* OP_RELEASE_LOCKOWNER = 39 */
 408         {rfs4_op_release_lockowner, nullfree, 0},
 409 };
 410
     /* number of entries in rfsv4disptab */
 411 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
 412
     /*
      * One past the last valid rfsv4disptab index.  In the DEBUG-only
      * rfs4_op_string table this position holds "rfs4_op_illegal".
      */
 413 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 414
 415 #ifdef DEBUG
 416
     /* DEBUG-only switches; not referenced in this part of the file. */
 417 int             rfs4_fillone_debug = 0;
 418 int             rfs4_no_stub_access = 1;
 419 int             rfs4_rddir_debug = 0;
 420
     /*
      * Printable handler names, indexed by operation number in the same
      * order as rfsv4disptab.  The extra final entry sits at OP_ILLEGAL_IDX.
      */
 421 static char    *rfs4_op_string[] = {
 422         "rfs4_op_null",
 423         "rfs4_op_1 unused",
 424         "rfs4_op_2 unused",
 425         "rfs4_op_access",
 426         "rfs4_op_close",
 427         "rfs4_op_commit",
 428         "rfs4_op_create",
 429         "rfs4_op_delegpurge",
 430         "rfs4_op_delegreturn",
 431         "rfs4_op_getattr",
 432         "rfs4_op_getfh",
 433         "rfs4_op_link",
 434         "rfs4_op_lock",
 435         "rfs4_op_lockt",
 436         "rfs4_op_locku",
 437         "rfs4_op_lookup",
 438         "rfs4_op_lookupp",
 439         "rfs4_op_nverify",
 440         "rfs4_op_open",
 441         "rfs4_op_openattr",
 442         "rfs4_op_open_confirm",
 443         "rfs4_op_open_downgrade",
 444         "rfs4_op_putfh",
 445         "rfs4_op_putpubfh",
 446         "rfs4_op_putrootfh",
 447         "rfs4_op_read",
 448         "rfs4_op_readdir",
 449         "rfs4_op_readlink",
 450         "rfs4_op_remove",
 451         "rfs4_op_rename",
 452         "rfs4_op_renew",
 453         "rfs4_op_restorefh",
 454         "rfs4_op_savefh",
 455         "rfs4_op_secinfo",
 456         "rfs4_op_setattr",
 457         "rfs4_op_setclientid",
             /* matches the handler name rfs4_op_setclientid_confirm */
 458         "rfs4_op_setclientid_confirm",
 459         "rfs4_op_verify",
 460         "rfs4_op_write",
 461         "rfs4_op_release_lockowner",
 462         "rfs4_op_illegal"
 463 };
 464 #endif
 465
 466 void    rfs4_ss_chkclid(rfs4_client_t *);
 467
     /* declared locally; used by rfs4_dss_newpath() to copy path strings */
 468 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 469
 470 extern void     rfs4_free_fs_locations4(fs_locations4 *);
 471
     /*
      * nextdp(dp): address of the dirent64 that follows dp in a buffer,
      * i.e. dp advanced by its own d_reclen bytes.  Any definition of
      * nextdp picked up from a header is discarded first.
      */
 472 #ifdef  nextdp
 473 #undef nextdp
 474 #endif
 475 #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 476
     /*
      * Operation template handed to fem_create() in rfs4_srvrinit() to build
      * deleg_rdops.  Each row pairs a vnode operation name with the
      * deleg_rd_* routine installed for it; the NULL pair ends the list.
      * Unlike the write template below, VOPNAME_READ is not listed.
      */
 477 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
 478         VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
 479         VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
 480         VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
 481         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
 482         VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
 483         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
 484         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
 485         NULL,                   NULL
 486 };
 487 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
             /*
              * Operation template handed to fem_create() in rfs4_srvrinit()
              * to build deleg_wrops.  Same rows as the read template plus
              * VOPNAME_READ; the NULL pair ends the list.
              */
 488         VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
 489         VOPNAME_READ,           { .femop_read = deleg_wr_read },
 490         VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
 491         VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
 492         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
 493         VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
 494         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
 495         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
 496         NULL,                   NULL
 497 };
 498
     /*
      * One-time set-up of the NFSv4 server's module-wide state.
      *
      * Builds Write4verf, calls rfs4_attr_init(), initializes
      * rfs4_deleg_lock, rfs4_state_lock, rfs4_servinst_lock and
      * rfs4_deleg_policy_lock, creates the read and write delegation
      * operation vectors, and obtains the caller id, the LOCKT sysid and
      * the nfs4_srv_vkey key.
      *
      * Always returns 0.  A fem_create() failure is not reported to the
      * caller; delegation is disabled instead.  Whenever that happens the
      * corresponding deleg_*ops pointer is left NULL so that rfs4_srvrfini()
      * can tell there is nothing to free.
      */
 499 int
 500 rfs4_srvrinit(void)
 501 {
 502         timespec32_t verf;
 503         int error;
 504         extern void rfs4_attr_init();
 505         extern krwlock_t rfs4_deleg_policy_lock;
 506
 507         /*
 508          * The following algorithm attempts to find a unique verifier
 509          * to be used as the write verifier returned from the server
 510          * to the client.  It is important that this verifier change
 511          * whenever the server reboots.  Of secondary importance, it
 512          * is important for the verifier to be unique between two
 513          * different servers.
 514          *
 515          * Thus, an attempt is made to use the system hostid and the
 516          * current time in seconds when the nfssrv kernel module is
 517          * loaded.  It is assumed that an NFS server will not be able
 518          * to boot and then to reboot in less than a second.  If the
 519          * hostid has not been set, then the current high resolution
 520          * time is used.  This will ensure different verifiers each
 521          * time the server reboots and minimize the chances that two
 522          * different servers will have the same verifier.
 523          * XXX - this is broken on LP64 kernels.
 524          */
 525         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 526         if (verf.tv_sec != 0) {
 527                 verf.tv_nsec = gethrestime_sec();
 528         } else {
 529                 timespec_t tverf;
 530
 531                 gethrestime(&tverf);
 532                 verf.tv_sec = (time_t)tverf.tv_sec;
 533                 verf.tv_nsec = tverf.tv_nsec;
 534         }
 535
             /* the two 32-bit halves of verf together form the verifier */
 536         Write4verf = *(uint64_t *)&verf;
 537
 538         rfs4_attr_init();
 539         mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 540
 541         /* Used to manage create/destroy of server state */
 542         mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
 543
 544         /* Used to manage access to server instance linked list */
 545         mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 546
 547         /* Used to manage access to rfs4_deleg_policy */
 548         rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 549
 550         error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
 551         if (error != 0) {
 552                 rfs4_disable_delegation();
 553         } else {
 554                 error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 555                     &deleg_wrops);
 556                 if (error != 0) {
 557                         rfs4_disable_delegation();
 558                         fem_free(deleg_rdops);
                             /*
                              * Clear the pointer: rfs4_srvrfini() would
                              * otherwise free the same vector a second time.
                              */
                             deleg_rdops = NULL;
 559                 }
 560         }
 561
 562         nfs4_srv_caller_id = fs_new_caller_id();
 563
 564         lockt_sysid = lm_alloc_sysidt();
 565
 566         vsd_create(&nfs4_srv_vkey, NULL);
 567
 568         return (0);
 569 }
 570
     /*
      * Undo rfs4_srvrinit(): give back the LOCKT sysid, destroy
      * rfs4_deleg_lock, rfs4_state_lock and rfs4_deleg_policy_lock, and free
      * whichever delegation operation vectors were actually created.
      *
      * NOTE(review): rfs4_servinst_lock is initialized by rfs4_srvrinit() but
      * is not destroyed here -- confirm whether that is deliberate.
      */
 571 void
 572 rfs4_srvrfini(void)
 573 {
 574         extern krwlock_t rfs4_deleg_policy_lock;
 575
 576         if (lockt_sysid != LM_NOSYSID) {
 577                 lm_free_sysidt(lockt_sysid);
 578                 lockt_sysid = LM_NOSYSID;
 579         }
 580
 581         mutex_destroy(&rfs4_deleg_lock);
 582         mutex_destroy(&rfs4_state_lock);
 583         rw_destroy(&rfs4_deleg_policy_lock);
 584
             /*
              * Either vector may be absent if fem_create() failed during
              * initialization, so only free what exists and clear the
              * pointers afterwards.
              */
             if (deleg_rdops != NULL) {
 585                 fem_free(deleg_rdops);
                     deleg_rdops = NULL;
             }
             if (deleg_wrops != NULL) {
 586                 fem_free(deleg_wrops);
                     deleg_wrops = NULL;
             }
 587 }
 588
     /*
      * Reset a compound_state to its starting values: everything zeroed,
      * then cont set to TRUE, access set to CS_ACCESS_DENIED, and the
      * filehandle value pointer aimed at the state's own fhbuf storage.
      */
 589 void
 590 rfs4_init_compound_state(struct compound_state *cs)
 591 {
 592         bzero(cs, sizeof (*cs));
 593         cs->cont = TRUE;
 594         cs->access = CS_ACCESS_DENIED;
 595         cs->deleg = FALSE;
 596         cs->mandlock = FALSE;
 597         cs->fh.nfs_fh4_val = cs->fhbuf;
 598 }
 599
     /*
      * (Re)start the grace period of server instance sip: record the current
      * lbolt-derived time in seconds as start_time and load grace_period
      * from rfs4_grace_period, both while holding sip->rwlock as writer.
      */
 600 void
 601 rfs4_grace_start(rfs4_servinst_t *sip)
 602 {
 603         rw_enter(&sip->rwlock, RW_WRITER);
 604         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 605         sip->grace_period = rfs4_grace_period;
 606         rw_exit(&sip->rwlock);
 607 }
 608
 609 /*
 610  * returns true if the instance's grace period has never been started
      *
      * "Never started" means start_time is still 0, the value given to it
      * by rfs4_servinst_create().  The field is sampled while holding
      * sip->rwlock as reader.
 611  */
 612 int
 613 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 614 {
 615         time_t start_time;
 616
 617         rw_enter(&sip->rwlock, RW_READER);
 618         start_time = sip->start_time;
 619         rw_exit(&sip->rwlock);
 620
 621         return (start_time == 0);
 622 }
 623
 624 /*
 625  * Indicates if server instance is within the
 626  * grace period.
      *
      * Returns nonzero while the current lbolt-derived time in seconds is
      * still earlier than start_time + grace_period.  The expiry time is
      * computed under sip->rwlock (reader); the comparison with the
      * current time is made after the lock has been dropped.
 627  */
 628 int
 629 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 630 {
 631         time_t grace_expiry;
 632
 633         rw_enter(&sip->rwlock, RW_READER);
 634         grace_expiry = sip->start_time + sip->grace_period;
 635         rw_exit(&sip->rwlock);
 636
 637         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 638 }
 639
     /*
      * Returns nonzero if the server instance recorded in
      * cp->rc_server_instance is within its grace period.  The caller is
      * expected to hold a reference on cp->rc_dbe (asserted).
      */
 640 int
 641 rfs4_clnt_in_grace(rfs4_client_t *cp)
 642 {
 643         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 644
 645         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 646 }
 647
 648 /*
 649  * reset all currently active grace periods
      *
      * Walks the instance list from rfs4_cur_servinst back through the prev
      * links while holding rfs4_servinst_lock, and restarts the grace period
      * of every instance that is still within it.  Instances whose grace
      * period has already expired are left alone.
 650  */
 651 void
 652 rfs4_grace_reset_all(void)
 653 {
 654         rfs4_servinst_t *sip;
 655
 656         mutex_enter(&rfs4_servinst_lock);
 657         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 658                 if (rfs4_servinst_in_grace(sip))
 659                         rfs4_grace_start(sip);
 660         mutex_exit(&rfs4_servinst_lock);
 661 }
 662
 663 /*
 664  * start any new instances' grace periods
      *
      * Walks the instance list from rfs4_cur_servinst back through the prev
      * links while holding rfs4_servinst_lock, and starts the grace period
      * of every instance for which rfs4_servinst_grace_new() is true.
 665  */
 666 void
 667 rfs4_grace_start_new(void)
 668 {
 669         rfs4_servinst_t *sip;
 670
 671         mutex_enter(&rfs4_servinst_lock);
 672         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 673                 if (rfs4_servinst_grace_new(sip))
 674                         rfs4_grace_start(sip);
 675         mutex_exit(&rfs4_servinst_lock);
 676 }
 677
     /*
      * Allocate a rfs4_dss_path_t holding a private copy of path, tag it
      * with its owning server instance and its index, and link it onto the
      * global rfs4_dss_pathlist.  Returns the new entry.  Both allocations
      * use KM_SLEEP.
      */
 678 static rfs4_dss_path_t *
 679 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
 680 {
 681         size_t len;
 682         rfs4_dss_path_t *dss_path;
 683
 684         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 685
 686         /*
 687          * Take a copy of the string, since the original may be overwritten.
 688          * Sadly, no strdup() in the kernel.
 689          */
 690         /* allow for NUL */
 691         len = strlen(path) + 1;
 692         dss_path->path = kmem_alloc(len, KM_SLEEP);
 693         (void) strlcpy(dss_path->path, path, len);
 694
 695         /* associate with servinst */
 696         dss_path->sip = sip;
 697         dss_path->index = index;
 698
 699         /*
 700          * Add to list of served paths.
 701          * No locking required, as we're only ever called at startup.
 702          */
 703         if (rfs4_dss_pathlist == NULL) {
 704                 /* this is the first dss_path_t */
 705
 706                 /* needed for insque/remque */
 707                 dss_path->next = dss_path->prev = dss_path;
 708
 709                 rfs4_dss_pathlist = dss_path;
 710         } else {
 711                 insque(dss_path, rfs4_dss_pathlist);
 712         }
 713
 714         return (dss_path);
 715 }
 716
 717 /*
 718  * Create a new server instance, and make it the currently active instance.
 719  * Note that starting the grace period too early will reduce the clients'
 720  * recovery window.
      *
      * start_grace: when nonzero, the new instance's grace period is started
      *     before it becomes current.
      * dss_npaths, dss_paths: count and array of path strings; each one is
      *     copied into a rfs4_dss_path_t by rfs4_dss_newpath().
      *
      * The new instance is linked behind the previous current instance (its
      * prev pointer) and published as rfs4_cur_servinst while holding
      * rfs4_servinst_lock.
 721  */
 722 void
 723 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
 724 {
 725         unsigned i;
 726         rfs4_servinst_t *sip;
 727         rfs4_oldstate_t *oldstate;
 728
 729         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 730         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 731
             /* start_time of 0 is what rfs4_servinst_grace_new() tests for */
 732         sip->start_time = (time_t)0;
 733         sip->grace_period = (time_t)0;
 734         sip->next = NULL;
 735         sip->prev = NULL;
 736
 737         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 738         /*
 739          * This initial dummy entry is required to setup for insque/remque.
 740          * It must be skipped over whenever the list is traversed.
 741          */
 742         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 743         /* insque/remque require initial list entry to be self-terminated */
 744         oldstate->next = oldstate;
 745         oldstate->prev = oldstate;
 746         sip->oldstate = oldstate;
 747
 748
 749         sip->dss_npaths = dss_npaths;
 750         sip->dss_paths = kmem_alloc(dss_npaths *
 751             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 752
             /*
              * NOTE(review): i is unsigned while dss_npaths is a signed int;
              * a negative count is not guarded against here -- confirm that
              * callers never pass one.
              */
 753         for (i = 0; i < dss_npaths; i++) {
 754                 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
 755         }
 756
 757         mutex_enter(&rfs4_servinst_lock);
 758         if (rfs4_cur_servinst != NULL) {
 759                 /* add to linked list */
 760                 sip->prev = rfs4_cur_servinst;
 761                 rfs4_cur_servinst->next = sip;
 762         }
 763         if (start_grace)
 764                 rfs4_grace_start(sip);
 765         /* make the new instance "current" */
 766         rfs4_cur_servinst = sip;
 767
 768         mutex_exit(&rfs4_servinst_lock);
 769 }
 770
 771 /*
 772  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 773  * all instances directly.
      *
      * Detaches the whole instance list from rfs4_cur_servinst and, walking
      * back through the prev links under rfs4_servinst_lock, tears down each
      * instance: both of its rwlocks, the dummy head entry of its oldstate
      * list, its dss_paths pointer array, and finally the instance itself.
      * At least one instance must exist (asserted).
      *
      * NOTE(review): only the dss_paths pointer array is freed here.  The
      * rfs4_dss_path_t entries created by rfs4_dss_newpath() (and their path
      * strings) stay linked on rfs4_dss_pathlist with a sip pointer to the
      * freed instance -- confirm they are released elsewhere.
 774  */
 775 void
 776 rfs4_servinst_destroy_all(void)
 777 {
 778         rfs4_servinst_t *sip, *prev, *current;
 782
 783         mutex_enter(&rfs4_servinst_lock);
 784         ASSERT(rfs4_cur_servinst != NULL);
 785         current = rfs4_cur_servinst;
 786         rfs4_cur_servinst = NULL;
 787         for (sip = current; sip != NULL; sip = prev) {
                     /* remember the next victim before sip is freed */
 788                 prev = sip->prev;
 789                 rw_destroy(&sip->rwlock);
                     /* pairs with the rw_init() in rfs4_servinst_create() */
                     rw_destroy(&sip->oldstate_lock);
 790                 if (sip->oldstate)
 791                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 792                 if (sip->dss_paths)
 793                         kmem_free(sip->dss_paths,
 794                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 795                 kmem_free(sip, sizeof (rfs4_servinst_t));
 799         }
 800         mutex_exit(&rfs4_servinst_lock);
 801 }
 802
 803 /*
 804  * Assign server instance sip to client cp (kept in cp->rc_server_instance).
 805  * Should be called with cp->rc_dbe held.
 806  */
 807 void
 808 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
 809 {
 810         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 811
 812         /*
 813          * The lock ensures that if the current instance is in the process
 814          * of changing, we will see the new one.
 815          */
 816         mutex_enter(&rfs4_servinst_lock);
 817         cp->rc_server_instance = sip;
 818         mutex_exit(&rfs4_servinst_lock);
 819 }
 820
     /*
      * Return the server instance recorded for client cp.  The caller is
      * expected to hold a reference on cp->rc_dbe (asserted); unlike
      * rfs4_servinst_assign(), the read is made without taking
      * rfs4_servinst_lock.
      */
 821 rfs4_servinst_t *
 822 rfs4_servinst(rfs4_client_t *cp)
 823 {
 824         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 825
 826         return (cp->rc_server_instance);
 827 }
 828
     /*
      * Do-nothing result-free routine.  rfsv4disptab uses it as dis_resfree
      * for operations that have no dedicated free routine.
      */
 829 /* ARGSUSED */
 830 static void
 831 nullfree(caddr_t resop)
 832 {
 833 }
 834
 835 /*
 836  * This is a fall-through for invalid or not implemented (yet) ops
      *
      * Stores NFS4ERR_INVAL both at the start of the result union (viewed
      * as an nfsstat4) and in *cs->statusp.  argop and req are unused.
      * No entry of rfsv4disptab above refers to this handler.
 837  */
 838 /* ARGSUSED */
 839 static void
 840 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 841         struct compound_state *cs)
 842 {
 843         *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 844 }
 845
 846 /*
 847  * Check if the security flavor, nfsnum, is in the flavor_list.
 848  */
 849 bool_t
 850 in_flavor_list(int nfsnum, int *flavor_list, int count)
 851 {
 852         int i;
 853
 854         for (i = 0; i < count; i++) {
 855                 if (nfsnum == flavor_list[i])
 856                         return (TRUE);
 857         }
 858         return (FALSE);
 859 }
 860
 861 /*
 862  * Used by rfs4_op_secinfo to get the security information from the
 863  * export structure associated with the component.
 864  */
 865 /* ARGSUSED */
 866 static nfsstat4
 867 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 868 {
 869         int error, different_export = 0;
 870         vnode_t *dvp, *vp, *tvp;
 871         struct exportinfo *exi = NULL;
 872         fid_t fid;
 873         uint_t count, i;
 874         secinfo4 *resok_val;
 875         struct secinfo *secp;
 876         seconfig_t *si;
 877         bool_t did_traverse = FALSE;
 878         int dotdot, walk;
 879
 880         dvp = cs->vp;
 881         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 882
 883         /*
 884          * If dotdotting, then need to check whether it's above the
 885          * root of a filesystem, or above an export point.
 886          */
 887         if (dotdot) {
 888
 889                 /*
 890                  * If dotdotting at the root of a filesystem, then
 891                  * need to traverse back to the mounted-on filesystem
 892                  * and do the dotdot lookup there.
 893                  */
 894                 if (cs->vp->v_flag & VROOT) {
 895
 896                         /*
 897                          * If at the system root, then can
 898                          * go up no further.
 899                          */
 900                         if (VN_CMP(dvp, rootdir))
 901                                 return (puterrno4(ENOENT));
 902
 903                         /*
 904                          * Traverse back to the mounted-on filesystem
 905                          */
 906                         dvp = untraverse(cs->vp);
 907
 908                         /*
 909                          * Set the different_export flag so we remember
 910                          * to pick up a new exportinfo entry for
 911                          * this new filesystem.
 912                          */
 913                         different_export = 1;
 914                 } else {
 915
 916                         /*
 917                          * If dotdotting above an export point then set
 918                          * the different_export to get new export info.
 919                          */
 920                         different_export = nfs_exported(cs->exi, cs->vp);
 921                 }
 922         }
 923
 924         /*
 925          * Get the vnode for the component "nm".
 926          */
 927         error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
 928             NULL, NULL, NULL);
 929         if (error)
 930                 return (puterrno4(error));
 931
 932         /*
 933          * If the vnode is in a pseudo filesystem, or if the security flavor
 934          * used in the request is valid but not an explicitly shared flavor,
 935          * or the access bit indicates that this is a limited access,
 936          * check whether this vnode is visible.
 937          */
 938         if (!different_export &&
 939             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
 940             cs->access & CS_ACCESS_LIMITED)) {
 941                 if (! nfs_visible(cs->exi, vp, &different_export)) {
 942                         VN_RELE(vp);
 943                         return (puterrno4(ENOENT));
 944                 }
 945         }
 946
 947         /*
 948          * If it's a mountpoint, then traverse it.
 949          */
 950         if (vn_ismntpt(vp)) {
 951                 tvp = vp;
 952                 if ((error = traverse(&tvp)) != 0) {
 953                         VN_RELE(vp);
 954                         return (puterrno4(error));
 955                 }
 956                 /* remember that we had to traverse mountpoint */
 957                 did_traverse = TRUE;
 958                 vp = tvp;
 959                 different_export = 1;
 960         } else if (vp->v_vfsp != dvp->v_vfsp) {
 961                 /*
 962                  * If vp isn't a mountpoint and the vfs ptrs aren't the same,
 963                  * then vp is probably an LOFS object.  We don't need the
 964                  * realvp, we just need to know that we might have crossed
 965                  * a server fs boundary and need to call checkexport4.
 966                  * (LOFS lookup hides server fs mountpoints, and actually calls
 967                  * traverse)
 968                  */
 969                 different_export = 1;
 970         }
 971
 972         /*
 973          * Get the export information for it.
 974          */
 975         if (different_export) {
 976
 977                 bzero(&fid, sizeof (fid));
 978                 fid.fid_len = MAXFIDSZ;
 979                 error = vop_fid_pseudo(vp, &fid);
 980                 if (error) {
 981                         VN_RELE(vp);
 982                         return (puterrno4(error));
 983                 }
 984
 985                 if (dotdot)
 986                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
 987                 else
 988                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
 989
 990                 if (exi == NULL) {
 991                         if (did_traverse == TRUE) {
 992                                 /*
 993                                  * If this vnode is a mounted-on vnode,
 994                                  * but the mounted-on file system is not
 995                                  * exported, send back the secinfo for
 996                                  * the exported node that the mounted-on
 997                                  * vnode lives in.
 998                                  */
 999                                 exi = cs->exi;
1000                         } else {
1001                                 VN_RELE(vp);
1002                                 return (puterrno4(EACCES));
1003                         }
1004                 }
1005         } else {
1006                 exi = cs->exi;
1007         }
1008         ASSERT(exi != NULL);
1009
1010
1011         /*
1012          * Create the secinfo result based on the security information
1013          * from the exportinfo structure (exi).
1014          *
1015          * Return all flavors for a pseudo node.
1016          * For a real export node, return the flavor that the client
1017          * has access with.
1018          */
1019         ASSERT(RW_LOCK_HELD(&exported_lock));
1020         if (PSEUDO(exi)) {
1021                 count = exi->exi_export.ex_seccnt; /* total sec count */
1022                 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1023                 secp = exi->exi_export.ex_secinfo;
1024
1025                 for (i = 0; i < count; i++) {
1026                         si = &secp[i].s_secinfo;
1027                         resok_val[i].flavor = si->sc_rpcnum;
1028                         if (resok_val[i].flavor == RPCSEC_GSS) {
1029                                 rpcsec_gss_info *info;
1030
1031                                 info = &resok_val[i].flavor_info;
1032                                 info->qop = si->sc_qop;
1033                                 info->service = (rpc_gss_svc_t)si->sc_service;
1034
1035                                 /* get oid opaque data */
1036                                 info->oid.sec_oid4_len =
1037                                     si->sc_gss_mech_type->length;
1038                                 info->oid.sec_oid4_val = kmem_alloc(
1039                                     si->sc_gss_mech_type->length, KM_SLEEP);
1040                                 bcopy(
1041                                     si->sc_gss_mech_type->elements,
1042                                     info->oid.sec_oid4_val,
1043                                     info->oid.sec_oid4_len);
1044                         }
1045                 }
1046                 resp->SECINFO4resok_len = count;
1047                 resp->SECINFO4resok_val = resok_val;
1048         } else {
1049                 int ret_cnt = 0, k = 0;
1050                 int *flavor_list;
1051
1052                 count = exi->exi_export.ex_seccnt; /* total sec count */
1053                 secp = exi->exi_export.ex_secinfo;
1054
1055                 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1056                 /* find out which flavors to return */
1057                 for (i = 0; i < count; i ++) {
1058                         int access, flavor, perm;
1059
1060                         flavor = secp[i].s_secinfo.sc_nfsnum;
1061                         perm = secp[i].s_flags;
1062
1063                         access = nfsauth4_secinfo_access(exi, cs->req,
1064                             flavor, perm);
1065
1066                         if (! (access & NFSAUTH_DENIED) &&
1067                             ! (access & NFSAUTH_WRONGSEC)) {
1068                                 flavor_list[ret_cnt] = flavor;
1069                                 ret_cnt++;
1070                         }
1071                 }
1072
1073                 /* Create the returning SECINFO value */
1074                 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1075
1076                 for (i = 0; i < count; i++) {
1077                         /*
1078                          * If the flavor is in the flavor list,
1079                          * fill in resok_val.
1080                          */
1081                         si = &secp[i].s_secinfo;
1082                         if (in_flavor_list(si->sc_nfsnum,
1083                             flavor_list, ret_cnt)) {
1084                                 resok_val[k].flavor = si->sc_rpcnum;
1085                                 if (resok_val[k].flavor == RPCSEC_GSS) {
1086                                         rpcsec_gss_info *info;
1087
1088                                         info = &resok_val[k].flavor_info;
1089                                         info->qop = si->sc_qop;
1090                                         info->service = (rpc_gss_svc_t)
1091                                             si->sc_service;
1092
1093                                         /* get oid opaque data */
1094                                         info->oid.sec_oid4_len =
1095                                             si->sc_gss_mech_type->length;
1096                                         info->oid.sec_oid4_val = kmem_alloc(
1097                                             si->sc_gss_mech_type->length,
1098                                             KM_SLEEP);
1099                                         bcopy(si->sc_gss_mech_type->elements,
1100                                             info->oid.sec_oid4_val,
1101                                             info->oid.sec_oid4_len);
1102                                 }
1103                                 k++;
1104                         }
1105                         if (k >= ret_cnt)
1106                                 break;
1107                 }
1108                 resp->SECINFO4resok_len = ret_cnt;
1109                 resp->SECINFO4resok_val = resok_val;
1110                 kmem_free(flavor_list, count * sizeof (int));
1111         }
1112
1113         VN_RELE(vp);
1114         return (NFS4_OK);
1115 }
1116
1117 /*
1118  * SECINFO (Operation 33): Obtain required security information on
1119  * the component name in the format of (security-mechanism-oid, qop, service)
1120  * triplets.
1121  */
1122 /* ARGSUSED */
1123 static void
1124 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1125     struct compound_state *cs)
1126 {
1127         SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1128         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1129         utf8string *utfnm = &args->name;
1130         uint_t len;
1131         char *nm;
1132         struct sockaddr *ca;
1133         char *name = NULL;
1134
1135         DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1136             SECINFO4args *, args);
1137
1138         /*
1139          * Current file handle (cfh) should have been set before getting
1140          * into this function. If not, return error.
1141          */
1142         if (cs->vp == NULL) {
1143                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1144                 goto out;
1145         }
1146
1147         if (cs->vp->v_type != VDIR) {
1148                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1149                 goto out;
1150         }
1151
1152         /*
1153          * Verify the component name. If failed, error out, but
1154          * do not error out if the component name is a "..".
1155          * SECINFO will return its parents secinfo data for SECINFO "..".
1156          */
1157         if (!utf8_dir_verify(utfnm)) {
1158                 if (utfnm->utf8string_len != 2 ||
1159                     utfnm->utf8string_val[0] != '.' ||
1160                     utfnm->utf8string_val[1] != '.') {
1161                         *cs->statusp = resp->status = NFS4ERR_INVAL;
1162                         goto out;
1163                 }
1164         }
1165
1166         nm = utf8_to_str(utfnm, &len, NULL);
1167         if (nm == NULL) {
1168                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1169                 goto out;
1170         }
1171
1172         if (len > MAXNAMELEN) {
1173                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1174                 kmem_free(nm, len);
1175                 goto out;
1176         }
1177
1178         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1179         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1180             MAXPATHLEN  + 1);
1181
1182         if (name == NULL) {
1183                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1184                 kmem_free(nm, len);
1185                 goto out;
1186         }
1187
1188
1189         *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1190
1191         if (name != nm)
1192                 kmem_free(name, MAXPATHLEN + 1);
1193         kmem_free(nm, len);
1194
1195 out:
1196         DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1197             SECINFO4res *, resp);
1198 }
1199
1200 /*
1201  * Free SECINFO result.
1202  */
1203 /* ARGSUSED */
1204 static void
1205 rfs4_op_secinfo_free(nfs_resop4 *resop)
1206 {
1207         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1208         int count, i;
1209         secinfo4 *resok_val;
1210
1211         /* If this is not an Ok result, nothing to free. */
1212         if (resp->status != NFS4_OK) {
1213                 return;
1214         }
1215
1216         count = resp->SECINFO4resok_len;
1217         resok_val = resp->SECINFO4resok_val;
1218
1219         for (i = 0; i < count; i++) {
1220                 if (resok_val[i].flavor == RPCSEC_GSS) {
1221                         rpcsec_gss_info *info;
1222
1223                         info = &resok_val[i].flavor_info;
1224                         kmem_free(info->oid.sec_oid4_val,
1225                             info->oid.sec_oid4_len);
1226                 }
1227         }
1228         kmem_free(resok_val, count * sizeof (secinfo4));
1229         resp->SECINFO4resok_len = 0;
1230         resp->SECINFO4resok_val = NULL;
1231 }
1232
1233 /* ARGSUSED */
1234 static void
1235 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1236     struct compound_state *cs)
1237 {
1238         ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1239         ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1240         int error;
1241         vnode_t *vp;
1242         struct vattr va;
1243         int checkwriteperm;
1244         cred_t *cr = cs->cr;
1245         bslabel_t *clabel, *slabel;
1246         ts_label_t *tslabel;
1247         boolean_t admin_low_client;
1248
1249         DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1250             ACCESS4args *, args);
1251
1252 #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1253         if (cs->access == CS_ACCESS_DENIED) {
1254                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1255                 goto out;
1256         }
1257 #endif
1258         if (cs->vp == NULL) {
1259                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1260                 goto out;
1261         }
1262
1263         ASSERT(cr != NULL);
1264
1265         vp = cs->vp;
1266
1267         /*
1268          * If the file system is exported read only, it is not appropriate
1269          * to check write permissions for regular files and directories.
1270          * Special files are interpreted by the client, so the underlying
1271          * permissions are sent back to the client for interpretation.
1272          */
1273         if (rdonly4(cs->exi, cs->vp, req) &&
1274             (vp->v_type == VREG || vp->v_type == VDIR))
1275                 checkwriteperm = 0;
1276         else
1277                 checkwriteperm = 1;
1278
1279         /*
1280          * XXX
1281          * We need the mode so that we can correctly determine access
1282          * permissions relative to a mandatory lock file.  Access to
1283          * mandatory lock files is denied on the server, so it might
1284          * as well be reflected to the server during the open.
1285          */
1286         va.va_mask = AT_MODE;
1287         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1288         if (error) {
1289                 *cs->statusp = resp->status = puterrno4(error);
1290                 goto out;
1291         }
1292         resp->access = 0;
1293         resp->supported = 0;
1294
1295         if (is_system_labeled()) {
1296                 ASSERT(req->rq_label != NULL);
1297                 clabel = req->rq_label;
1298                 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1299                     "got client label from request(1)",
1300                     struct svc_req *, req);
1301                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1302                         if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1303                                 *cs->statusp = resp->status = puterrno4(EACCES);
1304                                 goto out;
1305                         }
1306                         slabel = label2bslabel(tslabel);
1307                         DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1308                             char *, "got server label(1) for vp(2)",
1309                             bslabel_t *, slabel, vnode_t *, vp);
1310
1311                         admin_low_client = B_FALSE;
1312                 } else
1313                         admin_low_client = B_TRUE;
1314         }
1315
1316         if (args->access & ACCESS4_READ) {
1317                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1318                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1319                     (!is_system_labeled() || admin_low_client ||
1320                     bldominates(clabel, slabel)))
1321                         resp->access |= ACCESS4_READ;
1322                 resp->supported |= ACCESS4_READ;
1323         }
1324         if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1325                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1326                 if (!error && (!is_system_labeled() || admin_low_client ||
1327                     bldominates(clabel, slabel)))
1328                         resp->access |= ACCESS4_LOOKUP;
1329                 resp->supported |= ACCESS4_LOOKUP;
1330         }
1331         if (checkwriteperm &&
1332             (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1333                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1334                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1335                     (!is_system_labeled() || admin_low_client ||
1336                     blequal(clabel, slabel)))
1337                         resp->access |=
1338                             (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1339                 resp->supported |= (ACCESS4_MODIFY | ACCESS4_EXTEND);
1340         }
1341
1342         if (checkwriteperm &&
1343             (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1344                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1345                 if (!error && (!is_system_labeled() || admin_low_client ||
1346                     blequal(clabel, slabel)))
1347                         resp->access |= ACCESS4_DELETE;
1348                 resp->supported |= ACCESS4_DELETE;
1349         }
1350         if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1351                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1352                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1353                     (!is_system_labeled() || admin_low_client ||
1354                     bldominates(clabel, slabel)))
1355                         resp->access |= ACCESS4_EXECUTE;
1356                 resp->supported |= ACCESS4_EXECUTE;
1357         }
1358
1359         if (is_system_labeled() && !admin_low_client)
1360                 label_rele(tslabel);
1361
1362         *cs->statusp = resp->status = NFS4_OK;
1363 out:
1364         DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1365             ACCESS4res *, resp);
1366 }
1367
1368 /* ARGSUSED */
1369 static void
1370 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1371     struct compound_state *cs)
1372 {
1373         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1374         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1375         int error;
1376         vnode_t *vp = cs->vp;
1377         cred_t *cr = cs->cr;
1378         vattr_t va;
1379
1380         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1381             COMMIT4args *, args);
1382
1383         if (vp == NULL) {
1384                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1385                 goto out;
1386         }
1387         if (cs->access == CS_ACCESS_DENIED) {
1388                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1389                 goto out;
1390         }
1391
1392         if (args->offset + args->count < args->offset) {
1393                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1394                 goto out;
1395         }
1396
1397         va.va_mask = AT_UID;
1398         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1399
1400         /*
1401          * If we can't get the attributes, then we can't do the
1402          * right access checking.  So, we'll fail the request.
1403          */
1404         if (error) {
1405                 *cs->statusp = resp->status = puterrno4(error);
1406                 goto out;
1407         }
1408         if (rdonly4(cs->exi, cs->vp, req)) {
1409                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1410                 goto out;
1411         }
1412
1413         if (vp->v_type != VREG) {
1414                 if (vp->v_type == VDIR)
1415                         resp->status = NFS4ERR_ISDIR;
1416                 else
1417                         resp->status = NFS4ERR_INVAL;
1418                 *cs->statusp = resp->status;
1419                 goto out;
1420         }
1421
1422         if (crgetuid(cr) != va.va_uid &&
1423             (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1424                 *cs->statusp = resp->status = puterrno4(error);
1425                 goto out;
1426         }
1427
1428         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1429
1430         if (error) {
1431                 *cs->statusp = resp->status = puterrno4(error);
1432                 goto out;
1433         }
1434
1435         *cs->statusp = resp->status = NFS4_OK;
1436         resp->writeverf = Write4verf;
1437 out:
1438         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1439             COMMIT4res *, resp);
1440 }
1441
1442 /*
1443  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1444  * was completed. It does the nfsv4 create for special files.
1445  */
1446 /* ARGSUSED */
1447 static vnode_t *
1448 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1449     struct compound_state *cs, vattr_t *vap, char *nm)
1450 {
1451         int error;
1452         cred_t *cr = cs->cr;
1453         vnode_t *dvp = cs->vp;
1454         vnode_t *vp = NULL;
1455         int mode;
1456         enum vcexcl excl;
1457
1458         switch (args->type) {
1459         case NF4CHR:
1460         case NF4BLK:
1461                 if (secpolicy_sys_devices(cr) != 0) {
1462                         *cs->statusp = resp->status = NFS4ERR_PERM;
1463                         return (NULL);
1464                 }
1465                 if (args->type == NF4CHR)
1466                         vap->va_type = VCHR;
1467                 else
1468                         vap->va_type = VBLK;
1469                 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1470                     args->ftype4_u.devdata.specdata2);
1471                 vap->va_mask |= AT_RDEV;
1472                 break;
1473         case NF4SOCK:
1474                 vap->va_type = VSOCK;
1475                 break;
1476         case NF4FIFO:
1477                 vap->va_type = VFIFO;
1478                 break;
1479         default:
1480                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1481                 return (NULL);
1482         }
1483
1484         /*
1485          * Must specify the mode.
1486          */
1487         if (!(vap->va_mask & AT_MODE)) {
1488                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1489                 return (NULL);
1490         }
1491
1492         excl = EXCL;
1493
1494         mode = 0;
1495
1496         error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1497         if (error) {
1498                 *cs->statusp = resp->status = puterrno4(error);
1499                 return (NULL);
1500         }
1501         return (vp);
1502 }
1503
1504 /*
1505  * nfsv4 create is used to create non-regular files. For regular files,
1506  * use nfsv4 open.
1507  */
1508 /* ARGSUSED */
1509 static void
1510 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1511     struct compound_state *cs)
1512 {
1513         CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1514         CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1515         int error;
1516         struct vattr bva, iva, iva2, ava, *vap;
1517         cred_t *cr = cs->cr;
1518         vnode_t *dvp = cs->vp;
1519         vnode_t *vp = NULL;
1520         vnode_t *realvp;
1521         char *nm, *lnm;
1522         uint_t len, llen;
1523         int syncval = 0;
1524         struct nfs4_svgetit_arg sarg;
1525         struct nfs4_ntov_table ntov;
1526         struct statvfs64 sb;
1527         nfsstat4 status;
1528         struct sockaddr *ca;
1529         char *name = NULL;
1530         char *lname = NULL;
1531
1532         DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1533             CREATE4args *, args);
1534
1535         resp->attrset = 0;
1536
1537         if (dvp == NULL) {
1538                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1539                 goto out;
1540         }
1541
1542         /*
1543          * If there is an unshared filesystem mounted on this vnode,
1544          * do not allow to create an object in this directory.
1545          */
1546         if (vn_ismntpt(dvp)) {
1547                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1548                 goto out;
1549         }
1550
1551         /* Verify that type is correct */
1552         switch (args->type) {
1553         case NF4LNK:
1554         case NF4BLK:
1555         case NF4CHR:
1556         case NF4SOCK:
1557         case NF4FIFO:
1558         case NF4DIR:
1559                 break;
1560         default:
1561                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1562                 goto out;
1563         };
1564
1565         if (cs->access == CS_ACCESS_DENIED) {
1566                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1567                 goto out;
1568         }
1569         if (dvp->v_type != VDIR) {
1570                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1571                 goto out;
1572         }
1573         if (!utf8_dir_verify(&args->objname)) {
1574                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1575                 goto out;
1576         }
1577
1578         if (rdonly4(cs->exi, cs->vp, req)) {
1579                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1580                 goto out;
1581         }
1582
1583         /*
1584          * Name of newly created object
1585          */
1586         nm = utf8_to_fn(&args->objname, &len, NULL);
1587         if (nm == NULL) {
1588                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1589                 goto out;
1590         }
1591
1592         if (len > MAXNAMELEN) {
1593                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1594                 kmem_free(nm, len);
1595                 goto out;
1596         }
1597
1598         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1599         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1600             MAXPATHLEN  + 1);
1601
1602         if (name == NULL) {
1603                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1604                 kmem_free(nm, len);
1605                 goto out;
1606         }
1607
1608         resp->attrset = 0;
1609
1610         sarg.sbp = &sb;
1611         sarg.is_referral = B_FALSE;
1612         nfs4_ntov_table_init(&ntov);
1613
1614         status = do_rfs4_set_attrs(&resp->attrset,
1615             &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1616
1617         if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1618                 status = NFS4ERR_INVAL;
1619
1620         if (status != NFS4_OK) {
1621                 *cs->statusp = resp->status = status;
1622                 if (name != nm)
1623                         kmem_free(name, MAXPATHLEN + 1);
1624                 kmem_free(nm, len);
1625                 nfs4_ntov_table_free(&ntov, &sarg);
1626                 resp->attrset = 0;
1627                 goto out;
1628         }
1629
1630         /* Get "before" change value */
1631         bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1632         error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1633         if (error) {
1634                 *cs->statusp = resp->status = puterrno4(error);
1635                 if (name != nm)
1636                         kmem_free(name, MAXPATHLEN + 1);
1637                 kmem_free(nm, len);
1638                 nfs4_ntov_table_free(&ntov, &sarg);
1639                 resp->attrset = 0;
1640                 goto out;
1641         }
1642         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1643
1644         vap = sarg.vap;
1645
1646         /*
1647          * Set the default initial values for attributes when the parent
1648          * directory does not have the VSUID/VSGID bit set and they have
1649          * not been specified in createattrs.
1650          */
1651         if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1652                 vap->va_uid = crgetuid(cr);
1653                 vap->va_mask |= AT_UID;
1654         }
1655         if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1656                 vap->va_gid = crgetgid(cr);
1657                 vap->va_mask |= AT_GID;
1658         }
1659
1660         vap->va_mask |= AT_TYPE;
1661         switch (args->type) {
1662         case NF4DIR:
1663                 vap->va_type = VDIR;
1664                 if ((vap->va_mask & AT_MODE) == 0) {
1665                         vap->va_mode = 0700;    /* default: owner rwx only */
1666                         vap->va_mask |= AT_MODE;
1667                 }
1668                 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1669                 if (error)
1670                         break;
1671
1672                 /*
1673                  * Get the initial "after" sequence number, if it fails,
1674                  * set to zero
1675                  */
1676                 iva.va_mask = AT_SEQ;
1677                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1678                         iva.va_seq = 0;
1679                 break;
1680         case NF4LNK:
1681                 vap->va_type = VLNK;
1682                 if ((vap->va_mask & AT_MODE) == 0) {
1683                         vap->va_mode = 0700;    /* default: owner rwx only */
1684                         vap->va_mask |= AT_MODE;
1685                 }
1686
1687                 /*
1688                  * symlink names must be treated as data
1689                  */
1690                 lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL);
1691
1692                 if (lnm == NULL) {
1693                         *cs->statusp = resp->status = NFS4ERR_INVAL;
1694                         if (name != nm)
1695                                 kmem_free(name, MAXPATHLEN + 1);
1696                         kmem_free(nm, len);
1697                         nfs4_ntov_table_free(&ntov, &sarg);
1698                         resp->attrset = 0;
1699                         goto out;
1700                 }
1701
1702                 if (llen > MAXPATHLEN) {
1703                         *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1704                         if (name != nm)
1705                                 kmem_free(name, MAXPATHLEN + 1);
1706                         kmem_free(nm, len);
1707                         kmem_free(lnm, llen);
1708                         nfs4_ntov_table_free(&ntov, &sarg);
1709                         resp->attrset = 0;
1710                         goto out;
1711                 }
1712
1713                 lname = nfscmd_convname(ca, cs->exi, lnm,
1714                     NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1715
1716                 if (lname == NULL) {
1717                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1718                         if (name != nm)
1719                                 kmem_free(name, MAXPATHLEN + 1);
1720                         kmem_free(nm, len);
1721                         kmem_free(lnm, llen);
1722                         nfs4_ntov_table_free(&ntov, &sarg);
1723                         resp->attrset = 0;
1724                         goto out;
1725                 }
1726
1727                 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1728                 if (lname != lnm)
1729                         kmem_free(lname, MAXPATHLEN + 1);
1730                 kmem_free(lnm, llen);
1731                 if (error)
1732                         break;
1733
1734                 /*
1735                  * Get the initial "after" sequence number, if it fails,
1736                  * set to zero
1737                  */
1738                 iva.va_mask = AT_SEQ;
1739                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1740                         iva.va_seq = 0;
1741
1742                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1743                     NULL, NULL, NULL);
1744                 if (error)
1745                         break;
1746
1747                 /*
1748                  * va_seq is not safe over VOP calls, check it again
1749                  * if it has changed zero out iva to force atomic = FALSE.
1750                  */
1751                 iva2.va_mask = AT_SEQ;
1752                 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1753                     iva2.va_seq != iva.va_seq)
1754                         iva.va_seq = 0;
1755                 break;
1756         default:
1757                 /*
1758                  * probably a special file.
1759                  */
1760                 if ((vap->va_mask & AT_MODE) == 0) {
1761                         vap->va_mode = 0600;    /* default: owner rw only */
1762                         vap->va_mask |= AT_MODE;
1763                 }
1764                 syncval = FNODSYNC;
1765                 /*
1766                  * We know this will only generate one VOP call
1767                  */
1768                 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1769
1770                 if (vp == NULL) {
1771                         if (name != nm)
1772                                 kmem_free(name, MAXPATHLEN + 1);
1773                         kmem_free(nm, len);
1774                         nfs4_ntov_table_free(&ntov, &sarg);
1775                         resp->attrset = 0;
1776                         goto out;
1777                 }
1778
1779                 /*
1780                  * Get the initial "after" sequence number, if it fails,
1781                  * set to zero
1782                  */
1783                 iva.va_mask = AT_SEQ;
1784                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1785                         iva.va_seq = 0;
1786
1787                 break;
1788         }
1789         if (name != nm)
1790                 kmem_free(name, MAXPATHLEN + 1);
1791         kmem_free(nm, len);
1792
1793         if (error) {
1794                 *cs->statusp = resp->status = puterrno4(error);
1795         }
1796
1797         /*
1798          * Force modified data and metadata out to stable storage.
1799          */
1800         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1801
1802         if (resp->status != NFS4_OK) {
1803                 if (vp != NULL)
1804                         VN_RELE(vp);
1805                 nfs4_ntov_table_free(&ntov, &sarg);
1806                 resp->attrset = 0;
1807                 goto out;
1808         }
1809
1810         /*
1811          * Finish setup of cinfo response, "before" value already set.
1812          * Get "after" change value, if it fails, simply return the
1813          * before value.
1814          */
1815         ava.va_mask = AT_CTIME|AT_SEQ;
1816         if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1817                 ava.va_ctime = bva.va_ctime;
1818                 ava.va_seq = 0;
1819         }
1820         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1821
1822         /*
1823          * True verification that object was created with correct
1824          * attrs is impossible.  The attrs could have been changed
1825          * immediately after object creation.  If attributes did
1826          * not verify, the only recourse for the server is to
1827          * destroy the object.  Maybe if some attrs (like gid)
1828          * are set incorrectly, the object should be destroyed;
1829          * however, seems bad as a default policy.  Do we really
1830          * want to destroy an object over one of the times not
1831          * verifying correctly?  For these reasons, the server
1832          * currently sets bits in attrset for createattrs
1833          * that were set; however, no verification is done.
1834          *
1835          * vmask_to_nmask accounts for vattr bits set on create
1836          *      [do_rfs4_set_attrs() only sets resp bits for
1837          *       non-vattr/vfs bits.]
1838          * Mask off any bits set by default so as not to return
1839          * more attrset bits than were requested in createattrs
1840          */
1841         nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1842         resp->attrset &= args->createattrs.attrmask;
1843         nfs4_ntov_table_free(&ntov, &sarg);
1844
1845         error = makefh4(&cs->fh, vp, cs->exi);
1846         if (error) {
1847                 *cs->statusp = resp->status = puterrno4(error);
1848         }
1849
1850         /*
1851          * The cinfo.atomic = TRUE only if we got no errors, we have
1852          * non-zero va_seq's, and it has incremented by exactly one
1853          * during the creation and it didn't change during the VOP_LOOKUP
1854          * or VOP_FSYNC.
1855          */
1856         if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1857             iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1858                 resp->cinfo.atomic = TRUE;
1859         else
1860                 resp->cinfo.atomic = FALSE;
1861
1862         /*
1863          * Force modified metadata out to stable storage.
1864          *
1865          * if a underlying vp exists, pass it to VOP_FSYNC
1866          */
1867         if (VOP_REALVP(vp, &realvp, NULL) == 0)
1868                 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1869         else
1870                 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1871
1872         if (resp->status != NFS4_OK) {
1873                 VN_RELE(vp);
1874                 goto out;
1875         }
1876         if (cs->vp)
1877                 VN_RELE(cs->vp);
1878
1879         cs->vp = vp;
1880         *cs->statusp = resp->status = NFS4_OK;
1881 out:
1882         DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1883             CREATE4res *, resp);
1884 }
1885
1886 /*ARGSUSED*/
1887 static void
1888 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1889     struct compound_state *cs)
1890 {
1891         DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1892             DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1893
1894         rfs4_op_inval(argop, resop, req, cs);
1895
1896         DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1897             DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1898 }
1899
1900 /*ARGSUSED*/
1901 static void
1902 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1903     struct compound_state *cs)
1904 {
1905         DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1906         DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1907         rfs4_deleg_state_t *dsp;
1908         nfsstat4 status;
1909
1910         DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1911             DELEGRETURN4args *, args);
1912
1913         status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1914         resp->status = *cs->statusp = status;
1915         if (status != NFS4_OK)
1916                 goto out;
1917
1918         /* Ensure specified filehandle matches */
1919         if (cs->vp != dsp->rds_finfo->rf_vp) {
1920                 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1921         } else
1922                 rfs4_return_deleg(dsp, FALSE);
1923
1924         rfs4_update_lease(dsp->rds_client);
1925
1926         rfs4_deleg_state_rele(dsp);
1927 out:
1928         DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
1929             DELEGRETURN4res *, resp);
1930 }
1931
1932 /*
1933  * Check to see if a given "flavor" is an explicitly shared flavor.
1934  * The assumption of this routine is the "flavor" is already a valid
1935  * flavor in the secinfo list of "exi".
1936  *
1937  *      e.g.
1938  *              # share -o sec=flavor1 /export
1939  *              # share -o sec=flavor2 /export/home
1940  *
1941  *              flavor2 is not an explicitly shared flavor for /export,
1942  *              however it is in the secinfo list for /export thru the
1943  *              server namespace setup.
1944  */
1945 int
1946 is_exported_sec(int flavor, struct exportinfo *exi)
1947 {
1948         int     i;
1949         struct secinfo *sp;
1950
1951         sp = exi->exi_export.ex_secinfo;
1952         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1953                 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1954                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1955                         return (SEC_REF_EXPORTED(&sp[i]));
1956                 }
1957         }
1958
1959         /* Should not reach this point based on the assumption */
1960         return (0);
1961 }
1962
1963 /*
1964  * Check if the security flavor used in the request matches what is
1965  * required at the export point or at the root pseudo node (exi_root).
1966  *
1967  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1968  *
1969  */
1970 static int
1971 secinfo_match_or_authnone(struct compound_state *cs)
1972 {
1973         int     i;
1974         struct secinfo *sp;
1975
1976         /*
1977          * Check cs->nfsflavor (from the request) against
1978          * the current export data in cs->exi.
1979          */
1980         sp = cs->exi->exi_export.ex_secinfo;
1981         for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1982                 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1983                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1984                         return (1);
1985         }
1986
1987         return (0);
1988 }
1989
1990 /*
1991  * Check the access authority for the client and return the correct error.
1992  */
1993 nfsstat4
1994 call_checkauth4(struct compound_state *cs, struct svc_req *req)
1995 {
1996         int     authres;
1997
1998         /*
1999          * First, check if the security flavor used in the request
2000          * are among the flavors set in the server namespace.
2001          */
2002         if (!secinfo_match_or_authnone(cs)) {
2003                 *cs->statusp = NFS4ERR_WRONGSEC;
2004                 return (*cs->statusp);
2005         }
2006
2007         authres = checkauth4(cs, req);
2008
2009         if (authres > 0) {
2010                 *cs->statusp = NFS4_OK;
2011                 if (! (cs->access & CS_ACCESS_LIMITED))
2012                         cs->access = CS_ACCESS_OK;
2013         } else if (authres == 0) {
2014                 *cs->statusp = NFS4ERR_ACCESS;
2015         } else if (authres == -2) {
2016                 *cs->statusp = NFS4ERR_WRONGSEC;
2017         } else {
2018                 *cs->statusp = NFS4ERR_DELAY;
2019         }
2020         return (*cs->statusp);
2021 }
2022
2023 /*
2024  * bitmap4_to_attrmask is called by getattr and readdir.
2025  * It sets up the vattr mask and determines whether vfsstat call is needed
2026  * based on the input bitmap.
2027  * Returns nfsv4 status.
2028  */
2029 static nfsstat4
2030 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2031 {
2032         int i;
2033         uint_t  va_mask;
2034         struct statvfs64 *sbp = sargp->sbp;
2035
2036         sargp->sbp = NULL;
2037         sargp->flag = 0;
2038         sargp->rdattr_error = NFS4_OK;
2039         sargp->mntdfid_set = FALSE;
2040         if (sargp->cs->vp)
2041                 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2042                     FH4_ATTRDIR | FH4_NAMEDATTR);
2043         else
2044                 sargp->xattr = 0;
2045
2046         /*
2047          * Set rdattr_error_req to true if return error per
2048          * failed entry rather than fail the readdir.
2049          */
2050         if (breq & FATTR4_RDATTR_ERROR_MASK)
2051                 sargp->rdattr_error_req = 1;
2052         else
2053                 sargp->rdattr_error_req = 0;
2054
2055         /*
2056          * generate the va_mask
2057          * Handle the easy cases first
2058          */
2059         switch (breq) {
2060         case NFS4_NTOV_ATTR_MASK:
2061                 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2062                 return (NFS4_OK);
2063
2064         case NFS4_FS_ATTR_MASK:
2065                 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2066                 sargp->sbp = sbp;
2067                 return (NFS4_OK);
2068
2069         case NFS4_NTOV_ATTR_CACHE_MASK:
2070                 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2071                 return (NFS4_OK);
2072
2073         case FATTR4_LEASE_TIME_MASK:
2074                 sargp->vap->va_mask = 0;
2075                 return (NFS4_OK);
2076
2077         default:
2078                 va_mask = 0;
2079                 for (i = 0; i < nfs4_ntov_map_size; i++) {
2080                         if ((breq & nfs4_ntov_map[i].fbit) &&
2081                             nfs4_ntov_map[i].vbit)
2082                                 va_mask |= nfs4_ntov_map[i].vbit;
2083                 }
2084
2085                 /*
2086                  * Check is vfsstat is needed
2087                  */
2088                 if (breq & NFS4_FS_ATTR_MASK)
2089                         sargp->sbp = sbp;
2090
2091                 sargp->vap->va_mask = va_mask;
2092                 return (NFS4_OK);
2093         }
2094         /* NOTREACHED */
2095 }
2096
2097 /*
2098  * bitmap4_get_sysattrs is called by getattr and readdir.
2099  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2100  * Returns nfsv4 status.
2101  */
2102 static nfsstat4
2103 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2104 {
2105         int error;
2106         struct compound_state *cs = sargp->cs;
2107         vnode_t *vp = cs->vp;
2108
2109         if (sargp->sbp != NULL) {
2110                 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2111                         sargp->sbp = NULL;      /* to identify error */
2112                         return (puterrno4(error));
2113                 }
2114         }
2115
2116         return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2117 }
2118
2119 static void
2120 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2121 {
2122         ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2123             KM_SLEEP);
2124         ntovp->attrcnt = 0;
2125         ntovp->vfsstat = FALSE;
2126 }
2127
2128 static void
2129 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2130     struct nfs4_svgetit_arg *sargp)
2131 {
2132         int i;
2133         union nfs4_attr_u *na;
2134         uint8_t *amap;
2135
2136         /*
2137          * XXX Should do the same checks for whether the bit is set
2138          */
2139         for (i = 0, na = ntovp->na, amap = ntovp->amap;
2140             i < ntovp->attrcnt; i++, na++, amap++) {
2141                 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2142                     NFS4ATTR_FREEIT, sargp, na);
2143         }
2144         if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2145                 /*
2146                  * xdr_free for getattr will be done later
2147                  */
2148                 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2149                     i < ntovp->attrcnt; i++, na++, amap++) {
2150                         xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2151                 }
2152         }
2153         kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2154 }
2155
2156 /*
2157  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2158  */
2159 static nfsstat4
2160 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2161     struct nfs4_svgetit_arg *sargp)
2162 {
2163         int error = 0;
2164         int i, k;
2165         struct nfs4_ntov_table ntov;
2166         XDR xdr;
2167         ulong_t xdr_size;
2168         char *xdr_attrs;
2169         nfsstat4 status = NFS4_OK;
2170         nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2171         union nfs4_attr_u *na;
2172         uint8_t *amap;
2173
2174         sargp->op = NFS4ATTR_GETIT;
2175         sargp->flag = 0;
2176
2177         fattrp->attrmask = 0;
2178         /* if no bits requested, then return empty fattr4 */
2179         if (breq == 0) {
2180                 fattrp->attrlist4_len = 0;
2181                 fattrp->attrlist4 = NULL;
2182                 return (NFS4_OK);
2183         }
2184
2185         /*
2186          * return NFS4ERR_INVAL when client requests write-only attrs
2187          */
2188         if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2189                 return (NFS4ERR_INVAL);
2190
2191         nfs4_ntov_table_init(&ntov);
2192         na = ntov.na;
2193         amap = ntov.amap;
2194
2195         /*
2196          * Now loop to get or verify the attrs
2197          */
2198         for (i = 0; i < nfs4_ntov_map_size; i++) {
2199                 if (breq & nfs4_ntov_map[i].fbit) {
2200                         if ((*nfs4_ntov_map[i].sv_getit)(
2201                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2202
2203                                 error = (*nfs4_ntov_map[i].sv_getit)(
2204                                     NFS4ATTR_GETIT, sargp, na);
2205
2206                                 /*
2207                                  * Possible error values:
2208                                  * >0 if sv_getit failed to
2209                                  * get the attr; 0 if succeeded;
2210                                  * <0 if rdattr_error and the
2211                                  * attribute cannot be returned.
2212                                  */
2213                                 if (error && !(sargp->rdattr_error_req))
2214                                         goto done;
2215                                 /*
2216                                  * If error then just for entry
2217                                  */
2218                                 if (error == 0) {
2219                                         fattrp->attrmask |=
2220                                             nfs4_ntov_map[i].fbit;
2221                                         *amap++ =
2222                                             (uint8_t)nfs4_ntov_map[i].nval;
2223                                         na++;
2224                                         (ntov.attrcnt)++;
2225                                 } else if ((error > 0) &&
2226                                     (sargp->rdattr_error == NFS4_OK)) {
2227                                         sargp->rdattr_error = puterrno4(error);
2228                                 }
2229                                 error = 0;
2230                         }
2231                 }
2232         }
2233
2234         /*
2235          * If rdattr_error was set after the return value for it was assigned,
2236          * update it.
2237          */
2238         if (prev_rdattr_error != sargp->rdattr_error) {
2239                 na = ntov.na;
2240                 amap = ntov.amap;
2241                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2242                         k = *amap;
2243                         if (k < FATTR4_RDATTR_ERROR) {
2244                                 continue;
2245                         }
2246                         if ((k == FATTR4_RDATTR_ERROR) &&
2247                             ((*nfs4_ntov_map[k].sv_getit)(
2248                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2249
2250                                 (void) (*nfs4_ntov_map[k].sv_getit)(
2251                                     NFS4ATTR_GETIT, sargp, na);
2252                         }
2253                         break;
2254                 }
2255         }
2256
2257         xdr_size = 0;
2258         na = ntov.na;
2259         amap = ntov.amap;
2260         for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2261                 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2262         }
2263
2264         fattrp->attrlist4_len = xdr_size;
2265         if (xdr_size) {
2266                 /* freed by rfs4_op_getattr_free() */
2267                 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2268
2269                 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2270
2271                 na = ntov.na;
2272                 amap = ntov.amap;
2273                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2274                         if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2275                                 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2276                                     int, *amap);
2277                                 status = NFS4ERR_SERVERFAULT;
2278                                 break;
2279                         }
2280                 }
2281                 /* xdrmem_destroy(&xdrs); */    /* NO-OP */
2282         } else {
2283                 fattrp->attrlist4 = NULL;
2284         }
2285 done:
2286
2287         nfs4_ntov_table_free(&ntov, sargp);
2288
2289         if (error != 0)
2290                 status = puterrno4(error);
2291
2292         return (status);
2293 }
2294
2295 /* ARGSUSED */
2296 static void
2297 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2298     struct compound_state *cs)
2299 {
2300         GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2301         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2302         struct nfs4_svgetit_arg sarg;
2303         struct statvfs64 sb;
2304         nfsstat4 status;
2305
2306         DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2307             GETATTR4args *, args);
2308
2309         if (cs->vp == NULL) {
2310                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2311                 goto out;
2312         }
2313
2314         if (cs->access == CS_ACCESS_DENIED) {
2315                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2316                 goto out;
2317         }
2318
2319         sarg.sbp = &sb;
2320         sarg.cs = cs;
2321         sarg.is_referral = B_FALSE;
2322
2323         status = bitmap4_to_attrmask(args->attr_request, &sarg);
2324         if (status == NFS4_OK) {
2325
2326                 status = bitmap4_get_sysattrs(&sarg);
2327                 if (status == NFS4_OK) {
2328
2329                         /* Is this a referral? */
2330                         if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2331                                 /* Older V4 Solaris client sees a link */
2332                                 if (client_is_downrev(req))
2333                                         sarg.vap->va_type = VLNK;
2334                                 else
2335                                         sarg.is_referral = B_TRUE;
2336                         }
2337
2338                         status = do_rfs4_op_getattr(args->attr_request,
2339                             &resp->obj_attributes, &sarg);
2340                 }
2341         }
2342         *cs->statusp = resp->status = status;
2343 out:
2344         DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2345             GETATTR4res *, resp);
2346 }
2347
2348 static void
2349 rfs4_op_getattr_free(nfs_resop4 *resop)
2350 {
2351         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2352
2353         nfs4_fattr4_free(&resp->obj_attributes);
2354 }
2355
2356 /* ARGSUSED */
2357 static void
2358 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2359     struct compound_state *cs)
2360 {
2361         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2362
2363         DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2364
2365         if (cs->vp == NULL) {
2366                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2367                 goto out;
2368         }
2369         if (cs->access == CS_ACCESS_DENIED) {
2370                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2371                 goto out;
2372         }
2373
2374         /* check for reparse point at the share point */
2375         if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2376                 /* it's all bad */
2377                 cs->exi->exi_moved = 1;
2378                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2379                 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2380                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2381                 return;
2382         }
2383
2384         /* check for reparse point at vp */
2385         if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2386                 /* it's not all bad */
2387                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2388                 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2389                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2390                 return;
2391         }
2392
2393         resp->object.nfs_fh4_val =
2394             kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2395         nfs_fh4_copy(&cs->fh, &resp->object);
2396         *cs->statusp = resp->status = NFS4_OK;
2397 out:
2398         DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2399             GETFH4res *, resp);
2400 }
2401
2402 static void
2403 rfs4_op_getfh_free(nfs_resop4 *resop)
2404 {
2405         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2406
2407         if (resp->status == NFS4_OK &&
2408             resp->object.nfs_fh4_val != NULL) {
2409                 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2410                 resp->object.nfs_fh4_val = NULL;
2411                 resp->object.nfs_fh4_len = 0;
2412         }
2413 }
2414
2415 /*
2416  * illegal: args: void
2417  *          res : status (NFS4ERR_OP_ILLEGAL)
2418  */
2419 /* ARGSUSED */
2420 static void
2421 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2422     struct svc_req *req, struct compound_state *cs)
2423 {
2424         ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2425
2426         resop->resop = OP_ILLEGAL;
2427         *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2428 }
2429
2430 /*
2431  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2432  *       res: status. If success - CURRENT_FH unchanged, return change_info
2433  */
2434 /* ARGSUSED */
2435 static void
2436 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2437     struct compound_state *cs)
2438 {
2439         LINK4args *args = &argop->nfs_argop4_u.oplink;
2440         LINK4res *resp = &resop->nfs_resop4_u.oplink;
2441         int error;
2442         vnode_t *vp;
2443         vnode_t *dvp;
2444         struct vattr bdva, idva, adva;
2445         char *nm;
2446         uint_t  len;
2447         struct sockaddr *ca;
2448         char *name = NULL;
2449
2450         DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2451             LINK4args *, args);
2452
2453         /* SAVED_FH: source object */
2454         vp = cs->saved_vp;
2455         if (vp == NULL) {
2456                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2457                 goto out;
2458         }
2459
2460         /* CURRENT_FH: target directory */
2461         dvp = cs->vp;
2462         if (dvp == NULL) {
2463                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2464                 goto out;
2465         }
2466
2467         /*
2468          * If there is a non-shared filesystem mounted on this vnode,
2469          * do not allow to link any file in this directory.
2470          */
2471         if (vn_ismntpt(dvp)) {
2472                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2473                 goto out;
2474         }
2475
2476         if (cs->access == CS_ACCESS_DENIED) {
2477                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2478                 goto out;
2479         }
2480
2481         /* Check source object's type validity */
2482         if (vp->v_type == VDIR) {
2483                 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2484                 goto out;
2485         }
2486
2487         /* Check target directory's type */
2488         if (dvp->v_type != VDIR) {
2489                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2490                 goto out;
2491         }
2492
2493         if (cs->saved_exi != cs->exi) {
2494                 *cs->statusp = resp->status = NFS4ERR_XDEV;
2495                 goto out;
2496         }
2497
2498         if (!utf8_dir_verify(&args->newname)) {
2499                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2500                 goto out;
2501         }
2502
2503         nm = utf8_to_fn(&args->newname, &len, NULL);
2504         if (nm == NULL) {
2505                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2506                 goto out;
2507         }
2508
2509         if (len > MAXNAMELEN) {
2510                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2511                 kmem_free(nm, len);
2512                 goto out;
2513         }
2514
2515         if (rdonly4(cs->exi, cs->vp, req)) {
2516                 *cs->statusp = resp->status = NFS4ERR_ROFS;
2517                 kmem_free(nm, len);
2518                 goto out;
2519         }
2520
2521         /* Get "before" change value */
2522         bdva.va_mask = AT_CTIME|AT_SEQ;
2523         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2524         if (error) {
2525                 *cs->statusp = resp->status = puterrno4(error);
2526                 kmem_free(nm, len);
2527                 goto out;
2528         }
2529
2530         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2531         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2532             MAXPATHLEN  + 1);
2533
2534         if (name == NULL) {
2535                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2536                 kmem_free(nm, len);
2537                 goto out;
2538         }
2539
2540         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2541
2542         error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2543
2544         if (nm != name)
2545                 kmem_free(name, MAXPATHLEN + 1);
2546         kmem_free(nm, len);
2547
2548         /*
2549          * Get the initial "after" sequence number, if it fails, set to zero
2550          */
2551         idva.va_mask = AT_SEQ;
2552         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2553                 idva.va_seq = 0;
2554
2555         /*
2556          * Force modified data and metadata out to stable storage.
2557          */
2558         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2559         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2560
2561         if (error) {
2562                 *cs->statusp = resp->status = puterrno4(error);
2563                 goto out;
2564         }
2565
2566         /*
2567          * Get "after" change value, if it fails, simply return the
2568          * before value.
2569          */
2570         adva.va_mask = AT_CTIME|AT_SEQ;
2571         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2572                 adva.va_ctime = bdva.va_ctime;
2573                 adva.va_seq = 0;
2574         }
2575
2576         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2577
2578         /*
2579          * The cinfo.atomic = TRUE only if we have
2580          * non-zero va_seq's, and it has incremented by exactly one
2581          * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2582          */
2583         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2584             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2585                 resp->cinfo.atomic = TRUE;
2586         else
2587                 resp->cinfo.atomic = FALSE;
2588
2589         *cs->statusp = resp->status = NFS4_OK;
2590 out:
2591         DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2592             LINK4res *, resp);
2593 }
2594
2595 /*
2596  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2597  */
2598
2599 /* ARGSUSED */
2600 static nfsstat4
2601 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2602 {
2603         int error;
2604         int different_export = 0;
2605         vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL;
2606         struct exportinfo *exi = NULL, *pre_exi = NULL;
2607         nfsstat4 stat;
2608         fid_t fid;
2609         int attrdir, dotdot, walk;
2610         bool_t is_newvp = FALSE;
2611
2612         if (cs->vp->v_flag & V_XATTRDIR) {
2613                 attrdir = 1;
2614                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2615         } else {
2616                 attrdir = 0;
2617                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2618         }
2619
2620         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2621
2622         /*
2623          * If dotdotting, then need to check whether it's
2624          * above the root of a filesystem, or above an
2625          * export point.
2626          */
2627         if (dotdot) {
2628
2629                 /*
2630                  * If dotdotting at the root of a filesystem, then
2631                  * need to traverse back to the mounted-on filesystem
2632                  * and do the dotdot lookup there.
2633                  */
2634                 if (cs->vp->v_flag & VROOT) {
2635
2636                         /*
2637                          * If at the system root, then can
2638                          * go up no further.
2639                          */
2640                         if (VN_CMP(cs->vp, rootdir))
2641                                 return (puterrno4(ENOENT));
2642
2643                         /*
2644                          * Traverse back to the mounted-on filesystem
2645                          */
2646                         cs->vp = untraverse(cs->vp);
2647
2648                         /*
2649                          * Set the different_export flag so we remember
2650                          * to pick up a new exportinfo entry for
2651                          * this new filesystem.
2652                          */
2653                         different_export = 1;
2654                 } else {
2655
2656                         /*
2657                          * If dotdotting above an export point then set
2658                          * the different_export to get new export info.
2659                          */
2660                         different_export = nfs_exported(cs->exi, cs->vp);
2661                 }
2662         }
2663
2664         error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2665             NULL, NULL, NULL);
2666         if (error)
2667                 return (puterrno4(error));
2668
2669         /*
2670          * If the vnode is in a pseudo filesystem, check whether it is visible.
2671          *
2672          * XXX if the vnode is a symlink and it is not visible in
2673          * a pseudo filesystem, return ENOENT (not following symlink).
2674          * V4 client can not mount such symlink. This is a regression
2675          * from V2/V3.
2676          *
2677          * In the same exported filesystem, if the security flavor used
2678          * is not an explicitly shared flavor, limit the view to the visible
2679          * list entries only. This is not a WRONGSEC case because it's already
2680          * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2681          */
2682         if (!different_export &&
2683             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2684             cs->access & CS_ACCESS_LIMITED)) {
2685                 if (! nfs_visible(cs->exi, vp, &different_export)) {
2686                         VN_RELE(vp);
2687                         return (puterrno4(ENOENT));
2688                 }
2689         }
2690
2691         /*
2692          * If it's a mountpoint, then traverse it.
2693          */
2694         if (vn_ismntpt(vp)) {
2695                 pre_exi = cs->exi;      /* save pre-traversed exportinfo */
2696                 pre_tvp = vp;           /* save pre-traversed vnode     */
2697
2698                 /*
2699                  * hold pre_tvp to counteract rele by traverse.  We will
2700                  * need pre_tvp below if checkexport4 fails
2701                  */
2702                 VN_HOLD(pre_tvp);
2703                 tvp = vp;
2704                 if ((error = traverse(&tvp)) != 0) {
2705                         VN_RELE(vp);
2706                         VN_RELE(pre_tvp);
2707                         return (puterrno4(error));
2708                 }
2709                 vp = tvp;
2710                 different_export = 1;
2711         } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2712                 /*
2713                  * The vfsp comparison is to handle the case where
2714                  * a LOFS mount is shared.  lo_lookup traverses mount points,
2715                  * and NFS is unaware of local fs transistions because
2716                  * v_vfsmountedhere isn't set.  For this special LOFS case,
2717                  * the dir and the obj returned by lookup will have different
2718                  * vfs ptrs.
2719                  */
2720                 different_export = 1;
2721         }
2722
2723         if (different_export) {
2724
2725                 bzero(&fid, sizeof (fid));
2726                 fid.fid_len = MAXFIDSZ;
2727                 error = vop_fid_pseudo(vp, &fid);
2728                 if (error) {
2729                         VN_RELE(vp);
2730                         if (pre_tvp)
2731                                 VN_RELE(pre_tvp);
2732                         return (puterrno4(error));
2733                 }
2734
2735                 if (dotdot)
2736                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2737                 else
2738                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2739
2740                 if (exi == NULL) {
2741                         if (pre_tvp) {
2742                                 /*
2743                                  * If this vnode is a mounted-on vnode,
2744                                  * but the mounted-on file system is not
2745                                  * exported, send back the filehandle for
2746                                  * the mounted-on vnode, not the root of
2747                                  * the mounted-on file system.
2748                                  */
2749                                 VN_RELE(vp);
2750                                 vp = pre_tvp;
2751                                 exi = pre_exi;
2752                         } else {
2753                                 VN_RELE(vp);
2754                                 return (puterrno4(EACCES));
2755                         }
2756                 } else if (pre_tvp) {
2757                         /* we're done with pre_tvp now. release extra hold */
2758                         VN_RELE(pre_tvp);
2759                 }
2760
2761                 cs->exi = exi;
2762
2763                 /*
2764                  * Now we do a checkauth4. The reason is that
2765                  * this client/user may not have access to the new
2766                  * exported file system, and if he does,
2767                  * the client/user may be mapped to a different uid.
2768                  *
2769                  * We start with a new cr, because the checkauth4 done
2770                  * in the PUT*FH operation over wrote the cred's uid,
2771                  * gid, etc, and we want the real thing before calling
2772                  * checkauth4()
2773                  */
2774                 crfree(cs->cr);
2775                 cs->cr = crdup(cs->basecr);
2776
2777                 oldvp = cs->vp;
2778                 cs->vp = vp;
2779                 is_newvp = TRUE;
2780
2781                 stat = call_checkauth4(cs, req);
2782                 if (stat != NFS4_OK) {
2783                         VN_RELE(cs->vp);
2784                         cs->vp = oldvp;
2785                         return (stat);
2786                 }
2787         }
2788
2789         /*
2790          * After various NFS checks, do a label check on the path
2791          * component. The label on this path should either be the
2792          * global zone's label or a zone's label. We are only
2793          * interested in the zone's label because exported files
2794          * in global zone is accessible (though read-only) to
2795          * clients. The exportability/visibility check is already
2796          * done before reaching this code.
2797          */
2798         if (is_system_labeled()) {
2799                 bslabel_t *clabel;
2800
2801                 ASSERT(req->rq_label != NULL);
2802                 clabel = req->rq_label;
2803                 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2804                     "got client label from request(1)", struct svc_req *, req);
2805
2806                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2807                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2808                             cs->exi)) {
2809                                 error = EACCES;
2810                                 goto err_out;
2811                         }
2812                 } else {
2813                         /*
2814                          * We grant access to admin_low label clients
2815                          * only if the client is trusted, i.e. also
2816                          * running Solaris Trusted Extension.
2817                          */
2818                         struct sockaddr *ca;
2819                         int             addr_type;
2820                         void            *ipaddr;
2821                         tsol_tpc_t      *tp;
2822
2823                         ca = (struct sockaddr *)svc_getrpccaller(
2824                             req->rq_xprt)->buf;
2825                         if (ca->sa_family == AF_INET) {
2826                                 addr_type = IPV4_VERSION;
2827                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2828                         } else if (ca->sa_family == AF_INET6) {
2829                                 addr_type = IPV6_VERSION;
2830                                 ipaddr = &((struct sockaddr_in6 *)
2831                                     ca)->sin6_addr;
2832                         }
2833                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
2834                         if (tp == NULL || tp->tpc_tp.tp_doi !=
2835                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2836                             SUN_CIPSO) {
2837                                 if (tp != NULL)
2838                                         TPC_RELE(tp);
2839                                 error = EACCES;
2840                                 goto err_out;
2841                         }
2842                         TPC_RELE(tp);
2843                 }
2844         }
2845
2846         error = makefh4(&cs->fh, vp, cs->exi);
2847
2848 err_out:
2849         if (error) {
2850                 if (is_newvp) {
2851                         VN_RELE(cs->vp);
2852                         cs->vp = oldvp;
2853                 } else
2854                         VN_RELE(vp);
2855                 return (puterrno4(error));
2856         }
2857
2858         if (!is_newvp) {
2859                 if (cs->vp)
2860                         VN_RELE(cs->vp);
2861                 cs->vp = vp;
2862         } else if (oldvp)
2863                 VN_RELE(oldvp);
2864
2865         /*
2866          * if did lookup on attrdir and didn't lookup .., set named
2867          * attr fh flag
2868          */
2869         if (attrdir && ! dotdot)
2870                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2871
2872         /* Assume false for now, open proc will set this */
2873         cs->mandlock = FALSE;
2874
2875         return (NFS4_OK);
2876 }
2877
2878 /* ARGSUSED */
2879 static void
2880 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2881     struct compound_state *cs)
2882 {
2883         LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2884         LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2885         char *nm;
2886         uint_t len;
2887         struct sockaddr *ca;
2888         char *name = NULL;
2889
2890         DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2891             LOOKUP4args *, args);
2892
2893         if (cs->vp == NULL) {
2894                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2895                 goto out;
2896         }
2897
2898         if (cs->vp->v_type == VLNK) {
2899                 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2900                 goto out;
2901         }
2902
2903         if (cs->vp->v_type != VDIR) {
2904                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2905                 goto out;
2906         }
2907
2908         if (!utf8_dir_verify(&args->objname)) {
2909                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2910                 goto out;
2911         }
2912
2913         nm = utf8_to_str(&args->objname, &len, NULL);
2914         if (nm == NULL) {
2915                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2916                 goto out;
2917         }
2918
2919         if (len > MAXNAMELEN) {
2920                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2921                 kmem_free(nm, len);
2922                 goto out;
2923         }
2924
2925         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2926         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2927             MAXPATHLEN  + 1);
2928
2929         if (name == NULL) {
2930                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2931                 kmem_free(nm, len);
2932                 goto out;
2933         }
2934
2935         *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
2936
2937         if (name != nm)
2938                 kmem_free(name, MAXPATHLEN + 1);
2939         kmem_free(nm, len);
2940
2941 out:
2942         DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
2943             LOOKUP4res *, resp);
2944 }
2945
2946 /* ARGSUSED */
2947 static void
2948 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2949     struct compound_state *cs)
2950 {
2951         LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2952
2953         DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
2954
2955         if (cs->vp == NULL) {
2956                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2957                 goto out;
2958         }
2959
2960         if (cs->vp->v_type != VDIR) {
2961                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2962                 goto out;
2963         }
2964
2965         *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
2966
2967         /*
2968          * From NFSV4 Specification, LOOKUPP should not check for
2969          * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2970          */
2971         if (resp->status == NFS4ERR_WRONGSEC) {
2972                 *cs->statusp = resp->status = NFS4_OK;
2973         }
2974
2975 out:
2976         DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
2977             LOOKUPP4res *, resp);
2978 }
2979
2980
2981 /*ARGSUSED2*/
2982 static void
2983 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2984     struct compound_state *cs)
2985 {
2986         OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
2987         OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
2988         vnode_t         *avp = NULL;
2989         int             lookup_flags = LOOKUP_XATTR, error;
2990         int             exp_ro = 0;
2991
2992         DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
2993             OPENATTR4args *, args);
2994
2995         if (cs->vp == NULL) {
2996                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2997                 goto out;
2998         }
2999
3000         if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3001             !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3002                 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3003                 goto out;
3004         }
3005
3006         /*
3007          * If file system supports passing ACE mask to VOP_ACCESS then
3008          * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3009          */
3010
3011         if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3012                 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3013                     V_ACE_MASK, cs->cr, NULL);
3014         else
3015                 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3016                     (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3017                     (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3018
3019         if (error) {
3020                 *cs->statusp = resp->status = puterrno4(EACCES);
3021                 goto out;
3022         }
3023
3024         /*
3025          * The CREATE_XATTR_DIR VOP flag cannot be specified if
3026          * the file system is exported read-only -- regardless of
3027          * createdir flag.  Otherwise the attrdir would be created
3028          * (assuming server fs isn't mounted readonly locally).  If
3029          * VOP_LOOKUP returns ENOENT in this case, the error will
3030          * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3031          * because specfs has no VOP_LOOKUP op, so the macro would
3032          * return ENOSYS.  EINVAL is returned by all (current)
3033          * Solaris file system implementations when any of their
3034          * restrictions are violated (xattr(dir) can't have xattrdir).
3035          * Returning NOTSUPP is more appropriate in this case
3036          * because the object will never be able to have an attrdir.
3037          */
3038         if (args->createdir && ! (exp_ro = rdonly4(cs->exi, cs->vp, req)))
3039                 lookup_flags |= CREATE_XATTR_DIR;
3040
3041         error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3042             NULL, NULL, NULL);
3043
3044         if (error) {
3045                 if (error == ENOENT && args->createdir && exp_ro)
3046                         *cs->statusp = resp->status = puterrno4(EROFS);
3047                 else if (error == EINVAL || error == ENOSYS)
3048                         *cs->statusp = resp->status = puterrno4(ENOTSUP);
3049                 else
3050                         *cs->statusp = resp->status = puterrno4(error);
3051                 goto out;
3052         }
3053
3054         ASSERT(avp->v_flag & V_XATTRDIR);
3055
3056         error = makefh4(&cs->fh, avp, cs->exi);
3057
3058         if (error) {
3059                 VN_RELE(avp);
3060                 *cs->statusp = resp->status = puterrno4(error);
3061                 goto out;
3062         }
3063
3064         VN_RELE(cs->vp);
3065         cs->vp = avp;
3066
3067         /*
3068          * There is no requirement for an attrdir fh flag
3069          * because the attrdir has a vnode flag to distinguish
3070          * it from regular (non-xattr) directories.  The
3071          * FH4_ATTRDIR flag is set for future sanity checks.
3072          */
3073         set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3074         *cs->statusp = resp->status = NFS4_OK;
3075
3076 out:
3077         DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3078             OPENATTR4res *, resp);
3079 }
3080
3081 static int
3082 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3083     caller_context_t *ct)
3084 {
3085         int error;
3086         int i;
3087         clock_t delaytime;
3088
3089         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3090
3091         /*
3092          * Don't block on mandatory locks. If this routine returns
3093          * EAGAIN, the caller should return NFS4ERR_LOCKED.
3094          */
3095         uio->uio_fmode = FNONBLOCK;
3096
3097         for (i = 0; i < rfs4_maxlock_tries; i++) {
3098
3099
3100                 if (direction == FREAD) {
3101                         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3102                         error = VOP_READ(vp, uio, ioflag, cred, ct);
3103                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3104                 } else {
3105                         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3106                         error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3107                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3108                 }
3109
3110                 if (error != EAGAIN)
3111                         break;
3112
3113                 if (i < rfs4_maxlock_tries - 1) {
3114                         delay(delaytime);
3115                         delaytime *= 2;
3116                 }
3117         }
3118
3119         return (error);
3120 }
3121
3122 /* ARGSUSED */
3123 static void
3124 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3125     struct compound_state *cs)
3126 {
3127         READ4args *args = &argop->nfs_argop4_u.opread;
3128         READ4res *resp = &resop->nfs_resop4_u.opread;
3129         int error;
3130         int verror;
3131         vnode_t *vp;
3132         struct vattr va;
3133         struct iovec iov;
3134         struct uio uio;
3135         u_offset_t offset;
3136         bool_t *deleg = &cs->deleg;
3137         nfsstat4 stat;
3138         int in_crit = 0;
3139         mblk_t *mp = NULL;
3140         int alloc_err = 0;
3141         int rdma_used = 0;
3142         int loaned_buffers;
3143         caller_context_t ct;
3144         struct uio *uiop;
3145
3146         DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3147             READ4args, args);
3148
3149         vp = cs->vp;
3150         if (vp == NULL) {
3151                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3152                 goto out;
3153         }
3154         if (cs->access == CS_ACCESS_DENIED) {
3155                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3156                 goto out;
3157         }
3158
3159         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3160             deleg, TRUE, &ct)) != NFS4_OK) {
3161                 *cs->statusp = resp->status = stat;
3162                 goto out;
3163         }
3164
3165         /*
3166          * Enter the critical region before calling VOP_RWLOCK
3167          * to avoid a deadlock with write requests.
3168          */
3169         if (nbl_need_check(vp)) {
3170                 nbl_start_crit(vp, RW_READER);
3171                 in_crit = 1;
3172                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3173                     &ct)) {
3174                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
3175                         goto out;
3176                 }
3177         }
3178
3179         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3180             deleg, TRUE, &ct)) != NFS4_OK) {
3181                 *cs->statusp = resp->status = stat;
3182                 goto out;
3183         }
3184
3185         if (args->wlist)
3186                 rdma_used = 1;
3187
3188         /* use loaned buffers for TCP */
3189         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3190
3191         va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3192         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3193
3194         /*
3195          * If we can't get the attributes, then we can't do the
3196          * right access checking.  So, we'll fail the request.
3197          */
3198         if (verror) {
3199                 *cs->statusp = resp->status = puterrno4(verror);
3200                 goto out;
3201         }
3202
3203         if (vp->v_type != VREG) {
3204                 *cs->statusp = resp->status =
3205                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3206                 goto out;
3207         }
3208
3209         if (crgetuid(cs->cr) != va.va_uid &&
3210             (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3211             (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3212                 *cs->statusp = resp->status = puterrno4(error);
3213                 goto out;
3214         }
3215
3216         if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3217                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3218                 goto out;
3219         }
3220
3221         offset = args->offset;
3222         if (offset >= va.va_size) {
3223                 *cs->statusp = resp->status = NFS4_OK;
3224                 resp->eof = TRUE;
3225                 resp->data_len = 0;
3226                 resp->data_val = NULL;
3227                 resp->mblk = NULL;
3228                 /* RDMA */
3229                 resp->wlist = args->wlist;
3230                 resp->wlist_len = resp->data_len;
3231                 *cs->statusp = resp->status = NFS4_OK;
3232                 if (resp->wlist)
3233                         clist_zero_len(resp->wlist);
3234                 goto out;
3235         }
3236
3237         if (args->count == 0) {
3238                 *cs->statusp = resp->status = NFS4_OK;
3239                 resp->eof = FALSE;
3240                 resp->data_len = 0;
3241                 resp->data_val = NULL;
3242                 resp->mblk = NULL;
3243                 /* RDMA */
3244                 resp->wlist = args->wlist;
3245                 resp->wlist_len = resp->data_len;
3246                 if (resp->wlist)
3247                         clist_zero_len(resp->wlist);
3248                 goto out;
3249         }
3250
3251         /*
3252          * Do not allocate memory more than maximum allowed
3253          * transfer size
3254          */
3255         if (args->count > rfs4_tsize(req))
3256                 args->count = rfs4_tsize(req);
3257
3258         if (loaned_buffers) {
3259                 uiop = (uio_t *)rfs_setup_xuio(vp);
3260                 ASSERT(uiop != NULL);
3261                 uiop->uio_segflg = UIO_SYSSPACE;
3262                 uiop->uio_loffset = args->offset;
3263                 uiop->uio_resid = args->count;
3264
3265                 /* Jump to do the read if successful */
3266                 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3267                         /*
3268                          * Need to hold the vnode until after VOP_RETZCBUF()
3269                          * is called.
3270                          */
3271                         VN_HOLD(vp);
3272                         goto doio_read;
3273                 }
3274
3275                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3276                     uiop->uio_loffset, int, uiop->uio_resid);
3277
3278                 uiop->uio_extflg = 0;
3279
3280                 /* failure to setup for zero copy */
3281                 rfs_free_xuio((void *)uiop);
3282                 loaned_buffers = 0;
3283         }
3284
3285         /*
3286          * If returning data via RDMA Write, then grab the chunk list. If we
3287          * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3288          */
3289         if (rdma_used) {
3290                 mp = NULL;
3291                 (void) rdma_get_wchunk(req, &iov, args->wlist);
3292         } else {
3293                 /*
3294                  * mp will contain the data to be sent out in the read reply.
3295                  * It will be freed after the reply has been sent. Let's
3296                  * roundup the data to a BYTES_PER_XDR_UNIT multiple, so that
3297                  * the call to xdrmblk_putmblk() never fails. If the first
3298                  * alloc of the requested size fails, then decrease the size to
3299                  * something more reasonable and wait for the allocation to
3300                  * occur.
3301                  */
3302                 mp = allocb(RNDUP(args->count), BPRI_MED);
3303                 if (mp == NULL) {
3304                         if (args->count > MAXBSIZE)
3305                                 args->count = MAXBSIZE;
3306                         mp = allocb_wait(RNDUP(args->count), BPRI_MED,
3307                             STR_NOSIG, &alloc_err);
3308                 }
3309                 ASSERT(mp != NULL);
3310                 ASSERT(alloc_err == 0);
3311
3312                 iov.iov_base = (caddr_t)mp->b_datap->db_base;
3313                 iov.iov_len = args->count;
3314         }
3315
3316         uio.uio_iov = &iov;
3317         uio.uio_iovcnt = 1;
3318         uio.uio_segflg = UIO_SYSSPACE;
3319         uio.uio_extflg = UIO_COPY_CACHED;
3320         uio.uio_loffset = args->offset;
3321         uio.uio_resid = args->count;
3322         uiop = &uio;
3323
3324 doio_read:
3325         error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3326
3327         va.va_mask = AT_SIZE;
3328         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3329
3330         if (error) {
3331                 if (mp)
3332                         freemsg(mp);
3333                 *cs->statusp = resp->status = puterrno4(error);
3334                 goto out;
3335         }
3336
3337         /* make mblk using zc buffers */
3338         if (loaned_buffers) {
3339                 mp = uio_to_mblk(uiop);
3340                 ASSERT(mp != NULL);
3341         }
3342
3343         *cs->statusp = resp->status = NFS4_OK;
3344
3345         ASSERT(uiop->uio_resid >= 0);
3346         resp->data_len = args->count - uiop->uio_resid;
3347         if (mp) {
3348                 resp->data_val = (char *)mp->b_datap->db_base;
3349                 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3350         } else {
3351                 resp->data_val = (caddr_t)iov.iov_base;
3352         }
3353
3354         resp->mblk = mp;
3355
3356         if (!verror && offset + resp->data_len == va.va_size)
3357                 resp->eof = TRUE;
3358         else
3359                 resp->eof = FALSE;
3360
3361         if (rdma_used) {
3362                 if (!rdma_setup_read_data4(args, resp)) {
3363                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3364                 }
3365         } else {
3366                 resp->wlist = NULL;
3367         }
3368
3369 out:
3370         if (in_crit)
3371                 nbl_end_crit(vp);
3372
3373         DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3374             READ4res *, resp);
3375 }
3376
3377 static void
3378 rfs4_op_read_free(nfs_resop4 *resop)
3379 {
3380         READ4res        *resp = &resop->nfs_resop4_u.opread;
3381
3382         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3383                 freemsg(resp->mblk);
3384                 resp->mblk = NULL;
3385                 resp->data_val = NULL;
3386                 resp->data_len = 0;
3387         }
3388 }
3389
3390 static void
3391 rfs4_op_readdir_free(nfs_resop4 * resop)
3392 {
3393         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3394
3395         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3396                 freeb(resp->mblk);
3397                 resp->mblk = NULL;
3398                 resp->data_len = 0;
3399         }
3400 }
3401
3402
3403 /* ARGSUSED */
3404 static void
3405 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3406     struct compound_state *cs)
3407 {
3408         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3409         int             error;
3410         vnode_t         *vp;
3411         struct exportinfo *exi, *sav_exi;
3412         nfs_fh4_fmt_t   *fh_fmtp;
3413
3414         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3415
3416         if (cs->vp) {
3417                 VN_RELE(cs->vp);
3418                 cs->vp = NULL;
3419         }
3420
3421         if (cs->cr)
3422                 crfree(cs->cr);
3423
3424         cs->cr = crdup(cs->basecr);
3425
3426         vp = exi_public->exi_vp;
3427         if (vp == NULL) {
3428                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3429                 goto out;
3430         }
3431
3432         error = makefh4(&cs->fh, vp, exi_public);
3433         if (error != 0) {
3434                 *cs->statusp = resp->status = puterrno4(error);
3435                 goto out;
3436         }
3437         sav_exi = cs->exi;
3438         if (exi_public == exi_root) {
3439                 /*
3440                  * No filesystem is actually shared public, so we default
3441                  * to exi_root. In this case, we must check whether root
3442                  * is exported.
3443                  */
3444                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3445
3446                 /*
3447                  * if root filesystem is exported, the exportinfo struct that we
3448                  * should use is what checkexport4 returns, because root_exi is
3449                  * actually a mostly empty struct.
3450                  */
3451                 exi = checkexport4(&fh_fmtp->fh4_fsid,
3452                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3453                 cs->exi = ((exi != NULL) ? exi : exi_public);
3454         } else {
3455                 /*
3456                  * it's a properly shared filesystem
3457                  */
3458                 cs->exi = exi_public;
3459         }
3460
3461         if (is_system_labeled()) {
3462                 bslabel_t *clabel;
3463
3464                 ASSERT(req->rq_label != NULL);
3465                 clabel = req->rq_label;
3466                 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3467                     "got client label from request(1)",
3468                     struct svc_req *, req);
3469                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3470                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3471                             cs->exi)) {
3472                                 *cs->statusp = resp->status =
3473                                     NFS4ERR_SERVERFAULT;
3474                                 goto out;
3475                         }
3476                 }
3477         }
3478
3479         VN_HOLD(vp);
3480         cs->vp = vp;
3481
3482         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3483                 VN_RELE(cs->vp);
3484                 cs->vp = NULL;
3485                 cs->exi = sav_exi;
3486                 goto out;
3487         }
3488
3489         *cs->statusp = resp->status = NFS4_OK;
3490 out:
3491         DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3492             PUTPUBFH4res *, resp);
3493 }
3494
3495 /*
3496  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3497  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3498  * or joe have restrictive search permissions, then we shouldn't let
3499  * the client get a file handle. This is easy to enforce. However, we
3500  * don't know what security flavor should be used until we resolve the
3501  * path name. Another complication is uid mapping. If root is
3502  * the user, then it will be mapped to the anonymous user by default,
3503  * but we won't know that till we've resolved the path name. And we won't
3504  * know what the anonymous user is.
3505  * Luckily, SECINFO is specified to take a full filename.
3506  * So what we will have to in rfs4_op_lookup is check that flavor of
3507  * the target object matches that of the request, and if root was the
3508  * caller, check for the root= and anon= options, and if necessary,
3509  * repeat the lookup using the right cred_t. But that's not done yet.
3510  */
3511 /* ARGSUSED */
3512 static void
3513 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3514     struct compound_state *cs)
3515 {
3516         PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3517         PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3518         nfs_fh4_fmt_t *fh_fmtp;
3519
3520         DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3521             PUTFH4args *, args);
3522
3523         if (cs->vp) {
3524                 VN_RELE(cs->vp);
3525                 cs->vp = NULL;
3526         }
3527
3528         if (cs->cr) {
3529                 crfree(cs->cr);
3530                 cs->cr = NULL;
3531         }
3532
3533
3534         if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3535                 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3536                 goto out;
3537         }
3538
3539         fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3540         cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3541             NULL);
3542
3543         if (cs->exi == NULL) {
3544                 *cs->statusp = resp->status = NFS4ERR_STALE;
3545                 goto out;
3546         }
3547
3548         cs->cr = crdup(cs->basecr);
3549
3550         ASSERT(cs->cr != NULL);
3551
3552         if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3553                 *cs->statusp = resp->status;
3554                 goto out;
3555         }
3556
3557         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3558                 VN_RELE(cs->vp);
3559                 cs->vp = NULL;
3560                 goto out;
3561         }
3562
3563         nfs_fh4_copy(&args->object, &cs->fh);
3564         *cs->statusp = resp->status = NFS4_OK;
3565         cs->deleg = FALSE;
3566
3567 out:
3568         DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3569             PUTFH4res *, resp);
3570 }
3571
3572 /* ARGSUSED */
3573 static void
3574 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3575     struct compound_state *cs)
3576 {
3577         PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3578         int error;
3579         fid_t fid;
3580         struct exportinfo *exi, *sav_exi;
3581
3582         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3583
3584         if (cs->vp) {
3585                 VN_RELE(cs->vp);
3586                 cs->vp = NULL;
3587         }
3588
3589         if (cs->cr)
3590                 crfree(cs->cr);
3591
3592         cs->cr = crdup(cs->basecr);
3593
3594         /*
3595          * Using rootdir, the system root vnode,
3596          * get its fid.
3597          */
3598         bzero(&fid, sizeof (fid));
3599         fid.fid_len = MAXFIDSZ;
3600         error = vop_fid_pseudo(rootdir, &fid);
3601         if (error != 0) {
3602                 *cs->statusp = resp->status = puterrno4(error);
3603                 goto out;
3604         }
3605
3606         /*
3607          * Then use the root fsid & fid it to find out if it's exported
3608          *
3609          * If the server root isn't exported directly, then
3610          * it should at least be a pseudo export based on
3611          * one or more exports further down in the server's
3612          * file tree.
3613          */
3614         exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3615         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3616                 NFS4_DEBUG(rfs4_debug,
3617                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3618                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3619                 goto out;
3620         }
3621
3622         /*
3623          * Now make a filehandle based on the root
3624          * export and root vnode.
3625          */
3626         error = makefh4(&cs->fh, rootdir, exi);
3627         if (error != 0) {
3628                 *cs->statusp = resp->status = puterrno4(error);
3629                 goto out;
3630         }
3631
3632         sav_exi = cs->exi;
3633         cs->exi = exi;
3634
3635         VN_HOLD(rootdir);
3636         cs->vp = rootdir;
3637
3638         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3639                 VN_RELE(rootdir);
3640                 cs->vp = NULL;
3641                 cs->exi = sav_exi;
3642                 goto out;
3643         }
3644
3645         *cs->statusp = resp->status = NFS4_OK;
3646         cs->deleg = FALSE;
3647 out:
3648         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3649             PUTROOTFH4res *, resp);
3650 }
3651
3652 /*
3653  * A directory entry is a valid nfsv4 entry if
3654  * - it has a non-zero ino
3655  * - it is not a dot or dotdot name
3656  * - it is visible in a pseudo export or in a real export that can
3657  *   only have a limited view.
3658  */
3659 static bool_t
3660 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp,
3661     int *expseudo, int check_visible)
3662 {
3663         if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) {
3664                 *expseudo = 0;
3665                 return (FALSE);
3666         }
3667
3668         if (! check_visible) {
3669                 *expseudo = 0;
3670                 return (TRUE);
3671         }
3672
3673         return (nfs_visible_inode(exi, dp->d_ino, expseudo));
3674 }
3675
3676 /*
3677  * set_rdattr_params sets up the variables used to manage what information
3678  * to get for each directory entry.
3679  */
3680 static nfsstat4
3681 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3682     bitmap4 attrs, bool_t *need_to_lookup)
3683 {
3684         uint_t  va_mask;
3685         nfsstat4 status;
3686         bitmap4 objbits;
3687
3688         status = bitmap4_to_attrmask(attrs, sargp);
3689         if (status != NFS4_OK) {
3690                 /*
3691                  * could not even figure attr mask
3692                  */
3693                 return (status);
3694         }
3695         va_mask = sargp->vap->va_mask;
3696
3697         /*
3698          * dirent's d_ino is always correct value for mounted_on_fileid.
3699          * mntdfid_set is set once here, but mounted_on_fileid is
3700          * set in main dirent processing loop for each dirent.
3701          * The mntdfid_set is a simple optimization that lets the
3702          * server attr code avoid work when caller is readdir.
3703          */
3704         sargp->mntdfid_set = TRUE;
3705
3706         /*
3707          * Lookup entry only if client asked for any of the following:
3708          * a) vattr attrs
3709          * b) vfs attrs
3710          * c) attrs w/per-object scope requested (change, filehandle, etc)
3711          *    other than mounted_on_fileid (which we can take from dirent)
3712          */
3713         objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3714
3715         if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3716                 *need_to_lookup = TRUE;
3717         else
3718                 *need_to_lookup = FALSE;
3719
3720         if (sargp->sbp == NULL)
3721                 return (NFS4_OK);
3722
3723         /*
3724          * If filesystem attrs are requested, get them now from the
3725          * directory vp, as most entries will have same filesystem. The only
3726          * exception are mounted over entries but we handle
3727          * those as we go (XXX mounted over detection not yet implemented).
3728          */
3729         sargp->vap->va_mask = 0;        /* to avoid VOP_GETATTR */
3730         status = bitmap4_get_sysattrs(sargp);
3731         sargp->vap->va_mask = va_mask;
3732
3733         if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3734                 /*
3735                  * Failed to get filesystem attributes.
3736                  * Return a rdattr_error for each entry, but don't fail.
3737                  * However, don't get any obj-dependent attrs.
3738                  */
3739                 sargp->rdattr_error = status;   /* for rdattr_error */
3740                 *need_to_lookup = FALSE;
3741                 /*
3742                  * At least get fileid for regular readdir output
3743                  */
3744                 sargp->vap->va_mask &= AT_NODEID;
3745                 status = NFS4_OK;
3746         }
3747
3748         return (status);
3749 }
3750
3751 /*
3752  * readlink: args: CURRENT_FH.
3753  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3754  */
3755
3756 /* ARGSUSED */
3757 static void
3758 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3759     struct compound_state *cs)
3760 {
3761         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3762         int error;
3763         vnode_t *vp;
3764         struct iovec iov;
3765         struct vattr va;
3766         struct uio uio;
3767         char *data;
3768         struct sockaddr *ca;
3769         char *name = NULL;
3770         int is_referral;
3771
3772         DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3773
3774         /* CURRENT_FH: directory */
3775         vp = cs->vp;
3776         if (vp == NULL) {
3777                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3778                 goto out;
3779         }
3780
3781         if (cs->access == CS_ACCESS_DENIED) {
3782                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3783                 goto out;
3784         }
3785
3786         /* Is it a referral? */
3787         if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3788
3789                 is_referral = 1;
3790
3791         } else {
3792
3793                 is_referral = 0;
3794
3795                 if (vp->v_type == VDIR) {
3796                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
3797                         goto out;
3798                 }
3799
3800                 if (vp->v_type != VLNK) {
3801                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3802                         goto out;
3803                 }
3804
3805         }
3806
3807         va.va_mask = AT_MODE;
3808         error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3809         if (error) {
3810                 *cs->statusp = resp->status = puterrno4(error);
3811                 goto out;
3812         }
3813
3814         if (MANDLOCK(vp, va.va_mode)) {
3815                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3816                 goto out;
3817         }
3818
3819         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3820
3821         if (is_referral) {
3822                 char *s;
3823                 size_t strsz;
3824
3825                 /* Get an artificial symlink based on a referral */
3826                 s = build_symlink(vp, cs->cr, &strsz);
3827                 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3828                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3829                     vnode_t *, vp, char *, s);
3830                 if (s == NULL)
3831                         error = EINVAL;
3832                 else {
3833                         error = 0;
3834                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3835                         kmem_free(s, strsz);
3836                 }
3837
3838         } else {
3839
3840                 iov.iov_base = data;
3841                 iov.iov_len = MAXPATHLEN;
3842                 uio.uio_iov = &iov;
3843                 uio.uio_iovcnt = 1;
3844                 uio.uio_segflg = UIO_SYSSPACE;
3845                 uio.uio_extflg = UIO_COPY_CACHED;
3846                 uio.uio_loffset = 0;
3847                 uio.uio_resid = MAXPATHLEN;
3848
3849                 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3850
3851                 if (!error)
3852                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3853         }
3854
3855         if (error) {
3856                 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3857                 *cs->statusp = resp->status = puterrno4(error);
3858                 goto out;
3859         }
3860
3861         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3862         name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3863             MAXPATHLEN  + 1);
3864
3865         if (name == NULL) {
3866                 /*
3867                  * Even though the conversion failed, we return
3868                  * something. We just don't translate it.
3869                  */
3870                 name = data;
3871         }
3872
3873         /*
3874          * treat link name as data
3875          */
3876         (void) str_to_utf8(name, &resp->link);
3877
3878         if (name != data)
3879                 kmem_free(name, MAXPATHLEN + 1);
3880         kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3881         *cs->statusp = resp->status = NFS4_OK;
3882
3883 out:
3884         DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3885             READLINK4res *, resp);
3886 }
3887
3888 static void
3889 rfs4_op_readlink_free(nfs_resop4 *resop)
3890 {
3891         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3892         utf8string *symlink = &resp->link;
3893
3894         if (symlink->utf8string_val) {
3895                 UTF8STRING_FREE(*symlink)
3896         }
3897 }
3898
3899 /*
3900  * release_lockowner:
3901  *      Release any state associated with the supplied
3902  *      lockowner. Note if any lo_state is holding locks we will not
3903  *      rele that lo_state and thus the lockowner will not be destroyed.
3904  *      A client using lock after the lock owner stateid has been released
3905  *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3906  *      to reissue the lock with new_lock_owner set to TRUE.
3907  *      args: lock_owner
3908  *      res:  status
3909  */
3910 /* ARGSUSED */
3911 static void
3912 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3913     struct svc_req *req, struct compound_state *cs)
3914 {
3915         RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3916         RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3917         rfs4_lockowner_t *lo;
3918         rfs4_openowner_t *oo;
3919         rfs4_state_t *sp;
3920         rfs4_lo_state_t *lsp;
3921         rfs4_client_t *cp;
3922         bool_t create = FALSE;
3923         locklist_t *llist;
3924         sysid_t sysid;
3925
3926         DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3927             cs, RELEASE_LOCKOWNER4args *, ap);
3928
3929         /* Make sure there is a clientid around for this request */
3930         cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3931
3932         if (cp == NULL) {
3933                 *cs->statusp = resp->status =
3934                     rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3935                 goto out;
3936         }
3937         rfs4_client_rele(cp);
3938
3939         lo = rfs4_findlockowner(&ap->lock_owner, &create);
3940         if (lo == NULL) {
3941                 *cs->statusp = resp->status = NFS4_OK;
3942                 goto out;
3943         }
3944         ASSERT(lo->rl_client != NULL);
3945
3946         /*
3947          * Check for EXPIRED client. If so will reap state with in a lease
3948          * period or on next set_clientid_confirm step
3949          */
3950         if (rfs4_lease_expired(lo->rl_client)) {
3951                 rfs4_lockowner_rele(lo);
3952                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3953                 goto out;
3954         }
3955
3956         /*
3957          * If no sysid has been assigned, then no locks exist; just return.
3958          */
3959         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3960         if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3961                 rfs4_lockowner_rele(lo);
3962                 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3963                 goto out;
3964         }
3965
3966         sysid = lo->rl_client->rc_sysidt;
3967         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3968
3969         /*
3970          * Mark the lockowner invalid.
3971          */
3972         rfs4_dbe_hide(lo->rl_dbe);
3973
3974         /*
3975          * sysid-pid pair should now not be used since the lockowner is
3976          * invalid. If the client were to instantiate the lockowner again
3977          * it would be assigned a new pid. Thus we can get the list of
3978          * current locks.
3979          */
3980
3981         llist = flk_get_active_locks(sysid, lo->rl_pid);
3982         /* If we are still holding locks fail */
3983         if (llist != NULL) {
3984
3985                 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3986
3987                 flk_free_locklist(llist);
3988                 /*
3989                  * We need to unhide the lockowner so the client can
3990                  * try it again. The bad thing here is if the client
3991                  * has a logic error that took it here in the first place
3992                  * he probably has lost accounting of the locks that it
3993                  * is holding. So we may have dangling state until the
3994                  * open owner state is reaped via close. One scenario
3995                  * that could possibly occur is that the client has
3996                  * sent the unlock request(s) in separate threads
3997                  * and has not waited for the replies before sending the
3998                  * RELEASE_LOCKOWNER request. Presumably, it would expect
3999                  * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4000                  * reissuing the request.
4001                  */
4002                 rfs4_dbe_unhide(lo->rl_dbe);
4003                 rfs4_lockowner_rele(lo);
4004                 goto out;
4005         }
4006
4007         /*
4008          * For the corresponding client we need to check each open
4009          * owner for any opens that have lockowner state associated
4010          * with this lockowner.
4011          */
4012
4013         rfs4_dbe_lock(lo->rl_client->rc_dbe);
4014         for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4015             oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4016
4017                 rfs4_dbe_lock(oo->ro_dbe);
4018                 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4019                     sp = list_next(&oo->ro_statelist, sp)) {
4020
4021                         rfs4_dbe_lock(sp->rs_dbe);
4022                         for (lsp = list_head(&sp->rs_lostatelist);
4023                             lsp != NULL;
4024                             lsp = list_next(&sp->rs_lostatelist, lsp)) {
4025                                 if (lsp->rls_locker == lo) {
4026                                         rfs4_dbe_lock(lsp->rls_dbe);
4027                                         rfs4_dbe_invalidate(lsp->rls_dbe);
4028                                         rfs4_dbe_unlock(lsp->rls_dbe);
4029                                 }
4030                         }
4031                         rfs4_dbe_unlock(sp->rs_dbe);
4032                 }
4033                 rfs4_dbe_unlock(oo->ro_dbe);
4034         }
4035         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4036
4037         rfs4_lockowner_rele(lo);
4038
4039         *cs->statusp = resp->status = NFS4_OK;
4040
4041 out:
4042         DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4043             cs, RELEASE_LOCKOWNER4res *, resp);
4044 }
4045
4046 /*
4047  * short utility function to lookup a file and recall the delegation
4048  */
4049 static rfs4_file_t *
4050 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4051     int *lkup_error, cred_t *cr)
4052 {
4053         vnode_t *vp;
4054         rfs4_file_t *fp = NULL;
4055         bool_t fcreate = FALSE;
4056         int error;
4057
4058         if (vpp)
4059                 *vpp = NULL;
4060
4061         if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4062             NULL)) == 0) {
4063                 if (vp->v_type == VREG)
4064                         fp = rfs4_findfile(vp, NULL, &fcreate);
4065                 if (vpp)
4066                         *vpp = vp;
4067                 else
4068                         VN_RELE(vp);
4069         }
4070
4071         if (lkup_error)
4072                 *lkup_error = error;
4073
4074         return (fp);
4075 }
4076
4077 /*
4078  * remove: args: CURRENT_FH: directory; name.
4079  *      res: status. If success - CURRENT_FH unchanged, return change_info
4080  *              for directory.
4081  */
4082 /* ARGSUSED */
4083 static void
4084 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4085     struct compound_state *cs)
4086 {
4087         REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4088         REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4089         int error;
4090         vnode_t *dvp, *vp;
4091         struct vattr bdva, idva, adva;
4092         char *nm;
4093         uint_t len;
4094         rfs4_file_t *fp;
4095         int in_crit = 0;
4096         bslabel_t *clabel;
4097         struct sockaddr *ca;
4098         char *name = NULL;
4099
4100         DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4101             REMOVE4args *, args);
4102
4103         /* CURRENT_FH: directory */
4104         dvp = cs->vp;
4105         if (dvp == NULL) {
4106                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4107                 goto out;
4108         }
4109
4110         if (cs->access == CS_ACCESS_DENIED) {
4111                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4112                 goto out;
4113         }
4114
4115         /*
4116          * If there is an unshared filesystem mounted on this vnode,
4117          * Do not allow to remove anything in this directory.
4118          */
4119         if (vn_ismntpt(dvp)) {
4120                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4121                 goto out;
4122         }
4123
4124         if (dvp->v_type != VDIR) {
4125                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4126                 goto out;
4127         }
4128
4129         if (!utf8_dir_verify(&args->target)) {
4130                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4131                 goto out;
4132         }
4133
4134         /*
4135          * Lookup the file so that we can check if it's a directory
4136          */
4137         nm = utf8_to_fn(&args->target, &len, NULL);
4138         if (nm == NULL) {
4139                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4140                 goto out;
4141         }
4142
4143         if (len > MAXNAMELEN) {
4144                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4145                 kmem_free(nm, len);
4146                 goto out;
4147         }
4148
4149         if (rdonly4(cs->exi, cs->vp, req)) {
4150                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4151                 kmem_free(nm, len);
4152                 goto out;
4153         }
4154
4155         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4156         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4157             MAXPATHLEN  + 1);
4158
4159         if (name == NULL) {
4160                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4161                 kmem_free(nm, len);
4162                 goto out;
4163         }
4164
4165         /*
4166          * Lookup the file to determine type and while we are see if
4167          * there is a file struct around and check for delegation.
4168          * We don't need to acquire va_seq before this lookup, if
4169          * it causes an update, cinfo.before will not match, which will
4170          * trigger a cache flush even if atomic is TRUE.
4171          */
4172         if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4173                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4174                     NULL)) {
4175                         VN_RELE(vp);
4176                         rfs4_file_rele(fp);
4177                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4178                         if (nm != name)
4179                                 kmem_free(name, MAXPATHLEN + 1);
4180                         kmem_free(nm, len);
4181                         goto out;
4182                 }
4183         }
4184
4185         /* Didn't find anything to remove */
4186         if (vp == NULL) {
4187                 *cs->statusp = resp->status = error;
4188                 if (nm != name)
4189                         kmem_free(name, MAXPATHLEN + 1);
4190                 kmem_free(nm, len);
4191                 goto out;
4192         }
4193
4194         if (nbl_need_check(vp)) {
4195                 nbl_start_crit(vp, RW_READER);
4196                 in_crit = 1;
4197                 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4198                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4199                         if (nm != name)
4200                                 kmem_free(name, MAXPATHLEN + 1);
4201                         kmem_free(nm, len);
4202                         nbl_end_crit(vp);
4203                         VN_RELE(vp);
4204                         if (fp) {
4205                                 rfs4_clear_dont_grant(fp);
4206                                 rfs4_file_rele(fp);
4207                         }
4208                         goto out;
4209                 }
4210         }
4211
4212         /* check label before allowing removal */
4213         if (is_system_labeled()) {
4214                 ASSERT(req->rq_label != NULL);
4215                 clabel = req->rq_label;
4216                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4217                     "got client label from request(1)",
4218                     struct svc_req *, req);
4219                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4220                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4221                             cs->exi)) {
4222                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4223                                 if (name != nm)
4224                                         kmem_free(name, MAXPATHLEN + 1);
4225                                 kmem_free(nm, len);
4226                                 if (in_crit)
4227                                         nbl_end_crit(vp);
4228                                 VN_RELE(vp);
4229                                 if (fp) {
4230                                         rfs4_clear_dont_grant(fp);
4231                                         rfs4_file_rele(fp);
4232                                 }
4233                                 goto out;
4234                         }
4235                 }
4236         }
4237
4238         /* Get dir "before" change value */
4239         bdva.va_mask = AT_CTIME|AT_SEQ;
4240         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4241         if (error) {
4242                 *cs->statusp = resp->status = puterrno4(error);
4243                 if (nm != name)
4244                         kmem_free(name, MAXPATHLEN + 1);
4245                 kmem_free(nm, len);
4246                 if (in_crit)
4247                         nbl_end_crit(vp);
4248                 VN_RELE(vp);
4249                 if (fp) {
4250                         rfs4_clear_dont_grant(fp);
4251                         rfs4_file_rele(fp);
4252                 }
4253                 goto out;
4254         }
4255         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4256
4257         /* Actually do the REMOVE operation */
4258         if (vp->v_type == VDIR) {
4259                 /*
4260                  * Can't remove a directory that has a mounted-on filesystem.
4261                  */
4262                 if (vn_ismntpt(vp)) {
4263                         error = EACCES;
4264                 } else {
4265                         /*
4266                          * System V defines rmdir to return EEXIST,
4267                          * not ENOTEMPTY, if the directory is not
4268                          * empty.  A System V NFS server needs to map
4269                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4270                          * transmit over the wire.
4271                          */
4272                         if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
4273                             NULL, 0)) == EEXIST)
4274                                 error = ENOTEMPTY;
4275                 }
4276         } else {
4277                 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4278                     fp != NULL) {
4279                         struct vattr va;
4280                         vnode_t *tvp;
4281
4282                         rfs4_dbe_lock(fp->rf_dbe);
4283                         tvp = fp->rf_vp;
4284                         if (tvp)
4285                                 VN_HOLD(tvp);
4286                         rfs4_dbe_unlock(fp->rf_dbe);
4287
4288                         if (tvp) {
4289                                 /*
4290                                  * This is va_seq safe because we are not
4291                                  * manipulating dvp.
4292                                  */
4293                                 va.va_mask = AT_NLINK;
4294                                 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4295                                     va.va_nlink == 0) {
4296                                         /* Remove state on file remove */
4297                                         if (in_crit) {
4298                                                 nbl_end_crit(vp);
4299                                                 in_crit = 0;
4300                                         }
4301                                         rfs4_close_all_state(fp);
4302                                 }
4303                                 VN_RELE(tvp);
4304                         }
4305                 }
4306         }
4307
4308         if (in_crit)
4309                 nbl_end_crit(vp);
4310         VN_RELE(vp);
4311
4312         if (fp) {
4313                 rfs4_clear_dont_grant(fp);
4314                 rfs4_file_rele(fp);
4315         }
4316         if (nm != name)
4317                 kmem_free(name, MAXPATHLEN + 1);
4318         kmem_free(nm, len);
4319
4320         if (error) {
4321                 *cs->statusp = resp->status = puterrno4(error);
4322                 goto out;
4323         }
4324
4325         /*
4326          * Get the initial "after" sequence number, if it fails, set to zero
4327          */
4328         idva.va_mask = AT_SEQ;
4329         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4330                 idva.va_seq = 0;
4331
4332         /*
4333          * Force modified data and metadata out to stable storage.
4334          */
4335         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4336
4337         /*
4338          * Get "after" change value, if it fails, simply return the
4339          * before value.
4340          */
4341         adva.va_mask = AT_CTIME|AT_SEQ;
4342         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4343                 adva.va_ctime = bdva.va_ctime;
4344                 adva.va_seq = 0;
4345         }
4346
4347         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4348
4349         /*
4350          * The cinfo.atomic = TRUE only if we have
4351          * non-zero va_seq's, and it has incremented by exactly one
4352          * during the VOP_REMOVE/RMDIR and it didn't change during
4353          * the VOP_FSYNC.
4354          */
4355         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4356             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4357                 resp->cinfo.atomic = TRUE;
4358         else
4359                 resp->cinfo.atomic = FALSE;
4360
4361         *cs->statusp = resp->status = NFS4_OK;
4362
4363 out:
4364         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4365             REMOVE4res *, resp);
4366 }
4367
4368 /*
4369  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4370  *              oldname and newname.
4371  *      res: status. If success - CURRENT_FH unchanged, return change_info
4372  *              for both from and target directories.
4373  */
4374 /* ARGSUSED */
4375 static void
4376 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4377     struct compound_state *cs)
4378 {
4379         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4380         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4381         int error;
4382         vnode_t *odvp;
4383         vnode_t *ndvp;
4384         vnode_t *srcvp, *targvp;
4385         struct vattr obdva, oidva, oadva;
4386         struct vattr nbdva, nidva, nadva;
4387         char *onm, *nnm;
4388         uint_t olen, nlen;
4389         rfs4_file_t *fp, *sfp;
4390         int in_crit_src, in_crit_targ;
4391         int fp_rele_grant_hold, sfp_rele_grant_hold;
4392         bslabel_t *clabel;
4393         struct sockaddr *ca;
4394         char *converted_onm = NULL;
4395         char *converted_nnm = NULL;
4396
4397         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4398             RENAME4args *, args);
4399
4400         fp = sfp = NULL;
4401         srcvp = targvp = NULL;
4402         in_crit_src = in_crit_targ = 0;
4403         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4404
4405         /* CURRENT_FH: target directory */
4406         ndvp = cs->vp;
4407         if (ndvp == NULL) {
4408                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4409                 goto out;
4410         }
4411
4412         /* SAVED_FH: from directory */
4413         odvp = cs->saved_vp;
4414         if (odvp == NULL) {
4415                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4416                 goto out;
4417         }
4418
4419         if (cs->access == CS_ACCESS_DENIED) {
4420                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4421                 goto out;
4422         }
4423
4424         /*
4425          * If there is an unshared filesystem mounted on this vnode,
4426          * do not allow to rename objects in this directory.
4427          */
4428         if (vn_ismntpt(odvp)) {
4429                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4430                 goto out;
4431         }
4432
4433         /*
4434          * If there is an unshared filesystem mounted on this vnode,
4435          * do not allow to rename to this directory.
4436          */
4437         if (vn_ismntpt(ndvp)) {
4438                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4439                 goto out;
4440         }
4441
4442         if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4443                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4444                 goto out;
4445         }
4446
4447         if (cs->saved_exi != cs->exi) {
4448                 *cs->statusp = resp->status = NFS4ERR_XDEV;
4449                 goto out;
4450         }
4451
4452         if (!utf8_dir_verify(&args->oldname)) {
4453                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4454                 goto out;
4455         }
4456
4457         if (!utf8_dir_verify(&args->newname)) {
4458                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4459                 goto out;
4460         }
4461
4462         onm = utf8_to_fn(&args->oldname, &olen, NULL);
4463         if (onm == NULL) {
4464                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4465                 goto out;
4466         }
4467         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4468         nlen = MAXPATHLEN + 1;
4469         converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4470             nlen);
4471
4472         if (converted_onm == NULL) {
4473                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4474                 kmem_free(onm, olen);
4475                 goto out;
4476         }
4477
4478         nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4479         if (nnm == NULL) {
4480                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4481                 if (onm != converted_onm)
4482                         kmem_free(converted_onm, MAXPATHLEN + 1);
4483                 kmem_free(onm, olen);
4484                 goto out;
4485         }
4486         converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4487             MAXPATHLEN  + 1);
4488
4489         if (converted_nnm == NULL) {
4490                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4491                 kmem_free(nnm, nlen);
4492                 nnm = NULL;
4493                 if (onm != converted_onm)
4494                         kmem_free(converted_onm, MAXPATHLEN + 1);
4495                 kmem_free(onm, olen);
4496                 goto out;
4497         }
4498
4499
4500         if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4501                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4502                 kmem_free(onm, olen);
4503                 kmem_free(nnm, nlen);
4504                 goto out;
4505         }
4506
4507
4508         if (rdonly4(cs->exi, cs->vp, req)) {
4509                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4510                 if (onm != converted_onm)
4511                         kmem_free(converted_onm, MAXPATHLEN + 1);
4512                 kmem_free(onm, olen);
4513                 if (nnm != converted_nnm)
4514                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4515                 kmem_free(nnm, nlen);
4516                 goto out;
4517         }
4518
4519         /* check label of the target dir */
4520         if (is_system_labeled()) {
4521                 ASSERT(req->rq_label != NULL);
4522                 clabel = req->rq_label;
4523                 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4524                     "got client label from request(1)",
4525                     struct svc_req *, req);
4526                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4527                         if (!do_rfs_label_check(clabel, ndvp,
4528                             EQUALITY_CHECK, cs->exi)) {
4529                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4530                                 goto err_out;
4531                         }
4532                 }
4533         }
4534
4535         /*
4536          * Is the source a file and have a delegation?
4537          * We don't need to acquire va_seq before these lookups, if
4538          * it causes an update, cinfo.before will not match, which will
4539          * trigger a cache flush even if atomic is TRUE.
4540          */
4541         if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4542             &error, cs->cr)) {
4543                 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4544                     NULL)) {
4545                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4546                         goto err_out;
4547                 }
4548         }
4549
4550         if (srcvp == NULL) {
4551                 *cs->statusp = resp->status = puterrno4(error);
4552                 if (onm != converted_onm)
4553                         kmem_free(converted_onm, MAXPATHLEN + 1);
4554                 kmem_free(onm, olen);
4555                 if (nnm != converted_nnm)
4556                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4557                 kmem_free(nnm, nlen);
4558                 goto out;
4559         }
4560
4561         sfp_rele_grant_hold = 1;
4562
4563         /* Does the destination exist and a file and have a delegation? */
4564         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4565             NULL, cs->cr)) {
4566                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4567                     NULL)) {
4568                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4569                         goto err_out;
4570                 }
4571         }
4572         fp_rele_grant_hold = 1;
4573
4574
4575         /* Check for NBMAND lock on both source and target */
4576         if (nbl_need_check(srcvp)) {
4577                 nbl_start_crit(srcvp, RW_READER);
4578                 in_crit_src = 1;
4579                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4580                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4581                         goto err_out;
4582                 }
4583         }
4584
4585         if (targvp && nbl_need_check(targvp)) {
4586                 nbl_start_crit(targvp, RW_READER);
4587                 in_crit_targ = 1;
4588                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4589                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4590                         goto err_out;
4591                 }
4592         }
4593
4594         /* Get source "before" change value */
4595         obdva.va_mask = AT_CTIME|AT_SEQ;
4596         error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4597         if (!error) {
4598                 nbdva.va_mask = AT_CTIME|AT_SEQ;
4599                 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4600         }
4601         if (error) {
4602                 *cs->statusp = resp->status = puterrno4(error);
4603                 goto err_out;
4604         }
4605
4606         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4607         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4608
4609         if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4610             cs->cr, NULL, 0)) == 0 && fp != NULL) {
4611                 struct vattr va;
4612                 vnode_t *tvp;
4613
4614                 rfs4_dbe_lock(fp->rf_dbe);
4615                 tvp = fp->rf_vp;
4616                 if (tvp)
4617                         VN_HOLD(tvp);
4618                 rfs4_dbe_unlock(fp->rf_dbe);
4619
4620                 if (tvp) {
4621                         va.va_mask = AT_NLINK;
4622                         if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4623                             va.va_nlink == 0) {
4624                                 /* The file is gone and so should the state */
4625                                 if (in_crit_targ) {
4626                                         nbl_end_crit(targvp);
4627                                         in_crit_targ = 0;
4628                                 }
4629                                 rfs4_close_all_state(fp);
4630                         }
4631                         VN_RELE(tvp);
4632                 }
4633         }
4634         if (error == 0)
4635                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4636
4637         if (in_crit_src)
4638                 nbl_end_crit(srcvp);
4639         if (srcvp)
4640                 VN_RELE(srcvp);
4641         if (in_crit_targ)
4642                 nbl_end_crit(targvp);
4643         if (targvp)
4644                 VN_RELE(targvp);
4645
4646         if (sfp) {
4647                 rfs4_clear_dont_grant(sfp);
4648                 rfs4_file_rele(sfp);
4649         }
4650         if (fp) {
4651                 rfs4_clear_dont_grant(fp);
4652                 rfs4_file_rele(fp);
4653         }
4654
4655         if (converted_onm != onm)
4656                 kmem_free(converted_onm, MAXPATHLEN + 1);
4657         kmem_free(onm, olen);
4658         if (converted_nnm != nnm)
4659                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4660         kmem_free(nnm, nlen);
4661
4662         /*
4663          * Get the initial "after" sequence number, if it fails, set to zero
4664          */
4665         oidva.va_mask = AT_SEQ;
4666         if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4667                 oidva.va_seq = 0;
4668
4669         nidva.va_mask = AT_SEQ;
4670         if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4671                 nidva.va_seq = 0;
4672
4673         /*
4674          * Force modified data and metadata out to stable storage.
4675          */
4676         (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4677         (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4678
4679         if (error) {
4680                 *cs->statusp = resp->status = puterrno4(error);
4681                 goto out;
4682         }
4683
4684         /*
4685          * Get "after" change values, if it fails, simply return the
4686          * before value.
4687          */
4688         oadva.va_mask = AT_CTIME|AT_SEQ;
4689         if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4690                 oadva.va_ctime = obdva.va_ctime;
4691                 oadva.va_seq = 0;
4692         }
4693
4694         nadva.va_mask = AT_CTIME|AT_SEQ;
4695         if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4696                 nadva.va_ctime = nbdva.va_ctime;
4697                 nadva.va_seq = 0;
4698         }
4699
4700         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4701         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4702
4703         /*
4704          * The cinfo.atomic = TRUE only if we have
4705          * non-zero va_seq's, and it has incremented by exactly one
4706          * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4707          */
4708         if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4709             oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4710                 resp->source_cinfo.atomic = TRUE;
4711         else
4712                 resp->source_cinfo.atomic = FALSE;
4713
4714         if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4715             nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4716                 resp->target_cinfo.atomic = TRUE;
4717         else
4718                 resp->target_cinfo.atomic = FALSE;
4719
4720 #ifdef  VOLATILE_FH_TEST
4721         {
4722         extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4723
4724         /*
4725          * Add the renamed file handle to the volatile rename list
4726          */
4727         if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4728                 /* file handles may expire on rename */
4729                 vnode_t *vp;
4730
4731                 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4732                 /*
4733                  * Already know that nnm will be a valid string
4734                  */
4735                 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4736                     NULL, NULL, NULL);
4737                 kmem_free(nnm, nlen);
4738                 if (!error) {
4739                         add_volrnm_fh(cs->exi, vp);
4740                         VN_RELE(vp);
4741                 }
4742         }
4743         }
4744 #endif  /* VOLATILE_FH_TEST */
4745
4746         *cs->statusp = resp->status = NFS4_OK;
4747 out:
4748         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4749             RENAME4res *, resp);
4750         return;
4751
4752 err_out:
4753         if (onm != converted_onm)
4754                 kmem_free(converted_onm, MAXPATHLEN + 1);
4755         if (onm != NULL)
4756                 kmem_free(onm, olen);
4757         if (nnm != converted_nnm)
4758                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4759         if (nnm != NULL)
4760                 kmem_free(nnm, nlen);
4761
4762         if (in_crit_src) nbl_end_crit(srcvp);
4763         if (in_crit_targ) nbl_end_crit(targvp);
4764         if (targvp) VN_RELE(targvp);
4765         if (srcvp) VN_RELE(srcvp);
4766         if (sfp) {
4767                 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4768                 rfs4_file_rele(sfp);
4769         }
4770         if (fp) {
4771                 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4772                 rfs4_file_rele(fp);
4773         }
4774
4775         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4776             RENAME4res *, resp);
4777 }
4778
4779 /* ARGSUSED */
4780 static void
4781 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4782     struct compound_state *cs)
4783 {
4784         RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4785         RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4786         rfs4_client_t *cp;
4787
4788         DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4789             RENEW4args *, args);
4790
4791         if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4792                 *cs->statusp = resp->status =
4793                     rfs4_check_clientid(&args->clientid, 0);
4794                 goto out;
4795         }
4796
4797         if (rfs4_lease_expired(cp)) {
4798                 rfs4_client_rele(cp);
4799                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4800                 goto out;
4801         }
4802
4803         rfs4_update_lease(cp);
4804
4805         mutex_enter(cp->rc_cbinfo.cb_lock);
4806         if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4807                 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4808                 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4809         } else {
4810                 *cs->statusp = resp->status = NFS4_OK;
4811         }
4812         mutex_exit(cp->rc_cbinfo.cb_lock);
4813
4814         rfs4_client_rele(cp);
4815
4816 out:
4817         DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4818             RENEW4res *, resp);
4819 }
4820
4821 /* ARGSUSED */
4822 static void
4823 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4824     struct compound_state *cs)
4825 {
4826         RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4827
4828         DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4829
4830         /* No need to check cs->access - we are not accessing any object */
4831         if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4832                 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4833                 goto out;
4834         }
4835         if (cs->vp != NULL) {
4836                 VN_RELE(cs->vp);
4837         }
4838         cs->vp = cs->saved_vp;
4839         cs->saved_vp = NULL;
4840         cs->exi = cs->saved_exi;
4841         nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4842         *cs->statusp = resp->status = NFS4_OK;
4843         cs->deleg = FALSE;
4844
4845 out:
4846         DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4847             RESTOREFH4res *, resp);
4848 }
4849
4850 /* ARGSUSED */
4851 static void
4852 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4853     struct compound_state *cs)
4854 {
4855         SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4856
4857         DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4858
4859         /* No need to check cs->access - we are not accessing any object */
4860         if (cs->vp == NULL) {
4861                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4862                 goto out;
4863         }
4864         if (cs->saved_vp != NULL) {
4865                 VN_RELE(cs->saved_vp);
4866         }
4867         cs->saved_vp = cs->vp;
4868         VN_HOLD(cs->saved_vp);
4869         cs->saved_exi = cs->exi;
4870         /*
4871          * since SAVEFH is fairly rare, don't alloc space for its fh
4872          * unless necessary.
4873          */
4874         if (cs->saved_fh.nfs_fh4_val == NULL) {
4875                 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4876         }
4877         nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4878         *cs->statusp = resp->status = NFS4_OK;
4879
4880 out:
4881         DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4882             SAVEFH4res *, resp);
4883 }
4884
4885 /*
4886  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4887  * return the bitmap of attrs that were set successfully. It is also
4888  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4889  * always be called only after rfs4_do_set_attrs().
4890  *
4891  * Verify that the attributes are same as the expected ones. sargp->vap
4892  * and sargp->sbp contain the input attributes as translated from fattr4.
4893  *
4894  * This function verifies only the attrs that correspond to a vattr or
4895  * vfsstat struct. That is because of the extra step needed to get the
4896  * corresponding system structs. Other attributes have already been set or
4897  * verified by do_rfs4_set_attrs.
4898  *
4899  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4900  */
4901 static int
4902 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4903     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4904 {
4905         int error, ret_error = 0;
4906         int i, k;
4907         uint_t sva_mask = sargp->vap->va_mask;
4908         uint_t vbit;
4909         union nfs4_attr_u *na;
4910         uint8_t *amap;
4911         bool_t getsb = ntovp->vfsstat;
4912
4913         if (sva_mask != 0) {
4914                 /*
4915                  * Okay to overwrite sargp->vap because we verify based
4916                  * on the incoming values.
4917                  */
4918                 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4919                     sargp->cs->cr, NULL);
4920                 if (ret_error) {
4921                         if (resp == NULL)
4922                                 return (ret_error);
4923                         /*
4924                          * Must return bitmap of successful attrs
4925                          */
4926                         sva_mask = 0;   /* to prevent checking vap later */
4927                 } else {
4928                         /*
4929                          * Some file systems clobber va_mask. it is probably
4930                          * wrong of them to do so, nonethless we practice
4931                          * defensive coding.
4932                          * See bug id 4276830.
4933                          */
4934                         sargp->vap->va_mask = sva_mask;
4935                 }
4936         }
4937
4938         if (getsb) {
4939                 /*
4940                  * Now get the superblock and loop on the bitmap, as there is
4941                  * no simple way of translating from superblock to bitmap4.
4942                  */
4943                 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4944                 if (ret_error) {
4945                         if (resp == NULL)
4946                                 goto errout;
4947                         getsb = FALSE;
4948                 }
4949         }
4950
4951         /*
4952          * Now loop and verify each attribute which getattr returned
4953          * whether it's the same as the input.
4954          */
4955         if (resp == NULL && !getsb && (sva_mask == 0))
4956                 goto errout;
4957
4958         na = ntovp->na;
4959         amap = ntovp->amap;
4960         k = 0;
4961         for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4962                 k = *amap;
4963                 ASSERT(nfs4_ntov_map[k].nval == k);
4964                 vbit = nfs4_ntov_map[k].vbit;
4965
4966                 /*
4967                  * If vattr attribute but VOP_GETATTR failed, or it's
4968                  * superblock attribute but VFS_STATVFS failed, skip
4969                  */
4970                 if (vbit) {
4971                         if ((vbit & sva_mask) == 0)
4972                                 continue;
4973                 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4974                         continue;
4975                 }
4976                 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4977                 if (resp != NULL) {
4978                         if (error)
4979                                 ret_error = -1; /* not all match */
4980                         else    /* update response bitmap */
4981                                 *resp |= nfs4_ntov_map[k].fbit;
4982                         continue;
4983                 }
4984                 if (error) {
4985                         ret_error = -1; /* not all match */
4986                         break;
4987                 }
4988         }
4989 errout:
4990         return (ret_error);
4991 }
4992
4993 /*
4994  * Decode the attribute to be set/verified. If the attr requires a sys op
4995  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4996  * call the sv_getit function for it, because the sys op hasn't yet been done.
4997  * Return 0 for success, error code if failed.
4998  *
4999  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5000  */
5001 static int
5002 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5003     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5004 {
5005         int error = 0;
5006         bool_t set_later;
5007
5008         sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5009
5010         if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5011                 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5012                 /*
5013                  * don't verify yet if a vattr or sb dependent attr,
5014                  * because we don't have their sys values yet.
5015                  * Will be done later.
5016                  */
5017                 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5018                         /*
5019                          * ACLs are a special case, since setting the MODE
5020                          * conflicts with setting the ACL.  We delay setting
5021                          * the ACL until all other attributes have been set.
5022                          * The ACL gets set in do_rfs4_op_setattr().
5023                          */
5024                         if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5025                                 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5026                                     sargp, nap);
5027                                 if (error) {
5028                                         xdr_free(nfs4_ntov_map[k].xfunc,
5029                                             (caddr_t)nap);
5030                                 }
5031                         }
5032                 }
5033         } else {
5034 #ifdef  DEBUG
5035                 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5036                     "decoding attribute %d\n", k);
5037 #endif
5038                 error = EINVAL;
5039         }
5040         if (!error && resp_bval && !set_later) {
5041                 *resp_bval |= nfs4_ntov_map[k].fbit;
5042         }
5043
5044         return (error);
5045 }
5046
5047 /*
5048  * Set vattr based on incoming fattr4 attrs - used by setattr.
5049  * Set response mask. Ignore any values that are not writable vattr attrs.
5050  */
5051 static nfsstat4
5052 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5053     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5054     nfs4_attr_cmd_t cmd)
5055 {
5056         int error = 0;
5057         int i;
5058         char *attrs = fattrp->attrlist4;
5059         uint32_t attrslen = fattrp->attrlist4_len;
5060         XDR xdr;
5061         nfsstat4 status = NFS4_OK;
5062         vnode_t *vp = cs->vp;
5063         union nfs4_attr_u *na;
5064         uint8_t *amap;
5065
5066 #ifndef lint
5067         /*
5068          * Make sure that maximum attribute number can be expressed as an
5069          * 8 bit quantity.
5070          */
5071         ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5072 #endif
5073
5074         if (vp == NULL) {
5075                 if (resp)
5076                         *resp = 0;
5077                 return (NFS4ERR_NOFILEHANDLE);
5078         }
5079         if (cs->access == CS_ACCESS_DENIED) {
5080                 if (resp)
5081                         *resp = 0;
5082                 return (NFS4ERR_ACCESS);
5083         }
5084
5085         sargp->op = cmd;
5086         sargp->cs = cs;
5087         sargp->flag = 0;        /* may be set later */
5088         sargp->vap->va_mask = 0;
5089         sargp->rdattr_error = NFS4_OK;
5090         sargp->rdattr_error_req = FALSE;
5091         /* sargp->sbp is set by the caller */
5092
5093         xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5094
5095         na = ntovp->na;
5096         amap = ntovp->amap;
5097
5098         /*
5099          * The following loop iterates on the nfs4_ntov_map checking
5100          * if the fbit is set in the requested bitmap.
5101          * If set then we process the arguments using the
5102          * rfs4_fattr4 conversion functions to populate the setattr
5103          * vattr and va_mask. Any settable attrs that are not using vattr
5104          * will be set in this loop.
5105          */
5106         for (i = 0; i < nfs4_ntov_map_size; i++) {
5107                 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5108                         continue;
5109                 }
5110                 /*
5111                  * If setattr, must be a writable attr.
5112                  * If verify/nverify, must be a readable attr.
5113                  */
5114                 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5115                     NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5116                         /*
5117                          * Client tries to set/verify an
5118                          * unsupported attribute, tries to set
5119                          * a read only attr or verify a write
5120                          * only one - error!
5121                          */
5122                         break;
5123                 }
5124                 /*
5125                  * Decode the attribute to set/verify
5126                  */
5127                 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5128                     &xdr, resp ? resp : NULL, na);
5129                 if (error)
5130                         break;
5131                 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5132                 na++;
5133                 (ntovp->attrcnt)++;
5134                 if (nfs4_ntov_map[i].vfsstat)
5135                         ntovp->vfsstat = TRUE;
5136         }
5137
5138         if (error != 0)
5139                 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5140                     puterrno4(error));
5141         /* xdrmem_destroy(&xdrs); */    /* NO-OP */
5142         return (status);
5143 }
5144
5145 static nfsstat4
5146 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5147     stateid4 *stateid)
5148 {
5149         int error = 0;
5150         struct nfs4_svgetit_arg sarg;
5151         bool_t trunc;
5152
5153         nfsstat4 status = NFS4_OK;
5154         cred_t *cr = cs->cr;
5155         vnode_t *vp = cs->vp;
5156         struct nfs4_ntov_table ntov;
5157         struct statvfs64 sb;
5158         struct vattr bva;
5159         struct flock64 bf;
5160         int in_crit = 0;
5161         uint_t saved_mask = 0;
5162         caller_context_t ct;
5163
5164         *resp = 0;
5165         sarg.sbp = &sb;
5166         sarg.is_referral = B_FALSE;
5167         nfs4_ntov_table_init(&ntov);
5168         status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5169             NFS4ATTR_SETIT);
5170         if (status != NFS4_OK) {
5171                 /*
5172                  * failed set attrs
5173                  */
5174                 goto done;
5175         }
5176         if ((sarg.vap->va_mask == 0) &&
5177             (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5178                 /*
5179                  * no further work to be done
5180                  */
5181                 goto done;
5182         }
5183
5184         /*
5185          * If we got a request to set the ACL and the MODE, only
5186          * allow changing VSUID, VSGID, and VSVTX.  Attempting
5187          * to change any other bits, along with setting an ACL,
5188          * gives NFS4ERR_INVAL.
5189          */
5190         if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5191             (fattrp->attrmask & FATTR4_MODE_MASK)) {
5192                 vattr_t va;
5193
5194                 va.va_mask = AT_MODE;
5195                 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5196                 if (error) {
5197                         status = puterrno4(error);
5198                         goto done;
5199                 }
5200                 if ((sarg.vap->va_mode ^ va.va_mode) &
5201                     ~(VSUID | VSGID | VSVTX)) {
5202                         status = NFS4ERR_INVAL;
5203                         goto done;
5204                 }
5205         }
5206
5207         /* Check stateid only if size has been set */
5208         if (sarg.vap->va_mask & AT_SIZE) {
5209                 trunc = (sarg.vap->va_size == 0);
5210                 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5211                     trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5212                 if (status != NFS4_OK)
5213                         goto done;
5214         } else {
5215                 ct.cc_sysid = 0;
5216                 ct.cc_pid = 0;
5217                 ct.cc_caller_id = nfs4_srv_caller_id;
5218                 ct.cc_flags = CC_DONTBLOCK;
5219         }
5220
5221         /* XXX start of possible race with delegations */
5222
5223         /*
5224          * We need to specially handle size changes because it is
5225          * possible for the client to create a file with read-only
5226          * modes, but with the file opened for writing. If the client
5227          * then tries to set the file size, e.g. ftruncate(3C),
5228          * fcntl(F_FREESP), the normal access checking done in
5229          * VOP_SETATTR would prevent the client from doing it even though
5230          * it should be allowed to do so.  To get around this, we do the
5231          * access checking for ourselves and use VOP_SPACE which doesn't
5232          * do the access checking.
5233          * Also the client should not be allowed to change the file
5234          * size if there is a conflicting non-blocking mandatory lock in
5235          * the region of the change.
5236          */
5237         if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5238                 u_offset_t offset;
5239                 ssize_t length;
5240
5241                 /*
5242                  * ufs_setattr clears AT_SIZE from vap->va_mask, but
5243                  * before returning, sarg.vap->va_mask is used to
5244                  * generate the setattr reply bitmap.  We also clear
5245                  * AT_SIZE below before calling VOP_SPACE.  For both
5246                  * of these cases, the va_mask needs to be saved here
5247                  * and restored after calling VOP_SETATTR.
5248                  */
5249                 saved_mask = sarg.vap->va_mask;
5250
5251                 /*
5252                  * Check any possible conflict due to NBMAND locks.
5253                  * Get into critical region before VOP_GETATTR, so the
5254                  * size attribute is valid when checking conflicts.
5255                  */
5256                 if (nbl_need_check(vp)) {
5257                         nbl_start_crit(vp, RW_READER);
5258                         in_crit = 1;
5259                 }
5260
5261                 bva.va_mask = AT_UID|AT_SIZE;
5262                 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5263                         status = puterrno4(error);
5264                         goto done;
5265                 }
5266
5267                 if (in_crit) {
5268                         if (sarg.vap->va_size < bva.va_size) {
5269                                 offset = sarg.vap->va_size;
5270                                 length = bva.va_size - sarg.vap->va_size;
5271                         } else {
5272                                 offset = bva.va_size;
5273                                 length = sarg.vap->va_size - bva.va_size;
5274                         }
5275                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5276                             &ct)) {
5277                                 status = NFS4ERR_LOCKED;
5278                                 goto done;
5279                         }
5280                 }
5281
5282                 if (crgetuid(cr) == bva.va_uid) {
5283                         sarg.vap->va_mask &= ~AT_SIZE;
5284                         bf.l_type = F_WRLCK;
5285                         bf.l_whence = 0;
5286                         bf.l_start = (off64_t)sarg.vap->va_size;
5287                         bf.l_len = 0;
5288                         bf.l_sysid = 0;
5289                         bf.l_pid = 0;
5290                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5291                             (offset_t)sarg.vap->va_size, cr, &ct);
5292                 }
5293         }
5294
5295         if (!error && sarg.vap->va_mask != 0)
5296                 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5297
5298         /* restore va_mask -- ufs_setattr clears AT_SIZE */
5299         if (saved_mask & AT_SIZE)
5300                 sarg.vap->va_mask |= AT_SIZE;
5301
5302         /*
5303          * If an ACL was being set, it has been delayed until now,
5304          * in order to set the mode (via the VOP_SETATTR() above) first.
5305          */
5306         if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5307                 int i;
5308
5309                 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5310                         if (ntov.amap[i] == FATTR4_ACL)
5311                                 break;
5312                 if (i < NFS4_MAXNUM_ATTRS) {
5313                         error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5314                             NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5315                         if (error == 0) {
5316                                 *resp |= FATTR4_ACL_MASK;
5317                         } else if (error == ENOTSUP) {
5318                                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5319                                 status = NFS4ERR_ATTRNOTSUPP;
5320                                 goto done;
5321                         }
5322                 } else {
5323                         NFS4_DEBUG(rfs4_debug,
5324                             (CE_NOTE, "do_rfs4_op_setattr: "
5325                             "unable to find ACL in fattr4"));
5326                         error = EINVAL;
5327                 }
5328         }
5329
5330         if (error) {
5331                 /* check if a monitor detected a delegation conflict */
5332                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5333                         status = NFS4ERR_DELAY;
5334                 else
5335                         status = puterrno4(error);
5336
5337                 /*
5338                  * Set the response bitmap when setattr failed.
5339                  * If VOP_SETATTR partially succeeded, test by doing a
5340                  * VOP_GETATTR on the object and comparing the data
5341                  * to the setattr arguments.
5342                  */
5343                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5344         } else {
5345                 /*
5346                  * Force modified metadata out to stable storage.
5347                  */
5348                 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5349                 /*
5350                  * Set response bitmap
5351                  */
5352                 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5353         }
5354
5355 /* Return early and already have a NFSv4 error */
5356 done:
5357         /*
5358          * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5359          * conversion sets both readable and writeable NFS4 attrs
5360          * for AT_MTIME and AT_ATIME.  The line below masks out
5361          * unrequested attrs from the setattr result bitmap.  This
5362          * is placed after the done: label to catch the ATTRNOTSUP
5363          * case.
5364          */
5365         *resp &= fattrp->attrmask;
5366
5367         if (in_crit)
5368                 nbl_end_crit(vp);
5369
5370         nfs4_ntov_table_free(&ntov, &sarg);
5371
5372         return (status);
5373 }
5374
5375 /* ARGSUSED */
5376 static void
5377 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5378     struct compound_state *cs)
5379 {
5380         SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5381         SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5382         bslabel_t *clabel;
5383
5384         DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5385             SETATTR4args *, args);
5386
5387         if (cs->vp == NULL) {
5388                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5389                 goto out;
5390         }
5391
5392         /*
5393          * If there is an unshared filesystem mounted on this vnode,
5394          * do not allow to setattr on this vnode.
5395          */
5396         if (vn_ismntpt(cs->vp)) {
5397                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5398                 goto out;
5399         }
5400
5401         resp->attrsset = 0;
5402
5403         if (rdonly4(cs->exi, cs->vp, req)) {
5404                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5405                 goto out;
5406         }
5407
5408         /* check label before setting attributes */
5409         if (is_system_labeled()) {
5410                 ASSERT(req->rq_label != NULL);
5411                 clabel = req->rq_label;
5412                 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5413                     "got client label from request(1)",
5414                     struct svc_req *, req);
5415                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5416                         if (!do_rfs_label_check(clabel, cs->vp,
5417                             EQUALITY_CHECK, cs->exi)) {
5418                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5419                                 goto out;
5420                         }
5421                 }
5422         }
5423
5424         *cs->statusp = resp->status =
5425             do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5426             &args->stateid);
5427
5428 out:
5429         DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5430             SETATTR4res *, resp);
5431 }
5432
5433 /* ARGSUSED */
5434 static void
5435 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5436     struct compound_state *cs)
5437 {
5438         /*
5439          * verify and nverify are exactly the same, except that nverify
5440          * succeeds when some argument changed, and verify succeeds when
5441          * when none changed.
5442          */
5443
5444         VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5445         VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5446
5447         int error;
5448         struct nfs4_svgetit_arg sarg;
5449         struct statvfs64 sb;
5450         struct nfs4_ntov_table ntov;
5451
5452         DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5453             VERIFY4args *, args);
5454
5455         if (cs->vp == NULL) {
5456                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5457                 goto out;
5458         }
5459
5460         sarg.sbp = &sb;
5461         sarg.is_referral = B_FALSE;
5462         nfs4_ntov_table_init(&ntov);
5463         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5464             &sarg, &ntov, NFS4ATTR_VERIT);
5465         if (resp->status != NFS4_OK) {
5466                 /*
5467                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5468                  * so could return -1 for "no match".
5469                  */
5470                 if (resp->status == -1)
5471                         resp->status = NFS4ERR_NOT_SAME;
5472                 goto done;
5473         }
5474         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5475         switch (error) {
5476         case 0:
5477                 resp->status = NFS4_OK;
5478                 break;
5479         case -1:
5480                 resp->status = NFS4ERR_NOT_SAME;
5481                 break;
5482         default:
5483                 resp->status = puterrno4(error);
5484                 break;
5485         }
5486 done:
5487         *cs->statusp = resp->status;
5488         nfs4_ntov_table_free(&ntov, &sarg);
5489 out:
5490         DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5491             VERIFY4res *, resp);
5492 }
5493
5494 /* ARGSUSED */
5495 static void
5496 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5497     struct compound_state *cs)
5498 {
5499         /*
5500          * verify and nverify are exactly the same, except that nverify
5501          * succeeds when some argument changed, and verify succeeds when
5502          * when none changed.
5503          */
5504
5505         NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5506         NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5507
5508         int error;
5509         struct nfs4_svgetit_arg sarg;
5510         struct statvfs64 sb;
5511         struct nfs4_ntov_table ntov;
5512
5513         DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5514             NVERIFY4args *, args);
5515
5516         if (cs->vp == NULL) {
5517                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5518                 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5519                     NVERIFY4res *, resp);
5520                 return;
5521         }
5522         sarg.sbp = &sb;
5523         sarg.is_referral = B_FALSE;
5524         nfs4_ntov_table_init(&ntov);
5525         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5526             &sarg, &ntov, NFS4ATTR_VERIT);
5527         if (resp->status != NFS4_OK) {
5528                 /*
5529                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5530                  * so could return -1 for "no match".
5531                  */
5532                 if (resp->status == -1)
5533                         resp->status = NFS4_OK;
5534                 goto done;
5535         }
5536         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5537         switch (error) {
5538         case 0:
5539                 resp->status = NFS4ERR_SAME;
5540                 break;
5541         case -1:
5542                 resp->status = NFS4_OK;
5543                 break;
5544         default:
5545                 resp->status = puterrno4(error);
5546                 break;
5547         }
5548 done:
5549         *cs->statusp = resp->status;
5550         nfs4_ntov_table_free(&ntov, &sarg);
5551
5552         DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5553             NVERIFY4res *, resp);
5554 }
5555
5556 /*
5557  * XXX - This should live in an NFS header file.
5558  */
5559 #define MAX_IOVECS      12
5560
5561 /* ARGSUSED */
5562 static void
5563 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5564     struct compound_state *cs)
5565 {
5566         WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5567         WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5568         int error;
5569         vnode_t *vp;
5570         struct vattr bva;
5571         u_offset_t rlimit;
5572         struct uio uio;
5573         struct iovec iov[MAX_IOVECS];
5574         struct iovec *iovp;
5575         int iovcnt;
5576         int ioflag;
5577         cred_t *savecred, *cr;
5578         bool_t *deleg = &cs->deleg;
5579         nfsstat4 stat;
5580         int in_crit = 0;
5581         caller_context_t ct;
5582
5583         DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5584             WRITE4args *, args);
5585
5586         vp = cs->vp;
5587         if (vp == NULL) {
5588                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5589                 goto out;
5590         }
5591         if (cs->access == CS_ACCESS_DENIED) {
5592                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5593                 goto out;
5594         }
5595
5596         cr = cs->cr;
5597
5598         if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5599             deleg, TRUE, &ct)) != NFS4_OK) {
5600                 *cs->statusp = resp->status = stat;
5601                 goto out;
5602         }
5603
5604         /*
5605          * We have to enter the critical region before calling VOP_RWLOCK
5606          * to avoid a deadlock with ufs.
5607          */
5608         if (nbl_need_check(vp)) {
5609                 nbl_start_crit(vp, RW_READER);
5610                 in_crit = 1;
5611                 if (nbl_conflict(vp, NBL_WRITE,
5612                     args->offset, args->data_len, 0, &ct)) {
5613                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
5614                         goto out;
5615                 }
5616         }
5617
5618         bva.va_mask = AT_MODE | AT_UID;
5619         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5620
5621         /*
5622          * If we can't get the attributes, then we can't do the
5623          * right access checking.  So, we'll fail the request.
5624          */
5625         if (error) {
5626                 *cs->statusp = resp->status = puterrno4(error);
5627                 goto out;
5628         }
5629
5630         if (rdonly4(cs->exi, cs->vp, req)) {
5631                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5632                 goto out;
5633         }
5634
5635         if (vp->v_type != VREG) {
5636                 *cs->statusp = resp->status =
5637                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5638                 goto out;
5639         }
5640
5641         if (crgetuid(cr) != bva.va_uid &&
5642             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5643                 *cs->statusp = resp->status = puterrno4(error);
5644                 goto out;
5645         }
5646
5647         if (MANDLOCK(vp, bva.va_mode)) {
5648                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5649                 goto out;
5650         }
5651
5652         if (args->data_len == 0) {
5653                 *cs->statusp = resp->status = NFS4_OK;
5654                 resp->count = 0;
5655                 resp->committed = args->stable;
5656                 resp->writeverf = Write4verf;
5657                 goto out;
5658         }
5659
5660         if (args->mblk != NULL) {
5661                 mblk_t *m;
5662                 uint_t bytes, round_len;
5663
5664                 iovcnt = 0;
5665                 bytes = 0;
5666                 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5667                 for (m = args->mblk;
5668                     m != NULL && bytes < round_len;
5669                     m = m->b_cont) {
5670                         iovcnt++;
5671                         bytes += MBLKL(m);
5672                 }
5673 #ifdef DEBUG
5674                 /* should have ended on an mblk boundary */
5675                 if (bytes != round_len) {
5676                         printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5677                             bytes, round_len, args->data_len);
5678                         printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5679                             (void *)args->mblk, (void *)m);
5680                         ASSERT(bytes == round_len);
5681                 }
5682 #endif
5683                 if (iovcnt <= MAX_IOVECS) {
5684                         iovp = iov;
5685                 } else {
5686                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5687                 }
5688                 mblk_to_iov(args->mblk, iovcnt, iovp);
5689         } else if (args->rlist != NULL) {
5690                 iovcnt = 1;
5691                 iovp = iov;
5692                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5693                 iovp->iov_len = args->data_len;
5694         } else {
5695                 iovcnt = 1;
5696                 iovp = iov;
5697                 iovp->iov_base = args->data_val;
5698                 iovp->iov_len = args->data_len;
5699         }
5700
5701         uio.uio_iov = iovp;
5702         uio.uio_iovcnt = iovcnt;
5703
5704         uio.uio_segflg = UIO_SYSSPACE;
5705         uio.uio_extflg = UIO_COPY_DEFAULT;
5706         uio.uio_loffset = args->offset;
5707         uio.uio_resid = args->data_len;
5708         uio.uio_llimit = curproc->p_fsz_ctl;
5709         rlimit = uio.uio_llimit - args->offset;
5710         if (rlimit < (u_offset_t)uio.uio_resid)
5711                 uio.uio_resid = (int)rlimit;
5712
5713         if (args->stable == UNSTABLE4)
5714                 ioflag = 0;
5715         else if (args->stable == FILE_SYNC4)
5716                 ioflag = FSYNC;
5717         else if (args->stable == DATA_SYNC4)
5718                 ioflag = FDSYNC;
5719         else {
5720                 if (iovp != iov)
5721                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
5722                 *cs->statusp = resp->status = NFS4ERR_INVAL;
5723                 goto out;
5724         }
5725
5726         /*
5727          * We're changing creds because VM may fault and we need
5728          * the cred of the current thread to be used if quota
5729          * checking is enabled.
5730          */
5731         savecred = curthread->t_cred;
5732         curthread->t_cred = cr;
5733         error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5734         curthread->t_cred = savecred;
5735
5736         if (iovp != iov)
5737                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5738
5739         if (error) {
5740                 *cs->statusp = resp->status = puterrno4(error);
5741                 goto out;
5742         }
5743
5744         *cs->statusp = resp->status = NFS4_OK;
5745         resp->count = args->data_len - uio.uio_resid;
5746
5747         if (ioflag == 0)
5748                 resp->committed = UNSTABLE4;
5749         else
5750                 resp->committed = FILE_SYNC4;
5751
5752         resp->writeverf = Write4verf;
5753
5754 out:
5755         if (in_crit)
5756                 nbl_end_crit(vp);
5757
5758         DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5759             WRITE4res *, resp);
5760 }
5761
5762
5763 /* XXX put in a header file */
5764 extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5765
5766 void
5767 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5768     struct svc_req *req, cred_t *cr, int *rv)
5769 {
5770         uint_t i;
5771         struct compound_state cs;
5772
5773         if (rv != NULL)
5774                 *rv = 0;
5775         rfs4_init_compound_state(&cs);
5776         /*
5777          * Form a reply tag by copying over the reqeuest tag.
5778          */
5779         resp->tag.utf8string_val =
5780             kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5781         resp->tag.utf8string_len = args->tag.utf8string_len;
5782         bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5783             resp->tag.utf8string_len);
5784
5785         cs.statusp = &resp->status;
5786         cs.req = req;
5787
5788         /*
5789          * XXX for now, minorversion should be zero
5790          */
5791         if (args->minorversion != NFS4_MINORVERSION) {
5792                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5793                     &cs, COMPOUND4args *, args);
5794                 resp->array_len = 0;
5795                 resp->array = NULL;
5796                 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5797                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5798                     &cs, COMPOUND4res *, resp);
5799                 return;
5800         }
5801
5802         ASSERT(exi == NULL);
5803         ASSERT(cr == NULL);
5804
5805         cr = crget();
5806         ASSERT(cr != NULL);
5807
5808         if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5809                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5810                     &cs, COMPOUND4args *, args);
5811                 crfree(cr);
5812                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5813                     &cs, COMPOUND4res *, resp);
5814                 svcerr_badcred(req->rq_xprt);
5815                 if (rv != NULL)
5816                         *rv = 1;
5817                 return;
5818         }
5819         resp->array_len = args->array_len;
5820         resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5821             KM_SLEEP);
5822
5823         cs.basecr = cr;
5824
5825         DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5826             COMPOUND4args *, args);
5827
5828         /*
5829          * For now, NFS4 compound processing must be protected by
5830          * exported_lock because it can access more than one exportinfo
5831          * per compound and share/unshare can now change multiple
5832          * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5833          * per proc (excluding public exinfo), and exi_count design
5834          * is sufficient to protect concurrent execution of NFS2/3
5835          * ops along with unexport.  This lock will be removed as
5836          * part of the NFSv4 phase 2 namespace redesign work.
5837          */
5838         rw_enter(&exported_lock, RW_READER);
5839
5840         /*
5841          * If this is the first compound we've seen, we need to start all
5842          * new instances' grace periods.
5843          */
5844         if (rfs4_seen_first_compound == 0) {
5845                 rfs4_grace_start_new();
5846                 /*
5847                  * This must be set after rfs4_grace_start_new(), otherwise
5848                  * another thread could proceed past here before the former
5849                  * is finished.
5850                  */
5851                 rfs4_seen_first_compound = 1;
5852         }
5853
5854         for (i = 0; i < args->array_len && cs.cont; i++) {
5855                 nfs_argop4 *argop;
5856                 nfs_resop4 *resop;
5857                 uint_t op;
5858
5859                 argop = &args->array[i];
5860                 resop = &resp->array[i];
5861                 resop->resop = argop->argop;
5862                 op = (uint_t)resop->resop;
5863
5864                 if (op < rfsv4disp_cnt) {
5865                         /*
5866                          * Count the individual ops here; NULL and COMPOUND
5867                          * are counted in common_dispatch()
5868                          */
5869                         rfsproccnt_v4_ptr[op].value.ui64++;
5870
5871                         NFS4_DEBUG(rfs4_debug > 1,
5872                             (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5873                         (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5874                         NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5875                             rfs4_op_string[op], *cs.statusp));
5876                         if (*cs.statusp != NFS4_OK)
5877                                 cs.cont = FALSE;
5878                 } else {
5879                         /*
5880                          * This is effectively dead code since XDR code
5881                          * will have already returned BADXDR if op doesn't
5882                          * decode to legal value.  This only done for a
5883                          * day when XDR code doesn't verify v4 opcodes.
5884                          */
5885                         op = OP_ILLEGAL;
5886                         rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5887
5888                         rfs4_op_illegal(argop, resop, req, &cs);
5889                         cs.cont = FALSE;
5890                 }
5891
5892                 /*
5893                  * If not at last op, and if we are to stop, then
5894                  * compact the results array.
5895                  */
5896                 if ((i + 1) < args->array_len && !cs.cont) {
5897                         nfs_resop4 *new_res = kmem_alloc(
5898                             (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5899                         bcopy(resp->array,
5900                             new_res, (i+1) * sizeof (nfs_resop4));
5901                         kmem_free(resp->array,
5902                             args->array_len * sizeof (nfs_resop4));
5903
5904                         resp->array_len =  i + 1;
5905                         resp->array = new_res;
5906                 }
5907         }
5908
5909         rw_exit(&exported_lock);
5910
5911         DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5912             COMPOUND4res *, resp);
5913
5914         if (cs.vp)
5915                 VN_RELE(cs.vp);
5916         if (cs.saved_vp)
5917                 VN_RELE(cs.saved_vp);
5918         if (cs.saved_fh.nfs_fh4_val)
5919                 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5920
5921         if (cs.basecr)
5922                 crfree(cs.basecr);
5923         if (cs.cr)
5924                 crfree(cs.cr);
5925         /*
5926          * done with this compound request, free the label
5927          */
5928
5929         if (req->rq_label != NULL) {
5930                 kmem_free(req->rq_label, sizeof (bslabel_t));
5931                 req->rq_label = NULL;
5932         }
5933 }
5934
5935 /*
5936  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5937  * XXX zero out the tag and array values. Need to investigate why the
5938  * XXX calls occur, but at least prevent the panic for now.
5939  */
5940 void
5941 rfs4_compound_free(COMPOUND4res *resp)
5942 {
5943         uint_t i;
5944
5945         if (resp->tag.utf8string_val) {
5946                 UTF8STRING_FREE(resp->tag)
5947         }
5948
5949         for (i = 0; i < resp->array_len; i++) {
5950                 nfs_resop4 *resop;
5951                 uint_t op;
5952
5953                 resop = &resp->array[i];
5954                 op = (uint_t)resop->resop;
5955                 if (op < rfsv4disp_cnt) {
5956                         (*rfsv4disptab[op].dis_resfree)(resop);
5957                 }
5958         }
5959         if (resp->array != NULL) {
5960                 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5961         }
5962 }
5963
5964 /*
5965  * Process the value of the compound request rpc flags, as a bit-AND
5966  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5967  */
5968 void
5969 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5970 {
5971         int i;
5972         int flag = RPC_ALL;
5973
5974         for (i = 0; flag && i < args->array_len; i++) {
5975                 uint_t op;
5976
5977                 op = (uint_t)args->array[i].argop;
5978
5979                 if (op < rfsv4disp_cnt)
5980                         flag &= rfsv4disptab[op].dis_flags;
5981                 else
5982                         flag = 0;
5983         }
5984         *flagp = flag;
5985 }
5986
5987 nfsstat4
5988 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5989 {
5990         nfsstat4 e;
5991
5992         rfs4_dbe_lock(cp->rc_dbe);
5993
5994         if (cp->rc_sysidt != LM_NOSYSID) {
5995                 *sp = cp->rc_sysidt;
5996                 e = NFS4_OK;
5997
5998         } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
5999                 *sp = cp->rc_sysidt;
6000                 e = NFS4_OK;
6001
6002                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6003                     "rfs4_client_sysid: allocated 0x%x\n", *sp));
6004         } else
6005                 e = NFS4ERR_DELAY;
6006
6007         rfs4_dbe_unlock(cp->rc_dbe);
6008         return (e);
6009 }
6010
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug-only helper: emit a CE_NOTE describing a record-lock request.
 *
 * str       - caller-supplied prefix for the message
 * operation - F_GETLK, F_SETLK or F_SETLK_NBMAND; anything else is
 *             printed as "F_UNKNOWN"
 * flk       - the lock description; l_whence is asserted to be 0 and an
 *             l_len of 0 is printed as ~0LL
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
        char *op, *type;

        switch (operation) {
        case F_GETLK:
                op = "F_GETLK";
                break;
        case F_SETLK:
                op = "F_SETLK";
                break;
        case F_SETLK_NBMAND:
                op = "F_SETLK_NBMAND";
                break;
        default:
                op = "F_UNKNOWN";
                break;
        }

        switch (flk->l_type) {
        case F_UNLCK:
                type = "F_UNLCK";
                break;
        case F_RDLCK:
                type = "F_RDLCK";
                break;
        case F_WRLCK:
                type = "F_WRLCK";
                break;
        default:
                type = "F_UNKNOWN";
                break;
        }

        ASSERT(flk->l_whence == 0);
        cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
            str, op, type, (longlong_t)flk->l_start,
            flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/*
 * Wrapped in do/while so the macro behaves as a single statement; a bare
 * "if (d) ..." would capture an "else" that follows the macro call.
 */
#define LOCK_PRINT(d, s, t, f) \
        do { \
                if (d) \
                        lock_print(s, t, f); \
        } while (0)
#else
#define LOCK_PRINT(d, s, t, f)
#endif
6047
6048 /*ARGSUSED*/
6049 static bool_t
6050 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6051 {
6052         return (TRUE);
6053 }
6054
6055 /*
6056  * Look up the pathname using the vp in cs as the directory vnode.
6057  * cs->vp will be the vnode for the file on success
6058  */
6059
6060 static nfsstat4
6061 rfs4_lookup(component4 *component, struct svc_req *req,
6062     struct compound_state *cs)
6063 {
6064         char *nm;
6065         uint32_t len;
6066         nfsstat4 status;
6067         struct sockaddr *ca;
6068         char *name;
6069
6070         if (cs->vp == NULL) {
6071                 return (NFS4ERR_NOFILEHANDLE);
6072         }
6073         if (cs->vp->v_type != VDIR) {
6074                 return (NFS4ERR_NOTDIR);
6075         }
6076
6077         if (!utf8_dir_verify(component))
6078                 return (NFS4ERR_INVAL);
6079
6080         nm = utf8_to_fn(component, &len, NULL);
6081         if (nm == NULL) {
6082                 return (NFS4ERR_INVAL);
6083         }
6084
6085         if (len > MAXNAMELEN) {
6086                 kmem_free(nm, len);
6087                 return (NFS4ERR_NAMETOOLONG);
6088         }
6089
6090         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6091         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6092             MAXPATHLEN + 1);
6093
6094         if (name == NULL) {
6095                 kmem_free(nm, len);
6096                 return (NFS4ERR_INVAL);
6097         }
6098
6099         status = do_rfs4_op_lookup(name, req, cs);
6100
6101         if (name != nm)
6102                 kmem_free(name, MAXPATHLEN + 1);
6103
6104         kmem_free(nm, len);
6105
6106         return (status);
6107 }
6108
6109 static nfsstat4
6110 rfs4_lookupfile(component4 *component, struct svc_req *req,
6111     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6112 {
6113         nfsstat4 status;
6114         vnode_t *dvp = cs->vp;
6115         vattr_t bva, ava, fva;
6116         int error;
6117
6118         /* Get "before" change value */
6119         bva.va_mask = AT_CTIME|AT_SEQ;
6120         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6121         if (error)
6122                 return (puterrno4(error));
6123
6124         /* rfs4_lookup may VN_RELE directory */
6125         VN_HOLD(dvp);
6126
6127         status = rfs4_lookup(component, req, cs);
6128         if (status != NFS4_OK) {
6129                 VN_RELE(dvp);
6130                 return (status);
6131         }
6132
6133         /*
6134          * Get "after" change value, if it fails, simply return the
6135          * before value.
6136          */
6137         ava.va_mask = AT_CTIME|AT_SEQ;
6138         if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6139                 ava.va_ctime = bva.va_ctime;
6140                 ava.va_seq = 0;
6141         }
6142         VN_RELE(dvp);
6143
6144         /*
6145          * Validate the file is a file
6146          */
6147         fva.va_mask = AT_TYPE|AT_MODE;
6148         error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6149         if (error)
6150                 return (puterrno4(error));
6151
6152         if (fva.va_type != VREG) {
6153                 if (fva.va_type == VDIR)
6154                         return (NFS4ERR_ISDIR);
6155                 if (fva.va_type == VLNK)
6156                         return (NFS4ERR_SYMLINK);
6157                 return (NFS4ERR_INVAL);
6158         }
6159
6160         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6161         NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6162
6163         /*
6164          * It is undefined if VOP_LOOKUP will change va_seq, so
6165          * cinfo.atomic = TRUE only if we have
6166          * non-zero va_seq's, and they have not changed.
6167          */
6168         if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6169                 cinfo->atomic = TRUE;
6170         else
6171                 cinfo->atomic = FALSE;
6172
6173         /* Check for mandatory locking */
6174         cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6175         return (check_open_access(access, cs, req));
6176 }
6177
6178 static nfsstat4
6179 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6180     timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
6181 {
6182         int error;
6183         nfsstat4 status = NFS4_OK;
6184         vattr_t va;
6185
6186 tryagain:
6187
6188         /*
6189          * The file open mode used is VWRITE.  If the client needs
6190          * some other semantic, then it should do the access checking
6191          * itself.  It would have been nice to have the file open mode
6192          * passed as part of the arguments.
6193          */
6194
6195         *created = TRUE;
6196         error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6197
6198         if (error) {
6199                 *created = FALSE;
6200
6201                 /*
6202                  * If we got something other than file already exists
6203                  * then just return this error.  Otherwise, we got
6204                  * EEXIST.  If we were doing a GUARDED create, then
6205                  * just return this error.  Otherwise, we need to
6206                  * make sure that this wasn't a duplicate of an
6207                  * exclusive create request.
6208                  *
6209                  * The assumption is made that a non-exclusive create
6210                  * request will never return EEXIST.
6211                  */
6212
6213                 if (error != EEXIST || mode == GUARDED4) {
6214                         status = puterrno4(error);
6215                         return (status);
6216                 }
6217                 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6218                     NULL, NULL, NULL);
6219
6220                 if (error) {
6221                         /*
6222                          * We couldn't find the file that we thought that
6223                          * we just created.  So, we'll just try creating
6224                          * it again.
6225                          */
6226                         if (error == ENOENT)
6227                                 goto tryagain;
6228
6229                         status = puterrno4(error);
6230                         return (status);
6231                 }
6232
6233                 if (mode == UNCHECKED4) {
6234                         /* existing object must be regular file */
6235                         if ((*vpp)->v_type != VREG) {
6236                                 if ((*vpp)->v_type == VDIR)
6237                                         status = NFS4ERR_ISDIR;
6238                                 else if ((*vpp)->v_type == VLNK)
6239                                         status = NFS4ERR_SYMLINK;
6240                                 else
6241                                         status = NFS4ERR_INVAL;
6242                                 VN_RELE(*vpp);
6243                                 return (status);
6244                         }
6245
6246                         return (NFS4_OK);
6247                 }
6248
6249                 /* Check for duplicate request */
6250                 ASSERT(mtime != 0);
6251                 va.va_mask = AT_MTIME;
6252                 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6253                 if (!error) {
6254                         /* We found the file */
6255                         if (va.va_mtime.tv_sec != mtime->tv_sec ||
6256                             va.va_mtime.tv_nsec != mtime->tv_nsec) {
6257                                 /* but its not our creation */
6258                                 VN_RELE(*vpp);
6259                                 return (NFS4ERR_EXIST);
6260                         }
6261                         *created = TRUE; /* retrans of create == created */
6262                         return (NFS4_OK);
6263                 }
6264                 VN_RELE(*vpp);
6265                 return (NFS4ERR_EXIST);
6266         }
6267
6268         return (NFS4_OK);
6269 }
6270
6271 static nfsstat4
6272 check_open_access(uint32_t access, struct compound_state *cs,
6273     struct svc_req *req)
6274 {
6275         int error;
6276         vnode_t *vp;
6277         bool_t readonly;
6278         cred_t *cr = cs->cr;
6279
6280         /* For now we don't allow mandatory locking as per V2/V3 */
6281         if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6282                 return (NFS4ERR_ACCESS);
6283         }
6284
6285         vp = cs->vp;
6286         ASSERT(cr != NULL && vp->v_type == VREG);
6287
6288         /*
6289          * If the file system is exported read only and we are trying
6290          * to open for write, then return NFS4ERR_ROFS
6291          */
6292
6293         readonly = rdonly4(cs->exi, cs->vp, req);
6294
6295         if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6296                 return (NFS4ERR_ROFS);
6297
6298         if (access & OPEN4_SHARE_ACCESS_READ) {
6299                 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6300                     (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6301                         return (NFS4ERR_ACCESS);
6302                 }
6303         }
6304
6305         if (access & OPEN4_SHARE_ACCESS_WRITE) {
6306                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6307                 if (error)
6308                         return (NFS4ERR_ACCESS);
6309         }
6310
6311         return (NFS4_OK);
6312 }
6313
6314 static nfsstat4
6315 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6316     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6317 {
6318         struct nfs4_svgetit_arg sarg;
6319         struct nfs4_ntov_table ntov;
6320
6321         bool_t ntov_table_init = FALSE;
6322         struct statvfs64 sb;
6323         nfsstat4 status;
6324         vnode_t *vp;
6325         vattr_t bva, ava, iva, cva, *vap;
6326         vnode_t *dvp;
6327         timespec32_t *mtime;
6328         char *nm = NULL;
6329         uint_t buflen;
6330         bool_t created;
6331         bool_t setsize = FALSE;
6332         len_t reqsize;
6333         int error;
6334         bool_t trunc;
6335         caller_context_t ct;
6336         component4 *component;
6337         bslabel_t *clabel;
6338         struct sockaddr *ca;
6339         char *name = NULL;
6340
6341         sarg.sbp = &sb;
6342         sarg.is_referral = B_FALSE;
6343
6344         dvp = cs->vp;
6345
6346         /* Check if the file system is read only */
6347         if (rdonly4(cs->exi, dvp, req))
6348                 return (NFS4ERR_ROFS);
6349
6350         /* check the label of including directory */
6351         if (is_system_labeled()) {
6352                 ASSERT(req->rq_label != NULL);
6353                 clabel = req->rq_label;
6354                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6355                     "got client label from request(1)",
6356                     struct svc_req *, req);
6357                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6358                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6359                             cs->exi)) {
6360                                 return (NFS4ERR_ACCESS);
6361                         }
6362                 }
6363         }
6364
6365         /*
6366          * Get the last component of path name in nm. cs will reference
6367          * the including directory on success.
6368          */
6369         component = &args->open_claim4_u.file;
6370         if (!utf8_dir_verify(component))
6371                 return (NFS4ERR_INVAL);
6372
6373         nm = utf8_to_fn(component, &buflen, NULL);
6374
6375         if (nm == NULL)
6376                 return (NFS4ERR_RESOURCE);
6377
6378         if (buflen > MAXNAMELEN) {
6379                 kmem_free(nm, buflen);
6380                 return (NFS4ERR_NAMETOOLONG);
6381         }
6382
6383         bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6384         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6385         if (error) {
6386                 kmem_free(nm, buflen);
6387                 return (puterrno4(error));
6388         }
6389
6390         if (bva.va_type != VDIR) {
6391                 kmem_free(nm, buflen);
6392                 return (NFS4ERR_NOTDIR);
6393         }
6394
6395         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6396
6397         switch (args->mode) {
6398         case GUARDED4:
6399                 /*FALLTHROUGH*/
6400         case UNCHECKED4:
6401                 nfs4_ntov_table_init(&ntov);
6402                 ntov_table_init = TRUE;
6403
6404                 *attrset = 0;
6405                 status = do_rfs4_set_attrs(attrset,
6406                     &args->createhow4_u.createattrs,
6407                     cs, &sarg, &ntov, NFS4ATTR_SETIT);
6408
6409                 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6410                     sarg.vap->va_type != VREG) {
6411                         if (sarg.vap->va_type == VDIR)
6412                                 status = NFS4ERR_ISDIR;
6413                         else if (sarg.vap->va_type == VLNK)
6414                                 status = NFS4ERR_SYMLINK;
6415                         else
6416                                 status = NFS4ERR_INVAL;
6417                 }
6418
6419                 if (status != NFS4_OK) {
6420                         kmem_free(nm, buflen);
6421                         nfs4_ntov_table_free(&ntov, &sarg);
6422                         *attrset = 0;
6423                         return (status);
6424                 }
6425
6426                 vap = sarg.vap;
6427                 vap->va_type = VREG;
6428                 vap->va_mask |= AT_TYPE;
6429
6430                 if ((vap->va_mask & AT_MODE) == 0) {
6431                         vap->va_mask |= AT_MODE;
6432                         vap->va_mode = (mode_t)0600;
6433                 }
6434
6435                 if (vap->va_mask & AT_SIZE) {
6436
6437                         /* Disallow create with a non-zero size */
6438
6439                         if ((reqsize = sarg.vap->va_size) != 0) {
6440                                 kmem_free(nm, buflen);
6441                                 nfs4_ntov_table_free(&ntov, &sarg);
6442                                 *attrset = 0;
6443                                 return (NFS4ERR_INVAL);
6444                         }
6445                         setsize = TRUE;
6446                 }
6447                 break;
6448
6449         case EXCLUSIVE4:
6450                 /* prohibit EXCL create of named attributes */
6451                 if (dvp->v_flag & V_XATTRDIR) {
6452                         kmem_free(nm, buflen);
6453                         *attrset = 0;
6454                         return (NFS4ERR_INVAL);
6455                 }
6456
6457                 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6458                 cva.va_type = VREG;
6459                 /*
6460                  * Ensure no time overflows. Assumes underlying
6461                  * filesystem supports at least 32 bits.
6462                  * Truncate nsec to usec resolution to allow valid
6463                  * compares even if the underlying filesystem truncates.
6464                  */
6465                 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6466                 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6467                 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6468                 cva.va_mode = (mode_t)0;
6469                 vap = &cva;
6470
6471                 /*
6472                  * For EXCL create, attrset is set to the server attr
6473                  * used to cache the client's verifier.
6474                  */
6475                 *attrset = FATTR4_TIME_MODIFY_MASK;
6476                 break;
6477         }
6478
6479         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6480         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6481             MAXPATHLEN  + 1);
6482
6483         if (name == NULL) {
6484                 kmem_free(nm, buflen);
6485                 return (NFS4ERR_SERVERFAULT);
6486         }
6487
6488         status = create_vnode(dvp, name, vap, args->mode, mtime,
6489             cs->cr, &vp, &created);
6490         if (nm != name)
6491                 kmem_free(name, MAXPATHLEN + 1);
6492         kmem_free(nm, buflen);
6493
6494         if (status != NFS4_OK) {
6495                 if (ntov_table_init)
6496                         nfs4_ntov_table_free(&ntov, &sarg);
6497                 *attrset = 0;
6498                 return (status);
6499         }
6500
6501         trunc = (setsize && !created);
6502
6503         if (args->mode != EXCLUSIVE4) {
6504                 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6505
6506                 /*
6507                  * True verification that object was created with correct
6508                  * attrs is impossible.  The attrs could have been changed
6509                  * immediately after object creation.  If attributes did
6510                  * not verify, the only recourse for the server is to
6511                  * destroy the object.  Maybe if some attrs (like gid)
6512                  * are set incorrectly, the object should be destroyed;
6513                  * however, seems bad as a default policy.  Do we really
6514                  * want to destroy an object over one of the times not
6515                  * verifying correctly?  For these reasons, the server
6516                  * currently sets bits in attrset for createattrs
6517                  * that were set; however, no verification is done.
6518                  *
6519                  * vmask_to_nmask accounts for vattr bits set on create
6520                  *      [do_rfs4_set_attrs() only sets resp bits for
6521                  *       non-vattr/vfs bits.]
6522                  * Mask off any bits we set by default so as not to return
6523                  * more attrset bits than were requested in createattrs
6524                  */
6525                 if (created) {
6526                         nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6527                         *attrset &= createmask;
6528                 } else {
6529                         /*
6530                          * We did not create the vnode (we tried but it
6531                          * already existed).  In this case, the only createattr
6532                          * that the spec allows the server to set is size,
6533                          * and even then, it can only be set if it is 0.
6534                          */
6535                         *attrset = 0;
6536                         if (trunc)
6537                                 *attrset = FATTR4_SIZE_MASK;
6538                 }
6539         }
6540         if (ntov_table_init)
6541                 nfs4_ntov_table_free(&ntov, &sarg);
6542
6543         /*
6544          * Get the initial "after" sequence number, if it fails,
6545          * set to zero, time to before.
6546          */
6547         iva.va_mask = AT_CTIME|AT_SEQ;
6548         if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6549                 iva.va_seq = 0;
6550                 iva.va_ctime = bva.va_ctime;
6551         }
6552
6553         /*
6554          * create_vnode attempts to create the file exclusive,
6555          * if it already exists the VOP_CREATE will fail and
6556          * may not increase va_seq. It is atomic if
6557          * we haven't changed the directory, but if it has changed
6558          * we don't know what changed it.
6559          */
6560         if (!created) {
6561                 if (bva.va_seq && iva.va_seq &&
6562                     bva.va_seq == iva.va_seq)
6563                         cinfo->atomic = TRUE;
6564                 else
6565                         cinfo->atomic = FALSE;
6566                 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6567         } else {
6568                 /*
6569                  * The entry was created, we need to sync the
6570                  * directory metadata.
6571                  */
6572                 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6573
6574                 /*
6575                  * Get "after" change value, if it fails, simply return the
6576                  * before value.
6577                  */
6578                 ava.va_mask = AT_CTIME|AT_SEQ;
6579                 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6580                         ava.va_ctime = bva.va_ctime;
6581                         ava.va_seq = 0;
6582                 }
6583
6584                 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6585
6586                 /*
6587                  * The cinfo->atomic = TRUE only if we have
6588                  * non-zero va_seq's, and it has incremented by exactly one
6589                  * during the create_vnode and it didn't
6590                  * change during the VOP_FSYNC.
6591                  */
6592                 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6593                     iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6594                         cinfo->atomic = TRUE;
6595                 else
6596                         cinfo->atomic = FALSE;
6597         }
6598
6599         /* Check for mandatory locking and that the size gets set. */
6600         cva.va_mask = AT_MODE;
6601         if (setsize)
6602                 cva.va_mask |= AT_SIZE;
6603
6604         /* Assume the worst */
6605         cs->mandlock = TRUE;
6606
6607         if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6608                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6609
6610                 /*
6611                  * Truncate the file if necessary; this would be
6612                  * the case for create over an existing file.
6613                  */
6614
6615                 if (trunc) {
6616                         int in_crit = 0;
6617                         rfs4_file_t *fp;
6618                         bool_t create = FALSE;
6619
6620                         /*
6621                          * We are writing over an existing file.
6622                          * Check to see if we need to recall a delegation.
6623                          */
6624                         rfs4_hold_deleg_policy();
6625                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6626                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6627                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6628                                         rfs4_file_rele(fp);
6629                                         rfs4_rele_deleg_policy();
6630                                         VN_RELE(vp);
6631                                         *attrset = 0;
6632                                         return (NFS4ERR_DELAY);
6633                                 }
6634                                 rfs4_file_rele(fp);
6635                         }
6636                         rfs4_rele_deleg_policy();
6637
6638                         if (nbl_need_check(vp)) {
6639                                 in_crit = 1;
6640
6641                                 ASSERT(reqsize == 0);
6642
6643                                 nbl_start_crit(vp, RW_READER);
6644                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6645                                     cva.va_size, 0, NULL)) {
6646                                         in_crit = 0;
6647                                         nbl_end_crit(vp);
6648                                         VN_RELE(vp);
6649                                         *attrset = 0;
6650                                         return (NFS4ERR_ACCESS);
6651                                 }
6652                         }
6653                         ct.cc_sysid = 0;
6654                         ct.cc_pid = 0;
6655                         ct.cc_caller_id = nfs4_srv_caller_id;
6656                         ct.cc_flags = CC_DONTBLOCK;
6657
6658                         cva.va_mask = AT_SIZE;
6659                         cva.va_size = reqsize;
6660                         (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6661                         if (in_crit)
6662                                 nbl_end_crit(vp);
6663                 }
6664         }
6665
6666         error = makefh4(&cs->fh, vp, cs->exi);
6667
6668         /*
6669          * Force modified data and metadata out to stable storage.
6670          */
6671         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6672
6673         if (error) {
6674                 VN_RELE(vp);
6675                 *attrset = 0;
6676                 return (puterrno4(error));
6677         }
6678
6679         /* if parent dir is attrdir, set namedattr fh flag */
6680         if (dvp->v_flag & V_XATTRDIR)
6681                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6682
6683         if (cs->vp)
6684                 VN_RELE(cs->vp);
6685
6686         cs->vp = vp;
6687
6688         /*
6689          * if we did not create the file, we will need to check
6690          * the access bits on the file
6691          */
6692
6693         if (!created) {
6694                 if (setsize)
6695                         args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6696                 status = check_open_access(args->share_access, cs, req);
6697                 if (status != NFS4_OK)
6698                         *attrset = 0;
6699         }
6700         return (status);
6701 }
6702
     /*
      * rfs4_do_open: common back end that establishes (or upgrades) the open
      * state for the file referenced by cs->vp on behalf of open owner "oo".
      *
      *   cs        - compound state; cs->vp and cs->fh identify the file,
      *               cs->cr supplies the credentials, cs->deleg is set here
      *   req       - RPC request (currently unused)
      *   oo        - open owner performing the open
      *   deleg     - delegation request type passed to rfs4_grant_delegation()
      *   access    - OPEN4_SHARE_ACCESS_* bits requested by this open
      *   deny      - OPEN4_SHARE_DENY_* bits requested by this open
      *   resp      - OPEN result; status, stateid and any granted delegation
      *               are stored here
      *   deleg_cur - non-zero for a CLAIM_DELEGATE_CUR open, in which case
      *               VOP_OPEN() is skipped and only an open upgrade is done
      *
      * The outcome is reported through resp->status:
      *   NFS4ERR_RESOURCE    - the file or state entry could not be obtained
      *   (rfs4_client_sysid) - whatever status the sysid lookup returned
      *   (rfs4_share)        - whatever error the share reservation returned
      *   NFS4ERR_OLD_STATEID - the state was closed while the locks were
      *                         dropped for a delegation recall
      *   NFS4ERR_DELAY       - a conflicting delegation is still outstanding
      *                         after the recall, or VOP_OPEN() failed with
      *                         EAGAIN and CC_WOULDBLOCK set
      *   NFS4ERR_SERVERFAULT - any other VOP_OPEN() failure
      *   NFS4_OK             - the open (or upgrade) succeeded
      *
      * Once the state entry has been found, every failure path releases the
      * file and state references taken here, closes the state entry again if
      * this call created it (screate), and, once rfs4_share() has succeeded
      * for new share bits, undoes it with rfs4_unshare().
      */
6703 /*ARGSUSED*/
6704 static void
6705 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6706     rfs4_openowner_t *oo, delegreq_t deleg,
6707     uint32_t access, uint32_t deny,
6708     OPEN4res *resp, int deleg_cur)
6709 {
6710         /* XXX Currently not using req  */
6711         rfs4_state_t *sp;
6712         rfs4_file_t *fp;
6713         bool_t screate = TRUE;
6714         bool_t fcreate = TRUE;
6715         uint32_t open_a, share_a;
6716         uint32_t open_d, share_d;
6717         rfs4_deleg_state_t *dsp;
6718         sysid_t sysid;
6719         nfsstat4 status;
6720         caller_context_t ct;
6721         int fflags = 0;
6722         int recall = 0;
6723         int err;
6724         int first_open;
6725
6726         /* get the file struct and hold a lock on it during initial open */
6727         fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6728         if (fp == NULL) {
6729                 resp->status = NFS4ERR_RESOURCE;
6730                 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6731                 return;
6732         }
6733
6734         sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6735         if (sp == NULL) {
6736                 resp->status = NFS4ERR_RESOURCE;
6737                 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6738                 /* No need to keep any reference */
6739                 rw_exit(&fp->rf_file_rwlock);
6740                 rfs4_file_rele(fp);
6741                 return;
6742         }
6743
6744         /* try to get the sysid before continuing */
6745         if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6746                 resp->status = status;
6747                 rfs4_file_rele(fp);
6748                 /* Not a fully formed open; "close" it */
6749                 if (screate == TRUE)
6750                         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6751                 rfs4_state_rele(sp);
6752                 return;
6753         }
6754
6755         /* Calculate the fflags for this OPEN. */
6756         if (access & OPEN4_SHARE_ACCESS_READ)
6757                 fflags |= FREAD;
6758         if (access & OPEN4_SHARE_ACCESS_WRITE)
6759                 fflags |= FWRITE;
6760
             /*
              * Lock order: the state entry is locked first; the file entry
              * lock is taken further down while this one is still held.
              */
6761         rfs4_dbe_lock(sp->rs_dbe);
6762
6763         /*
6764          * Calculate the new deny and access mode that this open is adding to
6765          * the file for this open owner;
6766          */
6767         open_d = (deny & ~sp->rs_open_deny);
6768         open_a = (access & ~sp->rs_open_access);
6769
6770         /*
6771          * Calculate the new share access and share deny modes that this open
6772          * is adding to the file for this open owner;
6773          */
6774         share_a = (access & ~sp->rs_share_access);
6775         share_d = (deny & ~sp->rs_share_deny);
6776
6777         first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6778
6779         /*
6780          * Check to see the client has already sent an open for this
6781          * open owner on this file with the same share/deny modes.
6782          * If so, we don't need to check for a conflict and we don't
6783          * need to add another shrlock.  If not, then we need to
6784          * check for conflicts in deny and access before checking for
6785          * conflicts in delegation.  We don't want to recall a
6786          * delegation based on an open that will eventually fail based
6787          * on share modes.
6788          */
6789
6790         if (share_a || share_d) {
6791                 if ((err = rfs4_share(sp, access, deny)) != 0) {
6792                         rfs4_dbe_unlock(sp->rs_dbe);
6793                         resp->status = err;
6794
6795                         rfs4_file_rele(fp);
6796                         /* Not a fully formed open; "close" it */
6797                         if (screate == TRUE)
6798                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6799                         rfs4_state_rele(sp);
6800                         return;
6801                 }
6802         }
6803
6804         rfs4_dbe_lock(fp->rf_dbe);
6805
6806         /*
6807          * Check to see if this file is delegated and if so, if a
6808          * recall needs to be done.
6809          */
6810         if (rfs4_check_recall(sp, access)) {
                     /* Both entry locks are dropped across the recall and delay. */
6811                 rfs4_dbe_unlock(fp->rf_dbe);
6812                 rfs4_dbe_unlock(sp->rs_dbe);
6813                 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6814                 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6815                 rfs4_dbe_lock(sp->rs_dbe);
6816
6817                 /* if state closed while lock was dropped */
6818                 if (sp->rs_closed) {
6819                         if (share_a || share_d)
6820                                 (void) rfs4_unshare(sp);
6821                         rfs4_dbe_unlock(sp->rs_dbe);
6822                         rfs4_file_rele(fp);
6823                         /* Not a fully formed open; "close" it */
6824                         if (screate == TRUE)
6825                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6826                         rfs4_state_rele(sp);
6827                         resp->status = NFS4ERR_OLD_STATEID;
6828                         return;
6829                 }
6830
6831                 rfs4_dbe_lock(fp->rf_dbe);
6832                 /* Let's see if the delegation was returned */
6833                 if (rfs4_check_recall(sp, access)) {
6834                         rfs4_dbe_unlock(fp->rf_dbe);
6835                         if (share_a || share_d)
6836                                 (void) rfs4_unshare(sp);
6837                         rfs4_dbe_unlock(sp->rs_dbe);
6838                         rfs4_file_rele(fp);
6839                         rfs4_update_lease(sp->rs_owner->ro_client);
6840
6841                         /* Not a fully formed open; "close" it */
6842                         if (screate == TRUE)
6843                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6844                         rfs4_state_rele(sp);
6845                         resp->status = NFS4ERR_DELAY;
6846                         return;
6847                 }
6848         }
6849         /*
6850          * the share check passed and any delegation conflict has been
6851          * taken care of, now call vop_open.
6852          * if this is the first open then call vop_open with fflags.
6853          * if not, call vn_open_upgrade with just the upgrade flags.
6854          *
6855          * if the file has been opened already, it will have the current
6856          * access mode in the state struct.  if it has no share access, then
6857          * this is a new open.
6858          *
6859          * However, if this is open with CLAIM_DELEGATE_CUR, then don't
6860          * call VOP_OPEN(), just do the open upgrade.
6861          */
6862         if (first_open && !deleg_cur) {
6863                 ct.cc_sysid = sysid;
6864                 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6865                 ct.cc_caller_id = nfs4_srv_caller_id;
6866                 ct.cc_flags = CC_DONTBLOCK;
6867                 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6868                 if (err) {
6869                         rfs4_dbe_unlock(fp->rf_dbe);
6870                         if (share_a || share_d)
6871                                 (void) rfs4_unshare(sp);
6872                         rfs4_dbe_unlock(sp->rs_dbe);
6873                         rfs4_file_rele(fp);
6874
6875                         /* Not a fully formed open; "close" it */
6876                         if (screate == TRUE)
6877                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6878                         rfs4_state_rele(sp);
6879                         /* check if a monitor detected a delegation conflict */
6880                         if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6881                                 resp->status = NFS4ERR_DELAY;
6882                         else
6883                                 resp->status = NFS4ERR_SERVERFAULT;
6884                         return;
6885                 }
6886         } else { /* open upgrade */
6887                 /*
6888                  * calculate the fflags for the new mode that is being added
6889                  * by this upgrade.
6890                  */
6891                 fflags = 0;
6892                 if (open_a & OPEN4_SHARE_ACCESS_READ)
6893                         fflags |= FREAD;
6894                 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6895                         fflags |= FWRITE;
6896                 vn_open_upgrade(cs->vp, fflags);
6897         }
6898         sp->rs_open_access |= access;
6899         sp->rs_open_deny |= deny;
6900
6901         if (open_d & OPEN4_SHARE_DENY_READ)
6902                 fp->rf_deny_read++;
6903         if (open_d & OPEN4_SHARE_DENY_WRITE)
6904                 fp->rf_deny_write++;
6905         fp->rf_share_deny |= deny;
6906
6907         if (open_a & OPEN4_SHARE_ACCESS_READ)
6908                 fp->rf_access_read++;
6909         if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6910                 fp->rf_access_write++;
6911         fp->rf_share_access |= access;
6912
6913         /*
6914          * Check for delegation here. if the deleg argument is not
6915          * DELEG_ANY, then this is a reclaim from a client and
6916          * we must honor the delegation requested. If necessary we can
6917          * set the recall flag.
6918          */
6919
6920         dsp = rfs4_grant_delegation(deleg, sp, &recall);
6921
             /* Note in the compound state whether the file is write delegated. */
6922         cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6923
6924         next_stateid(&sp->rs_stateid);
6925
6926         resp->stateid = sp->rs_stateid.stateid;
6927
6928         rfs4_dbe_unlock(fp->rf_dbe);
6929         rfs4_dbe_unlock(sp->rs_dbe);
6930
6931         if (dsp) {
6932                 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6933                 rfs4_deleg_state_rele(dsp);
6934         }
6935
6936         rfs4_file_rele(fp);
6937         rfs4_state_rele(sp);
6938
6939         resp->status = NFS4_OK;
6940 }
6941
     /*
      * rfs4_do_opennull: handle an OPEN that names the file by component
      * (args->open_claim4_u.file) relative to the current filehandle.
      *
      *   cs   - compound state; on success cs->vp and cs->fh reference the
      *          opened file
      *   req  - RPC request, passed through to the lookup/create helpers
      *   args - OPEN arguments (opentype, mode, share access/deny, claim)
      *   oo   - open owner performing the open
      *   resp - OPEN result; status, cinfo and attrset are filled in
      *
      * With OPEN4_NOCREATE the file is looked up with rfs4_lookupfile();
      * otherwise it is created with rfs4_createfile(), with delegation grants
      * inhibited around an EXCLUSIVE4 create.  If that step succeeds the open
      * state is established by rfs4_do_open(), asking for no delegation while
      * the open owner still needs confirmation.  If the lookup/create step
      * fails its status is also stored in *cs->statusp.
      */
6942 /*ARGSUSED*/
6943 static void
6944 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6945     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6946 {
6947         change_info4 *cinfo = &resp->cinfo;
6948         bitmap4 *attrset = &resp->attrset;
6949
6950         if (args->opentype == OPEN4_NOCREATE)
6951                 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6952                     req, cs, args->share_access, cinfo);
6953         else {
6954                 /* inhibit delegation grants during exclusive create */
6955
6956                 if (args->mode == EXCLUSIVE4)
6957                         rfs4_disable_delegation();
6958
6959                 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6960                     oo->ro_client->rc_clientid);
6961         }
6962
6963         if (resp->status == NFS4_OK) {
6964
6965                 /* cs->vp cs->fh now reference the desired file */
6966
6967                 rfs4_do_open(cs, req, oo,
6968                     oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6969                     args->share_access, args->share_deny, resp, 0);
6970
6971                 /*
6972                  * If rfs4_createfile set attrset, we must
6973                  * clear this attrset before the response is copied.
6974                  */
6975                 if (resp->status != NFS4_OK && resp->attrset) {
6976                         resp->attrset = 0;
6977                 }
6978         }
6979         else
6980                 *cs->statusp = resp->status;
6981
             /*
              * Re-enable delegation grants only when they were disabled above.
              * The disable is issued solely on the create path, so the same
              * opentype test is applied here to keep the two calls paired;
              * args->mode is not consulted for an OPEN4_NOCREATE open.
              */
6982         if (args->opentype != OPEN4_NOCREATE && args->mode == EXCLUSIVE4)
6983                 rfs4_enable_delegation();
6984 }
6985
     /*
      * rfs4_do_openprev: handle a reclaim OPEN (CLAIM_PREVIOUS) of the file
      * already referenced by cs->vp.
      *
      *   cs   - compound state; cs->vp is the file being reclaimed and
      *          cs->mandlock is set from its mode
      *   req  - RPC request, passed through to the helpers
      *   args - OPEN arguments (share access/deny and the delegate_type
      *          being reclaimed)
      *   oo   - open owner performing the reclaim
      *   resp - OPEN result; status and cinfo are filled in
      *
      * Only regular files may be reclaimed: a directory yields NFS4ERR_ISDIR,
      * a symbolic link NFS4ERR_SYMLINK and anything else NFS4ERR_INVAL.  The
      * access check is skipped when the caller owns the file.  cinfo is
      * zeroed and the open itself is performed by rfs4_do_open() with the
      * delegation type the client asked to reclaim.
      */
6986 /*ARGSUSED*/
6987 static void
6988 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
6989     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6990 {
6991         change_info4 *cinfo = &resp->cinfo;
6992         vattr_t va;
6993         vtype_t v_type = cs->vp->v_type;
6994         int error = 0;
6995
6996         /* Verify that we have a regular file */
6997         if (v_type != VREG) {
6998                 if (v_type == VDIR)
6999                         resp->status = NFS4ERR_ISDIR;
7000                 else if (v_type == VLNK)
7001                         resp->status = NFS4ERR_SYMLINK;
7002                 else
7003                         resp->status = NFS4ERR_INVAL;
7004                 return;
7005         }
7006
             /* The mode feeds the mandatory lock check, the uid the owner test. */
7007         va.va_mask = AT_MODE|AT_UID;
7008         error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7009         if (error) {
7010                 resp->status = puterrno4(error);
7011                 return;
7012         }
7013
7014         cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7015
7016         /*
7017          * Check if we have access to the file, Note that the file
7018          * could have originally been open UNCHECKED or GUARDED
7019          * with mode bits that will now fail, but there is nothing
7020          * we can really do about that except in the case that the
7021          * owner of the file is the one requesting the open.
7022          */
7023         if (crgetuid(cs->cr) != va.va_uid) {
7024                 resp->status = check_open_access(args->share_access, cs, req);
7025                 if (resp->status != NFS4_OK) {
7026                         return;
7027                 }
7028         }
7029
7030         /*
7031          * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7032          */
7033         cinfo->before = 0;
7034         cinfo->after = 0;
7035         cinfo->atomic = FALSE;
7036
7037         rfs4_do_open(cs, req, oo,
7038             NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7039             args->share_access, args->share_deny, resp, 0);
7040 }
7041
     /*
      * rfs4_do_opendelcur: handle an OPEN that claims a currently held
      * delegation, identified by the delegation stateid in
      * args->open_claim4_u.delegate_cur_info.
      *
      *   cs   - compound state; cs->vp and cs->fh are switched to the
      *          delegated file
      *   req  - RPC request, passed through to rfs4_do_open()
      *   args - OPEN arguments (delegation stateid, share access/deny)
      *   oo   - open owner performing the open
      *   resp - OPEN result; status is filled in here or by rfs4_do_open()
      *
      * If the delegation state cannot be found, the status returned by
      * rfs4_get_deleg_state() is reported.  A makefh4() failure is reported
      * through both resp->status and *cs->statusp.
      */
7042 static void
7043 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7044     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7045 {
7046         int error;
7047         nfsstat4 status;
7048         stateid4 stateid =
7049             args->open_claim4_u.delegate_cur_info.delegate_stateid;
7050         rfs4_deleg_state_t *dsp;
7051
7052         /*
7053          * Find the state info from the stateid and confirm that the
7054          * file is delegated.  If the state openowner is the same as
7055          * the supplied openowner we're done. If not, get the file
7056          * info from the found state info. Use that file info to
7057          * create the state for this lock owner. Note solaris doesn't
7058          * really need the pathname to find the file. We may want to
7059          * lookup the pathname and make sure that the vp exist and
7060          * matches the vp in the file structure. However it is
7061          * possible that the pathname no longer exists (local process
7062          * unlinks the file), so this may not be that useful.
7063          */
7064
7065         status = rfs4_get_deleg_state(&stateid, &dsp);
7066         if (status != NFS4_OK) {
7067                 resp->status = status;
7068                 return;
7069         }
7070
7071         ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7072
7073         /*
7074          * New lock owner, create state. Since this was probably called
7075          * in response to a CB_RECALL we set deleg to DELEG_NONE
7076          */
7077
             /* Swap the current vnode for the delegated file's vnode. */
7078         ASSERT(cs->vp != NULL);
7079         VN_RELE(cs->vp);
7080         VN_HOLD(dsp->rds_finfo->rf_vp);
7081         cs->vp = dsp->rds_finfo->rf_vp;
7082
7083         if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7084                 rfs4_deleg_state_rele(dsp);
7085                 *cs->statusp = resp->status = puterrno4(error);
7086                 return;
7087         }
7088
7089         /* Mark progress for delegation returns */
7090         dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7091         rfs4_deleg_state_rele(dsp);
             /* The trailing 1 is deleg_cur: rfs4_do_open() skips VOP_OPEN(). */
7092         rfs4_do_open(cs, req, oo, DELEG_NONE,
7093             args->share_access, args->share_deny, resp, 1);
7094 }
7095
     /*
      * rfs4_do_opendelprev: handle an OPEN that claims a delegation by file
      * name (args->open_claim4_u.file_delegate_prev).
      *
      *   cs   - compound state; rfs4_lookupfile() leaves cs->vp referencing
      *          the named file
      *   req  - RPC request, passed through to rfs4_lookupfile()
      *   args - OPEN arguments (file name, share access/deny)
      *   oo   - open owner performing the open
      *   resp - OPEN result; status, cinfo, stateid and delegation are
      *          filled in
      *
      * On success resp->status keeps the NFS4_OK stored by the lookup.  A
      * missing file entry yields NFS4ERR_RESOURCE; a missing state entry,
      * mismatched share modes, a file that is not delegated or a missing
      * delegation state all yield NFS4ERR_SERVERFAULT.
      */
7096 /*ARGSUSED*/
7097 static void
7098 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7099     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7100 {
7101         /*
7102          * Lookup the pathname, it must already exist since this file
7103          * was delegated.
7104          *
7105          * Find the file and state info for this vp and open owner pair.
7106          *      check that they are in fact delegated.
7107          *      check that the state access and deny modes are the same.
7108          *
7109          * Return the delegation possibly setting the recall flag.
7110          */
7111         rfs4_file_t *fp;
7112         rfs4_state_t *sp;
             /*
              * NOTE(review): create/dcreate are passed in as FALSE, presumably
              * so that only existing entries are found -- confirm against the
              * lookup helpers.
              */
7113         bool_t create = FALSE;
7114         bool_t dcreate = FALSE;
7115         rfs4_deleg_state_t *dsp;
7116         nfsace4 *ace;
7117
7118         /* Note we ignore oflags */
7119         resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7120             req, cs, args->share_access, &resp->cinfo);
7121
7122         if (resp->status != NFS4_OK) {
7123                 return;
7124         }
7125
7126         /* get the file struct and hold a lock on it during initial open */
7127         fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7128         if (fp == NULL) {
7129                 resp->status = NFS4ERR_RESOURCE;
7130                 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7131                 return;
7132         }
7133
7134         sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7135         if (sp == NULL) {
7136                 resp->status = NFS4ERR_SERVERFAULT;
7137                 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7138                 rw_exit(&fp->rf_file_rwlock);
7139                 rfs4_file_rele(fp);
7140                 return;
7141         }
7142
             /*
              * The request must repeat the share modes already recorded in the
              * state, and the file must currently be delegated.
              */
7143         rfs4_dbe_lock(sp->rs_dbe);
7144         rfs4_dbe_lock(fp->rf_dbe);
7145         if (args->share_access != sp->rs_share_access ||
7146             args->share_deny != sp->rs_share_deny ||
7147             sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7148                 NFS4_DEBUG(rfs4_debug,
7149                     (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7150                 rfs4_dbe_unlock(fp->rf_dbe);
7151                 rfs4_dbe_unlock(sp->rs_dbe);
7152                 rfs4_file_rele(fp);
7153                 rfs4_state_rele(sp);
7154                 resp->status = NFS4ERR_SERVERFAULT;
7155                 return;
7156         }
7157         rfs4_dbe_unlock(fp->rf_dbe);
7158         rfs4_dbe_unlock(sp->rs_dbe);
7159
7160         dsp = rfs4_finddeleg(sp, &dcreate);
7161         if (dsp == NULL) {
7162                 rfs4_state_rele(sp);
7163                 rfs4_file_rele(fp);
7164                 resp->status = NFS4ERR_SERVERFAULT;
7165                 return;
7166         }
7167
7168         next_stateid(&sp->rs_stateid);
7169
7170         resp->stateid = sp->rs_stateid.stateid;
7171
7172         resp->delegation.delegation_type = dsp->rds_dtype;
7173
             /* Any type other than OPEN_DELEGATE_READ is answered as a write. */
7174         if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7175                 open_read_delegation4 *rv =
7176                     &resp->delegation.open_delegation4_u.read;
7177
7178                 rv->stateid = dsp->rds_delegid.stateid;
7179                 rv->recall = FALSE; /* no policy in place to set to TRUE */
7180                 ace = &rv->permissions;
7181         } else {
7182                 open_write_delegation4 *rv =
7183                     &resp->delegation.open_delegation4_u.write;
7184
7185                 rv->stateid = dsp->rds_delegid.stateid;
7186                 rv->recall = FALSE;  /* no policy in place to set to TRUE */
7187                 ace = &rv->permissions;
7188                 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7189                 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7190         }
7191
7192         /* XXX For now */
7193         ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7194         ace->flag = 0;
7195         ace->access_mask = 0;
7196         ace->who.utf8string_len = 0;
7197         ace->who.utf8string_val = 0;
7198
7199         rfs4_deleg_state_rele(dsp);
7200         rfs4_state_rele(sp);
7201         rfs4_file_rele(fp);
7202 }
7203
     /*
      * Outcome of a sequence id check:
      *   NFS4_CHKSEQ_OKAY   - the request carries the next expected seqid
      *   NFS4_CHKSEQ_REPLAY - same seqid and same operation as the last reply
      *   NFS4_CHKSEQ_BAD    - anything else
      */
7204 typedef enum {
7205         NFS4_CHKSEQ_OKAY = 0,
7206         NFS4_CHKSEQ_REPLAY = 1,
7207         NFS4_CHKSEQ_BAD = 2
7208 } rfs4_chkseq_t;
7209
7210 /*
7211  * Generic function for sequence number checks.
      *
      *   seqid    - sequence id currently recorded for the owner
      *   lastop   - result kept alongside that sequence id
      *   rqst_seq - sequence id carried by the incoming request
      *   resop    - result being built for the incoming request
      *   copyres  - when TRUE, a detected replay frees resop's reply and
      *              copies lastop into it
      *
      * Returns NFS4_CHKSEQ_REPLAY when rqst_seq equals seqid and the operation
      * matches lastop, NFS4_CHKSEQ_OKAY when rqst_seq is seqid + 1 and
      * NFS4_CHKSEQ_BAD in every other case.
7212  */
7213 static rfs4_chkseq_t
7214 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7215     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7216 {
7217         /* Same sequence ids and matching operations? */
7218         if (seqid == rqst_seq && resop->resop == lastop->resop) {
7219                 if (copyres == TRUE) {
7220                         rfs4_free_reply(resop);
7221                         rfs4_copy_reply(resop, lastop);
7222                 }
                     /* seqid is printed with %u, as in the BAD SEQID message. */
7223                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7224                     "Replayed SEQID %u\n", seqid));
7225                 return (NFS4_CHKSEQ_REPLAY);
7226         }
7227
7228         /* If the incoming sequence is not the next expected then it is bad */
7229         if (rqst_seq != seqid + 1) {
7230                 if (rqst_seq == seqid) {
7231                         NFS4_DEBUG(rfs4_debug,
7232                             (CE_NOTE, "BAD SEQID: Replayed sequence id "
7233                             "but last op was %d current op is %d\n",
7234                             lastop->resop, resop->resop));
7235                         return (NFS4_CHKSEQ_BAD);
7236                 }
                     /* The expected value is the one tested above: seqid + 1. */
7237                 NFS4_DEBUG(rfs4_debug,
7238                     (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7239                     rqst_seq, seqid + 1));
7240                 return (NFS4_CHKSEQ_BAD);
7241         }
7242
7243         /* Everything okay -- next expected */
7244         return (NFS4_CHKSEQ_OKAY);
7245 }
7246
7247
     /*
      * Check the seqid of a request against the open owner's ro_open_seqid
      * and ro_reply while holding the owner's dbe lock.  A replay has the
      * owner's ro_reply copied into resop (copyres is TRUE).  When the seqid
      * is the next expected one the client's lease is updated, after the
      * lock has been dropped.
      */
7248 static rfs4_chkseq_t
7249 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7250 {
7251         rfs4_chkseq_t rc;
7252
7253         rfs4_dbe_lock(op->ro_dbe);
7254         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7255             TRUE);
7256         rfs4_dbe_unlock(op->ro_dbe);
7257
7258         if (rc == NFS4_CHKSEQ_OKAY)
7259                 rfs4_update_lease(op->ro_client);
7260
7261         return (rc);
7262 }
7263
     /*
      * Check olo_seqid against the open owner's ro_open_seqid and ro_reply
      * while holding the owner's dbe lock.  Unlike rfs4_check_open_seqid(),
      * a replay does not copy the owner's ro_reply into resop (copyres is
      * FALSE) and the client's lease is not updated.
      */
7264 static rfs4_chkseq_t
7265 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7266 {
7267         rfs4_chkseq_t rc;
7268
7269         rfs4_dbe_lock(op->ro_dbe);
7270         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7271             olo_seqid, resop, FALSE);
7272         rfs4_dbe_unlock(op->ro_dbe);
7273
7274         return (rc);
7275 }
7276
     /*
      * Check the seqid of a request against the lock owner state's rls_seqid
      * and rls_reply while holding that state's dbe lock.  When
      * rls_skip_seqid_check is set no comparison is made and
      * NFS4_CHKSEQ_OKAY is returned.  A replay has rls_reply copied into
      * resop (copyres is TRUE).
      */
7277 static rfs4_chkseq_t
7278 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7279 {
7280         rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7281
7282         rfs4_dbe_lock(lsp->rls_dbe);
7283         if (!lsp->rls_skip_seqid_check)
7284                 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7285                     resop, TRUE);
7286         rfs4_dbe_unlock(lsp->rls_dbe);
7287
7288         return (rc);
7289 }
7290
7291 static void
7292 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7293     struct svc_req *req, struct compound_state *cs)
7294 {
7295         OPEN4args *args = &argop->nfs_argop4_u.opopen;
7296         OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7297         open_owner4 *owner = &args->owner;
7298         open_claim_type4 claim = args->claim;
7299         rfs4_client_t *cp;
7300         rfs4_openowner_t *oo;
7301         bool_t create;
7302         bool_t replay = FALSE;
7303         int can_reclaim;
7304
7305         DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7306             OPEN4args *, args);
7307
7308         if (cs->vp == NULL) {
7309                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7310                 goto end;
7311         }
7312
7313         /*
7314          * Need to check clientid and lease expiration first based on
7315          * error ordering and incrementing sequence id.
7316          */
7317         cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7318         if (cp == NULL) {
7319                 *cs->statusp = resp->status =
7320                     rfs4_check_clientid(&owner->clientid, 0);
7321                 goto end;
7322         }
7323
7324         if (rfs4_lease_expired(cp)) {
7325                 rfs4_client_close(cp);
7326                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7327                 goto end;
7328         }
7329         can_reclaim = cp->rc_can_reclaim;
7330
7331         /*
7332          * Find the open_owner for use from this point forward.  Take
7333          * care in updating the sequence id based on the type of error
7334          * being returned.
7335          */
7336 retry:
7337         create = TRUE;
7338         oo = rfs4_findopenowner(owner, &create, args->seqid);
7339         if (oo == NULL) {
7340                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
7341                 rfs4_client_rele(cp);
7342                 goto end;
7343         }
7344
7345         /* Hold off access to the sequence space while the open is done */
7346         rfs4_sw_enter(&oo->ro_sw);
7347
7348         /*
7349          * If the open_owner existed before at the server, then check
7350          * the sequence id.
7351          */
7352         if (!create && !oo->ro_postpone_confirm) {
7353                 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7354                 case NFS4_CHKSEQ_BAD:
7355                         if ((args->seqid > oo->ro_open_seqid) &&
7356                             oo->ro_need_confirm) {
7357                                 rfs4_free_opens(oo, TRUE, FALSE);
7358                                 rfs4_sw_exit(&oo->ro_sw);
7359                                 rfs4_openowner_rele(oo);
7360                                 goto retry;
7361                         }
7362                         resp->status = NFS4ERR_BAD_SEQID;
7363                         goto out;
7364                 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7365                         replay = TRUE;
7366                         goto out;
7367                 default:
7368                         break;
7369                 }
7370
7371                 /*
7372                  * Sequence was ok and open owner exists
7373                  * check to see if we have yet to see an
7374                  * open_confirm.
7375                  */
7376                 if (oo->ro_need_confirm) {
7377                         rfs4_free_opens(oo, TRUE, FALSE);
7378                         rfs4_sw_exit(&oo->ro_sw);
7379                         rfs4_openowner_rele(oo);
7380                         goto retry;
7381                 }
7382         }
7383         /* Grace only applies to regular-type OPENs */
7384         if (rfs4_clnt_in_grace(cp) &&
7385             (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7386                 *cs->statusp = resp->status = NFS4ERR_GRACE;
7387                 goto out;
7388         }
7389
7390         /*
7391          * If previous state at the server existed then can_reclaim
7392          * will be set. If not reply NFS4ERR_NO_GRACE to the
7393          * client.
7394          */
7395         if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7396                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7397                 goto out;
7398         }
7399
7400
7401         /*
7402          * Reject the open if the client has missed the grace period
7403          */
7404         if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7405                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7406                 goto out;
7407         }
7408
7409         /* Couple of up-front bookkeeping items */
7410         if (oo->ro_need_confirm) {
7411                 /*
7412                  * If this is a reclaim OPEN then we should not ask
7413                  * for a confirmation of the open_owner per the
7414                  * protocol specification.
7415                  */
7416                 if (claim == CLAIM_PREVIOUS)
7417                         oo->ro_need_confirm = FALSE;
7418                 else
7419                         resp->rflags |= OPEN4_RESULT_CONFIRM;
7420         }
7421         resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7422
7423         /*
7424          * If there is an unshared filesystem mounted on this vnode,
7425          * do not allow to open/create in this directory.
7426          */
7427         if (vn_ismntpt(cs->vp)) {
7428                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7429                 goto out;
7430         }
7431
7432         /*
7433          * access must READ, WRITE, or BOTH.  No access is invalid.
7434          * deny can be READ, WRITE, BOTH, or NONE.
7435          * bits not defined for access/deny are invalid.
7436          */
7437         if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7438             (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7439             (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7440                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7441                 goto out;
7442         }
7443
7444
7445         /*
7446          * make sure attrset is zero before response is built.
7447          */
7448         resp->attrset = 0;
7449
7450         switch (claim) {
7451         case CLAIM_NULL:
7452                 rfs4_do_opennull(cs, req, args, oo, resp);
7453                 break;
7454         case CLAIM_PREVIOUS:
7455                 rfs4_do_openprev(cs, req, args, oo, resp);
7456                 break;
7457         case CLAIM_DELEGATE_CUR:
7458                 rfs4_do_opendelcur(cs, req, args, oo, resp);
7459                 break;
7460         case CLAIM_DELEGATE_PREV:
7461                 rfs4_do_opendelprev(cs, req, args, oo, resp);
7462                 break;
7463         default:
7464                 resp->status = NFS4ERR_INVAL;
7465                 break;
7466         }
7467
7468 out:
7469         rfs4_client_rele(cp);
7470
7471         /* Catch sequence id handling here to make it a little easier */
7472         switch (resp->status) {
7473         case NFS4ERR_BADXDR:
7474         case NFS4ERR_BAD_SEQID:
7475         case NFS4ERR_BAD_STATEID:
7476         case NFS4ERR_NOFILEHANDLE:
7477         case NFS4ERR_RESOURCE:
7478         case NFS4ERR_STALE_CLIENTID:
7479         case NFS4ERR_STALE_STATEID:
7480                 /*
7481                  * The protocol states that if any of these errors are
7482                  * being returned, the sequence id should not be
7483                  * incremented.  Any other return requires an
7484                  * increment.
7485                  */
7486                 break;
7487         default:
7488                 /* Always update the lease in this case */
7489                 rfs4_update_lease(oo->ro_client);
7490
7491                 /* Regular response - copy the result */
7492                 if (!replay)
7493                         rfs4_update_open_resp(oo, resop, &cs->fh);
7494
7495                 /*
7496                  * REPLAY case: Only if the previous response was OK
7497                  * do we copy the filehandle.  If not OK, no
7498                  * filehandle to copy.
7499                  */
7500                 if (replay == TRUE &&
7501                     resp->status == NFS4_OK &&
7502                     oo->ro_reply_fh.nfs_fh4_val) {
7503                         /*
7504                          * If this is a replay, we must restore the
7505                          * current filehandle/vp to that of what was
7506                          * returned originally.  Try our best to do
7507                          * it.
7508                          */
7509                         nfs_fh4_fmt_t *fh_fmtp =
7510                             (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7511
7512                         cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7513                             (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7514
7515                         if (cs->exi == NULL) {
7516                                 resp->status = NFS4ERR_STALE;
7517                                 goto finish;
7518                         }
7519
7520                         VN_RELE(cs->vp);
7521
7522                         cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7523                             &resp->status);
7524
7525                         if (cs->vp == NULL)
7526                                 goto finish;
7527
7528                         nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7529                 }
7530
7531                 /*
7532                  * If this was a replay, no need to update the
7533                  * sequence id. If the open_owner was not created on
7534                  * this pass, then update.  The first use of an
7535                  * open_owner will not bump the sequence id.
7536                  */
7537                 if (replay == FALSE && !create)
7538                         rfs4_update_open_sequence(oo);
7539                 /*
7540                  * If the client is receiving an error and the
7541                  * open_owner needs to be confirmed, there is no way
7542                  * to notify the client of this fact ignoring the fact
7543                  * that the server has no method of returning a
7544                  * stateid to confirm.  Therefore, the server needs to
7545                  * mark this open_owner in a way as to avoid the
7546                  * sequence id checking the next time the client uses
7547                  * this open_owner.
7548                  */
7549                 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7550                         oo->ro_postpone_confirm = TRUE;
7551                 /*
7552                  * If OK response then clear the postpone flag and
7553                  * reset the sequence id to keep in sync with the
7554                  * client.
7555                  */
7556                 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7557                         oo->ro_postpone_confirm = FALSE;
7558                         oo->ro_open_seqid = args->seqid;
7559                 }
7560                 break;
7561         }
7562
7563 finish:
7564         *cs->statusp = resp->status;
7565
7566         rfs4_sw_exit(&oo->ro_sw);
7567         rfs4_openowner_rele(oo);
7568
7569 end:
7570         DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7571             OPEN4res *, resp);
7572 }
7573
7574 /*ARGSUSED*/
7575 void
7576 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7577     struct svc_req *req, struct compound_state *cs)
7578 {
7579         OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7580         OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7581         rfs4_state_t *sp;
7582         nfsstat4 status;
7583
7584         DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7585             OPEN_CONFIRM4args *, args);
7586
7587         if (cs->vp == NULL) {
7588                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7589                 goto out;
7590         }
7591
7592         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7593         if (status != NFS4_OK) {
7594                 *cs->statusp = resp->status = status;
7595                 goto out;
7596         }
7597
7598         /* Ensure specified filehandle matches */
7599         if (cs->vp != sp->rs_finfo->rf_vp) {
7600                 rfs4_state_rele(sp);
7601                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7602                 goto out;
7603         }
7604
7605         /* hold off other access to open_owner while we tinker */
7606         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7607
7608         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7609         case NFS4_CHECK_STATEID_OKAY:
7610                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7611                     resop) != 0) {
7612                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7613                         break;
7614                 }
7615                 /*
7616                  * If it is the appropriate stateid and determined to
7617                  * be "OKAY" then this means that the stateid does not
7618                  * need to be confirmed and the client is in error for
7619                  * sending an OPEN_CONFIRM.
7620                  */
7621                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7622                 break;
7623         case NFS4_CHECK_STATEID_OLD:
7624                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7625                 break;
7626         case NFS4_CHECK_STATEID_BAD:
7627                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7628                 break;
7629         case NFS4_CHECK_STATEID_EXPIRED:
7630                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7631                 break;
7632         case NFS4_CHECK_STATEID_CLOSED:
7633                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7634                 break;
7635         case NFS4_CHECK_STATEID_REPLAY:
7636                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7637                     resop)) {
7638                 case NFS4_CHKSEQ_OKAY:
7639                         /*
7640                          * This is replayed stateid; if seqid matches
7641                          * next expected, then client is using wrong seqid.
7642                          */
7643                         /* fall through */
7644                 case NFS4_CHKSEQ_BAD:
7645                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7646                         break;
7647                 case NFS4_CHKSEQ_REPLAY:
7648                         /*
7649                          * Note this case is the duplicate case so
7650                          * resp->status is already set.
7651                          */
7652                         *cs->statusp = resp->status;
7653                         rfs4_update_lease(sp->rs_owner->ro_client);
7654                         break;
7655                 }
7656                 break;
7657         case NFS4_CHECK_STATEID_UNCONFIRMED:
7658                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7659                     resop) != NFS4_CHKSEQ_OKAY) {
7660                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7661                         break;
7662                 }
7663                 *cs->statusp = resp->status = NFS4_OK;
7664
7665                 next_stateid(&sp->rs_stateid);
7666                 resp->open_stateid = sp->rs_stateid.stateid;
7667                 sp->rs_owner->ro_need_confirm = FALSE;
7668                 rfs4_update_lease(sp->rs_owner->ro_client);
7669                 rfs4_update_open_sequence(sp->rs_owner);
7670                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7671                 break;
7672         default:
7673                 ASSERT(FALSE);
7674                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7675                 break;
7676         }
7677         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7678         rfs4_state_rele(sp);
7679
7680 out:
7681         DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7682             OPEN_CONFIRM4res *, resp);
7683 }
7684
7685 /*ARGSUSED*/
7686 void
7687 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7688     struct svc_req *req, struct compound_state *cs)
7689 {
7690         OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7691         OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7692         uint32_t access = args->share_access;
7693         uint32_t deny = args->share_deny;
7694         nfsstat4 status;
7695         rfs4_state_t *sp;
7696         rfs4_file_t *fp;
7697         int fflags = 0;
7698
7699         DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7700             OPEN_DOWNGRADE4args *, args);
7701
7702         if (cs->vp == NULL) {
7703                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7704                 goto out;
7705         }
7706
7707         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7708         if (status != NFS4_OK) {
7709                 *cs->statusp = resp->status = status;
7710                 goto out;
7711         }
7712
7713         /* Ensure specified filehandle matches */
7714         if (cs->vp != sp->rs_finfo->rf_vp) {
7715                 rfs4_state_rele(sp);
7716                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7717                 goto out;
7718         }
7719
7720         /* hold off other access to open_owner while we tinker */
7721         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7722
7723         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7724         case NFS4_CHECK_STATEID_OKAY:
7725                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7726                     resop) != NFS4_CHKSEQ_OKAY) {
7727                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7728                         goto end;
7729                 }
7730                 break;
7731         case NFS4_CHECK_STATEID_OLD:
7732                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7733                 goto end;
7734         case NFS4_CHECK_STATEID_BAD:
7735                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7736                 goto end;
7737         case NFS4_CHECK_STATEID_EXPIRED:
7738                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7739                 goto end;
7740         case NFS4_CHECK_STATEID_CLOSED:
7741                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7742                 goto end;
7743         case NFS4_CHECK_STATEID_UNCONFIRMED:
7744                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7745                 goto end;
7746         case NFS4_CHECK_STATEID_REPLAY:
7747                 /* Check the sequence id for the open owner */
7748                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7749                     resop)) {
7750                 case NFS4_CHKSEQ_OKAY:
7751                         /*
7752                          * This is replayed stateid; if seqid matches
7753                          * next expected, then client is using wrong seqid.
7754                          */
7755                         /* fall through */
7756                 case NFS4_CHKSEQ_BAD:
7757                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7758                         goto end;
7759                 case NFS4_CHKSEQ_REPLAY:
7760                         /*
7761                          * Note this case is the duplicate case so
7762                          * resp->status is already set.
7763                          */
7764                         *cs->statusp = resp->status;
7765                         rfs4_update_lease(sp->rs_owner->ro_client);
7766                         goto end;
7767                 }
7768                 break;
7769         default:
7770                 ASSERT(FALSE);
7771                 break;
7772         }
7773
7774         rfs4_dbe_lock(sp->rs_dbe);
7775         /*
7776          * Check that the new access modes and deny modes are valid.
7777          * Check that no invalid bits are set.
7778          */
7779         if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7780             (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7781                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7782                 rfs4_update_open_sequence(sp->rs_owner);
7783                 rfs4_dbe_unlock(sp->rs_dbe);
7784                 goto end;
7785         }
7786
7787         /*
7788          * The new modes must be a subset of the current modes and
7789          * the access must specify at least one mode. To test that
7790          * the new mode is a subset of the current modes we bitwise
7791          * AND them together and check that the result equals the new
7792          * mode. For example:
7793          * New mode, access == R and current mode, sp->rs_open_access  == RW
7794          * access & sp->rs_open_access == R == access, so the new access mode
7795          * is valid. Consider access == RW, sp->rs_open_access = R
7796          * access & sp->rs_open_access == R != access, so the new access mode
7797          * is invalid.
7798          */
7799         if ((access & sp->rs_open_access) != access ||
7800             (deny & sp->rs_open_deny) != deny ||
7801             (access &
7802             (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7803                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7804                 rfs4_update_open_sequence(sp->rs_owner);
7805                 rfs4_dbe_unlock(sp->rs_dbe);
7806                 goto end;
7807         }
7808
7809         /*
7810          * Release any share locks associated with this stateID.
7811          * Strictly speaking, this violates the spec because the
7812          * spec effectively requires that open downgrade be atomic.
7813          * At present, fs_shrlock does not have this capability.
7814          */
7815         (void) rfs4_unshare(sp);
7816
7817         status = rfs4_share(sp, access, deny);
7818         if (status != NFS4_OK) {
7819                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7820                 rfs4_update_open_sequence(sp->rs_owner);
7821                 rfs4_dbe_unlock(sp->rs_dbe);
7822                 goto end;
7823         }
7824
7825         fp = sp->rs_finfo;
7826         rfs4_dbe_lock(fp->rf_dbe);
7827
7828         /*
7829          * If the current mode has deny read and the new mode
7830          * does not, decrement the number of deny read mode bits
7831          * and if it goes to zero turn off the deny read bit
7832          * on the file.
7833          */
7834         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7835             (deny & OPEN4_SHARE_DENY_READ) == 0) {
7836                 fp->rf_deny_read--;
7837                 if (fp->rf_deny_read == 0)
7838                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7839         }
7840
7841         /*
7842          * If the current mode has deny write and the new mode
7843          * does not, decrement the number of deny write mode bits
7844          * and if it goes to zero turn off the deny write bit
7845          * on the file.
7846          */
7847         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7848             (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7849                 fp->rf_deny_write--;
7850                 if (fp->rf_deny_write == 0)
7851                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7852         }
7853
7854         /*
7855          * If the current mode has access read and the new mode
7856          * does not, decrement the number of access read mode bits
7857          * and if it goes to zero turn off the access read bit
7858          * on the file.  set fflags to FREAD for the call to
7859          * vn_open_downgrade().
7860          */
7861         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7862             (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7863                 fp->rf_access_read--;
7864                 if (fp->rf_access_read == 0)
7865                         fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7866                 fflags |= FREAD;
7867         }
7868
7869         /*
7870          * If the current mode has access write and the new mode
7871          * does not, decrement the number of access write mode bits
7872          * and if it goes to zero turn off the access write bit
7873          * on the file.  set fflags to FWRITE for the call to
7874          * vn_open_downgrade().
7875          */
7876         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7877             (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7878                 fp->rf_access_write--;
7879                 if (fp->rf_access_write == 0)
7880                         fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7881                 fflags |= FWRITE;
7882         }
7883
7884         /* Check that the file is still accessible */
7885         ASSERT(fp->rf_share_access);
7886
7887         rfs4_dbe_unlock(fp->rf_dbe);
7888
7889         /* now set the new open access and deny modes */
7890         sp->rs_open_access = access;
7891         sp->rs_open_deny = deny;
7892
7893         /*
7894          * we successfully downgraded the share lock, now we need to downgrade
7895          * the open. it is possible that the downgrade was only for a deny
7896          * mode and we have nothing else to do.
7897          */
7898         if ((fflags & (FREAD|FWRITE)) != 0)
7899                 vn_open_downgrade(cs->vp, fflags);
7900
7901         /* Update the stateid */
7902         next_stateid(&sp->rs_stateid);
7903         resp->open_stateid = sp->rs_stateid.stateid;
7904
7905         rfs4_dbe_unlock(sp->rs_dbe);
7906
7907         *cs->statusp = resp->status = NFS4_OK;
7908         /* Update the lease */
7909         rfs4_update_lease(sp->rs_owner->ro_client);
7910         /* And the sequence */
7911         rfs4_update_open_sequence(sp->rs_owner);
7912         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7913
7914 end:
7915         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7916         rfs4_state_rele(sp);
7917 out:
7918         DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7919             OPEN_DOWNGRADE4res *, resp);
7920 }
7921
7922 /*
7923  * The logic behind this function is detailed in the NFSv4 RFC in the
7924  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7925  * that section for explicit guidance to server behavior for
7926  * SETCLIENTID.
7927  */
7928 void
7929 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7930     struct svc_req *req, struct compound_state *cs)
7931 {
7932         SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7933         SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7934         rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7935         rfs4_clntip_t *ci;
7936         bool_t create;
7937         char *addr, *netid;
7938         int len;
7939
7940         DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7941             SETCLIENTID4args *, args);
7942 retry:
7943         newcp = cp_confirmed = cp_unconfirmed = NULL;
7944
7945         /*
7946          * Save the caller's IP address
7947          */
7948         args->client.cl_addr =
7949             (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7950
7951         /*
7952          * Record if it is a Solaris client that cannot handle referrals.
7953          */
7954         if (strstr(args->client.id_val, "Solaris") &&
7955             !strstr(args->client.id_val, "+referrals")) {
7956                 /* Add a "yes, it's downrev" record */
7957                 create = TRUE;
7958                 ci = rfs4_find_clntip(args->client.cl_addr, &create);
7959                 ASSERT(ci != NULL);
7960                 rfs4_dbe_rele(ci->ri_dbe);
7961         } else {
7962                 /* Remove any previous record */
7963                 rfs4_invalidate_clntip(args->client.cl_addr);
7964         }
7965
7966         /*
7967          * In search of an EXISTING client matching the incoming
7968          * request to establish a new client identifier at the server
7969          */
7970         create = TRUE;
7971         cp = rfs4_findclient(&args->client, &create, NULL);
7972
7973         /* Should never happen */
7974         ASSERT(cp != NULL);
7975
7976         if (cp == NULL) {
7977                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7978                 goto out;
7979         }
7980
7981         /*
7982          * Easiest case. Client identifier is newly created and is
7983          * unconfirmed.  Also note that for this case, no other
7984          * entries exist for the client identifier.  Nothing else to
7985          * check.  Just setup the response and respond.
7986          */
7987         if (create) {
7988                 *cs->statusp = res->status = NFS4_OK;
7989                 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
7990                 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7991                     cp->rc_confirm_verf;
7992                 /* Setup callback information; CB_NULL confirmation later */
7993                 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
7994
7995                 rfs4_client_rele(cp);
7996                 goto out;
7997         }
7998
7999         /*
8000          * An existing, confirmed client may exist but it may not have
8001          * been active for at least one lease period.  If so, then
8002          * "close" the client and create a new client identifier
8003          */
8004         if (rfs4_lease_expired(cp)) {
8005                 rfs4_client_close(cp);
8006                 goto retry;
8007         }
8008
8009         if (cp->rc_need_confirm == TRUE)
8010                 cp_unconfirmed = cp;
8011         else
8012                 cp_confirmed = cp;
8013
8014         cp = NULL;
8015
8016         /*
8017          * We have a confirmed client, now check for an
8018          * unconfimred entry
8019          */
8020         if (cp_confirmed) {
8021                 /* If creds don't match then client identifier is inuse */
8022                 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8023                         rfs4_cbinfo_t *cbp;
8024                         /*
8025                          * Some one else has established this client
8026                          * id. Try and say * who they are. We will use
8027                          * the call back address supplied by * the
8028                          * first client.
8029                          */
8030                         *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8031
8032                         addr = netid = NULL;
8033
8034                         cbp = &cp_confirmed->rc_cbinfo;
8035                         if (cbp->cb_callback.cb_location.r_addr &&
8036                             cbp->cb_callback.cb_location.r_netid) {
8037                                 cb_client4 *cbcp = &cbp->cb_callback;
8038
8039                                 len = strlen(cbcp->cb_location.r_addr)+1;
8040                                 addr = kmem_alloc(len, KM_SLEEP);
8041                                 bcopy(cbcp->cb_location.r_addr, addr, len);
8042                                 len = strlen(cbcp->cb_location.r_netid)+1;
8043                                 netid = kmem_alloc(len, KM_SLEEP);
8044                                 bcopy(cbcp->cb_location.r_netid, netid, len);
8045                         }
8046
8047                         res->SETCLIENTID4res_u.client_using.r_addr = addr;
8048                         res->SETCLIENTID4res_u.client_using.r_netid = netid;
8049
8050                         rfs4_client_rele(cp_confirmed);
8051                 }
8052
8053                 /*
8054                  * Confirmed, creds match, and verifier matches; must
8055                  * be an update of the callback info
8056                  */
8057                 if (cp_confirmed->rc_nfs_client.verifier ==
8058                     args->client.verifier) {
8059                         /* Setup callback information */
8060                         rfs4_client_setcb(cp_confirmed, &args->callback,
8061                             args->callback_ident);
8062
8063                         /* everything okay -- move ahead */
8064                         *cs->statusp = res->status = NFS4_OK;
8065                         res->SETCLIENTID4res_u.resok4.clientid =
8066                             cp_confirmed->rc_clientid;
8067
8068                         /* update the confirm_verifier and return it */
8069                         rfs4_client_scv_next(cp_confirmed);
8070                         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8071                             cp_confirmed->rc_confirm_verf;
8072
8073                         rfs4_client_rele(cp_confirmed);
8074                         goto out;
8075                 }
8076
8077                 /*
8078                  * Creds match but the verifier doesn't.  Must search
8079                  * for an unconfirmed client that would be replaced by
8080                  * this request.
8081                  */
8082                 create = FALSE;
8083                 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8084                     cp_confirmed);
8085         }
8086
8087         /*
8088          * At this point, we have taken care of the brand new client
8089          * struct, INUSE case, update of an existing, and confirmed
8090          * client struct.
8091          */
8092
8093         /*
8094          * check to see if things have changed while we originally
8095          * picked up the client struct.  If they have, then return and
8096          * retry the processing of this SETCLIENTID request.
8097          */
8098         if (cp_unconfirmed) {
8099                 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8100                 if (!cp_unconfirmed->rc_need_confirm) {
8101                         rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8102                         rfs4_client_rele(cp_unconfirmed);
8103                         if (cp_confirmed)
8104                                 rfs4_client_rele(cp_confirmed);
8105                         goto retry;
8106                 }
8107                 /* do away with the old unconfirmed one */
8108                 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8109                 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8110                 rfs4_client_rele(cp_unconfirmed);
8111                 cp_unconfirmed = NULL;
8112         }
8113
8114         /*
8115          * This search will temporarily hide the confirmed client
8116          * struct while a new client struct is created as the
8117          * unconfirmed one.
8118          */
8119         create = TRUE;
8120         newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8121
8122         ASSERT(newcp != NULL);
8123
8124         if (newcp == NULL) {
8125                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8126                 rfs4_client_rele(cp_confirmed);
8127                 goto out;
8128         }
8129
8130         /*
8131          * If one was not created, then a similar request must be in
8132          * process so release and start over with this one
8133          */
8134         if (create != TRUE) {
8135                 rfs4_client_rele(newcp);
8136                 if (cp_confirmed)
8137                         rfs4_client_rele(cp_confirmed);
8138                 goto retry;
8139         }
8140
8141         *cs->statusp = res->status = NFS4_OK;
8142         res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8143         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8144             newcp->rc_confirm_verf;
8145         /* Setup callback information; CB_NULL confirmation later */
8146         rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8147
8148         newcp->rc_cp_confirmed = cp_confirmed;
8149
8150         rfs4_client_rele(newcp);
8151
8152 out:
8153         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8154             SETCLIENTID4res *, res);
8155 }
8156
8157 /*ARGSUSED*/
8158 void
8159 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8160     struct svc_req *req, struct compound_state *cs)
8161 {
8162         SETCLIENTID_CONFIRM4args *args =
8163             &argop->nfs_argop4_u.opsetclientid_confirm;
8164         SETCLIENTID_CONFIRM4res *res =
8165             &resop->nfs_resop4_u.opsetclientid_confirm;
8166         rfs4_client_t *cp, *cptoclose = NULL;
8167
8168         DTRACE_NFSV4_2(op__setclientid__confirm__start,
8169             struct compound_state *, cs,
8170             SETCLIENTID_CONFIRM4args *, args);
8171
8172         *cs->statusp = res->status = NFS4_OK;
8173
8174         cp = rfs4_findclient_by_id(args->clientid, TRUE);
8175
8176         if (cp == NULL) {
8177                 *cs->statusp = res->status =
8178                     rfs4_check_clientid(&args->clientid, 1);
8179                 goto out;
8180         }
8181
8182         if (!creds_ok(cp, req, cs)) {
8183                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8184                 rfs4_client_rele(cp);
8185                 goto out;
8186         }
8187
8188         /* If the verifier doesn't match, the record doesn't match */
8189         if (cp->rc_confirm_verf != args->setclientid_confirm) {
8190                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8191                 rfs4_client_rele(cp);
8192                 goto out;
8193         }
8194
8195         rfs4_dbe_lock(cp->rc_dbe);
8196         cp->rc_need_confirm = FALSE;
8197         if (cp->rc_cp_confirmed) {
8198                 cptoclose = cp->rc_cp_confirmed;
8199                 cptoclose->rc_ss_remove = 1;
8200                 cp->rc_cp_confirmed = NULL;
8201         }
8202
8203         /*
8204          * Update the client's associated server instance, if it's changed
8205          * since the client was created.
8206          */
8207         if (rfs4_servinst(cp) != rfs4_cur_servinst)
8208                 rfs4_servinst_assign(cp, rfs4_cur_servinst);
8209
8210         /*
8211          * Record clientid in stable storage.
8212          * Must be done after server instance has been assigned.
8213          */
8214         rfs4_ss_clid(cp);
8215
8216         rfs4_dbe_unlock(cp->rc_dbe);
8217
8218         if (cptoclose)
8219                 /* don't need to rele, client_close does it */
8220                 rfs4_client_close(cptoclose);
8221
8222         /* If needed, initiate CB_NULL call for callback path */
8223         rfs4_deleg_cb_check(cp);
8224         rfs4_update_lease(cp);
8225
8226         /*
8227          * Check to see if client can perform reclaims
8228          */
8229         rfs4_ss_chkclid(cp);
8230
8231         rfs4_client_rele(cp);
8232
8233 out:
8234         DTRACE_NFSV4_2(op__setclientid__confirm__done,
8235             struct compound_state *, cs,
8236             SETCLIENTID_CONFIRM4 *, res);
8237 }
8238
8239
8240 /*ARGSUSED*/
8241 void
8242 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8243     struct svc_req *req, struct compound_state *cs)
8244 {
8245         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8246         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8247         rfs4_state_t *sp;
8248         nfsstat4 status;
8249
8250         DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8251             CLOSE4args *, args);
8252
8253         if (cs->vp == NULL) {
8254                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8255                 goto out;
8256         }
8257
8258         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8259         if (status != NFS4_OK) {
8260                 *cs->statusp = resp->status = status;
8261                 goto out;
8262         }
8263
8264         /* Ensure specified filehandle matches */
8265         if (cs->vp != sp->rs_finfo->rf_vp) {
8266                 rfs4_state_rele(sp);
8267                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8268                 goto out;
8269         }
8270
8271         /* hold off other access to open_owner while we tinker */
8272         rfs4_sw_enter(&sp->rs_owner->ro_sw);
8273
8274         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8275         case NFS4_CHECK_STATEID_OKAY:
8276                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8277                     resop) != NFS4_CHKSEQ_OKAY) {
8278                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8279                         goto end;
8280                 }
8281                 break;
8282         case NFS4_CHECK_STATEID_OLD:
8283                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8284                 goto end;
8285         case NFS4_CHECK_STATEID_BAD:
8286                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8287                 goto end;
8288         case NFS4_CHECK_STATEID_EXPIRED:
8289                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8290                 goto end;
8291         case NFS4_CHECK_STATEID_CLOSED:
8292                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8293                 goto end;
8294         case NFS4_CHECK_STATEID_UNCONFIRMED:
8295                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8296                 goto end;
8297         case NFS4_CHECK_STATEID_REPLAY:
8298                 /* Check the sequence id for the open owner */
8299                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8300                     resop)) {
8301                 case NFS4_CHKSEQ_OKAY:
8302                         /*
8303                          * This is replayed stateid; if seqid matches
8304                          * next expected, then client is using wrong seqid.
8305                          */
8306                         /* FALL THROUGH */
8307                 case NFS4_CHKSEQ_BAD:
8308                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8309                         goto end;
8310                 case NFS4_CHKSEQ_REPLAY:
8311                         /*
8312                          * Note this case is the duplicate case so
8313                          * resp->status is already set.
8314                          */
8315                         *cs->statusp = resp->status;
8316                         rfs4_update_lease(sp->rs_owner->ro_client);
8317                         goto end;
8318                 }
8319                 break;
8320         default:
8321                 ASSERT(FALSE);
8322                 break;
8323         }
8324
8325         rfs4_dbe_lock(sp->rs_dbe);
8326
8327         /* Update the stateid. */
8328         next_stateid(&sp->rs_stateid);
8329         resp->open_stateid = sp->rs_stateid.stateid;
8330
8331         rfs4_dbe_unlock(sp->rs_dbe);
8332
8333         rfs4_update_lease(sp->rs_owner->ro_client);
8334         rfs4_update_open_sequence(sp->rs_owner);
8335         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8336
8337         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8338
8339         *cs->statusp = resp->status = status;
8340
8341 end:
8342         rfs4_sw_exit(&sp->rs_owner->ro_sw);
8343         rfs4_state_rele(sp);
8344 out:
8345         DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8346             CLOSE4res *, resp);
8347 }
8348
8349 /*
8350  * Manage the counts on the file struct and close all file locks
8351  */
8352 /*ARGSUSED*/
8353 void
8354 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8355     bool_t close_of_client)
8356 {
8357         rfs4_file_t *fp = sp->rs_finfo;
8358         rfs4_lo_state_t *lsp;
8359         int fflags = 0;
8360
8361         /*
8362          * If this call is part of the larger closing down of client
8363          * state then it is just easier to release all locks
8364          * associated with this client instead of going through each
8365          * individual file and cleaning locks there.
8366          */
8367         if (close_of_client) {
8368                 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8369                     !list_is_empty(&sp->rs_lostatelist) &&
8370                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8371                         /* Is the PxFS kernel module loaded? */
8372                         if (lm_remove_file_locks != NULL) {
8373                                 int new_sysid;
8374
8375                                 /* Encode the cluster nodeid in new sysid */
8376                                 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8377                                 lm_set_nlmid_flk(&new_sysid);
8378
8379                                 /*
8380                                  * This PxFS routine removes file locks for a
8381                                  * client over all nodes of a cluster.
8382                                  */
8383                                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8384                                     "lm_remove_file_locks(sysid=0x%x)\n",
8385                                     new_sysid));
8386                                 (*lm_remove_file_locks)(new_sysid);
8387                         } else {
8388                                 struct flock64 flk;
8389
8390                                 /* Release all locks for this client */
8391                                 flk.l_type = F_UNLKSYS;
8392                                 flk.l_whence = 0;
8393                                 flk.l_start = 0;
8394                                 flk.l_len = 0;
8395                                 flk.l_sysid =
8396                                     sp->rs_owner->ro_client->rc_sysidt;
8397                                 flk.l_pid = 0;
8398                                 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8399                                     &flk, F_REMOTELOCK | FREAD | FWRITE,
8400                                     (u_offset_t)0, NULL, CRED(), NULL);
8401                         }
8402
8403                         sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8404                 }
8405         }
8406
8407         /*
8408          * Release all locks on this file by this lock owner or at
8409          * least mark the locks as having been released
8410          */
8411         for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8412             lsp = list_next(&sp->rs_lostatelist, lsp)) {
8413                 lsp->rls_locks_cleaned = TRUE;
8414
8415                 /* Was this already taken care of above? */
8416                 if (!close_of_client &&
8417                     sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8418                         (void) cleanlocks(sp->rs_finfo->rf_vp,
8419                             lsp->rls_locker->rl_pid,
8420                             lsp->rls_locker->rl_client->rc_sysidt);
8421         }
8422
8423         /*
8424          * Release any shrlocks associated with this open state ID.
8425          * This must be done before the rfs4_state gets marked closed.
8426          */
8427         if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8428                 (void) rfs4_unshare(sp);
8429
8430         if (sp->rs_open_access) {
8431                 rfs4_dbe_lock(fp->rf_dbe);
8432
8433                 /*
8434                  * Decrement the count for each access and deny bit that this
8435                  * state has contributed to the file.
8436                  * If the file counts go to zero
8437                  * clear the appropriate bit in the appropriate mask.
8438                  */
8439                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8440                         fp->rf_access_read--;
8441                         fflags |= FREAD;
8442                         if (fp->rf_access_read == 0)
8443                                 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8444                 }
8445                 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8446                         fp->rf_access_write--;
8447                         fflags |= FWRITE;
8448                         if (fp->rf_access_write == 0)
8449                                 fp->rf_share_access &=
8450                                     ~OPEN4_SHARE_ACCESS_WRITE;
8451                 }
8452                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8453                         fp->rf_deny_read--;
8454                         if (fp->rf_deny_read == 0)
8455                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8456                 }
8457                 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8458                         fp->rf_deny_write--;
8459                         if (fp->rf_deny_write == 0)
8460                                 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8461                 }
8462
8463                 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8464
8465                 rfs4_dbe_unlock(fp->rf_dbe);
8466
8467                 sp->rs_open_access = 0;
8468                 sp->rs_open_deny = 0;
8469         }
8470 }
8471
8472 /*
8473  * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8474  */
8475 static nfsstat4
8476 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8477 {
8478         rfs4_lockowner_t *lo;
8479         rfs4_client_t *cp;
8480         uint32_t len;
8481
8482         lo = rfs4_findlockowner_by_pid(flk->l_pid);
8483         if (lo != NULL) {
8484                 cp = lo->rl_client;
8485                 if (rfs4_lease_expired(cp)) {
8486                         rfs4_lockowner_rele(lo);
8487                         rfs4_dbe_hold(cp->rc_dbe);
8488                         rfs4_client_close(cp);
8489                         return (NFS4ERR_EXPIRED);
8490                 }
8491                 dp->owner.clientid = lo->rl_owner.clientid;
8492                 len = lo->rl_owner.owner_len;
8493                 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8494                 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8495                 dp->owner.owner_len = len;
8496                 rfs4_lockowner_rele(lo);
8497                 goto finish;
8498         }
8499
8500         /*
8501          * Its not a NFS4 lock. We take advantage that the upper 32 bits
8502          * of the client id contain the boot time for a NFS4 lock. So we
8503          * fabricate and identity by setting clientid to the sysid, and
8504          * the lock owner to the pid.
8505          */
8506         dp->owner.clientid = flk->l_sysid;
8507         len = sizeof (pid_t);
8508         dp->owner.owner_len = len;
8509         dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8510         bcopy(&flk->l_pid, dp->owner.owner_val, len);
8511 finish:
8512         dp->offset = flk->l_start;
8513         dp->length = flk->l_len;
8514
8515         if (flk->l_type == F_RDLCK)
8516                 dp->locktype = READ_LT;
8517         else if (flk->l_type == F_WRLCK)
8518                 dp->locktype = WRITE_LT;
8519         else
8520                 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8521
8522         return (NFS4_OK);
8523 }
8524
8525 static int
8526 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8527 {
8528         int error;
8529         struct flock64 flk;
8530         int i;
8531         clock_t delaytime;
8532         int cmd;
8533
8534         cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8535 retry:
8536         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8537
8538         for (i = 0; i < rfs4_maxlock_tries; i++) {
8539                 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8540                 error = VOP_FRLOCK(vp, cmd,
8541                     flock, flag, (u_offset_t)0, NULL, cred, NULL);
8542
8543                 if (error != EAGAIN && error != EACCES)
8544                         break;
8545
8546                 if (i < rfs4_maxlock_tries - 1) {
8547                         delay(delaytime);
8548                         delaytime *= 2;
8549                 }
8550         }
8551
8552         if (error == EAGAIN || error == EACCES) {
8553                 /* Get the owner of the lock */
8554                 flk = *flock;
8555                 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8556                 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag,
8557                     (u_offset_t)0, NULL, cred, NULL) == 0) {
8558                         if (flk.l_type == F_UNLCK) {
8559                                 /* No longer locked, retry */
8560                                 goto retry;
8561                         }
8562                         *flock = flk;
8563                         LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8564                             F_GETLK, &flk);
8565                 }
8566         }
8567
8568         return (error);
8569 }
8570
8571 /*ARGSUSED*/
8572 static nfsstat4
8573 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8574     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8575 {
8576         nfsstat4 status;
8577         rfs4_lockowner_t *lo = lsp->rls_locker;
8578         rfs4_state_t *sp = lsp->rls_state;
8579         struct flock64 flock;
8580         int16_t ltype;
8581         int flag;
8582         int error;
8583         sysid_t sysid;
8584         LOCK4res *lres;
8585
8586         if (rfs4_lease_expired(lo->rl_client)) {
8587                 return (NFS4ERR_EXPIRED);
8588         }
8589
8590         if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8591                 return (status);
8592
8593         /* Check for zero length. To lock to end of file use all ones for V4 */
8594         if (length == 0)
8595                 return (NFS4ERR_INVAL);
8596         else if (length == (length4)(~0))
8597                 length = 0;             /* Posix to end of file  */
8598
8599 retry:
8600         rfs4_dbe_lock(sp->rs_dbe);
8601         if (sp->rs_closed) {
8602                 rfs4_dbe_unlock(sp->rs_dbe);
8603                 return (NFS4ERR_OLD_STATEID);
8604         }
8605
8606         if (resop->resop != OP_LOCKU) {
8607                 switch (locktype) {
8608                 case READ_LT:
8609                 case READW_LT:
8610                         if ((sp->rs_share_access
8611                             & OPEN4_SHARE_ACCESS_READ) == 0) {
8612                                 rfs4_dbe_unlock(sp->rs_dbe);
8613
8614                                 return (NFS4ERR_OPENMODE);
8615                         }
8616                         ltype = F_RDLCK;
8617                         break;
8618                 case WRITE_LT:
8619                 case WRITEW_LT:
8620                         if ((sp->rs_share_access
8621                             & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8622                                 rfs4_dbe_unlock(sp->rs_dbe);
8623
8624                                 return (NFS4ERR_OPENMODE);
8625                         }
8626                         ltype = F_WRLCK;
8627                         break;
8628                 }
8629         } else
8630                 ltype = F_UNLCK;
8631
8632         flock.l_type = ltype;
8633         flock.l_whence = 0;             /* SEEK_SET */
8634         flock.l_start = offset;
8635         flock.l_len = length;
8636         flock.l_sysid = sysid;
8637         flock.l_pid = lsp->rls_locker->rl_pid;
8638
8639         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8640         if (flock.l_len < 0 || flock.l_start < 0) {
8641                 rfs4_dbe_unlock(sp->rs_dbe);
8642                 return (NFS4ERR_INVAL);
8643         }
8644
8645         /*
8646          * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8647          * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8648          */
8649         flag = (int)sp->rs_share_access | F_REMOTELOCK;
8650
8651         error = setlock(sp->rs_finfo->rf_vp, &flock, flag, cred);
8652         if (error == 0) {
8653                 rfs4_dbe_lock(lsp->rls_dbe);
8654                 next_stateid(&lsp->rls_lockid);
8655                 rfs4_dbe_unlock(lsp->rls_dbe);
8656         }
8657
8658         rfs4_dbe_unlock(sp->rs_dbe);
8659
8660         /*
8661          * N.B. We map error values to nfsv4 errors. This is differrent
8662          * than puterrno4 routine.
8663          */
8664         switch (error) {
8665         case 0:
8666                 status = NFS4_OK;
8667                 break;
8668         case EAGAIN:
8669         case EACCES:            /* Old value */
8670                 /* Can only get here if op is OP_LOCK */
8671                 ASSERT(resop->resop == OP_LOCK);
8672                 lres = &resop->nfs_resop4_u.oplock;
8673                 status = NFS4ERR_DENIED;
8674                 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8675                     == NFS4ERR_EXPIRED)
8676                         goto retry;
8677                 break;
8678         case ENOLCK:
8679                 status = NFS4ERR_DELAY;
8680                 break;
8681         case EOVERFLOW:
8682                 status = NFS4ERR_INVAL;
8683                 break;
8684         case EINVAL:
8685                 status = NFS4ERR_NOTSUPP;
8686                 break;
8687         default:
8688                 status = NFS4ERR_SERVERFAULT;
8689                 break;
8690         }
8691
8692         return (status);
8693 }
8694
8695 /*ARGSUSED*/
8696 void
8697 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8698     struct svc_req *req, struct compound_state *cs)
8699 {
8700         LOCK4args *args = &argop->nfs_argop4_u.oplock;
8701         LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8702         nfsstat4 status;
8703         stateid4 *stateid;
8704         rfs4_lockowner_t *lo;
8705         rfs4_client_t *cp;
8706         rfs4_state_t *sp = NULL;
8707         rfs4_lo_state_t *lsp = NULL;
8708         bool_t ls_sw_held = FALSE;
8709         bool_t create = TRUE;
8710         bool_t lcreate = TRUE;
8711         bool_t dup_lock = FALSE;
8712         int rc;
8713
8714         DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8715             LOCK4args *, args);
8716
8717         if (cs->vp == NULL) {
8718                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8719                 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8720                     cs, LOCK4res *, resp);
8721                 return;
8722         }
8723
8724         if (args->locker.new_lock_owner) {
8725                 /* Create a new lockowner for this instance */
8726                 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8727
8728                 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8729
8730                 stateid = &olo->open_stateid;
8731                 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8732                 if (status != NFS4_OK) {
8733                         NFS4_DEBUG(rfs4_debug,
8734                             (CE_NOTE, "Get state failed in lock %d", status));
8735                         *cs->statusp = resp->status = status;
8736                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8737                             cs, LOCK4res *, resp);
8738                         return;
8739                 }
8740
8741                 /* Ensure specified filehandle matches */
8742                 if (cs->vp != sp->rs_finfo->rf_vp) {
8743                         rfs4_state_rele(sp);
8744                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8745                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8746                             cs, LOCK4res *, resp);
8747                         return;
8748                 }
8749
8750                 /* hold off other access to open_owner while we tinker */
8751                 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8752
8753                 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8754                 case NFS4_CHECK_STATEID_OLD:
8755                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8756                         goto end;
8757                 case NFS4_CHECK_STATEID_BAD:
8758                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8759                         goto end;
8760                 case NFS4_CHECK_STATEID_EXPIRED:
8761                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8762                         goto end;
8763                 case NFS4_CHECK_STATEID_UNCONFIRMED:
8764                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8765                         goto end;
8766                 case NFS4_CHECK_STATEID_CLOSED:
8767                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8768                         goto end;
8769                 case NFS4_CHECK_STATEID_OKAY:
8770                 case NFS4_CHECK_STATEID_REPLAY:
8771                         switch (rfs4_check_olo_seqid(olo->open_seqid,
8772                             sp->rs_owner, resop)) {
8773                         case NFS4_CHKSEQ_OKAY:
8774                                 if (rc == NFS4_CHECK_STATEID_OKAY)
8775                                         break;
8776                                 /*
8777                                  * This is replayed stateid; if seqid
8778                                  * matches next expected, then client
8779                                  * is using wrong seqid.
8780                                  */
8781                                 /* FALLTHROUGH */
8782                         case NFS4_CHKSEQ_BAD:
8783                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8784                                 goto end;
8785                         case NFS4_CHKSEQ_REPLAY:
8786                                 /* This is a duplicate LOCK request */
8787                                 dup_lock = TRUE;
8788
8789                                 /*
8790                                  * For a duplicate we do not want to
8791                                  * create a new lockowner as it should
8792                                  * already exist.
8793                                  * Turn off the lockowner create flag.
8794                                  */
8795                                 lcreate = FALSE;
8796                         }
8797                         break;
8798                 }
8799
8800                 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8801                 if (lo == NULL) {
8802                         NFS4_DEBUG(rfs4_debug,
8803                             (CE_NOTE, "rfs4_op_lock: no lock owner"));
8804                         *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8805                         goto end;
8806                 }
8807
8808                 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8809                 if (lsp == NULL) {
8810                         rfs4_update_lease(sp->rs_owner->ro_client);
8811                         /*
8812                          * Only update theh open_seqid if this is not
8813                          * a duplicate request
8814                          */
8815                         if (dup_lock == FALSE) {
8816                                 rfs4_update_open_sequence(sp->rs_owner);
8817                         }
8818
8819                         NFS4_DEBUG(rfs4_debug,
8820                             (CE_NOTE, "rfs4_op_lock: no state"));
8821                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8822                         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8823                         rfs4_lockowner_rele(lo);
8824                         goto end;
8825                 }
8826
8827                 /*
8828                  * This is the new_lock_owner branch and the client is
8829                  * supposed to be associating a new lock_owner with
8830                  * the open file at this point.  If we find that a
8831                  * lock_owner/state association already exists and a
8832                  * successful LOCK request was returned to the client,
8833                  * an error is returned to the client since this is
8834                  * not appropriate.  The client should be using the
8835                  * existing lock_owner branch.
8836                  */
8837                 if (dup_lock == FALSE && create == FALSE) {
8838                         if (lsp->rls_lock_completed == TRUE) {
8839                                 *cs->statusp =
8840                                     resp->status = NFS4ERR_BAD_SEQID;
8841                                 rfs4_lockowner_rele(lo);
8842                                 goto end;
8843                         }
8844                 }
8845
8846                 rfs4_update_lease(sp->rs_owner->ro_client);
8847
8848                 /*
8849                  * Only update theh open_seqid if this is not
8850                  * a duplicate request
8851                  */
8852                 if (dup_lock == FALSE) {
8853                         rfs4_update_open_sequence(sp->rs_owner);
8854                 }
8855
8856                 /*
8857                  * If this is a duplicate lock request, just copy the
8858                  * previously saved reply and return.
8859                  */
8860                 if (dup_lock == TRUE) {
8861                         /* verify that lock_seqid's match */
8862                         if (lsp->rls_seqid != olo->lock_seqid) {
8863                                 NFS4_DEBUG(rfs4_debug,
8864                                     (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8865                                     "lsp->seqid=%d old->seqid=%d",
8866                                     lsp->rls_seqid, olo->lock_seqid));
8867                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8868                         } else {
8869                                 rfs4_copy_reply(resop, &lsp->rls_reply);
8870                                 /*
8871                                  * Make sure to copy the just
8872                                  * retrieved reply status into the
8873                                  * overall compound status
8874                                  */
8875                                 *cs->statusp = resp->status;
8876                         }
8877                         rfs4_lockowner_rele(lo);
8878                         goto end;
8879                 }
8880
8881                 rfs4_dbe_lock(lsp->rls_dbe);
8882
8883                 /* Make sure to update the lock sequence id */
8884                 lsp->rls_seqid = olo->lock_seqid;
8885
8886                 NFS4_DEBUG(rfs4_debug,
8887                     (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
8888
8889                 /*
8890                  * This is used to signify the newly created lockowner
8891                  * stateid and its sequence number.  The checks for
8892                  * sequence number and increment don't occur on the
8893                  * very first lock request for a lockowner.
8894                  */
8895                 lsp->rls_skip_seqid_check = TRUE;
8896
8897                 /* hold off other access to lsp while we tinker */
8898                 rfs4_sw_enter(&lsp->rls_sw);
8899                 ls_sw_held = TRUE;
8900
8901                 rfs4_dbe_unlock(lsp->rls_dbe);
8902
8903                 rfs4_lockowner_rele(lo);
8904         } else {
8905                 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
8906                 /* get lsp and hold the lock on the underlying file struct */
8907                 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
8908                     != NFS4_OK) {
8909                         *cs->statusp = resp->status = status;
8910                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8911                             cs, LOCK4res *, resp);
8912                         return;
8913                 }
8914                 create = FALSE; /* We didn't create lsp */
8915
8916                 /* Ensure specified filehandle matches */
8917                 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
8918                         rfs4_lo_state_rele(lsp, TRUE);
8919                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8920                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8921                             cs, LOCK4res *, resp);
8922                         return;
8923                 }
8924
8925                 /* hold off other access to lsp while we tinker */
8926                 rfs4_sw_enter(&lsp->rls_sw);
8927                 ls_sw_held = TRUE;
8928
8929                 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
8930                 /*
8931                  * The stateid looks like it was okay (expected to be
8932                  * the next one)
8933                  */
8934                 case NFS4_CHECK_STATEID_OKAY:
8935                         /*
8936                          * The sequence id is now checked.  Determine
8937                          * if this is a replay or if it is in the
8938                          * expected (next) sequence.  In the case of a
8939                          * replay, there are two replay conditions
8940                          * that may occur.  The first is the normal
8941                          * condition where a LOCK is done with a
8942                          * NFS4_OK response and the stateid is
8943                          * updated.  That case is handled below when
8944                          * the stateid is identified as a REPLAY.  The
8945                          * second is the case where an error is
8946                          * returned, like NFS4ERR_DENIED, and the
8947                          * sequence number is updated but the stateid
8948                          * is not updated.  This second case is dealt
8949                          * with here.  So it may seem odd that the
8950                          * stateid is okay but the sequence id is a
8951                          * replay but it is okay.
8952                          */
8953                         switch (rfs4_check_lock_seqid(
8954                             args->locker.locker4_u.lock_owner.lock_seqid,
8955                             lsp, resop)) {
8956                         case NFS4_CHKSEQ_REPLAY:
8957                                 if (resp->status != NFS4_OK) {
8958                                         /*
8959                                          * Here is our replay and need
8960                                          * to verify that the last
8961                                          * response was an error.
8962                                          */
8963                                         *cs->statusp = resp->status;
8964                                         goto end;
8965                                 }
8966                                 /*
8967                                  * This is done since the sequence id
8968                                  * looked like a replay but it didn't
8969                                  * pass our check so a BAD_SEQID is
8970                                  * returned as a result.
8971                                  */
8972                                 /*FALLTHROUGH*/
8973                         case NFS4_CHKSEQ_BAD:
8974                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8975                                 goto end;
8976                         case NFS4_CHKSEQ_OKAY:
8977                                 /* Everything looks okay move ahead */
8978                                 break;
8979                         }
8980                         break;
8981                 case NFS4_CHECK_STATEID_OLD:
8982                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8983                         goto end;
8984                 case NFS4_CHECK_STATEID_BAD:
8985                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8986                         goto end;
8987                 case NFS4_CHECK_STATEID_EXPIRED:
8988                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8989                         goto end;
8990                 case NFS4_CHECK_STATEID_CLOSED:
8991                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8992                         goto end;
8993                 case NFS4_CHECK_STATEID_REPLAY:
8994                         switch (rfs4_check_lock_seqid(
8995                             args->locker.locker4_u.lock_owner.lock_seqid,
8996                             lsp, resop)) {
8997                         case NFS4_CHKSEQ_OKAY:
8998                                 /*
8999                                  * This is a replayed stateid; if
9000                                  * seqid matches the next expected,
9001                                  * then client is using wrong seqid.
9002                                  */
9003                         case NFS4_CHKSEQ_BAD:
9004                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9005                                 goto end;
9006                         case NFS4_CHKSEQ_REPLAY:
9007                                 rfs4_update_lease(lsp->rls_locker->rl_client);
9008                                 *cs->statusp = status = resp->status;
9009                                 goto end;
9010                         }
9011                         break;
9012                 default:
9013                         ASSERT(FALSE);
9014                         break;
9015                 }
9016
9017                 rfs4_update_lock_sequence(lsp);
9018                 rfs4_update_lease(lsp->rls_locker->rl_client);
9019         }
9020
9021         /*
9022          * NFS4 only allows locking on regular files, so
9023          * verify type of object.
9024          */
9025         if (cs->vp->v_type != VREG) {
9026                 if (cs->vp->v_type == VDIR)
9027                         status = NFS4ERR_ISDIR;
9028                 else
9029                         status = NFS4ERR_INVAL;
9030                 goto out;
9031         }
9032
9033         cp = lsp->rls_state->rs_owner->ro_client;
9034
9035         if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9036                 status = NFS4ERR_GRACE;
9037                 goto out;
9038         }
9039
9040         if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9041                 status = NFS4ERR_NO_GRACE;
9042                 goto out;
9043         }
9044
9045         if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9046                 status = NFS4ERR_NO_GRACE;
9047                 goto out;
9048         }
9049
9050         if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9051                 cs->deleg = TRUE;
9052
9053         status = rfs4_do_lock(lsp, args->locktype,
9054             args->offset, args->length, cs->cr, resop);
9055
9056 out:
9057         lsp->rls_skip_seqid_check = FALSE;
9058
9059         *cs->statusp = resp->status = status;
9060
9061         if (status == NFS4_OK) {
9062                 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9063                 lsp->rls_lock_completed = TRUE;
9064         }
9065         /*
9066          * Only update the "OPEN" response here if this was a new
9067          * lock_owner
9068          */
9069         if (sp)
9070                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9071
9072         rfs4_update_lock_resp(lsp, resop);
9073
9074 end:
9075         if (lsp) {
9076                 if (ls_sw_held)
9077                         rfs4_sw_exit(&lsp->rls_sw);
9078                 /*
9079                  * If an sp obtained, then the lsp does not represent
9080                  * a lock on the file struct.
9081                  */
9082                 if (sp != NULL)
9083                         rfs4_lo_state_rele(lsp, FALSE);
9084                 else
9085                         rfs4_lo_state_rele(lsp, TRUE);
9086         }
9087         if (sp) {
9088                 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9089                 rfs4_state_rele(sp);
9090         }
9091
9092         DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9093             LOCK4res *, resp);
9094 }
9095
9096 /* free function for LOCK/LOCKT */
9097 static void
9098 lock_denied_free(nfs_resop4 *resop)
9099 {
9100         LOCK4denied *dp = NULL;
9101
9102         switch (resop->resop) {
9103         case OP_LOCK:
9104                 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9105                         dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9106                 break;
9107         case OP_LOCKT:
9108                 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9109                         dp = &resop->nfs_resop4_u.oplockt.denied;
9110                 break;
9111         default:
9112                 break;
9113         }
9114
9115         if (dp)
9116                 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9117 }
9118
9119 /*ARGSUSED*/
9120 void
9121 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9122     struct svc_req *req, struct compound_state *cs)
9123 {
9124         LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9125         LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9126         nfsstat4 status;
9127         stateid4 *stateid = &args->lock_stateid;
9128         rfs4_lo_state_t *lsp;
9129
9130         DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9131             LOCKU4args *, args);
9132
9133         if (cs->vp == NULL) {
9134                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9135                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9136                     LOCKU4res *, resp);
9137                 return;
9138         }
9139
9140         if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9141                 *cs->statusp = resp->status = status;
9142                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9143                     LOCKU4res *, resp);
9144                 return;
9145         }
9146
9147         /* Ensure specified filehandle matches */
9148         if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9149                 rfs4_lo_state_rele(lsp, TRUE);
9150                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9151                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9152                     LOCKU4res *, resp);
9153                 return;
9154         }
9155
9156         /* hold off other access to lsp while we tinker */
9157         rfs4_sw_enter(&lsp->rls_sw);
9158
9159         switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9160         case NFS4_CHECK_STATEID_OKAY:
9161                 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9162                     != NFS4_CHKSEQ_OKAY) {
9163                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9164                         goto end;
9165                 }
9166                 break;
9167         case NFS4_CHECK_STATEID_OLD:
9168                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9169                 goto end;
9170         case NFS4_CHECK_STATEID_BAD:
9171                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9172                 goto end;
9173         case NFS4_CHECK_STATEID_EXPIRED:
9174                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9175                 goto end;
9176         case NFS4_CHECK_STATEID_CLOSED:
9177                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9178                 goto end;
9179         case NFS4_CHECK_STATEID_REPLAY:
9180                 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9181                 case NFS4_CHKSEQ_OKAY:
9182                                 /*
9183                                  * This is a replayed stateid; if
9184                                  * seqid matches the next expected,
9185                                  * then client is using wrong seqid.
9186                                  */
9187                 case NFS4_CHKSEQ_BAD:
9188                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9189                         goto end;
9190                 case NFS4_CHKSEQ_REPLAY:
9191                         rfs4_update_lease(lsp->rls_locker->rl_client);
9192                         *cs->statusp = status = resp->status;
9193                         goto end;
9194                 }
9195                 break;
9196         default:
9197                 ASSERT(FALSE);
9198                 break;
9199         }
9200
9201         rfs4_update_lock_sequence(lsp);
9202         rfs4_update_lease(lsp->rls_locker->rl_client);
9203
9204         /*
9205          * NFS4 only allows locking on regular files, so
9206          * verify type of object.
9207          */
9208         if (cs->vp->v_type != VREG) {
9209                 if (cs->vp->v_type == VDIR)
9210                         status = NFS4ERR_ISDIR;
9211                 else
9212                         status = NFS4ERR_INVAL;
9213                 goto out;
9214         }
9215
9216         if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9217                 status = NFS4ERR_GRACE;
9218                 goto out;
9219         }
9220
9221         status = rfs4_do_lock(lsp, args->locktype,
9222             args->offset, args->length, cs->cr, resop);
9223
9224 out:
9225         *cs->statusp = resp->status = status;
9226
9227         if (status == NFS4_OK)
9228                 resp->lock_stateid = lsp->rls_lockid.stateid;
9229
9230         rfs4_update_lock_resp(lsp, resop);
9231
9232 end:
9233         rfs4_sw_exit(&lsp->rls_sw);
9234         rfs4_lo_state_rele(lsp, TRUE);
9235
9236         DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9237             LOCKU4res *, resp);
9238 }
9239
9240 /*
9241  * LOCKT is a best effort routine, the client can not be guaranteed that
9242  * the status return is still in effect by the time the reply is received.
9243  * They are numerous race conditions in this routine, but we are not required
9244  * and can not be accurate.
9245  */
9246 /*ARGSUSED*/
9247 void
9248 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9249     struct svc_req *req, struct compound_state *cs)
9250 {
9251         LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9252         LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9253         rfs4_lockowner_t *lo;
9254         rfs4_client_t *cp;
9255         bool_t create = FALSE;
9256         struct flock64 flk;
9257         int error;
9258         int flag = FREAD | FWRITE;
9259         int ltype;
9260         length4 posix_length;
9261         sysid_t sysid;
9262         pid_t pid;
9263
9264         DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9265             LOCKT4args *, args);
9266
9267         if (cs->vp == NULL) {
9268                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9269                 goto out;
9270         }
9271
9272         /*
9273          * NFS4 only allows locking on regular files, so
9274          * verify type of object.
9275          */
9276         if (cs->vp->v_type != VREG) {
9277                 if (cs->vp->v_type == VDIR)
9278                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
9279                 else
9280                         *cs->statusp = resp->status =  NFS4ERR_INVAL;
9281                 goto out;
9282         }
9283
9284         /*
9285          * Check out the clientid to ensure the server knows about it
9286          * so that we correctly inform the client of a server reboot.
9287          */
9288         if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9289             == NULL) {
9290                 *cs->statusp = resp->status =
9291                     rfs4_check_clientid(&args->owner.clientid, 0);
9292                 goto out;
9293         }
9294         if (rfs4_lease_expired(cp)) {
9295                 rfs4_client_close(cp);
9296                 /*
9297                  * Protocol doesn't allow returning NFS4ERR_STALE as
9298                  * other operations do on this check so STALE_CLIENTID
9299                  * is returned instead
9300                  */
9301                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9302                 goto out;
9303         }
9304
9305         if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9306                 *cs->statusp = resp->status = NFS4ERR_GRACE;
9307                 rfs4_client_rele(cp);
9308                 goto out;
9309         }
9310         rfs4_client_rele(cp);
9311
9312         resp->status = NFS4_OK;
9313
9314         switch (args->locktype) {
9315         case READ_LT:
9316         case READW_LT:
9317                 ltype = F_RDLCK;
9318                 break;
9319         case WRITE_LT:
9320         case WRITEW_LT:
9321                 ltype = F_WRLCK;
9322                 break;
9323         }
9324
9325         posix_length = args->length;
9326         /* Check for zero length. To lock to end of file use all ones for V4 */
9327         if (posix_length == 0) {
9328                 *cs->statusp = resp->status = NFS4ERR_INVAL;
9329                 goto out;
9330         } else if (posix_length == (length4)(~0)) {
9331                 posix_length = 0;       /* Posix to end of file  */
9332         }
9333
9334         /* Find or create a lockowner */
9335         lo = rfs4_findlockowner(&args->owner, &create);
9336
9337         if (lo) {
9338                 pid = lo->rl_pid;
9339                 if ((resp->status =
9340                     rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9341                         goto err;
9342         } else {
9343                 pid = 0;
9344                 sysid = lockt_sysid;
9345         }
9346 retry:
9347         flk.l_type = ltype;
9348         flk.l_whence = 0;               /* SEEK_SET */
9349         flk.l_start = args->offset;
9350         flk.l_len = posix_length;
9351         flk.l_sysid = sysid;
9352         flk.l_pid = pid;
9353         flag |= F_REMOTELOCK;
9354
9355         LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9356
9357         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9358         if (flk.l_len < 0 || flk.l_start < 0) {
9359                 resp->status = NFS4ERR_INVAL;
9360                 goto err;
9361         }
9362         error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9363             NULL, cs->cr, NULL);
9364
9365         /*
9366          * N.B. We map error values to nfsv4 errors. This is differrent
9367          * than puterrno4 routine.
9368          */
9369         switch (error) {
9370         case 0:
9371                 if (flk.l_type == F_UNLCK)
9372                         resp->status = NFS4_OK;
9373                 else {
9374                         if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9375                                 goto retry;
9376                         resp->status = NFS4ERR_DENIED;
9377                 }
9378                 break;
9379         case EOVERFLOW:
9380                 resp->status = NFS4ERR_INVAL;
9381                 break;
9382         case EINVAL:
9383                 resp->status = NFS4ERR_NOTSUPP;
9384                 break;
9385         default:
9386                 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9387                     error);
9388                 resp->status = NFS4ERR_SERVERFAULT;
9389                 break;
9390         }
9391
9392 err:
9393         if (lo)
9394                 rfs4_lockowner_rele(lo);
9395         *cs->statusp = resp->status;
9396 out:
9397         DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9398             LOCKT4res *, resp);
9399 }
9400
9401 int
9402 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9403 {
9404         int err;
9405         int cmd;
9406         vnode_t *vp;
9407         struct shrlock shr;
9408         struct shr_locowner shr_loco;
9409         int fflags = 0;
9410
9411         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9412         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9413
9414         if (sp->rs_closed)
9415                 return (NFS4ERR_OLD_STATEID);
9416
9417         vp = sp->rs_finfo->rf_vp;
9418         ASSERT(vp);
9419
9420         shr.s_access = shr.s_deny = 0;
9421
9422         if (access & OPEN4_SHARE_ACCESS_READ) {
9423                 fflags |= FREAD;
9424                 shr.s_access |= F_RDACC;
9425         }
9426         if (access & OPEN4_SHARE_ACCESS_WRITE) {
9427                 fflags |= FWRITE;
9428                 shr.s_access |= F_WRACC;
9429         }
9430         ASSERT(shr.s_access);
9431
9432         if (deny & OPEN4_SHARE_DENY_READ)
9433                 shr.s_deny |= F_RDDNY;
9434         if (deny & OPEN4_SHARE_DENY_WRITE)
9435                 shr.s_deny |= F_WRDNY;
9436
9437         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9438         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9439         shr_loco.sl_pid = shr.s_pid;
9440         shr_loco.sl_id = shr.s_sysid;
9441         shr.s_owner = (caddr_t)&shr_loco;
9442         shr.s_own_len = sizeof (shr_loco);
9443
9444         cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9445
9446         err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9447         if (err != 0) {
9448                 if (err == EAGAIN)
9449                         err = NFS4ERR_SHARE_DENIED;
9450                 else
9451                         err = puterrno4(err);
9452                 return (err);
9453         }
9454
9455         sp->rs_share_access |= access;
9456         sp->rs_share_deny |= deny;
9457
9458         return (0);
9459 }
9460
9461 int
9462 rfs4_unshare(rfs4_state_t *sp)
9463 {
9464         int err;
9465         struct shrlock shr;
9466         struct shr_locowner shr_loco;
9467
9468         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9469
9470         if (sp->rs_closed || sp->rs_share_access == 0)
9471                 return (0);
9472
9473         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9474         ASSERT(sp->rs_finfo->rf_vp);
9475
9476         shr.s_access = shr.s_deny = 0;
9477         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9478         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9479         shr_loco.sl_pid = shr.s_pid;
9480         shr_loco.sl_id = shr.s_sysid;
9481         shr.s_owner = (caddr_t)&shr_loco;
9482         shr.s_own_len = sizeof (shr_loco);
9483
9484         err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9485             NULL);
9486         if (err != 0) {
9487                 err = puterrno4(err);
9488                 return (err);
9489         }
9490
9491         sp->rs_share_access = 0;
9492         sp->rs_share_deny = 0;
9493
9494         return (0);
9495
9496 }
9497
9498 static int
9499 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9500 {
9501         struct clist    *wcl;
9502         count4          count = rok->data_len;
9503         int             wlist_len;
9504
9505         wcl = args->wlist;
9506         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9507                 return (FALSE);
9508         }
9509         wcl = args->wlist;
9510         rok->wlist_len = wlist_len;
9511         rok->wlist = wcl;
9512         return (TRUE);
9513 }
9514
/*
 * Tunable to disable server referrals.  It defaults to 0 (referrals
 * enabled); when it is non-zero, vn_is_nfs_reparse() returns B_FALSE
 * without examining the vnode.
 */
int rfs4_no_referrals = 0;
9517
9518 /*
9519  * Find an NFS record in reparse point data.
9520  * Returns 0 for success and <0 or an errno value on failure.
9521  */
9522 int
9523 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9524 {
9525         int err;
9526         char *stype, *val;
9527         nvlist_t *nvl;
9528         nvpair_t *curr;
9529
9530         if ((nvl = reparse_init()) == NULL)
9531                 return (-1);
9532
9533         if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9534                 reparse_free(nvl);
9535                 return (err);
9536         }
9537
9538         curr = NULL;
9539         while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9540                 if ((stype = nvpair_name(curr)) == NULL) {
9541                         reparse_free(nvl);
9542                         return (-2);
9543                 }
9544                 if (strncasecmp(stype, "NFS", 3) == 0)
9545                         break;
9546         }
9547
9548         if ((curr == NULL) ||
9549             (nvpair_value_string(curr, &val))) {
9550                 reparse_free(nvl);
9551                 return (-3);
9552         }
9553         *nvlp = nvl;
9554         *svcp = stype;
9555         *datap = val;
9556         return (0);
9557 }
9558
9559 int
9560 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9561 {
9562         nvlist_t *nvl;
9563         char *s, *d;
9564
9565         if (rfs4_no_referrals != 0)
9566                 return (B_FALSE);
9567
9568         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9569                 return (B_FALSE);
9570
9571         if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9572                 return (B_FALSE);
9573
9574         reparse_free(nvl);
9575
9576         return (B_TRUE);
9577 }
9578
9579 /*
9580  * There is a user-level copy of this routine in ref_subr.c.
9581  * Changes should be kept in sync.
9582  */
9583 static int
9584 nfs4_create_components(char *path, component4 *comp4)
9585 {
9586         int slen, plen, ncomp;
9587         char *ori_path, *nxtc, buf[MAXNAMELEN];
9588
9589         if (path == NULL)
9590                 return (0);
9591
9592         plen = strlen(path) + 1;        /* include the terminator */
9593         ori_path = path;
9594         ncomp = 0;
9595
9596         /* count number of components in the path */
9597         for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9598                 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9599                         if ((slen = nxtc - path) == 0) {
9600                                 path = nxtc + 1;
9601                                 continue;
9602                         }
9603
9604                         if (comp4 != NULL) {
9605                                 bcopy(path, buf, slen);
9606                                 buf[slen] = '\0';
9607                                 (void) str_to_utf8(buf, &comp4[ncomp]);
9608                         }
9609
9610                         ncomp++;        /* 1 valid component */
9611                         path = nxtc + 1;
9612                 }
9613                 if (*nxtc == '\0' || *nxtc == '\n')
9614                         break;
9615         }
9616
9617         return (ncomp);
9618 }
9619
9620 /*
9621  * There is a user-level copy of this routine in ref_subr.c.
9622  * Changes should be kept in sync.
9623  */
9624 static int
9625 make_pathname4(char *path, pathname4 *pathname)
9626 {
9627         int ncomp;
9628         component4 *comp4;
9629
9630         if (pathname == NULL)
9631                 return (0);
9632
9633         if (path == NULL) {
9634                 pathname->pathname4_val = NULL;
9635                 pathname->pathname4_len = 0;
9636                 return (0);
9637         }
9638
9639         /* count number of components to alloc buffer */
9640         if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9641                 pathname->pathname4_val = NULL;
9642                 pathname->pathname4_len = 0;
9643                 return (0);
9644         }
9645         comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9646
9647         /* copy components into allocated buffer */
9648         ncomp = nfs4_create_components(path, comp4);
9649
9650         pathname->pathname4_val = comp4;
9651         pathname->pathname4_len = ncomp;
9652
9653         return (ncomp);
9654 }
9655
9656 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9657
9658 fs_locations4 *
9659 fetch_referral(vnode_t *vp, cred_t *cr)
9660 {
9661         nvlist_t *nvl;
9662         char *stype, *sdata;
9663         fs_locations4 *result;
9664         char buf[1024];
9665         size_t bufsize;
9666         XDR xdr;
9667         int err;
9668
9669         /*
9670          * Check attrs to ensure it's a reparse point
9671          */
9672         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9673                 return (NULL);
9674
9675         /*
9676          * Look for an NFS record and get the type and data
9677          */
9678         if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9679                 return (NULL);
9680
9681         /*
9682          * With the type and data, upcall to get the referral
9683          */
9684         bufsize = sizeof (buf);
9685         bzero(buf, sizeof (buf));
9686         err = reparse_kderef((const char *)stype, (const char *)sdata,
9687             buf, &bufsize);
9688         reparse_free(nvl);
9689
9690         DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9691             char *, stype, char *, sdata, char *, buf, int, err);
9692         if (err) {
9693                 cmn_err(CE_NOTE,
9694                     "reparsed daemon not running: unable to get referral (%d)",
9695                     err);
9696                 return (NULL);
9697         }
9698
9699         /*
9700          * We get an XDR'ed record back from the kderef call
9701          */
9702         xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9703         result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9704         err = xdr_fs_locations4(&xdr, result);
9705         XDR_DESTROY(&xdr);
9706         if (err != TRUE) {
9707                 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9708                     int, err);
9709                 return (NULL);
9710         }
9711
9712         /*
9713          * Look at path to recover fs_root, ignoring the leading '/'
9714          */
9715         (void) make_pathname4(vp->v_path, &result->fs_root);
9716
9717         return (result);
9718 }
9719
9720 char *
9721 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9722 {
9723         fs_locations4 *fsl;
9724         fs_location4 *fs;
9725         char *server, *path, *symbuf;
9726         static char *prefix = "/net/";
9727         int i, size, npaths;
9728         uint_t len;
9729
9730         /* Get the referral */
9731         if ((fsl = fetch_referral(vp, cr)) == NULL)
9732                 return (NULL);
9733
9734         /* Deal with only the first location and first server */
9735         fs = &fsl->locations_val[0];
9736         server = utf8_to_str(&fs->server_val[0], &len, NULL);
9737         if (server == NULL) {
9738                 rfs4_free_fs_locations4(fsl);
9739                 kmem_free(fsl, sizeof (fs_locations4));
9740                 return (NULL);
9741         }
9742
9743         /* Figure out size for "/net/" + host + /path/path/path + NULL */
9744         size = strlen(prefix) + len;
9745         for (i = 0; i < fs->rootpath.pathname4_len; i++)
9746                 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9747
9748         /* Allocate the symlink buffer and fill it */
9749         symbuf = kmem_zalloc(size, KM_SLEEP);
9750         (void) strcat(symbuf, prefix);
9751         (void) strcat(symbuf, server);
9752         kmem_free(server, len);
9753
9754         npaths = 0;
9755         for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9756                 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9757                 if (path == NULL)
9758                         continue;
9759                 (void) strcat(symbuf, "/");
9760                 (void) strcat(symbuf, path);
9761                 npaths++;
9762                 kmem_free(path, len);
9763         }
9764
9765         rfs4_free_fs_locations4(fsl);
9766         kmem_free(fsl, sizeof (fs_locations4));
9767
9768         if (strsz != NULL)
9769                 *strsz = size;
9770         return (symbuf);
9771 }
9772
9773 /*
9774  * Check to see if we have a downrev Solaris client, so that we
9775  * can send it a symlink instead of a referral.
9776  */
9777 int
9778 client_is_downrev(struct svc_req *req)
9779 {
9780         struct sockaddr *ca;
9781         rfs4_clntip_t *ci;
9782         bool_t create = FALSE;
9783         int is_downrev;
9784
9785         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9786         ASSERT(ca);
9787         ci = rfs4_find_clntip(ca, &create);
9788         if (ci == NULL)
9789                 return (0);
9790         is_downrev = ci->ri_no_referrals;
9791         rfs4_dbe_rele(ci->ri_dbe);
9792         return (is_downrev);
9793 }