usr/src/uts/common/fs/nfs/nfs4_srv.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
  24  */
  25
  26
  27 /*
  28  *      Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
  29  *      All Rights Reserved
  30  */
  31
  32 #include <sys/param.h>
  33 #include <sys/types.h>
  34 #include <sys/systm.h>
  35 #include <sys/cred.h>
  36 #include <sys/buf.h>
  37 #include <sys/vfs.h>
  38 #include <sys/vfs_opreg.h>
  39 #include <sys/vnode.h>
  40 #include <sys/uio.h>
  41 #include <sys/errno.h>
  42 #include <sys/sysmacros.h>
  43 #include <sys/statvfs.h>
  44 #include <sys/kmem.h>
  45 #include <sys/dirent.h>
  46 #include <sys/cmn_err.h>
  47 #include <sys/debug.h>
  48 #include <sys/systeminfo.h>
  49 #include <sys/flock.h>
  50 #include <sys/pathname.h>
  51 #include <sys/nbmlock.h>
  52 #include <sys/share.h>
  53 #include <sys/atomic.h>
  54 #include <sys/policy.h>
  55 #include <sys/fem.h>
  56 #include <sys/sdt.h>
  57 #include <sys/ddi.h>
  58 #include <sys/zone.h>
  59
  60 #include <fs/fs_reparse.h>
  61
  62 #include <rpc/types.h>
  63 #include <rpc/auth.h>
  64 #include <rpc/rpcsec_gss.h>
  65 #include <rpc/svc.h>
  66
  67 #include <nfs/nfs.h>
  68 #include <nfs/export.h>
  69 #include <nfs/nfs_cmd.h>
  70 #include <nfs/lm.h>
  71 #include <nfs/nfs4.h>
  72
  73 #include <sys/strsubr.h>
  74 #include <sys/strsun.h>
  75
  76 #include <inet/common.h>
  77 #include <inet/ip.h>
  78 #include <inet/ip6.h>
  79
  80 #include <sys/tsol/label.h>
  81 #include <sys/tsol/tndb.h>
  82
     /*
      * Tunables for lock acquisition: the number of attempts and a delay
      * expressed in milliseconds (presumably the pause between attempts --
      * confirm at the use site).  rdma_svc_ops and nfs_loaned_buffers are
      * only declared here; their definitions live outside this file.
      */
  83 #define RFS4_MAXLOCK_TRIES 4    /* Try to get the lock this many times */
  84 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
  85 #define RFS4_LOCK_DELAY 10      /* Milliseconds */
  86 static clock_t  rfs4_lock_delay = RFS4_LOCK_DELAY;
  87 extern struct svc_ops rdma_svc_ops;
  88 extern int nfs_loaned_buffers;
  89 /* End of Tunables */
  90
     /* Forward declaration; the definition is not in this part of the file. */
  91 static int rdma_setup_read_data4(READ4args *, READ4res *);
  92
  93 /*
  94  * Used to bump the stateid4.seqid value and show changes in the stateid.
      * The macro pre-increments (sp)->bits.chgseq in place, so the whole
      * expression evaluates to the new sequence value.
  95  */
  96 #define next_stateid(sp) (++(sp)->bits.chgseq)
  97
  98 /*
  99  * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
 100  *      This is used to return NFS4ERR_TOOSMALL when clients specify
 101  *      maxcount that isn't large enough to hold the smallest possible
 102  *      XDR encoded dirent.
 103  *
 104  *          sizeof cookie (8 bytes) +
 105  *          sizeof name_len (4 bytes) +
 106  *          sizeof smallest (padded) name (4 bytes) +
 107  *          sizeof bitmap4_len (12 bytes) +   NOTE: we always encode len=2 bm4
 108  *          sizeof attrlist4_len (4 bytes) +
 109  *          sizeof next boolean (4 bytes)
 110  *
 111  * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
 112  * the smallest possible entry4 (assumes no attrs requested).
 113  *      sizeof nfsstat4 (4 bytes) +
 114  *      sizeof verifier4 (8 bytes) +
 115  *      sizeof entry4list bool (4 bytes) +
 116  *      sizeof entry4   (36 bytes) +
 117  *      sizeof eof bool  (4 bytes)
 118  *
 119  * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
 120  *      VOP_READDIR.  Its value is the size of the maximum possible dirent
 121  *      for solaris.  The DIRENT64_RECLEN macro returns the size of dirent
 122  *      required for a given name length.  MAXNAMELEN is the maximum
 123  *      filename length allowed in Solaris.  The first two DIRENT64_RECLEN()
 124  *      macros are to allow for . and .. entries -- just a minor tweak to try
 125  *      and guarantee that buffer we give to VOP_READDIR will be large enough
 126  *      to hold ., .., and the largest possible solaris dirent64.
 127  */
     /* 8 + 4 + 4 + 12 + 4 + 4: the six entry4 components itemized above. */
 128 #define RFS4_MINLEN_ENTRY4 36
     /* status + verifier + entry-list bool + one minimal entry4 + eof bool */
 129 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
     /* room for a 1-char name, a 2-char name and one MAXNAMELEN name */
 130 #define RFS4_MINLEN_RDDIR_BUF \
 131         (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
 132
 133 /*
 134  * It would be better to pad to 4 bytes since that's what XDR would do,
 135  * but the dirents UFS gives us are already padded to 8, so just take
 136  * what we're given.  Dircount is only a hint anyway.  Currently the
 137  * solaris kernel is ASCII only, so there's no point in calling the
 138  * UTF8 functions.
 139  *
 140  * dirent64: name padded to provide 8 byte struct alignment
 141  *      d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
 142  *
 143  * cookie: uint64_t   +  utf8namelen: uint_t  +   utf8name padded to 8 bytes
 144  *
      * The macro therefore charges three XDR units for the cookie and the
      * name length (assuming 4-byte XDR units -- confirm BYTES_PER_XDR_UNIT)
      * plus whatever DIRENT64_NAMELEN derives from the entry's d_reclen.
 145  */
 146 #define DIRENT64_TO_DIRCOUNT(dp) \
 147         (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
 148
     /*
      * NOTE(review): rfs4_srvrinit() as it appears in this file never assigns
      * rfs4_start_time; confirm where it is actually set.
      */
 149 time_t rfs4_start_time;                 /* Initialized in rfs4_srvrinit */
 150
     /* Allocated by rfs4_srvrinit() and released by rfs4_srvrfini(). */
 151 static sysid_t lockt_sysid;             /* dummy sysid for all LOCKT calls */
 152
 153 u_longlong_t    nfs4_srv_caller_id;     /* from fs_new_caller_id() at init */
 154 uint_t          nfs4_srv_vkey = 0;      /* key filled in by vsd_create() at init */
 155
 156 verifier4       Write4verf;             /* built in rfs4_srvrinit() */
 157 verifier4       Readdir4verf;           /* not assigned in rfs4_srvrinit() */
 158
     /*
      * Forward declarations.  The rfs4_op_*() handlers all share the signature
      * (argop, resop, req, compound state) and are referenced from the
      * rfsv4disptab dispatch table; the *_free() routines take the result op
      * and are used as the matching result-free entries in that table.
      */
 159 void    rfs4_init_compound_state(struct compound_state *);
 160
 161 static void     nullfree(caddr_t);
 162 static void     rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 163                         struct compound_state *);
 164 static void     rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 165                         struct compound_state *);
 166 static void     rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 167                         struct compound_state *);
 168 static void     rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 169                         struct compound_state *);
 170 static void     rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 171                         struct compound_state *);
 172 static void     rfs4_op_create_free(nfs_resop4 *resop);
 173 static void     rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
 174                         struct svc_req *, struct compound_state *);
 175 static void     rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
 176                         struct svc_req *, struct compound_state *);
 177 static void     rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 178                         struct compound_state *);
 179 static void     rfs4_op_getattr_free(nfs_resop4 *);
 180 static void     rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 181                         struct compound_state *);
 182 static void     rfs4_op_getfh_free(nfs_resop4 *);
 183 static void     rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 184                         struct compound_state *);
 185 static void     rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 186                         struct compound_state *);
 187 static void     rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 188                         struct compound_state *);
 189 static void     lock_denied_free(nfs_resop4 *);
 190 static void     rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 191                         struct compound_state *);
 192 static void     rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 193                         struct compound_state *);
 194 static void     rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 195                         struct compound_state *);
 196 static void     rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 197                         struct compound_state *);
 198 static void     rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
 199                                 struct svc_req *req, struct compound_state *cs);
 200 static void     rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 201                         struct compound_state *);
 202 static void     rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 203                         struct compound_state *);
 204 static void     rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
 205                         struct svc_req *, struct compound_state *);
 206 static void     rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
 207                         struct svc_req *, struct compound_state *);
 208 static void     rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 209                         struct compound_state *);
 210 static void     rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 211                         struct compound_state *);
 212 static void     rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 213                         struct compound_state *);
 214 static void     rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 215                         struct compound_state *);
 216 static void     rfs4_op_read_free(nfs_resop4 *);
 217 static void     rfs4_op_readdir_free(nfs_resop4 *resop);
 218 static void     rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 219                         struct compound_state *);
 220 static void     rfs4_op_readlink_free(nfs_resop4 *);
 221 static void     rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
 222                         struct svc_req *, struct compound_state *);
 223 static void     rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 224                         struct compound_state *);
 225 static void     rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 226                         struct compound_state *);
 227 static void     rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 228                         struct compound_state *);
 229 static void     rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 230                         struct compound_state *);
 231 static void     rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 232                         struct compound_state *);
 233 static void     rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 234                         struct compound_state *);
 235 static void     rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 236                         struct compound_state *);
 237 static void     rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 238                         struct compound_state *);
 239 static void     rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
 240                         struct svc_req *, struct compound_state *);
 241 static void     rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
 242                         struct svc_req *req, struct compound_state *);
 243 static void     rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
 244                         struct compound_state *);
 245 static void     rfs4_op_secinfo_free(nfs_resop4 *);
 246
 247 static nfsstat4 check_open_access(uint32_t,
 248                                 struct compound_state *, struct svc_req *);
 249 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
 250 void rfs4_ss_clid(rfs4_client_t *);
 251
 252 /*
 253  * translation table for attrs
      *
      * NOTE(review): the field descriptions below are inferred from the
      * declarations only; confirm against nfs4_ntov_table_init() and its users.
 254  */
 255 struct nfs4_ntov_table {
 256         union nfs4_attr_u *na;          /* attribute values (sizing not shown here) */
 257         uint8_t amap[NFS4_MAXNUM_ATTRS]; /* one slot per possible attribute */
 258         int attrcnt;                    /* presumably entries in use -- confirm */
 259         bool_t vfsstat;                 /* presumably "vfs stat needed" -- confirm */
 260 };
 261
     /* Helpers operating on struct nfs4_ntov_table; defined later in the file. */
 262 static void     nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
 263 static void     nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
 264                                     struct nfs4_svgetit_arg *sargp);
 265
 266 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
 267                     struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
 268                     struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
 269
     /*
      * FEM operation sets created by rfs4_srvrinit() from nfs4_rd_deleg_tmpl
      * and nfs4_wr_deleg_tmpl, and released by rfs4_srvrfini().
      */
 270 fem_t           *deleg_rdops;
 271 fem_t           *deleg_wrops;
 272
     /*
      * Server instances are chained through their prev/next pointers;
      * rfs4_cur_servinst is the most recently created one and list walks
      * follow ->prev from it while holding rfs4_servinst_lock.
      */
 273 rfs4_servinst_t *rfs4_cur_servinst = NULL;      /* current server instance */
 274 kmutex_t        rfs4_servinst_lock;     /* protects linked list */
 275 int             rfs4_seen_first_compound;       /* set first time we see one */
 276
 277 /*
 278  * NFS4 op dispatch table
      *
      * One entry per operation: the handler, the routine that releases
      * whatever the handler allocated for its result, and a flags word.
      * Both function pointers are declared without a prototype, so calls
      * made through them are not type-checked by the compiler.
 279  */
 280
 281 struct rfsv4disp {
 282         void    (*dis_proc)();          /* proc to call */
 283         void    (*dis_resfree)();       /* frees space allocated by proc */
 284         int     dis_flags;              /* RPC_IDEMPOTENT, etc... */
 285 };
 286
     /*
      * Dispatch table; the comment above each entry gives the operation
      * number that the entry's position corresponds to.  Slots 0-2 are not
      * real operations and are routed to rfs4_op_illegal.
      */
 287 static struct rfsv4disp rfsv4disptab[] = {
 288         /*
 289          * NFS VERSION 4
 290          */
 291
 292         /* RFS_NULL = 0 */
 293         {rfs4_op_illegal, nullfree, 0},
 294
 295         /* UNUSED = 1 */
 296         {rfs4_op_illegal, nullfree, 0},
 297
 298         /* UNUSED = 2 */
 299         {rfs4_op_illegal, nullfree, 0},
 300
 301         /* OP_ACCESS = 3 */
 302         {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
 303
 304         /* OP_CLOSE = 4 */
 305         {rfs4_op_close, nullfree, 0},
 306
 307         /* OP_COMMIT = 5 */
 308         {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
 309
 310         /* OP_CREATE = 6 */
 311         {rfs4_op_create, nullfree, 0},
 312
 313         /* OP_DELEGPURGE = 7 */
 314         {rfs4_op_delegpurge, nullfree, 0},
 315
 316         /* OP_DELEGRETURN = 8 */
 317         {rfs4_op_delegreturn, nullfree, 0},
 318
 319         /* OP_GETATTR = 9 */
 320         {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
 321
 322         /* OP_GETFH = 10 */
 323         {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
 324
 325         /* OP_LINK = 11 */
 326         {rfs4_op_link, nullfree, 0},
 327
 328         /* OP_LOCK = 12 */
 329         {rfs4_op_lock, lock_denied_free, 0},
 330
 331         /* OP_LOCKT = 13 */
 332         {rfs4_op_lockt, lock_denied_free, 0},
 333
 334         /* OP_LOCKU = 14 */
 335         {rfs4_op_locku, nullfree, 0},
 336
 337         /* OP_LOOKUP = 15 */
 338         {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 339
 340         /* OP_LOOKUPP = 16 */
 341         {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
 342
 343         /* OP_NVERIFY = 17 */
 344         {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
 345
 346         /* OP_OPEN = 18 */
 347         {rfs4_op_open, rfs4_free_reply, 0},
 348
 349         /* OP_OPENATTR = 19 */
 350         {rfs4_op_openattr, nullfree, 0},
 351
 352         /* OP_OPEN_CONFIRM = 20 */
 353         {rfs4_op_open_confirm, nullfree, 0},
 354
 355         /* OP_OPEN_DOWNGRADE = 21 */
 356         {rfs4_op_open_downgrade, nullfree, 0},
 357
 358         /* OP_PUTFH = 22 */
 359         {rfs4_op_putfh, nullfree, RPC_ALL},
 360
 361         /* OP_PUTPUBFH = 23 */
 362         {rfs4_op_putpubfh, nullfree, RPC_ALL},
 363
 364         /* OP_PUTROOTFH = 24 */
 365         {rfs4_op_putrootfh, nullfree, RPC_ALL},
 366
 367         /* OP_READ = 25 */
 368         {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
 369
 370         /* OP_READDIR = 26 */
 371         {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
 372
 373         /* OP_READLINK = 27 */
 374         {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
 375
 376         /* OP_REMOVE = 28 */
 377         {rfs4_op_remove, nullfree, 0},
 378
 379         /* OP_RENAME = 29 */
 380         {rfs4_op_rename, nullfree, 0},
 381
 382         /* OP_RENEW = 30 */
 383         {rfs4_op_renew, nullfree, 0},
 384
 385         /* OP_RESTOREFH = 31 */
 386         {rfs4_op_restorefh, nullfree, RPC_ALL},
 387
 388         /* OP_SAVEFH = 32 */
 389         {rfs4_op_savefh, nullfree, RPC_ALL},
 390
 391         /* OP_SECINFO = 33 */
 392         {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
 393
 394         /* OP_SETATTR = 34 */
 395         {rfs4_op_setattr, nullfree, 0},
 396
 397         /* OP_SETCLIENTID = 35 */
 398         {rfs4_op_setclientid, nullfree, 0},
 399
 400         /* OP_SETCLIENTID_CONFIRM = 36 */
 401         {rfs4_op_setclientid_confirm, nullfree, 0},
 402
 403         /* OP_VERIFY = 37 */
 404         {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
 405
 406         /* OP_WRITE = 38 */
 407         {rfs4_op_write, nullfree, 0},
 408
 409         /* OP_RELEASE_LOCKOWNER = 39 */
 410         {rfs4_op_release_lockowner, nullfree, 0},
 411 };
 412
     /* Number of entries in rfsv4disptab. */
 413 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
 414
     /*
      * One past the last valid table index.  In DEBUG builds this is also the
      * index of the trailing "rfs4_op_illegal" name in rfs4_op_string[].
      */
 415 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
 416
 417 #ifdef DEBUG
 418
 419 int             rfs4_fillone_debug = 0;
 420 int             rfs4_no_stub_access = 1;
 421 int             rfs4_rddir_debug = 0;
 422
     /*
      * Debug-only names, in the same order as rfsv4disptab, followed by one
      * extra entry for illegal operations.
      * NOTE(review): the entry for slot 36 reads "rfs4_op_setclient_confirm",
      * which does not match the handler name rfs4_op_setclientid_confirm.
      */
 423 static char    *rfs4_op_string[] = {
 424         "rfs4_op_null",
 425         "rfs4_op_1 unused",
 426         "rfs4_op_2 unused",
 427         "rfs4_op_access",
 428         "rfs4_op_close",
 429         "rfs4_op_commit",
 430         "rfs4_op_create",
 431         "rfs4_op_delegpurge",
 432         "rfs4_op_delegreturn",
 433         "rfs4_op_getattr",
 434         "rfs4_op_getfh",
 435         "rfs4_op_link",
 436         "rfs4_op_lock",
 437         "rfs4_op_lockt",
 438         "rfs4_op_locku",
 439         "rfs4_op_lookup",
 440         "rfs4_op_lookupp",
 441         "rfs4_op_nverify",
 442         "rfs4_op_open",
 443         "rfs4_op_openattr",
 444         "rfs4_op_open_confirm",
 445         "rfs4_op_open_downgrade",
 446         "rfs4_op_putfh",
 447         "rfs4_op_putpubfh",
 448         "rfs4_op_putrootfh",
 449         "rfs4_op_read",
 450         "rfs4_op_readdir",
 451         "rfs4_op_readlink",
 452         "rfs4_op_remove",
 453         "rfs4_op_rename",
 454         "rfs4_op_renew",
 455         "rfs4_op_restorefh",
 456         "rfs4_op_savefh",
 457         "rfs4_op_secinfo",
 458         "rfs4_op_setattr",
 459         "rfs4_op_setclientid",
 460         "rfs4_op_setclient_confirm",
 461         "rfs4_op_verify",
 462         "rfs4_op_write",
 463         "rfs4_op_release_lockowner",
 464         "rfs4_op_illegal"
 465 };
 466 #endif
 467
 468 void    rfs4_ss_chkclid(rfs4_client_t *);
 469
 470 extern size_t   strlcpy(char *dst, const char *src, size_t dstsize);
 471
 472 extern void     rfs4_free_fs_locations4(fs_locations4 *);
 473
     /*
      * Step from one dirent64 to the next by advancing d_reclen bytes.  Any
      * earlier definition of nextdp is dropped first so that this one is used.
      */
 474 #ifdef  nextdp
 475 #undef nextdp
 476 #endif
 477 #define nextdp(dp)      ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
 478
     /*
      * Vnode operations routed to the deleg_rd_*() routines; rfs4_srvrinit()
      * passes this template to fem_create() to build deleg_rdops.  The list
      * is terminated by a NULL/NULL pair.
      */
 479 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
 480         VOPNAME_OPEN,           { .femop_open = deleg_rd_open },
 481         VOPNAME_WRITE,          { .femop_write = deleg_rd_write },
 482         VOPNAME_SETATTR,        { .femop_setattr = deleg_rd_setattr },
 483         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_rd_rwlock },
 484         VOPNAME_SPACE,          { .femop_space = deleg_rd_space },
 485         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_rd_setsecattr },
 486         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_rd_vnevent },
 487         NULL,                   NULL
 488 };
     /*
      * Same as above for deleg_wrops; unlike the read template this one also
      * routes VOPNAME_READ (to deleg_wr_read).
      */
 489 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
 490         VOPNAME_OPEN,           { .femop_open = deleg_wr_open },
 491         VOPNAME_READ,           { .femop_read = deleg_wr_read },
 492         VOPNAME_WRITE,          { .femop_write = deleg_wr_write },
 493         VOPNAME_SETATTR,        { .femop_setattr = deleg_wr_setattr },
 494         VOPNAME_RWLOCK,         { .femop_rwlock = deleg_wr_rwlock },
 495         VOPNAME_SPACE,          { .femop_space = deleg_wr_space },
 496         VOPNAME_SETSECATTR,     { .femop_setsecattr = deleg_wr_setsecattr },
 497         VOPNAME_VNEVENT,        { .femop_vnevent = deleg_wr_vnevent },
 498         NULL,                   NULL
 499 };
 500
 501 int
 502 rfs4_srvrinit(void)
 503 {
 504         timespec32_t verf;
 505         int error;
 506         extern void rfs4_attr_init();
 507         extern krwlock_t rfs4_deleg_policy_lock;
 508
 509         /*
 510          * The following algorithm attempts to find a unique verifier
 511          * to be used as the write verifier returned from the server
 512          * to the client.  It is important that this verifier change
 513          * whenever the server reboots.  Of secondary importance, it
 514          * is important for the verifier to be unique between two
 515          * different servers.
 516          *
 517          * Thus, an attempt is made to use the system hostid and the
 518          * current time in seconds when the nfssrv kernel module is
 519          * loaded.  It is assumed that an NFS server will not be able
 520          * to boot and then to reboot in less than a second.  If the
 521          * hostid has not been set, then the current high resolution
 522          * time is used.  This will ensure different verifiers each
 523          * time the server reboots and minimize the chances that two
 524          * different servers will have the same verifier.
 525          * XXX - this is broken on LP64 kernels.
 526          */
 527         verf.tv_sec = (time_t)zone_get_hostid(NULL);
 528         if (verf.tv_sec != 0) {
 529                 verf.tv_nsec = gethrestime_sec();
 530         } else {
 531                 timespec_t tverf;
 532
 533                 gethrestime(&tverf);
 534                 verf.tv_sec = (time_t)tverf.tv_sec;
 535                 verf.tv_nsec = tverf.tv_nsec;
 536         }
 537
 538         Write4verf = *(uint64_t *)&verf;
 539
 540         rfs4_attr_init();
 541         mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
 542
 543         /* Used to manage create/destroy of server state */
 544         mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
 545
 546         /* Used to manage access to server instance linked list */
 547         mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
 548
 549         /* Used to manage access to rfs4_deleg_policy */
 550         rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
 551
 552         error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
 553         if (error != 0) {
 554                 rfs4_disable_delegation();
 555         } else {
 556                 error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
 557                     &deleg_wrops);
 558                 if (error != 0) {
 559                         rfs4_disable_delegation();
 560                         fem_free(deleg_rdops);
 561                 }
 562         }
 563
 564         nfs4_srv_caller_id = fs_new_caller_id();
 565
 566         lockt_sysid = lm_alloc_sysidt();
 567
 568         vsd_create(&nfs4_srv_vkey, NULL);
 569
 570         return (0);
 571 }
 572
 573 void
 574 rfs4_srvrfini(void)
 575 {
 576         extern krwlock_t rfs4_deleg_policy_lock;
 577
 578         if (lockt_sysid != LM_NOSYSID) {
 579                 lm_free_sysidt(lockt_sysid);
 580                 lockt_sysid = LM_NOSYSID;
 581         }
 582
 583         mutex_destroy(&rfs4_deleg_lock);
 584         mutex_destroy(&rfs4_state_lock);
 585         rw_destroy(&rfs4_deleg_policy_lock);
 586
 587         fem_free(deleg_rdops);
 588         fem_free(deleg_wrops);
 589 }
 590
 591 void
 592 rfs4_init_compound_state(struct compound_state *cs)
 593 {
 594         bzero(cs, sizeof (*cs));
 595         cs->cont = TRUE;
 596         cs->access = CS_ACCESS_DENIED;
 597         cs->deleg = FALSE;
 598         cs->mandlock = FALSE;
 599         cs->fh.nfs_fh4_val = cs->fhbuf;
 600 }
 601
 602 void
 603 rfs4_grace_start(rfs4_servinst_t *sip)
 604 {
 605         rw_enter(&sip->rwlock, RW_WRITER);
 606         sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
 607         sip->grace_period = rfs4_grace_period;
 608         rw_exit(&sip->rwlock);
 609 }
 610
 611 /*
 612  * returns true if the instance's grace period has never been started
 613  */
 614 int
 615 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
 616 {
 617         time_t start_time;
 618
 619         rw_enter(&sip->rwlock, RW_READER);
 620         start_time = sip->start_time;
 621         rw_exit(&sip->rwlock);
 622
 623         return (start_time == 0);
 624 }
 625
 626 /*
 627  * Indicates if server instance is within the
 628  * grace period.
 629  */
 630 int
 631 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
 632 {
 633         time_t grace_expiry;
 634
 635         rw_enter(&sip->rwlock, RW_READER);
 636         grace_expiry = sip->start_time + sip->grace_period;
 637         rw_exit(&sip->rwlock);
 638
 639         return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
 640 }
 641
 642 int
 643 rfs4_clnt_in_grace(rfs4_client_t *cp)
 644 {
 645         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 646
 647         return (rfs4_servinst_in_grace(cp->rc_server_instance));
 648 }
 649
 650 /*
 651  * reset all currently active grace periods
 652  */
 653 void
 654 rfs4_grace_reset_all(void)
 655 {
 656         rfs4_servinst_t *sip;
 657
 658         mutex_enter(&rfs4_servinst_lock);
 659         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 660                 if (rfs4_servinst_in_grace(sip))
 661                         rfs4_grace_start(sip);
 662         mutex_exit(&rfs4_servinst_lock);
 663 }
 664
 665 /*
 666  * start any new instances' grace periods
 667  */
 668 void
 669 rfs4_grace_start_new(void)
 670 {
 671         rfs4_servinst_t *sip;
 672
 673         mutex_enter(&rfs4_servinst_lock);
 674         for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
 675                 if (rfs4_servinst_grace_new(sip))
 676                         rfs4_grace_start(sip);
 677         mutex_exit(&rfs4_servinst_lock);
 678 }
 679
 680 static rfs4_dss_path_t *
 681 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
 682 {
 683         size_t len;
 684         rfs4_dss_path_t *dss_path;
 685
 686         dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
 687
 688         /*
 689          * Take a copy of the string, since the original may be overwritten.
 690          * Sadly, no strdup() in the kernel.
 691          */
 692         /* allow for NUL */
 693         len = strlen(path) + 1;
 694         dss_path->path = kmem_alloc(len, KM_SLEEP);
 695         (void) strlcpy(dss_path->path, path, len);
 696
 697         /* associate with servinst */
 698         dss_path->sip = sip;
 699         dss_path->index = index;
 700
 701         /*
 702          * Add to list of served paths.
 703          * No locking required, as we're only ever called at startup.
 704          */
 705         if (rfs4_dss_pathlist == NULL) {
 706                 /* this is the first dss_path_t */
 707
 708                 /* needed for insque/remque */
 709                 dss_path->next = dss_path->prev = dss_path;
 710
 711                 rfs4_dss_pathlist = dss_path;
 712         } else {
 713                 insque(dss_path, rfs4_dss_pathlist);
 714         }
 715
 716         return (dss_path);
 717 }
 718
 719 /*
 720  * Create a new server instance, and make it the currently active instance.
 721  * Note that starting the grace period too early will reduce the clients'
 722  * recovery window.
 723  */
 724 void
 725 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
 726 {
 727         unsigned i;
 728         rfs4_servinst_t *sip;
 729         rfs4_oldstate_t *oldstate;
 730
 731         sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
 732         rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
 733
 734         sip->start_time = (time_t)0;
 735         sip->grace_period = (time_t)0;
 736         sip->next = NULL;
 737         sip->prev = NULL;
 738
 739         rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
 740         /*
 741          * This initial dummy entry is required to setup for insque/remque.
 742          * It must be skipped over whenever the list is traversed.
 743          */
 744         oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
 745         /* insque/remque require initial list entry to be self-terminated */
 746         oldstate->next = oldstate;
 747         oldstate->prev = oldstate;
 748         sip->oldstate = oldstate;
 749
 750
 751         sip->dss_npaths = dss_npaths;
 752         sip->dss_paths = kmem_alloc(dss_npaths *
 753             sizeof (rfs4_dss_path_t *), KM_SLEEP);
 754
 755         for (i = 0; i < dss_npaths; i++) {
 756                 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
 757         }
 758
 759         mutex_enter(&rfs4_servinst_lock);
 760         if (rfs4_cur_servinst != NULL) {
 761                 /* add to linked list */
 762                 sip->prev = rfs4_cur_servinst;
 763                 rfs4_cur_servinst->next = sip;
 764         }
 765         if (start_grace)
 766                 rfs4_grace_start(sip);
 767         /* make the new instance "current" */
 768         rfs4_cur_servinst = sip;
 769
 770         mutex_exit(&rfs4_servinst_lock);
 771 }
 772
 773 /*
 774  * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
 775  * all instances directly.
 776  */
 777 void
 778 rfs4_servinst_destroy_all(void)
 779 {
 780         rfs4_servinst_t *sip, *prev, *current;
 781 #ifdef DEBUG
 782         int n = 0;
 783 #endif
 784
 785         mutex_enter(&rfs4_servinst_lock);
 786         ASSERT(rfs4_cur_servinst != NULL);
 787         current = rfs4_cur_servinst;
 788         rfs4_cur_servinst = NULL;
 789         for (sip = current; sip != NULL; sip = prev) {
 790                 prev = sip->prev;
 791                 rw_destroy(&sip->rwlock);
 792                 if (sip->oldstate)
 793                         kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
 794                 if (sip->dss_paths)
 795                         kmem_free(sip->dss_paths,
 796                             sip->dss_npaths * sizeof (rfs4_dss_path_t *));
 797                 kmem_free(sip, sizeof (rfs4_servinst_t));
 798 #ifdef DEBUG
 799                 n++;
 800 #endif
 801         }
 802         mutex_exit(&rfs4_servinst_lock);
 803 }
 804
 805 /*
 806  * Assign the current server instance to a client_t.
 807  * Should be called with cp->rc_dbe held.
 808  */
 809 void
 810 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
 811 {
 812         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 813
 814         /*
 815          * The lock ensures that if the current instance is in the process
 816          * of changing, we will see the new one.
 817          */
 818         mutex_enter(&rfs4_servinst_lock);
 819         cp->rc_server_instance = sip;
 820         mutex_exit(&rfs4_servinst_lock);
 821 }
 822
 823 rfs4_servinst_t *
 824 rfs4_servinst(rfs4_client_t *cp)
 825 {
 826         ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
 827
 828         return (cp->rc_server_instance);
 829 }
 830
     /*
      * Result-free routine for operations whose results own no allocated
      * memory: intentionally does nothing.  Used as the dis_resfree entry for
      * most slots of rfsv4disptab.
      * NOTE(review): declared with a caddr_t parameter while the other free
      * routines take nfs_resop4 *; the table's unprototyped function pointer
      * hides the difference.
      */
 831 /* ARGSUSED */
 832 static void
 833 nullfree(caddr_t resop)
 834 {
 835 }
 836
 837 /*
 838  * This is a fall-through for invalid or not implemented (yet) ops
 839  */
 840 /* ARGSUSED */
 841 static void
 842 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
 843         struct compound_state *cs)
 844 {
 845         *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
 846 }
 847
 848 /*
 849  * Check if the security flavor, nfsnum, is in the flavor_list.
 850  */
 851 bool_t
 852 in_flavor_list(int nfsnum, int *flavor_list, int count)
 853 {
 854         int i;
 855
 856         for (i = 0; i < count; i++) {
 857                 if (nfsnum == flavor_list[i])
 858                         return (TRUE);
 859         }
 860         return (FALSE);
 861 }
 862
 863 /*
 864  * Used by rfs4_op_secinfo to get the security information from the
 865  * export structure associated with the component.
 866  */
 867 /* ARGSUSED */
 868 static nfsstat4
 869 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
 870 {
 871         int error, different_export = 0;
 872         vnode_t *dvp, *vp, *tvp;
 873         struct exportinfo *exi = NULL;
 874         fid_t fid;
 875         uint_t count, i;
 876         secinfo4 *resok_val;
 877         struct secinfo *secp;
 878         seconfig_t *si;
 879         bool_t did_traverse = FALSE;
 880         int dotdot, walk;
 881
 882         dvp = cs->vp;
 883         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
 884
 885         /*
 886          * If dotdotting, then need to check whether it's above the
 887          * root of a filesystem, or above an export point.
 888          */
 889         if (dotdot) {
 890
 891                 /*
 892                  * If dotdotting at the root of a filesystem, then
 893                  * need to traverse back to the mounted-on filesystem
 894                  * and do the dotdot lookup there.
 895                  */
 896                 if (cs->vp->v_flag & VROOT) {
 897
 898                         /*
 899                          * If at the system root, then can
 900                          * go up no further.
 901                          */
 902                         if (VN_CMP(dvp, rootdir))
 903                                 return (puterrno4(ENOENT));
 904
 905                         /*
 906                          * Traverse back to the mounted-on filesystem
 907                          */
 908                         dvp = untraverse(cs->vp);
 909
 910                         /*
 911                          * Set the different_export flag so we remember
 912                          * to pick up a new exportinfo entry for
 913                          * this new filesystem.
 914                          */
 915                         different_export = 1;
 916                 } else {
 917
 918                         /*
 919                          * If dotdotting above an export point then set
 920                          * the different_export to get new export info.
 921                          */
 922                         different_export = nfs_exported(cs->exi, cs->vp);
 923                 }
 924         }
 925
 926         /*
 927          * Get the vnode for the component "nm".
 928          */
 929         error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
 930             NULL, NULL, NULL);
 931         if (error)
 932                 return (puterrno4(error));
 933
 934         /*
 935          * If the vnode is in a pseudo filesystem, or if the security flavor
 936          * used in the request is valid but not an explicitly shared flavor,
 937          * or the access bit indicates that this is a limited access,
 938          * check whether this vnode is visible.
 939          */
 940         if (!different_export &&
 941             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
 942             cs->access & CS_ACCESS_LIMITED)) {
 943                 if (! nfs_visible(cs->exi, vp, &different_export)) {
 944                         VN_RELE(vp);
 945                         return (puterrno4(ENOENT));
 946                 }
 947         }
 948
 949         /*
 950          * If it's a mountpoint, then traverse it.
 951          */
 952         if (vn_ismntpt(vp)) {
 953                 tvp = vp;
 954                 if ((error = traverse(&tvp)) != 0) {
 955                         VN_RELE(vp);
 956                         return (puterrno4(error));
 957                 }
 958                 /* remember that we had to traverse mountpoint */
 959                 did_traverse = TRUE;
 960                 vp = tvp;
 961                 different_export = 1;
 962         } else if (vp->v_vfsp != dvp->v_vfsp) {
 963                 /*
 964                  * If vp isn't a mountpoint and the vfs ptrs aren't the same,
 965                  * then vp is probably an LOFS object.  We don't need the
 966                  * realvp, we just need to know that we might have crossed
 967                  * a server fs boundary and need to call checkexport4.
 968                  * (LOFS lookup hides server fs mountpoints, and actually calls
 969                  * traverse)
 970                  */
 971                 different_export = 1;
 972         }
 973
 974         /*
 975          * Get the export information for it.
 976          */
 977         if (different_export) {
 978
 979                 bzero(&fid, sizeof (fid));
 980                 fid.fid_len = MAXFIDSZ;
 981                 error = vop_fid_pseudo(vp, &fid);
 982                 if (error) {
 983                         VN_RELE(vp);
 984                         return (puterrno4(error));
 985                 }
 986
 987                 if (dotdot)
 988                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
 989                 else
 990                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
 991
 992                 if (exi == NULL) {
 993                         if (did_traverse == TRUE) {
 994                                 /*
 995                                  * If this vnode is a mounted-on vnode,
 996                                  * but the mounted-on file system is not
 997                                  * exported, send back the secinfo for
 998                                  * the exported node that the mounted-on
 999                                  * vnode lives in.
1000                                  */
1001                                 exi = cs->exi;
1002                         } else {
1003                                 VN_RELE(vp);
1004                                 return (puterrno4(EACCES));
1005                         }
1006                 }
1007         } else {
1008                 exi = cs->exi;
1009         }
1010         ASSERT(exi != NULL);
1011
1012
1013         /*
1014          * Create the secinfo result based on the security information
1015          * from the exportinfo structure (exi).
1016          *
1017          * Return all flavors for a pseudo node.
1018          * For a real export node, return the flavor that the client
1019          * has access with.
1020          */
1021         ASSERT(RW_LOCK_HELD(&exported_lock));
1022         if (PSEUDO(exi)) {
1023                 count = exi->exi_export.ex_seccnt; /* total sec count */
1024                 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1025                 secp = exi->exi_export.ex_secinfo;
1026
1027                 for (i = 0; i < count; i++) {
1028                         si = &secp[i].s_secinfo;
1029                         resok_val[i].flavor = si->sc_rpcnum;
1030                         if (resok_val[i].flavor == RPCSEC_GSS) {
1031                                 rpcsec_gss_info *info;
1032
1033                                 info = &resok_val[i].flavor_info;
1034                                 info->qop = si->sc_qop;
1035                                 info->service = (rpc_gss_svc_t)si->sc_service;
1036
1037                                 /* get oid opaque data */
1038                                 info->oid.sec_oid4_len =
1039                                     si->sc_gss_mech_type->length;
1040                                 info->oid.sec_oid4_val = kmem_alloc(
1041                                     si->sc_gss_mech_type->length, KM_SLEEP);
1042                                 bcopy(
1043                                     si->sc_gss_mech_type->elements,
1044                                     info->oid.sec_oid4_val,
1045                                     info->oid.sec_oid4_len);
1046                         }
1047                 }
1048                 resp->SECINFO4resok_len = count;
1049                 resp->SECINFO4resok_val = resok_val;
1050         } else {
1051                 int ret_cnt = 0, k = 0;
1052                 int *flavor_list;
1053
1054                 count = exi->exi_export.ex_seccnt; /* total sec count */
1055                 secp = exi->exi_export.ex_secinfo;
1056
1057                 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1058                 /* find out which flavors to return */
1059                 for (i = 0; i < count; i ++) {
1060                         int access, flavor, perm;
1061
1062                         flavor = secp[i].s_secinfo.sc_nfsnum;
1063                         perm = secp[i].s_flags;
1064
1065                         access = nfsauth4_secinfo_access(exi, cs->req,
1066                             flavor, perm);
1067
1068                         if (! (access & NFSAUTH_DENIED) &&
1069                             ! (access & NFSAUTH_WRONGSEC)) {
1070                                 flavor_list[ret_cnt] = flavor;
1071                                 ret_cnt++;
1072                         }
1073                 }
1074
1075                 /* Create the returning SECINFO value */
1076                 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1077
1078                 for (i = 0; i < count; i++) {
1079                         /*
1080                          * If the flavor is in the flavor list,
1081                          * fill in resok_val.
1082                          */
1083                         si = &secp[i].s_secinfo;
1084                         if (in_flavor_list(si->sc_nfsnum,
1085                             flavor_list, ret_cnt)) {
1086                                 resok_val[k].flavor = si->sc_rpcnum;
1087                                 if (resok_val[k].flavor == RPCSEC_GSS) {
1088                                         rpcsec_gss_info *info;
1089
1090                                         info = &resok_val[k].flavor_info;
1091                                         info->qop = si->sc_qop;
1092                                         info->service = (rpc_gss_svc_t)
1093                                             si->sc_service;
1094
1095                                         /* get oid opaque data */
1096                                         info->oid.sec_oid4_len =
1097                                             si->sc_gss_mech_type->length;
1098                                         info->oid.sec_oid4_val = kmem_alloc(
1099                                             si->sc_gss_mech_type->length,
1100                                             KM_SLEEP);
1101                                         bcopy(si->sc_gss_mech_type->elements,
1102                                             info->oid.sec_oid4_val,
1103                                             info->oid.sec_oid4_len);
1104                                 }
1105                                 k++;
1106                         }
1107                         if (k >= ret_cnt)
1108                                 break;
1109                 }
1110                 resp->SECINFO4resok_len = ret_cnt;
1111                 resp->SECINFO4resok_val = resok_val;
1112                 kmem_free(flavor_list, count * sizeof (int));
1113         }
1114
1115         VN_RELE(vp);
1116         return (NFS4_OK);
1117 }
1118
1119 /*
1120  * SECINFO (Operation 33): Obtain required security information on
1121  * the component name in the format of (security-mechanism-oid, qop, service)
1122  * triplets.
1123  */
1124 /* ARGSUSED */
1125 static void
1126 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1127     struct compound_state *cs)
1128 {
1129         SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1130         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1131         utf8string *utfnm = &args->name;
1132         uint_t len;
1133         char *nm;
1134         struct sockaddr *ca;
1135         char *name = NULL;
1136         nfsstat4 status = NFS4_OK;
1137
1138         DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1139             SECINFO4args *, args);
1140
1141         /*
1142          * Current file handle (cfh) should have been set before getting
1143          * into this function. If not, return error.
1144          */
1145         if (cs->vp == NULL) {
1146                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1147                 goto out;
1148         }
1149
1150         if (cs->vp->v_type != VDIR) {
1151                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1152                 goto out;
1153         }
1154
1155         /*
1156          * Verify the component name. If failed, error out, but
1157          * do not error out if the component name is a "..".
1158          * SECINFO will return its parents secinfo data for SECINFO "..".
1159          */
1160         status = utf8_dir_verify(utfnm);
1161         if (status != NFS4_OK) {
1162                 if (utfnm->utf8string_len != 2 ||
1163                     utfnm->utf8string_val[0] != '.' ||
1164                     utfnm->utf8string_val[1] != '.') {
1165                         *cs->statusp = resp->status = status;
1166                         goto out;
1167                 }
1168         }
1169
1170         nm = utf8_to_str(utfnm, &len, NULL);
1171         if (nm == NULL) {
1172                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1173                 goto out;
1174         }
1175
1176         if (len > MAXNAMELEN) {
1177                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1178                 kmem_free(nm, len);
1179                 goto out;
1180         }
1181
1182         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1183         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1184             MAXPATHLEN  + 1);
1185
1186         if (name == NULL) {
1187                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1188                 kmem_free(nm, len);
1189                 goto out;
1190         }
1191
1192
1193         *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1194
1195         if (name != nm)
1196                 kmem_free(name, MAXPATHLEN + 1);
1197         kmem_free(nm, len);
1198
1199 out:
1200         DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1201             SECINFO4res *, resp);
1202 }
1203
1204 /*
1205  * Free SECINFO result.
1206  */
1207 /* ARGSUSED */
1208 static void
1209 rfs4_op_secinfo_free(nfs_resop4 *resop)
1210 {
1211         SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1212         int count, i;
1213         secinfo4 *resok_val;
1214
1215         /* If this is not an Ok result, nothing to free. */
1216         if (resp->status != NFS4_OK) {
1217                 return;
1218         }
1219
1220         count = resp->SECINFO4resok_len;
1221         resok_val = resp->SECINFO4resok_val;
1222
1223         for (i = 0; i < count; i++) {
1224                 if (resok_val[i].flavor == RPCSEC_GSS) {
1225                         rpcsec_gss_info *info;
1226
1227                         info = &resok_val[i].flavor_info;
1228                         kmem_free(info->oid.sec_oid4_val,
1229                             info->oid.sec_oid4_len);
1230                 }
1231         }
1232         kmem_free(resok_val, count * sizeof (secinfo4));
1233         resp->SECINFO4resok_len = 0;
1234         resp->SECINFO4resok_val = NULL;
1235 }
1236
1237 /* ARGSUSED */
1238 static void
1239 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1240     struct compound_state *cs)
1241 {
1242         ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1243         ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1244         int error;
1245         vnode_t *vp;
1246         struct vattr va;
1247         int checkwriteperm;
1248         cred_t *cr = cs->cr;
1249         bslabel_t *clabel, *slabel;
1250         ts_label_t *tslabel;
1251         boolean_t admin_low_client;
1252
1253         DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1254             ACCESS4args *, args);
1255
1256 #if 0   /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1257         if (cs->access == CS_ACCESS_DENIED) {
1258                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1259                 goto out;
1260         }
1261 #endif
1262         if (cs->vp == NULL) {
1263                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1264                 goto out;
1265         }
1266
1267         ASSERT(cr != NULL);
1268
1269         vp = cs->vp;
1270
1271         /*
1272          * If the file system is exported read only, it is not appropriate
1273          * to check write permissions for regular files and directories.
1274          * Special files are interpreted by the client, so the underlying
1275          * permissions are sent back to the client for interpretation.
1276          */
1277         if (rdonly4(cs->exi, cs->vp, req) &&
1278             (vp->v_type == VREG || vp->v_type == VDIR))
1279                 checkwriteperm = 0;
1280         else
1281                 checkwriteperm = 1;
1282
1283         /*
1284          * XXX
1285          * We need the mode so that we can correctly determine access
1286          * permissions relative to a mandatory lock file.  Access to
1287          * mandatory lock files is denied on the server, so it might
1288          * as well be reflected to the server during the open.
1289          */
1290         va.va_mask = AT_MODE;
1291         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1292         if (error) {
1293                 *cs->statusp = resp->status = puterrno4(error);
1294                 goto out;
1295         }
1296         resp->access = 0;
1297         resp->supported = 0;
1298
1299         if (is_system_labeled()) {
1300                 ASSERT(req->rq_label != NULL);
1301                 clabel = req->rq_label;
1302                 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1303                     "got client label from request(1)",
1304                     struct svc_req *, req);
1305                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1306                         if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1307                                 *cs->statusp = resp->status = puterrno4(EACCES);
1308                                 goto out;
1309                         }
1310                         slabel = label2bslabel(tslabel);
1311                         DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1312                             char *, "got server label(1) for vp(2)",
1313                             bslabel_t *, slabel, vnode_t *, vp);
1314
1315                         admin_low_client = B_FALSE;
1316                 } else
1317                         admin_low_client = B_TRUE;
1318         }
1319
1320         if (args->access & ACCESS4_READ) {
1321                 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1322                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1323                     (!is_system_labeled() || admin_low_client ||
1324                     bldominates(clabel, slabel)))
1325                         resp->access |= ACCESS4_READ;
1326                 resp->supported |= ACCESS4_READ;
1327         }
1328         if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1329                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1330                 if (!error && (!is_system_labeled() || admin_low_client ||
1331                     bldominates(clabel, slabel)))
1332                         resp->access |= ACCESS4_LOOKUP;
1333                 resp->supported |= ACCESS4_LOOKUP;
1334         }
1335         if (checkwriteperm &&
1336             (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1337                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1338                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1339                     (!is_system_labeled() || admin_low_client ||
1340                     blequal(clabel, slabel)))
1341                         resp->access |=
1342                             (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1343                 resp->supported |=
1344                     resp->access & (ACCESS4_MODIFY | ACCESS4_EXTEND);
1345         }
1346
1347         if (checkwriteperm &&
1348             (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1349                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1350                 if (!error && (!is_system_labeled() || admin_low_client ||
1351                     blequal(clabel, slabel)))
1352                         resp->access |= ACCESS4_DELETE;
1353                 resp->supported |= ACCESS4_DELETE;
1354         }
1355         if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1356                 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1357                 if (!error && !MANDLOCK(vp, va.va_mode) &&
1358                     (!is_system_labeled() || admin_low_client ||
1359                     bldominates(clabel, slabel)))
1360                         resp->access |= ACCESS4_EXECUTE;
1361                 resp->supported |= ACCESS4_EXECUTE;
1362         }
1363
1364         if (is_system_labeled() && !admin_low_client)
1365                 label_rele(tslabel);
1366
1367         *cs->statusp = resp->status = NFS4_OK;
1368 out:
1369         DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1370             ACCESS4res *, resp);
1371 }
1372
1373 /* ARGSUSED */
1374 static void
1375 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1376     struct compound_state *cs)
1377 {
1378         COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1379         COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1380         int error;
1381         vnode_t *vp = cs->vp;
1382         cred_t *cr = cs->cr;
1383         vattr_t va;
1384
1385         DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1386             COMMIT4args *, args);
1387
1388         if (vp == NULL) {
1389                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1390                 goto out;
1391         }
1392         if (cs->access == CS_ACCESS_DENIED) {
1393                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1394                 goto out;
1395         }
1396
1397         if (args->offset + args->count < args->offset) {
1398                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1399                 goto out;
1400         }
1401
1402         va.va_mask = AT_UID;
1403         error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1404
1405         /*
1406          * If we can't get the attributes, then we can't do the
1407          * right access checking.  So, we'll fail the request.
1408          */
1409         if (error) {
1410                 *cs->statusp = resp->status = puterrno4(error);
1411                 goto out;
1412         }
1413         if (rdonly4(cs->exi, cs->vp, req)) {
1414                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1415                 goto out;
1416         }
1417
1418         if (vp->v_type != VREG) {
1419                 if (vp->v_type == VDIR)
1420                         resp->status = NFS4ERR_ISDIR;
1421                 else
1422                         resp->status = NFS4ERR_INVAL;
1423                 *cs->statusp = resp->status;
1424                 goto out;
1425         }
1426
1427         if (crgetuid(cr) != va.va_uid &&
1428             (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1429                 *cs->statusp = resp->status = puterrno4(error);
1430                 goto out;
1431         }
1432
1433         error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1434
1435         if (error) {
1436                 *cs->statusp = resp->status = puterrno4(error);
1437                 goto out;
1438         }
1439
1440         *cs->statusp = resp->status = NFS4_OK;
1441         resp->writeverf = Write4verf;
1442 out:
1443         DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1444             COMMIT4res *, resp);
1445 }
1446
1447 /*
1448  * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1449  * was completed. It does the nfsv4 create for special files.
1450  */
1451 /* ARGSUSED */
1452 static vnode_t *
1453 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1454     struct compound_state *cs, vattr_t *vap, char *nm)
1455 {
1456         int error;
1457         cred_t *cr = cs->cr;
1458         vnode_t *dvp = cs->vp;
1459         vnode_t *vp = NULL;
1460         int mode;
1461         enum vcexcl excl;
1462
1463         switch (args->type) {
1464         case NF4CHR:
1465         case NF4BLK:
1466                 if (secpolicy_sys_devices(cr) != 0) {
1467                         *cs->statusp = resp->status = NFS4ERR_PERM;
1468                         return (NULL);
1469                 }
1470                 if (args->type == NF4CHR)
1471                         vap->va_type = VCHR;
1472                 else
1473                         vap->va_type = VBLK;
1474                 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1475                     args->ftype4_u.devdata.specdata2);
1476                 vap->va_mask |= AT_RDEV;
1477                 break;
1478         case NF4SOCK:
1479                 vap->va_type = VSOCK;
1480                 break;
1481         case NF4FIFO:
1482                 vap->va_type = VFIFO;
1483                 break;
1484         default:
1485                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1486                 return (NULL);
1487         }
1488
1489         /*
1490          * Must specify the mode.
1491          */
1492         if (!(vap->va_mask & AT_MODE)) {
1493                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1494                 return (NULL);
1495         }
1496
1497         excl = EXCL;
1498
1499         mode = 0;
1500
1501         error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1502         if (error) {
1503                 *cs->statusp = resp->status = puterrno4(error);
1504                 return (NULL);
1505         }
1506         return (vp);
1507 }
1508
1509 /*
1510  * nfsv4 create is used to create non-regular files. For regular files,
1511  * use nfsv4 open.
1512  */
1513 /* ARGSUSED */
1514 static void
1515 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1516     struct compound_state *cs)
1517 {
1518         CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1519         CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1520         int error;
1521         struct vattr bva, iva, iva2, ava, *vap;
1522         cred_t *cr = cs->cr;
1523         vnode_t *dvp = cs->vp;
1524         vnode_t *vp = NULL;
1525         vnode_t *realvp;
1526         char *nm, *lnm;
1527         uint_t len, llen;
1528         int syncval = 0;
1529         struct nfs4_svgetit_arg sarg;
1530         struct nfs4_ntov_table ntov;
1531         struct statvfs64 sb;
1532         nfsstat4 status;
1533         struct sockaddr *ca;
1534         char *name = NULL;
1535         char *lname = NULL;
1536
1537         DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1538             CREATE4args *, args);
1539
1540         resp->attrset = 0;
1541
1542         if (dvp == NULL) {
1543                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1544                 goto out;
1545         }
1546
1547         /*
1548          * If there is an unshared filesystem mounted on this vnode,
1549          * do not allow to create an object in this directory.
1550          */
1551         if (vn_ismntpt(dvp)) {
1552                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1553                 goto out;
1554         }
1555
1556         /* Verify that type is correct */
1557         switch (args->type) {
1558         case NF4LNK:
1559         case NF4BLK:
1560         case NF4CHR:
1561         case NF4SOCK:
1562         case NF4FIFO:
1563         case NF4DIR:
1564                 break;
1565         default:
1566                 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1567                 goto out;
1568         };
1569
1570         if (cs->access == CS_ACCESS_DENIED) {
1571                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1572                 goto out;
1573         }
1574         if (dvp->v_type != VDIR) {
1575                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1576                 goto out;
1577         }
1578         status = utf8_dir_verify(&args->objname);
1579         if (status != NFS4_OK) {
1580                 *cs->statusp = resp->status = status;
1581                 goto out;
1582         }
1583
1584         if (rdonly4(cs->exi, cs->vp, req)) {
1585                 *cs->statusp = resp->status = NFS4ERR_ROFS;
1586                 goto out;
1587         }
1588
1589         /*
1590          * Name of newly created object
1591          */
1592         nm = utf8_to_fn(&args->objname, &len, NULL);
1593         if (nm == NULL) {
1594                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1595                 goto out;
1596         }
1597
1598         if (len > MAXNAMELEN) {
1599                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1600                 kmem_free(nm, len);
1601                 goto out;
1602         }
1603
1604         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1605         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1606             MAXPATHLEN  + 1);
1607
1608         if (name == NULL) {
1609                 *cs->statusp = resp->status = NFS4ERR_INVAL;
1610                 kmem_free(nm, len);
1611                 goto out;
1612         }
1613
1614         resp->attrset = 0;
1615
1616         sarg.sbp = &sb;
1617         sarg.is_referral = B_FALSE;
1618         nfs4_ntov_table_init(&ntov);
1619
1620         status = do_rfs4_set_attrs(&resp->attrset,
1621             &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1622
1623         if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1624                 status = NFS4ERR_INVAL;
1625
1626         if (status != NFS4_OK) {
1627                 *cs->statusp = resp->status = status;
1628                 if (name != nm)
1629                         kmem_free(name, MAXPATHLEN + 1);
1630                 kmem_free(nm, len);
1631                 nfs4_ntov_table_free(&ntov, &sarg);
1632                 resp->attrset = 0;
1633                 goto out;
1634         }
1635
1636         /* Get "before" change value */
1637         bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1638         error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1639         if (error) {
1640                 *cs->statusp = resp->status = puterrno4(error);
1641                 if (name != nm)
1642                         kmem_free(name, MAXPATHLEN + 1);
1643                 kmem_free(nm, len);
1644                 nfs4_ntov_table_free(&ntov, &sarg);
1645                 resp->attrset = 0;
1646                 goto out;
1647         }
1648         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1649
1650         vap = sarg.vap;
1651
1652         /*
1653          * Set the default initial values for attributes when the parent
1654          * directory does not have the VSUID/VSGID bit set and they have
1655          * not been specified in createattrs.
1656          */
1657         if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1658                 vap->va_uid = crgetuid(cr);
1659                 vap->va_mask |= AT_UID;
1660         }
1661         if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1662                 vap->va_gid = crgetgid(cr);
1663                 vap->va_mask |= AT_GID;
1664         }
1665
1666         vap->va_mask |= AT_TYPE;
1667         switch (args->type) {
1668         case NF4DIR:
1669                 vap->va_type = VDIR;
1670                 if ((vap->va_mask & AT_MODE) == 0) {
1671                         vap->va_mode = 0700;    /* default: owner rwx only */
1672                         vap->va_mask |= AT_MODE;
1673                 }
1674                 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1675                 if (error)
1676                         break;
1677
1678                 /*
1679                  * Get the initial "after" sequence number, if it fails,
1680                  * set to zero
1681                  */
1682                 iva.va_mask = AT_SEQ;
1683                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1684                         iva.va_seq = 0;
1685                 break;
1686         case NF4LNK:
1687                 vap->va_type = VLNK;
1688                 if ((vap->va_mask & AT_MODE) == 0) {
1689                         vap->va_mode = 0700;    /* default: owner rwx only */
1690                         vap->va_mask |= AT_MODE;
1691                 }
1692
1693                 /*
1694                  * symlink names must be treated as data
1695                  */
1696                 lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL);
1697
1698                 if (lnm == NULL) {
1699                         *cs->statusp = resp->status = NFS4ERR_INVAL;
1700                         if (name != nm)
1701                                 kmem_free(name, MAXPATHLEN + 1);
1702                         kmem_free(nm, len);
1703                         nfs4_ntov_table_free(&ntov, &sarg);
1704                         resp->attrset = 0;
1705                         goto out;
1706                 }
1707
1708                 if (llen > MAXPATHLEN) {
1709                         *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1710                         if (name != nm)
1711                                 kmem_free(name, MAXPATHLEN + 1);
1712                         kmem_free(nm, len);
1713                         kmem_free(lnm, llen);
1714                         nfs4_ntov_table_free(&ntov, &sarg);
1715                         resp->attrset = 0;
1716                         goto out;
1717                 }
1718
1719                 lname = nfscmd_convname(ca, cs->exi, lnm,
1720                     NFSCMD_CONV_INBOUND, MAXPATHLEN  + 1);
1721
1722                 if (lname == NULL) {
1723                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1724                         if (name != nm)
1725                                 kmem_free(name, MAXPATHLEN + 1);
1726                         kmem_free(nm, len);
1727                         kmem_free(lnm, llen);
1728                         nfs4_ntov_table_free(&ntov, &sarg);
1729                         resp->attrset = 0;
1730                         goto out;
1731                 }
1732
1733                 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1734                 if (lname != lnm)
1735                         kmem_free(lname, MAXPATHLEN + 1);
1736                 kmem_free(lnm, llen);
1737                 if (error)
1738                         break;
1739
1740                 /*
1741                  * Get the initial "after" sequence number, if it fails,
1742                  * set to zero
1743                  */
1744                 iva.va_mask = AT_SEQ;
1745                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1746                         iva.va_seq = 0;
1747
1748                 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1749                     NULL, NULL, NULL);
1750                 if (error)
1751                         break;
1752
1753                 /*
1754                  * va_seq is not safe over VOP calls, check it again
1755                  * if it has changed zero out iva to force atomic = FALSE.
1756                  */
1757                 iva2.va_mask = AT_SEQ;
1758                 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1759                     iva2.va_seq != iva.va_seq)
1760                         iva.va_seq = 0;
1761                 break;
1762         default:
1763                 /*
1764                  * probably a special file.
1765                  */
1766                 if ((vap->va_mask & AT_MODE) == 0) {
1767                         vap->va_mode = 0600;    /* default: owner rw only */
1768                         vap->va_mask |= AT_MODE;
1769                 }
1770                 syncval = FNODSYNC;
1771                 /*
1772                  * We know this will only generate one VOP call
1773                  */
1774                 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1775
1776                 if (vp == NULL) {
1777                         if (name != nm)
1778                                 kmem_free(name, MAXPATHLEN + 1);
1779                         kmem_free(nm, len);
1780                         nfs4_ntov_table_free(&ntov, &sarg);
1781                         resp->attrset = 0;
1782                         goto out;
1783                 }
1784
1785                 /*
1786                  * Get the initial "after" sequence number, if it fails,
1787                  * set to zero
1788                  */
1789                 iva.va_mask = AT_SEQ;
1790                 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1791                         iva.va_seq = 0;
1792
1793                 break;
1794         }
1795         if (name != nm)
1796                 kmem_free(name, MAXPATHLEN + 1);
1797         kmem_free(nm, len);
1798
1799         if (error) {
1800                 *cs->statusp = resp->status = puterrno4(error);
1801         }
1802
1803         /*
1804          * Force modified data and metadata out to stable storage.
1805          */
1806         (void) VOP_FSYNC(dvp, 0, cr, NULL);
1807
1808         if (resp->status != NFS4_OK) {
1809                 if (vp != NULL)
1810                         VN_RELE(vp);
1811                 nfs4_ntov_table_free(&ntov, &sarg);
1812                 resp->attrset = 0;
1813                 goto out;
1814         }
1815
1816         /*
1817          * Finish setup of cinfo response, "before" value already set.
1818          * Get "after" change value, if it fails, simply return the
1819          * before value.
1820          */
1821         ava.va_mask = AT_CTIME|AT_SEQ;
1822         if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1823                 ava.va_ctime = bva.va_ctime;
1824                 ava.va_seq = 0;
1825         }
1826         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1827
1828         /*
1829          * True verification that object was created with correct
1830          * attrs is impossible.  The attrs could have been changed
1831          * immediately after object creation.  If attributes did
1832          * not verify, the only recourse for the server is to
1833          * destroy the object.  Maybe if some attrs (like gid)
1834          * are set incorrectly, the object should be destroyed;
1835          * however, seems bad as a default policy.  Do we really
1836          * want to destroy an object over one of the times not
1837          * verifying correctly?  For these reasons, the server
1838          * currently sets bits in attrset for createattrs
1839          * that were set; however, no verification is done.
1840          *
1841          * vmask_to_nmask accounts for vattr bits set on create
1842          *      [do_rfs4_set_attrs() only sets resp bits for
1843          *       non-vattr/vfs bits.]
1844          * Mask off any bits set by default so as not to return
1845          * more attrset bits than were requested in createattrs
1846          */
1847         nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1848         resp->attrset &= args->createattrs.attrmask;
1849         nfs4_ntov_table_free(&ntov, &sarg);
1850
1851         error = makefh4(&cs->fh, vp, cs->exi);
1852         if (error) {
1853                 *cs->statusp = resp->status = puterrno4(error);
1854         }
1855
1856         /*
1857          * The cinfo.atomic = TRUE only if we got no errors, we have
1858          * non-zero va_seq's, and it has incremented by exactly one
1859          * during the creation and it didn't change during the VOP_LOOKUP
1860          * or VOP_FSYNC.
1861          */
1862         if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1863             iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1864                 resp->cinfo.atomic = TRUE;
1865         else
1866                 resp->cinfo.atomic = FALSE;
1867
1868         /*
1869          * Force modified metadata out to stable storage.
1870          *
1871          * if a underlying vp exists, pass it to VOP_FSYNC
1872          */
1873         if (VOP_REALVP(vp, &realvp, NULL) == 0)
1874                 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1875         else
1876                 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1877
1878         if (resp->status != NFS4_OK) {
1879                 VN_RELE(vp);
1880                 goto out;
1881         }
1882         if (cs->vp)
1883                 VN_RELE(cs->vp);
1884
1885         cs->vp = vp;
1886         *cs->statusp = resp->status = NFS4_OK;
1887 out:
1888         DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1889             CREATE4res *, resp);
1890 }
1891
1892 /*ARGSUSED*/
1893 static void
1894 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1895     struct compound_state *cs)
1896 {
1897         DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1898             DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1899
1900         rfs4_op_inval(argop, resop, req, cs);
1901
1902         DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1903             DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1904 }
1905
1906 /*ARGSUSED*/
1907 static void
1908 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1909     struct compound_state *cs)
1910 {
1911         DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1912         DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1913         rfs4_deleg_state_t *dsp;
1914         nfsstat4 status;
1915
1916         DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1917             DELEGRETURN4args *, args);
1918
1919         status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1920         resp->status = *cs->statusp = status;
1921         if (status != NFS4_OK)
1922                 goto out;
1923
1924         /* Ensure specified filehandle matches */
1925         if (cs->vp != dsp->rds_finfo->rf_vp) {
1926                 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1927         } else
1928                 rfs4_return_deleg(dsp, FALSE);
1929
1930         rfs4_update_lease(dsp->rds_client);
1931
1932         rfs4_deleg_state_rele(dsp);
1933 out:
1934         DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
1935             DELEGRETURN4res *, resp);
1936 }
1937
1938 /*
1939  * Check to see if a given "flavor" is an explicitly shared flavor.
1940  * The assumption of this routine is the "flavor" is already a valid
1941  * flavor in the secinfo list of "exi".
1942  *
1943  *      e.g.
1944  *              # share -o sec=flavor1 /export
1945  *              # share -o sec=flavor2 /export/home
1946  *
1947  *              flavor2 is not an explicitly shared flavor for /export,
1948  *              however it is in the secinfo list for /export thru the
1949  *              server namespace setup.
1950  */
1951 int
1952 is_exported_sec(int flavor, struct exportinfo *exi)
1953 {
1954         int     i;
1955         struct secinfo *sp;
1956
1957         sp = exi->exi_export.ex_secinfo;
1958         for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1959                 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1960                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1961                         return (SEC_REF_EXPORTED(&sp[i]));
1962                 }
1963         }
1964
1965         /* Should not reach this point based on the assumption */
1966         return (0);
1967 }
1968
1969 /*
1970  * Check if the security flavor used in the request matches what is
1971  * required at the export point or at the root pseudo node (exi_root).
1972  *
1973  * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1974  *
1975  */
1976 static int
1977 secinfo_match_or_authnone(struct compound_state *cs)
1978 {
1979         int     i;
1980         struct secinfo *sp;
1981
1982         /*
1983          * Check cs->nfsflavor (from the request) against
1984          * the current export data in cs->exi.
1985          */
1986         sp = cs->exi->exi_export.ex_secinfo;
1987         for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1988                 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1989                     sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1990                         return (1);
1991         }
1992
1993         return (0);
1994 }
1995
1996 /*
1997  * Check the access authority for the client and return the correct error.
1998  */
1999 nfsstat4
2000 call_checkauth4(struct compound_state *cs, struct svc_req *req)
2001 {
2002         int     authres;
2003
2004         /*
2005          * First, check if the security flavor used in the request
2006          * are among the flavors set in the server namespace.
2007          */
2008         if (!secinfo_match_or_authnone(cs)) {
2009                 *cs->statusp = NFS4ERR_WRONGSEC;
2010                 return (*cs->statusp);
2011         }
2012
2013         authres = checkauth4(cs, req);
2014
2015         if (authres > 0) {
2016                 *cs->statusp = NFS4_OK;
2017                 if (! (cs->access & CS_ACCESS_LIMITED))
2018                         cs->access = CS_ACCESS_OK;
2019         } else if (authres == 0) {
2020                 *cs->statusp = NFS4ERR_ACCESS;
2021         } else if (authres == -2) {
2022                 *cs->statusp = NFS4ERR_WRONGSEC;
2023         } else {
2024                 *cs->statusp = NFS4ERR_DELAY;
2025         }
2026         return (*cs->statusp);
2027 }
2028
2029 /*
2030  * bitmap4_to_attrmask is called by getattr and readdir.
2031  * It sets up the vattr mask and determines whether vfsstat call is needed
2032  * based on the input bitmap.
2033  * Returns nfsv4 status.
2034  */
2035 static nfsstat4
2036 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2037 {
2038         int i;
2039         uint_t  va_mask;
2040         struct statvfs64 *sbp = sargp->sbp;
2041
2042         sargp->sbp = NULL;
2043         sargp->flag = 0;
2044         sargp->rdattr_error = NFS4_OK;
2045         sargp->mntdfid_set = FALSE;
2046         if (sargp->cs->vp)
2047                 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2048                     FH4_ATTRDIR | FH4_NAMEDATTR);
2049         else
2050                 sargp->xattr = 0;
2051
2052         /*
2053          * Set rdattr_error_req to true if return error per
2054          * failed entry rather than fail the readdir.
2055          */
2056         if (breq & FATTR4_RDATTR_ERROR_MASK)
2057                 sargp->rdattr_error_req = 1;
2058         else
2059                 sargp->rdattr_error_req = 0;
2060
2061         /*
2062          * generate the va_mask
2063          * Handle the easy cases first
2064          */
2065         switch (breq) {
2066         case NFS4_NTOV_ATTR_MASK:
2067                 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2068                 return (NFS4_OK);
2069
2070         case NFS4_FS_ATTR_MASK:
2071                 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2072                 sargp->sbp = sbp;
2073                 return (NFS4_OK);
2074
2075         case NFS4_NTOV_ATTR_CACHE_MASK:
2076                 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2077                 return (NFS4_OK);
2078
2079         case FATTR4_LEASE_TIME_MASK:
2080                 sargp->vap->va_mask = 0;
2081                 return (NFS4_OK);
2082
2083         default:
2084                 va_mask = 0;
2085                 for (i = 0; i < nfs4_ntov_map_size; i++) {
2086                         if ((breq & nfs4_ntov_map[i].fbit) &&
2087                             nfs4_ntov_map[i].vbit)
2088                                 va_mask |= nfs4_ntov_map[i].vbit;
2089                 }
2090
2091                 /*
2092                  * Check is vfsstat is needed
2093                  */
2094                 if (breq & NFS4_FS_ATTR_MASK)
2095                         sargp->sbp = sbp;
2096
2097                 sargp->vap->va_mask = va_mask;
2098                 return (NFS4_OK);
2099         }
2100         /* NOTREACHED */
2101 }
2102
2103 /*
2104  * bitmap4_get_sysattrs is called by getattr and readdir.
2105  * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2106  * Returns nfsv4 status.
2107  */
2108 static nfsstat4
2109 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2110 {
2111         int error;
2112         struct compound_state *cs = sargp->cs;
2113         vnode_t *vp = cs->vp;
2114
2115         if (sargp->sbp != NULL) {
2116                 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2117                         sargp->sbp = NULL;      /* to identify error */
2118                         return (puterrno4(error));
2119                 }
2120         }
2121
2122         return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2123 }
2124
2125 static void
2126 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2127 {
2128         ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2129             KM_SLEEP);
2130         ntovp->attrcnt = 0;
2131         ntovp->vfsstat = FALSE;
2132 }
2133
2134 static void
2135 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2136     struct nfs4_svgetit_arg *sargp)
2137 {
2138         int i;
2139         union nfs4_attr_u *na;
2140         uint8_t *amap;
2141
2142         /*
2143          * XXX Should do the same checks for whether the bit is set
2144          */
2145         for (i = 0, na = ntovp->na, amap = ntovp->amap;
2146             i < ntovp->attrcnt; i++, na++, amap++) {
2147                 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2148                     NFS4ATTR_FREEIT, sargp, na);
2149         }
2150         if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2151                 /*
2152                  * xdr_free for getattr will be done later
2153                  */
2154                 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2155                     i < ntovp->attrcnt; i++, na++, amap++) {
2156                         xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2157                 }
2158         }
2159         kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2160 }
2161
2162 /*
2163  * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2164  */
2165 static nfsstat4
2166 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2167     struct nfs4_svgetit_arg *sargp)
2168 {
2169         int error = 0;
2170         int i, k;
2171         struct nfs4_ntov_table ntov;
2172         XDR xdr;
2173         ulong_t xdr_size;
2174         char *xdr_attrs;
2175         nfsstat4 status = NFS4_OK;
2176         nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2177         union nfs4_attr_u *na;
2178         uint8_t *amap;
2179
2180         sargp->op = NFS4ATTR_GETIT;
2181         sargp->flag = 0;
2182
2183         fattrp->attrmask = 0;
2184         /* if no bits requested, then return empty fattr4 */
2185         if (breq == 0) {
2186                 fattrp->attrlist4_len = 0;
2187                 fattrp->attrlist4 = NULL;
2188                 return (NFS4_OK);
2189         }
2190
2191         /*
2192          * return NFS4ERR_INVAL when client requests write-only attrs
2193          */
2194         if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2195                 return (NFS4ERR_INVAL);
2196
2197         nfs4_ntov_table_init(&ntov);
2198         na = ntov.na;
2199         amap = ntov.amap;
2200
2201         /*
2202          * Now loop to get or verify the attrs
2203          */
2204         for (i = 0; i < nfs4_ntov_map_size; i++) {
2205                 if (breq & nfs4_ntov_map[i].fbit) {
2206                         if ((*nfs4_ntov_map[i].sv_getit)(
2207                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2208
2209                                 error = (*nfs4_ntov_map[i].sv_getit)(
2210                                     NFS4ATTR_GETIT, sargp, na);
2211
2212                                 /*
2213                                  * Possible error values:
2214                                  * >0 if sv_getit failed to
2215                                  * get the attr; 0 if succeeded;
2216                                  * <0 if rdattr_error and the
2217                                  * attribute cannot be returned.
2218                                  */
2219                                 if (error && !(sargp->rdattr_error_req))
2220                                         goto done;
2221                                 /*
2222                                  * If error then just for entry
2223                                  */
2224                                 if (error == 0) {
2225                                         fattrp->attrmask |=
2226                                             nfs4_ntov_map[i].fbit;
2227                                         *amap++ =
2228                                             (uint8_t)nfs4_ntov_map[i].nval;
2229                                         na++;
2230                                         (ntov.attrcnt)++;
2231                                 } else if ((error > 0) &&
2232                                     (sargp->rdattr_error == NFS4_OK)) {
2233                                         sargp->rdattr_error = puterrno4(error);
2234                                 }
2235                                 error = 0;
2236                         }
2237                 }
2238         }
2239
2240         /*
2241          * If rdattr_error was set after the return value for it was assigned,
2242          * update it.
2243          */
2244         if (prev_rdattr_error != sargp->rdattr_error) {
2245                 na = ntov.na;
2246                 amap = ntov.amap;
2247                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2248                         k = *amap;
2249                         if (k < FATTR4_RDATTR_ERROR) {
2250                                 continue;
2251                         }
2252                         if ((k == FATTR4_RDATTR_ERROR) &&
2253                             ((*nfs4_ntov_map[k].sv_getit)(
2254                             NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2255
2256                                 (void) (*nfs4_ntov_map[k].sv_getit)(
2257                                     NFS4ATTR_GETIT, sargp, na);
2258                         }
2259                         break;
2260                 }
2261         }
2262
2263         xdr_size = 0;
2264         na = ntov.na;
2265         amap = ntov.amap;
2266         for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2267                 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2268         }
2269
2270         fattrp->attrlist4_len = xdr_size;
2271         if (xdr_size) {
2272                 /* freed by rfs4_op_getattr_free() */
2273                 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2274
2275                 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2276
2277                 na = ntov.na;
2278                 amap = ntov.amap;
2279                 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2280                         if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2281                                 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2282                                     int, *amap);
2283                                 status = NFS4ERR_SERVERFAULT;
2284                                 break;
2285                         }
2286                 }
2287                 /* xdrmem_destroy(&xdrs); */    /* NO-OP */
2288         } else {
2289                 fattrp->attrlist4 = NULL;
2290         }
2291 done:
2292
2293         nfs4_ntov_table_free(&ntov, sargp);
2294
2295         if (error != 0)
2296                 status = puterrno4(error);
2297
2298         return (status);
2299 }
2300
2301 /* ARGSUSED */
2302 static void
2303 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2304     struct compound_state *cs)
2305 {
2306         GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2307         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2308         struct nfs4_svgetit_arg sarg;
2309         struct statvfs64 sb;
2310         nfsstat4 status;
2311
2312         DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2313             GETATTR4args *, args);
2314
2315         if (cs->vp == NULL) {
2316                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2317                 goto out;
2318         }
2319
2320         if (cs->access == CS_ACCESS_DENIED) {
2321                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2322                 goto out;
2323         }
2324
2325         sarg.sbp = &sb;
2326         sarg.cs = cs;
2327         sarg.is_referral = B_FALSE;
2328
2329         status = bitmap4_to_attrmask(args->attr_request, &sarg);
2330         if (status == NFS4_OK) {
2331
2332                 status = bitmap4_get_sysattrs(&sarg);
2333                 if (status == NFS4_OK) {
2334
2335                         /* Is this a referral? */
2336                         if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2337                                 /* Older V4 Solaris client sees a link */
2338                                 if (client_is_downrev(req))
2339                                         sarg.vap->va_type = VLNK;
2340                                 else
2341                                         sarg.is_referral = B_TRUE;
2342                         }
2343
2344                         status = do_rfs4_op_getattr(args->attr_request,
2345                             &resp->obj_attributes, &sarg);
2346                 }
2347         }
2348         *cs->statusp = resp->status = status;
2349 out:
2350         DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2351             GETATTR4res *, resp);
2352 }
2353
2354 static void
2355 rfs4_op_getattr_free(nfs_resop4 *resop)
2356 {
2357         GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2358
2359         nfs4_fattr4_free(&resp->obj_attributes);
2360 }
2361
2362 /* ARGSUSED */
2363 static void
2364 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2365     struct compound_state *cs)
2366 {
2367         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2368
2369         DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2370
2371         if (cs->vp == NULL) {
2372                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2373                 goto out;
2374         }
2375         if (cs->access == CS_ACCESS_DENIED) {
2376                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2377                 goto out;
2378         }
2379
2380         /* check for reparse point at the share point */
2381         if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2382                 /* it's all bad */
2383                 cs->exi->exi_moved = 1;
2384                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2385                 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2386                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2387                 return;
2388         }
2389
2390         /* check for reparse point at vp */
2391         if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2392                 /* it's not all bad */
2393                 *cs->statusp = resp->status = NFS4ERR_MOVED;
2394                 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2395                     vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2396                 return;
2397         }
2398
2399         resp->object.nfs_fh4_val =
2400             kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2401         nfs_fh4_copy(&cs->fh, &resp->object);
2402         *cs->statusp = resp->status = NFS4_OK;
2403 out:
2404         DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2405             GETFH4res *, resp);
2406 }
2407
2408 static void
2409 rfs4_op_getfh_free(nfs_resop4 *resop)
2410 {
2411         GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2412
2413         if (resp->status == NFS4_OK &&
2414             resp->object.nfs_fh4_val != NULL) {
2415                 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2416                 resp->object.nfs_fh4_val = NULL;
2417                 resp->object.nfs_fh4_len = 0;
2418         }
2419 }
2420
2421 /*
2422  * illegal: args: void
2423  *          res : status (NFS4ERR_OP_ILLEGAL)
2424  */
2425 /* ARGSUSED */
2426 static void
2427 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2428     struct svc_req *req, struct compound_state *cs)
2429 {
2430         ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2431
2432         resop->resop = OP_ILLEGAL;
2433         *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2434 }
2435
2436 /*
2437  * link: args: SAVED_FH: file, CURRENT_FH: target directory
2438  *       res: status. If success - CURRENT_FH unchanged, return change_info
2439  */
2440 /* ARGSUSED */
2441 static void
2442 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2443     struct compound_state *cs)
2444 {
2445         LINK4args *args = &argop->nfs_argop4_u.oplink;
2446         LINK4res *resp = &resop->nfs_resop4_u.oplink;
2447         int error;
2448         vnode_t *vp;
2449         vnode_t *dvp;
2450         struct vattr bdva, idva, adva;
2451         char *nm;
2452         uint_t  len;
2453         struct sockaddr *ca;
2454         char *name = NULL;
2455         nfsstat4 status;
2456
2457         DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2458             LINK4args *, args);
2459
2460         /* SAVED_FH: source object */
2461         vp = cs->saved_vp;
2462         if (vp == NULL) {
2463                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2464                 goto out;
2465         }
2466
2467         /* CURRENT_FH: target directory */
2468         dvp = cs->vp;
2469         if (dvp == NULL) {
2470                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2471                 goto out;
2472         }
2473
2474         /*
2475          * If there is a non-shared filesystem mounted on this vnode,
2476          * do not allow to link any file in this directory.
2477          */
2478         if (vn_ismntpt(dvp)) {
2479                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2480                 goto out;
2481         }
2482
2483         if (cs->access == CS_ACCESS_DENIED) {
2484                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2485                 goto out;
2486         }
2487
2488         /* Check source object's type validity */
2489         if (vp->v_type == VDIR) {
2490                 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2491                 goto out;
2492         }
2493
2494         /* Check target directory's type */
2495         if (dvp->v_type != VDIR) {
2496                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2497                 goto out;
2498         }
2499
2500         if (cs->saved_exi != cs->exi) {
2501                 *cs->statusp = resp->status = NFS4ERR_XDEV;
2502                 goto out;
2503         }
2504
2505         status = utf8_dir_verify(&args->newname);
2506         if (status != NFS4_OK) {
2507                 *cs->statusp = resp->status = status;
2508                 goto out;
2509         }
2510
2511         nm = utf8_to_fn(&args->newname, &len, NULL);
2512         if (nm == NULL) {
2513                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2514                 goto out;
2515         }
2516
2517         if (len > MAXNAMELEN) {
2518                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2519                 kmem_free(nm, len);
2520                 goto out;
2521         }
2522
2523         if (rdonly4(cs->exi, cs->vp, req)) {
2524                 *cs->statusp = resp->status = NFS4ERR_ROFS;
2525                 kmem_free(nm, len);
2526                 goto out;
2527         }
2528
2529         /* Get "before" change value */
2530         bdva.va_mask = AT_CTIME|AT_SEQ;
2531         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2532         if (error) {
2533                 *cs->statusp = resp->status = puterrno4(error);
2534                 kmem_free(nm, len);
2535                 goto out;
2536         }
2537
2538         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2539         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2540             MAXPATHLEN  + 1);
2541
2542         if (name == NULL) {
2543                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2544                 kmem_free(nm, len);
2545                 goto out;
2546         }
2547
2548         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2549
2550         error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2551
2552         if (nm != name)
2553                 kmem_free(name, MAXPATHLEN + 1);
2554         kmem_free(nm, len);
2555
2556         /*
2557          * Get the initial "after" sequence number, if it fails, set to zero
2558          */
2559         idva.va_mask = AT_SEQ;
2560         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2561                 idva.va_seq = 0;
2562
2563         /*
2564          * Force modified data and metadata out to stable storage.
2565          */
2566         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2567         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2568
2569         if (error) {
2570                 *cs->statusp = resp->status = puterrno4(error);
2571                 goto out;
2572         }
2573
2574         /*
2575          * Get "after" change value, if it fails, simply return the
2576          * before value.
2577          */
2578         adva.va_mask = AT_CTIME|AT_SEQ;
2579         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2580                 adva.va_ctime = bdva.va_ctime;
2581                 adva.va_seq = 0;
2582         }
2583
2584         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2585
2586         /*
2587          * The cinfo.atomic = TRUE only if we have
2588          * non-zero va_seq's, and it has incremented by exactly one
2589          * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2590          */
2591         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2592             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2593                 resp->cinfo.atomic = TRUE;
2594         else
2595                 resp->cinfo.atomic = FALSE;
2596
2597         *cs->statusp = resp->status = NFS4_OK;
2598 out:
2599         DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2600             LINK4res *, resp);
2601 }
2602
2603 /*
2604  * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2605  */
2606
2607 /* ARGSUSED */
2608 static nfsstat4
2609 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2610 {
2611         int error;
2612         int different_export = 0;
2613         vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL;
2614         struct exportinfo *exi = NULL, *pre_exi = NULL;
2615         nfsstat4 stat;
2616         fid_t fid;
2617         int attrdir, dotdot, walk;
2618         bool_t is_newvp = FALSE;
2619
2620         if (cs->vp->v_flag & V_XATTRDIR) {
2621                 attrdir = 1;
2622                 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2623         } else {
2624                 attrdir = 0;
2625                 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2626         }
2627
2628         dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2629
2630         /*
2631          * If dotdotting, then need to check whether it's
2632          * above the root of a filesystem, or above an
2633          * export point.
2634          */
2635         if (dotdot) {
2636
2637                 /*
2638                  * If dotdotting at the root of a filesystem, then
2639                  * need to traverse back to the mounted-on filesystem
2640                  * and do the dotdot lookup there.
2641                  */
2642                 if (cs->vp->v_flag & VROOT) {
2643
2644                         /*
2645                          * If at the system root, then can
2646                          * go up no further.
2647                          */
2648                         if (VN_CMP(cs->vp, rootdir))
2649                                 return (puterrno4(ENOENT));
2650
2651                         /*
2652                          * Traverse back to the mounted-on filesystem
2653                          */
2654                         cs->vp = untraverse(cs->vp);
2655
2656                         /*
2657                          * Set the different_export flag so we remember
2658                          * to pick up a new exportinfo entry for
2659                          * this new filesystem.
2660                          */
2661                         different_export = 1;
2662                 } else {
2663
2664                         /*
2665                          * If dotdotting above an export point then set
2666                          * the different_export to get new export info.
2667                          */
2668                         different_export = nfs_exported(cs->exi, cs->vp);
2669                 }
2670         }
2671
2672         error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2673             NULL, NULL, NULL);
2674         if (error)
2675                 return (puterrno4(error));
2676
2677         /*
2678          * If the vnode is in a pseudo filesystem, check whether it is visible.
2679          *
2680          * XXX if the vnode is a symlink and it is not visible in
2681          * a pseudo filesystem, return ENOENT (not following symlink).
2682          * V4 client can not mount such symlink. This is a regression
2683          * from V2/V3.
2684          *
2685          * In the same exported filesystem, if the security flavor used
2686          * is not an explicitly shared flavor, limit the view to the visible
2687          * list entries only. This is not a WRONGSEC case because it's already
2688          * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2689          */
2690         if (!different_export &&
2691             (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2692             cs->access & CS_ACCESS_LIMITED)) {
2693                 if (! nfs_visible(cs->exi, vp, &different_export)) {
2694                         VN_RELE(vp);
2695                         return (puterrno4(ENOENT));
2696                 }
2697         }
2698
2699         /*
2700          * If it's a mountpoint, then traverse it.
2701          */
2702         if (vn_ismntpt(vp)) {
2703                 pre_exi = cs->exi;      /* save pre-traversed exportinfo */
2704                 pre_tvp = vp;           /* save pre-traversed vnode     */
2705
2706                 /*
2707                  * hold pre_tvp to counteract rele by traverse.  We will
2708                  * need pre_tvp below if checkexport4 fails
2709                  */
2710                 VN_HOLD(pre_tvp);
2711                 tvp = vp;
2712                 if ((error = traverse(&tvp)) != 0) {
2713                         VN_RELE(vp);
2714                         VN_RELE(pre_tvp);
2715                         return (puterrno4(error));
2716                 }
2717                 vp = tvp;
2718                 different_export = 1;
2719         } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2720                 /*
2721                  * The vfsp comparison is to handle the case where
2722                  * a LOFS mount is shared.  lo_lookup traverses mount points,
2723                  * and NFS is unaware of local fs transistions because
2724                  * v_vfsmountedhere isn't set.  For this special LOFS case,
2725                  * the dir and the obj returned by lookup will have different
2726                  * vfs ptrs.
2727                  */
2728                 different_export = 1;
2729         }
2730
2731         if (different_export) {
2732
2733                 bzero(&fid, sizeof (fid));
2734                 fid.fid_len = MAXFIDSZ;
2735                 error = vop_fid_pseudo(vp, &fid);
2736                 if (error) {
2737                         VN_RELE(vp);
2738                         if (pre_tvp)
2739                                 VN_RELE(pre_tvp);
2740                         return (puterrno4(error));
2741                 }
2742
2743                 if (dotdot)
2744                         exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2745                 else
2746                         exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2747
2748                 if (exi == NULL) {
2749                         if (pre_tvp) {
2750                                 /*
2751                                  * If this vnode is a mounted-on vnode,
2752                                  * but the mounted-on file system is not
2753                                  * exported, send back the filehandle for
2754                                  * the mounted-on vnode, not the root of
2755                                  * the mounted-on file system.
2756                                  */
2757                                 VN_RELE(vp);
2758                                 vp = pre_tvp;
2759                                 exi = pre_exi;
2760                         } else {
2761                                 VN_RELE(vp);
2762                                 return (puterrno4(EACCES));
2763                         }
2764                 } else if (pre_tvp) {
2765                         /* we're done with pre_tvp now. release extra hold */
2766                         VN_RELE(pre_tvp);
2767                 }
2768
2769                 cs->exi = exi;
2770
2771                 /*
2772                  * Now we do a checkauth4. The reason is that
2773                  * this client/user may not have access to the new
2774                  * exported file system, and if he does,
2775                  * the client/user may be mapped to a different uid.
2776                  *
2777                  * We start with a new cr, because the checkauth4 done
2778                  * in the PUT*FH operation over wrote the cred's uid,
2779                  * gid, etc, and we want the real thing before calling
2780                  * checkauth4()
2781                  */
2782                 crfree(cs->cr);
2783                 cs->cr = crdup(cs->basecr);
2784
2785                 oldvp = cs->vp;
2786                 cs->vp = vp;
2787                 is_newvp = TRUE;
2788
2789                 stat = call_checkauth4(cs, req);
2790                 if (stat != NFS4_OK) {
2791                         VN_RELE(cs->vp);
2792                         cs->vp = oldvp;
2793                         return (stat);
2794                 }
2795         }
2796
2797         /*
2798          * After various NFS checks, do a label check on the path
2799          * component. The label on this path should either be the
2800          * global zone's label or a zone's label. We are only
2801          * interested in the zone's label because exported files
2802          * in global zone is accessible (though read-only) to
2803          * clients. The exportability/visibility check is already
2804          * done before reaching this code.
2805          */
2806         if (is_system_labeled()) {
2807                 bslabel_t *clabel;
2808
2809                 ASSERT(req->rq_label != NULL);
2810                 clabel = req->rq_label;
2811                 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2812                     "got client label from request(1)", struct svc_req *, req);
2813
2814                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2815                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2816                             cs->exi)) {
2817                                 error = EACCES;
2818                                 goto err_out;
2819                         }
2820                 } else {
2821                         /*
2822                          * We grant access to admin_low label clients
2823                          * only if the client is trusted, i.e. also
2824                          * running Solaris Trusted Extension.
2825                          */
2826                         struct sockaddr *ca;
2827                         int             addr_type;
2828                         void            *ipaddr;
2829                         tsol_tpc_t      *tp;
2830
2831                         ca = (struct sockaddr *)svc_getrpccaller(
2832                             req->rq_xprt)->buf;
2833                         if (ca->sa_family == AF_INET) {
2834                                 addr_type = IPV4_VERSION;
2835                                 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2836                         } else if (ca->sa_family == AF_INET6) {
2837                                 addr_type = IPV6_VERSION;
2838                                 ipaddr = &((struct sockaddr_in6 *)
2839                                     ca)->sin6_addr;
2840                         }
2841                         tp = find_tpc(ipaddr, addr_type, B_FALSE);
2842                         if (tp == NULL || tp->tpc_tp.tp_doi !=
2843                             l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2844                             SUN_CIPSO) {
2845                                 if (tp != NULL)
2846                                         TPC_RELE(tp);
2847                                 error = EACCES;
2848                                 goto err_out;
2849                         }
2850                         TPC_RELE(tp);
2851                 }
2852         }
2853
2854         error = makefh4(&cs->fh, vp, cs->exi);
2855
2856 err_out:
2857         if (error) {
2858                 if (is_newvp) {
2859                         VN_RELE(cs->vp);
2860                         cs->vp = oldvp;
2861                 } else
2862                         VN_RELE(vp);
2863                 return (puterrno4(error));
2864         }
2865
2866         if (!is_newvp) {
2867                 if (cs->vp)
2868                         VN_RELE(cs->vp);
2869                 cs->vp = vp;
2870         } else if (oldvp)
2871                 VN_RELE(oldvp);
2872
2873         /*
2874          * if did lookup on attrdir and didn't lookup .., set named
2875          * attr fh flag
2876          */
2877         if (attrdir && ! dotdot)
2878                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2879
2880         /* Assume false for now, open proc will set this */
2881         cs->mandlock = FALSE;
2882
2883         return (NFS4_OK);
2884 }
2885
2886 /* ARGSUSED */
2887 static void
2888 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2889     struct compound_state *cs)
2890 {
2891         LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2892         LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2893         char *nm;
2894         uint_t len;
2895         struct sockaddr *ca;
2896         char *name = NULL;
2897         nfsstat4 status;
2898
2899         DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2900             LOOKUP4args *, args);
2901
2902         if (cs->vp == NULL) {
2903                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2904                 goto out;
2905         }
2906
2907         if (cs->vp->v_type == VLNK) {
2908                 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2909                 goto out;
2910         }
2911
2912         if (cs->vp->v_type != VDIR) {
2913                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2914                 goto out;
2915         }
2916
2917         status = utf8_dir_verify(&args->objname);
2918         if (status != NFS4_OK) {
2919                 *cs->statusp = resp->status = status;
2920                 goto out;
2921         }
2922
2923         nm = utf8_to_str(&args->objname, &len, NULL);
2924         if (nm == NULL) {
2925                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2926                 goto out;
2927         }
2928
2929         if (len > MAXNAMELEN) {
2930                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2931                 kmem_free(nm, len);
2932                 goto out;
2933         }
2934
2935         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2936         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2937             MAXPATHLEN  + 1);
2938
2939         if (name == NULL) {
2940                 *cs->statusp = resp->status = NFS4ERR_INVAL;
2941                 kmem_free(nm, len);
2942                 goto out;
2943         }
2944
2945         *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
2946
2947         if (name != nm)
2948                 kmem_free(name, MAXPATHLEN + 1);
2949         kmem_free(nm, len);
2950
2951 out:
2952         DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
2953             LOOKUP4res *, resp);
2954 }
2955
2956 /* ARGSUSED */
2957 static void
2958 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2959     struct compound_state *cs)
2960 {
2961         LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2962
2963         DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
2964
2965         if (cs->vp == NULL) {
2966                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2967                 goto out;
2968         }
2969
2970         if (cs->vp->v_type != VDIR) {
2971                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2972                 goto out;
2973         }
2974
2975         *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
2976
2977         /*
2978          * From NFSV4 Specification, LOOKUPP should not check for
2979          * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2980          */
2981         if (resp->status == NFS4ERR_WRONGSEC) {
2982                 *cs->statusp = resp->status = NFS4_OK;
2983         }
2984
2985 out:
2986         DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
2987             LOOKUPP4res *, resp);
2988 }
2989
2990
2991 /*ARGSUSED2*/
2992 static void
2993 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2994     struct compound_state *cs)
2995 {
2996         OPENATTR4args   *args = &argop->nfs_argop4_u.opopenattr;
2997         OPENATTR4res    *resp = &resop->nfs_resop4_u.opopenattr;
2998         vnode_t         *avp = NULL;
2999         int             lookup_flags = LOOKUP_XATTR, error;
3000         int             exp_ro = 0;
3001
3002         DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
3003             OPENATTR4args *, args);
3004
3005         if (cs->vp == NULL) {
3006                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3007                 goto out;
3008         }
3009
3010         if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3011             !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3012                 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3013                 goto out;
3014         }
3015
3016         /*
3017          * If file system supports passing ACE mask to VOP_ACCESS then
3018          * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3019          */
3020
3021         if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3022                 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3023                     V_ACE_MASK, cs->cr, NULL);
3024         else
3025                 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3026                     (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3027                     (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3028
3029         if (error) {
3030                 *cs->statusp = resp->status = puterrno4(EACCES);
3031                 goto out;
3032         }
3033
3034         /*
3035          * The CREATE_XATTR_DIR VOP flag cannot be specified if
3036          * the file system is exported read-only -- regardless of
3037          * createdir flag.  Otherwise the attrdir would be created
3038          * (assuming server fs isn't mounted readonly locally).  If
3039          * VOP_LOOKUP returns ENOENT in this case, the error will
3040          * be translated into EROFS.  ENOSYS is mapped to ENOTSUP
3041          * because specfs has no VOP_LOOKUP op, so the macro would
3042          * return ENOSYS.  EINVAL is returned by all (current)
3043          * Solaris file system implementations when any of their
3044          * restrictions are violated (xattr(dir) can't have xattrdir).
3045          * Returning NOTSUPP is more appropriate in this case
3046          * because the object will never be able to have an attrdir.
3047          */
3048         if (args->createdir && ! (exp_ro = rdonly4(cs->exi, cs->vp, req)))
3049                 lookup_flags |= CREATE_XATTR_DIR;
3050
3051         error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3052             NULL, NULL, NULL);
3053
3054         if (error) {
3055                 if (error == ENOENT && args->createdir && exp_ro)
3056                         *cs->statusp = resp->status = puterrno4(EROFS);
3057                 else if (error == EINVAL || error == ENOSYS)
3058                         *cs->statusp = resp->status = puterrno4(ENOTSUP);
3059                 else
3060                         *cs->statusp = resp->status = puterrno4(error);
3061                 goto out;
3062         }
3063
3064         ASSERT(avp->v_flag & V_XATTRDIR);
3065
3066         error = makefh4(&cs->fh, avp, cs->exi);
3067
3068         if (error) {
3069                 VN_RELE(avp);
3070                 *cs->statusp = resp->status = puterrno4(error);
3071                 goto out;
3072         }
3073
3074         VN_RELE(cs->vp);
3075         cs->vp = avp;
3076
3077         /*
3078          * There is no requirement for an attrdir fh flag
3079          * because the attrdir has a vnode flag to distinguish
3080          * it from regular (non-xattr) directories.  The
3081          * FH4_ATTRDIR flag is set for future sanity checks.
3082          */
3083         set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3084         *cs->statusp = resp->status = NFS4_OK;
3085
3086 out:
3087         DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3088             OPENATTR4res *, resp);
3089 }
3090
3091 static int
3092 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3093     caller_context_t *ct)
3094 {
3095         int error;
3096         int i;
3097         clock_t delaytime;
3098
3099         delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3100
3101         /*
3102          * Don't block on mandatory locks. If this routine returns
3103          * EAGAIN, the caller should return NFS4ERR_LOCKED.
3104          */
3105         uio->uio_fmode = FNONBLOCK;
3106
3107         for (i = 0; i < rfs4_maxlock_tries; i++) {
3108
3109
3110                 if (direction == FREAD) {
3111                         (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3112                         error = VOP_READ(vp, uio, ioflag, cred, ct);
3113                         VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3114                 } else {
3115                         (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3116                         error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3117                         VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3118                 }
3119
3120                 if (error != EAGAIN)
3121                         break;
3122
3123                 if (i < rfs4_maxlock_tries - 1) {
3124                         delay(delaytime);
3125                         delaytime *= 2;
3126                 }
3127         }
3128
3129         return (error);
3130 }
3131
3132 /* ARGSUSED */
3133 static void
3134 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3135     struct compound_state *cs)
3136 {
3137         READ4args *args = &argop->nfs_argop4_u.opread;
3138         READ4res *resp = &resop->nfs_resop4_u.opread;
3139         int error;
3140         int verror;
3141         vnode_t *vp;
3142         struct vattr va;
3143         struct iovec iov;
3144         struct uio uio;
3145         u_offset_t offset;
3146         bool_t *deleg = &cs->deleg;
3147         nfsstat4 stat;
3148         int in_crit = 0;
3149         mblk_t *mp = NULL;
3150         int alloc_err = 0;
3151         int rdma_used = 0;
3152         int loaned_buffers;
3153         caller_context_t ct;
3154         struct uio *uiop;
3155
3156         DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3157             READ4args, args);
3158
3159         vp = cs->vp;
3160         if (vp == NULL) {
3161                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3162                 goto out;
3163         }
3164         if (cs->access == CS_ACCESS_DENIED) {
3165                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3166                 goto out;
3167         }
3168
3169         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3170             deleg, TRUE, &ct)) != NFS4_OK) {
3171                 *cs->statusp = resp->status = stat;
3172                 goto out;
3173         }
3174
3175         /*
3176          * Enter the critical region before calling VOP_RWLOCK
3177          * to avoid a deadlock with write requests.
3178          */
3179         if (nbl_need_check(vp)) {
3180                 nbl_start_crit(vp, RW_READER);
3181                 in_crit = 1;
3182                 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3183                     &ct)) {
3184                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
3185                         goto out;
3186                 }
3187         }
3188
3189         if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3190             deleg, TRUE, &ct)) != NFS4_OK) {
3191                 *cs->statusp = resp->status = stat;
3192                 goto out;
3193         }
3194
3195         if (args->wlist) {
3196                 if (args->count > clist_len(args->wlist)) {
3197                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3198                         goto out;
3199                 }
3200                 rdma_used = 1;
3201         }
3202
3203         /* use loaned buffers for TCP */
3204         loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3205
3206         va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3207         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3208
3209         /*
3210          * If we can't get the attributes, then we can't do the
3211          * right access checking.  So, we'll fail the request.
3212          */
3213         if (verror) {
3214                 *cs->statusp = resp->status = puterrno4(verror);
3215                 goto out;
3216         }
3217
3218         if (vp->v_type != VREG) {
3219                 *cs->statusp = resp->status =
3220                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3221                 goto out;
3222         }
3223
3224         if (crgetuid(cs->cr) != va.va_uid &&
3225             (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3226             (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3227                 *cs->statusp = resp->status = puterrno4(error);
3228                 goto out;
3229         }
3230
3231         if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3232                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3233                 goto out;
3234         }
3235
3236         offset = args->offset;
3237         if (offset >= va.va_size) {
3238                 *cs->statusp = resp->status = NFS4_OK;
3239                 resp->eof = TRUE;
3240                 resp->data_len = 0;
3241                 resp->data_val = NULL;
3242                 resp->mblk = NULL;
3243                 /* RDMA */
3244                 resp->wlist = args->wlist;
3245                 resp->wlist_len = resp->data_len;
3246                 *cs->statusp = resp->status = NFS4_OK;
3247                 if (resp->wlist)
3248                         clist_zero_len(resp->wlist);
3249                 goto out;
3250         }
3251
3252         if (args->count == 0) {
3253                 *cs->statusp = resp->status = NFS4_OK;
3254                 resp->eof = FALSE;
3255                 resp->data_len = 0;
3256                 resp->data_val = NULL;
3257                 resp->mblk = NULL;
3258                 /* RDMA */
3259                 resp->wlist = args->wlist;
3260                 resp->wlist_len = resp->data_len;
3261                 if (resp->wlist)
3262                         clist_zero_len(resp->wlist);
3263                 goto out;
3264         }
3265
3266         /*
3267          * Do not allocate memory more than maximum allowed
3268          * transfer size
3269          */
3270         if (args->count > rfs4_tsize(req))
3271                 args->count = rfs4_tsize(req);
3272
3273         if (loaned_buffers) {
3274                 uiop = (uio_t *)rfs_setup_xuio(vp);
3275                 ASSERT(uiop != NULL);
3276                 uiop->uio_segflg = UIO_SYSSPACE;
3277                 uiop->uio_loffset = args->offset;
3278                 uiop->uio_resid = args->count;
3279
3280                 /* Jump to do the read if successful */
3281                 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3282                         /*
3283                          * Need to hold the vnode until after VOP_RETZCBUF()
3284                          * is called.
3285                          */
3286                         VN_HOLD(vp);
3287                         goto doio_read;
3288                 }
3289
3290                 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3291                     uiop->uio_loffset, int, uiop->uio_resid);
3292
3293                 uiop->uio_extflg = 0;
3294
3295                 /* failure to setup for zero copy */
3296                 rfs_free_xuio((void *)uiop);
3297                 loaned_buffers = 0;
3298         }
3299
3300         /*
3301          * If returning data via RDMA Write, then grab the chunk list. If we
3302          * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3303          */
3304         if (rdma_used) {
3305                 mp = NULL;
3306                 (void) rdma_get_wchunk(req, &iov, args->wlist);
3307         } else {
3308                 /*
3309                  * mp will contain the data to be sent out in the read reply.
3310                  * It will be freed after the reply has been sent. Let's
3311                  * roundup the data to a BYTES_PER_XDR_UNIT multiple, so that
3312                  * the call to xdrmblk_putmblk() never fails. If the first
3313                  * alloc of the requested size fails, then decrease the size to
3314                  * something more reasonable and wait for the allocation to
3315                  * occur.
3316                  */
3317                 mp = allocb(RNDUP(args->count), BPRI_MED);
3318                 if (mp == NULL) {
3319                         if (args->count > MAXBSIZE)
3320                                 args->count = MAXBSIZE;
3321                         mp = allocb_wait(RNDUP(args->count), BPRI_MED,
3322                             STR_NOSIG, &alloc_err);
3323                 }
3324                 ASSERT(mp != NULL);
3325                 ASSERT(alloc_err == 0);
3326
3327                 iov.iov_base = (caddr_t)mp->b_datap->db_base;
3328                 iov.iov_len = args->count;
3329         }
3330
3331         uio.uio_iov = &iov;
3332         uio.uio_iovcnt = 1;
3333         uio.uio_segflg = UIO_SYSSPACE;
3334         uio.uio_extflg = UIO_COPY_CACHED;
3335         uio.uio_loffset = args->offset;
3336         uio.uio_resid = args->count;
3337         uiop = &uio;
3338
3339 doio_read:
3340         error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3341
3342         va.va_mask = AT_SIZE;
3343         verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3344
3345         if (error) {
3346                 if (mp)
3347                         freemsg(mp);
3348                 *cs->statusp = resp->status = puterrno4(error);
3349                 goto out;
3350         }
3351
3352         /* make mblk using zc buffers */
3353         if (loaned_buffers) {
3354                 mp = uio_to_mblk(uiop);
3355                 ASSERT(mp != NULL);
3356         }
3357
3358         *cs->statusp = resp->status = NFS4_OK;
3359
3360         ASSERT(uiop->uio_resid >= 0);
3361         resp->data_len = args->count - uiop->uio_resid;
3362         if (mp) {
3363                 resp->data_val = (char *)mp->b_datap->db_base;
3364                 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3365         } else {
3366                 resp->data_val = (caddr_t)iov.iov_base;
3367         }
3368
3369         resp->mblk = mp;
3370
3371         if (!verror && offset + resp->data_len == va.va_size)
3372                 resp->eof = TRUE;
3373         else
3374                 resp->eof = FALSE;
3375
3376         if (rdma_used) {
3377                 if (!rdma_setup_read_data4(args, resp)) {
3378                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3379                 }
3380         } else {
3381                 resp->wlist = NULL;
3382         }
3383
3384 out:
3385         if (in_crit)
3386                 nbl_end_crit(vp);
3387
3388         DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3389             READ4res *, resp);
3390 }
3391
3392 static void
3393 rfs4_op_read_free(nfs_resop4 *resop)
3394 {
3395         READ4res        *resp = &resop->nfs_resop4_u.opread;
3396
3397         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3398                 freemsg(resp->mblk);
3399                 resp->mblk = NULL;
3400                 resp->data_val = NULL;
3401                 resp->data_len = 0;
3402         }
3403 }
3404
3405 static void
3406 rfs4_op_readdir_free(nfs_resop4 * resop)
3407 {
3408         READDIR4res    *resp = &resop->nfs_resop4_u.opreaddir;
3409
3410         if (resp->status == NFS4_OK && resp->mblk != NULL) {
3411                 freeb(resp->mblk);
3412                 resp->mblk = NULL;
3413                 resp->data_len = 0;
3414         }
3415 }
3416
3417
3418 /* ARGSUSED */
3419 static void
3420 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3421     struct compound_state *cs)
3422 {
3423         PUTPUBFH4res    *resp = &resop->nfs_resop4_u.opputpubfh;
3424         int             error;
3425         vnode_t         *vp;
3426         struct exportinfo *exi, *sav_exi;
3427         nfs_fh4_fmt_t   *fh_fmtp;
3428
3429         DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3430
3431         if (cs->vp) {
3432                 VN_RELE(cs->vp);
3433                 cs->vp = NULL;
3434         }
3435
3436         if (cs->cr)
3437                 crfree(cs->cr);
3438
3439         cs->cr = crdup(cs->basecr);
3440
3441         vp = exi_public->exi_vp;
3442         if (vp == NULL) {
3443                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3444                 goto out;
3445         }
3446
3447         error = makefh4(&cs->fh, vp, exi_public);
3448         if (error != 0) {
3449                 *cs->statusp = resp->status = puterrno4(error);
3450                 goto out;
3451         }
3452         sav_exi = cs->exi;
3453         if (exi_public == exi_root) {
3454                 /*
3455                  * No filesystem is actually shared public, so we default
3456                  * to exi_root. In this case, we must check whether root
3457                  * is exported.
3458                  */
3459                 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3460
3461                 /*
3462                  * if root filesystem is exported, the exportinfo struct that we
3463                  * should use is what checkexport4 returns, because root_exi is
3464                  * actually a mostly empty struct.
3465                  */
3466                 exi = checkexport4(&fh_fmtp->fh4_fsid,
3467                     (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3468                 cs->exi = ((exi != NULL) ? exi : exi_public);
3469         } else {
3470                 /*
3471                  * it's a properly shared filesystem
3472                  */
3473                 cs->exi = exi_public;
3474         }
3475
3476         if (is_system_labeled()) {
3477                 bslabel_t *clabel;
3478
3479                 ASSERT(req->rq_label != NULL);
3480                 clabel = req->rq_label;
3481                 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3482                     "got client label from request(1)",
3483                     struct svc_req *, req);
3484                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3485                         if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3486                             cs->exi)) {
3487                                 *cs->statusp = resp->status =
3488                                     NFS4ERR_SERVERFAULT;
3489                                 goto out;
3490                         }
3491                 }
3492         }
3493
3494         VN_HOLD(vp);
3495         cs->vp = vp;
3496
3497         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3498                 VN_RELE(cs->vp);
3499                 cs->vp = NULL;
3500                 cs->exi = sav_exi;
3501                 goto out;
3502         }
3503
3504         *cs->statusp = resp->status = NFS4_OK;
3505 out:
3506         DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3507             PUTPUBFH4res *, resp);
3508 }
3509
3510 /*
3511  * XXX - issue with put*fh operations. Suppose /export/home is exported.
3512  * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3513  * or joe have restrictive search permissions, then we shouldn't let
3514  * the client get a file handle. This is easy to enforce. However, we
3515  * don't know what security flavor should be used until we resolve the
3516  * path name. Another complication is uid mapping. If root is
3517  * the user, then it will be mapped to the anonymous user by default,
3518  * but we won't know that till we've resolved the path name. And we won't
3519  * know what the anonymous user is.
3520  * Luckily, SECINFO is specified to take a full filename.
 * So what we will have to do in rfs4_op_lookup is check that flavor of
3522  * the target object matches that of the request, and if root was the
3523  * caller, check for the root= and anon= options, and if necessary,
3524  * repeat the lookup using the right cred_t. But that's not done yet.
3525  */
3526 /* ARGSUSED */
3527 static void
3528 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3529     struct compound_state *cs)
3530 {
3531         PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3532         PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3533         nfs_fh4_fmt_t *fh_fmtp;
3534
3535         DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3536             PUTFH4args *, args);
3537
3538         if (cs->vp) {
3539                 VN_RELE(cs->vp);
3540                 cs->vp = NULL;
3541         }
3542
3543         if (cs->cr) {
3544                 crfree(cs->cr);
3545                 cs->cr = NULL;
3546         }
3547
3548
3549         if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3550                 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3551                 goto out;
3552         }
3553
3554         fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3555         cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3556             NULL);
3557
3558         if (cs->exi == NULL) {
3559                 *cs->statusp = resp->status = NFS4ERR_STALE;
3560                 goto out;
3561         }
3562
3563         cs->cr = crdup(cs->basecr);
3564
3565         ASSERT(cs->cr != NULL);
3566
3567         if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3568                 *cs->statusp = resp->status;
3569                 goto out;
3570         }
3571
3572         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3573                 VN_RELE(cs->vp);
3574                 cs->vp = NULL;
3575                 goto out;
3576         }
3577
3578         nfs_fh4_copy(&args->object, &cs->fh);
3579         *cs->statusp = resp->status = NFS4_OK;
3580         cs->deleg = FALSE;
3581
3582 out:
3583         DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3584             PUTFH4res *, resp);
3585 }
3586
3587 /* ARGSUSED */
3588 static void
3589 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3590     struct compound_state *cs)
3591 {
3592         PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3593         int error;
3594         fid_t fid;
3595         struct exportinfo *exi, *sav_exi;
3596
3597         DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3598
3599         if (cs->vp) {
3600                 VN_RELE(cs->vp);
3601                 cs->vp = NULL;
3602         }
3603
3604         if (cs->cr)
3605                 crfree(cs->cr);
3606
3607         cs->cr = crdup(cs->basecr);
3608
3609         /*
3610          * Using rootdir, the system root vnode,
3611          * get its fid.
3612          */
3613         bzero(&fid, sizeof (fid));
3614         fid.fid_len = MAXFIDSZ;
3615         error = vop_fid_pseudo(rootdir, &fid);
3616         if (error != 0) {
3617                 *cs->statusp = resp->status = puterrno4(error);
3618                 goto out;
3619         }
3620
3621         /*
3622          * Then use the root fsid & fid it to find out if it's exported
3623          *
3624          * If the server root isn't exported directly, then
3625          * it should at least be a pseudo export based on
3626          * one or more exports further down in the server's
3627          * file tree.
3628          */
3629         exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3630         if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3631                 NFS4_DEBUG(rfs4_debug,
3632                     (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3633                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3634                 goto out;
3635         }
3636
3637         /*
3638          * Now make a filehandle based on the root
3639          * export and root vnode.
3640          */
3641         error = makefh4(&cs->fh, rootdir, exi);
3642         if (error != 0) {
3643                 *cs->statusp = resp->status = puterrno4(error);
3644                 goto out;
3645         }
3646
3647         sav_exi = cs->exi;
3648         cs->exi = exi;
3649
3650         VN_HOLD(rootdir);
3651         cs->vp = rootdir;
3652
3653         if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3654                 VN_RELE(rootdir);
3655                 cs->vp = NULL;
3656                 cs->exi = sav_exi;
3657                 goto out;
3658         }
3659
3660         *cs->statusp = resp->status = NFS4_OK;
3661         cs->deleg = FALSE;
3662 out:
3663         DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3664             PUTROOTFH4res *, resp);
3665 }
3666
3667 /*
3668  * A directory entry is a valid nfsv4 entry if
3669  * - it has a non-zero ino
3670  * - it is not a dot or dotdot name
3671  * - it is visible in a pseudo export or in a real export that can
3672  *   only have a limited view.
3673  */
3674 static bool_t
3675 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp,
3676     int *expseudo, int check_visible)
3677 {
3678         if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) {
3679                 *expseudo = 0;
3680                 return (FALSE);
3681         }
3682
3683         if (! check_visible) {
3684                 *expseudo = 0;
3685                 return (TRUE);
3686         }
3687
3688         return (nfs_visible_inode(exi, dp->d_ino, expseudo));
3689 }
3690
3691 /*
3692  * set_rdattr_params sets up the variables used to manage what information
3693  * to get for each directory entry.
3694  */
3695 static nfsstat4
3696 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3697     bitmap4 attrs, bool_t *need_to_lookup)
3698 {
3699         uint_t  va_mask;
3700         nfsstat4 status;
3701         bitmap4 objbits;
3702
3703         status = bitmap4_to_attrmask(attrs, sargp);
3704         if (status != NFS4_OK) {
3705                 /*
3706                  * could not even figure attr mask
3707                  */
3708                 return (status);
3709         }
3710         va_mask = sargp->vap->va_mask;
3711
3712         /*
3713          * dirent's d_ino is always correct value for mounted_on_fileid.
3714          * mntdfid_set is set once here, but mounted_on_fileid is
3715          * set in main dirent processing loop for each dirent.
3716          * The mntdfid_set is a simple optimization that lets the
3717          * server attr code avoid work when caller is readdir.
3718          */
3719         sargp->mntdfid_set = TRUE;
3720
3721         /*
3722          * Lookup entry only if client asked for any of the following:
3723          * a) vattr attrs
3724          * b) vfs attrs
3725          * c) attrs w/per-object scope requested (change, filehandle, etc)
3726          *    other than mounted_on_fileid (which we can take from dirent)
3727          */
3728         objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3729
3730         if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3731                 *need_to_lookup = TRUE;
3732         else
3733                 *need_to_lookup = FALSE;
3734
3735         if (sargp->sbp == NULL)
3736                 return (NFS4_OK);
3737
3738         /*
3739          * If filesystem attrs are requested, get them now from the
3740          * directory vp, as most entries will have same filesystem. The only
3741          * exception are mounted over entries but we handle
3742          * those as we go (XXX mounted over detection not yet implemented).
3743          */
3744         sargp->vap->va_mask = 0;        /* to avoid VOP_GETATTR */
3745         status = bitmap4_get_sysattrs(sargp);
3746         sargp->vap->va_mask = va_mask;
3747
3748         if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3749                 /*
3750                  * Failed to get filesystem attributes.
3751                  * Return a rdattr_error for each entry, but don't fail.
3752                  * However, don't get any obj-dependent attrs.
3753                  */
3754                 sargp->rdattr_error = status;   /* for rdattr_error */
3755                 *need_to_lookup = FALSE;
3756                 /*
3757                  * At least get fileid for regular readdir output
3758                  */
3759                 sargp->vap->va_mask &= AT_NODEID;
3760                 status = NFS4_OK;
3761         }
3762
3763         return (status);
3764 }
3765
3766 /*
3767  * readlink: args: CURRENT_FH.
3768  *      res: status. If success - CURRENT_FH unchanged, return linktext.
3769  */
3770
3771 /* ARGSUSED */
3772 static void
3773 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3774     struct compound_state *cs)
3775 {
3776         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3777         int error;
3778         vnode_t *vp;
3779         struct iovec iov;
3780         struct vattr va;
3781         struct uio uio;
3782         char *data;
3783         struct sockaddr *ca;
3784         char *name = NULL;
3785         int is_referral;
3786
3787         DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3788
3789         /* CURRENT_FH: directory */
3790         vp = cs->vp;
3791         if (vp == NULL) {
3792                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3793                 goto out;
3794         }
3795
3796         if (cs->access == CS_ACCESS_DENIED) {
3797                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3798                 goto out;
3799         }
3800
3801         /* Is it a referral? */
3802         if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3803
3804                 is_referral = 1;
3805
3806         } else {
3807
3808                 is_referral = 0;
3809
3810                 if (vp->v_type == VDIR) {
3811                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
3812                         goto out;
3813                 }
3814
3815                 if (vp->v_type != VLNK) {
3816                         *cs->statusp = resp->status = NFS4ERR_INVAL;
3817                         goto out;
3818                 }
3819
3820         }
3821
3822         va.va_mask = AT_MODE;
3823         error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3824         if (error) {
3825                 *cs->statusp = resp->status = puterrno4(error);
3826                 goto out;
3827         }
3828
3829         if (MANDLOCK(vp, va.va_mode)) {
3830                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3831                 goto out;
3832         }
3833
3834         data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3835
3836         if (is_referral) {
3837                 char *s;
3838                 size_t strsz;
3839
3840                 /* Get an artificial symlink based on a referral */
3841                 s = build_symlink(vp, cs->cr, &strsz);
3842                 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3843                 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3844                     vnode_t *, vp, char *, s);
3845                 if (s == NULL)
3846                         error = EINVAL;
3847                 else {
3848                         error = 0;
3849                         (void) strlcpy(data, s, MAXPATHLEN + 1);
3850                         kmem_free(s, strsz);
3851                 }
3852
3853         } else {
3854
3855                 iov.iov_base = data;
3856                 iov.iov_len = MAXPATHLEN;
3857                 uio.uio_iov = &iov;
3858                 uio.uio_iovcnt = 1;
3859                 uio.uio_segflg = UIO_SYSSPACE;
3860                 uio.uio_extflg = UIO_COPY_CACHED;
3861                 uio.uio_loffset = 0;
3862                 uio.uio_resid = MAXPATHLEN;
3863
3864                 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3865
3866                 if (!error)
3867                         *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3868         }
3869
3870         if (error) {
3871                 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3872                 *cs->statusp = resp->status = puterrno4(error);
3873                 goto out;
3874         }
3875
3876         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3877         name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3878             MAXPATHLEN  + 1);
3879
3880         if (name == NULL) {
3881                 /*
3882                  * Even though the conversion failed, we return
3883                  * something. We just don't translate it.
3884                  */
3885                 name = data;
3886         }
3887
3888         /*
3889          * treat link name as data
3890          */
3891         (void) str_to_utf8(name, &resp->link);
3892
3893         if (name != data)
3894                 kmem_free(name, MAXPATHLEN + 1);
3895         kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3896         *cs->statusp = resp->status = NFS4_OK;
3897
3898 out:
3899         DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3900             READLINK4res *, resp);
3901 }
3902
3903 static void
3904 rfs4_op_readlink_free(nfs_resop4 *resop)
3905 {
3906         READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3907         utf8string *symlink = &resp->link;
3908
3909         if (symlink->utf8string_val) {
3910                 UTF8STRING_FREE(*symlink)
3911         }
3912 }
3913
3914 /*
3915  * release_lockowner:
3916  *      Release any state associated with the supplied
3917  *      lockowner. Note if any lo_state is holding locks we will not
3918  *      rele that lo_state and thus the lockowner will not be destroyed.
3919  *      A client using lock after the lock owner stateid has been released
3920  *      will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3921  *      to reissue the lock with new_lock_owner set to TRUE.
3922  *      args: lock_owner
3923  *      res:  status
3924  */
3925 /* ARGSUSED */
3926 static void
3927 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3928     struct svc_req *req, struct compound_state *cs)
3929 {
3930         RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3931         RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3932         rfs4_lockowner_t *lo;
3933         rfs4_openowner_t *oo;
3934         rfs4_state_t *sp;
3935         rfs4_lo_state_t *lsp;
3936         rfs4_client_t *cp;
3937         bool_t create = FALSE;
3938         locklist_t *llist;
3939         sysid_t sysid;
3940
3941         DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3942             cs, RELEASE_LOCKOWNER4args *, ap);
3943
3944         /* Make sure there is a clientid around for this request */
3945         cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3946
3947         if (cp == NULL) {
3948                 *cs->statusp = resp->status =
3949                     rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3950                 goto out;
3951         }
3952         rfs4_client_rele(cp);
3953
3954         lo = rfs4_findlockowner(&ap->lock_owner, &create);
3955         if (lo == NULL) {
3956                 *cs->statusp = resp->status = NFS4_OK;
3957                 goto out;
3958         }
3959         ASSERT(lo->rl_client != NULL);
3960
3961         /*
3962          * Check for EXPIRED client. If so will reap state with in a lease
3963          * period or on next set_clientid_confirm step
3964          */
3965         if (rfs4_lease_expired(lo->rl_client)) {
3966                 rfs4_lockowner_rele(lo);
3967                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3968                 goto out;
3969         }
3970
3971         /*
3972          * If no sysid has been assigned, then no locks exist; just return.
3973          */
3974         rfs4_dbe_lock(lo->rl_client->rc_dbe);
3975         if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3976                 rfs4_lockowner_rele(lo);
3977                 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3978                 goto out;
3979         }
3980
3981         sysid = lo->rl_client->rc_sysidt;
3982         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3983
3984         /*
3985          * Mark the lockowner invalid.
3986          */
3987         rfs4_dbe_hide(lo->rl_dbe);
3988
3989         /*
3990          * sysid-pid pair should now not be used since the lockowner is
3991          * invalid. If the client were to instantiate the lockowner again
3992          * it would be assigned a new pid. Thus we can get the list of
3993          * current locks.
3994          */
3995
3996         llist = flk_get_active_locks(sysid, lo->rl_pid);
3997         /* If we are still holding locks fail */
3998         if (llist != NULL) {
3999
4000                 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
4001
4002                 flk_free_locklist(llist);
4003                 /*
4004                  * We need to unhide the lockowner so the client can
4005                  * try it again. The bad thing here is if the client
4006                  * has a logic error that took it here in the first place
4007                  * he probably has lost accounting of the locks that it
4008                  * is holding. So we may have dangling state until the
4009                  * open owner state is reaped via close. One scenario
4010                  * that could possibly occur is that the client has
4011                  * sent the unlock request(s) in separate threads
4012                  * and has not waited for the replies before sending the
4013                  * RELEASE_LOCKOWNER request. Presumably, it would expect
4014                  * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4015                  * reissuing the request.
4016                  */
4017                 rfs4_dbe_unhide(lo->rl_dbe);
4018                 rfs4_lockowner_rele(lo);
4019                 goto out;
4020         }
4021
4022         /*
4023          * For the corresponding client we need to check each open
4024          * owner for any opens that have lockowner state associated
4025          * with this lockowner.
4026          */
4027
4028         rfs4_dbe_lock(lo->rl_client->rc_dbe);
4029         for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4030             oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4031
4032                 rfs4_dbe_lock(oo->ro_dbe);
4033                 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4034                     sp = list_next(&oo->ro_statelist, sp)) {
4035
4036                         rfs4_dbe_lock(sp->rs_dbe);
4037                         for (lsp = list_head(&sp->rs_lostatelist);
4038                             lsp != NULL;
4039                             lsp = list_next(&sp->rs_lostatelist, lsp)) {
4040                                 if (lsp->rls_locker == lo) {
4041                                         rfs4_dbe_lock(lsp->rls_dbe);
4042                                         rfs4_dbe_invalidate(lsp->rls_dbe);
4043                                         rfs4_dbe_unlock(lsp->rls_dbe);
4044                                 }
4045                         }
4046                         rfs4_dbe_unlock(sp->rs_dbe);
4047                 }
4048                 rfs4_dbe_unlock(oo->ro_dbe);
4049         }
4050         rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4051
4052         rfs4_lockowner_rele(lo);
4053
4054         *cs->statusp = resp->status = NFS4_OK;
4055
4056 out:
4057         DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4058             cs, RELEASE_LOCKOWNER4res *, resp);
4059 }
4060
4061 /*
4062  * short utility function to lookup a file and recall the delegation
4063  */
4064 static rfs4_file_t *
4065 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4066     int *lkup_error, cred_t *cr)
4067 {
4068         vnode_t *vp;
4069         rfs4_file_t *fp = NULL;
4070         bool_t fcreate = FALSE;
4071         int error;
4072
4073         if (vpp)
4074                 *vpp = NULL;
4075
4076         if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4077             NULL)) == 0) {
4078                 if (vp->v_type == VREG)
4079                         fp = rfs4_findfile(vp, NULL, &fcreate);
4080                 if (vpp)
4081                         *vpp = vp;
4082                 else
4083                         VN_RELE(vp);
4084         }
4085
4086         if (lkup_error)
4087                 *lkup_error = error;
4088
4089         return (fp);
4090 }
4091
4092 /*
4093  * remove: args: CURRENT_FH: directory; name.
4094  *      res: status. If success - CURRENT_FH unchanged, return change_info
4095  *              for directory.
4096  */
4097 /* ARGSUSED */
4098 static void
4099 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4100     struct compound_state *cs)
4101 {
4102         REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4103         REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4104         int error;
4105         vnode_t *dvp, *vp;
4106         struct vattr bdva, idva, adva;
4107         char *nm;
4108         uint_t len;
4109         rfs4_file_t *fp;
4110         int in_crit = 0;
4111         bslabel_t *clabel;
4112         struct sockaddr *ca;
4113         char *name = NULL;
4114         nfsstat4 status;
4115
4116         DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4117             REMOVE4args *, args);
4118
4119         /* CURRENT_FH: directory */
4120         dvp = cs->vp;
4121         if (dvp == NULL) {
4122                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4123                 goto out;
4124         }
4125
4126         if (cs->access == CS_ACCESS_DENIED) {
4127                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4128                 goto out;
4129         }
4130
4131         /*
4132          * If there is an unshared filesystem mounted on this vnode,
4133          * Do not allow to remove anything in this directory.
4134          */
4135         if (vn_ismntpt(dvp)) {
4136                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4137                 goto out;
4138         }
4139
4140         if (dvp->v_type != VDIR) {
4141                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4142                 goto out;
4143         }
4144
4145         status = utf8_dir_verify(&args->target);
4146         if (status != NFS4_OK) {
4147                 *cs->statusp = resp->status = status;
4148                 goto out;
4149         }
4150
4151         /*
4152          * Lookup the file so that we can check if it's a directory
4153          */
4154         nm = utf8_to_fn(&args->target, &len, NULL);
4155         if (nm == NULL) {
4156                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4157                 goto out;
4158         }
4159
4160         if (len > MAXNAMELEN) {
4161                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4162                 kmem_free(nm, len);
4163                 goto out;
4164         }
4165
4166         if (rdonly4(cs->exi, cs->vp, req)) {
4167                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4168                 kmem_free(nm, len);
4169                 goto out;
4170         }
4171
4172         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4173         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4174             MAXPATHLEN  + 1);
4175
4176         if (name == NULL) {
4177                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4178                 kmem_free(nm, len);
4179                 goto out;
4180         }
4181
4182         /*
4183          * Lookup the file to determine type and while we are see if
4184          * there is a file struct around and check for delegation.
4185          * We don't need to acquire va_seq before this lookup, if
4186          * it causes an update, cinfo.before will not match, which will
4187          * trigger a cache flush even if atomic is TRUE.
4188          */
4189         if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4190                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4191                     NULL)) {
4192                         VN_RELE(vp);
4193                         rfs4_file_rele(fp);
4194                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4195                         if (nm != name)
4196                                 kmem_free(name, MAXPATHLEN + 1);
4197                         kmem_free(nm, len);
4198                         goto out;
4199                 }
4200         }
4201
4202         /* Didn't find anything to remove */
4203         if (vp == NULL) {
4204                 *cs->statusp = resp->status = error;
4205                 if (nm != name)
4206                         kmem_free(name, MAXPATHLEN + 1);
4207                 kmem_free(nm, len);
4208                 goto out;
4209         }
4210
4211         if (nbl_need_check(vp)) {
4212                 nbl_start_crit(vp, RW_READER);
4213                 in_crit = 1;
4214                 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4215                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4216                         if (nm != name)
4217                                 kmem_free(name, MAXPATHLEN + 1);
4218                         kmem_free(nm, len);
4219                         nbl_end_crit(vp);
4220                         VN_RELE(vp);
4221                         if (fp) {
4222                                 rfs4_clear_dont_grant(fp);
4223                                 rfs4_file_rele(fp);
4224                         }
4225                         goto out;
4226                 }
4227         }
4228
4229         /* check label before allowing removal */
4230         if (is_system_labeled()) {
4231                 ASSERT(req->rq_label != NULL);
4232                 clabel = req->rq_label;
4233                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4234                     "got client label from request(1)",
4235                     struct svc_req *, req);
4236                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4237                         if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4238                             cs->exi)) {
4239                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4240                                 if (name != nm)
4241                                         kmem_free(name, MAXPATHLEN + 1);
4242                                 kmem_free(nm, len);
4243                                 if (in_crit)
4244                                         nbl_end_crit(vp);
4245                                 VN_RELE(vp);
4246                                 if (fp) {
4247                                         rfs4_clear_dont_grant(fp);
4248                                         rfs4_file_rele(fp);
4249                                 }
4250                                 goto out;
4251                         }
4252                 }
4253         }
4254
4255         /* Get dir "before" change value */
4256         bdva.va_mask = AT_CTIME|AT_SEQ;
4257         error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4258         if (error) {
4259                 *cs->statusp = resp->status = puterrno4(error);
4260                 if (nm != name)
4261                         kmem_free(name, MAXPATHLEN + 1);
4262                 kmem_free(nm, len);
4263                 if (in_crit)
4264                         nbl_end_crit(vp);
4265                 VN_RELE(vp);
4266                 if (fp) {
4267                         rfs4_clear_dont_grant(fp);
4268                         rfs4_file_rele(fp);
4269                 }
4270                 goto out;
4271         }
4272         NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4273
4274         /* Actually do the REMOVE operation */
4275         if (vp->v_type == VDIR) {
4276                 /*
4277                  * Can't remove a directory that has a mounted-on filesystem.
4278                  */
4279                 if (vn_ismntpt(vp)) {
4280                         error = EACCES;
4281                 } else {
4282                         /*
4283                          * System V defines rmdir to return EEXIST,
4284                          * not ENOTEMPTY, if the directory is not
4285                          * empty.  A System V NFS server needs to map
4286                          * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4287                          * transmit over the wire.
4288                          */
4289                         if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
4290                             NULL, 0)) == EEXIST)
4291                                 error = ENOTEMPTY;
4292                 }
4293         } else {
4294                 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4295                     fp != NULL) {
4296                         struct vattr va;
4297                         vnode_t *tvp;
4298
4299                         rfs4_dbe_lock(fp->rf_dbe);
4300                         tvp = fp->rf_vp;
4301                         if (tvp)
4302                                 VN_HOLD(tvp);
4303                         rfs4_dbe_unlock(fp->rf_dbe);
4304
4305                         if (tvp) {
4306                                 /*
4307                                  * This is va_seq safe because we are not
4308                                  * manipulating dvp.
4309                                  */
4310                                 va.va_mask = AT_NLINK;
4311                                 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4312                                     va.va_nlink == 0) {
4313                                         /* Remove state on file remove */
4314                                         if (in_crit) {
4315                                                 nbl_end_crit(vp);
4316                                                 in_crit = 0;
4317                                         }
4318                                         rfs4_close_all_state(fp);
4319                                 }
4320                                 VN_RELE(tvp);
4321                         }
4322                 }
4323         }
4324
4325         if (in_crit)
4326                 nbl_end_crit(vp);
4327         VN_RELE(vp);
4328
4329         if (fp) {
4330                 rfs4_clear_dont_grant(fp);
4331                 rfs4_file_rele(fp);
4332         }
4333         if (nm != name)
4334                 kmem_free(name, MAXPATHLEN + 1);
4335         kmem_free(nm, len);
4336
4337         if (error) {
4338                 *cs->statusp = resp->status = puterrno4(error);
4339                 goto out;
4340         }
4341
4342         /*
4343          * Get the initial "after" sequence number, if it fails, set to zero
4344          */
4345         idva.va_mask = AT_SEQ;
4346         if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4347                 idva.va_seq = 0;
4348
4349         /*
4350          * Force modified data and metadata out to stable storage.
4351          */
4352         (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4353
4354         /*
4355          * Get "after" change value, if it fails, simply return the
4356          * before value.
4357          */
4358         adva.va_mask = AT_CTIME|AT_SEQ;
4359         if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4360                 adva.va_ctime = bdva.va_ctime;
4361                 adva.va_seq = 0;
4362         }
4363
4364         NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4365
4366         /*
4367          * The cinfo.atomic = TRUE only if we have
4368          * non-zero va_seq's, and it has incremented by exactly one
4369          * during the VOP_REMOVE/RMDIR and it didn't change during
4370          * the VOP_FSYNC.
4371          */
4372         if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4373             idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4374                 resp->cinfo.atomic = TRUE;
4375         else
4376                 resp->cinfo.atomic = FALSE;
4377
4378         *cs->statusp = resp->status = NFS4_OK;
4379
4380 out:
4381         DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4382             REMOVE4res *, resp);
4383 }
4384
4385 /*
4386  * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4387  *              oldname and newname.
4388  *      res: status. If success - CURRENT_FH unchanged, return change_info
4389  *              for both from and target directories.
4390  */
4391 /* ARGSUSED */
4392 static void
4393 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4394     struct compound_state *cs)
4395 {
4396         RENAME4args *args = &argop->nfs_argop4_u.oprename;
4397         RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4398         int error;
4399         vnode_t *odvp;
4400         vnode_t *ndvp;
4401         vnode_t *srcvp, *targvp;
4402         struct vattr obdva, oidva, oadva;
4403         struct vattr nbdva, nidva, nadva;
4404         char *onm, *nnm;
4405         uint_t olen, nlen;
4406         rfs4_file_t *fp, *sfp;
4407         int in_crit_src, in_crit_targ;
4408         int fp_rele_grant_hold, sfp_rele_grant_hold;
4409         bslabel_t *clabel;
4410         struct sockaddr *ca;
4411         char *converted_onm = NULL;
4412         char *converted_nnm = NULL;
4413         nfsstat4 status;
4414
4415         DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4416             RENAME4args *, args);
4417
4418         fp = sfp = NULL;
4419         srcvp = targvp = NULL;
4420         in_crit_src = in_crit_targ = 0;
4421         fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4422
4423         /* CURRENT_FH: target directory */
4424         ndvp = cs->vp;
4425         if (ndvp == NULL) {
4426                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4427                 goto out;
4428         }
4429
4430         /* SAVED_FH: from directory */
4431         odvp = cs->saved_vp;
4432         if (odvp == NULL) {
4433                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4434                 goto out;
4435         }
4436
4437         if (cs->access == CS_ACCESS_DENIED) {
4438                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4439                 goto out;
4440         }
4441
4442         /*
4443          * If there is an unshared filesystem mounted on this vnode,
4444          * do not allow to rename objects in this directory.
4445          */
4446         if (vn_ismntpt(odvp)) {
4447                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4448                 goto out;
4449         }
4450
4451         /*
4452          * If there is an unshared filesystem mounted on this vnode,
4453          * do not allow to rename to this directory.
4454          */
4455         if (vn_ismntpt(ndvp)) {
4456                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4457                 goto out;
4458         }
4459
4460         if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4461                 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4462                 goto out;
4463         }
4464
4465         if (cs->saved_exi != cs->exi) {
4466                 *cs->statusp = resp->status = NFS4ERR_XDEV;
4467                 goto out;
4468         }
4469
4470         status = utf8_dir_verify(&args->oldname);
4471         if (status != NFS4_OK) {
4472                 *cs->statusp = resp->status = status;
4473                 goto out;
4474         }
4475
4476         status = utf8_dir_verify(&args->newname);
4477         if (status != NFS4_OK) {
4478                 *cs->statusp = resp->status = status;
4479                 goto out;
4480         }
4481
4482         onm = utf8_to_fn(&args->oldname, &olen, NULL);
4483         if (onm == NULL) {
4484                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4485                 goto out;
4486         }
4487         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4488         nlen = MAXPATHLEN + 1;
4489         converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4490             nlen);
4491
4492         if (converted_onm == NULL) {
4493                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4494                 kmem_free(onm, olen);
4495                 goto out;
4496         }
4497
4498         nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4499         if (nnm == NULL) {
4500                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4501                 if (onm != converted_onm)
4502                         kmem_free(converted_onm, MAXPATHLEN + 1);
4503                 kmem_free(onm, olen);
4504                 goto out;
4505         }
4506         converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4507             MAXPATHLEN  + 1);
4508
4509         if (converted_nnm == NULL) {
4510                 *cs->statusp = resp->status = NFS4ERR_INVAL;
4511                 kmem_free(nnm, nlen);
4512                 nnm = NULL;
4513                 if (onm != converted_onm)
4514                         kmem_free(converted_onm, MAXPATHLEN + 1);
4515                 kmem_free(onm, olen);
4516                 goto out;
4517         }
4518
4519
4520         if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4521                 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4522                 kmem_free(onm, olen);
4523                 kmem_free(nnm, nlen);
4524                 goto out;
4525         }
4526
4527
4528         if (rdonly4(cs->exi, cs->vp, req)) {
4529                 *cs->statusp = resp->status = NFS4ERR_ROFS;
4530                 if (onm != converted_onm)
4531                         kmem_free(converted_onm, MAXPATHLEN + 1);
4532                 kmem_free(onm, olen);
4533                 if (nnm != converted_nnm)
4534                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4535                 kmem_free(nnm, nlen);
4536                 goto out;
4537         }
4538
4539         /* check label of the target dir */
4540         if (is_system_labeled()) {
4541                 ASSERT(req->rq_label != NULL);
4542                 clabel = req->rq_label;
4543                 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4544                     "got client label from request(1)",
4545                     struct svc_req *, req);
4546                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4547                         if (!do_rfs_label_check(clabel, ndvp,
4548                             EQUALITY_CHECK, cs->exi)) {
4549                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4550                                 goto err_out;
4551                         }
4552                 }
4553         }
4554
4555         /*
4556          * Is the source a file and have a delegation?
4557          * We don't need to acquire va_seq before these lookups, if
4558          * it causes an update, cinfo.before will not match, which will
4559          * trigger a cache flush even if atomic is TRUE.
4560          */
4561         if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4562             &error, cs->cr)) {
4563                 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4564                     NULL)) {
4565                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4566                         goto err_out;
4567                 }
4568         }
4569
4570         if (srcvp == NULL) {
4571                 *cs->statusp = resp->status = puterrno4(error);
4572                 if (onm != converted_onm)
4573                         kmem_free(converted_onm, MAXPATHLEN + 1);
4574                 kmem_free(onm, olen);
4575                 if (nnm != converted_nnm)
4576                         kmem_free(converted_nnm, MAXPATHLEN + 1);
4577                 kmem_free(nnm, nlen);
4578                 goto out;
4579         }
4580
4581         sfp_rele_grant_hold = 1;
4582
4583         /* Does the destination exist and a file and have a delegation? */
4584         if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4585             NULL, cs->cr)) {
4586                 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4587                     NULL)) {
4588                         *cs->statusp = resp->status = NFS4ERR_DELAY;
4589                         goto err_out;
4590                 }
4591         }
4592         fp_rele_grant_hold = 1;
4593
4594
4595         /* Check for NBMAND lock on both source and target */
4596         if (nbl_need_check(srcvp)) {
4597                 nbl_start_crit(srcvp, RW_READER);
4598                 in_crit_src = 1;
4599                 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4600                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4601                         goto err_out;
4602                 }
4603         }
4604
4605         if (targvp && nbl_need_check(targvp)) {
4606                 nbl_start_crit(targvp, RW_READER);
4607                 in_crit_targ = 1;
4608                 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4609                         *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4610                         goto err_out;
4611                 }
4612         }
4613
4614         /* Get source "before" change value */
4615         obdva.va_mask = AT_CTIME|AT_SEQ;
4616         error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4617         if (!error) {
4618                 nbdva.va_mask = AT_CTIME|AT_SEQ;
4619                 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4620         }
4621         if (error) {
4622                 *cs->statusp = resp->status = puterrno4(error);
4623                 goto err_out;
4624         }
4625
4626         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4627         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4628
4629         if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4630             cs->cr, NULL, 0)) == 0 && fp != NULL) {
4631                 struct vattr va;
4632                 vnode_t *tvp;
4633
4634                 rfs4_dbe_lock(fp->rf_dbe);
4635                 tvp = fp->rf_vp;
4636                 if (tvp)
4637                         VN_HOLD(tvp);
4638                 rfs4_dbe_unlock(fp->rf_dbe);
4639
4640                 if (tvp) {
4641                         va.va_mask = AT_NLINK;
4642                         if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4643                             va.va_nlink == 0) {
4644                                 /* The file is gone and so should the state */
4645                                 if (in_crit_targ) {
4646                                         nbl_end_crit(targvp);
4647                                         in_crit_targ = 0;
4648                                 }
4649                                 rfs4_close_all_state(fp);
4650                         }
4651                         VN_RELE(tvp);
4652                 }
4653         }
4654         if (error == 0)
4655                 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4656
4657         if (in_crit_src)
4658                 nbl_end_crit(srcvp);
4659         if (srcvp)
4660                 VN_RELE(srcvp);
4661         if (in_crit_targ)
4662                 nbl_end_crit(targvp);
4663         if (targvp)
4664                 VN_RELE(targvp);
4665
4666         if (sfp) {
4667                 rfs4_clear_dont_grant(sfp);
4668                 rfs4_file_rele(sfp);
4669         }
4670         if (fp) {
4671                 rfs4_clear_dont_grant(fp);
4672                 rfs4_file_rele(fp);
4673         }
4674
4675         if (converted_onm != onm)
4676                 kmem_free(converted_onm, MAXPATHLEN + 1);
4677         kmem_free(onm, olen);
4678         if (converted_nnm != nnm)
4679                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4680         kmem_free(nnm, nlen);
4681
4682         /*
4683          * Get the initial "after" sequence number, if it fails, set to zero
4684          */
4685         oidva.va_mask = AT_SEQ;
4686         if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4687                 oidva.va_seq = 0;
4688
4689         nidva.va_mask = AT_SEQ;
4690         if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4691                 nidva.va_seq = 0;
4692
4693         /*
4694          * Force modified data and metadata out to stable storage.
4695          */
4696         (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4697         (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4698
4699         if (error) {
4700                 *cs->statusp = resp->status = puterrno4(error);
4701                 goto out;
4702         }
4703
4704         /*
4705          * Get "after" change values, if it fails, simply return the
4706          * before value.
4707          */
4708         oadva.va_mask = AT_CTIME|AT_SEQ;
4709         if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4710                 oadva.va_ctime = obdva.va_ctime;
4711                 oadva.va_seq = 0;
4712         }
4713
4714         nadva.va_mask = AT_CTIME|AT_SEQ;
4715         if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4716                 nadva.va_ctime = nbdva.va_ctime;
4717                 nadva.va_seq = 0;
4718         }
4719
4720         NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4721         NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4722
4723         /*
4724          * The cinfo.atomic = TRUE only if we have
4725          * non-zero va_seq's, and it has incremented by exactly one
4726          * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4727          */
4728         if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4729             oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4730                 resp->source_cinfo.atomic = TRUE;
4731         else
4732                 resp->source_cinfo.atomic = FALSE;
4733
4734         if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4735             nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4736                 resp->target_cinfo.atomic = TRUE;
4737         else
4738                 resp->target_cinfo.atomic = FALSE;
4739
4740 #ifdef  VOLATILE_FH_TEST
4741         {
4742         extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4743
4744         /*
4745          * Add the renamed file handle to the volatile rename list
4746          */
4747         if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4748                 /* file handles may expire on rename */
4749                 vnode_t *vp;
4750
4751                 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4752                 /*
4753                  * Already know that nnm will be a valid string
4754                  */
4755                 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4756                     NULL, NULL, NULL);
4757                 kmem_free(nnm, nlen);
4758                 if (!error) {
4759                         add_volrnm_fh(cs->exi, vp);
4760                         VN_RELE(vp);
4761                 }
4762         }
4763         }
4764 #endif  /* VOLATILE_FH_TEST */
4765
4766         *cs->statusp = resp->status = NFS4_OK;
4767 out:
4768         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4769             RENAME4res *, resp);
4770         return;
4771
4772 err_out:
4773         if (onm != converted_onm)
4774                 kmem_free(converted_onm, MAXPATHLEN + 1);
4775         if (onm != NULL)
4776                 kmem_free(onm, olen);
4777         if (nnm != converted_nnm)
4778                 kmem_free(converted_nnm, MAXPATHLEN + 1);
4779         if (nnm != NULL)
4780                 kmem_free(nnm, nlen);
4781
4782         if (in_crit_src) nbl_end_crit(srcvp);
4783         if (in_crit_targ) nbl_end_crit(targvp);
4784         if (targvp) VN_RELE(targvp);
4785         if (srcvp) VN_RELE(srcvp);
4786         if (sfp) {
4787                 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4788                 rfs4_file_rele(sfp);
4789         }
4790         if (fp) {
4791                 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4792                 rfs4_file_rele(fp);
4793         }
4794
4795         DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4796             RENAME4res *, resp);
4797 }
4798
4799 /* ARGSUSED */
4800 static void
4801 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4802     struct compound_state *cs)
4803 {
4804         RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4805         RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4806         rfs4_client_t *cp;
4807
4808         DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4809             RENEW4args *, args);
4810
4811         if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4812                 *cs->statusp = resp->status =
4813                     rfs4_check_clientid(&args->clientid, 0);
4814                 goto out;
4815         }
4816
4817         if (rfs4_lease_expired(cp)) {
4818                 rfs4_client_rele(cp);
4819                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4820                 goto out;
4821         }
4822
4823         rfs4_update_lease(cp);
4824
4825         mutex_enter(cp->rc_cbinfo.cb_lock);
4826         if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4827                 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4828                 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4829         } else {
4830                 *cs->statusp = resp->status = NFS4_OK;
4831         }
4832         mutex_exit(cp->rc_cbinfo.cb_lock);
4833
4834         rfs4_client_rele(cp);
4835
4836 out:
4837         DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4838             RENEW4res *, resp);
4839 }
4840
4841 /* ARGSUSED */
4842 static void
4843 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4844     struct compound_state *cs)
4845 {
4846         RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4847
4848         DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4849
4850         /* No need to check cs->access - we are not accessing any object */
4851         if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4852                 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4853                 goto out;
4854         }
4855         if (cs->vp != NULL) {
4856                 VN_RELE(cs->vp);
4857         }
4858         cs->vp = cs->saved_vp;
4859         cs->saved_vp = NULL;
4860         cs->exi = cs->saved_exi;
4861         nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4862         *cs->statusp = resp->status = NFS4_OK;
4863         cs->deleg = FALSE;
4864
4865 out:
4866         DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4867             RESTOREFH4res *, resp);
4868 }
4869
4870 /* ARGSUSED */
4871 static void
4872 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4873     struct compound_state *cs)
4874 {
4875         SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4876
4877         DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4878
4879         /* No need to check cs->access - we are not accessing any object */
4880         if (cs->vp == NULL) {
4881                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4882                 goto out;
4883         }
4884         if (cs->saved_vp != NULL) {
4885                 VN_RELE(cs->saved_vp);
4886         }
4887         cs->saved_vp = cs->vp;
4888         VN_HOLD(cs->saved_vp);
4889         cs->saved_exi = cs->exi;
4890         /*
4891          * since SAVEFH is fairly rare, don't alloc space for its fh
4892          * unless necessary.
4893          */
4894         if (cs->saved_fh.nfs_fh4_val == NULL) {
4895                 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4896         }
4897         nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4898         *cs->statusp = resp->status = NFS4_OK;
4899
4900 out:
4901         DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4902             SAVEFH4res *, resp);
4903 }
4904
4905 /*
4906  * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4907  * return the bitmap of attrs that were set successfully. It is also
4908  * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4909  * always be called only after rfs4_do_set_attrs().
4910  *
4911  * Verify that the attributes are same as the expected ones. sargp->vap
4912  * and sargp->sbp contain the input attributes as translated from fattr4.
4913  *
4914  * This function verifies only the attrs that correspond to a vattr or
4915  * vfsstat struct. That is because of the extra step needed to get the
4916  * corresponding system structs. Other attributes have already been set or
4917  * verified by do_rfs4_set_attrs.
4918  *
4919  * Return 0 if all attrs match, -1 if some don't, error if error processing.
4920  */
4921 static int
4922 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4923     bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4924 {
4925         int error, ret_error = 0;
4926         int i, k;
4927         uint_t sva_mask = sargp->vap->va_mask;
4928         uint_t vbit;
4929         union nfs4_attr_u *na;
4930         uint8_t *amap;
4931         bool_t getsb = ntovp->vfsstat;
4932
4933         if (sva_mask != 0) {
4934                 /*
4935                  * Okay to overwrite sargp->vap because we verify based
4936                  * on the incoming values.
4937                  */
4938                 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4939                     sargp->cs->cr, NULL);
4940                 if (ret_error) {
4941                         if (resp == NULL)
4942                                 return (ret_error);
4943                         /*
4944                          * Must return bitmap of successful attrs
4945                          */
4946                         sva_mask = 0;   /* to prevent checking vap later */
4947                 } else {
4948                         /*
4949                          * Some file systems clobber va_mask. it is probably
4950                          * wrong of them to do so, nonethless we practice
4951                          * defensive coding.
4952                          * See bug id 4276830.
4953                          */
4954                         sargp->vap->va_mask = sva_mask;
4955                 }
4956         }
4957
4958         if (getsb) {
4959                 /*
4960                  * Now get the superblock and loop on the bitmap, as there is
4961                  * no simple way of translating from superblock to bitmap4.
4962                  */
4963                 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4964                 if (ret_error) {
4965                         if (resp == NULL)
4966                                 goto errout;
4967                         getsb = FALSE;
4968                 }
4969         }
4970
4971         /*
4972          * Now loop and verify each attribute which getattr returned
4973          * whether it's the same as the input.
4974          */
4975         if (resp == NULL && !getsb && (sva_mask == 0))
4976                 goto errout;
4977
4978         na = ntovp->na;
4979         amap = ntovp->amap;
4980         k = 0;
4981         for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4982                 k = *amap;
4983                 ASSERT(nfs4_ntov_map[k].nval == k);
4984                 vbit = nfs4_ntov_map[k].vbit;
4985
4986                 /*
4987                  * If vattr attribute but VOP_GETATTR failed, or it's
4988                  * superblock attribute but VFS_STATVFS failed, skip
4989                  */
4990                 if (vbit) {
4991                         if ((vbit & sva_mask) == 0)
4992                                 continue;
4993                 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4994                         continue;
4995                 }
4996                 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4997                 if (resp != NULL) {
4998                         if (error)
4999                                 ret_error = -1; /* not all match */
5000                         else    /* update response bitmap */
5001                                 *resp |= nfs4_ntov_map[k].fbit;
5002                         continue;
5003                 }
5004                 if (error) {
5005                         ret_error = -1; /* not all match */
5006                         break;
5007                 }
5008         }
5009 errout:
5010         return (ret_error);
5011 }
5012
5013 /*
5014  * Decode the attribute to be set/verified. If the attr requires a sys op
5015  * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
5016  * call the sv_getit function for it, because the sys op hasn't yet been done.
5017  * Return 0 for success, error code if failed.
5018  *
5019  * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5020  */
5021 static int
5022 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5023     int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5024 {
5025         int error = 0;
5026         bool_t set_later;
5027
5028         sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5029
5030         if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5031                 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5032                 /*
5033                  * don't verify yet if a vattr or sb dependent attr,
5034                  * because we don't have their sys values yet.
5035                  * Will be done later.
5036                  */
5037                 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5038                         /*
5039                          * ACLs are a special case, since setting the MODE
5040                          * conflicts with setting the ACL.  We delay setting
5041                          * the ACL until all other attributes have been set.
5042                          * The ACL gets set in do_rfs4_op_setattr().
5043                          */
5044                         if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5045                                 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5046                                     sargp, nap);
5047                                 if (error) {
5048                                         xdr_free(nfs4_ntov_map[k].xfunc,
5049                                             (caddr_t)nap);
5050                                 }
5051                         }
5052                 }
5053         } else {
5054 #ifdef  DEBUG
5055                 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5056                     "decoding attribute %d\n", k);
5057 #endif
5058                 error = EINVAL;
5059         }
5060         if (!error && resp_bval && !set_later) {
5061                 *resp_bval |= nfs4_ntov_map[k].fbit;
5062         }
5063
5064         return (error);
5065 }
5066
5067 /*
5068  * Set vattr based on incoming fattr4 attrs - used by setattr.
5069  * Set response mask. Ignore any values that are not writable vattr attrs.
5070  */
5071 static nfsstat4
5072 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5073     struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5074     nfs4_attr_cmd_t cmd)
5075 {
5076         int error = 0;
5077         int i;
5078         char *attrs = fattrp->attrlist4;
5079         uint32_t attrslen = fattrp->attrlist4_len;
5080         XDR xdr;
5081         nfsstat4 status = NFS4_OK;
5082         vnode_t *vp = cs->vp;
5083         union nfs4_attr_u *na;
5084         uint8_t *amap;
5085
5086 #ifndef lint
5087         /*
5088          * Make sure that maximum attribute number can be expressed as an
5089          * 8 bit quantity.
5090          */
5091         ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5092 #endif
5093
5094         if (vp == NULL) {
5095                 if (resp)
5096                         *resp = 0;
5097                 return (NFS4ERR_NOFILEHANDLE);
5098         }
5099         if (cs->access == CS_ACCESS_DENIED) {
5100                 if (resp)
5101                         *resp = 0;
5102                 return (NFS4ERR_ACCESS);
5103         }
5104
5105         sargp->op = cmd;
5106         sargp->cs = cs;
5107         sargp->flag = 0;        /* may be set later */
5108         sargp->vap->va_mask = 0;
5109         sargp->rdattr_error = NFS4_OK;
5110         sargp->rdattr_error_req = FALSE;
5111         /* sargp->sbp is set by the caller */
5112
5113         xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5114
5115         na = ntovp->na;
5116         amap = ntovp->amap;
5117
5118         /*
5119          * The following loop iterates on the nfs4_ntov_map checking
5120          * if the fbit is set in the requested bitmap.
5121          * If set then we process the arguments using the
5122          * rfs4_fattr4 conversion functions to populate the setattr
5123          * vattr and va_mask. Any settable attrs that are not using vattr
5124          * will be set in this loop.
5125          */
5126         for (i = 0; i < nfs4_ntov_map_size; i++) {
5127                 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5128                         continue;
5129                 }
5130                 /*
5131                  * If setattr, must be a writable attr.
5132                  * If verify/nverify, must be a readable attr.
5133                  */
5134                 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5135                     NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5136                         /*
5137                          * Client tries to set/verify an
5138                          * unsupported attribute, tries to set
5139                          * a read only attr or verify a write
5140                          * only one - error!
5141                          */
5142                         break;
5143                 }
5144                 /*
5145                  * Decode the attribute to set/verify
5146                  */
5147                 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5148                     &xdr, resp ? resp : NULL, na);
5149                 if (error)
5150                         break;
5151                 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5152                 na++;
5153                 (ntovp->attrcnt)++;
5154                 if (nfs4_ntov_map[i].vfsstat)
5155                         ntovp->vfsstat = TRUE;
5156         }
5157
5158         if (error != 0)
5159                 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5160                     puterrno4(error));
5161         /* xdrmem_destroy(&xdrs); */    /* NO-OP */
5162         return (status);
5163 }
5164
5165 static nfsstat4
5166 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5167     stateid4 *stateid)
5168 {
5169         int error = 0;
5170         struct nfs4_svgetit_arg sarg;
5171         bool_t trunc;
5172
5173         nfsstat4 status = NFS4_OK;
5174         cred_t *cr = cs->cr;
5175         vnode_t *vp = cs->vp;
5176         struct nfs4_ntov_table ntov;
5177         struct statvfs64 sb;
5178         struct vattr bva;
5179         struct flock64 bf;
5180         int in_crit = 0;
5181         uint_t saved_mask = 0;
5182         caller_context_t ct;
5183
5184         *resp = 0;
5185         sarg.sbp = &sb;
5186         sarg.is_referral = B_FALSE;
5187         nfs4_ntov_table_init(&ntov);
5188         status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5189             NFS4ATTR_SETIT);
5190         if (status != NFS4_OK) {
5191                 /*
5192                  * failed set attrs
5193                  */
5194                 goto done;
5195         }
5196         if ((sarg.vap->va_mask == 0) &&
5197             (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5198                 /*
5199                  * no further work to be done
5200                  */
5201                 goto done;
5202         }
5203
5204         /*
5205          * If we got a request to set the ACL and the MODE, only
5206          * allow changing VSUID, VSGID, and VSVTX.  Attempting
5207          * to change any other bits, along with setting an ACL,
5208          * gives NFS4ERR_INVAL.
5209          */
5210         if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5211             (fattrp->attrmask & FATTR4_MODE_MASK)) {
5212                 vattr_t va;
5213
5214                 va.va_mask = AT_MODE;
5215                 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5216                 if (error) {
5217                         status = puterrno4(error);
5218                         goto done;
5219                 }
5220                 if ((sarg.vap->va_mode ^ va.va_mode) &
5221                     ~(VSUID | VSGID | VSVTX)) {
5222                         status = NFS4ERR_INVAL;
5223                         goto done;
5224                 }
5225         }
5226
5227         /* Check stateid only if size has been set */
5228         if (sarg.vap->va_mask & AT_SIZE) {
5229                 trunc = (sarg.vap->va_size == 0);
5230                 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5231                     trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5232                 if (status != NFS4_OK)
5233                         goto done;
5234         } else {
5235                 ct.cc_sysid = 0;
5236                 ct.cc_pid = 0;
5237                 ct.cc_caller_id = nfs4_srv_caller_id;
5238                 ct.cc_flags = CC_DONTBLOCK;
5239         }
5240
5241         /* XXX start of possible race with delegations */
5242
5243         /*
5244          * We need to specially handle size changes because it is
5245          * possible for the client to create a file with read-only
5246          * modes, but with the file opened for writing. If the client
5247          * then tries to set the file size, e.g. ftruncate(3C),
5248          * fcntl(F_FREESP), the normal access checking done in
5249          * VOP_SETATTR would prevent the client from doing it even though
5250          * it should be allowed to do so.  To get around this, we do the
5251          * access checking for ourselves and use VOP_SPACE which doesn't
5252          * do the access checking.
5253          * Also the client should not be allowed to change the file
5254          * size if there is a conflicting non-blocking mandatory lock in
5255          * the region of the change.
5256          */
5257         if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5258                 u_offset_t offset;
5259                 ssize_t length;
5260
5261                 /*
5262                  * ufs_setattr clears AT_SIZE from vap->va_mask, but
5263                  * before returning, sarg.vap->va_mask is used to
5264                  * generate the setattr reply bitmap.  We also clear
5265                  * AT_SIZE below before calling VOP_SPACE.  For both
5266                  * of these cases, the va_mask needs to be saved here
5267                  * and restored after calling VOP_SETATTR.
5268                  */
5269                 saved_mask = sarg.vap->va_mask;
5270
5271                 /*
5272                  * Check any possible conflict due to NBMAND locks.
5273                  * Get into critical region before VOP_GETATTR, so the
5274                  * size attribute is valid when checking conflicts.
5275                  */
5276                 if (nbl_need_check(vp)) {
5277                         nbl_start_crit(vp, RW_READER);
5278                         in_crit = 1;
5279                 }
5280
5281                 bva.va_mask = AT_UID|AT_SIZE;
5282                 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5283                         status = puterrno4(error);
5284                         goto done;
5285                 }
5286
5287                 if (in_crit) {
5288                         if (sarg.vap->va_size < bva.va_size) {
5289                                 offset = sarg.vap->va_size;
5290                                 length = bva.va_size - sarg.vap->va_size;
5291                         } else {
5292                                 offset = bva.va_size;
5293                                 length = sarg.vap->va_size - bva.va_size;
5294                         }
5295                         if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5296                             &ct)) {
5297                                 status = NFS4ERR_LOCKED;
5298                                 goto done;
5299                         }
5300                 }
5301
5302                 if (crgetuid(cr) == bva.va_uid) {
5303                         sarg.vap->va_mask &= ~AT_SIZE;
5304                         bf.l_type = F_WRLCK;
5305                         bf.l_whence = 0;
5306                         bf.l_start = (off64_t)sarg.vap->va_size;
5307                         bf.l_len = 0;
5308                         bf.l_sysid = 0;
5309                         bf.l_pid = 0;
5310                         error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5311                             (offset_t)sarg.vap->va_size, cr, &ct);
5312                 }
5313         }
5314
5315         if (!error && sarg.vap->va_mask != 0)
5316                 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5317
5318         /* restore va_mask -- ufs_setattr clears AT_SIZE */
5319         if (saved_mask & AT_SIZE)
5320                 sarg.vap->va_mask |= AT_SIZE;
5321
5322         /*
5323          * If an ACL was being set, it has been delayed until now,
5324          * in order to set the mode (via the VOP_SETATTR() above) first.
5325          */
5326         if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5327                 int i;
5328
5329                 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5330                         if (ntov.amap[i] == FATTR4_ACL)
5331                                 break;
5332                 if (i < NFS4_MAXNUM_ATTRS) {
5333                         error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5334                             NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5335                         if (error == 0) {
5336                                 *resp |= FATTR4_ACL_MASK;
5337                         } else if (error == ENOTSUP) {
5338                                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5339                                 status = NFS4ERR_ATTRNOTSUPP;
5340                                 goto done;
5341                         }
5342                 } else {
5343                         NFS4_DEBUG(rfs4_debug,
5344                             (CE_NOTE, "do_rfs4_op_setattr: "
5345                             "unable to find ACL in fattr4"));
5346                         error = EINVAL;
5347                 }
5348         }
5349
5350         if (error) {
5351                 /* check if a monitor detected a delegation conflict */
5352                 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5353                         status = NFS4ERR_DELAY;
5354                 else
5355                         status = puterrno4(error);
5356
5357                 /*
5358                  * Set the response bitmap when setattr failed.
5359                  * If VOP_SETATTR partially succeeded, test by doing a
5360                  * VOP_GETATTR on the object and comparing the data
5361                  * to the setattr arguments.
5362                  */
5363                 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5364         } else {
5365                 /*
5366                  * Force modified metadata out to stable storage.
5367                  */
5368                 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5369                 /*
5370                  * Set response bitmap
5371                  */
5372                 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5373         }
5374
5375 /* Return early and already have a NFSv4 error */
5376 done:
5377         /*
5378          * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5379          * conversion sets both readable and writeable NFS4 attrs
5380          * for AT_MTIME and AT_ATIME.  The line below masks out
5381          * unrequested attrs from the setattr result bitmap.  This
5382          * is placed after the done: label to catch the ATTRNOTSUP
5383          * case.
5384          */
5385         *resp &= fattrp->attrmask;
5386
5387         if (in_crit)
5388                 nbl_end_crit(vp);
5389
5390         nfs4_ntov_table_free(&ntov, &sarg);
5391
5392         return (status);
5393 }
5394
5395 /* ARGSUSED */
5396 static void
5397 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5398     struct compound_state *cs)
5399 {
5400         SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5401         SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5402         bslabel_t *clabel;
5403
5404         DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5405             SETATTR4args *, args);
5406
5407         if (cs->vp == NULL) {
5408                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5409                 goto out;
5410         }
5411
5412         /*
5413          * If there is an unshared filesystem mounted on this vnode,
5414          * do not allow to setattr on this vnode.
5415          */
5416         if (vn_ismntpt(cs->vp)) {
5417                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5418                 goto out;
5419         }
5420
5421         resp->attrsset = 0;
5422
5423         if (rdonly4(cs->exi, cs->vp, req)) {
5424                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5425                 goto out;
5426         }
5427
5428         /* check label before setting attributes */
5429         if (is_system_labeled()) {
5430                 ASSERT(req->rq_label != NULL);
5431                 clabel = req->rq_label;
5432                 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5433                     "got client label from request(1)",
5434                     struct svc_req *, req);
5435                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5436                         if (!do_rfs_label_check(clabel, cs->vp,
5437                             EQUALITY_CHECK, cs->exi)) {
5438                                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5439                                 goto out;
5440                         }
5441                 }
5442         }
5443
5444         *cs->statusp = resp->status =
5445             do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5446             &args->stateid);
5447
5448 out:
5449         DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5450             SETATTR4res *, resp);
5451 }
5452
5453 /* ARGSUSED */
5454 static void
5455 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5456     struct compound_state *cs)
5457 {
5458         /*
5459          * verify and nverify are exactly the same, except that nverify
5460          * succeeds when some argument changed, and verify succeeds when
5461          * when none changed.
5462          */
5463
5464         VERIFY4args  *args = &argop->nfs_argop4_u.opverify;
5465         VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5466
5467         int error;
5468         struct nfs4_svgetit_arg sarg;
5469         struct statvfs64 sb;
5470         struct nfs4_ntov_table ntov;
5471
5472         DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5473             VERIFY4args *, args);
5474
5475         if (cs->vp == NULL) {
5476                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5477                 goto out;
5478         }
5479
5480         sarg.sbp = &sb;
5481         sarg.is_referral = B_FALSE;
5482         nfs4_ntov_table_init(&ntov);
5483         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5484             &sarg, &ntov, NFS4ATTR_VERIT);
5485         if (resp->status != NFS4_OK) {
5486                 /*
5487                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5488                  * so could return -1 for "no match".
5489                  */
5490                 if (resp->status == -1)
5491                         resp->status = NFS4ERR_NOT_SAME;
5492                 goto done;
5493         }
5494         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5495         switch (error) {
5496         case 0:
5497                 resp->status = NFS4_OK;
5498                 break;
5499         case -1:
5500                 resp->status = NFS4ERR_NOT_SAME;
5501                 break;
5502         default:
5503                 resp->status = puterrno4(error);
5504                 break;
5505         }
5506 done:
5507         *cs->statusp = resp->status;
5508         nfs4_ntov_table_free(&ntov, &sarg);
5509 out:
5510         DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5511             VERIFY4res *, resp);
5512 }
5513
5514 /* ARGSUSED */
5515 static void
5516 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5517     struct compound_state *cs)
5518 {
5519         /*
5520          * verify and nverify are exactly the same, except that nverify
5521          * succeeds when some argument changed, and verify succeeds when
5522          * when none changed.
5523          */
5524
5525         NVERIFY4args  *args = &argop->nfs_argop4_u.opnverify;
5526         NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5527
5528         int error;
5529         struct nfs4_svgetit_arg sarg;
5530         struct statvfs64 sb;
5531         struct nfs4_ntov_table ntov;
5532
5533         DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5534             NVERIFY4args *, args);
5535
5536         if (cs->vp == NULL) {
5537                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5538                 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5539                     NVERIFY4res *, resp);
5540                 return;
5541         }
5542         sarg.sbp = &sb;
5543         sarg.is_referral = B_FALSE;
5544         nfs4_ntov_table_init(&ntov);
5545         resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5546             &sarg, &ntov, NFS4ATTR_VERIT);
5547         if (resp->status != NFS4_OK) {
5548                 /*
5549                  * do_rfs4_set_attrs will try to verify systemwide attrs,
5550                  * so could return -1 for "no match".
5551                  */
5552                 if (resp->status == -1)
5553                         resp->status = NFS4_OK;
5554                 goto done;
5555         }
5556         error = rfs4_verify_attr(&sarg, NULL, &ntov);
5557         switch (error) {
5558         case 0:
5559                 resp->status = NFS4ERR_SAME;
5560                 break;
5561         case -1:
5562                 resp->status = NFS4_OK;
5563                 break;
5564         default:
5565                 resp->status = puterrno4(error);
5566                 break;
5567         }
5568 done:
5569         *cs->statusp = resp->status;
5570         nfs4_ntov_table_free(&ntov, &sarg);
5571
5572         DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5573             NVERIFY4res *, resp);
5574 }
5575
5576 /*
5577  * XXX - This should live in an NFS header file.
5578  */
5579 #define MAX_IOVECS      12
5580
5581 /* ARGSUSED */
5582 static void
5583 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5584     struct compound_state *cs)
5585 {
5586         WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5587         WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5588         int error;
5589         vnode_t *vp;
5590         struct vattr bva;
5591         u_offset_t rlimit;
5592         struct uio uio;
5593         struct iovec iov[MAX_IOVECS];
5594         struct iovec *iovp;
5595         int iovcnt;
5596         int ioflag;
5597         cred_t *savecred, *cr;
5598         bool_t *deleg = &cs->deleg;
5599         nfsstat4 stat;
5600         int in_crit = 0;
5601         caller_context_t ct;
5602
5603         DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5604             WRITE4args *, args);
5605
5606         vp = cs->vp;
5607         if (vp == NULL) {
5608                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5609                 goto out;
5610         }
5611         if (cs->access == CS_ACCESS_DENIED) {
5612                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5613                 goto out;
5614         }
5615
5616         cr = cs->cr;
5617
5618         if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5619             deleg, TRUE, &ct)) != NFS4_OK) {
5620                 *cs->statusp = resp->status = stat;
5621                 goto out;
5622         }
5623
5624         /*
5625          * We have to enter the critical region before calling VOP_RWLOCK
5626          * to avoid a deadlock with ufs.
5627          */
5628         if (nbl_need_check(vp)) {
5629                 nbl_start_crit(vp, RW_READER);
5630                 in_crit = 1;
5631                 if (nbl_conflict(vp, NBL_WRITE,
5632                     args->offset, args->data_len, 0, &ct)) {
5633                         *cs->statusp = resp->status = NFS4ERR_LOCKED;
5634                         goto out;
5635                 }
5636         }
5637
5638         bva.va_mask = AT_MODE | AT_UID;
5639         error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5640
5641         /*
5642          * If we can't get the attributes, then we can't do the
5643          * right access checking.  So, we'll fail the request.
5644          */
5645         if (error) {
5646                 *cs->statusp = resp->status = puterrno4(error);
5647                 goto out;
5648         }
5649
5650         if (rdonly4(cs->exi, cs->vp, req)) {
5651                 *cs->statusp = resp->status = NFS4ERR_ROFS;
5652                 goto out;
5653         }
5654
5655         if (vp->v_type != VREG) {
5656                 *cs->statusp = resp->status =
5657                     ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5658                 goto out;
5659         }
5660
5661         if (crgetuid(cr) != bva.va_uid &&
5662             (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5663                 *cs->statusp = resp->status = puterrno4(error);
5664                 goto out;
5665         }
5666
5667         if (MANDLOCK(vp, bva.va_mode)) {
5668                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5669                 goto out;
5670         }
5671
5672         if (args->data_len == 0) {
5673                 *cs->statusp = resp->status = NFS4_OK;
5674                 resp->count = 0;
5675                 resp->committed = args->stable;
5676                 resp->writeverf = Write4verf;
5677                 goto out;
5678         }
5679
5680         if (args->mblk != NULL) {
5681                 mblk_t *m;
5682                 uint_t bytes, round_len;
5683
5684                 iovcnt = 0;
5685                 bytes = 0;
5686                 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5687                 for (m = args->mblk;
5688                     m != NULL && bytes < round_len;
5689                     m = m->b_cont) {
5690                         iovcnt++;
5691                         bytes += MBLKL(m);
5692                 }
5693 #ifdef DEBUG
5694                 /* should have ended on an mblk boundary */
5695                 if (bytes != round_len) {
5696                         printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5697                             bytes, round_len, args->data_len);
5698                         printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5699                             (void *)args->mblk, (void *)m);
5700                         ASSERT(bytes == round_len);
5701                 }
5702 #endif
5703                 if (iovcnt <= MAX_IOVECS) {
5704                         iovp = iov;
5705                 } else {
5706                         iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5707                 }
5708                 mblk_to_iov(args->mblk, iovcnt, iovp);
5709         } else if (args->rlist != NULL) {
5710                 iovcnt = 1;
5711                 iovp = iov;
5712                 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5713                 iovp->iov_len = args->data_len;
5714         } else {
5715                 iovcnt = 1;
5716                 iovp = iov;
5717                 iovp->iov_base = args->data_val;
5718                 iovp->iov_len = args->data_len;
5719         }
5720
5721         uio.uio_iov = iovp;
5722         uio.uio_iovcnt = iovcnt;
5723
5724         uio.uio_segflg = UIO_SYSSPACE;
5725         uio.uio_extflg = UIO_COPY_DEFAULT;
5726         uio.uio_loffset = args->offset;
5727         uio.uio_resid = args->data_len;
5728         uio.uio_llimit = curproc->p_fsz_ctl;
5729         rlimit = uio.uio_llimit - args->offset;
5730         if (rlimit < (u_offset_t)uio.uio_resid)
5731                 uio.uio_resid = (int)rlimit;
5732
5733         if (args->stable == UNSTABLE4)
5734                 ioflag = 0;
5735         else if (args->stable == FILE_SYNC4)
5736                 ioflag = FSYNC;
5737         else if (args->stable == DATA_SYNC4)
5738                 ioflag = FDSYNC;
5739         else {
5740                 if (iovp != iov)
5741                         kmem_free(iovp, sizeof (*iovp) * iovcnt);
5742                 *cs->statusp = resp->status = NFS4ERR_INVAL;
5743                 goto out;
5744         }
5745
5746         /*
5747          * We're changing creds because VM may fault and we need
5748          * the cred of the current thread to be used if quota
5749          * checking is enabled.
5750          */
5751         savecred = curthread->t_cred;
5752         curthread->t_cred = cr;
5753         error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5754         curthread->t_cred = savecred;
5755
5756         if (iovp != iov)
5757                 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5758
5759         if (error) {
5760                 *cs->statusp = resp->status = puterrno4(error);
5761                 goto out;
5762         }
5763
5764         *cs->statusp = resp->status = NFS4_OK;
5765         resp->count = args->data_len - uio.uio_resid;
5766
5767         if (ioflag == 0)
5768                 resp->committed = UNSTABLE4;
5769         else
5770                 resp->committed = FILE_SYNC4;
5771
5772         resp->writeverf = Write4verf;
5773
5774 out:
5775         if (in_crit)
5776                 nbl_end_crit(vp);
5777
5778         DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5779             WRITE4res *, resp);
5780 }
5781
5782
5783 /* XXX put in a header file */
5784 extern int      sec_svc_getcred(struct svc_req *, cred_t *,  caddr_t *, int *);
5785
5786 void
5787 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5788     struct svc_req *req, cred_t *cr, int *rv)
5789 {
5790         uint_t i;
5791         struct compound_state cs;
5792
5793         if (rv != NULL)
5794                 *rv = 0;
5795         rfs4_init_compound_state(&cs);
5796         /*
5797          * Form a reply tag by copying over the reqeuest tag.
5798          */
5799         resp->tag.utf8string_val =
5800             kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5801         resp->tag.utf8string_len = args->tag.utf8string_len;
5802         bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5803             resp->tag.utf8string_len);
5804
5805         cs.statusp = &resp->status;
5806         cs.req = req;
5807         resp->array = NULL;
5808         resp->array_len = 0;
5809
5810         resp->status = utf8_name_verify(&(resp->tag));
5811         if (resp->status != NFS4_OK)
5812                 return;
5813
5814         /*
5815          * XXX for now, minorversion should be zero
5816          */
5817         if (args->minorversion != NFS4_MINORVERSION) {
5818                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5819                     &cs, COMPOUND4args *, args);
5820                 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5821                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5822                     &cs, COMPOUND4res *, resp);
5823                 return;
5824         }
5825
5826         if (args->array_len == 0) {
5827                 resp->status = NFS4_OK;
5828                 return;
5829         }
5830
5831         ASSERT(exi == NULL);
5832         ASSERT(cr == NULL);
5833
5834         cr = crget();
5835         ASSERT(cr != NULL);
5836
5837         if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5838                 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5839                     &cs, COMPOUND4args *, args);
5840                 crfree(cr);
5841                 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5842                     &cs, COMPOUND4res *, resp);
5843                 svcerr_badcred(req->rq_xprt);
5844                 if (rv != NULL)
5845                         *rv = 1;
5846                 return;
5847         }
5848         resp->array_len = args->array_len;
5849         resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5850             KM_SLEEP);
5851
5852         cs.basecr = cr;
5853
5854         DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5855             COMPOUND4args *, args);
5856
5857         /*
5858          * For now, NFS4 compound processing must be protected by
5859          * exported_lock because it can access more than one exportinfo
5860          * per compound and share/unshare can now change multiple
5861          * exinfo structs.  The NFS2/3 code only refs 1 exportinfo
5862          * per proc (excluding public exinfo), and exi_count design
5863          * is sufficient to protect concurrent execution of NFS2/3
5864          * ops along with unexport.  This lock will be removed as
5865          * part of the NFSv4 phase 2 namespace redesign work.
5866          */
5867         rw_enter(&exported_lock, RW_READER);
5868
5869         /*
5870          * If this is the first compound we've seen, we need to start all
5871          * new instances' grace periods.
5872          */
5873         if (rfs4_seen_first_compound == 0) {
5874                 rfs4_grace_start_new();
5875                 /*
5876                  * This must be set after rfs4_grace_start_new(), otherwise
5877                  * another thread could proceed past here before the former
5878                  * is finished.
5879                  */
5880                 rfs4_seen_first_compound = 1;
5881         }
5882
5883         for (i = 0; i < args->array_len && cs.cont; i++) {
5884                 nfs_argop4 *argop;
5885                 nfs_resop4 *resop;
5886                 uint_t op;
5887
5888                 argop = &args->array[i];
5889                 resop = &resp->array[i];
5890                 resop->resop = argop->argop;
5891                 op = (uint_t)resop->resop;
5892
5893                 if (op < rfsv4disp_cnt) {
5894                         /*
5895                          * Count the individual ops here; NULL and COMPOUND
5896                          * are counted in common_dispatch()
5897                          */
5898                         rfsproccnt_v4_ptr[op].value.ui64++;
5899
5900                         NFS4_DEBUG(rfs4_debug > 1,
5901                             (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5902                         (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5903                         NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5904                             rfs4_op_string[op], *cs.statusp));
5905                         if (*cs.statusp != NFS4_OK)
5906                                 cs.cont = FALSE;
5907                 } else {
5908                         /*
5909                          * This is effectively dead code since XDR code
5910                          * will have already returned BADXDR if op doesn't
5911                          * decode to legal value.  This only done for a
5912                          * day when XDR code doesn't verify v4 opcodes.
5913                          */
5914                         op = OP_ILLEGAL;
5915                         rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5916
5917                         rfs4_op_illegal(argop, resop, req, &cs);
5918                         cs.cont = FALSE;
5919                 }
5920
5921                 /*
5922                  * If not at last op, and if we are to stop, then
5923                  * compact the results array.
5924                  */
5925                 if ((i + 1) < args->array_len && !cs.cont) {
5926                         nfs_resop4 *new_res = kmem_alloc(
5927                             (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5928                         bcopy(resp->array,
5929                             new_res, (i+1) * sizeof (nfs_resop4));
5930                         kmem_free(resp->array,
5931                             args->array_len * sizeof (nfs_resop4));
5932
5933                         resp->array_len =  i + 1;
5934                         resp->array = new_res;
5935                 }
5936         }
5937
5938         rw_exit(&exported_lock);
5939
5940         DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5941             COMPOUND4res *, resp);
5942
5943         if (cs.vp)
5944                 VN_RELE(cs.vp);
5945         if (cs.saved_vp)
5946                 VN_RELE(cs.saved_vp);
5947         if (cs.saved_fh.nfs_fh4_val)
5948                 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5949
5950         if (cs.basecr)
5951                 crfree(cs.basecr);
5952         if (cs.cr)
5953                 crfree(cs.cr);
5954         /*
5955          * done with this compound request, free the label
5956          */
5957
5958         if (req->rq_label != NULL) {
5959                 kmem_free(req->rq_label, sizeof (bslabel_t));
5960                 req->rq_label = NULL;
5961         }
5962 }
5963
5964 /*
5965  * XXX because of what appears to be duplicate calls to rfs4_compound_free
5966  * XXX zero out the tag and array values. Need to investigate why the
5967  * XXX calls occur, but at least prevent the panic for now.
5968  */
5969 void
5970 rfs4_compound_free(COMPOUND4res *resp)
5971 {
5972         uint_t i;
5973
5974         if (resp->tag.utf8string_val) {
5975                 UTF8STRING_FREE(resp->tag)
5976         }
5977
5978         for (i = 0; i < resp->array_len; i++) {
5979                 nfs_resop4 *resop;
5980                 uint_t op;
5981
5982                 resop = &resp->array[i];
5983                 op = (uint_t)resop->resop;
5984                 if (op < rfsv4disp_cnt) {
5985                         (*rfsv4disptab[op].dis_resfree)(resop);
5986                 }
5987         }
5988         if (resp->array != NULL) {
5989                 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5990         }
5991 }
5992
5993 /*
5994  * Process the value of the compound request rpc flags, as a bit-AND
5995  * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5996  */
5997 void
5998 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5999 {
6000         int i;
6001         int flag = RPC_ALL;
6002
6003         for (i = 0; flag && i < args->array_len; i++) {
6004                 uint_t op;
6005
6006                 op = (uint_t)args->array[i].argop;
6007
6008                 if (op < rfsv4disp_cnt)
6009                         flag &= rfsv4disptab[op].dis_flags;
6010                 else
6011                         flag = 0;
6012         }
6013         *flagp = flag;
6014 }
6015
6016 nfsstat4
6017 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
6018 {
6019         nfsstat4 e;
6020
6021         rfs4_dbe_lock(cp->rc_dbe);
6022
6023         if (cp->rc_sysidt != LM_NOSYSID) {
6024                 *sp = cp->rc_sysidt;
6025                 e = NFS4_OK;
6026
6027         } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
6028                 *sp = cp->rc_sysidt;
6029                 e = NFS4_OK;
6030
6031                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6032                     "rfs4_client_sysid: allocated 0x%x\n", *sp));
6033         } else
6034                 e = NFS4ERR_DELAY;
6035
6036         rfs4_dbe_unlock(cp->rc_dbe);
6037         return (e);
6038 }
6039
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug helper: log a one-line description of a record lock request.
 *
 *   str       - prefix identifying the caller
 *   operation - fcntl-style command (F_GETLK, F_SETLK or F_SETLK_NBMAND;
 *               anything else is printed as F_UNKNOWN)
 *   flk       - the lock being described; l_whence must be 0 (asserted)
 *
 * A zero l_len is printed as all ones.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
        char *op, *type;

        switch (operation) {
        case F_GETLK:
                op = "F_GETLK";
                break;
        case F_SETLK:
                op = "F_SETLK";
                break;
        case F_SETLK_NBMAND:
                op = "F_SETLK_NBMAND";
                break;
        default:
                op = "F_UNKNOWN";
                break;
        }

        switch (flk->l_type) {
        case F_UNLCK:
                type = "F_UNLCK";
                break;
        case F_RDLCK:
                type = "F_RDLCK";
                break;
        case F_WRLCK:
                type = "F_WRLCK";
                break;
        default:
                type = "F_UNKNOWN";
                break;
        }

        ASSERT(flk->l_whence == 0);
        cmn_err(CE_NOTE, "%s:  %s, type = %s, off = %llx len = %llx pid = %d",
            str, op, type, (longlong_t)flk->l_start,
            flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

/*
 * Print the lock when 'd' is non-zero.  Wrapped in do/while so that the
 * macro acts as a single statement and cannot capture a following 'else',
 * which keeps DEBUG builds parsing the same way as the empty non-DEBUG
 * definition below.
 */
#define LOCK_PRINT(d, s, t, f) \
        do { if (d) lock_print(s, t, f); } while (0)
#else
#define LOCK_PRINT(d, s, t, f)
#endif
6076
6077 /*ARGSUSED*/
6078 static bool_t
6079 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6080 {
6081         return (TRUE);
6082 }
6083
6084 /*
6085  * Look up the pathname using the vp in cs as the directory vnode.
6086  * cs->vp will be the vnode for the file on success
6087  */
6088
6089 static nfsstat4
6090 rfs4_lookup(component4 *component, struct svc_req *req,
6091     struct compound_state *cs)
6092 {
6093         char *nm;
6094         uint32_t len;
6095         nfsstat4 status;
6096         struct sockaddr *ca;
6097         char *name;
6098
6099         if (cs->vp == NULL) {
6100                 return (NFS4ERR_NOFILEHANDLE);
6101         }
6102         if (cs->vp->v_type != VDIR) {
6103                 return (NFS4ERR_NOTDIR);
6104         }
6105
6106         status = utf8_dir_verify(component);
6107         if (status != NFS4_OK)
6108                 return (status);
6109
6110         nm = utf8_to_fn(component, &len, NULL);
6111         if (nm == NULL) {
6112                 return (NFS4ERR_INVAL);
6113         }
6114
6115         if (len > MAXNAMELEN) {
6116                 kmem_free(nm, len);
6117                 return (NFS4ERR_NAMETOOLONG);
6118         }
6119
6120         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6121         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6122             MAXPATHLEN + 1);
6123
6124         if (name == NULL) {
6125                 kmem_free(nm, len);
6126                 return (NFS4ERR_INVAL);
6127         }
6128
6129         status = do_rfs4_op_lookup(name, req, cs);
6130
6131         if (name != nm)
6132                 kmem_free(name, MAXPATHLEN + 1);
6133
6134         kmem_free(nm, len);
6135
6136         return (status);
6137 }
6138
6139 static nfsstat4
6140 rfs4_lookupfile(component4 *component, struct svc_req *req,
6141     struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6142 {
6143         nfsstat4 status;
6144         vnode_t *dvp = cs->vp;
6145         vattr_t bva, ava, fva;
6146         int error;
6147
6148         /* Get "before" change value */
6149         bva.va_mask = AT_CTIME|AT_SEQ;
6150         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6151         if (error)
6152                 return (puterrno4(error));
6153
6154         /* rfs4_lookup may VN_RELE directory */
6155         VN_HOLD(dvp);
6156
6157         status = rfs4_lookup(component, req, cs);
6158         if (status != NFS4_OK) {
6159                 VN_RELE(dvp);
6160                 return (status);
6161         }
6162
6163         /*
6164          * Get "after" change value, if it fails, simply return the
6165          * before value.
6166          */
6167         ava.va_mask = AT_CTIME|AT_SEQ;
6168         if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6169                 ava.va_ctime = bva.va_ctime;
6170                 ava.va_seq = 0;
6171         }
6172         VN_RELE(dvp);
6173
6174         /*
6175          * Validate the file is a file
6176          */
6177         fva.va_mask = AT_TYPE|AT_MODE;
6178         error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6179         if (error)
6180                 return (puterrno4(error));
6181
6182         if (fva.va_type != VREG) {
6183                 if (fva.va_type == VDIR)
6184                         return (NFS4ERR_ISDIR);
6185                 if (fva.va_type == VLNK)
6186                         return (NFS4ERR_SYMLINK);
6187                 return (NFS4ERR_INVAL);
6188         }
6189
6190         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6191         NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6192
6193         /*
6194          * It is undefined if VOP_LOOKUP will change va_seq, so
6195          * cinfo.atomic = TRUE only if we have
6196          * non-zero va_seq's, and they have not changed.
6197          */
6198         if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6199                 cinfo->atomic = TRUE;
6200         else
6201                 cinfo->atomic = FALSE;
6202
6203         /* Check for mandatory locking */
6204         cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6205         return (check_open_access(access, cs, req));
6206 }
6207
6208 static nfsstat4
6209 create_vnode(vnode_t *dvp, char *nm,  vattr_t *vap, createmode4 mode,
6210     timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
6211 {
6212         int error;
6213         nfsstat4 status = NFS4_OK;
6214         vattr_t va;
6215
6216 tryagain:
6217
6218         /*
6219          * The file open mode used is VWRITE.  If the client needs
6220          * some other semantic, then it should do the access checking
6221          * itself.  It would have been nice to have the file open mode
6222          * passed as part of the arguments.
6223          */
6224
6225         *created = TRUE;
6226         error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6227
6228         if (error) {
6229                 *created = FALSE;
6230
6231                 /*
6232                  * If we got something other than file already exists
6233                  * then just return this error.  Otherwise, we got
6234                  * EEXIST.  If we were doing a GUARDED create, then
6235                  * just return this error.  Otherwise, we need to
6236                  * make sure that this wasn't a duplicate of an
6237                  * exclusive create request.
6238                  *
6239                  * The assumption is made that a non-exclusive create
6240                  * request will never return EEXIST.
6241                  */
6242
6243                 if (error != EEXIST || mode == GUARDED4) {
6244                         status = puterrno4(error);
6245                         return (status);
6246                 }
6247                 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6248                     NULL, NULL, NULL);
6249
6250                 if (error) {
6251                         /*
6252                          * We couldn't find the file that we thought that
6253                          * we just created.  So, we'll just try creating
6254                          * it again.
6255                          */
6256                         if (error == ENOENT)
6257                                 goto tryagain;
6258
6259                         status = puterrno4(error);
6260                         return (status);
6261                 }
6262
6263                 if (mode == UNCHECKED4) {
6264                         /* existing object must be regular file */
6265                         if ((*vpp)->v_type != VREG) {
6266                                 if ((*vpp)->v_type == VDIR)
6267                                         status = NFS4ERR_ISDIR;
6268                                 else if ((*vpp)->v_type == VLNK)
6269                                         status = NFS4ERR_SYMLINK;
6270                                 else
6271                                         status = NFS4ERR_INVAL;
6272                                 VN_RELE(*vpp);
6273                                 return (status);
6274                         }
6275
6276                         return (NFS4_OK);
6277                 }
6278
6279                 /* Check for duplicate request */
6280                 ASSERT(mtime != 0);
6281                 va.va_mask = AT_MTIME;
6282                 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6283                 if (!error) {
6284                         /* We found the file */
6285                         if (va.va_mtime.tv_sec != mtime->tv_sec ||
6286                             va.va_mtime.tv_nsec != mtime->tv_nsec) {
6287                                 /* but its not our creation */
6288                                 VN_RELE(*vpp);
6289                                 return (NFS4ERR_EXIST);
6290                         }
6291                         *created = TRUE; /* retrans of create == created */
6292                         return (NFS4_OK);
6293                 }
6294                 VN_RELE(*vpp);
6295                 return (NFS4ERR_EXIST);
6296         }
6297
6298         return (NFS4_OK);
6299 }
6300
6301 static nfsstat4
6302 check_open_access(uint32_t access, struct compound_state *cs,
6303     struct svc_req *req)
6304 {
6305         int error;
6306         vnode_t *vp;
6307         bool_t readonly;
6308         cred_t *cr = cs->cr;
6309
6310         /* For now we don't allow mandatory locking as per V2/V3 */
6311         if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6312                 return (NFS4ERR_ACCESS);
6313         }
6314
6315         vp = cs->vp;
6316         ASSERT(cr != NULL && vp->v_type == VREG);
6317
6318         /*
6319          * If the file system is exported read only and we are trying
6320          * to open for write, then return NFS4ERR_ROFS
6321          */
6322
6323         readonly = rdonly4(cs->exi, cs->vp, req);
6324
6325         if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6326                 return (NFS4ERR_ROFS);
6327
6328         if (access & OPEN4_SHARE_ACCESS_READ) {
6329                 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6330                     (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6331                         return (NFS4ERR_ACCESS);
6332                 }
6333         }
6334
6335         if (access & OPEN4_SHARE_ACCESS_WRITE) {
6336                 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6337                 if (error)
6338                         return (NFS4ERR_ACCESS);
6339         }
6340
6341         return (NFS4_OK);
6342 }
6343
6344 static nfsstat4
6345 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6346     change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6347 {
6348         struct nfs4_svgetit_arg sarg;
6349         struct nfs4_ntov_table ntov;
6350
6351         bool_t ntov_table_init = FALSE;
6352         struct statvfs64 sb;
6353         nfsstat4 status;
6354         vnode_t *vp;
6355         vattr_t bva, ava, iva, cva, *vap;
6356         vnode_t *dvp;
6357         timespec32_t *mtime;
6358         char *nm = NULL;
6359         uint_t buflen;
6360         bool_t created;
6361         bool_t setsize = FALSE;
6362         len_t reqsize;
6363         int error;
6364         bool_t trunc;
6365         caller_context_t ct;
6366         component4 *component;
6367         bslabel_t *clabel;
6368         struct sockaddr *ca;
6369         char *name = NULL;
6370
6371         sarg.sbp = &sb;
6372         sarg.is_referral = B_FALSE;
6373
6374         dvp = cs->vp;
6375
6376         /* Check if the file system is read only */
6377         if (rdonly4(cs->exi, dvp, req))
6378                 return (NFS4ERR_ROFS);
6379
6380         /* check the label of including directory */
6381         if (is_system_labeled()) {
6382                 ASSERT(req->rq_label != NULL);
6383                 clabel = req->rq_label;
6384                 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6385                     "got client label from request(1)",
6386                     struct svc_req *, req);
6387                 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6388                         if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6389                             cs->exi)) {
6390                                 return (NFS4ERR_ACCESS);
6391                         }
6392                 }
6393         }
6394
6395         /*
6396          * Get the last component of path name in nm. cs will reference
6397          * the including directory on success.
6398          */
6399         component = &args->open_claim4_u.file;
6400         status = utf8_dir_verify(component);
6401         if (status != NFS4_OK)
6402                 return (status);
6403
6404         nm = utf8_to_fn(component, &buflen, NULL);
6405
6406         if (nm == NULL)
6407                 return (NFS4ERR_RESOURCE);
6408
6409         if (buflen > MAXNAMELEN) {
6410                 kmem_free(nm, buflen);
6411                 return (NFS4ERR_NAMETOOLONG);
6412         }
6413
6414         bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6415         error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6416         if (error) {
6417                 kmem_free(nm, buflen);
6418                 return (puterrno4(error));
6419         }
6420
6421         if (bva.va_type != VDIR) {
6422                 kmem_free(nm, buflen);
6423                 return (NFS4ERR_NOTDIR);
6424         }
6425
6426         NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6427
6428         switch (args->mode) {
6429         case GUARDED4:
6430                 /*FALLTHROUGH*/
6431         case UNCHECKED4:
6432                 nfs4_ntov_table_init(&ntov);
6433                 ntov_table_init = TRUE;
6434
6435                 *attrset = 0;
6436                 status = do_rfs4_set_attrs(attrset,
6437                     &args->createhow4_u.createattrs,
6438                     cs, &sarg, &ntov, NFS4ATTR_SETIT);
6439
6440                 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6441                     sarg.vap->va_type != VREG) {
6442                         if (sarg.vap->va_type == VDIR)
6443                                 status = NFS4ERR_ISDIR;
6444                         else if (sarg.vap->va_type == VLNK)
6445                                 status = NFS4ERR_SYMLINK;
6446                         else
6447                                 status = NFS4ERR_INVAL;
6448                 }
6449
6450                 if (status != NFS4_OK) {
6451                         kmem_free(nm, buflen);
6452                         nfs4_ntov_table_free(&ntov, &sarg);
6453                         *attrset = 0;
6454                         return (status);
6455                 }
6456
6457                 vap = sarg.vap;
6458                 vap->va_type = VREG;
6459                 vap->va_mask |= AT_TYPE;
6460
6461                 if ((vap->va_mask & AT_MODE) == 0) {
6462                         vap->va_mask |= AT_MODE;
6463                         vap->va_mode = (mode_t)0600;
6464                 }
6465
6466                 if (vap->va_mask & AT_SIZE) {
6467
6468                         /* Disallow create with a non-zero size */
6469
6470                         if ((reqsize = sarg.vap->va_size) != 0) {
6471                                 kmem_free(nm, buflen);
6472                                 nfs4_ntov_table_free(&ntov, &sarg);
6473                                 *attrset = 0;
6474                                 return (NFS4ERR_INVAL);
6475                         }
6476                         setsize = TRUE;
6477                 }
6478                 break;
6479
6480         case EXCLUSIVE4:
6481                 /* prohibit EXCL create of named attributes */
6482                 if (dvp->v_flag & V_XATTRDIR) {
6483                         kmem_free(nm, buflen);
6484                         *attrset = 0;
6485                         return (NFS4ERR_INVAL);
6486                 }
6487
6488                 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6489                 cva.va_type = VREG;
6490                 /*
6491                  * Ensure no time overflows. Assumes underlying
6492                  * filesystem supports at least 32 bits.
6493                  * Truncate nsec to usec resolution to allow valid
6494                  * compares even if the underlying filesystem truncates.
6495                  */
6496                 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6497                 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6498                 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6499                 cva.va_mode = (mode_t)0;
6500                 vap = &cva;
6501
6502                 /*
6503                  * For EXCL create, attrset is set to the server attr
6504                  * used to cache the client's verifier.
6505                  */
6506                 *attrset = FATTR4_TIME_MODIFY_MASK;
6507                 break;
6508         }
6509
6510         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6511         name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6512             MAXPATHLEN  + 1);
6513
6514         if (name == NULL) {
6515                 kmem_free(nm, buflen);
6516                 return (NFS4ERR_SERVERFAULT);
6517         }
6518
6519         status = create_vnode(dvp, name, vap, args->mode, mtime,
6520             cs->cr, &vp, &created);
6521         if (nm != name)
6522                 kmem_free(name, MAXPATHLEN + 1);
6523         kmem_free(nm, buflen);
6524
6525         if (status != NFS4_OK) {
6526                 if (ntov_table_init)
6527                         nfs4_ntov_table_free(&ntov, &sarg);
6528                 *attrset = 0;
6529                 return (status);
6530         }
6531
6532         trunc = (setsize && !created);
6533
6534         if (args->mode != EXCLUSIVE4) {
6535                 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6536
6537                 /*
6538                  * True verification that object was created with correct
6539                  * attrs is impossible.  The attrs could have been changed
6540                  * immediately after object creation.  If attributes did
6541                  * not verify, the only recourse for the server is to
6542                  * destroy the object.  Maybe if some attrs (like gid)
6543                  * are set incorrectly, the object should be destroyed;
6544                  * however, seems bad as a default policy.  Do we really
6545                  * want to destroy an object over one of the times not
6546                  * verifying correctly?  For these reasons, the server
6547                  * currently sets bits in attrset for createattrs
6548                  * that were set; however, no verification is done.
6549                  *
6550                  * vmask_to_nmask accounts for vattr bits set on create
6551                  *      [do_rfs4_set_attrs() only sets resp bits for
6552                  *       non-vattr/vfs bits.]
6553                  * Mask off any bits we set by default so as not to return
6554                  * more attrset bits than were requested in createattrs
6555                  */
6556                 if (created) {
6557                         nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6558                         *attrset &= createmask;
6559                 } else {
6560                         /*
6561                          * We did not create the vnode (we tried but it
6562                          * already existed).  In this case, the only createattr
6563                          * that the spec allows the server to set is size,
6564                          * and even then, it can only be set if it is 0.
6565                          */
6566                         *attrset = 0;
6567                         if (trunc)
6568                                 *attrset = FATTR4_SIZE_MASK;
6569                 }
6570         }
6571         if (ntov_table_init)
6572                 nfs4_ntov_table_free(&ntov, &sarg);
6573
6574         /*
6575          * Get the initial "after" sequence number, if it fails,
6576          * set to zero, time to before.
6577          */
6578         iva.va_mask = AT_CTIME|AT_SEQ;
6579         if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6580                 iva.va_seq = 0;
6581                 iva.va_ctime = bva.va_ctime;
6582         }
6583
6584         /*
6585          * create_vnode attempts to create the file exclusive,
6586          * if it already exists the VOP_CREATE will fail and
6587          * may not increase va_seq. It is atomic if
6588          * we haven't changed the directory, but if it has changed
6589          * we don't know what changed it.
6590          */
6591         if (!created) {
6592                 if (bva.va_seq && iva.va_seq &&
6593                     bva.va_seq == iva.va_seq)
6594                         cinfo->atomic = TRUE;
6595                 else
6596                         cinfo->atomic = FALSE;
6597                 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6598         } else {
6599                 /*
6600                  * The entry was created, we need to sync the
6601                  * directory metadata.
6602                  */
6603                 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6604
6605                 /*
6606                  * Get "after" change value, if it fails, simply return the
6607                  * before value.
6608                  */
6609                 ava.va_mask = AT_CTIME|AT_SEQ;
6610                 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6611                         ava.va_ctime = bva.va_ctime;
6612                         ava.va_seq = 0;
6613                 }
6614
6615                 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6616
6617                 /*
6618                  * The cinfo->atomic = TRUE only if we have
6619                  * non-zero va_seq's, and it has incremented by exactly one
6620                  * during the create_vnode and it didn't
6621                  * change during the VOP_FSYNC.
6622                  */
6623                 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6624                     iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6625                         cinfo->atomic = TRUE;
6626                 else
6627                         cinfo->atomic = FALSE;
6628         }
6629
6630         /* Check for mandatory locking and that the size gets set. */
6631         cva.va_mask = AT_MODE;
6632         if (setsize)
6633                 cva.va_mask |= AT_SIZE;
6634
6635         /* Assume the worst */
6636         cs->mandlock = TRUE;
6637
6638         if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6639                 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6640
6641                 /*
6642                  * Truncate the file if necessary; this would be
6643                  * the case for create over an existing file.
6644                  */
6645
6646                 if (trunc) {
6647                         int in_crit = 0;
6648                         rfs4_file_t *fp;
6649                         bool_t create = FALSE;
6650
6651                         /*
6652                          * We are writing over an existing file.
6653                          * Check to see if we need to recall a delegation.
6654                          */
6655                         rfs4_hold_deleg_policy();
6656                         if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6657                                 if (rfs4_check_delegated_byfp(FWRITE, fp,
6658                                     (reqsize == 0), FALSE, FALSE, &clientid)) {
6659                                         rfs4_file_rele(fp);
6660                                         rfs4_rele_deleg_policy();
6661                                         VN_RELE(vp);
6662                                         *attrset = 0;
6663                                         return (NFS4ERR_DELAY);
6664                                 }
6665                                 rfs4_file_rele(fp);
6666                         }
6667                         rfs4_rele_deleg_policy();
6668
6669                         if (nbl_need_check(vp)) {
6670                                 in_crit = 1;
6671
6672                                 ASSERT(reqsize == 0);
6673
6674                                 nbl_start_crit(vp, RW_READER);
6675                                 if (nbl_conflict(vp, NBL_WRITE, 0,
6676                                     cva.va_size, 0, NULL)) {
6677                                         in_crit = 0;
6678                                         nbl_end_crit(vp);
6679                                         VN_RELE(vp);
6680                                         *attrset = 0;
6681                                         return (NFS4ERR_ACCESS);
6682                                 }
6683                         }
6684                         ct.cc_sysid = 0;
6685                         ct.cc_pid = 0;
6686                         ct.cc_caller_id = nfs4_srv_caller_id;
6687                         ct.cc_flags = CC_DONTBLOCK;
6688
6689                         cva.va_mask = AT_SIZE;
6690                         cva.va_size = reqsize;
6691                         (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6692                         if (in_crit)
6693                                 nbl_end_crit(vp);
6694                 }
6695         }
6696
6697         error = makefh4(&cs->fh, vp, cs->exi);
6698
6699         /*
6700          * Force modified data and metadata out to stable storage.
6701          */
6702         (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6703
6704         if (error) {
6705                 VN_RELE(vp);
6706                 *attrset = 0;
6707                 return (puterrno4(error));
6708         }
6709
6710         /* if parent dir is attrdir, set namedattr fh flag */
6711         if (dvp->v_flag & V_XATTRDIR)
6712                 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6713
6714         if (cs->vp)
6715                 VN_RELE(cs->vp);
6716
6717         cs->vp = vp;
6718
6719         /*
6720          * if we did not create the file, we will need to check
6721          * the access bits on the file
6722          */
6723
6724         if (!created) {
6725                 if (setsize)
6726                         args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6727                 status = check_open_access(args->share_access, cs, req);
6728                 if (status != NFS4_OK)
6729                         *attrset = 0;
6730         }
6731         return (status);
6732 }
6733
6734 /*ARGSUSED*/
6735 static void
6736 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6737     rfs4_openowner_t *oo, delegreq_t deleg,
6738     uint32_t access, uint32_t deny,
6739     OPEN4res *resp, int deleg_cur)
6740 {
6741         /* XXX Currently not using req  */
6742         rfs4_state_t *sp;
6743         rfs4_file_t *fp;
6744         bool_t screate = TRUE;
6745         bool_t fcreate = TRUE;
6746         uint32_t open_a, share_a;
6747         uint32_t open_d, share_d;
6748         rfs4_deleg_state_t *dsp;
6749         sysid_t sysid;
6750         nfsstat4 status;
6751         caller_context_t ct;
6752         int fflags = 0;
6753         int recall = 0;
6754         int err;
6755         int first_open;
6756
6757         /* get the file struct and hold a lock on it during initial open */
6758         fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6759         if (fp == NULL) {
6760                 resp->status = NFS4ERR_RESOURCE;
6761                 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6762                 return;
6763         }
6764
6765         sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6766         if (sp == NULL) {
6767                 resp->status = NFS4ERR_RESOURCE;
6768                 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6769                 /* No need to keep any reference */
6770                 rw_exit(&fp->rf_file_rwlock);
6771                 rfs4_file_rele(fp);
6772                 return;
6773         }
6774
6775         /* try to get the sysid before continuing */
6776         if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6777                 resp->status = status;
6778                 rfs4_file_rele(fp);
6779                 /* Not a fully formed open; "close" it */
6780                 if (screate == TRUE)
6781                         rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6782                 rfs4_state_rele(sp);
6783                 return;
6784         }
6785
6786         /* Calculate the fflags for this OPEN. */
6787         if (access & OPEN4_SHARE_ACCESS_READ)
6788                 fflags |= FREAD;
6789         if (access & OPEN4_SHARE_ACCESS_WRITE)
6790                 fflags |= FWRITE;
6791
6792         rfs4_dbe_lock(sp->rs_dbe);
6793
6794         /*
6795          * Calculate the new deny and access mode that this open is adding to
6796          * the file for this open owner;
6797          */
6798         open_d = (deny & ~sp->rs_open_deny);
6799         open_a = (access & ~sp->rs_open_access);
6800
6801         /*
6802          * Calculate the new share access and share deny modes that this open
6803          * is adding to the file for this open owner;
6804          */
6805         share_a = (access & ~sp->rs_share_access);
6806         share_d = (deny & ~sp->rs_share_deny);
6807
6808         first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6809
6810         /*
6811          * Check to see the client has already sent an open for this
6812          * open owner on this file with the same share/deny modes.
6813          * If so, we don't need to check for a conflict and we don't
6814          * need to add another shrlock.  If not, then we need to
6815          * check for conflicts in deny and access before checking for
6816          * conflicts in delegation.  We don't want to recall a
6817          * delegation based on an open that will eventually fail based
6818          * on shares modes.
6819          */
6820
6821         if (share_a || share_d) {
6822                 if ((err = rfs4_share(sp, access, deny)) != 0) {
6823                         rfs4_dbe_unlock(sp->rs_dbe);
6824                         resp->status = err;
6825
6826                         rfs4_file_rele(fp);
6827                         /* Not a fully formed open; "close" it */
6828                         if (screate == TRUE)
6829                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6830                         rfs4_state_rele(sp);
6831                         return;
6832                 }
6833         }
6834
6835         rfs4_dbe_lock(fp->rf_dbe);
6836
6837         /*
6838          * Check to see if this file is delegated and if so, if a
6839          * recall needs to be done.
6840          */
6841         if (rfs4_check_recall(sp, access)) {
6842                 rfs4_dbe_unlock(fp->rf_dbe);
6843                 rfs4_dbe_unlock(sp->rs_dbe);
6844                 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6845                 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6846                 rfs4_dbe_lock(sp->rs_dbe);
6847
6848                 /* if state closed while lock was dropped */
6849                 if (sp->rs_closed) {
6850                         if (share_a || share_d)
6851                                 (void) rfs4_unshare(sp);
6852                         rfs4_dbe_unlock(sp->rs_dbe);
6853                         rfs4_file_rele(fp);
6854                         /* Not a fully formed open; "close" it */
6855                         if (screate == TRUE)
6856                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6857                         rfs4_state_rele(sp);
6858                         resp->status = NFS4ERR_OLD_STATEID;
6859                         return;
6860                 }
6861
6862                 rfs4_dbe_lock(fp->rf_dbe);
6863                 /* Let's see if the delegation was returned */
6864                 if (rfs4_check_recall(sp, access)) {
6865                         rfs4_dbe_unlock(fp->rf_dbe);
6866                         if (share_a || share_d)
6867                                 (void) rfs4_unshare(sp);
6868                         rfs4_dbe_unlock(sp->rs_dbe);
6869                         rfs4_file_rele(fp);
6870                         rfs4_update_lease(sp->rs_owner->ro_client);
6871
6872                         /* Not a fully formed open; "close" it */
6873                         if (screate == TRUE)
6874                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6875                         rfs4_state_rele(sp);
6876                         resp->status = NFS4ERR_DELAY;
6877                         return;
6878                 }
6879         }
6880         /*
6881          * the share check passed and any delegation conflict has been
6882          * taken care of, now call vop_open.
6883          * if this is the first open then call vop_open with fflags.
6884          * if not, call vn_open_upgrade with just the upgrade flags.
6885          *
6886          * if the file has been opened already, it will have the current
6887          * access mode in the state struct.  if it has no share access, then
6888          * this is a new open.
6889          *
6890          * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6891          * call VOP_OPEN(), just do the open upgrade.
6892          */
6893         if (first_open && !deleg_cur) {
6894                 ct.cc_sysid = sysid;
6895                 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6896                 ct.cc_caller_id = nfs4_srv_caller_id;
6897                 ct.cc_flags = CC_DONTBLOCK;
6898                 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6899                 if (err) {
6900                         rfs4_dbe_unlock(fp->rf_dbe);
6901                         if (share_a || share_d)
6902                                 (void) rfs4_unshare(sp);
6903                         rfs4_dbe_unlock(sp->rs_dbe);
6904                         rfs4_file_rele(fp);
6905
6906                         /* Not a fully formed open; "close" it */
6907                         if (screate == TRUE)
6908                                 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6909                         rfs4_state_rele(sp);
6910                         /* check if a monitor detected a delegation conflict */
6911                         if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6912                                 resp->status = NFS4ERR_DELAY;
6913                         else
6914                                 resp->status = NFS4ERR_SERVERFAULT;
6915                         return;
6916                 }
6917         } else { /* open upgrade */
6918                 /*
6919                  * calculate the fflags for the new mode that is being added
6920                  * by this upgrade.
6921                  */
6922                 fflags = 0;
6923                 if (open_a & OPEN4_SHARE_ACCESS_READ)
6924                         fflags |= FREAD;
6925                 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6926                         fflags |= FWRITE;
6927                 vn_open_upgrade(cs->vp, fflags);
6928         }
6929         sp->rs_open_access |= access;
6930         sp->rs_open_deny |= deny;
6931
6932         if (open_d & OPEN4_SHARE_DENY_READ)
6933                 fp->rf_deny_read++;
6934         if (open_d & OPEN4_SHARE_DENY_WRITE)
6935                 fp->rf_deny_write++;
6936         fp->rf_share_deny |= deny;
6937
6938         if (open_a & OPEN4_SHARE_ACCESS_READ)
6939                 fp->rf_access_read++;
6940         if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6941                 fp->rf_access_write++;
6942         fp->rf_share_access |= access;
6943
6944         /*
6945          * Check for delegation here. if the deleg argument is not
6946          * DELEG_ANY, then this is a reclaim from a client and
6947          * we must honor the delegation requested. If necessary we can
6948          * set the recall flag.
6949          */
6950
6951         dsp = rfs4_grant_delegation(deleg, sp, &recall);
6952
6953         cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6954
6955         next_stateid(&sp->rs_stateid);
6956
6957         resp->stateid = sp->rs_stateid.stateid;
6958
6959         rfs4_dbe_unlock(fp->rf_dbe);
6960         rfs4_dbe_unlock(sp->rs_dbe);
6961
6962         if (dsp) {
6963                 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6964                 rfs4_deleg_state_rele(dsp);
6965         }
6966
6967         rfs4_file_rele(fp);
6968         rfs4_state_rele(sp);
6969
6970         resp->status = NFS4_OK;
6971 }
6972
6973 /*ARGSUSED*/
6974 static void
6975 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6976     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6977 {
6978         change_info4 *cinfo = &resp->cinfo;
6979         bitmap4 *attrset = &resp->attrset;
6980
6981         if (args->opentype == OPEN4_NOCREATE)
6982                 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6983                     req, cs, args->share_access, cinfo);
6984         else {
6985                 /* inhibit delegation grants during exclusive create */
6986
6987                 if (args->mode == EXCLUSIVE4)
6988                         rfs4_disable_delegation();
6989
6990                 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6991                     oo->ro_client->rc_clientid);
6992         }
6993
6994         if (resp->status == NFS4_OK) {
6995
6996                 /* cs->vp cs->fh now reference the desired file */
6997
6998                 rfs4_do_open(cs, req, oo,
6999                     oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
7000                     args->share_access, args->share_deny, resp, 0);
7001
7002                 /*
7003                  * If rfs4_createfile set attrset, we must
7004                  * clear this attrset before the response is copied.
7005                  */
7006                 if (resp->status != NFS4_OK && resp->attrset) {
7007                         resp->attrset = 0;
7008                 }
7009         }
7010         else
7011                 *cs->statusp = resp->status;
7012
7013         if (args->mode == EXCLUSIVE4)
7014                 rfs4_enable_delegation();
7015 }
7016
7017 /*ARGSUSED*/
7018 static void
7019 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
7020     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7021 {
7022         change_info4 *cinfo = &resp->cinfo;
7023         vattr_t va;
7024         vtype_t v_type = cs->vp->v_type;
7025         int error = 0;
7026
7027         /* Verify that we have a regular file */
7028         if (v_type != VREG) {
7029                 if (v_type == VDIR)
7030                         resp->status = NFS4ERR_ISDIR;
7031                 else if (v_type == VLNK)
7032                         resp->status = NFS4ERR_SYMLINK;
7033                 else
7034                         resp->status = NFS4ERR_INVAL;
7035                 return;
7036         }
7037
7038         va.va_mask = AT_MODE|AT_UID;
7039         error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7040         if (error) {
7041                 resp->status = puterrno4(error);
7042                 return;
7043         }
7044
7045         cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7046
7047         /*
7048          * Check if we have access to the file, Note the the file
7049          * could have originally been open UNCHECKED or GUARDED
7050          * with mode bits that will now fail, but there is nothing
7051          * we can really do about that except in the case that the
7052          * owner of the file is the one requesting the open.
7053          */
7054         if (crgetuid(cs->cr) != va.va_uid) {
7055                 resp->status = check_open_access(args->share_access, cs, req);
7056                 if (resp->status != NFS4_OK) {
7057                         return;
7058                 }
7059         }
7060
7061         /*
7062          * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7063          */
7064         cinfo->before = 0;
7065         cinfo->after = 0;
7066         cinfo->atomic = FALSE;
7067
7068         rfs4_do_open(cs, req, oo,
7069             NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7070             args->share_access, args->share_deny, resp, 0);
7071 }
7072
7073 static void
7074 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7075     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7076 {
7077         int error;
7078         nfsstat4 status;
7079         stateid4 stateid =
7080             args->open_claim4_u.delegate_cur_info.delegate_stateid;
7081         rfs4_deleg_state_t *dsp;
7082
7083         /*
7084          * Find the state info from the stateid and confirm that the
7085          * file is delegated.  If the state openowner is the same as
7086          * the supplied openowner we're done. If not, get the file
7087          * info from the found state info. Use that file info to
7088          * create the state for this lock owner. Note solaris doen't
7089          * really need the pathname to find the file. We may want to
7090          * lookup the pathname and make sure that the vp exist and
7091          * matches the vp in the file structure. However it is
7092          * possible that the pathname nolonger exists (local process
7093          * unlinks the file), so this may not be that useful.
7094          */
7095
7096         status = rfs4_get_deleg_state(&stateid, &dsp);
7097         if (status != NFS4_OK) {
7098                 resp->status = status;
7099                 return;
7100         }
7101
7102         ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7103
7104         /*
7105          * New lock owner, create state. Since this was probably called
7106          * in response to a CB_RECALL we set deleg to DELEG_NONE
7107          */
7108
7109         ASSERT(cs->vp != NULL);
7110         VN_RELE(cs->vp);
7111         VN_HOLD(dsp->rds_finfo->rf_vp);
7112         cs->vp = dsp->rds_finfo->rf_vp;
7113
7114         if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7115                 rfs4_deleg_state_rele(dsp);
7116                 *cs->statusp = resp->status = puterrno4(error);
7117                 return;
7118         }
7119
7120         /* Mark progress for delegation returns */
7121         dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7122         rfs4_deleg_state_rele(dsp);
7123         rfs4_do_open(cs, req, oo, DELEG_NONE,
7124             args->share_access, args->share_deny, resp, 1);
7125 }
7126
7127 /*ARGSUSED*/
7128 static void
7129 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7130     OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7131 {
7132         /*
7133          * Lookup the pathname, it must already exist since this file
7134          * was delegated.
7135          *
7136          * Find the file and state info for this vp and open owner pair.
7137          *      check that they are in fact delegated.
7138          *      check that the state access and deny modes are the same.
7139          *
7140          * Return the delgation possibly seting the recall flag.
7141          */
7142         rfs4_file_t *fp;
7143         rfs4_state_t *sp;
7144         bool_t create = FALSE;
7145         bool_t dcreate = FALSE;
7146         rfs4_deleg_state_t *dsp;
7147         nfsace4 *ace;
7148
7149         /* Note we ignore oflags */
7150         resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7151             req, cs, args->share_access, &resp->cinfo);
7152
7153         if (resp->status != NFS4_OK) {
7154                 return;
7155         }
7156
7157         /* get the file struct and hold a lock on it during initial open */
7158         fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7159         if (fp == NULL) {
7160                 resp->status = NFS4ERR_RESOURCE;
7161                 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7162                 return;
7163         }
7164
7165         sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7166         if (sp == NULL) {
7167                 resp->status = NFS4ERR_SERVERFAULT;
7168                 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7169                 rw_exit(&fp->rf_file_rwlock);
7170                 rfs4_file_rele(fp);
7171                 return;
7172         }
7173
7174         rfs4_dbe_lock(sp->rs_dbe);
7175         rfs4_dbe_lock(fp->rf_dbe);
7176         if (args->share_access != sp->rs_share_access ||
7177             args->share_deny != sp->rs_share_deny ||
7178             sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7179                 NFS4_DEBUG(rfs4_debug,
7180                     (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7181                 rfs4_dbe_unlock(fp->rf_dbe);
7182                 rfs4_dbe_unlock(sp->rs_dbe);
7183                 rfs4_file_rele(fp);
7184                 rfs4_state_rele(sp);
7185                 resp->status = NFS4ERR_SERVERFAULT;
7186                 return;
7187         }
7188         rfs4_dbe_unlock(fp->rf_dbe);
7189         rfs4_dbe_unlock(sp->rs_dbe);
7190
7191         dsp = rfs4_finddeleg(sp, &dcreate);
7192         if (dsp == NULL) {
7193                 rfs4_state_rele(sp);
7194                 rfs4_file_rele(fp);
7195                 resp->status = NFS4ERR_SERVERFAULT;
7196                 return;
7197         }
7198
7199         next_stateid(&sp->rs_stateid);
7200
7201         resp->stateid = sp->rs_stateid.stateid;
7202
7203         resp->delegation.delegation_type = dsp->rds_dtype;
7204
7205         if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7206                 open_read_delegation4 *rv =
7207                     &resp->delegation.open_delegation4_u.read;
7208
7209                 rv->stateid = dsp->rds_delegid.stateid;
7210                 rv->recall = FALSE; /* no policy in place to set to TRUE */
7211                 ace = &rv->permissions;
7212         } else {
7213                 open_write_delegation4 *rv =
7214                     &resp->delegation.open_delegation4_u.write;
7215
7216                 rv->stateid = dsp->rds_delegid.stateid;
7217                 rv->recall = FALSE;  /* no policy in place to set to TRUE */
7218                 ace = &rv->permissions;
7219                 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7220                 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7221         }
7222
7223         /* XXX For now */
7224         ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7225         ace->flag = 0;
7226         ace->access_mask = 0;
7227         ace->who.utf8string_len = 0;
7228         ace->who.utf8string_val = 0;
7229
7230         rfs4_deleg_state_rele(dsp);
7231         rfs4_state_rele(sp);
7232         rfs4_file_rele(fp);
7233 }
7234
/*
 * Verdict of a sequence id check (see rfs4_check_seqid()).
 */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* 0: request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY,	/* 1: same seqid and same op as the last one */
	NFS4_CHKSEQ_BAD		/* 2: anything else */
} rfs4_chkseq_t;
7240
7241 /*
7242  * Generic function for sequence number checks.
7243  */
7244 static rfs4_chkseq_t
7245 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7246     seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7247 {
7248         /* Same sequence ids and matching operations? */
7249         if (seqid == rqst_seq && resop->resop == lastop->resop) {
7250                 if (copyres == TRUE) {
7251                         rfs4_free_reply(resop);
7252                         rfs4_copy_reply(resop, lastop);
7253                 }
7254                 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7255                     "Replayed SEQID %d\n", seqid));
7256                 return (NFS4_CHKSEQ_REPLAY);
7257         }
7258
7259         /* If the incoming sequence is not the next expected then it is bad */
7260         if (rqst_seq != seqid + 1) {
7261                 if (rqst_seq == seqid) {
7262                         NFS4_DEBUG(rfs4_debug,
7263                             (CE_NOTE, "BAD SEQID: Replayed sequence id "
7264                             "but last op was %d current op is %d\n",
7265                             lastop->resop, resop->resop));
7266                         return (NFS4_CHKSEQ_BAD);
7267                 }
7268                 NFS4_DEBUG(rfs4_debug,
7269                     (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7270                     rqst_seq, seqid));
7271                 return (NFS4_CHKSEQ_BAD);
7272         }
7273
7274         /* Everything okay -- next expected */
7275         return (NFS4_CHKSEQ_OKAY);
7276 }
7277
7278
7279 static rfs4_chkseq_t
7280 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7281 {
7282         rfs4_chkseq_t rc;
7283
7284         rfs4_dbe_lock(op->ro_dbe);
7285         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7286             TRUE);
7287         rfs4_dbe_unlock(op->ro_dbe);
7288
7289         if (rc == NFS4_CHKSEQ_OKAY)
7290                 rfs4_update_lease(op->ro_client);
7291
7292         return (rc);
7293 }
7294
7295 static rfs4_chkseq_t
7296 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7297 {
7298         rfs4_chkseq_t rc;
7299
7300         rfs4_dbe_lock(op->ro_dbe);
7301         rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7302             olo_seqid, resop, FALSE);
7303         rfs4_dbe_unlock(op->ro_dbe);
7304
7305         return (rc);
7306 }
7307
7308 static rfs4_chkseq_t
7309 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7310 {
7311         rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7312
7313         rfs4_dbe_lock(lsp->rls_dbe);
7314         if (!lsp->rls_skip_seqid_check)
7315                 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7316                     resop, TRUE);
7317         rfs4_dbe_unlock(lsp->rls_dbe);
7318
7319         return (rc);
7320 }
7321
7322 static void
7323 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7324     struct svc_req *req, struct compound_state *cs)
7325 {
7326         OPEN4args *args = &argop->nfs_argop4_u.opopen;
7327         OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7328         open_owner4 *owner = &args->owner;
7329         open_claim_type4 claim = args->claim;
7330         rfs4_client_t *cp;
7331         rfs4_openowner_t *oo;
7332         bool_t create;
7333         bool_t replay = FALSE;
7334         int can_reclaim;
7335
7336         DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7337             OPEN4args *, args);
7338
7339         if (cs->vp == NULL) {
7340                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7341                 goto end;
7342         }
7343
7344         /*
7345          * Need to check clientid and lease expiration first based on
7346          * error ordering and incrementing sequence id.
7347          */
7348         cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7349         if (cp == NULL) {
7350                 *cs->statusp = resp->status =
7351                     rfs4_check_clientid(&owner->clientid, 0);
7352                 goto end;
7353         }
7354
7355         if (rfs4_lease_expired(cp)) {
7356                 rfs4_client_close(cp);
7357                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7358                 goto end;
7359         }
7360         can_reclaim = cp->rc_can_reclaim;
7361
7362         /*
7363          * Find the open_owner for use from this point forward.  Take
7364          * care in updating the sequence id based on the type of error
7365          * being returned.
7366          */
7367 retry:
7368         create = TRUE;
7369         oo = rfs4_findopenowner(owner, &create, args->seqid);
7370         if (oo == NULL) {
7371                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
7372                 rfs4_client_rele(cp);
7373                 goto end;
7374         }
7375
7376         /* Hold off access to the sequence space while the open is done */
7377         rfs4_sw_enter(&oo->ro_sw);
7378
7379         /*
7380          * If the open_owner existed before at the server, then check
7381          * the sequence id.
7382          */
7383         if (!create && !oo->ro_postpone_confirm) {
7384                 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7385                 case NFS4_CHKSEQ_BAD:
7386                         if ((args->seqid > oo->ro_open_seqid) &&
7387                             oo->ro_need_confirm) {
7388                                 rfs4_free_opens(oo, TRUE, FALSE);
7389                                 rfs4_sw_exit(&oo->ro_sw);
7390                                 rfs4_openowner_rele(oo);
7391                                 goto retry;
7392                         }
7393                         resp->status = NFS4ERR_BAD_SEQID;
7394                         goto out;
7395                 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7396                         replay = TRUE;
7397                         goto out;
7398                 default:
7399                         break;
7400                 }
7401
7402                 /*
7403                  * Sequence was ok and open owner exists
7404                  * check to see if we have yet to see an
7405                  * open_confirm.
7406                  */
7407                 if (oo->ro_need_confirm) {
7408                         rfs4_free_opens(oo, TRUE, FALSE);
7409                         rfs4_sw_exit(&oo->ro_sw);
7410                         rfs4_openowner_rele(oo);
7411                         goto retry;
7412                 }
7413         }
7414         /* Grace only applies to regular-type OPENs */
7415         if (rfs4_clnt_in_grace(cp) &&
7416             (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7417                 *cs->statusp = resp->status = NFS4ERR_GRACE;
7418                 goto out;
7419         }
7420
7421         /*
7422          * If previous state at the server existed then can_reclaim
7423          * will be set. If not reply NFS4ERR_NO_GRACE to the
7424          * client.
7425          */
7426         if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7427                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7428                 goto out;
7429         }
7430
7431
7432         /*
7433          * Reject the open if the client has missed the grace period
7434          */
7435         if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7436                 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7437                 goto out;
7438         }
7439
7440         /* Couple of up-front bookkeeping items */
7441         if (oo->ro_need_confirm) {
7442                 /*
7443                  * If this is a reclaim OPEN then we should not ask
7444                  * for a confirmation of the open_owner per the
7445                  * protocol specification.
7446                  */
7447                 if (claim == CLAIM_PREVIOUS)
7448                         oo->ro_need_confirm = FALSE;
7449                 else
7450                         resp->rflags |= OPEN4_RESULT_CONFIRM;
7451         }
7452         resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7453
7454         /*
7455          * If there is an unshared filesystem mounted on this vnode,
7456          * do not allow to open/create in this directory.
7457          */
7458         if (vn_ismntpt(cs->vp)) {
7459                 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7460                 goto out;
7461         }
7462
7463         /*
7464          * access must READ, WRITE, or BOTH.  No access is invalid.
7465          * deny can be READ, WRITE, BOTH, or NONE.
7466          * bits not defined for access/deny are invalid.
7467          */
7468         if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7469             (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7470             (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7471                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7472                 goto out;
7473         }
7474
7475
7476         /*
7477          * make sure attrset is zero before response is built.
7478          */
7479         resp->attrset = 0;
7480
7481         switch (claim) {
7482         case CLAIM_NULL:
7483                 rfs4_do_opennull(cs, req, args, oo, resp);
7484                 break;
7485         case CLAIM_PREVIOUS:
7486                 rfs4_do_openprev(cs, req, args, oo, resp);
7487                 break;
7488         case CLAIM_DELEGATE_CUR:
7489                 rfs4_do_opendelcur(cs, req, args, oo, resp);
7490                 break;
7491         case CLAIM_DELEGATE_PREV:
7492                 rfs4_do_opendelprev(cs, req, args, oo, resp);
7493                 break;
7494         default:
7495                 resp->status = NFS4ERR_INVAL;
7496                 break;
7497         }
7498
7499 out:
7500         rfs4_client_rele(cp);
7501
7502         /* Catch sequence id handling here to make it a little easier */
7503         switch (resp->status) {
7504         case NFS4ERR_BADXDR:
7505         case NFS4ERR_BAD_SEQID:
7506         case NFS4ERR_BAD_STATEID:
7507         case NFS4ERR_NOFILEHANDLE:
7508         case NFS4ERR_RESOURCE:
7509         case NFS4ERR_STALE_CLIENTID:
7510         case NFS4ERR_STALE_STATEID:
7511                 /*
7512                  * The protocol states that if any of these errors are
7513                  * being returned, the sequence id should not be
7514                  * incremented.  Any other return requires an
7515                  * increment.
7516                  */
7517                 break;
7518         default:
7519                 /* Always update the lease in this case */
7520                 rfs4_update_lease(oo->ro_client);
7521
7522                 /* Regular response - copy the result */
7523                 if (!replay)
7524                         rfs4_update_open_resp(oo, resop, &cs->fh);
7525
7526                 /*
7527                  * REPLAY case: Only if the previous response was OK
7528                  * do we copy the filehandle.  If not OK, no
7529                  * filehandle to copy.
7530                  */
7531                 if (replay == TRUE &&
7532                     resp->status == NFS4_OK &&
7533                     oo->ro_reply_fh.nfs_fh4_val) {
7534                         /*
7535                          * If this is a replay, we must restore the
7536                          * current filehandle/vp to that of what was
7537                          * returned originally.  Try our best to do
7538                          * it.
7539                          */
7540                         nfs_fh4_fmt_t *fh_fmtp =
7541                             (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7542
7543                         cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7544                             (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7545
7546                         if (cs->exi == NULL) {
7547                                 resp->status = NFS4ERR_STALE;
7548                                 goto finish;
7549                         }
7550
7551                         VN_RELE(cs->vp);
7552
7553                         cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7554                             &resp->status);
7555
7556                         if (cs->vp == NULL)
7557                                 goto finish;
7558
7559                         nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7560                 }
7561
7562                 /*
7563                  * If this was a replay, no need to update the
7564                  * sequence id. If the open_owner was not created on
7565                  * this pass, then update.  The first use of an
7566                  * open_owner will not bump the sequence id.
7567                  */
7568                 if (replay == FALSE && !create)
7569                         rfs4_update_open_sequence(oo);
7570                 /*
7571                  * If the client is receiving an error and the
7572                  * open_owner needs to be confirmed, there is no way
7573                  * to notify the client of this fact ignoring the fact
7574                  * that the server has no method of returning a
7575                  * stateid to confirm.  Therefore, the server needs to
7576                  * mark this open_owner in a way as to avoid the
7577                  * sequence id checking the next time the client uses
7578                  * this open_owner.
7579                  */
7580                 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7581                         oo->ro_postpone_confirm = TRUE;
7582                 /*
7583                  * If OK response then clear the postpone flag and
7584                  * reset the sequence id to keep in sync with the
7585                  * client.
7586                  */
7587                 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7588                         oo->ro_postpone_confirm = FALSE;
7589                         oo->ro_open_seqid = args->seqid;
7590                 }
7591                 break;
7592         }
7593
7594 finish:
7595         *cs->statusp = resp->status;
7596
7597         rfs4_sw_exit(&oo->ro_sw);
7598         rfs4_openowner_rele(oo);
7599
7600 end:
7601         DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7602             OPEN4res *, resp);
7603 }
7604
7605 /*ARGSUSED*/
7606 void
7607 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7608     struct svc_req *req, struct compound_state *cs)
7609 {
7610         OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7611         OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7612         rfs4_state_t *sp;
7613         nfsstat4 status;
7614
7615         DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7616             OPEN_CONFIRM4args *, args);
7617
7618         if (cs->vp == NULL) {
7619                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7620                 goto out;
7621         }
7622
7623         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7624         if (status != NFS4_OK) {
7625                 *cs->statusp = resp->status = status;
7626                 goto out;
7627         }
7628
7629         /* Ensure specified filehandle matches */
7630         if (cs->vp != sp->rs_finfo->rf_vp) {
7631                 rfs4_state_rele(sp);
7632                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7633                 goto out;
7634         }
7635
7636         /* hold off other access to open_owner while we tinker */
7637         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7638
7639         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7640         case NFS4_CHECK_STATEID_OKAY:
7641                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7642                     resop) != 0) {
7643                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7644                         break;
7645                 }
7646                 /*
7647                  * If it is the appropriate stateid and determined to
7648                  * be "OKAY" then this means that the stateid does not
7649                  * need to be confirmed and the client is in error for
7650                  * sending an OPEN_CONFIRM.
7651                  */
7652                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7653                 break;
7654         case NFS4_CHECK_STATEID_OLD:
7655                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7656                 break;
7657         case NFS4_CHECK_STATEID_BAD:
7658                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7659                 break;
7660         case NFS4_CHECK_STATEID_EXPIRED:
7661                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7662                 break;
7663         case NFS4_CHECK_STATEID_CLOSED:
7664                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7665                 break;
7666         case NFS4_CHECK_STATEID_REPLAY:
7667                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7668                     resop)) {
7669                 case NFS4_CHKSEQ_OKAY:
7670                         /*
7671                          * This is replayed stateid; if seqid matches
7672                          * next expected, then client is using wrong seqid.
7673                          */
7674                         /* fall through */
7675                 case NFS4_CHKSEQ_BAD:
7676                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7677                         break;
7678                 case NFS4_CHKSEQ_REPLAY:
7679                         /*
7680                          * Note this case is the duplicate case so
7681                          * resp->status is already set.
7682                          */
7683                         *cs->statusp = resp->status;
7684                         rfs4_update_lease(sp->rs_owner->ro_client);
7685                         break;
7686                 }
7687                 break;
7688         case NFS4_CHECK_STATEID_UNCONFIRMED:
7689                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7690                     resop) != NFS4_CHKSEQ_OKAY) {
7691                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7692                         break;
7693                 }
7694                 *cs->statusp = resp->status = NFS4_OK;
7695
7696                 next_stateid(&sp->rs_stateid);
7697                 resp->open_stateid = sp->rs_stateid.stateid;
7698                 sp->rs_owner->ro_need_confirm = FALSE;
7699                 rfs4_update_lease(sp->rs_owner->ro_client);
7700                 rfs4_update_open_sequence(sp->rs_owner);
7701                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7702                 break;
7703         default:
7704                 ASSERT(FALSE);
7705                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7706                 break;
7707         }
7708         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7709         rfs4_state_rele(sp);
7710
7711 out:
7712         DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7713             OPEN_CONFIRM4res *, resp);
7714 }
7715
7716 /*ARGSUSED*/
7717 void
7718 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7719     struct svc_req *req, struct compound_state *cs)
7720 {
7721         OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7722         OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7723         uint32_t access = args->share_access;
7724         uint32_t deny = args->share_deny;
7725         nfsstat4 status;
7726         rfs4_state_t *sp;
7727         rfs4_file_t *fp;
7728         int fflags = 0;
7729
7730         DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7731             OPEN_DOWNGRADE4args *, args);
7732
7733         if (cs->vp == NULL) {
7734                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7735                 goto out;
7736         }
7737
7738         if (cs->vp->v_type != VREG) {
7739                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7740                 return;
7741         }
7742
7743         status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7744         if (status != NFS4_OK) {
7745                 *cs->statusp = resp->status = status;
7746                 goto out;
7747         }
7748
7749         /* Ensure specified filehandle matches */
7750         if (cs->vp != sp->rs_finfo->rf_vp) {
7751                 rfs4_state_rele(sp);
7752                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7753                 goto out;
7754         }
7755
7756         /* hold off other access to open_owner while we tinker */
7757         rfs4_sw_enter(&sp->rs_owner->ro_sw);
7758
7759         switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7760         case NFS4_CHECK_STATEID_OKAY:
7761                 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7762                     resop) != NFS4_CHKSEQ_OKAY) {
7763                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7764                         goto end;
7765                 }
7766                 break;
7767         case NFS4_CHECK_STATEID_OLD:
7768                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7769                 goto end;
7770         case NFS4_CHECK_STATEID_BAD:
7771                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7772                 goto end;
7773         case NFS4_CHECK_STATEID_EXPIRED:
7774                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7775                 goto end;
7776         case NFS4_CHECK_STATEID_CLOSED:
7777                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7778                 goto end;
7779         case NFS4_CHECK_STATEID_UNCONFIRMED:
7780                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7781                 goto end;
7782         case NFS4_CHECK_STATEID_REPLAY:
7783                 /* Check the sequence id for the open owner */
7784                 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7785                     resop)) {
7786                 case NFS4_CHKSEQ_OKAY:
7787                         /*
7788                          * This is replayed stateid; if seqid matches
7789                          * next expected, then client is using wrong seqid.
7790                          */
7791                         /* fall through */
7792                 case NFS4_CHKSEQ_BAD:
7793                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7794                         goto end;
7795                 case NFS4_CHKSEQ_REPLAY:
7796                         /*
7797                          * Note this case is the duplicate case so
7798                          * resp->status is already set.
7799                          */
7800                         *cs->statusp = resp->status;
7801                         rfs4_update_lease(sp->rs_owner->ro_client);
7802                         goto end;
7803                 }
7804                 break;
7805         default:
7806                 ASSERT(FALSE);
7807                 break;
7808         }
7809
7810         rfs4_dbe_lock(sp->rs_dbe);
7811         /*
7812          * Check that the new access modes and deny modes are valid.
7813          * Check that no invalid bits are set.
7814          */
7815         if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7816             (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7817                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7818                 rfs4_update_open_sequence(sp->rs_owner);
7819                 rfs4_dbe_unlock(sp->rs_dbe);
7820                 goto end;
7821         }
7822
7823         /*
7824          * The new modes must be a subset of the current modes and
7825          * the access must specify at least one mode. To test that
7826          * the new mode is a subset of the current modes we bitwise
7827          * AND them together and check that the result equals the new
7828          * mode. For example:
7829          * New mode, access == R and current mode, sp->rs_open_access  == RW
7830          * access & sp->rs_open_access == R == access, so the new access mode
7831          * is valid. Consider access == RW, sp->rs_open_access = R
7832          * access & sp->rs_open_access == R != access, so the new access mode
7833          * is invalid.
7834          */
7835         if ((access & sp->rs_open_access) != access ||
7836             (deny & sp->rs_open_deny) != deny ||
7837             (access &
7838             (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7839                 *cs->statusp = resp->status = NFS4ERR_INVAL;
7840                 rfs4_update_open_sequence(sp->rs_owner);
7841                 rfs4_dbe_unlock(sp->rs_dbe);
7842                 goto end;
7843         }
7844
7845         /*
7846          * Release any share locks associated with this stateID.
7847          * Strictly speaking, this violates the spec because the
7848          * spec effectively requires that open downgrade be atomic.
7849          * At present, fs_shrlock does not have this capability.
7850          */
7851         (void) rfs4_unshare(sp);
7852
7853         status = rfs4_share(sp, access, deny);
7854         if (status != NFS4_OK) {
7855                 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7856                 rfs4_update_open_sequence(sp->rs_owner);
7857                 rfs4_dbe_unlock(sp->rs_dbe);
7858                 goto end;
7859         }
7860
7861         fp = sp->rs_finfo;
7862         rfs4_dbe_lock(fp->rf_dbe);
7863
7864         /*
7865          * If the current mode has deny read and the new mode
7866          * does not, decrement the number of deny read mode bits
7867          * and if it goes to zero turn off the deny read bit
7868          * on the file.
7869          */
7870         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7871             (deny & OPEN4_SHARE_DENY_READ) == 0) {
7872                 fp->rf_deny_read--;
7873                 if (fp->rf_deny_read == 0)
7874                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7875         }
7876
7877         /*
7878          * If the current mode has deny write and the new mode
7879          * does not, decrement the number of deny write mode bits
7880          * and if it goes to zero turn off the deny write bit
7881          * on the file.
7882          */
7883         if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7884             (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7885                 fp->rf_deny_write--;
7886                 if (fp->rf_deny_write == 0)
7887                         fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7888         }
7889
7890         /*
7891          * If the current mode has access read and the new mode
7892          * does not, decrement the number of access read mode bits
7893          * and if it goes to zero turn off the access read bit
7894          * on the file.  set fflags to FREAD for the call to
7895          * vn_open_downgrade().
7896          */
7897         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7898             (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7899                 fp->rf_access_read--;
7900                 if (fp->rf_access_read == 0)
7901                         fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7902                 fflags |= FREAD;
7903         }
7904
7905         /*
7906          * If the current mode has access write and the new mode
7907          * does not, decrement the number of access write mode bits
7908          * and if it goes to zero turn off the access write bit
7909          * on the file.  set fflags to FWRITE for the call to
7910          * vn_open_downgrade().
7911          */
7912         if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7913             (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7914                 fp->rf_access_write--;
7915                 if (fp->rf_access_write == 0)
7916                         fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7917                 fflags |= FWRITE;
7918         }
7919
7920         /* Check that the file is still accessible */
7921         ASSERT(fp->rf_share_access);
7922
7923         rfs4_dbe_unlock(fp->rf_dbe);
7924
7925         /* now set the new open access and deny modes */
7926         sp->rs_open_access = access;
7927         sp->rs_open_deny = deny;
7928
7929         /*
7930          * we successfully downgraded the share lock, now we need to downgrade
7931          * the open. it is possible that the downgrade was only for a deny
7932          * mode and we have nothing else to do.
7933          */
7934         if ((fflags & (FREAD|FWRITE)) != 0)
7935                 vn_open_downgrade(cs->vp, fflags);
7936
7937         /* Update the stateid */
7938         next_stateid(&sp->rs_stateid);
7939         resp->open_stateid = sp->rs_stateid.stateid;
7940
7941         rfs4_dbe_unlock(sp->rs_dbe);
7942
7943         *cs->statusp = resp->status = NFS4_OK;
7944         /* Update the lease */
7945         rfs4_update_lease(sp->rs_owner->ro_client);
7946         /* And the sequence */
7947         rfs4_update_open_sequence(sp->rs_owner);
7948         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7949
7950 end:
7951         rfs4_sw_exit(&sp->rs_owner->ro_sw);
7952         rfs4_state_rele(sp);
7953 out:
7954         DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7955             OPEN_DOWNGRADE4res *, resp);
7956 }
7957
7958 /*
7959  * The logic behind this function is detailed in the NFSv4 RFC in the
7960  * SETCLIENTID operation description under IMPLEMENTATION.  Refer to
7961  * that section for explicit guidance to server behavior for
7962  * SETCLIENTID.
7963  */
7964 void
7965 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7966     struct svc_req *req, struct compound_state *cs)
7967 {
7968         SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7969         SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7970         rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7971         rfs4_clntip_t *ci;
7972         bool_t create;
7973         char *addr, *netid;
7974         int len;
7975
7976         DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7977             SETCLIENTID4args *, args);
7978 retry:
7979         newcp = cp_confirmed = cp_unconfirmed = NULL;
7980
7981         /*
7982          * Save the caller's IP address
7983          */
7984         args->client.cl_addr =
7985             (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7986
7987         /*
7988          * Record if it is a Solaris client that cannot handle referrals.
7989          */
7990         if (strstr(args->client.id_val, "Solaris") &&
7991             !strstr(args->client.id_val, "+referrals")) {
7992                 /* Add a "yes, it's downrev" record */
7993                 create = TRUE;
7994                 ci = rfs4_find_clntip(args->client.cl_addr, &create);
7995                 ASSERT(ci != NULL);
7996                 rfs4_dbe_rele(ci->ri_dbe);
7997         } else {
7998                 /* Remove any previous record */
7999                 rfs4_invalidate_clntip(args->client.cl_addr);
8000         }
8001
8002         /*
8003          * In search of an EXISTING client matching the incoming
8004          * request to establish a new client identifier at the server
8005          */
8006         create = TRUE;
8007         cp = rfs4_findclient(&args->client, &create, NULL);
8008
8009         /* Should never happen */
8010         ASSERT(cp != NULL);
8011
8012         if (cp == NULL) {
8013                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8014                 goto out;
8015         }
8016
8017         /*
8018          * Easiest case. Client identifier is newly created and is
8019          * unconfirmed.  Also note that for this case, no other
8020          * entries exist for the client identifier.  Nothing else to
8021          * check.  Just setup the response and respond.
8022          */
8023         if (create) {
8024                 *cs->statusp = res->status = NFS4_OK;
8025                 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
8026                 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8027                     cp->rc_confirm_verf;
8028                 /* Setup callback information; CB_NULL confirmation later */
8029                 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
8030
8031                 rfs4_client_rele(cp);
8032                 goto out;
8033         }
8034
8035         /*
8036          * An existing, confirmed client may exist but it may not have
8037          * been active for at least one lease period.  If so, then
8038          * "close" the client and create a new client identifier
8039          */
8040         if (rfs4_lease_expired(cp)) {
8041                 rfs4_client_close(cp);
8042                 goto retry;
8043         }
8044
8045         if (cp->rc_need_confirm == TRUE)
8046                 cp_unconfirmed = cp;
8047         else
8048                 cp_confirmed = cp;
8049
8050         cp = NULL;
8051
8052         /*
8053          * We have a confirmed client, now check for an
8054          * unconfimred entry
8055          */
8056         if (cp_confirmed) {
8057                 /* If creds don't match then client identifier is inuse */
8058                 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8059                         rfs4_cbinfo_t *cbp;
8060                         /*
8061                          * Some one else has established this client
8062                          * id. Try and say * who they are. We will use
8063                          * the call back address supplied by * the
8064                          * first client.
8065                          */
8066                         *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8067
8068                         addr = netid = NULL;
8069
8070                         cbp = &cp_confirmed->rc_cbinfo;
8071                         if (cbp->cb_callback.cb_location.r_addr &&
8072                             cbp->cb_callback.cb_location.r_netid) {
8073                                 cb_client4 *cbcp = &cbp->cb_callback;
8074
8075                                 len = strlen(cbcp->cb_location.r_addr)+1;
8076                                 addr = kmem_alloc(len, KM_SLEEP);
8077                                 bcopy(cbcp->cb_location.r_addr, addr, len);
8078                                 len = strlen(cbcp->cb_location.r_netid)+1;
8079                                 netid = kmem_alloc(len, KM_SLEEP);
8080                                 bcopy(cbcp->cb_location.r_netid, netid, len);
8081                         }
8082
8083                         res->SETCLIENTID4res_u.client_using.r_addr = addr;
8084                         res->SETCLIENTID4res_u.client_using.r_netid = netid;
8085
8086                         rfs4_client_rele(cp_confirmed);
8087                 }
8088
8089                 /*
8090                  * Confirmed, creds match, and verifier matches; must
8091                  * be an update of the callback info
8092                  */
8093                 if (cp_confirmed->rc_nfs_client.verifier ==
8094                     args->client.verifier) {
8095                         /* Setup callback information */
8096                         rfs4_client_setcb(cp_confirmed, &args->callback,
8097                             args->callback_ident);
8098
8099                         /* everything okay -- move ahead */
8100                         *cs->statusp = res->status = NFS4_OK;
8101                         res->SETCLIENTID4res_u.resok4.clientid =
8102                             cp_confirmed->rc_clientid;
8103
8104                         /* update the confirm_verifier and return it */
8105                         rfs4_client_scv_next(cp_confirmed);
8106                         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8107                             cp_confirmed->rc_confirm_verf;
8108
8109                         rfs4_client_rele(cp_confirmed);
8110                         goto out;
8111                 }
8112
8113                 /*
8114                  * Creds match but the verifier doesn't.  Must search
8115                  * for an unconfirmed client that would be replaced by
8116                  * this request.
8117                  */
8118                 create = FALSE;
8119                 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8120                     cp_confirmed);
8121         }
8122
8123         /*
8124          * At this point, we have taken care of the brand new client
8125          * struct, INUSE case, update of an existing, and confirmed
8126          * client struct.
8127          */
8128
8129         /*
8130          * check to see if things have changed while we originally
8131          * picked up the client struct.  If they have, then return and
8132          * retry the processing of this SETCLIENTID request.
8133          */
8134         if (cp_unconfirmed) {
8135                 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8136                 if (!cp_unconfirmed->rc_need_confirm) {
8137                         rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8138                         rfs4_client_rele(cp_unconfirmed);
8139                         if (cp_confirmed)
8140                                 rfs4_client_rele(cp_confirmed);
8141                         goto retry;
8142                 }
8143                 /* do away with the old unconfirmed one */
8144                 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8145                 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8146                 rfs4_client_rele(cp_unconfirmed);
8147                 cp_unconfirmed = NULL;
8148         }
8149
8150         /*
8151          * This search will temporarily hide the confirmed client
8152          * struct while a new client struct is created as the
8153          * unconfirmed one.
8154          */
8155         create = TRUE;
8156         newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8157
8158         ASSERT(newcp != NULL);
8159
8160         if (newcp == NULL) {
8161                 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8162                 rfs4_client_rele(cp_confirmed);
8163                 goto out;
8164         }
8165
8166         /*
8167          * If one was not created, then a similar request must be in
8168          * process so release and start over with this one
8169          */
8170         if (create != TRUE) {
8171                 rfs4_client_rele(newcp);
8172                 if (cp_confirmed)
8173                         rfs4_client_rele(cp_confirmed);
8174                 goto retry;
8175         }
8176
8177         *cs->statusp = res->status = NFS4_OK;
8178         res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8179         res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8180             newcp->rc_confirm_verf;
8181         /* Setup callback information; CB_NULL confirmation later */
8182         rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8183
8184         newcp->rc_cp_confirmed = cp_confirmed;
8185
8186         rfs4_client_rele(newcp);
8187
8188 out:
8189         DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8190             SETCLIENTID4res *, res);
8191 }
8192
8193 /*
8194  * SETCLIENTID_CONFIRM: confirm the client record named by args->clientid.
8195  * The status (NFS4_OK, NFS4ERR_CLID_INUSE, NFS4ERR_STALE_CLIENTID, or
8196  * rfs4_check_clientid()'s verdict) goes to res->status and *cs->statusp.
8197  */
8198 /*ARGSUSED*/
8199 void
8200 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8201     struct svc_req *req, struct compound_state *cs)
8202 {
8203         SETCLIENTID_CONFIRM4args *args =
8204             &argop->nfs_argop4_u.opsetclientid_confirm;
8205         SETCLIENTID_CONFIRM4res *res =
8206             &resop->nfs_resop4_u.opsetclientid_confirm;
8207         rfs4_client_t *cp, *cptoclose = NULL;
8208
8209         DTRACE_NFSV4_2(op__setclientid__confirm__start,
8210             struct compound_state *, cs,
8211             SETCLIENTID_CONFIRM4args *, args);
8212
8213         *cs->statusp = res->status = NFS4_OK;
8214
8215         cp = rfs4_findclient_by_id(args->clientid, TRUE);
8216         if (cp == NULL) {
8217                 *cs->statusp = res->status =
8218                     rfs4_check_clientid(&args->clientid, 1);
8219                 goto out;
8220         }
8221
8222         if (!creds_ok(cp, req, cs)) {
8223                 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8224                 rfs4_client_rele(cp);
8225                 goto out;
8226         }
8227
8228         /* If the verifier doesn't match, the record doesn't match */
8229         if (cp->rc_confirm_verf != args->setclientid_confirm) {
8230                 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8231                 rfs4_client_rele(cp);
8232                 goto out;
8233         }
8234
8235         rfs4_dbe_lock(cp->rc_dbe);
8236         cp->rc_need_confirm = FALSE;
8237         if (cp->rc_cp_confirmed) {
8238                 /* detach the previously confirmed record; closed below */
8239                 cptoclose = cp->rc_cp_confirmed;
8240                 cptoclose->rc_ss_remove = 1;
8241                 cp->rc_cp_confirmed = NULL;
8242         }
8243
8244         /*
8245          * Update the client's associated server instance, if it's changed
8246          * since the client was created.
8247          */
8248         if (rfs4_servinst(cp) != rfs4_cur_servinst)
8249                 rfs4_servinst_assign(cp, rfs4_cur_servinst);
8250
8251         /*
8252          * Record clientid in stable storage.
8253          * Must be done after server instance has been assigned.
8254          */
8255         rfs4_ss_clid(cp);
8256         rfs4_dbe_unlock(cp->rc_dbe);
8257
8258         if (cptoclose)
8259                 /* don't need to rele, client_close does it */
8260                 rfs4_client_close(cptoclose);
8261
8262         /* If needed, initiate CB_NULL call for callback path */
8263         rfs4_deleg_cb_check(cp);
8264         rfs4_update_lease(cp);
8265
8266         /* Check to see if client can perform reclaims */
8267         rfs4_ss_chkclid(cp);
8268         rfs4_client_rele(cp);
8269 out:
8270         DTRACE_NFSV4_2(op__setclientid__confirm__done,
8271             struct compound_state *, cs,
8272             SETCLIENTID_CONFIRM4res *, res);
8273 }
8274
8275
8276 /*
8277  * CLOSE: validate the open stateid and open-owner seqid supplied by
8278  * the client, bump the stateid for the reply and close the open state.
8279  * The outcome is stored in resp->status and *cs->statusp.
8280  */
8281 /*ARGSUSED*/
8282 void
8283 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8284     struct svc_req *req, struct compound_state *cs)
8285 {
8286         CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8287         CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8288         rfs4_state_t *ostate;
8289         nfsstat4 rv;
8290         int seqchk;
8291
8292         DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8293             CLOSE4args *, args);
8294
8295         if (cs->vp == NULL) {
8296                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8297                 goto out;
8298         }
8299
8300         rv = rfs4_get_state(&args->open_stateid, &ostate, RFS4_DBS_INVALID);
8301         if (rv != NFS4_OK) {
8302                 *cs->statusp = resp->status = rv;
8303                 goto out;
8304         }
8305
8306         /* The stateid must refer to the file named by the filehandle */
8307         if (ostate->rs_finfo->rf_vp != cs->vp) {
8308                 rfs4_state_rele(ostate);
8309                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8310                 goto out;
8311         }
8312
8313         /* serialize with other users of this open_owner */
8314         rfs4_sw_enter(&ostate->rs_owner->ro_sw);
8315
8316         switch (rfs4_check_stateid_seqid(ostate, &args->open_stateid)) {
8317         case NFS4_CHECK_STATEID_OKAY:
8318                 if (rfs4_check_open_seqid(args->seqid, ostate->rs_owner,
8319                     resop) == NFS4_CHKSEQ_OKAY)
8320                         break;
8321                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8322                 goto end;
8323         case NFS4_CHECK_STATEID_OLD:
8324         case NFS4_CHECK_STATEID_CLOSED:
8325                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8326                 goto end;
8327         case NFS4_CHECK_STATEID_BAD:
8328         case NFS4_CHECK_STATEID_UNCONFIRMED:
8329                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8330                 goto end;
8331         case NFS4_CHECK_STATEID_EXPIRED:
8332                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8333                 goto end;
8334         case NFS4_CHECK_STATEID_REPLAY:
8335                 /* Check the sequence id for the open owner */
8336                 seqchk = rfs4_check_open_seqid(args->seqid,
8337                     ostate->rs_owner, resop);
8338                 if (seqchk == NFS4_CHKSEQ_OKAY ||
8339                     seqchk == NFS4_CHKSEQ_BAD) {
8340                         /*
8341                          * A replayed stateid whose seqid matches the
8342                          * next expected one means the client is
8343                          * using the wrong seqid.
8344                          */
8345                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8346                         goto end;
8347                 }
8348                 if (seqchk == NFS4_CHKSEQ_REPLAY) {
8349                         /*
8350                          * Duplicate request: resp->status is already
8351                          * set, so just propagate it.
8352                          */
8353                         *cs->statusp = resp->status;
8354                         rfs4_update_lease(ostate->rs_owner->ro_client);
8355                         goto end;
8356                 }
8357                 break;
8358         default:
8359                 ASSERT(FALSE);
8360                 break;
8361         }
8362
8363         /* Advance the stateid and hand the new value back */
8364         rfs4_dbe_lock(ostate->rs_dbe);
8365         next_stateid(&ostate->rs_stateid);
8366         resp->open_stateid = ostate->rs_stateid.stateid;
8367         rfs4_dbe_unlock(ostate->rs_dbe);
8368
8369         rfs4_update_lease(ostate->rs_owner->ro_client);
8370         rfs4_update_open_sequence(ostate->rs_owner);
8371         rfs4_update_open_resp(ostate->rs_owner, resop, NULL);
8372
8373         rfs4_state_close(ostate, FALSE, FALSE, cs->cr);
8374
8375         /* rv still holds NFS4_OK from rfs4_get_state() */
8376         *cs->statusp = resp->status = rv;
8377 end:
8378         rfs4_sw_exit(&ostate->rs_owner->ro_sw);
8379         rfs4_state_rele(ostate);
8380 out:
8381         DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8382             CLOSE4res *, resp);
8383 }
8384
8385 /*
8386  * Release the file locks and share reservation held through open
8387  * state 'sp', and take its access/deny contribution back out of the
8388  * file struct's counters.  With 'close_of_client' set, the client's
8389  * locks are dropped wholesale (once per client) rather than per
8390  * lock owner.  'cr' is the credential handed to VOP_CLOSE().
8391  */
8392 /*ARGSUSED*/
8393 void
8394 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8395     bool_t close_of_client)
8396 {
8397         rfs4_client_t *cp = sp->rs_owner->ro_client;
8398         rfs4_file_t *file = sp->rs_finfo;
8399         rfs4_lo_state_t *los;
8400         int closeflags = 0;
8401
8402         /*
8403          * When the whole client is being torn down it is simpler to
8404          * release every lock the client holds in one go than to visit
8405          * each file and clean the locks there.
8406          */
8407         if (close_of_client && cp->rc_unlksys_completed == FALSE &&
8408             !list_is_empty(&sp->rs_lostatelist) &&
8409             cp->rc_sysidt != LM_NOSYSID) {
8410                 if (lm_remove_file_locks == NULL) {
8411                         struct flock64 flk;
8412
8413                         /* Release all locks for this client */
8414                         flk.l_type = F_UNLKSYS;
8415                         flk.l_whence = 0;
8416                         flk.l_start = 0;
8417                         flk.l_len = 0;
8418                         flk.l_sysid = cp->rc_sysidt;
8419                         flk.l_pid = 0;
8420                         (void) VOP_FRLOCK(file->rf_vp, F_SETLK, &flk,
8421                             F_REMOTELOCK | FREAD | FWRITE, (u_offset_t)0,
8422                             NULL, CRED(), NULL);
8423                 } else {
8424                         /* The PxFS kernel module is loaded */
8425                         int new_sysid;
8426
8427                         /* Encode the cluster nodeid in new sysid */
8428                         new_sysid = cp->rc_sysidt;
8429                         lm_set_nlmid_flk(&new_sysid);
8430
8431                         /*
8432                          * This PxFS routine removes file locks for a
8433                          * client over all nodes of a cluster.
8434                          */
8435                         NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8436                             "lm_remove_file_locks(sysid=0x%x)\n",
8437                             new_sysid));
8438                         (*lm_remove_file_locks)(new_sysid);
8439                 }
8440
8441                 cp->rc_unlksys_completed = TRUE;
8442         }
8443
8444         /*
8445          * Flag every lock-owner state of this open state as cleaned.
8446          * Unless this is a close_of_client, the owner's locks on this
8447          * file are also released here, one owner at a time.
8448          */
8449         los = list_head(&sp->rs_lostatelist);
8450         while (los != NULL) {
8451                 los->rls_locks_cleaned = TRUE;
8452
8453                 if (!close_of_client && cp->rc_sysidt != LM_NOSYSID) {
8454                         (void) cleanlocks(file->rf_vp,
8455                             los->rls_locker->rl_pid,
8456                             los->rls_locker->rl_client->rc_sysidt);
8457                 }
8458                 los = list_next(&sp->rs_lostatelist, los);
8459         }
8460
8461         /*
8462          * Release any shrlocks associated with this open state ID.
8463          * This must be done before the rfs4_state gets marked closed.
8464          */
8465         if (cp->rc_sysidt != LM_NOSYSID)
8466                 (void) rfs4_unshare(sp);
8467
8468         /* No access bits recorded on this state: nothing to give back */
8469         if (!sp->rs_open_access)
8470                 return;
8471
8472         rfs4_dbe_lock(file->rf_dbe);
8473
8474         /*
8475          * Take back each access and deny bit this state contributed
8476          * to the file; once a counter reaches zero the matching bit
8477          * is cleared from the file's mask.
8478          */
8479         if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8480                 closeflags |= FREAD;
8481                 if (--file->rf_access_read == 0)
8482                         file->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8483         }
8484         if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8485                 closeflags |= FWRITE;
8486                 if (--file->rf_access_write == 0)
8487                         file->rf_share_access &= ~OPEN4_SHARE_ACCESS_WRITE;
8488         }
8489         if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8490                 if (--file->rf_deny_read == 0)
8491                         file->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8492         }
8493         if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8494                 if (--file->rf_deny_write == 0)
8495                         file->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8496         }
8497
8498         /* closeflags holds FREAD/FWRITE for the access bits dropped above */
8499         (void) VOP_CLOSE(file->rf_vp, closeflags, 1, (offset_t)0, cr, NULL);
8500
8501         rfs4_dbe_unlock(file->rf_dbe);
8502
8503         /* this state no longer contributes to the file's share counts */
8504         sp->rs_open_access = 0;
8505         sp->rs_open_deny = 0;
8506 }
8507
8508 /*
8509  * lock_denied: Fill in a LOCK4denied structure given an flock64 structure.
8510  * An expired NFSv4 owner's client is closed and NFS4ERR_EXPIRED returned.
8511  */
8512 static nfsstat4
8513 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8514 {
8515         rfs4_lockowner_t *lo;
8516         rfs4_client_t *cp;
8517         uint32_t len;
8518
8519         lo = rfs4_findlockowner_by_pid(flk->l_pid);
8520         if (lo != NULL) {
8521                 cp = lo->rl_client;
8522                 if (rfs4_lease_expired(cp)) {
8523                         /* hold cp before dropping lo; close reles */
8524                         rfs4_dbe_hold(cp->rc_dbe);
8525                         rfs4_lockowner_rele(lo);
8526                         rfs4_client_close(cp);
8527                         return (NFS4ERR_EXPIRED);
8528                 }
8529                 dp->owner.clientid = lo->rl_owner.clientid;
8530                 len = lo->rl_owner.owner_len;
8531                 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8532                 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8533                 dp->owner.owner_len = len;
8534                 rfs4_lockowner_rele(lo);
8535                 goto finish;
8536         }
8537
8538         /*
8539          * It's not an NFS4 lock.  The upper 32 bits of an NFS4 client
8540          * id hold the boot time, so we fabricate an identity by setting
8541          * clientid to the sysid, and the lock owner to the pid.
8542          */
8543         dp->owner.clientid = flk->l_sysid;
8544         len = sizeof (pid_t);
8545         dp->owner.owner_len = len;
8546         dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8547         bcopy(&flk->l_pid, dp->owner.owner_val, len);
8548 finish:
8549         dp->offset = flk->l_start;
8550         dp->length = flk->l_len;
8551         if (flk->l_type == F_RDLCK)
8552                 dp->locktype = READ_LT;
8553         else if (flk->l_type == F_WRLCK)
8554                 dp->locktype = WRITE_LT;
8555         else
8556                 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8557
8558         return (NFS4_OK);
8559 }
8560
8561 /*
8562  * Apply 'flock' to vp, backing off and retrying on conflict.  If the
8563  * conflict persists, *flock is replaced by the blocking lock (F_GETLK).
8564  */
8565 static int
8566 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8567 {
8568         struct flock64 holder;
8569         clock_t backoff;
8570         int error, cmd, tries;
8571
8572         if (nbl_need_check(vp))
8573                 cmd = F_SETLK_NBMAND;
8574         else
8575                 cmd = F_SETLK;
8576
8577         for (;;) {
8578                 backoff = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8579                 for (tries = 0; tries < rfs4_maxlock_tries; tries++) {
8580                         LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8581                         error = VOP_FRLOCK(vp, cmd, flock, flag,
8582                             (u_offset_t)0, NULL, cred, NULL);
8583                         if (error != EAGAIN && error != EACCES)
8584                                 break;
8585                         if (tries < rfs4_maxlock_tries - 1) {
8586                                 delay(backoff);
8587                                 backoff *= 2;
8588                         }
8589                 }
8590                 if (error != EAGAIN && error != EACCES)
8591                         return (error);
8592
8593                 /* Get the owner of the lock */
8594                 holder = *flock;
8595                 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &holder);
8596                 if (VOP_FRLOCK(vp, F_GETLK, &holder, flag,
8597                     (u_offset_t)0, NULL, cred, NULL) != 0)
8598                         return (error);
8599                 if (holder.l_type != F_UNLCK)
8600                         break;          /* else no longer locked, retry */
8601         }
8602         *flock = holder;
8603         LOCK_PRINT(rfs4_debug, "setlock(blocking lock)", F_GETLK, &holder);
8604         return (error);
8605 }
8606
8607 /*
8608  * Apply (or, for OP_LOCKU, remove) the byte-range lock described by
8609  * locktype/offset/length on the file behind lock-owner state 'lsp',
8610  * bumping the lock stateid on success.  Returns an NFSv4 status; for
8611  * NFS4ERR_DENIED the conflicting lock is recorded in the LOCK result.
8612  */
8613 /*ARGSUSED*/
8614 static nfsstat4
8615 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8616     offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8617 {
8618         nfsstat4 status;
8619         rfs4_lockowner_t *lo = lsp->rls_locker;
8620         rfs4_state_t *sp = lsp->rls_state;
8621         struct flock64 flock;
8622         int16_t ltype;
8623         int flag;
8624         int error;
8625         sysid_t sysid;
8626         LOCK4res *lres;
8627
8628         if (rfs4_lease_expired(lo->rl_client))
8629                 return (NFS4ERR_EXPIRED);
8630
8631         if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8632                 return (status);
8633
8634         /* Check for zero length. To lock to end of file use all ones for V4 */
8635         if (length == 0)
8636                 return (NFS4ERR_INVAL);
8637         else if (length == (length4)(~0))
8638                 length = 0;             /* Posix to end of file  */
8639
8640 retry:
8641         rfs4_dbe_lock(sp->rs_dbe);
8642         if (sp->rs_closed) {
8643                 rfs4_dbe_unlock(sp->rs_dbe);
8644                 return (NFS4ERR_OLD_STATEID);
8645         }
8646
8647         if (resop->resop != OP_LOCKU) {
8648                 switch (locktype) {
8649                 case READ_LT:
8650                 case READW_LT:
8651                         if ((sp->rs_share_access
8652                             & OPEN4_SHARE_ACCESS_READ) == 0) {
8653                                 rfs4_dbe_unlock(sp->rs_dbe);
8654                                 return (NFS4ERR_OPENMODE);
8655                         }
8656                         ltype = F_RDLCK;
8657                         break;
8658                 case WRITE_LT:
8659                 case WRITEW_LT:
8660                         if ((sp->rs_share_access
8661                             & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8662                                 rfs4_dbe_unlock(sp->rs_dbe);
8663                                 return (NFS4ERR_OPENMODE);
8664                         }
8665                         ltype = F_WRLCK;
8666                         break;
8667                 default:
8668                         /* unknown lock type; don't use ltype unset */
8669                         rfs4_dbe_unlock(sp->rs_dbe);
8670                         return (NFS4ERR_INVAL);
8671                 }
8672         } else
8673                 ltype = F_UNLCK;
8674
8675         flock.l_type = ltype;
8676         flock.l_whence = 0;             /* SEEK_SET */
8677         flock.l_start = offset;
8678         flock.l_len = length;
8679         flock.l_sysid = sysid;
8680         flock.l_pid = lsp->rls_locker->rl_pid;
8681
8682         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8683         if (flock.l_len < 0 || flock.l_start < 0) {
8684                 rfs4_dbe_unlock(sp->rs_dbe);
8685                 return (NFS4ERR_INVAL);
8686         }
8687
8688         /* N.B. FREAD/FWRITE equal OPEN4_SHARE_ACCESS_READ/WRITE in value */
8689         flag = (int)sp->rs_share_access | F_REMOTELOCK;
8690
8691         error = setlock(sp->rs_finfo->rf_vp, &flock, flag, cred);
8692         if (error == 0) {
8693                 rfs4_dbe_lock(lsp->rls_dbe);
8694                 next_stateid(&lsp->rls_lockid);
8695                 rfs4_dbe_unlock(lsp->rls_dbe);
8696         }
8697
8698         rfs4_dbe_unlock(sp->rs_dbe);
8699
8700         /* N.B. this errno mapping differs from the puterrno4 routine */
8701         switch (error) {
8702         case 0:
8703                 status = NFS4_OK;
8704                 break;
8705         case EAGAIN:
8706         case EACCES:            /* Old value */
8707                 /* Can only get here if op is OP_LOCK */
8708                 ASSERT(resop->resop == OP_LOCK);
8709                 lres = &resop->nfs_resop4_u.oplock;
8710                 status = NFS4ERR_DENIED;
8711                 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8712                     == NFS4ERR_EXPIRED)
8713                         goto retry;     /* expired holder closed */
8714                 break;
8715         case ENOLCK:
8716                 status = NFS4ERR_DELAY;
8717                 break;
8718         case EOVERFLOW:
8719                 status = NFS4ERR_INVAL;
8720                 break;
8721         case EINVAL:
8722                 status = NFS4ERR_NOTSUPP;
8723                 break;
8724         default:
8725                 status = NFS4ERR_SERVERFAULT;
8726                 break;
8727         }
8728         return (status);
8729 }
8730
8731 /*ARGSUSED*/
8732 void
8733 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8734     struct svc_req *req, struct compound_state *cs)
8735 {
8736         LOCK4args *args = &argop->nfs_argop4_u.oplock;
8737         LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8738         nfsstat4 status;
8739         stateid4 *stateid;
8740         rfs4_lockowner_t *lo;
8741         rfs4_client_t *cp;
8742         rfs4_state_t *sp = NULL;
8743         rfs4_lo_state_t *lsp = NULL;
8744         bool_t ls_sw_held = FALSE;
8745         bool_t create = TRUE;
8746         bool_t lcreate = TRUE;
8747         bool_t dup_lock = FALSE;
8748         int rc;
8749
8750         DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8751             LOCK4args *, args);
8752
8753         if (cs->vp == NULL) {
8754                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8755                 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8756                     cs, LOCK4res *, resp);
8757                 return;
8758         }
8759
8760         if (args->locker.new_lock_owner) {
8761                 /* Create a new lockowner for this instance */
8762                 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8763
8764                 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8765
8766                 stateid = &olo->open_stateid;
8767                 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8768                 if (status != NFS4_OK) {
8769                         NFS4_DEBUG(rfs4_debug,
8770                             (CE_NOTE, "Get state failed in lock %d", status));
8771                         *cs->statusp = resp->status = status;
8772                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8773                             cs, LOCK4res *, resp);
8774                         return;
8775                 }
8776
8777                 /* Ensure specified filehandle matches */
8778                 if (cs->vp != sp->rs_finfo->rf_vp) {
8779                         rfs4_state_rele(sp);
8780                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8781                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8782                             cs, LOCK4res *, resp);
8783                         return;
8784                 }
8785
8786                 /* hold off other access to open_owner while we tinker */
8787                 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8788
8789                 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8790                 case NFS4_CHECK_STATEID_OLD:
8791                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8792                         goto end;
8793                 case NFS4_CHECK_STATEID_BAD:
8794                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8795                         goto end;
8796                 case NFS4_CHECK_STATEID_EXPIRED:
8797                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8798                         goto end;
8799                 case NFS4_CHECK_STATEID_UNCONFIRMED:
8800                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8801                         goto end;
8802                 case NFS4_CHECK_STATEID_CLOSED:
8803                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8804                         goto end;
8805                 case NFS4_CHECK_STATEID_OKAY:
8806                 case NFS4_CHECK_STATEID_REPLAY:
8807                         switch (rfs4_check_olo_seqid(olo->open_seqid,
8808                             sp->rs_owner, resop)) {
8809                         case NFS4_CHKSEQ_OKAY:
8810                                 if (rc == NFS4_CHECK_STATEID_OKAY)
8811                                         break;
8812                                 /*
8813                                  * This is replayed stateid; if seqid
8814                                  * matches next expected, then client
8815                                  * is using wrong seqid.
8816                                  */
8817                                 /* FALLTHROUGH */
8818                         case NFS4_CHKSEQ_BAD:
8819                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8820                                 goto end;
8821                         case NFS4_CHKSEQ_REPLAY:
8822                                 /* This is a duplicate LOCK request */
8823                                 dup_lock = TRUE;
8824
8825                                 /*
8826                                  * For a duplicate we do not want to
8827                                  * create a new lockowner as it should
8828                                  * already exist.
8829                                  * Turn off the lockowner create flag.
8830                                  */
8831                                 lcreate = FALSE;
8832                         }
8833                         break;
8834                 }
8835
8836                 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8837                 if (lo == NULL) {
8838                         NFS4_DEBUG(rfs4_debug,
8839                             (CE_NOTE, "rfs4_op_lock: no lock owner"));
8840                         *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8841                         goto end;
8842                 }
8843
8844                 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8845                 if (lsp == NULL) {
8846                         rfs4_update_lease(sp->rs_owner->ro_client);
8847                         /*
8848                          * Only update the open_seqid if this is not
8849                          * a duplicate request
8850                          */
8851                         if (dup_lock == FALSE) {
8852                                 rfs4_update_open_sequence(sp->rs_owner);
8853                         }
8854
8855                         NFS4_DEBUG(rfs4_debug,
8856                             (CE_NOTE, "rfs4_op_lock: no state"));
8857                         *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8858                         rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8859                         rfs4_lockowner_rele(lo);
8860                         goto end;
8861                 }
8862
8863                 /*
8864                  * This is the new_lock_owner branch and the client is
8865                  * supposed to be associating a new lock_owner with
8866                  * the open file at this point.  If we find that a
8867                  * lock_owner/state association already exists and a
8868                  * successful LOCK request was returned to the client,
8869                  * an error is returned to the client since this is
8870                  * not appropriate.  The client should be using the
8871                  * existing lock_owner branch.
8872                  */
8873                 if (dup_lock == FALSE && create == FALSE) {
8874                         if (lsp->rls_lock_completed == TRUE) {
8875                                 *cs->statusp =
8876                                     resp->status = NFS4ERR_BAD_SEQID;
8877                                 rfs4_lockowner_rele(lo);
8878                                 goto end;
8879                         }
8880                 }
8881
8882                 rfs4_update_lease(sp->rs_owner->ro_client);
8883
8884                 /*
8885                  * Only update the open_seqid if this is not
8886                  * a duplicate request
8887                  */
8888                 if (dup_lock == FALSE) {
8889                         rfs4_update_open_sequence(sp->rs_owner);
8890                 }
8891
8892                 /*
8893                  * If this is a duplicate lock request, just copy the
8894                  * previously saved reply and return.
8895                  */
8896                 if (dup_lock == TRUE) {
8897                         /* verify that lock_seqid's match */
8898                         if (lsp->rls_seqid != olo->lock_seqid) {
8899                                 NFS4_DEBUG(rfs4_debug,
8900                                     (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8901                                     "lsp->seqid=%d old->seqid=%d",
8902                                     lsp->rls_seqid, olo->lock_seqid));
8903                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8904                         } else {
8905                                 rfs4_copy_reply(resop, &lsp->rls_reply);
8906                                 /*
8907                                  * Make sure to copy the just
8908                                  * retrieved reply status into the
8909                                  * overall compound status
8910                                  */
8911                                 *cs->statusp = resp->status;
8912                         }
8913                         rfs4_lockowner_rele(lo);
8914                         goto end;
8915                 }
8916
8917                 rfs4_dbe_lock(lsp->rls_dbe);
8918
8919                 /* Make sure to update the lock sequence id */
8920                 lsp->rls_seqid = olo->lock_seqid;
8921
8922                 NFS4_DEBUG(rfs4_debug,
8923                     (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
8924
8925                 /*
8926                  * This is used to signify the newly created lockowner
8927                  * stateid and its sequence number.  The checks for
8928                  * sequence number and increment don't occur on the
8929                  * very first lock request for a lockowner.
8930                  */
8931                 lsp->rls_skip_seqid_check = TRUE;
8932
8933                 /* hold off other access to lsp while we tinker */
8934                 rfs4_sw_enter(&lsp->rls_sw);
8935                 ls_sw_held = TRUE;
8936
8937                 rfs4_dbe_unlock(lsp->rls_dbe);
8938
8939                 rfs4_lockowner_rele(lo);
8940         } else {
8941                 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
8942                 /* get lsp and hold the lock on the underlying file struct */
8943                 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
8944                     != NFS4_OK) {
8945                         *cs->statusp = resp->status = status;
8946                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8947                             cs, LOCK4res *, resp);
8948                         return;
8949                 }
8950                 create = FALSE; /* We didn't create lsp */
8951
8952                 /* Ensure specified filehandle matches */
8953                 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
8954                         rfs4_lo_state_rele(lsp, TRUE);
8955                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8956                         DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8957                             cs, LOCK4res *, resp);
8958                         return;
8959                 }
8960
8961                 /* hold off other access to lsp while we tinker */
8962                 rfs4_sw_enter(&lsp->rls_sw);
8963                 ls_sw_held = TRUE;
8964
8965                 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
8966                 /*
8967                  * The stateid looks like it was okay (expected to be
8968                  * the next one)
8969                  */
8970                 case NFS4_CHECK_STATEID_OKAY:
8971                         /*
8972                          * The sequence id is now checked.  Determine
8973                          * if this is a replay or if it is in the
8974                          * expected (next) sequence.  In the case of a
8975                          * replay, there are two replay conditions
8976                          * that may occur.  The first is the normal
8977                          * condition where a LOCK is done with a
8978                          * NFS4_OK response and the stateid is
8979                          * updated.  That case is handled below when
8980                          * the stateid is identified as a REPLAY.  The
8981                          * second is the case where an error is
8982                          * returned, like NFS4ERR_DENIED, and the
8983                          * sequence number is updated but the stateid
8984                          * is not updated.  This second case is dealt
8985                          * with here.  So it may seem odd that the
8986                          * stateid is okay but the sequence id is a
8987                          * replay but it is okay.
8988                          */
8989                         switch (rfs4_check_lock_seqid(
8990                             args->locker.locker4_u.lock_owner.lock_seqid,
8991                             lsp, resop)) {
8992                         case NFS4_CHKSEQ_REPLAY:
8993                                 if (resp->status != NFS4_OK) {
8994                                         /*
8995                                          * Here is our replay and need
8996                                          * to verify that the last
8997                                          * response was an error.
8998                                          */
8999                                         *cs->statusp = resp->status;
9000                                         goto end;
9001                                 }
9002                                 /*
9003                                  * This is done since the sequence id
9004                                  * looked like a replay but it didn't
9005                                  * pass our check so a BAD_SEQID is
9006                                  * returned as a result.
9007                                  */
9008                                 /*FALLTHROUGH*/
9009                         case NFS4_CHKSEQ_BAD:
9010                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9011                                 goto end;
9012                         case NFS4_CHKSEQ_OKAY:
9013                                 /* Everything looks okay move ahead */
9014                                 break;
9015                         }
9016                         break;
9017                 case NFS4_CHECK_STATEID_OLD:
9018                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9019                         goto end;
9020                 case NFS4_CHECK_STATEID_BAD:
9021                         *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9022                         goto end;
9023                 case NFS4_CHECK_STATEID_EXPIRED:
9024                         *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9025                         goto end;
9026                 case NFS4_CHECK_STATEID_CLOSED:
9027                         *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9028                         goto end;
9029                 case NFS4_CHECK_STATEID_REPLAY:
9030                         switch (rfs4_check_lock_seqid(
9031                             args->locker.locker4_u.lock_owner.lock_seqid,
9032                             lsp, resop)) {
9033                         case NFS4_CHKSEQ_OKAY:
9034                                 /*
9035                                  * This is a replayed stateid; if
9036                                  * seqid matches the next expected,
9037                                  * then client is using wrong seqid.
9038                                  */
9039                         case NFS4_CHKSEQ_BAD:
9040                                 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9041                                 goto end;
9042                         case NFS4_CHKSEQ_REPLAY:
9043                                 rfs4_update_lease(lsp->rls_locker->rl_client);
9044                                 *cs->statusp = status = resp->status;
9045                                 goto end;
9046                         }
9047                         break;
9048                 default:
9049                         ASSERT(FALSE);
9050                         break;
9051                 }
9052
9053                 rfs4_update_lock_sequence(lsp);
9054                 rfs4_update_lease(lsp->rls_locker->rl_client);
9055         }
9056
9057         /*
9058          * NFS4 only allows locking on regular files, so
9059          * verify type of object.
9060          */
9061         if (cs->vp->v_type != VREG) {
9062                 if (cs->vp->v_type == VDIR)
9063                         status = NFS4ERR_ISDIR;
9064                 else
9065                         status = NFS4ERR_INVAL;
9066                 goto out;
9067         }
9068
9069         cp = lsp->rls_state->rs_owner->ro_client;
9070
9071         if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9072                 status = NFS4ERR_GRACE;
9073                 goto out;
9074         }
9075
9076         if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9077                 status = NFS4ERR_NO_GRACE;
9078                 goto out;
9079         }
9080
9081         if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9082                 status = NFS4ERR_NO_GRACE;
9083                 goto out;
9084         }
9085
9086         if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9087                 cs->deleg = TRUE;
9088
9089         status = rfs4_do_lock(lsp, args->locktype,
9090             args->offset, args->length, cs->cr, resop);
9091
9092 out:
9093         lsp->rls_skip_seqid_check = FALSE;
9094
9095         *cs->statusp = resp->status = status;
9096
9097         if (status == NFS4_OK) {
9098                 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9099                 lsp->rls_lock_completed = TRUE;
9100         }
9101         /*
9102          * Only update the "OPEN" response here if this was a new
9103          * lock_owner
9104          */
9105         if (sp)
9106                 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9107
9108         rfs4_update_lock_resp(lsp, resop);
9109
9110 end:
9111         if (lsp) {
9112                 if (ls_sw_held)
9113                         rfs4_sw_exit(&lsp->rls_sw);
9114                 /*
9115                  * If an sp obtained, then the lsp does not represent
9116                  * a lock on the file struct.
9117                  */
9118                 if (sp != NULL)
9119                         rfs4_lo_state_rele(lsp, FALSE);
9120                 else
9121                         rfs4_lo_state_rele(lsp, TRUE);
9122         }
9123         if (sp) {
9124                 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9125                 rfs4_state_rele(sp);
9126         }
9127
9128         DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9129             LOCK4res *, resp);
9130 }
9131
9132 /* free function for LOCK/LOCKT */
9133 static void
9134 lock_denied_free(nfs_resop4 *resop)
9135 {
9136         LOCK4denied *dp = NULL;
9137
9138         switch (resop->resop) {
9139         case OP_LOCK:
9140                 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9141                         dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9142                 break;
9143         case OP_LOCKT:
9144                 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9145                         dp = &resop->nfs_resop4_u.oplockt.denied;
9146                 break;
9147         default:
9148                 break;
9149         }
9150
9151         if (dp)
9152                 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9153 }
9154
9155 /*ARGSUSED*/
9156 void
9157 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9158     struct svc_req *req, struct compound_state *cs)
9159 {
9160         LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9161         LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9162         nfsstat4 status;
9163         stateid4 *stateid = &args->lock_stateid;
9164         rfs4_lo_state_t *lsp;
9165
9166         DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9167             LOCKU4args *, args);
9168
9169         if (cs->vp == NULL) {
9170                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9171                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9172                     LOCKU4res *, resp);
9173                 return;
9174         }
9175
9176         if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9177                 *cs->statusp = resp->status = status;
9178                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9179                     LOCKU4res *, resp);
9180                 return;
9181         }
9182
9183         /* Ensure specified filehandle matches */
9184         if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9185                 rfs4_lo_state_rele(lsp, TRUE);
9186                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9187                 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9188                     LOCKU4res *, resp);
9189                 return;
9190         }
9191
9192         /* hold off other access to lsp while we tinker */
9193         rfs4_sw_enter(&lsp->rls_sw);
9194
9195         switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9196         case NFS4_CHECK_STATEID_OKAY:
9197                 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9198                     != NFS4_CHKSEQ_OKAY) {
9199                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9200                         goto end;
9201                 }
9202                 break;
9203         case NFS4_CHECK_STATEID_OLD:
9204                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9205                 goto end;
9206         case NFS4_CHECK_STATEID_BAD:
9207                 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9208                 goto end;
9209         case NFS4_CHECK_STATEID_EXPIRED:
9210                 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9211                 goto end;
9212         case NFS4_CHECK_STATEID_CLOSED:
9213                 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9214                 goto end;
9215         case NFS4_CHECK_STATEID_REPLAY:
9216                 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9217                 case NFS4_CHKSEQ_OKAY:
9218                                 /*
9219                                  * This is a replayed stateid; if
9220                                  * seqid matches the next expected,
9221                                  * then client is using wrong seqid.
9222                                  */
9223                 case NFS4_CHKSEQ_BAD:
9224                         *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9225                         goto end;
9226                 case NFS4_CHKSEQ_REPLAY:
9227                         rfs4_update_lease(lsp->rls_locker->rl_client);
9228                         *cs->statusp = status = resp->status;
9229                         goto end;
9230                 }
9231                 break;
9232         default:
9233                 ASSERT(FALSE);
9234                 break;
9235         }
9236
9237         rfs4_update_lock_sequence(lsp);
9238         rfs4_update_lease(lsp->rls_locker->rl_client);
9239
9240         /*
9241          * NFS4 only allows locking on regular files, so
9242          * verify type of object.
9243          */
9244         if (cs->vp->v_type != VREG) {
9245                 if (cs->vp->v_type == VDIR)
9246                         status = NFS4ERR_ISDIR;
9247                 else
9248                         status = NFS4ERR_INVAL;
9249                 goto out;
9250         }
9251
9252         if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9253                 status = NFS4ERR_GRACE;
9254                 goto out;
9255         }
9256
9257         status = rfs4_do_lock(lsp, args->locktype,
9258             args->offset, args->length, cs->cr, resop);
9259
9260 out:
9261         *cs->statusp = resp->status = status;
9262
9263         if (status == NFS4_OK)
9264                 resp->lock_stateid = lsp->rls_lockid.stateid;
9265
9266         rfs4_update_lock_resp(lsp, resop);
9267
9268 end:
9269         rfs4_sw_exit(&lsp->rls_sw);
9270         rfs4_lo_state_rele(lsp, TRUE);
9271
9272         DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9273             LOCKU4res *, resp);
9274 }
9275
9276 /*
9277  * LOCKT is a best effort routine, the client can not be guaranteed that
9278  * the status return is still in effect by the time the reply is received.
9279  * They are numerous race conditions in this routine, but we are not required
9280  * and can not be accurate.
9281  */
9282 /*ARGSUSED*/
9283 void
9284 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9285     struct svc_req *req, struct compound_state *cs)
9286 {
9287         LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9288         LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9289         rfs4_lockowner_t *lo;
9290         rfs4_client_t *cp;
9291         bool_t create = FALSE;
9292         struct flock64 flk;
9293         int error;
9294         int flag = FREAD | FWRITE;
9295         int ltype;
9296         length4 posix_length;
9297         sysid_t sysid;
9298         pid_t pid;
9299
9300         DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9301             LOCKT4args *, args);
9302
9303         if (cs->vp == NULL) {
9304                 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9305                 goto out;
9306         }
9307
9308         /*
9309          * NFS4 only allows locking on regular files, so
9310          * verify type of object.
9311          */
9312         if (cs->vp->v_type != VREG) {
9313                 if (cs->vp->v_type == VDIR)
9314                         *cs->statusp = resp->status = NFS4ERR_ISDIR;
9315                 else
9316                         *cs->statusp = resp->status =  NFS4ERR_INVAL;
9317                 goto out;
9318         }
9319
9320         /*
9321          * Check out the clientid to ensure the server knows about it
9322          * so that we correctly inform the client of a server reboot.
9323          */
9324         if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9325             == NULL) {
9326                 *cs->statusp = resp->status =
9327                     rfs4_check_clientid(&args->owner.clientid, 0);
9328                 goto out;
9329         }
9330         if (rfs4_lease_expired(cp)) {
9331                 rfs4_client_close(cp);
9332                 /*
9333                  * Protocol doesn't allow returning NFS4ERR_STALE as
9334                  * other operations do on this check so STALE_CLIENTID
9335                  * is returned instead
9336                  */
9337                 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9338                 goto out;
9339         }
9340
9341         if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9342                 *cs->statusp = resp->status = NFS4ERR_GRACE;
9343                 rfs4_client_rele(cp);
9344                 goto out;
9345         }
9346         rfs4_client_rele(cp);
9347
9348         resp->status = NFS4_OK;
9349
9350         switch (args->locktype) {
9351         case READ_LT:
9352         case READW_LT:
9353                 ltype = F_RDLCK;
9354                 break;
9355         case WRITE_LT:
9356         case WRITEW_LT:
9357                 ltype = F_WRLCK;
9358                 break;
9359         }
9360
9361         posix_length = args->length;
9362         /* Check for zero length. To lock to end of file use all ones for V4 */
9363         if (posix_length == 0) {
9364                 *cs->statusp = resp->status = NFS4ERR_INVAL;
9365                 goto out;
9366         } else if (posix_length == (length4)(~0)) {
9367                 posix_length = 0;       /* Posix to end of file  */
9368         }
9369
9370         /* Find or create a lockowner */
9371         lo = rfs4_findlockowner(&args->owner, &create);
9372
9373         if (lo) {
9374                 pid = lo->rl_pid;
9375                 if ((resp->status =
9376                     rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9377                         goto err;
9378         } else {
9379                 pid = 0;
9380                 sysid = lockt_sysid;
9381         }
9382 retry:
9383         flk.l_type = ltype;
9384         flk.l_whence = 0;               /* SEEK_SET */
9385         flk.l_start = args->offset;
9386         flk.l_len = posix_length;
9387         flk.l_sysid = sysid;
9388         flk.l_pid = pid;
9389         flag |= F_REMOTELOCK;
9390
9391         LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9392
9393         /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9394         if (flk.l_len < 0 || flk.l_start < 0) {
9395                 resp->status = NFS4ERR_INVAL;
9396                 goto err;
9397         }
9398         error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9399             NULL, cs->cr, NULL);
9400
9401         /*
9402          * N.B. We map error values to nfsv4 errors. This is differrent
9403          * than puterrno4 routine.
9404          */
9405         switch (error) {
9406         case 0:
9407                 if (flk.l_type == F_UNLCK)
9408                         resp->status = NFS4_OK;
9409                 else {
9410                         if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9411                                 goto retry;
9412                         resp->status = NFS4ERR_DENIED;
9413                 }
9414                 break;
9415         case EOVERFLOW:
9416                 resp->status = NFS4ERR_INVAL;
9417                 break;
9418         case EINVAL:
9419                 resp->status = NFS4ERR_NOTSUPP;
9420                 break;
9421         default:
9422                 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9423                     error);
9424                 resp->status = NFS4ERR_SERVERFAULT;
9425                 break;
9426         }
9427
9428 err:
9429         if (lo)
9430                 rfs4_lockowner_rele(lo);
9431         *cs->statusp = resp->status;
9432 out:
9433         DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9434             LOCKT4res *, resp);
9435 }
9436
9437 int
9438 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9439 {
9440         int err;
9441         int cmd;
9442         vnode_t *vp;
9443         struct shrlock shr;
9444         struct shr_locowner shr_loco;
9445         int fflags = 0;
9446
9447         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9448         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9449
9450         if (sp->rs_closed)
9451                 return (NFS4ERR_OLD_STATEID);
9452
9453         vp = sp->rs_finfo->rf_vp;
9454         ASSERT(vp);
9455
9456         shr.s_access = shr.s_deny = 0;
9457
9458         if (access & OPEN4_SHARE_ACCESS_READ) {
9459                 fflags |= FREAD;
9460                 shr.s_access |= F_RDACC;
9461         }
9462         if (access & OPEN4_SHARE_ACCESS_WRITE) {
9463                 fflags |= FWRITE;
9464                 shr.s_access |= F_WRACC;
9465         }
9466         ASSERT(shr.s_access);
9467
9468         if (deny & OPEN4_SHARE_DENY_READ)
9469                 shr.s_deny |= F_RDDNY;
9470         if (deny & OPEN4_SHARE_DENY_WRITE)
9471                 shr.s_deny |= F_WRDNY;
9472
9473         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9474         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9475         shr_loco.sl_pid = shr.s_pid;
9476         shr_loco.sl_id = shr.s_sysid;
9477         shr.s_owner = (caddr_t)&shr_loco;
9478         shr.s_own_len = sizeof (shr_loco);
9479
9480         cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9481
9482         err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9483         if (err != 0) {
9484                 if (err == EAGAIN)
9485                         err = NFS4ERR_SHARE_DENIED;
9486                 else
9487                         err = puterrno4(err);
9488                 return (err);
9489         }
9490
9491         sp->rs_share_access |= access;
9492         sp->rs_share_deny |= deny;
9493
9494         return (0);
9495 }
9496
9497 int
9498 rfs4_unshare(rfs4_state_t *sp)
9499 {
9500         int err;
9501         struct shrlock shr;
9502         struct shr_locowner shr_loco;
9503
9504         ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9505
9506         if (sp->rs_closed || sp->rs_share_access == 0)
9507                 return (0);
9508
9509         ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9510         ASSERT(sp->rs_finfo->rf_vp);
9511
9512         shr.s_access = shr.s_deny = 0;
9513         shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9514         shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9515         shr_loco.sl_pid = shr.s_pid;
9516         shr_loco.sl_id = shr.s_sysid;
9517         shr.s_owner = (caddr_t)&shr_loco;
9518         shr.s_own_len = sizeof (shr_loco);
9519
9520         err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9521             NULL);
9522         if (err != 0) {
9523                 err = puterrno4(err);
9524                 return (err);
9525         }
9526
9527         sp->rs_share_access = 0;
9528         sp->rs_share_deny = 0;
9529
9530         return (0);
9531
9532 }
9533
9534 static int
9535 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9536 {
9537         struct clist    *wcl;
9538         count4          count = rok->data_len;
9539         int             wlist_len;
9540
9541         wcl = args->wlist;
9542         if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9543                 return (FALSE);
9544         }
9545         wcl = args->wlist;
9546         rok->wlist_len = wlist_len;
9547         rok->wlist = wcl;
9548         return (TRUE);
9549 }
9550
/*
 * Tunable to disable server referrals.  When set to a non-zero value,
 * vn_is_nfs_reparse() reports B_FALSE without examining the vnode.
 * Defaults to 0 (referrals enabled).
 */
int rfs4_no_referrals = 0;
9553
9554 /*
9555  * Find an NFS record in reparse point data.
9556  * Returns 0 for success and <0 or an errno value on failure.
9557  */
9558 int
9559 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9560 {
9561         int err;
9562         char *stype, *val;
9563         nvlist_t *nvl;
9564         nvpair_t *curr;
9565
9566         if ((nvl = reparse_init()) == NULL)
9567                 return (-1);
9568
9569         if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9570                 reparse_free(nvl);
9571                 return (err);
9572         }
9573
9574         curr = NULL;
9575         while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9576                 if ((stype = nvpair_name(curr)) == NULL) {
9577                         reparse_free(nvl);
9578                         return (-2);
9579                 }
9580                 if (strncasecmp(stype, "NFS", 3) == 0)
9581                         break;
9582         }
9583
9584         if ((curr == NULL) ||
9585             (nvpair_value_string(curr, &val))) {
9586                 reparse_free(nvl);
9587                 return (-3);
9588         }
9589         *nvlp = nvl;
9590         *svcp = stype;
9591         *datap = val;
9592         return (0);
9593 }
9594
9595 int
9596 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9597 {
9598         nvlist_t *nvl;
9599         char *s, *d;
9600
9601         if (rfs4_no_referrals != 0)
9602                 return (B_FALSE);
9603
9604         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9605                 return (B_FALSE);
9606
9607         if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9608                 return (B_FALSE);
9609
9610         reparse_free(nvl);
9611
9612         return (B_TRUE);
9613 }
9614
9615 /*
9616  * There is a user-level copy of this routine in ref_subr.c.
9617  * Changes should be kept in sync.
9618  */
9619 static int
9620 nfs4_create_components(char *path, component4 *comp4)
9621 {
9622         int slen, plen, ncomp;
9623         char *ori_path, *nxtc, buf[MAXNAMELEN];
9624
9625         if (path == NULL)
9626                 return (0);
9627
9628         plen = strlen(path) + 1;        /* include the terminator */
9629         ori_path = path;
9630         ncomp = 0;
9631
9632         /* count number of components in the path */
9633         for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9634                 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9635                         if ((slen = nxtc - path) == 0) {
9636                                 path = nxtc + 1;
9637                                 continue;
9638                         }
9639
9640                         if (comp4 != NULL) {
9641                                 bcopy(path, buf, slen);
9642                                 buf[slen] = '\0';
9643                                 (void) str_to_utf8(buf, &comp4[ncomp]);
9644                         }
9645
9646                         ncomp++;        /* 1 valid component */
9647                         path = nxtc + 1;
9648                 }
9649                 if (*nxtc == '\0' || *nxtc == '\n')
9650                         break;
9651         }
9652
9653         return (ncomp);
9654 }
9655
9656 /*
9657  * There is a user-level copy of this routine in ref_subr.c.
9658  * Changes should be kept in sync.
9659  */
9660 static int
9661 make_pathname4(char *path, pathname4 *pathname)
9662 {
9663         int ncomp;
9664         component4 *comp4;
9665
9666         if (pathname == NULL)
9667                 return (0);
9668
9669         if (path == NULL) {
9670                 pathname->pathname4_val = NULL;
9671                 pathname->pathname4_len = 0;
9672                 return (0);
9673         }
9674
9675         /* count number of components to alloc buffer */
9676         if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9677                 pathname->pathname4_val = NULL;
9678                 pathname->pathname4_len = 0;
9679                 return (0);
9680         }
9681         comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9682
9683         /* copy components into allocated buffer */
9684         ncomp = nfs4_create_components(path, comp4);
9685
9686         pathname->pathname4_val = comp4;
9687         pathname->pathname4_len = ncomp;
9688
9689         return (ncomp);
9690 }
9691
/* fetch_referral() decodes fs_locations4 using the fattr4 XDR routine */
#define xdr_fs_locations4 xdr_fattr4_fs_locations
9693
9694 fs_locations4 *
9695 fetch_referral(vnode_t *vp, cred_t *cr)
9696 {
9697         nvlist_t *nvl;
9698         char *stype, *sdata;
9699         fs_locations4 *result;
9700         char buf[1024];
9701         size_t bufsize;
9702         XDR xdr;
9703         int err;
9704
9705         /*
9706          * Check attrs to ensure it's a reparse point
9707          */
9708         if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9709                 return (NULL);
9710
9711         /*
9712          * Look for an NFS record and get the type and data
9713          */
9714         if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9715                 return (NULL);
9716
9717         /*
9718          * With the type and data, upcall to get the referral
9719          */
9720         bufsize = sizeof (buf);
9721         bzero(buf, sizeof (buf));
9722         err = reparse_kderef((const char *)stype, (const char *)sdata,
9723             buf, &bufsize);
9724         reparse_free(nvl);
9725
9726         DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9727             char *, stype, char *, sdata, char *, buf, int, err);
9728         if (err) {
9729                 cmn_err(CE_NOTE,
9730                     "reparsed daemon not running: unable to get referral (%d)",
9731                     err);
9732                 return (NULL);
9733         }
9734
9735         /*
9736          * We get an XDR'ed record back from the kderef call
9737          */
9738         xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9739         result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9740         err = xdr_fs_locations4(&xdr, result);
9741         XDR_DESTROY(&xdr);
9742         if (err != TRUE) {
9743                 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9744                     int, err);
9745                 return (NULL);
9746         }
9747
9748         /*
9749          * Look at path to recover fs_root, ignoring the leading '/'
9750          */
9751         (void) make_pathname4(vp->v_path, &result->fs_root);
9752
9753         return (result);
9754 }
9755
9756 char *
9757 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9758 {
9759         fs_locations4 *fsl;
9760         fs_location4 *fs;
9761         char *server, *path, *symbuf;
9762         static char *prefix = "/net/";
9763         int i, size, npaths;
9764         uint_t len;
9765
9766         /* Get the referral */
9767         if ((fsl = fetch_referral(vp, cr)) == NULL)
9768                 return (NULL);
9769
9770         /* Deal with only the first location and first server */
9771         fs = &fsl->locations_val[0];
9772         server = utf8_to_str(&fs->server_val[0], &len, NULL);
9773         if (server == NULL) {
9774                 rfs4_free_fs_locations4(fsl);
9775                 kmem_free(fsl, sizeof (fs_locations4));
9776                 return (NULL);
9777         }
9778
9779         /* Figure out size for "/net/" + host + /path/path/path + NULL */
9780         size = strlen(prefix) + len;
9781         for (i = 0; i < fs->rootpath.pathname4_len; i++)
9782                 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9783
9784         /* Allocate the symlink buffer and fill it */
9785         symbuf = kmem_zalloc(size, KM_SLEEP);
9786         (void) strcat(symbuf, prefix);
9787         (void) strcat(symbuf, server);
9788         kmem_free(server, len);
9789
9790         npaths = 0;
9791         for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9792                 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9793                 if (path == NULL)
9794                         continue;
9795                 (void) strcat(symbuf, "/");
9796                 (void) strcat(symbuf, path);
9797                 npaths++;
9798                 kmem_free(path, len);
9799         }
9800
9801         rfs4_free_fs_locations4(fsl);
9802         kmem_free(fsl, sizeof (fs_locations4));
9803
9804         if (strsz != NULL)
9805                 *strsz = size;
9806         return (symbuf);
9807 }
9808
9809 /*
9810  * Check to see if we have a downrev Solaris client, so that we
9811  * can send it a symlink instead of a referral.
9812  */
9813 int
9814 client_is_downrev(struct svc_req *req)
9815 {
9816         struct sockaddr *ca;
9817         rfs4_clntip_t *ci;
9818         bool_t create = FALSE;
9819         int is_downrev;
9820
9821         ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9822         ASSERT(ca);
9823         ci = rfs4_find_clntip(ca, &create);
9824         if (ci == NULL)
9825                 return (0);
9826         is_downrev = ci->ri_no_referrals;
9827         rfs4_dbe_rele(ci->ri_dbe);
9828         return (is_downrev);
9829 }