6914555 rfs4_op_rename() can double free converted_onm
[illumos-gate.git] / usr / src / uts / common / fs / nfs / nfs4_srv.c
blob288faadf85275bb7a965da793411a14f636fc3e6
1 /*
2 * CDDL HEADER START
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
19 * CDDL HEADER END
22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
26 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
27 * All Rights Reserved
30 #include <sys/param.h>
31 #include <sys/types.h>
32 #include <sys/systm.h>
33 #include <sys/cred.h>
34 #include <sys/buf.h>
35 #include <sys/vfs.h>
36 #include <sys/vfs_opreg.h>
37 #include <sys/vnode.h>
38 #include <sys/uio.h>
39 #include <sys/errno.h>
40 #include <sys/sysmacros.h>
41 #include <sys/statvfs.h>
42 #include <sys/kmem.h>
43 #include <sys/dirent.h>
44 #include <sys/cmn_err.h>
45 #include <sys/debug.h>
46 #include <sys/systeminfo.h>
47 #include <sys/flock.h>
48 #include <sys/pathname.h>
49 #include <sys/nbmlock.h>
50 #include <sys/share.h>
51 #include <sys/atomic.h>
52 #include <sys/policy.h>
53 #include <sys/fem.h>
54 #include <sys/sdt.h>
55 #include <sys/ddi.h>
56 #include <sys/zone.h>
58 #include <fs/fs_reparse.h>
60 #include <rpc/types.h>
61 #include <rpc/auth.h>
62 #include <rpc/rpcsec_gss.h>
63 #include <rpc/svc.h>
65 #include <nfs/nfs.h>
66 #include <nfs/export.h>
67 #include <nfs/nfs_cmd.h>
68 #include <nfs/lm.h>
69 #include <nfs/nfs4.h>
71 #include <sys/strsubr.h>
72 #include <sys/strsun.h>
74 #include <inet/common.h>
75 #include <inet/ip.h>
76 #include <inet/ip6.h>
78 #include <sys/tsol/label.h>
79 #include <sys/tsol/tndb.h>
81 #define RFS4_MAXLOCK_TRIES 4 /* Try to get the lock this many times */
82 static int rfs4_maxlock_tries = RFS4_MAXLOCK_TRIES;
83 #define RFS4_LOCK_DELAY 10 /* Milliseconds */
84 static clock_t rfs4_lock_delay = RFS4_LOCK_DELAY;
85 extern struct svc_ops rdma_svc_ops;
86 extern int nfs_loaned_buffers;
87 /* End of Tunables */
89 static int rdma_setup_read_data4(READ4args *, READ4res *);
92 * Used to bump the stateid4.seqid value and show changes in the stateid
94 #define next_stateid(sp) (++(sp)->bits.chgseq)
97 * RFS4_MINLEN_ENTRY4: XDR-encoded size of smallest possible dirent.
98 * This is used to return NFS4ERR_TOOSMALL when clients specify
99 * maxcount that isn't large enough to hold the smallest possible
100 * XDR encoded dirent.
102 * sizeof cookie (8 bytes) +
103 * sizeof name_len (4 bytes) +
104 * sizeof smallest (padded) name (4 bytes) +
105 * sizeof bitmap4_len (12 bytes) + NOTE: we always encode len=2 bm4
106 * sizeof attrlist4_len (4 bytes) +
107 * sizeof next boolean (4 bytes)
109 * RFS4_MINLEN_RDDIR4: XDR-encoded size of READDIR op reply containing
110 * the smallest possible entry4 (assumes no attrs requested).
111 * sizeof nfsstat4 (4 bytes) +
112 * sizeof verifier4 (8 bytes) +
113 * sizeof entry4list bool (4 bytes) +
114 * sizeof entry4 (36 bytes) +
115 * sizeof eof bool (4 bytes)
117 * RFS4_MINLEN_RDDIR_BUF: minimum length of buffer server will provide to
118 * VOP_READDIR. Its value is the size of the maximum possible dirent
119 * for solaris. The DIRENT64_RECLEN macro returns the size of dirent
120 * required for a given name length. MAXNAMELEN is the maximum
121 * filename length allowed in Solaris. The first two DIRENT64_RECLEN()
122 * macros are to allow for . and .. entries -- just a minor tweak to try
123 * and guarantee that buffer we give to VOP_READDIR will be large enough
124 * to hold ., .., and the largest possible solaris dirent64.
126 #define RFS4_MINLEN_ENTRY4 36
127 #define RFS4_MINLEN_RDDIR4 (4 + NFS4_VERIFIER_SIZE + 4 + RFS4_MINLEN_ENTRY4 + 4)
128 #define RFS4_MINLEN_RDDIR_BUF \
129 (DIRENT64_RECLEN(1) + DIRENT64_RECLEN(2) + DIRENT64_RECLEN(MAXNAMELEN))
132 * It would be better to pad to 4 bytes since that's what XDR would do,
133 * but the dirents UFS gives us are already padded to 8, so just take
134 * what we're given. Dircount is only a hint anyway. Currently the
135 * solaris kernel is ASCII only, so there's no point in calling the
136 * UTF8 functions.
138 * dirent64: name padded to provide 8 byte struct alignment
139 * d_ino(8) + d_off(8) + d_reclen(2) + d_name(namelen + null(1) + pad)
141 * cookie: uint64_t + utf8namelen: uint_t + utf8name padded to 8 bytes
144 #define DIRENT64_TO_DIRCOUNT(dp) \
145 (3 * BYTES_PER_XDR_UNIT + DIRENT64_NAMELEN((dp)->d_reclen))
147 time_t rfs4_start_time; /* Initialized in rfs4_srvrinit */
149 static sysid_t lockt_sysid; /* dummy sysid for all LOCKT calls */
151 u_longlong_t nfs4_srv_caller_id;
152 uint_t nfs4_srv_vkey = 0;
154 verifier4 Write4verf;
155 verifier4 Readdir4verf;
157 void rfs4_init_compound_state(struct compound_state *);
159 static void nullfree(caddr_t);
160 static void rfs4_op_inval(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
161 struct compound_state *);
162 static void rfs4_op_access(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
163 struct compound_state *);
164 static void rfs4_op_close(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
165 struct compound_state *);
166 static void rfs4_op_commit(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
167 struct compound_state *);
168 static void rfs4_op_create(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
169 struct compound_state *);
170 static void rfs4_op_create_free(nfs_resop4 *resop);
171 static void rfs4_op_delegreturn(nfs_argop4 *, nfs_resop4 *,
172 struct svc_req *, struct compound_state *);
173 static void rfs4_op_delegpurge(nfs_argop4 *, nfs_resop4 *,
174 struct svc_req *, struct compound_state *);
175 static void rfs4_op_getattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
176 struct compound_state *);
177 static void rfs4_op_getattr_free(nfs_resop4 *);
178 static void rfs4_op_getfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
179 struct compound_state *);
180 static void rfs4_op_getfh_free(nfs_resop4 *);
181 static void rfs4_op_illegal(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
182 struct compound_state *);
183 static void rfs4_op_link(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
184 struct compound_state *);
185 static void rfs4_op_lock(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
186 struct compound_state *);
187 static void lock_denied_free(nfs_resop4 *);
188 static void rfs4_op_locku(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
189 struct compound_state *);
190 static void rfs4_op_lockt(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
191 struct compound_state *);
192 static void rfs4_op_lookup(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
193 struct compound_state *);
194 static void rfs4_op_lookupp(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
195 struct compound_state *);
196 static void rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop,
197 struct svc_req *req, struct compound_state *cs);
198 static void rfs4_op_nverify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
199 struct compound_state *);
200 static void rfs4_op_open(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
201 struct compound_state *);
202 static void rfs4_op_open_confirm(nfs_argop4 *, nfs_resop4 *,
203 struct svc_req *, struct compound_state *);
204 static void rfs4_op_open_downgrade(nfs_argop4 *, nfs_resop4 *,
205 struct svc_req *, struct compound_state *);
206 static void rfs4_op_putfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
207 struct compound_state *);
208 static void rfs4_op_putpubfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
209 struct compound_state *);
210 static void rfs4_op_putrootfh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
211 struct compound_state *);
212 static void rfs4_op_read(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
213 struct compound_state *);
214 static void rfs4_op_read_free(nfs_resop4 *);
215 static void rfs4_op_readdir_free(nfs_resop4 *resop);
216 static void rfs4_op_readlink(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
217 struct compound_state *);
218 static void rfs4_op_readlink_free(nfs_resop4 *);
219 static void rfs4_op_release_lockowner(nfs_argop4 *, nfs_resop4 *,
220 struct svc_req *, struct compound_state *);
221 static void rfs4_op_remove(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
222 struct compound_state *);
223 static void rfs4_op_rename(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
224 struct compound_state *);
225 static void rfs4_op_renew(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
226 struct compound_state *);
227 static void rfs4_op_restorefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
228 struct compound_state *);
229 static void rfs4_op_savefh(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
230 struct compound_state *);
231 static void rfs4_op_setattr(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
232 struct compound_state *);
233 static void rfs4_op_verify(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
234 struct compound_state *);
235 static void rfs4_op_write(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
236 struct compound_state *);
237 static void rfs4_op_setclientid(nfs_argop4 *, nfs_resop4 *,
238 struct svc_req *, struct compound_state *);
239 static void rfs4_op_setclientid_confirm(nfs_argop4 *, nfs_resop4 *,
240 struct svc_req *req, struct compound_state *);
241 static void rfs4_op_secinfo(nfs_argop4 *, nfs_resop4 *, struct svc_req *,
242 struct compound_state *);
243 static void rfs4_op_secinfo_free(nfs_resop4 *);
245 static nfsstat4 check_open_access(uint32_t,
246 struct compound_state *, struct svc_req *);
247 nfsstat4 rfs4_client_sysid(rfs4_client_t *, sysid_t *);
248 void rfs4_ss_clid(rfs4_client_t *);
251 * translation table for attrs
253 struct nfs4_ntov_table {
254 union nfs4_attr_u *na;
255 uint8_t amap[NFS4_MAXNUM_ATTRS];
256 int attrcnt;
257 bool_t vfsstat;
260 static void nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp);
261 static void nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
262 struct nfs4_svgetit_arg *sargp);
264 static nfsstat4 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp,
265 struct compound_state *cs, struct nfs4_svgetit_arg *sargp,
266 struct nfs4_ntov_table *ntovp, nfs4_attr_cmd_t cmd);
268 fem_t *deleg_rdops;
269 fem_t *deleg_wrops;
271 rfs4_servinst_t *rfs4_cur_servinst = NULL; /* current server instance */
272 kmutex_t rfs4_servinst_lock; /* protects linked list */
273 int rfs4_seen_first_compound; /* set first time we see one */
276 * NFS4 op dispatch table
279 struct rfsv4disp {
280 void (*dis_proc)(); /* proc to call */
281 void (*dis_resfree)(); /* frees space allocated by proc */
282 int dis_flags; /* RPC_IDEMPOTENT, etc... */
285 static struct rfsv4disp rfsv4disptab[] = {
287 * NFS VERSION 4
290 /* RFS_NULL = 0 */
291 {rfs4_op_illegal, nullfree, 0},
293 /* UNUSED = 1 */
294 {rfs4_op_illegal, nullfree, 0},
296 /* UNUSED = 2 */
297 {rfs4_op_illegal, nullfree, 0},
299 /* OP_ACCESS = 3 */
300 {rfs4_op_access, nullfree, RPC_IDEMPOTENT},
302 /* OP_CLOSE = 4 */
303 {rfs4_op_close, nullfree, 0},
305 /* OP_COMMIT = 5 */
306 {rfs4_op_commit, nullfree, RPC_IDEMPOTENT},
308 /* OP_CREATE = 6 */
309 {rfs4_op_create, nullfree, 0},
311 /* OP_DELEGPURGE = 7 */
312 {rfs4_op_delegpurge, nullfree, 0},
314 /* OP_DELEGRETURN = 8 */
315 {rfs4_op_delegreturn, nullfree, 0},
317 /* OP_GETATTR = 9 */
318 {rfs4_op_getattr, rfs4_op_getattr_free, RPC_IDEMPOTENT},
320 /* OP_GETFH = 10 */
321 {rfs4_op_getfh, rfs4_op_getfh_free, RPC_ALL},
323 /* OP_LINK = 11 */
324 {rfs4_op_link, nullfree, 0},
326 /* OP_LOCK = 12 */
327 {rfs4_op_lock, lock_denied_free, 0},
329 /* OP_LOCKT = 13 */
330 {rfs4_op_lockt, lock_denied_free, 0},
332 /* OP_LOCKU = 14 */
333 {rfs4_op_locku, nullfree, 0},
335 /* OP_LOOKUP = 15 */
336 {rfs4_op_lookup, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
338 /* OP_LOOKUPP = 16 */
339 {rfs4_op_lookupp, nullfree, (RPC_IDEMPOTENT | RPC_PUBLICFH_OK)},
341 /* OP_NVERIFY = 17 */
342 {rfs4_op_nverify, nullfree, RPC_IDEMPOTENT},
344 /* OP_OPEN = 18 */
345 {rfs4_op_open, rfs4_free_reply, 0},
347 /* OP_OPENATTR = 19 */
348 {rfs4_op_openattr, nullfree, 0},
350 /* OP_OPEN_CONFIRM = 20 */
351 {rfs4_op_open_confirm, nullfree, 0},
353 /* OP_OPEN_DOWNGRADE = 21 */
354 {rfs4_op_open_downgrade, nullfree, 0},
356 /* OP_PUTFH = 22 */
357 {rfs4_op_putfh, nullfree, RPC_ALL},
359 /* OP_PUTPUBFH = 23 */
360 {rfs4_op_putpubfh, nullfree, RPC_ALL},
362 /* OP_PUTROOTFH = 24 */
363 {rfs4_op_putrootfh, nullfree, RPC_ALL},
365 /* OP_READ = 25 */
366 {rfs4_op_read, rfs4_op_read_free, RPC_IDEMPOTENT},
368 /* OP_READDIR = 26 */
369 {rfs4_op_readdir, rfs4_op_readdir_free, RPC_IDEMPOTENT},
371 /* OP_READLINK = 27 */
372 {rfs4_op_readlink, rfs4_op_readlink_free, RPC_IDEMPOTENT},
374 /* OP_REMOVE = 28 */
375 {rfs4_op_remove, nullfree, 0},
377 /* OP_RENAME = 29 */
378 {rfs4_op_rename, nullfree, 0},
380 /* OP_RENEW = 30 */
381 {rfs4_op_renew, nullfree, 0},
383 /* OP_RESTOREFH = 31 */
384 {rfs4_op_restorefh, nullfree, RPC_ALL},
386 /* OP_SAVEFH = 32 */
387 {rfs4_op_savefh, nullfree, RPC_ALL},
389 /* OP_SECINFO = 33 */
390 {rfs4_op_secinfo, rfs4_op_secinfo_free, 0},
392 /* OP_SETATTR = 34 */
393 {rfs4_op_setattr, nullfree, 0},
395 /* OP_SETCLIENTID = 35 */
396 {rfs4_op_setclientid, nullfree, 0},
398 /* OP_SETCLIENTID_CONFIRM = 36 */
399 {rfs4_op_setclientid_confirm, nullfree, 0},
401 /* OP_VERIFY = 37 */
402 {rfs4_op_verify, nullfree, RPC_IDEMPOTENT},
404 /* OP_WRITE = 38 */
405 {rfs4_op_write, nullfree, 0},
407 /* OP_RELEASE_LOCKOWNER = 39 */
408 {rfs4_op_release_lockowner, nullfree, 0},
411 static uint_t rfsv4disp_cnt = sizeof (rfsv4disptab) / sizeof (rfsv4disptab[0]);
413 #define OP_ILLEGAL_IDX (rfsv4disp_cnt)
#ifdef DEBUG

/* Debug knobs; nothing in the visible part of this file reads them. */
int rfs4_fillone_debug = 0;
int rfs4_no_stub_access = 1;
int rfs4_rddir_debug = 0;

/*
 * Printable names of the operation handlers, in the same order as the
 * entries of rfsv4disptab[].  The extra, final entry lines up with
 * OP_ILLEGAL_IDX (== rfsv4disp_cnt).
 */
static char *rfs4_op_string[] = {
	"rfs4_op_null",
	"rfs4_op_1 unused",
	"rfs4_op_2 unused",
	"rfs4_op_access",
	"rfs4_op_close",
	"rfs4_op_commit",
	"rfs4_op_create",
	"rfs4_op_delegpurge",
	"rfs4_op_delegreturn",
	"rfs4_op_getattr",
	"rfs4_op_getfh",
	"rfs4_op_link",
	"rfs4_op_lock",
	"rfs4_op_lockt",
	"rfs4_op_locku",
	"rfs4_op_lookup",
	"rfs4_op_lookupp",
	"rfs4_op_nverify",
	"rfs4_op_open",
	"rfs4_op_openattr",
	"rfs4_op_open_confirm",
	"rfs4_op_open_downgrade",
	"rfs4_op_putfh",
	"rfs4_op_putpubfh",
	"rfs4_op_putrootfh",
	"rfs4_op_read",
	"rfs4_op_readdir",
	"rfs4_op_readlink",
	"rfs4_op_remove",
	"rfs4_op_rename",
	"rfs4_op_renew",
	"rfs4_op_restorefh",
	"rfs4_op_savefh",
	"rfs4_op_secinfo",
	"rfs4_op_setattr",
	"rfs4_op_setclientid",
	/* was "rfs4_op_setclient_confirm"; match the handler's real name */
	"rfs4_op_setclientid_confirm",
	"rfs4_op_verify",
	"rfs4_op_write",
	"rfs4_op_release_lockowner",
	"rfs4_op_illegal"
};
#endif
466 void rfs4_ss_chkclid(rfs4_client_t *);
468 extern size_t strlcpy(char *dst, const char *src, size_t dstsize);
470 extern void rfs4_free_fs_locations4(fs_locations4 *);
472 #ifdef nextdp
473 #undef nextdp
474 #endif
475 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
477 static const fs_operation_def_t nfs4_rd_deleg_tmpl[] = {
478 VOPNAME_OPEN, { .femop_open = deleg_rd_open },
479 VOPNAME_WRITE, { .femop_write = deleg_rd_write },
480 VOPNAME_SETATTR, { .femop_setattr = deleg_rd_setattr },
481 VOPNAME_RWLOCK, { .femop_rwlock = deleg_rd_rwlock },
482 VOPNAME_SPACE, { .femop_space = deleg_rd_space },
483 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_rd_setsecattr },
484 VOPNAME_VNEVENT, { .femop_vnevent = deleg_rd_vnevent },
485 NULL, NULL
487 static const fs_operation_def_t nfs4_wr_deleg_tmpl[] = {
488 VOPNAME_OPEN, { .femop_open = deleg_wr_open },
489 VOPNAME_READ, { .femop_read = deleg_wr_read },
490 VOPNAME_WRITE, { .femop_write = deleg_wr_write },
491 VOPNAME_SETATTR, { .femop_setattr = deleg_wr_setattr },
492 VOPNAME_RWLOCK, { .femop_rwlock = deleg_wr_rwlock },
493 VOPNAME_SPACE, { .femop_space = deleg_wr_space },
494 VOPNAME_SETSECATTR, { .femop_setsecattr = deleg_wr_setsecattr },
495 VOPNAME_VNEVENT, { .femop_vnevent = deleg_wr_vnevent },
496 NULL, NULL
500 rfs4_srvrinit(void)
502 timespec32_t verf;
503 int error;
504 extern void rfs4_attr_init();
505 extern krwlock_t rfs4_deleg_policy_lock;
508 * The following algorithm attempts to find a unique verifier
509 * to be used as the write verifier returned from the server
510 * to the client. It is important that this verifier change
511 * whenever the server reboots. Of secondary importance, it
512 * is important for the verifier to be unique between two
513 * different servers.
515 * Thus, an attempt is made to use the system hostid and the
516 * current time in seconds when the nfssrv kernel module is
517 * loaded. It is assumed that an NFS server will not be able
518 * to boot and then to reboot in less than a second. If the
519 * hostid has not been set, then the current high resolution
520 * time is used. This will ensure different verifiers each
521 * time the server reboots and minimize the chances that two
522 * different servers will have the same verifier.
523 * XXX - this is broken on LP64 kernels.
525 verf.tv_sec = (time_t)zone_get_hostid(NULL);
526 if (verf.tv_sec != 0) {
527 verf.tv_nsec = gethrestime_sec();
528 } else {
529 timespec_t tverf;
531 gethrestime(&tverf);
532 verf.tv_sec = (time_t)tverf.tv_sec;
533 verf.tv_nsec = tverf.tv_nsec;
536 Write4verf = *(uint64_t *)&verf;
538 rfs4_attr_init();
539 mutex_init(&rfs4_deleg_lock, NULL, MUTEX_DEFAULT, NULL);
541 /* Used to manage create/destroy of server state */
542 mutex_init(&rfs4_state_lock, NULL, MUTEX_DEFAULT, NULL);
544 /* Used to manage access to server instance linked list */
545 mutex_init(&rfs4_servinst_lock, NULL, MUTEX_DEFAULT, NULL);
547 /* Used to manage access to rfs4_deleg_policy */
548 rw_init(&rfs4_deleg_policy_lock, NULL, RW_DEFAULT, NULL);
550 error = fem_create("deleg_rdops", nfs4_rd_deleg_tmpl, &deleg_rdops);
551 if (error != 0) {
552 rfs4_disable_delegation();
553 } else {
554 error = fem_create("deleg_wrops", nfs4_wr_deleg_tmpl,
555 &deleg_wrops);
556 if (error != 0) {
557 rfs4_disable_delegation();
558 fem_free(deleg_rdops);
562 nfs4_srv_caller_id = fs_new_caller_id();
564 lockt_sysid = lm_alloc_sysidt();
566 vsd_create(&nfs4_srv_vkey, NULL);
568 return (0);
571 void
572 rfs4_srvrfini(void)
574 extern krwlock_t rfs4_deleg_policy_lock;
576 if (lockt_sysid != LM_NOSYSID) {
577 lm_free_sysidt(lockt_sysid);
578 lockt_sysid = LM_NOSYSID;
581 mutex_destroy(&rfs4_deleg_lock);
582 mutex_destroy(&rfs4_state_lock);
583 rw_destroy(&rfs4_deleg_policy_lock);
585 fem_free(deleg_rdops);
586 fem_free(deleg_wrops);
589 void
590 rfs4_init_compound_state(struct compound_state *cs)
592 bzero(cs, sizeof (*cs));
593 cs->cont = TRUE;
594 cs->access = CS_ACCESS_DENIED;
595 cs->deleg = FALSE;
596 cs->mandlock = FALSE;
597 cs->fh.nfs_fh4_val = cs->fhbuf;
600 void
601 rfs4_grace_start(rfs4_servinst_t *sip)
603 rw_enter(&sip->rwlock, RW_WRITER);
604 sip->start_time = (time_t)TICK_TO_SEC(ddi_get_lbolt());
605 sip->grace_period = rfs4_grace_period;
606 rw_exit(&sip->rwlock);
610 * returns true if the instance's grace period has never been started
613 rfs4_servinst_grace_new(rfs4_servinst_t *sip)
615 time_t start_time;
617 rw_enter(&sip->rwlock, RW_READER);
618 start_time = sip->start_time;
619 rw_exit(&sip->rwlock);
621 return (start_time == 0);
625 * Indicates if server instance is within the
626 * grace period.
629 rfs4_servinst_in_grace(rfs4_servinst_t *sip)
631 time_t grace_expiry;
633 rw_enter(&sip->rwlock, RW_READER);
634 grace_expiry = sip->start_time + sip->grace_period;
635 rw_exit(&sip->rwlock);
637 return (((time_t)TICK_TO_SEC(ddi_get_lbolt())) < grace_expiry);
641 rfs4_clnt_in_grace(rfs4_client_t *cp)
643 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
645 return (rfs4_servinst_in_grace(cp->rc_server_instance));
649 * reset all currently active grace periods
651 void
652 rfs4_grace_reset_all(void)
654 rfs4_servinst_t *sip;
656 mutex_enter(&rfs4_servinst_lock);
657 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
658 if (rfs4_servinst_in_grace(sip))
659 rfs4_grace_start(sip);
660 mutex_exit(&rfs4_servinst_lock);
664 * start any new instances' grace periods
666 void
667 rfs4_grace_start_new(void)
669 rfs4_servinst_t *sip;
671 mutex_enter(&rfs4_servinst_lock);
672 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev)
673 if (rfs4_servinst_grace_new(sip))
674 rfs4_grace_start(sip);
675 mutex_exit(&rfs4_servinst_lock);
678 static rfs4_dss_path_t *
679 rfs4_dss_newpath(rfs4_servinst_t *sip, char *path, unsigned index)
681 size_t len;
682 rfs4_dss_path_t *dss_path;
684 dss_path = kmem_alloc(sizeof (rfs4_dss_path_t), KM_SLEEP);
687 * Take a copy of the string, since the original may be overwritten.
688 * Sadly, no strdup() in the kernel.
690 /* allow for NUL */
691 len = strlen(path) + 1;
692 dss_path->path = kmem_alloc(len, KM_SLEEP);
693 (void) strlcpy(dss_path->path, path, len);
695 /* associate with servinst */
696 dss_path->sip = sip;
697 dss_path->index = index;
700 * Add to list of served paths.
701 * No locking required, as we're only ever called at startup.
703 if (rfs4_dss_pathlist == NULL) {
704 /* this is the first dss_path_t */
706 /* needed for insque/remque */
707 dss_path->next = dss_path->prev = dss_path;
709 rfs4_dss_pathlist = dss_path;
710 } else {
711 insque(dss_path, rfs4_dss_pathlist);
714 return (dss_path);
718 * Create a new server instance, and make it the currently active instance.
719 * Note that starting the grace period too early will reduce the clients'
720 * recovery window.
722 void
723 rfs4_servinst_create(int start_grace, int dss_npaths, char **dss_paths)
725 unsigned i;
726 rfs4_servinst_t *sip;
727 rfs4_oldstate_t *oldstate;
729 sip = kmem_alloc(sizeof (rfs4_servinst_t), KM_SLEEP);
730 rw_init(&sip->rwlock, NULL, RW_DEFAULT, NULL);
732 sip->start_time = (time_t)0;
733 sip->grace_period = (time_t)0;
734 sip->next = NULL;
735 sip->prev = NULL;
737 rw_init(&sip->oldstate_lock, NULL, RW_DEFAULT, NULL);
739 * This initial dummy entry is required to setup for insque/remque.
740 * It must be skipped over whenever the list is traversed.
742 oldstate = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
743 /* insque/remque require initial list entry to be self-terminated */
744 oldstate->next = oldstate;
745 oldstate->prev = oldstate;
746 sip->oldstate = oldstate;
749 sip->dss_npaths = dss_npaths;
750 sip->dss_paths = kmem_alloc(dss_npaths *
751 sizeof (rfs4_dss_path_t *), KM_SLEEP);
753 for (i = 0; i < dss_npaths; i++) {
754 sip->dss_paths[i] = rfs4_dss_newpath(sip, dss_paths[i], i);
757 mutex_enter(&rfs4_servinst_lock);
758 if (rfs4_cur_servinst != NULL) {
759 /* add to linked list */
760 sip->prev = rfs4_cur_servinst;
761 rfs4_cur_servinst->next = sip;
763 if (start_grace)
764 rfs4_grace_start(sip);
765 /* make the new instance "current" */
766 rfs4_cur_servinst = sip;
768 mutex_exit(&rfs4_servinst_lock);
772 * In future, we might add a rfs4_servinst_destroy(sip) but, for now, destroy
773 * all instances directly.
775 void
776 rfs4_servinst_destroy_all(void)
778 rfs4_servinst_t *sip, *prev, *current;
779 #ifdef DEBUG
780 int n = 0;
781 #endif
783 mutex_enter(&rfs4_servinst_lock);
784 ASSERT(rfs4_cur_servinst != NULL);
785 current = rfs4_cur_servinst;
786 rfs4_cur_servinst = NULL;
787 for (sip = current; sip != NULL; sip = prev) {
788 prev = sip->prev;
789 rw_destroy(&sip->rwlock);
790 if (sip->oldstate)
791 kmem_free(sip->oldstate, sizeof (rfs4_oldstate_t));
792 if (sip->dss_paths)
793 kmem_free(sip->dss_paths,
794 sip->dss_npaths * sizeof (rfs4_dss_path_t *));
795 kmem_free(sip, sizeof (rfs4_servinst_t));
796 #ifdef DEBUG
797 n++;
798 #endif
800 mutex_exit(&rfs4_servinst_lock);
804 * Assign the current server instance to a client_t.
805 * Should be called with cp->rc_dbe held.
807 void
808 rfs4_servinst_assign(rfs4_client_t *cp, rfs4_servinst_t *sip)
810 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
813 * The lock ensures that if the current instance is in the process
814 * of changing, we will see the new one.
816 mutex_enter(&rfs4_servinst_lock);
817 cp->rc_server_instance = sip;
818 mutex_exit(&rfs4_servinst_lock);
821 rfs4_servinst_t *
822 rfs4_servinst(rfs4_client_t *cp)
824 ASSERT(rfs4_dbe_refcnt(cp->rc_dbe) > 0);
826 return (cp->rc_server_instance);
829 /* ARGSUSED */
830 static void
831 nullfree(caddr_t resop)
836 * This is a fall-through for invalid or not implemented (yet) ops
838 /* ARGSUSED */
839 static void
840 rfs4_op_inval(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
841 struct compound_state *cs)
843 *cs->statusp = *((nfsstat4 *)&(resop)->nfs_resop4_u) = NFS4ERR_INVAL;
/*
 * Check if the security flavor, nfsnum, is among the first "count"
 * entries of flavor_list.  Returns TRUE if found, FALSE otherwise
 * (including when count is not positive).
 */
bool_t
in_flavor_list(int nfsnum, int *flavor_list, int count)
{
	int *cur = flavor_list;
	int left;

	for (left = count; left > 0; left--) {
		if (*cur++ == nfsnum)
			return (TRUE);
	}

	return (FALSE);
}
862 * Used by rfs4_op_secinfo to get the security information from the
863 * export structure associated with the component.
865 /* ARGSUSED */
866 static nfsstat4
867 do_rfs4_op_secinfo(struct compound_state *cs, char *nm, SECINFO4res *resp)
869 int error, different_export = 0;
870 vnode_t *dvp, *vp, *tvp;
871 struct exportinfo *exi = NULL;
872 fid_t fid;
873 uint_t count, i;
874 secinfo4 *resok_val;
875 struct secinfo *secp;
876 seconfig_t *si;
877 bool_t did_traverse = FALSE;
878 int dotdot, walk;
880 dvp = cs->vp;
881 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
884 * If dotdotting, then need to check whether it's above the
885 * root of a filesystem, or above an export point.
887 if (dotdot) {
890 * If dotdotting at the root of a filesystem, then
891 * need to traverse back to the mounted-on filesystem
892 * and do the dotdot lookup there.
894 if (cs->vp->v_flag & VROOT) {
897 * If at the system root, then can
898 * go up no further.
900 if (VN_CMP(dvp, rootdir))
901 return (puterrno4(ENOENT));
904 * Traverse back to the mounted-on filesystem
906 dvp = untraverse(cs->vp);
909 * Set the different_export flag so we remember
910 * to pick up a new exportinfo entry for
911 * this new filesystem.
913 different_export = 1;
914 } else {
917 * If dotdotting above an export point then set
918 * the different_export to get new export info.
920 different_export = nfs_exported(cs->exi, cs->vp);
925 * Get the vnode for the component "nm".
927 error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cs->cr,
928 NULL, NULL, NULL);
929 if (error)
930 return (puterrno4(error));
933 * If the vnode is in a pseudo filesystem, or if the security flavor
934 * used in the request is valid but not an explicitly shared flavor,
935 * or the access bit indicates that this is a limited access,
936 * check whether this vnode is visible.
938 if (!different_export &&
939 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
940 cs->access & CS_ACCESS_LIMITED)) {
941 if (! nfs_visible(cs->exi, vp, &different_export)) {
942 VN_RELE(vp);
943 return (puterrno4(ENOENT));
948 * If it's a mountpoint, then traverse it.
950 if (vn_ismntpt(vp)) {
951 tvp = vp;
952 if ((error = traverse(&tvp)) != 0) {
953 VN_RELE(vp);
954 return (puterrno4(error));
956 /* remember that we had to traverse mountpoint */
957 did_traverse = TRUE;
958 vp = tvp;
959 different_export = 1;
960 } else if (vp->v_vfsp != dvp->v_vfsp) {
962 * If vp isn't a mountpoint and the vfs ptrs aren't the same,
963 * then vp is probably an LOFS object. We don't need the
964 * realvp, we just need to know that we might have crossed
965 * a server fs boundary and need to call checkexport4.
966 * (LOFS lookup hides server fs mountpoints, and actually calls
967 * traverse)
969 different_export = 1;
973 * Get the export information for it.
975 if (different_export) {
977 bzero(&fid, sizeof (fid));
978 fid.fid_len = MAXFIDSZ;
979 error = vop_fid_pseudo(vp, &fid);
980 if (error) {
981 VN_RELE(vp);
982 return (puterrno4(error));
985 if (dotdot)
986 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
987 else
988 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
990 if (exi == NULL) {
991 if (did_traverse == TRUE) {
993 * If this vnode is a mounted-on vnode,
994 * but the mounted-on file system is not
995 * exported, send back the secinfo for
996 * the exported node that the mounted-on
997 * vnode lives in.
999 exi = cs->exi;
1000 } else {
1001 VN_RELE(vp);
1002 return (puterrno4(EACCES));
1005 } else {
1006 exi = cs->exi;
1008 ASSERT(exi != NULL);
1012 * Create the secinfo result based on the security information
1013 * from the exportinfo structure (exi).
1015 * Return all flavors for a pseudo node.
1016 * For a real export node, return the flavor that the client
1017 * has access with.
1019 ASSERT(RW_LOCK_HELD(&exported_lock));
1020 if (PSEUDO(exi)) {
1021 count = exi->exi_export.ex_seccnt; /* total sec count */
1022 resok_val = kmem_alloc(count * sizeof (secinfo4), KM_SLEEP);
1023 secp = exi->exi_export.ex_secinfo;
1025 for (i = 0; i < count; i++) {
1026 si = &secp[i].s_secinfo;
1027 resok_val[i].flavor = si->sc_rpcnum;
1028 if (resok_val[i].flavor == RPCSEC_GSS) {
1029 rpcsec_gss_info *info;
1031 info = &resok_val[i].flavor_info;
1032 info->qop = si->sc_qop;
1033 info->service = (rpc_gss_svc_t)si->sc_service;
1035 /* get oid opaque data */
1036 info->oid.sec_oid4_len =
1037 si->sc_gss_mech_type->length;
1038 info->oid.sec_oid4_val = kmem_alloc(
1039 si->sc_gss_mech_type->length, KM_SLEEP);
1040 bcopy(
1041 si->sc_gss_mech_type->elements,
1042 info->oid.sec_oid4_val,
1043 info->oid.sec_oid4_len);
1046 resp->SECINFO4resok_len = count;
1047 resp->SECINFO4resok_val = resok_val;
1048 } else {
1049 int ret_cnt = 0, k = 0;
1050 int *flavor_list;
1052 count = exi->exi_export.ex_seccnt; /* total sec count */
1053 secp = exi->exi_export.ex_secinfo;
1055 flavor_list = kmem_alloc(count * sizeof (int), KM_SLEEP);
1056 /* find out which flavors to return */
1057 for (i = 0; i < count; i ++) {
1058 int access, flavor, perm;
1060 flavor = secp[i].s_secinfo.sc_nfsnum;
1061 perm = secp[i].s_flags;
1063 access = nfsauth4_secinfo_access(exi, cs->req,
1064 flavor, perm);
1066 if (! (access & NFSAUTH_DENIED) &&
1067 ! (access & NFSAUTH_WRONGSEC)) {
1068 flavor_list[ret_cnt] = flavor;
1069 ret_cnt++;
1073 /* Create the returning SECINFO value */
1074 resok_val = kmem_alloc(ret_cnt * sizeof (secinfo4), KM_SLEEP);
1076 for (i = 0; i < count; i++) {
1078 * If the flavor is in the flavor list,
1079 * fill in resok_val.
1081 si = &secp[i].s_secinfo;
1082 if (in_flavor_list(si->sc_nfsnum,
1083 flavor_list, ret_cnt)) {
1084 resok_val[k].flavor = si->sc_rpcnum;
1085 if (resok_val[k].flavor == RPCSEC_GSS) {
1086 rpcsec_gss_info *info;
1088 info = &resok_val[k].flavor_info;
1089 info->qop = si->sc_qop;
1090 info->service = (rpc_gss_svc_t)
1091 si->sc_service;
1093 /* get oid opaque data */
1094 info->oid.sec_oid4_len =
1095 si->sc_gss_mech_type->length;
1096 info->oid.sec_oid4_val = kmem_alloc(
1097 si->sc_gss_mech_type->length,
1098 KM_SLEEP);
1099 bcopy(si->sc_gss_mech_type->elements,
1100 info->oid.sec_oid4_val,
1101 info->oid.sec_oid4_len);
1103 k++;
1105 if (k >= ret_cnt)
1106 break;
1108 resp->SECINFO4resok_len = ret_cnt;
1109 resp->SECINFO4resok_val = resok_val;
1110 kmem_free(flavor_list, count * sizeof (int));
1113 VN_RELE(vp);
1114 return (NFS4_OK);
1118 * SECINFO (Operation 33): Obtain required security information on
1119 * the component name in the format of (security-mechanism-oid, qop, service)
1120 * triplets.
1122 /* ARGSUSED */
1123 static void
1124 rfs4_op_secinfo(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1125 struct compound_state *cs)
1127 SECINFO4args *args = &argop->nfs_argop4_u.opsecinfo;
1128 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1129 utf8string *utfnm = &args->name;
1130 uint_t len;
1131 char *nm;
1132 struct sockaddr *ca;
1133 char *name = NULL;
1135 DTRACE_NFSV4_2(op__secinfo__start, struct compound_state *, cs,
1136 SECINFO4args *, args);
1139 * Current file handle (cfh) should have been set before getting
1140 * into this function. If not, return error.
1142 if (cs->vp == NULL) {
1143 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1144 goto out;
1147 if (cs->vp->v_type != VDIR) {
1148 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1149 goto out;
1153 * Verify the component name. If failed, error out, but
1154 * do not error out if the component name is a "..".
1155 * SECINFO will return its parents secinfo data for SECINFO "..".
1157 if (!utf8_dir_verify(utfnm)) {
1158 if (utfnm->utf8string_len != 2 ||
1159 utfnm->utf8string_val[0] != '.' ||
1160 utfnm->utf8string_val[1] != '.') {
1161 *cs->statusp = resp->status = NFS4ERR_INVAL;
1162 goto out;
1166 nm = utf8_to_str(utfnm, &len, NULL);
1167 if (nm == NULL) {
1168 *cs->statusp = resp->status = NFS4ERR_INVAL;
1169 goto out;
1172 if (len > MAXNAMELEN) {
1173 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1174 kmem_free(nm, len);
1175 goto out;
1178 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1179 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1180 MAXPATHLEN + 1);
1182 if (name == NULL) {
1183 *cs->statusp = resp->status = NFS4ERR_INVAL;
1184 kmem_free(nm, len);
1185 goto out;
1189 *cs->statusp = resp->status = do_rfs4_op_secinfo(cs, name, resp);
1191 if (name != nm)
1192 kmem_free(name, MAXPATHLEN + 1);
1193 kmem_free(nm, len);
1195 out:
1196 DTRACE_NFSV4_2(op__secinfo__done, struct compound_state *, cs,
1197 SECINFO4res *, resp);
1201 * Free SECINFO result.
1203 /* ARGSUSED */
1204 static void
1205 rfs4_op_secinfo_free(nfs_resop4 *resop)
1207 SECINFO4res *resp = &resop->nfs_resop4_u.opsecinfo;
1208 int count, i;
1209 secinfo4 *resok_val;
1211 /* If this is not an Ok result, nothing to free. */
1212 if (resp->status != NFS4_OK) {
1213 return;
1216 count = resp->SECINFO4resok_len;
1217 resok_val = resp->SECINFO4resok_val;
1219 for (i = 0; i < count; i++) {
1220 if (resok_val[i].flavor == RPCSEC_GSS) {
1221 rpcsec_gss_info *info;
1223 info = &resok_val[i].flavor_info;
1224 kmem_free(info->oid.sec_oid4_val,
1225 info->oid.sec_oid4_len);
1228 kmem_free(resok_val, count * sizeof (secinfo4));
1229 resp->SECINFO4resok_len = 0;
1230 resp->SECINFO4resok_val = NULL;
1233 /* ARGSUSED */
1234 static void
1235 rfs4_op_access(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1236 struct compound_state *cs)
1238 ACCESS4args *args = &argop->nfs_argop4_u.opaccess;
1239 ACCESS4res *resp = &resop->nfs_resop4_u.opaccess;
1240 int error;
1241 vnode_t *vp;
1242 struct vattr va;
1243 int checkwriteperm;
1244 cred_t *cr = cs->cr;
1245 bslabel_t *clabel, *slabel;
1246 ts_label_t *tslabel;
1247 boolean_t admin_low_client;
1249 DTRACE_NFSV4_2(op__access__start, struct compound_state *, cs,
1250 ACCESS4args *, args);
1252 #if 0 /* XXX allow access even if !cs->access. Eventually only pseudo fs */
1253 if (cs->access == CS_ACCESS_DENIED) {
1254 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1255 goto out;
1257 #endif
1258 if (cs->vp == NULL) {
1259 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1260 goto out;
1263 ASSERT(cr != NULL);
1265 vp = cs->vp;
1268 * If the file system is exported read only, it is not appropriate
1269 * to check write permissions for regular files and directories.
1270 * Special files are interpreted by the client, so the underlying
1271 * permissions are sent back to the client for interpretation.
1273 if (rdonly4(cs->exi, cs->vp, req) &&
1274 (vp->v_type == VREG || vp->v_type == VDIR))
1275 checkwriteperm = 0;
1276 else
1277 checkwriteperm = 1;
1280 * XXX
1281 * We need the mode so that we can correctly determine access
1282 * permissions relative to a mandatory lock file. Access to
1283 * mandatory lock files is denied on the server, so it might
1284 * as well be reflected to the server during the open.
1286 va.va_mask = AT_MODE;
1287 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1288 if (error) {
1289 *cs->statusp = resp->status = puterrno4(error);
1290 goto out;
1292 resp->access = 0;
1293 resp->supported = 0;
1295 if (is_system_labeled()) {
1296 ASSERT(req->rq_label != NULL);
1297 clabel = req->rq_label;
1298 DTRACE_PROBE2(tx__rfs4__log__info__opaccess__clabel, char *,
1299 "got client label from request(1)",
1300 struct svc_req *, req);
1301 if (!blequal(&l_admin_low->tsl_label, clabel)) {
1302 if ((tslabel = nfs_getflabel(vp, cs->exi)) == NULL) {
1303 *cs->statusp = resp->status = puterrno4(EACCES);
1304 goto out;
1306 slabel = label2bslabel(tslabel);
1307 DTRACE_PROBE3(tx__rfs4__log__info__opaccess__slabel,
1308 char *, "got server label(1) for vp(2)",
1309 bslabel_t *, slabel, vnode_t *, vp);
1311 admin_low_client = B_FALSE;
1312 } else
1313 admin_low_client = B_TRUE;
1316 if (args->access & ACCESS4_READ) {
1317 error = VOP_ACCESS(vp, VREAD, 0, cr, NULL);
1318 if (!error && !MANDLOCK(vp, va.va_mode) &&
1319 (!is_system_labeled() || admin_low_client ||
1320 bldominates(clabel, slabel)))
1321 resp->access |= ACCESS4_READ;
1322 resp->supported |= ACCESS4_READ;
1324 if ((args->access & ACCESS4_LOOKUP) && vp->v_type == VDIR) {
1325 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1326 if (!error && (!is_system_labeled() || admin_low_client ||
1327 bldominates(clabel, slabel)))
1328 resp->access |= ACCESS4_LOOKUP;
1329 resp->supported |= ACCESS4_LOOKUP;
1331 if (checkwriteperm &&
1332 (args->access & (ACCESS4_MODIFY|ACCESS4_EXTEND))) {
1333 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1334 if (!error && !MANDLOCK(vp, va.va_mode) &&
1335 (!is_system_labeled() || admin_low_client ||
1336 blequal(clabel, slabel)))
1337 resp->access |=
1338 (args->access & (ACCESS4_MODIFY | ACCESS4_EXTEND));
1339 resp->supported |= (ACCESS4_MODIFY | ACCESS4_EXTEND);
1342 if (checkwriteperm &&
1343 (args->access & ACCESS4_DELETE) && vp->v_type == VDIR) {
1344 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
1345 if (!error && (!is_system_labeled() || admin_low_client ||
1346 blequal(clabel, slabel)))
1347 resp->access |= ACCESS4_DELETE;
1348 resp->supported |= ACCESS4_DELETE;
1350 if (args->access & ACCESS4_EXECUTE && vp->v_type != VDIR) {
1351 error = VOP_ACCESS(vp, VEXEC, 0, cr, NULL);
1352 if (!error && !MANDLOCK(vp, va.va_mode) &&
1353 (!is_system_labeled() || admin_low_client ||
1354 bldominates(clabel, slabel)))
1355 resp->access |= ACCESS4_EXECUTE;
1356 resp->supported |= ACCESS4_EXECUTE;
1359 if (is_system_labeled() && !admin_low_client)
1360 label_rele(tslabel);
1362 *cs->statusp = resp->status = NFS4_OK;
1363 out:
1364 DTRACE_NFSV4_2(op__access__done, struct compound_state *, cs,
1365 ACCESS4res *, resp);
1368 /* ARGSUSED */
1369 static void
1370 rfs4_op_commit(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1371 struct compound_state *cs)
1373 COMMIT4args *args = &argop->nfs_argop4_u.opcommit;
1374 COMMIT4res *resp = &resop->nfs_resop4_u.opcommit;
1375 int error;
1376 vnode_t *vp = cs->vp;
1377 cred_t *cr = cs->cr;
1378 vattr_t va;
1380 DTRACE_NFSV4_2(op__commit__start, struct compound_state *, cs,
1381 COMMIT4args *, args);
1383 if (vp == NULL) {
1384 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1385 goto out;
1387 if (cs->access == CS_ACCESS_DENIED) {
1388 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1389 goto out;
1392 if (args->offset + args->count < args->offset) {
1393 *cs->statusp = resp->status = NFS4ERR_INVAL;
1394 goto out;
1397 va.va_mask = AT_UID;
1398 error = VOP_GETATTR(vp, &va, 0, cr, NULL);
1401 * If we can't get the attributes, then we can't do the
1402 * right access checking. So, we'll fail the request.
1404 if (error) {
1405 *cs->statusp = resp->status = puterrno4(error);
1406 goto out;
1408 if (rdonly4(cs->exi, cs->vp, req)) {
1409 *cs->statusp = resp->status = NFS4ERR_ROFS;
1410 goto out;
1413 if (vp->v_type != VREG) {
1414 if (vp->v_type == VDIR)
1415 resp->status = NFS4ERR_ISDIR;
1416 else
1417 resp->status = NFS4ERR_INVAL;
1418 *cs->statusp = resp->status;
1419 goto out;
1422 if (crgetuid(cr) != va.va_uid &&
1423 (error = VOP_ACCESS(vp, VWRITE, 0, cs->cr, NULL))) {
1424 *cs->statusp = resp->status = puterrno4(error);
1425 goto out;
1428 error = VOP_FSYNC(vp, FSYNC, cr, NULL);
1430 if (error) {
1431 *cs->statusp = resp->status = puterrno4(error);
1432 goto out;
1435 *cs->statusp = resp->status = NFS4_OK;
1436 resp->writeverf = Write4verf;
1437 out:
1438 DTRACE_NFSV4_2(op__commit__done, struct compound_state *, cs,
1439 COMMIT4res *, resp);
1443 * rfs4_op_mknod is called from rfs4_op_create after all initial verification
1444 * was completed. It does the nfsv4 create for special files.
1446 /* ARGSUSED */
1447 static vnode_t *
1448 do_rfs4_op_mknod(CREATE4args *args, CREATE4res *resp, struct svc_req *req,
1449 struct compound_state *cs, vattr_t *vap, char *nm)
1451 int error;
1452 cred_t *cr = cs->cr;
1453 vnode_t *dvp = cs->vp;
1454 vnode_t *vp = NULL;
1455 int mode;
1456 enum vcexcl excl;
1458 switch (args->type) {
1459 case NF4CHR:
1460 case NF4BLK:
1461 if (secpolicy_sys_devices(cr) != 0) {
1462 *cs->statusp = resp->status = NFS4ERR_PERM;
1463 return (NULL);
1465 if (args->type == NF4CHR)
1466 vap->va_type = VCHR;
1467 else
1468 vap->va_type = VBLK;
1469 vap->va_rdev = makedevice(args->ftype4_u.devdata.specdata1,
1470 args->ftype4_u.devdata.specdata2);
1471 vap->va_mask |= AT_RDEV;
1472 break;
1473 case NF4SOCK:
1474 vap->va_type = VSOCK;
1475 break;
1476 case NF4FIFO:
1477 vap->va_type = VFIFO;
1478 break;
1479 default:
1480 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1481 return (NULL);
1485 * Must specify the mode.
1487 if (!(vap->va_mask & AT_MODE)) {
1488 *cs->statusp = resp->status = NFS4ERR_INVAL;
1489 return (NULL);
1492 excl = EXCL;
1494 mode = 0;
1496 error = VOP_CREATE(dvp, nm, vap, excl, mode, &vp, cr, 0, NULL, NULL);
1497 if (error) {
1498 *cs->statusp = resp->status = puterrno4(error);
1499 return (NULL);
1501 return (vp);
1505 * nfsv4 create is used to create non-regular files. For regular files,
1506 * use nfsv4 open.
1508 /* ARGSUSED */
1509 static void
1510 rfs4_op_create(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1511 struct compound_state *cs)
1513 CREATE4args *args = &argop->nfs_argop4_u.opcreate;
1514 CREATE4res *resp = &resop->nfs_resop4_u.opcreate;
1515 int error;
1516 struct vattr bva, iva, iva2, ava, *vap;
1517 cred_t *cr = cs->cr;
1518 vnode_t *dvp = cs->vp;
1519 vnode_t *vp = NULL;
1520 vnode_t *realvp;
1521 char *nm, *lnm;
1522 uint_t len, llen;
1523 int syncval = 0;
1524 struct nfs4_svgetit_arg sarg;
1525 struct nfs4_ntov_table ntov;
1526 struct statvfs64 sb;
1527 nfsstat4 status;
1528 struct sockaddr *ca;
1529 char *name = NULL;
1530 char *lname = NULL;
1532 DTRACE_NFSV4_2(op__create__start, struct compound_state *, cs,
1533 CREATE4args *, args);
1535 resp->attrset = 0;
1537 if (dvp == NULL) {
1538 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
1539 goto out;
1543 * If there is an unshared filesystem mounted on this vnode,
1544 * do not allow to create an object in this directory.
1546 if (vn_ismntpt(dvp)) {
1547 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1548 goto out;
1551 /* Verify that type is correct */
1552 switch (args->type) {
1553 case NF4LNK:
1554 case NF4BLK:
1555 case NF4CHR:
1556 case NF4SOCK:
1557 case NF4FIFO:
1558 case NF4DIR:
1559 break;
1560 default:
1561 *cs->statusp = resp->status = NFS4ERR_BADTYPE;
1562 goto out;
1565 if (cs->access == CS_ACCESS_DENIED) {
1566 *cs->statusp = resp->status = NFS4ERR_ACCESS;
1567 goto out;
1569 if (dvp->v_type != VDIR) {
1570 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
1571 goto out;
1573 if (!utf8_dir_verify(&args->objname)) {
1574 *cs->statusp = resp->status = NFS4ERR_INVAL;
1575 goto out;
1578 if (rdonly4(cs->exi, cs->vp, req)) {
1579 *cs->statusp = resp->status = NFS4ERR_ROFS;
1580 goto out;
1584 * Name of newly created object
1586 nm = utf8_to_fn(&args->objname, &len, NULL);
1587 if (nm == NULL) {
1588 *cs->statusp = resp->status = NFS4ERR_INVAL;
1589 goto out;
1592 if (len > MAXNAMELEN) {
1593 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1594 kmem_free(nm, len);
1595 goto out;
1598 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
1599 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
1600 MAXPATHLEN + 1);
1602 if (name == NULL) {
1603 *cs->statusp = resp->status = NFS4ERR_INVAL;
1604 kmem_free(nm, len);
1605 goto out;
1608 resp->attrset = 0;
1610 sarg.sbp = &sb;
1611 sarg.is_referral = B_FALSE;
1612 nfs4_ntov_table_init(&ntov);
1614 status = do_rfs4_set_attrs(&resp->attrset,
1615 &args->createattrs, cs, &sarg, &ntov, NFS4ATTR_SETIT);
1617 if (sarg.vap->va_mask == 0 && status == NFS4_OK)
1618 status = NFS4ERR_INVAL;
1620 if (status != NFS4_OK) {
1621 *cs->statusp = resp->status = status;
1622 if (name != nm)
1623 kmem_free(name, MAXPATHLEN + 1);
1624 kmem_free(nm, len);
1625 nfs4_ntov_table_free(&ntov, &sarg);
1626 resp->attrset = 0;
1627 goto out;
1630 /* Get "before" change value */
1631 bva.va_mask = AT_CTIME|AT_SEQ|AT_MODE;
1632 error = VOP_GETATTR(dvp, &bva, 0, cr, NULL);
1633 if (error) {
1634 *cs->statusp = resp->status = puterrno4(error);
1635 if (name != nm)
1636 kmem_free(name, MAXPATHLEN + 1);
1637 kmem_free(nm, len);
1638 nfs4_ntov_table_free(&ntov, &sarg);
1639 resp->attrset = 0;
1640 goto out;
1642 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bva.va_ctime)
1644 vap = sarg.vap;
1647 * Set the default initial values for attributes when the parent
1648 * directory does not have the VSUID/VSGID bit set and they have
1649 * not been specified in createattrs.
1651 if (!(bva.va_mode & VSUID) && (vap->va_mask & AT_UID) == 0) {
1652 vap->va_uid = crgetuid(cr);
1653 vap->va_mask |= AT_UID;
1655 if (!(bva.va_mode & VSGID) && (vap->va_mask & AT_GID) == 0) {
1656 vap->va_gid = crgetgid(cr);
1657 vap->va_mask |= AT_GID;
1660 vap->va_mask |= AT_TYPE;
1661 switch (args->type) {
1662 case NF4DIR:
1663 vap->va_type = VDIR;
1664 if ((vap->va_mask & AT_MODE) == 0) {
1665 vap->va_mode = 0700; /* default: owner rwx only */
1666 vap->va_mask |= AT_MODE;
1668 error = VOP_MKDIR(dvp, name, vap, &vp, cr, NULL, 0, NULL);
1669 if (error)
1670 break;
1673 * Get the initial "after" sequence number, if it fails,
1674 * set to zero
1676 iva.va_mask = AT_SEQ;
1677 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1678 iva.va_seq = 0;
1679 break;
1680 case NF4LNK:
1681 vap->va_type = VLNK;
1682 if ((vap->va_mask & AT_MODE) == 0) {
1683 vap->va_mode = 0700; /* default: owner rwx only */
1684 vap->va_mask |= AT_MODE;
1688 * symlink names must be treated as data
1690 lnm = utf8_to_str(&args->ftype4_u.linkdata, &llen, NULL);
1692 if (lnm == NULL) {
1693 *cs->statusp = resp->status = NFS4ERR_INVAL;
1694 if (name != nm)
1695 kmem_free(name, MAXPATHLEN + 1);
1696 kmem_free(nm, len);
1697 nfs4_ntov_table_free(&ntov, &sarg);
1698 resp->attrset = 0;
1699 goto out;
1702 if (llen > MAXPATHLEN) {
1703 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
1704 if (name != nm)
1705 kmem_free(name, MAXPATHLEN + 1);
1706 kmem_free(nm, len);
1707 kmem_free(lnm, llen);
1708 nfs4_ntov_table_free(&ntov, &sarg);
1709 resp->attrset = 0;
1710 goto out;
1713 lname = nfscmd_convname(ca, cs->exi, lnm,
1714 NFSCMD_CONV_INBOUND, MAXPATHLEN + 1);
1716 if (lname == NULL) {
1717 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
1718 if (name != nm)
1719 kmem_free(name, MAXPATHLEN + 1);
1720 kmem_free(nm, len);
1721 kmem_free(lnm, llen);
1722 nfs4_ntov_table_free(&ntov, &sarg);
1723 resp->attrset = 0;
1724 goto out;
1727 error = VOP_SYMLINK(dvp, name, vap, lname, cr, NULL, 0);
1728 if (lname != lnm)
1729 kmem_free(lname, MAXPATHLEN + 1);
1730 kmem_free(lnm, llen);
1731 if (error)
1732 break;
1735 * Get the initial "after" sequence number, if it fails,
1736 * set to zero
1738 iva.va_mask = AT_SEQ;
1739 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1740 iva.va_seq = 0;
1742 error = VOP_LOOKUP(dvp, name, &vp, NULL, 0, NULL, cr,
1743 NULL, NULL, NULL);
1744 if (error)
1745 break;
1748 * va_seq is not safe over VOP calls, check it again
1749 * if it has changed zero out iva to force atomic = FALSE.
1751 iva2.va_mask = AT_SEQ;
1752 if (VOP_GETATTR(dvp, &iva2, 0, cs->cr, NULL) ||
1753 iva2.va_seq != iva.va_seq)
1754 iva.va_seq = 0;
1755 break;
1756 default:
1758 * probably a special file.
1760 if ((vap->va_mask & AT_MODE) == 0) {
1761 vap->va_mode = 0600; /* default: owner rw only */
1762 vap->va_mask |= AT_MODE;
1764 syncval = FNODSYNC;
1766 * We know this will only generate one VOP call
1768 vp = do_rfs4_op_mknod(args, resp, req, cs, vap, name);
1770 if (vp == NULL) {
1771 if (name != nm)
1772 kmem_free(name, MAXPATHLEN + 1);
1773 kmem_free(nm, len);
1774 nfs4_ntov_table_free(&ntov, &sarg);
1775 resp->attrset = 0;
1776 goto out;
1780 * Get the initial "after" sequence number, if it fails,
1781 * set to zero
1783 iva.va_mask = AT_SEQ;
1784 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL))
1785 iva.va_seq = 0;
1787 break;
1789 if (name != nm)
1790 kmem_free(name, MAXPATHLEN + 1);
1791 kmem_free(nm, len);
1793 if (error) {
1794 *cs->statusp = resp->status = puterrno4(error);
1798 * Force modified data and metadata out to stable storage.
1800 (void) VOP_FSYNC(dvp, 0, cr, NULL);
1802 if (resp->status != NFS4_OK) {
1803 if (vp != NULL)
1804 VN_RELE(vp);
1805 nfs4_ntov_table_free(&ntov, &sarg);
1806 resp->attrset = 0;
1807 goto out;
1811 * Finish setup of cinfo response, "before" value already set.
1812 * Get "after" change value, if it fails, simply return the
1813 * before value.
1815 ava.va_mask = AT_CTIME|AT_SEQ;
1816 if (VOP_GETATTR(dvp, &ava, 0, cr, NULL)) {
1817 ava.va_ctime = bva.va_ctime;
1818 ava.va_seq = 0;
1820 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, ava.va_ctime);
1823 * True verification that object was created with correct
1824 * attrs is impossible. The attrs could have been changed
1825 * immediately after object creation. If attributes did
1826 * not verify, the only recourse for the server is to
1827 * destroy the object. Maybe if some attrs (like gid)
1828 * are set incorrectly, the object should be destroyed;
1829 * however, seems bad as a default policy. Do we really
1830 * want to destroy an object over one of the times not
1831 * verifying correctly? For these reasons, the server
1832 * currently sets bits in attrset for createattrs
1833 * that were set; however, no verification is done.
1835 * vmask_to_nmask accounts for vattr bits set on create
1836 * [do_rfs4_set_attrs() only sets resp bits for
1837 * non-vattr/vfs bits.]
1838 * Mask off any bits set by default so as not to return
1839 * more attrset bits than were requested in createattrs
1841 nfs4_vmask_to_nmask(sarg.vap->va_mask, &resp->attrset);
1842 resp->attrset &= args->createattrs.attrmask;
1843 nfs4_ntov_table_free(&ntov, &sarg);
1845 error = makefh4(&cs->fh, vp, cs->exi);
1846 if (error) {
1847 *cs->statusp = resp->status = puterrno4(error);
1851 * The cinfo.atomic = TRUE only if we got no errors, we have
1852 * non-zero va_seq's, and it has incremented by exactly one
1853 * during the creation and it didn't change during the VOP_LOOKUP
1854 * or VOP_FSYNC.
1856 if (!error && bva.va_seq && iva.va_seq && ava.va_seq &&
1857 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
1858 resp->cinfo.atomic = TRUE;
1859 else
1860 resp->cinfo.atomic = FALSE;
1863 * Force modified metadata out to stable storage.
1865 * if a underlying vp exists, pass it to VOP_FSYNC
1867 if (VOP_REALVP(vp, &realvp, NULL) == 0)
1868 (void) VOP_FSYNC(realvp, syncval, cr, NULL);
1869 else
1870 (void) VOP_FSYNC(vp, syncval, cr, NULL);
1872 if (resp->status != NFS4_OK) {
1873 VN_RELE(vp);
1874 goto out;
1876 if (cs->vp)
1877 VN_RELE(cs->vp);
1879 cs->vp = vp;
1880 *cs->statusp = resp->status = NFS4_OK;
1881 out:
1882 DTRACE_NFSV4_2(op__create__done, struct compound_state *, cs,
1883 CREATE4res *, resp);
1886 /*ARGSUSED*/
1887 static void
1888 rfs4_op_delegpurge(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1889 struct compound_state *cs)
1891 DTRACE_NFSV4_2(op__delegpurge__start, struct compound_state *, cs,
1892 DELEGPURGE4args *, &argop->nfs_argop4_u.opdelegpurge);
1894 rfs4_op_inval(argop, resop, req, cs);
1896 DTRACE_NFSV4_2(op__delegpurge__done, struct compound_state *, cs,
1897 DELEGPURGE4res *, &resop->nfs_resop4_u.opdelegpurge);
1900 /*ARGSUSED*/
1901 static void
1902 rfs4_op_delegreturn(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
1903 struct compound_state *cs)
1905 DELEGRETURN4args *args = &argop->nfs_argop4_u.opdelegreturn;
1906 DELEGRETURN4res *resp = &resop->nfs_resop4_u.opdelegreturn;
1907 rfs4_deleg_state_t *dsp;
1908 nfsstat4 status;
1910 DTRACE_NFSV4_2(op__delegreturn__start, struct compound_state *, cs,
1911 DELEGRETURN4args *, args);
1913 status = rfs4_get_deleg_state(&args->deleg_stateid, &dsp);
1914 resp->status = *cs->statusp = status;
1915 if (status != NFS4_OK)
1916 goto out;
1918 /* Ensure specified filehandle matches */
1919 if (cs->vp != dsp->rds_finfo->rf_vp) {
1920 resp->status = *cs->statusp = NFS4ERR_BAD_STATEID;
1921 } else
1922 rfs4_return_deleg(dsp, FALSE);
1924 rfs4_update_lease(dsp->rds_client);
1926 rfs4_deleg_state_rele(dsp);
1927 out:
1928 DTRACE_NFSV4_2(op__delegreturn__done, struct compound_state *, cs,
1929 DELEGRETURN4res *, resp);
1933 * Check to see if a given "flavor" is an explicitly shared flavor.
1934 * The assumption of this routine is the "flavor" is already a valid
1935 * flavor in the secinfo list of "exi".
1937 * e.g.
1938 * # share -o sec=flavor1 /export
1939 * # share -o sec=flavor2 /export/home
1941 * flavor2 is not an explicitly shared flavor for /export,
1942 * however it is in the secinfo list for /export thru the
1943 * server namespace setup.
1946 is_exported_sec(int flavor, struct exportinfo *exi)
1948 int i;
1949 struct secinfo *sp;
1951 sp = exi->exi_export.ex_secinfo;
1952 for (i = 0; i < exi->exi_export.ex_seccnt; i++) {
1953 if (flavor == sp[i].s_secinfo.sc_nfsnum ||
1954 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE) {
1955 return (SEC_REF_EXPORTED(&sp[i]));
1959 /* Should not reach this point based on the assumption */
1960 return (0);
1964 * Check if the security flavor used in the request matches what is
1965 * required at the export point or at the root pseudo node (exi_root).
1967 * returns 1 if there's a match or if exported with AUTH_NONE; 0 otherwise.
1970 static int
1971 secinfo_match_or_authnone(struct compound_state *cs)
1973 int i;
1974 struct secinfo *sp;
1977 * Check cs->nfsflavor (from the request) against
1978 * the current export data in cs->exi.
1980 sp = cs->exi->exi_export.ex_secinfo;
1981 for (i = 0; i < cs->exi->exi_export.ex_seccnt; i++) {
1982 if (cs->nfsflavor == sp[i].s_secinfo.sc_nfsnum ||
1983 sp[i].s_secinfo.sc_nfsnum == AUTH_NONE)
1984 return (1);
1987 return (0);
1991 * Check the access authority for the client and return the correct error.
1993 nfsstat4
1994 call_checkauth4(struct compound_state *cs, struct svc_req *req)
1996 int authres;
1999 * First, check if the security flavor used in the request
2000 * are among the flavors set in the server namespace.
2002 if (!secinfo_match_or_authnone(cs)) {
2003 *cs->statusp = NFS4ERR_WRONGSEC;
2004 return (*cs->statusp);
2007 authres = checkauth4(cs, req);
2009 if (authres > 0) {
2010 *cs->statusp = NFS4_OK;
2011 if (! (cs->access & CS_ACCESS_LIMITED))
2012 cs->access = CS_ACCESS_OK;
2013 } else if (authres == 0) {
2014 *cs->statusp = NFS4ERR_ACCESS;
2015 } else if (authres == -2) {
2016 *cs->statusp = NFS4ERR_WRONGSEC;
2017 } else {
2018 *cs->statusp = NFS4ERR_DELAY;
2020 return (*cs->statusp);
2024 * bitmap4_to_attrmask is called by getattr and readdir.
2025 * It sets up the vattr mask and determines whether vfsstat call is needed
2026 * based on the input bitmap.
2027 * Returns nfsv4 status.
2029 static nfsstat4
2030 bitmap4_to_attrmask(bitmap4 breq, struct nfs4_svgetit_arg *sargp)
2032 int i;
2033 uint_t va_mask;
2034 struct statvfs64 *sbp = sargp->sbp;
2036 sargp->sbp = NULL;
2037 sargp->flag = 0;
2038 sargp->rdattr_error = NFS4_OK;
2039 sargp->mntdfid_set = FALSE;
2040 if (sargp->cs->vp)
2041 sargp->xattr = get_fh4_flag(&sargp->cs->fh,
2042 FH4_ATTRDIR | FH4_NAMEDATTR);
2043 else
2044 sargp->xattr = 0;
2047 * Set rdattr_error_req to true if return error per
2048 * failed entry rather than fail the readdir.
2050 if (breq & FATTR4_RDATTR_ERROR_MASK)
2051 sargp->rdattr_error_req = 1;
2052 else
2053 sargp->rdattr_error_req = 0;
2056 * generate the va_mask
2057 * Handle the easy cases first
2059 switch (breq) {
2060 case NFS4_NTOV_ATTR_MASK:
2061 sargp->vap->va_mask = NFS4_NTOV_ATTR_AT_MASK;
2062 return (NFS4_OK);
2064 case NFS4_FS_ATTR_MASK:
2065 sargp->vap->va_mask = NFS4_FS_ATTR_AT_MASK;
2066 sargp->sbp = sbp;
2067 return (NFS4_OK);
2069 case NFS4_NTOV_ATTR_CACHE_MASK:
2070 sargp->vap->va_mask = NFS4_NTOV_ATTR_CACHE_AT_MASK;
2071 return (NFS4_OK);
2073 case FATTR4_LEASE_TIME_MASK:
2074 sargp->vap->va_mask = 0;
2075 return (NFS4_OK);
2077 default:
2078 va_mask = 0;
2079 for (i = 0; i < nfs4_ntov_map_size; i++) {
2080 if ((breq & nfs4_ntov_map[i].fbit) &&
2081 nfs4_ntov_map[i].vbit)
2082 va_mask |= nfs4_ntov_map[i].vbit;
2086 * Check is vfsstat is needed
2088 if (breq & NFS4_FS_ATTR_MASK)
2089 sargp->sbp = sbp;
2091 sargp->vap->va_mask = va_mask;
2092 return (NFS4_OK);
2094 /* NOTREACHED */
2098 * bitmap4_get_sysattrs is called by getattr and readdir.
2099 * It calls both VOP_GETATTR and VFS_STATVFS calls to get the attrs.
2100 * Returns nfsv4 status.
2102 static nfsstat4
2103 bitmap4_get_sysattrs(struct nfs4_svgetit_arg *sargp)
2105 int error;
2106 struct compound_state *cs = sargp->cs;
2107 vnode_t *vp = cs->vp;
2109 if (sargp->sbp != NULL) {
2110 if (error = VFS_STATVFS(vp->v_vfsp, sargp->sbp)) {
2111 sargp->sbp = NULL; /* to identify error */
2112 return (puterrno4(error));
2116 return (rfs4_vop_getattr(vp, sargp->vap, 0, cs->cr));
2119 static void
2120 nfs4_ntov_table_init(struct nfs4_ntov_table *ntovp)
2122 ntovp->na = kmem_zalloc(sizeof (union nfs4_attr_u) * nfs4_ntov_map_size,
2123 KM_SLEEP);
2124 ntovp->attrcnt = 0;
2125 ntovp->vfsstat = FALSE;
2128 static void
2129 nfs4_ntov_table_free(struct nfs4_ntov_table *ntovp,
2130 struct nfs4_svgetit_arg *sargp)
2132 int i;
2133 union nfs4_attr_u *na;
2134 uint8_t *amap;
2137 * XXX Should do the same checks for whether the bit is set
2139 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2140 i < ntovp->attrcnt; i++, na++, amap++) {
2141 (void) (*nfs4_ntov_map[*amap].sv_getit)(
2142 NFS4ATTR_FREEIT, sargp, na);
2144 if ((sargp->op == NFS4ATTR_SETIT) || (sargp->op == NFS4ATTR_VERIT)) {
2146 * xdr_free for getattr will be done later
2148 for (i = 0, na = ntovp->na, amap = ntovp->amap;
2149 i < ntovp->attrcnt; i++, na++, amap++) {
2150 xdr_free(nfs4_ntov_map[*amap].xfunc, (caddr_t)na);
2153 kmem_free(ntovp->na, sizeof (union nfs4_attr_u) * nfs4_ntov_map_size);
2157 * do_rfs4_op_getattr gets the system attrs and converts into fattr4.
2159 static nfsstat4
2160 do_rfs4_op_getattr(bitmap4 breq, fattr4 *fattrp,
2161 struct nfs4_svgetit_arg *sargp)
2163 int error = 0;
2164 int i, k;
2165 struct nfs4_ntov_table ntov;
2166 XDR xdr;
2167 ulong_t xdr_size;
2168 char *xdr_attrs;
2169 nfsstat4 status = NFS4_OK;
2170 nfsstat4 prev_rdattr_error = sargp->rdattr_error;
2171 union nfs4_attr_u *na;
2172 uint8_t *amap;
2174 sargp->op = NFS4ATTR_GETIT;
2175 sargp->flag = 0;
2177 fattrp->attrmask = 0;
2178 /* if no bits requested, then return empty fattr4 */
2179 if (breq == 0) {
2180 fattrp->attrlist4_len = 0;
2181 fattrp->attrlist4 = NULL;
2182 return (NFS4_OK);
2186 * return NFS4ERR_INVAL when client requests write-only attrs
2188 if (breq & (FATTR4_TIME_ACCESS_SET_MASK | FATTR4_TIME_MODIFY_SET_MASK))
2189 return (NFS4ERR_INVAL);
2191 nfs4_ntov_table_init(&ntov);
2192 na = ntov.na;
2193 amap = ntov.amap;
2196 * Now loop to get or verify the attrs
2198 for (i = 0; i < nfs4_ntov_map_size; i++) {
2199 if (breq & nfs4_ntov_map[i].fbit) {
2200 if ((*nfs4_ntov_map[i].sv_getit)(
2201 NFS4ATTR_SUPPORTED, sargp, NULL) == 0) {
2203 error = (*nfs4_ntov_map[i].sv_getit)(
2204 NFS4ATTR_GETIT, sargp, na);
2207 * Possible error values:
2208 * >0 if sv_getit failed to
2209 * get the attr; 0 if succeeded;
2210 * <0 if rdattr_error and the
2211 * attribute cannot be returned.
2213 if (error && !(sargp->rdattr_error_req))
2214 goto done;
2216 * If error then just for entry
2218 if (error == 0) {
2219 fattrp->attrmask |=
2220 nfs4_ntov_map[i].fbit;
2221 *amap++ =
2222 (uint8_t)nfs4_ntov_map[i].nval;
2223 na++;
2224 (ntov.attrcnt)++;
2225 } else if ((error > 0) &&
2226 (sargp->rdattr_error == NFS4_OK)) {
2227 sargp->rdattr_error = puterrno4(error);
2229 error = 0;
2235 * If rdattr_error was set after the return value for it was assigned,
2236 * update it.
2238 if (prev_rdattr_error != sargp->rdattr_error) {
2239 na = ntov.na;
2240 amap = ntov.amap;
2241 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2242 k = *amap;
2243 if (k < FATTR4_RDATTR_ERROR) {
2244 continue;
2246 if ((k == FATTR4_RDATTR_ERROR) &&
2247 ((*nfs4_ntov_map[k].sv_getit)(
2248 NFS4ATTR_SUPPORTED, sargp, NULL) == 0)) {
2250 (void) (*nfs4_ntov_map[k].sv_getit)(
2251 NFS4ATTR_GETIT, sargp, na);
2253 break;
2257 xdr_size = 0;
2258 na = ntov.na;
2259 amap = ntov.amap;
2260 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2261 xdr_size += xdr_sizeof(nfs4_ntov_map[*amap].xfunc, na);
2264 fattrp->attrlist4_len = xdr_size;
2265 if (xdr_size) {
2266 /* freed by rfs4_op_getattr_free() */
2267 fattrp->attrlist4 = xdr_attrs = kmem_zalloc(xdr_size, KM_SLEEP);
2269 xdrmem_create(&xdr, xdr_attrs, xdr_size, XDR_ENCODE);
2271 na = ntov.na;
2272 amap = ntov.amap;
2273 for (i = 0; i < ntov.attrcnt; i++, na++, amap++) {
2274 if (!(*nfs4_ntov_map[*amap].xfunc)(&xdr, na)) {
2275 DTRACE_PROBE1(nfss__e__getattr4_encfail,
2276 int, *amap);
2277 status = NFS4ERR_SERVERFAULT;
2278 break;
2281 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
2282 } else {
2283 fattrp->attrlist4 = NULL;
2285 done:
2287 nfs4_ntov_table_free(&ntov, sargp);
2289 if (error != 0)
2290 status = puterrno4(error);
2292 return (status);
2295 /* ARGSUSED */
2296 static void
2297 rfs4_op_getattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2298 struct compound_state *cs)
2300 GETATTR4args *args = &argop->nfs_argop4_u.opgetattr;
2301 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2302 struct nfs4_svgetit_arg sarg;
2303 struct statvfs64 sb;
2304 nfsstat4 status;
2306 DTRACE_NFSV4_2(op__getattr__start, struct compound_state *, cs,
2307 GETATTR4args *, args);
2309 if (cs->vp == NULL) {
2310 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2311 goto out;
2314 if (cs->access == CS_ACCESS_DENIED) {
2315 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2316 goto out;
2319 sarg.sbp = &sb;
2320 sarg.cs = cs;
2321 sarg.is_referral = B_FALSE;
2323 status = bitmap4_to_attrmask(args->attr_request, &sarg);
2324 if (status == NFS4_OK) {
2326 status = bitmap4_get_sysattrs(&sarg);
2327 if (status == NFS4_OK) {
2329 /* Is this a referral? */
2330 if (vn_is_nfs_reparse(cs->vp, cs->cr)) {
2331 /* Older V4 Solaris client sees a link */
2332 if (client_is_downrev(req))
2333 sarg.vap->va_type = VLNK;
2334 else
2335 sarg.is_referral = B_TRUE;
2338 status = do_rfs4_op_getattr(args->attr_request,
2339 &resp->obj_attributes, &sarg);
2342 *cs->statusp = resp->status = status;
2343 out:
2344 DTRACE_NFSV4_2(op__getattr__done, struct compound_state *, cs,
2345 GETATTR4res *, resp);
2348 static void
2349 rfs4_op_getattr_free(nfs_resop4 *resop)
2351 GETATTR4res *resp = &resop->nfs_resop4_u.opgetattr;
2353 nfs4_fattr4_free(&resp->obj_attributes);
2356 /* ARGSUSED */
2357 static void
2358 rfs4_op_getfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2359 struct compound_state *cs)
2361 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2363 DTRACE_NFSV4_1(op__getfh__start, struct compound_state *, cs);
2365 if (cs->vp == NULL) {
2366 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2367 goto out;
2369 if (cs->access == CS_ACCESS_DENIED) {
2370 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2371 goto out;
2374 /* check for reparse point at the share point */
2375 if (cs->exi->exi_moved || vn_is_nfs_reparse(cs->exi->exi_vp, cs->cr)) {
2376 /* it's all bad */
2377 cs->exi->exi_moved = 1;
2378 *cs->statusp = resp->status = NFS4ERR_MOVED;
2379 DTRACE_PROBE2(nfs4serv__func__referral__shared__moved,
2380 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2381 return;
2384 /* check for reparse point at vp */
2385 if (vn_is_nfs_reparse(cs->vp, cs->cr) && !client_is_downrev(req)) {
2386 /* it's not all bad */
2387 *cs->statusp = resp->status = NFS4ERR_MOVED;
2388 DTRACE_PROBE2(nfs4serv__func__referral__moved,
2389 vnode_t *, cs->vp, char *, "rfs4_op_getfh");
2390 return;
2393 resp->object.nfs_fh4_val =
2394 kmem_alloc(cs->fh.nfs_fh4_len, KM_SLEEP);
2395 nfs_fh4_copy(&cs->fh, &resp->object);
2396 *cs->statusp = resp->status = NFS4_OK;
2397 out:
2398 DTRACE_NFSV4_2(op__getfh__done, struct compound_state *, cs,
2399 GETFH4res *, resp);
2402 static void
2403 rfs4_op_getfh_free(nfs_resop4 *resop)
2405 GETFH4res *resp = &resop->nfs_resop4_u.opgetfh;
2407 if (resp->status == NFS4_OK &&
2408 resp->object.nfs_fh4_val != NULL) {
2409 kmem_free(resp->object.nfs_fh4_val, resp->object.nfs_fh4_len);
2410 resp->object.nfs_fh4_val = NULL;
2411 resp->object.nfs_fh4_len = 0;
2416 * illegal: args: void
2417 * res : status (NFS4ERR_OP_ILLEGAL)
2419 /* ARGSUSED */
2420 static void
2421 rfs4_op_illegal(nfs_argop4 *argop, nfs_resop4 *resop,
2422 struct svc_req *req, struct compound_state *cs)
2424 ILLEGAL4res *resp = &resop->nfs_resop4_u.opillegal;
2426 resop->resop = OP_ILLEGAL;
2427 *cs->statusp = resp->status = NFS4ERR_OP_ILLEGAL;
2431 * link: args: SAVED_FH: file, CURRENT_FH: target directory
2432 * res: status. If success - CURRENT_FH unchanged, return change_info
2434 /* ARGSUSED */
2435 static void
2436 rfs4_op_link(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2437 struct compound_state *cs)
2439 LINK4args *args = &argop->nfs_argop4_u.oplink;
2440 LINK4res *resp = &resop->nfs_resop4_u.oplink;
2441 int error;
2442 vnode_t *vp;
2443 vnode_t *dvp;
2444 struct vattr bdva, idva, adva;
2445 char *nm;
2446 uint_t len;
2447 struct sockaddr *ca;
2448 char *name = NULL;
2450 DTRACE_NFSV4_2(op__link__start, struct compound_state *, cs,
2451 LINK4args *, args);
2453 /* SAVED_FH: source object */
2454 vp = cs->saved_vp;
2455 if (vp == NULL) {
2456 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2457 goto out;
2460 /* CURRENT_FH: target directory */
2461 dvp = cs->vp;
2462 if (dvp == NULL) {
2463 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2464 goto out;
2468 * If there is a non-shared filesystem mounted on this vnode,
2469 * do not allow to link any file in this directory.
2471 if (vn_ismntpt(dvp)) {
2472 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2473 goto out;
2476 if (cs->access == CS_ACCESS_DENIED) {
2477 *cs->statusp = resp->status = NFS4ERR_ACCESS;
2478 goto out;
2481 /* Check source object's type validity */
2482 if (vp->v_type == VDIR) {
2483 *cs->statusp = resp->status = NFS4ERR_ISDIR;
2484 goto out;
2487 /* Check target directory's type */
2488 if (dvp->v_type != VDIR) {
2489 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2490 goto out;
2493 if (cs->saved_exi != cs->exi) {
2494 *cs->statusp = resp->status = NFS4ERR_XDEV;
2495 goto out;
2498 if (!utf8_dir_verify(&args->newname)) {
2499 *cs->statusp = resp->status = NFS4ERR_INVAL;
2500 goto out;
2503 nm = utf8_to_fn(&args->newname, &len, NULL);
2504 if (nm == NULL) {
2505 *cs->statusp = resp->status = NFS4ERR_INVAL;
2506 goto out;
2509 if (len > MAXNAMELEN) {
2510 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2511 kmem_free(nm, len);
2512 goto out;
2515 if (rdonly4(cs->exi, cs->vp, req)) {
2516 *cs->statusp = resp->status = NFS4ERR_ROFS;
2517 kmem_free(nm, len);
2518 goto out;
2521 /* Get "before" change value */
2522 bdva.va_mask = AT_CTIME|AT_SEQ;
2523 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
2524 if (error) {
2525 *cs->statusp = resp->status = puterrno4(error);
2526 kmem_free(nm, len);
2527 goto out;
2530 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2531 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2532 MAXPATHLEN + 1);
2534 if (name == NULL) {
2535 *cs->statusp = resp->status = NFS4ERR_INVAL;
2536 kmem_free(nm, len);
2537 goto out;
2540 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
2542 error = VOP_LINK(dvp, vp, name, cs->cr, NULL, 0);
2544 if (nm != name)
2545 kmem_free(name, MAXPATHLEN + 1);
2546 kmem_free(nm, len);
2549 * Get the initial "after" sequence number, if it fails, set to zero
2551 idva.va_mask = AT_SEQ;
2552 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
2553 idva.va_seq = 0;
2556 * Force modified data and metadata out to stable storage.
2558 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
2559 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
2561 if (error) {
2562 *cs->statusp = resp->status = puterrno4(error);
2563 goto out;
2567 * Get "after" change value, if it fails, simply return the
2568 * before value.
2570 adva.va_mask = AT_CTIME|AT_SEQ;
2571 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
2572 adva.va_ctime = bdva.va_ctime;
2573 adva.va_seq = 0;
2576 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
2579 * The cinfo.atomic = TRUE only if we have
2580 * non-zero va_seq's, and it has incremented by exactly one
2581 * during the VOP_LINK and it didn't change during the VOP_FSYNC.
2583 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
2584 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
2585 resp->cinfo.atomic = TRUE;
2586 else
2587 resp->cinfo.atomic = FALSE;
2589 *cs->statusp = resp->status = NFS4_OK;
2590 out:
2591 DTRACE_NFSV4_2(op__link__done, struct compound_state *, cs,
2592 LINK4res *, resp);
2596 * Used by rfs4_op_lookup and rfs4_op_lookupp to do the actual work.
2599 /* ARGSUSED */
2600 static nfsstat4
2601 do_rfs4_op_lookup(char *nm, struct svc_req *req, struct compound_state *cs)
2603 int error;
2604 int different_export = 0;
2605 vnode_t *vp, *tvp, *pre_tvp = NULL, *oldvp = NULL;
2606 struct exportinfo *exi = NULL, *pre_exi = NULL;
2607 nfsstat4 stat;
2608 fid_t fid;
2609 int attrdir, dotdot, walk;
2610 bool_t is_newvp = FALSE;
2612 if (cs->vp->v_flag & V_XATTRDIR) {
2613 attrdir = 1;
2614 ASSERT(get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2615 } else {
2616 attrdir = 0;
2617 ASSERT(! get_fh4_flag(&cs->fh, FH4_ATTRDIR));
2620 dotdot = (nm[0] == '.' && nm[1] == '.' && nm[2] == '\0');
2623 * If dotdotting, then need to check whether it's
2624 * above the root of a filesystem, or above an
2625 * export point.
2627 if (dotdot) {
2630 * If dotdotting at the root of a filesystem, then
2631 * need to traverse back to the mounted-on filesystem
2632 * and do the dotdot lookup there.
2634 if (cs->vp->v_flag & VROOT) {
2637 * If at the system root, then can
2638 * go up no further.
2640 if (VN_CMP(cs->vp, rootdir))
2641 return (puterrno4(ENOENT));
2644 * Traverse back to the mounted-on filesystem
2646 cs->vp = untraverse(cs->vp);
2649 * Set the different_export flag so we remember
2650 * to pick up a new exportinfo entry for
2651 * this new filesystem.
2653 different_export = 1;
2654 } else {
2657 * If dotdotting above an export point then set
2658 * the different_export to get new export info.
2660 different_export = nfs_exported(cs->exi, cs->vp);
2664 error = VOP_LOOKUP(cs->vp, nm, &vp, NULL, 0, NULL, cs->cr,
2665 NULL, NULL, NULL);
2666 if (error)
2667 return (puterrno4(error));
2670 * If the vnode is in a pseudo filesystem, check whether it is visible.
2672 * XXX if the vnode is a symlink and it is not visible in
2673 * a pseudo filesystem, return ENOENT (not following symlink).
2674 * V4 client can not mount such symlink. This is a regression
2675 * from V2/V3.
2677 * In the same exported filesystem, if the security flavor used
2678 * is not an explicitly shared flavor, limit the view to the visible
2679 * list entries only. This is not a WRONGSEC case because it's already
2680 * checked via PUTROOTFH/PUTPUBFH or PUTFH.
2682 if (!different_export &&
2683 (PSEUDO(cs->exi) || ! is_exported_sec(cs->nfsflavor, cs->exi) ||
2684 cs->access & CS_ACCESS_LIMITED)) {
2685 if (! nfs_visible(cs->exi, vp, &different_export)) {
2686 VN_RELE(vp);
2687 return (puterrno4(ENOENT));
2692 * If it's a mountpoint, then traverse it.
2694 if (vn_ismntpt(vp)) {
2695 pre_exi = cs->exi; /* save pre-traversed exportinfo */
2696 pre_tvp = vp; /* save pre-traversed vnode */
2699 * hold pre_tvp to counteract rele by traverse. We will
2700 * need pre_tvp below if checkexport4 fails
2702 VN_HOLD(pre_tvp);
2703 tvp = vp;
2704 if ((error = traverse(&tvp)) != 0) {
2705 VN_RELE(vp);
2706 VN_RELE(pre_tvp);
2707 return (puterrno4(error));
2709 vp = tvp;
2710 different_export = 1;
2711 } else if (vp->v_vfsp != cs->vp->v_vfsp) {
2713 * The vfsp comparison is to handle the case where
2714 * a LOFS mount is shared. lo_lookup traverses mount points,
2715 * and NFS is unaware of local fs transistions because
2716 * v_vfsmountedhere isn't set. For this special LOFS case,
2717 * the dir and the obj returned by lookup will have different
2718 * vfs ptrs.
2720 different_export = 1;
2723 if (different_export) {
2725 bzero(&fid, sizeof (fid));
2726 fid.fid_len = MAXFIDSZ;
2727 error = vop_fid_pseudo(vp, &fid);
2728 if (error) {
2729 VN_RELE(vp);
2730 if (pre_tvp)
2731 VN_RELE(pre_tvp);
2732 return (puterrno4(error));
2735 if (dotdot)
2736 exi = nfs_vptoexi(NULL, vp, cs->cr, &walk, NULL, TRUE);
2737 else
2738 exi = checkexport4(&vp->v_vfsp->vfs_fsid, &fid, vp);
2740 if (exi == NULL) {
2741 if (pre_tvp) {
2743 * If this vnode is a mounted-on vnode,
2744 * but the mounted-on file system is not
2745 * exported, send back the filehandle for
2746 * the mounted-on vnode, not the root of
2747 * the mounted-on file system.
2749 VN_RELE(vp);
2750 vp = pre_tvp;
2751 exi = pre_exi;
2752 } else {
2753 VN_RELE(vp);
2754 return (puterrno4(EACCES));
2756 } else if (pre_tvp) {
2757 /* we're done with pre_tvp now. release extra hold */
2758 VN_RELE(pre_tvp);
2761 cs->exi = exi;
2764 * Now we do a checkauth4. The reason is that
2765 * this client/user may not have access to the new
2766 * exported file system, and if he does,
2767 * the client/user may be mapped to a different uid.
2769 * We start with a new cr, because the checkauth4 done
2770 * in the PUT*FH operation over wrote the cred's uid,
2771 * gid, etc, and we want the real thing before calling
2772 * checkauth4()
2774 crfree(cs->cr);
2775 cs->cr = crdup(cs->basecr);
2777 oldvp = cs->vp;
2778 cs->vp = vp;
2779 is_newvp = TRUE;
2781 stat = call_checkauth4(cs, req);
2782 if (stat != NFS4_OK) {
2783 VN_RELE(cs->vp);
2784 cs->vp = oldvp;
2785 return (stat);
2790 * After various NFS checks, do a label check on the path
2791 * component. The label on this path should either be the
2792 * global zone's label or a zone's label. We are only
2793 * interested in the zone's label because exported files
2794 * in global zone is accessible (though read-only) to
2795 * clients. The exportability/visibility check is already
2796 * done before reaching this code.
2798 if (is_system_labeled()) {
2799 bslabel_t *clabel;
2801 ASSERT(req->rq_label != NULL);
2802 clabel = req->rq_label;
2803 DTRACE_PROBE2(tx__rfs4__log__info__oplookup__clabel, char *,
2804 "got client label from request(1)", struct svc_req *, req);
2806 if (!blequal(&l_admin_low->tsl_label, clabel)) {
2807 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
2808 cs->exi)) {
2809 error = EACCES;
2810 goto err_out;
2812 } else {
2814 * We grant access to admin_low label clients
2815 * only if the client is trusted, i.e. also
2816 * running Solaris Trusted Extension.
2818 struct sockaddr *ca;
2819 int addr_type;
2820 void *ipaddr;
2821 tsol_tpc_t *tp;
2823 ca = (struct sockaddr *)svc_getrpccaller(
2824 req->rq_xprt)->buf;
2825 if (ca->sa_family == AF_INET) {
2826 addr_type = IPV4_VERSION;
2827 ipaddr = &((struct sockaddr_in *)ca)->sin_addr;
2828 } else if (ca->sa_family == AF_INET6) {
2829 addr_type = IPV6_VERSION;
2830 ipaddr = &((struct sockaddr_in6 *)
2831 ca)->sin6_addr;
2833 tp = find_tpc(ipaddr, addr_type, B_FALSE);
2834 if (tp == NULL || tp->tpc_tp.tp_doi !=
2835 l_admin_low->tsl_doi || tp->tpc_tp.host_type !=
2836 SUN_CIPSO) {
2837 if (tp != NULL)
2838 TPC_RELE(tp);
2839 error = EACCES;
2840 goto err_out;
2842 TPC_RELE(tp);
2846 error = makefh4(&cs->fh, vp, cs->exi);
2848 err_out:
2849 if (error) {
2850 if (is_newvp) {
2851 VN_RELE(cs->vp);
2852 cs->vp = oldvp;
2853 } else
2854 VN_RELE(vp);
2855 return (puterrno4(error));
2858 if (!is_newvp) {
2859 if (cs->vp)
2860 VN_RELE(cs->vp);
2861 cs->vp = vp;
2862 } else if (oldvp)
2863 VN_RELE(oldvp);
2866 * if did lookup on attrdir and didn't lookup .., set named
2867 * attr fh flag
2869 if (attrdir && ! dotdot)
2870 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
2872 /* Assume false for now, open proc will set this */
2873 cs->mandlock = FALSE;
2875 return (NFS4_OK);
2878 /* ARGSUSED */
2879 static void
2880 rfs4_op_lookup(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2881 struct compound_state *cs)
2883 LOOKUP4args *args = &argop->nfs_argop4_u.oplookup;
2884 LOOKUP4res *resp = &resop->nfs_resop4_u.oplookup;
2885 char *nm;
2886 uint_t len;
2887 struct sockaddr *ca;
2888 char *name = NULL;
2890 DTRACE_NFSV4_2(op__lookup__start, struct compound_state *, cs,
2891 LOOKUP4args *, args);
2893 if (cs->vp == NULL) {
2894 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2895 goto out;
2898 if (cs->vp->v_type == VLNK) {
2899 *cs->statusp = resp->status = NFS4ERR_SYMLINK;
2900 goto out;
2903 if (cs->vp->v_type != VDIR) {
2904 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2905 goto out;
2908 if (!utf8_dir_verify(&args->objname)) {
2909 *cs->statusp = resp->status = NFS4ERR_INVAL;
2910 goto out;
2913 nm = utf8_to_str(&args->objname, &len, NULL);
2914 if (nm == NULL) {
2915 *cs->statusp = resp->status = NFS4ERR_INVAL;
2916 goto out;
2919 if (len > MAXNAMELEN) {
2920 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
2921 kmem_free(nm, len);
2922 goto out;
2925 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
2926 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
2927 MAXPATHLEN + 1);
2929 if (name == NULL) {
2930 *cs->statusp = resp->status = NFS4ERR_INVAL;
2931 kmem_free(nm, len);
2932 goto out;
2935 *cs->statusp = resp->status = do_rfs4_op_lookup(name, req, cs);
2937 if (name != nm)
2938 kmem_free(name, MAXPATHLEN + 1);
2939 kmem_free(nm, len);
2941 out:
2942 DTRACE_NFSV4_2(op__lookup__done, struct compound_state *, cs,
2943 LOOKUP4res *, resp);
2946 /* ARGSUSED */
2947 static void
2948 rfs4_op_lookupp(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
2949 struct compound_state *cs)
2951 LOOKUPP4res *resp = &resop->nfs_resop4_u.oplookupp;
2953 DTRACE_NFSV4_1(op__lookupp__start, struct compound_state *, cs);
2955 if (cs->vp == NULL) {
2956 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2957 goto out;
2960 if (cs->vp->v_type != VDIR) {
2961 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
2962 goto out;
2965 *cs->statusp = resp->status = do_rfs4_op_lookup("..", req, cs);
2968 * From NFSV4 Specification, LOOKUPP should not check for
2969 * NFS4ERR_WRONGSEC. Retrun NFS4_OK instead.
2971 if (resp->status == NFS4ERR_WRONGSEC) {
2972 *cs->statusp = resp->status = NFS4_OK;
2975 out:
2976 DTRACE_NFSV4_2(op__lookupp__done, struct compound_state *, cs,
2977 LOOKUPP4res *, resp);
2981 /*ARGSUSED2*/
2982 static void
2983 rfs4_op_openattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
2984 struct compound_state *cs)
2986 OPENATTR4args *args = &argop->nfs_argop4_u.opopenattr;
2987 OPENATTR4res *resp = &resop->nfs_resop4_u.opopenattr;
2988 vnode_t *avp = NULL;
2989 int lookup_flags = LOOKUP_XATTR, error;
2990 int exp_ro = 0;
2992 DTRACE_NFSV4_2(op__openattr__start, struct compound_state *, cs,
2993 OPENATTR4args *, args);
2995 if (cs->vp == NULL) {
2996 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
2997 goto out;
3000 if ((cs->vp->v_vfsp->vfs_flag & VFS_XATTR) == 0 &&
3001 !vfs_has_feature(cs->vp->v_vfsp, VFSFT_SYSATTR_VIEWS)) {
3002 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3003 goto out;
3007 * If file system supports passing ACE mask to VOP_ACCESS then
3008 * check for ACE_READ_NAMED_ATTRS, otherwise do legacy checks
3011 if (vfs_has_feature(cs->vp->v_vfsp, VFSFT_ACEMASKONACCESS))
3012 error = VOP_ACCESS(cs->vp, ACE_READ_NAMED_ATTRS,
3013 V_ACE_MASK, cs->cr, NULL);
3014 else
3015 error = ((VOP_ACCESS(cs->vp, VREAD, 0, cs->cr, NULL) != 0) &&
3016 (VOP_ACCESS(cs->vp, VWRITE, 0, cs->cr, NULL) != 0) &&
3017 (VOP_ACCESS(cs->vp, VEXEC, 0, cs->cr, NULL) != 0));
3019 if (error) {
3020 *cs->statusp = resp->status = puterrno4(EACCES);
3021 goto out;
3025 * The CREATE_XATTR_DIR VOP flag cannot be specified if
3026 * the file system is exported read-only -- regardless of
3027 * createdir flag. Otherwise the attrdir would be created
3028 * (assuming server fs isn't mounted readonly locally). If
3029 * VOP_LOOKUP returns ENOENT in this case, the error will
3030 * be translated into EROFS. ENOSYS is mapped to ENOTSUP
3031 * because specfs has no VOP_LOOKUP op, so the macro would
3032 * return ENOSYS. EINVAL is returned by all (current)
3033 * Solaris file system implementations when any of their
3034 * restrictions are violated (xattr(dir) can't have xattrdir).
3035 * Returning NOTSUPP is more appropriate in this case
3036 * because the object will never be able to have an attrdir.
3038 if (args->createdir && ! (exp_ro = rdonly4(cs->exi, cs->vp, req)))
3039 lookup_flags |= CREATE_XATTR_DIR;
3041 error = VOP_LOOKUP(cs->vp, "", &avp, NULL, lookup_flags, NULL, cs->cr,
3042 NULL, NULL, NULL);
3044 if (error) {
3045 if (error == ENOENT && args->createdir && exp_ro)
3046 *cs->statusp = resp->status = puterrno4(EROFS);
3047 else if (error == EINVAL || error == ENOSYS)
3048 *cs->statusp = resp->status = puterrno4(ENOTSUP);
3049 else
3050 *cs->statusp = resp->status = puterrno4(error);
3051 goto out;
3054 ASSERT(avp->v_flag & V_XATTRDIR);
3056 error = makefh4(&cs->fh, avp, cs->exi);
3058 if (error) {
3059 VN_RELE(avp);
3060 *cs->statusp = resp->status = puterrno4(error);
3061 goto out;
3064 VN_RELE(cs->vp);
3065 cs->vp = avp;
3068 * There is no requirement for an attrdir fh flag
3069 * because the attrdir has a vnode flag to distinguish
3070 * it from regular (non-xattr) directories. The
3071 * FH4_ATTRDIR flag is set for future sanity checks.
3073 set_fh4_flag(&cs->fh, FH4_ATTRDIR);
3074 *cs->statusp = resp->status = NFS4_OK;
3076 out:
3077 DTRACE_NFSV4_2(op__openattr__done, struct compound_state *, cs,
3078 OPENATTR4res *, resp);
3081 static int
3082 do_io(int direction, vnode_t *vp, struct uio *uio, int ioflag, cred_t *cred,
3083 caller_context_t *ct)
3085 int error;
3086 int i;
3087 clock_t delaytime;
3089 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
3092 * Don't block on mandatory locks. If this routine returns
3093 * EAGAIN, the caller should return NFS4ERR_LOCKED.
3095 uio->uio_fmode = FNONBLOCK;
3097 for (i = 0; i < rfs4_maxlock_tries; i++) {
3100 if (direction == FREAD) {
3101 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, ct);
3102 error = VOP_READ(vp, uio, ioflag, cred, ct);
3103 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, ct);
3104 } else {
3105 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, ct);
3106 error = VOP_WRITE(vp, uio, ioflag, cred, ct);
3107 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, ct);
3110 if (error != EAGAIN)
3111 break;
3113 if (i < rfs4_maxlock_tries - 1) {
3114 delay(delaytime);
3115 delaytime *= 2;
3119 return (error);
3122 /* ARGSUSED */
3123 static void
3124 rfs4_op_read(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3125 struct compound_state *cs)
3127 READ4args *args = &argop->nfs_argop4_u.opread;
3128 READ4res *resp = &resop->nfs_resop4_u.opread;
3129 int error;
3130 int verror;
3131 vnode_t *vp;
3132 struct vattr va;
3133 struct iovec iov;
3134 struct uio uio;
3135 u_offset_t offset;
3136 bool_t *deleg = &cs->deleg;
3137 nfsstat4 stat;
3138 int in_crit = 0;
3139 mblk_t *mp = NULL;
3140 int alloc_err = 0;
3141 int rdma_used = 0;
3142 int loaned_buffers;
3143 caller_context_t ct;
3144 struct uio *uiop;
3146 DTRACE_NFSV4_2(op__read__start, struct compound_state *, cs,
3147 READ4args, args);
3149 vp = cs->vp;
3150 if (vp == NULL) {
3151 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3152 goto out;
3154 if (cs->access == CS_ACCESS_DENIED) {
3155 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3156 goto out;
3159 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3160 deleg, TRUE, &ct)) != NFS4_OK) {
3161 *cs->statusp = resp->status = stat;
3162 goto out;
3166 * Enter the critical region before calling VOP_RWLOCK
3167 * to avoid a deadlock with write requests.
3169 if (nbl_need_check(vp)) {
3170 nbl_start_crit(vp, RW_READER);
3171 in_crit = 1;
3172 if (nbl_conflict(vp, NBL_READ, args->offset, args->count, 0,
3173 &ct)) {
3174 *cs->statusp = resp->status = NFS4ERR_LOCKED;
3175 goto out;
3179 if ((stat = rfs4_check_stateid(FREAD, vp, &args->stateid, FALSE,
3180 deleg, TRUE, &ct)) != NFS4_OK) {
3181 *cs->statusp = resp->status = stat;
3182 goto out;
3185 if (args->wlist)
3186 rdma_used = 1;
3188 /* use loaned buffers for TCP */
3189 loaned_buffers = (nfs_loaned_buffers && !rdma_used) ? 1 : 0;
3191 va.va_mask = AT_MODE|AT_SIZE|AT_UID;
3192 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3195 * If we can't get the attributes, then we can't do the
3196 * right access checking. So, we'll fail the request.
3198 if (verror) {
3199 *cs->statusp = resp->status = puterrno4(verror);
3200 goto out;
3203 if (vp->v_type != VREG) {
3204 *cs->statusp = resp->status =
3205 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
3206 goto out;
3209 if (crgetuid(cs->cr) != va.va_uid &&
3210 (error = VOP_ACCESS(vp, VREAD, 0, cs->cr, &ct)) &&
3211 (error = VOP_ACCESS(vp, VEXEC, 0, cs->cr, &ct))) {
3212 *cs->statusp = resp->status = puterrno4(error);
3213 goto out;
3216 if (MANDLOCK(vp, va.va_mode)) { /* XXX - V4 supports mand locking */
3217 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3218 goto out;
3221 offset = args->offset;
3222 if (offset >= va.va_size) {
3223 *cs->statusp = resp->status = NFS4_OK;
3224 resp->eof = TRUE;
3225 resp->data_len = 0;
3226 resp->data_val = NULL;
3227 resp->mblk = NULL;
3228 /* RDMA */
3229 resp->wlist = args->wlist;
3230 resp->wlist_len = resp->data_len;
3231 *cs->statusp = resp->status = NFS4_OK;
3232 if (resp->wlist)
3233 clist_zero_len(resp->wlist);
3234 goto out;
3237 if (args->count == 0) {
3238 *cs->statusp = resp->status = NFS4_OK;
3239 resp->eof = FALSE;
3240 resp->data_len = 0;
3241 resp->data_val = NULL;
3242 resp->mblk = NULL;
3243 /* RDMA */
3244 resp->wlist = args->wlist;
3245 resp->wlist_len = resp->data_len;
3246 if (resp->wlist)
3247 clist_zero_len(resp->wlist);
3248 goto out;
3252 * Do not allocate memory more than maximum allowed
3253 * transfer size
3255 if (args->count > rfs4_tsize(req))
3256 args->count = rfs4_tsize(req);
3258 if (loaned_buffers) {
3259 uiop = (uio_t *)rfs_setup_xuio(vp);
3260 ASSERT(uiop != NULL);
3261 uiop->uio_segflg = UIO_SYSSPACE;
3262 uiop->uio_loffset = args->offset;
3263 uiop->uio_resid = args->count;
3265 /* Jump to do the read if successful */
3266 if (!VOP_REQZCBUF(vp, UIO_READ, (xuio_t *)uiop, cs->cr, &ct)) {
3268 * Need to hold the vnode until after VOP_RETZCBUF()
3269 * is called.
3271 VN_HOLD(vp);
3272 goto doio_read;
3275 DTRACE_PROBE2(nfss__i__reqzcbuf_failed, int,
3276 uiop->uio_loffset, int, uiop->uio_resid);
3278 uiop->uio_extflg = 0;
3280 /* failure to setup for zero copy */
3281 rfs_free_xuio((void *)uiop);
3282 loaned_buffers = 0;
3286 * If returning data via RDMA Write, then grab the chunk list. If we
3287 * aren't returning READ data w/RDMA_WRITE, then grab a mblk.
3289 if (rdma_used) {
3290 mp = NULL;
3291 (void) rdma_get_wchunk(req, &iov, args->wlist);
3292 } else {
3294 * mp will contain the data to be sent out in the read reply.
3295 * It will be freed after the reply has been sent. Let's
3296 * roundup the data to a BYTES_PER_XDR_UNIT multiple, so that
3297 * the call to xdrmblk_putmblk() never fails. If the first
3298 * alloc of the requested size fails, then decrease the size to
3299 * something more reasonable and wait for the allocation to
3300 * occur.
3302 mp = allocb(RNDUP(args->count), BPRI_MED);
3303 if (mp == NULL) {
3304 if (args->count > MAXBSIZE)
3305 args->count = MAXBSIZE;
3306 mp = allocb_wait(RNDUP(args->count), BPRI_MED,
3307 STR_NOSIG, &alloc_err);
3309 ASSERT(mp != NULL);
3310 ASSERT(alloc_err == 0);
3312 iov.iov_base = (caddr_t)mp->b_datap->db_base;
3313 iov.iov_len = args->count;
3316 uio.uio_iov = &iov;
3317 uio.uio_iovcnt = 1;
3318 uio.uio_segflg = UIO_SYSSPACE;
3319 uio.uio_extflg = UIO_COPY_CACHED;
3320 uio.uio_loffset = args->offset;
3321 uio.uio_resid = args->count;
3322 uiop = &uio;
3324 doio_read:
3325 error = do_io(FREAD, vp, uiop, 0, cs->cr, &ct);
3327 va.va_mask = AT_SIZE;
3328 verror = VOP_GETATTR(vp, &va, 0, cs->cr, &ct);
3330 if (error) {
3331 if (mp)
3332 freemsg(mp);
3333 *cs->statusp = resp->status = puterrno4(error);
3334 goto out;
3337 /* make mblk using zc buffers */
3338 if (loaned_buffers) {
3339 mp = uio_to_mblk(uiop);
3340 ASSERT(mp != NULL);
3343 *cs->statusp = resp->status = NFS4_OK;
3345 ASSERT(uiop->uio_resid >= 0);
3346 resp->data_len = args->count - uiop->uio_resid;
3347 if (mp) {
3348 resp->data_val = (char *)mp->b_datap->db_base;
3349 rfs_rndup_mblks(mp, resp->data_len, loaned_buffers);
3350 } else {
3351 resp->data_val = (caddr_t)iov.iov_base;
3354 resp->mblk = mp;
3356 if (!verror && offset + resp->data_len == va.va_size)
3357 resp->eof = TRUE;
3358 else
3359 resp->eof = FALSE;
3361 if (rdma_used) {
3362 if (!rdma_setup_read_data4(args, resp)) {
3363 *cs->statusp = resp->status = NFS4ERR_INVAL;
3365 } else {
3366 resp->wlist = NULL;
3369 out:
3370 if (in_crit)
3371 nbl_end_crit(vp);
3373 DTRACE_NFSV4_2(op__read__done, struct compound_state *, cs,
3374 READ4res *, resp);
3377 static void
3378 rfs4_op_read_free(nfs_resop4 *resop)
3380 READ4res *resp = &resop->nfs_resop4_u.opread;
3382 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3383 freemsg(resp->mblk);
3384 resp->mblk = NULL;
3385 resp->data_val = NULL;
3386 resp->data_len = 0;
3390 static void
3391 rfs4_op_readdir_free(nfs_resop4 * resop)
3393 READDIR4res *resp = &resop->nfs_resop4_u.opreaddir;
3395 if (resp->status == NFS4_OK && resp->mblk != NULL) {
3396 freeb(resp->mblk);
3397 resp->mblk = NULL;
3398 resp->data_len = 0;
3403 /* ARGSUSED */
3404 static void
3405 rfs4_op_putpubfh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
3406 struct compound_state *cs)
3408 PUTPUBFH4res *resp = &resop->nfs_resop4_u.opputpubfh;
3409 int error;
3410 vnode_t *vp;
3411 struct exportinfo *exi, *sav_exi;
3412 nfs_fh4_fmt_t *fh_fmtp;
3414 DTRACE_NFSV4_1(op__putpubfh__start, struct compound_state *, cs);
3416 if (cs->vp) {
3417 VN_RELE(cs->vp);
3418 cs->vp = NULL;
3421 if (cs->cr)
3422 crfree(cs->cr);
3424 cs->cr = crdup(cs->basecr);
3426 vp = exi_public->exi_vp;
3427 if (vp == NULL) {
3428 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3429 goto out;
3432 error = makefh4(&cs->fh, vp, exi_public);
3433 if (error != 0) {
3434 *cs->statusp = resp->status = puterrno4(error);
3435 goto out;
3437 sav_exi = cs->exi;
3438 if (exi_public == exi_root) {
3440 * No filesystem is actually shared public, so we default
3441 * to exi_root. In this case, we must check whether root
3442 * is exported.
3444 fh_fmtp = (nfs_fh4_fmt_t *)cs->fh.nfs_fh4_val;
3447 * if root filesystem is exported, the exportinfo struct that we
3448 * should use is what checkexport4 returns, because root_exi is
3449 * actually a mostly empty struct.
3451 exi = checkexport4(&fh_fmtp->fh4_fsid,
3452 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
3453 cs->exi = ((exi != NULL) ? exi : exi_public);
3454 } else {
3456 * it's a properly shared filesystem
3458 cs->exi = exi_public;
3461 if (is_system_labeled()) {
3462 bslabel_t *clabel;
3464 ASSERT(req->rq_label != NULL);
3465 clabel = req->rq_label;
3466 DTRACE_PROBE2(tx__rfs4__log__info__opputpubfh__clabel, char *,
3467 "got client label from request(1)",
3468 struct svc_req *, req);
3469 if (!blequal(&l_admin_low->tsl_label, clabel)) {
3470 if (!do_rfs_label_check(clabel, vp, DOMINANCE_CHECK,
3471 cs->exi)) {
3472 *cs->statusp = resp->status =
3473 NFS4ERR_SERVERFAULT;
3474 goto out;
3479 VN_HOLD(vp);
3480 cs->vp = vp;
3482 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3483 VN_RELE(cs->vp);
3484 cs->vp = NULL;
3485 cs->exi = sav_exi;
3486 goto out;
3489 *cs->statusp = resp->status = NFS4_OK;
3490 out:
3491 DTRACE_NFSV4_2(op__putpubfh__done, struct compound_state *, cs,
3492 PUTPUBFH4res *, resp);
3496 * XXX - issue with put*fh operations. Suppose /export/home is exported.
3497 * Suppose an NFS client goes to mount /export/home/joe. If /export, home,
3498 * or joe have restrictive search permissions, then we shouldn't let
3499 * the client get a file handle. This is easy to enforce. However, we
3500 * don't know what security flavor should be used until we resolve the
3501 * path name. Another complication is uid mapping. If root is
3502 * the user, then it will be mapped to the anonymous user by default,
3503 * but we won't know that till we've resolved the path name. And we won't
3504 * know what the anonymous user is.
3505 * Luckily, SECINFO is specified to take a full filename.
3506 * So what we will have to in rfs4_op_lookup is check that flavor of
3507 * the target object matches that of the request, and if root was the
3508 * caller, check for the root= and anon= options, and if necessary,
3509 * repeat the lookup using the right cred_t. But that's not done yet.
3511 /* ARGSUSED */
3512 static void
3513 rfs4_op_putfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3514 struct compound_state *cs)
3516 PUTFH4args *args = &argop->nfs_argop4_u.opputfh;
3517 PUTFH4res *resp = &resop->nfs_resop4_u.opputfh;
3518 nfs_fh4_fmt_t *fh_fmtp;
3520 DTRACE_NFSV4_2(op__putfh__start, struct compound_state *, cs,
3521 PUTFH4args *, args);
3523 if (cs->vp) {
3524 VN_RELE(cs->vp);
3525 cs->vp = NULL;
3528 if (cs->cr) {
3529 crfree(cs->cr);
3530 cs->cr = NULL;
3534 if (args->object.nfs_fh4_len < NFS_FH4_LEN) {
3535 *cs->statusp = resp->status = NFS4ERR_BADHANDLE;
3536 goto out;
3539 fh_fmtp = (nfs_fh4_fmt_t *)args->object.nfs_fh4_val;
3540 cs->exi = checkexport4(&fh_fmtp->fh4_fsid, (fid_t *)&fh_fmtp->fh4_xlen,
3541 NULL);
3543 if (cs->exi == NULL) {
3544 *cs->statusp = resp->status = NFS4ERR_STALE;
3545 goto out;
3548 cs->cr = crdup(cs->basecr);
3550 ASSERT(cs->cr != NULL);
3552 if (! (cs->vp = nfs4_fhtovp(&args->object, cs->exi, &resp->status))) {
3553 *cs->statusp = resp->status;
3554 goto out;
3557 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3558 VN_RELE(cs->vp);
3559 cs->vp = NULL;
3560 goto out;
3563 nfs_fh4_copy(&args->object, &cs->fh);
3564 *cs->statusp = resp->status = NFS4_OK;
3565 cs->deleg = FALSE;
3567 out:
3568 DTRACE_NFSV4_2(op__putfh__done, struct compound_state *, cs,
3569 PUTFH4res *, resp);
3572 /* ARGSUSED */
3573 static void
3574 rfs4_op_putrootfh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3575 struct compound_state *cs)
3577 PUTROOTFH4res *resp = &resop->nfs_resop4_u.opputrootfh;
3578 int error;
3579 fid_t fid;
3580 struct exportinfo *exi, *sav_exi;
3582 DTRACE_NFSV4_1(op__putrootfh__start, struct compound_state *, cs);
3584 if (cs->vp) {
3585 VN_RELE(cs->vp);
3586 cs->vp = NULL;
3589 if (cs->cr)
3590 crfree(cs->cr);
3592 cs->cr = crdup(cs->basecr);
3595 * Using rootdir, the system root vnode,
3596 * get its fid.
3598 bzero(&fid, sizeof (fid));
3599 fid.fid_len = MAXFIDSZ;
3600 error = vop_fid_pseudo(rootdir, &fid);
3601 if (error != 0) {
3602 *cs->statusp = resp->status = puterrno4(error);
3603 goto out;
3607 * Then use the root fsid & fid it to find out if it's exported
3609 * If the server root isn't exported directly, then
3610 * it should at least be a pseudo export based on
3611 * one or more exports further down in the server's
3612 * file tree.
3614 exi = checkexport4(&rootdir->v_vfsp->vfs_fsid, &fid, NULL);
3615 if (exi == NULL || exi->exi_export.ex_flags & EX_PUBLIC) {
3616 NFS4_DEBUG(rfs4_debug,
3617 (CE_WARN, "rfs4_op_putrootfh: export check failure"));
3618 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
3619 goto out;
3623 * Now make a filehandle based on the root
3624 * export and root vnode.
3626 error = makefh4(&cs->fh, rootdir, exi);
3627 if (error != 0) {
3628 *cs->statusp = resp->status = puterrno4(error);
3629 goto out;
3632 sav_exi = cs->exi;
3633 cs->exi = exi;
3635 VN_HOLD(rootdir);
3636 cs->vp = rootdir;
3638 if ((resp->status = call_checkauth4(cs, req)) != NFS4_OK) {
3639 VN_RELE(rootdir);
3640 cs->vp = NULL;
3641 cs->exi = sav_exi;
3642 goto out;
3645 *cs->statusp = resp->status = NFS4_OK;
3646 cs->deleg = FALSE;
3647 out:
3648 DTRACE_NFSV4_2(op__putrootfh__done, struct compound_state *, cs,
3649 PUTROOTFH4res *, resp);
3653 * A directory entry is a valid nfsv4 entry if
3654 * - it has a non-zero ino
3655 * - it is not a dot or dotdot name
3656 * - it is visible in a pseudo export or in a real export that can
3657 * only have a limited view.
3659 static bool_t
3660 valid_nfs4_entry(struct exportinfo *exi, struct dirent64 *dp,
3661 int *expseudo, int check_visible)
3663 if (dp->d_ino == 0 || NFS_IS_DOTNAME(dp->d_name)) {
3664 *expseudo = 0;
3665 return (FALSE);
3668 if (! check_visible) {
3669 *expseudo = 0;
3670 return (TRUE);
3673 return (nfs_visible_inode(exi, dp->d_ino, expseudo));
3677 * set_rdattr_params sets up the variables used to manage what information
3678 * to get for each directory entry.
3680 static nfsstat4
3681 set_rdattr_params(struct nfs4_svgetit_arg *sargp,
3682 bitmap4 attrs, bool_t *need_to_lookup)
3684 uint_t va_mask;
3685 nfsstat4 status;
3686 bitmap4 objbits;
3688 status = bitmap4_to_attrmask(attrs, sargp);
3689 if (status != NFS4_OK) {
3691 * could not even figure attr mask
3693 return (status);
3695 va_mask = sargp->vap->va_mask;
3698 * dirent's d_ino is always correct value for mounted_on_fileid.
3699 * mntdfid_set is set once here, but mounted_on_fileid is
3700 * set in main dirent processing loop for each dirent.
3701 * The mntdfid_set is a simple optimization that lets the
3702 * server attr code avoid work when caller is readdir.
3704 sargp->mntdfid_set = TRUE;
3707 * Lookup entry only if client asked for any of the following:
3708 * a) vattr attrs
3709 * b) vfs attrs
3710 * c) attrs w/per-object scope requested (change, filehandle, etc)
3711 * other than mounted_on_fileid (which we can take from dirent)
3713 objbits = attrs ? attrs & NFS4_VP_ATTR_MASK : 0;
3715 if (va_mask || sargp->sbp || (objbits & ~FATTR4_MOUNTED_ON_FILEID_MASK))
3716 *need_to_lookup = TRUE;
3717 else
3718 *need_to_lookup = FALSE;
3720 if (sargp->sbp == NULL)
3721 return (NFS4_OK);
3724 * If filesystem attrs are requested, get them now from the
3725 * directory vp, as most entries will have same filesystem. The only
3726 * exception are mounted over entries but we handle
3727 * those as we go (XXX mounted over detection not yet implemented).
3729 sargp->vap->va_mask = 0; /* to avoid VOP_GETATTR */
3730 status = bitmap4_get_sysattrs(sargp);
3731 sargp->vap->va_mask = va_mask;
3733 if ((status != NFS4_OK) && sargp->rdattr_error_req) {
3735 * Failed to get filesystem attributes.
3736 * Return a rdattr_error for each entry, but don't fail.
3737 * However, don't get any obj-dependent attrs.
3739 sargp->rdattr_error = status; /* for rdattr_error */
3740 *need_to_lookup = FALSE;
3742 * At least get fileid for regular readdir output
3744 sargp->vap->va_mask &= AT_NODEID;
3745 status = NFS4_OK;
3748 return (status);
3752 * readlink: args: CURRENT_FH.
3753 * res: status. If success - CURRENT_FH unchanged, return linktext.
3756 /* ARGSUSED */
3757 static void
3758 rfs4_op_readlink(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
3759 struct compound_state *cs)
3761 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3762 int error;
3763 vnode_t *vp;
3764 struct iovec iov;
3765 struct vattr va;
3766 struct uio uio;
3767 char *data;
3768 struct sockaddr *ca;
3769 char *name = NULL;
3770 int is_referral;
3772 DTRACE_NFSV4_1(op__readlink__start, struct compound_state *, cs);
3774 /* CURRENT_FH: directory */
3775 vp = cs->vp;
3776 if (vp == NULL) {
3777 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
3778 goto out;
3781 if (cs->access == CS_ACCESS_DENIED) {
3782 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3783 goto out;
3786 /* Is it a referral? */
3787 if (vn_is_nfs_reparse(vp, cs->cr) && client_is_downrev(req)) {
3789 is_referral = 1;
3791 } else {
3793 is_referral = 0;
3795 if (vp->v_type == VDIR) {
3796 *cs->statusp = resp->status = NFS4ERR_ISDIR;
3797 goto out;
3800 if (vp->v_type != VLNK) {
3801 *cs->statusp = resp->status = NFS4ERR_INVAL;
3802 goto out;
3807 va.va_mask = AT_MODE;
3808 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
3809 if (error) {
3810 *cs->statusp = resp->status = puterrno4(error);
3811 goto out;
3814 if (MANDLOCK(vp, va.va_mode)) {
3815 *cs->statusp = resp->status = NFS4ERR_ACCESS;
3816 goto out;
3819 data = kmem_alloc(MAXPATHLEN + 1, KM_SLEEP);
3821 if (is_referral) {
3822 char *s;
3823 size_t strsz;
3825 /* Get an artificial symlink based on a referral */
3826 s = build_symlink(vp, cs->cr, &strsz);
3827 global_svstat_ptr[4][NFS_REFERLINKS].value.ui64++;
3828 DTRACE_PROBE2(nfs4serv__func__referral__reflink,
3829 vnode_t *, vp, char *, s);
3830 if (s == NULL)
3831 error = EINVAL;
3832 else {
3833 error = 0;
3834 (void) strlcpy(data, s, MAXPATHLEN + 1);
3835 kmem_free(s, strsz);
3838 } else {
3840 iov.iov_base = data;
3841 iov.iov_len = MAXPATHLEN;
3842 uio.uio_iov = &iov;
3843 uio.uio_iovcnt = 1;
3844 uio.uio_segflg = UIO_SYSSPACE;
3845 uio.uio_extflg = UIO_COPY_CACHED;
3846 uio.uio_loffset = 0;
3847 uio.uio_resid = MAXPATHLEN;
3849 error = VOP_READLINK(vp, &uio, cs->cr, NULL);
3851 if (!error)
3852 *(data + MAXPATHLEN - uio.uio_resid) = '\0';
3855 if (error) {
3856 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3857 *cs->statusp = resp->status = puterrno4(error);
3858 goto out;
3861 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
3862 name = nfscmd_convname(ca, cs->exi, data, NFSCMD_CONV_OUTBOUND,
3863 MAXPATHLEN + 1);
3865 if (name == NULL) {
3867 * Even though the conversion failed, we return
3868 * something. We just don't translate it.
3870 name = data;
3874 * treat link name as data
3876 (void) str_to_utf8(name, &resp->link);
3878 if (name != data)
3879 kmem_free(name, MAXPATHLEN + 1);
3880 kmem_free((caddr_t)data, (uint_t)MAXPATHLEN + 1);
3881 *cs->statusp = resp->status = NFS4_OK;
3883 out:
3884 DTRACE_NFSV4_2(op__readlink__done, struct compound_state *, cs,
3885 READLINK4res *, resp);
3888 static void
3889 rfs4_op_readlink_free(nfs_resop4 *resop)
3891 READLINK4res *resp = &resop->nfs_resop4_u.opreadlink;
3892 utf8string *symlink = &resp->link;
3894 if (symlink->utf8string_val) {
3895 UTF8STRING_FREE(*symlink)
3900 * release_lockowner:
3901 * Release any state associated with the supplied
3902 * lockowner. Note if any lo_state is holding locks we will not
3903 * rele that lo_state and thus the lockowner will not be destroyed.
3904 * A client using lock after the lock owner stateid has been released
3905 * will suffer the consequence of NFS4ERR_BAD_STATEID and would have
3906 * to reissue the lock with new_lock_owner set to TRUE.
3907 * args: lock_owner
3908 * res: status
3910 /* ARGSUSED */
3911 static void
3912 rfs4_op_release_lockowner(nfs_argop4 *argop, nfs_resop4 *resop,
3913 struct svc_req *req, struct compound_state *cs)
3915 RELEASE_LOCKOWNER4args *ap = &argop->nfs_argop4_u.oprelease_lockowner;
3916 RELEASE_LOCKOWNER4res *resp = &resop->nfs_resop4_u.oprelease_lockowner;
3917 rfs4_lockowner_t *lo;
3918 rfs4_openowner_t *oo;
3919 rfs4_state_t *sp;
3920 rfs4_lo_state_t *lsp;
3921 rfs4_client_t *cp;
3922 bool_t create = FALSE;
3923 locklist_t *llist;
3924 sysid_t sysid;
3926 DTRACE_NFSV4_2(op__release__lockowner__start, struct compound_state *,
3927 cs, RELEASE_LOCKOWNER4args *, ap);
3929 /* Make sure there is a clientid around for this request */
3930 cp = rfs4_findclient_by_id(ap->lock_owner.clientid, FALSE);
3932 if (cp == NULL) {
3933 *cs->statusp = resp->status =
3934 rfs4_check_clientid(&ap->lock_owner.clientid, 0);
3935 goto out;
3937 rfs4_client_rele(cp);
3939 lo = rfs4_findlockowner(&ap->lock_owner, &create);
3940 if (lo == NULL) {
3941 *cs->statusp = resp->status = NFS4_OK;
3942 goto out;
3944 ASSERT(lo->rl_client != NULL);
3947 * Check for EXPIRED client. If so will reap state with in a lease
3948 * period or on next set_clientid_confirm step
3950 if (rfs4_lease_expired(lo->rl_client)) {
3951 rfs4_lockowner_rele(lo);
3952 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
3953 goto out;
3957 * If no sysid has been assigned, then no locks exist; just return.
3959 rfs4_dbe_lock(lo->rl_client->rc_dbe);
3960 if (lo->rl_client->rc_sysidt == LM_NOSYSID) {
3961 rfs4_lockowner_rele(lo);
3962 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3963 goto out;
3966 sysid = lo->rl_client->rc_sysidt;
3967 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
3970 * Mark the lockowner invalid.
3972 rfs4_dbe_hide(lo->rl_dbe);
3975 * sysid-pid pair should now not be used since the lockowner is
3976 * invalid. If the client were to instantiate the lockowner again
3977 * it would be assigned a new pid. Thus we can get the list of
3978 * current locks.
3981 llist = flk_get_active_locks(sysid, lo->rl_pid);
3982 /* If we are still holding locks fail */
3983 if (llist != NULL) {
3985 *cs->statusp = resp->status = NFS4ERR_LOCKS_HELD;
3987 flk_free_locklist(llist);
3989 * We need to unhide the lockowner so the client can
3990 * try it again. The bad thing here is if the client
3991 * has a logic error that took it here in the first place
3992 * he probably has lost accounting of the locks that it
3993 * is holding. So we may have dangling state until the
3994 * open owner state is reaped via close. One scenario
3995 * that could possibly occur is that the client has
3996 * sent the unlock request(s) in separate threads
3997 * and has not waited for the replies before sending the
3998 * RELEASE_LOCKOWNER request. Presumably, it would expect
3999 * and deal appropriately with NFS4ERR_LOCKS_HELD, by
4000 * reissuing the request.
4002 rfs4_dbe_unhide(lo->rl_dbe);
4003 rfs4_lockowner_rele(lo);
4004 goto out;
4008 * For the corresponding client we need to check each open
4009 * owner for any opens that have lockowner state associated
4010 * with this lockowner.
4013 rfs4_dbe_lock(lo->rl_client->rc_dbe);
4014 for (oo = list_head(&lo->rl_client->rc_openownerlist); oo != NULL;
4015 oo = list_next(&lo->rl_client->rc_openownerlist, oo)) {
4017 rfs4_dbe_lock(oo->ro_dbe);
4018 for (sp = list_head(&oo->ro_statelist); sp != NULL;
4019 sp = list_next(&oo->ro_statelist, sp)) {
4021 rfs4_dbe_lock(sp->rs_dbe);
4022 for (lsp = list_head(&sp->rs_lostatelist);
4023 lsp != NULL;
4024 lsp = list_next(&sp->rs_lostatelist, lsp)) {
4025 if (lsp->rls_locker == lo) {
4026 rfs4_dbe_lock(lsp->rls_dbe);
4027 rfs4_dbe_invalidate(lsp->rls_dbe);
4028 rfs4_dbe_unlock(lsp->rls_dbe);
4031 rfs4_dbe_unlock(sp->rs_dbe);
4033 rfs4_dbe_unlock(oo->ro_dbe);
4035 rfs4_dbe_unlock(lo->rl_client->rc_dbe);
4037 rfs4_lockowner_rele(lo);
4039 *cs->statusp = resp->status = NFS4_OK;
4041 out:
4042 DTRACE_NFSV4_2(op__release__lockowner__done, struct compound_state *,
4043 cs, RELEASE_LOCKOWNER4res *, resp);
4047 * short utility function to lookup a file and recall the delegation
4049 static rfs4_file_t *
4050 rfs4_lookup_and_findfile(vnode_t *dvp, char *nm, vnode_t **vpp,
4051 int *lkup_error, cred_t *cr)
4053 vnode_t *vp;
4054 rfs4_file_t *fp = NULL;
4055 bool_t fcreate = FALSE;
4056 int error;
4058 if (vpp)
4059 *vpp = NULL;
4061 if ((error = VOP_LOOKUP(dvp, nm, &vp, NULL, 0, NULL, cr, NULL, NULL,
4062 NULL)) == 0) {
4063 if (vp->v_type == VREG)
4064 fp = rfs4_findfile(vp, NULL, &fcreate);
4065 if (vpp)
4066 *vpp = vp;
4067 else
4068 VN_RELE(vp);
4071 if (lkup_error)
4072 *lkup_error = error;
4074 return (fp);
4078 * remove: args: CURRENT_FH: directory; name.
4079 * res: status. If success - CURRENT_FH unchanged, return change_info
4080 * for directory.
4082 /* ARGSUSED */
4083 static void
4084 rfs4_op_remove(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4085 struct compound_state *cs)
4087 REMOVE4args *args = &argop->nfs_argop4_u.opremove;
4088 REMOVE4res *resp = &resop->nfs_resop4_u.opremove;
4089 int error;
4090 vnode_t *dvp, *vp;
4091 struct vattr bdva, idva, adva;
4092 char *nm;
4093 uint_t len;
4094 rfs4_file_t *fp;
4095 int in_crit = 0;
4096 bslabel_t *clabel;
4097 struct sockaddr *ca;
4098 char *name = NULL;
4100 DTRACE_NFSV4_2(op__remove__start, struct compound_state *, cs,
4101 REMOVE4args *, args);
4103 /* CURRENT_FH: directory */
4104 dvp = cs->vp;
4105 if (dvp == NULL) {
4106 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4107 goto out;
4110 if (cs->access == CS_ACCESS_DENIED) {
4111 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4112 goto out;
4116 * If there is an unshared filesystem mounted on this vnode,
4117 * Do not allow to remove anything in this directory.
4119 if (vn_ismntpt(dvp)) {
4120 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4121 goto out;
4124 if (dvp->v_type != VDIR) {
4125 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4126 goto out;
4129 if (!utf8_dir_verify(&args->target)) {
4130 *cs->statusp = resp->status = NFS4ERR_INVAL;
4131 goto out;
4135 * Lookup the file so that we can check if it's a directory
4137 nm = utf8_to_fn(&args->target, &len, NULL);
4138 if (nm == NULL) {
4139 *cs->statusp = resp->status = NFS4ERR_INVAL;
4140 goto out;
4143 if (len > MAXNAMELEN) {
4144 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4145 kmem_free(nm, len);
4146 goto out;
4149 if (rdonly4(cs->exi, cs->vp, req)) {
4150 *cs->statusp = resp->status = NFS4ERR_ROFS;
4151 kmem_free(nm, len);
4152 goto out;
4155 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4156 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
4157 MAXPATHLEN + 1);
4159 if (name == NULL) {
4160 *cs->statusp = resp->status = NFS4ERR_INVAL;
4161 kmem_free(nm, len);
4162 goto out;
4166 * Lookup the file to determine type and while we are see if
4167 * there is a file struct around and check for delegation.
4168 * We don't need to acquire va_seq before this lookup, if
4169 * it causes an update, cinfo.before will not match, which will
4170 * trigger a cache flush even if atomic is TRUE.
4172 if (fp = rfs4_lookup_and_findfile(dvp, name, &vp, &error, cs->cr)) {
4173 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4174 NULL)) {
4175 VN_RELE(vp);
4176 rfs4_file_rele(fp);
4177 *cs->statusp = resp->status = NFS4ERR_DELAY;
4178 if (nm != name)
4179 kmem_free(name, MAXPATHLEN + 1);
4180 kmem_free(nm, len);
4181 goto out;
4185 /* Didn't find anything to remove */
4186 if (vp == NULL) {
4187 *cs->statusp = resp->status = error;
4188 if (nm != name)
4189 kmem_free(name, MAXPATHLEN + 1);
4190 kmem_free(nm, len);
4191 goto out;
4194 if (nbl_need_check(vp)) {
4195 nbl_start_crit(vp, RW_READER);
4196 in_crit = 1;
4197 if (nbl_conflict(vp, NBL_REMOVE, 0, 0, 0, NULL)) {
4198 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4199 if (nm != name)
4200 kmem_free(name, MAXPATHLEN + 1);
4201 kmem_free(nm, len);
4202 nbl_end_crit(vp);
4203 VN_RELE(vp);
4204 if (fp) {
4205 rfs4_clear_dont_grant(fp);
4206 rfs4_file_rele(fp);
4208 goto out;
4212 /* check label before allowing removal */
4213 if (is_system_labeled()) {
4214 ASSERT(req->rq_label != NULL);
4215 clabel = req->rq_label;
4216 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
4217 "got client label from request(1)",
4218 struct svc_req *, req);
4219 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4220 if (!do_rfs_label_check(clabel, vp, EQUALITY_CHECK,
4221 cs->exi)) {
4222 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4223 if (name != nm)
4224 kmem_free(name, MAXPATHLEN + 1);
4225 kmem_free(nm, len);
4226 if (in_crit)
4227 nbl_end_crit(vp);
4228 VN_RELE(vp);
4229 if (fp) {
4230 rfs4_clear_dont_grant(fp);
4231 rfs4_file_rele(fp);
4233 goto out;
4238 /* Get dir "before" change value */
4239 bdva.va_mask = AT_CTIME|AT_SEQ;
4240 error = VOP_GETATTR(dvp, &bdva, 0, cs->cr, NULL);
4241 if (error) {
4242 *cs->statusp = resp->status = puterrno4(error);
4243 if (nm != name)
4244 kmem_free(name, MAXPATHLEN + 1);
4245 kmem_free(nm, len);
4246 if (in_crit)
4247 nbl_end_crit(vp);
4248 VN_RELE(vp);
4249 if (fp) {
4250 rfs4_clear_dont_grant(fp);
4251 rfs4_file_rele(fp);
4253 goto out;
4255 NFS4_SET_FATTR4_CHANGE(resp->cinfo.before, bdva.va_ctime)
4257 /* Actually do the REMOVE operation */
4258 if (vp->v_type == VDIR) {
4260 * Can't remove a directory that has a mounted-on filesystem.
4262 if (vn_ismntpt(vp)) {
4263 error = EACCES;
4264 } else {
4266 * System V defines rmdir to return EEXIST,
4267 * not ENOTEMPTY, if the directory is not
4268 * empty. A System V NFS server needs to map
4269 * NFS4ERR_EXIST to NFS4ERR_NOTEMPTY to
4270 * transmit over the wire.
4272 if ((error = VOP_RMDIR(dvp, name, rootdir, cs->cr,
4273 NULL, 0)) == EEXIST)
4274 error = ENOTEMPTY;
4276 } else {
4277 if ((error = VOP_REMOVE(dvp, name, cs->cr, NULL, 0)) == 0 &&
4278 fp != NULL) {
4279 struct vattr va;
4280 vnode_t *tvp;
4282 rfs4_dbe_lock(fp->rf_dbe);
4283 tvp = fp->rf_vp;
4284 if (tvp)
4285 VN_HOLD(tvp);
4286 rfs4_dbe_unlock(fp->rf_dbe);
4288 if (tvp) {
4290 * This is va_seq safe because we are not
4291 * manipulating dvp.
4293 va.va_mask = AT_NLINK;
4294 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4295 va.va_nlink == 0) {
4296 /* Remove state on file remove */
4297 if (in_crit) {
4298 nbl_end_crit(vp);
4299 in_crit = 0;
4301 rfs4_close_all_state(fp);
4303 VN_RELE(tvp);
4308 if (in_crit)
4309 nbl_end_crit(vp);
4310 VN_RELE(vp);
4312 if (fp) {
4313 rfs4_clear_dont_grant(fp);
4314 rfs4_file_rele(fp);
4316 if (nm != name)
4317 kmem_free(name, MAXPATHLEN + 1);
4318 kmem_free(nm, len);
4320 if (error) {
4321 *cs->statusp = resp->status = puterrno4(error);
4322 goto out;
4326 * Get the initial "after" sequence number, if it fails, set to zero
4328 idva.va_mask = AT_SEQ;
4329 if (VOP_GETATTR(dvp, &idva, 0, cs->cr, NULL))
4330 idva.va_seq = 0;
4333 * Force modified data and metadata out to stable storage.
4335 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
4338 * Get "after" change value, if it fails, simply return the
4339 * before value.
4341 adva.va_mask = AT_CTIME|AT_SEQ;
4342 if (VOP_GETATTR(dvp, &adva, 0, cs->cr, NULL)) {
4343 adva.va_ctime = bdva.va_ctime;
4344 adva.va_seq = 0;
4347 NFS4_SET_FATTR4_CHANGE(resp->cinfo.after, adva.va_ctime)
4350 * The cinfo.atomic = TRUE only if we have
4351 * non-zero va_seq's, and it has incremented by exactly one
4352 * during the VOP_REMOVE/RMDIR and it didn't change during
4353 * the VOP_FSYNC.
4355 if (bdva.va_seq && idva.va_seq && adva.va_seq &&
4356 idva.va_seq == (bdva.va_seq + 1) && idva.va_seq == adva.va_seq)
4357 resp->cinfo.atomic = TRUE;
4358 else
4359 resp->cinfo.atomic = FALSE;
4361 *cs->statusp = resp->status = NFS4_OK;
4363 out:
4364 DTRACE_NFSV4_2(op__remove__done, struct compound_state *, cs,
4365 REMOVE4res *, resp);
4369 * rename: args: SAVED_FH: from directory, CURRENT_FH: target directory,
4370 * oldname and newname.
4371 * res: status. If success - CURRENT_FH unchanged, return change_info
4372 * for both from and target directories.
4374 /* ARGSUSED */
4375 static void
4376 rfs4_op_rename(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4377 struct compound_state *cs)
4379 RENAME4args *args = &argop->nfs_argop4_u.oprename;
4380 RENAME4res *resp = &resop->nfs_resop4_u.oprename;
4381 int error;
4382 vnode_t *odvp;
4383 vnode_t *ndvp;
4384 vnode_t *srcvp, *targvp;
4385 struct vattr obdva, oidva, oadva;
4386 struct vattr nbdva, nidva, nadva;
4387 char *onm, *nnm;
4388 uint_t olen, nlen;
4389 rfs4_file_t *fp, *sfp;
4390 int in_crit_src, in_crit_targ;
4391 int fp_rele_grant_hold, sfp_rele_grant_hold;
4392 bslabel_t *clabel;
4393 struct sockaddr *ca;
4394 char *converted_onm = NULL;
4395 char *converted_nnm = NULL;
4397 DTRACE_NFSV4_2(op__rename__start, struct compound_state *, cs,
4398 RENAME4args *, args);
4400 fp = sfp = NULL;
4401 srcvp = targvp = NULL;
4402 in_crit_src = in_crit_targ = 0;
4403 fp_rele_grant_hold = sfp_rele_grant_hold = 0;
4405 /* CURRENT_FH: target directory */
4406 ndvp = cs->vp;
4407 if (ndvp == NULL) {
4408 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4409 goto out;
4412 /* SAVED_FH: from directory */
4413 odvp = cs->saved_vp;
4414 if (odvp == NULL) {
4415 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4416 goto out;
4419 if (cs->access == CS_ACCESS_DENIED) {
4420 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4421 goto out;
4425 * If there is an unshared filesystem mounted on this vnode,
4426 * do not allow to rename objects in this directory.
4428 if (vn_ismntpt(odvp)) {
4429 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4430 goto out;
4434 * If there is an unshared filesystem mounted on this vnode,
4435 * do not allow to rename to this directory.
4437 if (vn_ismntpt(ndvp)) {
4438 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4439 goto out;
4442 if (odvp->v_type != VDIR || ndvp->v_type != VDIR) {
4443 *cs->statusp = resp->status = NFS4ERR_NOTDIR;
4444 goto out;
4447 if (cs->saved_exi != cs->exi) {
4448 *cs->statusp = resp->status = NFS4ERR_XDEV;
4449 goto out;
4452 if (!utf8_dir_verify(&args->oldname)) {
4453 *cs->statusp = resp->status = NFS4ERR_INVAL;
4454 goto out;
4457 if (!utf8_dir_verify(&args->newname)) {
4458 *cs->statusp = resp->status = NFS4ERR_INVAL;
4459 goto out;
4462 onm = utf8_to_fn(&args->oldname, &olen, NULL);
4463 if (onm == NULL) {
4464 *cs->statusp = resp->status = NFS4ERR_INVAL;
4465 goto out;
4467 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
4468 nlen = MAXPATHLEN + 1;
4469 converted_onm = nfscmd_convname(ca, cs->exi, onm, NFSCMD_CONV_INBOUND,
4470 nlen);
4472 if (converted_onm == NULL) {
4473 *cs->statusp = resp->status = NFS4ERR_INVAL;
4474 kmem_free(onm, olen);
4475 goto out;
4478 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4479 if (nnm == NULL) {
4480 *cs->statusp = resp->status = NFS4ERR_INVAL;
4481 if (onm != converted_onm)
4482 kmem_free(converted_onm, MAXPATHLEN + 1);
4483 kmem_free(onm, olen);
4484 goto out;
4486 converted_nnm = nfscmd_convname(ca, cs->exi, nnm, NFSCMD_CONV_INBOUND,
4487 MAXPATHLEN + 1);
4489 if (converted_nnm == NULL) {
4490 *cs->statusp = resp->status = NFS4ERR_INVAL;
4491 kmem_free(nnm, nlen);
4492 nnm = NULL;
4493 if (onm != converted_onm)
4494 kmem_free(converted_onm, MAXPATHLEN + 1);
4495 kmem_free(onm, olen);
4496 goto out;
4500 if (olen > MAXNAMELEN || nlen > MAXNAMELEN) {
4501 *cs->statusp = resp->status = NFS4ERR_NAMETOOLONG;
4502 kmem_free(onm, olen);
4503 kmem_free(nnm, nlen);
4504 goto out;
4508 if (rdonly4(cs->exi, cs->vp, req)) {
4509 *cs->statusp = resp->status = NFS4ERR_ROFS;
4510 if (onm != converted_onm)
4511 kmem_free(converted_onm, MAXPATHLEN + 1);
4512 kmem_free(onm, olen);
4513 if (nnm != converted_nnm)
4514 kmem_free(converted_nnm, MAXPATHLEN + 1);
4515 kmem_free(nnm, nlen);
4516 goto out;
4519 /* check label of the target dir */
4520 if (is_system_labeled()) {
4521 ASSERT(req->rq_label != NULL);
4522 clabel = req->rq_label;
4523 DTRACE_PROBE2(tx__rfs4__log__info__oprename__clabel, char *,
4524 "got client label from request(1)",
4525 struct svc_req *, req);
4526 if (!blequal(&l_admin_low->tsl_label, clabel)) {
4527 if (!do_rfs_label_check(clabel, ndvp,
4528 EQUALITY_CHECK, cs->exi)) {
4529 *cs->statusp = resp->status = NFS4ERR_ACCESS;
4530 goto err_out;
4536 * Is the source a file and have a delegation?
4537 * We don't need to acquire va_seq before these lookups, if
4538 * it causes an update, cinfo.before will not match, which will
4539 * trigger a cache flush even if atomic is TRUE.
4541 if (sfp = rfs4_lookup_and_findfile(odvp, converted_onm, &srcvp,
4542 &error, cs->cr)) {
4543 if (rfs4_check_delegated_byfp(FWRITE, sfp, TRUE, TRUE, TRUE,
4544 NULL)) {
4545 *cs->statusp = resp->status = NFS4ERR_DELAY;
4546 goto err_out;
4550 if (srcvp == NULL) {
4551 *cs->statusp = resp->status = puterrno4(error);
4552 if (onm != converted_onm)
4553 kmem_free(converted_onm, MAXPATHLEN + 1);
4554 kmem_free(onm, olen);
4555 if (nnm != converted_nnm)
4556 kmem_free(converted_nnm, MAXPATHLEN + 1);
4557 kmem_free(nnm, nlen);
4558 goto out;
4561 sfp_rele_grant_hold = 1;
4563 /* Does the destination exist and a file and have a delegation? */
4564 if (fp = rfs4_lookup_and_findfile(ndvp, converted_nnm, &targvp,
4565 NULL, cs->cr)) {
4566 if (rfs4_check_delegated_byfp(FWRITE, fp, TRUE, TRUE, TRUE,
4567 NULL)) {
4568 *cs->statusp = resp->status = NFS4ERR_DELAY;
4569 goto err_out;
4572 fp_rele_grant_hold = 1;
4575 /* Check for NBMAND lock on both source and target */
4576 if (nbl_need_check(srcvp)) {
4577 nbl_start_crit(srcvp, RW_READER);
4578 in_crit_src = 1;
4579 if (nbl_conflict(srcvp, NBL_RENAME, 0, 0, 0, NULL)) {
4580 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4581 goto err_out;
4585 if (targvp && nbl_need_check(targvp)) {
4586 nbl_start_crit(targvp, RW_READER);
4587 in_crit_targ = 1;
4588 if (nbl_conflict(targvp, NBL_REMOVE, 0, 0, 0, NULL)) {
4589 *cs->statusp = resp->status = NFS4ERR_FILE_OPEN;
4590 goto err_out;
4594 /* Get source "before" change value */
4595 obdva.va_mask = AT_CTIME|AT_SEQ;
4596 error = VOP_GETATTR(odvp, &obdva, 0, cs->cr, NULL);
4597 if (!error) {
4598 nbdva.va_mask = AT_CTIME|AT_SEQ;
4599 error = VOP_GETATTR(ndvp, &nbdva, 0, cs->cr, NULL);
4601 if (error) {
4602 *cs->statusp = resp->status = puterrno4(error);
4603 goto err_out;
4606 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.before, obdva.va_ctime)
4607 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.before, nbdva.va_ctime)
4609 if ((error = VOP_RENAME(odvp, converted_onm, ndvp, converted_nnm,
4610 cs->cr, NULL, 0)) == 0 && fp != NULL) {
4611 struct vattr va;
4612 vnode_t *tvp;
4614 rfs4_dbe_lock(fp->rf_dbe);
4615 tvp = fp->rf_vp;
4616 if (tvp)
4617 VN_HOLD(tvp);
4618 rfs4_dbe_unlock(fp->rf_dbe);
4620 if (tvp) {
4621 va.va_mask = AT_NLINK;
4622 if (!VOP_GETATTR(tvp, &va, 0, cs->cr, NULL) &&
4623 va.va_nlink == 0) {
4624 /* The file is gone and so should the state */
4625 if (in_crit_targ) {
4626 nbl_end_crit(targvp);
4627 in_crit_targ = 0;
4629 rfs4_close_all_state(fp);
4631 VN_RELE(tvp);
4634 if (error == 0)
4635 vn_renamepath(ndvp, srcvp, nnm, nlen - 1);
4637 if (in_crit_src)
4638 nbl_end_crit(srcvp);
4639 if (srcvp)
4640 VN_RELE(srcvp);
4641 if (in_crit_targ)
4642 nbl_end_crit(targvp);
4643 if (targvp)
4644 VN_RELE(targvp);
4646 if (sfp) {
4647 rfs4_clear_dont_grant(sfp);
4648 rfs4_file_rele(sfp);
4650 if (fp) {
4651 rfs4_clear_dont_grant(fp);
4652 rfs4_file_rele(fp);
4655 if (converted_onm != onm)
4656 kmem_free(converted_onm, MAXPATHLEN + 1);
4657 kmem_free(onm, olen);
4658 if (converted_nnm != nnm)
4659 kmem_free(converted_nnm, MAXPATHLEN + 1);
4660 kmem_free(nnm, nlen);
4663 * Get the initial "after" sequence number, if it fails, set to zero
4665 oidva.va_mask = AT_SEQ;
4666 if (VOP_GETATTR(odvp, &oidva, 0, cs->cr, NULL))
4667 oidva.va_seq = 0;
4669 nidva.va_mask = AT_SEQ;
4670 if (VOP_GETATTR(ndvp, &nidva, 0, cs->cr, NULL))
4671 nidva.va_seq = 0;
4674 * Force modified data and metadata out to stable storage.
4676 (void) VOP_FSYNC(odvp, 0, cs->cr, NULL);
4677 (void) VOP_FSYNC(ndvp, 0, cs->cr, NULL);
4679 if (error) {
4680 *cs->statusp = resp->status = puterrno4(error);
4681 goto out;
4685 * Get "after" change values, if it fails, simply return the
4686 * before value.
4688 oadva.va_mask = AT_CTIME|AT_SEQ;
4689 if (VOP_GETATTR(odvp, &oadva, 0, cs->cr, NULL)) {
4690 oadva.va_ctime = obdva.va_ctime;
4691 oadva.va_seq = 0;
4694 nadva.va_mask = AT_CTIME|AT_SEQ;
4695 if (VOP_GETATTR(odvp, &nadva, 0, cs->cr, NULL)) {
4696 nadva.va_ctime = nbdva.va_ctime;
4697 nadva.va_seq = 0;
4700 NFS4_SET_FATTR4_CHANGE(resp->source_cinfo.after, oadva.va_ctime)
4701 NFS4_SET_FATTR4_CHANGE(resp->target_cinfo.after, nadva.va_ctime)
4704 * The cinfo.atomic = TRUE only if we have
4705 * non-zero va_seq's, and it has incremented by exactly one
4706 * during the VOP_RENAME and it didn't change during the VOP_FSYNC.
4708 if (obdva.va_seq && oidva.va_seq && oadva.va_seq &&
4709 oidva.va_seq == (obdva.va_seq + 1) && oidva.va_seq == oadva.va_seq)
4710 resp->source_cinfo.atomic = TRUE;
4711 else
4712 resp->source_cinfo.atomic = FALSE;
4714 if (nbdva.va_seq && nidva.va_seq && nadva.va_seq &&
4715 nidva.va_seq == (nbdva.va_seq + 1) && nidva.va_seq == nadva.va_seq)
4716 resp->target_cinfo.atomic = TRUE;
4717 else
4718 resp->target_cinfo.atomic = FALSE;
4720 #ifdef VOLATILE_FH_TEST
4722 extern void add_volrnm_fh(struct exportinfo *, vnode_t *);
4725 * Add the renamed file handle to the volatile rename list
4727 if (cs->exi->exi_export.ex_flags & EX_VOLRNM) {
4728 /* file handles may expire on rename */
4729 vnode_t *vp;
4731 nnm = utf8_to_fn(&args->newname, &nlen, NULL);
4733 * Already know that nnm will be a valid string
4735 error = VOP_LOOKUP(ndvp, nnm, &vp, NULL, 0, NULL, cs->cr,
4736 NULL, NULL, NULL);
4737 kmem_free(nnm, nlen);
4738 if (!error) {
4739 add_volrnm_fh(cs->exi, vp);
4740 VN_RELE(vp);
4744 #endif /* VOLATILE_FH_TEST */
4746 *cs->statusp = resp->status = NFS4_OK;
4747 out:
4748 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4749 RENAME4res *, resp);
4750 return;
4752 err_out:
4753 if (onm != converted_onm)
4754 kmem_free(converted_onm, MAXPATHLEN + 1);
4755 if (onm != NULL)
4756 kmem_free(onm, olen);
4757 if (nnm != converted_nnm)
4758 kmem_free(converted_nnm, MAXPATHLEN + 1);
4759 if (nnm != NULL)
4760 kmem_free(nnm, nlen);
4762 if (in_crit_src) nbl_end_crit(srcvp);
4763 if (in_crit_targ) nbl_end_crit(targvp);
4764 if (targvp) VN_RELE(targvp);
4765 if (srcvp) VN_RELE(srcvp);
4766 if (sfp) {
4767 if (sfp_rele_grant_hold) rfs4_clear_dont_grant(sfp);
4768 rfs4_file_rele(sfp);
4770 if (fp) {
4771 if (fp_rele_grant_hold) rfs4_clear_dont_grant(fp);
4772 rfs4_file_rele(fp);
4775 DTRACE_NFSV4_2(op__rename__done, struct compound_state *, cs,
4776 RENAME4res *, resp);
4779 /* ARGSUSED */
4780 static void
4781 rfs4_op_renew(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4782 struct compound_state *cs)
4784 RENEW4args *args = &argop->nfs_argop4_u.oprenew;
4785 RENEW4res *resp = &resop->nfs_resop4_u.oprenew;
4786 rfs4_client_t *cp;
4788 DTRACE_NFSV4_2(op__renew__start, struct compound_state *, cs,
4789 RENEW4args *, args);
4791 if ((cp = rfs4_findclient_by_id(args->clientid, FALSE)) == NULL) {
4792 *cs->statusp = resp->status =
4793 rfs4_check_clientid(&args->clientid, 0);
4794 goto out;
4797 if (rfs4_lease_expired(cp)) {
4798 rfs4_client_rele(cp);
4799 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
4800 goto out;
4803 rfs4_update_lease(cp);
4805 mutex_enter(cp->rc_cbinfo.cb_lock);
4806 if (cp->rc_cbinfo.cb_notified_of_cb_path_down == FALSE) {
4807 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
4808 *cs->statusp = resp->status = NFS4ERR_CB_PATH_DOWN;
4809 } else {
4810 *cs->statusp = resp->status = NFS4_OK;
4812 mutex_exit(cp->rc_cbinfo.cb_lock);
4814 rfs4_client_rele(cp);
4816 out:
4817 DTRACE_NFSV4_2(op__renew__done, struct compound_state *, cs,
4818 RENEW4res *, resp);
4821 /* ARGSUSED */
4822 static void
4823 rfs4_op_restorefh(nfs_argop4 *args, nfs_resop4 *resop, struct svc_req *req,
4824 struct compound_state *cs)
4826 RESTOREFH4res *resp = &resop->nfs_resop4_u.oprestorefh;
4828 DTRACE_NFSV4_1(op__restorefh__start, struct compound_state *, cs);
4830 /* No need to check cs->access - we are not accessing any object */
4831 if ((cs->saved_vp == NULL) || (cs->saved_fh.nfs_fh4_val == NULL)) {
4832 *cs->statusp = resp->status = NFS4ERR_RESTOREFH;
4833 goto out;
4835 if (cs->vp != NULL) {
4836 VN_RELE(cs->vp);
4838 cs->vp = cs->saved_vp;
4839 cs->saved_vp = NULL;
4840 cs->exi = cs->saved_exi;
4841 nfs_fh4_copy(&cs->saved_fh, &cs->fh);
4842 *cs->statusp = resp->status = NFS4_OK;
4843 cs->deleg = FALSE;
4845 out:
4846 DTRACE_NFSV4_2(op__restorefh__done, struct compound_state *, cs,
4847 RESTOREFH4res *, resp);
4850 /* ARGSUSED */
4851 static void
4852 rfs4_op_savefh(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
4853 struct compound_state *cs)
4855 SAVEFH4res *resp = &resop->nfs_resop4_u.opsavefh;
4857 DTRACE_NFSV4_1(op__savefh__start, struct compound_state *, cs);
4859 /* No need to check cs->access - we are not accessing any object */
4860 if (cs->vp == NULL) {
4861 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
4862 goto out;
4864 if (cs->saved_vp != NULL) {
4865 VN_RELE(cs->saved_vp);
4867 cs->saved_vp = cs->vp;
4868 VN_HOLD(cs->saved_vp);
4869 cs->saved_exi = cs->exi;
4871 * since SAVEFH is fairly rare, don't alloc space for its fh
4872 * unless necessary.
4874 if (cs->saved_fh.nfs_fh4_val == NULL) {
4875 cs->saved_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
4877 nfs_fh4_copy(&cs->fh, &cs->saved_fh);
4878 *cs->statusp = resp->status = NFS4_OK;
4880 out:
4881 DTRACE_NFSV4_2(op__savefh__done, struct compound_state *, cs,
4882 SAVEFH4res *, resp);
4886 * rfs4_verify_attr is called when nfsv4 Setattr failed, but we wish to
4887 * return the bitmap of attrs that were set successfully. It is also
4888 * called by Verify/Nverify to test the vattr/vfsstat attrs. It should
4889 * always be called only after rfs4_do_set_attrs().
4891 * Verify that the attributes are same as the expected ones. sargp->vap
4892 * and sargp->sbp contain the input attributes as translated from fattr4.
4894 * This function verifies only the attrs that correspond to a vattr or
4895 * vfsstat struct. That is because of the extra step needed to get the
4896 * corresponding system structs. Other attributes have already been set or
4897 * verified by do_rfs4_set_attrs.
4899 * Return 0 if all attrs match, -1 if some don't, error if error processing.
4901 static int
4902 rfs4_verify_attr(struct nfs4_svgetit_arg *sargp,
4903 bitmap4 *resp, struct nfs4_ntov_table *ntovp)
4905 int error, ret_error = 0;
4906 int i, k;
4907 uint_t sva_mask = sargp->vap->va_mask;
4908 uint_t vbit;
4909 union nfs4_attr_u *na;
4910 uint8_t *amap;
4911 bool_t getsb = ntovp->vfsstat;
4913 if (sva_mask != 0) {
4915 * Okay to overwrite sargp->vap because we verify based
4916 * on the incoming values.
4918 ret_error = VOP_GETATTR(sargp->cs->vp, sargp->vap, 0,
4919 sargp->cs->cr, NULL);
4920 if (ret_error) {
4921 if (resp == NULL)
4922 return (ret_error);
4924 * Must return bitmap of successful attrs
4926 sva_mask = 0; /* to prevent checking vap later */
4927 } else {
4929 * Some file systems clobber va_mask. it is probably
4930 * wrong of them to do so, nonethless we practice
4931 * defensive coding.
4932 * See bug id 4276830.
4934 sargp->vap->va_mask = sva_mask;
4938 if (getsb) {
4940 * Now get the superblock and loop on the bitmap, as there is
4941 * no simple way of translating from superblock to bitmap4.
4943 ret_error = VFS_STATVFS(sargp->cs->vp->v_vfsp, sargp->sbp);
4944 if (ret_error) {
4945 if (resp == NULL)
4946 goto errout;
4947 getsb = FALSE;
4952 * Now loop and verify each attribute which getattr returned
4953 * whether it's the same as the input.
4955 if (resp == NULL && !getsb && (sva_mask == 0))
4956 goto errout;
4958 na = ntovp->na;
4959 amap = ntovp->amap;
4960 k = 0;
4961 for (i = 0; i < ntovp->attrcnt; i++, na++, amap++) {
4962 k = *amap;
4963 ASSERT(nfs4_ntov_map[k].nval == k);
4964 vbit = nfs4_ntov_map[k].vbit;
4967 * If vattr attribute but VOP_GETATTR failed, or it's
4968 * superblock attribute but VFS_STATVFS failed, skip
4970 if (vbit) {
4971 if ((vbit & sva_mask) == 0)
4972 continue;
4973 } else if (!(getsb && nfs4_ntov_map[k].vfsstat)) {
4974 continue;
4976 error = (*nfs4_ntov_map[k].sv_getit)(NFS4ATTR_VERIT, sargp, na);
4977 if (resp != NULL) {
4978 if (error)
4979 ret_error = -1; /* not all match */
4980 else /* update response bitmap */
4981 *resp |= nfs4_ntov_map[k].fbit;
4982 continue;
4984 if (error) {
4985 ret_error = -1; /* not all match */
4986 break;
4989 errout:
4990 return (ret_error);
4994 * Decode the attribute to be set/verified. If the attr requires a sys op
4995 * (VOP_GETATTR, VFS_VFSSTAT), and the request is to verify, then don't
4996 * call the sv_getit function for it, because the sys op hasn't yet been done.
4997 * Return 0 for success, error code if failed.
4999 * Note: the decoded arg is not freed here but in nfs4_ntov_table_free.
5001 static int
5002 decode_fattr4_attr(nfs4_attr_cmd_t cmd, struct nfs4_svgetit_arg *sargp,
5003 int k, XDR *xdrp, bitmap4 *resp_bval, union nfs4_attr_u *nap)
5005 int error = 0;
5006 bool_t set_later;
5008 sargp->vap->va_mask |= nfs4_ntov_map[k].vbit;
5010 if ((*nfs4_ntov_map[k].xfunc)(xdrp, nap)) {
5011 set_later = nfs4_ntov_map[k].vbit || nfs4_ntov_map[k].vfsstat;
5013 * don't verify yet if a vattr or sb dependent attr,
5014 * because we don't have their sys values yet.
5015 * Will be done later.
5017 if (! (set_later && (cmd == NFS4ATTR_VERIT))) {
5019 * ACLs are a special case, since setting the MODE
5020 * conflicts with setting the ACL. We delay setting
5021 * the ACL until all other attributes have been set.
5022 * The ACL gets set in do_rfs4_op_setattr().
5024 if (nfs4_ntov_map[k].fbit != FATTR4_ACL_MASK) {
5025 error = (*nfs4_ntov_map[k].sv_getit)(cmd,
5026 sargp, nap);
5027 if (error) {
5028 xdr_free(nfs4_ntov_map[k].xfunc,
5029 (caddr_t)nap);
5033 } else {
5034 #ifdef DEBUG
5035 cmn_err(CE_NOTE, "decode_fattr4_attr: error "
5036 "decoding attribute %d\n", k);
5037 #endif
5038 error = EINVAL;
5040 if (!error && resp_bval && !set_later) {
5041 *resp_bval |= nfs4_ntov_map[k].fbit;
5044 return (error);
5048 * Set vattr based on incoming fattr4 attrs - used by setattr.
5049 * Set response mask. Ignore any values that are not writable vattr attrs.
5051 static nfsstat4
5052 do_rfs4_set_attrs(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5053 struct nfs4_svgetit_arg *sargp, struct nfs4_ntov_table *ntovp,
5054 nfs4_attr_cmd_t cmd)
5056 int error = 0;
5057 int i;
5058 char *attrs = fattrp->attrlist4;
5059 uint32_t attrslen = fattrp->attrlist4_len;
5060 XDR xdr;
5061 nfsstat4 status = NFS4_OK;
5062 vnode_t *vp = cs->vp;
5063 union nfs4_attr_u *na;
5064 uint8_t *amap;
5066 #ifndef lint
5068 * Make sure that maximum attribute number can be expressed as an
5069 * 8 bit quantity.
5071 ASSERT(NFS4_MAXNUM_ATTRS <= (UINT8_MAX + 1));
5072 #endif
5074 if (vp == NULL) {
5075 if (resp)
5076 *resp = 0;
5077 return (NFS4ERR_NOFILEHANDLE);
5079 if (cs->access == CS_ACCESS_DENIED) {
5080 if (resp)
5081 *resp = 0;
5082 return (NFS4ERR_ACCESS);
5085 sargp->op = cmd;
5086 sargp->cs = cs;
5087 sargp->flag = 0; /* may be set later */
5088 sargp->vap->va_mask = 0;
5089 sargp->rdattr_error = NFS4_OK;
5090 sargp->rdattr_error_req = FALSE;
5091 /* sargp->sbp is set by the caller */
5093 xdrmem_create(&xdr, attrs, attrslen, XDR_DECODE);
5095 na = ntovp->na;
5096 amap = ntovp->amap;
5099 * The following loop iterates on the nfs4_ntov_map checking
5100 * if the fbit is set in the requested bitmap.
5101 * If set then we process the arguments using the
5102 * rfs4_fattr4 conversion functions to populate the setattr
5103 * vattr and va_mask. Any settable attrs that are not using vattr
5104 * will be set in this loop.
5106 for (i = 0; i < nfs4_ntov_map_size; i++) {
5107 if (!(fattrp->attrmask & nfs4_ntov_map[i].fbit)) {
5108 continue;
5111 * If setattr, must be a writable attr.
5112 * If verify/nverify, must be a readable attr.
5114 if ((error = (*nfs4_ntov_map[i].sv_getit)(
5115 NFS4ATTR_SUPPORTED, sargp, NULL)) != 0) {
5117 * Client tries to set/verify an
5118 * unsupported attribute, tries to set
5119 * a read only attr or verify a write
5120 * only one - error!
5122 break;
5125 * Decode the attribute to set/verify
5127 error = decode_fattr4_attr(cmd, sargp, nfs4_ntov_map[i].nval,
5128 &xdr, resp ? resp : NULL, na);
5129 if (error)
5130 break;
5131 *amap++ = (uint8_t)nfs4_ntov_map[i].nval;
5132 na++;
5133 (ntovp->attrcnt)++;
5134 if (nfs4_ntov_map[i].vfsstat)
5135 ntovp->vfsstat = TRUE;
5138 if (error != 0)
5139 status = (error == ENOTSUP ? NFS4ERR_ATTRNOTSUPP :
5140 puterrno4(error));
5141 /* xdrmem_destroy(&xdrs); */ /* NO-OP */
5142 return (status);
/*
 * Core of SETATTR: decode the attributes in fattrp and apply them to
 * cs->vp.
 *
 * resp    - out: bitmap of the attributes reported as set.  Cleared on
 *           entry and masked with fattrp->attrmask before returning.
 * fattrp  - the attributes supplied by the client.
 * cs      - compound state; supplies the target vnode (cs->vp) and the
 *           credentials (cs->cr).
 * stateid - stateid handed to rfs4_check_stateid(); only checked when the
 *           file size is being changed.
 *
 * Returns NFS4_OK or an NFSv4 error status.  When applying the attributes
 * fails, rfs4_verify_attr() is used to fill *resp with the attributes that
 * nevertheless took effect.
 */
5145 static nfsstat4
5146 do_rfs4_op_setattr(bitmap4 *resp, fattr4 *fattrp, struct compound_state *cs,
5147 stateid4 *stateid)
5149 int error = 0;
5150 struct nfs4_svgetit_arg sarg;
5151 bool_t trunc;
5153 nfsstat4 status = NFS4_OK;
5154 cred_t *cr = cs->cr;
5155 vnode_t *vp = cs->vp;
5156 struct nfs4_ntov_table ntov;
5157 struct statvfs64 sb;
5158 struct vattr bva;
5159 struct flock64 bf;
5160 int in_crit = 0;
5161 uint_t saved_mask = 0;
5162 caller_context_t ct;
/* Start with an empty result bitmap, then decode the request. */
5164 *resp = 0;
5165 sarg.sbp = &sb;
5166 sarg.is_referral = B_FALSE;
5167 nfs4_ntov_table_init(&ntov);
5168 status = do_rfs4_set_attrs(resp, fattrp, cs, &sarg, &ntov,
5169 NFS4ATTR_SETIT);
5170 if (status != NFS4_OK) {
5172 * failed set attrs
5174 goto done;
/* No vattr-backed attribute was decoded and no ACL was requested. */
5176 if ((sarg.vap->va_mask == 0) &&
5177 (! (fattrp->attrmask & FATTR4_ACL_MASK))) {
5179 * no further work to be done
5181 goto done;
5185 * If we got a request to set the ACL and the MODE, only
5186 * allow changing VSUID, VSGID, and VSVTX. Attempting
5187 * to change any other bits, along with setting an ACL,
5188 * gives NFS4ERR_INVAL.
5190 if ((fattrp->attrmask & FATTR4_ACL_MASK) &&
5191 (fattrp->attrmask & FATTR4_MODE_MASK)) {
5192 vattr_t va;
5194 va.va_mask = AT_MODE;
5195 error = VOP_GETATTR(vp, &va, 0, cs->cr, NULL);
5196 if (error) {
5197 status = puterrno4(error);
5198 goto done;
/* XOR exposes the mode bits that differ between request and file. */
5200 if ((sarg.vap->va_mode ^ va.va_mode) &
5201 ~(VSUID | VSGID | VSVTX)) {
5202 status = NFS4ERR_INVAL;
5203 goto done;
5207 /* Check stateid only if size has been set */
5208 if (sarg.vap->va_mask & AT_SIZE) {
5209 trunc = (sarg.vap->va_size == 0);
5210 status = rfs4_check_stateid(FWRITE, cs->vp, stateid,
5211 trunc, &cs->deleg, sarg.vap->va_mask & AT_SIZE, &ct);
5212 if (status != NFS4_OK)
5213 goto done;
5214 } else {
/* No stateid check was done, so fill in the caller context by hand. */
5215 ct.cc_sysid = 0;
5216 ct.cc_pid = 0;
5217 ct.cc_caller_id = nfs4_srv_caller_id;
5218 ct.cc_flags = CC_DONTBLOCK;
5221 /* XXX start of possible race with delegations */
5224 * We need to specially handle size changes because it is
5225 * possible for the client to create a file with read-only
5226 * modes, but with the file opened for writing. If the client
5227 * then tries to set the file size, e.g. ftruncate(3C),
5228 * fcntl(F_FREESP), the normal access checking done in
5229 * VOP_SETATTR would prevent the client from doing it even though
5230 * it should be allowed to do so. To get around this, we do the
5231 * access checking for ourselves and use VOP_SPACE which doesn't
5232 * do the access checking.
5233 * Also the client should not be allowed to change the file
5234 * size if there is a conflicting non-blocking mandatory lock in
5235 * the region of the change.
5237 if (vp->v_type == VREG && (sarg.vap->va_mask & AT_SIZE)) {
5238 u_offset_t offset;
5239 ssize_t length;
5242 * ufs_setattr clears AT_SIZE from vap->va_mask, but
5243 * before returning, sarg.vap->va_mask is used to
5244 * generate the setattr reply bitmap. We also clear
5245 * AT_SIZE below before calling VOP_SPACE. For both
5246 * of these cases, the va_mask needs to be saved here
5247 * and restored after calling VOP_SETATTR.
5249 saved_mask = sarg.vap->va_mask;
5252 * Check any possible conflict due to NBMAND locks.
5253 * Get into critical region before VOP_GETATTR, so the
5254 * size attribute is valid when checking conflicts.
5256 if (nbl_need_check(vp)) {
5257 nbl_start_crit(vp, RW_READER);
5258 in_crit = 1;
5261 bva.va_mask = AT_UID|AT_SIZE;
5262 if (error = VOP_GETATTR(vp, &bva, 0, cr, &ct)) {
5263 status = puterrno4(error);
5264 goto done;
/*
 * offset/length describe the byte range between the current size and the
 * requested size, whichever direction the file is being resized in.
 */
5267 if (in_crit) {
5268 if (sarg.vap->va_size < bva.va_size) {
5269 offset = sarg.vap->va_size;
5270 length = bva.va_size - sarg.vap->va_size;
5271 } else {
5272 offset = bva.va_size;
5273 length = sarg.vap->va_size - bva.va_size;
5275 if (nbl_conflict(vp, NBL_WRITE, offset, length, 0,
5276 &ct)) {
5277 status = NFS4ERR_LOCKED;
5278 goto done;
/*
 * The caller owns the file: take AT_SIZE out of the mask handed to
 * VOP_SETATTR and change the size through VOP_SPACE(F_FREESP) instead.
 */
5282 if (crgetuid(cr) == bva.va_uid) {
5283 sarg.vap->va_mask &= ~AT_SIZE;
5284 bf.l_type = F_WRLCK;
5285 bf.l_whence = 0;
5286 bf.l_start = (off64_t)sarg.vap->va_size;
5287 bf.l_len = 0;
5288 bf.l_sysid = 0;
5289 bf.l_pid = 0;
5290 error = VOP_SPACE(vp, F_FREESP, &bf, FWRITE,
5291 (offset_t)sarg.vap->va_size, cr, &ct);
/* Apply whatever vattr-backed attributes are still in the mask. */
5295 if (!error && sarg.vap->va_mask != 0)
5296 error = VOP_SETATTR(vp, sarg.vap, sarg.flag, cr, &ct);
5298 /* restore va_mask -- ufs_setattr clears AT_SIZE */
5299 if (saved_mask & AT_SIZE)
5300 sarg.vap->va_mask |= AT_SIZE;
5303 * If an ACL was being set, it has been delayed until now,
5304 * in order to set the mode (via the VOP_SETATTR() above) first.
5306 if ((! error) && (fattrp->attrmask & FATTR4_ACL_MASK)) {
5307 int i;
/* Find the slot of the decoded ACL value in the ntov table. */
5309 for (i = 0; i < NFS4_MAXNUM_ATTRS; i++)
5310 if (ntov.amap[i] == FATTR4_ACL)
5311 break;
5312 if (i < NFS4_MAXNUM_ATTRS) {
5313 error = (*nfs4_ntov_map[FATTR4_ACL].sv_getit)(
5314 NFS4ATTR_SETIT, &sarg, &ntov.na[i]);
5315 if (error == 0) {
5316 *resp |= FATTR4_ACL_MASK;
5317 } else if (error == ENOTSUP) {
5318 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5319 status = NFS4ERR_ATTRNOTSUPP;
5320 goto done;
5322 } else {
5323 NFS4_DEBUG(rfs4_debug,
5324 (CE_NOTE, "do_rfs4_op_setattr: "
5325 "unable to find ACL in fattr4"));
5326 error = EINVAL;
5330 if (error) {
5331 /* check if a monitor detected a delegation conflict */
5332 if (error == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
5333 status = NFS4ERR_DELAY;
5334 else
5335 status = puterrno4(error);
5338 * Set the response bitmap when setattr failed.
5339 * If VOP_SETATTR partially succeeded, test by doing a
5340 * VOP_GETATTR on the object and comparing the data
5341 * to the setattr arguments.
5343 (void) rfs4_verify_attr(&sarg, resp, &ntov);
5344 } else {
5346 * Force modified metadata out to stable storage.
5348 (void) VOP_FSYNC(vp, FNODSYNC, cr, &ct);
5350 * Set response bitmap
5352 nfs4_vmask_to_nmask_set(sarg.vap->va_mask, resp);
5355 /* Return early and already have a NFSv4 error */
5356 done:
5358 * Except for nfs4_vmask_to_nmask_set(), vattr --> fattr
5359 * conversion sets both readable and writeable NFS4 attrs
5360 * for AT_MTIME and AT_ATIME. The line below masks out
5361 * unrequested attrs from the setattr result bitmap. This
5362 * is placed after the done: label to catch the ATTRNOTSUP
5363 * case.
5365 *resp &= fattrp->attrmask;
/* Common exit: leave the NBMAND critical region, free decoded values. */
5367 if (in_crit)
5368 nbl_end_crit(vp);
5370 nfs4_ntov_table_free(&ntov, &sarg);
5372 return (status);
5375 /* ARGSUSED */
5376 static void
5377 rfs4_op_setattr(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5378 struct compound_state *cs)
5380 SETATTR4args *args = &argop->nfs_argop4_u.opsetattr;
5381 SETATTR4res *resp = &resop->nfs_resop4_u.opsetattr;
5382 bslabel_t *clabel;
5384 DTRACE_NFSV4_2(op__setattr__start, struct compound_state *, cs,
5385 SETATTR4args *, args);
5387 if (cs->vp == NULL) {
5388 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5389 goto out;
5393 * If there is an unshared filesystem mounted on this vnode,
5394 * do not allow to setattr on this vnode.
5396 if (vn_ismntpt(cs->vp)) {
5397 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5398 goto out;
5401 resp->attrsset = 0;
5403 if (rdonly4(cs->exi, cs->vp, req)) {
5404 *cs->statusp = resp->status = NFS4ERR_ROFS;
5405 goto out;
5408 /* check label before setting attributes */
5409 if (is_system_labeled()) {
5410 ASSERT(req->rq_label != NULL);
5411 clabel = req->rq_label;
5412 DTRACE_PROBE2(tx__rfs4__log__info__opsetattr__clabel, char *,
5413 "got client label from request(1)",
5414 struct svc_req *, req);
5415 if (!blequal(&l_admin_low->tsl_label, clabel)) {
5416 if (!do_rfs_label_check(clabel, cs->vp,
5417 EQUALITY_CHECK, cs->exi)) {
5418 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5419 goto out;
5424 *cs->statusp = resp->status =
5425 do_rfs4_op_setattr(&resp->attrsset, &args->obj_attributes, cs,
5426 &args->stateid);
5428 out:
5429 DTRACE_NFSV4_2(op__setattr__done, struct compound_state *, cs,
5430 SETATTR4res *, resp);
5433 /* ARGSUSED */
5434 static void
5435 rfs4_op_verify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5436 struct compound_state *cs)
5439 * verify and nverify are exactly the same, except that nverify
5440 * succeeds when some argument changed, and verify succeeds when
5441 * when none changed.
5444 VERIFY4args *args = &argop->nfs_argop4_u.opverify;
5445 VERIFY4res *resp = &resop->nfs_resop4_u.opverify;
5447 int error;
5448 struct nfs4_svgetit_arg sarg;
5449 struct statvfs64 sb;
5450 struct nfs4_ntov_table ntov;
5452 DTRACE_NFSV4_2(op__verify__start, struct compound_state *, cs,
5453 VERIFY4args *, args);
5455 if (cs->vp == NULL) {
5456 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5457 goto out;
5460 sarg.sbp = &sb;
5461 sarg.is_referral = B_FALSE;
5462 nfs4_ntov_table_init(&ntov);
5463 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5464 &sarg, &ntov, NFS4ATTR_VERIT);
5465 if (resp->status != NFS4_OK) {
5467 * do_rfs4_set_attrs will try to verify systemwide attrs,
5468 * so could return -1 for "no match".
5470 if (resp->status == -1)
5471 resp->status = NFS4ERR_NOT_SAME;
5472 goto done;
5474 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5475 switch (error) {
5476 case 0:
5477 resp->status = NFS4_OK;
5478 break;
5479 case -1:
5480 resp->status = NFS4ERR_NOT_SAME;
5481 break;
5482 default:
5483 resp->status = puterrno4(error);
5484 break;
5486 done:
5487 *cs->statusp = resp->status;
5488 nfs4_ntov_table_free(&ntov, &sarg);
5489 out:
5490 DTRACE_NFSV4_2(op__verify__done, struct compound_state *, cs,
5491 VERIFY4res *, resp);
5494 /* ARGSUSED */
5495 static void
5496 rfs4_op_nverify(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5497 struct compound_state *cs)
5500 * verify and nverify are exactly the same, except that nverify
5501 * succeeds when some argument changed, and verify succeeds when
5502 * when none changed.
5505 NVERIFY4args *args = &argop->nfs_argop4_u.opnverify;
5506 NVERIFY4res *resp = &resop->nfs_resop4_u.opnverify;
5508 int error;
5509 struct nfs4_svgetit_arg sarg;
5510 struct statvfs64 sb;
5511 struct nfs4_ntov_table ntov;
5513 DTRACE_NFSV4_2(op__nverify__start, struct compound_state *, cs,
5514 NVERIFY4args *, args);
5516 if (cs->vp == NULL) {
5517 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5518 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5519 NVERIFY4res *, resp);
5520 return;
5522 sarg.sbp = &sb;
5523 sarg.is_referral = B_FALSE;
5524 nfs4_ntov_table_init(&ntov);
5525 resp->status = do_rfs4_set_attrs(NULL, &args->obj_attributes, cs,
5526 &sarg, &ntov, NFS4ATTR_VERIT);
5527 if (resp->status != NFS4_OK) {
5529 * do_rfs4_set_attrs will try to verify systemwide attrs,
5530 * so could return -1 for "no match".
5532 if (resp->status == -1)
5533 resp->status = NFS4_OK;
5534 goto done;
5536 error = rfs4_verify_attr(&sarg, NULL, &ntov);
5537 switch (error) {
5538 case 0:
5539 resp->status = NFS4ERR_SAME;
5540 break;
5541 case -1:
5542 resp->status = NFS4_OK;
5543 break;
5544 default:
5545 resp->status = puterrno4(error);
5546 break;
5548 done:
5549 *cs->statusp = resp->status;
5550 nfs4_ntov_table_free(&ntov, &sarg);
5552 DTRACE_NFSV4_2(op__nverify__done, struct compound_state *, cs,
5553 NVERIFY4res *, resp);
/*
 * MAX_IOVECS is the size of the on-stack iovec array in rfs4_op_write();
 * longer mblk chains get a kmem_alloc'ed array instead.
 */
5557 * XXX - This should live in an NFS header file.
5559 #define MAX_IOVECS 12
/*
 * WRITE: write args->data_len bytes at args->offset to the current vnode.
 *
 * The data comes from one of three places, tried in this order: an mblk
 * chain (args->mblk), an rlist entry (args->rlist), or a flat buffer
 * (args->data_val).
 *
 * The status is stored in resp->status and *cs->statusp.  On success
 * resp->count is the number of bytes written, resp->writeverf is
 * Write4verf, and resp->committed is UNSTABLE4 when no sync flag was used
 * and FILE_SYNC4 otherwise (including for DATA_SYNC4 requests).
 *
 * Every exit goes through the out label so the NBMAND critical region is
 * always left.
 */
5561 /* ARGSUSED */
5562 static void
5563 rfs4_op_write(nfs_argop4 *argop, nfs_resop4 *resop, struct svc_req *req,
5564 struct compound_state *cs)
5566 WRITE4args *args = &argop->nfs_argop4_u.opwrite;
5567 WRITE4res *resp = &resop->nfs_resop4_u.opwrite;
5568 int error;
5569 vnode_t *vp;
5570 struct vattr bva;
5571 u_offset_t rlimit;
5572 struct uio uio;
5573 struct iovec iov[MAX_IOVECS];
5574 struct iovec *iovp;
5575 int iovcnt;
5576 int ioflag;
5577 cred_t *savecred, *cr;
5578 bool_t *deleg = &cs->deleg;
5579 nfsstat4 stat;
5580 int in_crit = 0;
5581 caller_context_t ct;
5583 DTRACE_NFSV4_2(op__write__start, struct compound_state *, cs,
5584 WRITE4args *, args);
5586 vp = cs->vp;
5587 if (vp == NULL) {
5588 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
5589 goto out;
5591 if (cs->access == CS_ACCESS_DENIED) {
5592 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5593 goto out;
5596 cr = cs->cr;
/* Validate the stateid for writing; ct is passed on to the VOP calls. */
5598 if ((stat = rfs4_check_stateid(FWRITE, vp, &args->stateid, FALSE,
5599 deleg, TRUE, &ct)) != NFS4_OK) {
5600 *cs->statusp = resp->status = stat;
5601 goto out;
5605 * We have to enter the critical region before calling VOP_RWLOCK
5606 * to avoid a deadlock with ufs.
5608 if (nbl_need_check(vp)) {
5609 nbl_start_crit(vp, RW_READER);
5610 in_crit = 1;
5611 if (nbl_conflict(vp, NBL_WRITE,
5612 args->offset, args->data_len, 0, &ct)) {
5613 *cs->statusp = resp->status = NFS4ERR_LOCKED;
5614 goto out;
/* Mode and owner are needed for the access checks below. */
5618 bva.va_mask = AT_MODE | AT_UID;
5619 error = VOP_GETATTR(vp, &bva, 0, cr, &ct);
5622 * If we can't get the attributes, then we can't do the
5623 * right access checking. So, we'll fail the request.
5625 if (error) {
5626 *cs->statusp = resp->status = puterrno4(error);
5627 goto out;
5630 if (rdonly4(cs->exi, cs->vp, req)) {
5631 *cs->statusp = resp->status = NFS4ERR_ROFS;
5632 goto out;
/* Only regular files can be written. */
5635 if (vp->v_type != VREG) {
5636 *cs->statusp = resp->status =
5637 ((vp->v_type == VDIR) ? NFS4ERR_ISDIR : NFS4ERR_INVAL);
5638 goto out;
/* The owner skips the VOP_ACCESS check; everyone else needs VWRITE. */
5641 if (crgetuid(cr) != bva.va_uid &&
5642 (error = VOP_ACCESS(vp, VWRITE, 0, cr, &ct))) {
5643 *cs->statusp = resp->status = puterrno4(error);
5644 goto out;
5647 if (MANDLOCK(vp, bva.va_mode)) {
5648 *cs->statusp = resp->status = NFS4ERR_ACCESS;
5649 goto out;
/* A zero-length write succeeds at once and echoes the requested stability. */
5652 if (args->data_len == 0) {
5653 *cs->statusp = resp->status = NFS4_OK;
5654 resp->count = 0;
5655 resp->committed = args->stable;
5656 resp->writeverf = Write4verf;
5657 goto out;
5660 if (args->mblk != NULL) {
5661 mblk_t *m;
5662 uint_t bytes, round_len;
/*
 * Count the mblks needed to cover the XDR-rounded data length; that
 * count is the number of iovecs required.
 */
5664 iovcnt = 0;
5665 bytes = 0;
5666 round_len = roundup(args->data_len, BYTES_PER_XDR_UNIT);
5667 for (m = args->mblk;
5668 m != NULL && bytes < round_len;
5669 m = m->b_cont) {
5670 iovcnt++;
5671 bytes += MBLKL(m);
5673 #ifdef DEBUG
5674 /* should have ended on an mblk boundary */
5675 if (bytes != round_len) {
5676 printf("bytes=0x%x, round_len=0x%x, req len=0x%x\n",
5677 bytes, round_len, args->data_len);
5678 printf("args=%p, args->mblk=%p, m=%p", (void *)args,
5679 (void *)args->mblk, (void *)m);
5680 ASSERT(bytes == round_len);
5682 #endif
/* Use the on-stack array when it is big enough, else allocate one. */
5683 if (iovcnt <= MAX_IOVECS) {
5684 iovp = iov;
5685 } else {
5686 iovp = kmem_alloc(sizeof (*iovp) * iovcnt, KM_SLEEP);
5688 mblk_to_iov(args->mblk, iovcnt, iovp);
5689 } else if (args->rlist != NULL) {
5690 iovcnt = 1;
5691 iovp = iov;
5692 iovp->iov_base = (char *)((args->rlist)->u.c_daddr3);
5693 iovp->iov_len = args->data_len;
5694 } else {
5695 iovcnt = 1;
5696 iovp = iov;
5697 iovp->iov_base = args->data_val;
5698 iovp->iov_len = args->data_len;
5701 uio.uio_iov = iovp;
5702 uio.uio_iovcnt = iovcnt;
5704 uio.uio_segflg = UIO_SYSSPACE;
5705 uio.uio_extflg = UIO_COPY_DEFAULT;
5706 uio.uio_loffset = args->offset;
5707 uio.uio_resid = args->data_len;
/* Clamp the transfer so it does not run past the file size limit. */
5708 uio.uio_llimit = curproc->p_fsz_ctl;
5709 rlimit = uio.uio_llimit - args->offset;
5710 if (rlimit < (u_offset_t)uio.uio_resid)
5711 uio.uio_resid = (int)rlimit;
/* Map the requested stability level onto the I/O flags. */
5713 if (args->stable == UNSTABLE4)
5714 ioflag = 0;
5715 else if (args->stable == FILE_SYNC4)
5716 ioflag = FSYNC;
5717 else if (args->stable == DATA_SYNC4)
5718 ioflag = FDSYNC;
5719 else {
/* Unknown stability value: free a heap-allocated iovec array and fail. */
5720 if (iovp != iov)
5721 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5722 *cs->statusp = resp->status = NFS4ERR_INVAL;
5723 goto out;
5727 * We're changing creds because VM may fault and we need
5728 * the cred of the current thread to be used if quota
5729 * checking is enabled.
5731 savecred = curthread->t_cred;
5732 curthread->t_cred = cr;
5733 error = do_io(FWRITE, vp, &uio, ioflag, cr, &ct);
5734 curthread->t_cred = savecred;
5736 if (iovp != iov)
5737 kmem_free(iovp, sizeof (*iovp) * iovcnt);
5739 if (error) {
5740 *cs->statusp = resp->status = puterrno4(error);
5741 goto out;
5744 *cs->statusp = resp->status = NFS4_OK;
/* Bytes written = bytes requested minus what do_io left in uio_resid. */
5745 resp->count = args->data_len - uio.uio_resid;
5747 if (ioflag == 0)
5748 resp->committed = UNSTABLE4;
5749 else
5750 resp->committed = FILE_SYNC4;
5752 resp->writeverf = Write4verf;
5754 out:
5755 if (in_crit)
5756 nbl_end_crit(vp);
5758 DTRACE_NFSV4_2(op__write__done, struct compound_state *, cs,
5759 WRITE4res *, resp);
5763 /* XXX put in a header file */
5764 extern int sec_svc_getcred(struct svc_req *, cred_t *, caddr_t *, int *);
/*
 * Execute an NFSv4 COMPOUND request.
 *
 * args - the decoded request: tag, minor version and array of operations.
 * resp - the reply: receives a copy of the tag, the overall status and one
 *        result per executed operation.
 * exi  - asserted to be NULL.
 * cr   - asserted to be NULL; a fresh credential is obtained with crget().
 * req  - the RPC request.
 * rv   - optional: set to 0 on entry and to 1 when sec_svc_getcred() fails.
 *
 * The operations are dispatched in order through rfsv4disptab while
 * exported_lock is held as reader.  Processing stops after the first
 * operation whose status is not NFS4_OK, and the result array is then
 * shrunk to the operations actually executed.
 */
5766 void
5767 rfs4_compound(COMPOUND4args *args, COMPOUND4res *resp, struct exportinfo *exi,
5768 struct svc_req *req, cred_t *cr, int *rv)
5770 uint_t i;
5771 struct compound_state cs;
5773 if (rv != NULL)
5774 *rv = 0;
5775 rfs4_init_compound_state(&cs);
5777 * Form a reply tag by copying over the reqeuest tag.
5779 resp->tag.utf8string_val =
5780 kmem_alloc(args->tag.utf8string_len, KM_SLEEP);
5781 resp->tag.utf8string_len = args->tag.utf8string_len;
5782 bcopy(args->tag.utf8string_val, resp->tag.utf8string_val,
5783 resp->tag.utf8string_len);
/* Every operation reports its status through cs.statusp. */
5785 cs.statusp = &resp->status;
5786 cs.req = req;
5789 * XXX for now, minorversion should be zero
5791 if (args->minorversion != NFS4_MINORVERSION) {
5792 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5793 &cs, COMPOUND4args *, args);
5794 resp->array_len = 0;
5795 resp->array = NULL;
5796 resp->status = NFS4ERR_MINOR_VERS_MISMATCH;
5797 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5798 &cs, COMPOUND4res *, resp);
5799 return;
5802 ASSERT(exi == NULL);
5803 ASSERT(cr == NULL);
5805 cr = crget();
5806 ASSERT(cr != NULL);
/* A return of 0 from sec_svc_getcred() is treated as a bad credential. */
5808 if (sec_svc_getcred(req, cr, &cs.principal, &cs.nfsflavor) == 0) {
5809 DTRACE_NFSV4_2(compound__start, struct compound_state *,
5810 &cs, COMPOUND4args *, args);
5811 crfree(cr);
5812 DTRACE_NFSV4_2(compound__done, struct compound_state *,
5813 &cs, COMPOUND4res *, resp);
5814 svcerr_badcred(req->rq_xprt);
5815 if (rv != NULL)
5816 *rv = 1;
5817 return;
/* One zero-filled result slot per requested operation. */
5819 resp->array_len = args->array_len;
5820 resp->array = kmem_zalloc(args->array_len * sizeof (nfs_resop4),
5821 KM_SLEEP);
5823 cs.basecr = cr;
5825 DTRACE_NFSV4_2(compound__start, struct compound_state *, &cs,
5826 COMPOUND4args *, args);
5829 * For now, NFS4 compound processing must be protected by
5830 * exported_lock because it can access more than one exportinfo
5831 * per compound and share/unshare can now change multiple
5832 * exinfo structs. The NFS2/3 code only refs 1 exportinfo
5833 * per proc (excluding public exinfo), and exi_count design
5834 * is sufficient to protect concurrent execution of NFS2/3
5835 * ops along with unexport. This lock will be removed as
5836 * part of the NFSv4 phase 2 namespace redesign work.
5838 rw_enter(&exported_lock, RW_READER);
5841 * If this is the first compound we've seen, we need to start all
5842 * new instances' grace periods.
5844 if (rfs4_seen_first_compound == 0) {
5845 rfs4_grace_start_new();
5847 * This must be set after rfs4_grace_start_new(), otherwise
5848 * another thread could proceed past here before the former
5849 * is finished.
5851 rfs4_seen_first_compound = 1;
/* Run the operations in order until one fails (cs.cont goes FALSE). */
5854 for (i = 0; i < args->array_len && cs.cont; i++) {
5855 nfs_argop4 *argop;
5856 nfs_resop4 *resop;
5857 uint_t op;
5859 argop = &args->array[i];
5860 resop = &resp->array[i];
5861 resop->resop = argop->argop;
5862 op = (uint_t)resop->resop;
5864 if (op < rfsv4disp_cnt) {
5866 * Count the individual ops here; NULL and COMPOUND
5867 * are counted in common_dispatch()
5869 rfsproccnt_v4_ptr[op].value.ui64++;
5871 NFS4_DEBUG(rfs4_debug > 1,
5872 (CE_NOTE, "Executing %s", rfs4_op_string[op]));
5873 (*rfsv4disptab[op].dis_proc)(argop, resop, req, &cs);
5874 NFS4_DEBUG(rfs4_debug > 1, (CE_NOTE, "%s returned %d",
5875 rfs4_op_string[op], *cs.statusp));
5876 if (*cs.statusp != NFS4_OK)
5877 cs.cont = FALSE;
5878 } else {
5880 * This is effectively dead code since XDR code
5881 * will have already returned BADXDR if op doesn't
5882 * decode to legal value. This only done for a
5883 * day when XDR code doesn't verify v4 opcodes.
5885 op = OP_ILLEGAL;
5886 rfsproccnt_v4_ptr[OP_ILLEGAL_IDX].value.ui64++;
5888 rfs4_op_illegal(argop, resop, req, &cs);
5889 cs.cont = FALSE;
5893 * If not at last op, and if we are to stop, then
5894 * compact the results array.
5896 if ((i + 1) < args->array_len && !cs.cont) {
/* Copy the i+1 results produced so far into a right-sized array. */
5897 nfs_resop4 *new_res = kmem_alloc(
5898 (i+1) * sizeof (nfs_resop4), KM_SLEEP);
5899 bcopy(resp->array,
5900 new_res, (i+1) * sizeof (nfs_resop4));
5901 kmem_free(resp->array,
5902 args->array_len * sizeof (nfs_resop4));
5904 resp->array_len = i + 1;
5905 resp->array = new_res;
5909 rw_exit(&exported_lock);
5911 DTRACE_NFSV4_2(compound__done, struct compound_state *, &cs,
5912 COMPOUND4res *, resp);
/* Release what the compound state still holds: vnodes, saved fh, creds. */
5914 if (cs.vp)
5915 VN_RELE(cs.vp);
5916 if (cs.saved_vp)
5917 VN_RELE(cs.saved_vp);
5918 if (cs.saved_fh.nfs_fh4_val)
5919 kmem_free(cs.saved_fh.nfs_fh4_val, NFS4_FHSIZE);
5921 if (cs.basecr)
5922 crfree(cs.basecr);
5923 if (cs.cr)
5924 crfree(cs.cr);
5926 * done with this compound request, free the label
5929 if (req->rq_label != NULL) {
5930 kmem_free(req->rq_label, sizeof (bslabel_t));
5931 req->rq_label = NULL;
5936 * XXX because of what appears to be duplicate calls to rfs4_compound_free
5937 * XXX zero out the tag and array values. Need to investigate why the
5938 * XXX calls occur, but at least prevent the panic for now.
5940 void
5941 rfs4_compound_free(COMPOUND4res *resp)
5943 uint_t i;
5945 if (resp->tag.utf8string_val) {
5946 UTF8STRING_FREE(resp->tag)
5949 for (i = 0; i < resp->array_len; i++) {
5950 nfs_resop4 *resop;
5951 uint_t op;
5953 resop = &resp->array[i];
5954 op = (uint_t)resop->resop;
5955 if (op < rfsv4disp_cnt) {
5956 (*rfsv4disptab[op].dis_resfree)(resop);
5959 if (resp->array != NULL) {
5960 kmem_free(resp->array, resp->array_len * sizeof (nfs_resop4));
5965 * Process the value of the compound request rpc flags, as a bit-AND
5966 * of the individual per-op flags (idempotent, allowork, publicfh_ok)
5968 void
5969 rfs4_compound_flagproc(COMPOUND4args *args, int *flagp)
5971 int i;
5972 int flag = RPC_ALL;
5974 for (i = 0; flag && i < args->array_len; i++) {
5975 uint_t op;
5977 op = (uint_t)args->array[i].argop;
5979 if (op < rfsv4disp_cnt)
5980 flag &= rfsv4disptab[op].dis_flags;
5981 else
5982 flag = 0;
5984 *flagp = flag;
5987 nfsstat4
5988 rfs4_client_sysid(rfs4_client_t *cp, sysid_t *sp)
5990 nfsstat4 e;
5992 rfs4_dbe_lock(cp->rc_dbe);
5994 if (cp->rc_sysidt != LM_NOSYSID) {
5995 *sp = cp->rc_sysidt;
5996 e = NFS4_OK;
5998 } else if ((cp->rc_sysidt = lm_alloc_sysidt()) != LM_NOSYSID) {
5999 *sp = cp->rc_sysidt;
6000 e = NFS4_OK;
6002 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
6003 "rfs4_client_sysid: allocated 0x%x\n", *sp));
6004 } else
6005 e = NFS4ERR_DELAY;
6007 rfs4_dbe_unlock(cp->rc_dbe);
6008 return (e);
#if defined(DEBUG) && ! defined(lint)
/*
 * Debug helper: log one lock request through cmn_err(CE_NOTE).
 *
 * str is a caller supplied prefix, operation the fcntl command (F_GETLK,
 * F_SETLK or F_SETLK_NBMAND) and flk the lock description.  A zero l_len
 * is printed as ~0LL.  flk->l_whence is asserted to be 0.
 */
static void
lock_print(char *str, int operation, struct flock64 *flk)
{
	char *op_name = "F_UNKNOWN";
	char *type_name = "F_UNKNOWN";

	if (operation == F_GETLK)
		op_name = "F_GETLK";
	else if (operation == F_SETLK)
		op_name = "F_SETLK";
	else if (operation == F_SETLK_NBMAND)
		op_name = "F_SETLK_NBMAND";

	if (flk->l_type == F_UNLCK)
		type_name = "F_UNLCK";
	else if (flk->l_type == F_RDLCK)
		type_name = "F_RDLCK";
	else if (flk->l_type == F_WRLCK)
		type_name = "F_WRLCK";

	ASSERT(flk->l_whence == 0);
	cmn_err(CE_NOTE, "%s: %s, type = %s, off = %llx len = %llx pid = %d",
	    str, op_name, type_name, (longlong_t)flk->l_start,
	    flk->l_len ? (longlong_t)flk->l_len : ~0LL, flk->l_pid);
}

#define	LOCK_PRINT(d, s, t, f) if (d) lock_print(s, t, f)
#else
#define	LOCK_PRINT(d, s, t, f)
#endif
6048 /*ARGSUSED*/
6049 static bool_t
6050 creds_ok(cred_set_t cr_set, struct svc_req *req, struct compound_state *cs)
6052 return (TRUE);
6056 * Look up the pathname using the vp in cs as the directory vnode.
6057 * cs->vp will be the vnode for the file on success
6060 static nfsstat4
6061 rfs4_lookup(component4 *component, struct svc_req *req,
6062 struct compound_state *cs)
6064 char *nm;
6065 uint32_t len;
6066 nfsstat4 status;
6067 struct sockaddr *ca;
6068 char *name;
6070 if (cs->vp == NULL) {
6071 return (NFS4ERR_NOFILEHANDLE);
6073 if (cs->vp->v_type != VDIR) {
6074 return (NFS4ERR_NOTDIR);
6077 if (!utf8_dir_verify(component))
6078 return (NFS4ERR_INVAL);
6080 nm = utf8_to_fn(component, &len, NULL);
6081 if (nm == NULL) {
6082 return (NFS4ERR_INVAL);
6085 if (len > MAXNAMELEN) {
6086 kmem_free(nm, len);
6087 return (NFS4ERR_NAMETOOLONG);
6090 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6091 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6092 MAXPATHLEN + 1);
6094 if (name == NULL) {
6095 kmem_free(nm, len);
6096 return (NFS4ERR_INVAL);
6099 status = do_rfs4_op_lookup(name, req, cs);
6101 if (name != nm)
6102 kmem_free(name, MAXPATHLEN + 1);
6104 kmem_free(nm, len);
6106 return (status);
6109 static nfsstat4
6110 rfs4_lookupfile(component4 *component, struct svc_req *req,
6111 struct compound_state *cs, uint32_t access, change_info4 *cinfo)
6113 nfsstat4 status;
6114 vnode_t *dvp = cs->vp;
6115 vattr_t bva, ava, fva;
6116 int error;
6118 /* Get "before" change value */
6119 bva.va_mask = AT_CTIME|AT_SEQ;
6120 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6121 if (error)
6122 return (puterrno4(error));
6124 /* rfs4_lookup may VN_RELE directory */
6125 VN_HOLD(dvp);
6127 status = rfs4_lookup(component, req, cs);
6128 if (status != NFS4_OK) {
6129 VN_RELE(dvp);
6130 return (status);
6134 * Get "after" change value, if it fails, simply return the
6135 * before value.
6137 ava.va_mask = AT_CTIME|AT_SEQ;
6138 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6139 ava.va_ctime = bva.va_ctime;
6140 ava.va_seq = 0;
6142 VN_RELE(dvp);
6145 * Validate the file is a file
6147 fva.va_mask = AT_TYPE|AT_MODE;
6148 error = VOP_GETATTR(cs->vp, &fva, 0, cs->cr, NULL);
6149 if (error)
6150 return (puterrno4(error));
6152 if (fva.va_type != VREG) {
6153 if (fva.va_type == VDIR)
6154 return (NFS4ERR_ISDIR);
6155 if (fva.va_type == VLNK)
6156 return (NFS4ERR_SYMLINK);
6157 return (NFS4ERR_INVAL);
6160 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime);
6161 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6164 * It is undefined if VOP_LOOKUP will change va_seq, so
6165 * cinfo.atomic = TRUE only if we have
6166 * non-zero va_seq's, and they have not changed.
6168 if (bva.va_seq && ava.va_seq && ava.va_seq == bva.va_seq)
6169 cinfo->atomic = TRUE;
6170 else
6171 cinfo->atomic = FALSE;
6173 /* Check for mandatory locking */
6174 cs->mandlock = MANDLOCK(cs->vp, fva.va_mode);
6175 return (check_open_access(access, cs, req));
6178 static nfsstat4
6179 create_vnode(vnode_t *dvp, char *nm, vattr_t *vap, createmode4 mode,
6180 timespec32_t *mtime, cred_t *cr, vnode_t **vpp, bool_t *created)
6182 int error;
6183 nfsstat4 status = NFS4_OK;
6184 vattr_t va;
6186 tryagain:
6189 * The file open mode used is VWRITE. If the client needs
6190 * some other semantic, then it should do the access checking
6191 * itself. It would have been nice to have the file open mode
6192 * passed as part of the arguments.
6195 *created = TRUE;
6196 error = VOP_CREATE(dvp, nm, vap, EXCL, VWRITE, vpp, cr, 0, NULL, NULL);
6198 if (error) {
6199 *created = FALSE;
6202 * If we got something other than file already exists
6203 * then just return this error. Otherwise, we got
6204 * EEXIST. If we were doing a GUARDED create, then
6205 * just return this error. Otherwise, we need to
6206 * make sure that this wasn't a duplicate of an
6207 * exclusive create request.
6209 * The assumption is made that a non-exclusive create
6210 * request will never return EEXIST.
6213 if (error != EEXIST || mode == GUARDED4) {
6214 status = puterrno4(error);
6215 return (status);
6217 error = VOP_LOOKUP(dvp, nm, vpp, NULL, 0, NULL, cr,
6218 NULL, NULL, NULL);
6220 if (error) {
6222 * We couldn't find the file that we thought that
6223 * we just created. So, we'll just try creating
6224 * it again.
6226 if (error == ENOENT)
6227 goto tryagain;
6229 status = puterrno4(error);
6230 return (status);
6233 if (mode == UNCHECKED4) {
6234 /* existing object must be regular file */
6235 if ((*vpp)->v_type != VREG) {
6236 if ((*vpp)->v_type == VDIR)
6237 status = NFS4ERR_ISDIR;
6238 else if ((*vpp)->v_type == VLNK)
6239 status = NFS4ERR_SYMLINK;
6240 else
6241 status = NFS4ERR_INVAL;
6242 VN_RELE(*vpp);
6243 return (status);
6246 return (NFS4_OK);
6249 /* Check for duplicate request */
6250 ASSERT(mtime != 0);
6251 va.va_mask = AT_MTIME;
6252 error = VOP_GETATTR(*vpp, &va, 0, cr, NULL);
6253 if (!error) {
6254 /* We found the file */
6255 if (va.va_mtime.tv_sec != mtime->tv_sec ||
6256 va.va_mtime.tv_nsec != mtime->tv_nsec) {
6257 /* but its not our creation */
6258 VN_RELE(*vpp);
6259 return (NFS4ERR_EXIST);
6261 *created = TRUE; /* retrans of create == created */
6262 return (NFS4_OK);
6264 VN_RELE(*vpp);
6265 return (NFS4ERR_EXIST);
6268 return (NFS4_OK);
6271 static nfsstat4
6272 check_open_access(uint32_t access, struct compound_state *cs,
6273 struct svc_req *req)
6275 int error;
6276 vnode_t *vp;
6277 bool_t readonly;
6278 cred_t *cr = cs->cr;
6280 /* For now we don't allow mandatory locking as per V2/V3 */
6281 if (cs->access == CS_ACCESS_DENIED || cs->mandlock) {
6282 return (NFS4ERR_ACCESS);
6285 vp = cs->vp;
6286 ASSERT(cr != NULL && vp->v_type == VREG);
6289 * If the file system is exported read only and we are trying
6290 * to open for write, then return NFS4ERR_ROFS
6293 readonly = rdonly4(cs->exi, cs->vp, req);
6295 if ((access & OPEN4_SHARE_ACCESS_WRITE) && readonly)
6296 return (NFS4ERR_ROFS);
6298 if (access & OPEN4_SHARE_ACCESS_READ) {
6299 if ((VOP_ACCESS(vp, VREAD, 0, cr, NULL) != 0) &&
6300 (VOP_ACCESS(vp, VEXEC, 0, cr, NULL) != 0)) {
6301 return (NFS4ERR_ACCESS);
6305 if (access & OPEN4_SHARE_ACCESS_WRITE) {
6306 error = VOP_ACCESS(vp, VWRITE, 0, cr, NULL);
6307 if (error)
6308 return (NFS4ERR_ACCESS);
6311 return (NFS4_OK);
6314 static nfsstat4
6315 rfs4_createfile(OPEN4args *args, struct svc_req *req, struct compound_state *cs,
6316 change_info4 *cinfo, bitmap4 *attrset, clientid4 clientid)
6318 struct nfs4_svgetit_arg sarg;
6319 struct nfs4_ntov_table ntov;
6321 bool_t ntov_table_init = FALSE;
6322 struct statvfs64 sb;
6323 nfsstat4 status;
6324 vnode_t *vp;
6325 vattr_t bva, ava, iva, cva, *vap;
6326 vnode_t *dvp;
6327 timespec32_t *mtime;
6328 char *nm = NULL;
6329 uint_t buflen;
6330 bool_t created;
6331 bool_t setsize = FALSE;
6332 len_t reqsize;
6333 int error;
6334 bool_t trunc;
6335 caller_context_t ct;
6336 component4 *component;
6337 bslabel_t *clabel;
6338 struct sockaddr *ca;
6339 char *name = NULL;
6341 sarg.sbp = &sb;
6342 sarg.is_referral = B_FALSE;
6344 dvp = cs->vp;
6346 /* Check if the file system is read only */
6347 if (rdonly4(cs->exi, dvp, req))
6348 return (NFS4ERR_ROFS);
6350 /* check the label of including directory */
6351 if (is_system_labeled()) {
6352 ASSERT(req->rq_label != NULL);
6353 clabel = req->rq_label;
6354 DTRACE_PROBE2(tx__rfs4__log__info__opremove__clabel, char *,
6355 "got client label from request(1)",
6356 struct svc_req *, req);
6357 if (!blequal(&l_admin_low->tsl_label, clabel)) {
6358 if (!do_rfs_label_check(clabel, dvp, EQUALITY_CHECK,
6359 cs->exi)) {
6360 return (NFS4ERR_ACCESS);
6366 * Get the last component of path name in nm. cs will reference
6367 * the including directory on success.
6369 component = &args->open_claim4_u.file;
6370 if (!utf8_dir_verify(component))
6371 return (NFS4ERR_INVAL);
6373 nm = utf8_to_fn(component, &buflen, NULL);
6375 if (nm == NULL)
6376 return (NFS4ERR_RESOURCE);
6378 if (buflen > MAXNAMELEN) {
6379 kmem_free(nm, buflen);
6380 return (NFS4ERR_NAMETOOLONG);
6383 bva.va_mask = AT_TYPE|AT_CTIME|AT_SEQ;
6384 error = VOP_GETATTR(dvp, &bva, 0, cs->cr, NULL);
6385 if (error) {
6386 kmem_free(nm, buflen);
6387 return (puterrno4(error));
6390 if (bva.va_type != VDIR) {
6391 kmem_free(nm, buflen);
6392 return (NFS4ERR_NOTDIR);
6395 NFS4_SET_FATTR4_CHANGE(cinfo->before, bva.va_ctime)
6397 switch (args->mode) {
6398 case GUARDED4:
6399 /*FALLTHROUGH*/
6400 case UNCHECKED4:
6401 nfs4_ntov_table_init(&ntov);
6402 ntov_table_init = TRUE;
6404 *attrset = 0;
6405 status = do_rfs4_set_attrs(attrset,
6406 &args->createhow4_u.createattrs,
6407 cs, &sarg, &ntov, NFS4ATTR_SETIT);
6409 if (status == NFS4_OK && (sarg.vap->va_mask & AT_TYPE) &&
6410 sarg.vap->va_type != VREG) {
6411 if (sarg.vap->va_type == VDIR)
6412 status = NFS4ERR_ISDIR;
6413 else if (sarg.vap->va_type == VLNK)
6414 status = NFS4ERR_SYMLINK;
6415 else
6416 status = NFS4ERR_INVAL;
6419 if (status != NFS4_OK) {
6420 kmem_free(nm, buflen);
6421 nfs4_ntov_table_free(&ntov, &sarg);
6422 *attrset = 0;
6423 return (status);
6426 vap = sarg.vap;
6427 vap->va_type = VREG;
6428 vap->va_mask |= AT_TYPE;
6430 if ((vap->va_mask & AT_MODE) == 0) {
6431 vap->va_mask |= AT_MODE;
6432 vap->va_mode = (mode_t)0600;
6435 if (vap->va_mask & AT_SIZE) {
6437 /* Disallow create with a non-zero size */
6439 if ((reqsize = sarg.vap->va_size) != 0) {
6440 kmem_free(nm, buflen);
6441 nfs4_ntov_table_free(&ntov, &sarg);
6442 *attrset = 0;
6443 return (NFS4ERR_INVAL);
6445 setsize = TRUE;
6447 break;
6449 case EXCLUSIVE4:
6450 /* prohibit EXCL create of named attributes */
6451 if (dvp->v_flag & V_XATTRDIR) {
6452 kmem_free(nm, buflen);
6453 *attrset = 0;
6454 return (NFS4ERR_INVAL);
6457 cva.va_mask = AT_TYPE | AT_MTIME | AT_MODE;
6458 cva.va_type = VREG;
6460 * Ensure no time overflows. Assumes underlying
6461 * filesystem supports at least 32 bits.
6462 * Truncate nsec to usec resolution to allow valid
6463 * compares even if the underlying filesystem truncates.
6465 mtime = (timespec32_t *)&args->createhow4_u.createverf;
6466 cva.va_mtime.tv_sec = mtime->tv_sec % TIME32_MAX;
6467 cva.va_mtime.tv_nsec = (mtime->tv_nsec / 1000) * 1000;
6468 cva.va_mode = (mode_t)0;
6469 vap = &cva;
6472 * For EXCL create, attrset is set to the server attr
6473 * used to cache the client's verifier.
6475 *attrset = FATTR4_TIME_MODIFY_MASK;
6476 break;
6479 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
6480 name = nfscmd_convname(ca, cs->exi, nm, NFSCMD_CONV_INBOUND,
6481 MAXPATHLEN + 1);
6483 if (name == NULL) {
6484 kmem_free(nm, buflen);
6485 return (NFS4ERR_SERVERFAULT);
6488 status = create_vnode(dvp, name, vap, args->mode, mtime,
6489 cs->cr, &vp, &created);
6490 if (nm != name)
6491 kmem_free(name, MAXPATHLEN + 1);
6492 kmem_free(nm, buflen);
6494 if (status != NFS4_OK) {
6495 if (ntov_table_init)
6496 nfs4_ntov_table_free(&ntov, &sarg);
6497 *attrset = 0;
6498 return (status);
6501 trunc = (setsize && !created);
6503 if (args->mode != EXCLUSIVE4) {
6504 bitmap4 createmask = args->createhow4_u.createattrs.attrmask;
6507 * True verification that object was created with correct
6508 * attrs is impossible. The attrs could have been changed
6509 * immediately after object creation. If attributes did
6510 * not verify, the only recourse for the server is to
6511 * destroy the object. Maybe if some attrs (like gid)
6512 * are set incorrectly, the object should be destroyed;
6513 * however, seems bad as a default policy. Do we really
6514 * want to destroy an object over one of the times not
6515 * verifying correctly? For these reasons, the server
6516 * currently sets bits in attrset for createattrs
6517 * that were set; however, no verification is done.
6519 * vmask_to_nmask accounts for vattr bits set on create
6520 * [do_rfs4_set_attrs() only sets resp bits for
6521 * non-vattr/vfs bits.]
6522 * Mask off any bits we set by default so as not to return
6523 * more attrset bits than were requested in createattrs
6525 if (created) {
6526 nfs4_vmask_to_nmask(sarg.vap->va_mask, attrset);
6527 *attrset &= createmask;
6528 } else {
6530 * We did not create the vnode (we tried but it
6531 * already existed). In this case, the only createattr
6532 * that the spec allows the server to set is size,
6533 * and even then, it can only be set if it is 0.
6535 *attrset = 0;
6536 if (trunc)
6537 *attrset = FATTR4_SIZE_MASK;
6540 if (ntov_table_init)
6541 nfs4_ntov_table_free(&ntov, &sarg);
6544 * Get the initial "after" sequence number, if it fails,
6545 * set to zero, time to before.
6547 iva.va_mask = AT_CTIME|AT_SEQ;
6548 if (VOP_GETATTR(dvp, &iva, 0, cs->cr, NULL)) {
6549 iva.va_seq = 0;
6550 iva.va_ctime = bva.va_ctime;
6554 * create_vnode attempts to create the file exclusive,
6555 * if it already exists the VOP_CREATE will fail and
6556 * may not increase va_seq. It is atomic if
6557 * we haven't changed the directory, but if it has changed
6558 * we don't know what changed it.
6560 if (!created) {
6561 if (bva.va_seq && iva.va_seq &&
6562 bva.va_seq == iva.va_seq)
6563 cinfo->atomic = TRUE;
6564 else
6565 cinfo->atomic = FALSE;
6566 NFS4_SET_FATTR4_CHANGE(cinfo->after, iva.va_ctime);
6567 } else {
6569 * The entry was created, we need to sync the
6570 * directory metadata.
6572 (void) VOP_FSYNC(dvp, 0, cs->cr, NULL);
6575 * Get "after" change value, if it fails, simply return the
6576 * before value.
6578 ava.va_mask = AT_CTIME|AT_SEQ;
6579 if (VOP_GETATTR(dvp, &ava, 0, cs->cr, NULL)) {
6580 ava.va_ctime = bva.va_ctime;
6581 ava.va_seq = 0;
6584 NFS4_SET_FATTR4_CHANGE(cinfo->after, ava.va_ctime);
6587 * The cinfo->atomic = TRUE only if we have
6588 * non-zero va_seq's, and it has incremented by exactly one
6589 * during the create_vnode and it didn't
6590 * change during the VOP_FSYNC.
6592 if (bva.va_seq && iva.va_seq && ava.va_seq &&
6593 iva.va_seq == (bva.va_seq + 1) && iva.va_seq == ava.va_seq)
6594 cinfo->atomic = TRUE;
6595 else
6596 cinfo->atomic = FALSE;
6599 /* Check for mandatory locking and that the size gets set. */
6600 cva.va_mask = AT_MODE;
6601 if (setsize)
6602 cva.va_mask |= AT_SIZE;
6604 /* Assume the worst */
6605 cs->mandlock = TRUE;
6607 if (VOP_GETATTR(vp, &cva, 0, cs->cr, NULL) == 0) {
6608 cs->mandlock = MANDLOCK(cs->vp, cva.va_mode);
6611 * Truncate the file if necessary; this would be
6612 * the case for create over an existing file.
6615 if (trunc) {
6616 int in_crit = 0;
6617 rfs4_file_t *fp;
6618 bool_t create = FALSE;
6621 * We are writing over an existing file.
6622 * Check to see if we need to recall a delegation.
6624 rfs4_hold_deleg_policy();
6625 if ((fp = rfs4_findfile(vp, NULL, &create)) != NULL) {
6626 if (rfs4_check_delegated_byfp(FWRITE, fp,
6627 (reqsize == 0), FALSE, FALSE, &clientid)) {
6628 rfs4_file_rele(fp);
6629 rfs4_rele_deleg_policy();
6630 VN_RELE(vp);
6631 *attrset = 0;
6632 return (NFS4ERR_DELAY);
6634 rfs4_file_rele(fp);
6636 rfs4_rele_deleg_policy();
6638 if (nbl_need_check(vp)) {
6639 in_crit = 1;
6641 ASSERT(reqsize == 0);
6643 nbl_start_crit(vp, RW_READER);
6644 if (nbl_conflict(vp, NBL_WRITE, 0,
6645 cva.va_size, 0, NULL)) {
6646 in_crit = 0;
6647 nbl_end_crit(vp);
6648 VN_RELE(vp);
6649 *attrset = 0;
6650 return (NFS4ERR_ACCESS);
6653 ct.cc_sysid = 0;
6654 ct.cc_pid = 0;
6655 ct.cc_caller_id = nfs4_srv_caller_id;
6656 ct.cc_flags = CC_DONTBLOCK;
6658 cva.va_mask = AT_SIZE;
6659 cva.va_size = reqsize;
6660 (void) VOP_SETATTR(vp, &cva, 0, cs->cr, &ct);
6661 if (in_crit)
6662 nbl_end_crit(vp);
6666 error = makefh4(&cs->fh, vp, cs->exi);
6669 * Force modified data and metadata out to stable storage.
6671 (void) VOP_FSYNC(vp, FNODSYNC, cs->cr, NULL);
6673 if (error) {
6674 VN_RELE(vp);
6675 *attrset = 0;
6676 return (puterrno4(error));
6679 /* if parent dir is attrdir, set namedattr fh flag */
6680 if (dvp->v_flag & V_XATTRDIR)
6681 set_fh4_flag(&cs->fh, FH4_NAMEDATTR);
6683 if (cs->vp)
6684 VN_RELE(cs->vp);
6686 cs->vp = vp;
6689 * if we did not create the file, we will need to check
6690 * the access bits on the file
6693 if (!created) {
6694 if (setsize)
6695 args->share_access |= OPEN4_SHARE_ACCESS_WRITE;
6696 status = check_open_access(args->share_access, cs, req);
6697 if (status != NFS4_OK)
6698 *attrset = 0;
6700 return (status);
6703 /*ARGSUSED*/
6704 static void
6705 rfs4_do_open(struct compound_state *cs, struct svc_req *req,
6706 rfs4_openowner_t *oo, delegreq_t deleg,
6707 uint32_t access, uint32_t deny,
6708 OPEN4res *resp, int deleg_cur)
6710 /* XXX Currently not using req */
6711 rfs4_state_t *sp;
6712 rfs4_file_t *fp;
6713 bool_t screate = TRUE;
6714 bool_t fcreate = TRUE;
6715 uint32_t open_a, share_a;
6716 uint32_t open_d, share_d;
6717 rfs4_deleg_state_t *dsp;
6718 sysid_t sysid;
6719 nfsstat4 status;
6720 caller_context_t ct;
6721 int fflags = 0;
6722 int recall = 0;
6723 int err;
6724 int first_open;
6726 /* get the file struct and hold a lock on it during initial open */
6727 fp = rfs4_findfile_withlock(cs->vp, &cs->fh, &fcreate);
6728 if (fp == NULL) {
6729 resp->status = NFS4ERR_RESOURCE;
6730 DTRACE_PROBE1(nfss__e__do__open1, nfsstat4, resp->status);
6731 return;
6734 sp = rfs4_findstate_by_owner_file(oo, fp, &screate);
6735 if (sp == NULL) {
6736 resp->status = NFS4ERR_RESOURCE;
6737 DTRACE_PROBE1(nfss__e__do__open2, nfsstat4, resp->status);
6738 /* No need to keep any reference */
6739 rw_exit(&fp->rf_file_rwlock);
6740 rfs4_file_rele(fp);
6741 return;
6744 /* try to get the sysid before continuing */
6745 if ((status = rfs4_client_sysid(oo->ro_client, &sysid)) != NFS4_OK) {
6746 resp->status = status;
6747 rfs4_file_rele(fp);
6748 /* Not a fully formed open; "close" it */
6749 if (screate == TRUE)
6750 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6751 rfs4_state_rele(sp);
6752 return;
6755 /* Calculate the fflags for this OPEN. */
6756 if (access & OPEN4_SHARE_ACCESS_READ)
6757 fflags |= FREAD;
6758 if (access & OPEN4_SHARE_ACCESS_WRITE)
6759 fflags |= FWRITE;
6761 rfs4_dbe_lock(sp->rs_dbe);
6764 * Calculate the new deny and access mode that this open is adding to
6765 * the file for this open owner;
6767 open_d = (deny & ~sp->rs_open_deny);
6768 open_a = (access & ~sp->rs_open_access);
6771 * Calculate the new share access and share deny modes that this open
6772 * is adding to the file for this open owner;
6774 share_a = (access & ~sp->rs_share_access);
6775 share_d = (deny & ~sp->rs_share_deny);
6777 first_open = (sp->rs_open_access & OPEN4_SHARE_ACCESS_BOTH) == 0;
6780 * Check to see the client has already sent an open for this
6781 * open owner on this file with the same share/deny modes.
6782 * If so, we don't need to check for a conflict and we don't
6783 * need to add another shrlock. If not, then we need to
6784 * check for conflicts in deny and access before checking for
6785 * conflicts in delegation. We don't want to recall a
6786 * delegation based on an open that will eventually fail based
6787 * on shares modes.
6790 if (share_a || share_d) {
6791 if ((err = rfs4_share(sp, access, deny)) != 0) {
6792 rfs4_dbe_unlock(sp->rs_dbe);
6793 resp->status = err;
6795 rfs4_file_rele(fp);
6796 /* Not a fully formed open; "close" it */
6797 if (screate == TRUE)
6798 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6799 rfs4_state_rele(sp);
6800 return;
6804 rfs4_dbe_lock(fp->rf_dbe);
6807 * Check to see if this file is delegated and if so, if a
6808 * recall needs to be done.
6810 if (rfs4_check_recall(sp, access)) {
6811 rfs4_dbe_unlock(fp->rf_dbe);
6812 rfs4_dbe_unlock(sp->rs_dbe);
6813 rfs4_recall_deleg(fp, FALSE, sp->rs_owner->ro_client);
6814 delay(NFS4_DELEGATION_CONFLICT_DELAY);
6815 rfs4_dbe_lock(sp->rs_dbe);
6817 /* if state closed while lock was dropped */
6818 if (sp->rs_closed) {
6819 if (share_a || share_d)
6820 (void) rfs4_unshare(sp);
6821 rfs4_dbe_unlock(sp->rs_dbe);
6822 rfs4_file_rele(fp);
6823 /* Not a fully formed open; "close" it */
6824 if (screate == TRUE)
6825 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6826 rfs4_state_rele(sp);
6827 resp->status = NFS4ERR_OLD_STATEID;
6828 return;
6831 rfs4_dbe_lock(fp->rf_dbe);
6832 /* Let's see if the delegation was returned */
6833 if (rfs4_check_recall(sp, access)) {
6834 rfs4_dbe_unlock(fp->rf_dbe);
6835 if (share_a || share_d)
6836 (void) rfs4_unshare(sp);
6837 rfs4_dbe_unlock(sp->rs_dbe);
6838 rfs4_file_rele(fp);
6839 rfs4_update_lease(sp->rs_owner->ro_client);
6841 /* Not a fully formed open; "close" it */
6842 if (screate == TRUE)
6843 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6844 rfs4_state_rele(sp);
6845 resp->status = NFS4ERR_DELAY;
6846 return;
6850 * the share check passed and any delegation conflict has been
6851 * taken care of, now call vop_open.
6852 * if this is the first open then call vop_open with fflags.
6853 * if not, call vn_open_upgrade with just the upgrade flags.
6855 * if the file has been opened already, it will have the current
6856 * access mode in the state struct. if it has no share access, then
6857 * this is a new open.
6859 * However, if this is open with CLAIM_DLEGATE_CUR, then don't
6860 * call VOP_OPEN(), just do the open upgrade.
6862 if (first_open && !deleg_cur) {
6863 ct.cc_sysid = sysid;
6864 ct.cc_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
6865 ct.cc_caller_id = nfs4_srv_caller_id;
6866 ct.cc_flags = CC_DONTBLOCK;
6867 err = VOP_OPEN(&cs->vp, fflags, cs->cr, &ct);
6868 if (err) {
6869 rfs4_dbe_unlock(fp->rf_dbe);
6870 if (share_a || share_d)
6871 (void) rfs4_unshare(sp);
6872 rfs4_dbe_unlock(sp->rs_dbe);
6873 rfs4_file_rele(fp);
6875 /* Not a fully formed open; "close" it */
6876 if (screate == TRUE)
6877 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
6878 rfs4_state_rele(sp);
6879 /* check if a monitor detected a delegation conflict */
6880 if (err == EAGAIN && (ct.cc_flags & CC_WOULDBLOCK))
6881 resp->status = NFS4ERR_DELAY;
6882 else
6883 resp->status = NFS4ERR_SERVERFAULT;
6884 return;
6886 } else { /* open upgrade */
6888 * calculate the fflags for the new mode that is being added
6889 * by this upgrade.
6891 fflags = 0;
6892 if (open_a & OPEN4_SHARE_ACCESS_READ)
6893 fflags |= FREAD;
6894 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6895 fflags |= FWRITE;
6896 vn_open_upgrade(cs->vp, fflags);
6898 sp->rs_open_access |= access;
6899 sp->rs_open_deny |= deny;
6901 if (open_d & OPEN4_SHARE_DENY_READ)
6902 fp->rf_deny_read++;
6903 if (open_d & OPEN4_SHARE_DENY_WRITE)
6904 fp->rf_deny_write++;
6905 fp->rf_share_deny |= deny;
6907 if (open_a & OPEN4_SHARE_ACCESS_READ)
6908 fp->rf_access_read++;
6909 if (open_a & OPEN4_SHARE_ACCESS_WRITE)
6910 fp->rf_access_write++;
6911 fp->rf_share_access |= access;
6914 * Check for delegation here. if the deleg argument is not
6915 * DELEG_ANY, then this is a reclaim from a client and
6916 * we must honor the delegation requested. If necessary we can
6917 * set the recall flag.
6920 dsp = rfs4_grant_delegation(deleg, sp, &recall);
6922 cs->deleg = (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE);
6924 next_stateid(&sp->rs_stateid);
6926 resp->stateid = sp->rs_stateid.stateid;
6928 rfs4_dbe_unlock(fp->rf_dbe);
6929 rfs4_dbe_unlock(sp->rs_dbe);
6931 if (dsp) {
6932 rfs4_set_deleg_response(dsp, &resp->delegation, NULL, recall);
6933 rfs4_deleg_state_rele(dsp);
6936 rfs4_file_rele(fp);
6937 rfs4_state_rele(sp);
6939 resp->status = NFS4_OK;
6942 /*ARGSUSED*/
6943 static void
6944 rfs4_do_opennull(struct compound_state *cs, struct svc_req *req,
6945 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6947 change_info4 *cinfo = &resp->cinfo;
6948 bitmap4 *attrset = &resp->attrset;
6950 if (args->opentype == OPEN4_NOCREATE)
6951 resp->status = rfs4_lookupfile(&args->open_claim4_u.file,
6952 req, cs, args->share_access, cinfo);
6953 else {
6954 /* inhibit delegation grants during exclusive create */
6956 if (args->mode == EXCLUSIVE4)
6957 rfs4_disable_delegation();
6959 resp->status = rfs4_createfile(args, req, cs, cinfo, attrset,
6960 oo->ro_client->rc_clientid);
6963 if (resp->status == NFS4_OK) {
6965 /* cs->vp cs->fh now reference the desired file */
6967 rfs4_do_open(cs, req, oo,
6968 oo->ro_need_confirm ? DELEG_NONE : DELEG_ANY,
6969 args->share_access, args->share_deny, resp, 0);
6972 * If rfs4_createfile set attrset, we must
6973 * clear this attrset before the response is copied.
6975 if (resp->status != NFS4_OK && resp->attrset) {
6976 resp->attrset = 0;
6979 else
6980 *cs->statusp = resp->status;
6982 if (args->mode == EXCLUSIVE4)
6983 rfs4_enable_delegation();
6986 /*ARGSUSED*/
6987 static void
6988 rfs4_do_openprev(struct compound_state *cs, struct svc_req *req,
6989 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
6991 change_info4 *cinfo = &resp->cinfo;
6992 vattr_t va;
6993 vtype_t v_type = cs->vp->v_type;
6994 int error = 0;
6996 /* Verify that we have a regular file */
6997 if (v_type != VREG) {
6998 if (v_type == VDIR)
6999 resp->status = NFS4ERR_ISDIR;
7000 else if (v_type == VLNK)
7001 resp->status = NFS4ERR_SYMLINK;
7002 else
7003 resp->status = NFS4ERR_INVAL;
7004 return;
7007 va.va_mask = AT_MODE|AT_UID;
7008 error = VOP_GETATTR(cs->vp, &va, 0, cs->cr, NULL);
7009 if (error) {
7010 resp->status = puterrno4(error);
7011 return;
7014 cs->mandlock = MANDLOCK(cs->vp, va.va_mode);
7017 * Check if we have access to the file, Note the the file
7018 * could have originally been open UNCHECKED or GUARDED
7019 * with mode bits that will now fail, but there is nothing
7020 * we can really do about that except in the case that the
7021 * owner of the file is the one requesting the open.
7023 if (crgetuid(cs->cr) != va.va_uid) {
7024 resp->status = check_open_access(args->share_access, cs, req);
7025 if (resp->status != NFS4_OK) {
7026 return;
7031 * cinfo on a CLAIM_PREVIOUS is undefined, initialize to zero
7033 cinfo->before = 0;
7034 cinfo->after = 0;
7035 cinfo->atomic = FALSE;
7037 rfs4_do_open(cs, req, oo,
7038 NFS4_DELEG4TYPE2REQTYPE(args->open_claim4_u.delegate_type),
7039 args->share_access, args->share_deny, resp, 0);
7042 static void
7043 rfs4_do_opendelcur(struct compound_state *cs, struct svc_req *req,
7044 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7046 int error;
7047 nfsstat4 status;
7048 stateid4 stateid =
7049 args->open_claim4_u.delegate_cur_info.delegate_stateid;
7050 rfs4_deleg_state_t *dsp;
7053 * Find the state info from the stateid and confirm that the
7054 * file is delegated. If the state openowner is the same as
7055 * the supplied openowner we're done. If not, get the file
7056 * info from the found state info. Use that file info to
7057 * create the state for this lock owner. Note solaris doen't
7058 * really need the pathname to find the file. We may want to
7059 * lookup the pathname and make sure that the vp exist and
7060 * matches the vp in the file structure. However it is
7061 * possible that the pathname nolonger exists (local process
7062 * unlinks the file), so this may not be that useful.
7065 status = rfs4_get_deleg_state(&stateid, &dsp);
7066 if (status != NFS4_OK) {
7067 resp->status = status;
7068 return;
7071 ASSERT(dsp->rds_finfo->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE);
7074 * New lock owner, create state. Since this was probably called
7075 * in response to a CB_RECALL we set deleg to DELEG_NONE
7078 ASSERT(cs->vp != NULL);
7079 VN_RELE(cs->vp);
7080 VN_HOLD(dsp->rds_finfo->rf_vp);
7081 cs->vp = dsp->rds_finfo->rf_vp;
7083 if (error = makefh4(&cs->fh, cs->vp, cs->exi)) {
7084 rfs4_deleg_state_rele(dsp);
7085 *cs->statusp = resp->status = puterrno4(error);
7086 return;
7089 /* Mark progress for delegation returns */
7090 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite = gethrestime_sec();
7091 rfs4_deleg_state_rele(dsp);
7092 rfs4_do_open(cs, req, oo, DELEG_NONE,
7093 args->share_access, args->share_deny, resp, 1);
7096 /*ARGSUSED*/
7097 static void
7098 rfs4_do_opendelprev(struct compound_state *cs, struct svc_req *req,
7099 OPEN4args *args, rfs4_openowner_t *oo, OPEN4res *resp)
7102 * Lookup the pathname, it must already exist since this file
7103 * was delegated.
7105 * Find the file and state info for this vp and open owner pair.
7106 * check that they are in fact delegated.
7107 * check that the state access and deny modes are the same.
7109 * Return the delgation possibly seting the recall flag.
7111 rfs4_file_t *fp;
7112 rfs4_state_t *sp;
7113 bool_t create = FALSE;
7114 bool_t dcreate = FALSE;
7115 rfs4_deleg_state_t *dsp;
7116 nfsace4 *ace;
7118 /* Note we ignore oflags */
7119 resp->status = rfs4_lookupfile(&args->open_claim4_u.file_delegate_prev,
7120 req, cs, args->share_access, &resp->cinfo);
7122 if (resp->status != NFS4_OK) {
7123 return;
7126 /* get the file struct and hold a lock on it during initial open */
7127 fp = rfs4_findfile_withlock(cs->vp, NULL, &create);
7128 if (fp == NULL) {
7129 resp->status = NFS4ERR_RESOURCE;
7130 DTRACE_PROBE1(nfss__e__do_opendelprev1, nfsstat4, resp->status);
7131 return;
7134 sp = rfs4_findstate_by_owner_file(oo, fp, &create);
7135 if (sp == NULL) {
7136 resp->status = NFS4ERR_SERVERFAULT;
7137 DTRACE_PROBE1(nfss__e__do_opendelprev2, nfsstat4, resp->status);
7138 rw_exit(&fp->rf_file_rwlock);
7139 rfs4_file_rele(fp);
7140 return;
7143 rfs4_dbe_lock(sp->rs_dbe);
7144 rfs4_dbe_lock(fp->rf_dbe);
7145 if (args->share_access != sp->rs_share_access ||
7146 args->share_deny != sp->rs_share_deny ||
7147 sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
7148 NFS4_DEBUG(rfs4_debug,
7149 (CE_NOTE, "rfs4_do_opendelprev: state mixup"));
7150 rfs4_dbe_unlock(fp->rf_dbe);
7151 rfs4_dbe_unlock(sp->rs_dbe);
7152 rfs4_file_rele(fp);
7153 rfs4_state_rele(sp);
7154 resp->status = NFS4ERR_SERVERFAULT;
7155 return;
7157 rfs4_dbe_unlock(fp->rf_dbe);
7158 rfs4_dbe_unlock(sp->rs_dbe);
7160 dsp = rfs4_finddeleg(sp, &dcreate);
7161 if (dsp == NULL) {
7162 rfs4_state_rele(sp);
7163 rfs4_file_rele(fp);
7164 resp->status = NFS4ERR_SERVERFAULT;
7165 return;
7168 next_stateid(&sp->rs_stateid);
7170 resp->stateid = sp->rs_stateid.stateid;
7172 resp->delegation.delegation_type = dsp->rds_dtype;
7174 if (dsp->rds_dtype == OPEN_DELEGATE_READ) {
7175 open_read_delegation4 *rv =
7176 &resp->delegation.open_delegation4_u.read;
7178 rv->stateid = dsp->rds_delegid.stateid;
7179 rv->recall = FALSE; /* no policy in place to set to TRUE */
7180 ace = &rv->permissions;
7181 } else {
7182 open_write_delegation4 *rv =
7183 &resp->delegation.open_delegation4_u.write;
7185 rv->stateid = dsp->rds_delegid.stateid;
7186 rv->recall = FALSE; /* no policy in place to set to TRUE */
7187 ace = &rv->permissions;
7188 rv->space_limit.limitby = NFS_LIMIT_SIZE;
7189 rv->space_limit.nfs_space_limit4_u.filesize = UINT64_MAX;
7192 /* XXX For now */
7193 ace->type = ACE4_ACCESS_ALLOWED_ACE_TYPE;
7194 ace->flag = 0;
7195 ace->access_mask = 0;
7196 ace->who.utf8string_len = 0;
7197 ace->who.utf8string_val = 0;
7199 rfs4_deleg_state_rele(dsp);
7200 rfs4_state_rele(sp);
7201 rfs4_file_rele(fp);
/*
 * Outcome of a sequence id check (see rfs4_check_seqid()).
 */
typedef enum {
	NFS4_CHKSEQ_OKAY = 0,	/* request carries the next expected seqid */
	NFS4_CHKSEQ_REPLAY = 1,	/* same seqid and same operation as last time */
	NFS4_CHKSEQ_BAD = 2	/* anything else */
} rfs4_chkseq_t;
7211 * Generic function for sequence number checks.
7213 static rfs4_chkseq_t
7214 rfs4_check_seqid(seqid4 seqid, nfs_resop4 *lastop,
7215 seqid4 rqst_seq, nfs_resop4 *resop, bool_t copyres)
7217 /* Same sequence ids and matching operations? */
7218 if (seqid == rqst_seq && resop->resop == lastop->resop) {
7219 if (copyres == TRUE) {
7220 rfs4_free_reply(resop);
7221 rfs4_copy_reply(resop, lastop);
7223 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
7224 "Replayed SEQID %d\n", seqid));
7225 return (NFS4_CHKSEQ_REPLAY);
7228 /* If the incoming sequence is not the next expected then it is bad */
7229 if (rqst_seq != seqid + 1) {
7230 if (rqst_seq == seqid) {
7231 NFS4_DEBUG(rfs4_debug,
7232 (CE_NOTE, "BAD SEQID: Replayed sequence id "
7233 "but last op was %d current op is %d\n",
7234 lastop->resop, resop->resop));
7235 return (NFS4_CHKSEQ_BAD);
7237 NFS4_DEBUG(rfs4_debug,
7238 (CE_NOTE, "BAD SEQID: got %u expecting %u\n",
7239 rqst_seq, seqid));
7240 return (NFS4_CHKSEQ_BAD);
7243 /* Everything okay -- next expected */
7244 return (NFS4_CHKSEQ_OKAY);
7248 static rfs4_chkseq_t
7249 rfs4_check_open_seqid(seqid4 seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7251 rfs4_chkseq_t rc;
7253 rfs4_dbe_lock(op->ro_dbe);
7254 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply, seqid, resop,
7255 TRUE);
7256 rfs4_dbe_unlock(op->ro_dbe);
7258 if (rc == NFS4_CHKSEQ_OKAY)
7259 rfs4_update_lease(op->ro_client);
7261 return (rc);
7264 static rfs4_chkseq_t
7265 rfs4_check_olo_seqid(seqid4 olo_seqid, rfs4_openowner_t *op, nfs_resop4 *resop)
7267 rfs4_chkseq_t rc;
7269 rfs4_dbe_lock(op->ro_dbe);
7270 rc = rfs4_check_seqid(op->ro_open_seqid, &op->ro_reply,
7271 olo_seqid, resop, FALSE);
7272 rfs4_dbe_unlock(op->ro_dbe);
7274 return (rc);
7277 static rfs4_chkseq_t
7278 rfs4_check_lock_seqid(seqid4 seqid, rfs4_lo_state_t *lsp, nfs_resop4 *resop)
7280 rfs4_chkseq_t rc = NFS4_CHKSEQ_OKAY;
7282 rfs4_dbe_lock(lsp->rls_dbe);
7283 if (!lsp->rls_skip_seqid_check)
7284 rc = rfs4_check_seqid(lsp->rls_seqid, &lsp->rls_reply, seqid,
7285 resop, TRUE);
7286 rfs4_dbe_unlock(lsp->rls_dbe);
7288 return (rc);
7291 static void
7292 rfs4_op_open(nfs_argop4 *argop, nfs_resop4 *resop,
7293 struct svc_req *req, struct compound_state *cs)
7295 OPEN4args *args = &argop->nfs_argop4_u.opopen;
7296 OPEN4res *resp = &resop->nfs_resop4_u.opopen;
7297 open_owner4 *owner = &args->owner;
7298 open_claim_type4 claim = args->claim;
7299 rfs4_client_t *cp;
7300 rfs4_openowner_t *oo;
7301 bool_t create;
7302 bool_t replay = FALSE;
7303 int can_reclaim;
7305 DTRACE_NFSV4_2(op__open__start, struct compound_state *, cs,
7306 OPEN4args *, args);
7308 if (cs->vp == NULL) {
7309 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7310 goto end;
7314 * Need to check clientid and lease expiration first based on
7315 * error ordering and incrementing sequence id.
7317 cp = rfs4_findclient_by_id(owner->clientid, FALSE);
7318 if (cp == NULL) {
7319 *cs->statusp = resp->status =
7320 rfs4_check_clientid(&owner->clientid, 0);
7321 goto end;
7324 if (rfs4_lease_expired(cp)) {
7325 rfs4_client_close(cp);
7326 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7327 goto end;
7329 can_reclaim = cp->rc_can_reclaim;
7332 * Find the open_owner for use from this point forward. Take
7333 * care in updating the sequence id based on the type of error
7334 * being returned.
7336 retry:
7337 create = TRUE;
7338 oo = rfs4_findopenowner(owner, &create, args->seqid);
7339 if (oo == NULL) {
7340 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
7341 rfs4_client_rele(cp);
7342 goto end;
7345 /* Hold off access to the sequence space while the open is done */
7346 rfs4_sw_enter(&oo->ro_sw);
7349 * If the open_owner existed before at the server, then check
7350 * the sequence id.
7352 if (!create && !oo->ro_postpone_confirm) {
7353 switch (rfs4_check_open_seqid(args->seqid, oo, resop)) {
7354 case NFS4_CHKSEQ_BAD:
7355 if ((args->seqid > oo->ro_open_seqid) &&
7356 oo->ro_need_confirm) {
7357 rfs4_free_opens(oo, TRUE, FALSE);
7358 rfs4_sw_exit(&oo->ro_sw);
7359 rfs4_openowner_rele(oo);
7360 goto retry;
7362 resp->status = NFS4ERR_BAD_SEQID;
7363 goto out;
7364 case NFS4_CHKSEQ_REPLAY: /* replay of previous request */
7365 replay = TRUE;
7366 goto out;
7367 default:
7368 break;
7372 * Sequence was ok and open owner exists
7373 * check to see if we have yet to see an
7374 * open_confirm.
7376 if (oo->ro_need_confirm) {
7377 rfs4_free_opens(oo, TRUE, FALSE);
7378 rfs4_sw_exit(&oo->ro_sw);
7379 rfs4_openowner_rele(oo);
7380 goto retry;
7383 /* Grace only applies to regular-type OPENs */
7384 if (rfs4_clnt_in_grace(cp) &&
7385 (claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR)) {
7386 *cs->statusp = resp->status = NFS4ERR_GRACE;
7387 goto out;
7391 * If previous state at the server existed then can_reclaim
7392 * will be set. If not reply NFS4ERR_NO_GRACE to the
7393 * client.
7395 if (rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS && !can_reclaim) {
7396 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7397 goto out;
7402 * Reject the open if the client has missed the grace period
7404 if (!rfs4_clnt_in_grace(cp) && claim == CLAIM_PREVIOUS) {
7405 *cs->statusp = resp->status = NFS4ERR_NO_GRACE;
7406 goto out;
7409 /* Couple of up-front bookkeeping items */
7410 if (oo->ro_need_confirm) {
7412 * If this is a reclaim OPEN then we should not ask
7413 * for a confirmation of the open_owner per the
7414 * protocol specification.
7416 if (claim == CLAIM_PREVIOUS)
7417 oo->ro_need_confirm = FALSE;
7418 else
7419 resp->rflags |= OPEN4_RESULT_CONFIRM;
7421 resp->rflags |= OPEN4_RESULT_LOCKTYPE_POSIX;
7424 * If there is an unshared filesystem mounted on this vnode,
7425 * do not allow to open/create in this directory.
7427 if (vn_ismntpt(cs->vp)) {
7428 *cs->statusp = resp->status = NFS4ERR_ACCESS;
7429 goto out;
7433 * access must READ, WRITE, or BOTH. No access is invalid.
7434 * deny can be READ, WRITE, BOTH, or NONE.
7435 * bits not defined for access/deny are invalid.
7437 if (! (args->share_access & OPEN4_SHARE_ACCESS_BOTH) ||
7438 (args->share_access & ~OPEN4_SHARE_ACCESS_BOTH) ||
7439 (args->share_deny & ~OPEN4_SHARE_DENY_BOTH)) {
7440 *cs->statusp = resp->status = NFS4ERR_INVAL;
7441 goto out;
7446 * make sure attrset is zero before response is built.
7448 resp->attrset = 0;
7450 switch (claim) {
7451 case CLAIM_NULL:
7452 rfs4_do_opennull(cs, req, args, oo, resp);
7453 break;
7454 case CLAIM_PREVIOUS:
7455 rfs4_do_openprev(cs, req, args, oo, resp);
7456 break;
7457 case CLAIM_DELEGATE_CUR:
7458 rfs4_do_opendelcur(cs, req, args, oo, resp);
7459 break;
7460 case CLAIM_DELEGATE_PREV:
7461 rfs4_do_opendelprev(cs, req, args, oo, resp);
7462 break;
7463 default:
7464 resp->status = NFS4ERR_INVAL;
7465 break;
7468 out:
7469 rfs4_client_rele(cp);
7471 /* Catch sequence id handling here to make it a little easier */
7472 switch (resp->status) {
7473 case NFS4ERR_BADXDR:
7474 case NFS4ERR_BAD_SEQID:
7475 case NFS4ERR_BAD_STATEID:
7476 case NFS4ERR_NOFILEHANDLE:
7477 case NFS4ERR_RESOURCE:
7478 case NFS4ERR_STALE_CLIENTID:
7479 case NFS4ERR_STALE_STATEID:
7481 * The protocol states that if any of these errors are
7482 * being returned, the sequence id should not be
7483 * incremented. Any other return requires an
7484 * increment.
7486 break;
7487 default:
7488 /* Always update the lease in this case */
7489 rfs4_update_lease(oo->ro_client);
7491 /* Regular response - copy the result */
7492 if (!replay)
7493 rfs4_update_open_resp(oo, resop, &cs->fh);
7496 * REPLAY case: Only if the previous response was OK
7497 * do we copy the filehandle. If not OK, no
7498 * filehandle to copy.
7500 if (replay == TRUE &&
7501 resp->status == NFS4_OK &&
7502 oo->ro_reply_fh.nfs_fh4_val) {
7504 * If this is a replay, we must restore the
7505 * current filehandle/vp to that of what was
7506 * returned originally. Try our best to do
7507 * it.
7509 nfs_fh4_fmt_t *fh_fmtp =
7510 (nfs_fh4_fmt_t *)oo->ro_reply_fh.nfs_fh4_val;
7512 cs->exi = checkexport4(&fh_fmtp->fh4_fsid,
7513 (fid_t *)&fh_fmtp->fh4_xlen, NULL);
7515 if (cs->exi == NULL) {
7516 resp->status = NFS4ERR_STALE;
7517 goto finish;
7520 VN_RELE(cs->vp);
7522 cs->vp = nfs4_fhtovp(&oo->ro_reply_fh, cs->exi,
7523 &resp->status);
7525 if (cs->vp == NULL)
7526 goto finish;
7528 nfs_fh4_copy(&oo->ro_reply_fh, &cs->fh);
7532 * If this was a replay, no need to update the
7533 * sequence id. If the open_owner was not created on
7534 * this pass, then update. The first use of an
7535 * open_owner will not bump the sequence id.
7537 if (replay == FALSE && !create)
7538 rfs4_update_open_sequence(oo);
7540 * If the client is receiving an error and the
7541 * open_owner needs to be confirmed, there is no way
7542 * to notify the client of this fact ignoring the fact
7543 * that the server has no method of returning a
7544 * stateid to confirm. Therefore, the server needs to
7545 * mark this open_owner in a way as to avoid the
7546 * sequence id checking the next time the client uses
7547 * this open_owner.
7549 if (resp->status != NFS4_OK && oo->ro_need_confirm)
7550 oo->ro_postpone_confirm = TRUE;
7552 * If OK response then clear the postpone flag and
7553 * reset the sequence id to keep in sync with the
7554 * client.
7556 if (resp->status == NFS4_OK && oo->ro_postpone_confirm) {
7557 oo->ro_postpone_confirm = FALSE;
7558 oo->ro_open_seqid = args->seqid;
7560 break;
7563 finish:
7564 *cs->statusp = resp->status;
7566 rfs4_sw_exit(&oo->ro_sw);
7567 rfs4_openowner_rele(oo);
7569 end:
7570 DTRACE_NFSV4_2(op__open__done, struct compound_state *, cs,
7571 OPEN4res *, resp);
7574 /*ARGSUSED*/
7575 void
7576 rfs4_op_open_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
7577 struct svc_req *req, struct compound_state *cs)
7579 OPEN_CONFIRM4args *args = &argop->nfs_argop4_u.opopen_confirm;
7580 OPEN_CONFIRM4res *resp = &resop->nfs_resop4_u.opopen_confirm;
7581 rfs4_state_t *sp;
7582 nfsstat4 status;
7584 DTRACE_NFSV4_2(op__open__confirm__start, struct compound_state *, cs,
7585 OPEN_CONFIRM4args *, args);
7587 if (cs->vp == NULL) {
7588 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7589 goto out;
7592 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7593 if (status != NFS4_OK) {
7594 *cs->statusp = resp->status = status;
7595 goto out;
7598 /* Ensure specified filehandle matches */
7599 if (cs->vp != sp->rs_finfo->rf_vp) {
7600 rfs4_state_rele(sp);
7601 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7602 goto out;
7605 /* hold off other access to open_owner while we tinker */
7606 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7608 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7609 case NFS4_CHECK_STATEID_OKAY:
7610 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7611 resop) != 0) {
7612 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7613 break;
7616 * If it is the appropriate stateid and determined to
7617 * be "OKAY" then this means that the stateid does not
7618 * need to be confirmed and the client is in error for
7619 * sending an OPEN_CONFIRM.
7621 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7622 break;
7623 case NFS4_CHECK_STATEID_OLD:
7624 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7625 break;
7626 case NFS4_CHECK_STATEID_BAD:
7627 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7628 break;
7629 case NFS4_CHECK_STATEID_EXPIRED:
7630 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7631 break;
7632 case NFS4_CHECK_STATEID_CLOSED:
7633 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7634 break;
7635 case NFS4_CHECK_STATEID_REPLAY:
7636 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7637 resop)) {
7638 case NFS4_CHKSEQ_OKAY:
7640 * This is replayed stateid; if seqid matches
7641 * next expected, then client is using wrong seqid.
7643 /* fall through */
7644 case NFS4_CHKSEQ_BAD:
7645 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7646 break;
7647 case NFS4_CHKSEQ_REPLAY:
7649 * Note this case is the duplicate case so
7650 * resp->status is already set.
7652 *cs->statusp = resp->status;
7653 rfs4_update_lease(sp->rs_owner->ro_client);
7654 break;
7656 break;
7657 case NFS4_CHECK_STATEID_UNCONFIRMED:
7658 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7659 resop) != NFS4_CHKSEQ_OKAY) {
7660 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7661 break;
7663 *cs->statusp = resp->status = NFS4_OK;
7665 next_stateid(&sp->rs_stateid);
7666 resp->open_stateid = sp->rs_stateid.stateid;
7667 sp->rs_owner->ro_need_confirm = FALSE;
7668 rfs4_update_lease(sp->rs_owner->ro_client);
7669 rfs4_update_open_sequence(sp->rs_owner);
7670 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7671 break;
7672 default:
7673 ASSERT(FALSE);
7674 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7675 break;
7677 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7678 rfs4_state_rele(sp);
7680 out:
7681 DTRACE_NFSV4_2(op__open__confirm__done, struct compound_state *, cs,
7682 OPEN_CONFIRM4res *, resp);
7685 /*ARGSUSED*/
7686 void
7687 rfs4_op_open_downgrade(nfs_argop4 *argop, nfs_resop4 *resop,
7688 struct svc_req *req, struct compound_state *cs)
7690 OPEN_DOWNGRADE4args *args = &argop->nfs_argop4_u.opopen_downgrade;
7691 OPEN_DOWNGRADE4res *resp = &resop->nfs_resop4_u.opopen_downgrade;
7692 uint32_t access = args->share_access;
7693 uint32_t deny = args->share_deny;
7694 nfsstat4 status;
7695 rfs4_state_t *sp;
7696 rfs4_file_t *fp;
7697 int fflags = 0;
7699 DTRACE_NFSV4_2(op__open__downgrade__start, struct compound_state *, cs,
7700 OPEN_DOWNGRADE4args *, args);
7702 if (cs->vp == NULL) {
7703 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
7704 goto out;
7707 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_VALID);
7708 if (status != NFS4_OK) {
7709 *cs->statusp = resp->status = status;
7710 goto out;
7713 /* Ensure specified filehandle matches */
7714 if (cs->vp != sp->rs_finfo->rf_vp) {
7715 rfs4_state_rele(sp);
7716 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7717 goto out;
7720 /* hold off other access to open_owner while we tinker */
7721 rfs4_sw_enter(&sp->rs_owner->ro_sw);
7723 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
7724 case NFS4_CHECK_STATEID_OKAY:
7725 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7726 resop) != NFS4_CHKSEQ_OKAY) {
7727 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7728 goto end;
7730 break;
7731 case NFS4_CHECK_STATEID_OLD:
7732 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7733 goto end;
7734 case NFS4_CHECK_STATEID_BAD:
7735 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7736 goto end;
7737 case NFS4_CHECK_STATEID_EXPIRED:
7738 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
7739 goto end;
7740 case NFS4_CHECK_STATEID_CLOSED:
7741 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
7742 goto end;
7743 case NFS4_CHECK_STATEID_UNCONFIRMED:
7744 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
7745 goto end;
7746 case NFS4_CHECK_STATEID_REPLAY:
7747 /* Check the sequence id for the open owner */
7748 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
7749 resop)) {
7750 case NFS4_CHKSEQ_OKAY:
7752 * This is replayed stateid; if seqid matches
7753 * next expected, then client is using wrong seqid.
7755 /* fall through */
7756 case NFS4_CHKSEQ_BAD:
7757 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
7758 goto end;
7759 case NFS4_CHKSEQ_REPLAY:
7761 * Note this case is the duplicate case so
7762 * resp->status is already set.
7764 *cs->statusp = resp->status;
7765 rfs4_update_lease(sp->rs_owner->ro_client);
7766 goto end;
7768 break;
7769 default:
7770 ASSERT(FALSE);
7771 break;
7774 rfs4_dbe_lock(sp->rs_dbe);
7776 * Check that the new access modes and deny modes are valid.
7777 * Check that no invalid bits are set.
7779 if ((access & ~(OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) ||
7780 (deny & ~(OPEN4_SHARE_DENY_READ | OPEN4_SHARE_DENY_WRITE))) {
7781 *cs->statusp = resp->status = NFS4ERR_INVAL;
7782 rfs4_update_open_sequence(sp->rs_owner);
7783 rfs4_dbe_unlock(sp->rs_dbe);
7784 goto end;
7788 * The new modes must be a subset of the current modes and
7789 * the access must specify at least one mode. To test that
7790 * the new mode is a subset of the current modes we bitwise
7791 * AND them together and check that the result equals the new
7792 * mode. For example:
7793 * New mode, access == R and current mode, sp->rs_open_access == RW
7794 * access & sp->rs_open_access == R == access, so the new access mode
7795 * is valid. Consider access == RW, sp->rs_open_access = R
7796 * access & sp->rs_open_access == R != access, so the new access mode
7797 * is invalid.
7799 if ((access & sp->rs_open_access) != access ||
7800 (deny & sp->rs_open_deny) != deny ||
7801 (access &
7802 (OPEN4_SHARE_ACCESS_READ | OPEN4_SHARE_ACCESS_WRITE)) == 0) {
7803 *cs->statusp = resp->status = NFS4ERR_INVAL;
7804 rfs4_update_open_sequence(sp->rs_owner);
7805 rfs4_dbe_unlock(sp->rs_dbe);
7806 goto end;
7810 * Release any share locks associated with this stateID.
7811 * Strictly speaking, this violates the spec because the
7812 * spec effectively requires that open downgrade be atomic.
7813 * At present, fs_shrlock does not have this capability.
7815 (void) rfs4_unshare(sp);
7817 status = rfs4_share(sp, access, deny);
7818 if (status != NFS4_OK) {
7819 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
7820 rfs4_update_open_sequence(sp->rs_owner);
7821 rfs4_dbe_unlock(sp->rs_dbe);
7822 goto end;
7825 fp = sp->rs_finfo;
7826 rfs4_dbe_lock(fp->rf_dbe);
7829 * If the current mode has deny read and the new mode
7830 * does not, decrement the number of deny read mode bits
7831 * and if it goes to zero turn off the deny read bit
7832 * on the file.
7834 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_READ) &&
7835 (deny & OPEN4_SHARE_DENY_READ) == 0) {
7836 fp->rf_deny_read--;
7837 if (fp->rf_deny_read == 0)
7838 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
7842 * If the current mode has deny write and the new mode
7843 * does not, decrement the number of deny write mode bits
7844 * and if it goes to zero turn off the deny write bit
7845 * on the file.
7847 if ((sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) &&
7848 (deny & OPEN4_SHARE_DENY_WRITE) == 0) {
7849 fp->rf_deny_write--;
7850 if (fp->rf_deny_write == 0)
7851 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
7855 * If the current mode has access read and the new mode
7856 * does not, decrement the number of access read mode bits
7857 * and if it goes to zero turn off the access read bit
7858 * on the file. set fflags to FREAD for the call to
7859 * vn_open_downgrade().
7861 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) &&
7862 (access & OPEN4_SHARE_ACCESS_READ) == 0) {
7863 fp->rf_access_read--;
7864 if (fp->rf_access_read == 0)
7865 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
7866 fflags |= FREAD;
7870 * If the current mode has access write and the new mode
7871 * does not, decrement the number of access write mode bits
7872 * and if it goes to zero turn off the access write bit
7873 * on the file. set fflags to FWRITE for the call to
7874 * vn_open_downgrade().
7876 if ((sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) &&
7877 (access & OPEN4_SHARE_ACCESS_WRITE) == 0) {
7878 fp->rf_access_write--;
7879 if (fp->rf_access_write == 0)
7880 fp->rf_share_deny &= ~OPEN4_SHARE_ACCESS_WRITE;
7881 fflags |= FWRITE;
7884 /* Check that the file is still accessible */
7885 ASSERT(fp->rf_share_access);
7887 rfs4_dbe_unlock(fp->rf_dbe);
7889 /* now set the new open access and deny modes */
7890 sp->rs_open_access = access;
7891 sp->rs_open_deny = deny;
7894 * we successfully downgraded the share lock, now we need to downgrade
7895 * the open. it is possible that the downgrade was only for a deny
7896 * mode and we have nothing else to do.
7898 if ((fflags & (FREAD|FWRITE)) != 0)
7899 vn_open_downgrade(cs->vp, fflags);
7901 /* Update the stateid */
7902 next_stateid(&sp->rs_stateid);
7903 resp->open_stateid = sp->rs_stateid.stateid;
7905 rfs4_dbe_unlock(sp->rs_dbe);
7907 *cs->statusp = resp->status = NFS4_OK;
7908 /* Update the lease */
7909 rfs4_update_lease(sp->rs_owner->ro_client);
7910 /* And the sequence */
7911 rfs4_update_open_sequence(sp->rs_owner);
7912 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
7914 end:
7915 rfs4_sw_exit(&sp->rs_owner->ro_sw);
7916 rfs4_state_rele(sp);
7917 out:
7918 DTRACE_NFSV4_2(op__open__downgrade__done, struct compound_state *, cs,
7919 OPEN_DOWNGRADE4res *, resp);
7923 * The logic behind this function is detailed in the NFSv4 RFC in the
7924 * SETCLIENTID operation description under IMPLEMENTATION. Refer to
7925 * that section for explicit guidance to server behavior for
7926 * SETCLIENTID.
7928 void
7929 rfs4_op_setclientid(nfs_argop4 *argop, nfs_resop4 *resop,
7930 struct svc_req *req, struct compound_state *cs)
7932 SETCLIENTID4args *args = &argop->nfs_argop4_u.opsetclientid;
7933 SETCLIENTID4res *res = &resop->nfs_resop4_u.opsetclientid;
7934 rfs4_client_t *cp, *newcp, *cp_confirmed, *cp_unconfirmed;
7935 rfs4_clntip_t *ci;
7936 bool_t create;
7937 char *addr, *netid;
7938 int len;
7940 DTRACE_NFSV4_2(op__setclientid__start, struct compound_state *, cs,
7941 SETCLIENTID4args *, args);
7942 retry:
7943 newcp = cp_confirmed = cp_unconfirmed = NULL;
7946 * Save the caller's IP address
7948 args->client.cl_addr =
7949 (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
7952 * Record if it is a Solaris client that cannot handle referrals.
7954 if (strstr(args->client.id_val, "Solaris") &&
7955 !strstr(args->client.id_val, "+referrals")) {
7956 /* Add a "yes, it's downrev" record */
7957 create = TRUE;
7958 ci = rfs4_find_clntip(args->client.cl_addr, &create);
7959 ASSERT(ci != NULL);
7960 rfs4_dbe_rele(ci->ri_dbe);
7961 } else {
7962 /* Remove any previous record */
7963 rfs4_invalidate_clntip(args->client.cl_addr);
7967 * In search of an EXISTING client matching the incoming
7968 * request to establish a new client identifier at the server
7970 create = TRUE;
7971 cp = rfs4_findclient(&args->client, &create, NULL);
7973 /* Should never happen */
7974 ASSERT(cp != NULL);
7976 if (cp == NULL) {
7977 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
7978 goto out;
7982 * Easiest case. Client identifier is newly created and is
7983 * unconfirmed. Also note that for this case, no other
7984 * entries exist for the client identifier. Nothing else to
7985 * check. Just setup the response and respond.
7987 if (create) {
7988 *cs->statusp = res->status = NFS4_OK;
7989 res->SETCLIENTID4res_u.resok4.clientid = cp->rc_clientid;
7990 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
7991 cp->rc_confirm_verf;
7992 /* Setup callback information; CB_NULL confirmation later */
7993 rfs4_client_setcb(cp, &args->callback, args->callback_ident);
7995 rfs4_client_rele(cp);
7996 goto out;
8000 * An existing, confirmed client may exist but it may not have
8001 * been active for at least one lease period. If so, then
8002 * "close" the client and create a new client identifier
8004 if (rfs4_lease_expired(cp)) {
8005 rfs4_client_close(cp);
8006 goto retry;
8009 if (cp->rc_need_confirm == TRUE)
8010 cp_unconfirmed = cp;
8011 else
8012 cp_confirmed = cp;
8014 cp = NULL;
8017 * We have a confirmed client, now check for an
8018 * unconfimred entry
8020 if (cp_confirmed) {
8021 /* If creds don't match then client identifier is inuse */
8022 if (!creds_ok(cp_confirmed->rc_cr_set, req, cs)) {
8023 rfs4_cbinfo_t *cbp;
8025 * Some one else has established this client
8026 * id. Try and say * who they are. We will use
8027 * the call back address supplied by * the
8028 * first client.
8030 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8032 addr = netid = NULL;
8034 cbp = &cp_confirmed->rc_cbinfo;
8035 if (cbp->cb_callback.cb_location.r_addr &&
8036 cbp->cb_callback.cb_location.r_netid) {
8037 cb_client4 *cbcp = &cbp->cb_callback;
8039 len = strlen(cbcp->cb_location.r_addr)+1;
8040 addr = kmem_alloc(len, KM_SLEEP);
8041 bcopy(cbcp->cb_location.r_addr, addr, len);
8042 len = strlen(cbcp->cb_location.r_netid)+1;
8043 netid = kmem_alloc(len, KM_SLEEP);
8044 bcopy(cbcp->cb_location.r_netid, netid, len);
8047 res->SETCLIENTID4res_u.client_using.r_addr = addr;
8048 res->SETCLIENTID4res_u.client_using.r_netid = netid;
8050 rfs4_client_rele(cp_confirmed);
8054 * Confirmed, creds match, and verifier matches; must
8055 * be an update of the callback info
8057 if (cp_confirmed->rc_nfs_client.verifier ==
8058 args->client.verifier) {
8059 /* Setup callback information */
8060 rfs4_client_setcb(cp_confirmed, &args->callback,
8061 args->callback_ident);
8063 /* everything okay -- move ahead */
8064 *cs->statusp = res->status = NFS4_OK;
8065 res->SETCLIENTID4res_u.resok4.clientid =
8066 cp_confirmed->rc_clientid;
8068 /* update the confirm_verifier and return it */
8069 rfs4_client_scv_next(cp_confirmed);
8070 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8071 cp_confirmed->rc_confirm_verf;
8073 rfs4_client_rele(cp_confirmed);
8074 goto out;
8078 * Creds match but the verifier doesn't. Must search
8079 * for an unconfirmed client that would be replaced by
8080 * this request.
8082 create = FALSE;
8083 cp_unconfirmed = rfs4_findclient(&args->client, &create,
8084 cp_confirmed);
8088 * At this point, we have taken care of the brand new client
8089 * struct, INUSE case, update of an existing, and confirmed
8090 * client struct.
8094 * check to see if things have changed while we originally
8095 * picked up the client struct. If they have, then return and
8096 * retry the processing of this SETCLIENTID request.
8098 if (cp_unconfirmed) {
8099 rfs4_dbe_lock(cp_unconfirmed->rc_dbe);
8100 if (!cp_unconfirmed->rc_need_confirm) {
8101 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8102 rfs4_client_rele(cp_unconfirmed);
8103 if (cp_confirmed)
8104 rfs4_client_rele(cp_confirmed);
8105 goto retry;
8107 /* do away with the old unconfirmed one */
8108 rfs4_dbe_invalidate(cp_unconfirmed->rc_dbe);
8109 rfs4_dbe_unlock(cp_unconfirmed->rc_dbe);
8110 rfs4_client_rele(cp_unconfirmed);
8111 cp_unconfirmed = NULL;
8115 * This search will temporarily hide the confirmed client
8116 * struct while a new client struct is created as the
8117 * unconfirmed one.
8119 create = TRUE;
8120 newcp = rfs4_findclient(&args->client, &create, cp_confirmed);
8122 ASSERT(newcp != NULL);
8124 if (newcp == NULL) {
8125 *cs->statusp = res->status = NFS4ERR_SERVERFAULT;
8126 rfs4_client_rele(cp_confirmed);
8127 goto out;
8131 * If one was not created, then a similar request must be in
8132 * process so release and start over with this one
8134 if (create != TRUE) {
8135 rfs4_client_rele(newcp);
8136 if (cp_confirmed)
8137 rfs4_client_rele(cp_confirmed);
8138 goto retry;
8141 *cs->statusp = res->status = NFS4_OK;
8142 res->SETCLIENTID4res_u.resok4.clientid = newcp->rc_clientid;
8143 res->SETCLIENTID4res_u.resok4.setclientid_confirm =
8144 newcp->rc_confirm_verf;
8145 /* Setup callback information; CB_NULL confirmation later */
8146 rfs4_client_setcb(newcp, &args->callback, args->callback_ident);
8148 newcp->rc_cp_confirmed = cp_confirmed;
8150 rfs4_client_rele(newcp);
8152 out:
8153 DTRACE_NFSV4_2(op__setclientid__done, struct compound_state *, cs,
8154 SETCLIENTID4res *, res);
8157 /*ARGSUSED*/
8158 void
8159 rfs4_op_setclientid_confirm(nfs_argop4 *argop, nfs_resop4 *resop,
8160 struct svc_req *req, struct compound_state *cs)
8162 SETCLIENTID_CONFIRM4args *args =
8163 &argop->nfs_argop4_u.opsetclientid_confirm;
8164 SETCLIENTID_CONFIRM4res *res =
8165 &resop->nfs_resop4_u.opsetclientid_confirm;
8166 rfs4_client_t *cp, *cptoclose = NULL;
8168 DTRACE_NFSV4_2(op__setclientid__confirm__start,
8169 struct compound_state *, cs,
8170 SETCLIENTID_CONFIRM4args *, args);
8172 *cs->statusp = res->status = NFS4_OK;
8174 cp = rfs4_findclient_by_id(args->clientid, TRUE);
8176 if (cp == NULL) {
8177 *cs->statusp = res->status =
8178 rfs4_check_clientid(&args->clientid, 1);
8179 goto out;
8182 if (!creds_ok(cp, req, cs)) {
8183 *cs->statusp = res->status = NFS4ERR_CLID_INUSE;
8184 rfs4_client_rele(cp);
8185 goto out;
8188 /* If the verifier doesn't match, the record doesn't match */
8189 if (cp->rc_confirm_verf != args->setclientid_confirm) {
8190 *cs->statusp = res->status = NFS4ERR_STALE_CLIENTID;
8191 rfs4_client_rele(cp);
8192 goto out;
8195 rfs4_dbe_lock(cp->rc_dbe);
8196 cp->rc_need_confirm = FALSE;
8197 if (cp->rc_cp_confirmed) {
8198 cptoclose = cp->rc_cp_confirmed;
8199 cptoclose->rc_ss_remove = 1;
8200 cp->rc_cp_confirmed = NULL;
8204 * Update the client's associated server instance, if it's changed
8205 * since the client was created.
8207 if (rfs4_servinst(cp) != rfs4_cur_servinst)
8208 rfs4_servinst_assign(cp, rfs4_cur_servinst);
8211 * Record clientid in stable storage.
8212 * Must be done after server instance has been assigned.
8214 rfs4_ss_clid(cp);
8216 rfs4_dbe_unlock(cp->rc_dbe);
8218 if (cptoclose)
8219 /* don't need to rele, client_close does it */
8220 rfs4_client_close(cptoclose);
8222 /* If needed, initiate CB_NULL call for callback path */
8223 rfs4_deleg_cb_check(cp);
8224 rfs4_update_lease(cp);
8227 * Check to see if client can perform reclaims
8229 rfs4_ss_chkclid(cp);
8231 rfs4_client_rele(cp);
8233 out:
8234 DTRACE_NFSV4_2(op__setclientid__confirm__done,
8235 struct compound_state *, cs,
8236 SETCLIENTID_CONFIRM4 *, res);
8240 /*ARGSUSED*/
8241 void
8242 rfs4_op_close(nfs_argop4 *argop, nfs_resop4 *resop,
8243 struct svc_req *req, struct compound_state *cs)
8245 CLOSE4args *args = &argop->nfs_argop4_u.opclose;
8246 CLOSE4res *resp = &resop->nfs_resop4_u.opclose;
8247 rfs4_state_t *sp;
8248 nfsstat4 status;
8250 DTRACE_NFSV4_2(op__close__start, struct compound_state *, cs,
8251 CLOSE4args *, args);
8253 if (cs->vp == NULL) {
8254 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8255 goto out;
8258 status = rfs4_get_state(&args->open_stateid, &sp, RFS4_DBS_INVALID);
8259 if (status != NFS4_OK) {
8260 *cs->statusp = resp->status = status;
8261 goto out;
8264 /* Ensure specified filehandle matches */
8265 if (cs->vp != sp->rs_finfo->rf_vp) {
8266 rfs4_state_rele(sp);
8267 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8268 goto out;
8271 /* hold off other access to open_owner while we tinker */
8272 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8274 switch (rfs4_check_stateid_seqid(sp, &args->open_stateid)) {
8275 case NFS4_CHECK_STATEID_OKAY:
8276 if (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8277 resop) != NFS4_CHKSEQ_OKAY) {
8278 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8279 goto end;
8281 break;
8282 case NFS4_CHECK_STATEID_OLD:
8283 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8284 goto end;
8285 case NFS4_CHECK_STATEID_BAD:
8286 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8287 goto end;
8288 case NFS4_CHECK_STATEID_EXPIRED:
8289 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8290 goto end;
8291 case NFS4_CHECK_STATEID_CLOSED:
8292 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8293 goto end;
8294 case NFS4_CHECK_STATEID_UNCONFIRMED:
8295 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8296 goto end;
8297 case NFS4_CHECK_STATEID_REPLAY:
8298 /* Check the sequence id for the open owner */
8299 switch (rfs4_check_open_seqid(args->seqid, sp->rs_owner,
8300 resop)) {
8301 case NFS4_CHKSEQ_OKAY:
8303 * This is replayed stateid; if seqid matches
8304 * next expected, then client is using wrong seqid.
8306 /* FALL THROUGH */
8307 case NFS4_CHKSEQ_BAD:
8308 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8309 goto end;
8310 case NFS4_CHKSEQ_REPLAY:
8312 * Note this case is the duplicate case so
8313 * resp->status is already set.
8315 *cs->statusp = resp->status;
8316 rfs4_update_lease(sp->rs_owner->ro_client);
8317 goto end;
8319 break;
8320 default:
8321 ASSERT(FALSE);
8322 break;
8325 rfs4_dbe_lock(sp->rs_dbe);
8327 /* Update the stateid. */
8328 next_stateid(&sp->rs_stateid);
8329 resp->open_stateid = sp->rs_stateid.stateid;
8331 rfs4_dbe_unlock(sp->rs_dbe);
8333 rfs4_update_lease(sp->rs_owner->ro_client);
8334 rfs4_update_open_sequence(sp->rs_owner);
8335 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8337 rfs4_state_close(sp, FALSE, FALSE, cs->cr);
8339 *cs->statusp = resp->status = status;
8341 end:
8342 rfs4_sw_exit(&sp->rs_owner->ro_sw);
8343 rfs4_state_rele(sp);
8344 out:
8345 DTRACE_NFSV4_2(op__close__done, struct compound_state *, cs,
8346 CLOSE4res *, resp);
8350 * Manage the counts on the file struct and close all file locks
8352 /*ARGSUSED*/
8353 void
8354 rfs4_release_share_lock_state(rfs4_state_t *sp, cred_t *cr,
8355 bool_t close_of_client)
8357 rfs4_file_t *fp = sp->rs_finfo;
8358 rfs4_lo_state_t *lsp;
8359 int fflags = 0;
8362 * If this call is part of the larger closing down of client
8363 * state then it is just easier to release all locks
8364 * associated with this client instead of going through each
8365 * individual file and cleaning locks there.
8367 if (close_of_client) {
8368 if (sp->rs_owner->ro_client->rc_unlksys_completed == FALSE &&
8369 !list_is_empty(&sp->rs_lostatelist) &&
8370 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID) {
8371 /* Is the PxFS kernel module loaded? */
8372 if (lm_remove_file_locks != NULL) {
8373 int new_sysid;
8375 /* Encode the cluster nodeid in new sysid */
8376 new_sysid = sp->rs_owner->ro_client->rc_sysidt;
8377 lm_set_nlmid_flk(&new_sysid);
8380 * This PxFS routine removes file locks for a
8381 * client over all nodes of a cluster.
8383 NFS4_DEBUG(rfs4_debug, (CE_NOTE,
8384 "lm_remove_file_locks(sysid=0x%x)\n",
8385 new_sysid));
8386 (*lm_remove_file_locks)(new_sysid);
8387 } else {
8388 struct flock64 flk;
8390 /* Release all locks for this client */
8391 flk.l_type = F_UNLKSYS;
8392 flk.l_whence = 0;
8393 flk.l_start = 0;
8394 flk.l_len = 0;
8395 flk.l_sysid =
8396 sp->rs_owner->ro_client->rc_sysidt;
8397 flk.l_pid = 0;
8398 (void) VOP_FRLOCK(sp->rs_finfo->rf_vp, F_SETLK,
8399 &flk, F_REMOTELOCK | FREAD | FWRITE,
8400 (u_offset_t)0, NULL, CRED(), NULL);
8403 sp->rs_owner->ro_client->rc_unlksys_completed = TRUE;
8408 * Release all locks on this file by this lock owner or at
8409 * least mark the locks as having been released
8411 for (lsp = list_head(&sp->rs_lostatelist); lsp != NULL;
8412 lsp = list_next(&sp->rs_lostatelist, lsp)) {
8413 lsp->rls_locks_cleaned = TRUE;
8415 /* Was this already taken care of above? */
8416 if (!close_of_client &&
8417 sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8418 (void) cleanlocks(sp->rs_finfo->rf_vp,
8419 lsp->rls_locker->rl_pid,
8420 lsp->rls_locker->rl_client->rc_sysidt);
8424 * Release any shrlocks associated with this open state ID.
8425 * This must be done before the rfs4_state gets marked closed.
8427 if (sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID)
8428 (void) rfs4_unshare(sp);
8430 if (sp->rs_open_access) {
8431 rfs4_dbe_lock(fp->rf_dbe);
8434 * Decrement the count for each access and deny bit that this
8435 * state has contributed to the file.
8436 * If the file counts go to zero
8437 * clear the appropriate bit in the appropriate mask.
8439 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_READ) {
8440 fp->rf_access_read--;
8441 fflags |= FREAD;
8442 if (fp->rf_access_read == 0)
8443 fp->rf_share_access &= ~OPEN4_SHARE_ACCESS_READ;
8445 if (sp->rs_open_access & OPEN4_SHARE_ACCESS_WRITE) {
8446 fp->rf_access_write--;
8447 fflags |= FWRITE;
8448 if (fp->rf_access_write == 0)
8449 fp->rf_share_access &=
8450 ~OPEN4_SHARE_ACCESS_WRITE;
8452 if (sp->rs_open_deny & OPEN4_SHARE_DENY_READ) {
8453 fp->rf_deny_read--;
8454 if (fp->rf_deny_read == 0)
8455 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_READ;
8457 if (sp->rs_open_deny & OPEN4_SHARE_DENY_WRITE) {
8458 fp->rf_deny_write--;
8459 if (fp->rf_deny_write == 0)
8460 fp->rf_share_deny &= ~OPEN4_SHARE_DENY_WRITE;
8463 (void) VOP_CLOSE(fp->rf_vp, fflags, 1, (offset_t)0, cr, NULL);
8465 rfs4_dbe_unlock(fp->rf_dbe);
8467 sp->rs_open_access = 0;
8468 sp->rs_open_deny = 0;
8473 * lock_denied: Fill in a LOCK4deneid structure given an flock64 structure.
8475 static nfsstat4
8476 lock_denied(LOCK4denied *dp, struct flock64 *flk)
8478 rfs4_lockowner_t *lo;
8479 rfs4_client_t *cp;
8480 uint32_t len;
8482 lo = rfs4_findlockowner_by_pid(flk->l_pid);
8483 if (lo != NULL) {
8484 cp = lo->rl_client;
8485 if (rfs4_lease_expired(cp)) {
8486 rfs4_lockowner_rele(lo);
8487 rfs4_dbe_hold(cp->rc_dbe);
8488 rfs4_client_close(cp);
8489 return (NFS4ERR_EXPIRED);
8491 dp->owner.clientid = lo->rl_owner.clientid;
8492 len = lo->rl_owner.owner_len;
8493 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8494 bcopy(lo->rl_owner.owner_val, dp->owner.owner_val, len);
8495 dp->owner.owner_len = len;
8496 rfs4_lockowner_rele(lo);
8497 goto finish;
8501 * Its not a NFS4 lock. We take advantage that the upper 32 bits
8502 * of the client id contain the boot time for a NFS4 lock. So we
8503 * fabricate and identity by setting clientid to the sysid, and
8504 * the lock owner to the pid.
8506 dp->owner.clientid = flk->l_sysid;
8507 len = sizeof (pid_t);
8508 dp->owner.owner_len = len;
8509 dp->owner.owner_val = kmem_alloc(len, KM_SLEEP);
8510 bcopy(&flk->l_pid, dp->owner.owner_val, len);
8511 finish:
8512 dp->offset = flk->l_start;
8513 dp->length = flk->l_len;
8515 if (flk->l_type == F_RDLCK)
8516 dp->locktype = READ_LT;
8517 else if (flk->l_type == F_WRLCK)
8518 dp->locktype = WRITE_LT;
8519 else
8520 return (NFS4ERR_INVAL); /* no mapping from POSIX ltype to v4 */
8522 return (NFS4_OK);
8525 static int
8526 setlock(vnode_t *vp, struct flock64 *flock, int flag, cred_t *cred)
8528 int error;
8529 struct flock64 flk;
8530 int i;
8531 clock_t delaytime;
8532 int cmd;
8534 cmd = nbl_need_check(vp) ? F_SETLK_NBMAND : F_SETLK;
8535 retry:
8536 delaytime = MSEC_TO_TICK_ROUNDUP(rfs4_lock_delay);
8538 for (i = 0; i < rfs4_maxlock_tries; i++) {
8539 LOCK_PRINT(rfs4_debug, "setlock", cmd, flock);
8540 error = VOP_FRLOCK(vp, cmd,
8541 flock, flag, (u_offset_t)0, NULL, cred, NULL);
8543 if (error != EAGAIN && error != EACCES)
8544 break;
8546 if (i < rfs4_maxlock_tries - 1) {
8547 delay(delaytime);
8548 delaytime *= 2;
8552 if (error == EAGAIN || error == EACCES) {
8553 /* Get the owner of the lock */
8554 flk = *flock;
8555 LOCK_PRINT(rfs4_debug, "setlock", F_GETLK, &flk);
8556 if (VOP_FRLOCK(vp, F_GETLK, &flk, flag,
8557 (u_offset_t)0, NULL, cred, NULL) == 0) {
8558 if (flk.l_type == F_UNLCK) {
8559 /* No longer locked, retry */
8560 goto retry;
8562 *flock = flk;
8563 LOCK_PRINT(rfs4_debug, "setlock(blocking lock)",
8564 F_GETLK, &flk);
8568 return (error);
8571 /*ARGSUSED*/
8572 static nfsstat4
8573 rfs4_do_lock(rfs4_lo_state_t *lsp, nfs_lock_type4 locktype,
8574 offset4 offset, length4 length, cred_t *cred, nfs_resop4 *resop)
8576 nfsstat4 status;
8577 rfs4_lockowner_t *lo = lsp->rls_locker;
8578 rfs4_state_t *sp = lsp->rls_state;
8579 struct flock64 flock;
8580 int16_t ltype;
8581 int flag;
8582 int error;
8583 sysid_t sysid;
8584 LOCK4res *lres;
8586 if (rfs4_lease_expired(lo->rl_client)) {
8587 return (NFS4ERR_EXPIRED);
8590 if ((status = rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
8591 return (status);
8593 /* Check for zero length. To lock to end of file use all ones for V4 */
8594 if (length == 0)
8595 return (NFS4ERR_INVAL);
8596 else if (length == (length4)(~0))
8597 length = 0; /* Posix to end of file */
8599 retry:
8600 rfs4_dbe_lock(sp->rs_dbe);
8601 if (sp->rs_closed) {
8602 rfs4_dbe_unlock(sp->rs_dbe);
8603 return (NFS4ERR_OLD_STATEID);
8606 if (resop->resop != OP_LOCKU) {
8607 switch (locktype) {
8608 case READ_LT:
8609 case READW_LT:
8610 if ((sp->rs_share_access
8611 & OPEN4_SHARE_ACCESS_READ) == 0) {
8612 rfs4_dbe_unlock(sp->rs_dbe);
8614 return (NFS4ERR_OPENMODE);
8616 ltype = F_RDLCK;
8617 break;
8618 case WRITE_LT:
8619 case WRITEW_LT:
8620 if ((sp->rs_share_access
8621 & OPEN4_SHARE_ACCESS_WRITE) == 0) {
8622 rfs4_dbe_unlock(sp->rs_dbe);
8624 return (NFS4ERR_OPENMODE);
8626 ltype = F_WRLCK;
8627 break;
8629 } else
8630 ltype = F_UNLCK;
8632 flock.l_type = ltype;
8633 flock.l_whence = 0; /* SEEK_SET */
8634 flock.l_start = offset;
8635 flock.l_len = length;
8636 flock.l_sysid = sysid;
8637 flock.l_pid = lsp->rls_locker->rl_pid;
8639 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
8640 if (flock.l_len < 0 || flock.l_start < 0) {
8641 rfs4_dbe_unlock(sp->rs_dbe);
8642 return (NFS4ERR_INVAL);
8646 * N.B. FREAD has the same value as OPEN4_SHARE_ACCESS_READ and
8647 * FWRITE has the same value as OPEN4_SHARE_ACCESS_WRITE.
8649 flag = (int)sp->rs_share_access | F_REMOTELOCK;
8651 error = setlock(sp->rs_finfo->rf_vp, &flock, flag, cred);
8652 if (error == 0) {
8653 rfs4_dbe_lock(lsp->rls_dbe);
8654 next_stateid(&lsp->rls_lockid);
8655 rfs4_dbe_unlock(lsp->rls_dbe);
8658 rfs4_dbe_unlock(sp->rs_dbe);
8661 * N.B. We map error values to nfsv4 errors. This is differrent
8662 * than puterrno4 routine.
8664 switch (error) {
8665 case 0:
8666 status = NFS4_OK;
8667 break;
8668 case EAGAIN:
8669 case EACCES: /* Old value */
8670 /* Can only get here if op is OP_LOCK */
8671 ASSERT(resop->resop == OP_LOCK);
8672 lres = &resop->nfs_resop4_u.oplock;
8673 status = NFS4ERR_DENIED;
8674 if (lock_denied(&lres->LOCK4res_u.denied, &flock)
8675 == NFS4ERR_EXPIRED)
8676 goto retry;
8677 break;
8678 case ENOLCK:
8679 status = NFS4ERR_DELAY;
8680 break;
8681 case EOVERFLOW:
8682 status = NFS4ERR_INVAL;
8683 break;
8684 case EINVAL:
8685 status = NFS4ERR_NOTSUPP;
8686 break;
8687 default:
8688 status = NFS4ERR_SERVERFAULT;
8689 break;
8692 return (status);
8695 /*ARGSUSED*/
8696 void
8697 rfs4_op_lock(nfs_argop4 *argop, nfs_resop4 *resop,
8698 struct svc_req *req, struct compound_state *cs)
8700 LOCK4args *args = &argop->nfs_argop4_u.oplock;
8701 LOCK4res *resp = &resop->nfs_resop4_u.oplock;
8702 nfsstat4 status;
8703 stateid4 *stateid;
8704 rfs4_lockowner_t *lo;
8705 rfs4_client_t *cp;
8706 rfs4_state_t *sp = NULL;
8707 rfs4_lo_state_t *lsp = NULL;
8708 bool_t ls_sw_held = FALSE;
8709 bool_t create = TRUE;
8710 bool_t lcreate = TRUE;
8711 bool_t dup_lock = FALSE;
8712 int rc;
8714 DTRACE_NFSV4_2(op__lock__start, struct compound_state *, cs,
8715 LOCK4args *, args);
8717 if (cs->vp == NULL) {
8718 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
8719 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8720 cs, LOCK4res *, resp);
8721 return;
8724 if (args->locker.new_lock_owner) {
8725 /* Create a new lockowner for this instance */
8726 open_to_lock_owner4 *olo = &args->locker.locker4_u.open_owner;
8728 NFS4_DEBUG(rfs4_debug, (CE_NOTE, "Creating new lock owner"));
8730 stateid = &olo->open_stateid;
8731 status = rfs4_get_state(stateid, &sp, RFS4_DBS_VALID);
8732 if (status != NFS4_OK) {
8733 NFS4_DEBUG(rfs4_debug,
8734 (CE_NOTE, "Get state failed in lock %d", status));
8735 *cs->statusp = resp->status = status;
8736 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8737 cs, LOCK4res *, resp);
8738 return;
8741 /* Ensure specified filehandle matches */
8742 if (cs->vp != sp->rs_finfo->rf_vp) {
8743 rfs4_state_rele(sp);
8744 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8745 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8746 cs, LOCK4res *, resp);
8747 return;
8750 /* hold off other access to open_owner while we tinker */
8751 rfs4_sw_enter(&sp->rs_owner->ro_sw);
8753 switch (rc = rfs4_check_stateid_seqid(sp, stateid)) {
8754 case NFS4_CHECK_STATEID_OLD:
8755 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8756 goto end;
8757 case NFS4_CHECK_STATEID_BAD:
8758 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8759 goto end;
8760 case NFS4_CHECK_STATEID_EXPIRED:
8761 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8762 goto end;
8763 case NFS4_CHECK_STATEID_UNCONFIRMED:
8764 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8765 goto end;
8766 case NFS4_CHECK_STATEID_CLOSED:
8767 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8768 goto end;
8769 case NFS4_CHECK_STATEID_OKAY:
8770 case NFS4_CHECK_STATEID_REPLAY:
8771 switch (rfs4_check_olo_seqid(olo->open_seqid,
8772 sp->rs_owner, resop)) {
8773 case NFS4_CHKSEQ_OKAY:
8774 if (rc == NFS4_CHECK_STATEID_OKAY)
8775 break;
8777 * This is replayed stateid; if seqid
8778 * matches next expected, then client
8779 * is using wrong seqid.
8781 /* FALLTHROUGH */
8782 case NFS4_CHKSEQ_BAD:
8783 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8784 goto end;
8785 case NFS4_CHKSEQ_REPLAY:
8786 /* This is a duplicate LOCK request */
8787 dup_lock = TRUE;
8790 * For a duplicate we do not want to
8791 * create a new lockowner as it should
8792 * already exist.
8793 * Turn off the lockowner create flag.
8795 lcreate = FALSE;
8797 break;
8800 lo = rfs4_findlockowner(&olo->lock_owner, &lcreate);
8801 if (lo == NULL) {
8802 NFS4_DEBUG(rfs4_debug,
8803 (CE_NOTE, "rfs4_op_lock: no lock owner"));
8804 *cs->statusp = resp->status = NFS4ERR_RESOURCE;
8805 goto end;
8808 lsp = rfs4_findlo_state_by_owner(lo, sp, &create);
8809 if (lsp == NULL) {
8810 rfs4_update_lease(sp->rs_owner->ro_client);
8812 * Only update theh open_seqid if this is not
8813 * a duplicate request
8815 if (dup_lock == FALSE) {
8816 rfs4_update_open_sequence(sp->rs_owner);
8819 NFS4_DEBUG(rfs4_debug,
8820 (CE_NOTE, "rfs4_op_lock: no state"));
8821 *cs->statusp = resp->status = NFS4ERR_SERVERFAULT;
8822 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
8823 rfs4_lockowner_rele(lo);
8824 goto end;
8828 * This is the new_lock_owner branch and the client is
8829 * supposed to be associating a new lock_owner with
8830 * the open file at this point. If we find that a
8831 * lock_owner/state association already exists and a
8832 * successful LOCK request was returned to the client,
8833 * an error is returned to the client since this is
8834 * not appropriate. The client should be using the
8835 * existing lock_owner branch.
8837 if (dup_lock == FALSE && create == FALSE) {
8838 if (lsp->rls_lock_completed == TRUE) {
8839 *cs->statusp =
8840 resp->status = NFS4ERR_BAD_SEQID;
8841 rfs4_lockowner_rele(lo);
8842 goto end;
8846 rfs4_update_lease(sp->rs_owner->ro_client);
8849 * Only update theh open_seqid if this is not
8850 * a duplicate request
8852 if (dup_lock == FALSE) {
8853 rfs4_update_open_sequence(sp->rs_owner);
8857 * If this is a duplicate lock request, just copy the
8858 * previously saved reply and return.
8860 if (dup_lock == TRUE) {
8861 /* verify that lock_seqid's match */
8862 if (lsp->rls_seqid != olo->lock_seqid) {
8863 NFS4_DEBUG(rfs4_debug,
8864 (CE_NOTE, "rfs4_op_lock: Dup-Lock seqid bad"
8865 "lsp->seqid=%d old->seqid=%d",
8866 lsp->rls_seqid, olo->lock_seqid));
8867 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8868 } else {
8869 rfs4_copy_reply(resop, &lsp->rls_reply);
8871 * Make sure to copy the just
8872 * retrieved reply status into the
8873 * overall compound status
8875 *cs->statusp = resp->status;
8877 rfs4_lockowner_rele(lo);
8878 goto end;
8881 rfs4_dbe_lock(lsp->rls_dbe);
8883 /* Make sure to update the lock sequence id */
8884 lsp->rls_seqid = olo->lock_seqid;
8886 NFS4_DEBUG(rfs4_debug,
8887 (CE_NOTE, "Lock seqid established as %d", lsp->rls_seqid));
8890 * This is used to signify the newly created lockowner
8891 * stateid and its sequence number. The checks for
8892 * sequence number and increment don't occur on the
8893 * very first lock request for a lockowner.
8895 lsp->rls_skip_seqid_check = TRUE;
8897 /* hold off other access to lsp while we tinker */
8898 rfs4_sw_enter(&lsp->rls_sw);
8899 ls_sw_held = TRUE;
8901 rfs4_dbe_unlock(lsp->rls_dbe);
8903 rfs4_lockowner_rele(lo);
8904 } else {
8905 stateid = &args->locker.locker4_u.lock_owner.lock_stateid;
8906 /* get lsp and hold the lock on the underlying file struct */
8907 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE))
8908 != NFS4_OK) {
8909 *cs->statusp = resp->status = status;
8910 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8911 cs, LOCK4res *, resp);
8912 return;
8914 create = FALSE; /* We didn't create lsp */
8916 /* Ensure specified filehandle matches */
8917 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
8918 rfs4_lo_state_rele(lsp, TRUE);
8919 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8920 DTRACE_NFSV4_2(op__lock__done, struct compound_state *,
8921 cs, LOCK4res *, resp);
8922 return;
8925 /* hold off other access to lsp while we tinker */
8926 rfs4_sw_enter(&lsp->rls_sw);
8927 ls_sw_held = TRUE;
8929 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
8931 * The stateid looks like it was okay (expected to be
8932 * the next one)
8934 case NFS4_CHECK_STATEID_OKAY:
8936 * The sequence id is now checked. Determine
8937 * if this is a replay or if it is in the
8938 * expected (next) sequence. In the case of a
8939 * replay, there are two replay conditions
8940 * that may occur. The first is the normal
8941 * condition where a LOCK is done with a
8942 * NFS4_OK response and the stateid is
8943 * updated. That case is handled below when
8944 * the stateid is identified as a REPLAY. The
8945 * second is the case where an error is
8946 * returned, like NFS4ERR_DENIED, and the
8947 * sequence number is updated but the stateid
8948 * is not updated. This second case is dealt
8949 * with here. So it may seem odd that the
8950 * stateid is okay but the sequence id is a
8951 * replay but it is okay.
8953 switch (rfs4_check_lock_seqid(
8954 args->locker.locker4_u.lock_owner.lock_seqid,
8955 lsp, resop)) {
8956 case NFS4_CHKSEQ_REPLAY:
8957 if (resp->status != NFS4_OK) {
8959 * Here is our replay and need
8960 * to verify that the last
8961 * response was an error.
8963 *cs->statusp = resp->status;
8964 goto end;
8967 * This is done since the sequence id
8968 * looked like a replay but it didn't
8969 * pass our check so a BAD_SEQID is
8970 * returned as a result.
8972 /*FALLTHROUGH*/
8973 case NFS4_CHKSEQ_BAD:
8974 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
8975 goto end;
8976 case NFS4_CHKSEQ_OKAY:
8977 /* Everything looks okay move ahead */
8978 break;
8980 break;
8981 case NFS4_CHECK_STATEID_OLD:
8982 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8983 goto end;
8984 case NFS4_CHECK_STATEID_BAD:
8985 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
8986 goto end;
8987 case NFS4_CHECK_STATEID_EXPIRED:
8988 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
8989 goto end;
8990 case NFS4_CHECK_STATEID_CLOSED:
8991 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
8992 goto end;
8993 case NFS4_CHECK_STATEID_REPLAY:
8994 switch (rfs4_check_lock_seqid(
8995 args->locker.locker4_u.lock_owner.lock_seqid,
8996 lsp, resop)) {
8997 case NFS4_CHKSEQ_OKAY:
8999 * This is a replayed stateid; if
9000 * seqid matches the next expected,
9001 * then client is using wrong seqid.
9003 case NFS4_CHKSEQ_BAD:
9004 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9005 goto end;
9006 case NFS4_CHKSEQ_REPLAY:
9007 rfs4_update_lease(lsp->rls_locker->rl_client);
9008 *cs->statusp = status = resp->status;
9009 goto end;
9011 break;
9012 default:
9013 ASSERT(FALSE);
9014 break;
9017 rfs4_update_lock_sequence(lsp);
9018 rfs4_update_lease(lsp->rls_locker->rl_client);
9022 * NFS4 only allows locking on regular files, so
9023 * verify type of object.
9025 if (cs->vp->v_type != VREG) {
9026 if (cs->vp->v_type == VDIR)
9027 status = NFS4ERR_ISDIR;
9028 else
9029 status = NFS4ERR_INVAL;
9030 goto out;
9033 cp = lsp->rls_state->rs_owner->ro_client;
9035 if (rfs4_clnt_in_grace(cp) && !args->reclaim) {
9036 status = NFS4ERR_GRACE;
9037 goto out;
9040 if (rfs4_clnt_in_grace(cp) && args->reclaim && !cp->rc_can_reclaim) {
9041 status = NFS4ERR_NO_GRACE;
9042 goto out;
9045 if (!rfs4_clnt_in_grace(cp) && args->reclaim) {
9046 status = NFS4ERR_NO_GRACE;
9047 goto out;
9050 if (lsp->rls_state->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
9051 cs->deleg = TRUE;
9053 status = rfs4_do_lock(lsp, args->locktype,
9054 args->offset, args->length, cs->cr, resop);
9056 out:
9057 lsp->rls_skip_seqid_check = FALSE;
9059 *cs->statusp = resp->status = status;
9061 if (status == NFS4_OK) {
9062 resp->LOCK4res_u.lock_stateid = lsp->rls_lockid.stateid;
9063 lsp->rls_lock_completed = TRUE;
9066 * Only update the "OPEN" response here if this was a new
9067 * lock_owner
9069 if (sp)
9070 rfs4_update_open_resp(sp->rs_owner, resop, NULL);
9072 rfs4_update_lock_resp(lsp, resop);
9074 end:
9075 if (lsp) {
9076 if (ls_sw_held)
9077 rfs4_sw_exit(&lsp->rls_sw);
9079 * If an sp obtained, then the lsp does not represent
9080 * a lock on the file struct.
9082 if (sp != NULL)
9083 rfs4_lo_state_rele(lsp, FALSE);
9084 else
9085 rfs4_lo_state_rele(lsp, TRUE);
9087 if (sp) {
9088 rfs4_sw_exit(&sp->rs_owner->ro_sw);
9089 rfs4_state_rele(sp);
9092 DTRACE_NFSV4_2(op__lock__done, struct compound_state *, cs,
9093 LOCK4res *, resp);
9096 /* free function for LOCK/LOCKT */
9097 static void
9098 lock_denied_free(nfs_resop4 *resop)
9100 LOCK4denied *dp = NULL;
9102 switch (resop->resop) {
9103 case OP_LOCK:
9104 if (resop->nfs_resop4_u.oplock.status == NFS4ERR_DENIED)
9105 dp = &resop->nfs_resop4_u.oplock.LOCK4res_u.denied;
9106 break;
9107 case OP_LOCKT:
9108 if (resop->nfs_resop4_u.oplockt.status == NFS4ERR_DENIED)
9109 dp = &resop->nfs_resop4_u.oplockt.denied;
9110 break;
9111 default:
9112 break;
9115 if (dp)
9116 kmem_free(dp->owner.owner_val, dp->owner.owner_len);
9119 /*ARGSUSED*/
9120 void
9121 rfs4_op_locku(nfs_argop4 *argop, nfs_resop4 *resop,
9122 struct svc_req *req, struct compound_state *cs)
9124 LOCKU4args *args = &argop->nfs_argop4_u.oplocku;
9125 LOCKU4res *resp = &resop->nfs_resop4_u.oplocku;
9126 nfsstat4 status;
9127 stateid4 *stateid = &args->lock_stateid;
9128 rfs4_lo_state_t *lsp;
9130 DTRACE_NFSV4_2(op__locku__start, struct compound_state *, cs,
9131 LOCKU4args *, args);
9133 if (cs->vp == NULL) {
9134 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9135 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9136 LOCKU4res *, resp);
9137 return;
9140 if ((status = rfs4_get_lo_state(stateid, &lsp, TRUE)) != NFS4_OK) {
9141 *cs->statusp = resp->status = status;
9142 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9143 LOCKU4res *, resp);
9144 return;
9147 /* Ensure specified filehandle matches */
9148 if (cs->vp != lsp->rls_state->rs_finfo->rf_vp) {
9149 rfs4_lo_state_rele(lsp, TRUE);
9150 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9151 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9152 LOCKU4res *, resp);
9153 return;
9156 /* hold off other access to lsp while we tinker */
9157 rfs4_sw_enter(&lsp->rls_sw);
9159 switch (rfs4_check_lo_stateid_seqid(lsp, stateid)) {
9160 case NFS4_CHECK_STATEID_OKAY:
9161 if (rfs4_check_lock_seqid(args->seqid, lsp, resop)
9162 != NFS4_CHKSEQ_OKAY) {
9163 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9164 goto end;
9166 break;
9167 case NFS4_CHECK_STATEID_OLD:
9168 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9169 goto end;
9170 case NFS4_CHECK_STATEID_BAD:
9171 *cs->statusp = resp->status = NFS4ERR_BAD_STATEID;
9172 goto end;
9173 case NFS4_CHECK_STATEID_EXPIRED:
9174 *cs->statusp = resp->status = NFS4ERR_EXPIRED;
9175 goto end;
9176 case NFS4_CHECK_STATEID_CLOSED:
9177 *cs->statusp = resp->status = NFS4ERR_OLD_STATEID;
9178 goto end;
9179 case NFS4_CHECK_STATEID_REPLAY:
9180 switch (rfs4_check_lock_seqid(args->seqid, lsp, resop)) {
9181 case NFS4_CHKSEQ_OKAY:
9183 * This is a replayed stateid; if
9184 * seqid matches the next expected,
9185 * then client is using wrong seqid.
9187 case NFS4_CHKSEQ_BAD:
9188 *cs->statusp = resp->status = NFS4ERR_BAD_SEQID;
9189 goto end;
9190 case NFS4_CHKSEQ_REPLAY:
9191 rfs4_update_lease(lsp->rls_locker->rl_client);
9192 *cs->statusp = status = resp->status;
9193 goto end;
9195 break;
9196 default:
9197 ASSERT(FALSE);
9198 break;
9201 rfs4_update_lock_sequence(lsp);
9202 rfs4_update_lease(lsp->rls_locker->rl_client);
9205 * NFS4 only allows locking on regular files, so
9206 * verify type of object.
9208 if (cs->vp->v_type != VREG) {
9209 if (cs->vp->v_type == VDIR)
9210 status = NFS4ERR_ISDIR;
9211 else
9212 status = NFS4ERR_INVAL;
9213 goto out;
9216 if (rfs4_clnt_in_grace(lsp->rls_state->rs_owner->ro_client)) {
9217 status = NFS4ERR_GRACE;
9218 goto out;
9221 status = rfs4_do_lock(lsp, args->locktype,
9222 args->offset, args->length, cs->cr, resop);
9224 out:
9225 *cs->statusp = resp->status = status;
9227 if (status == NFS4_OK)
9228 resp->lock_stateid = lsp->rls_lockid.stateid;
9230 rfs4_update_lock_resp(lsp, resop);
9232 end:
9233 rfs4_sw_exit(&lsp->rls_sw);
9234 rfs4_lo_state_rele(lsp, TRUE);
9236 DTRACE_NFSV4_2(op__locku__done, struct compound_state *, cs,
9237 LOCKU4res *, resp);
9241 * LOCKT is a best effort routine, the client can not be guaranteed that
9242 * the status return is still in effect by the time the reply is received.
9243 * They are numerous race conditions in this routine, but we are not required
9244 * and can not be accurate.
9246 /*ARGSUSED*/
9247 void
9248 rfs4_op_lockt(nfs_argop4 *argop, nfs_resop4 *resop,
9249 struct svc_req *req, struct compound_state *cs)
9251 LOCKT4args *args = &argop->nfs_argop4_u.oplockt;
9252 LOCKT4res *resp = &resop->nfs_resop4_u.oplockt;
9253 rfs4_lockowner_t *lo;
9254 rfs4_client_t *cp;
9255 bool_t create = FALSE;
9256 struct flock64 flk;
9257 int error;
9258 int flag = FREAD | FWRITE;
9259 int ltype;
9260 length4 posix_length;
9261 sysid_t sysid;
9262 pid_t pid;
9264 DTRACE_NFSV4_2(op__lockt__start, struct compound_state *, cs,
9265 LOCKT4args *, args);
9267 if (cs->vp == NULL) {
9268 *cs->statusp = resp->status = NFS4ERR_NOFILEHANDLE;
9269 goto out;
9273 * NFS4 only allows locking on regular files, so
9274 * verify type of object.
9276 if (cs->vp->v_type != VREG) {
9277 if (cs->vp->v_type == VDIR)
9278 *cs->statusp = resp->status = NFS4ERR_ISDIR;
9279 else
9280 *cs->statusp = resp->status = NFS4ERR_INVAL;
9281 goto out;
9285 * Check out the clientid to ensure the server knows about it
9286 * so that we correctly inform the client of a server reboot.
9288 if ((cp = rfs4_findclient_by_id(args->owner.clientid, FALSE))
9289 == NULL) {
9290 *cs->statusp = resp->status =
9291 rfs4_check_clientid(&args->owner.clientid, 0);
9292 goto out;
9294 if (rfs4_lease_expired(cp)) {
9295 rfs4_client_close(cp);
9297 * Protocol doesn't allow returning NFS4ERR_STALE as
9298 * other operations do on this check so STALE_CLIENTID
9299 * is returned instead
9301 *cs->statusp = resp->status = NFS4ERR_STALE_CLIENTID;
9302 goto out;
9305 if (rfs4_clnt_in_grace(cp) && !(cp->rc_can_reclaim)) {
9306 *cs->statusp = resp->status = NFS4ERR_GRACE;
9307 rfs4_client_rele(cp);
9308 goto out;
9310 rfs4_client_rele(cp);
9312 resp->status = NFS4_OK;
9314 switch (args->locktype) {
9315 case READ_LT:
9316 case READW_LT:
9317 ltype = F_RDLCK;
9318 break;
9319 case WRITE_LT:
9320 case WRITEW_LT:
9321 ltype = F_WRLCK;
9322 break;
9325 posix_length = args->length;
9326 /* Check for zero length. To lock to end of file use all ones for V4 */
9327 if (posix_length == 0) {
9328 *cs->statusp = resp->status = NFS4ERR_INVAL;
9329 goto out;
9330 } else if (posix_length == (length4)(~0)) {
9331 posix_length = 0; /* Posix to end of file */
9334 /* Find or create a lockowner */
9335 lo = rfs4_findlockowner(&args->owner, &create);
9337 if (lo) {
9338 pid = lo->rl_pid;
9339 if ((resp->status =
9340 rfs4_client_sysid(lo->rl_client, &sysid)) != NFS4_OK)
9341 goto err;
9342 } else {
9343 pid = 0;
9344 sysid = lockt_sysid;
9346 retry:
9347 flk.l_type = ltype;
9348 flk.l_whence = 0; /* SEEK_SET */
9349 flk.l_start = args->offset;
9350 flk.l_len = posix_length;
9351 flk.l_sysid = sysid;
9352 flk.l_pid = pid;
9353 flag |= F_REMOTELOCK;
9355 LOCK_PRINT(rfs4_debug, "rfs4_op_lockt", F_GETLK, &flk);
9357 /* Note that length4 is uint64_t but l_len and l_start are off64_t */
9358 if (flk.l_len < 0 || flk.l_start < 0) {
9359 resp->status = NFS4ERR_INVAL;
9360 goto err;
9362 error = VOP_FRLOCK(cs->vp, F_GETLK, &flk, flag, (u_offset_t)0,
9363 NULL, cs->cr, NULL);
9366 * N.B. We map error values to nfsv4 errors. This is differrent
9367 * than puterrno4 routine.
9369 switch (error) {
9370 case 0:
9371 if (flk.l_type == F_UNLCK)
9372 resp->status = NFS4_OK;
9373 else {
9374 if (lock_denied(&resp->denied, &flk) == NFS4ERR_EXPIRED)
9375 goto retry;
9376 resp->status = NFS4ERR_DENIED;
9378 break;
9379 case EOVERFLOW:
9380 resp->status = NFS4ERR_INVAL;
9381 break;
9382 case EINVAL:
9383 resp->status = NFS4ERR_NOTSUPP;
9384 break;
9385 default:
9386 cmn_err(CE_WARN, "rfs4_op_lockt: unexpected errno (%d)",
9387 error);
9388 resp->status = NFS4ERR_SERVERFAULT;
9389 break;
9392 err:
9393 if (lo)
9394 rfs4_lockowner_rele(lo);
9395 *cs->statusp = resp->status;
9396 out:
9397 DTRACE_NFSV4_2(op__lockt__done, struct compound_state *, cs,
9398 LOCKT4res *, resp);
9402 rfs4_share(rfs4_state_t *sp, uint32_t access, uint32_t deny)
9404 int err;
9405 int cmd;
9406 vnode_t *vp;
9407 struct shrlock shr;
9408 struct shr_locowner shr_loco;
9409 int fflags = 0;
9411 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9412 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9414 if (sp->rs_closed)
9415 return (NFS4ERR_OLD_STATEID);
9417 vp = sp->rs_finfo->rf_vp;
9418 ASSERT(vp);
9420 shr.s_access = shr.s_deny = 0;
9422 if (access & OPEN4_SHARE_ACCESS_READ) {
9423 fflags |= FREAD;
9424 shr.s_access |= F_RDACC;
9426 if (access & OPEN4_SHARE_ACCESS_WRITE) {
9427 fflags |= FWRITE;
9428 shr.s_access |= F_WRACC;
9430 ASSERT(shr.s_access);
9432 if (deny & OPEN4_SHARE_DENY_READ)
9433 shr.s_deny |= F_RDDNY;
9434 if (deny & OPEN4_SHARE_DENY_WRITE)
9435 shr.s_deny |= F_WRDNY;
9437 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9438 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9439 shr_loco.sl_pid = shr.s_pid;
9440 shr_loco.sl_id = shr.s_sysid;
9441 shr.s_owner = (caddr_t)&shr_loco;
9442 shr.s_own_len = sizeof (shr_loco);
9444 cmd = nbl_need_check(vp) ? F_SHARE_NBMAND : F_SHARE;
9446 err = VOP_SHRLOCK(vp, cmd, &shr, fflags, CRED(), NULL);
9447 if (err != 0) {
9448 if (err == EAGAIN)
9449 err = NFS4ERR_SHARE_DENIED;
9450 else
9451 err = puterrno4(err);
9452 return (err);
9455 sp->rs_share_access |= access;
9456 sp->rs_share_deny |= deny;
9458 return (0);
9462 rfs4_unshare(rfs4_state_t *sp)
9464 int err;
9465 struct shrlock shr;
9466 struct shr_locowner shr_loco;
9468 ASSERT(rfs4_dbe_islocked(sp->rs_dbe));
9470 if (sp->rs_closed || sp->rs_share_access == 0)
9471 return (0);
9473 ASSERT(sp->rs_owner->ro_client->rc_sysidt != LM_NOSYSID);
9474 ASSERT(sp->rs_finfo->rf_vp);
9476 shr.s_access = shr.s_deny = 0;
9477 shr.s_pid = rfs4_dbe_getid(sp->rs_owner->ro_dbe);
9478 shr.s_sysid = sp->rs_owner->ro_client->rc_sysidt;
9479 shr_loco.sl_pid = shr.s_pid;
9480 shr_loco.sl_id = shr.s_sysid;
9481 shr.s_owner = (caddr_t)&shr_loco;
9482 shr.s_own_len = sizeof (shr_loco);
9484 err = VOP_SHRLOCK(sp->rs_finfo->rf_vp, F_UNSHARE, &shr, 0, CRED(),
9485 NULL);
9486 if (err != 0) {
9487 err = puterrno4(err);
9488 return (err);
9491 sp->rs_share_access = 0;
9492 sp->rs_share_deny = 0;
9494 return (0);
9498 static int
9499 rdma_setup_read_data4(READ4args *args, READ4res *rok)
9501 struct clist *wcl;
9502 count4 count = rok->data_len;
9503 int wlist_len;
9505 wcl = args->wlist;
9506 if (rdma_setup_read_chunks(wcl, count, &wlist_len) == FALSE) {
9507 return (FALSE);
9509 wcl = args->wlist;
9510 rok->wlist_len = wlist_len;
9511 rok->wlist = wcl;
9512 return (TRUE);
9515 /* tunable to disable server referrals */
9516 int rfs4_no_referrals = 0;
9519 * Find an NFS record in reparse point data.
9520 * Returns 0 for success and <0 or an errno value on failure.
9523 vn_find_nfs_record(vnode_t *vp, nvlist_t **nvlp, char **svcp, char **datap)
9525 int err;
9526 char *stype, *val;
9527 nvlist_t *nvl;
9528 nvpair_t *curr;
9530 if ((nvl = reparse_init()) == NULL)
9531 return (-1);
9533 if ((err = reparse_vnode_parse(vp, nvl)) != 0) {
9534 reparse_free(nvl);
9535 return (err);
9538 curr = NULL;
9539 while ((curr = nvlist_next_nvpair(nvl, curr)) != NULL) {
9540 if ((stype = nvpair_name(curr)) == NULL) {
9541 reparse_free(nvl);
9542 return (-2);
9544 if (strncasecmp(stype, "NFS", 3) == 0)
9545 break;
9548 if ((curr == NULL) ||
9549 (nvpair_value_string(curr, &val))) {
9550 reparse_free(nvl);
9551 return (-3);
9553 *nvlp = nvl;
9554 *svcp = stype;
9555 *datap = val;
9556 return (0);
9560 vn_is_nfs_reparse(vnode_t *vp, cred_t *cr)
9562 nvlist_t *nvl;
9563 char *s, *d;
9565 if (rfs4_no_referrals != 0)
9566 return (B_FALSE);
9568 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9569 return (B_FALSE);
9571 if (vn_find_nfs_record(vp, &nvl, &s, &d) != 0)
9572 return (B_FALSE);
9574 reparse_free(nvl);
9576 return (B_TRUE);
9580 * There is a user-level copy of this routine in ref_subr.c.
9581 * Changes should be kept in sync.
9583 static int
9584 nfs4_create_components(char *path, component4 *comp4)
9586 int slen, plen, ncomp;
9587 char *ori_path, *nxtc, buf[MAXNAMELEN];
9589 if (path == NULL)
9590 return (0);
9592 plen = strlen(path) + 1; /* include the terminator */
9593 ori_path = path;
9594 ncomp = 0;
9596 /* count number of components in the path */
9597 for (nxtc = path; nxtc < ori_path + plen; nxtc++) {
9598 if (*nxtc == '/' || *nxtc == '\0' || *nxtc == '\n') {
9599 if ((slen = nxtc - path) == 0) {
9600 path = nxtc + 1;
9601 continue;
9604 if (comp4 != NULL) {
9605 bcopy(path, buf, slen);
9606 buf[slen] = '\0';
9607 (void) str_to_utf8(buf, &comp4[ncomp]);
9610 ncomp++; /* 1 valid component */
9611 path = nxtc + 1;
9613 if (*nxtc == '\0' || *nxtc == '\n')
9614 break;
9617 return (ncomp);
9621 * There is a user-level copy of this routine in ref_subr.c.
9622 * Changes should be kept in sync.
9624 static int
9625 make_pathname4(char *path, pathname4 *pathname)
9627 int ncomp;
9628 component4 *comp4;
9630 if (pathname == NULL)
9631 return (0);
9633 if (path == NULL) {
9634 pathname->pathname4_val = NULL;
9635 pathname->pathname4_len = 0;
9636 return (0);
9639 /* count number of components to alloc buffer */
9640 if ((ncomp = nfs4_create_components(path, NULL)) == 0) {
9641 pathname->pathname4_val = NULL;
9642 pathname->pathname4_len = 0;
9643 return (0);
9645 comp4 = kmem_zalloc(ncomp * sizeof (component4), KM_SLEEP);
9647 /* copy components into allocated buffer */
9648 ncomp = nfs4_create_components(path, comp4);
9650 pathname->pathname4_val = comp4;
9651 pathname->pathname4_len = ncomp;
9653 return (ncomp);
9656 #define xdr_fs_locations4 xdr_fattr4_fs_locations
9658 fs_locations4 *
9659 fetch_referral(vnode_t *vp, cred_t *cr)
9661 nvlist_t *nvl;
9662 char *stype, *sdata;
9663 fs_locations4 *result;
9664 char buf[1024];
9665 size_t bufsize;
9666 XDR xdr;
9667 int err;
9670 * Check attrs to ensure it's a reparse point
9672 if (vn_is_reparse(vp, cr, NULL) == B_FALSE)
9673 return (NULL);
9676 * Look for an NFS record and get the type and data
9678 if (vn_find_nfs_record(vp, &nvl, &stype, &sdata) != 0)
9679 return (NULL);
9682 * With the type and data, upcall to get the referral
9684 bufsize = sizeof (buf);
9685 bzero(buf, sizeof (buf));
9686 err = reparse_kderef((const char *)stype, (const char *)sdata,
9687 buf, &bufsize);
9688 reparse_free(nvl);
9690 DTRACE_PROBE4(nfs4serv__func__referral__upcall,
9691 char *, stype, char *, sdata, char *, buf, int, err);
9692 if (err) {
9693 cmn_err(CE_NOTE,
9694 "reparsed daemon not running: unable to get referral (%d)",
9695 err);
9696 return (NULL);
9700 * We get an XDR'ed record back from the kderef call
9702 xdrmem_create(&xdr, buf, bufsize, XDR_DECODE);
9703 result = kmem_alloc(sizeof (fs_locations4), KM_SLEEP);
9704 err = xdr_fs_locations4(&xdr, result);
9705 XDR_DESTROY(&xdr);
9706 if (err != TRUE) {
9707 DTRACE_PROBE1(nfs4serv__func__referral__upcall__xdrfail,
9708 int, err);
9709 return (NULL);
9713 * Look at path to recover fs_root, ignoring the leading '/'
9715 (void) make_pathname4(vp->v_path, &result->fs_root);
9717 return (result);
9720 char *
9721 build_symlink(vnode_t *vp, cred_t *cr, size_t *strsz)
9723 fs_locations4 *fsl;
9724 fs_location4 *fs;
9725 char *server, *path, *symbuf;
9726 static char *prefix = "/net/";
9727 int i, size, npaths;
9728 uint_t len;
9730 /* Get the referral */
9731 if ((fsl = fetch_referral(vp, cr)) == NULL)
9732 return (NULL);
9734 /* Deal with only the first location and first server */
9735 fs = &fsl->locations_val[0];
9736 server = utf8_to_str(&fs->server_val[0], &len, NULL);
9737 if (server == NULL) {
9738 rfs4_free_fs_locations4(fsl);
9739 kmem_free(fsl, sizeof (fs_locations4));
9740 return (NULL);
9743 /* Figure out size for "/net/" + host + /path/path/path + NULL */
9744 size = strlen(prefix) + len;
9745 for (i = 0; i < fs->rootpath.pathname4_len; i++)
9746 size += fs->rootpath.pathname4_val[i].utf8string_len + 1;
9748 /* Allocate the symlink buffer and fill it */
9749 symbuf = kmem_zalloc(size, KM_SLEEP);
9750 (void) strcat(symbuf, prefix);
9751 (void) strcat(symbuf, server);
9752 kmem_free(server, len);
9754 npaths = 0;
9755 for (i = 0; i < fs->rootpath.pathname4_len; i++) {
9756 path = utf8_to_str(&fs->rootpath.pathname4_val[i], &len, NULL);
9757 if (path == NULL)
9758 continue;
9759 (void) strcat(symbuf, "/");
9760 (void) strcat(symbuf, path);
9761 npaths++;
9762 kmem_free(path, len);
9765 rfs4_free_fs_locations4(fsl);
9766 kmem_free(fsl, sizeof (fs_locations4));
9768 if (strsz != NULL)
9769 *strsz = size;
9770 return (symbuf);
9774 * Check to see if we have a downrev Solaris client, so that we
9775 * can send it a symlink instead of a referral.
9778 client_is_downrev(struct svc_req *req)
9780 struct sockaddr *ca;
9781 rfs4_clntip_t *ci;
9782 bool_t create = FALSE;
9783 int is_downrev;
9785 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf;
9786 ASSERT(ca);
9787 ci = rfs4_find_clntip(ca, &create);
9788 if (ci == NULL)
9789 return (0);
9790 is_downrev = ci->ri_no_referrals;
9791 rfs4_dbe_rele(ci->ri_dbe);
9792 return (is_downrev);