2 * Copyright (c) 2008 Isilon Inc http://www.isilon.com/
3 * Authors: Doug Rabson <dfr@rabson.org>
4 * Developed with Red Inc: Alfred Perlstein <alfred@freebsd.org>
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
30 * Copyright (c) 2012 by Delphix. All rights reserved.
34 * Client-side support for (NFS) VOP_FRLOCK, VOP_SHRLOCK.
35 * (called via klmops.c: lm_frlock, lm4_frlock)
37 * Source code derived from FreeBSD nlm_advlock.c
40 #include <sys/param.h>
41 #include <sys/fcntl.h>
43 #include <sys/flock.h>
44 #include <sys/mount.h>
45 #include <sys/mutex.h>
47 #include <sys/share.h>
48 #include <sys/syslog.h>
49 #include <sys/systm.h>
50 #include <sys/unistd.h>
51 #include <sys/vnode.h>
52 #include <sys/queue.h>
54 #include <netinet/in.h>
56 #include <fs/fs_subr.h>
57 #include <rpcsvc/nlm_prot.h>
60 #include <nfs/nfs_clnt.h>
61 #include <nfs/export.h>
62 #include <nfs/rnode.h>
67 /* Extra flags for nlm_call_lock() - xflags */
68 #define NLM_X_RECLAIM 1
69 #define NLM_X_BLOCKING 2
72 * Max. number of retries nlm_call_cancel() does
73 * when NLM server is in grace period or doesn't
76 #define NLM_CANCEL_NRETRS 5
79 * Determines whether given lock "flp" is safe.
80 * The lock is considered to be safe when it
81 * acquires the whole file (i.e. its start
82 * and len are zeroes).
84 #define NLM_FLOCK_IS_SAFE(flp) \
85 ((flp)->l_start == 0 && (flp)->l_len == 0)
87 static volatile uint32_t nlm_xid
= 1;
89 static int nlm_init_fh_by_vp(vnode_t
*, struct netobj
*, rpcvers_t
*);
90 static int nlm_map_status(nlm4_stats
);
91 static int nlm_map_clnt_stat(enum clnt_stat
);
92 static void nlm_send_siglost(pid_t
);
94 static int nlm_frlock_getlk(struct nlm_host
*, vnode_t
*,
95 struct flock64
*, int, u_offset_t
, struct netobj
*, int);
97 static int nlm_frlock_setlk(struct nlm_host
*, vnode_t
*,
98 struct flock64
*, int, u_offset_t
, struct netobj
*,
99 struct flk_callback
*, int, bool_t
);
101 static int nlm_reclaim_lock(struct nlm_host
*, vnode_t
*,
102 struct flock64
*, int32_t);
104 static void nlm_init_lock(struct nlm4_lock
*,
105 const struct flock64
*, struct netobj
*,
106 struct nlm_owner_handle
*);
108 static int nlm_call_lock(vnode_t
*, struct flock64
*,
109 struct nlm_host
*, struct netobj
*,
110 struct flk_callback
*, int, int);
111 static int nlm_call_unlock(struct flock64
*, struct nlm_host
*,
112 struct netobj
*, int);
113 static int nlm_call_test(struct flock64
*, struct nlm_host
*,
114 struct netobj
*, int);
115 static int nlm_call_cancel(struct nlm4_lockargs
*,
116 struct nlm_host
*, int);
118 static int nlm_local_getlk(vnode_t
*, struct flock64
*, int);
119 static int nlm_local_setlk(vnode_t
*, struct flock64
*, int);
120 static void nlm_local_cancelk(vnode_t
*, struct flock64
*);
122 static void nlm_init_share(struct nlm4_share
*,
123 const struct shrlock
*, struct netobj
*);
125 static int nlm_call_share(struct shrlock
*, struct nlm_host
*,
126 struct netobj
*, int, int);
127 static int nlm_call_unshare(struct shrlock
*, struct nlm_host
*,
128 struct netobj
*, int);
129 static int nlm_reclaim_share(struct nlm_host
*, vnode_t
*,
130 struct shrlock
*, uint32_t);
131 static int nlm_local_shrlock(vnode_t
*, struct shrlock
*, int, int);
132 static void nlm_local_shrcancel(vnode_t
*, struct shrlock
*);
135 * Reclaim locks/shares acquired by the client side
136 * on the given server represented by hostp.
137 * The function is called from a dedicated thread
138 * when the server reports to us that it has entered grace
142 nlm_reclaim_client(struct nlm_globals
*g
, struct nlm_host
*hostp
)
146 struct locklist
*llp_head
, *llp
;
147 struct nlm_shres
*nsp_head
, *nsp
;
150 sysid
= hostp
->nh_sysid
| LM_SYSID_CLIENT
;
154 state
= nlm_host_get_state(hostp
);
156 DTRACE_PROBE3(reclaim__iter
, struct nlm_globals
*, g
,
157 struct nlm_host
*, hostp
, int, state
);
160 * We cancel all sleeping locks that were
161 * done by the host, because we don't allow
162 * reclamation of sleeping locks. The reason
163 * we do this is that allowing of sleeping locks
164 * reclamation can potentially break locks recovery
167 * Imagine that we have two client machines A and B
168 * and an NLM server machine. A adds a non sleeping
169 * lock to the file F and acquires this file. Machine
170 * B in its turn adds sleeping lock to the file
171 * F and blocks because F is already acquired by
172 * the machine A. Then server crashes and after the
173 * reboot it notifies its clients about the crash.
174 * If we would allow sleeping locks reclamation,
175 * there would be possible that machine B recovers
176 * its lock faster than machine A (by some reason).
177 * So that B acquires the file F after server crash and
178 * machine A (that by some reason recovers slower) fails
179 * to recover its non sleeping lock. Thus the original
180 * locks order becomes broken.
182 nlm_host_cancel_slocks(g
, hostp
);
185 * Try to reclaim all active locks we have
187 llp_head
= llp
= flk_get_active_locks(sysid
, NOPID
);
188 while (llp
!= NULL
) {
189 error
= nlm_reclaim_lock(hostp
, llp
->ll_vp
,
190 &llp
->ll_flock
, state
);
195 } else if (error
== ERESTART
) {
200 * Critical error occurred, the lock
201 * can not be recovered, just take it away.
203 nlm_local_cancelk(llp
->ll_vp
, &llp
->ll_flock
);
209 flk_free_locklist(llp_head
);
212 * Lock reclamation function reported to us that
213 * the server state was changed (again), so
214 * try to repeat the whole reclamation process.
219 nsp_head
= nsp
= nlm_get_active_shres(hostp
);
220 while (nsp
!= NULL
) {
221 error
= nlm_reclaim_share(hostp
, nsp
->ns_vp
,
227 } else if (error
== ERESTART
) {
230 /* Failed to reclaim share */
231 nlm_shres_untrack(hostp
, nsp
->ns_vp
,
233 nlm_local_shrcancel(nsp
->ns_vp
,
240 nlm_free_shrlist(nsp_head
);
241 } while (state
!= nlm_host_get_state(hostp
));
246 * NFS advisory byte-range locks.
249 * Note that the local locking code (os/flock.c) is used to
250 * keep track of remote locks granted by some server, so we
251 * can reclaim those locks after a server restarts. We can
252 * also sometimes use this as a cache of lock information.
258 nlm_frlock(struct vnode
*vp
, int cmd
, struct flock64
*flkp
,
259 int flags
, u_offset_t offset
, struct cred
*crp
,
260 struct netobj
*fhp
, struct flk_callback
*flcb
, int vers
)
265 struct nlm_host
*hostp
;
267 struct nlm_globals
*g
;
270 sv
= mi
->mi_curr_serv
;
272 netid
= nlm_knc_to_netid(sv
->sv_knconf
);
274 NLM_ERR("nlm_frlock: unknown NFS netid");
278 g
= zone_getspecific(nlm_zone_key
, curzone
);
279 hostp
= nlm_host_findcreate(g
, sv
->sv_hostname
, netid
, &sv
->sv_addr
);
284 * Purge cached attributes in order to make sure that
285 * future calls of convoff()/VOP_GETATTR() will get the
288 if (flkp
->l_whence
== SEEK_END
)
291 /* Now flk0 is the zero-based lock request. */
294 error
= nlm_frlock_getlk(hostp
, vp
, flkp
, flags
,
300 error
= nlm_frlock_setlk(hostp
, vp
, flkp
, flags
,
301 offset
, fhp
, flcb
, vers
, (cmd
== F_SETLKW
));
303 nlm_host_monitor(g
, hostp
, 0);
311 nlm_host_release(g
, hostp
);
316 nlm_frlock_getlk(struct nlm_host
*hostp
, vnode_t
*vp
,
317 struct flock64
*flkp
, int flags
, u_offset_t offset
,
318 struct netobj
*fhp
, int vers
)
324 * Check local (cached) locks first.
325 * If we find one, no need for RPC.
328 flk0
.l_pid
= curproc
->p_pid
;
329 error
= nlm_local_getlk(vp
, &flk0
, flags
);
332 if (flk0
.l_type
!= F_UNLCK
) {
337 /* Not found locally. Try remote. */
339 flk0
.l_pid
= curproc
->p_pid
;
340 error
= convoff(vp
, &flk0
, 0, (offset_t
)offset
);
344 error
= nlm_call_test(&flk0
, hostp
, fhp
, vers
);
348 if (flk0
.l_type
== F_UNLCK
) {
350 * Update the caller's *flkp with information
351 * on the conflicting lock (or lack thereof).
353 flkp
->l_type
= F_UNLCK
;
356 * Found a conflicting lock. Set the
357 * caller's *flkp with the info, first
358 * converting to the caller's whence.
360 (void) convoff(vp
, &flk0
, flkp
->l_whence
, (offset_t
)offset
);
368 nlm_frlock_setlk(struct nlm_host
*hostp
, vnode_t
*vp
,
369 struct flock64
*flkp
, int flags
, u_offset_t offset
,
370 struct netobj
*fhp
, struct flk_callback
*flcb
,
371 int vers
, bool_t do_block
)
375 error
= convoff(vp
, flkp
, 0, (offset_t
)offset
);
380 * NFS v2 clients should not request locks where any part
381 * of the lock range is beyond 0xffffffff. The NFS code
382 * checks that (see nfs_frlock, flk_check_lock_data), but
383 * as that's outside this module, let's check here too.
384 * This check ensures that we will be able to convert this
385 * lock request into 32-bit form without change, and that
386 * (more importantly) when the granted call back arrives,
387 * it's unchanged when converted back into 64-bit form.
388 * If this lock range were to change in any way during
389 * either of those conversions, the "granted" call back
390 * from the NLM server would not find our sleeping lock.
392 if (vers
< NLM4_VERS
) {
393 if (flkp
->l_start
> MAX_UOFF32
||
394 flkp
->l_start
+ flkp
->l_len
> MAX_UOFF32
+ 1)
399 * Fill in l_sysid for the local locking calls.
400 * Also, let's not trust the caller's l_pid.
402 flkp
->l_sysid
= hostp
->nh_sysid
| LM_SYSID_CLIENT
;
403 flkp
->l_pid
= curproc
->p_pid
;
405 if (flkp
->l_type
== F_UNLCK
) {
407 * Purge local (cached) lock information first,
408 * then clear the remote lock.
410 (void) nlm_local_setlk(vp
, flkp
, flags
);
411 error
= nlm_call_unlock(flkp
, hostp
, fhp
, vers
);
418 * This is a non-blocking "set" request,
419 * so we can check locally first, and
420 * sometimes avoid an RPC call.
425 error
= nlm_local_getlk(vp
, &flk0
, flags
);
426 if (error
!= 0 && flk0
.l_type
!= F_UNLCK
) {
427 /* Found a conflicting lock. */
433 xflags
= NLM_X_BLOCKING
;
436 nfs_add_locking_id(vp
, curproc
->p_pid
, RLMPL_PID
,
437 (char *)&curproc
->p_pid
, sizeof (pid_t
));
439 error
= nlm_call_lock(vp
, flkp
, hostp
, fhp
, flcb
, vers
, xflags
);
444 * Save the lock locally. This should not fail,
445 * because the server is authoritative about locks
446 * and it just told us we have the lock!
448 error
= nlm_local_setlk(vp
, flkp
, flags
);
451 * That's unexpected situation. Just ignore the error.
453 NLM_WARN("nlm_frlock_setlk: Failed to set local lock. "
454 "[err=%d]\n", error
);
462 * Cancel all client side remote locks/shares on the
463 * given host. Report to the processes that own
464 * cancelled locks that they are removed by force
465 * by sending SIGLOST.
468 nlm_client_cancel_all(struct nlm_globals
*g
, struct nlm_host
*hostp
)
470 struct locklist
*llp_head
, *llp
;
471 struct nlm_shres
*nsp_head
, *nsp
;
476 sysid
= hostp
->nh_sysid
| LM_SYSID_CLIENT
;
477 nlm_host_cancel_slocks(g
, hostp
);
480 * Destroy all active locks
482 llp_head
= llp
= flk_get_active_locks(sysid
, NOPID
);
483 while (llp
!= NULL
) {
484 llp
->ll_flock
.l_type
= F_UNLCK
;
486 error
= nlm_init_fh_by_vp(llp
->ll_vp
, &lm_fh
, &vers
);
488 (void) nlm_call_unlock(&llp
->ll_flock
, hostp
,
491 nlm_local_cancelk(llp
->ll_vp
, &llp
->ll_flock
);
495 flk_free_locklist(llp_head
);
498 * Destroy all active share reservations
500 nsp_head
= nsp
= nlm_get_active_shres(hostp
);
501 while (nsp
!= NULL
) {
502 error
= nlm_init_fh_by_vp(nsp
->ns_vp
, &lm_fh
, &vers
);
504 (void) nlm_call_unshare(nsp
->ns_shr
, hostp
,
507 nlm_local_shrcancel(nsp
->ns_vp
, nsp
->ns_shr
);
508 nlm_shres_untrack(hostp
, nsp
->ns_vp
, nsp
->ns_shr
);
512 nlm_free_shrlist(nsp_head
);
516 * The function determines whether the lock "fl" can
517 * be safely applied to the file that vnode "vp" corresponds to.
518 * The lock can be "safely" applied if all the conditions
520 * - It's not a mandatory lock
521 * - The vnode wasn't mapped by anyone
522 * - The vnode was mapped, but it hasn't any locks on it.
523 * - The vnode was mapped and all locks it has occupies
527 nlm_safelock(vnode_t
*vp
, const struct flock64
*fl
, cred_t
*cr
)
529 rnode_t
*rp
= VTOR(vp
);
533 if ((rp
->r_mapcnt
> 0) && (fl
->l_start
!= 0 || fl
->l_len
!= 0))
536 va
.va_mask
= AT_MODE
;
537 err
= VOP_GETATTR(vp
, &va
, 0, cr
, NULL
);
541 /* NLM4 doesn't allow mandatory file locking */
542 if (MANDLOCK(vp
, va
.va_mode
))
549 * The function determines whether it's safe to map
550 * a file corresponding to vnode vp.
551 * The mapping is considered to be "safe" if file
552 * either has no locks on it or all the locks it
553 * has occupy the whole file.
556 nlm_safemap(const vnode_t
*vp
)
558 struct locklist
*llp
, *llp_next
;
559 struct nlm_slock
*nslp
;
560 struct nlm_globals
*g
;
563 /* Check active locks at first */
564 llp
= flk_active_locks_for_vp(vp
);
565 while (llp
!= NULL
) {
566 if ((llp
->ll_vp
== vp
) &&
567 !NLM_FLOCK_IS_SAFE(&llp
->ll_flock
))
570 llp_next
= llp
->ll_next
;
572 kmem_free(llp
, sizeof (*llp
));
578 /* Then check sleeping locks if any */
579 g
= zone_getspecific(nlm_zone_key
, curzone
);
580 mutex_enter(&g
->lock
);
581 TAILQ_FOREACH(nslp
, &g
->nlm_slocks
, nsl_link
) {
582 if (nslp
->nsl_state
== NLM_SL_BLOCKED
&&
583 nslp
->nsl_vp
== vp
&&
584 (nslp
->nsl_lock
.l_offset
!= 0 ||
585 nslp
->nsl_lock
.l_len
!= 0)) {
591 mutex_exit(&g
->lock
);
596 nlm_has_sleep(const vnode_t
*vp
)
598 struct nlm_globals
*g
;
599 struct nlm_slock
*nslp
;
600 int has_slocks
= FALSE
;
602 g
= zone_getspecific(nlm_zone_key
, curzone
);
603 mutex_enter(&g
->lock
);
604 TAILQ_FOREACH(nslp
, &g
->nlm_slocks
, nsl_link
) {
605 if (nslp
->nsl_state
== NLM_SL_BLOCKED
&&
606 nslp
->nsl_vp
== vp
) {
612 mutex_exit(&g
->lock
);
617 nlm_register_lock_locally(struct vnode
*vp
, struct nlm_host
*hostp
,
618 struct flock64
*flk
, int flags
, u_offset_t offset
)
623 sysid
= hostp
->nh_sysid
| LM_SYSID_CLIENT
;
626 flk
->l_sysid
= sysid
;
627 (void) convoff(vp
, flk
, 0, (offset_t
)offset
);
628 (void) nlm_local_setlk(vp
, flk
, flags
);
633 * The BSD code had functions here to "reclaim" (destroy)
634 * remote locks when a vnode is being forcibly destroyed.
635 * We just keep vnodes around until statd tells us the
636 * client has gone away.
640 nlm_reclaim_lock(struct nlm_host
*hostp
, vnode_t
*vp
,
641 struct flock64
*flp
, int32_t orig_state
)
648 * If the remote NSM state changes during recovery, the host
649 * must have rebooted a second time. In that case, we must
650 * restart the recovery.
652 state
= nlm_host_get_state(hostp
);
653 if (state
!= orig_state
)
656 error
= nlm_init_fh_by_vp(vp
, &lm_fh
, &vers
);
660 return (nlm_call_lock(vp
, flp
, hostp
, &lm_fh
,
661 NULL
, vers
, NLM_X_RECLAIM
));
665 * Get local lock information for some NFS server.
667 * This gets (checks for) a local conflicting lock.
668 * Note: Modifies passed flock, if a conflict is found,
669 * but the caller expects that.
672 nlm_local_getlk(vnode_t
*vp
, struct flock64
*fl
, int flags
)
674 VERIFY(fl
->l_whence
== SEEK_SET
);
675 return (reclock(vp
, fl
, 0, flags
, 0, NULL
));
679 * Set local lock information for some NFS server.
681 * Called after a lock request (set or clear) succeeded. We record the
682 * details in the local lock manager. Note that since the remote
683 * server has granted the lock, we can be sure that it doesn't
684 * conflict with any other locks we have in the local lock manager.
686 * Since it is possible that host may also make NLM client requests to
687 * our NLM server, we use a different sysid value to record our own
690 * Note that since it is possible for us to receive replies from the
691 * server in a different order than the locks were granted (e.g. if
692 * many local threads are contending for the same lock), we must use a
693 * blocking operation when registering with the local lock manager.
694 * We expect that any actual wait will be rare and short hence we
695 * ignore signals for this.
698 nlm_local_setlk(vnode_t
*vp
, struct flock64
*fl
, int flags
)
700 VERIFY(fl
->l_whence
== SEEK_SET
);
701 return (reclock(vp
, fl
, SETFLCK
, flags
, 0, NULL
));
705 * Cancel local lock and send SIGLOST signal
711 nlm_local_cancelk(vnode_t
*vp
, struct flock64
*flp
)
713 flp
->l_type
= F_UNLCK
;
714 (void) nlm_local_setlk(vp
, flp
, FREAD
| FWRITE
);
715 nlm_send_siglost(flp
->l_pid
);
722 * NOTE: nlm_call_lock() function should care about locking/unlocking
723 * of rnode->r_lkserlock which should be released before nlm_call_lock()
724 * sleeps on waiting lock and acquired when it wakes up.
727 nlm_call_lock(vnode_t
*vp
, struct flock64
*flp
,
728 struct nlm_host
*hostp
, struct netobj
*fhp
,
729 struct flk_callback
*flcb
, int vers
, int xflags
)
731 struct nlm4_lockargs args
;
732 struct nlm_owner_handle oh
;
733 struct nlm_globals
*g
;
734 rnode_t
*rnp
= VTOR(vp
);
735 struct nlm_slock
*nslp
= NULL
;
739 bzero(&args
, sizeof (args
));
740 g
= zone_getspecific(nlm_zone_key
, curzone
);
741 nlm_init_lock(&args
.alock
, flp
, fhp
, &oh
);
743 args
.exclusive
= (flp
->l_type
== F_WRLCK
);
744 args
.reclaim
= xflags
& NLM_X_RECLAIM
;
745 args
.state
= g
->nsm_state
;
746 args
.cookie
.n_len
= sizeof (xid
);
747 args
.cookie
.n_bytes
= (char *)&xid
;
749 oh
.oh_sysid
= hostp
->nh_sysid
;
750 xid
= atomic_inc_32_nv(&nlm_xid
);
752 if (xflags
& NLM_X_BLOCKING
) {
754 nslp
= nlm_slock_register(g
, hostp
, &args
.alock
, vp
);
761 enum nlm4_stats nlm_err
;
763 error
= nlm_host_get_rpc(hostp
, vers
, &rpcp
);
769 bzero(&res
, sizeof (res
));
770 stat
= nlm_lock_rpc(&args
, &res
, rpcp
->nr_handle
, vers
);
771 nlm_host_rele_rpc(hostp
, rpcp
);
773 error
= nlm_map_clnt_stat(stat
);
781 DTRACE_PROBE1(lock__res
, enum nlm4_stats
, res
.stat
.stat
);
782 nlm_err
= res
.stat
.stat
;
783 xdr_free((xdrproc_t
)xdr_nlm4_res
, (void *)&res
);
784 if (nlm_err
== nlm4_denied_grace_period
) {
790 error
= nlm_host_wait_grace(hostp
);
805 NLM_WARN("nlm_call_lock: got nlm4_denied for "
813 error
= nlm_map_status(nlm_err
);
817 * If we deal with either non-blocking lock or
818 * with a blocking locks that wasn't blocked on
819 * the server side (by some reason), our work
823 nlm_err
!= nlm4_blocked
||
828 * Before releasing the r_lkserlock of rnode, we should
829 * check whether the new lock is "safe". If it's not
830 * safe, disable caching for the given vnode. That is done
831 * for sleeping locks only that are waiting for a GRANT reply
832 * from the NLM server.
834 * NOTE: the vnode cache can be enabled back later if an
835 * unsafe lock will be merged with existent locks so that
836 * it will become safe. This condition is checked in the
837 * NFSv3 code (see nfs_lockcompletion).
839 if (!NLM_FLOCK_IS_SAFE(flp
)) {
840 mutex_enter(&vp
->v_lock
);
841 vp
->v_flag
&= ~VNOCACHE
;
842 mutex_exit(&vp
->v_lock
);
846 * The server should call us back with a
847 * granted message when the lock succeeds.
848 * In order to deal with broken servers,
849 * lost granted messages, or server reboots,
850 * we will also re-try every few seconds.
852 * Note: We're supposed to call these
853 * flk_invoke_callbacks when blocking.
854 * Take care on rnode->r_lkserlock, we should
855 * release it before going to sleep.
857 (void) flk_invoke_callbacks(flcb
, FLK_BEFORE_SLEEP
);
858 nfs_rw_exit(&rnp
->r_lkserlock
);
860 error
= nlm_slock_wait(g
, nslp
, g
->retrans_tmo
);
863 * NFS expects that we return with rnode->r_lkserlock
864 * locked on write, lock it back.
866 * NOTE: nfs_rw_enter_sig() can be either interruptible
867 * or not. It depends on options of NFS mount. Here
868 * we're _always_ uninterruptible (independently of mount
869 * options), because nfs_frlock/nfs3_frlock expects that
870 * we return with rnode->r_lkserlock acquired. So we don't
871 * want our lock attempt to be interrupted by a signal.
873 (void) nfs_rw_enter_sig(&rnp
->r_lkserlock
, RW_WRITER
, 0);
874 (void) flk_invoke_callbacks(flcb
, FLK_AFTER_SLEEP
);
878 } else if (error
== EINTR
) {
880 * We need to call the server to cancel our
883 DTRACE_PROBE1(cancel__lock
, int, error
);
884 (void) nlm_call_cancel(&args
, hostp
, vers
);
888 * Timeout happened, resend the lock request to
889 * the server. Well, we're a bit paranoid here,
890 * but keep in mind the previous request could be lost
891 * (especially with connectionless transport).
894 ASSERT(error
== ETIMEDOUT
);
900 * We could disable the vnode cache for the given _sleeping_
901 * (condition: nslp != NULL) lock if it was unsafe. Normally,
902 * nfs_lockcompletion() function can enable the vnode cache
903 * back if the lock becomes safe after activation. But it
904 * will not happen if any error occurs on the locking path.
906 * Here we enable the vnode cache back if the error occurred
907 * and if there aren't any unsafe locks on the given vnode.
908 * Note that if an error happened, the sleeping lock was deregistered.
910 if (error
!= 0 && nslp
!= NULL
&& nlm_safemap(vp
)) {
911 mutex_enter(&vp
->v_lock
);
912 vp
->v_flag
|= VNOCACHE
;
913 mutex_exit(&vp
->v_lock
);
918 nlm_slock_unregister(g
, nslp
);
924 * Do NLM_CANCEL call.
925 * Helper for nlm_call_lock() error recovery.
928 nlm_call_cancel(struct nlm4_lockargs
*largs
,
929 struct nlm_host
*hostp
, int vers
)
935 bzero(&cargs
, sizeof (cargs
));
937 xid
= atomic_inc_32_nv(&nlm_xid
);
938 cargs
.cookie
.n_len
= sizeof (xid
);
939 cargs
.cookie
.n_bytes
= (char *)&xid
;
940 cargs
.block
= largs
->block
;
941 cargs
.exclusive
= largs
->exclusive
;
942 cargs
.alock
= largs
->alock
;
945 * Unlike all other nlm_call_* functions, nlm_call_cancel
946 * doesn't spin forever until it gets reasonable response
947 * from NLM server. It makes limited number of retries and
948 * if server doesn't send a reasonable reply, it returns an
949 * error. It behaves like that because it's called from nlm_call_lock
950 * with blocked signals and thus it can not be interrupted from
953 for (retries
= 0; retries
< NLM_CANCEL_NRETRS
; retries
++) {
958 error
= nlm_host_get_rpc(hostp
, vers
, &rpcp
);
962 bzero(&res
, sizeof (res
));
963 stat
= nlm_cancel_rpc(&cargs
, &res
, rpcp
->nr_handle
, vers
);
964 nlm_host_rele_rpc(hostp
, rpcp
);
966 DTRACE_PROBE1(cancel__rloop_end
, enum clnt_stat
, stat
);
967 error
= nlm_map_clnt_stat(stat
);
975 DTRACE_PROBE1(cancel__res
, enum nlm4_stats
, res
.stat
.stat
);
976 switch (res
.stat
.stat
) {
978 * There was nothing to cancel. We are going to go ahead
979 * and assume we got the lock.
983 * The server has recently rebooted. Treat this as a
984 * successful cancellation.
986 case nlm4_denied_grace_period
:
988 * We managed to cancel.
996 * Broken server implementation. Can't really do
1003 xdr_free((xdrproc_t
)xdr_nlm4_res
, (void *)&res
);
1011 * Do NLM_UNLOCK call.
1012 * Was: nlm_clearlock
1015 nlm_call_unlock(struct flock64
*flp
, struct nlm_host
*hostp
,
1016 struct netobj
*fhp
, int vers
)
1018 struct nlm4_unlockargs args
;
1019 struct nlm_owner_handle oh
;
1020 enum nlm4_stats nlm_err
;
1024 bzero(&args
, sizeof (args
));
1025 nlm_init_lock(&args
.alock
, flp
, fhp
, &oh
);
1027 oh
.oh_sysid
= hostp
->nh_sysid
;
1028 xid
= atomic_inc_32_nv(&nlm_xid
);
1029 args
.cookie
.n_len
= sizeof (xid
);
1030 args
.cookie
.n_bytes
= (char *)&xid
;
1034 struct nlm4_res res
;
1035 enum clnt_stat stat
;
1037 error
= nlm_host_get_rpc(hostp
, vers
, &rpcp
);
1041 bzero(&res
, sizeof (res
));
1042 stat
= nlm_unlock_rpc(&args
, &res
, rpcp
->nr_handle
, vers
);
1043 nlm_host_rele_rpc(hostp
, rpcp
);
1045 error
= nlm_map_clnt_stat(stat
);
1047 if (error
== EAGAIN
)
1053 DTRACE_PROBE1(unlock__res
, enum nlm4_stats
, res
.stat
.stat
);
1054 nlm_err
= res
.stat
.stat
;
1055 xdr_free((xdrproc_t
)xdr_nlm4_res
, (void *)&res
);
1056 if (nlm_err
== nlm4_denied_grace_period
) {
1057 error
= nlm_host_wait_grace(hostp
);
1073 error
= nlm_map_status(nlm_err
);
1082 * Was: nlm_getlock()
1085 nlm_call_test(struct flock64
*flp
, struct nlm_host
*hostp
,
1086 struct netobj
*fhp
, int vers
)
1088 struct nlm4_testargs args
;
1089 struct nlm4_holder h
;
1090 struct nlm_owner_handle oh
;
1091 enum nlm4_stats nlm_err
;
1095 bzero(&args
, sizeof (args
));
1096 nlm_init_lock(&args
.alock
, flp
, fhp
, &oh
);
1098 args
.exclusive
= (flp
->l_type
== F_WRLCK
);
1099 oh
.oh_sysid
= hostp
->nh_sysid
;
1100 xid
= atomic_inc_32_nv(&nlm_xid
);
1101 args
.cookie
.n_len
= sizeof (xid
);
1102 args
.cookie
.n_bytes
= (char *)&xid
;
1106 struct nlm4_testres res
;
1107 enum clnt_stat stat
;
1109 error
= nlm_host_get_rpc(hostp
, vers
, &rpcp
);
1113 bzero(&res
, sizeof (res
));
1114 stat
= nlm_test_rpc(&args
, &res
, rpcp
->nr_handle
, vers
);
1115 nlm_host_rele_rpc(hostp
, rpcp
);
1117 error
= nlm_map_clnt_stat(stat
);
1119 if (error
== EAGAIN
)
1125 DTRACE_PROBE1(test__res
, enum nlm4_stats
, res
.stat
.stat
);
1126 nlm_err
= res
.stat
.stat
;
1127 bcopy(&res
.stat
.nlm4_testrply_u
.holder
, &h
, sizeof (h
));
1128 xdr_free((xdrproc_t
)xdr_nlm4_testres
, (void *)&res
);
1129 if (nlm_err
== nlm4_denied_grace_period
) {
1130 error
= nlm_host_wait_grace(hostp
);
1142 flp
->l_type
= F_UNLCK
;
1147 flp
->l_start
= h
.l_offset
;
1148 flp
->l_len
= h
.l_len
;
1149 flp
->l_pid
= h
.svid
;
1150 flp
->l_type
= (h
.exclusive
) ? F_WRLCK
: F_RDLCK
;
1151 flp
->l_whence
= SEEK_SET
;
1157 error
= nlm_map_status(nlm_err
);
1166 nlm_init_lock(struct nlm4_lock
*lock
,
1167 const struct flock64
*fl
, struct netobj
*fh
,
1168 struct nlm_owner_handle
*oh
)
1171 /* Caller converts to zero-base. */
1172 VERIFY(fl
->l_whence
== SEEK_SET
);
1173 bzero(lock
, sizeof (*lock
));
1174 bzero(oh
, sizeof (*oh
));
1176 lock
->caller_name
= uts_nodename();
1177 lock
->fh
.n_len
= fh
->n_len
;
1178 lock
->fh
.n_bytes
= fh
->n_bytes
;
1179 lock
->oh
.n_len
= sizeof (*oh
);
1180 lock
->oh
.n_bytes
= (void *)oh
;
1181 lock
->svid
= fl
->l_pid
;
1182 lock
->l_offset
= fl
->l_start
;
1183 lock
->l_len
= fl
->l_len
;
1186 /* ************************************************************** */
1189 nlm_shrlock(struct vnode
*vp
, int cmd
, struct shrlock
*shr
,
1190 int flags
, struct netobj
*fh
, int vers
)
1192 struct shrlock shlk
;
1196 struct nlm_host
*host
= NULL
;
1198 struct nlm_globals
*g
;
1201 sv
= mi
->mi_curr_serv
;
1203 netid
= nlm_knc_to_netid(sv
->sv_knconf
);
1204 if (netid
== NULL
) {
1205 NLM_ERR("nlm_shrlock: unknown NFS netid\n");
1209 g
= zone_getspecific(nlm_zone_key
, curzone
);
1210 host
= nlm_host_findcreate(g
, sv
->sv_hostname
, netid
, &sv
->sv_addr
);
1215 * Fill in s_sysid for the local locking calls.
1216 * Also, let's not trust the caller's l_pid.
1219 shlk
.s_sysid
= host
->nh_sysid
| LM_SYSID_CLIENT
;
1220 shlk
.s_pid
= curproc
->p_pid
;
1222 if (cmd
== F_UNSHARE
) {
1224 * Purge local (cached) share information first,
1225 * then clear the remote share.
1227 (void) nlm_local_shrlock(vp
, &shlk
, cmd
, flags
);
1228 nlm_shres_untrack(host
, vp
, &shlk
);
1229 error
= nlm_call_unshare(&shlk
, host
, fh
, vers
);
1233 nfs_add_locking_id(vp
, curproc
->p_pid
, RLMPL_OWNER
,
1234 shr
->s_owner
, shr
->s_own_len
);
1236 error
= nlm_call_share(&shlk
, host
, fh
, vers
, FALSE
);
1241 * Save the share locally. This should not fail,
1242 * because the server is authoritative about shares
1243 * and it just told us we have the share reservation!
1245 error
= nlm_local_shrlock(vp
, shr
, cmd
, flags
);
1248 * Oh oh, we really don't expect an error here.
1250 NLM_WARN("nlm_shrlock: set locally, err %d\n", error
);
1254 nlm_shres_track(host
, vp
, &shlk
);
1255 nlm_host_monitor(g
, host
, 0);
1258 nlm_host_release(g
, host
);
1264 nlm_reclaim_share(struct nlm_host
*hostp
, vnode_t
*vp
,
1265 struct shrlock
*shr
, uint32_t orig_state
)
1267 struct netobj lm_fh
;
1271 state
= nlm_host_get_state(hostp
);
1272 if (state
!= orig_state
) {
1274 * It seems that NLM server rebooted while
1275 * we were busy with recovery.
1280 error
= nlm_init_fh_by_vp(vp
, &lm_fh
, &vers
);
1284 return (nlm_call_share(shr
, hostp
, &lm_fh
, vers
, 1));
1288 * Set local share information for some NFS server.
1290 * Called after a share request (set or clear) succeeded. We record
1291 * the details in the local lock manager. Note that since the remote
1292 * server has granted the share, we can be sure that it doesn't
1293 * conflict with any other shares we have in the local lock manager.
1295 * Since it is possible that host may also make NLM client requests to
1296 * our NLM server, we use a different sysid value to record our own
1300 nlm_local_shrlock(vnode_t
*vp
, struct shrlock
*shr
, int cmd
, int flags
)
1302 return (fs_shrlock(vp
, cmd
, shr
, flags
, CRED(), NULL
));
1306 nlm_local_shrcancel(vnode_t
*vp
, struct shrlock
*shr
)
1308 (void) nlm_local_shrlock(vp
, shr
, F_UNSHARE
, FREAD
| FWRITE
);
1309 nlm_send_siglost(shr
->s_pid
);
1313 * Do NLM_SHARE call.
1314 * Was: nlm_setshare()
1317 nlm_call_share(struct shrlock
*shr
, struct nlm_host
*host
,
1318 struct netobj
*fh
, int vers
, int reclaim
)
1320 struct nlm4_shareargs args
;
1321 enum nlm4_stats nlm_err
;
1325 bzero(&args
, sizeof (args
));
1326 nlm_init_share(&args
.share
, shr
, fh
);
1328 args
.reclaim
= reclaim
;
1329 xid
= atomic_inc_32_nv(&nlm_xid
);
1330 args
.cookie
.n_len
= sizeof (xid
);
1331 args
.cookie
.n_bytes
= (char *)&xid
;
1336 struct nlm4_shareres res
;
1337 enum clnt_stat stat
;
1339 error
= nlm_host_get_rpc(host
, vers
, &rpcp
);
1343 bzero(&res
, sizeof (res
));
1344 stat
= nlm_share_rpc(&args
, &res
, rpcp
->nr_handle
, vers
);
1345 nlm_host_rele_rpc(host
, rpcp
);
1347 error
= nlm_map_clnt_stat(stat
);
1349 if (error
== EAGAIN
)
1355 DTRACE_PROBE1(share__res
, enum nlm4_stats
, res
.stat
);
1357 xdr_free((xdrproc_t
)xdr_nlm4_shareres
, (void *)&res
);
1358 if (nlm_err
== nlm4_denied_grace_period
) {
1362 error
= nlm_host_wait_grace(host
);
1380 case nlm4_denied_nolocks
:
1393 * Do NLM_UNSHARE call.
1396 nlm_call_unshare(struct shrlock
*shr
, struct nlm_host
*host
,
1397 struct netobj
*fh
, int vers
)
1399 struct nlm4_shareargs args
;
1400 enum nlm4_stats nlm_err
;
1404 bzero(&args
, sizeof (args
));
1405 nlm_init_share(&args
.share
, shr
, fh
);
1407 xid
= atomic_inc_32_nv(&nlm_xid
);
1408 args
.cookie
.n_len
= sizeof (xid
);
1409 args
.cookie
.n_bytes
= (char *)&xid
;
1413 struct nlm4_shareres res
;
1414 enum clnt_stat stat
;
1416 error
= nlm_host_get_rpc(host
, vers
, &rpcp
);
1420 bzero(&res
, sizeof (res
));
1421 stat
= nlm_unshare_rpc(&args
, &res
, rpcp
->nr_handle
, vers
);
1422 nlm_host_rele_rpc(host
, rpcp
);
1424 error
= nlm_map_clnt_stat(stat
);
1426 if (error
== EAGAIN
)
1432 DTRACE_PROBE1(unshare__res
, enum nlm4_stats
, res
.stat
);
1434 xdr_free((xdrproc_t
)xdr_nlm4_res
, (void *)&res
);
1435 if (nlm_err
== nlm4_denied_grace_period
) {
1436 error
= nlm_host_wait_grace(host
);
1453 case nlm4_denied_nolocks
:
1465 nlm_init_share(struct nlm4_share
*args
,
1466 const struct shrlock
*shr
, struct netobj
*fh
)
1469 bzero(args
, sizeof (*args
));
1471 args
->caller_name
= uts_nodename();
1472 args
->fh
.n_len
= fh
->n_len
;
1473 args
->fh
.n_bytes
= fh
->n_bytes
;
1474 args
->oh
.n_len
= shr
->s_own_len
;
1475 args
->oh
.n_bytes
= (void *)shr
->s_owner
;
1477 switch (shr
->s_deny
) {
1480 args
->mode
= fsm_DN
;
1483 args
->mode
= fsm_DR
;
1486 args
->mode
= fsm_DW
;
1489 args
->mode
= fsm_DRW
;
1493 switch (shr
->s_access
) {
1495 case 0: /* seen with F_UNSHARE */
1496 args
->access
= fsa_NONE
;
1499 args
->access
= fsa_R
;
1502 args
->access
= fsa_W
;
1505 args
->access
= fsa_RW
;
1511 * Initialize filehandle according to the version
1512 * of NFS vnode was created on. The version of
1513 * NLM that can be used with given NFS version
1514 * is saved to lm_vers.
1517 nlm_init_fh_by_vp(vnode_t
*vp
, struct netobj
*fh
, rpcvers_t
*lm_vers
)
1519 mntinfo_t
*mi
= VTOMI(vp
);
1522 * Too bad the NFS code doesn't just carry the FH
1523 * in a netobj or a netbuf.
1525 switch (mi
->mi_vers
) {
1527 /* See nfs3_frlock() */
1528 *lm_vers
= NLM4_VERS
;
1529 fh
->n_len
= VTOFH3(vp
)->fh3_length
;
1530 fh
->n_bytes
= (char *)&(VTOFH3(vp
)->fh3_u
.data
);
1534 /* See nfs_frlock() */
1535 *lm_vers
= NLM_VERS
;
1536 fh
->n_len
= sizeof (fhandle_t
);
1537 /* LINTED E_BAD_PTR_CAST_ALIGN */
1538 fh
->n_bytes
= (char *)VTOFH(vp
);
1548 * Send SIGLOST to the process identified by pid.
1549 * NOTE: called when NLM decides to remove lock
1550 * or share reservation owned by the process
1554 nlm_send_siglost(pid_t pid
)
1558 mutex_enter(&pidlock
);
1561 psignal(p
, SIGLOST
);
1563 mutex_exit(&pidlock
);
1567 nlm_map_clnt_stat(enum clnt_stat stat
)
1574 case RPC_PROGUNAVAIL
:
1586 nlm_map_status(enum nlm4_stats stat
)
1595 case nlm4_denied_nolocks
:
1601 case nlm4_denied_grace_period
: