kernel/os/flock.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21
  22 /*
  23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
  24  * Use is subject to license terms.
  25  */
  26
  27 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
  28 /*      All Rights Reserved */
  29
  30 /*
  31  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  32  * Copyright 2015 Joyent, Inc.
  33  */
  34
  35 #include <sys/flock_impl.h>
  36 #include <sys/vfs.h>
  37 #include <sys/t_lock.h>         /* for <sys/callb.h> */
  38 #include <sys/callb.h>
  39 #include <sys/nbmlock.h>
  40 #include <sys/cred.h>
  41 #include <sys/policy.h>
  42
  43 /*
  44  * The following four variables are for statistics purposes and they are
  45  * not protected by locks. They may not be accurate but will at least be
  46  * close to the actual value.
  47  */
  48
  49 int     flk_lock_allocs;
  50 int     flk_lock_frees;
  51 int     edge_allocs;
  52 int     edge_frees;
  53 int     flk_proc_vertex_allocs;
  54 int     flk_proc_edge_allocs;
  55 int     flk_proc_vertex_frees;
  56 int     flk_proc_edge_frees;
  57
  58 static kmutex_t flock_lock;
  59
  60 #ifdef DEBUG
  61 int check_debug = 0;
  62 #define CHECK_ACTIVE_LOCKS(gp)  if (check_debug) \
  63                                         check_active_locks(gp);
  64 #define CHECK_SLEEPING_LOCKS(gp)        if (check_debug) \
  65                                                 check_sleeping_locks(gp);
  66 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp)   \
  67                 if (check_debug)        \
  68                         check_owner_locks(gp, pid, sysid, vp);
  69 #define CHECK_LOCK_TRANSITION(old_state, new_state) \
  70         { \
  71                 if (check_lock_transition(old_state, new_state)) { \
  72                         cmn_err(CE_PANIC, "Illegal lock transition \
  73                             from %d to %d", old_state, new_state); \
  74                 } \
  75         }
  76 #else
  77
  78 #define CHECK_ACTIVE_LOCKS(gp)
  79 #define CHECK_SLEEPING_LOCKS(gp)
  80 #define CHECK_OWNER_LOCKS(gp, pid, sysid, vp)
  81 #define CHECK_LOCK_TRANSITION(old_state, new_state)
  82
  83 #endif /* DEBUG */
  84
  85 struct kmem_cache       *flk_edge_cache;
  86
  87 graph_t         *lock_graph[HASH_SIZE];
  88 proc_graph_t    pgraph;
  89
  90 /*
  91  * Clustering.
  92  *
  93  * NLM REGISTRY TYPE IMPLEMENTATION
  94  *
  95  * Assumptions:
  96  *  1.  Nodes in a cluster are numbered starting at 1; always non-negative
  97  *      integers.
  98  *  2.  We use this node id to identify the node an NLM server runs on.
  99  */
 100
 101 /*
 102  * NLM registry object keeps track of NLM servers via their
 103  * nlmids (which are the node ids of the node in the cluster they run on)
 104  * that have requested locks at this LLM with which this registry is
 105  * associated.
 106  *
 107  * Representation of abstraction:
 108  *    rep = record[     states: array[nlm_state],
 109  *                      lock: mutex]
 110  *
 111  *    Representation invariants:
 112  *      1. index i of rep.states is between 0 and n - 1 where n is number
 113  *         of elements in the array, which happen to be the maximum number
 114  *         of nodes in the cluster configuration + 1.
 115  *      2. map nlmid to index i of rep.states
 116  *              0   -> 0
 117  *              1   -> 1
 118  *              2   -> 2
 119  *      3.  This 1-1 mapping is quite convenient and it avoids errors resulting
 120  *          from forgetting to subtract 1 from the index.
 121  *      4.  The reason we keep the 0th index is the following.  A legitimate
 122  *          cluster configuration includes making a UFS file system NFS
 123  *          exportable.  The code is structured so that if you're in a cluster
 124  *          you do one thing; otherwise, you do something else.  The problem
 125  *          is what to do if you think you're in a cluster with PXFS loaded,
 126  *          but you're using UFS not PXFS?  The upper two bytes of the sysid
 127  *          encode the node id of the node where NLM server runs; these bytes
 128  *          are zero for UFS.  Since the nodeid is used to index into the
 129  *          registry, we can record the NLM server state information at index
 130  *          0 using the same mechanism used for PXFS file locks!
 131  */
 132 static flk_nlm_status_t *nlm_reg_status = NULL; /* state array 0..N-1 */
 133 static kmutex_t nlm_reg_lock;                   /* lock to protect arrary */
 134 static uint_t nlm_status_size;                  /* size of state array */
 135
 136 /*
 137  * Although we need a global lock dependency graph (and associated data
 138  * structures), we also need a per-zone notion of whether the lock manager is
 139  * running, and so whether to allow lock manager requests or not.
 140  *
 141  * Thus, on a per-zone basis we maintain a ``global'' variable
 142  * (flk_lockmgr_status), protected by flock_lock, and set when the lock
 143  * manager is determined to be changing state (starting or stopping).
 144  *
 145  * Each graph/zone pair also has a copy of this variable, which is protected by
 146  * the graph's mutex.
 147  *
 148  * The per-graph copies are used to synchronize lock requests with shutdown
 149  * requests.  The global copy is used to initialize the per-graph field when a
 150  * new graph is created.
 151  */
 152 struct flock_globals {
 153         flk_lockmgr_status_t flk_lockmgr_status;
 154         flk_lockmgr_status_t lockmgr_status[HASH_SIZE];
 155 };
 156
 157 zone_key_t flock_zone_key;
 158
 159 static void create_flock(lock_descriptor_t *, flock64_t *);
 160 static lock_descriptor_t        *flk_get_lock(void);
 161 static void     flk_free_lock(lock_descriptor_t *lock);
 162 static void     flk_get_first_blocking_lock(lock_descriptor_t *request);
 163 static int flk_process_request(lock_descriptor_t *);
 164 static int flk_add_edge(lock_descriptor_t *, lock_descriptor_t *, int, int);
 165 static edge_t *flk_get_edge(void);
 166 static int flk_wait_execute_request(lock_descriptor_t *);
 167 static int flk_relation(lock_descriptor_t *, lock_descriptor_t *);
 168 static void flk_insert_active_lock(lock_descriptor_t *);
 169 static void flk_delete_active_lock(lock_descriptor_t *, int);
 170 static void flk_insert_sleeping_lock(lock_descriptor_t *);
 171 static void flk_graph_uncolor(graph_t *);
 172 static void flk_wakeup(lock_descriptor_t *, int);
 173 static void flk_free_edge(edge_t *);
 174 static void flk_recompute_dependencies(lock_descriptor_t *,
 175                         lock_descriptor_t **,  int, int);
 176 static int flk_find_barriers(lock_descriptor_t *);
 177 static void flk_update_barriers(lock_descriptor_t *);
 178 static int flk_color_reachables(lock_descriptor_t *);
 179 static int flk_canceled(lock_descriptor_t *);
 180 static void flk_delete_locks_by_sysid(lock_descriptor_t *);
 181 static void report_blocker(lock_descriptor_t *, lock_descriptor_t *);
 182 static void wait_for_lock(lock_descriptor_t *);
 183 static void unlock_lockmgr_granted(struct flock_globals *);
 184 static void wakeup_sleeping_lockmgr_locks(struct flock_globals *);
 185
 186 #ifdef DEBUG
 187 static int check_lock_transition(int, int);
 188 static void check_sleeping_locks(graph_t *);
 189 static void check_active_locks(graph_t *);
 190 static int no_path(lock_descriptor_t *, lock_descriptor_t *);
 191 static void path(lock_descriptor_t *, lock_descriptor_t *);
 192 static void check_owner_locks(graph_t *, pid_t, int, vnode_t *);
 193 static int level_one_path(lock_descriptor_t *, lock_descriptor_t *);
 194 static int level_two_path(lock_descriptor_t *, lock_descriptor_t *, int);
 195 #endif
 196
 197 /*      proc_graph function definitions */
 198 static int flk_check_deadlock(lock_descriptor_t *);
 199 static void flk_proc_graph_uncolor(void);
 200 static proc_vertex_t *flk_get_proc_vertex(lock_descriptor_t *);
 201 static proc_edge_t *flk_get_proc_edge(void);
 202 static void flk_proc_release(proc_vertex_t *);
 203 static void flk_free_proc_edge(proc_edge_t *);
 204 static void flk_update_proc_graph(edge_t *, int);
 205
 206 /* Non-blocking mandatory locking */
 207 static int lock_blocks_io(nbl_op_t, uoff_t, ssize_t, int, uoff_t,
 208                         uoff_t);
 209
 210 static struct flock_globals *
 211 flk_get_globals(void)
 212 {
 213         /*
 214          * The KLM module had better be loaded if we're attempting to handle
 215          * lockmgr requests.
 216          */
 217         ASSERT(flock_zone_key != ZONE_KEY_UNINITIALIZED);
 218         return (zone_getspecific(flock_zone_key, curproc->p_zone));
 219 }
 220
 221 static flk_lockmgr_status_t
 222 flk_get_lockmgr_status(void)
 223 {
 224         struct flock_globals *fg;
 225
 226         ASSERT(MUTEX_HELD(&flock_lock));
 227
 228         if (flock_zone_key == ZONE_KEY_UNINITIALIZED) {
 229                 /*
 230                  * KLM module not loaded; lock manager definitely not running.
 231                  */
 232                 return (FLK_LOCKMGR_DOWN);
 233         }
 234         fg = flk_get_globals();
 235         return (fg->flk_lockmgr_status);
 236 }
 237
 238 /*
 239  * This implements Open File Description (not descriptor) style record locking.
 240  * These locks can also be thought of as pid-less since they are not tied to a
 241  * specific process, thus they're preserved across fork.
 242  *
 243  * Called directly from fcntl.
 244  *
 245  * See reclock() for the implementation of the traditional POSIX style record
 246  * locking scheme (pid-ful). This function is derived from reclock() but
 247  * simplified and modified to work for OFD style locking.
 248  *
 249  * The two primary advantages of OFD style of locking are:
 250  * 1) It is per-file description, so closing a file descriptor that refers to a
 251  *    different file description for the same file will not drop the lock (i.e.
 252  *    two open's of the same file get different descriptions but a dup or fork
 253  *    will refer to the same description).
 254  * 2) Locks are preserved across fork(2).
 255  *
 256  * Because these locks are per-description a lock ptr lives at the f_filocks
 257  * member of the file_t and the lock_descriptor includes a file_t pointer
 258  * to enable unique lock identification and management.
 259  *
 260  * Since these locks are pid-less we cannot do deadlock detection with the
 261  * current process-oriented implementation. This is consistent with OFD locking
 262  * behavior on other operating systems such as Linux. Since we don't do
 263  * deadlock detection we never interact with the process graph that is
 264  * maintained for deadlock detection on the traditional POSIX-style locks.
 265  *
 266  * Future Work:
 267  *
 268  * The current implementation does not support record locks. That is,
 269  * currently the single lock must cover the entire file. This is validated in
 270  * fcntl. To support record locks the f_filock pointer in the file_t needs to
 271  * be changed to a list of pointers to the locks. That list needs to be
 272  * managed independently of the lock list on the vnode itself and it needs to
 273  * be maintained as record locks are created, split, coalesced and deleted.
 274  *
 275  * The current implementation does not support remote file systems (e.g.
 276  * NFS or CIFS). This is handled in fs_frlock(). The design of how OFD locks
 277  * interact with the NLM is not clear since the NLM protocol/implementation
 278  * appears to be oriented around locks associated with a process. A further
 279  * problem is that a design is needed for what nlm_send_siglost() should do and
 280  * where it will send SIGLOST. More recent versions of Linux apparently try to
 281  * emulate OFD locks on NFS by converting them to traditional POSIX style locks
 282  * that work with the NLM. It is not clear that this provides the correct
 283  * semantics in all cases.
 284  */
 285 int
 286 ofdlock(file_t *fp, int fcmd, flock64_t *lckdat, int flag, uoff_t offset)
 287 {
 288         int cmd = 0;
 289         vnode_t *vp;
 290         lock_descriptor_t       stack_lock_request;
 291         lock_descriptor_t       *lock_request;
 292         int error = 0;
 293         graph_t *gp;
 294         int serialize = 0;
 295
 296         if (fcmd != F_OFD_GETLK)
 297                 cmd = SETFLCK;
 298
 299         if (fcmd == F_OFD_SETLKW || fcmd == F_FLOCKW)
 300                 cmd |= SLPFLCK;
 301
 302         /* see block comment */
 303         VERIFY(lckdat->l_whence == 0);
 304         VERIFY(lckdat->l_start == 0);
 305         VERIFY(lckdat->l_len == 0);
 306
 307         vp = fp->f_vnode;
 308
 309         /*
 310          * For reclock fs_frlock() would normally have set these in a few
 311          * places but for us it's cleaner to centralize it here. Note that
 312          * IGN_PID is -1. We use 0 for our pid-less locks.
 313          */
 314         lckdat->l_pid = 0;
 315         lckdat->l_sysid = 0;
 316
 317         /*
 318          * Check access permissions
 319          */
 320         if ((fcmd == F_OFD_SETLK || fcmd == F_OFD_SETLKW) &&
 321             ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
 322             (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
 323                 return (EBADF);
 324
 325         /*
 326          * for query and unlock we use the stack_lock_request
 327          */
 328         if (lckdat->l_type == F_UNLCK || !(cmd & SETFLCK)) {
 329                 lock_request = &stack_lock_request;
 330                 (void) bzero((caddr_t)lock_request,
 331                     sizeof (lock_descriptor_t));
 332
 333                 /*
 334                  * following is added to make the assertions in
 335                  * flk_execute_request() pass
 336                  */
 337                 lock_request->l_edge.edge_in_next = &lock_request->l_edge;
 338                 lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
 339                 lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
 340                 lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
 341                 lock_request->l_status = FLK_INITIAL_STATE;
 342         } else {
 343                 lock_request = flk_get_lock();
 344                 fp->f_filock = (struct filock *)lock_request;
 345         }
 346         lock_request->l_state = 0;
 347         lock_request->l_vnode = vp;
 348         lock_request->l_zoneid = getzoneid();
 349         lock_request->l_ofd = fp;
 350
 351         /*
 352          * Convert the request range into the canonical start and end
 353          * values then check the validity of the lock range.
 354          */
 355         error = flk_convert_lock_data(vp, lckdat, &lock_request->l_start,
 356             &lock_request->l_end, offset);
 357         if (error)
 358                 goto done;
 359
 360         error = flk_check_lock_data(lock_request->l_start, lock_request->l_end,
 361             MAXEND);
 362         if (error)
 363                 goto done;
 364
 365         ASSERT(lock_request->l_end >= lock_request->l_start);
 366
 367         lock_request->l_type = lckdat->l_type;
 368         if (cmd & SLPFLCK)
 369                 lock_request->l_state |= WILLING_TO_SLEEP_LOCK;
 370
 371         if (!(cmd & SETFLCK)) {
 372                 if (lock_request->l_type == F_RDLCK ||
 373                     lock_request->l_type == F_WRLCK)
 374                         lock_request->l_state |= QUERY_LOCK;
 375         }
 376         lock_request->l_flock = (*lckdat);
 377
 378         /*
 379          * We are ready for processing the request
 380          */
 381
 382         if (fcmd != F_OFD_GETLK && lock_request->l_type != F_UNLCK &&
 383             nbl_need_check(vp)) {
 384                 nbl_start_crit(vp, RW_WRITER);
 385                 serialize = 1;
 386         }
 387
 388         /* Get the lock graph for a particular vnode */
 389         gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);
 390
 391         mutex_enter(&gp->gp_mutex);
 392
 393         lock_request->l_state |= REFERENCED_LOCK;
 394         lock_request->l_graph = gp;
 395
 396         switch (lock_request->l_type) {
 397         case F_RDLCK:
 398         case F_WRLCK:
 399                 if (IS_QUERY_LOCK(lock_request)) {
 400                         flk_get_first_blocking_lock(lock_request);
 401                         if (lock_request->l_ofd != NULL)
 402                                 lock_request->l_flock.l_pid = -1;
 403                         (*lckdat) = lock_request->l_flock;
 404                 } else {
 405                         /* process the request now */
 406                         error = flk_process_request(lock_request);
 407                 }
 408                 break;
 409
 410         case F_UNLCK:
 411                 /* unlock request will not block so execute it immediately */
 412                 error = flk_execute_request(lock_request);
 413                 break;
 414
 415         default:
 416                 error = EINVAL;
 417                 break;
 418         }
 419
 420         if (lock_request == &stack_lock_request) {
 421                 flk_set_state(lock_request, FLK_DEAD_STATE);
 422         } else {
 423                 lock_request->l_state &= ~REFERENCED_LOCK;
 424                 if ((error != 0) || IS_DELETED(lock_request)) {
 425                         flk_set_state(lock_request, FLK_DEAD_STATE);
 426                         flk_free_lock(lock_request);
 427                 }
 428         }
 429
 430         mutex_exit(&gp->gp_mutex);
 431         if (serialize)
 432                 nbl_end_crit(vp);
 433
 434         return (error);
 435
 436 done:
 437         flk_set_state(lock_request, FLK_DEAD_STATE);
 438         if (lock_request != &stack_lock_request)
 439                 flk_free_lock(lock_request);
 440         return (error);
 441 }
 442
 443 /*
 444  * Remove any lock on the vnode belonging to the given file_t.
 445  * Called from closef on last close, file_t is locked.
 446  *
 447  * This is modeled on the cleanlocks() function but only removes the single
 448  * lock associated with fp.
 449  */
 450 void
 451 ofdcleanlock(file_t *fp)
 452 {
 453         lock_descriptor_t *fplock, *lock, *nlock;
 454         vnode_t *vp;
 455         graph_t *gp;
 456
 457         ASSERT(MUTEX_HELD(&fp->f_tlock));
 458
 459         if ((fplock = (lock_descriptor_t *)fp->f_filock) == NULL)
 460                 return;
 461
 462         fp->f_filock = NULL;
 463         vp = fp->f_vnode;
 464
 465         gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
 466
 467         if (gp == NULL)
 468                 return;
 469         mutex_enter(&gp->gp_mutex);
 470
 471         CHECK_SLEEPING_LOCKS(gp);
 472         CHECK_ACTIVE_LOCKS(gp);
 473
 474         SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
 475
 476         if (lock) {
 477                 do {
 478                         nlock = lock->l_next;
 479                         if (fplock == lock) {
 480                                 CANCEL_WAKEUP(lock);
 481                                 break;
 482                         }
 483                         lock = nlock;
 484                 } while (lock->l_vnode == vp);
 485         }
 486
 487         SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
 488
 489         if (lock) {
 490                 do {
 491                         nlock = lock->l_next;
 492                         if (fplock == lock) {
 493                                 flk_delete_active_lock(lock, 0);
 494                                 flk_wakeup(lock, 1);
 495                                 flk_free_lock(lock);
 496                                 break;
 497                         }
 498                         lock = nlock;
 499                 } while (lock->l_vnode == vp);
 500         }
 501
 502         CHECK_SLEEPING_LOCKS(gp);
 503         CHECK_ACTIVE_LOCKS(gp);
 504         mutex_exit(&gp->gp_mutex);
 505 }
 506
 507 /*
 508  * Routine called from fs_frlock in fs/fs_subr.c
 509  *
 510  * This implements traditional POSIX style record locking. The two primary
 511  * drawbacks to this style of locking are:
 512  * 1) It is per-process, so any close of a file descriptor that refers to the
 513  *    file will drop the lock (e.g. lock /etc/passwd, call a library function
 514  *    which opens /etc/passwd to read the file, when the library closes it's
 515  *    file descriptor the application loses its lock and does not know).
 516  * 2) Locks are not preserved across fork(2).
 517  *
 518  * Because these locks are only associated with a PID, they are per-process.
 519  * This is why any close will drop the lock and is also why, once the process
 520  * forks, the lock is no longer related to the new process. These locks can
 521  * be considered as PID-ful.
 522  *
 523  * See ofdlock() for the implementation of a similar but improved locking
 524  * scheme.
 525  */
 526 int
 527 reclock(vnode_t *vp, flock64_t *lckdat, int cmd, int flag, uoff_t offset,
 528     flk_callback_t *flk_cbp)
 529 {
 530         lock_descriptor_t       stack_lock_request;
 531         lock_descriptor_t       *lock_request;
 532         int error = 0;
 533         graph_t *gp;
 534         int                     nlmid;
 535
 536         /*
 537          * Check access permissions
 538          */
 539         if ((cmd & SETFLCK) &&
 540             ((lckdat->l_type == F_RDLCK && (flag & FREAD) == 0) ||
 541             (lckdat->l_type == F_WRLCK && (flag & FWRITE) == 0)))
 542                         return (EBADF);
 543
 544         /*
 545          * for query and unlock we use the stack_lock_request
 546          */
 547
 548         if ((lckdat->l_type == F_UNLCK) ||
 549             !((cmd & INOFLCK) || (cmd & SETFLCK))) {
 550                 lock_request = &stack_lock_request;
 551                 (void) bzero((caddr_t)lock_request,
 552                     sizeof (lock_descriptor_t));
 553
 554                 /*
 555                  * following is added to make the assertions in
 556                  * flk_execute_request() to pass through
 557                  */
 558
 559                 lock_request->l_edge.edge_in_next = &lock_request->l_edge;
 560                 lock_request->l_edge.edge_in_prev = &lock_request->l_edge;
 561                 lock_request->l_edge.edge_adj_next = &lock_request->l_edge;
 562                 lock_request->l_edge.edge_adj_prev = &lock_request->l_edge;
 563                 lock_request->l_status = FLK_INITIAL_STATE;
 564         } else {
 565                 lock_request = flk_get_lock();
 566         }
 567         lock_request->l_state = 0;
 568         lock_request->l_vnode = vp;
 569         lock_request->l_zoneid = getzoneid();
 570
 571         /*
 572          * Convert the request range into the canonical start and end
 573          * values.  The NLM protocol supports locking over the entire
 574          * 32-bit range, so there's no range checking for remote requests,
 575          * but we still need to verify that local requests obey the rules.
 576          */
 577         if ((cmd & RCMDLCK) != 0) {
 578                 ASSERT(lckdat->l_whence == 0);
 579                 lock_request->l_start = lckdat->l_start;
 580                 lock_request->l_end = (lckdat->l_len == 0) ? MAX_U_OFFSET_T :
 581                     lckdat->l_start + (lckdat->l_len - 1);
 582         } else {
 583                 /* check the validity of the lock range */
 584                 error = flk_convert_lock_data(vp, lckdat,
 585                     &lock_request->l_start, &lock_request->l_end,
 586                     offset);
 587                 if (error) {
 588                         goto done;
 589                 }
 590                 error = flk_check_lock_data(lock_request->l_start,
 591                     lock_request->l_end, MAXEND);
 592                 if (error) {
 593                         goto done;
 594                 }
 595         }
 596
 597         ASSERT(lock_request->l_end >= lock_request->l_start);
 598
 599         lock_request->l_type = lckdat->l_type;
 600         if (cmd & INOFLCK)
 601                 lock_request->l_state |= IO_LOCK;
 602         if (cmd & SLPFLCK)
 603                 lock_request->l_state |= WILLING_TO_SLEEP_LOCK;
 604         if (cmd & RCMDLCK)
 605                 lock_request->l_state |= LOCKMGR_LOCK;
 606         if (cmd & NBMLCK)
 607                 lock_request->l_state |= NBMAND_LOCK;
 608
 609         if (!((cmd & SETFLCK) || (cmd & INOFLCK))) {
 610                 if (lock_request->l_type == F_RDLCK ||
 611                     lock_request->l_type == F_WRLCK)
 612                         lock_request->l_state |= QUERY_LOCK;
 613         }
 614         lock_request->l_flock = (*lckdat);
 615         lock_request->l_callbacks = flk_cbp;
 616
 617         /*
 618          * We are ready for processing the request
 619          */
 620         if (IS_LOCKMGR(lock_request)) {
 621                 /*
 622                  * If the lock request is an NLM server request ....
 623                  */
 624                 if (nlm_status_size == 0) { /* not booted as cluster */
 625                         mutex_enter(&flock_lock);
 626                         /*
 627                          * Bail out if this is a lock manager request and the
 628                          * lock manager is not supposed to be running.
 629                          */
 630                         if (flk_get_lockmgr_status() != FLK_LOCKMGR_UP) {
 631                                 mutex_exit(&flock_lock);
 632                                 error = ENOLCK;
 633                                 goto done;
 634                         }
 635                         mutex_exit(&flock_lock);
 636                 } else {                        /* booted as a cluster */
 637                         nlmid = GETNLMID(lock_request->l_flock.l_sysid);
 638                         ASSERT(nlmid <= nlm_status_size && nlmid >= 0);
 639
 640                         mutex_enter(&nlm_reg_lock);
 641                         /*
 642                          * If the NLM registry does not know about this
 643                          * NLM server making the request, add its nlmid
 644                          * to the registry.
 645                          */
 646                         if (FLK_REGISTRY_IS_NLM_UNKNOWN(nlm_reg_status,
 647                             nlmid)) {
 648                                 FLK_REGISTRY_ADD_NLMID(nlm_reg_status, nlmid);
 649                         } else if (!FLK_REGISTRY_IS_NLM_UP(nlm_reg_status,
 650                             nlmid)) {
 651                                 /*
 652                                  * If the NLM server is already known (has made
 653                                  * previous lock requests) and its state is
 654                                  * not NLM_UP (means that NLM server is
 655                                  * shutting down), then bail out with an
 656                                  * error to deny the lock request.
 657                                  */
 658                                 mutex_exit(&nlm_reg_lock);
 659                                 error = ENOLCK;
 660                                 goto done;
 661                         }
 662                         mutex_exit(&nlm_reg_lock);
 663                 }
 664         }
 665
 666         /* Now get the lock graph for a particular vnode */
 667         gp = flk_get_lock_graph(vp, FLK_INIT_GRAPH);
 668
 669         /*
 670          * We drop rwlock here otherwise this might end up causing a
 671          * deadlock if this IOLOCK sleeps. (bugid # 1183392).
 672          */
 673
 674         if (IS_IO_LOCK(lock_request)) {
 675                 fop_rwunlock(vp,
 676                     (lock_request->l_type == F_RDLCK) ?
 677                     V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
 678         }
 679         mutex_enter(&gp->gp_mutex);
 680
 681         lock_request->l_state |= REFERENCED_LOCK;
 682         lock_request->l_graph = gp;
 683
 684         switch (lock_request->l_type) {
 685         case F_RDLCK:
 686         case F_WRLCK:
 687                 if (IS_QUERY_LOCK(lock_request)) {
 688                         flk_get_first_blocking_lock(lock_request);
 689                         if (lock_request->l_ofd != NULL)
 690                                 lock_request->l_flock.l_pid = -1;
 691                         (*lckdat) = lock_request->l_flock;
 692                         break;
 693                 }
 694
 695                 /* process the request now */
 696
 697                 error = flk_process_request(lock_request);
 698                 break;
 699
 700         case F_UNLCK:
 701                 /* unlock request will not block so execute it immediately */
 702
 703                 if (IS_LOCKMGR(lock_request) &&
 704                     flk_canceled(lock_request)) {
 705                         error = 0;
 706                 } else {
 707                         error = flk_execute_request(lock_request);
 708                 }
 709                 break;
 710
 711         case F_UNLKSYS:
 712                 /*
 713                  * Recovery mechanism to release lock manager locks when
 714                  * NFS client crashes and restart. NFS server will clear
 715                  * old locks and grant new locks.
 716                  */
 717
 718                 if (lock_request->l_flock.l_sysid == 0) {
 719                         mutex_exit(&gp->gp_mutex);
 720                         return (EINVAL);
 721                 }
 722                 if (secpolicy_nfs(CRED()) != 0) {
 723                         mutex_exit(&gp->gp_mutex);
 724                         return (EPERM);
 725                 }
 726                 flk_delete_locks_by_sysid(lock_request);
 727                 lock_request->l_state &= ~REFERENCED_LOCK;
 728                 flk_set_state(lock_request, FLK_DEAD_STATE);
 729                 flk_free_lock(lock_request);
 730                 mutex_exit(&gp->gp_mutex);
 731                 return (0);
 732
 733         default:
 734                 error = EINVAL;
 735                 break;
 736         }
 737
 738         /*
 739          * Now that we have seen the status of locks in the system for
 740          * this vnode we acquire the rwlock if it is an IO_LOCK.
 741          */
 742
 743         if (IS_IO_LOCK(lock_request)) {
 744                 (void) fop_rwlock(vp,
 745                     (lock_request->l_type == F_RDLCK) ?
 746                     V_WRITELOCK_FALSE : V_WRITELOCK_TRUE, NULL);
 747                 if (!error) {
 748                         lckdat->l_type = F_UNLCK;
 749
 750                         /*
 751                          * This wake up is needed otherwise
 752                          * if IO_LOCK has slept the dependents on this
 753                          * will not be woken up at all. (bugid # 1185482).
 754                          */
 755
 756                         flk_wakeup(lock_request, 1);
 757                         flk_set_state(lock_request, FLK_DEAD_STATE);
 758                         flk_free_lock(lock_request);
 759                 }
 760                 /*
 761                  * else if error had occurred either flk_process_request()
 762                  * has returned EDEADLK in which case there will be no
 763                  * dependents for this lock or EINTR from flk_wait_execute_
 764                  * request() in which case flk_cancel_sleeping_lock()
 765                  * would have been done. same is true with EBADF.
 766                  */
 767         }
 768
 769         if (lock_request == &stack_lock_request) {
 770                 flk_set_state(lock_request, FLK_DEAD_STATE);
 771         } else {
 772                 lock_request->l_state &= ~REFERENCED_LOCK;
 773                 if ((error != 0) || IS_DELETED(lock_request)) {
 774                         flk_set_state(lock_request, FLK_DEAD_STATE);
 775                         flk_free_lock(lock_request);
 776                 }
 777         }
 778
 779         mutex_exit(&gp->gp_mutex);
 780         return (error);
 781
 782 done:
 783         flk_set_state(lock_request, FLK_DEAD_STATE);
 784         if (lock_request != &stack_lock_request)
 785                 flk_free_lock(lock_request);
 786         return (error);
 787 }
 788
 789 /*
 790  * Invoke the callbacks in the given list.  If before sleeping, invoke in
 791  * list order.  If after sleeping, invoke in reverse order.
 792  *
 793  * CPR (suspend/resume) support: if one of the callbacks returns a
 794  * callb_cpr_t, return it.   This will be used to make the thread CPR-safe
 795  * while it is sleeping.  There should be at most one callb_cpr_t for the
 796  * thread.
 797  * XXX This is unnecessarily complicated.  The CPR information should just
 798  * get passed in directly through fop_frlock and reclock, rather than
 799  * sneaking it in via a callback.
 800  */
 801
 802 callb_cpr_t *
 803 flk_invoke_callbacks(flk_callback_t *cblist, flk_cb_when_t when)
 804 {
 805         callb_cpr_t *cpr_callbackp = NULL;
 806         callb_cpr_t *one_result;
 807         flk_callback_t *cb;
 808
 809         if (cblist == NULL)
 810                 return (NULL);
 811
 812         if (when == FLK_BEFORE_SLEEP) {
 813                 cb = cblist;
 814                 do {
 815                         one_result = (*cb->cb_callback)(when, cb->cb_data);
 816                         if (one_result != NULL) {
 817                                 ASSERT(cpr_callbackp == NULL);
 818                                 cpr_callbackp = one_result;
 819                         }
 820                         cb = cb->cb_next;
 821                 } while (cb != cblist);
 822         } else {
 823                 cb = cblist->cb_prev;
 824                 do {
 825                         one_result = (*cb->cb_callback)(when, cb->cb_data);
 826                         if (one_result != NULL) {
 827                                 cpr_callbackp = one_result;
 828                         }
 829                         cb = cb->cb_prev;
 830                 } while (cb != cblist->cb_prev);
 831         }
 832
 833         return (cpr_callbackp);
 834 }
 835
 836 /*
 837  * Initialize a flk_callback_t to hold the given callback.
 838  */
 839
 840 void
 841 flk_init_callback(flk_callback_t *flk_cb,
 842     callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *), void *cbdata)
 843 {
 844         flk_cb->cb_next = flk_cb;
 845         flk_cb->cb_prev = flk_cb;
 846         flk_cb->cb_callback = cb_fcn;
 847         flk_cb->cb_data = cbdata;
 848 }
 849
 850 /*
 851  * Initialize an flk_callback_t and then link it into the head of an
 852  * existing list (which may be NULL).
 853  */
 854
 855 void
 856 flk_add_callback(flk_callback_t *newcb,
 857     callb_cpr_t *(*cb_fcn)(flk_cb_when_t, void *),
 858     void *cbdata, flk_callback_t *cblist)
 859 {
 860         flk_init_callback(newcb, cb_fcn, cbdata);
 861
 862         if (cblist == NULL)
 863                 return;
 864
 865         newcb->cb_prev = cblist->cb_prev;
 866         newcb->cb_next = cblist;
 867         cblist->cb_prev->cb_next = newcb;
 868         cblist->cb_prev = newcb;
 869 }
 870
 871 /*
 872  * Remove the callback from a list.
 873  */
 874
 875 void
 876 flk_del_callback(flk_callback_t *flk_cb)
 877 {
 878         flk_cb->cb_next->cb_prev = flk_cb->cb_prev;
 879         flk_cb->cb_prev->cb_next = flk_cb->cb_next;
 880
 881         flk_cb->cb_prev = flk_cb;
 882         flk_cb->cb_next = flk_cb;
 883 }
 884
 885 /*
 886  * Initialize the flk_edge_cache data structure and create the
 887  * nlm_reg_status array.
 888  */
 889
 890 void
 891 flk_init(void)
 892 {
 893         uint_t  i;
 894
 895         flk_edge_cache = kmem_cache_create("flk_edges",
 896             sizeof (struct edge), 0, NULL, NULL, NULL, NULL, NULL, 0);
 897         if (flk_edge_cache == NULL) {
 898                 cmn_err(CE_PANIC, "Couldn't create flk_edge_cache\n");
 899         }
 900         /*
 901          * Create the NLM registry object.
 902          */
 903
 904         nlm_status_size = 0;
 905 }
 906
 907 /*
 908  * Zone constructor/destructor callbacks to be executed when a zone is
 909  * created/destroyed.
 910  */
 911 /* ARGSUSED */
 912 void *
 913 flk_zone_init(zoneid_t zoneid)
 914 {
 915         struct flock_globals *fg;
 916         uint_t i;
 917
 918         fg = kmem_alloc(sizeof (*fg), KM_SLEEP);
 919         fg->flk_lockmgr_status = FLK_LOCKMGR_UP;
 920         for (i = 0; i < HASH_SIZE; i++)
 921                 fg->lockmgr_status[i] = FLK_LOCKMGR_UP;
 922         return (fg);
 923 }
 924
 925 /* ARGSUSED */
 926 void
 927 flk_zone_fini(zoneid_t zoneid, void *data)
 928 {
 929         struct flock_globals *fg = data;
 930
 931         kmem_free(fg, sizeof (*fg));
 932 }
 933
 934 /*
 935  * Get a lock_descriptor structure with initialization of edge lists.
 936  */
 937
 938 static lock_descriptor_t *
 939 flk_get_lock(void)
 940 {
 941         lock_descriptor_t       *l;
 942
 943         l = kmem_zalloc(sizeof (lock_descriptor_t), KM_SLEEP);
 944
 945         cv_init(&l->l_cv, NULL, CV_DRIVER, NULL);
 946         l->l_edge.edge_in_next = &l->l_edge;
 947         l->l_edge.edge_in_prev = &l->l_edge;
 948         l->l_edge.edge_adj_next = &l->l_edge;
 949         l->l_edge.edge_adj_prev = &l->l_edge;
 950         l->pvertex = -1;
 951         l->l_status = FLK_INITIAL_STATE;
 952         flk_lock_allocs++;
 953         return (l);
 954 }
 955
 956 /*
 957  * Free a lock_descriptor structure. Just sets the DELETED_LOCK flag
 958  * when some thread has a reference to it as in reclock().
 959  */
 960
 961 void
 962 flk_free_lock(lock_descriptor_t *lock)
 963 {
 964         file_t *fp;
 965
 966         ASSERT(IS_DEAD(lock));
 967
 968         if ((fp = lock->l_ofd) != NULL && fp->f_filock == (struct filock *)lock)
 969                 fp->f_filock = NULL;
 970
 971         if (IS_REFERENCED(lock)) {
 972                 lock->l_state |= DELETED_LOCK;
 973                 return;
 974         }
 975         flk_lock_frees++;
 976         kmem_free((void *)lock, sizeof (lock_descriptor_t));
 977 }
 978
 979 void
 980 flk_set_state(lock_descriptor_t *lock, int new_state)
 981 {
 982         /*
 983          * Locks in the sleeping list may be woken up in a number of ways,
 984          * and more than once.  If a sleeping lock is signaled awake more
 985          * than once, then it may or may not change state depending on its
 986          * current state.
 987          * Also note that NLM locks that are sleeping could be moved to an
 988          * interrupted state more than once if the unlock request is
 989          * retransmitted by the NLM client - the second time around, this is
 990          * just a nop.
 991          * The ordering of being signaled awake is:
 992          * INTERRUPTED_STATE > CANCELLED_STATE > GRANTED_STATE.
 993          * The checks below implement this ordering.
 994          */
 995         if (IS_INTERRUPTED(lock)) {
 996                 if ((new_state == FLK_CANCELLED_STATE) ||
 997                     (new_state == FLK_GRANTED_STATE) ||
 998                     (new_state == FLK_INTERRUPTED_STATE)) {
 999                         return;
1000                 }
1001         }
1002         if (IS_CANCELLED(lock)) {
1003                 if ((new_state == FLK_GRANTED_STATE) ||
1004                     (new_state == FLK_CANCELLED_STATE)) {
1005                         return;
1006                 }
1007         }
1008         CHECK_LOCK_TRANSITION(lock->l_status, new_state);
1009         lock->l_status = new_state;
1010 }
1011
1012 /*
1013  * Routine that checks whether there are any blocking locks in the system.
1014  *
1015  * The policy followed is if a write lock is sleeping we don't allow read
1016  * locks before this write lock even though there may not be any active
1017  * locks corresponding to the read locks' region.
1018  *
1019  * flk_add_edge() function adds an edge between l1 and l2 iff there
1020  * is no path between l1 and l2. This is done to have a "minimum
1021  * storage representation" of the dependency graph.
1022  *
1023  * Another property of the graph is since only the new request throws
1024  * edges to the existing locks in the graph, the graph is always topologically
1025  * ordered.
1026  */
1027
1028 static int
1029 flk_process_request(lock_descriptor_t *request)
1030 {
1031         graph_t *gp = request->l_graph;
1032         lock_descriptor_t *lock;
1033         int request_blocked_by_active = 0;
1034         int request_blocked_by_granted = 0;
1035         int request_blocked_by_sleeping = 0;
1036         vnode_t *vp = request->l_vnode;
1037         int     error = 0;
1038         int request_will_wait = 0;
1039         int found_covering_lock = 0;
1040         lock_descriptor_t *covered_by = NULL;
1041
1042         ASSERT(MUTEX_HELD(&gp->gp_mutex));
1043         request_will_wait = IS_WILLING_TO_SLEEP(request);
1044
1045         /*
1046          * check active locks
1047          */
1048
1049         SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1050
1051
1052         if (lock) {
1053                 do {
1054                         if (BLOCKS(lock, request)) {
1055                                 if (!request_will_wait)
1056                                         return (EAGAIN);
1057                                 request_blocked_by_active = 1;
1058                                 break;
1059                         }
1060                         /*
1061                          * Grant lock if it is for the same owner holding active
1062                          * lock that covers the request.
1063                          */
1064
1065                         if (SAME_OWNER(lock, request) &&
1066                             COVERS(lock, request) &&
1067                             (request->l_type == F_RDLCK))
1068                                 return (flk_execute_request(request));
1069                         lock = lock->l_next;
1070                 } while (lock->l_vnode == vp);
1071         }
1072
1073         if (!request_blocked_by_active) {
1074                         lock_descriptor_t *lk[1];
1075                         lock_descriptor_t *first_glock = NULL;
1076                 /*
1077                  * Shall we grant this?! NO!!
1078                  * What about those locks that were just granted and still
1079                  * in sleep queue. Those threads are woken up and so locks
1080                  * are almost active.
1081                  */
1082                 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
1083                 if (lock) {
1084                         do {
1085                                 if (BLOCKS(lock, request)) {
1086                                         if (IS_GRANTED(lock)) {
1087                                                 request_blocked_by_granted = 1;
1088                                         } else {
1089                                                 request_blocked_by_sleeping = 1;
1090                                         }
1091                                 }
1092
1093                                 lock = lock->l_next;
1094                         } while ((lock->l_vnode == vp));
1095                         first_glock = lock->l_prev;
1096                         ASSERT(first_glock->l_vnode == vp);
1097                 }
1098
1099                 if (request_blocked_by_granted)
1100                         goto block;
1101
1102                 if (!request_blocked_by_sleeping) {
1103                         /*
1104                          * If the request isn't going to be blocked by a
1105                          * sleeping request, we know that it isn't going to
1106                          * be blocked; we can just execute the request --
1107                          * without performing costly deadlock detection.
1108                          */
1109                         ASSERT(!request_blocked_by_active);
1110                         return (flk_execute_request(request));
1111                 } else if (request->l_type == F_RDLCK) {
1112                         /*
1113                          * If we have a sleeping writer in the requested
1114                          * lock's range, block.
1115                          */
1116                         goto block;
1117                 }
1118
1119                 lk[0] = request;
1120                 request->l_state |= RECOMPUTE_LOCK;
1121                 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1122                 if (lock) {
1123                         do {
1124                                 flk_recompute_dependencies(lock, lk, 1, 0);
1125                                 lock = lock->l_next;
1126                         } while (lock->l_vnode == vp);
1127                 }
1128                 lock = first_glock;
1129                 if (lock) {
1130                         do {
1131                                 if (IS_GRANTED(lock)) {
1132                                 flk_recompute_dependencies(lock, lk, 1, 0);
1133                                 }
1134                                 lock = lock->l_prev;
1135                         } while ((lock->l_vnode == vp));
1136                 }
1137                 request->l_state &= ~RECOMPUTE_LOCK;
1138                 if (!NO_DEPENDENTS(request) && flk_check_deadlock(request))
1139                         return (EDEADLK);
1140                 return (flk_execute_request(request));
1141         }
1142
1143 block:
1144         if (request_will_wait)
1145                 flk_graph_uncolor(gp);
1146
1147         /* check sleeping locks */
1148
1149         SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
1150
1151         /*
1152          * If we find a sleeping write lock that is a superset of the
1153          * region wanted by request we can be assured that by adding an
1154          * edge to this write lock we have paths to all locks in the
1155          * graph that blocks the request except in one case and that is why
1156          * another check for SAME_OWNER in the loop below. The exception
1157          * case is when this process that owns the sleeping write lock 'l1'
1158          * has other locks l2, l3, l4 that are in the system and arrived
1159          * before l1. l1 does not have path to these locks as they are from
1160          * same process. We break when we find a second covering sleeping
1161          * lock l5 owned by a process different from that owning l1, because
1162          * there cannot be any of l2, l3, l4, etc., arrived before l5, and if
1163          * it has l1 would have produced a deadlock already.
1164          */
1165
1166         if (lock) {
1167                 do {
1168                         if (BLOCKS(lock, request)) {
1169                                 if (!request_will_wait)
1170                                         return (EAGAIN);
1171                                 if (COVERS(lock, request) &&
1172                                     lock->l_type == F_WRLCK) {
1173                                         if (found_covering_lock &&
1174                                             !SAME_OWNER(lock, covered_by)) {
1175                                                 found_covering_lock++;
1176                                                 break;
1177                                         }
1178                                         found_covering_lock = 1;
1179                                         covered_by = lock;
1180                                 }
1181                                 if (found_covering_lock &&
1182                                     !SAME_OWNER(lock, covered_by)) {
1183                                         lock = lock->l_next;
1184                                         continue;
1185                                 }
1186                                 if ((error = flk_add_edge(request, lock,
1187                                     !found_covering_lock, 0)))
1188                                         return (error);
1189                         }
1190                         lock = lock->l_next;
1191                 } while (lock->l_vnode == vp);
1192         }
1193
1194 /*
1195  * found_covering_lock == 2 iff at this point 'request' has paths
1196  * to all locks that blocks 'request'. found_covering_lock == 1 iff at this
1197  * point 'request' has paths to all locks that blocks 'request' whose owners
1198  * are not same as the one that covers 'request' (covered_by above) and
1199  * we can have locks whose owner is same as covered_by in the active list.
1200  */
1201
1202         if (request_blocked_by_active && found_covering_lock != 2) {
1203                 SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1204                 ASSERT(lock != NULL);
1205                 do {
1206                         if (BLOCKS(lock, request)) {
1207                                 if (found_covering_lock &&
1208                                     !SAME_OWNER(lock, covered_by)) {
1209                                         lock = lock->l_next;
1210                                         continue;
1211                                 }
1212                                 if ((error = flk_add_edge(request, lock,
1213                                     CHECK_CYCLE, 0)))
1214                                         return (error);
1215                         }
1216                         lock = lock->l_next;
1217                 } while (lock->l_vnode == vp);
1218         }
1219
1220         if (NOT_BLOCKED(request)) {
1221                 /*
1222                  * request not dependent on any other locks
1223                  * so execute this request
1224                  */
1225                 return (flk_execute_request(request));
1226         } else {
1227                 /*
1228                  * check for deadlock
1229                  */
1230                 if (flk_check_deadlock(request))
1231                         return (EDEADLK);
1232                 /*
1233                  * this thread has to sleep
1234                  */
1235                 return (flk_wait_execute_request(request));
1236         }
1237 }
1238
1239 /*
1240  * The actual execution of the request in the simple case is only to
1241  * insert the 'request' in the list of active locks if it is not an
1242  * UNLOCK.
1243  * We have to consider the existing active locks' relation to
1244  * this 'request' if they are owned by same process. flk_relation() does
1245  * this job and sees to that the dependency graph information is maintained
1246  * properly.
1247  */
1248
1249 int
1250 flk_execute_request(lock_descriptor_t *request)
1251 {
1252         graph_t *gp = request->l_graph;
1253         vnode_t *vp = request->l_vnode;
1254         lock_descriptor_t       *lock, *lock1;
1255         int done_searching = 0;
1256
1257         CHECK_SLEEPING_LOCKS(gp);
1258         CHECK_ACTIVE_LOCKS(gp);
1259
1260         ASSERT(MUTEX_HELD(&gp->gp_mutex));
1261
1262         flk_set_state(request, FLK_START_STATE);
1263
1264         ASSERT(NOT_BLOCKED(request));
1265
1266         /* IO_LOCK requests are only to check status */
1267
1268         if (IS_IO_LOCK(request))
1269                 return (0);
1270
1271         SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1272
1273         if (lock == NULL && request->l_type == F_UNLCK)
1274                 return (0);
1275         if (lock == NULL) {
1276                 flk_insert_active_lock(request);
1277                 return (0);
1278         }
1279
1280         do {
1281                 lock1 = lock->l_next;
1282                 if (SAME_OWNER(request, lock)) {
1283                         done_searching = flk_relation(lock, request);
1284                 }
1285                 lock = lock1;
1286         } while (lock->l_vnode == vp && !done_searching);
1287
1288         /*
1289          * insert in active queue
1290          */
1291
1292         if (request->l_type != F_UNLCK)
1293                 flk_insert_active_lock(request);
1294
1295         return (0);
1296 }
1297
1298 /*
1299  * 'request' is blocked by some one therefore we put it into sleep queue.
1300  */
1301 static int
1302 flk_wait_execute_request(lock_descriptor_t *request)
1303 {
1304         graph_t *gp = request->l_graph;
1305         callb_cpr_t     *cprp;          /* CPR info from callback */
1306         struct flock_globals *fg;
1307         int index;
1308
1309         ASSERT(MUTEX_HELD(&gp->gp_mutex));
1310         ASSERT(IS_WILLING_TO_SLEEP(request));
1311
1312         flk_insert_sleeping_lock(request);
1313
1314         if (IS_LOCKMGR(request)) {
1315                 index = HASH_INDEX(request->l_vnode);
1316                 fg = flk_get_globals();
1317
1318                 if (nlm_status_size == 0) {     /* not booted as a cluster */
1319                         if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP) {
1320                                 flk_cancel_sleeping_lock(request, 1);
1321                                 return (ENOLCK);
1322                         }
1323                 } else {                        /* booted as a cluster */
1324                         /*
1325                          * If the request is an NLM server lock request,
1326                          * and the NLM state of the lock request is not
1327                          * NLM_UP (because the NLM server is shutting
1328                          * down), then cancel the sleeping lock and
1329                          * return error ENOLCK that will encourage the
1330                          * client to retransmit.
1331                          */
1332                         if (!IS_NLM_UP(request)) {
1333                                 flk_cancel_sleeping_lock(request, 1);
1334                                 return (ENOLCK);
1335                         }
1336                 }
1337         }
1338
1339         if (request->l_callbacks != NULL) {
1340                 /*
1341                  * To make sure the shutdown code works correctly, either
1342                  * the callback must happen after putting the lock on the
1343                  * sleep list, or we must check the shutdown status after
1344                  * returning from the callback (and before sleeping).  At
1345                  * least for now, we'll use the first option.  If a
1346                  * shutdown or signal or whatever happened while the graph
1347                  * mutex was dropped, that will be detected by
1348                  * wait_for_lock().
1349                  */
1350                 mutex_exit(&gp->gp_mutex);
1351
1352                 cprp = flk_invoke_callbacks(request->l_callbacks,
1353                     FLK_BEFORE_SLEEP);
1354
1355                 mutex_enter(&gp->gp_mutex);
1356
1357                 if (cprp == NULL) {
1358                         wait_for_lock(request);
1359                 } else {
1360                         mutex_enter(cprp->cc_lockp);
1361                         CALLB_CPR_SAFE_BEGIN(cprp);
1362                         mutex_exit(cprp->cc_lockp);
1363                         wait_for_lock(request);
1364                         mutex_enter(cprp->cc_lockp);
1365                         CALLB_CPR_SAFE_END(cprp, cprp->cc_lockp);
1366                         mutex_exit(cprp->cc_lockp);
1367                 }
1368
1369                 mutex_exit(&gp->gp_mutex);
1370                 (void) flk_invoke_callbacks(request->l_callbacks,
1371                     FLK_AFTER_SLEEP);
1372                 mutex_enter(&gp->gp_mutex);
1373         } else {
1374                 wait_for_lock(request);
1375         }
1376
1377         if (IS_LOCKMGR(request)) {
1378                 /*
1379                  * If the lock manager is shutting down, return an
1380                  * error that will encourage the client to retransmit.
1381                  */
1382                 if (fg->lockmgr_status[index] != FLK_LOCKMGR_UP &&
1383                     !IS_GRANTED(request)) {
1384                         flk_cancel_sleeping_lock(request, 1);
1385                         return (ENOLCK);
1386                 }
1387         }
1388
1389         if (IS_INTERRUPTED(request)) {
1390                 /* we got a signal, or act like we did */
1391                 flk_cancel_sleeping_lock(request, 1);
1392                 return (EINTR);
1393         }
1394
1395         /* Cancelled if some other thread has closed the file */
1396
1397         if (IS_CANCELLED(request)) {
1398                 flk_cancel_sleeping_lock(request, 1);
1399                 return (EBADF);
1400         }
1401
1402         request->l_state &= ~GRANTED_LOCK;
1403         REMOVE_SLEEP_QUEUE(request);
1404         return (flk_execute_request(request));
1405 }
1406
1407 /*
1408  * This routine adds an edge between from and to because from depends
1409  * to. If asked to check for deadlock it checks whether there are any
1410  * reachable locks from "from_lock" that is owned by the same process
1411  * as "from_lock".
1412  * NOTE: It is the caller's responsibility to make sure that the color
1413  * of the graph is consistent between the calls to flk_add_edge as done
1414  * in flk_process_request. This routine does not color and check for
1415  * deadlock explicitly.
1416  */
1417
1418 static int
1419 flk_add_edge(lock_descriptor_t *from_lock, lock_descriptor_t *to_lock,
1420     int check_cycle, int update_graph)
1421 {
1422         edge_t  *edge;
1423         edge_t  *ep;
1424         lock_descriptor_t       *vertex;
1425         lock_descriptor_t *vertex_stack;
1426
1427         STACK_INIT(vertex_stack);
1428
1429         /*
1430          * if to vertex already has mark_color just return
1431          * don't add an edge as it is reachable from from vertex
1432          * before itself.
1433          */
1434
1435         if (COLORED(to_lock))
1436                 return (0);
1437
1438         edge = flk_get_edge();
1439
1440         /*
1441          * set the from and to vertex
1442          */
1443
1444         edge->from_vertex = from_lock;
1445         edge->to_vertex = to_lock;
1446
1447         /*
1448          * put in adjacency list of from vertex
1449          */
1450
1451         from_lock->l_edge.edge_adj_next->edge_adj_prev = edge;
1452         edge->edge_adj_next = from_lock->l_edge.edge_adj_next;
1453         edge->edge_adj_prev = &from_lock->l_edge;
1454         from_lock->l_edge.edge_adj_next = edge;
1455
1456         /*
1457          * put in list of to vertex
1458          */
1459
1460         to_lock->l_edge.edge_in_next->edge_in_prev = edge;
1461         edge->edge_in_next = to_lock->l_edge.edge_in_next;
1462         to_lock->l_edge.edge_in_next = edge;
1463         edge->edge_in_prev = &to_lock->l_edge;
1464
1465
1466         if (update_graph) {
1467                 flk_update_proc_graph(edge, 0);
1468                 return (0);
1469         }
1470         if (!check_cycle) {
1471                 return (0);
1472         }
1473
1474         STACK_PUSH(vertex_stack, from_lock, l_stack);
1475
1476         while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1477
1478                 STACK_POP(vertex_stack, l_stack);
1479
1480                 for (ep = FIRST_ADJ(vertex);
1481                     ep != HEAD(vertex);
1482                     ep = NEXT_ADJ(ep)) {
1483                         if (COLORED(ep->to_vertex))
1484                                 continue;
1485                         COLOR(ep->to_vertex);
1486                         if (SAME_OWNER(ep->to_vertex, from_lock))
1487                                 goto dead_lock;
1488                         STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
1489                 }
1490         }
1491         return (0);
1492
1493 dead_lock:
1494
1495         /*
1496          * remove all edges
1497          */
1498
1499         ep = FIRST_ADJ(from_lock);
1500
1501         while (ep != HEAD(from_lock)) {
1502                 IN_LIST_REMOVE(ep);
1503                 from_lock->l_sedge = NEXT_ADJ(ep);
1504                 ADJ_LIST_REMOVE(ep);
1505                 flk_free_edge(ep);
1506                 ep = from_lock->l_sedge;
1507         }
1508         return (EDEADLK);
1509 }
1510
1511 /*
1512  * Get an edge structure for representing the dependency between two locks.
1513  */
1514
1515 static edge_t *
1516 flk_get_edge()
1517 {
1518         edge_t  *ep;
1519
1520         ASSERT(flk_edge_cache != NULL);
1521
1522         ep = kmem_cache_alloc(flk_edge_cache, KM_SLEEP);
1523         edge_allocs++;
1524         return (ep);
1525 }
1526
1527 /*
1528  * Free the edge structure.
1529  */
1530
1531 static void
1532 flk_free_edge(edge_t *ep)
1533 {
1534         edge_frees++;
1535         kmem_cache_free(flk_edge_cache, (void *)ep);
1536 }
1537
1538 /*
1539  * Check the relationship of request with lock and perform the
1540  * recomputation of dependencies, break lock if required, and return
1541  * 1 if request cannot have any more relationship with the next
1542  * active locks.
1543  * The 'lock' and 'request' are compared and in case of overlap we
1544  * delete the 'lock' and form new locks to represent the non-overlapped
1545  * portion of original 'lock'. This function has side effects such as
1546  * 'lock' will be freed, new locks will be added to the active list.
1547  */
1548
1549 static int
1550 flk_relation(lock_descriptor_t *lock, lock_descriptor_t *request)
1551 {
1552         int lock_effect;
1553         lock_descriptor_t *lock1, *lock2;
1554         lock_descriptor_t *topology[3];
1555         int nvertex = 0;
1556         int i;
1557         edge_t  *ep;
1558         graph_t *gp = (lock->l_graph);
1559
1560
1561         CHECK_SLEEPING_LOCKS(gp);
1562         CHECK_ACTIVE_LOCKS(gp);
1563
1564         ASSERT(MUTEX_HELD(&gp->gp_mutex));
1565
1566         topology[0] = topology[1] = topology[2] = NULL;
1567
1568         if (request->l_type == F_UNLCK)
1569                 lock_effect = FLK_UNLOCK;
1570         else if (request->l_type == F_RDLCK &&
1571             lock->l_type == F_WRLCK)
1572                 lock_effect = FLK_DOWNGRADE;
1573         else if (request->l_type == F_WRLCK &&
1574             lock->l_type == F_RDLCK)
1575                 lock_effect = FLK_UPGRADE;
1576         else
1577                 lock_effect = FLK_STAY_SAME;
1578
1579         if (lock->l_end < request->l_start) {
1580                 if (lock->l_end == request->l_start - 1 &&
1581                     lock_effect == FLK_STAY_SAME) {
1582                         topology[0] = request;
1583                         request->l_start = lock->l_start;
1584                         nvertex = 1;
1585                         goto recompute;
1586                 } else {
1587                         return (0);
1588                 }
1589         }
1590
1591         if (lock->l_start > request->l_end) {
1592                 if (request->l_end == lock->l_start - 1 &&
1593                     lock_effect == FLK_STAY_SAME) {
1594                         topology[0] = request;
1595                         request->l_end = lock->l_end;
1596                         nvertex = 1;
1597                         goto recompute;
1598                 } else {
1599                         return (1);
1600                 }
1601         }
1602
1603         if (request->l_end < lock->l_end) {
1604                 if (request->l_start > lock->l_start) {
1605                         if (lock_effect == FLK_STAY_SAME) {
1606                                 request->l_start = lock->l_start;
1607                                 request->l_end = lock->l_end;
1608                                 topology[0] = request;
1609                                 nvertex = 1;
1610                         } else {
1611                                 lock1 = flk_get_lock();
1612                                 lock2 = flk_get_lock();
1613                                 COPY(lock1, lock);
1614                                 COPY(lock2, lock);
1615                                 lock1->l_start = lock->l_start;
1616                                 lock1->l_end = request->l_start - 1;
1617                                 lock2->l_start = request->l_end + 1;
1618                                 lock2->l_end = lock->l_end;
1619                                 topology[0] = lock1;
1620                                 topology[1] = lock2;
1621                                 topology[2] = request;
1622                                 nvertex = 3;
1623                         }
1624                 } else if (request->l_start < lock->l_start) {
1625                         if (lock_effect == FLK_STAY_SAME) {
1626                                 request->l_end = lock->l_end;
1627                                 topology[0] = request;
1628                                 nvertex = 1;
1629                         } else {
1630                                 lock1 = flk_get_lock();
1631                                 COPY(lock1, lock);
1632                                 lock1->l_start = request->l_end + 1;
1633                                 topology[0] = lock1;
1634                                 topology[1] = request;
1635                                 nvertex = 2;
1636                         }
1637                 } else  {
1638                         if (lock_effect == FLK_STAY_SAME) {
1639                                 request->l_start = lock->l_start;
1640                                 request->l_end = lock->l_end;
1641                                 topology[0] = request;
1642                                 nvertex = 1;
1643                         } else {
1644                                 lock1 = flk_get_lock();
1645                                 COPY(lock1, lock);
1646                                 lock1->l_start = request->l_end + 1;
1647                                 topology[0] = lock1;
1648                                 topology[1] = request;
1649                                 nvertex = 2;
1650                         }
1651                 }
1652         } else if (request->l_end > lock->l_end) {
1653                 if (request->l_start > lock->l_start)  {
1654                         if (lock_effect == FLK_STAY_SAME) {
1655                                 request->l_start = lock->l_start;
1656                                 topology[0] = request;
1657                                 nvertex = 1;
1658                         } else {
1659                                 lock1 = flk_get_lock();
1660                                 COPY(lock1, lock);
1661                                 lock1->l_end = request->l_start - 1;
1662                                 topology[0] = lock1;
1663                                 topology[1] = request;
1664                                 nvertex = 2;
1665                         }
1666                 } else if (request->l_start < lock->l_start)  {
1667                         topology[0] = request;
1668                         nvertex = 1;
1669                 } else {
1670                         topology[0] = request;
1671                         nvertex = 1;
1672                 }
1673         } else {
1674                 if (request->l_start > lock->l_start) {
1675                         if (lock_effect == FLK_STAY_SAME) {
1676                                 request->l_start = lock->l_start;
1677                                 topology[0] = request;
1678                                 nvertex = 1;
1679                         } else {
1680                                 lock1 = flk_get_lock();
1681                                 COPY(lock1, lock);
1682                                 lock1->l_end = request->l_start - 1;
1683                                 topology[0] = lock1;
1684                                 topology[1] = request;
1685                                 nvertex = 2;
1686                         }
1687                 } else if (request->l_start < lock->l_start) {
1688                         topology[0] = request;
1689                         nvertex = 1;
1690                 } else {
1691                         if (lock_effect !=  FLK_UNLOCK) {
1692                                 topology[0] = request;
1693                                 nvertex = 1;
1694                         } else {
1695                                 flk_delete_active_lock(lock, 0);
1696                                 flk_wakeup(lock, 1);
1697                                 flk_free_lock(lock);
1698                                 CHECK_SLEEPING_LOCKS(gp);
1699                                 CHECK_ACTIVE_LOCKS(gp);
1700                                 return (1);
1701                         }
1702                 }
1703         }
1704
1705 recompute:
1706
1707         /*
1708          * For unlock we don't send the 'request' to for recomputing
1709          * dependencies because no lock will add an edge to this.
1710          */
1711
1712         if (lock_effect == FLK_UNLOCK) {
1713                 topology[nvertex-1] = NULL;
1714                 nvertex--;
1715         }
1716         for (i = 0; i < nvertex; i++) {
1717                 topology[i]->l_state |= RECOMPUTE_LOCK;
1718                 topology[i]->l_color = NO_COLOR;
1719         }
1720
1721         ASSERT(FIRST_ADJ(lock) == HEAD(lock));
1722
1723         /*
1724          * we remove the adjacent edges for all vertices' to this vertex
1725          * 'lock'.
1726          */
1727
1728         ep = FIRST_IN(lock);
1729         while (ep != HEAD(lock)) {
1730                 ADJ_LIST_REMOVE(ep);
1731                 ep = NEXT_IN(ep);
1732         }
1733
1734         flk_delete_active_lock(lock, 0);
1735
1736         /* We are ready for recomputing the dependencies now */
1737
1738         flk_recompute_dependencies(lock, topology, nvertex, 1);
1739
1740         for (i = 0; i < nvertex; i++) {
1741                 topology[i]->l_state &= ~RECOMPUTE_LOCK;
1742                 topology[i]->l_color = NO_COLOR;
1743         }
1744
1745
1746         if (lock_effect == FLK_UNLOCK) {
1747                 nvertex++;
1748         }
1749         for (i = 0; i < nvertex - 1; i++) {
1750                 flk_insert_active_lock(topology[i]);
1751         }
1752
1753
1754         if (lock_effect == FLK_DOWNGRADE || lock_effect == FLK_UNLOCK) {
1755                 flk_wakeup(lock, 0);
1756         } else {
1757                 ep = FIRST_IN(lock);
1758                 while (ep != HEAD(lock)) {
1759                         lock->l_sedge = NEXT_IN(ep);
1760                         IN_LIST_REMOVE(ep);
1761                         flk_update_proc_graph(ep, 1);
1762                         flk_free_edge(ep);
1763                         ep = lock->l_sedge;
1764                 }
1765         }
1766         flk_free_lock(lock);
1767
1768         CHECK_SLEEPING_LOCKS(gp);
1769         CHECK_ACTIVE_LOCKS(gp);
1770         return (0);
1771 }
1772
1773 /*
1774  * Insert a lock into the active queue.
1775  */
1776
1777 static void
1778 flk_insert_active_lock(lock_descriptor_t *new_lock)
1779 {
1780         graph_t *gp = new_lock->l_graph;
1781         vnode_t *vp = new_lock->l_vnode;
1782         lock_descriptor_t *first_lock, *lock;
1783
1784         ASSERT(MUTEX_HELD(&gp->gp_mutex));
1785
1786         SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1787         first_lock = lock;
1788
1789         if (first_lock != NULL) {
1790                 for (; (lock->l_vnode == vp &&
1791                     lock->l_start < new_lock->l_start); lock = lock->l_next)
1792                         ;
1793         } else {
1794                 lock = ACTIVE_HEAD(gp);
1795         }
1796
1797         lock->l_prev->l_next = new_lock;
1798         new_lock->l_next = lock;
1799         new_lock->l_prev = lock->l_prev;
1800         lock->l_prev = new_lock;
1801
1802         if (first_lock == NULL || (new_lock->l_start <= first_lock->l_start)) {
1803                 vp->v_filocks = (struct filock *)new_lock;
1804         }
1805         flk_set_state(new_lock, FLK_ACTIVE_STATE);
1806         new_lock->l_state |= ACTIVE_LOCK;
1807
1808         CHECK_ACTIVE_LOCKS(gp);
1809         CHECK_SLEEPING_LOCKS(gp);
1810 }
1811
1812 /*
1813  * Delete the active lock : Performs two functions depending on the
1814  * value of second parameter. One is to remove from the active lists
1815  * only and other is to both remove and free the lock.
1816  */
1817
1818 static void
1819 flk_delete_active_lock(lock_descriptor_t *lock, int free_lock)
1820 {
1821         vnode_t *vp = lock->l_vnode;
1822         graph_t *gp = lock->l_graph;
1823
1824         ASSERT(MUTEX_HELD(&gp->gp_mutex));
1825         if (free_lock)
1826                 ASSERT(NO_DEPENDENTS(lock));
1827         ASSERT(NOT_BLOCKED(lock));
1828         ASSERT(IS_ACTIVE(lock));
1829
1830         ASSERT((vp->v_filocks != NULL));
1831
1832         if (vp->v_filocks == (struct filock *)lock) {
1833                 vp->v_filocks = (struct filock *)
1834                     ((lock->l_next->l_vnode == vp) ? lock->l_next :
1835                     NULL);
1836         }
1837         lock->l_next->l_prev = lock->l_prev;
1838         lock->l_prev->l_next = lock->l_next;
1839         lock->l_next = lock->l_prev = NULL;
1840         flk_set_state(lock, FLK_DEAD_STATE);
1841         lock->l_state &= ~ACTIVE_LOCK;
1842
1843         if (free_lock)
1844                 flk_free_lock(lock);
1845         CHECK_ACTIVE_LOCKS(gp);
1846         CHECK_SLEEPING_LOCKS(gp);
1847 }
1848
1849 /*
1850  * Insert into the sleep queue.
1851  */
1852
1853 static void
1854 flk_insert_sleeping_lock(lock_descriptor_t *request)
1855 {
1856         graph_t *gp = request->l_graph;
1857         vnode_t *vp = request->l_vnode;
1858         lock_descriptor_t       *lock;
1859
1860         ASSERT(MUTEX_HELD(&gp->gp_mutex));
1861         ASSERT(IS_INITIAL(request));
1862
1863         for (lock = gp->sleeping_locks.l_next; (lock != &gp->sleeping_locks &&
1864             lock->l_vnode < vp); lock = lock->l_next)
1865                 ;
1866
1867         lock->l_prev->l_next = request;
1868         request->l_prev = lock->l_prev;
1869         lock->l_prev = request;
1870         request->l_next = lock;
1871         flk_set_state(request, FLK_SLEEPING_STATE);
1872         request->l_state |= SLEEPING_LOCK;
1873 }
1874
1875 /*
1876  * Cancelling a sleeping lock implies removing a vertex from the
1877  * dependency graph and therefore we should recompute the dependencies
1878  * of all vertices that have a path  to this vertex, w.r.t. all
1879  * vertices reachable from this vertex.
1880  */
1881
1882 void
1883 flk_cancel_sleeping_lock(lock_descriptor_t *request, int remove_from_queue)
1884 {
1885         graph_t *gp = request->l_graph;
1886         vnode_t *vp = request->l_vnode;
1887         lock_descriptor_t **topology = NULL;
1888         edge_t  *ep;
1889         lock_descriptor_t *vertex, *lock;
1890         int nvertex = 0;
1891         int i;
1892         lock_descriptor_t *vertex_stack;
1893
1894         STACK_INIT(vertex_stack);
1895
1896         ASSERT(MUTEX_HELD(&gp->gp_mutex));
1897         /*
1898          * count number of vertex pointers that has to be allocated
1899          * All vertices that are reachable from request.
1900          */
1901
1902         STACK_PUSH(vertex_stack, request, l_stack);
1903
1904         while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
1905                 STACK_POP(vertex_stack, l_stack);
1906                 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
1907                     ep = NEXT_ADJ(ep)) {
1908                         if (IS_RECOMPUTE(ep->to_vertex))
1909                                 continue;
1910                         ep->to_vertex->l_state |= RECOMPUTE_LOCK;
1911                         STACK_PUSH(vertex_stack, ep->to_vertex, l_stack);
1912                         nvertex++;
1913                 }
1914         }
1915
1916         /*
1917          * allocate memory for holding the vertex pointers
1918          */
1919
1920         if (nvertex) {
1921                 topology = kmem_zalloc(nvertex * sizeof (lock_descriptor_t *),
1922                     KM_SLEEP);
1923         }
1924
1925         /*
1926          * one more pass to actually store the vertices in the
1927          * allocated array.
1928          * We first check sleeping locks and then active locks
1929          * so that topology array will be in a topological
1930          * order.
1931          */
1932
1933         nvertex = 0;
1934         SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
1935
1936         if (lock) {
1937                 do {
1938                         if (IS_RECOMPUTE(lock)) {
1939                                 lock->l_index = nvertex;
1940                                 topology[nvertex++] = lock;
1941                         }
1942                         lock->l_color = NO_COLOR;
1943                         lock = lock->l_next;
1944                 } while (lock->l_vnode == vp);
1945         }
1946
1947         SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
1948
1949         if (lock) {
1950                 do {
1951                         if (IS_RECOMPUTE(lock)) {
1952                                 lock->l_index = nvertex;
1953                                 topology[nvertex++] = lock;
1954                         }
1955                         lock->l_color = NO_COLOR;
1956                         lock = lock->l_next;
1957                 } while (lock->l_vnode == vp);
1958         }
1959
1960         /*
1961          * remove in and out edges of request
1962          * They are freed after updating proc_graph below.
1963          */
1964
1965         for (ep = FIRST_IN(request); ep != HEAD(request); ep = NEXT_IN(ep)) {
1966                 ADJ_LIST_REMOVE(ep);
1967         }
1968
1969
1970         if (remove_from_queue)
1971                 REMOVE_SLEEP_QUEUE(request);
1972
1973         /* we are ready to recompute */
1974
1975         flk_recompute_dependencies(request, topology, nvertex, 1);
1976
1977         ep = FIRST_ADJ(request);
1978         while (ep != HEAD(request)) {
1979                 IN_LIST_REMOVE(ep);
1980                 request->l_sedge = NEXT_ADJ(ep);
1981                 ADJ_LIST_REMOVE(ep);
1982                 flk_update_proc_graph(ep, 1);
1983                 flk_free_edge(ep);
1984                 ep = request->l_sedge;
1985         }
1986
1987
1988         /*
1989          * unset the RECOMPUTE flag in those vertices
1990          */
1991
1992         for (i = 0; i < nvertex; i++) {
1993                 topology[i]->l_state &= ~RECOMPUTE_LOCK;
1994         }
1995
1996         /*
1997          * free the topology
1998          */
1999         if (nvertex)
2000                 kmem_free((void *)topology,
2001                     (nvertex * sizeof (lock_descriptor_t *)));
2002         /*
2003          * Possibility of some locks unblocked now
2004          */
2005
2006         flk_wakeup(request, 0);
2007
2008         /*
2009          * we expect to have a correctly recomputed graph  now.
2010          */
2011         flk_set_state(request, FLK_DEAD_STATE);
2012         flk_free_lock(request);
2013         CHECK_SLEEPING_LOCKS(gp);
2014         CHECK_ACTIVE_LOCKS(gp);
2015
2016 }
2017
2018 /*
2019  * Uncoloring the graph is simply to increment the mark value of the graph
2020  * And only when wrap round takes place will we color all vertices in
2021  * the graph explicitly.
2022  */
2023
2024 static void
2025 flk_graph_uncolor(graph_t *gp)
2026 {
2027         lock_descriptor_t *lock;
2028
2029         if (gp->mark == UINT_MAX) {
2030                 gp->mark = 1;
2031         for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
2032             lock = lock->l_next)
2033                         lock->l_color  = 0;
2034
2035         for (lock = SLEEPING_HEAD(gp)->l_next; lock != SLEEPING_HEAD(gp);
2036             lock = lock->l_next)
2037                         lock->l_color  = 0;
2038         } else {
2039                 gp->mark++;
2040         }
2041 }
2042
2043 /*
2044  * Wake up locks that are blocked on the given lock.
2045  */
2046
2047 static void
2048 flk_wakeup(lock_descriptor_t *lock, int adj_list_remove)
2049 {
2050         edge_t  *ep;
2051         graph_t *gp = lock->l_graph;
2052         lock_descriptor_t       *lck;
2053
2054         ASSERT(MUTEX_HELD(&gp->gp_mutex));
2055         if (NO_DEPENDENTS(lock))
2056                 return;
2057         ep = FIRST_IN(lock);
2058         do {
2059                 /*
2060                  * delete the edge from the adjacency list
2061                  * of from vertex. if no more adjacent edges
2062                  * for this vertex wake this process.
2063                  */
2064                 lck = ep->from_vertex;
2065                 if (adj_list_remove)
2066                         ADJ_LIST_REMOVE(ep);
2067                 flk_update_proc_graph(ep, 1);
2068                 if (NOT_BLOCKED(lck)) {
2069                         GRANT_WAKEUP(lck);
2070                 }
2071                 lock->l_sedge = NEXT_IN(ep);
2072                 IN_LIST_REMOVE(ep);
2073                 flk_free_edge(ep);
2074                 ep = lock->l_sedge;
2075         } while (ep != HEAD(lock));
2076         ASSERT(NO_DEPENDENTS(lock));
2077 }
2078
2079 /*
2080  * The dependents of request, is checked for its dependency against the
2081  * locks in topology (called topology because the array is and should be in
2082  * topological order for this algorithm, if not in topological order the
2083  * inner loop below might add more edges than necessary. Topological ordering
2084  * of vertices satisfies the property that all edges will be from left to
2085  * right i.e., topology[i] can have an edge to  topology[j], iff i<j)
2086  * If lock l1 in the dependent set of request is dependent (blocked by)
2087  * on lock l2 in topology but does not have a path to it, we add an edge
2088  * in the inner loop below.
2089  *
2090  * We don't want to add an edge between l1 and l2 if there exists
2091  * already a path from l1 to l2, so care has to be taken for those vertices
2092  * that  have two paths to 'request'. These vertices are referred to here
2093  * as barrier locks.
2094  *
2095  * The barriers has to be found (those vertex that originally had two paths
2096  * to request) because otherwise we may end up adding edges unnecessarily
2097  * to vertices in topology, and thus barrier vertices can have an edge
2098  * to a vertex in topology as well a path to it.
2099  */
2100
2101 static void
2102 flk_recompute_dependencies(lock_descriptor_t *request,
2103     lock_descriptor_t **topology, int nvertex, int update_graph)
2104 {
2105         lock_descriptor_t *vertex, *lock;
2106         graph_t *gp = request->l_graph;
2107         int i, count;
2108         int barrier_found = 0;
2109         edge_t  *ep;
2110         lock_descriptor_t *vertex_stack;
2111
2112         STACK_INIT(vertex_stack);
2113
2114         ASSERT(MUTEX_HELD(&gp->gp_mutex));
2115         if (nvertex == 0)
2116                 return;
2117         flk_graph_uncolor(request->l_graph);
2118         barrier_found = flk_find_barriers(request);
2119         request->l_state |= RECOMPUTE_DONE;
2120
2121         STACK_PUSH(vertex_stack, request, l_stack);
2122         request->l_sedge = FIRST_IN(request);
2123
2124
2125         while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2126                 if (vertex->l_state & RECOMPUTE_DONE) {
2127                         count = 0;
2128                         goto next_in_edge;
2129                 }
2130                 if (IS_BARRIER(vertex)) {
2131                         /* decrement the barrier count */
2132                         if (vertex->l_index) {
2133                                 vertex->l_index--;
2134                                 /* this guy will be pushed again anyway ? */
2135                                 STACK_POP(vertex_stack, l_stack);
2136                                 if (vertex->l_index == 0)  {
2137                                 /*
2138                                  * barrier is over we can recompute
2139                                  * dependencies for this lock in the
2140                                  * next stack pop
2141                                  */
2142                                         vertex->l_state &= ~BARRIER_LOCK;
2143                                 }
2144                                 continue;
2145                         }
2146                 }
2147                 vertex->l_state |= RECOMPUTE_DONE;
2148                 flk_graph_uncolor(gp);
2149                 count = flk_color_reachables(vertex);
2150                 for (i = 0; i < nvertex; i++) {
2151                         lock = topology[i];
2152                         if (COLORED(lock))
2153                                 continue;
2154                         if (BLOCKS(lock, vertex)) {
2155                                 (void) flk_add_edge(vertex, lock,
2156                                     NO_CHECK_CYCLE, update_graph);
2157                                 COLOR(lock);
2158                                 count++;
2159                                 count += flk_color_reachables(lock);
2160                         }
2161
2162                 }
2163
2164 next_in_edge:
2165                 if (count == nvertex ||
2166                     vertex->l_sedge == HEAD(vertex)) {
2167                         /* prune the tree below this */
2168                         STACK_POP(vertex_stack, l_stack);
2169                         vertex->l_state &= ~RECOMPUTE_DONE;
2170                         /* update the barrier locks below this! */
2171                         if (vertex->l_sedge != HEAD(vertex) && barrier_found) {
2172                                 flk_graph_uncolor(gp);
2173                                 flk_update_barriers(vertex);
2174                         }
2175                         continue;
2176                 }
2177
2178                 ep = vertex->l_sedge;
2179                 lock = ep->from_vertex;
2180                 STACK_PUSH(vertex_stack, lock, l_stack);
2181                 lock->l_sedge = FIRST_IN(lock);
2182                 vertex->l_sedge = NEXT_IN(ep);
2183         }
2184
2185 }
2186
2187 /*
2188  * Color all reachable vertices from vertex that belongs to topology (here
2189  * those that have RECOMPUTE_LOCK set in their state) and yet uncolored.
2190  *
2191  * Note: we need to use a different stack_link l_stack1 because this is
2192  * called from flk_recompute_dependencies() that already uses a stack with
2193  * l_stack as stack_link.
2194  */
2195
2196 static int
2197 flk_color_reachables(lock_descriptor_t *vertex)
2198 {
2199         lock_descriptor_t *ver, *lock;
2200         int count;
2201         edge_t  *ep;
2202         lock_descriptor_t *vertex_stack;
2203
2204         STACK_INIT(vertex_stack);
2205
2206         STACK_PUSH(vertex_stack, vertex, l_stack1);
2207         count = 0;
2208         while ((ver = STACK_TOP(vertex_stack)) != NULL) {
2209
2210                 STACK_POP(vertex_stack, l_stack1);
2211                 for (ep = FIRST_ADJ(ver); ep != HEAD(ver);
2212                     ep = NEXT_ADJ(ep)) {
2213                         lock = ep->to_vertex;
2214                         if (COLORED(lock))
2215                                 continue;
2216                         COLOR(lock);
2217                         if (IS_RECOMPUTE(lock))
2218                                 count++;
2219                         STACK_PUSH(vertex_stack, lock, l_stack1);
2220                 }
2221
2222         }
2223         return (count);
2224 }
2225
2226 /*
2227  * Called from flk_recompute_dependencies() this routine decrements
2228  * the barrier count of barrier vertices that are reachable from lock.
2229  */
2230
2231 static void
2232 flk_update_barriers(lock_descriptor_t *lock)
2233 {
2234         lock_descriptor_t *vertex, *lck;
2235         edge_t  *ep;
2236         lock_descriptor_t *vertex_stack;
2237
2238         STACK_INIT(vertex_stack);
2239
2240         STACK_PUSH(vertex_stack, lock, l_stack1);
2241
2242         while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2243                 STACK_POP(vertex_stack, l_stack1);
2244                 for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2245                     ep = NEXT_IN(ep)) {
2246                         lck = ep->from_vertex;
2247                         if (COLORED(lck)) {
2248                                 if (IS_BARRIER(lck)) {
2249                                         ASSERT(lck->l_index > 0);
2250                                         lck->l_index--;
2251                                         if (lck->l_index == 0)
2252                                                 lck->l_state &= ~BARRIER_LOCK;
2253                                 }
2254                                 continue;
2255                         }
2256                         COLOR(lck);
2257                         if (IS_BARRIER(lck)) {
2258                                 ASSERT(lck->l_index > 0);
2259                                 lck->l_index--;
2260                                 if (lck->l_index == 0)
2261                                         lck->l_state &= ~BARRIER_LOCK;
2262                         }
2263                         STACK_PUSH(vertex_stack, lck, l_stack1);
2264                 }
2265         }
2266 }
2267
2268 /*
2269  * Finds all vertices that are reachable from 'lock' more than once and
2270  * mark them as barrier vertices and increment their barrier count.
2271  * The barrier count is one minus the total number of paths from lock
2272  * to that vertex.
2273  */
2274
2275 static int
2276 flk_find_barriers(lock_descriptor_t *lock)
2277 {
2278         lock_descriptor_t *vertex, *lck;
2279         int found = 0;
2280         edge_t  *ep;
2281         lock_descriptor_t *vertex_stack;
2282
2283         STACK_INIT(vertex_stack);
2284
2285         STACK_PUSH(vertex_stack, lock, l_stack1);
2286
2287         while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
2288                 STACK_POP(vertex_stack, l_stack1);
2289                 for (ep = FIRST_IN(vertex); ep != HEAD(vertex);
2290                     ep = NEXT_IN(ep)) {
2291                         lck = ep->from_vertex;
2292                         if (COLORED(lck)) {
2293                                 /* this is a barrier */
2294                                 lck->l_state |= BARRIER_LOCK;
2295                                 /* index will have barrier count */
2296                                 lck->l_index++;
2297                                 if (!found)
2298                                         found = 1;
2299                                 continue;
2300                         }
2301                         COLOR(lck);
2302                         lck->l_index = 0;
2303                         STACK_PUSH(vertex_stack, lck, l_stack1);
2304                 }
2305         }
2306         return (found);
2307 }
2308
2309 /*
2310  * Finds the first lock that is mainly responsible for blocking this
2311  * request.  If there is no such lock, request->l_flock.l_type is set to
2312  * F_UNLCK.  Otherwise, request->l_flock is filled in with the particulars
2313  * of the blocking lock.
2314  *
2315  * Note: It is possible a request is blocked by a sleeping lock because
2316  * of the fairness policy used in flk_process_request() to construct the
2317  * dependencies. (see comments before flk_process_request()).
2318  */
2319
2320 static void
2321 flk_get_first_blocking_lock(lock_descriptor_t *request)
2322 {
2323         graph_t *gp = request->l_graph;
2324         vnode_t *vp = request->l_vnode;
2325         lock_descriptor_t *lock, *blocker;
2326
2327         ASSERT(MUTEX_HELD(&gp->gp_mutex));
2328         blocker = NULL;
2329         SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2330
2331         if (lock) {
2332                 do {
2333                         if (BLOCKS(lock, request)) {
2334                                 blocker = lock;
2335                                 break;
2336                         }
2337                         lock = lock->l_next;
2338                 } while (lock->l_vnode == vp);
2339         }
2340
2341         if (blocker == NULL && request->l_flock.l_type == F_RDLCK) {
2342                 /*
2343                  * No active lock is blocking this request, but if a read
2344                  * lock is requested, it may also get blocked by a waiting
2345                  * writer. So search all sleeping locks and see if there is
2346                  * a writer waiting.
2347                  */
2348                 SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2349                 if (lock) {
2350                         do {
2351                                 if (BLOCKS(lock, request)) {
2352                                         blocker = lock;
2353                                         break;
2354                                 }
2355                                 lock = lock->l_next;
2356                         } while (lock->l_vnode == vp);
2357                 }
2358         }
2359
2360         if (blocker) {
2361                 report_blocker(blocker, request);
2362         } else
2363                 request->l_flock.l_type = F_UNLCK;
2364 }
2365
2366 /*
2367  * Get the graph_t structure associated with a vnode.
2368  * If 'initialize' is non-zero, and the graph_t structure for this vnode has
2369  * not yet been initialized, then a new element is allocated and returned.
2370  */
2371 graph_t *
2372 flk_get_lock_graph(vnode_t *vp, int initialize)
2373 {
2374         graph_t *gp;
2375         graph_t *gp_alloc = NULL;
2376         int index = HASH_INDEX(vp);
2377
2378         if (initialize == FLK_USE_GRAPH) {
2379                 mutex_enter(&flock_lock);
2380                 gp = lock_graph[index];
2381                 mutex_exit(&flock_lock);
2382                 return (gp);
2383         }
2384
2385         ASSERT(initialize == FLK_INIT_GRAPH);
2386
2387         if (lock_graph[index] == NULL) {
2388
2389                 gp_alloc = kmem_zalloc(sizeof (graph_t), KM_SLEEP);
2390
2391                 /* Initialize the graph */
2392
2393                 gp_alloc->active_locks.l_next =
2394                     gp_alloc->active_locks.l_prev =
2395                     (lock_descriptor_t *)ACTIVE_HEAD(gp_alloc);
2396                 gp_alloc->sleeping_locks.l_next =
2397                     gp_alloc->sleeping_locks.l_prev =
2398                     (lock_descriptor_t *)SLEEPING_HEAD(gp_alloc);
2399                 gp_alloc->index = index;
2400                 mutex_init(&gp_alloc->gp_mutex, NULL, MUTEX_DEFAULT, NULL);
2401         }
2402
2403         mutex_enter(&flock_lock);
2404
2405         gp = lock_graph[index];
2406
2407         /* Recheck the value within flock_lock */
2408         if (gp == NULL) {
2409                 struct flock_globals *fg;
2410
2411                 /* We must have previously allocated the graph_t structure */
2412                 ASSERT(gp_alloc != NULL);
2413                 lock_graph[index] = gp = gp_alloc;
2414                 /*
2415                  * The lockmgr status is only needed if KLM is loaded.
2416                  */
2417                 if (flock_zone_key != ZONE_KEY_UNINITIALIZED) {
2418                         fg = flk_get_globals();
2419                         fg->lockmgr_status[index] = fg->flk_lockmgr_status;
2420                 }
2421         }
2422
2423         mutex_exit(&flock_lock);
2424
2425         if ((gp_alloc != NULL) && (gp != gp_alloc)) {
2426                 /* There was a race to allocate the graph_t and we lost */
2427                 mutex_destroy(&gp_alloc->gp_mutex);
2428                 kmem_free(gp_alloc, sizeof (graph_t));
2429         }
2430
2431         return (gp);
2432 }
2433
2434 /*
2435  * Determine whether there are any locks for the given vnode with a remote
2436  * sysid.  Returns zero if not, non-zero if there are.
2437  *
2438  * Note that the return value from this function is potentially invalid
2439  * once it has been returned.  The caller is responsible for providing its
2440  * own synchronization mechanism to ensure that the return value is useful
2441  * (e.g., see nfs_lockcompletion()).
2442  */
2443 int
2444 flk_has_remote_locks(vnode_t *vp)
2445 {
2446         lock_descriptor_t *lock;
2447         int result = 0;
2448         graph_t *gp;
2449
2450         gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2451         if (gp == NULL) {
2452                 return (0);
2453         }
2454
2455         mutex_enter(&gp->gp_mutex);
2456
2457         SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2458
2459         if (lock) {
2460                 while (lock->l_vnode == vp) {
2461                         if (IS_REMOTE(lock)) {
2462                                 result = 1;
2463                                 goto done;
2464                         }
2465                         lock = lock->l_next;
2466                 }
2467         }
2468
2469         SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2470
2471         if (lock) {
2472                 while (lock->l_vnode == vp) {
2473                         if (IS_REMOTE(lock)) {
2474                                 result = 1;
2475                                 goto done;
2476                         }
2477                         lock = lock->l_next;
2478                 }
2479         }
2480
2481 done:
2482         mutex_exit(&gp->gp_mutex);
2483         return (result);
2484 }
2485
2486 /*
2487  * Determine whether there are any locks for the given vnode with a remote
2488  * sysid matching given sysid.
2489  * Used by the new (open source) NFS Lock Manager (NLM)
2490  */
2491 int
2492 flk_has_remote_locks_for_sysid(vnode_t *vp, int sysid)
2493 {
2494         lock_descriptor_t *lock;
2495         int result = 0;
2496         graph_t *gp;
2497
2498         if (sysid == 0)
2499                 return (0);
2500
2501         gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2502         if (gp == NULL) {
2503                 return (0);
2504         }
2505
2506         mutex_enter(&gp->gp_mutex);
2507
2508         SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2509
2510         if (lock) {
2511                 while (lock->l_vnode == vp) {
2512                         if (lock->l_flock.l_sysid == sysid) {
2513                                 result = 1;
2514                                 goto done;
2515                         }
2516                         lock = lock->l_next;
2517                 }
2518         }
2519
2520         SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2521
2522         if (lock) {
2523                 while (lock->l_vnode == vp) {
2524                         if (lock->l_flock.l_sysid == sysid) {
2525                                 result = 1;
2526                                 goto done;
2527                         }
2528                         lock = lock->l_next;
2529                 }
2530         }
2531
2532 done:
2533         mutex_exit(&gp->gp_mutex);
2534         return (result);
2535 }
2536
2537 /*
2538  * Determine if there are any locks owned by the given sysid.
2539  * Returns zero if not, non-zero if there are.  Note that this return code
2540  * could be derived from flk_get_{sleeping,active}_locks, but this routine
2541  * avoids all the memory allocations of those routines.
2542  *
2543  * This routine has the same synchronization issues as
2544  * flk_has_remote_locks.
2545  */
2546
2547 int
2548 flk_sysid_has_locks(int sysid, int lck_type)
2549 {
2550         int             has_locks = 0;
2551         lock_descriptor_t       *lock;
2552         graph_t         *gp;
2553         int             i;
2554
2555         for (i = 0; i < HASH_SIZE && !has_locks; i++) {
2556                 mutex_enter(&flock_lock);
2557                 gp = lock_graph[i];
2558                 mutex_exit(&flock_lock);
2559                 if (gp == NULL) {
2560                         continue;
2561                 }
2562
2563                 mutex_enter(&gp->gp_mutex);
2564
2565                 if (lck_type & FLK_QUERY_ACTIVE) {
2566                         for (lock = ACTIVE_HEAD(gp)->l_next;
2567                             lock != ACTIVE_HEAD(gp) && !has_locks;
2568                             lock = lock->l_next) {
2569                                 if (lock->l_flock.l_sysid == sysid)
2570                                         has_locks = 1;
2571                         }
2572                 }
2573
2574                 if (lck_type & FLK_QUERY_SLEEPING) {
2575                         for (lock = SLEEPING_HEAD(gp)->l_next;
2576                             lock != SLEEPING_HEAD(gp) && !has_locks;
2577                             lock = lock->l_next) {
2578                                 if (lock->l_flock.l_sysid == sysid)
2579                                         has_locks = 1;
2580                         }
2581                 }
2582                 mutex_exit(&gp->gp_mutex);
2583         }
2584
2585         return (has_locks);
2586 }
2587
2588
2589 /*
2590  * Delete all locks in the system that belongs to the sysid of the request.
2591  */
2592
2593 static void
2594 flk_delete_locks_by_sysid(lock_descriptor_t *request)
2595 {
2596         int     sysid  = request->l_flock.l_sysid;
2597         lock_descriptor_t *lock, *nlock;
2598         graph_t *gp;
2599         int i;
2600
2601         ASSERT(MUTEX_HELD(&request->l_graph->gp_mutex));
2602         ASSERT(sysid != 0);
2603
2604         mutex_exit(&request->l_graph->gp_mutex);
2605
2606         for (i = 0; i < HASH_SIZE; i++) {
2607                 mutex_enter(&flock_lock);
2608                 gp = lock_graph[i];
2609                 mutex_exit(&flock_lock);
2610
2611                 if (gp == NULL)
2612                         continue;
2613
2614                 mutex_enter(&gp->gp_mutex);
2615
2616                 /* signal sleeping requests so that they bail out */
2617                 lock = SLEEPING_HEAD(gp)->l_next;
2618                 while (lock != SLEEPING_HEAD(gp)) {
2619                         nlock = lock->l_next;
2620                         if (lock->l_flock.l_sysid == sysid) {
2621                                 INTERRUPT_WAKEUP(lock);
2622                         }
2623                         lock = nlock;
2624                 }
2625
2626                 /* delete active locks */
2627                 lock = ACTIVE_HEAD(gp)->l_next;
2628                 while (lock != ACTIVE_HEAD(gp)) {
2629                         nlock = lock->l_next;
2630                         if (lock->l_flock.l_sysid == sysid) {
2631                                 flk_delete_active_lock(lock, 0);
2632                                 flk_wakeup(lock, 1);
2633                                 flk_free_lock(lock);
2634                         }
2635                         lock = nlock;
2636                 }
2637                 mutex_exit(&gp->gp_mutex);
2638         }
2639
2640         mutex_enter(&request->l_graph->gp_mutex);
2641 }
2642
2643 /*
2644  * Search for a sleeping lock manager lock which matches exactly this lock
2645  * request; if one is found, fake a signal to cancel it.
2646  *
2647  * Return 1 if a matching lock was found, 0 otherwise.
2648  */
2649
2650 static int
2651 flk_canceled(lock_descriptor_t *request)
2652 {
2653         lock_descriptor_t *lock, *nlock;
2654         graph_t *gp = request->l_graph;
2655         vnode_t *vp = request->l_vnode;
2656
2657         ASSERT(MUTEX_HELD(&gp->gp_mutex));
2658         ASSERT(IS_LOCKMGR(request));
2659         SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2660
2661         if (lock) {
2662                 while (lock->l_vnode == vp) {
2663                         nlock = lock->l_next;
2664                         if (SAME_OWNER(lock, request) &&
2665                             lock->l_start == request->l_start &&
2666                             lock->l_end == request->l_end) {
2667                                 INTERRUPT_WAKEUP(lock);
2668                                 return (1);
2669                         }
2670                         lock = nlock;
2671                 }
2672         }
2673         return (0);
2674 }
2675
2676 /*
2677  * Remove all non-OFD locks for the vnode belonging to the given pid and sysid.
2678  * That is, since OFD locks are pid-less we'll never match on the incoming
2679  * pid. OFD locks are removed earlier in the close() path via closef() and
2680  * ofdcleanlock().
2681  */
2682 void
2683 cleanlocks(vnode_t *vp, pid_t pid, int sysid)
2684 {
2685         graph_t *gp;
2686         lock_descriptor_t *lock, *nlock;
2687         lock_descriptor_t *link_stack;
2688
2689         STACK_INIT(link_stack);
2690
2691         gp = flk_get_lock_graph(vp, FLK_USE_GRAPH);
2692
2693         if (gp == NULL)
2694                 return;
2695         mutex_enter(&gp->gp_mutex);
2696
2697         CHECK_SLEEPING_LOCKS(gp);
2698         CHECK_ACTIVE_LOCKS(gp);
2699
2700         SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
2701
2702         if (lock) {
2703                 do {
2704                         nlock = lock->l_next;
2705                         if ((lock->l_flock.l_pid == pid ||
2706                             pid == IGN_PID) &&
2707                             lock->l_flock.l_sysid == sysid) {
2708                                 CANCEL_WAKEUP(lock);
2709                         }
2710                         lock = nlock;
2711                 } while (lock->l_vnode == vp);
2712         }
2713
2714         SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
2715
2716         if (lock) {
2717                 do {
2718                         nlock = lock->l_next;
2719                         if ((lock->l_flock.l_pid == pid ||
2720                             pid == IGN_PID) &&
2721                             lock->l_flock.l_sysid == sysid) {
2722                                 flk_delete_active_lock(lock, 0);
2723                                 STACK_PUSH(link_stack, lock, l_stack);
2724                         }
2725                         lock = nlock;
2726                 } while (lock->l_vnode == vp);
2727         }
2728
2729         while ((lock = STACK_TOP(link_stack)) != NULL) {
2730                 STACK_POP(link_stack, l_stack);
2731                 flk_wakeup(lock, 1);
2732                 flk_free_lock(lock);
2733         }
2734
2735         CHECK_SLEEPING_LOCKS(gp);
2736         CHECK_ACTIVE_LOCKS(gp);
2737         CHECK_OWNER_LOCKS(gp, pid, sysid, vp);
2738         mutex_exit(&gp->gp_mutex);
2739 }
2740
2741
2742 /*
2743  * Called from 'fs' read and write routines for files that have mandatory
2744  * locking enabled.
2745  */
2746
2747 int
2748 chklock(struct vnode *vp, int iomode, uoff_t offset, ssize_t len, int fmode,
2749     caller_context_t *ct)
2750 {
2751         register int    i;
2752         struct flock64  bf;
2753         int             error = 0;
2754
2755         bf.l_type = (iomode & FWRITE) ? F_WRLCK : F_RDLCK;
2756         bf.l_whence = 0;
2757         bf.l_start = offset;
2758         bf.l_len = len;
2759         if (ct == NULL) {
2760                 bf.l_pid = curproc->p_pid;
2761                 bf.l_sysid = 0;
2762         } else {
2763                 bf.l_pid = ct->cc_pid;
2764                 bf.l_sysid = ct->cc_sysid;
2765         }
2766         i = (fmode & (FNDELAY|FNONBLOCK)) ? INOFLCK : INOFLCK|SLPFLCK;
2767         if ((i = reclock(vp, &bf, i, 0, offset, NULL)) != 0 ||
2768             bf.l_type != F_UNLCK)
2769                 error = i ? i : EAGAIN;
2770         return (error);
2771 }
2772
2773 /*
2774  * convoff - converts the given data (start, whence) to the
2775  * given whence.
2776  */
2777 int
2778 convoff(struct vnode *vp, struct flock64 *lckdat, int whence, offset_t offset)
2779 {
2780         int             error;
2781         struct vattr    vattr;
2782
2783         if ((lckdat->l_whence == 2) || (whence == 2)) {
2784                 vattr.va_mask = AT_SIZE;
2785                 if (error = fop_getattr(vp, &vattr, 0, CRED(), NULL))
2786                         return (error);
2787         }
2788
2789         switch (lckdat->l_whence) {
2790         case 1:
2791                 lckdat->l_start += offset;
2792                 break;
2793         case 2:
2794                 lckdat->l_start += vattr.va_size;
2795                 /* FALLTHRU */
2796         case 0:
2797                 break;
2798         default:
2799                 return (EINVAL);
2800         }
2801
2802         if (lckdat->l_start < 0)
2803                 return (EINVAL);
2804
2805         switch (whence) {
2806         case 1:
2807                 lckdat->l_start -= offset;
2808                 break;
2809         case 2:
2810                 lckdat->l_start -= vattr.va_size;
2811                 /* FALLTHRU */
2812         case 0:
2813                 break;
2814         default:
2815                 return (EINVAL);
2816         }
2817
2818         lckdat->l_whence = (short)whence;
2819         return (0);
2820 }
2821
2822
2823 /*      proc_graph function definitions */
2824
2825 /*
2826  * Function checks for deadlock due to the new 'lock'. If deadlock found
2827  * edges of this lock are freed and returned.
2828  */
2829
2830 static int
2831 flk_check_deadlock(lock_descriptor_t *lock)
2832 {
2833         proc_vertex_t   *start_vertex, *pvertex;
2834         proc_vertex_t *dvertex;
2835         proc_edge_t *pep, *ppep;
2836         edge_t  *ep, *nep;
2837         proc_vertex_t *process_stack;
2838
2839         /*
2840          * OFD style locks are not associated with any process so there is
2841          * no proc graph for these. Thus we cannot, and do not, do deadlock
2842          * detection.
2843          */
2844         if (lock->l_ofd != NULL)
2845                 return (0);
2846
2847         STACK_INIT(process_stack);
2848
2849         mutex_enter(&flock_lock);
2850         start_vertex = flk_get_proc_vertex(lock);
2851         ASSERT(start_vertex != NULL);
2852
2853         /* construct the edges from this process to other processes */
2854
2855         ep = FIRST_ADJ(lock);
2856         while (ep != HEAD(lock)) {
2857                 proc_vertex_t *adj_proc;
2858
2859                 adj_proc = flk_get_proc_vertex(ep->to_vertex);
2860                 for (pep = start_vertex->edge; pep != NULL; pep = pep->next) {
2861                         if (pep->to_proc == adj_proc) {
2862                                 ASSERT(pep->refcount);
2863                                 pep->refcount++;
2864                                 break;
2865                         }
2866                 }
2867                 if (pep == NULL) {
2868                         pep = flk_get_proc_edge();
2869                         pep->to_proc = adj_proc;
2870                         pep->refcount = 1;
2871                         adj_proc->incount++;
2872                         pep->next = start_vertex->edge;
2873                         start_vertex->edge = pep;
2874                 }
2875                 ep = NEXT_ADJ(ep);
2876         }
2877
2878         ep = FIRST_IN(lock);
2879
2880         while (ep != HEAD(lock)) {
2881                 proc_vertex_t *in_proc;
2882
2883                 in_proc = flk_get_proc_vertex(ep->from_vertex);
2884
2885                 for (pep = in_proc->edge; pep != NULL; pep = pep->next) {
2886                         if (pep->to_proc == start_vertex) {
2887                                 ASSERT(pep->refcount);
2888                                 pep->refcount++;
2889                                 break;
2890                         }
2891                 }
2892                 if (pep == NULL) {
2893                         pep = flk_get_proc_edge();
2894                         pep->to_proc = start_vertex;
2895                         pep->refcount = 1;
2896                         start_vertex->incount++;
2897                         pep->next = in_proc->edge;
2898                         in_proc->edge = pep;
2899                 }
2900                 ep = NEXT_IN(ep);
2901         }
2902
2903         if (start_vertex->incount == 0) {
2904                 mutex_exit(&flock_lock);
2905                 return (0);
2906         }
2907
2908         flk_proc_graph_uncolor();
2909
2910         start_vertex->p_sedge = start_vertex->edge;
2911
2912         STACK_PUSH(process_stack, start_vertex, p_stack);
2913
2914         while ((pvertex = STACK_TOP(process_stack)) != NULL) {
2915                 for (pep = pvertex->p_sedge; pep != NULL; pep = pep->next) {
2916                         dvertex = pep->to_proc;
2917                         if (!PROC_ARRIVED(dvertex)) {
2918                                 STACK_PUSH(process_stack, dvertex, p_stack);
2919                                 dvertex->p_sedge = dvertex->edge;
2920                                 PROC_ARRIVE(pvertex);
2921                                 pvertex->p_sedge = pep->next;
2922                                 break;
2923                         }
2924                         if (!PROC_DEPARTED(dvertex))
2925                                 goto deadlock;
2926                 }
2927                 if (pep == NULL) {
2928                         PROC_DEPART(pvertex);
2929                         STACK_POP(process_stack, p_stack);
2930                 }
2931         }
2932         mutex_exit(&flock_lock);
2933         return (0);
2934
2935 deadlock:
2936
2937         /* we remove all lock edges and proc edges */
2938
2939         ep = FIRST_ADJ(lock);
2940         while (ep != HEAD(lock)) {
2941                 proc_vertex_t *adj_proc;
2942                 adj_proc = flk_get_proc_vertex(ep->to_vertex);
2943                 nep = NEXT_ADJ(ep);
2944                 IN_LIST_REMOVE(ep);
2945                 ADJ_LIST_REMOVE(ep);
2946                 flk_free_edge(ep);
2947                 ppep = start_vertex->edge;
2948                 for (pep = start_vertex->edge; pep != NULL; ppep = pep,
2949                     pep = ppep->next) {
2950                         if (pep->to_proc == adj_proc) {
2951                                 pep->refcount--;
2952                                 if (pep->refcount == 0) {
2953                                         if (pep == ppep) {
2954                                                 start_vertex->edge = pep->next;
2955                                         } else {
2956                                                 ppep->next = pep->next;
2957                                         }
2958                                         adj_proc->incount--;
2959                                         flk_proc_release(adj_proc);
2960                                         flk_free_proc_edge(pep);
2961                                 }
2962                                 break;
2963                         }
2964                 }
2965                 ep = nep;
2966         }
2967         ep = FIRST_IN(lock);
2968         while (ep != HEAD(lock)) {
2969                 proc_vertex_t *in_proc;
2970                 in_proc = flk_get_proc_vertex(ep->from_vertex);
2971                 nep = NEXT_IN(ep);
2972                 IN_LIST_REMOVE(ep);
2973                 ADJ_LIST_REMOVE(ep);
2974                 flk_free_edge(ep);
2975                 ppep = in_proc->edge;
2976                 for (pep = in_proc->edge; pep != NULL; ppep = pep,
2977                     pep = ppep->next) {
2978                         if (pep->to_proc == start_vertex) {
2979                                 pep->refcount--;
2980                                 if (pep->refcount == 0) {
2981                                         if (pep == ppep) {
2982                                                 in_proc->edge = pep->next;
2983                                         } else {
2984                                                 ppep->next = pep->next;
2985                                         }
2986                                         start_vertex->incount--;
2987                                         flk_proc_release(in_proc);
2988                                         flk_free_proc_edge(pep);
2989                                 }
2990                                 break;
2991                         }
2992                 }
2993                 ep = nep;
2994         }
2995         flk_proc_release(start_vertex);
2996         mutex_exit(&flock_lock);
2997         return (1);
2998 }
2999
3000 /*
3001  * Get a proc vertex. If lock's pvertex value gets a correct proc vertex
3002  * from the list we return that, otherwise we allocate one. If necessary,
3003  * we grow the list of vertices also.
3004  */
3005
3006 static proc_vertex_t *
3007 flk_get_proc_vertex(lock_descriptor_t *lock)
3008 {
3009         int i;
3010         proc_vertex_t   *pv;
3011         proc_vertex_t   **palloc;
3012
3013         ASSERT(MUTEX_HELD(&flock_lock));
3014         if (lock->pvertex != -1) {
3015                 ASSERT(lock->pvertex >= 0);
3016                 pv = pgraph.proc[lock->pvertex];
3017                 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
3018                         return (pv);
3019                 }
3020         }
3021         for (i = 0; i < pgraph.gcount; i++) {
3022                 pv = pgraph.proc[i];
3023                 if (pv != NULL && PROC_SAME_OWNER(lock, pv)) {
3024                         lock->pvertex = pv->index = i;
3025                         return (pv);
3026                 }
3027         }
3028         pv = kmem_zalloc(sizeof (struct proc_vertex), KM_SLEEP);
3029         pv->pid = lock->l_flock.l_pid;
3030         pv->sysid = lock->l_flock.l_sysid;
3031         flk_proc_vertex_allocs++;
3032         if (pgraph.free != 0) {
3033                 for (i = 0; i < pgraph.gcount; i++) {
3034                         if (pgraph.proc[i] == NULL) {
3035                                 pgraph.proc[i] = pv;
3036                                 lock->pvertex = pv->index = i;
3037                                 pgraph.free--;
3038                                 return (pv);
3039                         }
3040                 }
3041         }
3042         palloc = kmem_zalloc((pgraph.gcount + PROC_CHUNK) *
3043             sizeof (proc_vertex_t *), KM_SLEEP);
3044
3045         if (pgraph.proc) {
3046                 bcopy(pgraph.proc, palloc,
3047                     pgraph.gcount * sizeof (proc_vertex_t *));
3048
3049                 kmem_free(pgraph.proc,
3050                     pgraph.gcount * sizeof (proc_vertex_t *));
3051         }
3052         pgraph.proc = palloc;
3053         pgraph.free += (PROC_CHUNK - 1);
3054         pv->index = lock->pvertex = pgraph.gcount;
3055         pgraph.gcount += PROC_CHUNK;
3056         pgraph.proc[pv->index] = pv;
3057         return (pv);
3058 }
3059
3060 /*
3061  * Allocate a proc edge.
3062  */
3063
3064 static proc_edge_t *
3065 flk_get_proc_edge()
3066 {
3067         proc_edge_t *pep;
3068
3069         pep = kmem_zalloc(sizeof (proc_edge_t), KM_SLEEP);
3070         flk_proc_edge_allocs++;
3071         return (pep);
3072 }
3073
3074 /*
3075  * Free the proc edge. Called whenever its reference count goes to zero.
3076  */
3077
3078 static void
3079 flk_free_proc_edge(proc_edge_t *pep)
3080 {
3081         ASSERT(pep->refcount == 0);
3082         kmem_free((void *)pep, sizeof (proc_edge_t));
3083         flk_proc_edge_frees++;
3084 }
3085
3086 /*
3087  * Color the graph explicitly done only when the mark value hits max value.
3088  */
3089
3090 static void
3091 flk_proc_graph_uncolor()
3092 {
3093         int i;
3094
3095         if (pgraph.mark == UINT_MAX) {
3096                 for (i = 0; i < pgraph.gcount; i++)
3097                         if (pgraph.proc[i] != NULL) {
3098                                 pgraph.proc[i]->atime = 0;
3099                                 pgraph.proc[i]->dtime = 0;
3100                         }
3101                 pgraph.mark = 1;
3102         } else {
3103                 pgraph.mark++;
3104         }
3105 }
3106
3107 /*
3108  * Release the proc vertex iff both there are no in edges and out edges
3109  */
3110
3111 static void
3112 flk_proc_release(proc_vertex_t *proc)
3113 {
3114         ASSERT(MUTEX_HELD(&flock_lock));
3115         if (proc->edge == NULL && proc->incount == 0) {
3116                 pgraph.proc[proc->index] = NULL;
3117                 pgraph.free++;
3118                 kmem_free(proc, sizeof (proc_vertex_t));
3119                 flk_proc_vertex_frees++;
3120         }
3121 }
3122
3123 /*
3124  * Updates process graph to reflect change in a lock_graph.
3125  * Note: We should call this function only after we have a correctly
3126  * recomputed lock graph. Otherwise we might miss a deadlock detection.
3127  * eg: in function flk_relation() we call this function after flk_recompute_
3128  * dependencies() otherwise if a process tries to lock a vnode hashed
3129  * into another graph it might sleep for ever.
3130  */
3131
3132 static void
3133 flk_update_proc_graph(edge_t *ep, int delete)
3134 {
3135         proc_vertex_t *toproc, *fromproc;
3136         proc_edge_t *pep, *prevpep;
3137
3138         mutex_enter(&flock_lock);
3139
3140         /*
3141          * OFD style locks are not associated with any process so there is
3142          * no proc graph for these.
3143          */
3144         if (ep->from_vertex->l_ofd != NULL) {
3145                 mutex_exit(&flock_lock);
3146                 return;
3147         }
3148
3149         toproc = flk_get_proc_vertex(ep->to_vertex);
3150         fromproc = flk_get_proc_vertex(ep->from_vertex);
3151
3152         if (!delete)
3153                 goto add;
3154         pep = prevpep = fromproc->edge;
3155
3156         ASSERT(pep != NULL);
3157         while (pep != NULL) {
3158                 if (pep->to_proc == toproc) {
3159                         ASSERT(pep->refcount > 0);
3160                         pep->refcount--;
3161                         if (pep->refcount == 0) {
3162                                 if (pep == prevpep) {
3163                                         fromproc->edge = pep->next;
3164                                 } else {
3165                                         prevpep->next = pep->next;
3166                                 }
3167                                 toproc->incount--;
3168                                 flk_proc_release(toproc);
3169                                 flk_free_proc_edge(pep);
3170                         }
3171                         break;
3172                 }
3173                 prevpep = pep;
3174                 pep = pep->next;
3175         }
3176         flk_proc_release(fromproc);
3177         mutex_exit(&flock_lock);
3178         return;
3179 add:
3180
3181         pep = fromproc->edge;
3182
3183         while (pep != NULL) {
3184                 if (pep->to_proc == toproc) {
3185                         ASSERT(pep->refcount > 0);
3186                         pep->refcount++;
3187                         break;
3188                 }
3189                 pep = pep->next;
3190         }
3191         if (pep == NULL) {
3192                 pep = flk_get_proc_edge();
3193                 pep->to_proc = toproc;
3194                 pep->refcount = 1;
3195                 toproc->incount++;
3196                 pep->next = fromproc->edge;
3197                 fromproc->edge = pep;
3198         }
3199         mutex_exit(&flock_lock);
3200 }
3201
3202 /*
3203  * Set the control status for lock manager requests.
3204  *
3205  * Note that when this routine is called with FLK_WAKEUP_SLEEPERS, there
3206  * may be locks requests that have gotten started but not finished.  In
3207  * particular, there may be blocking requests that are in the callback code
3208  * before sleeping (so they're not holding the lock for the graph).  If
3209  * such a thread reacquires the graph's lock (to go to sleep) after
3210  * flk_lockmgr_status is set to a non-up value, it will notice the status
3211  * and bail out.  If the request gets granted before the thread can check
3212  * flk_lockmgr_status, let it continue normally.  It will get flushed when
3213  * we are called with FLK_LOCKMGR_DOWN.
3214  */
3215
3216 void
3217 flk_set_lockmgr_status(flk_lockmgr_status_t status)
3218 {
3219         int i;
3220         graph_t *gp;
3221         struct flock_globals *fg;
3222
3223         fg = flk_get_globals();
3224         ASSERT(fg != NULL);
3225
3226         mutex_enter(&flock_lock);
3227         fg->flk_lockmgr_status = status;
3228         mutex_exit(&flock_lock);
3229
3230         /*
3231          * If the lock manager is coming back up, all that's needed is to
3232          * propagate this information to the graphs.  If the lock manager
3233          * is going down, additional action is required, and each graph's
3234          * copy of the state is updated atomically with this other action.
3235          */
3236         switch (status) {
3237         case FLK_LOCKMGR_UP:
3238                 for (i = 0; i < HASH_SIZE; i++) {
3239                         mutex_enter(&flock_lock);
3240                         gp = lock_graph[i];
3241                         mutex_exit(&flock_lock);
3242                         if (gp == NULL)
3243                                 continue;
3244                         mutex_enter(&gp->gp_mutex);
3245                         fg->lockmgr_status[i] = status;
3246                         mutex_exit(&gp->gp_mutex);
3247                 }
3248                 break;
3249         case FLK_WAKEUP_SLEEPERS:
3250                 wakeup_sleeping_lockmgr_locks(fg);
3251                 break;
3252         case FLK_LOCKMGR_DOWN:
3253                 unlock_lockmgr_granted(fg);
3254                 break;
3255         default:
3256                 panic("flk_set_lockmgr_status: bad status (%d)", status);
3257                 break;
3258         }
3259 }
3260
3261 /*
3262  * This routine returns all the locks that are active or sleeping and are
3263  * associated with a particular set of identifiers.  If lock_state != 0, then
3264  * only locks that match the lock_state are returned. If lock_state == 0, then
3265  * all locks are returned. If pid == NOPID, the pid is ignored.  If
3266  * use_sysid is FALSE, then the sysid is ignored.  If vp is NULL, then the
3267  * vnode pointer is ignored.
3268  *
3269  * A list containing the vnode pointer and an flock structure
3270  * describing the lock is returned.  Each element in the list is
3271  * dynamically allocated and must be freed by the caller.  The
3272  * last item in the list is denoted by a NULL value in the ll_next
3273  * field.
3274  *
3275  * The vnode pointers returned are held.  The caller is responsible
3276  * for releasing these.  Note that the returned list is only a snapshot of
3277  * the current lock information, and that it is a snapshot of a moving
3278  * target (only one graph is locked at a time).
3279  */
3280
3281 locklist_t *
3282 get_lock_list(int list_type, int lock_state, int sysid, boolean_t use_sysid,
3283     pid_t pid, const vnode_t *vp, zoneid_t zoneid)
3284 {
3285         lock_descriptor_t       *lock;
3286         lock_descriptor_t       *graph_head;
3287         locklist_t              listhead;
3288         locklist_t              *llheadp;
3289         locklist_t              *llp;
3290         locklist_t              *lltp;
3291         graph_t                 *gp;
3292         int                     i;
3293         int                     first_index; /* graph index */
3294         int                     num_indexes; /* graph index */
3295
3296         ASSERT((list_type == FLK_ACTIVE_STATE) ||
3297             (list_type == FLK_SLEEPING_STATE));
3298
3299         /*
3300          * Get a pointer to something to use as a list head while building
3301          * the rest of the list.
3302          */
3303         llheadp = &listhead;
3304         lltp = llheadp;
3305         llheadp->ll_next = (locklist_t *)NULL;
3306
3307         /* Figure out which graphs we want to look at. */
3308         if (vp == NULL) {
3309                 first_index = 0;
3310                 num_indexes = HASH_SIZE;
3311         } else {
3312                 first_index = HASH_INDEX(vp);
3313                 num_indexes = 1;
3314         }
3315
3316         for (i = first_index; i < first_index + num_indexes; i++) {
3317                 mutex_enter(&flock_lock);
3318                 gp = lock_graph[i];
3319                 mutex_exit(&flock_lock);
3320                 if (gp == NULL) {
3321                         continue;
3322                 }
3323
3324                 mutex_enter(&gp->gp_mutex);
3325                 graph_head = (list_type == FLK_ACTIVE_STATE) ?
3326                     ACTIVE_HEAD(gp) : SLEEPING_HEAD(gp);
3327                 for (lock = graph_head->l_next;
3328                     lock != graph_head;
3329                     lock = lock->l_next) {
3330                         if (use_sysid && lock->l_flock.l_sysid != sysid)
3331                                 continue;
3332                         if (pid != NOPID && lock->l_flock.l_pid != pid)
3333                                 continue;
3334                         if (vp != NULL && lock->l_vnode != vp)
3335                                 continue;
3336                         if (lock_state && !(lock_state & lock->l_state))
3337                                 continue;
3338                         if (zoneid != lock->l_zoneid && zoneid != ALL_ZONES)
3339                                 continue;
3340                         /*
3341                          * A matching lock was found.  Allocate
3342                          * space for a new locklist entry and fill
3343                          * it in.
3344                          */
3345                         llp = kmem_alloc(sizeof (locklist_t), KM_SLEEP);
3346                         lltp->ll_next = llp;
3347                         VN_HOLD(lock->l_vnode);
3348                         llp->ll_vp = lock->l_vnode;
3349                         create_flock(lock, &(llp->ll_flock));
3350                         llp->ll_next = (locklist_t *)NULL;
3351                         lltp = llp;
3352                 }
3353                 mutex_exit(&gp->gp_mutex);
3354         }
3355
3356         llp = llheadp->ll_next;
3357         return (llp);
3358 }
3359
3360 /*
3361  * These two functions are simply interfaces to get_lock_list.  They return
3362  * a list of sleeping or active locks for the given sysid and pid.  See
3363  * get_lock_list for details.
3364  *
3365  * In either case we don't particularly care to specify the zone of interest;
3366  * the sysid-space is global across zones, so the sysid will map to exactly one
3367  * zone, and we'll return information for that zone.
3368  */
3369
3370 locklist_t *
3371 flk_get_sleeping_locks(int sysid, pid_t pid)
3372 {
3373         return (get_lock_list(FLK_SLEEPING_STATE, 0, sysid, B_TRUE, pid, NULL,
3374             ALL_ZONES));
3375 }
3376
3377 locklist_t *
3378 flk_get_active_locks(int sysid, pid_t pid)
3379 {
3380         return (get_lock_list(FLK_ACTIVE_STATE, 0, sysid, B_TRUE, pid, NULL,
3381             ALL_ZONES));
3382 }
3383
3384 /*
3385  * Another interface to get_lock_list.  This one returns all the active
3386  * locks for a given vnode.  Again, see get_lock_list for details.
3387  *
3388  * We don't need to specify which zone's locks we're interested in.  The matter
3389  * would only be interesting if the vnode belonged to NFS, and NFS vnodes can't
3390  * be used by multiple zones, so the list of locks will all be from the right
3391  * zone.
3392  */
3393
3394 locklist_t *
3395 flk_active_locks_for_vp(const vnode_t *vp)
3396 {
3397         return (get_lock_list(FLK_ACTIVE_STATE, 0, 0, B_FALSE, NOPID, vp,
3398             ALL_ZONES));
3399 }
3400
3401 /*
3402  * Another interface to get_lock_list.  This one returns all the active
3403  * nbmand locks for a given vnode.  Again, see get_lock_list for details.
3404  *
3405  * See the comment for flk_active_locks_for_vp() for why we don't care to
3406  * specify the particular zone of interest.
3407  */
3408 locklist_t *
3409 flk_active_nbmand_locks_for_vp(const vnode_t *vp)
3410 {
3411         return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3412             NOPID, vp, ALL_ZONES));
3413 }
3414
3415 /*
3416  * Another interface to get_lock_list.  This one returns all the active
3417  * nbmand locks for a given pid.  Again, see get_lock_list for details.
3418  *
3419  * The zone doesn't need to be specified here; the locks held by a
3420  * particular process will either be local (ie, non-NFS) or from the zone
3421  * the process is executing in.  This is because other parts of the system
3422  * ensure that an NFS vnode can't be used in a zone other than that in
3423  * which it was opened.
3424  */
3425 locklist_t *
3426 flk_active_nbmand_locks(pid_t pid)
3427 {
3428         return (get_lock_list(FLK_ACTIVE_STATE, NBMAND_LOCK, 0, B_FALSE,
3429             pid, NULL, ALL_ZONES));
3430 }
3431
3432 /*
3433  * Free up all entries in the locklist.
3434  */
3435 void
3436 flk_free_locklist(locklist_t *llp)
3437 {
3438         locklist_t *next_llp;
3439
3440         while (llp) {
3441                 next_llp = llp->ll_next;
3442                 VN_RELE(llp->ll_vp);
3443                 kmem_free(llp, sizeof (*llp));
3444                 llp = next_llp;
3445         }
3446 }
3447
3448 /*
3449  * Find all sleeping lock manager requests and poke them.
3450  */
3451 static void
3452 wakeup_sleeping_lockmgr_locks(struct flock_globals *fg)
3453 {
3454         lock_descriptor_t *lock;
3455         lock_descriptor_t *nlock = NULL; /* next lock */
3456         int i;
3457         graph_t *gp;
3458         zoneid_t zoneid = getzoneid();
3459
3460         for (i = 0; i < HASH_SIZE; i++) {
3461                 mutex_enter(&flock_lock);
3462                 gp = lock_graph[i];
3463                 mutex_exit(&flock_lock);
3464                 if (gp == NULL) {
3465                         continue;
3466                 }
3467
3468                 mutex_enter(&gp->gp_mutex);
3469                 fg->lockmgr_status[i] = FLK_WAKEUP_SLEEPERS;
3470                 for (lock = SLEEPING_HEAD(gp)->l_next;
3471                     lock != SLEEPING_HEAD(gp);
3472                     lock = nlock) {
3473                         nlock = lock->l_next;
3474                         if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
3475                                 INTERRUPT_WAKEUP(lock);
3476                         }
3477                 }
3478                 mutex_exit(&gp->gp_mutex);
3479         }
3480 }
3481
3482
3483 /*
3484  * Find all active (granted) lock manager locks and release them.
3485  */
3486 static void
3487 unlock_lockmgr_granted(struct flock_globals *fg)
3488 {
3489         lock_descriptor_t *lock;
3490         lock_descriptor_t *nlock = NULL; /* next lock */
3491         int i;
3492         graph_t *gp;
3493         zoneid_t zoneid = getzoneid();
3494
3495         for (i = 0; i < HASH_SIZE; i++) {
3496                 mutex_enter(&flock_lock);
3497                 gp = lock_graph[i];
3498                 mutex_exit(&flock_lock);
3499                 if (gp == NULL) {
3500                         continue;
3501                 }
3502
3503                 mutex_enter(&gp->gp_mutex);
3504                 fg->lockmgr_status[i] = FLK_LOCKMGR_DOWN;
3505                 for (lock = ACTIVE_HEAD(gp)->l_next;
3506                     lock != ACTIVE_HEAD(gp);
3507                     lock = nlock) {
3508                         nlock = lock->l_next;
3509                         if (IS_LOCKMGR(lock) && lock->l_zoneid == zoneid) {
3510                                 ASSERT(IS_ACTIVE(lock));
3511                                 flk_delete_active_lock(lock, 0);
3512                                 flk_wakeup(lock, 1);
3513                                 flk_free_lock(lock);
3514                         }
3515                 }
3516                 mutex_exit(&gp->gp_mutex);
3517         }
3518 }
3519
3520
3521 /*
3522  * Wait until a lock is granted, cancelled, or interrupted.
3523  */
3524
3525 static void
3526 wait_for_lock(lock_descriptor_t *request)
3527 {
3528         graph_t *gp = request->l_graph;
3529
3530         ASSERT(MUTEX_HELD(&gp->gp_mutex));
3531
3532         while (!(IS_GRANTED(request)) && !(IS_CANCELLED(request)) &&
3533             !(IS_INTERRUPTED(request))) {
3534                 if (!cv_wait_sig(&request->l_cv, &gp->gp_mutex)) {
3535                         flk_set_state(request, FLK_INTERRUPTED_STATE);
3536                         request->l_state |= INTERRUPTED_LOCK;
3537                 }
3538         }
3539 }
3540
3541 /*
3542  * Create an flock structure from the existing lock information
3543  *
3544  * This routine is used to create flock structures for the lock manager
3545  * to use in a reclaim request.  Since the lock was originated on this
3546  * host, it must be conforming to UNIX semantics, so no checking is
3547  * done to make sure it falls within the lower half of the 32-bit range.
3548  */
3549
3550 static void
3551 create_flock(lock_descriptor_t *lp, flock64_t *flp)
3552 {
3553         ASSERT(lp->l_end == MAX_U_OFFSET_T || lp->l_end <= MAXEND);
3554         ASSERT(lp->l_end >= lp->l_start);
3555
3556         flp->l_type = lp->l_type;
3557         flp->l_whence = 0;
3558         flp->l_start = lp->l_start;
3559         flp->l_len = (lp->l_end == MAX_U_OFFSET_T) ? 0 :
3560             (lp->l_end - lp->l_start + 1);
3561         flp->l_sysid = lp->l_flock.l_sysid;
3562         flp->l_pid = lp->l_flock.l_pid;
3563 }
3564
3565 /*
3566  * Convert flock_t data describing a lock range into unsigned long starting
3567  * and ending points, which are put into lock_request.  Returns 0 or an
3568  * errno value.
3569  */
3570
3571 int
3572 flk_convert_lock_data(vnode_t *vp, flock64_t *flp,
3573     uoff_t *start, uoff_t *end, offset_t offset)
3574 {
3575         struct vattr    vattr;
3576         int     error;
3577
3578         /*
3579          * Determine the starting point of the request
3580          */
3581         switch (flp->l_whence) {
3582         case 0:         /* SEEK_SET */
3583                 *start = (uoff_t)flp->l_start;
3584                 break;
3585         case 1:         /* SEEK_CUR */
3586                 *start = (uoff_t)(flp->l_start + offset);
3587                 break;
3588         case 2:         /* SEEK_END */
3589                 vattr.va_mask = AT_SIZE;
3590                 if (error = fop_getattr(vp, &vattr, 0, CRED(), NULL))
3591                         return (error);
3592                 *start = (uoff_t)(flp->l_start + vattr.va_size);
3593                 break;
3594         default:
3595                 return (EINVAL);
3596         }
3597
3598         /*
3599          * Determine the range covered by the request.
3600          */
3601         if (flp->l_len == 0)
3602                 *end = MAX_U_OFFSET_T;
3603         else if ((offset_t)flp->l_len > 0) {
3604                 *end = (uoff_t)(*start + (flp->l_len - 1));
3605         } else {
3606                 /*
3607                  * Negative length; why do we even allow this ?
3608                  * Because this allows easy specification of
3609                  * the last n bytes of the file.
3610                  */
3611                 *end = *start;
3612                 *start += (uoff_t)flp->l_len;
3613                 (*start)++;
3614         }
3615         return (0);
3616 }
3617
3618 /*
3619  * Check the validity of lock data.  This can used by the NFS
3620  * frlock routines to check data before contacting the server.  The
3621  * server must support semantics that aren't as restrictive as
3622  * the UNIX API, so the NFS client is required to check.
3623  * The maximum is now passed in by the caller.
3624  */
3625
3626 int
3627 flk_check_lock_data(uoff_t start, uoff_t end, offset_t max)
3628 {
3629         /*
3630          * The end (length) for local locking should never be greater
3631          * than MAXEND. However, the representation for
3632          * the entire file is MAX_U_OFFSET_T.
3633          */
3634         if ((start > max) ||
3635             ((end > max) && (end != MAX_U_OFFSET_T))) {
3636                 return (EINVAL);
3637         }
3638         if (start > end) {
3639                 return (EINVAL);
3640         }
3641         return (0);
3642 }
3643
3644 /*
3645  * Fill in request->l_flock with information about the lock blocking the
3646  * request.  The complexity here is that lock manager requests are allowed
3647  * to see into the upper part of the 32-bit address range, whereas local
3648  * requests are only allowed to see signed values.
3649  *
3650  * What should be done when "blocker" is a lock manager lock that uses the
3651  * upper portion of the 32-bit range, but "request" is local?  Since the
3652  * request has already been determined to have been blocked by the blocker,
3653  * at least some portion of "blocker" must be in the range of the request,
3654  * or the request extends to the end of file.  For the first case, the
3655  * portion in the lower range is returned with the indication that it goes
3656  * "to EOF."  For the second case, the last byte of the lower range is
3657  * returned with the indication that it goes "to EOF."
3658  */
3659
3660 static void
3661 report_blocker(lock_descriptor_t *blocker, lock_descriptor_t *request)
3662 {
3663         flock64_t *flrp;                        /* l_flock portion of request */
3664
3665         ASSERT(blocker != NULL);
3666
3667         flrp = &request->l_flock;
3668         flrp->l_whence = 0;
3669         flrp->l_type = blocker->l_type;
3670         flrp->l_pid = blocker->l_flock.l_pid;
3671         flrp->l_sysid = blocker->l_flock.l_sysid;
3672         request->l_ofd = blocker->l_ofd;
3673
3674         if (IS_LOCKMGR(request)) {
3675                 flrp->l_start = blocker->l_start;
3676                 if (blocker->l_end == MAX_U_OFFSET_T)
3677                         flrp->l_len = 0;
3678                 else
3679                         flrp->l_len = blocker->l_end - blocker->l_start + 1;
3680         } else {
3681                 if (blocker->l_start > MAXEND) {
3682                         flrp->l_start = MAXEND;
3683                         flrp->l_len = 0;
3684                 } else {
3685                         flrp->l_start = blocker->l_start;
3686                         if (blocker->l_end == MAX_U_OFFSET_T)
3687                                 flrp->l_len = 0;
3688                         else
3689                                 flrp->l_len = blocker->l_end -
3690                                     blocker->l_start + 1;
3691                 }
3692         }
3693 }
3694
3695 /*
3696  * Return non-zero if the given I/O request conflicts with an active NBMAND
3697  * lock.
3698  * If svmand is non-zero, it means look at all active locks, not just NBMAND
3699  * locks.
3700  */
3701
3702 int
3703 nbl_lock_conflict(vnode_t *vp, nbl_op_t op, uoff_t offset,
3704     ssize_t length, int svmand, caller_context_t *ct)
3705 {
3706         int conflict = 0;
3707         graph_t                 *gp;
3708         lock_descriptor_t       *lock;
3709         pid_t pid;
3710         int sysid;
3711
3712         if (ct == NULL) {
3713                 pid = curproc->p_pid;
3714                 sysid = 0;
3715         } else {
3716                 pid = ct->cc_pid;
3717                 sysid = ct->cc_sysid;
3718         }
3719
3720         mutex_enter(&flock_lock);
3721         gp = lock_graph[HASH_INDEX(vp)];
3722         mutex_exit(&flock_lock);
3723         if (gp == NULL)
3724                 return (0);
3725
3726         mutex_enter(&gp->gp_mutex);
3727         SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
3728
3729         for (; lock && lock->l_vnode == vp; lock = lock->l_next) {
3730                 if ((svmand || (lock->l_state & NBMAND_LOCK)) &&
3731                     (lock->l_flock.l_sysid != sysid ||
3732                     lock->l_flock.l_pid != pid) &&
3733                     lock_blocks_io(op, offset, length,
3734                     lock->l_type, lock->l_start, lock->l_end)) {
3735                         conflict = 1;
3736                         break;
3737                 }
3738         }
3739         mutex_exit(&gp->gp_mutex);
3740
3741         return (conflict);
3742 }
3743
3744 /*
3745  * Return non-zero if the given I/O request conflicts with the given lock.
3746  */
3747
3748 static int
3749 lock_blocks_io(nbl_op_t op, uoff_t offset, ssize_t length,
3750     int lock_type, uoff_t lock_start, uoff_t lock_end)
3751 {
3752         ASSERT(op == NBL_READ || op == NBL_WRITE || op == NBL_READWRITE);
3753         ASSERT(lock_type == F_RDLCK || lock_type == F_WRLCK);
3754
3755         if (op == NBL_READ && lock_type == F_RDLCK)
3756                 return (0);
3757
3758         if (offset <= lock_start && lock_start < offset + length)
3759                 return (1);
3760         if (lock_start <= offset && offset <= lock_end)
3761                 return (1);
3762
3763         return (0);
3764 }
3765
3766 #ifdef DEBUG
3767 static void
3768 check_active_locks(graph_t *gp)
3769 {
3770         lock_descriptor_t *lock, *lock1;
3771         edge_t  *ep;
3772
3773         for (lock = ACTIVE_HEAD(gp)->l_next; lock != ACTIVE_HEAD(gp);
3774             lock = lock->l_next) {
3775                 ASSERT(IS_ACTIVE(lock));
3776                 ASSERT(NOT_BLOCKED(lock));
3777                 ASSERT(!IS_BARRIER(lock));
3778
3779                 ep = FIRST_IN(lock);
3780
3781                 while (ep != HEAD(lock)) {
3782                         ASSERT(IS_SLEEPING(ep->from_vertex));
3783                         ASSERT(!NOT_BLOCKED(ep->from_vertex));
3784                         ep = NEXT_IN(ep);
3785                 }
3786
3787                 for (lock1 = lock->l_next; lock1 != ACTIVE_HEAD(gp);
3788                     lock1 = lock1->l_next) {
3789                         if (lock1->l_vnode == lock->l_vnode) {
3790                         if (BLOCKS(lock1, lock)) {
3791                                 cmn_err(CE_PANIC,
3792                                     "active lock %p blocks %p",
3793                                     (void *)lock1, (void *)lock);
3794                         } else if (BLOCKS(lock, lock1)) {
3795                                 cmn_err(CE_PANIC,
3796                                     "active lock %p blocks %p",
3797                                     (void *)lock, (void *)lock1);
3798                         }
3799                         }
3800                 }
3801         }
3802 }
3803
3804 /*
3805  * Effect: This functions checks to see if the transition from 'old_state' to
3806  *      'new_state' is a valid one.  It returns 0 if the transition is valid
3807  *      and 1 if it is not.
3808  *      For a map of valid transitions, see sys/flock_impl.h
3809  */
3810 static int
3811 check_lock_transition(int old_state, int new_state)
3812 {
3813         switch (old_state) {
3814         case FLK_INITIAL_STATE:
3815                 if ((new_state == FLK_START_STATE) ||
3816                     (new_state == FLK_SLEEPING_STATE) ||
3817                     (new_state == FLK_ACTIVE_STATE) ||
3818                     (new_state == FLK_DEAD_STATE)) {
3819                         return (0);
3820                 } else {
3821                         return (1);
3822                 }
3823         case FLK_START_STATE:
3824                 if ((new_state == FLK_ACTIVE_STATE) ||
3825                     (new_state == FLK_DEAD_STATE)) {
3826                         return (0);
3827                 } else {
3828                         return (1);
3829                 }
3830         case FLK_ACTIVE_STATE:
3831                 if (new_state == FLK_DEAD_STATE) {
3832                         return (0);
3833                 } else {
3834                         return (1);
3835                 }
3836         case FLK_SLEEPING_STATE:
3837                 if ((new_state == FLK_GRANTED_STATE) ||
3838                     (new_state == FLK_INTERRUPTED_STATE) ||
3839                     (new_state == FLK_CANCELLED_STATE)) {
3840                         return (0);
3841                 } else {
3842                         return (1);
3843                 }
3844         case FLK_GRANTED_STATE:
3845                 if ((new_state == FLK_START_STATE) ||
3846                     (new_state == FLK_INTERRUPTED_STATE) ||
3847                     (new_state == FLK_CANCELLED_STATE)) {
3848                         return (0);
3849                 } else {
3850                         return (1);
3851                 }
3852         case FLK_CANCELLED_STATE:
3853                 if ((new_state == FLK_INTERRUPTED_STATE) ||
3854                     (new_state == FLK_DEAD_STATE)) {
3855                         return (0);
3856                 } else {
3857                         return (1);
3858                 }
3859         case FLK_INTERRUPTED_STATE:
3860                 if (new_state == FLK_DEAD_STATE) {
3861                         return (0);
3862                 } else {
3863                         return (1);
3864                 }
3865         case FLK_DEAD_STATE:
3866                 /* May be set more than once */
3867                 if (new_state == FLK_DEAD_STATE) {
3868                         return (0);
3869                 } else {
3870                         return (1);
3871                 }
3872         default:
3873                 return (1);
3874         }
3875 }
3876
3877 static void
3878 check_sleeping_locks(graph_t *gp)
3879 {
3880         lock_descriptor_t *lock1, *lock2;
3881         edge_t *ep;
3882         for (lock1 = SLEEPING_HEAD(gp)->l_next; lock1 != SLEEPING_HEAD(gp);
3883             lock1 = lock1->l_next) {
3884                                 ASSERT(!IS_BARRIER(lock1));
3885         for (lock2 = lock1->l_next; lock2 != SLEEPING_HEAD(gp);
3886             lock2 = lock2->l_next) {
3887                 if (lock1->l_vnode == lock2->l_vnode) {
3888                         if (BLOCKS(lock2, lock1)) {
3889                                 ASSERT(!IS_GRANTED(lock1));
3890                                 ASSERT(!NOT_BLOCKED(lock1));
3891                                 path(lock1, lock2);
3892                         }
3893                 }
3894         }
3895
3896         for (lock2 = ACTIVE_HEAD(gp)->l_next; lock2 != ACTIVE_HEAD(gp);
3897             lock2 = lock2->l_next) {
3898                                 ASSERT(!IS_BARRIER(lock1));
3899                 if (lock1->l_vnode == lock2->l_vnode) {
3900                         if (BLOCKS(lock2, lock1)) {
3901                                 ASSERT(!IS_GRANTED(lock1));
3902                                 ASSERT(!NOT_BLOCKED(lock1));
3903                                 path(lock1, lock2);
3904                         }
3905                 }
3906         }
3907         ep = FIRST_ADJ(lock1);
3908         while (ep != HEAD(lock1)) {
3909                 ASSERT(BLOCKS(ep->to_vertex, lock1));
3910                 ep = NEXT_ADJ(ep);
3911         }
3912         }
3913 }
3914
3915 static int
3916 level_two_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2, int no_path)
3917 {
3918         edge_t  *ep;
3919         lock_descriptor_t       *vertex;
3920         lock_descriptor_t *vertex_stack;
3921
3922         STACK_INIT(vertex_stack);
3923
3924         flk_graph_uncolor(lock1->l_graph);
3925         ep = FIRST_ADJ(lock1);
3926         ASSERT(ep != HEAD(lock1));
3927         while (ep != HEAD(lock1)) {
3928                 if (no_path)
3929                         ASSERT(ep->to_vertex != lock2);
3930                 STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
3931                 COLOR(ep->to_vertex);
3932                 ep = NEXT_ADJ(ep);
3933         }
3934
3935         while ((vertex = STACK_TOP(vertex_stack)) != NULL) {
3936                 STACK_POP(vertex_stack, l_dstack);
3937                 for (ep = FIRST_ADJ(vertex); ep != HEAD(vertex);
3938                     ep = NEXT_ADJ(ep)) {
3939                         if (COLORED(ep->to_vertex))
3940                                 continue;
3941                         COLOR(ep->to_vertex);
3942                         if (ep->to_vertex == lock2)
3943                                 return (1);
3944
3945                         STACK_PUSH(vertex_stack, ep->to_vertex, l_dstack);
3946                 }
3947         }
3948         return (0);
3949 }
3950
3951 static void
3952 check_owner_locks(graph_t *gp, pid_t pid, int sysid, vnode_t *vp)
3953 {
3954         lock_descriptor_t *lock;
3955
3956         /* Ignore OFD style locks since they're not process-wide. */
3957         if (pid == 0)
3958                 return;
3959
3960         SET_LOCK_TO_FIRST_ACTIVE_VP(gp, lock, vp);
3961
3962         if (lock) {
3963                 while (lock != ACTIVE_HEAD(gp) && (lock->l_vnode == vp)) {
3964                         if (lock->l_flock.l_pid == pid &&
3965                             lock->l_flock.l_sysid == sysid)
3966                                 cmn_err(CE_PANIC,
3967                                     "owner pid %d's lock %p in active queue",
3968                                     pid, (void *)lock);
3969                         lock = lock->l_next;
3970                 }
3971         }
3972         SET_LOCK_TO_FIRST_SLEEP_VP(gp, lock, vp);
3973
3974         if (lock) {
3975                 while (lock != SLEEPING_HEAD(gp) && (lock->l_vnode == vp)) {
3976                         if (lock->l_flock.l_pid == pid &&
3977                             lock->l_flock.l_sysid == sysid)
3978                                 cmn_err(CE_PANIC,
3979                                     "owner pid %d's lock %p in sleep queue",
3980                                     pid, (void *)lock);
3981                         lock = lock->l_next;
3982                 }
3983         }
3984 }
3985
3986 static int
3987 level_one_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
3988 {
3989         edge_t *ep = FIRST_ADJ(lock1);
3990
3991         while (ep != HEAD(lock1)) {
3992                 if (ep->to_vertex == lock2)
3993                         return (1);
3994                 else
3995                         ep = NEXT_ADJ(ep);
3996         }
3997         return (0);
3998 }
3999
4000 static int
4001 no_path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4002 {
4003         return (!level_two_path(lock1, lock2, 1));
4004 }
4005
4006 static void
4007 path(lock_descriptor_t *lock1, lock_descriptor_t *lock2)
4008 {
4009         if (level_one_path(lock1, lock2)) {
4010                 if (level_two_path(lock1, lock2, 0) != 0) {
4011                         cmn_err(CE_WARN,
4012                             "one edge one path from lock1 %p lock2 %p",
4013                             (void *)lock1, (void *)lock2);
4014                 }
4015         } else if (no_path(lock1, lock2)) {
4016                 cmn_err(CE_PANIC,
4017                     "No path from  lock1 %p to lock2 %p",
4018                     (void *)lock1, (void *)lock2);
4019         }
4020 }
4021 #endif /* DEBUG */