kernel/os/mutex.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  * Copyright (c) 2016 by Delphix. All rights reserved.
  25  */
  26
  27 /*
  28  * Big Theory Statement for mutual exclusion locking primitives.
  29  *
  30  * A mutex serializes multiple threads so that only one thread
  31  * (the "owner" of the mutex) is active at a time.  See mutex(9F)
  32  * for a full description of the interfaces and programming model.
  33  * The rest of this comment describes the implementation.
  34  *
  35  * Mutexes come in two flavors: adaptive and spin.  mutex_init(9F)
  36  * determines the type based solely on the iblock cookie (PIL) argument.
  37  * PIL > LOCK_LEVEL implies a spin lock; everything else is adaptive.
  38  *
  39  * Spin mutexes block interrupts and spin until the lock becomes available.
  40  * A thread may not sleep, or call any function that might sleep, while
  41  * holding a spin mutex.  With few exceptions, spin mutexes should only
  42  * be used to synchronize with interrupt handlers.
  43  *
  44  * Adaptive mutexes (the default type) spin if the owner is running on
  45  * another CPU and block otherwise.  This policy is based on the assumption
  46  * that mutex hold times are typically short enough that the time spent
  47  * spinning is less than the time it takes to block.  If you need mutual
  48  * exclusion semantics with long hold times, consider an rwlock(9F) as
  49  * RW_WRITER.  Better still, reconsider the algorithm: if it requires
  50  * mutual exclusion for long periods of time, it's probably not scalable.
  51  *
  52  * Adaptive mutexes are overwhelmingly more common than spin mutexes,
  53  * so mutex_enter() assumes that the lock is adaptive.  We get away
  54  * with this by structuring mutexes so that an attempt to acquire a
  55  * spin mutex as adaptive always fails.  When mutex_enter() fails
  56  * it punts to mutex_vector_enter(), which does all the hard stuff.
  57  *
  58  * mutex_vector_enter() first checks the type.  If it's a spin mutex,
  59  * we just call lock_set_spl() and return.  If it's an adaptive mutex,
  60  * we check to see what the owner is doing.  If the owner is running,
  61  * we spin until the lock becomes available; if not, we mark the lock
  62  * as having waiters and block.
  63  *
  64  * Blocking on a mutex is a surprisingly delicate dance because, for speed,
  65  * mutex_exit() doesn't use an atomic instruction.  Thus we have to work
  66  * a little harder in the (rarely-executed) blocking path to make sure
  67  * we don't block on a mutex that's just been released -- otherwise we
  68  * might never be woken up.
  69  *
  70  * The logic for synchronizing mutex_vector_enter() with mutex_exit()
  71  * in the face of preemption and relaxed memory ordering is as follows:
  72  *
  73  * (1) Preemption in the middle of mutex_exit() must cause mutex_exit()
  74  *     to restart.  Each platform must enforce this by checking the
  75  *     interrupted PC in the interrupt handler (or on return from trap --
  76  *     whichever is more convenient for the platform).  If the PC
  77  *     lies within the critical region of mutex_exit(), the interrupt
  78  *     handler must reset the PC back to the beginning of mutex_exit().
  79  *     The critical region consists of all instructions up to, but not
  80  *     including, the store that clears the lock (which, of course,
  81  *     must never be executed twice.)
  82  *
  83  *     This ensures that the owner will always check for waiters after
  84  *     resuming from a previous preemption.
  85  *
  86  * (2) A thread resuming in mutex_exit() does (at least) the following:
  87  *
  88  *      when resuming:  set CPU_THREAD = owner
  89  *                      membar #StoreLoad
  90  *
  91  *      in mutex_exit:  check waiters bit; do wakeup if set
  92  *                      membar #LoadStore|#StoreStore
  93  *                      clear owner
  94  *                      (at this point, other threads may or may not grab
  95  *                      the lock, and we may or may not reacquire it)
  96  *
  97  *      when blocking:  membar #StoreStore (due to disp_lock_enter())
  98  *                      set CPU_THREAD = (possibly) someone else
  99  *
 100  * (3) A thread blocking in mutex_vector_enter() does the following:
 101  *
 102  *                      set waiters bit
 103  *                      membar #StoreLoad (via membar_enter())
 104  *                      check CPU_THREAD for owner's t_cpu
 105  *                              continue if owner running
 106  *                      membar #LoadLoad (via membar_consumer())
 107  *                      check owner and waiters bit; abort if either changed
 108  *                      block
 109  *
 110  * Thus the global memory orderings for (2) and (3) are as follows:
 111  *
 112  * (2M) mutex_exit() memory order:
 113  *
 114  *                      STORE   CPU_THREAD = owner
 115  *                      LOAD    waiters bit
 116  *                      STORE   owner = NULL
 117  *                      STORE   CPU_THREAD = (possibly) someone else
 118  *
 119  * (3M) mutex_vector_enter() memory order:
 120  *
 121  *                      STORE   waiters bit = 1
 122  *                      LOAD    CPU_THREAD for each CPU
 123  *                      LOAD    owner and waiters bit
 124  *
 125  * It has been verified by exhaustive simulation that all possible global
 126  * memory orderings of (2M) interleaved with (3M) result in correct
 127  * behavior.  Moreover, these ordering constraints are minimal: changing
 128  * the ordering of anything in (2M) or (3M) breaks the algorithm, creating
 129  * windows for missed wakeups.  Note: the possibility that other threads
 130  * may grab the lock after the owner drops it can be factored out of the
 131  * memory ordering analysis because mutex_vector_enter() won't block
 132  * if the lock isn't still owned by the same thread.
 133  *
 134  * The only requirements of code outside the mutex implementation are
 135  * (1) mutex_exit() preemption fixup in interrupt handlers or trap return,
 136  * (2) a membar #StoreLoad after setting CPU_THREAD in resume(),
 137  * (3) mutex_owner_running() preemption fixup in interrupt handlers
 138  * or trap returns.
 139  * Note: idle threads cannot grab adaptive locks (since they cannot block),
 140  * so the membar may be safely omitted when resuming an idle thread.
 141  *
 142  * When a mutex has waiters, mutex_vector_exit() has several options:
 143  *
 144  * (1) Choose a waiter and make that thread the owner before waking it;
 145  *     this is known as "direct handoff" of ownership.
 146  *
 147  * (2) Drop the lock and wake one waiter.
 148  *
 149  * (3) Drop the lock, clear the waiters bit, and wake all waiters.
 150  *
 151  * In many ways (1) is the cleanest solution, but if a lock is moderately
 152  * contended it defeats the adaptive spin logic.  If we make some other
 153  * thread the owner, but it's not ONPROC yet, then all other threads on
 154  * other cpus that try to get the lock will conclude that the owner is
 155  * blocked, so they'll block too.  And so on -- it escalates quickly,
 156  * with every thread taking the blocking path rather than the spin path.
 157  * Thus, direct handoff is *not* a good idea for adaptive mutexes.
 158  *
 159  * Option (2) is the next most natural-seeming option, but it has several
 160  * annoying properties.  If there's more than one waiter, we must preserve
 161  * the waiters bit on an unheld lock.  On cas-capable platforms, where
 162  * the waiters bit is part of the lock word, this means that both 0x0
 163  * and 0x1 represent unheld locks, so we have to cas against *both*.
 164  * Priority inheritance also gets more complicated, because a lock can
 165  * have waiters but no owner to whom priority can be willed.  So while
 166  * it is possible to make option (2) work, it's surprisingly vile.
 167  *
 168  * Option (3), the least-intuitive at first glance, is what we actually do.
 169  * It has the advantage that because you always wake all waiters, you
 170  * never have to preserve the waiters bit.  Waking all waiters seems like
 171  * begging for a thundering herd problem, but consider: under option (2),
 172  * every thread that grabs and drops the lock will wake one waiter -- so
 173  * if the lock is fairly active, all waiters will be awakened very quickly
 174  * anyway.  Moreover, this is how adaptive locks are *supposed* to work.
 175  * The blocking case is rare; the more common case (by 3-4 orders of
 176  * magnitude) is that one or more threads spin waiting to get the lock.
 177  * Only direct handoff can prevent the thundering herd problem, but as
 178  * mentioned earlier, that would tend to defeat the adaptive spin logic.
 179  * In practice, option (3) works well because the blocking case is rare.
 180  */
 181
 182 /*
 183  * delayed lock retry with exponential delay for spin locks
 184  *
 185  * It is noted above that for both the spin locks and the adaptive locks,
 186  * spinning is the dominant mode of operation.  So long as there is only
 187  * one thread waiting on a lock, the naive spin loop works very well in
 188  * cache based architectures.  The lock data structure is pulled into the
 189  * cache of the processor with the waiting/spinning thread and no further
 190  * memory traffic is generated until the lock is released.  Unfortunately,
 191  * once two or more threads are waiting on a lock, the naive spin has
 192  * the property of generating maximum memory traffic from each spinning
 193  * thread as the spinning threads contend for the lock data structure.
 194  *
 195  * By executing a delay loop before retrying a lock, a waiting thread
 196  * can reduce its memory traffic by a large factor, depending on the
 197  * size of the delay loop.  A large delay loop greatly reduces the memory
 198  * traffic, but has the drawback of having a period of time when
 199  * no thread is attempting to gain the lock even though several threads
 200  * might be waiting.  A small delay loop has the drawback of not
 201  * much reduction in memory traffic, but reduces the potential idle time.
 202  * The theory of the exponential delay code is to start with a short
 203  * delay loop and double the waiting time on each iteration, up to
 204  * a preselected maximum.
 205  */
 206
 207 #include <sys/param.h>
 208 #include <sys/time.h>
 209 #include <sys/cpuvar.h>
 210 #include <sys/thread.h>
 211 #include <sys/debug.h>
 212 #include <sys/cmn_err.h>
 213 #include <sys/sobject.h>
 214 #include <sys/turnstile.h>
 215 #include <sys/systm.h>
 216 #include <sys/mutex_impl.h>
 217 #include <sys/spl.h>
 218 #include <sys/lockstat.h>
 219 #include <sys/atomic.h>
 220 #include <sys/cpu.h>
 221 #include <sys/stack.h>
 222 #include <sys/archsystm.h>
 223 #include <sys/machsystm.h>
 224 #include <sys/x_call.h>
 225
 226 /*
 227  * The sobj_ops vector exports a set of functions needed when a thread
 228  * is asleep on a synchronization object of this type.
 229  */
 230 static sobj_ops_t mutex_sobj_ops = {
 231         SOBJ_MUTEX, mutex_owner, turnstile_stay_asleep, turnstile_change_pri
 232 };
 233
 234 /*
 235  * If the system panics on a mutex, save the address of the offending
 236  * mutex in panic_mutex_addr, and save the contents in panic_mutex.
 237  */
 238 static mutex_impl_t panic_mutex;
 239 static mutex_impl_t *panic_mutex_addr;
 240
 241 static void
 242 mutex_panic(char *msg, mutex_impl_t *lp)
 243 {
 244         if (panicstr)
 245                 return;
 246
 247         if (atomic_cas_ptr(&panic_mutex_addr, NULL, lp) == NULL)
 248                 panic_mutex = *lp;
 249
 250         panic("%s, lp=%p owner=%p thread=%p",
 251             msg, (void *)lp, (void *)MUTEX_OWNER(&panic_mutex),
 252             (void *)curthread);
 253 }
 254
/*
 * "tunables" for per-platform backoff constants.
 *
 * mutex_backoff_cap    upper limit for the backoff; when 0,
 *                      default_lock_backoff() computes the limit as
 *                      ncpus_online * mutex_cap_factor instead.
 * mutex_backoff_base   initial backoff, and the minimum delay applied by
 *                      default_lock_delay().
 * mutex_cap_factor     per-CPU multiplier used for the computed limit.
 * mutex_backoff_shift  left-shift applied to the backoff on each increase.
 */
uint_t mutex_backoff_cap = 0;
ushort_t mutex_backoff_base = MUTEX_BACKOFF_BASE;
ushort_t mutex_cap_factor = MUTEX_CAP_FACTOR;
uchar_t mutex_backoff_shift = MUTEX_BACKOFF_SHIFT;
 260
/*
 * Function wrapper around the MUTEX_SYNC() macro, so that the macro's
 * effect can be obtained through an ordinary function call.
 * NOTE(review): MUTEX_SYNC() is defined outside this file; its exact
 * effect is platform-specific -- confirm against sys/mutex_impl.h.
 */
void
mutex_sync(void)
{
        MUTEX_SYNC();
}
 266
 267 /* calculate the backoff interval */
 268 uint_t
 269 default_lock_backoff(uint_t backoff)
 270 {
 271         uint_t cap;             /* backoff cap calculated */
 272
 273         if (backoff == 0) {
 274                 backoff = mutex_backoff_base;
 275                 /* first call just sets the base */
 276                 return (backoff);
 277         }
 278
 279         /* set cap */
 280         if (mutex_backoff_cap == 0) {
 281                 /*
 282                  * For a contended lock, in the worst case a load + cas may
 283                  * be queued  at the controller for each contending CPU.
 284                  * Therefore, to avoid queueing, the accesses for all CPUS must
 285                  * be spread out in time over an interval of (ncpu *
 286                  * cap-factor).  Maximum backoff is set to this value, and
 287                  * actual backoff is a random number from 0 to the current max.
 288                  */
 289                 cap = ncpus_online * mutex_cap_factor;
 290         } else {
 291                 cap = mutex_backoff_cap;
 292         }
 293
 294         /* calculate new backoff value */
 295         backoff <<= mutex_backoff_shift;        /* increase backoff */
 296         if (backoff > cap) {
 297                 if (cap < mutex_backoff_base)
 298                         backoff = mutex_backoff_base;
 299                 else
 300                         backoff = cap;
 301         }
 302
 303         return (backoff);
 304 }
 305
 306 /*
 307  * default delay function for mutexes.
 308  */
 309 void
 310 default_lock_delay(uint_t backoff)
 311 {
 312         ulong_t rnd;            /* random factor */
 313         uint_t cur_backoff;     /* calculated backoff */
 314         uint_t backctr;
 315
 316         /*
 317          * Modify backoff by a random amount to avoid lockstep, and to
 318          * make it probable that some thread gets a small backoff, and
 319          * re-checks quickly
 320          */
 321         rnd = (((long)curthread >> PTR24_LSB) ^ (long)MUTEX_GETTICK());
 322         cur_backoff = (uint_t)(rnd % (backoff - mutex_backoff_base + 1)) +
 323             mutex_backoff_base;
 324
 325         /*
 326          * Delay before trying
 327          * to touch the mutex data structure.
 328          */
 329         for (backctr = cur_backoff; backctr; backctr--) {
 330                 MUTEX_DELAY();
 331         };
 332 }
 333
/*
 * Hooks through which the backoff policy is invoked.  The spin paths in
 * this file call mutex_lock_backoff() and mutex_lock_delay() rather than
 * the default_*() routines directly, so a different policy can be
 * installed by assigning these pointers.
 */
uint_t (*mutex_lock_backoff)(uint_t) = default_lock_backoff;
void (*mutex_lock_delay)(uint_t) = default_lock_delay;
/*
 * NOTE(review): mutex_delay is not called anywhere in this file and
 * mutex_delay_default is defined elsewhere; presumably the per-platform
 * MUTEX_DELAY() goes through it -- confirm against the platform code.
 */
void (*mutex_delay)(void) = mutex_delay_default;
 337
/*
 * mutex_vector_enter() is called from the assembly mutex_enter() routine
 * if the lock is held or is not of type MUTEX_ADAPTIVE.
 *
 * Spin mutexes are acquired with lock_set_spl() and the function returns.
 * Anything that is neither spin nor adaptive is reported via mutex_panic().
 *
 * For an adaptive mutex the function loops: it delays by the current
 * backoff, tries to take the lock when it has no owner, keeps spinning
 * while the owner is running, and otherwise sets the waiters bit and
 * blocks on the mutex's turnstile.  The loop is left only by acquiring
 * the lock, or -- if panicstr becomes set -- by returning WITHOUT the
 * lock.  Lockstat spin, block and acquire events are recorded on the way
 * out.
 *
 * The order of the stores, loads and memory barriers in the blocking path
 * is load-bearing; see the Big Theory Statement before changing it.
 */
void
mutex_vector_enter(mutex_impl_t *lp)
{
        kthread_id_t    owner;
        kthread_id_t    lastowner = MUTEX_NO_OWNER; /* track owner changes */
        hrtime_t        sleep_time = 0; /* how long we slept */
        hrtime_t        spin_time = 0;  /* how long we spun */
        cpu_t           *cpup;
        turnstile_t     *ts;
        /* volatile alias so the owner/waiters re-reads are not cached */
        volatile mutex_impl_t *vlp = (volatile mutex_impl_t *)lp;
        uint_t          backoff = 0;    /* current backoff */
        int             changecnt = 0;  /* count of owner changes */

        ASSERT_STACK_ALIGNED();

        if (MUTEX_TYPE_SPIN(lp)) {
                lock_set_spl(&lp->m_spin.m_spinlock, lp->m_spin.m_minspl,
                    &lp->m_spin.m_oldspl);
                return;
        }

        if (!MUTEX_TYPE_ADAPTIVE(lp)) {
                mutex_panic("mutex_enter: bad mutex", lp);
                /* reached only when a panic was already in progress */
                return;
        }

        /*
         * Adaptive mutexes must not be acquired from above LOCK_LEVEL.
         * We can migrate after loading CPU but before checking CPU_ON_INTR,
         * so we must verify by disabling preemption and loading CPU again.
         */
        cpup = CPU;
        if (CPU_ON_INTR(cpup) && !panicstr) {
                kpreempt_disable();
                if (CPU_ON_INTR(CPU))
                        mutex_panic("mutex_enter: adaptive at high PIL", lp);
                kpreempt_enable();
        }

        CPU_STATS_ADDQ(cpup, sys, mutex_adenters, 1);

        spin_time = LOCKSTAT_START_TIME(LS_MUTEX_ENTER_SPIN);

        backoff = mutex_lock_backoff(0);        /* set base backoff */
        for (;;) {
                mutex_lock_delay(backoff); /* backoff delay */

                /* during a panic, give up without acquiring the lock */
                if (panicstr)
                        return;

                if ((owner = MUTEX_OWNER(vlp)) == NULL) {
                        if (mutex_adaptive_tryenter(lp)) {
                                break;
                        }
                        /* increase backoff only on failed attempt. */
                        backoff = mutex_lock_backoff(backoff);
                        changecnt++;
                        continue;
                } else if (lastowner != owner) {
                        /* the lock changed hands while we were spinning */
                        lastowner = owner;
                        backoff = mutex_lock_backoff(backoff);
                        changecnt++;
                }

                /*
                 * After as many failed attempts / owner changes as there
                 * are online CPUs, start again from the base backoff.
                 */
                if (changecnt >= ncpus_online) {
                        backoff = mutex_lock_backoff(0);
                        changecnt = 0;
                }

                if (owner == curthread)
                        mutex_panic("recursive mutex_enter", lp);

                /*
                 * If lock is held but owner is not yet set, spin.
                 * (Only relevant for platforms that don't have cas.)
                 */
                if (owner == MUTEX_NO_OWNER)
                        continue;

                if (mutex_owner_running(lp) != NULL)  {
                        continue;
                }

                /*
                 * The owner appears not to be running, so block.
                 * See the Big Theory Statement for memory ordering issues.
                 */
                ts = turnstile_lookup(lp);
                MUTEX_SET_WAITERS(lp);
                membar_enter();

                /*
                 * Recheck whether owner is running after waiters bit hits
                 * global visibility (above).  If owner is running, spin.
                 */
                if (mutex_owner_running(lp) != NULL) {
                        turnstile_exit(lp);
                        continue;
                }
                membar_consumer();

                /*
                 * If owner and waiters bit are unchanged, block.
                 */
                if (MUTEX_OWNER(vlp) == owner && MUTEX_HAS_WAITERS(vlp)) {
                        /* accumulate (end - start) across every sleep */
                        sleep_time -= gethrtime();
                        (void) turnstile_block(ts, TS_WRITER_Q, lp,
                            &mutex_sobj_ops, NULL, NULL);
                        sleep_time += gethrtime();
                        /* reset backoff after turnstile */
                        backoff = mutex_lock_backoff(0);
                } else {
                        turnstile_exit(lp);
                }
        }

        ASSERT(MUTEX_OWNER(lp) == curthread);

        if (sleep_time != 0) {
                /*
                 * Note, sleep time is the sum of all the sleeping we
                 * did.
                 */
                LOCKSTAT_RECORD(LS_MUTEX_ENTER_BLOCK, lp, sleep_time);
        }

        /*
         * record spin time, don't count sleep time
         * NOTE(review): presumably LOCKSTAT_RECORD_TIME() reports the time
         * elapsed since its last argument, so advancing the start time by
         * sleep_time is what excludes the time slept -- confirm against
         * sys/lockstat.h.
         */
        if (spin_time != 0) {
                LOCKSTAT_RECORD_TIME(LS_MUTEX_ENTER_SPIN, lp,
                    spin_time + sleep_time);
        }

        LOCKSTAT_RECORD0(LS_MUTEX_ENTER_ACQUIRE, lp);
}
 476
 477 /*
 478  * mutex_vector_tryenter() is called from the assembly mutex_tryenter()
 479  * routine if the lock is held or is not of type MUTEX_ADAPTIVE.
 480  */
 481 int
 482 mutex_vector_tryenter(mutex_impl_t *lp)
 483 {
 484         int s;
 485
 486         if (MUTEX_TYPE_ADAPTIVE(lp))
 487                 return (0);             /* we already tried in assembly */
 488
 489         if (!MUTEX_TYPE_SPIN(lp)) {
 490                 mutex_panic("mutex_tryenter: bad mutex", lp);
 491                 return (0);
 492         }
 493
 494         s = splr(lp->m_spin.m_minspl);
 495         if (lock_try(&lp->m_spin.m_spinlock)) {
 496                 lp->m_spin.m_oldspl = (ushort_t)s;
 497                 return (1);
 498         }
 499         splx(s);
 500         return (0);
 501 }
 502
 503 /*
 504  * mutex_vector_exit() is called from mutex_exit() if the lock is not
 505  * adaptive, has waiters, or is not owned by the current thread (panic).
 506  */
 507 void
 508 mutex_vector_exit(mutex_impl_t *lp)
 509 {
 510         turnstile_t *ts;
 511
 512         if (MUTEX_TYPE_SPIN(lp)) {
 513                 lock_clear_splx(&lp->m_spin.m_spinlock, lp->m_spin.m_oldspl);
 514                 return;
 515         }
 516
 517         if (MUTEX_OWNER(lp) != curthread) {
 518                 mutex_panic("mutex_exit: not owner", lp);
 519                 return;
 520         }
 521
 522         ts = turnstile_lookup(lp);
 523         MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
 524         if (ts == NULL)
 525                 turnstile_exit(lp);
 526         else
 527                 turnstile_wakeup(ts, TS_WRITER_Q, ts->ts_waiters, NULL);
 528         LOCKSTAT_RECORD0(LS_MUTEX_EXIT_RELEASE, lp);
 529 }
 530
 531 int
 532 mutex_owned(const kmutex_t *mp)
 533 {
 534         const mutex_impl_t *lp = (const mutex_impl_t *)mp;
 535
 536         if (panicstr || quiesce_active)
 537                 return (1);
 538
 539         if (MUTEX_TYPE_ADAPTIVE(lp))
 540                 return (MUTEX_OWNER(lp) == curthread);
 541         return (LOCK_HELD(&lp->m_spin.m_spinlock));
 542 }
 543
 544 kthread_t *
 545 mutex_owner(const kmutex_t *mp)
 546 {
 547         const mutex_impl_t *lp = (const mutex_impl_t *)mp;
 548         kthread_id_t t;
 549
 550         if (MUTEX_TYPE_ADAPTIVE(lp) && (t = MUTEX_OWNER(lp)) != MUTEX_NO_OWNER)
 551                 return (t);
 552         return (NULL);
 553 }
 554
 555 /*
 556  * The iblock cookie 'ibc' is the spl level associated with the lock;
 557  * this alone determines whether the lock will be ADAPTIVE or SPIN.
 558  *
 559  * Adaptive mutexes created in zeroed memory do not need to call
 560  * mutex_init() as their allocation in this fashion guarantees
 561  * their initialization.
 562  *   eg adaptive mutexes created as static within the BSS or allocated
 563  *      by kmem_zalloc().
 564  */
 565 /* ARGSUSED */
 566 void
 567 mutex_init(kmutex_t *mp, char *name, kmutex_type_t type, void *ibc)
 568 {
 569         mutex_impl_t *lp = (mutex_impl_t *)mp;
 570
 571         ASSERT(ibc < (void *)KERNELBASE);       /* see 1215173 */
 572
 573         if ((intptr_t)ibc > ipltospl(LOCK_LEVEL) && ibc < (void *)KERNELBASE) {
 574                 ASSERT(type != MUTEX_ADAPTIVE && type != MUTEX_DEFAULT);
 575                 MUTEX_SET_TYPE(lp, MUTEX_SPIN);
 576                 LOCK_INIT_CLEAR(&lp->m_spin.m_spinlock);
 577                 LOCK_INIT_HELD(&lp->m_spin.m_dummylock);
 578                 lp->m_spin.m_minspl = (int)(intptr_t)ibc;
 579         } else {
 580 #ifdef MUTEX_ALIGN
 581                 static int misalign_cnt = 0;
 582
 583                 if (((uintptr_t)lp & (uintptr_t)(MUTEX_ALIGN - 1)) &&
 584                     (misalign_cnt < MUTEX_ALIGN_WARNINGS)) {
 585                         /*
 586                          * The mutex is not aligned and may cross a cache line.
 587                          * This is not supported and may cause a panic.
 588                          * Show a warning that the mutex is not aligned
 589                          * and attempt to identify the origin.
 590                          * Unaligned mutexes are not (supposed to be)
 591                          * possible on SPARC.
 592                          */
 593                         char *funcname;
 594                         ulong_t offset = 0;
 595
 596                         funcname = modgetsymname((uintptr_t)caller(), &offset);
 597                         cmn_err(CE_WARN, "mutex_init: %p is not %d byte "
 598                             "aligned; caller %s+%lx in module %s. "
 599                             "This is unsupported and may cause a panic. "
 600                             "Please report this to the kernel module supplier.",
 601                             (void *)lp, MUTEX_ALIGN,
 602                             funcname ? funcname : "unknown", offset,
 603                             mod_containing_pc(caller()));
 604                         misalign_cnt++;
 605                         if (misalign_cnt >= MUTEX_ALIGN_WARNINGS) {
 606                                 cmn_err(CE_WARN, "mutex_init: further unaligned"
 607                                     " mutex warnings will be suppressed.");
 608                         }
 609                 }
 610 #endif  /* MUTEX_ALIGN */
 611                 ASSERT(type != MUTEX_SPIN);
 612
 613                 MUTEX_SET_TYPE(lp, MUTEX_ADAPTIVE);
 614                 MUTEX_CLEAR_LOCK_AND_WAITERS(lp);
 615         }
 616 }
 617
 618 void
 619 mutex_destroy(kmutex_t *mp)
 620 {
 621         mutex_impl_t *lp = (mutex_impl_t *)mp;
 622
 623         if (lp->m_owner == 0 && !MUTEX_HAS_WAITERS(lp)) {
 624                 MUTEX_DESTROY(lp);
 625         } else if (MUTEX_TYPE_SPIN(lp)) {
 626                 LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
 627                 MUTEX_DESTROY(lp);
 628         } else if (MUTEX_TYPE_ADAPTIVE(lp)) {
 629                 LOCKSTAT_RECORD0(LS_MUTEX_DESTROY_RELEASE, lp);
 630                 if (MUTEX_OWNER(lp) != curthread)
 631                         mutex_panic("mutex_destroy: not owner", lp);
 632                 if (MUTEX_HAS_WAITERS(lp)) {
 633                         turnstile_t *ts = turnstile_lookup(lp);
 634                         turnstile_exit(lp);
 635                         if (ts != NULL)
 636                                 mutex_panic("mutex_destroy: has waiters", lp);
 637                 }
 638                 MUTEX_DESTROY(lp);
 639         } else {
 640                 mutex_panic("mutex_destroy: bad mutex", lp);
 641         }
 642 }
 643
 644 /*
 645  * Simple C support for the cases where spin locks miss on the first try.
 646  */
 647 void
 648 lock_set_spin(lock_t *lp)
 649 {
 650         int loop_count = 0;
 651         uint_t backoff = 0;     /* current backoff */
 652         hrtime_t spin_time = 0; /* how long we spun */
 653
 654         if (panicstr)
 655                 return;
 656
 657         if (ncpus == 1)
 658                 panic("lock_set: %p lock held and only one CPU", (void *)lp);
 659
 660         spin_time = LOCKSTAT_START_TIME(LS_LOCK_SET_SPIN);
 661
 662         while (LOCK_HELD(lp) || !lock_spin_try(lp)) {
 663                 if (panicstr)
 664                         return;
 665                 loop_count++;
 666
 667                 if (ncpus_online == loop_count) {
 668                         backoff = mutex_lock_backoff(0);
 669                         loop_count = 0;
 670                 } else {
 671                         backoff = mutex_lock_backoff(backoff);
 672                 }
 673                 mutex_lock_delay(backoff);
 674         }
 675
 676         LOCKSTAT_RECORD_TIME(LS_LOCK_SET_SPIN, lp, spin_time);
 677
 678         LOCKSTAT_RECORD0(LS_LOCK_SET_ACQUIRE, lp);
 679 }
 680
 681 void
 682 lock_set_spl_spin(lock_t *lp, int new_pil, ushort_t *old_pil_addr, int old_pil)
 683 {
 684         int loop_count = 0;
 685         uint_t backoff = 0;     /* current backoff */
 686         hrtime_t spin_time = 0; /* how long we spun */
 687
 688         if (panicstr)
 689                 return;
 690
 691         if (ncpus == 1)
 692                 panic("lock_set_spl: %p lock held and only one CPU",
 693                     (void *)lp);
 694
 695         ASSERT(new_pil > LOCK_LEVEL);
 696
 697         spin_time = LOCKSTAT_START_TIME(LS_LOCK_SET_SPL_SPIN);
 698
 699         do {
 700                 splx(old_pil);
 701                 while (LOCK_HELD(lp)) {
 702                         loop_count++;
 703
 704                         if (panicstr) {
 705                                 *old_pil_addr = (ushort_t)splr(new_pil);
 706                                 return;
 707                         }
 708                         if (ncpus_online == loop_count) {
 709                                 backoff = mutex_lock_backoff(0);
 710                                 loop_count = 0;
 711                         } else {
 712                                 backoff = mutex_lock_backoff(backoff);
 713                         }
 714                         mutex_lock_delay(backoff);
 715                 }
 716                 old_pil = splr(new_pil);
 717         } while (!lock_spin_try(lp));
 718
 719         *old_pil_addr = (ushort_t)old_pil;
 720
 721         LOCKSTAT_RECORD_TIME(LS_LOCK_SET_SPL_SPIN, lp, spin_time);
 722
 723         LOCKSTAT_RECORD0(LS_LOCK_SET_SPL_ACQUIRE, lp);
 724 }