/* sys/kern/kern_mutex.c */
/*
 * Copyright (c) 2009 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Implement fast persistent locks based on atomic_cmpset_int() with
 * semantics similar to lockmgr locks but faster and taking up much less
 * space.  Taken from HAMMER's lock implementation.
 *
 * These are meant to complement our LWKT tokens.  Tokens are only held
 * while the thread is running.  Mutexes can be held across blocking
 * conditions.
 *
 * - Exclusive priority over shared to prevent SMP starvation.
 * - locks can be aborted (async callback, if any, will be made w/ENOLCK).
 * - locks can be asynchronous.
 * - synchronous fast path if no blocking occurs (async callback is not
 *   made in this case).
 *
 * Generally speaking any caller-supplied link state must be properly
 * initialized before use.
 *
 * Most of the support is in sys/mutex[2].h.  We mostly provide backoff
 * functions here.
 */
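
/*
 * Illustrative synchronous usage (a minimal sketch, not part of this
 * file's logic).  The wrapper names and the mtx_init() signature shown
 * here follow the inline API in sys/mutex2.h but are assumptions and
 * should be checked against that header:
 *
 *	mtx_t mymtx;				// hypothetical mutex
 *
 *	mtx_init(&mymtx, "mymtx");		// assumed init signature
 *	mtx_lock_ex_quick(&mymtx);		// exclusive, no flags/timeout
 *	// ... critical section, may block/sleep while held ...
 *	mtx_unlock(&mymtx);
 */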

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/thread.h>

#include <machine/cpufunc.h>

#include <sys/thread2.h>
#include <sys/mutex2.h>

static int mtx_chain_link_ex(mtx_t *mtx, u_int olock);
static int mtx_chain_link_sh(mtx_t *mtx, u_int olock, int addcount);
static void mtx_delete_link(mtx_t *mtx, mtx_link_t *link);

/*
 * Exclusive-lock a mutex, block until acquired unless link is async.
 * Recursion is allowed.
 *
 * Returns 0 on success, the tsleep() return code on failure, EINPROGRESS
 * if async.  If immediately successful an async exclusive lock will return 0
 * and not issue the async callback or link the link structure.  The caller
 * must handle this case (typically this is an optimal code path).
 *
 * A tsleep() error can only be returned if PCATCH is specified in the flags.
 */
static __inline int
__mtx_lock_ex(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	thread_t td;
	u_int	lock;
	u_int	nlock;
	int	error;
	int	isasync;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				link->state = MTX_LINK_ACQUIRED;
				error = 0;
				break;
			}
			continue;
		}
		if ((lock & MTX_EXCLUSIVE) && mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				link->state = MTX_LINK_ACQUIRED;
				error = 0;
				break;
			}
			continue;
		}

		/*
		 * We need MTX_LINKSPIN to manipulate exlink or
		 * shlink.
		 *
		 * We must set MTX_EXWANTED with MTX_LINKSPIN to indicate
		 * pending exclusive requests.  It cannot be set as a separate
		 * operation prior to acquiring MTX_LINKSPIN.
		 *
		 * To avoid unnecessary cpu cache traffic we poll
		 * for collisions.  It is also possible that EXWANTED
		 * state failing the above test was spurious, so all the
		 * tests must be repeated if we cannot obtain LINKSPIN
		 * with the prior state tests intact (i.e. don't reload
		 * the (lock) variable here, for heaven's sake!).
		 */
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		td = curthread;
		nlock = lock | MTX_EXWANTED | MTX_LINKSPIN;
		++td->td_critcount;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
			--td->td_critcount;
			continue;
		}

		/*
		 * Check for early abort.
		 */
		if (link->state == MTX_LINK_ABORTED) {
			if (mtx->mtx_exlink == NULL) {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN |
						 MTX_EXWANTED);
			} else {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN);
			}
			--td->td_critcount;
			link->state = MTX_LINK_IDLE;
			error = ENOLCK;
			break;
		}

		/*
		 * Add our link to the exlink list and release LINKSPIN.
		 */
		link->owner = td;
		link->state = MTX_LINK_LINKED_EX;
		if (mtx->mtx_exlink) {
			link->next = mtx->mtx_exlink;
			link->prev = link->next->prev;
			link->next->prev = link;
			link->prev->next = link;
		} else {
			link->next = link;
			link->prev = link;
			mtx->mtx_exlink = link;
		}
		isasync = (link->callback != NULL);
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
		--td->td_critcount;

		/*
		 * If asynchronous lock request return without
		 * blocking, leave link structure linked.
		 */
		if (isasync) {
			error = EINPROGRESS;
			break;
		}

		/*
		 * Wait for lock
		 */
		error = mtx_wait_link(mtx, link, flags, to);
		break;
	}
	return (error);
}

int
_mtx_lock_ex_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	return(__mtx_lock_ex(mtx, link, flags, to));
}

int
_mtx_lock_ex(mtx_t *mtx, int flags, int to)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_ex(mtx, &link, flags, to));
}

int
_mtx_lock_ex_quick(mtx_t *mtx)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_ex(mtx, &link, 0, 0));
}
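
/*
 * Illustrative asynchronous request (a hedged sketch).  Arming the link's
 * callback before the lock call is what makes the request asynchronous in
 * the code above; whether a helper such as mtx_link_init_async() exists to
 * do this is an assumption, so the fields are set directly here:
 *
 *	static void
 *	my_callback(mtx_link_t *link, void *arg, int error)
 *	{
 *		// error == 0 when the lock is granted, ENOLCK if aborted
 *	}
 *
 *	mtx_link_t link;
 *
 *	mtx_link_init(&link);
 *	link.callback = my_callback;
 *	link.arg = NULL;
 *	error = _mtx_lock_ex_link(&mymtx, &link, 0, 0);
 *	// 0           -> acquired immediately, callback is NOT issued
 *	// EINPROGRESS -> queued; callback fires when granted or aborted
 */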

/*
 * Share-lock a mutex, block until acquired.  Recursion is allowed.
 *
 * Returns 0 on success, or the tsleep() return code on failure.
 * An error can only be returned if PCATCH is specified in the flags.
 *
 * NOTE: Shared locks get a mass-wakeup so if the tsleep fails we
 *	 do not have to chain the wakeup().
 */
static __inline int
__mtx_lock_sh(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	thread_t td;
	u_int	lock;
	u_int	nlock;
	int	error;
	int	isasync;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		if (lock == 0) {
			nlock = 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				error = 0;
				link->state = MTX_LINK_ACQUIRED;
				break;
			}
			continue;
		}
		if ((lock & (MTX_EXCLUSIVE | MTX_EXWANTED)) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = 0;
				link->state = MTX_LINK_ACQUIRED;
				break;
			}
			continue;
		}

		/*
		 * We need MTX_LINKSPIN to manipulate exlink or
		 * shlink.
		 *
		 * We must set MTX_SHWANTED with MTX_LINKSPIN to indicate
		 * pending shared requests.  It cannot be set as a separate
		 * operation prior to acquiring MTX_LINKSPIN.
		 *
		 * To avoid unnecessary cpu cache traffic we poll
		 * for collisions.  It is also possible that EXWANTED
		 * state failing the above test was spurious, so all the
		 * tests must be repeated if we cannot obtain LINKSPIN
		 * with the prior state tests intact (i.e. don't reload
		 * the (lock) variable here, for heaven's sake!).
		 */
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		td = curthread;
		nlock = lock | MTX_SHWANTED | MTX_LINKSPIN;
		++td->td_critcount;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock) == 0) {
			--td->td_critcount;
			continue;
		}

		/*
		 * Check for early abort.
		 */
		if (link->state == MTX_LINK_ABORTED) {
			if (mtx->mtx_shlink == NULL) {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN |
						 MTX_SHWANTED);
			} else {
				atomic_clear_int(&mtx->mtx_lock,
						 MTX_LINKSPIN);
			}
			--td->td_critcount;
			link->state = MTX_LINK_IDLE;
			error = ENOLCK;
			break;
		}

		/*
		 * Add our link to the shlink list and release LINKSPIN.
		 */
		link->owner = td;
		link->state = MTX_LINK_LINKED_SH;
		if (mtx->mtx_shlink) {
			link->next = mtx->mtx_shlink;
			link->prev = link->next->prev;
			link->next->prev = link;
			link->prev->next = link;
		} else {
			link->next = link;
			link->prev = link;
			mtx->mtx_shlink = link;
		}
		isasync = (link->callback != NULL);
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN);
		--td->td_critcount;

		/*
		 * If asynchronous lock request return without
		 * blocking, leave link structure linked.
		 */
		if (isasync) {
			error = EINPROGRESS;
			break;
		}

		/*
		 * Wait for lock
		 */
		error = mtx_wait_link(mtx, link, flags, to);
		break;
	}
	return (error);
}

int
_mtx_lock_sh_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	return(__mtx_lock_sh(mtx, link, flags, to));
}

int
_mtx_lock_sh(mtx_t *mtx, int flags, int to)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_sh(mtx, &link, flags, to));
}

int
_mtx_lock_sh_quick(mtx_t *mtx)
{
	mtx_link_t link;

	mtx_link_init(&link);
	return(__mtx_lock_sh(mtx, &link, 0, 0));
}

/*
 * Get an exclusive spinlock the hard way.
 */
void
_mtx_spinlock(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;
	int	bb = 1;
	int	bo;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			/* MWAIT here */
			if (bb < 1000)
				++bb;
			cpu_pause();
			for (bo = 0; bo < bb; ++bo)
				;
		}
		cpu_pause();
	}
}

/*
 * Attempt to acquire a spinlock, if we fail we must undo the
 * gd->gd_spinlocks/gd->gd_curthread->td_critcount predisposition.
 *
 * Returns 0 on success, EAGAIN on failure.
 */
int
_mtx_spinlock_try(mtx_t *mtx)
{
	globaldata_t gd = mycpu;
	u_int	lock;
	u_int	nlock;
	int	res = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = gd->gd_curthread;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == gd->gd_curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			--gd->gd_spinlocks;
			cpu_ccfence();
			--gd->gd_curthread->td_critcount;
			res = EAGAIN;
			break;
		}
		cpu_pause();
	}
	return res;
}

#if 0

void
_mtx_spinlock_sh(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;
	int	bb = 1;
	int	bo;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			/* MWAIT here */
			if (bb < 1000)
				++bb;
			cpu_pause();
			for (bo = 0; bo < bb; ++bo)
				;
		}
		cpu_pause();
	}
}

#endif

int
_mtx_lock_ex_try(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;
	int	error;

	for (;;) {
		lock = mtx->mtx_lock;
		if (lock == 0) {
			nlock = MTX_EXCLUSIVE | 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, 0, nlock)) {
				mtx->mtx_owner = curthread;
				error = 0;
				break;
			}
		} else if ((lock & MTX_EXCLUSIVE) &&
			   mtx->mtx_owner == curthread) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				error = 0;
				break;
			}
		} else {
			error = EAGAIN;
			break;
		}
		cpu_pause();
	}
	return (error);
}

int
_mtx_lock_sh_try(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;
	int	error = 0;

	for (;;) {
		lock = mtx->mtx_lock;
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) != MTX_MASK);
			nlock = lock + 1;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
		} else {
			error = EAGAIN;
			break;
		}
		cpu_pause();
	}
	return (error);
}

/*
 * If the lock is held exclusively it must be owned by the caller.  If the
 * lock is already a shared lock this operation is a NOP.  A panic will
 * occur if the lock is not held either shared or exclusive.
 *
 * The exclusive count is converted to a shared count.
 */
void
_mtx_downgrade(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		/*
		 * NOP if already shared.
		 */
		if ((lock & MTX_EXCLUSIVE) == 0) {
			KKASSERT((lock & MTX_MASK) > 0);
			break;
		}

		/*
		 * Transfer count to shared.  Any additional pending shared
		 * waiters must be woken up.
		 */
		if (lock & MTX_SHWANTED) {
			if (mtx_chain_link_sh(mtx, lock, 1))
				break;
			/* retry */
		} else {
			nlock = lock & ~MTX_EXCLUSIVE;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				break;
			/* retry */
		}
		cpu_pause();
	}
}

/*
 * Upgrade a shared lock to an exclusive lock.  The upgrade will fail if
 * the shared lock has a count other than 1.  Optimize the most likely case
 * but note that a single cmpset can fail due to WANTED races.
 *
 * If the lock is held exclusively it must be owned by the caller and
 * this function will simply return without doing anything.  A panic will
 * occur if the lock is held exclusively by someone other than the caller.
 *
 * Returns 0 on success, EDEADLK on failure.
 */
int
_mtx_upgrade_try(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;
	int	error = 0;

	for (;;) {
		lock = mtx->mtx_lock;

		if ((lock & ~MTX_EXWANTED) == 1) {
			nlock = lock | MTX_EXCLUSIVE;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock)) {
				mtx->mtx_owner = curthread;
				break;
			}
		} else if (lock & MTX_EXCLUSIVE) {
			KKASSERT(mtx->mtx_owner == curthread);
			break;
		} else {
			error = EDEADLK;
			break;
		}
		cpu_pause();
	}
	return (error);
}
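
/*
 * Illustrative upgrade pattern (a sketch, not prescriptive).  Because the
 * try-upgrade above fails with EDEADLK when other shared holders exist, a
 * caller that must have exclusive access typically drops the shared lock,
 * relocks exclusively, and re-validates any state examined while shared:
 *
 *	if (_mtx_upgrade_try(&mymtx) == EDEADLK) {
 *		_mtx_unlock(&mymtx);		// release the shared count
 *		_mtx_lock_ex_quick(&mymtx);	// reacquire exclusively
 *		// state may have changed while unlocked; re-check it
 *	}
 */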

/*
 * Unlock a lock.  The caller must hold the lock either shared or exclusive.
 *
 * On the last release we handle any pending chains.
 */
void
_mtx_unlock(mtx_t *mtx)
{
	u_int	lock;
	u_int	nlock;

	for (;;) {
		lock = mtx->mtx_lock;
		cpu_ccfence();

		switch(lock) {
		case MTX_EXCLUSIVE | 1:
			/*
			 * Last release, exclusive lock.
			 * No exclusive or shared requests pending.
			 */
			mtx->mtx_owner = NULL;
			nlock = 0;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		case MTX_EXCLUSIVE | MTX_EXWANTED | 1:
		case MTX_EXCLUSIVE | MTX_EXWANTED | MTX_SHWANTED | 1:
			/*
			 * Last release, exclusive lock.
			 * Exclusive requests pending.
			 * Exclusive requests have priority over shared reqs.
			 */
			if (mtx_chain_link_ex(mtx, lock))
				goto done;
			break;
		case MTX_EXCLUSIVE | MTX_SHWANTED | 1:
			/*
			 * Last release, exclusive lock.
			 *
			 * Shared requests are pending.  Transfer our count (1)
			 * to the first shared request, wakeup all shared reqs.
			 */
			if (mtx_chain_link_sh(mtx, lock, 0))
				goto done;
			break;
		case 1:
			/*
			 * Last release, shared lock.
			 * No exclusive or shared requests pending.
			 */
			nlock = 0;
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		case MTX_EXWANTED | 1:
		case MTX_EXWANTED | MTX_SHWANTED | 1:
			/*
			 * Last release, shared lock.
			 *
			 * Exclusive requests are pending.  Transfer our
			 * count (1) to the next exclusive request.
			 *
			 * Exclusive requests have priority over shared reqs.
			 */
			if (mtx_chain_link_ex(mtx, lock))
				goto done;
			break;
		case MTX_SHWANTED | 1:
			/*
			 * Last release, shared lock.
			 * Shared requests pending.
			 */
			if (mtx_chain_link_sh(mtx, lock, 0))
				goto done;
			break;
		default:
			/*
			 * We have to loop if this is the last release but
			 * someone is fiddling with LINKSPIN.
			 */
			if ((lock & MTX_MASK) == 1) {
				KKASSERT(lock & MTX_LINKSPIN);
				break;
			}

			/*
			 * Not the last release (shared or exclusive)
			 */
			nlock = lock - 1;
			KKASSERT((nlock & MTX_MASK) != MTX_MASK);
			if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
				goto done;
			break;
		}
		/* loop try again */
		cpu_pause();
	}
done:
	;
}

/*
 * Chain pending links.  Called on the last release of an exclusive or
 * shared lock when the appropriate WANTED bit is set.  mtx_lock old state
 * is passed in with the count left at 1, which we can inherit, and other
 * bits which we must adjust in a single atomic operation.
 *
 * Return non-zero on success, 0 if caller needs to retry.
 *
 * NOTE: It's ok if MTX_EXWANTED is in an indeterminate state while we are
 *	 acquiring LINKSPIN as all other cases will also need to acquire
 *	 LINKSPIN when handling the EXWANTED case.
 */
static int
mtx_chain_link_ex(mtx_t *mtx, u_int olock)
{
	thread_t td = curthread;
	mtx_link_t *link;
	u_int	nlock;

	olock &= ~MTX_LINKSPIN;
	nlock = olock | MTX_LINKSPIN | MTX_EXCLUSIVE;
	++td->td_critcount;
	if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
		link = mtx->mtx_exlink;
		KKASSERT(link != NULL);
		if (link->next == link) {
			mtx->mtx_exlink = NULL;
			nlock = MTX_LINKSPIN | MTX_EXWANTED;	/* to clear */
		} else {
			mtx->mtx_exlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
			nlock = MTX_LINKSPIN;			/* to clear */
		}
		KKASSERT(link->state == MTX_LINK_LINKED_EX);
		mtx->mtx_owner = link->owner;
		cpu_sfence();

		/*
		 * WARNING! The callback can only be safely
		 *	    made with LINKSPIN still held
		 *	    and in a critical section.
		 *
		 * WARNING! The link can go away after the
		 *	    state is set, or after the
		 *	    callback.
		 */
		if (link->callback) {
			link->state = MTX_LINK_CALLEDBACK;
			link->callback(link, link->arg, 0);
		} else {
			link->state = MTX_LINK_ACQUIRED;
			wakeup(link);
		}
		atomic_clear_int(&mtx->mtx_lock, nlock);
		--td->td_critcount;
		return 1;
	}
	/* retry */
	--td->td_critcount;
	return 0;
}

/*
 * Flush waiting shared locks.  The lock's prior state is passed in and must
 * be adjusted atomically only if it matches.
 *
 * If addcount is 0, the count for the first shared lock in the chain is
 * assumed to have already been accounted for.
 *
 * If addcount is 1, the count for the first shared lock in the chain has
 * not yet been accounted for.
 */
static int
mtx_chain_link_sh(mtx_t *mtx, u_int olock, int addcount)
{
	thread_t td = curthread;
	mtx_link_t *link;
	u_int	nlock;

	olock &= ~MTX_LINKSPIN;
	nlock = olock | MTX_LINKSPIN;
	nlock &= ~MTX_EXCLUSIVE;
	++td->td_critcount;
	if (atomic_cmpset_int(&mtx->mtx_lock, olock, nlock)) {
		KKASSERT(mtx->mtx_shlink != NULL);
		for (;;) {
			link = mtx->mtx_shlink;
			atomic_add_int(&mtx->mtx_lock, addcount);
			KKASSERT(link->state == MTX_LINK_LINKED_SH);
			if (link->next == link) {
				mtx->mtx_shlink = NULL;
				cpu_sfence();

				/*
				 * WARNING! The callback can only be safely
				 *	    made with LINKSPIN still held
				 *	    and in a critical section.
				 *
				 * WARNING! The link can go away after the
				 *	    state is set, or after the
				 *	    callback.
				 */
				if (link->callback) {
					link->state = MTX_LINK_CALLEDBACK;
					link->callback(link, link->arg, 0);
				} else {
					link->state = MTX_LINK_ACQUIRED;
					wakeup(link);
				}
				break;
			}
			mtx->mtx_shlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
			cpu_sfence();
			link->state = MTX_LINK_ACQUIRED;
			/* link can go away */
			wakeup(link);
			addcount = 1;
		}
		atomic_clear_int(&mtx->mtx_lock, MTX_LINKSPIN |
						 MTX_SHWANTED);
		--td->td_critcount;
		return 1;
	}
	/* retry */
	--td->td_critcount;
	return 0;
}

/*
 * Delete a link structure after tsleep has failed.  This code is not
 * in the critical path as most exclusive waits are chained.
 */
static
void
mtx_delete_link(mtx_t *mtx, mtx_link_t *link)
{
	thread_t td = curthread;
	u_int	lock;
	u_int	nlock;

	/*
	 * Acquire MTX_LINKSPIN.
	 *
	 * Do not use cmpxchg to wait for LINKSPIN to clear as this might
	 * result in too much cpu cache traffic.
	 */
	++td->td_critcount;
	for (;;) {
		lock = mtx->mtx_lock;
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		nlock = lock | MTX_LINKSPIN;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
			break;
		cpu_pause();
	}

	/*
	 * Delete the link and release LINKSPIN.
	 */
	nlock = MTX_LINKSPIN;	/* to clear */

	switch(link->state) {
	case MTX_LINK_LINKED_EX:
		if (link->next == link) {
			mtx->mtx_exlink = NULL;
			nlock |= MTX_EXWANTED;	/* to clear */
		} else {
			mtx->mtx_exlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}
		break;
	case MTX_LINK_LINKED_SH:
		if (link->next == link) {
			mtx->mtx_shlink = NULL;
			nlock |= MTX_SHWANTED;	/* to clear */
		} else {
			mtx->mtx_shlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}
		break;
	default:
		/* no change */
		break;
	}
	atomic_clear_int(&mtx->mtx_lock, nlock);
	--td->td_critcount;
}

/*
 * Wait for async lock completion or abort.  Returns ENOLCK if an abort
 * occurred.
 */
int
mtx_wait_link(mtx_t *mtx, mtx_link_t *link, int flags, int to)
{
	int error;

	/*
	 * Sleep.  Handle false wakeups, interruptions, etc.
	 * The link may also have been aborted.
	 */
	error = 0;
	while (link->state & MTX_LINK_LINKED) {
		tsleep_interlock(link, 0);
		cpu_lfence();
		if (link->state & MTX_LINK_LINKED) {
			if (link->state & MTX_LINK_LINKED_SH)
				mycpu->gd_cnt.v_lock_name[0] = 'S';
			else
				mycpu->gd_cnt.v_lock_name[0] = 'X';
			strncpy(mycpu->gd_cnt.v_lock_name + 1,
				mtx->mtx_ident,
				sizeof(mycpu->gd_cnt.v_lock_name) - 2);
			++mycpu->gd_cnt.v_lock_colls;

			error = tsleep(link, flags | PINTERLOCKED,
				       mtx->mtx_ident, to);
			if (error)
				break;
		}
	}

	/*
	 * We are done, make sure the link structure is unlinked.
	 * It may still be on the list due to e.g. EINTR or
	 * EWOULDBLOCK.
	 *
	 * It is possible for the tsleep to race an ABORT and cause
	 * error to be 0.
	 *
	 * The tsleep() can be woken up for numerous reasons and error
	 * might be zero in situations where we intend to return an error.
	 *
	 * (This is the synchronous case so state cannot be CALLEDBACK)
	 */
	switch(link->state) {
	case MTX_LINK_ACQUIRED:
	case MTX_LINK_CALLEDBACK:
		error = 0;
		break;
	case MTX_LINK_ABORTED:
		error = ENOLCK;
		break;
	case MTX_LINK_LINKED_EX:
	case MTX_LINK_LINKED_SH:
		mtx_delete_link(mtx, link);
		/* fall through */
	default:
		if (error == 0)
			error = EWOULDBLOCK;
		break;
	}

	/*
	 * Clear state on status returned.
	 */
	link->state = MTX_LINK_IDLE;

	return error;
}

/*
 * Abort a mutex locking operation, causing mtx_lock_ex_link() to
 * return ENOLCK.  This may be called at any time after the mtx_link
 * is initialized or the status from a previous lock has been
 * returned.  If called prior to the next (non-try) lock attempt, the
 * next lock attempt using this link structure will abort instantly.
 *
 * Caller must still wait for the operation to complete, either from a
 * blocking call that is still in progress or by calling mtx_wait_link().
 *
 * If an asynchronous lock request is possibly in-progress, the caller
 * should call mtx_wait_link() synchronously.  Note that the asynchronous
 * lock callback will NOT be called if a successful abort occurred. XXX
 */
void
mtx_abort_link(mtx_t *mtx, mtx_link_t *link)
{
	thread_t td = curthread;
	u_int	lock;
	u_int	nlock;

	/*
	 * Acquire MTX_LINKSPIN
	 */
	++td->td_critcount;
	for (;;) {
		lock = mtx->mtx_lock;
		if (lock & MTX_LINKSPIN) {
			cpu_pause();
			continue;
		}
		nlock = lock | MTX_LINKSPIN;
		if (atomic_cmpset_int(&mtx->mtx_lock, lock, nlock))
			break;
		cpu_pause();
	}

	/*
	 * Do the abort.
	 *
	 * WARNING! Link structure can disappear once link->state is set.
	 */
	nlock = MTX_LINKSPIN;	/* to clear */

	switch(link->state) {
	case MTX_LINK_IDLE:
		/*
		 * Link not started yet
		 */
		link->state = MTX_LINK_ABORTED;
		break;
	case MTX_LINK_LINKED_EX:
		/*
		 * de-link, mark aborted, and potentially wakeup the thread
		 * or issue the callback.
		 */
		if (link->next == link) {
			if (mtx->mtx_exlink == link) {
				mtx->mtx_exlink = NULL;
				nlock |= MTX_EXWANTED;	/* to clear */
			}
		} else {
			if (mtx->mtx_exlink == link)
				mtx->mtx_exlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}

		/*
		 * When aborting the async callback is still made.  We must
		 * not set the link status to ABORTED in the callback case
		 * since there is nothing else to clear its status if the
		 * link is reused.
		 */
		if (link->callback) {
			link->state = MTX_LINK_CALLEDBACK;
			link->callback(link, link->arg, ENOLCK);
		} else {
			link->state = MTX_LINK_ABORTED;
			wakeup(link);
		}
		break;
	case MTX_LINK_LINKED_SH:
		/*
		 * de-link, mark aborted, and potentially wakeup the thread
		 * or issue the callback.
		 */
		if (link->next == link) {
			if (mtx->mtx_shlink == link) {
				mtx->mtx_shlink = NULL;
				nlock |= MTX_SHWANTED;	/* to clear */
			}
		} else {
			if (mtx->mtx_shlink == link)
				mtx->mtx_shlink = link->next;
			link->next->prev = link->prev;
			link->prev->next = link->next;
		}

		/*
		 * When aborting the async callback is still made.  We must
		 * not set the link status to ABORTED in the callback case
		 * since there is nothing else to clear its status if the
		 * link is reused.
		 */
		if (link->callback) {
			link->state = MTX_LINK_CALLEDBACK;
			link->callback(link, link->arg, ENOLCK);
		} else {
			link->state = MTX_LINK_ABORTED;
			wakeup(link);
		}
		break;
	case MTX_LINK_ACQUIRED:
	case MTX_LINK_CALLEDBACK:
		/*
		 * Too late, the lock was acquired.  Let it complete.
		 */
		break;
	default:
		/*
		 * link already aborted, do nothing.
		 */
		break;
	}
	atomic_clear_int(&mtx->mtx_lock, nlock);
	--td->td_critcount;
}
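
/*
 * Illustrative abort sequence (a hedged sketch based on the comments
 * above).  After aborting a pending or possibly-async request the caller
 * still waits for it to resolve; ENOLCK means the abort took effect,
 * while 0 means the lock won the race and must be released normally:
 *
 *	mtx_abort_link(&mymtx, &link);
 *	error = mtx_wait_link(&mymtx, &link, 0, 0);
 *	if (error == 0)
 *		_mtx_unlock(&mymtx);	// abort lost the race; we own the lock
 */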