/* sys/kern/kern_timeout.c */
/*
 * Copyright (c) 2004,2014,2019-2020 The DragonFly Project.
 * All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * Copyright (c) 1982, 1986, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * The original callout mechanism was based on the work of Adam M. Costello
 * and George Varghese, published in a technical report entitled "Redesigning
 * the BSD Callout and Timer Facilities" and modified slightly for inclusion
 * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
 * used in this implementation was published by G. Varghese and T. Lauck in
 * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
 * the Efficient Implementation of a Timer Facility" in the Proceedings of
 * the 11th ACM Annual Symposium on Operating Systems Principles,
 * Austin, Texas Nov 1987.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/spinlock.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/interrupt.h>
#include <sys/thread.h>
#include <sys/sysctl.h>
#include <sys/exislock.h>
#include <vm/vm_extern.h>
#include <machine/atomic.h>

#include <sys/spinlock2.h>
#include <sys/thread2.h>
#include <sys/mplock2.h>
#include <sys/exislock2.h>

TAILQ_HEAD(colist, _callout);
struct softclock_pcpu;

/*
 * DID_INIT	- Sanity check
 * PREVENTED	- A callback was prevented
 * RESET	- Callout_reset requested
 * STOP		- Callout_stop requested
 * INPROG	- Softclock_handler thread processing in-progress on callout,
 *		  queue linkage is indeterminate.  Third parties must queue
 *		  a STOP or CANCEL and await completion.
 * SET		- Callout is linked to queue (if INPROG not set)
 * AUTOLOCK	- Lockmgr cancelable interlock (copied from frontend)
 * MPSAFE	- Callout is MPSAFE (copied from frontend)
 * CANCEL	- callout_cancel requested
 * ACTIVE	- active/inactive (frontend only, see documentation).
 *		  This is *NOT* the same as whether a callout is queued or
 *		  not.
 */
#define CALLOUT_DID_INIT	0x00000001	/* frontend */
#define CALLOUT_PREVENTED	0x00000002	/* backend */
#define CALLOUT_FREELIST	0x00000004	/* backend */
#define CALLOUT_UNUSED0008	0x00000008
#define CALLOUT_UNUSED0010	0x00000010
#define CALLOUT_RESET		0x00000020	/* backend */
#define CALLOUT_STOP		0x00000040	/* backend */
#define CALLOUT_INPROG		0x00000080	/* backend */
#define CALLOUT_SET		0x00000100	/* backend */
#define CALLOUT_AUTOLOCK	0x00000200	/* both */
#define CALLOUT_MPSAFE		0x00000400	/* both */
#define CALLOUT_CANCEL		0x00000800	/* backend */
#define CALLOUT_ACTIVE		0x00001000	/* frontend */

struct wheel {
	struct spinlock	spin;
	struct colist	list;
};

struct softclock_pcpu {
	struct wheel	*callwheel;
	struct _callout *running;
	struct _callout * volatile next;
	struct colist	freelist;
	int		softticks;	/* softticks index */
	int		curticks;	/* per-cpu ticks counter */
	int		isrunning;
	struct thread	thread;
};

typedef struct softclock_pcpu *softclock_pcpu_t;

static int callout_debug = 0;
SYSCTL_INT(_debug, OID_AUTO, callout_debug, CTLFLAG_RW,
	   &callout_debug, 0, "");

static MALLOC_DEFINE(M_CALLOUT, "callouts", "softclock callouts");

static int cwheelsize;
static int cwheelmask;
static softclock_pcpu_t softclock_pcpu_ary[MAXCPU];

static void softclock_handler(void *arg);
static void slotimer_callback(void *arg);

/*
 * Handle pending requests.  No action can be taken if the callout is still
 * flagged INPROG.  Called from softclock for post-processing and from
 * various API functions.
 *
 * This routine does not block in any way.
 * Caller must hold c->spin.
 *
 * NOTE: Flags can be adjusted without holding c->spin, so atomic ops
 *	 must be used at all times.
 *
 * NOTE: The related (sc) might refer to another cpu.
 *
 * NOTE: The cc-vs-c frontend-vs-backend might be disconnected during the
 *	 operation, but the EXIS lock prevents (c) from being destroyed.
 */
static __inline
void
_callout_update_spinlocked(struct _callout *c)
{
	struct wheel *wheel;

	if ((c->flags & CALLOUT_INPROG) && curthread != &c->qsc->thread) {
		/*
		 * If the callout is in-progress the SET queuing state is
		 * indeterminate and no action can be taken at this time.
		 *
		 * (however, recursive calls from the call-back are not
		 * indeterminate and must be processed at this time).
		 */
		/* nop */
	} else if (c->flags & CALLOUT_SET) {
		/*
		 * If the callout is SET it is queued on a callwheel, process
		 * various requests relative to it being in this queued state.
		 *
		 * c->q* fields are stable while we hold c->spin and
		 * wheel->spin.
		 */
		softclock_pcpu_t sc;

		sc = c->qsc;
		wheel = &sc->callwheel[c->qtick & cwheelmask];
		spin_lock(&wheel->spin);

		if ((c->flags & CALLOUT_INPROG) &&
		    curthread != &c->qsc->thread) {
			/*
			 * Raced against INPROG getting set by the softclock
			 * handler while we were acquiring wheel->spin.  We
			 * can do nothing at this time.
			 *
			 * (however, recursive calls from the call-back are
			 * not indeterminate and must be processed at this
			 * time).
			 */
			/* nop */
		} else if (c->flags & CALLOUT_CANCEL) {
			/*
			 * CANCEL requests override everything else.
			 */
			if (sc->next == c)
				sc->next = TAILQ_NEXT(c, entry);
			TAILQ_REMOVE(&wheel->list, c, entry);
			atomic_clear_int(&c->flags, CALLOUT_SET |
						    CALLOUT_STOP |
						    CALLOUT_CANCEL |
						    CALLOUT_RESET);
			atomic_set_int(&c->flags, CALLOUT_PREVENTED);
			if (c->waiters)
				wakeup(c);
		} else if (c->flags & CALLOUT_RESET) {
			/*
			 * RESET requests reload the callout, potentially
			 * to a different cpu.  Once removed from the wheel,
			 * the retention of c->spin prevents further races.
			 *
			 * Leave SET intact.
			 */
			if (sc->next == c)
				sc->next = TAILQ_NEXT(c, entry);
			TAILQ_REMOVE(&wheel->list, c, entry);
			spin_unlock(&wheel->spin);

			atomic_clear_int(&c->flags, CALLOUT_RESET);
			sc = c->rsc;
			c->qsc = sc;
			c->qarg = c->rarg;
			c->qfunc = c->rfunc;
			c->qtick = c->rtick;

			/*
			 * Do not queue to a current or past wheel slot or
			 * the callout will be lost for ages.  Handle
			 * potential races against soft ticks.
			 */
			wheel = &sc->callwheel[c->qtick & cwheelmask];
			spin_lock(&wheel->spin);
			while (c->qtick - sc->softticks <= 0) {
				c->qtick = sc->softticks + 1;
				spin_unlock(&wheel->spin);
				wheel = &sc->callwheel[c->qtick & cwheelmask];
				spin_lock(&wheel->spin);
			}
			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
		} else if (c->flags & CALLOUT_STOP) {
			/*
			 * STOP request simply unloads the callout.
			 */
			if (sc->next == c)
				sc->next = TAILQ_NEXT(c, entry);
			TAILQ_REMOVE(&wheel->list, c, entry);
			atomic_clear_int(&c->flags, CALLOUT_STOP |
						    CALLOUT_SET);

			atomic_set_int(&c->flags, CALLOUT_PREVENTED);
			if (c->waiters)
				wakeup(c);
		} else {
			/*
			 * Do nothing if no request is pending.
			 */
			/* nop */
		}
		spin_unlock(&wheel->spin);
	} else {
		/*
		 * If the callout is not SET it is not queued to any callwheel,
		 * process various requests relative to it not being queued.
		 *
		 * c->q* fields are stable while we hold c->spin.
		 */
		if (c->flags & CALLOUT_CANCEL) {
			/*
			 * CANCEL requests override everything else.
			 *
			 * There is no state being canceled in this case,
			 * so do not set the PREVENTED flag.
			 */
			atomic_clear_int(&c->flags, CALLOUT_STOP |
						    CALLOUT_CANCEL |
						    CALLOUT_RESET);
			if (c->waiters)
				wakeup(c);
		} else if (c->flags & CALLOUT_RESET) {
			/*
			 * RESET requests get queued.  Do not queue to the
			 * currently-processing tick.
			 */
			softclock_pcpu_t sc;

			sc = c->rsc;
			c->qsc = sc;
			c->qarg = c->rarg;
			c->qfunc = c->rfunc;
			c->qtick = c->rtick;

			/*
			 * Do not queue to current or past wheel or the
			 * callout will be lost for ages.
			 */
			wheel = &sc->callwheel[c->qtick & cwheelmask];
			spin_lock(&wheel->spin);
			while (c->qtick - sc->softticks <= 0) {
				c->qtick = sc->softticks + 1;
				spin_unlock(&wheel->spin);
				wheel = &sc->callwheel[c->qtick & cwheelmask];
				spin_lock(&wheel->spin);
			}
			TAILQ_INSERT_TAIL(&wheel->list, c, entry);
			atomic_clear_int(&c->flags, CALLOUT_RESET);
			atomic_set_int(&c->flags, CALLOUT_SET);
			spin_unlock(&wheel->spin);
		} else if (c->flags & CALLOUT_STOP) {
			/*
			 * STOP requests.
			 *
			 * There is no state being stopped in this case,
			 * so do not set the PREVENTED flag.
			 */
			atomic_clear_int(&c->flags, CALLOUT_STOP);
			if (c->waiters)
				wakeup(c);
		} else {
			/*
			 * No request pending (someone else processed the
			 * request before we could)
			 */
			/* nop */
		}
	}
}

static __inline
void
_callout_free(struct _callout *c)
{
	softclock_pcpu_t sc;

	sc = softclock_pcpu_ary[mycpu->gd_cpuid];

	crit_enter();
	exis_terminate(&c->exis);
	atomic_set_int(&c->flags, CALLOUT_FREELIST);
	atomic_clear_int(&c->flags, CALLOUT_DID_INIT);
	TAILQ_INSERT_TAIL(&sc->freelist, c, entry);
	crit_exit();
}

/*
 * System init
 */
static void
swi_softclock_setup(void *arg)
{
	int cpu;
	int i;
	int target;

	/*
	 * Figure out how large a callwheel we need.  It must be a power of 2.
	 *
	 * ncallout is primarily based on available memory, don't explode
	 * the allocations if the system has a lot of cpus.
	 */
	target = ncallout / ncpus + 16;

	cwheelsize = 1;
	while (cwheelsize < target)
		cwheelsize <<= 1;
	cwheelmask = cwheelsize - 1;
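
	/*
	 * Illustrative sizing example (hypothetical numbers, not taken from
	 * any particular system): with ncallout = 16384 and ncpus = 8 the
	 * target is 16384 / 8 + 16 = 2064, so the loop above rounds up to
	 * the next power of two, cwheelsize = 4096, cwheelmask = 0x0fff.
	 * The wheel slot for a given tick is then simply
	 * (tick & cwheelmask).
	 */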

	/*
	 * Initialize per-cpu data structures.
	 */
	for (cpu = 0; cpu < ncpus; ++cpu) {
		softclock_pcpu_t sc;
		int wheel_sz;

		sc = (void *)kmem_alloc3(kernel_map, sizeof(*sc),
					 VM_SUBSYS_GD, KM_CPU(cpu));
		memset(sc, 0, sizeof(*sc));
		TAILQ_INIT(&sc->freelist);
		softclock_pcpu_ary[cpu] = sc;

		wheel_sz = sizeof(*sc->callwheel) * cwheelsize;
		sc->callwheel = (void *)kmem_alloc3(kernel_map, wheel_sz,
						    VM_SUBSYS_GD, KM_CPU(cpu));
		memset(sc->callwheel, 0, wheel_sz);
		for (i = 0; i < cwheelsize; ++i) {
			spin_init(&sc->callwheel[i].spin, "wheel");
			TAILQ_INIT(&sc->callwheel[i].list);
		}

		/*
		 * Mark the softclock handler as being an interrupt thread
		 * even though it really isn't, but do not allow it to
		 * preempt other threads (do not assign td_preemptable).
		 *
		 * Kernel code now assumes that callouts do not preempt
		 * the cpu they were scheduled on.
		 */
		lwkt_create(softclock_handler, sc, NULL, &sc->thread,
			    TDF_NOSTART | TDF_INTTHREAD,
			    cpu, "softclock %d", cpu);
	}
}

/*
 * Must occur after ncpus has been initialized.
 */
SYSINIT(softclock_setup, SI_BOOT2_SOFTCLOCK, SI_ORDER_SECOND,
	swi_softclock_setup, NULL);

/*
 * This routine is called from the hardclock() (basically a FASTint/IPI) on
 * each cpu in the system.  sc->curticks is this cpu's notion of the timebase.
 * It IS NOT NECESSARILY SYNCHRONIZED WITH 'ticks'!  sc->softticks is where
 * the callwheel is currently indexed.
 *
 * sc->softticks is adjusted by either this routine or our helper thread
 * depending on whether the helper thread is running or not.
 *
 * sc->curticks and sc->softticks are adjusted using atomic ops in order
 * to ensure that remote cpu callout installation does not race the thread.
 */
void
hardclock_softtick(globaldata_t gd)
{
	softclock_pcpu_t sc;
	struct wheel *wheel;

	sc = softclock_pcpu_ary[gd->gd_cpuid];
	atomic_add_int(&sc->curticks, 1);
	if (sc->isrunning)
		return;
	if (sc->softticks == sc->curticks) {
		/*
		 * In sync, only wakeup the thread if there is something to
		 * do.
		 */
		wheel = &sc->callwheel[sc->softticks & cwheelmask];
		spin_lock(&wheel->spin);
		if (TAILQ_FIRST(&wheel->list)) {
			sc->isrunning = 1;
			spin_unlock(&wheel->spin);
			lwkt_schedule(&sc->thread);
		} else {
			atomic_add_int(&sc->softticks, 1);
			spin_unlock(&wheel->spin);
		}
	} else {
		/*
		 * out of sync, wakeup the thread unconditionally so it can
		 * catch up.
		 */
		sc->isrunning = 1;
		lwkt_schedule(&sc->thread);
	}
}

/*
 * This procedure is the main loop of our per-cpu helper thread.  The
 * sc->isrunning flag prevents us from racing hardclock_softtick().
 *
 * The thread starts with the MP lock released and not in a critical
 * section.  The loop itself is MP safe while individual callbacks
 * may or may not be, so we obtain or release the MP lock as appropriate.
 */
static void
softclock_handler(void *arg)
{
	softclock_pcpu_t sc;
	struct _callout *c;
	struct wheel *wheel;
	struct callout slotimer1;
	struct _callout slotimer2;
	int mpsafe = 1;

	/*
	 * Setup pcpu slow clocks which we want to run from the callout
	 * thread.  This thread starts very early and cannot kmalloc(),
	 * so use internal functions to supply the _callout.
	 */
	_callout_setup_quick(&slotimer1, &slotimer2, hz * 10,
			     slotimer_callback, &slotimer1);

	/*
	 * Run the callout thread at the same priority as other kernel
	 * threads so it can be round-robined.
	 */
	/*lwkt_setpri_self(TDPRI_SOFT_NORM);*/

	sc = arg;
loop:
	while (sc->softticks != (int)(sc->curticks + 1)) {
		wheel = &sc->callwheel[sc->softticks & cwheelmask];

		spin_lock(&wheel->spin);
		sc->next = TAILQ_FIRST(&wheel->list);
		while ((c = sc->next) != NULL) {
			int error;

			/*
			 * Match callouts for this tick.
			 */
			sc->next = TAILQ_NEXT(c, entry);
			if (c->qtick != sc->softticks)
				continue;

			/*
			 * Double check the validity of the callout, detect
			 * if the originator's structure has been ripped out.
			 */
			if ((uintptr_t)c->verifier < VM_MAX_USER_ADDRESS) {
				spin_unlock(&wheel->spin);
				panic("_callout %p verifier %p failed "
				      "func %p/%p\n",
				      c, c->verifier, c->rfunc, c->qfunc);
			}

			if (c->verifier->toc != c) {
				spin_unlock(&wheel->spin);
				panic("_callout %p verifier %p failed "
				      "func %p/%p\n",
				      c, c->verifier, c->rfunc, c->qfunc);
			}

			/*
			 * The wheel spinlock is sufficient to set INPROG and
			 * remove (c) from the list.  Once INPROG is set,
			 * other threads can only make limited changes to (c).
			 *
			 * Setting INPROG masks SET tests in all other
			 * conditionals except the 'quick' code (which is
			 * always same-cpu and doesn't race).  This means
			 * that we can clear SET here without obtaining
			 * c->spin.
			 */
			TAILQ_REMOVE(&wheel->list, c, entry);
			atomic_set_int(&c->flags, CALLOUT_INPROG);
			atomic_clear_int(&c->flags, CALLOUT_SET);
			sc->running = c;
			spin_unlock(&wheel->spin);

			/*
			 * Legacy mplock support
			 */
			if (c->flags & CALLOUT_MPSAFE) {
				if (mpsafe == 0) {
					mpsafe = 1;
					rel_mplock();
				}
			} else {
				if (mpsafe) {
					mpsafe = 0;
					get_mplock();
				}
			}

			/*
			 * Execute the 'q' function (protected by INPROG)
			 */
			if (c->flags & (CALLOUT_STOP | CALLOUT_CANCEL)) {
				/*
				 * Raced a stop or cancel request, do
				 * not execute.  The processing code
				 * thinks it's a normal completion so
				 * flag the fact that cancel/stop actually
				 * prevented a callout here.
				 */
				if (c->flags &
				    (CALLOUT_CANCEL | CALLOUT_STOP)) {
					atomic_set_int(&c->verifier->flags,
						       CALLOUT_PREVENTED);
				}
			} else if (c->flags & CALLOUT_RESET) {
				/*
				 * A RESET raced, make it seem like it
				 * didn't.  Do nothing here and let the
				 * update procedure requeue us.
				 */
			} else if (c->flags & CALLOUT_AUTOLOCK) {
				/*
				 * Interlocked cancelable call.  If the
				 * lock gets canceled we have to flag the
				 * fact that the cancel/stop actually
				 * prevented the callout here.
				 */
				error = lockmgr(c->lk, LK_EXCLUSIVE |
						       LK_CANCELABLE);
				if (error == 0) {
					c->qfunc(c->qarg);
					lockmgr(c->lk, LK_RELEASE);
				} else if (c->flags &
					   (CALLOUT_CANCEL | CALLOUT_STOP)) {
					atomic_set_int(&c->verifier->flags,
						       CALLOUT_PREVENTED);
				}
			} else {
				/*
				 * Normal call
				 */
				c->qfunc(c->qarg);
			}

			/*
			 * INPROG will prevent SET from being set again.
			 * Once we clear INPROG, update the callout to
			 * handle any pending operations that have built-up.
			 */

			/*
			 * Interlocked clearing of INPROG, then handle any
			 * queued request (such as a callout_reset() request).
			 */
			spin_lock(&c->spin);
			atomic_clear_int(&c->flags, CALLOUT_INPROG);
			sc->running = NULL;
			_callout_update_spinlocked(c);
			spin_unlock(&c->spin);

			spin_lock(&wheel->spin);
		}
		spin_unlock(&wheel->spin);
		atomic_add_int(&sc->softticks, 1);

		/*
		 * Clean up any _callout structures which are now allowed
		 * to be freed.
		 */
		crit_enter();
		while ((c = TAILQ_FIRST(&sc->freelist)) != NULL) {
			if (!exis_freeable(&c->exis))
				break;
			TAILQ_REMOVE(&sc->freelist, c, entry);
			c->flags = 0;
			kfree(c, M_CALLOUT);
			if (callout_debug)
				kprintf("KFREEB %p\n", c);
		}
		crit_exit();
	}

	/*
	 * Don't leave us holding the MP lock when we deschedule ourselves.
	 */
	if (mpsafe == 0) {
		mpsafe = 1;
		rel_mplock();
	}

	/*
	 * Recheck in critical section to interlock against hardclock()
	 */
	crit_enter();
	if (sc->softticks == (int)(sc->curticks + 1)) {
		sc->isrunning = 0;
		lwkt_deschedule_self(&sc->thread);	/* == curthread */
		lwkt_switch();
	}
	crit_exit();
	goto loop;
	/* NOT REACHED */
}

/*
 * A very slow system cleanup timer (10 second interval),
 * per-cpu.
 */
void
slotimer_callback(void *arg)
{
	struct callout *c = arg;

	slab_cleanup();
	callout_reset(c, hz * 10, slotimer_callback, c);
}

/*
 * API FUNCTIONS
 */

static __inline
struct _callout *
_callout_gettoc(struct callout *cc)
{
	globaldata_t gd = mycpu;
	struct _callout *c;
	softclock_pcpu_t sc;

	KKASSERT(cc->flags & CALLOUT_DID_INIT);
	exis_hold_gd(gd);
	for (;;) {
		c = cc->toc;
		cpu_ccfence();
		if (c) {
			KKASSERT(c->verifier == cc);
			spin_lock(&c->spin);
			break;
		}
		sc = softclock_pcpu_ary[gd->gd_cpuid];
		c = kmalloc(sizeof(*c), M_CALLOUT, M_INTWAIT | M_ZERO);
		if (callout_debug)
			kprintf("ALLOC %p\n", c);
		c->flags = cc->flags;
		c->lk = cc->lk;
		c->verifier = cc;
		exis_init(&c->exis);
		spin_init(&c->spin, "calou");
		spin_lock(&c->spin);
		if (atomic_cmpset_ptr(&cc->toc, NULL, c))
			break;
		spin_unlock(&c->spin);
		c->verifier = NULL;
		kfree(c, M_CALLOUT);
		if (callout_debug)
			kprintf("KFREEA %p\n", c);
	}
	exis_drop_gd(gd);

	/*
	 * Return internal _callout with spin-lock held
	 */
	return c;
}

/*
 * Macroed in sys/callout.h for debugging
 *
 * WARNING! tsleep() assumes this will not block
 */
void
_callout_init(struct callout *cc CALLOUT_DEBUG_ARGS)
{
	bzero(cc, sizeof(*cc));
	cc->flags = CALLOUT_DID_INIT;
}

void
_callout_init_mp(struct callout *cc CALLOUT_DEBUG_ARGS)
{
	bzero(cc, sizeof(*cc));
	cc->flags = CALLOUT_DID_INIT | CALLOUT_MPSAFE;
}

void
_callout_init_lk(struct callout *cc, struct lock *lk CALLOUT_DEBUG_ARGS)
{
	bzero(cc, sizeof(*cc));
	cc->flags = CALLOUT_DID_INIT | CALLOUT_MPSAFE | CALLOUT_AUTOLOCK;
	cc->lk = lk;
}

/*
 * Start or restart a timeout.  New timeouts can be installed while the
 * current one is running.
 *
 * This installs the callout structure on the callwheel of the current cpu.
 * Callers may legally pass any value, even if 0 or negative, but since the
 * sc->curticks index may have already been processed a minimum timeout of
 * 1 tick will be enforced.
 *
 * This function will not deadlock against a running call.
 *
 * WARNING! tsleep() assumes this will not block
 */
void
callout_reset(struct callout *cc, int to_ticks, void (*ftn)(void *), void *arg)
{
	softclock_pcpu_t sc;
	struct _callout *c;

	/*
	 * We need to acquire/associate a _callout.
	 * gettoc spin-locks (c).
	 */
	KKASSERT(cc->flags & CALLOUT_DID_INIT);
	atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
	c = _callout_gettoc(cc);

	/*
	 * Request a RESET.  This automatically overrides a STOP in
	 * _callout_update_spinlocked().
	 */
	atomic_set_int(&c->flags, CALLOUT_RESET);
	sc = softclock_pcpu_ary[mycpu->gd_cpuid];
	c->rsc = sc;
	c->rtick = sc->curticks + to_ticks;
	c->rfunc = ftn;
	c->rarg = arg;
	_callout_update_spinlocked(c);
	spin_unlock(&c->spin);
}
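
/*
 * Illustrative usage sketch (hypothetical driver code): a typical consumer
 * initializes the frontend structure once, arms and re-arms it, and
 * cancels/terminates it on teardown.  my_timeout_fn and softc are assumed
 * names; callout_init_mp() is assumed to be the sys/callout.h macro
 * frontend for _callout_init_mp() above.
 *
 *	struct callout my_timer;
 *
 *	callout_init_mp(&my_timer);
 *	callout_reset(&my_timer, hz / 2, my_timeout_fn, softc);	(arm ~0.5s)
 *	...
 *	callout_reset(&my_timer, hz, my_timeout_fn, softc);	(re-arm)
 *	...
 *	callout_cancel(&my_timer);	(synchronous cancel)
 *	callout_terminate(&my_timer);	(before freeing the structure)
 */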

/*
 * Same as callout_reset() but the timeout will run on a particular cpu.
 */
void
callout_reset_bycpu(struct callout *cc, int to_ticks, void (*ftn)(void *),
		    void *arg, int cpuid)
{
	softclock_pcpu_t sc;
	struct _callout *c;

	/*
	 * We need to acquire/associate a _callout.
	 * gettoc spin-locks (c).
	 */
	KKASSERT(cc->flags & CALLOUT_DID_INIT);
	atomic_set_int(&cc->flags, CALLOUT_ACTIVE);
	c = _callout_gettoc(cc);

	/*
	 * Set RESET.  Do not clear STOP here (let the process code do it).
	 */
	atomic_set_int(&c->flags, CALLOUT_RESET);

	sc = softclock_pcpu_ary[cpuid];
	c->rsc = sc;
	c->rtick = sc->curticks + to_ticks;
	c->rfunc = ftn;
	c->rarg = arg;
	_callout_update_spinlocked(c);
	spin_unlock(&c->spin);
}

/*
 * Issue synchronous or asynchronous cancel or stop
 */
static __inline
int
_callout_cancel_or_stop(struct callout *cc, uint32_t flags, int sync)
{
	globaldata_t gd = mycpu;
	struct _callout *c;
	int res;

	/*
	 * Callout is inactive after cancel or stop.  Degenerate case if
	 * no _callout is currently associated.
	 */
	atomic_clear_int(&cc->flags, CALLOUT_ACTIVE);
	if (cc->toc == NULL)
		return 0;

	/*
	 * Ensure that the related (c) is not destroyed.  Set the CANCEL
	 * or STOP request flag, clear the PREVENTED status flag, and update.
	 */
	exis_hold_gd(gd);
	c = _callout_gettoc(cc);
	atomic_clear_int(&c->flags, CALLOUT_PREVENTED);
	atomic_set_int(&c->flags, flags);
	_callout_update_spinlocked(c);
	spin_unlock(&c->spin);

	/*
	 * If the operation is still in-progress then re-acquire the spin-lock
	 * and block if necessary.  Also initiate the lock cancel.
	 */
	if (sync == 0 || (c->flags & (CALLOUT_INPROG | CALLOUT_SET)) == 0) {
		exis_drop_gd(gd);
		return 0;
	}
	if (c->flags & CALLOUT_AUTOLOCK)
		lockmgr(c->lk, LK_CANCEL_BEG);
	spin_lock(&c->spin);
	if ((c->flags & (CALLOUT_INPROG | CALLOUT_SET)) == 0) {
		spin_unlock(&c->spin);
		if (c->flags & CALLOUT_AUTOLOCK)
			lockmgr(c->lk, LK_CANCEL_END);
		exis_drop_gd(gd);
		return ((c->flags & CALLOUT_PREVENTED) != 0);
	}

	/*
	 * With c->spin held we can synchronously wait completion of our
	 * request.
	 *
	 * If INPROG is set and we are recursing from the callback the
	 * function completes immediately.
	 */
	++c->waiters;
	for (;;) {
		cpu_ccfence();
		if ((c->flags & flags) == 0)
			break;
		if ((c->flags & CALLOUT_INPROG) &&
		    curthread == &c->qsc->thread) {
			_callout_update_spinlocked(c);
			break;
		}
		ssleep(c, &c->spin, 0, "costp", 0);
	}
	--c->waiters;
	spin_unlock(&c->spin);
	if (c->flags & CALLOUT_AUTOLOCK)
		lockmgr(c->lk, LK_CANCEL_END);
	res = ((c->flags & CALLOUT_PREVENTED) != 0);
	exis_drop_gd(gd);

	return res;
}

/*
 * Internalized special low-overhead version without normal safety
 * checks or allocations.  Used by tsleep().
 *
 * Must be called from critical section, specify both the external
 * and internal callout structure and set timeout on the current cpu.
 */
void
_callout_setup_quick(struct callout *cc, struct _callout *c, int ticks,
		     void (*ftn)(void *), void *arg)
{
	softclock_pcpu_t sc;
	struct wheel *wheel;

	/*
	 * Install the callout directly in the SET state on the current
	 * cpu.  The caller supplies both structures, so no allocation or
	 * RESET request is needed.
	 */
	sc = softclock_pcpu_ary[mycpu->gd_cpuid];

	cc->flags = CALLOUT_DID_INIT | CALLOUT_MPSAFE;
	cc->toc = c;
	cc->lk = NULL;
	c->flags = cc->flags | CALLOUT_SET;
	c->lk = NULL;
	c->verifier = cc;
	c->qsc = sc;
	c->qtick = sc->curticks + ticks;
	c->qfunc = ftn;
	c->qarg = arg;
	spin_init(&c->spin, "calou");

	/*
	 * Since we are on the same cpu with a critical section, we can
	 * do this with only the wheel spinlock.
	 */
	if (c->qtick - sc->softticks <= 0)
		c->qtick = sc->softticks + 1;
	wheel = &sc->callwheel[c->qtick & cwheelmask];

	spin_lock(&wheel->spin);
	TAILQ_INSERT_TAIL(&wheel->list, c, entry);
	spin_unlock(&wheel->spin);
}

/*
 * Internalized special low-overhead version without normal safety
 * checks or allocations.  Used by tsleep().
 *
 * Must be called on the same cpu that queued the timeout.
 * Must be called with a critical section already held.
 */
void
_callout_cancel_quick(struct _callout *c)
{
	softclock_pcpu_t sc;
	struct wheel *wheel;

	/*
	 * Wakeup callouts for tsleep() should never block, so this flag
	 * had better never be found set.
	 */
	KKASSERT((c->flags & CALLOUT_INPROG) == 0);

	/*
	 * Remove from queue if necessary.  Since we are in a critical
	 * section on the same cpu, the queueing status should not change.
	 */
	if (c->flags & CALLOUT_SET) {
		sc = c->qsc;
		KKASSERT(sc == softclock_pcpu_ary[mycpu->gd_cpuid]);
		wheel = &sc->callwheel[c->qtick & cwheelmask];

		/*
		 * NOTE: We must still spin-lock the wheel because other
		 *	 cpus can manipulate the list, and adjust sc->next
		 *	 if necessary.
		 */
		spin_lock(&wheel->spin);
		if (sc->next == c)
			sc->next = TAILQ_NEXT(c, entry);
		TAILQ_REMOVE(&wheel->list, c, entry);
		c->flags &= ~(CALLOUT_SET | CALLOUT_STOP |
			      CALLOUT_CANCEL | CALLOUT_RESET);
		spin_unlock(&wheel->spin);
	}
	c->verifier = NULL;
}
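
/*
 * Illustrative pairing of the 'quick' functions (hypothetical caller,
 * mirroring the constraints stated above): both structures are supplied
 * by the caller, and setup and cancel happen on the same cpu inside a
 * critical section.  wakeup_fn and ident are assumed names.
 *
 *	struct callout qcc;
 *	struct _callout qc;
 *
 *	crit_enter();
 *	_callout_setup_quick(&qcc, &qc, timo, wakeup_fn, ident);
 *	...				(block / wait)
 *	_callout_cancel_quick(&qc);	(same cpu, still in critical section)
 *	crit_exit();
 */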

/*
 * This is a synchronous STOP which cancels the callout.  If AUTOLOCK
 * then a CANCEL will be issued to the lock holder.  Unlike STOP, the
 * cancel function prevents any new callout_reset()s from being issued
 * in addition to canceling the lock.  The lock will also be deactivated.
 *
 * Returns 0 if the callout was not active (or was active and completed,
 * but didn't try to start a new timeout).
 * Returns 1 if the cancel is responsible for stopping the callout.
 */
int
callout_cancel(struct callout *cc)
{
	return _callout_cancel_or_stop(cc, CALLOUT_CANCEL, 1);
}

/*
 * Currently the same as callout_cancel.  Ultimately we may wish the
 * drain function to allow a pending callout to proceed, but for now
 * we will attempt to cancel it.
 *
 * Returns 0 if the callout was not active (or was active and completed,
 * but didn't try to start a new timeout).
 * Returns 1 if the drain is responsible for stopping the callout.
 */
int
callout_drain(struct callout *cc)
{
	return _callout_cancel_or_stop(cc, CALLOUT_CANCEL, 1);
}

/*
 * Stops a callout if it is pending or queued, does not block.
 * This function does not interlock against a callout that is in-progress.
 *
 * Returns whether the STOP operation was responsible for removing a
 * queued or pending callout.
 */
int
callout_stop_async(struct callout *cc)
{
	return _callout_cancel_or_stop(cc, CALLOUT_STOP, 0);
}

/*
 * Callout deactivate merely clears the CALLOUT_ACTIVE bit and stops a
 * callout if it is pending or queued.  However this cannot stop a callout
 * whose callback is in-progress.
 *
 * This function does not interlock against a callout that is in-progress.
 */
void
callout_deactivate(struct callout *cc)
{
	atomic_clear_int(&cc->flags, CALLOUT_ACTIVE);
	callout_stop_async(cc);
}

/*
 * lock-aided callouts are STOPped synchronously using STOP semantics
 * (meaning that another thread can start the callout again before we
 * return).
 *
 * non-lock-aided callouts:
 *
 * Stops a callout if it is pending or queued, does not block.
 * This function does not interlock against a callout that is in-progress.
 */
int
callout_stop(struct callout *cc)
{
	return _callout_cancel_or_stop(cc, CALLOUT_STOP, 1);
}

/*
 * Destroy the callout.  Synchronously cancel any operation in progress,
 * clear the INIT flag, and disconnect the internal _callout.  The internal
 * callout will be safely freed via EXIS.
 *
 * Upon return, the callout structure may only be reused if re-initialized.
 */
void
callout_terminate(struct callout *cc)
{
	struct _callout *c;

	exis_hold();

	_callout_cancel_or_stop(cc, CALLOUT_CANCEL, 1);
	KKASSERT(cc->flags & CALLOUT_DID_INIT);
	atomic_clear_int(&cc->flags, CALLOUT_DID_INIT);
	c = atomic_swap_ptr((void *)&cc->toc, NULL);
	if (c) {
		KKASSERT(c->verifier == cc);
		c->verifier = NULL;
		_callout_free(c);
	}

	exis_drop();
}

/*
 * Returns whether a callout is queued and the time has not yet
 * arrived (the callout is not yet in-progress).
 */
int
callout_pending(struct callout *cc)
{
	struct _callout *c;

	/*
	 * Don't instantiate toc to test pending
	 */
	if (cc->toc == NULL)
		return 0;
	c = _callout_gettoc(cc);
	if ((c->flags & (CALLOUT_SET | CALLOUT_INPROG)) == CALLOUT_SET) {
		spin_unlock(&c->spin);
		return 1;
	}
	spin_unlock(&c->spin);

	return 0;
}

/*
 * Returns whether a callout is active or not.  A callout is active when
 * a timeout is set and remains active upon normal termination, even if
 * it does not issue a new timeout.  A callout is inactive if a timeout has
 * never been set or if the callout has been stopped or canceled.  The next
 * timeout that is set will re-set the active state.
 */
int
callout_active(struct callout *cc)
{
	return ((cc->flags & CALLOUT_ACTIVE) ? 1 : 0);
}
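
/*
 * Illustrative use of the active/pending predicates (hypothetical consumer
 * code following the classic BSD callout idiom): a timeout handler that
 * shares a lock with callout_stop() can distinguish a stale callback from
 * a live one before doing any work.  my_timer, sc_lock and the surrounding
 * locking discipline are assumed names for this sketch.
 *
 *	lockmgr(&sc_lock, LK_EXCLUSIVE);
 *	if (callout_pending(&my_timer)) {
 *		(rescheduled while we waited for the lock; do nothing)
 *	} else if (callout_active(&my_timer)) {
 *		callout_deactivate(&my_timer);
 *		(... perform the timeout work ...)
 *	}
 *	lockmgr(&sc_lock, LK_RELEASE);
 */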