kernel - Incidental MPLOCK removal (usched, affinity)
[dragonfly.git] / sys / kern / usched_dummy.c
blob 2a35d480611264114bd7b5f6746070d231b15bae
/*
 * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/queue.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/uio.h>
#include <sys/sysctl.h>
#include <sys/resourcevar.h>
#include <sys/spinlock.h>
#include <machine/cpu.h>
#include <machine/smp.h>

#include <sys/thread2.h>
#include <sys/spinlock2.h>
#define MAXPRI			128
#define PRIBASE_REALTIME	0
#define PRIBASE_NORMAL		MAXPRI
#define PRIBASE_IDLE		(MAXPRI * 2)
#define PRIBASE_THREAD		(MAXPRI * 3)
#define PRIBASE_NULL		(MAXPRI * 4)

#define lwp_priority	lwp_usdata.bsd4.priority
#define lwp_estcpu	lwp_usdata.bsd4.estcpu
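/*
 * The dummy scheduler keeps its per-lwp priority and estcpu in the bsd4
 * member of the lwp_usdata union rather than declaring usdata fields of
 * its own; the two aliases above simply resolve to lwp_usdata.bsd4.*.
 */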
static void dummy_acquire_curproc(struct lwp *lp);
static void dummy_release_curproc(struct lwp *lp);
static void dummy_select_curproc(globaldata_t gd);
static void dummy_setrunqueue(struct lwp *lp);
static void dummy_schedulerclock(struct lwp *lp, sysclock_t period,
				sysclock_t cpstamp);
static void dummy_recalculate_estcpu(struct lwp *lp);
static void dummy_resetpriority(struct lwp *lp);
static void dummy_forking(struct lwp *plp, struct lwp *lp);
static void dummy_exiting(struct lwp *plp, struct proc *child);
static void dummy_uload_update(struct lwp *lp);
static void dummy_yield(struct lwp *lp);
static void dummy_changedcpu(struct lwp *lp);
struct usched usched_dummy = {
	{ NULL },
	"dummy", "Dummy DragonFly Scheduler",
	NULL,			/* default registration */
	NULL,			/* default deregistration */
	dummy_acquire_curproc,
	dummy_release_curproc,
	dummy_setrunqueue,
	dummy_schedulerclock,
	dummy_recalculate_estcpu,
	dummy_resetpriority,
	dummy_forking,
	dummy_exiting,
	dummy_uload_update,
	NULL,			/* setcpumask not supported */
	dummy_yield,
	dummy_changedcpu
};
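/*
 * This table is what the usched framework uses to reach the scheduler:
 * each entry is one of the hooks the kernel invokes on behalf of a process
 * bound to "dummy".  The two NULL slots fall back to the framework's
 * default registration/deregistration handlers, and setcpumask is simply
 * not supported.  A real scheduler built from this template would also
 * have to be registered with the usched framework to become selectable;
 * that registration happens outside this file.
 */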
struct usched_dummy_pcpu {
	int	rrcount;
	struct thread *helper_thread;
	struct lwp *uschedcp;
};

typedef struct usched_dummy_pcpu *dummy_pcpu_t;
static struct usched_dummy_pcpu dummy_pcpu[MAXCPU];
static cpumask_t dummy_curprocmask = CPUMASK_INITIALIZER_ALLONES;
static cpumask_t dummy_rdyprocmask;
static struct spinlock dummy_spin;
static TAILQ_HEAD(rq, lwp) dummy_runq;
static int dummy_runqcount;

static int usched_dummy_rrinterval = (ESTCPUFREQ + 9) / 10;
SYSCTL_INT(_kern, OID_AUTO, usched_dummy_rrinterval, CTLFLAG_RW,
	&usched_dummy_rrinterval, 0, "");
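/*
 * Sizing note: dummy_schedulerclock() below is driven at ESTCPUFREQ on each
 * cpu, so (ESTCPUFREQ + 9) / 10 rounds up to roughly a tenth of a second
 * worth of ticks per round-robin quantum (5 ticks at the usual ESTCPUFREQ
 * of 50).  The interval is tunable at runtime via the
 * kern.usched_dummy_rrinterval sysctl declared above.
 */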
/*
 * Initialize the run queues at boot time, clear cpu 0 in curprocmask
 * to allow dummy scheduling on cpu 0.
 */
static void
dummyinit(void *dummy)
{
	TAILQ_INIT(&dummy_runq);
	spin_init(&dummy_spin, "uscheddummy");
	ATOMIC_CPUMASK_NANDBIT(dummy_curprocmask, 0);
}
SYSINIT(runqueue, SI_BOOT2_USCHED, SI_ORDER_FIRST, dummyinit, NULL);
/*
 * DUMMY_ACQUIRE_CURPROC
 *
 * This function is called when the kernel intends to return to userland.
 * It is responsible for making the thread the current designated userland
 * thread for this cpu, blocking if necessary.
 *
 * The kernel will not depress our LWKT priority until after we return,
 * in case we have to shove over to another cpu.
 *
 * We must determine our thread's disposition before we switch away.  This
 * is very sensitive code.
 *
 * We are expected to handle userland reschedule requests here too.
 *
 * WARNING! THIS FUNCTION IS ALLOWED TO CAUSE THE CURRENT THREAD TO MIGRATE
 * TO ANOTHER CPU!  Because most of the kernel assumes that no migration will
 * occur, this function is called only under very controlled circumstances.
 *
 * MPSAFE
 */
static void
dummy_acquire_curproc(struct lwp *lp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
	thread_t td = lp->lwp_thread;

	/*
	 * Possibly select another thread
	 */
	if (user_resched_wanted())
		dummy_select_curproc(gd);

	/*
	 * If this cpu has no current thread, select ourself
	 */
	if (dd->uschedcp == lp ||
	    (dd->uschedcp == NULL && TAILQ_EMPTY(&dummy_runq))) {
		ATOMIC_CPUMASK_ORBIT(dummy_curprocmask, gd->gd_cpuid);
		dd->uschedcp = lp;
		return;
	}

	/*
	 * If this cpu's current user process thread is not our thread,
	 * deschedule ourselves and place us on the run queue, then
	 * switch away.
	 *
	 * We loop until we become the current process.  It's a good idea
	 * to run any passive release(s) before we mess with the scheduler
	 * so our thread is in the expected state.
	 */
	KKASSERT(dd->uschedcp != lp);
	if (td->td_release)
		td->td_release(lp->lwp_thread);
	do {
		crit_enter();
		lwkt_deschedule_self(td);
		dummy_setrunqueue(lp);
		if ((td->td_flags & TDF_RUNQ) == 0)
			++lp->lwp_ru.ru_nivcsw;
		lwkt_switch();		/* WE MAY MIGRATE TO ANOTHER CPU */
		crit_exit();
		gd = mycpu;
		dd = &dummy_pcpu[gd->gd_cpuid];
		KKASSERT((lp->lwp_mpflags & LWP_MP_ONRUNQ) == 0);
	} while (dd->uschedcp != lp);
}
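/*
 * A note on the loop above: crit_enter() keeps the thread from being
 * preempted between lwkt_deschedule_self() and dummy_setrunqueue(), and
 * because dummy_setrunqueue() may hand the lwp to another cpu's helper
 * thread, the thread can come back from lwkt_switch() running on a
 * different cpu.  That is why gd and dd are reloaded from mycpu before
 * the loop condition is retested.
 */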
/*
 * DUMMY_RELEASE_CURPROC
 *
 * This routine detaches the current thread from the userland scheduler,
 * usually because the thread needs to run in the kernel (at kernel priority)
 * for a while.
 *
 * This routine is also responsible for selecting a new thread to
 * become the current thread.
 *
 * MPSAFE
 */
static void
dummy_release_curproc(struct lwp *lp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];

	KKASSERT((lp->lwp_mpflags & LWP_MP_ONRUNQ) == 0);
	if (dd->uschedcp == lp) {
		dummy_select_curproc(gd);
	}
}
/*
 * DUMMY_SELECT_CURPROC
 *
 * Select a new current process for this cpu.  This satisfies a user
 * scheduler reschedule request so clear that too.
 *
 * This routine is also responsible for equal-priority round-robining,
 * typically triggered from dummy_schedulerclock().  In our dummy example
 * all the 'user' threads are LWKT scheduled all at once and we just
 * call lwkt_switch().
 *
 * MPSAFE
 */
static
void
dummy_select_curproc(globaldata_t gd)
{
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
	struct lwp *lp;

	clear_user_resched();
	spin_lock(&dummy_spin);
	if ((lp = TAILQ_FIRST(&dummy_runq)) == NULL) {
		dd->uschedcp = NULL;
		ATOMIC_CPUMASK_NANDBIT(dummy_curprocmask, gd->gd_cpuid);
		spin_unlock(&dummy_spin);
	} else {
		--dummy_runqcount;
		TAILQ_REMOVE(&dummy_runq, lp, lwp_procq);
		atomic_clear_int(&lp->lwp_mpflags, LWP_MP_ONRUNQ);
		dd->uschedcp = lp;
		ATOMIC_CPUMASK_ORBIT(dummy_curprocmask, gd->gd_cpuid);
		spin_unlock(&dummy_spin);
		lwkt_acquire(lp->lwp_thread);
		lwkt_schedule(lp->lwp_thread);
	}
}
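/*
 * The lwkt_acquire()/lwkt_schedule() pair above mirrors the lwkt_giveaway()
 * in dummy_setrunqueue(): an lwp pulled off the global runq may still belong
 * to the cpu that queued it, so the cpu that picks it up first acquires the
 * underlying LWKT thread for itself and only then makes it runnable.
 */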
/*
 * DUMMY_SETRUNQUEUE
 *
 * This routine is called to schedule a new user process after a fork.
 * The scheduler module itself might also call this routine to place
 * the current process on the userland scheduler's run queue prior
 * to calling dummy_select_curproc().
 *
 * The caller may set LWP_PASSIVE_ACQ in lwp_flags to indicate that we should
 * attempt to leave the thread on the current cpu.
 *
 * MPSAFE
 */
static void
dummy_setrunqueue(struct lwp *lp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
	cpumask_t mask;
	int cpuid;

	if (dd->uschedcp == NULL) {
		dd->uschedcp = lp;
		ATOMIC_CPUMASK_ORBIT(dummy_curprocmask, gd->gd_cpuid);
		lwkt_schedule(lp->lwp_thread);
	} else {
		/*
		 * Add to our global runq
		 */
		KKASSERT((lp->lwp_mpflags & LWP_MP_ONRUNQ) == 0);
		spin_lock(&dummy_spin);
		++dummy_runqcount;
		TAILQ_INSERT_TAIL(&dummy_runq, lp, lwp_procq);
		atomic_set_int(&lp->lwp_mpflags, LWP_MP_ONRUNQ);
		lwkt_giveaway(lp->lwp_thread);

		/* lp = TAILQ_FIRST(&dummy_runq); */

		/*
		 * Notify the next available cpu.  P.S. some
		 * cpu affinity could be done here.
		 *
		 * The rdyprocmask bit records the fact that there
		 * is a process on the runq that needs service.  If the
		 * helper thread cannot find a home for it, it will forward
		 * the request to another available cpu.
		 */
		mask = dummy_rdyprocmask;
		CPUMASK_NANDMASK(mask, dummy_curprocmask);
		CPUMASK_ANDMASK(mask, gd->gd_other_cpus);
		if (CPUMASK_TESTNZERO(mask)) {
			cpuid = BSFCPUMASK(mask);
			ATOMIC_CPUMASK_NANDBIT(dummy_rdyprocmask, cpuid);
			spin_unlock(&dummy_spin);
			lwkt_schedule(dummy_pcpu[cpuid].helper_thread);
		} else {
			spin_unlock(&dummy_spin);
		}
	}
}
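/*
 * The mask computation above boils down to
 *
 *	mask = dummy_rdyprocmask & ~dummy_curprocmask & gd->gd_other_cpus;
 *
 * i.e. cpus whose helper thread is parked and ready, which have no current
 * user process, excluding ourselves.  For example, with four cpus and,
 * from cpu 0's point of view, rdyprocmask = 1110b and curprocmask = 0100b,
 * the result is 1010b and BSFCPUMASK() picks cpu 1, the lowest set bit.
 */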
/*
 * This routine is called from a systimer IPI.  It must NEVER block.
 * If a lwp compatible with this scheduler is the currently running
 * thread this function is called with a non-NULL lp, otherwise it
 * will be called with a NULL lp.
 *
 * This routine is called at ESTCPUFREQ on each cpu independently.
 *
 * This routine typically queues a reschedule request, which will cause
 * the scheduler's BLAH_select_curproc() to be called as soon as possible.
 */
static
void
dummy_schedulerclock(struct lwp *lp, sysclock_t period, sysclock_t cpstamp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];

	if (lp == NULL)
		return;

	if (++dd->rrcount >= usched_dummy_rrinterval) {
		dd->rrcount = 0;
		need_user_resched();
	}
}
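/*
 * The clock handler never switches threads itself; it only raises the user
 * reschedule flag once rrcount reaches the interval.  The actual round-robin
 * happens the next time the running lwp passes through
 * dummy_acquire_curproc() on its way back to userland, where the pending
 * flag makes it call dummy_select_curproc() and requeue itself if another
 * lwp is waiting.
 */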
/*
 * DUMMY_RECALCULATE_ESTCPU
 *
 * Called once a second for any process that is running or has slept
 * for less than 2 seconds.
 *
 * MPSAFE
 */
static
void
dummy_recalculate_estcpu(struct lwp *lp)
{
}

/*
 * MPSAFE
 */
static
void
dummy_yield(struct lwp *lp)
{
	need_user_resched();
}
static
void
dummy_changedcpu(struct lwp *lp __unused)
{
}
/*
 * DUMMY_RESETPRIORITY
 *
 * This routine is called after the kernel has potentially modified
 * the lwp_rtprio structure.  The target process may be running or sleeping
 * or scheduled but not yet running or owned by another cpu.  Basically,
 * it can be in virtually any state.
 *
 * This routine is called by fork1() for initial setup with the process
 * off the run queue, and also may be called normally with the process on or
 * off the run queue.
 *
 * MPSAFE
 */
static void
dummy_resetpriority(struct lwp *lp)
{
	/* XXX spinlock usually needed */
	/*
	 * Set p_priority for general process comparisons
	 */
	switch(lp->lwp_rtprio.type) {
	case RTP_PRIO_REALTIME:
		lp->lwp_priority = PRIBASE_REALTIME + lp->lwp_rtprio.prio;
		return;
	case RTP_PRIO_NORMAL:
		lp->lwp_priority = PRIBASE_NORMAL + lp->lwp_rtprio.prio;
		break;
	case RTP_PRIO_IDLE:
		lp->lwp_priority = PRIBASE_IDLE + lp->lwp_rtprio.prio;
		return;
	case RTP_PRIO_THREAD:
		lp->lwp_priority = PRIBASE_THREAD + lp->lwp_rtprio.prio;
		return;
	}

	/*
	 * td_upri has normal sense (higher numbers are more desirable),
	 * so negate it.
	 */
	lp->lwp_thread->td_upri = -lp->lwp_priority;
	/* XXX spinlock usually needed */
}
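/*
 * The PRIBASE_* offsets give each rtprio class its own band of MAXPRI
 * priorities, so the class always dominates the comparison.  For example,
 * an RTP_PRIO_NORMAL lwp with rtprio.prio == 10 gets lwp_priority
 * 128 + 10 = 138 (and td_upri == -138), which always compares as less
 * urgent than any RTP_PRIO_REALTIME priority (band 0..127) and more urgent
 * than any RTP_PRIO_IDLE priority (band 256..383).
 */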
/*
 * DUMMY_FORKING
 *
 * Called from fork1() when a new child process is being created.  Allows
 * the scheduler to predispose the child process before it gets scheduled.
 *
 * MPSAFE
 */
static void
dummy_forking(struct lwp *plp, struct lwp *lp)
{
	lp->lwp_estcpu = plp->lwp_estcpu;
#if 0
	++plp->lwp_estcpu;
#endif
}
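/*
 * The child simply inherits the parent's estcpu here.  The disabled
 * increment marks where a less trivial scheduler could charge the parent
 * for the fork; in the estcpu-based schedulers a larger estcpu translates
 * into a worse priority, so fork-heavy parents would slowly lose ground.
 */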
/*
 * Called when a lwp is being removed from this scheduler, typically
 * during lwp_exit().
 */
static void
dummy_exiting(struct lwp *plp, struct proc *child)
{
}

static void
dummy_uload_update(struct lwp *lp)
{
}
/*
 * SMP systems may need a scheduler helper thread.  This is how one can be
 * set up.
 *
 * We use a neat LWKT scheduling trick to interlock the helper thread.  It
 * is possible to deschedule an LWKT thread and then do some work before
 * switching away.  The thread can be rescheduled at any time, even before
 * we switch away.
 *
 * MPSAFE
 */
static void
dummy_sched_thread(void *dummy)
{
	globaldata_t gd;
	dummy_pcpu_t dd;
	struct lwp *lp;
	cpumask_t cpumask;
	cpumask_t tmpmask;
	int cpuid;
	int tmpid;

	gd = mycpu;
	cpuid = gd->gd_cpuid;
	dd = &dummy_pcpu[cpuid];
	CPUMASK_ASSBIT(cpumask, cpuid);

	for (;;) {
		lwkt_deschedule_self(gd->gd_curthread);	/* interlock */
		ATOMIC_CPUMASK_ORBIT(dummy_rdyprocmask, cpuid);
		spin_lock(&dummy_spin);
		if (dd->uschedcp) {
			/*
			 * We raced another cpu trying to schedule a thread
			 * onto us.  If the runq isn't empty, hit another
			 * free cpu.
			 */
			tmpmask = dummy_rdyprocmask;
			CPUMASK_NANDMASK(tmpmask, dummy_curprocmask);
			CPUMASK_ANDMASK(tmpmask, gd->gd_other_cpus);
			if (CPUMASK_TESTNZERO(tmpmask) && dummy_runqcount) {
				tmpid = BSFCPUMASK(tmpmask);
				KKASSERT(tmpid != cpuid);
				ATOMIC_CPUMASK_NANDBIT(dummy_rdyprocmask, tmpid);
				spin_unlock(&dummy_spin);
				lwkt_schedule(dummy_pcpu[tmpid].helper_thread);
			} else {
				spin_unlock(&dummy_spin);
			}
		} else if ((lp = TAILQ_FIRST(&dummy_runq)) != NULL) {
			--dummy_runqcount;
			TAILQ_REMOVE(&dummy_runq, lp, lwp_procq);
			atomic_clear_int(&lp->lwp_mpflags, LWP_MP_ONRUNQ);
			dd->uschedcp = lp;
			ATOMIC_CPUMASK_ORBIT(dummy_curprocmask, cpuid);
			spin_unlock(&dummy_spin);
			lwkt_acquire(lp->lwp_thread);
			lwkt_schedule(lp->lwp_thread);
		} else {
			spin_unlock(&dummy_spin);
		}
		lwkt_switch();
	}
}
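/*
 * The interlock works like this: the helper deschedules itself *before*
 * advertising itself in dummy_rdyprocmask and scanning the runq.  If
 * dummy_setrunqueue() on another cpu picks this cpu and calls
 * lwkt_schedule() on the helper at any point after the bit is set, that
 * merely marks the still-running thread runnable again, so the trailing
 * lwkt_switch() does not put the helper to sleep; it stays runnable and
 * the loop runs again.  A notification can therefore never be lost between
 * the runq scan and the switch.
 */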
/*
 * Set up our scheduler helpers.  Note that curprocmask bit 0 has already
 * been cleared by dummyinit() and we should not mess with it further.
 */
static void
dummy_sched_thread_cpu_init(void)
{
	int i;

	if (bootverbose)
		kprintf("start dummy scheduler helpers on cpus:");

	for (i = 0; i < ncpus; ++i) {
		dummy_pcpu_t dd = &dummy_pcpu[i];
		cpumask_t mask;

		CPUMASK_ASSBIT(mask, i);

		if (CPUMASK_TESTMASK(mask, smp_active_mask) == 0)
			continue;

		if (bootverbose)
			kprintf(" %d", i);

		lwkt_create(dummy_sched_thread, NULL, &dd->helper_thread, NULL,
			    TDF_NOSTART, i, "dsched %d", i);

		/*
		 * Allow user scheduling on the target cpu.  cpu #0 has
		 * already been enabled in dummyinit().
		 */
		if (i)
			ATOMIC_CPUMASK_NANDMASK(dummy_curprocmask, mask);
		ATOMIC_CPUMASK_ORMASK(dummy_rdyprocmask, mask);
	}
	if (bootverbose)
		kprintf("\n");
}
SYSINIT(uschedtd, SI_BOOT2_USCHED, SI_ORDER_SECOND,
	dummy_sched_thread_cpu_init, NULL);
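/*
 * The helper threads are created with TDF_NOSTART, so they do not begin
 * running at creation time.  Each one is first scheduled from
 * dummy_setrunqueue() (or forwarded to by a sibling helper) once there is
 * work for its cpu, at which point it enters the deschedule/scan loop above.
 */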