sys/kern/usched_dummy.c

   1 /*
   2  * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
   3  *
   4  * This code is derived from software contributed to The DragonFly Project
   5  * by Matthew Dillon <dillon@backplane.com>
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  *
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice, this list of conditions and the following disclaimer.
  13  * 2. Redistributions in binary form must reproduce the above copyright
  14  *    notice, this list of conditions and the following disclaimer in
  15  *    the documentation and/or other materials provided with the
  16  *    distribution.
  17  * 3. Neither the name of The DragonFly Project nor the names of its
  18  *    contributors may be used to endorse or promote products derived
  19  *    from this software without specific, prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
  24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
  25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
  27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
  29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  */
  34
  35 #include <sys/param.h>
  36 #include <sys/systm.h>
  37 #include <sys/kernel.h>
  38 #include <sys/lock.h>
  39 #include <sys/queue.h>
  40 #include <sys/proc.h>
  41 #include <sys/rtprio.h>
  42 #include <sys/uio.h>
  43 #include <sys/sysctl.h>
  44 #include <sys/resourcevar.h>
  45 #include <sys/spinlock.h>
  46 #include <machine/cpu.h>
  47 #include <machine/smp.h>
  48
  49 #include <sys/thread2.h>
  50 #include <sys/spinlock2.h>
  51 #include <sys/mplock2.h>
  52
/*
 * Priority layout.  Each rtprio class has its own base value, and the
 * bases are spaced MAXPRI apart.  dummy_resetpriority() adds the lwp's
 * rtprio.prio to the base that matches its rtprio type.
 */
#define MAXPRI                  128
#define PRIBASE_REALTIME        0
#define PRIBASE_NORMAL          MAXPRI
#define PRIBASE_IDLE            (MAXPRI * 2)
#define PRIBASE_THREAD          (MAXPRI * 3)
#define PRIBASE_NULL            (MAXPRI * 4)

/*
 * Shorthand for the per-lwp scheduler fields.  Both names alias members
 * of the bsd4 area inside lwp_usdata.
 */
#define lwp_priority    lwp_usdata.bsd4.priority
#define lwp_estcpu      lwp_usdata.bsd4.estcpu
  62
/*
 * Forward declarations for the scheduler entry points defined below.
 * Every function except dummy_select_curproc() is referenced from the
 * usched_dummy method table; dummy_select_curproc() is an internal
 * helper called by the acquire/release paths.
 */
static void dummy_acquire_curproc(struct lwp *lp);
static void dummy_release_curproc(struct lwp *lp);
static void dummy_select_curproc(globaldata_t gd);
static void dummy_setrunqueue(struct lwp *lp);
static void dummy_schedulerclock(struct lwp *lp, sysclock_t period,
                                sysclock_t cpstamp);
static void dummy_recalculate_estcpu(struct lwp *lp);
static void dummy_resetpriority(struct lwp *lp);
static void dummy_forking(struct lwp *plp, struct lwp *lp);
static void dummy_exiting(struct lwp *plp, struct proc *child);
static void dummy_uload_update(struct lwp *lp);
static void dummy_yield(struct lwp *lp);
static void dummy_changedcpu(struct lwp *lp);
  76
/*
 * Method table describing the dummy scheduler.
 *
 * The initializer is positional, so the entries must stay in exactly the
 * order in which struct usched declares its members.  NULL entries are
 * the slots this scheduler leaves unimplemented (see the per-entry
 * comments).
 */
struct usched usched_dummy = {
        { NULL },
        "dummy", "Dummy DragonFly Scheduler",
        NULL,                   /* default registration */
        NULL,                   /* default deregistration */
        dummy_acquire_curproc,
        dummy_release_curproc,
        dummy_setrunqueue,
        dummy_schedulerclock,
        dummy_recalculate_estcpu,
        dummy_resetpriority,
        dummy_forking,
        dummy_exiting,
        dummy_uload_update,
        NULL,                   /* setcpumask not supported */
        dummy_yield,
        dummy_changedcpu
};
  95
/*
 * Per-cpu state of the dummy scheduler.  One instance exists for each
 * possible cpu (see the dummy_pcpu[] array) and is indexed by gd_cpuid.
 */
struct usched_dummy_pcpu {
        int     rrcount;                /* ticks counted by dummy_schedulerclock()
                                         * since the last reschedule request */
        struct thread helper_thread;    /* helper LWKT thread for this cpu,
                                         * running dummy_sched_thread() */
        struct lwp *uschedcp;           /* lwp currently designated as this
                                         * cpu's user thread, or NULL */
};

/* Convenience pointer type for the per-cpu state. */
typedef struct usched_dummy_pcpu *dummy_pcpu_t;
 103
/* Per-cpu scheduler state, indexed by gd_cpuid. */
static struct usched_dummy_pcpu dummy_pcpu[MAXCPU];
/*
 * A cpu's bit is set whenever that cpu gets a designated user lwp
 * (uschedcp) and cleared when uschedcp is reset to NULL.  All bits start
 * out set; dummyinit() clears bit 0 and dummy_sched_thread_cpu_init()
 * clears the bits of the other active cpus.
 */
static cpumask_t dummy_curprocmask = CPUMASK_INITIALIZER_ALLONES;
/*
 * A cpu's bit is set by its helper thread when the helper parks itself,
 * and cleared by whichever cpu decides to wake that helper.
 */
static cpumask_t dummy_rdyprocmask;
/* Held around every modification of dummy_runq and dummy_runqcount. */
static struct spinlock dummy_spin;
/* Single global run queue of lwps waiting to become some cpu's uschedcp. */
static TAILQ_HEAD(rq, lwp) dummy_runq;
/* Number of lwps currently on dummy_runq. */
static int dummy_runqcount;

/*
 * Number of dummy_schedulerclock() ticks between user reschedule
 * requests.  Tunable at runtime through the read/write sysctl
 * kern.usched_dummy_rrinterval.
 */
static int usched_dummy_rrinterval = (ESTCPUFREQ + 9) / 10;
SYSCTL_INT(_kern, OID_AUTO, usched_dummy_rrinterval, CTLFLAG_RW,
        &usched_dummy_rrinterval, 0, "");
 114
 115 /*
 116  * Initialize the run queues at boot time, clear cpu 0 in curprocmask
 117  * to allow dummy scheduling on cpu 0.
 118  */
 119 static void
 120 dummyinit(void *dummy)
 121 {
 122         TAILQ_INIT(&dummy_runq);
 123         spin_init(&dummy_spin, "uscheddummy");
 124         ATOMIC_CPUMASK_NANDBIT(dummy_curprocmask, 0);
 125 }
 126 SYSINIT(runqueue, SI_BOOT2_USCHED, SI_ORDER_FIRST, dummyinit, NULL);
 127
 128 /*
 129  * DUMMY_ACQUIRE_CURPROC
 130  *
 131  * This function is called when the kernel intends to return to userland.
 132  * It is responsible for making the thread the current designated userland
 133  * thread for this cpu, blocking if necessary.
 134  *
 135  * The kernel will not depress our LWKT priority until after we return,
 136  * in case we have to shove over to another cpu.
 137  *
 138  * We must determine our thread's disposition before we switch away.  This
 139  * is very sensitive code.
 140  *
 141  * We are expected to handle userland reschedule requests here too.
 142  *
 143  * WARNING! THIS FUNCTION IS ALLOWED TO CAUSE THE CURRENT THREAD TO MIGRATE
 144  * TO ANOTHER CPU!  Because most of the kernel assumes that no migration will
 145  * occur, this function is called only under very controlled circumstances.
 146  *
 147  * MPSAFE
 148  */
static void
dummy_acquire_curproc(struct lwp *lp)
{
        /* Per-cpu scheduler state for the cpu we are executing on now. */
        globaldata_t gd = mycpu;
        dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
        thread_t td = lp->lwp_thread;

        /*
         * Possibly select another thread
         */
        if (user_resched_wanted())
                dummy_select_curproc(gd);

        /*
         * If this cpu has no current thread, select ourself
         *
         * This path is also taken when we already are the designated
         * lwp.  NOTE(review): dummy_runq is inspected here without
         * holding dummy_spin -- confirm that this is intentional.
         */
        if (dd->uschedcp == lp ||
            (dd->uschedcp == NULL && TAILQ_EMPTY(&dummy_runq))) {
                ATOMIC_CPUMASK_ORBIT(dummy_curprocmask, gd->gd_cpuid);
                dd->uschedcp = lp;
                return;
        }

        /*
         * If this cpu's current user process thread is not our thread,
         * deschedule ourselves and place us on the run queue, then
         * switch away.
         *
         * We loop until we become the current process.  It's a good idea
         * to run any passive release(s) before we mess with the scheduler
         * so our thread is in the expected state.
         */
        KKASSERT(dd->uschedcp != lp);
        if (td->td_release)
                td->td_release(lp->lwp_thread);
        do {
                crit_enter();
                lwkt_deschedule_self(td);
                dummy_setrunqueue(lp);
                /*
                 * dummy_setrunqueue() may have rescheduled us directly.
                 * Only bump the involuntary context switch counter when
                 * our thread is not flagged TDF_RUNQ at this point.
                 */
                if ((td->td_flags & TDF_RUNQ) == 0)
                        ++lp->lwp_ru.ru_nivcsw;
                lwkt_switch();          /* WE MAY MIGRATE TO ANOTHER CPU */
                crit_exit();
                /* Reload the per-cpu pointers, the cpu may have changed. */
                gd = mycpu;
                dd = &dummy_pcpu[gd->gd_cpuid];
                KKASSERT((lp->lwp_mpflags & LWP_MP_ONRUNQ) == 0);
        } while (dd->uschedcp != lp);
}
 197
 198 /*
 199  * DUMMY_RELEASE_CURPROC
 200  *
 201  * This routine detaches the current thread from the userland scheduler,
 202  * usually because the thread needs to run in the kernel (at kernel priority)
 203  * for a while.
 204  *
 205  * This routine is also responsible for selecting a new thread to
 206  * make the current thread.
 207  *
 208  * MPSAFE
 209  */
 210 static void
 211 dummy_release_curproc(struct lwp *lp)
 212 {
 213         globaldata_t gd = mycpu;
 214         dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
 215
 216         KKASSERT((lp->lwp_mpflags & LWP_MP_ONRUNQ) == 0);
 217         if (dd->uschedcp == lp) {
 218                 dummy_select_curproc(gd);
 219         }
 220 }
 221
 222 /*
 223  * DUMMY_SELECT_CURPROC
 224  *
 225  * Select a new current process for this cpu.  This satisfies a user
 226  * scheduler reschedule request so clear that too.
 227  *
 228  * This routine is also responsible for equal-priority round-robining,
 229  * typically triggered from dummy_schedulerclock().  In our dummy example
 230  * all the 'user' threads are LWKT scheduled all at once and we just
 231  * call lwkt_switch().
 232  *
 233  * MPSAFE
 234  */
 235 static
 236 void
 237 dummy_select_curproc(globaldata_t gd)
 238 {
 239         dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
 240         struct lwp *lp;
 241
 242         clear_user_resched();
 243         spin_lock(&dummy_spin);
 244         if ((lp = TAILQ_FIRST(&dummy_runq)) == NULL) {
 245                 dd->uschedcp = NULL;
 246                 ATOMIC_CPUMASK_NANDBIT(dummy_curprocmask, gd->gd_cpuid);
 247                 spin_unlock(&dummy_spin);
 248         } else {
 249                 --dummy_runqcount;
 250                 TAILQ_REMOVE(&dummy_runq, lp, lwp_procq);
 251                 atomic_clear_int(&lp->lwp_mpflags, LWP_MP_ONRUNQ);
 252                 dd->uschedcp = lp;
 253                 ATOMIC_CPUMASK_ORBIT(dummy_curprocmask, gd->gd_cpuid);
 254                 spin_unlock(&dummy_spin);
 255                 lwkt_acquire(lp->lwp_thread);
 256                 lwkt_schedule(lp->lwp_thread);
 257         }
 258 }
 259
 260 /*
 261  * DUMMY_SETRUNQUEUE
 262  *
 263  * This routine is called to schedule a new user process after a fork.
 264  * The scheduler module itself might also call this routine to place
 265  * the current process on the userland scheduler's run queue prior
 266  * to calling dummy_select_curproc().
 267  *
 268  * The caller may set LWP_PASSIVE_ACQ in lwp_flags to indicate that we should
 269  * attempt to leave the thread on the current cpu.
 270  *
 271  * MPSAFE
 272  */
 273 static void
 274 dummy_setrunqueue(struct lwp *lp)
 275 {
 276         globaldata_t gd = mycpu;
 277         dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
 278         cpumask_t mask;
 279         int cpuid;
 280
 281         if (dd->uschedcp == NULL) {
 282                 dd->uschedcp = lp;
 283                 ATOMIC_CPUMASK_ORBIT(dummy_curprocmask, gd->gd_cpuid);
 284                 lwkt_schedule(lp->lwp_thread);
 285         } else {
 286                 /*
 287                  * Add to our global runq
 288                  */
 289                 KKASSERT((lp->lwp_mpflags & LWP_MP_ONRUNQ) == 0);
 290                 spin_lock(&dummy_spin);
 291                 ++dummy_runqcount;
 292                 TAILQ_INSERT_TAIL(&dummy_runq, lp, lwp_procq);
 293                 atomic_set_int(&lp->lwp_mpflags, LWP_MP_ONRUNQ);
 294                 lwkt_giveaway(lp->lwp_thread);
 295
 296                 /* lp = TAILQ_FIRST(&dummy_runq); */
 297
 298                 /*
 299                  * Notify the next available cpu.  P.S. some
 300                  * cpu affinity could be done here.
 301                  *
 302                  * The rdyprocmask bit placeholds the knowledge that there
 303                  * is a process on the runq that needs service.  If the
 304                  * helper thread cannot find a home for it it will forward
 305                  * the request to another available cpu.
 306                  */
 307                 mask = dummy_rdyprocmask;
 308                 CPUMASK_NANDMASK(mask, dummy_curprocmask);
 309                 CPUMASK_ANDMASK(mask, gd->gd_other_cpus);
 310                 if (CPUMASK_TESTNZERO(mask)) {
 311                         cpuid = BSFCPUMASK(mask);
 312                         ATOMIC_CPUMASK_NANDBIT(dummy_rdyprocmask, cpuid);
 313                         spin_unlock(&dummy_spin);
 314                         lwkt_schedule(&dummy_pcpu[cpuid].helper_thread);
 315                 } else {
 316                         spin_unlock(&dummy_spin);
 317                 }
 318         }
 319 }
 320
 321 /*
 322  * This routine is called from a systimer IPI.  It must NEVER block.
 323  * If a lwp compatible with this scheduler is the currently running
 324  * thread this function is called with a non-NULL lp, otherwise it
 325  * will be called with a NULL lp.
 326  *
 * This routine is called at ESTCPUFREQ on each cpu independently.
 328  *
 329  * This routine typically queues a reschedule request, which will cause
 330  * the scheduler's BLAH_select_curproc() to be called as soon as possible.
 331  */
 332 static
 333 void
 334 dummy_schedulerclock(struct lwp *lp, sysclock_t period, sysclock_t cpstamp)
 335 {
 336         globaldata_t gd = mycpu;
 337         dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
 338
 339         if (lp == NULL)
 340                 return;
 341
 342         if (++dd->rrcount >= usched_dummy_rrinterval) {
 343                 dd->rrcount = 0;
 344                 need_user_resched();
 345         }
 346 }
 347
 348 /*
 349  * DUMMY_RECALCULATE_ESTCPU
 350  *
 351  * Called once a second for any process that is running or has slept
 * for less than 2 seconds.
 353  *
 354  * MPSAFE
 355  */
 356 static
 357 void
 358 dummy_recalculate_estcpu(struct lwp *lp)
 359 {
 360 }
 361
 362 /*
 363  * MPSAFE
 364  */
 365 static
 366 void
 367 dummy_yield(struct lwp *lp)
 368 {
 369         need_user_resched();
 370 }
 371
 372 static
 373 void
 374 dummy_changedcpu(struct lwp *lp __unused)
 375 {
 376 }
 377
 378 /*
 379  * DUMMY_RESETPRIORITY
 380  *
 381  * This routine is called after the kernel has potentially modified
 382  * the lwp_rtprio structure.  The target process may be running or sleeping
 383  * or scheduled but not yet running or owned by another cpu.  Basically,
 384  * it can be in virtually any state.
 385  *
 386  * This routine is called by fork1() for initial setup with the process
 * off the run queue, and also may be called normally with the process on or
 388  * off the run queue.
 389  *
 390  * MPSAFE
 391  */
 392 static void
 393 dummy_resetpriority(struct lwp *lp)
 394 {
 395         /* XXX spinlock usually needed */
 396         /*
 397          * Set p_priority for general process comparisons
 398          */
 399         switch(lp->lwp_rtprio.type) {
 400         case RTP_PRIO_REALTIME:
 401                 lp->lwp_priority = PRIBASE_REALTIME + lp->lwp_rtprio.prio;
 402                 return;
 403         case RTP_PRIO_NORMAL:
 404                 lp->lwp_priority = PRIBASE_NORMAL + lp->lwp_rtprio.prio;
 405                 break;
 406         case RTP_PRIO_IDLE:
 407                 lp->lwp_priority = PRIBASE_IDLE + lp->lwp_rtprio.prio;
 408                 return;
 409         case RTP_PRIO_THREAD:
 410                 lp->lwp_priority = PRIBASE_THREAD + lp->lwp_rtprio.prio;
 411                 return;
 412         }
 413
 414         /*
 415          * td_upri has normal sense (higher numbers are more desireable),
 416          * so negate it.
 417          */
 418         lp->lwp_thread->td_upri = -lp->lwp_priority;
 419         /* XXX spinlock usually needed */
 420 }
 421
 422
 423 /*
 424  * DUMMY_FORKING
 425  *
 426  * Called from fork1() when a new child process is being created.  Allows
 427  * the scheduler to predispose the child process before it gets scheduled.
 428  *
 429  * MPSAFE
 430  */
 431 static void
 432 dummy_forking(struct lwp *plp, struct lwp *lp)
 433 {
 434         lp->lwp_estcpu = plp->lwp_estcpu;
 435 #if 0
 436         ++plp->lwp_estcpu;
 437 #endif
 438 }
 439
 440 /*
 441  * Called when a lwp is being removed from this scheduler, typically
 442  * during lwp_exit().
 443  */
static void
dummy_exiting(struct lwp *plp, struct proc *child)
{
        /* Intentionally empty: neither argument is examined. */
}
 448
/*
 * Installed in the uload_update slot of the usched_dummy method table.
 * The dummy scheduler does nothing here.
 */
static void
dummy_uload_update(struct lwp *lp)
{
        /* Intentionally empty. */
}
 453
 454 /*
 455  * SMP systems may need a scheduler helper thread.  This is how one can be
 456  * setup.
 457  *
 458  * We use a neat LWKT scheduling trick to interlock the helper thread.  It
 459  * is possible to deschedule an LWKT thread and then do some work before
 460  * switching away.  The thread can be rescheduled at any time, even before
 461  * we switch away.
 462  *
 463  * MPSAFE
 464  */
static void
dummy_sched_thread(void *dummy)
{
    globaldata_t gd;
    dummy_pcpu_t dd;
    struct lwp *lp;
    cpumask_t cpumask;
    cpumask_t tmpmask;
    int cpuid;
    int tmpid;

    /*
     * Cache the identity of the cpu this helper runs on.  The helper is
     * created per-cpu by dummy_sched_thread_cpu_init().  Note that
     * cpumask is initialized here but not otherwise used in this function.
     */
    gd = mycpu;
    cpuid = gd->gd_cpuid;
    dd = &dummy_pcpu[cpuid];
    CPUMASK_ASSBIT(cpumask, cpuid);

    for (;;) {
        /*
         * Deschedule ourselves first, then advertise that we can be woken
         * by setting our rdyprocmask bit.  Any lwkt_schedule() issued by
         * another cpu from here on reschedules us, so a wakeup cannot be
         * lost before the lwkt_switch() at the bottom of the loop.
         */
        lwkt_deschedule_self(gd->gd_curthread);         /* interlock */
        ATOMIC_CPUMASK_ORBIT(dummy_rdyprocmask, cpuid);
        spin_lock(&dummy_spin);
        if (dd->uschedcp) {
                /*
                 * We raced another cpu trying to schedule a thread onto us.
                 * If the runq isn't empty hit another free cpu.
                 */
                tmpmask = dummy_rdyprocmask;
                CPUMASK_NANDMASK(tmpmask, dummy_curprocmask);
                CPUMASK_ANDMASK(tmpmask, gd->gd_other_cpus);
                if (CPUMASK_TESTNZERO(tmpmask) && dummy_runqcount) {
                        /*
                         * Forward the request: claim the lowest-numbered
                         * candidate by clearing its rdyprocmask bit, then
                         * wake its helper once the spinlock is released.
                         */
                        tmpid = BSFCPUMASK(tmpmask);
                        KKASSERT(tmpid != cpuid);
                        ATOMIC_CPUMASK_NANDBIT(dummy_rdyprocmask, tmpid);
                        spin_unlock(&dummy_spin);
                        lwkt_schedule(&dummy_pcpu[tmpid].helper_thread);
                } else {
                        spin_unlock(&dummy_spin);
                }
        } else if ((lp = TAILQ_FIRST(&dummy_runq)) != NULL) {
                /*
                 * This cpu has no designated lwp and one is waiting:
                 * dequeue it, designate it for this cpu, and make its
                 * thread runnable (same sequence as dummy_select_curproc()).
                 */
                --dummy_runqcount;
                TAILQ_REMOVE(&dummy_runq, lp, lwp_procq);
                atomic_clear_int(&lp->lwp_mpflags, LWP_MP_ONRUNQ);
                dd->uschedcp = lp;
                ATOMIC_CPUMASK_ORBIT(dummy_curprocmask, cpuid);
                spin_unlock(&dummy_spin);
                lwkt_acquire(lp->lwp_thread);
                lwkt_schedule(lp->lwp_thread);
        } else {
                /* Nothing to do: no designated lwp and an empty run queue. */
                spin_unlock(&dummy_spin);
        }
        /* Switch away until somebody schedules this helper again. */
        lwkt_switch();
    }
}
 517
 518 /*
 519  * Setup our scheduler helpers.  Note that curprocmask bit 0 has already
 * been cleared by dummyinit() and we should not mess with it further.
 521  */
 522 static void
 523 dummy_sched_thread_cpu_init(void)
 524 {
 525     int i;
 526
 527     if (bootverbose)
 528         kprintf("start dummy scheduler helpers on cpus:");
 529
 530     for (i = 0; i < ncpus; ++i) {
 531         dummy_pcpu_t dd = &dummy_pcpu[i];
 532         cpumask_t mask;
 533
 534         CPUMASK_ASSBIT(mask, i);
 535
 536         if (CPUMASK_TESTMASK(mask, smp_active_mask) == 0)
 537             continue;
 538
 539         if (bootverbose)
 540             kprintf(" %d", i);
 541
 542         lwkt_create(dummy_sched_thread, NULL, NULL, &dd->helper_thread,
 543                     TDF_NOSTART, i, "dsched %d", i);
 544
 545         /*
 546          * Allow user scheduling on the target cpu.  cpu #0 has already
 547          * been enabled in rqinit().
 548          */
 549         if (i)
 550                 ATOMIC_CPUMASK_NANDMASK(dummy_curprocmask, mask);
 551         ATOMIC_CPUMASK_ORMASK(dummy_rdyprocmask, mask);
 552     }
 553     if (bootverbose)
 554         kprintf("\n");
 555 }
 556 SYSINIT(uschedtd, SI_BOOT2_USCHED, SI_ORDER_SECOND,
 557         dummy_sched_thread_cpu_init, NULL);