// libgo/runtime/proc.c (from official-gcc.git)
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 #include <limits.h>
6 #include <signal.h>
7 #include <stdlib.h>
8 #include <pthread.h>
9 #include <unistd.h>
11 #include "config.h"
13 #ifdef HAVE_DL_ITERATE_PHDR
14 #include <link.h>
15 #endif
17 #include "runtime.h"
18 #include "arch.h"
19 #include "defs.h"
20 #include "malloc.h"
21 #include "go-type.h"
22 #include "go-defer.h"
24 #ifdef USING_SPLIT_STACK
26 /* FIXME: These are not declared anywhere. */
28 extern void __splitstack_getcontext(void *context[10]);
30 extern void __splitstack_setcontext(void *context[10]);
32 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
34 extern void * __splitstack_resetcontext(void *context[10], size_t *);
36 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
37 void **);
39 extern void __splitstack_block_signals (int *, int *);
41 extern void __splitstack_block_signals_context (void *context[10], int *,
42 int *);
44 #endif
46 #ifndef PTHREAD_STACK_MIN
47 # define PTHREAD_STACK_MIN 8192
48 #endif
50 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
51 # define StackMin PTHREAD_STACK_MIN
52 #else
53 # define StackMin ((sizeof(char *) < 8) ? 2 * 1024 * 1024 : 4 * 1024 * 1024)
54 #endif
56 uintptr runtime_stacks_sys;
58 static void gtraceback(G*);
60 #ifdef __rtems__
61 #define __thread
62 #endif
64 static __thread G *g;
65 static __thread M *m;
67 #ifndef SETCONTEXT_CLOBBERS_TLS
69 static inline void
70 initcontext(void)
74 static inline void
75 fixcontext(ucontext_t *c __attribute__ ((unused)))
79 #else
81 # if defined(__x86_64__) && defined(__sun__)
83 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
84 // register to that of the thread which called getcontext. The effect
85 // is that the address of all __thread variables changes. This bug
86 // also affects pthread_self() and pthread_getspecific. We work
87 // around it by clobbering the context field directly to keep %fs the
88 // same.
90 static __thread greg_t fs;
92 static inline void
93 initcontext(void)
95 ucontext_t c;
97 getcontext(&c);
98 fs = c.uc_mcontext.gregs[REG_FSBASE];
101 static inline void
102 fixcontext(ucontext_t* c)
104 c->uc_mcontext.gregs[REG_FSBASE] = fs;
107 # elif defined(__NetBSD__)
109 // NetBSD has a bug: setcontext clobbers tlsbase, we need to save
110 // and restore it ourselves.
112 static __thread __greg_t tlsbase;
114 static inline void
115 initcontext(void)
117 ucontext_t c;
119 getcontext(&c);
120 tlsbase = c.uc_mcontext._mc_tlsbase;
123 static inline void
124 fixcontext(ucontext_t* c)
126 c->uc_mcontext._mc_tlsbase = tlsbase;
129 # else
131 # error unknown case for SETCONTEXT_CLOBBERS_TLS
133 # endif
135 #endif
137 // We can not always refer to the TLS variables directly. The
138 // compiler will call tls_get_addr to get the address of the variable,
139 // and it may hold it in a register across a call to schedule. When
140 // we get back from the call we may be running in a different thread,
141 // in which case the register now points to the TLS variable for a
142 // different thread. We use non-inlinable functions to avoid this
143 // when necessary.
145 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
148 runtime_g(void)
150 return g;
153 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
156 runtime_m(void)
158 return m;
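// To make the hazard described above concrete, here is a hypothetical
// illustration (not runtime code) of why the accessors must be noinline:
// if the compiler caches the address produced by __tls_get_addr across a
// call that can reschedule onto another thread, the cached pointer still
// refers to the old thread's TLS block.  The name
// some_call_that_may_reschedule() is made up for the example.
#if 0
static __thread int counter;

void
broken(void)
{
	int *p;

	p = &counter;				// compiler may cache this thread's TLS address
	some_call_that_may_reschedule();	// we may resume on a different OS thread
	*p = 1;					// may now write the *old* thread's counter
}
// The noinline, no_split_stack runtime_g()/runtime_m() accessors above force
// the TLS address to be re-resolved after any such call.
#endif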
161 // Set m and g.
162 void
163 runtime_setmg(M* mp, G* gp)
165 m = mp;
166 g = gp;
169 // Start a new thread.
170 static void
171 runtime_newosproc(M *mp)
173 pthread_attr_t attr;
174 sigset_t clear, old;
175 pthread_t tid;
176 int ret;
178 if(pthread_attr_init(&attr) != 0)
179 runtime_throw("pthread_attr_init");
180 if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
181 runtime_throw("pthread_attr_setdetachstate");
183 // Block signals during pthread_create so that the new thread
184 // starts with signals disabled. It will enable them in minit.
185 sigfillset(&clear);
187 #ifdef SIGTRAP
188 // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
189 sigdelset(&clear, SIGTRAP);
190 #endif
192 sigemptyset(&old);
193 pthread_sigmask(SIG_BLOCK, &clear, &old);
194 ret = pthread_create(&tid, &attr, runtime_mstart, mp);
195 pthread_sigmask(SIG_SETMASK, &old, nil);
197 if (ret != 0)
198 runtime_throw("pthread_create");
201 // First function run by a new goroutine. This replaces gogocall.
202 static void
203 kickoff(void)
205 void (*fn)(void*);
207 if(g->traceback != nil)
208 gtraceback(g);
210 fn = (void (*)(void*))(g->entry);
211 fn(g->param);
212 runtime_goexit();
215 // Switch context to a different goroutine. This is like longjmp.
216 void runtime_gogo(G*) __attribute__ ((noinline));
217 void
218 runtime_gogo(G* newg)
220 #ifdef USING_SPLIT_STACK
221 __splitstack_setcontext(&newg->stack_context[0]);
222 #endif
223 g = newg;
224 newg->fromgogo = true;
225 fixcontext(&newg->context);
226 setcontext(&newg->context);
227 runtime_throw("gogo setcontext returned");
230 // Save context and call fn passing g as a parameter. This is like
231 // setjmp. Because getcontext always returns 0, unlike setjmp, we use
232 // g->fromgogo as a code. It will be true if we got here via
233 // setcontext. g == nil the first time this is called in a new m.
234 void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
235 void
236 runtime_mcall(void (*pfn)(G*))
238 M *mp;
239 G *gp;
241 // Ensure that all registers are on the stack for the garbage
242 // collector.
243 __builtin_unwind_init();
245 mp = m;
246 gp = g;
247 if(gp == mp->g0)
248 runtime_throw("runtime: mcall called on m->g0 stack");
250 if(gp != nil) {
252 #ifdef USING_SPLIT_STACK
253 __splitstack_getcontext(&g->stack_context[0]);
254 #else
255 gp->gcnext_sp = &pfn;
256 #endif
257 gp->fromgogo = false;
258 getcontext(&gp->context);
260 // When we return from getcontext, we may be running
261 // in a new thread. That means that m and g may have
262 // changed. They are global variables so we will
263 // reload them, but the addresses of m and g may be
264 // cached in our local stack frame, and those
265 // addresses may be wrong. Call functions to reload
266 // the values for this thread.
267 mp = runtime_m();
268 gp = runtime_g();
270 if(gp->traceback != nil)
271 gtraceback(gp);
273 if (gp == nil || !gp->fromgogo) {
274 #ifdef USING_SPLIT_STACK
275 __splitstack_setcontext(&mp->g0->stack_context[0]);
276 #endif
277 mp->g0->entry = (byte*)pfn;
278 mp->g0->param = gp;
280 // It's OK to set g directly here because this case
281 // can not occur if we got here via a setcontext to
282 // the getcontext call just above.
283 g = mp->g0;
285 fixcontext(&mp->g0->context);
286 setcontext(&mp->g0->context);
287 runtime_throw("runtime: mcall function returned");
291 // Goroutine scheduler
292 // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
294 // The main concepts are:
295 // G - goroutine.
296 // M - worker thread, or machine.
297 // P - processor, a resource that is required to execute Go code.
298 // M must have an associated P to execute Go code, however it can be
299 // blocked or in a syscall w/o an associated P.
301 // Design doc at http://golang.org/s/go11sched.
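// As a reading aid, a minimal sketch of the worker-thread behaviour implied
// by the model above: an M must hold a P before running Go code, and looks
// for work locally, then globally.  This is only a simplification of
// schedule()/findrunnable()/execute() defined later in this file;
// pidleget_or_block() is a hypothetical helper, not a real function.
#if 0
static void
run_one(void)
{
	G *gp;

	if(m->p == nil)
		acquirep(pidleget_or_block());	// hypothetical: an M needs a P first
	gp = runqget(m->p);			// local run queue first
	if(gp == nil)
		gp = findrunnable();		// global queue, work stealing, netpoll
	execute(gp);				// runs gp; re-enters schedule() when gp stops
}
#endif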
303 typedef struct Sched Sched;
304 struct Sched {
305 Lock;
307 uint64 goidgen;
308 M* midle; // idle m's waiting for work
309 int32 nmidle; // number of idle m's waiting for work
310 int32 nmidlelocked; // number of locked m's waiting for work
311 int32 mcount; // number of m's that have been created
312 int32 maxmcount; // maximum number of m's allowed (or die)
314 P* pidle; // idle P's
315 uint32 npidle;
316 uint32 nmspinning;
318 // Global runnable queue.
319 G* runqhead;
320 G* runqtail;
321 int32 runqsize;
323 // Global cache of dead G's.
324 Lock gflock;
325 G* gfree;
327 uint32 gcwaiting; // gc is waiting to run
328 int32 stopwait;
329 Note stopnote;
330 uint32 sysmonwait;
331 Note sysmonnote;
332 uint64 lastpoll;
334 int32 profilehz; // cpu profiling rate
337 enum
339 // The max value of GOMAXPROCS.
340 // There are no fundamental restrictions on the value.
341 MaxGomaxprocs = 1<<8,
343 // Number of goroutine ids to grab from runtime_sched.goidgen to local per-P cache at once.
344 	// 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
345 GoidCacheBatch = 16,
348 Sched runtime_sched;
349 int32 runtime_gomaxprocs;
350 uint32 runtime_needextram = 1;
351 bool runtime_iscgo = true;
352 M runtime_m0;
353 G runtime_g0; // idle goroutine for m0
354 G* runtime_lastg;
355 M* runtime_allm;
356 P** runtime_allp;
357 M* runtime_extram;
358 int8* runtime_goos;
359 int32 runtime_ncpu;
360 bool runtime_precisestack;
361 static int32 newprocs;
363 static Lock allglock; // the following vars are protected by this lock or by stoptheworld
364 G** runtime_allg;
365 uintptr runtime_allglen;
366 static uintptr allgcap;
368 void* runtime_mstart(void*);
369 static void runqput(P*, G*);
370 static G* runqget(P*);
371 static bool runqputslow(P*, G*, uint32, uint32);
372 static G* runqsteal(P*, P*);
373 static void mput(M*);
374 static M* mget(void);
375 static void mcommoninit(M*);
376 static void schedule(void);
377 static void procresize(int32);
378 static void acquirep(P*);
379 static P* releasep(void);
380 static void newm(void(*)(void), P*);
381 static void stopm(void);
382 static void startm(P*, bool);
383 static void handoffp(P*);
384 static void wakep(void);
385 static void stoplockedm(void);
386 static void startlockedm(G*);
387 static void sysmon(void);
388 static uint32 retake(int64);
389 static void incidlelocked(int32);
390 static void checkdead(void);
391 static void exitsyscall0(G*);
392 static void park0(G*);
393 static void goexit0(G*);
394 static void gfput(P*, G*);
395 static G* gfget(P*);
396 static void gfpurge(P*);
397 static void globrunqput(G*);
398 static void globrunqputbatch(G*, G*, int32);
399 static G* globrunqget(P*, int32);
400 static P* pidleget(void);
401 static void pidleput(P*);
402 static void injectglist(G*);
403 static bool preemptall(void);
404 static bool exitsyscallfast(void);
405 static void allgadd(G*);
407 // The bootstrap sequence is:
409 // call osinit
410 // call schedinit
411 // make & queue new G
412 // call runtime_mstart
414 // The new G calls runtime_main.
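// A sketch of the bootstrap sequence listed above, expressed as C.  This is
// illustrative only: the real driver lives outside this file (go-main.c in
// gccgo) and differs in detail.
#if 0
int
bootstrap(void)
{
	runtime_osinit();		// "call osinit"
	runtime_schedinit();		// "call schedinit"
	__go_go(runtime_main, nil);	// "make & queue new G" running runtime_main
	runtime_mstart(runtime_m());	// "call runtime_mstart"; does not return
	return 0;
}
#endif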
415 void
416 runtime_schedinit(void)
418 int32 n, procs;
419 const byte *p;
420 Eface i;
422 m = &runtime_m0;
423 g = &runtime_g0;
424 m->g0 = g;
425 m->curg = g;
426 g->m = m;
428 initcontext();
430 runtime_sched.maxmcount = 10000;
431 runtime_precisestack = 0;
433 // runtime_symtabinit();
434 runtime_mallocinit();
435 mcommoninit(m);
437 // Initialize the itable value for newErrorCString,
438 // so that the next time it gets called, possibly
439 // in a fault during a garbage collection, it will not
440 	// need to allocate memory.
441 runtime_newErrorCString(0, &i);
443 // Initialize the cached gotraceback value, since
444 // gotraceback calls getenv, which mallocs on Plan 9.
445 runtime_gotraceback(nil);
447 runtime_goargs();
448 runtime_goenvs();
449 runtime_parsedebugvars();
451 runtime_sched.lastpoll = runtime_nanotime();
452 procs = 1;
453 p = runtime_getenv("GOMAXPROCS");
454 if(p != nil && (n = runtime_atoi(p)) > 0) {
455 if(n > MaxGomaxprocs)
456 n = MaxGomaxprocs;
457 procs = n;
459 runtime_allp = runtime_malloc((MaxGomaxprocs+1)*sizeof(runtime_allp[0]));
460 procresize(procs);
462 // Can not enable GC until all roots are registered.
463 // mstats.enablegc = 1;
466 extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
467 extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");
469 static void
470 initDone(void *arg __attribute__ ((unused))) {
471 runtime_unlockOSThread();
474 // The main goroutine.
475 // Note: C frames in general are not copyable during stack growth, for two reasons:
476 // 1) We don't know where in a frame to find pointers to other stack locations.
477 // 2) There's no guarantee that globals or heap values do not point into the frame.
479 // The C frame for runtime.main is copyable, because:
480 // 1) There are no pointers to other stack locations in the frame
481 // (d.fn points at a global, d.link is nil, d.argp is -1).
482 // 2) The only pointer into this frame is from the defer chain,
483 // which is explicitly handled during stack copying.
484 void
485 runtime_main(void* dummy __attribute__((unused)))
487 Defer d;
488 _Bool frame;
490 newm(sysmon, nil);
492 // Lock the main goroutine onto this, the main OS thread,
493 // during initialization. Most programs won't care, but a few
494 // do require certain calls to be made by the main thread.
495 // Those can arrange for main.main to run in the main thread
496 // by calling runtime.LockOSThread during initialization
497 // to preserve the lock.
498 runtime_lockOSThread();
500 // Defer unlock so that runtime.Goexit during init does the unlock too.
501 d.__pfn = initDone;
502 d.__next = g->defer;
503 d.__arg = (void*)-1;
504 d.__panic = g->panic;
505 d.__retaddr = nil;
506 d.__makefunc_can_recover = 0;
507 d.__frame = &frame;
508 d.__special = true;
509 g->defer = &d;
511 if(m != &runtime_m0)
512 runtime_throw("runtime_main not on m0");
513 __go_go(runtime_MHeap_Scavenger, nil);
514 main_init();
516 if(g->defer != &d || d.__pfn != initDone)
517 runtime_throw("runtime: bad defer entry after init");
518 g->defer = d.__next;
519 runtime_unlockOSThread();
521 // For gccgo we have to wait until after main is initialized
522 // to enable GC, because initializing main registers the GC
523 // roots.
524 mstats.enablegc = 1;
526 main_main();
528 // Make racy client program work: if panicking on
529 // another goroutine at the same time as main returns,
530 // let the other goroutine finish printing the panic trace.
531 // Once it does, it will exit. See issue 3934.
532 if(runtime_panicking)
533 runtime_park(nil, nil, "panicwait");
535 runtime_exit(0);
536 for(;;)
537 *(int32*)0 = 0;
540 void
541 runtime_goroutineheader(G *gp)
543 const char *status;
544 int64 waitfor;
546 switch(gp->status) {
547 case Gidle:
548 status = "idle";
549 break;
550 case Grunnable:
551 status = "runnable";
552 break;
553 case Grunning:
554 status = "running";
555 break;
556 case Gsyscall:
557 status = "syscall";
558 break;
559 case Gwaiting:
560 if(gp->waitreason)
561 status = gp->waitreason;
562 else
563 status = "waiting";
564 break;
565 default:
566 status = "???";
567 break;
570 // approx time the G is blocked, in minutes
571 waitfor = 0;
572 if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince != 0)
573 waitfor = (runtime_nanotime() - gp->waitsince) / (60LL*1000*1000*1000);
575 if(waitfor < 1)
576 runtime_printf("goroutine %D [%s]:\n", gp->goid, status);
577 else
578 runtime_printf("goroutine %D [%s, %D minutes]:\n", gp->goid, status, waitfor);
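// For reference, the two formats above produce headers like the following
// (goroutine ids and wait reasons are illustrative values only):
//
//	goroutine 7 [chan receive]:
//	goroutine 12 [select, 3 minutes]: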
581 void
582 runtime_printcreatedby(G *g)
584 if(g != nil && g->gopc != 0 && g->goid != 1) {
585 String fn;
586 String file;
587 intgo line;
589 if(__go_file_line(g->gopc - 1, &fn, &file, &line)) {
590 runtime_printf("created by %S\n", fn);
591 runtime_printf("\t%S:%D\n", file, (int64) line);
596 struct Traceback
598 G* gp;
599 Location locbuf[TracebackMaxFrames];
600 int32 c;
603 void
604 runtime_tracebackothers(G * volatile me)
606 G * volatile gp;
607 Traceback tb;
608 int32 traceback;
609 volatile uintptr i;
611 tb.gp = me;
612 traceback = runtime_gotraceback(nil);
614 // Show the current goroutine first, if we haven't already.
615 if((gp = m->curg) != nil && gp != me) {
616 runtime_printf("\n");
617 runtime_goroutineheader(gp);
618 gp->traceback = &tb;
620 #ifdef USING_SPLIT_STACK
621 __splitstack_getcontext(&me->stack_context[0]);
622 #endif
623 getcontext(&me->context);
625 if(gp->traceback != nil) {
626 runtime_gogo(gp);
629 runtime_printtrace(tb.locbuf, tb.c, false);
630 runtime_printcreatedby(gp);
633 runtime_lock(&allglock);
634 for(i = 0; i < runtime_allglen; i++) {
635 gp = runtime_allg[i];
636 if(gp == me || gp == m->curg || gp->status == Gdead)
637 continue;
638 if(gp->issystem && traceback < 2)
639 continue;
640 runtime_printf("\n");
641 runtime_goroutineheader(gp);
643 // Our only mechanism for doing a stack trace is
644 // _Unwind_Backtrace. And that only works for the
645 // current thread, not for other random goroutines.
646 // So we need to switch context to the goroutine, get
647 // the backtrace, and then switch back.
649 // This means that if g is running or in a syscall, we
650 // can't reliably print a stack trace. FIXME.
652 if(gp->status == Grunning) {
653 runtime_printf("\tgoroutine running on other thread; stack unavailable\n");
654 runtime_printcreatedby(gp);
655 } else if(gp->status == Gsyscall) {
656 runtime_printf("\tgoroutine in C code; stack unavailable\n");
657 runtime_printcreatedby(gp);
658 } else {
659 gp->traceback = &tb;
661 #ifdef USING_SPLIT_STACK
662 __splitstack_getcontext(&me->stack_context[0]);
663 #endif
664 getcontext(&me->context);
666 if(gp->traceback != nil) {
667 runtime_gogo(gp);
670 runtime_printtrace(tb.locbuf, tb.c, false);
671 runtime_printcreatedby(gp);
674 runtime_unlock(&allglock);
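// Condensed, the handshake used above is: publish the Traceback buffer on the
// target G, save our own context, and gogo to the target; the target notices
// g->traceback (in kickoff or on return from getcontext in runtime_mcall),
// calls gtraceback() to fill the buffer, and gogo's back to the requester.
// The fragment below only restates that shape for readability.
#if 0
gp->traceback = &tb;		// ask gp to trace itself
getcontext(&me->context);	// resume point for when gp switches back here
if(gp->traceback != nil)
	runtime_gogo(gp);	// gp runs gtraceback(): fills tb.locbuf, clears
				// gp->traceback, then gogo's back to tb.gp (us)
runtime_printtrace(tb.locbuf, tb.c, false);
#endif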
677 static void
678 checkmcount(void)
680 // sched lock is held
681 if(runtime_sched.mcount > runtime_sched.maxmcount) {
682 runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched.maxmcount);
683 runtime_throw("thread exhaustion");
687 // Do a stack trace of gp, and then restore the context to
688 // the goroutine that requested the trace (gp->traceback->gp).
690 static void
691 gtraceback(G* gp)
693 Traceback* traceback;
695 traceback = gp->traceback;
696 gp->traceback = nil;
697 traceback->c = runtime_callers(1, traceback->locbuf,
698 sizeof traceback->locbuf / sizeof traceback->locbuf[0], false);
699 runtime_gogo(traceback->gp);
702 static void
703 mcommoninit(M *mp)
705 // If there is no mcache runtime_callers() will crash,
706 // and we are most likely in sysmon thread so the stack is senseless anyway.
707 if(m->mcache)
708 runtime_callers(1, mp->createstack, nelem(mp->createstack), false);
710 mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
712 runtime_lock(&runtime_sched);
713 mp->id = runtime_sched.mcount++;
714 checkmcount();
715 runtime_mpreinit(mp);
717 // Add to runtime_allm so garbage collector doesn't free m
718 // when it is just in a register or thread-local storage.
719 mp->alllink = runtime_allm;
720 // runtime_NumCgoCall() iterates over allm w/o schedlock,
721 // so we need to publish it safely.
722 runtime_atomicstorep(&runtime_allm, mp);
723 runtime_unlock(&runtime_sched);
726 // Mark gp ready to run.
727 void
728 runtime_ready(G *gp)
730 // Mark runnable.
731 m->locks++; // disable preemption because it can be holding p in a local var
732 if(gp->status != Gwaiting) {
733 runtime_printf("goroutine %D has status %d\n", gp->goid, gp->status);
734 runtime_throw("bad g->status in ready");
736 gp->status = Grunnable;
737 runqput(m->p, gp);
738 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0) // TODO: fast atomic
739 wakep();
740 m->locks--;
743 int32
744 runtime_gcprocs(void)
746 int32 n;
748 // Figure out how many CPUs to use during GC.
749 // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
750 runtime_lock(&runtime_sched);
751 n = runtime_gomaxprocs;
752 if(n > runtime_ncpu)
753 n = runtime_ncpu > 0 ? runtime_ncpu : 1;
754 if(n > MaxGcproc)
755 n = MaxGcproc;
756 if(n > runtime_sched.nmidle+1) // one M is currently running
757 n = runtime_sched.nmidle+1;
758 runtime_unlock(&runtime_sched);
759 return n;
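// Worked example (illustrative numbers): with GOMAXPROCS=8 on a 4-CPU
// machine and 2 idle M's parked in mput/mget, the result is
//	min(8, 4, MaxGcproc, 2+1) = 3
// i.e. the currently running M plus two helpers woken by runtime_helpgc.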
762 static bool
763 needaddgcproc(void)
765 int32 n;
767 runtime_lock(&runtime_sched);
768 n = runtime_gomaxprocs;
769 if(n > runtime_ncpu)
770 n = runtime_ncpu;
771 if(n > MaxGcproc)
772 n = MaxGcproc;
773 n -= runtime_sched.nmidle+1; // one M is currently running
774 runtime_unlock(&runtime_sched);
775 return n > 0;
778 void
779 runtime_helpgc(int32 nproc)
781 M *mp;
782 int32 n, pos;
784 runtime_lock(&runtime_sched);
785 pos = 0;
786 for(n = 1; n < nproc; n++) { // one M is currently running
787 if(runtime_allp[pos]->mcache == m->mcache)
788 pos++;
789 mp = mget();
790 if(mp == nil)
791 runtime_throw("runtime_gcprocs inconsistency");
792 mp->helpgc = n;
793 mp->mcache = runtime_allp[pos]->mcache;
794 pos++;
795 runtime_notewakeup(&mp->park);
797 runtime_unlock(&runtime_sched);
800 // Similar to stoptheworld, but best-effort and can be called several times.
801 // There is no reverse operation; it is used only while crashing.
802 // This function must not lock any mutexes.
803 void
804 runtime_freezetheworld(void)
806 int32 i;
808 if(runtime_gomaxprocs == 1)
809 return;
810 // stopwait and preemption requests can be lost
811 // due to races with concurrently executing threads,
812 // so try several times
813 for(i = 0; i < 5; i++) {
814 // this should tell the scheduler to not start any new goroutines
815 runtime_sched.stopwait = 0x7fffffff;
816 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
817 // this should stop running goroutines
818 if(!preemptall())
819 break; // no running goroutines
820 runtime_usleep(1000);
822 // to be sure
823 runtime_usleep(1000);
824 preemptall();
825 runtime_usleep(1000);
828 void
829 runtime_stoptheworld(void)
831 int32 i;
832 uint32 s;
833 P *p;
834 bool wait;
836 runtime_lock(&runtime_sched);
837 runtime_sched.stopwait = runtime_gomaxprocs;
838 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
839 preemptall();
840 // stop current P
841 m->p->status = Pgcstop;
842 runtime_sched.stopwait--;
843 // try to retake all P's in Psyscall status
844 for(i = 0; i < runtime_gomaxprocs; i++) {
845 p = runtime_allp[i];
846 s = p->status;
847 if(s == Psyscall && runtime_cas(&p->status, s, Pgcstop))
848 runtime_sched.stopwait--;
850 // stop idle P's
851 while((p = pidleget()) != nil) {
852 p->status = Pgcstop;
853 runtime_sched.stopwait--;
855 wait = runtime_sched.stopwait > 0;
856 runtime_unlock(&runtime_sched);
858 // wait for remaining P's to stop voluntarily
859 if(wait) {
860 runtime_notesleep(&runtime_sched.stopnote);
861 runtime_noteclear(&runtime_sched.stopnote);
863 if(runtime_sched.stopwait)
864 runtime_throw("stoptheworld: not stopped");
865 for(i = 0; i < runtime_gomaxprocs; i++) {
866 p = runtime_allp[i];
867 if(p->status != Pgcstop)
868 runtime_throw("stoptheworld: not stopped");
872 static void
873 mhelpgc(void)
875 m->helpgc = -1;
878 void
879 runtime_starttheworld(void)
881 P *p, *p1;
882 M *mp;
883 G *gp;
884 bool add;
886 m->locks++; // disable preemption because it can be holding p in a local var
887 gp = runtime_netpoll(false); // non-blocking
888 injectglist(gp);
889 add = needaddgcproc();
890 runtime_lock(&runtime_sched);
891 if(newprocs) {
892 procresize(newprocs);
893 newprocs = 0;
894 } else
895 procresize(runtime_gomaxprocs);
896 runtime_sched.gcwaiting = 0;
898 p1 = nil;
899 while((p = pidleget()) != nil) {
900 // procresize() puts p's with work at the beginning of the list.
901 // Once we reach a p without a run queue, the rest don't have one either.
902 if(p->runqhead == p->runqtail) {
903 pidleput(p);
904 break;
906 p->m = mget();
907 p->link = p1;
908 p1 = p;
910 if(runtime_sched.sysmonwait) {
911 runtime_sched.sysmonwait = false;
912 runtime_notewakeup(&runtime_sched.sysmonnote);
914 runtime_unlock(&runtime_sched);
916 while(p1) {
917 p = p1;
918 p1 = p1->link;
919 if(p->m) {
920 mp = p->m;
921 p->m = nil;
922 if(mp->nextp)
923 runtime_throw("starttheworld: inconsistent mp->nextp");
924 mp->nextp = p;
925 runtime_notewakeup(&mp->park);
926 } else {
927 // Start M to run P. Do not start another M below.
928 newm(nil, p);
929 add = false;
933 if(add) {
934 // If GC could have used another helper proc, start one now,
935 // in the hope that it will be available next time.
936 // It would have been even better to start it before the collection,
937 // but doing so requires allocating memory, so it's tricky to
938 // coordinate. This lazy approach works out in practice:
939 // we don't mind if the first couple gc rounds don't have quite
940 // the maximum number of procs.
941 newm(mhelpgc, nil);
943 m->locks--;
946 // Called to start an M.
947 void*
948 runtime_mstart(void* mp)
950 m = (M*)mp;
951 g = m->g0;
953 initcontext();
955 g->entry = nil;
956 g->param = nil;
958 // Record top of stack for use by mcall.
959 // Once we call schedule we're never coming back,
960 // so other calls can reuse this stack space.
961 #ifdef USING_SPLIT_STACK
962 __splitstack_getcontext(&g->stack_context[0]);
963 #else
964 g->gcinitial_sp = &mp;
965 // Setting gcstack_size to 0 is a marker meaning that gcinitial_sp
966 // is the top of the stack, not the bottom.
967 g->gcstack_size = 0;
968 g->gcnext_sp = &mp;
969 #endif
970 getcontext(&g->context);
972 if(g->entry != nil) {
973 // Got here from mcall.
974 void (*pfn)(G*) = (void (*)(G*))g->entry;
975 G* gp = (G*)g->param;
976 pfn(gp);
977 *(int*)0x21 = 0x21;
979 runtime_minit();
981 #ifdef USING_SPLIT_STACK
983 int dont_block_signals = 0;
984 __splitstack_block_signals(&dont_block_signals, nil);
986 #endif
988 // Install signal handlers; after minit so that minit can
989 // prepare the thread to be able to handle the signals.
990 if(m == &runtime_m0)
991 runtime_initsig();
993 if(m->mstartfn)
994 m->mstartfn();
996 if(m->helpgc) {
997 m->helpgc = 0;
998 stopm();
999 } else if(m != &runtime_m0) {
1000 acquirep(m->nextp);
1001 m->nextp = nil;
1003 schedule();
1005 // TODO(brainman): This point is never reached, because scheduler
1006 // does not release os threads at the moment. But once this path
1007 // is enabled, we must remove our seh here.
1009 return nil;
1012 typedef struct CgoThreadStart CgoThreadStart;
1013 struct CgoThreadStart
1015 M *m;
1016 G *g;
1017 uintptr *tls;
1018 void (*fn)(void);
1021 // Allocate a new m unassociated with any thread.
1022 // Can use p for allocation context if needed.
1024 runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, size_t* ret_g0_stacksize)
1026 M *mp;
1028 m->locks++; // disable GC because it can be called from sysmon
1029 if(m->p == nil)
1030 acquirep(p); // temporarily borrow p for mallocs in this function
1031 #if 0
1032 if(mtype == nil) {
1033 Eface e;
1034 runtime_gc_m_ptr(&e);
1035 mtype = ((const PtrType*)e.__type_descriptor)->__element_type;
1037 #endif
1039 mp = runtime_mal(sizeof *mp);
1040 mcommoninit(mp);
1041 mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize);
1043 if(p == m->p)
1044 releasep();
1045 m->locks--;
1047 return mp;
1050 static G*
1051 allocg(void)
1053 G *gp;
1054 // static Type *gtype;
1056 // if(gtype == nil) {
1057 // Eface e;
1058 // runtime_gc_g_ptr(&e);
1059 // gtype = ((PtrType*)e.__type_descriptor)->__element_type;
1060 // }
1061 // gp = runtime_cnew(gtype);
1062 gp = runtime_malloc(sizeof(G));
1063 return gp;
1066 static M* lockextra(bool nilokay);
1067 static void unlockextra(M*);
1069 // needm is called when a cgo callback happens on a
1070 // thread without an m (a thread not created by Go).
1071 // In this case, needm is expected to find an m to use
1072 // and return with m, g initialized correctly.
1073 // Since m and g are not set now (likely nil, but see below)
1074 // needm is limited in what routines it can call. In particular
1075 // it can only call nosplit functions (textflag 7) and cannot
1076 // do any scheduling that requires an m.
1078 // In order to avoid needing heavy lifting here, we adopt
1079 // the following strategy: there is a stack of available m's
1080 // that can be stolen. Using compare-and-swap
1081 // to pop from the stack has ABA races, so we simulate
1082 // a lock by doing an exchange (via casp) to steal the stack
1083 // head and replace the top pointer with MLOCKED (1).
1084 // This serves as a simple spin lock that we can use even
1085 // without an m. The thread that locks the stack in this way
1086 // unlocks the stack by storing a valid stack head pointer.
1088 // In order to make sure that there is always an m structure
1089 // available to be stolen, we maintain the invariant that there
1090 // is always one more than needed. At the beginning of the
1091 // program (if cgo is in use) the list is seeded with a single m.
1092 // If needm finds that it has taken the last m off the list, its job
1093 // is - once it has installed its own m so that it can do things like
1094 // allocate memory - to create a spare m and put it on the list.
1096 // Each of these extra m's also has a g0 and a curg that are
1097 // pressed into service as the scheduling stack and current
1098 // goroutine for the duration of the cgo callback.
1100 // When the callback is done with the m, it calls dropm to
1101 // put the m back on the list.
1103 // Unlike the gc toolchain, we start running on curg, since we are
1104 // just going to return and let the caller continue.
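// The list/lock protocol described above, condensed.  The real code is
// lockextra()/unlockextra()/runtime_needm()/runtime_dropm() below; MLOCKED
// is (M*)1, used as the "locked" marker for the list head runtime_extram.
#if 0
M *mp;

mp = lockextra(false);		// spin: casp(&runtime_extram, head, MLOCKED)
// head mp is now ours; the list stays locked until a new head is stored
unlockextra(mp->schedlink);	// pop: publish the rest of the list

// later, dropm pushes mp back:
mp->schedlink = lockextra(true);
unlockextra(mp);
#endif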
1105 void
1106 runtime_needm(void)
1108 M *mp;
1110 if(runtime_needextram) {
1111 // Can happen if C/C++ code calls Go from a global ctor.
1112 // Can not throw, because scheduler is not initialized yet.
1113 int rv __attribute__((unused));
1114 rv = runtime_write(2, "fatal error: cgo callback before cgo call\n",
1115 sizeof("fatal error: cgo callback before cgo call\n")-1);
1116 runtime_exit(1);
1119 // Lock extra list, take head, unlock popped list.
1120 // nilokay=false is safe here because of the invariant above,
1121 // that the extra list always contains or will soon contain
1122 // at least one m.
1123 mp = lockextra(false);
1125 // Set needextram when we've just emptied the list,
1126 // so that the eventual call into cgocallbackg will
1127 // allocate a new m for the extra list. We delay the
1128 // allocation until then so that it can be done
1129 // after exitsyscall makes sure it is okay to be
1130 // running at all (that is, there's no garbage collection
1131 // running right now).
1132 mp->needextram = mp->schedlink == nil;
1133 unlockextra(mp->schedlink);
1135 // Install m and g (= m->curg).
1136 runtime_setmg(mp, mp->curg);
1138 // Initialize g's context as in mstart.
1139 initcontext();
1140 g->status = Gsyscall;
1141 g->entry = nil;
1142 g->param = nil;
1143 #ifdef USING_SPLIT_STACK
1144 __splitstack_getcontext(&g->stack_context[0]);
1145 #else
1146 g->gcinitial_sp = &mp;
1147 g->gcstack = nil;
1148 g->gcstack_size = 0;
1149 g->gcnext_sp = &mp;
1150 #endif
1151 getcontext(&g->context);
1153 if(g->entry != nil) {
1154 // Got here from mcall.
1155 void (*pfn)(G*) = (void (*)(G*))g->entry;
1156 G* gp = (G*)g->param;
1157 pfn(gp);
1158 *(int*)0x22 = 0x22;
1161 // Initialize this thread to use the m.
1162 runtime_minit();
1164 #ifdef USING_SPLIT_STACK
1166 int dont_block_signals = 0;
1167 __splitstack_block_signals(&dont_block_signals, nil);
1169 #endif
1172 // newextram allocates an m and puts it on the extra list.
1173 // It is called with a working local m, so that it can do things
1174 // like call schedlock and allocate.
1175 void
1176 runtime_newextram(void)
1178 M *mp, *mnext;
1179 G *gp;
1180 byte *g0_sp, *sp;
1181 size_t g0_spsize, spsize;
1183 // Create extra goroutine locked to extra m.
1184 // The goroutine is the context in which the cgo callback will run.
1185 // The sched.pc will never be returned to, but setting it to
1186 // runtime.goexit makes clear to the traceback routines where
1187 // the goroutine stack ends.
1188 mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize);
1189 gp = runtime_malg(StackMin, &sp, &spsize);
1190 gp->status = Gdead;
1191 mp->curg = gp;
1192 mp->locked = LockInternal;
1193 mp->lockedg = gp;
1194 gp->lockedm = mp;
1195 gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
1196 // put on allg for garbage collector
1197 allgadd(gp);
1199 // The context for gp will be set up in runtime_needm. But
1200 // here we need to set up the context for g0.
1201 getcontext(&mp->g0->context);
1202 mp->g0->context.uc_stack.ss_sp = g0_sp;
1203 mp->g0->context.uc_stack.ss_size = g0_spsize;
1204 makecontext(&mp->g0->context, kickoff, 0);
1206 // Add m to the extra list.
1207 mnext = lockextra(true);
1208 mp->schedlink = mnext;
1209 unlockextra(mp);
1212 // dropm is called when a cgo callback has called needm but is now
1213 // done with the callback and returning back into the non-Go thread.
1214 // It puts the current m back onto the extra list.
1216 // The main expense here is the call to signalstack to release the
1217 // m's signal stack, and then the call to needm on the next callback
1218 // from this thread. It is tempting to try to save the m for next time,
1219 // which would eliminate both these costs, but there might not be
1220 // a next time: the current thread (which Go does not control) might exit.
1221 // If we saved the m for that thread, there would be an m leak each time
1222 // such a thread exited. Instead, we acquire and release an m on each
1223 // call. These should typically not be scheduling operations, just a few
1224 // atomics, so the cost should be small.
1226 // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
1227 // variable using pthread_key_create. Unlike the pthread keys we already use
1228 // on OS X, this dummy key would never be read by Go code. It would exist
1229 // only so that we could register at thread-exit-time destructor.
1230 // That destructor would put the m back onto the extra list.
1231 // This is purely a performance optimization. The current version,
1232 // in which dropm happens on each cgo call, is still correct too.
1233 // We may have to keep the current version on systems with cgo
1234 // but without pthreads, like Windows.
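// A sketch of the alternative the TODO above describes: register a
// pthread_key destructor so the m could be returned to the extra list at
// thread exit instead of on every callback.  This is not implemented here;
// everything except the pthread API itself is hypothetical.
#if 0
#include <pthread.h>

static pthread_key_t dropm_key;

static void
dropm_at_thread_exit(void *v __attribute__ ((unused)))
{
	// Would put the cached m back on the extra list, as runtime_dropm does.
	runtime_dropm();
}

static void
register_dropm_key(void)
{
	pthread_key_create(&dropm_key, dropm_at_thread_exit);
}

// needm would then do, once per thread, so the destructor fires at exit:
//	pthread_setspecific(dropm_key, (void*)1);
#endif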
1235 void
1236 runtime_dropm(void)
1238 M *mp, *mnext;
1240 // Undo whatever initialization minit did during needm.
1241 runtime_unminit();
1243 // Clear m and g, and return m to the extra list.
1244 // After the call to setmg we can only call nosplit functions.
1245 mp = m;
1246 runtime_setmg(nil, nil);
1248 mp->curg->status = Gdead;
1249 mp->curg->gcstack = nil;
1250 mp->curg->gcnext_sp = nil;
1252 mnext = lockextra(true);
1253 mp->schedlink = mnext;
1254 unlockextra(mp);
1257 #define MLOCKED ((M*)1)
1259 // lockextra locks the extra list and returns the list head.
1260 // The caller must unlock the list by storing a new list head
1261 // to runtime.extram. If nilokay is true, then lockextra will
1262 // return a nil list head if that's what it finds. If nilokay is false,
1263 // lockextra will keep waiting until the list head is no longer nil.
1264 static M*
1265 lockextra(bool nilokay)
1267 M *mp;
1268 void (*yield)(void);
1270 for(;;) {
1271 mp = runtime_atomicloadp(&runtime_extram);
1272 if(mp == MLOCKED) {
1273 yield = runtime_osyield;
1274 yield();
1275 continue;
1277 if(mp == nil && !nilokay) {
1278 runtime_usleep(1);
1279 continue;
1281 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1282 yield = runtime_osyield;
1283 yield();
1284 continue;
1286 break;
1288 return mp;
1291 static void
1292 unlockextra(M *mp)
1294 runtime_atomicstorep(&runtime_extram, mp);
1297 static int32
1298 countextra()
1300 M *mp, *mc;
1301 int32 c;
1303 for(;;) {
1304 mp = runtime_atomicloadp(&runtime_extram);
1305 if(mp == MLOCKED) {
1306 runtime_osyield();
1307 continue;
1309 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1310 runtime_osyield();
1311 continue;
1313 c = 0;
1314 for(mc = mp; mc != nil; mc = mc->schedlink)
1315 c++;
1316 runtime_atomicstorep(&runtime_extram, mp);
1317 return c;
1321 // Create a new m. It will start off with a call to fn, or else the scheduler.
1322 static void
1323 newm(void(*fn)(void), P *p)
1325 M *mp;
1327 mp = runtime_allocm(p, -1, nil, nil);
1328 mp->nextp = p;
1329 mp->mstartfn = fn;
1331 runtime_newosproc(mp);
1334 // Stops execution of the current m until new work is available.
1335 // Returns with acquired P.
1336 static void
1337 stopm(void)
1339 if(m->locks)
1340 runtime_throw("stopm holding locks");
1341 if(m->p)
1342 runtime_throw("stopm holding p");
1343 if(m->spinning) {
1344 m->spinning = false;
1345 runtime_xadd(&runtime_sched.nmspinning, -1);
1348 retry:
1349 runtime_lock(&runtime_sched);
1350 mput(m);
1351 runtime_unlock(&runtime_sched);
1352 runtime_notesleep(&m->park);
1353 runtime_noteclear(&m->park);
1354 if(m->helpgc) {
1355 runtime_gchelper();
1356 m->helpgc = 0;
1357 m->mcache = nil;
1358 goto retry;
1360 acquirep(m->nextp);
1361 m->nextp = nil;
1364 static void
1365 mspinning(void)
1367 m->spinning = true;
1370 // Schedules some M to run the p (creates an M if necessary).
1371 // If p==nil, tries to get an idle P; if there are no idle P's, does nothing.
1372 static void
1373 startm(P *p, bool spinning)
1375 M *mp;
1376 void (*fn)(void);
1378 runtime_lock(&runtime_sched);
1379 if(p == nil) {
1380 p = pidleget();
1381 if(p == nil) {
1382 runtime_unlock(&runtime_sched);
1383 if(spinning)
1384 runtime_xadd(&runtime_sched.nmspinning, -1);
1385 return;
1388 mp = mget();
1389 runtime_unlock(&runtime_sched);
1390 if(mp == nil) {
1391 fn = nil;
1392 if(spinning)
1393 fn = mspinning;
1394 newm(fn, p);
1395 return;
1397 if(mp->spinning)
1398 runtime_throw("startm: m is spinning");
1399 if(mp->nextp)
1400 runtime_throw("startm: m has p");
1401 mp->spinning = spinning;
1402 mp->nextp = p;
1403 runtime_notewakeup(&mp->park);
1406 // Hands off P from syscall or locked M.
1407 static void
1408 handoffp(P *p)
1410 // if it has local work, start it straight away
1411 if(p->runqhead != p->runqtail || runtime_sched.runqsize) {
1412 startm(p, false);
1413 return;
1415 // no local work, check that there are no spinning/idle M's,
1416 // otherwise our help is not required
1417 if(runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) == 0 && // TODO: fast atomic
1418 runtime_cas(&runtime_sched.nmspinning, 0, 1)) {
1419 startm(p, true);
1420 return;
1422 runtime_lock(&runtime_sched);
1423 if(runtime_sched.gcwaiting) {
1424 p->status = Pgcstop;
1425 if(--runtime_sched.stopwait == 0)
1426 runtime_notewakeup(&runtime_sched.stopnote);
1427 runtime_unlock(&runtime_sched);
1428 return;
1430 if(runtime_sched.runqsize) {
1431 runtime_unlock(&runtime_sched);
1432 startm(p, false);
1433 return;
1435 // If this is the last running P and nobody is polling network,
1436 // need to wakeup another M to poll network.
1437 if(runtime_sched.npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched.lastpoll) != 0) {
1438 runtime_unlock(&runtime_sched);
1439 startm(p, false);
1440 return;
1442 pidleput(p);
1443 runtime_unlock(&runtime_sched);
1446 // Tries to add one more P to execute G's.
1447 // Called when a G is made runnable (newproc, ready).
1448 static void
1449 wakep(void)
1451 // be conservative about spinning threads
1452 if(!runtime_cas(&runtime_sched.nmspinning, 0, 1))
1453 return;
1454 startm(nil, true);
1457 // Stops execution of the current m that is locked to a g until the g is runnable again.
1458 // Returns with acquired P.
1459 static void
1460 stoplockedm(void)
1462 P *p;
1464 if(m->lockedg == nil || m->lockedg->lockedm != m)
1465 runtime_throw("stoplockedm: inconsistent locking");
1466 if(m->p) {
1467 // Schedule another M to run this p.
1468 p = releasep();
1469 handoffp(p);
1471 incidlelocked(1);
1472 // Wait until another thread schedules lockedg again.
1473 runtime_notesleep(&m->park);
1474 runtime_noteclear(&m->park);
1475 if(m->lockedg->status != Grunnable)
1476 runtime_throw("stoplockedm: not runnable");
1477 acquirep(m->nextp);
1478 m->nextp = nil;
1481 // Schedules the locked m to run the locked gp.
1482 static void
1483 startlockedm(G *gp)
1485 M *mp;
1486 P *p;
1488 mp = gp->lockedm;
1489 if(mp == m)
1490 runtime_throw("startlockedm: locked to me");
1491 if(mp->nextp)
1492 runtime_throw("startlockedm: m has p");
1493 // directly handoff current P to the locked m
1494 incidlelocked(-1);
1495 p = releasep();
1496 mp->nextp = p;
1497 runtime_notewakeup(&mp->park);
1498 stopm();
1501 // Stops the current m for stoptheworld.
1502 // Returns when the world is restarted.
1503 static void
1504 gcstopm(void)
1506 P *p;
1508 if(!runtime_sched.gcwaiting)
1509 runtime_throw("gcstopm: not waiting for gc");
1510 if(m->spinning) {
1511 m->spinning = false;
1512 runtime_xadd(&runtime_sched.nmspinning, -1);
1514 p = releasep();
1515 runtime_lock(&runtime_sched);
1516 p->status = Pgcstop;
1517 if(--runtime_sched.stopwait == 0)
1518 runtime_notewakeup(&runtime_sched.stopnote);
1519 runtime_unlock(&runtime_sched);
1520 stopm();
1523 // Schedules gp to run on the current M.
1524 // Never returns.
1525 static void
1526 execute(G *gp)
1528 int32 hz;
1530 if(gp->status != Grunnable) {
1531 runtime_printf("execute: bad g status %d\n", gp->status);
1532 runtime_throw("execute: bad g status");
1534 gp->status = Grunning;
1535 gp->waitsince = 0;
1536 m->p->schedtick++;
1537 m->curg = gp;
1538 gp->m = m;
1540 // Check whether the profiler needs to be turned on or off.
1541 hz = runtime_sched.profilehz;
1542 if(m->profilehz != hz)
1543 runtime_resetcpuprofiler(hz);
1545 runtime_gogo(gp);
1548 // Finds a runnable goroutine to execute.
1549 // Tries to steal from other P's, get g from global queue, poll network.
1550 static G*
1551 findrunnable(void)
1553 G *gp;
1554 P *p;
1555 int32 i;
1557 top:
1558 if(runtime_sched.gcwaiting) {
1559 gcstopm();
1560 goto top;
1562 if(runtime_fingwait && runtime_fingwake && (gp = runtime_wakefing()) != nil)
1563 runtime_ready(gp);
1564 // local runq
1565 gp = runqget(m->p);
1566 if(gp)
1567 return gp;
1568 // global runq
1569 if(runtime_sched.runqsize) {
1570 runtime_lock(&runtime_sched);
1571 gp = globrunqget(m->p, 0);
1572 runtime_unlock(&runtime_sched);
1573 if(gp)
1574 return gp;
1576 // poll network
1577 gp = runtime_netpoll(false); // non-blocking
1578 if(gp) {
1579 injectglist(gp->schedlink);
1580 gp->status = Grunnable;
1581 return gp;
1583 // If number of spinning M's >= number of busy P's, block.
1584 // This is necessary to prevent excessive CPU consumption
1585 // when GOMAXPROCS>>1 but the program parallelism is low.
1586 if(!m->spinning && 2 * runtime_atomicload(&runtime_sched.nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched.npidle)) // TODO: fast atomic
1587 goto stop;
1588 if(!m->spinning) {
1589 m->spinning = true;
1590 runtime_xadd(&runtime_sched.nmspinning, 1);
1592 // random steal from other P's
1593 for(i = 0; i < 2*runtime_gomaxprocs; i++) {
1594 if(runtime_sched.gcwaiting)
1595 goto top;
1596 p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
1597 if(p == m->p)
1598 gp = runqget(p);
1599 else
1600 gp = runqsteal(m->p, p);
1601 if(gp)
1602 return gp;
1604 stop:
1605 // return P and block
1606 runtime_lock(&runtime_sched);
1607 if(runtime_sched.gcwaiting) {
1608 runtime_unlock(&runtime_sched);
1609 goto top;
1611 if(runtime_sched.runqsize) {
1612 gp = globrunqget(m->p, 0);
1613 runtime_unlock(&runtime_sched);
1614 return gp;
1616 p = releasep();
1617 pidleput(p);
1618 runtime_unlock(&runtime_sched);
1619 if(m->spinning) {
1620 m->spinning = false;
1621 runtime_xadd(&runtime_sched.nmspinning, -1);
1623 // check all runqueues once again
1624 for(i = 0; i < runtime_gomaxprocs; i++) {
1625 p = runtime_allp[i];
1626 if(p && p->runqhead != p->runqtail) {
1627 runtime_lock(&runtime_sched);
1628 p = pidleget();
1629 runtime_unlock(&runtime_sched);
1630 if(p) {
1631 acquirep(p);
1632 goto top;
1634 break;
1637 // poll network
1638 if(runtime_xchg64(&runtime_sched.lastpoll, 0) != 0) {
1639 if(m->p)
1640 runtime_throw("findrunnable: netpoll with p");
1641 if(m->spinning)
1642 runtime_throw("findrunnable: netpoll with spinning");
1643 gp = runtime_netpoll(true); // block until new work is available
1644 runtime_atomicstore64(&runtime_sched.lastpoll, runtime_nanotime());
1645 if(gp) {
1646 runtime_lock(&runtime_sched);
1647 p = pidleget();
1648 runtime_unlock(&runtime_sched);
1649 if(p) {
1650 acquirep(p);
1651 injectglist(gp->schedlink);
1652 gp->status = Grunnable;
1653 return gp;
1655 injectglist(gp);
1658 stopm();
1659 goto top;
1662 static void
1663 resetspinning(void)
1665 int32 nmspinning;
1667 if(m->spinning) {
1668 m->spinning = false;
1669 nmspinning = runtime_xadd(&runtime_sched.nmspinning, -1);
1670 if(nmspinning < 0)
1671 runtime_throw("findrunnable: negative nmspinning");
1672 } else
1673 nmspinning = runtime_atomicload(&runtime_sched.nmspinning);
1675 // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
1676 // so see if we need to wakeup another P here.
1677 if (nmspinning == 0 && runtime_atomicload(&runtime_sched.npidle) > 0)
1678 wakep();
1681 // Injects the list of runnable G's into the scheduler.
1682 // Can run concurrently with GC.
1683 static void
1684 injectglist(G *glist)
1686 int32 n;
1687 G *gp;
1689 if(glist == nil)
1690 return;
1691 runtime_lock(&runtime_sched);
1692 for(n = 0; glist; n++) {
1693 gp = glist;
1694 glist = gp->schedlink;
1695 gp->status = Grunnable;
1696 globrunqput(gp);
1698 runtime_unlock(&runtime_sched);
1700 for(; n && runtime_sched.npidle; n--)
1701 startm(nil, false);
1704 // One round of scheduler: find a runnable goroutine and execute it.
1705 // Never returns.
1706 static void
1707 schedule(void)
1709 G *gp;
1710 uint32 tick;
1712 if(m->locks)
1713 runtime_throw("schedule: holding locks");
1715 top:
1716 if(runtime_sched.gcwaiting) {
1717 gcstopm();
1718 goto top;
1721 gp = nil;
1722 // Check the global runnable queue once in a while to ensure fairness.
1723 // Otherwise two goroutines can completely occupy the local runqueue
1724 // by constantly respawning each other.
1725 tick = m->p->schedtick;
1726 // This is a fancy way to say tick%61==0,
1727 // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
1728 if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched.runqsize > 0) {
1729 runtime_lock(&runtime_sched);
1730 gp = globrunqget(m->p, 1);
1731 runtime_unlock(&runtime_sched);
1732 if(gp)
1733 resetspinning();
1735 if(gp == nil) {
1736 gp = runqget(m->p);
1737 if(gp && m->spinning)
1738 runtime_throw("schedule: spinning with local work");
1740 if(gp == nil) {
1741 gp = findrunnable(); // blocks until work is available
1742 resetspinning();
1745 if(gp->lockedm) {
1746 // Hands off own p to the locked m,
1747 // then blocks waiting for a new p.
1748 startlockedm(gp);
1749 goto top;
1752 execute(gp);
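// The fairness check in schedule() above relies on the identity
//	tick - (((uint64)tick*0x4325c53fu)>>36)*61 == tick%61
// where 0x4325c53f is ceil(2^36/61), so the multiply-shift quotient never
// overshoots for 32-bit inputs.  The standalone program below (compile
// separately; it is only a verification sketch) brute-forces the full
// uint32 range to confirm the equivalence.
#if 0
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t t;
	uint32_t tick, fast;

	for(t = 0; t <= 0xffffffffULL; t++) {
		tick = (uint32_t)t;
		fast = tick - (uint32_t)(((uint64_t)tick*0x4325c53fULL)>>36)*61;
		if(fast != tick%61) {
			printf("mismatch at %u: %u != %u\n", tick, fast, tick%61);
			return 1;
		}
	}
	printf("ok: matches tick%%61 for all uint32 values\n");
	return 0;
}
#endif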
1755 // Puts the current goroutine into a waiting state and calls unlockf.
1756 // If unlockf returns false, the goroutine is resumed.
1757 void
1758 runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason)
1760 if(g->status != Grunning)
1761 runtime_throw("bad g status");
1762 m->waitlock = lock;
1763 m->waitunlockf = unlockf;
1764 g->waitreason = reason;
1765 runtime_mcall(park0);
1768 static bool
1769 parkunlock(G *gp, void *lock)
1771 USED(gp);
1772 runtime_unlock(lock);
1773 return true;
1776 // Puts the current goroutine into a waiting state and unlocks the lock.
1777 // The goroutine can be made runnable again by calling runtime_ready(gp).
1778 void
1779 runtime_parkunlock(Lock *lock, const char *reason)
1781 runtime_park(parkunlock, lock, reason);
1784 // runtime_park continuation on g0.
1785 static void
1786 park0(G *gp)
1788 bool ok;
1790 gp->status = Gwaiting;
1791 gp->m = nil;
1792 m->curg = nil;
1793 if(m->waitunlockf) {
1794 ok = m->waitunlockf(gp, m->waitlock);
1795 m->waitunlockf = nil;
1796 m->waitlock = nil;
1797 if(!ok) {
1798 gp->status = Grunnable;
1799 execute(gp); // Schedule it back, never returns.
1802 if(m->lockedg) {
1803 stoplockedm();
1804 execute(gp); // Never returns.
1806 schedule();
1809 // Scheduler yield.
1810 void
1811 runtime_gosched(void)
1813 if(g->status != Grunning)
1814 runtime_throw("bad g status");
1815 runtime_mcall(runtime_gosched0);
1818 // runtime_gosched continuation on g0.
1819 void
1820 runtime_gosched0(G *gp)
1822 gp->status = Grunnable;
1823 gp->m = nil;
1824 m->curg = nil;
1825 runtime_lock(&runtime_sched);
1826 globrunqput(gp);
1827 runtime_unlock(&runtime_sched);
1828 if(m->lockedg) {
1829 stoplockedm();
1830 execute(gp); // Never returns.
1832 schedule();
1835 // Finishes execution of the current goroutine.
1836 // Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack).
1837 // Since it does not return it does not matter. But if it is preempted
1838 // at the split stack check, GC will complain about inconsistent sp.
1839 void runtime_goexit(void) __attribute__ ((noinline));
1840 void
1841 runtime_goexit(void)
1843 if(g->status != Grunning)
1844 runtime_throw("bad g status");
1845 runtime_mcall(goexit0);
1848 // runtime_goexit continuation on g0.
1849 static void
1850 goexit0(G *gp)
1852 gp->status = Gdead;
1853 gp->entry = nil;
1854 gp->m = nil;
1855 gp->lockedm = nil;
1856 gp->paniconfault = 0;
1857 gp->defer = nil; // should be true already but just in case.
1858 gp->panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
1859 gp->writenbuf = 0;
1860 gp->writebuf = nil;
1861 gp->waitreason = nil;
1862 gp->param = nil;
1863 m->curg = nil;
1864 m->lockedg = nil;
1865 if(m->locked & ~LockExternal) {
1866 runtime_printf("invalid m->locked = %d\n", m->locked);
1867 runtime_throw("internal lockOSThread error");
1869 m->locked = 0;
1870 gfput(m->p, gp);
1871 schedule();
1874 // The goroutine g is about to enter a system call.
1875 // Record that it's not using the cpu anymore.
1876 // This is called only from the go syscall library and cgocall,
1877 // not from the low-level system calls used by the runtime.
1879 // Entersyscall cannot split the stack: the runtime_gosave must
1880 // make g->sched refer to the caller's stack segment, because
1881 // entersyscall is going to return immediately after.
1883 void runtime_entersyscall(void) __attribute__ ((no_split_stack));
1884 static void doentersyscall(void) __attribute__ ((no_split_stack, noinline));
1886 void
1887 runtime_entersyscall()
1889 // Save the registers in the g structure so that any pointers
1890 // held in registers will be seen by the garbage collector.
1891 getcontext(&g->gcregs);
1893 // Do the work in a separate function, so that this function
1894 // doesn't save any registers on its own stack. If this
1895 // function does save any registers, we might store the wrong
1896 // value in the call to getcontext.
1898 // FIXME: This assumes that we do not need to save any
1899 // callee-saved registers to access the TLS variable g. We
1900 // don't want to put the ucontext_t on the stack because it is
1901 // large and we can not split the stack here.
1902 doentersyscall();
1905 static void
1906 doentersyscall()
1908 // Disable preemption because during this function g is in Gsyscall status,
1909 // but can have inconsistent g->sched, do not let GC observe it.
1910 m->locks++;
1912 // Leave SP around for GC and traceback.
1913 #ifdef USING_SPLIT_STACK
1914 g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
1915 &g->gcnext_segment, &g->gcnext_sp,
1916 &g->gcinitial_sp);
1917 #else
1919 void *v;
1921 g->gcnext_sp = (byte *) &v;
1923 #endif
1925 g->status = Gsyscall;
1927 if(runtime_atomicload(&runtime_sched.sysmonwait)) { // TODO: fast atomic
1928 runtime_lock(&runtime_sched);
1929 if(runtime_atomicload(&runtime_sched.sysmonwait)) {
1930 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
1931 runtime_notewakeup(&runtime_sched.sysmonnote);
1933 runtime_unlock(&runtime_sched);
1936 m->mcache = nil;
1937 m->p->m = nil;
1938 runtime_atomicstore(&m->p->status, Psyscall);
1939 if(runtime_sched.gcwaiting) {
1940 runtime_lock(&runtime_sched);
1941 if (runtime_sched.stopwait > 0 && runtime_cas(&m->p->status, Psyscall, Pgcstop)) {
1942 if(--runtime_sched.stopwait == 0)
1943 runtime_notewakeup(&runtime_sched.stopnote);
1945 runtime_unlock(&runtime_sched);
1948 m->locks--;
1951 // The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
1952 void
1953 runtime_entersyscallblock(void)
1955 P *p;
1957 m->locks++; // see comment in entersyscall
1959 // Leave SP around for GC and traceback.
1960 #ifdef USING_SPLIT_STACK
1961 g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
1962 &g->gcnext_segment, &g->gcnext_sp,
1963 &g->gcinitial_sp);
1964 #else
1965 g->gcnext_sp = (byte *) &p;
1966 #endif
1968 // Save the registers in the g structure so that any pointers
1969 // held in registers will be seen by the garbage collector.
1970 getcontext(&g->gcregs);
1972 g->status = Gsyscall;
1974 p = releasep();
1975 handoffp(p);
1976 if(g->isbackground) // do not consider blocked scavenger for deadlock detection
1977 incidlelocked(1);
1979 m->locks--;
1982 // The goroutine g exited its system call.
1983 // Arrange for it to run on a cpu again.
1984 // This is called only from the go syscall library, not
1985 // from the low-level system calls used by the runtime.
1986 void
1987 runtime_exitsyscall(void)
1989 G *gp;
1991 m->locks++; // see comment in entersyscall
1993 gp = g;
1994 if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
1995 incidlelocked(-1);
1997 g->waitsince = 0;
1998 if(exitsyscallfast()) {
1999 // There's a cpu for us, so we can run.
2000 m->p->syscalltick++;
2001 gp->status = Grunning;
2002 // Garbage collector isn't running (since we are),
2003 // so okay to clear gcstack and gcsp.
2004 #ifdef USING_SPLIT_STACK
2005 gp->gcstack = nil;
2006 #endif
2007 gp->gcnext_sp = nil;
2008 runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
2009 m->locks--;
2010 return;
2013 m->locks--;
2015 // Call the scheduler.
2016 runtime_mcall(exitsyscall0);
2018 // Scheduler returned, so we're allowed to run now.
2019 // Delete the gcstack information that we left for
2020 // the garbage collector during the system call.
2021 // Must wait until now because until gosched returns
2022 // we don't know for sure that the garbage collector
2023 // is not running.
2024 #ifdef USING_SPLIT_STACK
2025 gp->gcstack = nil;
2026 #endif
2027 gp->gcnext_sp = nil;
2028 runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
2030 // Don't refer to m again, we might be running on a different
2031 // thread after returning from runtime_mcall.
2032 runtime_m()->p->syscalltick++;
2035 static bool
2036 exitsyscallfast(void)
2038 P *p;
2040 // Freezetheworld sets stopwait but does not retake P's.
2041 if(runtime_sched.stopwait) {
2042 m->p = nil;
2043 return false;
2046 // Try to re-acquire the last P.
2047 if(m->p && m->p->status == Psyscall && runtime_cas(&m->p->status, Psyscall, Prunning)) {
2048 // There's a cpu for us, so we can run.
2049 m->mcache = m->p->mcache;
2050 m->p->m = m;
2051 return true;
2053 // Try to get any other idle P.
2054 m->p = nil;
2055 if(runtime_sched.pidle) {
2056 runtime_lock(&runtime_sched);
2057 p = pidleget();
2058 if(p && runtime_atomicload(&runtime_sched.sysmonwait)) {
2059 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2060 runtime_notewakeup(&runtime_sched.sysmonnote);
2062 runtime_unlock(&runtime_sched);
2063 if(p) {
2064 acquirep(p);
2065 return true;
2068 return false;
2071 // runtime_exitsyscall slow path on g0.
2072 // Failed to acquire P, enqueue gp as runnable.
2073 static void
2074 exitsyscall0(G *gp)
2076 P *p;
2078 gp->status = Grunnable;
2079 gp->m = nil;
2080 m->curg = nil;
2081 runtime_lock(&runtime_sched);
2082 p = pidleget();
2083 if(p == nil)
2084 globrunqput(gp);
2085 else if(runtime_atomicload(&runtime_sched.sysmonwait)) {
2086 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2087 runtime_notewakeup(&runtime_sched.sysmonnote);
2089 runtime_unlock(&runtime_sched);
2090 if(p) {
2091 acquirep(p);
2092 execute(gp); // Never returns.
2094 if(m->lockedg) {
2095 // Wait until another thread schedules gp and so m again.
2096 stoplockedm();
2097 execute(gp); // Never returns.
2099 stopm();
2100 schedule(); // Never returns.
2103 // Called from syscall package before fork.
2104 void syscall_runtime_BeforeFork(void)
2105 __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork");
2106 void
2107 syscall_runtime_BeforeFork(void)
2109 // Fork can hang if preempted with signals frequently enough (see issue 5517).
2110 // Ensure that we stay on the same M where we disable profiling.
2111 runtime_m()->locks++;
2112 if(runtime_m()->profilehz != 0)
2113 runtime_resetcpuprofiler(0);
2116 // Called from syscall package after fork in parent.
2117 void syscall_runtime_AfterFork(void)
2118 __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork");
2119 void
2120 syscall_runtime_AfterFork(void)
2122 int32 hz;
2124 hz = runtime_sched.profilehz;
2125 if(hz != 0)
2126 runtime_resetcpuprofiler(hz);
2127 runtime_m()->locks--;
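// Illustration of how the two hooks above are meant to bracket fork(2).
// The real caller is Go code in the syscall package (forkExec), not C;
// the fragment below only shows the intended pairing.
#if 0
pid_t pid;

syscall_runtime_BeforeFork();		// pin to this M, stop the CPU profiler
pid = fork();
if(pid == 0) {
	// child: exec or _exit without returning into the Go scheduler
} else {
	syscall_runtime_AfterFork();	// parent: restore profiling, unpin
}
#endif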
2130 // Allocate a new g, with a stack big enough for stacksize bytes.
2132 runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
2134 G *newg;
2136 newg = allocg();
2137 if(stacksize >= 0) {
2138 #if USING_SPLIT_STACK
2139 int dont_block_signals = 0;
2141 *ret_stack = __splitstack_makecontext(stacksize,
2142 &newg->stack_context[0],
2143 ret_stacksize);
2144 __splitstack_block_signals_context(&newg->stack_context[0],
2145 &dont_block_signals, nil);
2146 #else
2147 *ret_stack = runtime_mallocgc(stacksize, 0, FlagNoProfiling|FlagNoGC);
2148 *ret_stacksize = stacksize;
2149 newg->gcinitial_sp = *ret_stack;
2150 newg->gcstack_size = stacksize;
2151 runtime_xadd(&runtime_stacks_sys, stacksize);
2152 #endif
2154 return newg;
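// Illustrative sketch, not part of the runtime: a fresh goroutine and its
// stack are created in one call, exactly as __go_go below does when the per-P
// free list has nothing cached:
//	byte *sp;
//	size_t spsize;
//	G *newg = runtime_malg(StackMin, &sp, &spsize);
// A negative stacksize skips the "stacksize >= 0" branch above for G's whose
// stack is set up some other way.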
2157 /* For runtime package testing. */
2160 // Create a new g running fn with argument arg.
2161 // Put it on the queue of g's waiting to run.
2162 // The compiler turns a go statement into a call to this.
2163 // Cannot split the stack because it assumes that the arguments
2164 // are available sequentially after &fn; they would not be
2165 // copied if a stack split occurred. It's OK for this to call
2166 // functions that split the stack.
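// Illustrative sketch, not part of the runtime: for a statement such as
//	go f(x)
// the compiler allocates a small argument block and emits, roughly,
//	__go_go(f_thunk, args);
// where f_thunk is a compiler-generated helper (named here hypothetically)
// that unpacks args and calls f.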
2167 void runtime_testing_entersyscall(void)
2168 __asm__ (GOSYM_PREFIX "runtime.entersyscall");
2169 void
2170 runtime_testing_entersyscall()
2172 runtime_entersyscall();
2175 void runtime_testing_exitsyscall(void)
2176 __asm__ (GOSYM_PREFIX "runtime.exitsyscall");
2178 void
2179 runtime_testing_exitsyscall()
2181 runtime_exitsyscall();
2184 G*
2185 __go_go(void (*fn)(void*), void* arg)
2187 byte *sp;
2188 size_t spsize;
2189 G *newg;
2190 P *p;
2192 //runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
2193 if(fn == nil) {
2194 m->throwing = -1; // do not dump full stacks
2195 runtime_throw("go of nil func value");
2197 m->locks++; // disable preemption because it can be holding p in a local var
2199 p = m->p;
2200 if((newg = gfget(p)) != nil) {
2201 #ifdef USING_SPLIT_STACK
2202 int dont_block_signals = 0;
2204 sp = __splitstack_resetcontext(&newg->stack_context[0],
2205 &spsize);
2206 __splitstack_block_signals_context(&newg->stack_context[0],
2207 &dont_block_signals, nil);
2208 #else
2209 sp = newg->gcinitial_sp;
2210 spsize = newg->gcstack_size;
2211 if(spsize == 0)
2212 runtime_throw("bad spsize in __go_go");
2213 newg->gcnext_sp = sp;
2214 #endif
2215 } else {
2216 newg = runtime_malg(StackMin, &sp, &spsize);
2217 allgadd(newg);
2220 newg->entry = (byte*)fn;
2221 newg->param = arg;
2222 newg->gopc = (uintptr)__builtin_return_address(0);
2223 newg->status = Grunnable;
2224 if(p->goidcache == p->goidcacheend) {
2225 p->goidcache = runtime_xadd64(&runtime_sched.goidgen, GoidCacheBatch);
2226 p->goidcacheend = p->goidcache + GoidCacheBatch;
2228 newg->goid = p->goidcache++;
2231 // Avoid warnings about variables clobbered by
2232 // longjmp.
2233 byte * volatile vsp = sp;
2234 size_t volatile vspsize = spsize;
2235 G * volatile vnewg = newg;
2237 getcontext(&vnewg->context);
2238 vnewg->context.uc_stack.ss_sp = vsp;
2239 #ifdef MAKECONTEXT_STACK_TOP
2240 vnewg->context.uc_stack.ss_sp += vspsize;
2241 #endif
2242 vnewg->context.uc_stack.ss_size = vspsize;
2243 makecontext(&vnewg->context, kickoff, 0);
2245 runqput(p, vnewg);
2247 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
2248 wakep();
2249 m->locks--;
2250 return vnewg;
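// Illustrative sketch, not part of the runtime: runtime C code can also spawn
// a goroutine directly. The worker below is a hypothetical example; fn must
// match void (*)(void*), and the new G starts in kickoff (installed by the
// makecontext call above), which invokes the entry function with the saved
// param.
//	static void example_worker(void *arg __attribute__ ((unused))) { ... }
//	__go_go(example_worker, nil);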
2254 static void
2255 allgadd(G *gp)
2257 G **new;
2258 uintptr cap;
2260 runtime_lock(&allglock);
2261 if(runtime_allglen >= allgcap) {
2262 cap = 4096/sizeof(new[0]);
2263 if(cap < 2*allgcap)
2264 cap = 2*allgcap;
2265 new = runtime_malloc(cap*sizeof(new[0]));
2266 if(new == nil)
2267 runtime_throw("runtime: cannot allocate memory");
2268 if(runtime_allg != nil) {
2269 runtime_memmove(new, runtime_allg, runtime_allglen*sizeof(new[0]));
2270 runtime_free(runtime_allg);
2272 runtime_allg = new;
2273 allgcap = cap;
2275 runtime_allg[runtime_allglen++] = gp;
2276 runtime_unlock(&allglock);
2279 // Put on gfree list.
2280 // If local list is too long, transfer a batch to the global list.
2281 static void
2282 gfput(P *p, G *gp)
2284 gp->schedlink = p->gfree;
2285 p->gfree = gp;
2286 p->gfreecnt++;
2287 if(p->gfreecnt >= 64) {
2288 runtime_lock(&runtime_sched.gflock);
2289 while(p->gfreecnt >= 32) {
2290 p->gfreecnt--;
2291 gp = p->gfree;
2292 p->gfree = gp->schedlink;
2293 gp->schedlink = runtime_sched.gfree;
2294 runtime_sched.gfree = gp;
2296 runtime_unlock(&runtime_sched.gflock);
2300 // Get from gfree list.
2301 // If local list is empty, grab a batch from global list.
2302 static G*
2303 gfget(P *p)
2305 G *gp;
2307 retry:
2308 gp = p->gfree;
2309 if(gp == nil && runtime_sched.gfree) {
2310 runtime_lock(&runtime_sched.gflock);
2311 while(p->gfreecnt < 32 && runtime_sched.gfree) {
2312 p->gfreecnt++;
2313 gp = runtime_sched.gfree;
2314 runtime_sched.gfree = gp->schedlink;
2315 gp->schedlink = p->gfree;
2316 p->gfree = gp;
2318 runtime_unlock(&runtime_sched.gflock);
2319 goto retry;
2321 if(gp) {
2322 p->gfree = gp->schedlink;
2323 p->gfreecnt--;
2325 return gp;
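// Illustrative note, not part of the runtime: gfput and gfget form a simple
// hysteresis pair. A P lets its free list grow to 64 G's before draining it
// below 32 into the shared list, and refills at most 32 at a time when it
// runs dry, so free G's migrate between the lists in batches rather than one
// at a time.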
2328 // Purge all cached G's from gfree list to the global list.
2329 static void
2330 gfpurge(P *p)
2332 G *gp;
2334 runtime_lock(&runtime_sched.gflock);
2335 while(p->gfreecnt) {
2336 p->gfreecnt--;
2337 gp = p->gfree;
2338 p->gfree = gp->schedlink;
2339 gp->schedlink = runtime_sched.gfree;
2340 runtime_sched.gfree = gp;
2342 runtime_unlock(&runtime_sched.gflock);
2345 void
2346 runtime_Breakpoint(void)
2348 runtime_breakpoint();
2351 void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched");
2353 void
2354 runtime_Gosched(void)
2356 runtime_gosched();
2359 // Implementation of runtime.GOMAXPROCS.
2360 // delete when scheduler is even stronger
2361 int32
2362 runtime_gomaxprocsfunc(int32 n)
2364 int32 ret;
2366 if(n > MaxGomaxprocs)
2367 n = MaxGomaxprocs;
2368 runtime_lock(&runtime_sched);
2369 ret = runtime_gomaxprocs;
2370 if(n <= 0 || n == ret) {
2371 runtime_unlock(&runtime_sched);
2372 return ret;
2374 runtime_unlock(&runtime_sched);
2376 runtime_semacquire(&runtime_worldsema, false);
2377 m->gcing = 1;
2378 runtime_stoptheworld();
2379 newprocs = n;
2380 m->gcing = 0;
2381 runtime_semrelease(&runtime_worldsema);
2382 runtime_starttheworld();
2384 return ret;
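// Illustrative note, not part of the runtime: runtime.GOMAXPROCS(n) lands
// here. A non-positive n only reports the current value:
//	int32 cur = runtime_gomaxprocsfunc(0);	// query, no stop-the-world
// A new positive value stops the world, records it in newprocs, and lets
// procresize below apply it when the world is restarted.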
2387 // lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
2388 // after they modify m->locked. Do not allow preemption during this call,
2389 // or else the m might be different in this function than in the caller.
2390 static void
2391 lockOSThread(void)
2393 m->lockedg = g;
2394 g->lockedm = m;
2397 void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
2398 void
2399 runtime_LockOSThread(void)
2401 m->locked |= LockExternal;
2402 lockOSThread();
2405 void
2406 runtime_lockOSThread(void)
2408 m->locked += LockInternal;
2409 lockOSThread();
2413 // unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
2414 // after they update m->locked. Do not allow preemption during this call,
2415 // or else the m might be different in this function than in the caller.
2416 static void
2417 unlockOSThread(void)
2419 if(m->locked != 0)
2420 return;
2421 m->lockedg = nil;
2422 g->lockedm = nil;
2425 void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
2427 void
2428 runtime_UnlockOSThread(void)
2430 m->locked &= ~LockExternal;
2431 unlockOSThread();
2434 void
2435 runtime_unlockOSThread(void)
2437 if(m->locked < LockInternal)
2438 runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
2439 m->locked -= LockInternal;
2440 unlockOSThread();
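// Illustrative note, not part of the runtime: m->locked combines one external
// bit with an internal counter, so internal lock/unlock pairs nest:
//	runtime_lockOSThread();
//	runtime_lockOSThread();
//	runtime_unlockOSThread();	// m->locked still nonzero; stays locked
//	runtime_unlockOSThread();	// now unlockOSThread clears m->lockedg
// runtime.LockOSThread from Go code sets the separate LockExternal bit, so
// user-level and runtime-internal locking do not interfere.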
2443 bool
2444 runtime_lockedOSThread(void)
2446 return g->lockedm != nil && m->lockedg != nil;
2449 int32
2450 runtime_gcount(void)
2452 G *gp;
2453 int32 n, s;
2454 uintptr i;
2456 n = 0;
2457 runtime_lock(&allglock);
2458 // TODO(dvyukov): runtime.NumGoroutine() is O(N).
2459 // We do not want to increment/decrement centralized counter in newproc/goexit,
2460 // just to make runtime.NumGoroutine() faster.
2461 // Compromise solution is to introduce per-P counters of active goroutines.
2462 for(i = 0; i < runtime_allglen; i++) {
2463 gp = runtime_allg[i];
2464 s = gp->status;
2465 if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
2466 n++;
2468 runtime_unlock(&allglock);
2469 return n;
2472 int32
2473 runtime_mcount(void)
2475 return runtime_sched.mcount;
2478 static struct {
2479 Lock;
2480 void (*fn)(uintptr*, int32);
2481 int32 hz;
2482 uintptr pcbuf[TracebackMaxFrames];
2483 Location locbuf[TracebackMaxFrames];
2484 } prof;
2486 static void System(void) {}
2487 static void GC(void) {}
2489 // Called if we receive a SIGPROF signal.
2490 void
2491 runtime_sigprof()
2493 M *mp = m;
2494 int32 n, i;
2495 bool traceback;
2497 if(prof.fn == nil || prof.hz == 0)
2498 return;
2500 if(mp == nil)
2501 return;
2503 // Profiling runs concurrently with GC, so it must not allocate.
2504 mp->mallocing++;
2506 traceback = true;
2508 if(mp->mcache == nil)
2509 traceback = false;
2511 runtime_lock(&prof);
2512 if(prof.fn == nil) {
2513 runtime_unlock(&prof);
2514 mp->mallocing--;
2515 return;
2517 n = 0;
2519 if(runtime_atomicload(&runtime_in_callers) > 0) {
2520 // If SIGPROF arrived while already fetching runtime
2521 // callers we can have trouble on older systems
2522 // because the unwind library calls dl_iterate_phdr
2523 // which was not recursive in the past.
2524 traceback = false;
2527 if(traceback) {
2528 n = runtime_callers(0, prof.locbuf, nelem(prof.locbuf), false);
2529 for(i = 0; i < n; i++)
2530 prof.pcbuf[i] = prof.locbuf[i].pc;
2532 if(!traceback || n <= 0) {
2533 n = 2;
2534 prof.pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
2535 if(mp->gcing || mp->helpgc)
2536 prof.pcbuf[1] = (uintptr)GC;
2537 else
2538 prof.pcbuf[1] = (uintptr)System;
2540 prof.fn(prof.pcbuf, n);
2541 runtime_unlock(&prof);
2542 mp->mallocing--;
2545 // Arrange to call fn with a traceback hz times a second.
2546 void
2547 runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
2549 // Force sane arguments.
2550 if(hz < 0)
2551 hz = 0;
2552 if(hz == 0)
2553 fn = nil;
2554 if(fn == nil)
2555 hz = 0;
2557 // Disable preemption, otherwise we can be rescheduled to another thread
2558 // that has profiling enabled.
2559 m->locks++;
2561 // Stop profiler on this thread so that it is safe to lock prof.
2562 // if a profiling signal came in while we had prof locked,
2563 // it would deadlock.
2564 runtime_resetcpuprofiler(0);
2566 runtime_lock(&prof);
2567 prof.fn = fn;
2568 prof.hz = hz;
2569 runtime_unlock(&prof);
2570 runtime_lock(&runtime_sched);
2571 runtime_sched.profilehz = hz;
2572 runtime_unlock(&runtime_sched);
2574 if(hz != 0)
2575 runtime_resetcpuprofiler(hz);
2577 m->locks--;
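// Illustrative sketch, not part of the runtime: the CPU-profiling code turns
// sampling on by registering a callback and a frequency here, and
// runtime_sigprof above hands that callback a raw PC buffer on every SIGPROF.
// The callback name is a hypothetical example.
//	static void example_cb(uintptr *pc, int32 n) { /* record n PCs */ }
//	runtime_setcpuprofilerate(example_cb, 100);	// sample at 100 Hz
//	runtime_setcpuprofilerate(nil, 0);		// stop profiling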
2580 // Change number of processors. The world is stopped, sched is locked.
2581 static void
2582 procresize(int32 new)
2584 int32 i, old;
2585 bool empty;
2586 G *gp;
2587 P *p;
2589 old = runtime_gomaxprocs;
2590 if(old < 0 || old > MaxGomaxprocs || new <= 0 || new > MaxGomaxprocs)
2591 runtime_throw("procresize: invalid arg");
2592 // initialize new P's
2593 for(i = 0; i < new; i++) {
2594 p = runtime_allp[i];
2595 if(p == nil) {
2596 p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
2597 p->id = i;
2598 p->status = Pgcstop;
2599 runtime_atomicstorep(&runtime_allp[i], p);
2601 if(p->mcache == nil) {
2602 if(old==0 && i==0)
2603 p->mcache = m->mcache; // bootstrap
2604 else
2605 p->mcache = runtime_allocmcache();
2609 // redistribute runnable G's evenly
2610 // collect all runnable goroutines in global queue preserving FIFO order
2611 // FIFO order is required to ensure fairness even during frequent GCs
2612 // see http://golang.org/issue/7126
2613 empty = false;
2614 while(!empty) {
2615 empty = true;
2616 for(i = 0; i < old; i++) {
2617 p = runtime_allp[i];
2618 if(p->runqhead == p->runqtail)
2619 continue;
2620 empty = false;
2621 // pop from tail of local queue
2622 p->runqtail--;
2623 gp = p->runq[p->runqtail%nelem(p->runq)];
2624 // push onto head of global queue
2625 gp->schedlink = runtime_sched.runqhead;
2626 runtime_sched.runqhead = gp;
2627 if(runtime_sched.runqtail == nil)
2628 runtime_sched.runqtail = gp;
2629 runtime_sched.runqsize++;
2632 // fill local queues with at most nelem(p->runq)/2 goroutines
2633 // start at 1 because current M already executes some G and will acquire allp[0] below,
2634 // so if we have a spare G we want to put it into allp[1].
2635 for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched.runqsize > 0; i++) {
2636 gp = runtime_sched.runqhead;
2637 runtime_sched.runqhead = gp->schedlink;
2638 if(runtime_sched.runqhead == nil)
2639 runtime_sched.runqtail = nil;
2640 runtime_sched.runqsize--;
2641 runqput(runtime_allp[i%new], gp);
2644 // free unused P's
2645 for(i = new; i < old; i++) {
2646 p = runtime_allp[i];
2647 runtime_freemcache(p->mcache);
2648 p->mcache = nil;
2649 gfpurge(p);
2650 p->status = Pdead;
2651 // can't free P itself because it can be referenced by an M in syscall
2654 if(m->p)
2655 m->p->m = nil;
2656 m->p = nil;
2657 m->mcache = nil;
2658 p = runtime_allp[0];
2659 p->m = nil;
2660 p->status = Pidle;
2661 acquirep(p);
2662 for(i = new-1; i > 0; i--) {
2663 p = runtime_allp[i];
2664 p->status = Pidle;
2665 pidleput(p);
2667 runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
2670 // Associate p and the current m.
2671 static void
2672 acquirep(P *p)
2674 if(m->p || m->mcache)
2675 runtime_throw("acquirep: already in go");
2676 if(p->m || p->status != Pidle) {
2677 runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
2678 runtime_throw("acquirep: invalid p state");
2680 m->mcache = p->mcache;
2681 m->p = p;
2682 p->m = m;
2683 p->status = Prunning;
2686 // Disassociate p and the current m.
2687 static P*
2688 releasep(void)
2690 P *p;
2692 if(m->p == nil || m->mcache == nil)
2693 runtime_throw("releasep: invalid arg");
2694 p = m->p;
2695 if(p->m != m || p->mcache != m->mcache || p->status != Prunning) {
2696 runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
2697 m, m->p, p->m, m->mcache, p->mcache, p->status);
2698 runtime_throw("releasep: invalid p state");
2700 m->p = nil;
2701 m->mcache = nil;
2702 p->m = nil;
2703 p->status = Pidle;
2704 return p;
2707 static void
2708 incidlelocked(int32 v)
2710 runtime_lock(&runtime_sched);
2711 runtime_sched.nmidlelocked += v;
2712 if(v > 0)
2713 checkdead();
2714 runtime_unlock(&runtime_sched);
2717 // Check for deadlock situation.
2718 // The check is based on the number of running M's; if it is 0, we have a deadlock.
2719 static void
2720 checkdead(void)
2722 G *gp;
2723 int32 run, grunning, s;
2724 uintptr i;
2726 // -1 for sysmon
2727 run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.nmidlelocked - 1 - countextra();
2728 if(run > 0)
2729 return;
2730 // If we are dying because of a signal caught on an already idle thread,
2731 // freezetheworld will cause all running threads to block.
2732 // And runtime will essentially enter into deadlock state,
2733 // except that there is a thread that will call runtime_exit soon.
2734 if(runtime_panicking > 0)
2735 return;
2736 if(run < 0) {
2737 runtime_printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
2738 runtime_sched.nmidle, runtime_sched.nmidlelocked, runtime_sched.mcount);
2739 runtime_throw("checkdead: inconsistent counts");
2741 grunning = 0;
2742 runtime_lock(&allglock);
2743 for(i = 0; i < runtime_allglen; i++) {
2744 gp = runtime_allg[i];
2745 if(gp->isbackground)
2746 continue;
2747 s = gp->status;
2748 if(s == Gwaiting)
2749 grunning++;
2750 else if(s == Grunnable || s == Grunning || s == Gsyscall) {
2751 runtime_unlock(&allglock);
2752 runtime_printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);
2753 runtime_throw("checkdead: runnable g");
2756 runtime_unlock(&allglock);
2757 if(grunning == 0) // possible if main goroutine calls runtime_Goexit()
2758 runtime_throw("no goroutines (main called runtime.Goexit) - deadlock!");
2759 m->throwing = -1; // do not dump full stacks
2760 runtime_throw("all goroutines are asleep - deadlock!");
2763 static void
2764 sysmon(void)
2766 uint32 idle, delay;
2767 int64 now, lastpoll, lasttrace;
2768 G *gp;
2770 lasttrace = 0;
2771 idle = 0; // how many cycles in succession we have not woken anybody up
2772 delay = 0;
2773 for(;;) {
2774 if(idle == 0) // start with 20us sleep...
2775 delay = 20;
2776 else if(idle > 50) // start doubling the sleep after 1ms...
2777 delay *= 2;
2778 if(delay > 10*1000) // up to 10ms
2779 delay = 10*1000;
2780 runtime_usleep(delay);
2781 if(runtime_debug.schedtrace <= 0 &&
2782 (runtime_sched.gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic
2783 runtime_lock(&runtime_sched);
2784 if(runtime_atomicload(&runtime_sched.gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) {
2785 runtime_atomicstore(&runtime_sched.sysmonwait, 1);
2786 runtime_unlock(&runtime_sched);
2787 runtime_notesleep(&runtime_sched.sysmonnote);
2788 runtime_noteclear(&runtime_sched.sysmonnote);
2789 idle = 0;
2790 delay = 20;
2791 } else
2792 runtime_unlock(&runtime_sched);
2794 // poll network if not polled for more than 10ms
2795 lastpoll = runtime_atomicload64(&runtime_sched.lastpoll);
2796 now = runtime_nanotime();
2797 if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
2798 runtime_cas64(&runtime_sched.lastpoll, lastpoll, now);
2799 gp = runtime_netpoll(false); // non-blocking
2800 if(gp) {
2801 // Need to decrement number of idle locked M's
2802 // (pretending that one more is running) before injectglist.
2803 // Otherwise it can lead to the following situation:
2804 // injectglist grabs all P's but before it starts M's to run the P's,
2805 // another M returns from syscall, finishes running its G,
2806 // observes that there is no work to do and no other running M's
2807 // and reports deadlock.
2808 incidlelocked(-1);
2809 injectglist(gp);
2810 incidlelocked(1);
2813 // retake P's blocked in syscalls
2814 // and preempt long running G's
2815 if(retake(now))
2816 idle = 0;
2817 else
2818 idle++;
2820 if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) {
2821 lasttrace = now;
2822 runtime_schedtrace(runtime_debug.scheddetail);
2827 typedef struct Pdesc Pdesc;
2828 struct Pdesc
2830 uint32 schedtick;
2831 int64 schedwhen;
2832 uint32 syscalltick;
2833 int64 syscallwhen;
2835 static Pdesc pdesc[MaxGomaxprocs];
2837 static uint32
2838 retake(int64 now)
2840 uint32 i, s, n;
2841 int64 t;
2842 P *p;
2843 Pdesc *pd;
2845 n = 0;
2846 for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
2847 p = runtime_allp[i];
2848 if(p==nil)
2849 continue;
2850 pd = &pdesc[i];
2851 s = p->status;
2852 if(s == Psyscall) {
2853 // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
2854 t = p->syscalltick;
2855 if(pd->syscalltick != t) {
2856 pd->syscalltick = t;
2857 pd->syscallwhen = now;
2858 continue;
2860 // On the one hand we don't want to retake Ps if there is no other work to do,
2861 // but on the other hand we want to retake them eventually
2862 // because they can prevent the sysmon thread from deep sleep.
2863 if(p->runqhead == p->runqtail &&
2864 runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0 &&
2865 pd->syscallwhen + 10*1000*1000 > now)
2866 continue;
2867 // Need to decrement number of idle locked M's
2868 // (pretending that one more is running) before the CAS.
2869 // Otherwise the M from which we retake can exit the syscall,
2870 // increment nmidle and report deadlock.
2871 incidlelocked(-1);
2872 if(runtime_cas(&p->status, s, Pidle)) {
2873 n++;
2874 handoffp(p);
2876 incidlelocked(1);
2877 } else if(s == Prunning) {
2878 // Preempt G if it's running for more than 10ms.
2879 t = p->schedtick;
2880 if(pd->schedtick != t) {
2881 pd->schedtick = t;
2882 pd->schedwhen = now;
2883 continue;
2885 if(pd->schedwhen + 10*1000*1000 > now)
2886 continue;
2887 // preemptone(p);
2890 return n;
2893 // Tell all goroutines that they have been preempted and they should stop.
2894 // This function is purely best-effort. It can fail to inform a goroutine if a
2895 // processor just started running it.
2896 // No locks need to be held.
2897 // Returns true if preemption request was issued to at least one goroutine.
2898 static bool
2899 preemptall(void)
2901 return false;
2904 void
2905 runtime_schedtrace(bool detailed)
2907 static int64 starttime;
2908 int64 now;
2909 int64 id1, id2, id3;
2910 int32 i, t, h;
2911 uintptr gi;
2912 const char *fmt;
2913 M *mp, *lockedm;
2914 G *gp, *lockedg;
2915 P *p;
2917 now = runtime_nanotime();
2918 if(starttime == 0)
2919 starttime = now;
2921 runtime_lock(&runtime_sched);
2922 runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
2923 (now-starttime)/1000000, runtime_gomaxprocs, runtime_sched.npidle, runtime_sched.mcount,
2924 runtime_sched.nmidle, runtime_sched.runqsize);
2925 if(detailed) {
2926 runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
2927 runtime_sched.gcwaiting, runtime_sched.nmidlelocked, runtime_sched.nmspinning,
2928 runtime_sched.stopwait, runtime_sched.sysmonwait);
2930 // We must be careful while reading data from P's, M's and G's.
2931 // Even if we hold schedlock, most data can be changed concurrently.
2932 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
2933 for(i = 0; i < runtime_gomaxprocs; i++) {
2934 p = runtime_allp[i];
2935 if(p == nil)
2936 continue;
2937 mp = p->m;
2938 h = runtime_atomicload(&p->runqhead);
2939 t = runtime_atomicload(&p->runqtail);
2940 if(detailed)
2941 runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
2942 i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
2943 else {
2944 // In non-detailed mode format lengths of per-P run queues as:
2945 // [len1 len2 len3 len4]
2946 fmt = " %d";
2947 if(runtime_gomaxprocs == 1)
2948 fmt = " [%d]\n";
2949 else if(i == 0)
2950 fmt = " [%d";
2951 else if(i == runtime_gomaxprocs-1)
2952 fmt = " %d]\n";
2953 runtime_printf(fmt, t-h);
2956 if(!detailed) {
2957 runtime_unlock(&runtime_sched);
2958 return;
2960 for(mp = runtime_allm; mp; mp = mp->alllink) {
2961 p = mp->p;
2962 gp = mp->curg;
2963 lockedg = mp->lockedg;
2964 id1 = -1;
2965 if(p)
2966 id1 = p->id;
2967 id2 = -1;
2968 if(gp)
2969 id2 = gp->goid;
2970 id3 = -1;
2971 if(lockedg)
2972 id3 = lockedg->goid;
2973 runtime_printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
2974 " locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
2975 mp->id, id1, id2,
2976 mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
2977 mp->spinning, m->blocked, id3);
2979 runtime_lock(&allglock);
2980 for(gi = 0; gi < runtime_allglen; gi++) {
2981 gp = runtime_allg[gi];
2982 mp = gp->m;
2983 lockedm = gp->lockedm;
2984 runtime_printf(" G%D: status=%d(%s) m=%d lockedm=%d\n",
2985 gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1,
2986 lockedm ? lockedm->id : -1);
2988 runtime_unlock(&allglock);
2989 runtime_unlock(&runtime_sched);
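// Illustrative note, not part of the runtime: sysmon above calls this when
// GODEBUG=schedtrace=<ms> is set; adding scheddetail=1 selects the per-P,
// per-M and per-G dump guarded by the "detailed" flag.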
2992 // Put mp on midle list.
2993 // Sched must be locked.
2994 static void
2995 mput(M *mp)
2997 mp->schedlink = runtime_sched.midle;
2998 runtime_sched.midle = mp;
2999 runtime_sched.nmidle++;
3000 checkdead();
3003 // Try to get an m from midle list.
3004 // Sched must be locked.
3005 static M*
3006 mget(void)
3008 M *mp;
3010 if((mp = runtime_sched.midle) != nil){
3011 runtime_sched.midle = mp->schedlink;
3012 runtime_sched.nmidle--;
3014 return mp;
3017 // Put gp on the global runnable queue.
3018 // Sched must be locked.
3019 static void
3020 globrunqput(G *gp)
3022 gp->schedlink = nil;
3023 if(runtime_sched.runqtail)
3024 runtime_sched.runqtail->schedlink = gp;
3025 else
3026 runtime_sched.runqhead = gp;
3027 runtime_sched.runqtail = gp;
3028 runtime_sched.runqsize++;
3031 // Put a batch of runnable goroutines on the global runnable queue.
3032 // Sched must be locked.
3033 static void
3034 globrunqputbatch(G *ghead, G *gtail, int32 n)
3036 gtail->schedlink = nil;
3037 if(runtime_sched.runqtail)
3038 runtime_sched.runqtail->schedlink = ghead;
3039 else
3040 runtime_sched.runqhead = ghead;
3041 runtime_sched.runqtail = gtail;
3042 runtime_sched.runqsize += n;
3045 // Try to get a batch of G's from the global runnable queue.
3046 // Sched must be locked.
3047 static G*
3048 globrunqget(P *p, int32 max)
3050 G *gp, *gp1;
3051 int32 n;
3053 if(runtime_sched.runqsize == 0)
3054 return nil;
3055 n = runtime_sched.runqsize/runtime_gomaxprocs+1;
3056 if(n > runtime_sched.runqsize)
3057 n = runtime_sched.runqsize;
3058 if(max > 0 && n > max)
3059 n = max;
3060 if((uint32)n > nelem(p->runq)/2)
3061 n = nelem(p->runq)/2;
3062 runtime_sched.runqsize -= n;
3063 if(runtime_sched.runqsize == 0)
3064 runtime_sched.runqtail = nil;
3065 gp = runtime_sched.runqhead;
3066 runtime_sched.runqhead = gp->schedlink;
3067 n--;
3068 while(n--) {
3069 gp1 = runtime_sched.runqhead;
3070 runtime_sched.runqhead = gp1->schedlink;
3071 runqput(p, gp1);
3073 return gp;
3076 // Put p on the pidle list.
3077 // Sched must be locked.
3078 static void
3079 pidleput(P *p)
3081 p->link = runtime_sched.pidle;
3082 runtime_sched.pidle = p;
3083 runtime_xadd(&runtime_sched.npidle, 1); // TODO: fast atomic
3086 // Try to get a p from the pidle list.
3087 // Sched must be locked.
3088 static P*
3089 pidleget(void)
3091 P *p;
3093 p = runtime_sched.pidle;
3094 if(p) {
3095 runtime_sched.pidle = p->link;
3096 runtime_xadd(&runtime_sched.npidle, -1); // TODO: fast atomic
3098 return p;
3101 // Try to put g on local runnable queue.
3102 // If it's full, put onto global queue.
3103 // Executed only by the owner P.
3104 static void
3105 runqput(P *p, G *gp)
3107 uint32 h, t;
3109 retry:
3110 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3111 t = p->runqtail;
3112 if(t - h < nelem(p->runq)) {
3113 p->runq[t%nelem(p->runq)] = gp;
3114 runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
3115 return;
3117 if(runqputslow(p, gp, h, t))
3118 return;
3119 // the queue is not full, so the put above must now succeed
3120 goto retry;
3123 // Put g and a batch of work from local runnable queue on global queue.
3124 // Executed only by the owner P.
3125 static bool
3126 runqputslow(P *p, G *gp, uint32 h, uint32 t)
3128 G *batch[nelem(p->runq)/2+1];
3129 uint32 n, i;
3131 // First, grab a batch from local queue.
3132 n = t-h;
3133 n = n/2;
3134 if(n != nelem(p->runq)/2)
3135 runtime_throw("runqputslow: queue is not full");
3136 for(i=0; i<n; i++)
3137 batch[i] = p->runq[(h+i)%nelem(p->runq)];
3138 if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3139 return false;
3140 batch[n] = gp;
3141 // Link the goroutines.
3142 for(i=0; i<n; i++)
3143 batch[i]->schedlink = batch[i+1];
3144 // Now put the batch on global queue.
3145 runtime_lock(&runtime_sched);
3146 globrunqputbatch(batch[0], batch[n], n+1);
3147 runtime_unlock(&runtime_sched);
3148 return true;
3151 // Get g from local runnable queue.
3152 // Executed only by the owner P.
3153 static G*
3154 runqget(P *p)
3156 G *gp;
3157 uint32 t, h;
3159 for(;;) {
3160 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3161 t = p->runqtail;
3162 if(t == h)
3163 return nil;
3164 gp = p->runq[h%nelem(p->runq)];
3165 if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume
3166 return gp;
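// Illustrative sketch, not part of the runtime: the per-P run queue is a
// fixed power-of-two ring indexed by free-running 32-bit counters, so
// "tail - head" is the length even after the counters wrap and no separate
// empty/full flag is needed. A single-threaded demonstration of the index
// arithmetic (the real code adds atomic loads/stores and CAS as above):
static void __attribute__ ((unused))
example_ring_arithmetic(void)
{
	enum { N = 8 };			// any power of two; p->runq works the same way
	void *ring[N];
	uint32 head = 0, tail = 0;	// free-running, only ever incremented
	void *item = nil;

	if(tail - head < N)		// not full, valid across wraparound
		ring[tail++ % N] = item;
	if(tail - head > 0)		// not empty
		item = ring[head++ % N];
	(void)item;
}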
3170 // Grabs a batch of goroutines from local runnable queue.
3171 // batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
3172 // Can be executed by any P.
3173 static uint32
3174 runqgrab(P *p, G **batch)
3176 uint32 t, h, n, i;
3178 for(;;) {
3179 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3180 t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer
3181 n = t-h;
3182 n = n - n/2;
3183 if(n == 0)
3184 break;
3185 if(n > nelem(p->runq)/2) // read inconsistent h and t
3186 continue;
3187 for(i=0; i<n; i++)
3188 batch[i] = p->runq[(h+i)%nelem(p->runq)];
3189 if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3190 break;
3192 return n;
3195 // Steal half of elements from local runnable queue of p2
3196 // and put onto local runnable queue of p.
3197 // Returns one of the stolen elements (or nil if failed).
3198 static G*
3199 runqsteal(P *p, P *p2)
3201 G *gp;
3202 G *batch[nelem(p->runq)/2];
3203 uint32 t, h, n, i;
3205 n = runqgrab(p2, batch);
3206 if(n == 0)
3207 return nil;
3208 n--;
3209 gp = batch[n];
3210 if(n == 0)
3211 return gp;
3212 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3213 t = p->runqtail;
3214 if(t - h + n >= nelem(p->runq))
3215 runtime_throw("runqsteal: runq overflow");
3216 for(i=0; i<n; i++, t++)
3217 p->runq[t%nelem(p->runq)] = batch[i];
3218 runtime_atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption
3219 return gp;
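// Illustrative note, not part of the runtime: runqgrab takes the larger half
// (n - n/2) of the victim's queue, e.g. 3 of 5 runnable G's. runqsteal then
// returns one of the stolen G's to run immediately and queues the remaining
// n-1 on the thief's own runq.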
3222 void runtime_testSchedLocalQueue(void)
3223 __asm__("runtime.testSchedLocalQueue");
3225 void
3226 runtime_testSchedLocalQueue(void)
3228 P p;
3229 G gs[nelem(p.runq)];
3230 int32 i, j;
3232 runtime_memclr((byte*)&p, sizeof(p));
3234 for(i = 0; i < (int32)nelem(gs); i++) {
3235 if(runqget(&p) != nil)
3236 runtime_throw("runq is not empty initially");
3237 for(j = 0; j < i; j++)
3238 runqput(&p, &gs[i]);
3239 for(j = 0; j < i; j++) {
3240 if(runqget(&p) != &gs[i]) {
3241 runtime_printf("bad element at iter %d/%d\n", i, j);
3242 runtime_throw("bad element");
3245 if(runqget(&p) != nil)
3246 runtime_throw("runq is not empty afterwards");
3250 void runtime_testSchedLocalQueueSteal(void)
3251 __asm__("runtime.testSchedLocalQueueSteal");
3253 void
3254 runtime_testSchedLocalQueueSteal(void)
3256 P p1, p2;
3257 G gs[nelem(p1.runq)], *gp;
3258 int32 i, j, s;
3260 runtime_memclr((byte*)&p1, sizeof(p1));
3261 runtime_memclr((byte*)&p2, sizeof(p2));
3263 for(i = 0; i < (int32)nelem(gs); i++) {
3264 for(j = 0; j < i; j++) {
3265 gs[j].sig = 0;
3266 runqput(&p1, &gs[j]);
3268 gp = runqsteal(&p2, &p1);
3269 s = 0;
3270 if(gp) {
3271 s++;
3272 gp->sig++;
3274 while((gp = runqget(&p2)) != nil) {
3275 s++;
3276 gp->sig++;
3278 while((gp = runqget(&p1)) != nil)
3279 gp->sig++;
3280 for(j = 0; j < i; j++) {
3281 if(gs[j].sig != 1) {
3282 runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
3283 runtime_throw("bad element");
3286 if(s != i/2 && s != i/2+1) {
3287 runtime_printf("bad steal %d, want %d or %d, iter %d\n",
3288 s, i/2, i/2+1, i);
3289 runtime_throw("bad steal");
3294 int32
3295 runtime_setmaxthreads(int32 in)
3297 int32 out;
3299 runtime_lock(&runtime_sched);
3300 out = runtime_sched.maxmcount;
3301 runtime_sched.maxmcount = in;
3302 checkmcount();
3303 runtime_unlock(&runtime_sched);
3304 return out;
3307 void
3308 runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
3310 enqueue1(wbufp, (Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0});
3313 // When a function calls a closure, it passes the closure value to
3314 // __go_set_closure immediately before the function call. When a
3315 // function uses a closure, it calls __go_get_closure immediately on
3316 // function entry. This is a hack, but it will work on any system.
3317 // It would be better to use the static chain register when there is
3318 // one. It is also worth considering expanding these functions
3319 // directly in the compiler.
3321 void
3322 __go_set_closure(void* v)
3324 g->closure = v;
3327 void *
3328 __go_get_closure(void)
3330 return g->closure;
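// Illustrative sketch, not part of the runtime: for a call through a closure
// value c the compiler emits, roughly,
//	__go_set_closure(c);
//	fn();				// the code pointer taken from c
// and a function that captures variables begins with
//	void *self = __go_get_closure();
// fn and self are placeholders; the set-before-call / get-on-entry pairing is
// what the comment above describes.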
3333 // Return whether we are waiting for a GC. This gc toolchain uses
3334 // preemption instead.
3335 bool
3336 runtime_gcwaiting(void)
3338 return runtime_sched.gcwaiting;