libgo/runtime/proc.c

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 #include <limits.h>
   6 #include <stdlib.h>
   7 #include <pthread.h>
   8 #include <unistd.h>
   9
  10 #include "config.h"
  11
  12 #ifdef HAVE_DL_ITERATE_PHDR
  13 #include <link.h>
  14 #endif
  15
  16 #include "runtime.h"
  17 #include "arch.h"
  18 #include "defs.h"
  19 #include "malloc.h"
  20 #include "race.h"
  21 #include "go-type.h"
  22 #include "go-defer.h"
  23
  24 #ifdef USING_SPLIT_STACK
  25
  26 /* FIXME: These are not declared anywhere.  */
  27
  28 extern void __splitstack_getcontext(void *context[10]);
  29
  30 extern void __splitstack_setcontext(void *context[10]);
  31
  32 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
  33
  34 extern void * __splitstack_resetcontext(void *context[10], size_t *);
  35
  36 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
  37                                void **);
  38
  39 extern void __splitstack_block_signals (int *, int *);
  40
  41 extern void __splitstack_block_signals_context (void *context[10], int *,
  42                                                 int *);
  43
  44 #endif
  45
  46 #ifndef PTHREAD_STACK_MIN
  47 # define PTHREAD_STACK_MIN 8192
  48 #endif
  49
  50 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
  51 # define StackMin PTHREAD_STACK_MIN
  52 #else
  53 # define StackMin 2 * 1024 * 1024
  54 #endif
  55
  56 uintptr runtime_stacks_sys;
  57
  58 static void schedule(G*);
  59
  60 static void gtraceback(G*);
  61
  62 typedef struct Sched Sched;
  63
  64 M       runtime_m0;
  65 G       runtime_g0;     // idle goroutine for m0
  66
  67 #ifdef __rtems__
  68 #define __thread
  69 #endif
  70
  71 static __thread G *g;
  72 static __thread M *m;
  73
  74 #ifndef SETCONTEXT_CLOBBERS_TLS
  75
  76 static inline void
  77 initcontext(void)
  78 {
  79 }
  80
  81 static inline void
  82 fixcontext(ucontext_t *c __attribute__ ((unused)))
  83 {
  84 }
  85
  86 #else
  87
  88 # if defined(__x86_64__) && defined(__sun__)
  89
  90 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
  91 // register to that of the thread which called getcontext.  The effect
  92 // is that the address of all __thread variables changes.  This bug
  93 // also affects pthread_self() and pthread_getspecific.  We work
  94 // around it by clobbering the context field directly to keep %fs the
  95 // same.
  96
  97 static __thread greg_t fs;
  98
  99 static inline void
 100 initcontext(void)
 101 {
 102         ucontext_t c;
 103
 104         getcontext(&c);
 105         fs = c.uc_mcontext.gregs[REG_FSBASE];
 106 }
 107
 108 static inline void
 109 fixcontext(ucontext_t* c)
 110 {
 111         c->uc_mcontext.gregs[REG_FSBASE] = fs;
 112 }
 113
 114 # elif defined(__NetBSD__)
 115
 116 // NetBSD has a bug: setcontext clobbers tlsbase, we need to save
 117 // and restore it ourselves.
 118
 119 static __thread __greg_t tlsbase;
 120
 121 static inline void
 122 initcontext(void)
 123 {
 124         ucontext_t c;
 125
 126         getcontext(&c);
 127         tlsbase = c.uc_mcontext._mc_tlsbase;
 128 }
 129
 130 static inline void
 131 fixcontext(ucontext_t* c)
 132 {
 133         c->uc_mcontext._mc_tlsbase = tlsbase;
 134 }
 135
 136 # else
 137
 138 #  error unknown case for SETCONTEXT_CLOBBERS_TLS
 139
 140 # endif
 141
 142 #endif
 143
 144 // We can not always refer to the TLS variables directly.  The
 145 // compiler will call tls_get_addr to get the address of the variable,
 146 // and it may hold it in a register across a call to schedule.  When
 147 // we get back from the call we may be running in a different thread,
 148 // in which case the register now points to the TLS variable for a
 149 // different thread.  We use non-inlinable functions to avoid this
 150 // when necessary.
 151
 152 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
 153
 154 G*
 155 runtime_g(void)
 156 {
 157         return g;
 158 }
 159
 160 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
 161
 162 M*
 163 runtime_m(void)
 164 {
 165         return m;
 166 }
 167
 168 int32   runtime_gcwaiting;
 169
 170 // The static TLS size.  See runtime_newm.
 171 static int tlssize;
 172
 173 #ifdef HAVE_DL_ITERATE_PHDR
 174
 175 // Called via dl_iterate_phdr.
 176
 177 static int
 178 addtls(struct dl_phdr_info* info, size_t size __attribute__ ((unused)), void *data)
 179 {
 180         size_t *total = (size_t *)data;
 181         unsigned int i;
 182
 183         for(i = 0; i < info->dlpi_phnum; ++i) {
 184                 if(info->dlpi_phdr[i].p_type == PT_TLS)
 185                         *total += info->dlpi_phdr[i].p_memsz;
 186         }
 187         return 0;
 188 }
 189
 190 // Set the total TLS size.
 191
 192 static void
 193 inittlssize()
 194 {
 195         size_t total = 0;
 196
 197         dl_iterate_phdr(addtls, (void *)&total);
 198         tlssize = total;
 199 }
 200
 201 #else
 202
 203 static void
 204 inittlssize()
 205 {
 206 }
 207
 208 #endif
 209
 210 // Go scheduler
 211 //
 212 // The go scheduler's job is to match ready-to-run goroutines (`g's)
 213 // with waiting-for-work schedulers (`m's).  If there are ready g's
 214 // and no waiting m's, ready() will start a new m running in a new
 215 // OS thread, so that all ready g's can run simultaneously, up to a limit.
 216 // For now, m's never go away.
 217 //
 218 // By default, Go keeps only one kernel thread (m) running user code
 219 // at a single time; other threads may be blocked in the operating system.
 220 // Setting the environment variable $GOMAXPROCS or calling
 221 // runtime.GOMAXPROCS() will change the number of user threads
 222 // allowed to execute simultaneously.  $GOMAXPROCS is thus an
 223 // approximation of the maximum number of cores to use.
 224 //
 225 // Even a program that can run without deadlock in a single process
 226 // might use more m's if given the chance.  For example, the prime
 227 // sieve will use as many m's as there are primes (up to runtime_sched.mmax),
 228 // allowing different stages of the pipeline to execute in parallel.
 229 // We could revisit this choice, only kicking off new m's for blocking
 230 // system calls, but that would limit the amount of parallel computation
 231 // that go would try to do.
 232 //
 233 // In general, one could imagine all sorts of refinements to the
 234 // scheduler, but the goal now is just to get something working on
 235 // Linux and OS X.
 236
 237 struct Sched {
 238         Lock;
 239
 240         G *gfree;       // available g's (status == Gdead)
 241         int64 goidgen;
 242
 243         G *ghead;       // g's waiting to run
 244         G *gtail;
 245         int32 gwait;    // number of g's waiting to run
 246         int32 gcount;   // number of g's that are alive
 247         int32 grunning; // number of g's running on cpu or in syscall
 248
 249         M *mhead;       // m's waiting for work
 250         int32 mwait;    // number of m's waiting for work
 251         int32 mcount;   // number of m's that have been created
 252
 253         volatile uint32 atomic; // atomic scheduling word (see below)
 254
 255         int32 profilehz;        // cpu profiling rate
 256
 257         bool init;  // running initialization
 258         bool lockmain;  // init called runtime.LockOSThread
 259
 260         Note    stopped;        // one g can set waitstop and wait here for m's to stop
 261 };
 262
 263 // The atomic word in sched is an atomic uint32 that
 264 // holds these fields.
 265 //
 266 //      [15 bits] mcpu          number of m's executing on cpu
 267 //      [15 bits] mcpumax       max number of m's allowed on cpu
 268 //      [1 bit] waitstop        some g is waiting on stopped
 269 //      [1 bit] gwaiting        gwait != 0
 270 //
 271 // These fields are the information needed by entersyscall
 272 // and exitsyscall to decide whether to coordinate with the
 273 // scheduler.  Packing them into a single machine word lets
 274 // them use a fast path with a single atomic read/write and
 275 // no lock/unlock.  This greatly reduces contention in
 276 // syscall- or cgo-heavy multithreaded programs.
 277 //
 278 // Except for entersyscall and exitsyscall, the manipulations
 279 // to these fields only happen while holding the schedlock,
 280 // so the routines holding schedlock only need to worry about
 281 // what entersyscall and exitsyscall do, not the other routines
 282 // (which also use the schedlock).
 283 //
 284 // In particular, entersyscall and exitsyscall only read mcpumax,
 285 // waitstop, and gwaiting.  They never write them.  Thus, writes to those
 286 // fields can be done (holding schedlock) without fear of write conflicts.
 287 // There may still be logic conflicts: for example, the set of waitstop must
 288 // be conditioned on mcpu >= mcpumax or else the wait may be a
 289 // spurious sleep.  The Promela model in proc.p verifies these accesses.
 290 enum {
 291         mcpuWidth = 15,
 292         mcpuMask = (1<<mcpuWidth) - 1,
 293         mcpuShift = 0,
 294         mcpumaxShift = mcpuShift + mcpuWidth,
 295         waitstopShift = mcpumaxShift + mcpuWidth,
 296         gwaitingShift = waitstopShift+1,
 297
 298         // The max value of GOMAXPROCS is constrained
 299         // by the max value we can store in the bit fields
 300         // of the atomic word.  Reserve a few high values
 301         // so that we can detect accidental decrement
 302         // beyond zero.
 303         maxgomaxprocs = mcpuMask - 10,
 304 };
 305
 306 #define atomic_mcpu(v)          (((v)>>mcpuShift)&mcpuMask)
 307 #define atomic_mcpumax(v)       (((v)>>mcpumaxShift)&mcpuMask)
 308 #define atomic_waitstop(v)      (((v)>>waitstopShift)&1)
 309 #define atomic_gwaiting(v)      (((v)>>gwaitingShift)&1)
 310
 311 Sched runtime_sched;
 312 int32 runtime_gomaxprocs;
 313 bool runtime_singleproc;
 314
 315 static bool canaddmcpu(void);
 316
 317 // An m that is waiting for notewakeup(&m->havenextg).  This may
 318 // only be accessed while the scheduler lock is held.  This is used to
 319 // minimize the number of times we call notewakeup while the scheduler
 320 // lock is held, since the m will normally move quickly to lock the
 321 // scheduler itself, producing lock contention.
 322 static M* mwakeup;
 323
 324 // Scheduling helpers.  Sched must be locked.
 325 static void gput(G*);   // put/get on ghead/gtail
 326 static G* gget(void);
 327 static void mput(M*);   // put/get on mhead
 328 static M* mget(G*);
 329 static void gfput(G*);  // put/get on gfree
 330 static G* gfget(void);
 331 static void matchmg(void);      // match m's to g's
 332 static void readylocked(G*);    // ready, but sched is locked
 333 static void mnextg(M*, G*);
 334 static void mcommoninit(M*);
 335
 336 void
 337 setmcpumax(uint32 n)
 338 {
 339         uint32 v, w;
 340
 341         for(;;) {
 342                 v = runtime_sched.atomic;
 343                 w = v;
 344                 w &= ~(mcpuMask<<mcpumaxShift);
 345                 w |= n<<mcpumaxShift;
 346                 if(runtime_cas(&runtime_sched.atomic, v, w))
 347                         break;
 348         }
 349 }
 350
 351 // First function run by a new goroutine.  This replaces gogocall.
 352 static void
 353 kickoff(void)
 354 {
 355         void (*fn)(void*);
 356
 357         if(g->traceback != nil)
 358                 gtraceback(g);
 359
 360         fn = (void (*)(void*))(g->entry);
 361         fn(g->param);
 362         runtime_goexit();
 363 }
 364
 365 // Switch context to a different goroutine.  This is like longjmp.
 366 static void runtime_gogo(G*) __attribute__ ((noinline));
 367 static void
 368 runtime_gogo(G* newg)
 369 {
 370 #ifdef USING_SPLIT_STACK
 371         __splitstack_setcontext(&newg->stack_context[0]);
 372 #endif
 373         g = newg;
 374         newg->fromgogo = true;
 375         fixcontext(&newg->context);
 376         setcontext(&newg->context);
 377         runtime_throw("gogo setcontext returned");
 378 }
 379
 380 // Save context and call fn passing g as a parameter.  This is like
 381 // setjmp.  Because getcontext always returns 0, unlike setjmp, we use
 382 // g->fromgogo as a code.  It will be true if we got here via
 383 // setcontext.  g == nil the first time this is called in a new m.
 384 static void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
 385 static void
 386 runtime_mcall(void (*pfn)(G*))
 387 {
 388         M *mp;
 389         G *gp;
 390 #ifndef USING_SPLIT_STACK
 391         int i;
 392 #endif
 393
 394         // Ensure that all registers are on the stack for the garbage
 395         // collector.
 396         __builtin_unwind_init();
 397
 398         mp = m;
 399         gp = g;
 400         if(gp == mp->g0)
 401                 runtime_throw("runtime: mcall called on m->g0 stack");
 402
 403         if(gp != nil) {
 404
 405 #ifdef USING_SPLIT_STACK
 406                 __splitstack_getcontext(&g->stack_context[0]);
 407 #else
 408                 gp->gcnext_sp = &i;
 409 #endif
 410                 gp->fromgogo = false;
 411                 getcontext(&gp->context);
 412
 413                 // When we return from getcontext, we may be running
 414                 // in a new thread.  That means that m and g may have
 415                 // changed.  They are global variables so we will
 416                 // reload them, but the addresses of m and g may be
 417                 // cached in our local stack frame, and those
 418                 // addresses may be wrong.  Call functions to reload
 419                 // the values for this thread.
 420                 mp = runtime_m();
 421                 gp = runtime_g();
 422
 423                 if(gp->traceback != nil)
 424                         gtraceback(gp);
 425         }
 426         if (gp == nil || !gp->fromgogo) {
 427 #ifdef USING_SPLIT_STACK
 428                 __splitstack_setcontext(&mp->g0->stack_context[0]);
 429 #endif
 430                 mp->g0->entry = (byte*)pfn;
 431                 mp->g0->param = gp;
 432
 433                 // It's OK to set g directly here because this case
 434                 // can not occur if we got here via a setcontext to
 435                 // the getcontext call just above.
 436                 g = mp->g0;
 437
 438                 fixcontext(&mp->g0->context);
 439                 setcontext(&mp->g0->context);
 440                 runtime_throw("runtime: mcall function returned");
 441         }
 442 }
 443
 444 // Keep trace of scavenger's goroutine for deadlock detection.
 445 static G *scvg;
 446
 447 // The bootstrap sequence is:
 448 //
 449 //      call osinit
 450 //      call schedinit
 451 //      make & queue new G
 452 //      call runtime_mstart
 453 //
 454 // The new G calls runtime_main.
 455 void
 456 runtime_schedinit(void)
 457 {
 458         int32 n;
 459         const byte *p;
 460
 461         m = &runtime_m0;
 462         g = &runtime_g0;
 463         m->g0 = g;
 464         m->curg = g;
 465         g->m = m;
 466
 467         initcontext();
 468         inittlssize();
 469
 470         m->nomemprof++;
 471         runtime_mallocinit();
 472         mcommoninit(m);
 473
 474         runtime_goargs();
 475         runtime_goenvs();
 476
 477         // For debugging:
 478         // Allocate internal symbol table representation now,
 479         // so that we don't need to call malloc when we crash.
 480         // runtime_findfunc(0);
 481
 482         runtime_gomaxprocs = 1;
 483         p = runtime_getenv("GOMAXPROCS");
 484         if(p != nil && (n = runtime_atoi(p)) != 0) {
 485                 if(n > maxgomaxprocs)
 486                         n = maxgomaxprocs;
 487                 runtime_gomaxprocs = n;
 488         }
 489         // wait for the main goroutine to start before taking
 490         // GOMAXPROCS into account.
 491         setmcpumax(1);
 492         runtime_singleproc = runtime_gomaxprocs == 1;
 493
 494         canaddmcpu();   // mcpu++ to account for bootstrap m
 495         m->helpgc = 1;  // flag to tell schedule() to mcpu--
 496         runtime_sched.grunning++;
 497
 498         // Can not enable GC until all roots are registered.
 499         // mstats.enablegc = 1;
 500         m->nomemprof--;
 501
 502         if(raceenabled)
 503                 runtime_raceinit();
 504 }
 505
 506 extern void main_init(void) __asm__ ("__go_init_main");
 507 extern void main_main(void) __asm__ ("main.main");
 508
 509 // The main goroutine.
 510 void
 511 runtime_main(void)
 512 {
 513         // Lock the main goroutine onto this, the main OS thread,
 514         // during initialization.  Most programs won't care, but a few
 515         // do require certain calls to be made by the main thread.
 516         // Those can arrange for main.main to run in the main thread
 517         // by calling runtime.LockOSThread during initialization
 518         // to preserve the lock.
 519         runtime_LockOSThread();
 520         // From now on, newgoroutines may use non-main threads.
 521         setmcpumax(runtime_gomaxprocs);
 522         runtime_sched.init = true;
 523         scvg = __go_go(runtime_MHeap_Scavenger, nil);
 524         main_init();
 525         runtime_sched.init = false;
 526         if(!runtime_sched.lockmain)
 527                 runtime_UnlockOSThread();
 528
 529         // For gccgo we have to wait until after main is initialized
 530         // to enable GC, because initializing main registers the GC
 531         // roots.
 532         mstats.enablegc = 1;
 533
 534         // The deadlock detection has false negatives.
 535         // Let scvg start up, to eliminate the false negative
 536         // for the trivial program func main() { select{} }.
 537         runtime_gosched();
 538
 539         main_main();
 540         if(raceenabled)
 541                 runtime_racefini();
 542         runtime_exit(0);
 543         for(;;)
 544                 *(int32*)0 = 0;
 545 }
 546
 547 // Lock the scheduler.
 548 static void
 549 schedlock(void)
 550 {
 551         runtime_lock(&runtime_sched);
 552 }
 553
 554 // Unlock the scheduler.
 555 static void
 556 schedunlock(void)
 557 {
 558         M *mp;
 559
 560         mp = mwakeup;
 561         mwakeup = nil;
 562         runtime_unlock(&runtime_sched);
 563         if(mp != nil)
 564                 runtime_notewakeup(&mp->havenextg);
 565 }
 566
 567 void
 568 runtime_goexit(void)
 569 {
 570         g->status = Gmoribund;
 571         runtime_gosched();
 572 }
 573
 574 void
 575 runtime_goroutineheader(G *gp)
 576 {
 577         const char *status;
 578
 579         switch(gp->status) {
 580         case Gidle:
 581                 status = "idle";
 582                 break;
 583         case Grunnable:
 584                 status = "runnable";
 585                 break;
 586         case Grunning:
 587                 status = "running";
 588                 break;
 589         case Gsyscall:
 590                 status = "syscall";
 591                 break;
 592         case Gwaiting:
 593                 if(gp->waitreason)
 594                         status = gp->waitreason;
 595                 else
 596                         status = "waiting";
 597                 break;
 598         case Gmoribund:
 599                 status = "moribund";
 600                 break;
 601         default:
 602                 status = "???";
 603                 break;
 604         }
 605         runtime_printf("goroutine %D [%s]:\n", gp->goid, status);
 606 }
 607
 608 void
 609 runtime_goroutinetrailer(G *g)
 610 {
 611         if(g != nil && g->gopc != 0 && g->goid != 1) {
 612                 String fn;
 613                 String file;
 614                 intgo line;
 615
 616                 if(__go_file_line(g->gopc - 1, &fn, &file, &line)) {
 617                         runtime_printf("created by %S\n", fn);
 618                         runtime_printf("\t%S:%D\n", file, (int64) line);
 619                 }
 620         }
 621 }
 622
 623 struct Traceback
 624 {
 625         G* gp;
 626         uintptr pcbuf[100];
 627         int32 c;
 628 };
 629
 630 void
 631 runtime_tracebackothers(G * volatile me)
 632 {
 633         G * volatile gp;
 634         Traceback traceback;
 635
 636         traceback.gp = me;
 637         for(gp = runtime_allg; gp != nil; gp = gp->alllink) {
 638                 if(gp == me || gp->status == Gdead)
 639                         continue;
 640                 runtime_printf("\n");
 641                 runtime_goroutineheader(gp);
 642
 643                 // Our only mechanism for doing a stack trace is
 644                 // _Unwind_Backtrace.  And that only works for the
 645                 // current thread, not for other random goroutines.
 646                 // So we need to switch context to the goroutine, get
 647                 // the backtrace, and then switch back.
 648
 649                 // This means that if g is running or in a syscall, we
 650                 // can't reliably print a stack trace.  FIXME.
 651                 if(gp->status == Gsyscall || gp->status == Grunning) {
 652                         runtime_printf("no stack trace available\n");
 653                         runtime_goroutinetrailer(gp);
 654                         continue;
 655                 }
 656
 657                 gp->traceback = &traceback;
 658
 659 #ifdef USING_SPLIT_STACK
 660                 __splitstack_getcontext(&me->stack_context[0]);
 661 #endif
 662                 getcontext(&me->context);
 663
 664                 if(gp->traceback != nil) {
 665                         runtime_gogo(gp);
 666                 }
 667
 668                 runtime_printtrace(traceback.pcbuf, traceback.c);
 669                 runtime_goroutinetrailer(gp);
 670         }
 671 }
 672
 673 // Do a stack trace of gp, and then restore the context to
 674 // gp->dotraceback.
 675
 676 static void
 677 gtraceback(G* gp)
 678 {
 679         Traceback* traceback;
 680
 681         traceback = gp->traceback;
 682         gp->traceback = nil;
 683         traceback->c = runtime_callers(1, traceback->pcbuf,
 684                 sizeof traceback->pcbuf / sizeof traceback->pcbuf[0]);
 685         runtime_gogo(traceback->gp);
 686 }
 687
 688 // Mark this g as m's idle goroutine.
 689 // This functionality might be used in environments where programs
 690 // are limited to a single thread, to simulate a select-driven
 691 // network server.  It is not exposed via the standard runtime API.
 692 void
 693 runtime_idlegoroutine(void)
 694 {
 695         if(g->idlem != nil)
 696                 runtime_throw("g is already an idle goroutine");
 697         g->idlem = m;
 698 }
 699
 700 static void
 701 mcommoninit(M *mp)
 702 {
 703         mp->id = runtime_sched.mcount++;
 704         mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
 705
 706         if(mp->mcache == nil)
 707                 mp->mcache = runtime_allocmcache();
 708
 709         runtime_callers(1, mp->createstack, nelem(mp->createstack));
 710
 711         // Add to runtime_allm so garbage collector doesn't free m
 712         // when it is just in a register or thread-local storage.
 713         mp->alllink = runtime_allm;
 714         // runtime_NumCgoCall() iterates over allm w/o schedlock,
 715         // so we need to publish it safely.
 716         runtime_atomicstorep(&runtime_allm, mp);
 717 }
 718
 719 // Try to increment mcpu.  Report whether succeeded.
 720 static bool
 721 canaddmcpu(void)
 722 {
 723         uint32 v;
 724
 725         for(;;) {
 726                 v = runtime_sched.atomic;
 727                 if(atomic_mcpu(v) >= atomic_mcpumax(v))
 728                         return 0;
 729                 if(runtime_cas(&runtime_sched.atomic, v, v+(1<<mcpuShift)))
 730                         return 1;
 731         }
 732 }
 733
 734 // Put on `g' queue.  Sched must be locked.
 735 static void
 736 gput(G *gp)
 737 {
 738         M *mp;
 739
 740         // If g is wired, hand it off directly.
 741         if((mp = gp->lockedm) != nil && canaddmcpu()) {
 742                 mnextg(mp, gp);
 743                 return;
 744         }
 745
 746         // If g is the idle goroutine for an m, hand it off.
 747         if(gp->idlem != nil) {
 748                 if(gp->idlem->idleg != nil) {
 749                         runtime_printf("m%d idle out of sync: g%D g%D\n",
 750                                 gp->idlem->id,
 751                                 gp->idlem->idleg->goid, gp->goid);
 752                         runtime_throw("runtime: double idle");
 753                 }
 754                 gp->idlem->idleg = gp;
 755                 return;
 756         }
 757
 758         gp->schedlink = nil;
 759         if(runtime_sched.ghead == nil)
 760                 runtime_sched.ghead = gp;
 761         else
 762                 runtime_sched.gtail->schedlink = gp;
 763         runtime_sched.gtail = gp;
 764
 765         // increment gwait.
 766         // if it transitions to nonzero, set atomic gwaiting bit.
 767         if(runtime_sched.gwait++ == 0)
 768                 runtime_xadd(&runtime_sched.atomic, 1<<gwaitingShift);
 769 }
 770
 771 // Report whether gget would return something.
 772 static bool
 773 haveg(void)
 774 {
 775         return runtime_sched.ghead != nil || m->idleg != nil;
 776 }
 777
 778 // Get from `g' queue.  Sched must be locked.
 779 static G*
 780 gget(void)
 781 {
 782         G *gp;
 783
 784         gp = runtime_sched.ghead;
 785         if(gp) {
 786                 runtime_sched.ghead = gp->schedlink;
 787                 if(runtime_sched.ghead == nil)
 788                         runtime_sched.gtail = nil;
 789                 // decrement gwait.
 790                 // if it transitions to zero, clear atomic gwaiting bit.
 791                 if(--runtime_sched.gwait == 0)
 792                         runtime_xadd(&runtime_sched.atomic, -1<<gwaitingShift);
 793         } else if(m->idleg != nil) {
 794                 gp = m->idleg;
 795                 m->idleg = nil;
 796         }
 797         return gp;
 798 }
 799
 800 // Put on `m' list.  Sched must be locked.
 801 static void
 802 mput(M *mp)
 803 {
 804         mp->schedlink = runtime_sched.mhead;
 805         runtime_sched.mhead = mp;
 806         runtime_sched.mwait++;
 807 }
 808
 809 // Get an `m' to run `g'.  Sched must be locked.
 810 static M*
 811 mget(G *gp)
 812 {
 813         M *mp;
 814
 815         // if g has its own m, use it.
 816         if(gp && (mp = gp->lockedm) != nil)
 817                 return mp;
 818
 819         // otherwise use general m pool.
 820         if((mp = runtime_sched.mhead) != nil) {
 821                 runtime_sched.mhead = mp->schedlink;
 822                 runtime_sched.mwait--;
 823         }
 824         return mp;
 825 }
 826
 827 // Mark g ready to run.
 828 void
 829 runtime_ready(G *gp)
 830 {
 831         schedlock();
 832         readylocked(gp);
 833         schedunlock();
 834 }
 835
 836 // Mark g ready to run.  Sched is already locked.
 837 // G might be running already and about to stop.
 838 // The sched lock protects g->status from changing underfoot.
 839 static void
 840 readylocked(G *gp)
 841 {
 842         if(gp->m) {
 843                 // Running on another machine.
 844                 // Ready it when it stops.
 845                 gp->readyonstop = 1;
 846                 return;
 847         }
 848
 849         // Mark runnable.
 850         if(gp->status == Grunnable || gp->status == Grunning) {
 851                 runtime_printf("goroutine %D has status %d\n", gp->goid, gp->status);
 852                 runtime_throw("bad g->status in ready");
 853         }
 854         gp->status = Grunnable;
 855
 856         gput(gp);
 857         matchmg();
 858 }
 859
 860 // Same as readylocked but a different symbol so that
 861 // debuggers can set a breakpoint here and catch all
 862 // new goroutines.
 863 static void
 864 newprocreadylocked(G *gp)
 865 {
 866         readylocked(gp);
 867 }
 868
 869 // Pass g to m for running.
 870 // Caller has already incremented mcpu.
 871 static void
 872 mnextg(M *mp, G *gp)
 873 {
 874         runtime_sched.grunning++;
 875         mp->nextg = gp;
 876         if(mp->waitnextg) {
 877                 mp->waitnextg = 0;
 878                 if(mwakeup != nil)
 879                         runtime_notewakeup(&mwakeup->havenextg);
 880                 mwakeup = mp;
 881         }
 882 }
 883
 884 // Get the next goroutine that m should run.
 885 // Sched must be locked on entry, is unlocked on exit.
 886 // Makes sure that at most $GOMAXPROCS g's are
 887 // running on cpus (not in system calls) at any given time.
 888 static G*
 889 nextgandunlock(void)
 890 {
 891         G *gp;
 892         uint32 v;
 893
 894 top:
 895         if(atomic_mcpu(runtime_sched.atomic) >= maxgomaxprocs)
 896                 runtime_throw("negative mcpu");
 897
 898         // If there is a g waiting as m->nextg, the mcpu++
 899         // happened before it was passed to mnextg.
 900         if(m->nextg != nil) {
 901                 gp = m->nextg;
 902                 m->nextg = nil;
 903                 schedunlock();
 904                 return gp;
 905         }
 906
 907         if(m->lockedg != nil) {
 908                 // We can only run one g, and it's not available.
 909                 // Make sure some other cpu is running to handle
 910                 // the ordinary run queue.
 911                 if(runtime_sched.gwait != 0) {
 912                         matchmg();
 913                         // m->lockedg might have been on the queue.
 914                         if(m->nextg != nil) {
 915                                 gp = m->nextg;
 916                                 m->nextg = nil;
 917                                 schedunlock();
 918                                 return gp;
 919                         }
 920                 }
 921         } else {
 922                 // Look for work on global queue.
 923                 while(haveg() && canaddmcpu()) {
 924                         gp = gget();
 925                         if(gp == nil)
 926                                 runtime_throw("gget inconsistency");
 927
 928                         if(gp->lockedm) {
 929                                 mnextg(gp->lockedm, gp);
 930                                 continue;
 931                         }
 932                         runtime_sched.grunning++;
 933                         schedunlock();
 934                         return gp;
 935                 }
 936
 937                 // The while loop ended either because the g queue is empty
 938                 // or because we have maxed out our m procs running go
 939                 // code (mcpu >= mcpumax).  We need to check that
 940                 // concurrent actions by entersyscall/exitsyscall cannot
 941                 // invalidate the decision to end the loop.
 942                 //
 943                 // We hold the sched lock, so no one else is manipulating the
 944                 // g queue or changing mcpumax.  Entersyscall can decrement
 945                 // mcpu, but if does so when there is something on the g queue,
 946                 // the gwait bit will be set, so entersyscall will take the slow path
 947                 // and use the sched lock.  So it cannot invalidate our decision.
 948                 //
 949                 // Wait on global m queue.
 950                 mput(m);
 951         }
 952
 953         // Look for deadlock situation.
 954         // There is a race with the scavenger that causes false negatives:
 955         // if the scavenger is just starting, then we have
 956         //      scvg != nil && grunning == 0 && gwait == 0
 957         // and we do not detect a deadlock.  It is possible that we should
 958         // add that case to the if statement here, but it is too close to Go 1
 959         // to make such a subtle change.  Instead, we work around the
 960         // false negative in trivial programs by calling runtime.gosched
 961         // from the main goroutine just before main.main.
 962         // See runtime_main above.
 963         //
 964         // On a related note, it is also possible that the scvg == nil case is
 965         // wrong and should include gwait, but that does not happen in
 966         // standard Go programs, which all start the scavenger.
 967         //
 968         if((scvg == nil && runtime_sched.grunning == 0) ||
 969            (scvg != nil && runtime_sched.grunning == 1 && runtime_sched.gwait == 0 &&
 970             (scvg->status == Grunning || scvg->status == Gsyscall))) {
 971                 runtime_throw("all goroutines are asleep - deadlock!");
 972         }
 973
 974         m->nextg = nil;
 975         m->waitnextg = 1;
 976         runtime_noteclear(&m->havenextg);
 977
 978         // Stoptheworld is waiting for all but its cpu to go to stop.
 979         // Entersyscall might have decremented mcpu too, but if so
 980         // it will see the waitstop and take the slow path.
 981         // Exitsyscall never increments mcpu beyond mcpumax.
 982         v = runtime_atomicload(&runtime_sched.atomic);
 983         if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
 984                 // set waitstop = 0 (known to be 1)
 985                 runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
 986                 runtime_notewakeup(&runtime_sched.stopped);
 987         }
 988         schedunlock();
 989
 990         runtime_notesleep(&m->havenextg);
 991         if(m->helpgc) {
 992                 runtime_gchelper();
 993                 m->helpgc = 0;
 994                 runtime_lock(&runtime_sched);
 995                 goto top;
 996         }
 997         if((gp = m->nextg) == nil)
 998                 runtime_throw("bad m->nextg in nextgoroutine");
 999         m->nextg = nil;
1000         return gp;
1001 }
1002
1003 int32
1004 runtime_gcprocs(void)
1005 {
1006         int32 n;
1007
1008         // Figure out how many CPUs to use during GC.
1009         // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
1010         n = runtime_gomaxprocs;
1011         if(n > runtime_ncpu)
1012                 n = runtime_ncpu > 0 ? runtime_ncpu : 1;
1013         if(n > MaxGcproc)
1014                 n = MaxGcproc;
1015         if(n > runtime_sched.mwait+1) // one M is currently running
1016                 n = runtime_sched.mwait+1;
1017         return n;
1018 }
1019
1020 void
1021 runtime_helpgc(int32 nproc)
1022 {
1023         M *mp;
1024         int32 n;
1025
1026         runtime_lock(&runtime_sched);
1027         for(n = 1; n < nproc; n++) { // one M is currently running
1028                 mp = mget(nil);
1029                 if(mp == nil)
1030                         runtime_throw("runtime_gcprocs inconsistency");
1031                 mp->helpgc = 1;
1032                 mp->waitnextg = 0;
1033                 runtime_notewakeup(&mp->havenextg);
1034         }
1035         runtime_unlock(&runtime_sched);
1036 }
1037
1038 void
1039 runtime_stoptheworld(void)
1040 {
1041         uint32 v;
1042
1043         schedlock();
1044         runtime_gcwaiting = 1;
1045
1046         setmcpumax(1);
1047
1048         // while mcpu > 1
1049         for(;;) {
1050                 v = runtime_sched.atomic;
1051                 if(atomic_mcpu(v) <= 1)
1052                         break;
1053
1054                 // It would be unsafe for multiple threads to be using
1055                 // the stopped note at once, but there is only
1056                 // ever one thread doing garbage collection.
1057                 runtime_noteclear(&runtime_sched.stopped);
1058                 if(atomic_waitstop(v))
1059                         runtime_throw("invalid waitstop");
1060
1061                 // atomic { waitstop = 1 }, predicated on mcpu <= 1 check above
1062                 // still being true.
1063                 if(!runtime_cas(&runtime_sched.atomic, v, v+(1<<waitstopShift)))
1064                         continue;
1065
1066                 schedunlock();
1067                 runtime_notesleep(&runtime_sched.stopped);
1068                 schedlock();
1069         }
1070         runtime_singleproc = runtime_gomaxprocs == 1;
1071         schedunlock();
1072 }
1073
1074 void
1075 runtime_starttheworld(void)
1076 {
1077         M *mp;
1078         int32 max;
1079
1080         // Figure out how many CPUs GC could possibly use.
1081         max = runtime_gomaxprocs;
1082         if(max > runtime_ncpu)
1083                 max = runtime_ncpu > 0 ? runtime_ncpu : 1;
1084         if(max > MaxGcproc)
1085                 max = MaxGcproc;
1086
1087         schedlock();
1088         runtime_gcwaiting = 0;
1089         setmcpumax(runtime_gomaxprocs);
1090         matchmg();
1091         if(runtime_gcprocs() < max && canaddmcpu()) {
1092                 // If GC could have used another helper proc, start one now,
1093                 // in the hope that it will be available next time.
1094                 // It would have been even better to start it before the collection,
1095                 // but doing so requires allocating memory, so it's tricky to
1096                 // coordinate.  This lazy approach works out in practice:
1097                 // we don't mind if the first couple gc rounds don't have quite
1098                 // the maximum number of procs.
1099                 // canaddmcpu above did mcpu++
1100                 // (necessary, because m will be doing various
1101                 // initialization work so is definitely running),
1102                 // but m is not running a specific goroutine,
1103                 // so set the helpgc flag as a signal to m's
1104                 // first schedule(nil) to mcpu-- and grunning--.
1105                 mp = runtime_newm();
1106                 mp->helpgc = 1;
1107                 runtime_sched.grunning++;
1108         }
1109         schedunlock();
1110 }
1111
1112 // Called to start an M.
1113 void*
1114 runtime_mstart(void* mp)
1115 {
1116         m = (M*)mp;
1117         g = m->g0;
1118
1119         initcontext();
1120
1121         g->entry = nil;
1122         g->param = nil;
1123
1124         // Record top of stack for use by mcall.
1125         // Once we call schedule we're never coming back,
1126         // so other calls can reuse this stack space.
1127 #ifdef USING_SPLIT_STACK
1128         __splitstack_getcontext(&g->stack_context[0]);
1129 #else
1130         g->gcinitial_sp = &mp;
1131         // Setting gcstack_size to 0 is a marker meaning that gcinitial_sp
1132         // is the top of the stack, not the bottom.
1133         g->gcstack_size = 0;
1134         g->gcnext_sp = &mp;
1135 #endif
1136         getcontext(&g->context);
1137
1138         if(g->entry != nil) {
1139                 // Got here from mcall.
1140                 void (*pfn)(G*) = (void (*)(G*))g->entry;
1141                 G* gp = (G*)g->param;
1142                 pfn(gp);
1143                 *(int*)0x21 = 0x21;
1144         }
1145         runtime_minit();
1146
1147 #ifdef USING_SPLIT_STACK
1148         {
1149           int dont_block_signals = 0;
1150           __splitstack_block_signals(&dont_block_signals, nil);
1151         }
1152 #endif
1153
1154         // Install signal handlers; after minit so that minit can
1155         // prepare the thread to be able to handle the signals.
1156         if(m == &runtime_m0)
1157                 runtime_initsig();
1158
1159         schedule(nil);
1160
1161         // TODO(brainman): This point is never reached, because scheduler
1162         // does not release os threads at the moment. But once this path
1163         // is enabled, we must remove our seh here.
1164
1165         return nil;
1166 }
1167
1168 typedef struct CgoThreadStart CgoThreadStart;
1169 struct CgoThreadStart
1170 {
1171         M *m;
1172         G *g;
1173         void (*fn)(void);
1174 };
1175
1176 // Kick off new m's as needed (up to mcpumax).
1177 // Sched is locked.
1178 static void
1179 matchmg(void)
1180 {
1181         G *gp;
1182         M *mp;
1183
1184         if(m->mallocing || m->gcing)
1185                 return;
1186
1187         while(haveg() && canaddmcpu()) {
1188                 gp = gget();
1189                 if(gp == nil)
1190                         runtime_throw("gget inconsistency");
1191
1192                 // Find the m that will run gp.
1193                 if((mp = mget(gp)) == nil)
1194                         mp = runtime_newm();
1195                 mnextg(mp, gp);
1196         }
1197 }
1198
1199 // Create a new m.  It will start off with a call to runtime_mstart.
1200 M*
1201 runtime_newm(void)
1202 {
1203         M *mp;
1204         pthread_attr_t attr;
1205         pthread_t tid;
1206         size_t stacksize;
1207
1208 #if 0
1209         static const Type *mtype;  // The Go type M
1210         if(mtype == nil) {
1211                 Eface e;
1212                 runtime_gc_m_ptr(&e);
1213                 mtype = ((const PtrType*)e.__type_descriptor)->__element_type;
1214         }
1215 #endif
1216
1217         mp = runtime_mal(sizeof *mp);
1218         mcommoninit(mp);
1219         mp->g0 = runtime_malg(-1, nil, nil);
1220
1221         if(pthread_attr_init(&attr) != 0)
1222                 runtime_throw("pthread_attr_init");
1223         if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
1224                 runtime_throw("pthread_attr_setdetachstate");
1225
1226         stacksize = PTHREAD_STACK_MIN;
1227
1228         // With glibc before version 2.16 the static TLS size is taken
1229         // out of the stack size, and we get an error or a crash if
1230         // there is not enough stack space left.  Add it back in if we
1231         // can, in case the program uses a lot of TLS space.  FIXME:
1232         // This can be disabled in glibc 2.16 and later, if the bug is
1233         // indeed fixed then.
1234         stacksize += tlssize;
1235
1236         if(pthread_attr_setstacksize(&attr, stacksize) != 0)
1237                 runtime_throw("pthread_attr_setstacksize");
1238
1239         if(pthread_create(&tid, &attr, runtime_mstart, mp) != 0)
1240                 runtime_throw("pthread_create");
1241
1242         return mp;
1243 }
1244
1245 // One round of scheduler: find a goroutine and run it.
1246 // The argument is the goroutine that was running before
1247 // schedule was called, or nil if this is the first call.
1248 // Never returns.
1249 static void
1250 schedule(G *gp)
1251 {
1252         int32 hz;
1253         uint32 v;
1254
1255         schedlock();
1256         if(gp != nil) {
1257                 // Just finished running gp.
1258                 gp->m = nil;
1259                 runtime_sched.grunning--;
1260
1261                 // atomic { mcpu-- }
1262                 v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
1263                 if(atomic_mcpu(v) > maxgomaxprocs)
1264                         runtime_throw("negative mcpu in scheduler");
1265
1266                 switch(gp->status) {
1267                 case Grunnable:
1268                 case Gdead:
1269                         // Shouldn't have been running!
1270                         runtime_throw("bad gp->status in sched");
1271                 case Grunning:
1272                         gp->status = Grunnable;
1273                         gput(gp);
1274                         break;
1275                 case Gmoribund:
1276                         if(raceenabled)
1277                                 runtime_racegoend(gp->goid);
1278                         gp->status = Gdead;
1279                         if(gp->lockedm) {
1280                                 gp->lockedm = nil;
1281                                 m->lockedg = nil;
1282                         }
1283                         gp->idlem = nil;
1284                         runtime_memclr(&gp->context, sizeof gp->context);
1285                         gfput(gp);
1286                         if(--runtime_sched.gcount == 0)
1287                                 runtime_exit(0);
1288                         break;
1289                 }
1290                 if(gp->readyonstop) {
1291                         gp->readyonstop = 0;
1292                         readylocked(gp);
1293                 }
1294         } else if(m->helpgc) {
1295                 // Bootstrap m or new m started by starttheworld.
1296                 // atomic { mcpu-- }
1297                 v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
1298                 if(atomic_mcpu(v) > maxgomaxprocs)
1299                         runtime_throw("negative mcpu in scheduler");
1300                 // Compensate for increment in starttheworld().
1301                 runtime_sched.grunning--;
1302                 m->helpgc = 0;
1303         } else if(m->nextg != nil) {
1304                 // New m started by matchmg.
1305         } else {
1306                 runtime_throw("invalid m state in scheduler");
1307         }
1308
1309         // Find (or wait for) g to run.  Unlocks runtime_sched.
1310         gp = nextgandunlock();
1311         gp->readyonstop = 0;
1312         gp->status = Grunning;
1313         m->curg = gp;
1314         gp->m = m;
1315
1316         // Check whether the profiler needs to be turned on or off.
1317         hz = runtime_sched.profilehz;
1318         if(m->profilehz != hz)
1319                 runtime_resetcpuprofiler(hz);
1320
1321         runtime_gogo(gp);
1322 }
1323
1324 // Enter scheduler.  If g->status is Grunning,
1325 // re-queues g and runs everyone else who is waiting
1326 // before running g again.  If g->status is Gmoribund,
1327 // kills off g.
1328 void
1329 runtime_gosched(void)
1330 {
1331         if(m->locks != 0)
1332                 runtime_throw("gosched holding locks");
1333         if(g == m->g0)
1334                 runtime_throw("gosched of g0");
1335         runtime_mcall(schedule);
1336 }
1337
1338 // Puts the current goroutine into a waiting state and unlocks the lock.
1339 // The goroutine can be made runnable again by calling runtime_ready(gp).
1340 void
1341 runtime_park(void (*unlockf)(Lock*), Lock *lock, const char *reason)
1342 {
1343         g->status = Gwaiting;
1344         g->waitreason = reason;
1345         if(unlockf)
1346                 unlockf(lock);
1347         runtime_gosched();
1348 }
1349
1350 // The goroutine g is about to enter a system call.
1351 // Record that it's not using the cpu anymore.
1352 // This is called only from the go syscall library and cgocall,
1353 // not from the low-level system calls used by the runtime.
1354 //
1355 // Entersyscall cannot split the stack: the runtime_gosave must
1356 // make g->sched refer to the caller's stack segment, because
1357 // entersyscall is going to return immediately after.
1358 // It's okay to call matchmg and notewakeup even after
1359 // decrementing mcpu, because we haven't released the
1360 // sched lock yet, so the garbage collector cannot be running.
1361
1362 void runtime_entersyscall(void) __attribute__ ((no_split_stack));
1363
1364 void
1365 runtime_entersyscall(void)
1366 {
1367         uint32 v;
1368
1369         if(m->profilehz > 0)
1370                 runtime_setprof(false);
1371
1372         // Leave SP around for gc and traceback.
1373 #ifdef USING_SPLIT_STACK
1374         g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
1375                                        &g->gcnext_segment, &g->gcnext_sp,
1376                                        &g->gcinitial_sp);
1377 #else
1378         g->gcnext_sp = (byte *) &v;
1379 #endif
1380
1381         // Save the registers in the g structure so that any pointers
1382         // held in registers will be seen by the garbage collector.
1383         getcontext(&g->gcregs);
1384
1385         g->status = Gsyscall;
1386
1387         // Fast path.
1388         // The slow path inside the schedlock/schedunlock will get
1389         // through without stopping if it does:
1390         //      mcpu--
1391         //      gwait not true
1392         //      waitstop && mcpu <= mcpumax not true
1393         // If we can do the same with a single atomic add,
1394         // then we can skip the locks.
1395         v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
1396         if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v)))
1397                 return;
1398
1399         schedlock();
1400         v = runtime_atomicload(&runtime_sched.atomic);
1401         if(atomic_gwaiting(v)) {
1402                 matchmg();
1403                 v = runtime_atomicload(&runtime_sched.atomic);
1404         }
1405         if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
1406                 runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
1407                 runtime_notewakeup(&runtime_sched.stopped);
1408         }
1409
1410         schedunlock();
1411 }
1412
1413 // The goroutine g exited its system call.
1414 // Arrange for it to run on a cpu again.
1415 // This is called only from the go syscall library, not
1416 // from the low-level system calls used by the runtime.
1417 void
1418 runtime_exitsyscall(void)
1419 {
1420         G *gp;
1421         uint32 v;
1422
1423         // Fast path.
1424         // If we can do the mcpu++ bookkeeping and
1425         // find that we still have mcpu <= mcpumax, then we can
1426         // start executing Go code immediately, without having to
1427         // schedlock/schedunlock.
1428         // Also do fast return if any locks are held, so that
1429         // panic code can use syscalls to open a file.
1430         gp = g;
1431         v = runtime_xadd(&runtime_sched.atomic, (1<<mcpuShift));
1432         if((m->profilehz == runtime_sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) || m->locks > 0) {
1433                 // There's a cpu for us, so we can run.
1434                 gp->status = Grunning;
1435                 // Garbage collector isn't running (since we are),
1436                 // so okay to clear gcstack.
1437 #ifdef USING_SPLIT_STACK
1438                 gp->gcstack = nil;
1439 #endif
1440                 gp->gcnext_sp = nil;
1441                 runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
1442
1443                 if(m->profilehz > 0)
1444                         runtime_setprof(true);
1445                 return;
1446         }
1447
1448         // Tell scheduler to put g back on the run queue:
1449         // mostly equivalent to g->status = Grunning,
1450         // but keeps the garbage collector from thinking
1451         // that g is running right now, which it's not.
1452         gp->readyonstop = 1;
1453
1454         // All the cpus are taken.
1455         // The scheduler will ready g and put this m to sleep.
1456         // When the scheduler takes g away from m,
1457         // it will undo the runtime_sched.mcpu++ above.
1458         runtime_gosched();
1459
1460         // Gosched returned, so we're allowed to run now.
1461         // Delete the gcstack information that we left for
1462         // the garbage collector during the system call.
1463         // Must wait until now because until gosched returns
1464         // we don't know for sure that the garbage collector
1465         // is not running.
1466 #ifdef USING_SPLIT_STACK
1467         gp->gcstack = nil;
1468 #endif
1469         gp->gcnext_sp = nil;
1470         runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
1471 }
1472
1473 // Allocate a new g, with a stack big enough for stacksize bytes.
1474 G*
1475 runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
1476 {
1477         G *newg;
1478
1479         newg = runtime_malloc(sizeof(G));
1480         if(stacksize >= 0) {
1481 #if USING_SPLIT_STACK
1482                 int dont_block_signals = 0;
1483
1484                 *ret_stack = __splitstack_makecontext(stacksize,
1485                                                       &newg->stack_context[0],
1486                                                       ret_stacksize);
1487                 __splitstack_block_signals_context(&newg->stack_context[0],
1488                                                    &dont_block_signals, nil);
1489 #else
1490                 *ret_stack = runtime_mallocgc(stacksize, FlagNoProfiling|FlagNoGC, 0, 0);
1491                 *ret_stacksize = stacksize;
1492                 newg->gcinitial_sp = *ret_stack;
1493                 newg->gcstack_size = stacksize;
1494                 runtime_xadd(&runtime_stacks_sys, stacksize);
1495 #endif
1496         }
1497         return newg;
1498 }
1499
1500 /* For runtime package testing.  */
1501
1502 void runtime_testing_entersyscall(void)
1503   __asm__("runtime.entersyscall");
1504
1505 void
1506 runtime_testing_entersyscall()
1507 {
1508         runtime_entersyscall();
1509 }
1510
1511 void runtime_testing_exitsyscall(void)
1512   __asm__("runtime.exitsyscall");
1513
1514 void
1515 runtime_testing_exitsyscall()
1516 {
1517         runtime_exitsyscall();
1518 }
1519
1520 G*
1521 __go_go(void (*fn)(void*), void* arg)
1522 {
1523         byte *sp;
1524         size_t spsize;
1525         G *newg;
1526         int64 goid;
1527
1528         goid = runtime_xadd64((uint64*)&runtime_sched.goidgen, 1);
1529         if(raceenabled)
1530                 runtime_racegostart(goid, runtime_getcallerpc(&fn));
1531
1532         schedlock();
1533
1534         if((newg = gfget()) != nil) {
1535 #ifdef USING_SPLIT_STACK
1536                 int dont_block_signals = 0;
1537
1538                 sp = __splitstack_resetcontext(&newg->stack_context[0],
1539                                                &spsize);
1540                 __splitstack_block_signals_context(&newg->stack_context[0],
1541                                                    &dont_block_signals, nil);
1542 #else
1543                 sp = newg->gcinitial_sp;
1544                 spsize = newg->gcstack_size;
1545                 if(spsize == 0)
1546                         runtime_throw("bad spsize in __go_go");
1547                 newg->gcnext_sp = sp;
1548 #endif
1549         } else {
1550                 newg = runtime_malg(StackMin, &sp, &spsize);
1551                 if(runtime_lastg == nil)
1552                         runtime_allg = newg;
1553                 else
1554                         runtime_lastg->alllink = newg;
1555                 runtime_lastg = newg;
1556         }
1557         newg->status = Gwaiting;
1558         newg->waitreason = "new goroutine";
1559
1560         newg->entry = (byte*)fn;
1561         newg->param = arg;
1562         newg->gopc = (uintptr)__builtin_return_address(0);
1563
1564         runtime_sched.gcount++;
1565         newg->goid = goid;
1566
1567         if(sp == nil)
1568                 runtime_throw("nil g->stack0");
1569
1570         {
1571                 // Avoid warnings about variables clobbered by
1572                 // longjmp.
1573                 byte * volatile vsp = sp;
1574                 size_t volatile vspsize = spsize;
1575                 G * volatile vnewg = newg;
1576
1577                 getcontext(&vnewg->context);
1578                 vnewg->context.uc_stack.ss_sp = vsp;
1579 #ifdef MAKECONTEXT_STACK_TOP
1580                 vnewg->context.uc_stack.ss_sp += vspsize;
1581 #endif
1582                 vnewg->context.uc_stack.ss_size = vspsize;
1583                 makecontext(&vnewg->context, kickoff, 0);
1584
1585                 newprocreadylocked(vnewg);
1586                 schedunlock();
1587
1588                 return vnewg;
1589         }
1590 }
1591
1592 // Put on gfree list.  Sched must be locked.
1593 static void
1594 gfput(G *gp)
1595 {
1596         gp->schedlink = runtime_sched.gfree;
1597         runtime_sched.gfree = gp;
1598 }
1599
1600 // Get from gfree list.  Sched must be locked.
1601 static G*
1602 gfget(void)
1603 {
1604         G *gp;
1605
1606         gp = runtime_sched.gfree;
1607         if(gp)
1608                 runtime_sched.gfree = gp->schedlink;
1609         return gp;
1610 }
1611
1612 void runtime_Gosched (void) asm ("runtime.Gosched");
1613
1614 void
1615 runtime_Gosched(void)
1616 {
1617         runtime_gosched();
1618 }
1619
1620 // Implementation of runtime.GOMAXPROCS.
1621 // delete when scheduler is stronger
1622 int32
1623 runtime_gomaxprocsfunc(int32 n)
1624 {
1625         int32 ret;
1626         uint32 v;
1627
1628         schedlock();
1629         ret = runtime_gomaxprocs;
1630         if(n <= 0)
1631                 n = ret;
1632         if(n > maxgomaxprocs)
1633                 n = maxgomaxprocs;
1634         runtime_gomaxprocs = n;
1635         if(runtime_gomaxprocs > 1)
1636                 runtime_singleproc = false;
1637         if(runtime_gcwaiting != 0) {
1638                 if(atomic_mcpumax(runtime_sched.atomic) != 1)
1639                         runtime_throw("invalid mcpumax during gc");
1640                 schedunlock();
1641                 return ret;
1642         }
1643
1644         setmcpumax(n);
1645
1646         // If there are now fewer allowed procs
1647         // than procs running, stop.
1648         v = runtime_atomicload(&runtime_sched.atomic);
1649         if((int32)atomic_mcpu(v) > n) {
1650                 schedunlock();
1651                 runtime_gosched();
1652                 return ret;
1653         }
1654         // handle more procs
1655         matchmg();
1656         schedunlock();
1657         return ret;
1658 }
1659
1660 void
1661 runtime_LockOSThread(void)
1662 {
1663         if(m == &runtime_m0 && runtime_sched.init) {
1664                 runtime_sched.lockmain = true;
1665                 return;
1666         }
1667         m->lockedg = g;
1668         g->lockedm = m;
1669 }
1670
1671 void
1672 runtime_UnlockOSThread(void)
1673 {
1674         if(m == &runtime_m0 && runtime_sched.init) {
1675                 runtime_sched.lockmain = false;
1676                 return;
1677         }
1678         m->lockedg = nil;
1679         g->lockedm = nil;
1680 }
1681
1682 bool
1683 runtime_lockedOSThread(void)
1684 {
1685         return g->lockedm != nil && m->lockedg != nil;
1686 }
1687
1688 // for testing of callbacks
1689
1690 _Bool runtime_golockedOSThread(void)
1691   asm("runtime.golockedOSThread");
1692
1693 _Bool
1694 runtime_golockedOSThread(void)
1695 {
1696         return runtime_lockedOSThread();
1697 }
1698
1699 // for testing of wire, unwire
1700 uint32
1701 runtime_mid()
1702 {
1703         return m->id;
1704 }
1705
1706 intgo runtime_NumGoroutine (void)
1707   __asm__ ("runtime.NumGoroutine");
1708
1709 intgo
1710 runtime_NumGoroutine()
1711 {
1712         return runtime_sched.gcount;
1713 }
1714
1715 int32
1716 runtime_gcount(void)
1717 {
1718         return runtime_sched.gcount;
1719 }
1720
1721 int32
1722 runtime_mcount(void)
1723 {
1724         return runtime_sched.mcount;
1725 }
1726
1727 static struct {
1728         Lock;
1729         void (*fn)(uintptr*, int32);
1730         int32 hz;
1731         uintptr pcbuf[100];
1732 } prof;
1733
1734 // Called if we receive a SIGPROF signal.
1735 void
1736 runtime_sigprof()
1737 {
1738         int32 n;
1739
1740         if(prof.fn == nil || prof.hz == 0)
1741                 return;
1742
1743         runtime_lock(&prof);
1744         if(prof.fn == nil) {
1745                 runtime_unlock(&prof);
1746                 return;
1747         }
1748         n = runtime_callers(0, prof.pcbuf, nelem(prof.pcbuf));
1749         if(n > 0)
1750                 prof.fn(prof.pcbuf, n);
1751         runtime_unlock(&prof);
1752 }
1753
1754 // Arrange to call fn with a traceback hz times a second.
1755 void
1756 runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
1757 {
1758         // Force sane arguments.
1759         if(hz < 0)
1760                 hz = 0;
1761         if(hz == 0)
1762                 fn = nil;
1763         if(fn == nil)
1764                 hz = 0;
1765
1766         // Stop profiler on this cpu so that it is safe to lock prof.
1767         // if a profiling signal came in while we had prof locked,
1768         // it would deadlock.
1769         runtime_resetcpuprofiler(0);
1770
1771         runtime_lock(&prof);
1772         prof.fn = fn;
1773         prof.hz = hz;
1774         runtime_unlock(&prof);
1775         runtime_lock(&runtime_sched);
1776         runtime_sched.profilehz = hz;
1777         runtime_unlock(&runtime_sched);
1778
1779         if(hz != 0)
1780                 runtime_resetcpuprofiler(hz);
1781 }