libgo/runtime/proc.c

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 #include <limits.h>
   6 #include <stdlib.h>
   7 #include <pthread.h>
   8 #include <unistd.h>
   9
  10 #include "config.h"
  11
  12 #ifdef HAVE_DL_ITERATE_PHDR
  13 #include <link.h>
  14 #endif
  15
  16 #include "runtime.h"
  17 #include "arch.h"
  18 #include "defs.h"
  19 #include "malloc.h"
  20 #include "go-defer.h"
  21
  22 #ifdef USING_SPLIT_STACK
  23
  24 /* FIXME: These are not declared anywhere.  */
  25
  26 extern void __splitstack_getcontext(void *context[10]);
  27
  28 extern void __splitstack_setcontext(void *context[10]);
  29
  30 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
  31
  32 extern void * __splitstack_resetcontext(void *context[10], size_t *);
  33
  34 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
  35                                void **);
  36
  37 extern void __splitstack_block_signals (int *, int *);
  38
  39 extern void __splitstack_block_signals_context (void *context[10], int *,
  40                                                 int *);
  41
  42 #endif
  43
  44 #ifndef PTHREAD_STACK_MIN
  45 # define PTHREAD_STACK_MIN 8192
  46 #endif
  47
  48 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
  49 # define StackMin PTHREAD_STACK_MIN
  50 #else
  51 # define StackMin 2 * 1024 * 1024
  52 #endif
  53
  54 uintptr runtime_stacks_sys;
  55
  56 static void schedule(G*);
  57
  58 static void gtraceback(G*);
  59
  60 typedef struct Sched Sched;
  61
  62 M       runtime_m0;
  63 G       runtime_g0;     // idle goroutine for m0
  64
  65 #ifdef __rtems__
  66 #define __thread
  67 #endif
  68
  69 static __thread G *g;
  70 static __thread M *m;
  71
  72 #ifndef SETCONTEXT_CLOBBERS_TLS
  73
  74 static inline void
  75 initcontext(void)
  76 {
  77 }
  78
  79 static inline void
  80 fixcontext(ucontext_t *c __attribute__ ((unused)))
  81 {
  82 }
  83
  84 # else
  85
  86 # if defined(__x86_64__) && defined(__sun__)
  87
  88 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
  89 // register to that of the thread which called getcontext.  The effect
  90 // is that the address of all __thread variables changes.  This bug
  91 // also affects pthread_self() and pthread_getspecific.  We work
  92 // around it by clobbering the context field directly to keep %fs the
  93 // same.
  94
  95 static __thread greg_t fs;
  96
  97 static inline void
  98 initcontext(void)
  99 {
 100         ucontext_t c;
 101
 102         getcontext(&c);
 103         fs = c.uc_mcontext.gregs[REG_FSBASE];
 104 }
 105
 106 static inline void
 107 fixcontext(ucontext_t* c)
 108 {
 109         c->uc_mcontext.gregs[REG_FSBASE] = fs;
 110 }
 111
 112 # else
 113
 114 #  error unknown case for SETCONTEXT_CLOBBERS_TLS
 115
 116 # endif
 117
 118 #endif
 119
 120 // We can not always refer to the TLS variables directly.  The
 121 // compiler will call tls_get_addr to get the address of the variable,
 122 // and it may hold it in a register across a call to schedule.  When
 123 // we get back from the call we may be running in a different thread,
 124 // in which case the register now points to the TLS variable for a
 125 // different thread.  We use non-inlinable functions to avoid this
 126 // when necessary.
 127
 128 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
 129
 130 G*
 131 runtime_g(void)
 132 {
 133         return g;
 134 }
 135
 136 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
 137
 138 M*
 139 runtime_m(void)
 140 {
 141         return m;
 142 }
 143
 144 int32   runtime_gcwaiting;
 145
 146 // The static TLS size.  See runtime_newm.
 147 static int tlssize;
 148
 149 #ifdef HAVE_DL_ITERATE_PHDR
 150
 151 // Called via dl_iterate_phdr.
 152
 153 static int
 154 addtls(struct dl_phdr_info* info, size_t size __attribute__ ((unused)), void *data)
 155 {
 156         size_t *total = (size_t *)data;
 157         unsigned int i;
 158
 159         for(i = 0; i < info->dlpi_phnum; ++i) {
 160                 if(info->dlpi_phdr[i].p_type == PT_TLS)
 161                         *total += info->dlpi_phdr[i].p_memsz;
 162         }
 163         return 0;
 164 }
 165
 166 // Set the total TLS size.
 167
 168 static void
 169 inittlssize()
 170 {
 171         size_t total = 0;
 172
 173         dl_iterate_phdr(addtls, (void *)&total);
 174         tlssize = total;
 175 }
 176
 177 #else
 178
 179 static void
 180 inittlssize()
 181 {
 182 }
 183
 184 #endif
 185
 186 // Go scheduler
 187 //
 188 // The go scheduler's job is to match ready-to-run goroutines (`g's)
 189 // with waiting-for-work schedulers (`m's).  If there are ready g's
 190 // and no waiting m's, ready() will start a new m running in a new
 191 // OS thread, so that all ready g's can run simultaneously, up to a limit.
 192 // For now, m's never go away.
 193 //
 194 // By default, Go keeps only one kernel thread (m) running user code
 195 // at a single time; other threads may be blocked in the operating system.
 196 // Setting the environment variable $GOMAXPROCS or calling
 197 // runtime.GOMAXPROCS() will change the number of user threads
 198 // allowed to execute simultaneously.  $GOMAXPROCS is thus an
 199 // approximation of the maximum number of cores to use.
 200 //
 201 // Even a program that can run without deadlock in a single process
 202 // might use more m's if given the chance.  For example, the prime
 203 // sieve will use as many m's as there are primes (up to runtime_sched.mmax),
 204 // allowing different stages of the pipeline to execute in parallel.
 205 // We could revisit this choice, only kicking off new m's for blocking
 206 // system calls, but that would limit the amount of parallel computation
 207 // that go would try to do.
 208 //
 209 // In general, one could imagine all sorts of refinements to the
 210 // scheduler, but the goal now is just to get something working on
 211 // Linux and OS X.
 212
 213 struct Sched {
 214         Lock;
 215
 216         G *gfree;       // available g's (status == Gdead)
 217         int32 goidgen;
 218
 219         G *ghead;       // g's waiting to run
 220         G *gtail;
 221         int32 gwait;    // number of g's waiting to run
 222         int32 gcount;   // number of g's that are alive
 223         int32 grunning; // number of g's running on cpu or in syscall
 224
 225         M *mhead;       // m's waiting for work
 226         int32 mwait;    // number of m's waiting for work
 227         int32 mcount;   // number of m's that have been created
 228
 229         volatile uint32 atomic; // atomic scheduling word (see below)
 230
 231         int32 profilehz;        // cpu profiling rate
 232
 233         bool init;  // running initialization
 234         bool lockmain;  // init called runtime.LockOSThread
 235
 236         Note    stopped;        // one g can set waitstop and wait here for m's to stop
 237 };
 238
 239 // The atomic word in sched is an atomic uint32 that
 240 // holds these fields.
 241 //
 242 //      [15 bits] mcpu          number of m's executing on cpu
 243 //      [15 bits] mcpumax       max number of m's allowed on cpu
 244 //      [1 bit] waitstop        some g is waiting on stopped
 245 //      [1 bit] gwaiting        gwait != 0
 246 //
 247 // These fields are the information needed by entersyscall
 248 // and exitsyscall to decide whether to coordinate with the
 249 // scheduler.  Packing them into a single machine word lets
 250 // them use a fast path with a single atomic read/write and
 251 // no lock/unlock.  This greatly reduces contention in
 252 // syscall- or cgo-heavy multithreaded programs.
 253 //
 254 // Except for entersyscall and exitsyscall, the manipulations
 255 // to these fields only happen while holding the schedlock,
 256 // so the routines holding schedlock only need to worry about
 257 // what entersyscall and exitsyscall do, not the other routines
 258 // (which also use the schedlock).
 259 //
 260 // In particular, entersyscall and exitsyscall only read mcpumax,
 261 // waitstop, and gwaiting.  They never write them.  Thus, writes to those
 262 // fields can be done (holding schedlock) without fear of write conflicts.
 263 // There may still be logic conflicts: for example, the set of waitstop must
 264 // be conditioned on mcpu >= mcpumax or else the wait may be a
 265 // spurious sleep.  The Promela model in proc.p verifies these accesses.
 266 enum {
 267         mcpuWidth = 15,
 268         mcpuMask = (1<<mcpuWidth) - 1,
 269         mcpuShift = 0,
 270         mcpumaxShift = mcpuShift + mcpuWidth,
 271         waitstopShift = mcpumaxShift + mcpuWidth,
 272         gwaitingShift = waitstopShift+1,
 273
 274         // The max value of GOMAXPROCS is constrained
 275         // by the max value we can store in the bit fields
 276         // of the atomic word.  Reserve a few high values
 277         // so that we can detect accidental decrement
 278         // beyond zero.
 279         maxgomaxprocs = mcpuMask - 10,
 280 };
 281
 282 #define atomic_mcpu(v)          (((v)>>mcpuShift)&mcpuMask)
 283 #define atomic_mcpumax(v)       (((v)>>mcpumaxShift)&mcpuMask)
 284 #define atomic_waitstop(v)      (((v)>>waitstopShift)&1)
 285 #define atomic_gwaiting(v)      (((v)>>gwaitingShift)&1)
 286
 287 Sched runtime_sched;
 288 int32 runtime_gomaxprocs;
 289 bool runtime_singleproc;
 290
 291 static bool canaddmcpu(void);
 292
 293 // An m that is waiting for notewakeup(&m->havenextg).  This may
 294 // only be accessed while the scheduler lock is held.  This is used to
 295 // minimize the number of times we call notewakeup while the scheduler
 296 // lock is held, since the m will normally move quickly to lock the
 297 // scheduler itself, producing lock contention.
 298 static M* mwakeup;
 299
 300 // Scheduling helpers.  Sched must be locked.
 301 static void gput(G*);   // put/get on ghead/gtail
 302 static G* gget(void);
 303 static void mput(M*);   // put/get on mhead
 304 static M* mget(G*);
 305 static void gfput(G*);  // put/get on gfree
 306 static G* gfget(void);
 307 static void matchmg(void);      // match m's to g's
 308 static void readylocked(G*);    // ready, but sched is locked
 309 static void mnextg(M*, G*);
 310 static void mcommoninit(M*);
 311
 312 void
 313 setmcpumax(uint32 n)
 314 {
 315         uint32 v, w;
 316
 317         for(;;) {
 318                 v = runtime_sched.atomic;
 319                 w = v;
 320                 w &= ~(mcpuMask<<mcpumaxShift);
 321                 w |= n<<mcpumaxShift;
 322                 if(runtime_cas(&runtime_sched.atomic, v, w))
 323                         break;
 324         }
 325 }
 326
 327 // First function run by a new goroutine.  This replaces gogocall.
 328 static void
 329 kickoff(void)
 330 {
 331         void (*fn)(void*);
 332
 333         fn = (void (*)(void*))(g->entry);
 334         fn(g->param);
 335         runtime_goexit();
 336 }
 337
 338 // Switch context to a different goroutine.  This is like longjmp.
 339 static void runtime_gogo(G*) __attribute__ ((noinline));
 340 static void
 341 runtime_gogo(G* newg)
 342 {
 343 #ifdef USING_SPLIT_STACK
 344         __splitstack_setcontext(&newg->stack_context[0]);
 345 #endif
 346         g = newg;
 347         newg->fromgogo = true;
 348         fixcontext(&newg->context);
 349         setcontext(&newg->context);
 350         runtime_throw("gogo setcontext returned");
 351 }
 352
 353 // Save context and call fn passing g as a parameter.  This is like
 354 // setjmp.  Because getcontext always returns 0, unlike setjmp, we use
 355 // g->fromgogo as a code.  It will be true if we got here via
 356 // setcontext.  g == nil the first time this is called in a new m.
 357 static void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
 358 static void
 359 runtime_mcall(void (*pfn)(G*))
 360 {
 361         M *mp;
 362         G *gp;
 363 #ifndef USING_SPLIT_STACK
 364         int i;
 365 #endif
 366
 367         // Ensure that all registers are on the stack for the garbage
 368         // collector.
 369         __builtin_unwind_init();
 370
 371         mp = m;
 372         gp = g;
 373         if(gp == mp->g0)
 374                 runtime_throw("runtime: mcall called on m->g0 stack");
 375
 376         if(gp != nil) {
 377
 378 #ifdef USING_SPLIT_STACK
 379                 __splitstack_getcontext(&g->stack_context[0]);
 380 #else
 381                 gp->gcnext_sp = &i;
 382 #endif
 383                 gp->fromgogo = false;
 384                 getcontext(&gp->context);
 385
 386                 // When we return from getcontext, we may be running
 387                 // in a new thread.  That means that m and g may have
 388                 // changed.  They are global variables so we will
 389                 // reload them, but the addresses of m and g may be
 390                 // cached in our local stack frame, and those
 391                 // addresses may be wrong.  Call functions to reload
 392                 // the values for this thread.
 393                 mp = runtime_m();
 394                 gp = runtime_g();
 395
 396                 if(gp->traceback != nil)
 397                         gtraceback(gp);
 398         }
 399         if (gp == nil || !gp->fromgogo) {
 400 #ifdef USING_SPLIT_STACK
 401                 __splitstack_setcontext(&mp->g0->stack_context[0]);
 402 #endif
 403                 mp->g0->entry = (byte*)pfn;
 404                 mp->g0->param = gp;
 405
 406                 // It's OK to set g directly here because this case
 407                 // can not occur if we got here via a setcontext to
 408                 // the getcontext call just above.
 409                 g = mp->g0;
 410
 411                 fixcontext(&mp->g0->context);
 412                 setcontext(&mp->g0->context);
 413                 runtime_throw("runtime: mcall function returned");
 414         }
 415 }
 416
 417 // Keep trace of scavenger's goroutine for deadlock detection.
 418 static G *scvg;
 419
 420 // The bootstrap sequence is:
 421 //
 422 //      call osinit
 423 //      call schedinit
 424 //      make & queue new G
 425 //      call runtime_mstart
 426 //
 427 // The new G calls runtime_main.
 428 void
 429 runtime_schedinit(void)
 430 {
 431         int32 n;
 432         const byte *p;
 433
 434         m = &runtime_m0;
 435         g = &runtime_g0;
 436         m->g0 = g;
 437         m->curg = g;
 438         g->m = m;
 439
 440         initcontext();
 441         inittlssize();
 442
 443         m->nomemprof++;
 444         runtime_mallocinit();
 445         mcommoninit(m);
 446
 447         runtime_goargs();
 448         runtime_goenvs();
 449
 450         // For debugging:
 451         // Allocate internal symbol table representation now,
 452         // so that we don't need to call malloc when we crash.
 453         // runtime_findfunc(0);
 454
 455         runtime_gomaxprocs = 1;
 456         p = runtime_getenv("GOMAXPROCS");
 457         if(p != nil && (n = runtime_atoi(p)) != 0) {
 458                 if(n > maxgomaxprocs)
 459                         n = maxgomaxprocs;
 460                 runtime_gomaxprocs = n;
 461         }
 462         // wait for the main goroutine to start before taking
 463         // GOMAXPROCS into account.
 464         setmcpumax(1);
 465         runtime_singleproc = runtime_gomaxprocs == 1;
 466
 467         canaddmcpu();   // mcpu++ to account for bootstrap m
 468         m->helpgc = 1;  // flag to tell schedule() to mcpu--
 469         runtime_sched.grunning++;
 470
 471         // Can not enable GC until all roots are registered.
 472         // mstats.enablegc = 1;
 473         m->nomemprof--;
 474 }
 475
 476 extern void main_init(void) __asm__ ("__go_init_main");
 477 extern void main_main(void) __asm__ ("main.main");
 478
 479 // The main goroutine.
 480 void
 481 runtime_main(void)
 482 {
 483         // Lock the main goroutine onto this, the main OS thread,
 484         // during initialization.  Most programs won't care, but a few
 485         // do require certain calls to be made by the main thread.
 486         // Those can arrange for main.main to run in the main thread
 487         // by calling runtime.LockOSThread during initialization
 488         // to preserve the lock.
 489         runtime_LockOSThread();
 490         // From now on, newgoroutines may use non-main threads.
 491         setmcpumax(runtime_gomaxprocs);
 492         runtime_sched.init = true;
 493         scvg = __go_go(runtime_MHeap_Scavenger, nil);
 494         main_init();
 495         runtime_sched.init = false;
 496         if(!runtime_sched.lockmain)
 497                 runtime_UnlockOSThread();
 498
 499         // For gccgo we have to wait until after main is initialized
 500         // to enable GC, because initializing main registers the GC
 501         // roots.
 502         mstats.enablegc = 1;
 503
 504         // The deadlock detection has false negatives.
 505         // Let scvg start up, to eliminate the false negative
 506         // for the trivial program func main() { select{} }.
 507         runtime_gosched();
 508
 509         main_main();
 510         runtime_exit(0);
 511         for(;;)
 512                 *(int32*)0 = 0;
 513 }
 514
 515 // Lock the scheduler.
 516 static void
 517 schedlock(void)
 518 {
 519         runtime_lock(&runtime_sched);
 520 }
 521
 522 // Unlock the scheduler.
 523 static void
 524 schedunlock(void)
 525 {
 526         M *m;
 527
 528         m = mwakeup;
 529         mwakeup = nil;
 530         runtime_unlock(&runtime_sched);
 531         if(m != nil)
 532                 runtime_notewakeup(&m->havenextg);
 533 }
 534
 535 void
 536 runtime_goexit(void)
 537 {
 538         g->status = Gmoribund;
 539         runtime_gosched();
 540 }
 541
 542 void
 543 runtime_goroutineheader(G *g)
 544 {
 545         const char *status;
 546
 547         switch(g->status) {
 548         case Gidle:
 549                 status = "idle";
 550                 break;
 551         case Grunnable:
 552                 status = "runnable";
 553                 break;
 554         case Grunning:
 555                 status = "running";
 556                 break;
 557         case Gsyscall:
 558                 status = "syscall";
 559                 break;
 560         case Gwaiting:
 561                 if(g->waitreason)
 562                         status = g->waitreason;
 563                 else
 564                         status = "waiting";
 565                 break;
 566         case Gmoribund:
 567                 status = "moribund";
 568                 break;
 569         default:
 570                 status = "???";
 571                 break;
 572         }
 573         runtime_printf("goroutine %d [%s]:\n", g->goid, status);
 574 }
 575
 576 void
 577 runtime_goroutinetrailer(G *g)
 578 {
 579         if(g != nil && g->gopc != 0 && g->goid != 1) {
 580                 struct __go_string fn;
 581                 struct __go_string file;
 582                 int line;
 583
 584                 if(__go_file_line(g->gopc - 1, &fn, &file, &line)) {
 585                         runtime_printf("created by %s\n", fn.__data);
 586                         runtime_printf("\t%s:%d\n", file.__data, line);
 587                 }
 588         }
 589 }
 590
 591 struct Traceback
 592 {
 593         G* gp;
 594         uintptr pcbuf[100];
 595         int32 c;
 596 };
 597
 598 void
 599 runtime_tracebackothers(G * volatile me)
 600 {
 601         G * volatile g;
 602         Traceback traceback;
 603
 604         traceback.gp = me;
 605         for(g = runtime_allg; g != nil; g = g->alllink) {
 606                 if(g == me || g->status == Gdead)
 607                         continue;
 608                 runtime_printf("\n");
 609                 runtime_goroutineheader(g);
 610
 611                 // Our only mechanism for doing a stack trace is
 612                 // _Unwind_Backtrace.  And that only works for the
 613                 // current thread, not for other random goroutines.
 614                 // So we need to switch context to the goroutine, get
 615                 // the backtrace, and then switch back.
 616
 617                 // This means that if g is running or in a syscall, we
 618                 // can't reliably print a stack trace.  FIXME.
 619                 if(g->status == Gsyscall || g->status == Grunning) {
 620                         runtime_printf("no stack trace available\n");
 621                         runtime_goroutinetrailer(g);
 622                         continue;
 623                 }
 624
 625                 g->traceback = &traceback;
 626
 627 #ifdef USING_SPLIT_STACK
 628                 __splitstack_getcontext(&me->stack_context[0]);
 629 #endif
 630                 getcontext(&me->context);
 631
 632                 if(g->traceback != nil) {
 633                         runtime_gogo(g);
 634                 }
 635
 636                 runtime_printtrace(traceback.pcbuf, traceback.c);
 637                 runtime_goroutinetrailer(g);
 638         }
 639 }
 640
 641 // Do a stack trace of gp, and then restore the context to
 642 // gp->dotraceback.
 643
 644 static void
 645 gtraceback(G* gp)
 646 {
 647         Traceback* traceback;
 648
 649         traceback = gp->traceback;
 650         gp->traceback = nil;
 651         traceback->c = runtime_callers(1, traceback->pcbuf,
 652                 sizeof traceback->pcbuf / sizeof traceback->pcbuf[0]);
 653         runtime_gogo(traceback->gp);
 654 }
 655
 656 // Mark this g as m's idle goroutine.
 657 // This functionality might be used in environments where programs
 658 // are limited to a single thread, to simulate a select-driven
 659 // network server.  It is not exposed via the standard runtime API.
 660 void
 661 runtime_idlegoroutine(void)
 662 {
 663         if(g->idlem != nil)
 664                 runtime_throw("g is already an idle goroutine");
 665         g->idlem = m;
 666 }
 667
 668 static void
 669 mcommoninit(M *m)
 670 {
 671         m->id = runtime_sched.mcount++;
 672         m->fastrand = 0x49f6428aUL + m->id + runtime_cputicks();
 673
 674         if(m->mcache == nil)
 675                 m->mcache = runtime_allocmcache();
 676
 677         runtime_callers(1, m->createstack, nelem(m->createstack));
 678
 679         // Add to runtime_allm so garbage collector doesn't free m
 680         // when it is just in a register or thread-local storage.
 681         m->alllink = runtime_allm;
 682         // runtime_NumCgoCall() iterates over allm w/o schedlock,
 683         // so we need to publish it safely.
 684         runtime_atomicstorep(&runtime_allm, m);
 685 }
 686
 687 // Try to increment mcpu.  Report whether succeeded.
 688 static bool
 689 canaddmcpu(void)
 690 {
 691         uint32 v;
 692
 693         for(;;) {
 694                 v = runtime_sched.atomic;
 695                 if(atomic_mcpu(v) >= atomic_mcpumax(v))
 696                         return 0;
 697                 if(runtime_cas(&runtime_sched.atomic, v, v+(1<<mcpuShift)))
 698                         return 1;
 699         }
 700 }
 701
 702 // Put on `g' queue.  Sched must be locked.
 703 static void
 704 gput(G *g)
 705 {
 706         M *m;
 707
 708         // If g is wired, hand it off directly.
 709         if((m = g->lockedm) != nil && canaddmcpu()) {
 710                 mnextg(m, g);
 711                 return;
 712         }
 713
 714         // If g is the idle goroutine for an m, hand it off.
 715         if(g->idlem != nil) {
 716                 if(g->idlem->idleg != nil) {
 717                         runtime_printf("m%d idle out of sync: g%d g%d\n",
 718                                 g->idlem->id,
 719                                 g->idlem->idleg->goid, g->goid);
 720                         runtime_throw("runtime: double idle");
 721                 }
 722                 g->idlem->idleg = g;
 723                 return;
 724         }
 725
 726         g->schedlink = nil;
 727         if(runtime_sched.ghead == nil)
 728                 runtime_sched.ghead = g;
 729         else
 730                 runtime_sched.gtail->schedlink = g;
 731         runtime_sched.gtail = g;
 732
 733         // increment gwait.
 734         // if it transitions to nonzero, set atomic gwaiting bit.
 735         if(runtime_sched.gwait++ == 0)
 736                 runtime_xadd(&runtime_sched.atomic, 1<<gwaitingShift);
 737 }
 738
 739 // Report whether gget would return something.
 740 static bool
 741 haveg(void)
 742 {
 743         return runtime_sched.ghead != nil || m->idleg != nil;
 744 }
 745
 746 // Get from `g' queue.  Sched must be locked.
 747 static G*
 748 gget(void)
 749 {
 750         G *g;
 751
 752         g = runtime_sched.ghead;
 753         if(g){
 754                 runtime_sched.ghead = g->schedlink;
 755                 if(runtime_sched.ghead == nil)
 756                         runtime_sched.gtail = nil;
 757                 // decrement gwait.
 758                 // if it transitions to zero, clear atomic gwaiting bit.
 759                 if(--runtime_sched.gwait == 0)
 760                         runtime_xadd(&runtime_sched.atomic, -1<<gwaitingShift);
 761         } else if(m->idleg != nil) {
 762                 g = m->idleg;
 763                 m->idleg = nil;
 764         }
 765         return g;
 766 }
 767
 768 // Put on `m' list.  Sched must be locked.
 769 static void
 770 mput(M *m)
 771 {
 772         m->schedlink = runtime_sched.mhead;
 773         runtime_sched.mhead = m;
 774         runtime_sched.mwait++;
 775 }
 776
 777 // Get an `m' to run `g'.  Sched must be locked.
 778 static M*
 779 mget(G *g)
 780 {
 781         M *m;
 782
 783         // if g has its own m, use it.
 784         if(g && (m = g->lockedm) != nil)
 785                 return m;
 786
 787         // otherwise use general m pool.
 788         if((m = runtime_sched.mhead) != nil){
 789                 runtime_sched.mhead = m->schedlink;
 790                 runtime_sched.mwait--;
 791         }
 792         return m;
 793 }
 794
 795 // Mark g ready to run.
 796 void
 797 runtime_ready(G *g)
 798 {
 799         schedlock();
 800         readylocked(g);
 801         schedunlock();
 802 }
 803
 804 // Mark g ready to run.  Sched is already locked.
 805 // G might be running already and about to stop.
 806 // The sched lock protects g->status from changing underfoot.
 807 static void
 808 readylocked(G *g)
 809 {
 810         if(g->m){
 811                 // Running on another machine.
 812                 // Ready it when it stops.
 813                 g->readyonstop = 1;
 814                 return;
 815         }
 816
 817         // Mark runnable.
 818         if(g->status == Grunnable || g->status == Grunning) {
 819                 runtime_printf("goroutine %d has status %d\n", g->goid, g->status);
 820                 runtime_throw("bad g->status in ready");
 821         }
 822         g->status = Grunnable;
 823
 824         gput(g);
 825         matchmg();
 826 }
 827
 828 // Same as readylocked but a different symbol so that
 829 // debuggers can set a breakpoint here and catch all
 830 // new goroutines.
 831 static void
 832 newprocreadylocked(G *g)
 833 {
 834         readylocked(g);
 835 }
 836
 837 // Pass g to m for running.
 838 // Caller has already incremented mcpu.
 839 static void
 840 mnextg(M *m, G *g)
 841 {
 842         runtime_sched.grunning++;
 843         m->nextg = g;
 844         if(m->waitnextg) {
 845                 m->waitnextg = 0;
 846                 if(mwakeup != nil)
 847                         runtime_notewakeup(&mwakeup->havenextg);
 848                 mwakeup = m;
 849         }
 850 }
 851
 852 // Get the next goroutine that m should run.
 853 // Sched must be locked on entry, is unlocked on exit.
 854 // Makes sure that at most $GOMAXPROCS g's are
 855 // running on cpus (not in system calls) at any given time.
 856 static G*
 857 nextgandunlock(void)
 858 {
 859         G *gp;
 860         uint32 v;
 861
 862 top:
 863         if(atomic_mcpu(runtime_sched.atomic) >= maxgomaxprocs)
 864                 runtime_throw("negative mcpu");
 865
 866         // If there is a g waiting as m->nextg, the mcpu++
 867         // happened before it was passed to mnextg.
 868         if(m->nextg != nil) {
 869                 gp = m->nextg;
 870                 m->nextg = nil;
 871                 schedunlock();
 872                 return gp;
 873         }
 874
 875         if(m->lockedg != nil) {
 876                 // We can only run one g, and it's not available.
 877                 // Make sure some other cpu is running to handle
 878                 // the ordinary run queue.
 879                 if(runtime_sched.gwait != 0) {
 880                         matchmg();
 881                         // m->lockedg might have been on the queue.
 882                         if(m->nextg != nil) {
 883                                 gp = m->nextg;
 884                                 m->nextg = nil;
 885                                 schedunlock();
 886                                 return gp;
 887                         }
 888                 }
 889         } else {
 890                 // Look for work on global queue.
 891                 while(haveg() && canaddmcpu()) {
 892                         gp = gget();
 893                         if(gp == nil)
 894                                 runtime_throw("gget inconsistency");
 895
 896                         if(gp->lockedm) {
 897                                 mnextg(gp->lockedm, gp);
 898                                 continue;
 899                         }
 900                         runtime_sched.grunning++;
 901                         schedunlock();
 902                         return gp;
 903                 }
 904
 905                 // The while loop ended either because the g queue is empty
 906                 // or because we have maxed out our m procs running go
 907                 // code (mcpu >= mcpumax).  We need to check that
 908                 // concurrent actions by entersyscall/exitsyscall cannot
 909                 // invalidate the decision to end the loop.
 910                 //
 911                 // We hold the sched lock, so no one else is manipulating the
 912                 // g queue or changing mcpumax.  Entersyscall can decrement
 913                 // mcpu, but if does so when there is something on the g queue,
 914                 // the gwait bit will be set, so entersyscall will take the slow path
 915                 // and use the sched lock.  So it cannot invalidate our decision.
 916                 //
 917                 // Wait on global m queue.
 918                 mput(m);
 919         }
 920
 921         // Look for deadlock situation.
 922         // There is a race with the scavenger that causes false negatives:
 923         // if the scavenger is just starting, then we have
 924         //      scvg != nil && grunning == 0 && gwait == 0
 925         // and we do not detect a deadlock.  It is possible that we should
 926         // add that case to the if statement here, but it is too close to Go 1
 927         // to make such a subtle change.  Instead, we work around the
 928         // false negative in trivial programs by calling runtime.gosched
 929         // from the main goroutine just before main.main.
 930         // See runtime_main above.
 931         //
 932         // On a related note, it is also possible that the scvg == nil case is
 933         // wrong and should include gwait, but that does not happen in
 934         // standard Go programs, which all start the scavenger.
 935         //
 936         if((scvg == nil && runtime_sched.grunning == 0) ||
 937            (scvg != nil && runtime_sched.grunning == 1 && runtime_sched.gwait == 0 &&
 938             (scvg->status == Grunning || scvg->status == Gsyscall))) {
 939                 runtime_throw("all goroutines are asleep - deadlock!");
 940         }
 941
 942         m->nextg = nil;
 943         m->waitnextg = 1;
 944         runtime_noteclear(&m->havenextg);
 945
 946         // Stoptheworld is waiting for all but its cpu to go to stop.
 947         // Entersyscall might have decremented mcpu too, but if so
 948         // it will see the waitstop and take the slow path.
 949         // Exitsyscall never increments mcpu beyond mcpumax.
 950         v = runtime_atomicload(&runtime_sched.atomic);
 951         if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
 952                 // set waitstop = 0 (known to be 1)
 953                 runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
 954                 runtime_notewakeup(&runtime_sched.stopped);
 955         }
 956         schedunlock();
 957
 958         runtime_notesleep(&m->havenextg);
 959         if(m->helpgc) {
 960                 runtime_gchelper();
 961                 m->helpgc = 0;
 962                 runtime_lock(&runtime_sched);
 963                 goto top;
 964         }
 965         if((gp = m->nextg) == nil)
 966                 runtime_throw("bad m->nextg in nextgoroutine");
 967         m->nextg = nil;
 968         return gp;
 969 }
 970
 971 int32
 972 runtime_helpgc(bool *extra)
 973 {
 974         M *mp;
 975         int32 n, max;
 976
 977         // Figure out how many CPUs to use.
 978         // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
 979         max = runtime_gomaxprocs;
 980         if(max > runtime_ncpu)
 981                 max = runtime_ncpu > 0 ? runtime_ncpu : 1;
 982         if(max > MaxGcproc)
 983                 max = MaxGcproc;
 984
 985         // We're going to use one CPU no matter what.
 986         // Figure out the max number of additional CPUs.
 987         max--;
 988
 989         runtime_lock(&runtime_sched);
 990         n = 0;
 991         while(n < max && (mp = mget(nil)) != nil) {
 992                 n++;
 993                 mp->helpgc = 1;
 994                 mp->waitnextg = 0;
 995                 runtime_notewakeup(&mp->havenextg);
 996         }
 997         runtime_unlock(&runtime_sched);
 998         if(extra)
 999                 *extra = n != max;
1000         return n;
1001 }
1002
1003 void
1004 runtime_stoptheworld(void)
1005 {
1006         uint32 v;
1007
1008         schedlock();
1009         runtime_gcwaiting = 1;
1010
1011         setmcpumax(1);
1012
1013         // while mcpu > 1
1014         for(;;) {
1015                 v = runtime_sched.atomic;
1016                 if(atomic_mcpu(v) <= 1)
1017                         break;
1018
1019                 // It would be unsafe for multiple threads to be using
1020                 // the stopped note at once, but there is only
1021                 // ever one thread doing garbage collection.
1022                 runtime_noteclear(&runtime_sched.stopped);
1023                 if(atomic_waitstop(v))
1024                         runtime_throw("invalid waitstop");
1025
1026                 // atomic { waitstop = 1 }, predicated on mcpu <= 1 check above
1027                 // still being true.
1028                 if(!runtime_cas(&runtime_sched.atomic, v, v+(1<<waitstopShift)))
1029                         continue;
1030
1031                 schedunlock();
1032                 runtime_notesleep(&runtime_sched.stopped);
1033                 schedlock();
1034         }
1035         runtime_singleproc = runtime_gomaxprocs == 1;
1036         schedunlock();
1037 }
1038
1039 void
1040 runtime_starttheworld(bool extra)
1041 {
1042         M *m;
1043
1044         schedlock();
1045         runtime_gcwaiting = 0;
1046         setmcpumax(runtime_gomaxprocs);
1047         matchmg();
1048         if(extra && canaddmcpu()) {
1049                 // Start a new m that will (we hope) be idle
1050                 // and so available to help when the next
1051                 // garbage collection happens.
1052                 // canaddmcpu above did mcpu++
1053                 // (necessary, because m will be doing various
1054                 // initialization work so is definitely running),
1055                 // but m is not running a specific goroutine,
1056                 // so set the helpgc flag as a signal to m's
1057                 // first schedule(nil) to mcpu-- and grunning--.
1058                 m = runtime_newm();
1059                 m->helpgc = 1;
1060                 runtime_sched.grunning++;
1061         }
1062         schedunlock();
1063 }
1064
1065 // Called to start an M.
1066 void*
1067 runtime_mstart(void* mp)
1068 {
1069         m = (M*)mp;
1070         g = m->g0;
1071
1072         initcontext();
1073
1074         g->entry = nil;
1075         g->param = nil;
1076
1077         // Record top of stack for use by mcall.
1078         // Once we call schedule we're never coming back,
1079         // so other calls can reuse this stack space.
1080 #ifdef USING_SPLIT_STACK
1081         __splitstack_getcontext(&g->stack_context[0]);
1082 #else
1083         g->gcinitial_sp = &mp;
1084         // Setting gcstack_size to 0 is a marker meaning that gcinitial_sp
1085         // is the top of the stack, not the bottom.
1086         g->gcstack_size = 0;
1087         g->gcnext_sp = &mp;
1088 #endif
1089         getcontext(&g->context);
1090
1091         if(g->entry != nil) {
1092                 // Got here from mcall.
1093                 void (*pfn)(G*) = (void (*)(G*))g->entry;
1094                 G* gp = (G*)g->param;
1095                 pfn(gp);
1096                 *(int*)0x21 = 0x21;
1097         }
1098         runtime_minit();
1099
1100 #ifdef USING_SPLIT_STACK
1101         {
1102           int dont_block_signals = 0;
1103           __splitstack_block_signals(&dont_block_signals, nil);
1104         }
1105 #endif
1106
1107         // Install signal handlers; after minit so that minit can
1108         // prepare the thread to be able to handle the signals.
1109         if(m == &runtime_m0)
1110                 runtime_initsig();
1111
1112         schedule(nil);
1113         return nil;
1114 }
1115
1116 typedef struct CgoThreadStart CgoThreadStart;
1117 struct CgoThreadStart
1118 {
1119         M *m;
1120         G *g;
1121         void (*fn)(void);
1122 };
1123
1124 // Kick off new m's as needed (up to mcpumax).
1125 // Sched is locked.
1126 static void
1127 matchmg(void)
1128 {
1129         G *gp;
1130         M *mp;
1131
1132         if(m->mallocing || m->gcing)
1133                 return;
1134
1135         while(haveg() && canaddmcpu()) {
1136                 gp = gget();
1137                 if(gp == nil)
1138                         runtime_throw("gget inconsistency");
1139
1140                 // Find the m that will run gp.
1141                 if((mp = mget(gp)) == nil)
1142                         mp = runtime_newm();
1143                 mnextg(mp, gp);
1144         }
1145 }
1146
1147 // Create a new m.  It will start off with a call to runtime_mstart.
1148 M*
1149 runtime_newm(void)
1150 {
1151         M *m;
1152         pthread_attr_t attr;
1153         pthread_t tid;
1154         size_t stacksize;
1155
1156         m = runtime_malloc(sizeof(M));
1157         mcommoninit(m);
1158         m->g0 = runtime_malg(-1, nil, nil);
1159
1160         if(pthread_attr_init(&attr) != 0)
1161                 runtime_throw("pthread_attr_init");
1162         if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
1163                 runtime_throw("pthread_attr_setdetachstate");
1164
1165         stacksize = PTHREAD_STACK_MIN;
1166
1167         // With glibc before version 2.16 the static TLS size is taken
1168         // out of the stack size, and we get an error or a crash if
1169         // there is not enough stack space left.  Add it back in if we
1170         // can, in case the program uses a lot of TLS space.  FIXME:
1171         // This can be disabled in glibc 2.16 and later, if the bug is
1172         // indeed fixed then.
1173         stacksize += tlssize;
1174
1175         if(pthread_attr_setstacksize(&attr, stacksize) != 0)
1176                 runtime_throw("pthread_attr_setstacksize");
1177
1178         if(pthread_create(&tid, &attr, runtime_mstart, m) != 0)
1179                 runtime_throw("pthread_create");
1180
1181         return m;
1182 }
1183
1184 // One round of scheduler: find a goroutine and run it.
1185 // The argument is the goroutine that was running before
1186 // schedule was called, or nil if this is the first call.
1187 // Never returns.
1188 static void
1189 schedule(G *gp)
1190 {
1191         int32 hz;
1192         uint32 v;
1193
1194         schedlock();
1195         if(gp != nil) {
1196                 // Just finished running gp.
1197                 gp->m = nil;
1198                 runtime_sched.grunning--;
1199
1200                 // atomic { mcpu-- }
1201                 v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
1202                 if(atomic_mcpu(v) > maxgomaxprocs)
1203                         runtime_throw("negative mcpu in scheduler");
1204
1205                 switch(gp->status){
1206                 case Grunnable:
1207                 case Gdead:
1208                         // Shouldn't have been running!
1209                         runtime_throw("bad gp->status in sched");
1210                 case Grunning:
1211                         gp->status = Grunnable;
1212                         gput(gp);
1213                         break;
1214                 case Gmoribund:
1215                         gp->status = Gdead;
1216                         if(gp->lockedm) {
1217                                 gp->lockedm = nil;
1218                                 m->lockedg = nil;
1219                         }
1220                         gp->idlem = nil;
1221                         runtime_memclr(&gp->context, sizeof gp->context);
1222                         gfput(gp);
1223                         if(--runtime_sched.gcount == 0)
1224                                 runtime_exit(0);
1225                         break;
1226                 }
1227                 if(gp->readyonstop){
1228                         gp->readyonstop = 0;
1229                         readylocked(gp);
1230                 }
1231         } else if(m->helpgc) {
1232                 // Bootstrap m or new m started by starttheworld.
1233                 // atomic { mcpu-- }
1234                 v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
1235                 if(atomic_mcpu(v) > maxgomaxprocs)
1236                         runtime_throw("negative mcpu in scheduler");
1237                 // Compensate for increment in starttheworld().
1238                 runtime_sched.grunning--;
1239                 m->helpgc = 0;
1240         } else if(m->nextg != nil) {
1241                 // New m started by matchmg.
1242         } else {
1243                 runtime_throw("invalid m state in scheduler");
1244         }
1245
1246         // Find (or wait for) g to run.  Unlocks runtime_sched.
1247         gp = nextgandunlock();
1248         gp->readyonstop = 0;
1249         gp->status = Grunning;
1250         m->curg = gp;
1251         gp->m = m;
1252
1253         // Check whether the profiler needs to be turned on or off.
1254         hz = runtime_sched.profilehz;
1255         if(m->profilehz != hz)
1256                 runtime_resetcpuprofiler(hz);
1257
1258         runtime_gogo(gp);
1259 }
1260
1261 // Enter scheduler.  If g->status is Grunning,
1262 // re-queues g and runs everyone else who is waiting
1263 // before running g again.  If g->status is Gmoribund,
1264 // kills off g.
1265 void
1266 runtime_gosched(void)
1267 {
1268         if(m->locks != 0)
1269                 runtime_throw("gosched holding locks");
1270         if(g == m->g0)
1271                 runtime_throw("gosched of g0");
1272         runtime_mcall(schedule);
1273 }
1274
1275 // The goroutine g is about to enter a system call.
1276 // Record that it's not using the cpu anymore.
1277 // This is called only from the go syscall library and cgocall,
1278 // not from the low-level system calls used by the runtime.
1279 //
1280 // Entersyscall cannot split the stack: the runtime_gosave must
1281 // make g->sched refer to the caller's stack segment, because
1282 // entersyscall is going to return immediately after.
1283 // It's okay to call matchmg and notewakeup even after
1284 // decrementing mcpu, because we haven't released the
1285 // sched lock yet, so the garbage collector cannot be running.
1286
1287 void runtime_entersyscall(void) __attribute__ ((no_split_stack));
1288
1289 void
1290 runtime_entersyscall(void)
1291 {
1292         uint32 v;
1293
1294         if(m->profilehz > 0)
1295                 runtime_setprof(false);
1296
1297         // Leave SP around for gc and traceback.
1298 #ifdef USING_SPLIT_STACK
1299         g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
1300                                        &g->gcnext_segment, &g->gcnext_sp,
1301                                        &g->gcinitial_sp);
1302 #else
1303         g->gcnext_sp = (byte *) &v;
1304 #endif
1305
1306         // Save the registers in the g structure so that any pointers
1307         // held in registers will be seen by the garbage collector.
1308         getcontext(&g->gcregs);
1309
1310         g->status = Gsyscall;
1311
1312         // Fast path.
1313         // The slow path inside the schedlock/schedunlock will get
1314         // through without stopping if it does:
1315         //      mcpu--
1316         //      gwait not true
1317         //      waitstop && mcpu <= mcpumax not true
1318         // If we can do the same with a single atomic add,
1319         // then we can skip the locks.
1320         v = runtime_xadd(&runtime_sched.atomic, -1<<mcpuShift);
1321         if(!atomic_gwaiting(v) && (!atomic_waitstop(v) || atomic_mcpu(v) > atomic_mcpumax(v)))
1322                 return;
1323
1324         schedlock();
1325         v = runtime_atomicload(&runtime_sched.atomic);
1326         if(atomic_gwaiting(v)) {
1327                 matchmg();
1328                 v = runtime_atomicload(&runtime_sched.atomic);
1329         }
1330         if(atomic_waitstop(v) && atomic_mcpu(v) <= atomic_mcpumax(v)) {
1331                 runtime_xadd(&runtime_sched.atomic, -1<<waitstopShift);
1332                 runtime_notewakeup(&runtime_sched.stopped);
1333         }
1334
1335         schedunlock();
1336 }
1337
1338 // The goroutine g exited its system call.
1339 // Arrange for it to run on a cpu again.
1340 // This is called only from the go syscall library, not
1341 // from the low-level system calls used by the runtime.
1342 void
1343 runtime_exitsyscall(void)
1344 {
1345         G *gp;
1346         uint32 v;
1347
1348         // Fast path.
1349         // If we can do the mcpu++ bookkeeping and
1350         // find that we still have mcpu <= mcpumax, then we can
1351         // start executing Go code immediately, without having to
1352         // schedlock/schedunlock.
1353         // Also do fast return if any locks are held, so that
1354         // panic code can use syscalls to open a file.
1355         gp = g;
1356         v = runtime_xadd(&runtime_sched.atomic, (1<<mcpuShift));
1357         if((m->profilehz == runtime_sched.profilehz && atomic_mcpu(v) <= atomic_mcpumax(v)) || m->locks > 0) {
1358                 // There's a cpu for us, so we can run.
1359                 gp->status = Grunning;
1360                 // Garbage collector isn't running (since we are),
1361                 // so okay to clear gcstack.
1362 #ifdef USING_SPLIT_STACK
1363                 gp->gcstack = nil;
1364 #endif
1365                 gp->gcnext_sp = nil;
1366                 runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
1367
1368                 if(m->profilehz > 0)
1369                         runtime_setprof(true);
1370                 return;
1371         }
1372
1373         // Tell scheduler to put g back on the run queue:
1374         // mostly equivalent to g->status = Grunning,
1375         // but keeps the garbage collector from thinking
1376         // that g is running right now, which it's not.
1377         gp->readyonstop = 1;
1378
1379         // All the cpus are taken.
1380         // The scheduler will ready g and put this m to sleep.
1381         // When the scheduler takes g away from m,
1382         // it will undo the runtime_sched.mcpu++ above.
1383         runtime_gosched();
1384
1385         // Gosched returned, so we're allowed to run now.
1386         // Delete the gcstack information that we left for
1387         // the garbage collector during the system call.
1388         // Must wait until now because until gosched returns
1389         // we don't know for sure that the garbage collector
1390         // is not running.
1391 #ifdef USING_SPLIT_STACK
1392         gp->gcstack = nil;
1393 #endif
1394         gp->gcnext_sp = nil;
1395         runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
1396 }
1397
1398 // Allocate a new g, with a stack big enough for stacksize bytes.
1399 G*
1400 runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
1401 {
1402         G *newg;
1403
1404         newg = runtime_malloc(sizeof(G));
1405         if(stacksize >= 0) {
1406 #if USING_SPLIT_STACK
1407                 int dont_block_signals = 0;
1408
1409                 *ret_stack = __splitstack_makecontext(stacksize,
1410                                                       &newg->stack_context[0],
1411                                                       ret_stacksize);
1412                 __splitstack_block_signals_context(&newg->stack_context[0],
1413                                                    &dont_block_signals, nil);
1414 #else
1415                 *ret_stack = runtime_mallocgc(stacksize, FlagNoProfiling|FlagNoGC, 0, 0);
1416                 *ret_stacksize = stacksize;
1417                 newg->gcinitial_sp = *ret_stack;
1418                 newg->gcstack_size = stacksize;
1419                 runtime_xadd(&runtime_stacks_sys, stacksize);
1420 #endif
1421         }
1422         return newg;
1423 }
1424
1425 /* For runtime package testing.  */
1426
1427 void runtime_testing_entersyscall(void)
1428   __asm__("runtime.entersyscall");
1429
1430 void
1431 runtime_testing_entersyscall()
1432 {
1433         runtime_entersyscall();
1434 }
1435
1436 void runtime_testing_exitsyscall(void)
1437   __asm__("runtime.exitsyscall");
1438
1439 void
1440 runtime_testing_exitsyscall()
1441 {
1442         runtime_exitsyscall();
1443 }
1444
1445 G*
1446 __go_go(void (*fn)(void*), void* arg)
1447 {
1448         byte *sp;
1449         size_t spsize;
1450         G *newg;
1451
1452         schedlock();
1453
1454         if((newg = gfget()) != nil){
1455 #ifdef USING_SPLIT_STACK
1456                 int dont_block_signals = 0;
1457
1458                 sp = __splitstack_resetcontext(&newg->stack_context[0],
1459                                                &spsize);
1460                 __splitstack_block_signals_context(&newg->stack_context[0],
1461                                                    &dont_block_signals, nil);
1462 #else
1463                 sp = newg->gcinitial_sp;
1464                 spsize = newg->gcstack_size;
1465                 if(spsize == 0)
1466                         runtime_throw("bad spsize in __go_go");
1467                 newg->gcnext_sp = sp;
1468 #endif
1469         } else {
1470                 newg = runtime_malg(StackMin, &sp, &spsize);
1471                 if(runtime_lastg == nil)
1472                         runtime_allg = newg;
1473                 else
1474                         runtime_lastg->alllink = newg;
1475                 runtime_lastg = newg;
1476         }
1477         newg->status = Gwaiting;
1478         newg->waitreason = "new goroutine";
1479
1480         newg->entry = (byte*)fn;
1481         newg->param = arg;
1482         newg->gopc = (uintptr)__builtin_return_address(0);
1483
1484         runtime_sched.gcount++;
1485         runtime_sched.goidgen++;
1486         newg->goid = runtime_sched.goidgen;
1487
1488         if(sp == nil)
1489                 runtime_throw("nil g->stack0");
1490
1491         {
1492                 // Avoid warnings about variables clobbered by
1493                 // longjmp.
1494                 byte * volatile vsp = sp;
1495                 size_t volatile vspsize = spsize;
1496                 G * volatile vnewg = newg;
1497
1498                 getcontext(&vnewg->context);
1499                 vnewg->context.uc_stack.ss_sp = vsp;
1500 #ifdef MAKECONTEXT_STACK_TOP
1501                 vnewg->context.uc_stack.ss_sp += vspsize;
1502 #endif
1503                 vnewg->context.uc_stack.ss_size = vspsize;
1504                 makecontext(&vnewg->context, kickoff, 0);
1505
1506                 newprocreadylocked(vnewg);
1507                 schedunlock();
1508
1509                 return vnewg;
1510         }
1511 }
1512
1513 // Put on gfree list.  Sched must be locked.
1514 static void
1515 gfput(G *g)
1516 {
1517         g->schedlink = runtime_sched.gfree;
1518         runtime_sched.gfree = g;
1519 }
1520
1521 // Get from gfree list.  Sched must be locked.
1522 static G*
1523 gfget(void)
1524 {
1525         G *g;
1526
1527         g = runtime_sched.gfree;
1528         if(g)
1529                 runtime_sched.gfree = g->schedlink;
1530         return g;
1531 }
1532
1533 // Run all deferred functions for the current goroutine.
1534 static void
1535 rundefer(void)
1536 {
1537         Defer *d;
1538
1539         while((d = g->defer) != nil) {
1540                 void (*pfn)(void*);
1541
1542                 pfn = d->__pfn;
1543                 d->__pfn = nil;
1544                 if (pfn != nil)
1545                         (*pfn)(d->__arg);
1546                 g->defer = d->__next;
1547                 runtime_free(d);
1548         }
1549 }
1550
1551 void runtime_Goexit (void) asm ("runtime.Goexit");
1552
1553 void
1554 runtime_Goexit(void)
1555 {
1556         rundefer();
1557         runtime_goexit();
1558 }
1559
1560 void runtime_Gosched (void) asm ("runtime.Gosched");
1561
1562 void
1563 runtime_Gosched(void)
1564 {
1565         runtime_gosched();
1566 }
1567
1568 // Implementation of runtime.GOMAXPROCS.
1569 // delete when scheduler is stronger
1570 int32
1571 runtime_gomaxprocsfunc(int32 n)
1572 {
1573         int32 ret;
1574         uint32 v;
1575
1576         schedlock();
1577         ret = runtime_gomaxprocs;
1578         if(n <= 0)
1579                 n = ret;
1580         if(n > maxgomaxprocs)
1581                 n = maxgomaxprocs;
1582         runtime_gomaxprocs = n;
1583         if(runtime_gomaxprocs > 1)
1584                 runtime_singleproc = false;
1585         if(runtime_gcwaiting != 0) {
1586                 if(atomic_mcpumax(runtime_sched.atomic) != 1)
1587                         runtime_throw("invalid mcpumax during gc");
1588                 schedunlock();
1589                 return ret;
1590         }
1591
1592         setmcpumax(n);
1593
1594         // If there are now fewer allowed procs
1595         // than procs running, stop.
1596         v = runtime_atomicload(&runtime_sched.atomic);
1597         if((int32)atomic_mcpu(v) > n) {
1598                 schedunlock();
1599                 runtime_gosched();
1600                 return ret;
1601         }
1602         // handle more procs
1603         matchmg();
1604         schedunlock();
1605         return ret;
1606 }
1607
1608 void
1609 runtime_LockOSThread(void)
1610 {
1611         if(m == &runtime_m0 && runtime_sched.init) {
1612                 runtime_sched.lockmain = true;
1613                 return;
1614         }
1615         m->lockedg = g;
1616         g->lockedm = m;
1617 }
1618
1619 void
1620 runtime_UnlockOSThread(void)
1621 {
1622         if(m == &runtime_m0 && runtime_sched.init) {
1623                 runtime_sched.lockmain = false;
1624                 return;
1625         }
1626         m->lockedg = nil;
1627         g->lockedm = nil;
1628 }
1629
1630 bool
1631 runtime_lockedOSThread(void)
1632 {
1633         return g->lockedm != nil && m->lockedg != nil;
1634 }
1635
1636 // for testing of callbacks
1637
1638 _Bool runtime_golockedOSThread(void)
1639   asm("runtime.golockedOSThread");
1640
1641 _Bool
1642 runtime_golockedOSThread(void)
1643 {
1644         return runtime_lockedOSThread();
1645 }
1646
1647 // for testing of wire, unwire
1648 uint32
1649 runtime_mid()
1650 {
1651         return m->id;
1652 }
1653
1654 int32 runtime_NumGoroutine (void)
1655   __asm__ ("runtime.NumGoroutine");
1656
1657 int32
1658 runtime_NumGoroutine()
1659 {
1660         return runtime_sched.gcount;
1661 }
1662
1663 int32
1664 runtime_gcount(void)
1665 {
1666         return runtime_sched.gcount;
1667 }
1668
1669 int32
1670 runtime_mcount(void)
1671 {
1672         return runtime_sched.mcount;
1673 }
1674
1675 static struct {
1676         Lock;
1677         void (*fn)(uintptr*, int32);
1678         int32 hz;
1679         uintptr pcbuf[100];
1680 } prof;
1681
1682 // Called if we receive a SIGPROF signal.
1683 void
1684 runtime_sigprof(uint8 *pc __attribute__ ((unused)),
1685                 uint8 *sp __attribute__ ((unused)),
1686                 uint8 *lr __attribute__ ((unused)),
1687                 G *gp __attribute__ ((unused)))
1688 {
1689         int32 n;
1690
1691         if(prof.fn == nil || prof.hz == 0)
1692                 return;
1693
1694         runtime_lock(&prof);
1695         if(prof.fn == nil) {
1696                 runtime_unlock(&prof);
1697                 return;
1698         }
1699         n = runtime_callers(0, prof.pcbuf, nelem(prof.pcbuf));
1700         if(n > 0)
1701                 prof.fn(prof.pcbuf, n);
1702         runtime_unlock(&prof);
1703 }
1704
1705 // Arrange to call fn with a traceback hz times a second.
1706 void
1707 runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
1708 {
1709         // Force sane arguments.
1710         if(hz < 0)
1711                 hz = 0;
1712         if(hz == 0)
1713                 fn = nil;
1714         if(fn == nil)
1715                 hz = 0;
1716
1717         // Stop profiler on this cpu so that it is safe to lock prof.
1718         // if a profiling signal came in while we had prof locked,
1719         // it would deadlock.
1720         runtime_resetcpuprofiler(0);
1721
1722         runtime_lock(&prof);
1723         prof.fn = fn;
1724         prof.hz = hz;
1725         runtime_unlock(&prof);
1726         runtime_lock(&runtime_sched);
1727         runtime_sched.profilehz = hz;
1728         runtime_unlock(&runtime_sched);
1729
1730         if(hz != 0)
1731                 runtime_resetcpuprofiler(hz);
1732 }