libgo/runtime/proc.c
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 #include <limits.h>
6 #include <signal.h>
7 #include <stdlib.h>
8 #include <pthread.h>
9 #include <unistd.h>
11 #include "config.h"
13 #ifdef HAVE_DL_ITERATE_PHDR
14 #include <link.h>
15 #endif
17 #include "runtime.h"
18 #include "arch.h"
19 #include "defs.h"
20 #include "malloc.h"
21 #include "go-type.h"
23 #ifdef USING_SPLIT_STACK
25 /* FIXME: These are not declared anywhere. */
27 extern void __splitstack_getcontext(void *context[10]);
29 extern void __splitstack_setcontext(void *context[10]);
31 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
33 extern void * __splitstack_resetcontext(void *context[10], size_t *);
35 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
36 void **);
38 extern void __splitstack_block_signals (int *, int *);
40 extern void __splitstack_block_signals_context (void *context[10], int *,
41 int *);
43 #endif
45 #ifndef PTHREAD_STACK_MIN
46 # define PTHREAD_STACK_MIN 8192
47 #endif
49 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
50 # define StackMin PTHREAD_STACK_MIN
51 #else
52 # define StackMin ((sizeof(char *) < 8) ? 2 * 1024 * 1024 : 4 * 1024 * 1024)
53 #endif
55 uintptr runtime_stacks_sys;
57 static void gtraceback(G*);
59 #ifdef __rtems__
60 #define __thread
61 #endif
63 static __thread G *g;
65 #ifndef SETCONTEXT_CLOBBERS_TLS
67 static inline void
68 initcontext(void)
72 static inline void
73 fixcontext(ucontext_t *c __attribute__ ((unused)))
77 #else
79 # if defined(__x86_64__) && defined(__sun__)
81 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
82 // register to that of the thread which called getcontext. The effect
83 // is that the address of all __thread variables changes. This bug
84 // also affects pthread_self() and pthread_getspecific. We work
85 // around it by clobbering the context field directly to keep %fs the
86 // same.
88 static __thread greg_t fs;
90 static inline void
91 initcontext(void)
93 ucontext_t c;
95 getcontext(&c);
96 fs = c.uc_mcontext.gregs[REG_FSBASE];
99 static inline void
100 fixcontext(ucontext_t* c)
102 c->uc_mcontext.gregs[REG_FSBASE] = fs;
105 # elif defined(__NetBSD__)
107 // NetBSD has a bug: setcontext clobbers tlsbase, we need to save
108 // and restore it ourselves.
110 static __thread __greg_t tlsbase;
112 static inline void
113 initcontext(void)
115 ucontext_t c;
117 getcontext(&c);
118 tlsbase = c.uc_mcontext._mc_tlsbase;
121 static inline void
122 fixcontext(ucontext_t* c)
124 c->uc_mcontext._mc_tlsbase = tlsbase;
127 # elif defined(__sparc__)
129 static inline void
130 initcontext(void)
134 static inline void
135 fixcontext(ucontext_t *c)
137 /* ??? Using
138 register unsigned long thread __asm__("%g7");
139 c->uc_mcontext.gregs[REG_G7] = thread;
140 results in
141 error: variable ‘thread’ might be clobbered by \
142 ‘longjmp’ or ‘vfork’ [-Werror=clobbered]
143 which ought to be false, as %g7 is a fixed register. */
145 if (sizeof (c->uc_mcontext.gregs[REG_G7]) == 8)
146 asm ("stx %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
147 else
148 asm ("st %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
151 # else
153 # error unknown case for SETCONTEXT_CLOBBERS_TLS
155 # endif
157 #endif
159 // ucontext_arg returns a properly aligned ucontext_t value. On some
160 // systems a ucontext_t value must be aligned to a 16-byte boundary.
161 // The g structure that has fields of type ucontext_t is defined in
162 // Go, and Go has no simple way to align a field to such a boundary.
163 // So we make the field larger in runtime2.go and pick an appropriate
164 // offset within the field here.
165 static ucontext_t*
166 ucontext_arg(void** go_ucontext)
168 uintptr_t p = (uintptr_t)go_ucontext;
169 size_t align = __alignof__(ucontext_t);
170 if(align > 16) {
171 // We only ensured space for up to a 16 byte alignment
172 // in libgo/go/runtime/runtime2.go.
173 runtime_throw("required alignment of ucontext_t too large");
175 p = (p + align - 1) &~ (uintptr_t)(align - 1);
176 return (ucontext_t*)p;
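// Illustrative sketch (not from proc.c): the rounding above keeps the result
// inside the oversized field that runtime2.go reserves.  Assuming a
// hypothetical 64-bit system where __alignof__(ucontext_t) == 16:
//
//   void *ctx[(sizeof(ucontext_t) + 16) / sizeof(void*) + 1]; // what Go reserves
//   ucontext_t *uc = ucontext_arg(&ctx[0]);
//   // (uintptr_t)uc % 16 == 0, and at most 15 bytes were skipped, so uc
//   // plus sizeof(ucontext_t) still fits inside ctx.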
179 // We can not always refer to the TLS variables directly. The
180 // compiler will call tls_get_addr to get the address of the variable,
181 // and it may hold it in a register across a call to schedule. When
182 // we get back from the call we may be running in a different thread,
183 // in which case the register now points to the TLS variable for a
184 // different thread. We use non-inlinable functions to avoid this
185 // when necessary.
187 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
189 G*
190 runtime_g(void)
192 return g;
195 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
197 M*
198 runtime_m(void)
200 if(g == nil)
201 return nil;
202 return g->m;
205 // Set g.
206 void
207 runtime_setg(G* gp)
209 g = gp;
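// Illustrative sketch (not from proc.c) of the hazard the comment above
// describes.  If g were read directly and the accessor inlined, the compiler
// could legally cache the computed TLS address across a reschedule:
//
//   G *gp = g;              // TLS address of g computed on thread A
//   runtime_mcall(park0);   // goroutine may resume on thread B
//   gp = g;                 // an inlined read may reuse thread A's cached
//                           // address; runtime_g() forces a fresh lookup.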
212 // Start a new thread.
213 static void
214 runtime_newosproc(M *mp)
216 pthread_attr_t attr;
217 sigset_t clear, old;
218 pthread_t tid;
219 int ret;
221 if(pthread_attr_init(&attr) != 0)
222 runtime_throw("pthread_attr_init");
223 if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
224 runtime_throw("pthread_attr_setdetachstate");
226 // Block signals during pthread_create so that the new thread
227 // starts with signals disabled. It will enable them in minit.
228 sigfillset(&clear);
230 #ifdef SIGTRAP
231 // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
232 sigdelset(&clear, SIGTRAP);
233 #endif
235 sigemptyset(&old);
236 pthread_sigmask(SIG_BLOCK, &clear, &old);
237 ret = pthread_create(&tid, &attr, runtime_mstart, mp);
238 pthread_sigmask(SIG_SETMASK, &old, nil);
240 if (ret != 0)
241 runtime_throw("pthread_create");
244 // First function run by a new goroutine. This replaces gogocall.
245 static void
246 kickoff(void)
248 void (*fn)(void*);
249 void *param;
251 if(g->traceback != nil)
252 gtraceback(g);
254 fn = (void (*)(void*))(g->entry);
255 param = g->param;
256 g->param = nil;
257 fn(param);
258 runtime_goexit1();
261 // Switch context to a different goroutine. This is like longjmp.
262 void runtime_gogo(G*) __attribute__ ((noinline));
263 void
264 runtime_gogo(G* newg)
266 #ifdef USING_SPLIT_STACK
267 __splitstack_setcontext(&newg->stackcontext[0]);
268 #endif
269 g = newg;
270 newg->fromgogo = true;
271 fixcontext(ucontext_arg(&newg->context[0]));
272 setcontext(ucontext_arg(&newg->context[0]));
273 runtime_throw("gogo setcontext returned");
276 // Save context and call fn passing g as a parameter. This is like
277 // setjmp. Because getcontext always returns 0, unlike setjmp, we use
278 // g->fromgogo as a code. It will be true if we got here via
279 // setcontext. g == nil the first time this is called in a new m.
280 void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
281 void
282 runtime_mcall(void (*pfn)(G*))
284 M *mp;
285 G *gp;
286 #ifndef USING_SPLIT_STACK
287 void *afterregs;
288 #endif
290 // Ensure that all registers are on the stack for the garbage
291 // collector.
292 __builtin_unwind_init();
294 gp = g;
295 mp = gp->m;
296 if(gp == mp->g0)
297 runtime_throw("runtime: mcall called on m->g0 stack");
299 if(gp != nil) {
301 #ifdef USING_SPLIT_STACK
302 __splitstack_getcontext(&g->stackcontext[0]);
303 #else
304 // We have to point to an address on the stack that is
305 // below the saved registers.
306 gp->gcnextsp = &afterregs;
307 #endif
308 gp->fromgogo = false;
309 getcontext(ucontext_arg(&gp->context[0]));
311 // When we return from getcontext, we may be running
312 // in a new thread. That means that g may have
313 // changed. It is a global variable, so we will
314 // reload it, but the address of g may be cached in
315 // our local stack frame, and that address may be
316 // wrong. Call the function to reload the value for
317 // this thread.
318 gp = runtime_g();
319 mp = gp->m;
321 if(gp->traceback != nil)
322 gtraceback(gp);
324 if (gp == nil || !gp->fromgogo) {
325 #ifdef USING_SPLIT_STACK
326 __splitstack_setcontext(&mp->g0->stackcontext[0]);
327 #endif
328 mp->g0->entry = (byte*)pfn;
329 mp->g0->param = gp;
331 // It's OK to set g directly here because this case
332 // can not occur if we got here via a setcontext to
333 // the getcontext call just above.
334 g = mp->g0;
336 fixcontext(ucontext_arg(&mp->g0->context[0]));
337 setcontext(ucontext_arg(&mp->g0->context[0]));
338 runtime_throw("runtime: mcall function returned");
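// Illustrative summary (not from proc.c) of the save/switch protocol shared
// by runtime_mcall and runtime_gogo above, for a goroutine gp parked by
// mcall and resumed later by gogo:
//
//   mcall(pfn): gp->fromgogo = false; getcontext(&gp->context);
//               if(!gp->fromgogo) switch to m->g0 and run pfn(gp);
//   gogo(gp):   gp->fromgogo = true;  setcontext(&gp->context);
//               // execution reappears after gp's getcontext with
//               // fromgogo == true, so mcall simply falls through and
//               // returns to the parked goroutine.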
342 // Goroutine scheduler
343 // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
345 // The main concepts are:
346 // G - goroutine.
347 // M - worker thread, or machine.
348 // P - processor, a resource that is required to execute Go code.
349 // An M must have an associated P to execute Go code; however, it can be
350 // blocked or in a syscall without an associated P.
352 // Design doc at http://golang.org/s/go11sched.
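// Rough picture (informal, not from the design doc) of how the pieces relate
// while Go code is running:
//
//   M (OS thread) --m->p--> P (runq, mcache, goidcache)
//        |                      |
//        +--m->curg--> G        +-- local run queue of runnable G's
//
// An M without a P can only be idle, spinning, or blocked in a syscall; it
// must reacquire a P (acquirep) before executing Go code again.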
354 enum
356 // Number of goroutine ids to grab from runtime_sched->goidgen to local per-P cache at once.
357 // 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
358 GoidCacheBatch = 16,
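// Goroutine ids are then handed out locally from the cached window; see the
// corresponding code in __go_go below:
//
//   if(p->goidcache == p->goidcacheend) {
//           p->goidcache = runtime_xadd64(&runtime_sched->goidgen, GoidCacheBatch);
//           p->goidcacheend = p->goidcache + GoidCacheBatch;
//   }
//   newg->goid = p->goidcache++;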
361 extern Sched* runtime_getsched() __asm__ (GOSYM_PREFIX "runtime.getsched");
362 extern bool* runtime_getCgoHasExtraM()
363 __asm__ (GOSYM_PREFIX "runtime.getCgoHasExtraM");
364 extern P** runtime_getAllP()
365 __asm__ (GOSYM_PREFIX "runtime.getAllP");
366 extern G* allocg(void)
367 __asm__ (GOSYM_PREFIX "runtime.allocg");
369 Sched* runtime_sched;
370 int32 runtime_gomaxprocs;
371 M runtime_m0;
372 G runtime_g0; // idle goroutine for m0
373 G* runtime_lastg;
374 M* runtime_allm;
375 P** runtime_allp;
376 int8* runtime_goos;
377 int32 runtime_ncpu;
378 bool runtime_precisestack;
379 static int32 newprocs;
381 bool runtime_isarchive;
383 void* runtime_mstart(void*);
384 static void runqput(P*, G*);
385 static G* runqget(P*);
386 static bool runqputslow(P*, G*, uint32, uint32);
387 static G* runqsteal(P*, P*);
388 static void mput(M*);
389 static M* mget(void);
390 static void mcommoninit(M*);
391 static void schedule(void);
392 static void procresize(int32);
393 static void acquirep(P*);
394 static P* releasep(void);
395 static void newm(void(*)(void), P*);
396 static void stopm(void);
397 static void startm(P*, bool);
398 static void handoffp(P*);
399 static void wakep(void);
400 static void stoplockedm(void);
401 static void startlockedm(G*);
402 static void sysmon(void);
403 static uint32 retake(int64);
404 static void incidlelocked(int32);
405 static void exitsyscall0(G*);
406 static void park0(G*);
407 static void goexit0(G*);
408 static void gfput(P*, G*);
409 static G* gfget(P*);
410 static void gfpurge(P*);
411 static void globrunqput(G*);
412 static void globrunqputbatch(G*, G*, int32);
413 static G* globrunqget(P*, int32);
414 static P* pidleget(void);
415 static void pidleput(P*);
416 static void injectglist(G*);
417 static bool preemptall(void);
418 static bool exitsyscallfast(void);
420 void allgadd(G*)
421 __asm__(GOSYM_PREFIX "runtime.allgadd");
422 void checkdead(void)
423 __asm__(GOSYM_PREFIX "runtime.checkdead");
425 bool runtime_isstarted;
427 // The bootstrap sequence is:
429 // call osinit
430 // call schedinit
431 // make & queue new G
432 // call runtime_mstart
434 // The new G calls runtime_main.
435 void
436 runtime_schedinit(void)
438 M *m;
439 int32 n, procs;
440 String s;
441 const byte *p;
442 Eface i;
444 runtime_sched = runtime_getsched();
446 m = &runtime_m0;
447 g = &runtime_g0;
448 m->g0 = g;
449 m->curg = g;
450 g->m = m;
452 initcontext();
454 runtime_sched->maxmcount = 10000;
455 runtime_precisestack = 0;
457 // runtime_symtabinit();
458 runtime_mallocinit();
459 mcommoninit(m);
460 runtime_alginit(); // maps must not be used before this call
462 // Initialize the itable value for newErrorCString,
463 // so that the next time it gets called, possibly
464 // in a fault during a garbage collection, it will not
465 // need to allocate memory.
466 runtime_newErrorCString(0, &i);
468 // Initialize the cached gotraceback value, since
469 // gotraceback calls getenv, which mallocs on Plan 9.
470 runtime_gotraceback(nil);
472 runtime_goargs();
473 runtime_goenvs();
474 runtime_parsedebugvars();
476 runtime_sched->lastpoll = runtime_nanotime();
477 procs = 1;
478 s = runtime_getenv("GOMAXPROCS");
479 p = s.str;
480 if(p != nil && (n = runtime_atoi(p, s.len)) > 0) {
481 if(n > _MaxGomaxprocs)
482 n = _MaxGomaxprocs;
483 procs = n;
485 runtime_allp = runtime_getAllP();
486 procresize(procs);
488 // Can not enable GC until all roots are registered.
489 // mstats()->enablegc = 1;
492 extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
493 extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");
495 // Used to determine the field alignment.
497 struct field_align
499 char c;
500 Hchan *p;
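// Note (illustrative, not from proc.c): with this struct the required
// alignment of Hchan* can be read off as offsetof(struct field_align, p),
// since on typical ABIs the compiler pads the single char with exactly
// enough bytes to align the pointer member.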
503 static void
504 initDone(void *arg __attribute__ ((unused))) {
505 runtime_unlockOSThread();
508 // The main goroutine.
509 // Note: C frames in general are not copyable during stack growth, for two reasons:
510 // 1) We don't know where in a frame to find pointers to other stack locations.
511 // 2) There's no guarantee that globals or heap values do not point into the frame.
513 // The C frame for runtime.main is copyable, because:
514 // 1) There are no pointers to other stack locations in the frame
515 // (d.fn points at a global, d.link is nil, d.argp is -1).
516 // 2) The only pointer into this frame is from the defer chain,
517 // which is explicitly handled during stack copying.
518 void
519 runtime_main(void* dummy __attribute__((unused)))
521 Defer d;
522 _Bool frame;
524 newm(sysmon, nil);
526 // Lock the main goroutine onto this, the main OS thread,
527 // during initialization. Most programs won't care, but a few
528 // do require certain calls to be made by the main thread.
529 // Those can arrange for main.main to run in the main thread
530 // by calling runtime.LockOSThread during initialization
531 // to preserve the lock.
532 runtime_lockOSThread();
534 // Defer unlock so that runtime.Goexit during init does the unlock too.
535 d.pfn = (uintptr)(void*)initDone;
536 d.link = g->_defer;
537 d.arg = (void*)-1;
538 d._panic = g->_panic;
539 d.retaddr = 0;
540 d.makefunccanrecover = 0;
541 d.frame = &frame;
542 d.special = true;
543 g->_defer = &d;
545 if(g->m != &runtime_m0)
546 runtime_throw("runtime_main not on m0");
547 __go_go(runtime_MHeap_Scavenger, nil);
549 makeMainInitDone();
551 _cgo_notify_runtime_init_done();
553 main_init();
555 closeMainInitDone();
557 if(g->_defer != &d || (void*)d.pfn != initDone)
558 runtime_throw("runtime: bad defer entry after init");
559 g->_defer = d.link;
560 runtime_unlockOSThread();
562 // For gccgo we have to wait until after main is initialized
563 // to enable GC, because initializing main registers the GC
564 // roots.
565 mstats()->enablegc = 1;
567 if(runtime_isarchive) {
568 // This is not a complete program, but is instead a
569 // library built using -buildmode=c-archive or
570 // c-shared. Now that we are initialized, there is
571 // nothing further to do.
572 return;
575 main_main();
577 // Make racy client program work: if panicking on
578 // another goroutine at the same time as main returns,
579 // let the other goroutine finish printing the panic trace.
580 // Once it does, it will exit. See issue 3934.
581 if(runtime_panicking())
582 runtime_park(nil, nil, "panicwait");
584 runtime_exit(0);
585 for(;;)
586 *(int32*)0 = 0;
589 void getTraceback(G*, G*) __asm__(GOSYM_PREFIX "runtime.getTraceback");
591 // getTraceback stores a traceback of gp in the g's traceback field
592 // and then returns to me. We expect that gp's traceback is not nil.
593 // It works by saving me's current context, and checking gp's traceback field.
594 // If gp's traceback field is not nil, it starts running gp.
595 // In places where we call getcontext, we check the traceback field.
596 // If it is not nil, we collect a traceback, and then return to the
597 // goroutine stored in the traceback field, which is me.
598 void getTraceback(G* me, G* gp)
600 #ifdef USING_SPLIT_STACK
601 __splitstack_getcontext(&me->stackcontext[0]);
602 #endif
603 getcontext(ucontext_arg(&me->stackcontext[0]));
605 if (gp->traceback != nil) {
606 runtime_gogo(gp);
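// Illustrative summary (not from proc.c) of the handshake above, assuming
// the Go caller has already stored a Traceback in gp->traceback with
// traceback->gp == me:
//
//   getTraceback(me, gp): save me's context, then runtime_gogo(gp).
//   gp resumes at one of its own getcontext calls, sees gp->traceback != nil,
//   and calls gtraceback(gp), which fills traceback->locbuf and finishes
//   with runtime_gogo(traceback->gp), i.e. control returns to me here.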
610 static void
611 checkmcount(void)
613 // sched lock is held
614 if(runtime_sched->mcount > runtime_sched->maxmcount) {
615 runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched->maxmcount);
616 runtime_throw("thread exhaustion");
620 // Do a stack trace of gp, and then restore the context to
621 // gp->dotraceback.
623 static void
624 gtraceback(G* gp)
626 Traceback* traceback;
628 traceback = gp->traceback;
629 gp->traceback = nil;
630 if(gp->m != nil)
631 runtime_throw("gtraceback: m is not nil");
632 gp->m = traceback->gp->m;
633 traceback->c = runtime_callers(1, traceback->locbuf,
634 sizeof traceback->locbuf / sizeof traceback->locbuf[0], false);
635 gp->m = nil;
636 runtime_gogo(traceback->gp);
639 static void
640 mcommoninit(M *mp)
642 // If there is no mcache, runtime_callers() will crash,
643 // and we are most likely in the sysmon thread, so the stack is senseless anyway.
644 if(g->m->mcache)
645 runtime_callers(1, mp->createstack, nelem(mp->createstack), false);
647 mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
649 runtime_lock(&runtime_sched->lock);
650 mp->id = runtime_sched->mcount++;
651 checkmcount();
652 runtime_mpreinit(mp);
654 // Add to runtime_allm so garbage collector doesn't free m
655 // when it is just in a register or thread-local storage.
656 mp->alllink = runtime_allm;
657 // runtime_NumCgoCall() iterates over allm w/o schedlock,
658 // so we need to publish it safely.
659 runtime_atomicstorep(&runtime_allm, mp);
660 runtime_unlock(&runtime_sched->lock);
663 // Mark gp ready to run.
664 void
665 runtime_ready(G *gp)
667 // Mark runnable.
668 g->m->locks++; // disable preemption because it can be holding p in a local var
669 if(gp->atomicstatus != _Gwaiting) {
670 runtime_printf("goroutine %D has status %d\n", gp->goid, gp->atomicstatus);
671 runtime_throw("bad g->atomicstatus in ready");
673 gp->atomicstatus = _Grunnable;
674 runqput((P*)g->m->p, gp);
675 if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0) // TODO: fast atomic
676 wakep();
677 g->m->locks--;
680 void goready(G*, int) __asm__ (GOSYM_PREFIX "runtime.goready");
682 void
683 goready(G* gp, int traceskip __attribute__ ((unused)))
685 runtime_ready(gp);
688 int32
689 runtime_gcprocs(void)
691 int32 n;
693 // Figure out how many CPUs to use during GC.
694 // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
695 runtime_lock(&runtime_sched->lock);
696 n = runtime_gomaxprocs;
697 if(n > runtime_ncpu)
698 n = runtime_ncpu > 0 ? runtime_ncpu : 1;
699 if(n > MaxGcproc)
700 n = MaxGcproc;
701 if(n > runtime_sched->nmidle+1) // one M is currently running
702 n = runtime_sched->nmidle+1;
703 runtime_unlock(&runtime_sched->lock);
704 return n;
707 static bool
708 needaddgcproc(void)
710 int32 n;
712 runtime_lock(&runtime_sched->lock);
713 n = runtime_gomaxprocs;
714 if(n > runtime_ncpu)
715 n = runtime_ncpu;
716 if(n > MaxGcproc)
717 n = MaxGcproc;
718 n -= runtime_sched->nmidle+1; // one M is currently running
719 runtime_unlock(&runtime_sched->lock);
720 return n > 0;
723 void
724 runtime_helpgc(int32 nproc)
726 M *mp;
727 int32 n, pos;
729 runtime_lock(&runtime_sched->lock);
730 pos = 0;
731 for(n = 1; n < nproc; n++) { // one M is currently running
732 if(runtime_allp[pos]->mcache == g->m->mcache)
733 pos++;
734 mp = mget();
735 if(mp == nil)
736 runtime_throw("runtime_gcprocs inconsistency");
737 mp->helpgc = n;
738 mp->mcache = runtime_allp[pos]->mcache;
739 pos++;
740 runtime_notewakeup(&mp->park);
742 runtime_unlock(&runtime_sched->lock);
745 // Similar to stoptheworld but best-effort and can be called several times.
746 // There is no reverse operation; it is used during crashing.
747 // This function must not lock any mutexes.
748 void
749 runtime_freezetheworld(void)
751 int32 i;
753 if(runtime_gomaxprocs == 1)
754 return;
755 // stopwait and preemption requests can be lost
756 // due to races with concurrently executing threads,
757 // so try several times
758 for(i = 0; i < 5; i++) {
759 // this should tell the scheduler to not start any new goroutines
760 runtime_sched->stopwait = 0x7fffffff;
761 runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1);
762 // this should stop running goroutines
763 if(!preemptall())
764 break; // no running goroutines
765 runtime_usleep(1000);
767 // to be sure
768 runtime_usleep(1000);
769 preemptall();
770 runtime_usleep(1000);
773 void
774 runtime_stopTheWorldWithSema(void)
776 int32 i;
777 uint32 s;
778 P *p;
779 bool wait;
781 runtime_lock(&runtime_sched->lock);
782 runtime_sched->stopwait = runtime_gomaxprocs;
783 runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1);
784 preemptall();
785 // stop current P
786 ((P*)g->m->p)->status = _Pgcstop;
787 runtime_sched->stopwait--;
788 // try to retake all P's in _Psyscall status
789 for(i = 0; i < runtime_gomaxprocs; i++) {
790 p = runtime_allp[i];
791 s = p->status;
792 if(s == _Psyscall && runtime_cas(&p->status, s, _Pgcstop))
793 runtime_sched->stopwait--;
795 // stop idle P's
796 while((p = pidleget()) != nil) {
797 p->status = _Pgcstop;
798 runtime_sched->stopwait--;
800 wait = runtime_sched->stopwait > 0;
801 runtime_unlock(&runtime_sched->lock);
803 // wait for remaining P's to stop voluntarily
804 if(wait) {
805 runtime_notesleep(&runtime_sched->stopnote);
806 runtime_noteclear(&runtime_sched->stopnote);
808 if(runtime_sched->stopwait)
809 runtime_throw("stoptheworld: not stopped");
810 for(i = 0; i < runtime_gomaxprocs; i++) {
811 p = runtime_allp[i];
812 if(p->status != _Pgcstop)
813 runtime_throw("stoptheworld: not stopped");
817 static void
818 mhelpgc(void)
820 g->m->helpgc = -1;
823 void
824 runtime_startTheWorldWithSema(void)
826 P *p, *p1;
827 M *mp;
828 G *gp;
829 bool add;
831 g->m->locks++; // disable preemption because it can be holding p in a local var
832 gp = runtime_netpoll(false); // non-blocking
833 injectglist(gp);
834 add = needaddgcproc();
835 runtime_lock(&runtime_sched->lock);
836 if(newprocs) {
837 procresize(newprocs);
838 newprocs = 0;
839 } else
840 procresize(runtime_gomaxprocs);
841 runtime_sched->gcwaiting = 0;
843 p1 = nil;
844 while((p = pidleget()) != nil) {
845 // procresize() puts p's with work at the beginning of the list.
846 // Once we reach a p without a run queue, the rest don't have one either.
847 if(p->runqhead == p->runqtail) {
848 pidleput(p);
849 break;
851 p->m = (uintptr)mget();
852 p->link = (uintptr)p1;
853 p1 = p;
855 if(runtime_sched->sysmonwait) {
856 runtime_sched->sysmonwait = false;
857 runtime_notewakeup(&runtime_sched->sysmonnote);
859 runtime_unlock(&runtime_sched->lock);
861 while(p1) {
862 p = p1;
863 p1 = (P*)p1->link;
864 if(p->m) {
865 mp = (M*)p->m;
866 p->m = 0;
867 if(mp->nextp)
868 runtime_throw("startTheWorldWithSema: inconsistent mp->nextp");
869 mp->nextp = (uintptr)p;
870 runtime_notewakeup(&mp->park);
871 } else {
872 // Start M to run P. Do not start another M below.
873 newm(nil, p);
874 add = false;
878 if(add) {
879 // If GC could have used another helper proc, start one now,
880 // in the hope that it will be available next time.
881 // It would have been even better to start it before the collection,
882 // but doing so requires allocating memory, so it's tricky to
883 // coordinate. This lazy approach works out in practice:
884 // we don't mind if the first couple of GC rounds don't have quite
885 // the maximum number of procs.
886 newm(mhelpgc, nil);
888 g->m->locks--;
891 // Called to start an M.
892 void*
893 runtime_mstart(void* mp)
895 M *m;
897 m = (M*)mp;
898 g = m->g0;
899 g->m = m;
901 initcontext();
903 g->entry = nil;
904 g->param = nil;
906 // Record top of stack for use by mcall.
907 // Once we call schedule we're never coming back,
908 // so other calls can reuse this stack space.
909 #ifdef USING_SPLIT_STACK
910 __splitstack_getcontext(&g->stackcontext[0]);
911 #else
912 g->gcinitialsp = &mp;
913 // Setting gcstacksize to 0 is a marker meaning that gcinitialsp
914 // is the top of the stack, not the bottom.
915 g->gcstacksize = 0;
916 g->gcnextsp = &mp;
917 #endif
918 getcontext(ucontext_arg(&g->context[0]));
920 if(g->entry != nil) {
921 // Got here from mcall.
922 void (*pfn)(G*) = (void (*)(G*))g->entry;
923 G* gp = (G*)g->param;
924 pfn(gp);
925 *(int*)0x21 = 0x21;
927 runtime_minit();
929 #ifdef USING_SPLIT_STACK
931 int dont_block_signals = 0;
932 __splitstack_block_signals(&dont_block_signals, nil);
934 #endif
936 // Install signal handlers; after minit so that minit can
937 // prepare the thread to be able to handle the signals.
938 if(m == &runtime_m0) {
939 if(runtime_iscgo) {
940 bool* cgoHasExtraM = runtime_getCgoHasExtraM();
941 if(!*cgoHasExtraM) {
942 *cgoHasExtraM = true;
943 runtime_newextram();
946 runtime_initsig(false);
949 if(m->mstartfn)
950 ((void (*)(void))m->mstartfn)();
952 if(m->helpgc) {
953 m->helpgc = 0;
954 stopm();
955 } else if(m != &runtime_m0) {
956 acquirep((P*)m->nextp);
957 m->nextp = 0;
959 schedule();
961 // TODO(brainman): This point is never reached, because scheduler
962 // does not release os threads at the moment. But once this path
963 // is enabled, we must remove our seh here.
965 return nil;
968 typedef struct CgoThreadStart CgoThreadStart;
969 struct CgoThreadStart
971 M *m;
972 G *g;
973 uintptr *tls;
974 void (*fn)(void);
977 M* runtime_allocm(P*, bool, byte**, uintptr*)
978 __asm__(GOSYM_PREFIX "runtime.allocm");
980 // Allocate a new m unassociated with any thread.
981 // Can use p for allocation context if needed.
982 M*
983 runtime_allocm(P *p, bool allocatestack, byte** ret_g0_stack, uintptr* ret_g0_stacksize)
985 M *mp;
987 g->m->locks++; // disable GC because it can be called from sysmon
988 if(g->m->p == 0)
989 acquirep(p); // temporarily borrow p for mallocs in this function
990 #if 0
991 if(mtype == nil) {
992 Eface e;
993 runtime_gc_m_ptr(&e);
994 mtype = ((const PtrType*)e.__type_descriptor)->__element_type;
996 #endif
998 mp = runtime_mal(sizeof *mp);
999 mcommoninit(mp);
1000 mp->g0 = runtime_malg(allocatestack, false, ret_g0_stack, ret_g0_stacksize);
1001 mp->g0->m = mp;
1003 if(p == (P*)g->m->p)
1004 releasep();
1005 g->m->locks--;
1007 return mp;
1010 void setGContext(void) __asm__ (GOSYM_PREFIX "runtime.setGContext");
1012 // setGContext sets up a new goroutine context for the current g.
1013 void
1014 setGContext()
1016 int val;
1018 initcontext();
1019 g->entry = nil;
1020 g->param = nil;
1021 #ifdef USING_SPLIT_STACK
1022 __splitstack_getcontext(&g->stackcontext[0]);
1023 val = 0;
1024 __splitstack_block_signals(&val, nil);
1025 #else
1026 g->gcinitialsp = &val;
1027 g->gcstack = nil;
1028 g->gcstacksize = 0;
1029 g->gcnextsp = &val;
1030 #endif
1031 getcontext(ucontext_arg(&g->context[0]));
1033 if(g->entry != nil) {
1034 // Got here from mcall.
1035 void (*pfn)(G*) = (void (*)(G*))g->entry;
1036 G* gp = (G*)g->param;
1037 pfn(gp);
1038 *(int*)0x22 = 0x22;
1042 void makeGContext(G*, byte*, uintptr)
1043 __asm__(GOSYM_PREFIX "runtime.makeGContext");
1045 // makeGContext makes a new context for a g.
1046 void
1047 makeGContext(G* gp, byte* sp, uintptr spsize) {
1048 ucontext_t *uc;
1050 uc = ucontext_arg(&gp->context[0]);
1051 getcontext(uc);
1052 uc->uc_stack.ss_sp = sp;
1053 uc->uc_stack.ss_size = (size_t)spsize;
1054 makecontext(uc, kickoff, 0);
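// Illustrative usage (mirroring __go_go below): after a stack is allocated
// for a new goroutine, its initial context is built so that the first switch
// into it lands in kickoff:
//
//   newg = runtime_malg(true, false, &sp, &malsize);
//   makeGContext(newg, sp, (uintptr)malsize);
//   // a later runtime_gogo(newg) setcontext's into kickoff, which then
//   // calls newg->entry(newg->param).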
1057 // Create a new m. It will start off with a call to fn, or else the scheduler.
1058 static void
1059 newm(void(*fn)(void), P *p)
1061 M *mp;
1063 mp = runtime_allocm(p, false, nil, nil);
1064 mp->nextp = (uintptr)p;
1065 mp->mstartfn = (uintptr)(void*)fn;
1067 runtime_newosproc(mp);
1070 // Stops execution of the current m until new work is available.
1071 // Returns with acquired P.
1072 static void
1073 stopm(void)
1075 M* m;
1077 m = g->m;
1078 if(m->locks)
1079 runtime_throw("stopm holding locks");
1080 if(m->p)
1081 runtime_throw("stopm holding p");
1082 if(m->spinning) {
1083 m->spinning = false;
1084 runtime_xadd(&runtime_sched->nmspinning, -1);
1087 retry:
1088 runtime_lock(&runtime_sched->lock);
1089 mput(m);
1090 runtime_unlock(&runtime_sched->lock);
1091 runtime_notesleep(&m->park);
1092 m = g->m;
1093 runtime_noteclear(&m->park);
1094 if(m->helpgc) {
1095 runtime_gchelper();
1096 m->helpgc = 0;
1097 m->mcache = nil;
1098 goto retry;
1100 acquirep((P*)m->nextp);
1101 m->nextp = 0;
1104 static void
1105 mspinning(void)
1107 g->m->spinning = true;
1110 // Schedules some M to run the p (creates an M if necessary).
1111 // If p==nil, tries to get an idle P, if no idle P's does nothing.
1112 static void
1113 startm(P *p, bool spinning)
1115 M *mp;
1116 void (*fn)(void);
1118 runtime_lock(&runtime_sched->lock);
1119 if(p == nil) {
1120 p = pidleget();
1121 if(p == nil) {
1122 runtime_unlock(&runtime_sched->lock);
1123 if(spinning)
1124 runtime_xadd(&runtime_sched->nmspinning, -1);
1125 return;
1128 mp = mget();
1129 runtime_unlock(&runtime_sched->lock);
1130 if(mp == nil) {
1131 fn = nil;
1132 if(spinning)
1133 fn = mspinning;
1134 newm(fn, p);
1135 return;
1137 if(mp->spinning)
1138 runtime_throw("startm: m is spinning");
1139 if(mp->nextp)
1140 runtime_throw("startm: m has p");
1141 mp->spinning = spinning;
1142 mp->nextp = (uintptr)p;
1143 runtime_notewakeup(&mp->park);
1146 // Hands off P from syscall or locked M.
1147 static void
1148 handoffp(P *p)
1150 // if it has local work, start it straight away
1151 if(p->runqhead != p->runqtail || runtime_sched->runqsize) {
1152 startm(p, false);
1153 return;
1155 // no local work, check that there are no spinning/idle M's,
1156 // otherwise our help is not required
1157 if(runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) == 0 && // TODO: fast atomic
1158 runtime_cas(&runtime_sched->nmspinning, 0, 1)) {
1159 startm(p, true);
1160 return;
1162 runtime_lock(&runtime_sched->lock);
1163 if(runtime_sched->gcwaiting) {
1164 p->status = _Pgcstop;
1165 if(--runtime_sched->stopwait == 0)
1166 runtime_notewakeup(&runtime_sched->stopnote);
1167 runtime_unlock(&runtime_sched->lock);
1168 return;
1170 if(runtime_sched->runqsize) {
1171 runtime_unlock(&runtime_sched->lock);
1172 startm(p, false);
1173 return;
1175 // If this is the last running P and nobody is polling network,
1176 // need to wakeup another M to poll network.
1177 if(runtime_sched->npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched->lastpoll) != 0) {
1178 runtime_unlock(&runtime_sched->lock);
1179 startm(p, false);
1180 return;
1182 pidleput(p);
1183 runtime_unlock(&runtime_sched->lock);
1186 // Tries to add one more P to execute G's.
1187 // Called when a G is made runnable (newproc, ready).
1188 static void
1189 wakep(void)
1191 // be conservative about spinning threads
1192 if(!runtime_cas(&runtime_sched->nmspinning, 0, 1))
1193 return;
1194 startm(nil, true);
1197 // Stops execution of the current m that is locked to a g until the g is runnable again.
1198 // Returns with acquired P.
1199 static void
1200 stoplockedm(void)
1202 M *m;
1203 P *p;
1205 m = g->m;
1206 if(m->lockedg == nil || m->lockedg->lockedm != m)
1207 runtime_throw("stoplockedm: inconsistent locking");
1208 if(m->p) {
1209 // Schedule another M to run this p.
1210 p = releasep();
1211 handoffp(p);
1213 incidlelocked(1);
1214 // Wait until another thread schedules lockedg again.
1215 runtime_notesleep(&m->park);
1216 m = g->m;
1217 runtime_noteclear(&m->park);
1218 if(m->lockedg->atomicstatus != _Grunnable)
1219 runtime_throw("stoplockedm: not runnable");
1220 acquirep((P*)m->nextp);
1221 m->nextp = 0;
1224 // Schedules the locked m to run the locked gp.
1225 static void
1226 startlockedm(G *gp)
1228 M *mp;
1229 P *p;
1231 mp = gp->lockedm;
1232 if(mp == g->m)
1233 runtime_throw("startlockedm: locked to me");
1234 if(mp->nextp)
1235 runtime_throw("startlockedm: m has p");
1236 // directly handoff current P to the locked m
1237 incidlelocked(-1);
1238 p = releasep();
1239 mp->nextp = (uintptr)p;
1240 runtime_notewakeup(&mp->park);
1241 stopm();
1244 // Stops the current m for stoptheworld.
1245 // Returns when the world is restarted.
1246 static void
1247 gcstopm(void)
1249 P *p;
1251 if(!runtime_sched->gcwaiting)
1252 runtime_throw("gcstopm: not waiting for gc");
1253 if(g->m->spinning) {
1254 g->m->spinning = false;
1255 runtime_xadd(&runtime_sched->nmspinning, -1);
1257 p = releasep();
1258 runtime_lock(&runtime_sched->lock);
1259 p->status = _Pgcstop;
1260 if(--runtime_sched->stopwait == 0)
1261 runtime_notewakeup(&runtime_sched->stopnote);
1262 runtime_unlock(&runtime_sched->lock);
1263 stopm();
1266 // Schedules gp to run on the current M.
1267 // Never returns.
1268 static void
1269 execute(G *gp)
1271 int32 hz;
1273 if(gp->atomicstatus != _Grunnable) {
1274 runtime_printf("execute: bad g status %d\n", gp->atomicstatus);
1275 runtime_throw("execute: bad g status");
1277 gp->atomicstatus = _Grunning;
1278 gp->waitsince = 0;
1279 ((P*)g->m->p)->schedtick++;
1280 g->m->curg = gp;
1281 gp->m = g->m;
1283 // Check whether the profiler needs to be turned on or off.
1284 hz = runtime_sched->profilehz;
1285 if(g->m->profilehz != hz)
1286 runtime_resetcpuprofiler(hz);
1288 runtime_gogo(gp);
1291 // Finds a runnable goroutine to execute.
1292 // Tries to steal from other P's, get g from global queue, poll network.
1293 static G*
1294 findrunnable(void)
1296 G *gp;
1297 P *p;
1298 int32 i;
1300 top:
1301 if(runtime_sched->gcwaiting) {
1302 gcstopm();
1303 goto top;
1305 if(runtime_fingwait && runtime_fingwake && (gp = runtime_wakefing()) != nil)
1306 runtime_ready(gp);
1307 // local runq
1308 gp = runqget((P*)g->m->p);
1309 if(gp)
1310 return gp;
1311 // global runq
1312 if(runtime_sched->runqsize) {
1313 runtime_lock(&runtime_sched->lock);
1314 gp = globrunqget((P*)g->m->p, 0);
1315 runtime_unlock(&runtime_sched->lock);
1316 if(gp)
1317 return gp;
1319 // poll network
1320 gp = runtime_netpoll(false); // non-blocking
1321 if(gp) {
1322 injectglist((G*)gp->schedlink);
1323 gp->atomicstatus = _Grunnable;
1324 return gp;
1326 // If number of spinning M's >= number of busy P's, block.
1327 // This is necessary to prevent excessive CPU consumption
1328 // when GOMAXPROCS>>1 but the program parallelism is low.
1329 if(!g->m->spinning && 2 * runtime_atomicload(&runtime_sched->nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched->npidle)) // TODO: fast atomic
1330 goto stop;
1331 if(!g->m->spinning) {
1332 g->m->spinning = true;
1333 runtime_xadd(&runtime_sched->nmspinning, 1);
1335 // random steal from other P's
1336 for(i = 0; i < 2*runtime_gomaxprocs; i++) {
1337 if(runtime_sched->gcwaiting)
1338 goto top;
1339 p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
1340 if(p == (P*)g->m->p)
1341 gp = runqget(p);
1342 else
1343 gp = runqsteal((P*)g->m->p, p);
1344 if(gp)
1345 return gp;
1347 stop:
1348 // return P and block
1349 runtime_lock(&runtime_sched->lock);
1350 if(runtime_sched->gcwaiting) {
1351 runtime_unlock(&runtime_sched->lock);
1352 goto top;
1354 if(runtime_sched->runqsize) {
1355 gp = globrunqget((P*)g->m->p, 0);
1356 runtime_unlock(&runtime_sched->lock);
1357 return gp;
1359 p = releasep();
1360 pidleput(p);
1361 runtime_unlock(&runtime_sched->lock);
1362 if(g->m->spinning) {
1363 g->m->spinning = false;
1364 runtime_xadd(&runtime_sched->nmspinning, -1);
1366 // check all runqueues once again
1367 for(i = 0; i < runtime_gomaxprocs; i++) {
1368 p = runtime_allp[i];
1369 if(p && p->runqhead != p->runqtail) {
1370 runtime_lock(&runtime_sched->lock);
1371 p = pidleget();
1372 runtime_unlock(&runtime_sched->lock);
1373 if(p) {
1374 acquirep(p);
1375 goto top;
1377 break;
1380 // poll network
1381 if(runtime_xchg64(&runtime_sched->lastpoll, 0) != 0) {
1382 if(g->m->p)
1383 runtime_throw("findrunnable: netpoll with p");
1384 if(g->m->spinning)
1385 runtime_throw("findrunnable: netpoll with spinning");
1386 gp = runtime_netpoll(true); // block until new work is available
1387 runtime_atomicstore64(&runtime_sched->lastpoll, runtime_nanotime());
1388 if(gp) {
1389 runtime_lock(&runtime_sched->lock);
1390 p = pidleget();
1391 runtime_unlock(&runtime_sched->lock);
1392 if(p) {
1393 acquirep(p);
1394 injectglist((G*)gp->schedlink);
1395 gp->atomicstatus = _Grunnable;
1396 return gp;
1398 injectglist(gp);
1401 stopm();
1402 goto top;
1405 static void
1406 resetspinning(void)
1408 int32 nmspinning;
1410 if(g->m->spinning) {
1411 g->m->spinning = false;
1412 nmspinning = runtime_xadd(&runtime_sched->nmspinning, -1);
1413 if(nmspinning < 0)
1414 runtime_throw("findrunnable: negative nmspinning");
1415 } else
1416 nmspinning = runtime_atomicload(&runtime_sched->nmspinning);
1418 // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
1419 // so see if we need to wakeup another P here.
1420 if (nmspinning == 0 && runtime_atomicload(&runtime_sched->npidle) > 0)
1421 wakep();
1424 // Injects the list of runnable G's into the scheduler.
1425 // Can run concurrently with GC.
1426 static void
1427 injectglist(G *glist)
1429 int32 n;
1430 G *gp;
1432 if(glist == nil)
1433 return;
1434 runtime_lock(&runtime_sched->lock);
1435 for(n = 0; glist; n++) {
1436 gp = glist;
1437 glist = (G*)gp->schedlink;
1438 gp->atomicstatus = _Grunnable;
1439 globrunqput(gp);
1441 runtime_unlock(&runtime_sched->lock);
1443 for(; n && runtime_sched->npidle; n--)
1444 startm(nil, false);
1447 // One round of scheduler: find a runnable goroutine and execute it.
1448 // Never returns.
1449 static void
1450 schedule(void)
1452 G *gp;
1453 uint32 tick;
1455 if(g->m->locks)
1456 runtime_throw("schedule: holding locks");
1458 top:
1459 if(runtime_sched->gcwaiting) {
1460 gcstopm();
1461 goto top;
1464 gp = nil;
1465 // Check the global runnable queue once in a while to ensure fairness.
1466 // Otherwise two goroutines can completely occupy the local runqueue
1467 // by constantly respawning each other.
1468 tick = ((P*)g->m->p)->schedtick;
1469 // This is a fancy way to say tick%61==0,
1470 // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
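// Concretely: 0x4325c53f == 1126548799 == ceil(2^36/61), so for a 32-bit
// tick the expression ((uint64)tick*0x4325c53f)>>36 equals tick/61, and
// tick - (tick/61)*61 is exactly tick%61.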
1471 if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched->runqsize > 0) {
1472 runtime_lock(&runtime_sched->lock);
1473 gp = globrunqget((P*)g->m->p, 1);
1474 runtime_unlock(&runtime_sched->lock);
1475 if(gp)
1476 resetspinning();
1478 if(gp == nil) {
1479 gp = runqget((P*)g->m->p);
1480 if(gp && g->m->spinning)
1481 runtime_throw("schedule: spinning with local work");
1483 if(gp == nil) {
1484 gp = findrunnable(); // blocks until work is available
1485 resetspinning();
1488 if(gp->lockedm) {
1489 // Hands off own p to the locked m,
1490 // then blocks waiting for a new p.
1491 startlockedm(gp);
1492 goto top;
1495 execute(gp);
1498 // Puts the current goroutine into a waiting state and calls unlockf.
1499 // If unlockf returns false, the goroutine is resumed.
1500 void
1501 runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason)
1503 if(g->atomicstatus != _Grunning)
1504 runtime_throw("bad g status");
1505 g->m->waitlock = lock;
1506 g->m->waitunlockf = unlockf;
1507 g->waitreason = runtime_gostringnocopy((const byte*)reason);
1508 runtime_mcall(park0);
1511 void gopark(FuncVal *, void *, String, byte, int)
1512 __asm__ (GOSYM_PREFIX "runtime.gopark");
1514 void
1515 gopark(FuncVal *unlockf, void *lock, String reason,
1516 byte traceEv __attribute__ ((unused)),
1517 int traceskip __attribute__ ((unused)))
1519 if(g->atomicstatus != _Grunning)
1520 runtime_throw("bad g status");
1521 g->m->waitlock = lock;
1522 g->m->waitunlockf = unlockf == nil ? nil : (void*)unlockf->fn;
1523 g->waitreason = reason;
1524 runtime_mcall(park0);
1527 static bool
1528 parkunlock(G *gp, void *lock)
1530 USED(gp);
1531 runtime_unlock(lock);
1532 return true;
1535 // Puts the current goroutine into a waiting state and unlocks the lock.
1536 // The goroutine can be made runnable again by calling runtime_ready(gp).
1537 void
1538 runtime_parkunlock(Lock *lock, const char *reason)
1540 runtime_park(parkunlock, lock, reason);
1543 void goparkunlock(Lock *, String, byte, int)
1544 __asm__ (GOSYM_PREFIX "runtime.goparkunlock");
1546 void
1547 goparkunlock(Lock *lock, String reason, byte traceEv __attribute__ ((unused)),
1548 int traceskip __attribute__ ((unused)))
1550 if(g->atomicstatus != _Grunning)
1551 runtime_throw("bad g status");
1552 g->m->waitlock = lock;
1553 g->m->waitunlockf = parkunlock;
1554 g->waitreason = reason;
1555 runtime_mcall(park0);
1558 // runtime_park continuation on g0.
1559 static void
1560 park0(G *gp)
1562 M *m;
1563 bool ok;
1565 m = g->m;
1566 gp->atomicstatus = _Gwaiting;
1567 gp->m = nil;
1568 m->curg = nil;
1569 if(m->waitunlockf) {
1570 ok = ((bool (*)(G*, void*))m->waitunlockf)(gp, m->waitlock);
1571 m->waitunlockf = nil;
1572 m->waitlock = nil;
1573 if(!ok) {
1574 gp->atomicstatus = _Grunnable;
1575 execute(gp); // Schedule it back, never returns.
1578 if(m->lockedg) {
1579 stoplockedm();
1580 execute(gp); // Never returns.
1582 schedule();
1585 // Scheduler yield.
1586 void
1587 runtime_gosched(void)
1589 if(g->atomicstatus != _Grunning)
1590 runtime_throw("bad g status");
1591 runtime_mcall(runtime_gosched0);
1594 // runtime_gosched continuation on g0.
1595 void
1596 runtime_gosched0(G *gp)
1598 M *m;
1600 m = g->m;
1601 gp->atomicstatus = _Grunnable;
1602 gp->m = nil;
1603 m->curg = nil;
1604 runtime_lock(&runtime_sched->lock);
1605 globrunqput(gp);
1606 runtime_unlock(&runtime_sched->lock);
1607 if(m->lockedg) {
1608 stoplockedm();
1609 execute(gp); // Never returns.
1611 schedule();
1614 // Finishes execution of the current goroutine.
1615 // Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack).
1616 // Since it does not return it does not matter. But if it is preempted
1617 // at the split stack check, GC will complain about inconsistent sp.
1618 void runtime_goexit1(void) __attribute__ ((noinline));
1619 void
1620 runtime_goexit1(void)
1622 if(g->atomicstatus != _Grunning)
1623 runtime_throw("bad g status");
1624 runtime_mcall(goexit0);
1627 // runtime_goexit1 continuation on g0.
1628 static void
1629 goexit0(G *gp)
1631 M *m;
1633 m = g->m;
1634 gp->atomicstatus = _Gdead;
1635 gp->entry = nil;
1636 gp->m = nil;
1637 gp->lockedm = nil;
1638 gp->paniconfault = 0;
1639 gp->_defer = nil; // should be true already but just in case.
1640 gp->_panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
1641 gp->writebuf.__values = nil;
1642 gp->writebuf.__count = 0;
1643 gp->writebuf.__capacity = 0;
1644 gp->waitreason = runtime_gostringnocopy(nil);
1645 gp->param = nil;
1646 m->curg = nil;
1647 m->lockedg = nil;
1648 if(m->locked & ~_LockExternal) {
1649 runtime_printf("invalid m->locked = %d\n", m->locked);
1650 runtime_throw("internal lockOSThread error");
1652 m->locked = 0;
1653 gfput((P*)m->p, gp);
1654 schedule();
1657 // The goroutine g is about to enter a system call.
1658 // Record that it's not using the cpu anymore.
1659 // This is called only from the go syscall library and cgocall,
1660 // not from the low-level system calls used by the runtime.
1662 // Entersyscall cannot split the stack: the runtime_gosave must
1663 // make g->sched refer to the caller's stack segment, because
1664 // entersyscall is going to return immediately after.
1666 void runtime_entersyscall(int32) __attribute__ ((no_split_stack));
1667 static void doentersyscall(uintptr, uintptr)
1668 __attribute__ ((no_split_stack, noinline));
1670 void
1671 runtime_entersyscall(int32 dummy __attribute__ ((unused)))
1673 // Save the registers in the g structure so that any pointers
1674 // held in registers will be seen by the garbage collector.
1675 getcontext(ucontext_arg(&g->gcregs[0]));
1677 // Do the work in a separate function, so that this function
1678 // doesn't save any registers on its own stack. If this
1679 // function does save any registers, we might store the wrong
1680 // value in the call to getcontext.
1682 // FIXME: This assumes that we do not need to save any
1683 // callee-saved registers to access the TLS variable g. We
1684 // don't want to put the ucontext_t on the stack because it is
1685 // large and we can not split the stack here.
1686 doentersyscall((uintptr)runtime_getcallerpc(&dummy),
1687 (uintptr)runtime_getcallersp(&dummy));
1690 static void
1691 doentersyscall(uintptr pc, uintptr sp)
1693 // Disable preemption because during this function g is in _Gsyscall status,
1694 // but can have inconsistent g->sched, do not let GC observe it.
1695 g->m->locks++;
1697 // Leave SP around for GC and traceback.
1698 #ifdef USING_SPLIT_STACK
1700 size_t gcstacksize;
1701 g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
1702 &g->gcnextsegment, &g->gcnextsp,
1703 &g->gcinitialsp);
1704 g->gcstacksize = (uintptr)gcstacksize;
1706 #else
1708 void *v;
1710 g->gcnextsp = (byte *) &v;
1712 #endif
1714 g->syscallsp = sp;
1715 g->syscallpc = pc;
1717 g->atomicstatus = _Gsyscall;
1719 if(runtime_atomicload(&runtime_sched->sysmonwait)) { // TODO: fast atomic
1720 runtime_lock(&runtime_sched->lock);
1721 if(runtime_atomicload(&runtime_sched->sysmonwait)) {
1722 runtime_atomicstore(&runtime_sched->sysmonwait, 0);
1723 runtime_notewakeup(&runtime_sched->sysmonnote);
1725 runtime_unlock(&runtime_sched->lock);
1728 g->m->mcache = nil;
1729 ((P*)(g->m->p))->m = 0;
1730 runtime_atomicstore(&((P*)g->m->p)->status, _Psyscall);
1731 if(runtime_atomicload(&runtime_sched->gcwaiting)) {
1732 runtime_lock(&runtime_sched->lock);
1733 if (runtime_sched->stopwait > 0 && runtime_cas(&((P*)g->m->p)->status, _Psyscall, _Pgcstop)) {
1734 if(--runtime_sched->stopwait == 0)
1735 runtime_notewakeup(&runtime_sched->stopnote);
1737 runtime_unlock(&runtime_sched->lock);
1740 g->m->locks--;
1743 // The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
1744 void
1745 runtime_entersyscallblock(int32 dummy __attribute__ ((unused)))
1747 P *p;
1749 g->m->locks++; // see comment in entersyscall
1751 // Leave SP around for GC and traceback.
1752 #ifdef USING_SPLIT_STACK
1754 size_t gcstacksize;
1755 g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
1756 &g->gcnextsegment, &g->gcnextsp,
1757 &g->gcinitialsp);
1758 g->gcstacksize = (uintptr)gcstacksize;
1760 #else
1761 g->gcnextsp = (byte *) &p;
1762 #endif
1764 // Save the registers in the g structure so that any pointers
1765 // held in registers will be seen by the garbage collector.
1766 getcontext(ucontext_arg(&g->gcregs[0]));
1768 g->syscallpc = (uintptr)runtime_getcallerpc(&dummy);
1769 g->syscallsp = (uintptr)runtime_getcallersp(&dummy);
1771 g->atomicstatus = _Gsyscall;
1773 p = releasep();
1774 handoffp(p);
1775 if(g->isbackground) // do not consider blocked scavenger for deadlock detection
1776 incidlelocked(1);
1778 g->m->locks--;
1781 // The goroutine g exited its system call.
1782 // Arrange for it to run on a cpu again.
1783 // This is called only from the go syscall library, not
1784 // from the low-level system calls used by the runtime.
1785 void
1786 runtime_exitsyscall(int32 dummy __attribute__ ((unused)))
1788 G *gp;
1790 gp = g;
1791 gp->m->locks++; // see comment in entersyscall
1793 if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
1794 incidlelocked(-1);
1796 gp->waitsince = 0;
1797 if(exitsyscallfast()) {
1798 // There's a cpu for us, so we can run.
1799 ((P*)gp->m->p)->syscalltick++;
1800 gp->atomicstatus = _Grunning;
1801 // Garbage collector isn't running (since we are),
1802 // so okay to clear gcstack and gcsp.
1803 #ifdef USING_SPLIT_STACK
1804 gp->gcstack = nil;
1805 #endif
1806 gp->gcnextsp = nil;
1807 runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
1808 gp->syscallsp = 0;
1809 gp->m->locks--;
1810 return;
1813 gp->m->locks--;
1815 // Call the scheduler.
1816 runtime_mcall(exitsyscall0);
1818 // Scheduler returned, so we're allowed to run now.
1819 // Delete the gcstack information that we left for
1820 // the garbage collector during the system call.
1821 // Must wait until now because until gosched returns
1822 // we don't know for sure that the garbage collector
1823 // is not running.
1824 #ifdef USING_SPLIT_STACK
1825 gp->gcstack = nil;
1826 #endif
1827 gp->gcnextsp = nil;
1828 runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
1830 gp->syscallsp = 0;
1832 // Note that this gp->m might be different than the earlier
1833 // gp->m after returning from runtime_mcall.
1834 ((P*)gp->m->p)->syscalltick++;
1837 static bool
1838 exitsyscallfast(void)
1840 G *gp;
1841 P *p;
1843 gp = g;
1845 // Freezetheworld sets stopwait but does not retake P's.
1846 if(runtime_sched->stopwait) {
1847 gp->m->p = 0;
1848 return false;
1851 // Try to re-acquire the last P.
1852 if(gp->m->p && ((P*)gp->m->p)->status == _Psyscall && runtime_cas(&((P*)gp->m->p)->status, _Psyscall, _Prunning)) {
1853 // There's a cpu for us, so we can run.
1854 gp->m->mcache = ((P*)gp->m->p)->mcache;
1855 ((P*)gp->m->p)->m = (uintptr)gp->m;
1856 return true;
1858 // Try to get any other idle P.
1859 gp->m->p = 0;
1860 if(runtime_sched->pidle) {
1861 runtime_lock(&runtime_sched->lock);
1862 p = pidleget();
1863 if(p && runtime_atomicload(&runtime_sched->sysmonwait)) {
1864 runtime_atomicstore(&runtime_sched->sysmonwait, 0);
1865 runtime_notewakeup(&runtime_sched->sysmonnote);
1867 runtime_unlock(&runtime_sched->lock);
1868 if(p) {
1869 acquirep(p);
1870 return true;
1873 return false;
1876 // runtime_exitsyscall slow path on g0.
1877 // Failed to acquire P, enqueue gp as runnable.
1878 static void
1879 exitsyscall0(G *gp)
1881 M *m;
1882 P *p;
1884 m = g->m;
1885 gp->atomicstatus = _Grunnable;
1886 gp->m = nil;
1887 m->curg = nil;
1888 runtime_lock(&runtime_sched->lock);
1889 p = pidleget();
1890 if(p == nil)
1891 globrunqput(gp);
1892 else if(runtime_atomicload(&runtime_sched->sysmonwait)) {
1893 runtime_atomicstore(&runtime_sched->sysmonwait, 0);
1894 runtime_notewakeup(&runtime_sched->sysmonnote);
1896 runtime_unlock(&runtime_sched->lock);
1897 if(p) {
1898 acquirep(p);
1899 execute(gp); // Never returns.
1901 if(m->lockedg) {
1902 // Wait until another thread schedules gp and so m again.
1903 stoplockedm();
1904 execute(gp); // Never returns.
1906 stopm();
1907 schedule(); // Never returns.
1910 void syscall_entersyscall(void)
1911 __asm__(GOSYM_PREFIX "syscall.Entersyscall");
1913 void syscall_entersyscall(void) __attribute__ ((no_split_stack));
1915 void
1916 syscall_entersyscall()
1918 runtime_entersyscall(0);
1921 void syscall_exitsyscall(void)
1922 __asm__(GOSYM_PREFIX "syscall.Exitsyscall");
1924 void syscall_exitsyscall(void) __attribute__ ((no_split_stack));
1926 void
1927 syscall_exitsyscall()
1929 runtime_exitsyscall(0);
1932 // Called from syscall package before fork.
1933 void syscall_runtime_BeforeFork(void)
1934 __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork");
1935 void
1936 syscall_runtime_BeforeFork(void)
1938 // Fork can hang if preempted with signals frequently enough (see issue 5517).
1939 // Ensure that we stay on the same M where we disable profiling.
1940 runtime_m()->locks++;
1941 if(runtime_m()->profilehz != 0)
1942 runtime_resetcpuprofiler(0);
1945 // Called from syscall package after fork in parent.
1946 void syscall_runtime_AfterFork(void)
1947 __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork");
1948 void
1949 syscall_runtime_AfterFork(void)
1951 int32 hz;
1953 hz = runtime_sched->profilehz;
1954 if(hz != 0)
1955 runtime_resetcpuprofiler(hz);
1956 runtime_m()->locks--;
1959 // Allocate a new g, with a stack big enough for stacksize bytes.
1960 G*
1961 runtime_malg(bool allocatestack, bool signalstack, byte** ret_stack, uintptr* ret_stacksize)
1963 uintptr stacksize;
1964 G *newg;
1965 byte* unused_stack;
1966 uintptr unused_stacksize;
1967 #if USING_SPLIT_STACK
1968 int dont_block_signals = 0;
1969 size_t ss_stacksize;
1970 #endif
1972 if (ret_stack == nil) {
1973 ret_stack = &unused_stack;
1975 if (ret_stacksize == nil) {
1976 ret_stacksize = &unused_stacksize;
1978 newg = allocg();
1979 if(allocatestack) {
1980 stacksize = StackMin;
1981 if(signalstack) {
1982 stacksize = 32 * 1024; // OS X wants >= 8K, GNU/Linux >= 2K
1983 #ifdef SIGSTKSZ
1984 if(stacksize < SIGSTKSZ)
1985 stacksize = SIGSTKSZ;
1986 #endif
1989 #if USING_SPLIT_STACK
1990 *ret_stack = __splitstack_makecontext(stacksize,
1991 &newg->stackcontext[0],
1992 &ss_stacksize);
1993 *ret_stacksize = (uintptr)ss_stacksize;
1994 __splitstack_block_signals_context(&newg->stackcontext[0],
1995 &dont_block_signals, nil);
1996 #else
1997 // In 64-bit mode, the maximum Go allocation space is
1998 // 128G. Our stack size is 4M, which only permits 32K
1999 // goroutines. In order to not limit ourselves,
2000 // allocate the stacks out of separate memory. In
2001 // 32-bit mode, the Go allocation space is all of
2002 // memory anyhow.
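// (Arithmetic behind the figure above: 128G / 4M = 32768, i.e. about 32K
// goroutine stacks if they had to come out of the Go heap.)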
2003 if(sizeof(void*) == 8) {
2004 void *p = runtime_SysAlloc(stacksize, &mstats()->other_sys);
2005 if(p == nil)
2006 runtime_throw("runtime: cannot allocate memory for goroutine stack");
2007 *ret_stack = (byte*)p;
2008 } else {
2009 *ret_stack = runtime_mallocgc(stacksize, 0, FlagNoProfiling|FlagNoGC);
2010 runtime_xadd(&runtime_stacks_sys, stacksize);
2012 *ret_stacksize = (uintptr)stacksize;
2013 newg->gcinitialsp = *ret_stack;
2014 newg->gcstacksize = (uintptr)stacksize;
2015 #endif
2017 return newg;
2020 G*
2021 __go_go(void (*fn)(void*), void* arg)
2023 byte *sp;
2024 size_t spsize;
2025 G *newg;
2026 P *p;
2028 //runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
2029 if(fn == nil) {
2030 g->m->throwing = -1; // do not dump full stacks
2031 runtime_throw("go of nil func value");
2033 g->m->locks++; // disable preemption because it can be holding p in a local var
2035 p = (P*)g->m->p;
2036 if((newg = gfget(p)) != nil) {
2037 #ifdef USING_SPLIT_STACK
2038 int dont_block_signals = 0;
2040 sp = __splitstack_resetcontext(&newg->stackcontext[0],
2041 &spsize);
2042 __splitstack_block_signals_context(&newg->stackcontext[0],
2043 &dont_block_signals, nil);
2044 #else
2045 sp = newg->gcinitialsp;
2046 spsize = newg->gcstacksize;
2047 if(spsize == 0)
2048 runtime_throw("bad spsize in __go_go");
2049 newg->gcnextsp = sp;
2050 #endif
2051 } else {
2052 uintptr malsize;
2054 newg = runtime_malg(true, false, &sp, &malsize);
2055 spsize = (size_t)malsize;
2056 newg->atomicstatus = _Gdead;
2057 allgadd(newg);
2060 newg->entry = (byte*)fn;
2061 newg->param = arg;
2062 newg->gopc = (uintptr)__builtin_return_address(0);
2063 newg->atomicstatus = _Grunnable;
2064 if(p->goidcache == p->goidcacheend) {
2065 p->goidcache = runtime_xadd64(&runtime_sched->goidgen, GoidCacheBatch);
2066 p->goidcacheend = p->goidcache + GoidCacheBatch;
2068 newg->goid = p->goidcache++;
2070 makeGContext(newg, sp, (uintptr)spsize);
2072 runqput(p, newg);
2074 if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
2075 wakep();
2076 g->m->locks--;
2077 return newg;
2080 // Put on gfree list.
2081 // If local list is too long, transfer a batch to the global list.
2082 static void
2083 gfput(P *p, G *gp)
2085 gp->schedlink = (uintptr)p->gfree;
2086 p->gfree = gp;
2087 p->gfreecnt++;
2088 if(p->gfreecnt >= 64) {
2089 runtime_lock(&runtime_sched->gflock);
2090 while(p->gfreecnt >= 32) {
2091 p->gfreecnt--;
2092 gp = p->gfree;
2093 p->gfree = (G*)gp->schedlink;
2094 gp->schedlink = (uintptr)runtime_sched->gfree;
2095 runtime_sched->gfree = gp;
2097 runtime_unlock(&runtime_sched->gflock);
2101 // Get from gfree list.
2102 // If local list is empty, grab a batch from global list.
2103 static G*
2104 gfget(P *p)
2106 G *gp;
2108 retry:
2109 gp = p->gfree;
2110 if(gp == nil && runtime_sched->gfree) {
2111 runtime_lock(&runtime_sched->gflock);
2112 while(p->gfreecnt < 32 && runtime_sched->gfree) {
2113 p->gfreecnt++;
2114 gp = runtime_sched->gfree;
2115 runtime_sched->gfree = (G*)gp->schedlink;
2116 gp->schedlink = (uintptr)p->gfree;
2117 p->gfree = gp;
2119 runtime_unlock(&runtime_sched->gflock);
2120 goto retry;
2122 if(gp) {
2123 p->gfree = (G*)gp->schedlink;
2124 p->gfreecnt--;
2126 return gp;
2129 // Purge all cached G's from gfree list to the global list.
2130 static void
2131 gfpurge(P *p)
2133 G *gp;
2135 runtime_lock(&runtime_sched->gflock);
2136 while(p->gfreecnt) {
2137 p->gfreecnt--;
2138 gp = p->gfree;
2139 p->gfree = (G*)gp->schedlink;
2140 gp->schedlink = (uintptr)runtime_sched->gfree;
2141 runtime_sched->gfree = gp;
2143 runtime_unlock(&runtime_sched->gflock);
2146 void
2147 runtime_Breakpoint(void)
2149 runtime_breakpoint();
2152 void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched");
2154 void
2155 runtime_Gosched(void)
2157 runtime_gosched();
2160 // Implementation of runtime.GOMAXPROCS.
2161 // delete when scheduler is even stronger
2163 intgo runtime_GOMAXPROCS(intgo)
2164 __asm__(GOSYM_PREFIX "runtime.GOMAXPROCS");
2166 intgo
2167 runtime_GOMAXPROCS(intgo n)
2169 intgo ret;
2171 if(n > _MaxGomaxprocs)
2172 n = _MaxGomaxprocs;
2173 runtime_lock(&runtime_sched->lock);
2174 ret = (intgo)runtime_gomaxprocs;
2175 if(n <= 0 || n == ret) {
2176 runtime_unlock(&runtime_sched->lock);
2177 return ret;
2179 runtime_unlock(&runtime_sched->lock);
2181 runtime_acquireWorldsema();
2182 g->m->gcing = 1;
2183 runtime_stopTheWorldWithSema();
2184 newprocs = (int32)n;
2185 g->m->gcing = 0;
2186 runtime_releaseWorldsema();
2187 runtime_startTheWorldWithSema();
2189 return ret;
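// Illustrative call pattern (hypothetical caller): a non-positive n only
// queries the current setting, so it can be saved and restored, and only
// an actual change pays for the stop-the-world.
//
//	intgo old = runtime_GOMAXPROCS(0);	// query; no stop-the-world
//	runtime_GOMAXPROCS(1);			// change; briefly stops the world
//	// ... run with a single P ...
//	runtime_GOMAXPROCS(old);		// restore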
2192 // lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
2193 // after they modify m->locked. Do not allow preemption during this call,
2194 // or else the m might be different in this function than in the caller.
2195 static void
2196 lockOSThread(void)
2198 g->m->lockedg = g;
2199 g->lockedm = g->m;
2202 void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
2203 void
2204 runtime_LockOSThread(void)
2206 g->m->locked |= _LockExternal;
2207 lockOSThread();
2210 void
2211 runtime_lockOSThread(void)
2213 g->m->locked += _LockInternal;
2214 lockOSThread();
2218 // unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
2219 // after they update m->locked. Do not allow preemption during this call,
2220 // or else the m might be different in this function than in the caller.
2221 static void
2222 unlockOSThread(void)
2224 if(g->m->locked != 0)
2225 return;
2226 g->m->lockedg = nil;
2227 g->lockedm = nil;
2230 void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
2232 void
2233 runtime_UnlockOSThread(void)
2235 g->m->locked &= ~_LockExternal;
2236 unlockOSThread();
2239 void
2240 runtime_unlockOSThread(void)
2242 if(g->m->locked < _LockInternal)
2243 runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
2244 g->m->locked -= _LockInternal;
2245 unlockOSThread();
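// Accounting note on m->locked (illustrative): _LockExternal is set and
// cleared as a flag (|= and &= ~), so repeated runtime_LockOSThread calls
// are idempotent and a single runtime_UnlockOSThread undoes them, while
// _LockInternal is added and subtracted as a count, so runtime_lockOSThread
// and runtime_unlockOSThread must be strictly balanced; lockedg/lockedm
// are only cleared once m->locked drops back to zero.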
2248 bool
2249 runtime_lockedOSThread(void)
2251 return g->lockedm != nil && g->m->lockedg != nil;
2254 int32
2255 runtime_mcount(void)
2257 return runtime_sched->mcount;
2260 static struct {
2261 uint32 lock;
2262 int32 hz;
2263 } prof;
2265 static void System(void) {}
2266 static void GC(void) {}
2268 // Called if we receive a SIGPROF signal.
2269 void
2270 runtime_sigprof()
2272 M *mp = g->m;
2273 int32 n, i;
2274 bool traceback;
2275 uintptr pcbuf[TracebackMaxFrames];
2276 Location locbuf[TracebackMaxFrames];
2277 Slice stk;
2279 if(prof.hz == 0)
2280 return;
2282 if(mp == nil)
2283 return;
2285 // Profiling runs concurrently with GC, so it must not allocate.
2286 mp->mallocing++;
2288 traceback = true;
2290 if(mp->mcache == nil)
2291 traceback = false;
2293 n = 0;
2295 if(runtime_atomicload(&runtime_in_callers) > 0) {
2296 // If SIGPROF arrived while we were already fetching runtime
2297 // callers, we can have trouble on older systems,
2298 // because the unwind library calls dl_iterate_phdr,
2299 // which in the past was not safe to call recursively.
2300 traceback = false;
2303 if(traceback) {
2304 n = runtime_callers(0, locbuf, nelem(locbuf), false);
2305 for(i = 0; i < n; i++)
2306 pcbuf[i] = locbuf[i].pc;
2308 if(!traceback || n <= 0) {
2309 n = 2;
2310 pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
2311 if(mp->gcing || mp->helpgc)
2312 pcbuf[1] = (uintptr)GC;
2313 else
2314 pcbuf[1] = (uintptr)System;
2317 if (prof.hz != 0) {
2318 stk.__values = &pcbuf[0];
2319 stk.__count = n;
2320 stk.__capacity = n;
2322 // Simple cas-lock to coordinate with setcpuprofilerate.
2323 while (!runtime_cas(&prof.lock, 0, 1)) {
2324 runtime_osyield();
2326 if (prof.hz != 0) {
2327 runtime_cpuprofAdd(stk);
2329 runtime_atomicstore(&prof.lock, 0);
2332 mp->mallocing--;
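// The prof.lock word is used as a tiny test-and-set spin lock; the same
// idiom written out on its own (sketch of the pattern only):
//
//	while(!runtime_cas(&prof.lock, 0, 1))	// acquire: 0 -> 1, else spin
//		runtime_osyield();
//	// ... critical section (here: signal context, so no allocation) ...
//	runtime_atomicstore(&prof.lock, 0);	// release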
2335 // Arrange for the CPU profiler to collect a traceback hz times a second.
2336 void
2337 runtime_setcpuprofilerate_m(int32 hz)
2339 // Force sane arguments.
2340 if(hz < 0)
2341 hz = 0;
2343 // Disable preemption, otherwise we can be rescheduled to another thread
2344 // that has profiling enabled.
2345 g->m->locks++;
2347 // Stop the profiler on this thread so that it is safe to lock prof:
2348 // if a profiling signal came in while we had prof locked,
2349 // it would deadlock.
2350 runtime_resetcpuprofiler(0);
2352 while (!runtime_cas(&prof.lock, 0, 1)) {
2353 runtime_osyield();
2355 prof.hz = hz;
2356 runtime_atomicstore(&prof.lock, 0);
2358 runtime_lock(&runtime_sched->lock);
2359 runtime_sched->profilehz = hz;
2360 runtime_unlock(&runtime_sched->lock);
2362 if(hz != 0)
2363 runtime_resetcpuprofiler(hz);
2365 g->m->locks--;
2368 // Change number of processors. The world is stopped, sched is locked.
2369 static void
2370 procresize(int32 new)
2372 int32 i, old;
2373 bool pempty;
2374 G *gp;
2375 P *p;
2376 intgo j;
2378 old = runtime_gomaxprocs;
2379 if(old < 0 || old > _MaxGomaxprocs || new <= 0 || new >_MaxGomaxprocs)
2380 runtime_throw("procresize: invalid arg");
2381 // initialize new P's
2382 for(i = 0; i < new; i++) {
2383 p = runtime_allp[i];
2384 if(p == nil) {
2385 p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
2386 p->id = i;
2387 p->status = _Pgcstop;
2388 p->deferpool.__values = &p->deferpoolbuf[0];
2389 p->deferpool.__count = 0;
2390 p->deferpool.__capacity = nelem(p->deferpoolbuf);
2391 runtime_atomicstorep(&runtime_allp[i], p);
2393 if(p->mcache == nil) {
2394 if(old==0 && i==0)
2395 p->mcache = g->m->mcache; // bootstrap
2396 else
2397 p->mcache = runtime_allocmcache();
2401 // redistribute runnable G's evenly
2402 // collect all runnable goroutines in global queue preserving FIFO order
2403 // FIFO order is required to ensure fairness even during frequent GCs
2404 // see http://golang.org/issue/7126
2405 pempty = false;
2406 while(!pempty) {
2407 pempty = true;
2408 for(i = 0; i < old; i++) {
2409 p = runtime_allp[i];
2410 if(p->runqhead == p->runqtail)
2411 continue;
2412 pempty = false;
2413 // pop from tail of local queue
2414 p->runqtail--;
2415 gp = (G*)p->runq[p->runqtail%nelem(p->runq)];
2416 // push onto head of global queue
2417 gp->schedlink = runtime_sched->runqhead;
2418 runtime_sched->runqhead = (uintptr)gp;
2419 if(runtime_sched->runqtail == 0)
2420 runtime_sched->runqtail = (uintptr)gp;
2421 runtime_sched->runqsize++;
2424 // fill local queues with at most nelem(p->runq)/2 goroutines
2425 // start at 1 because current M already executes some G and will acquire allp[0] below,
2426 // so if we have a spare G we want to put it into allp[1].
2427 for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched->runqsize > 0; i++) {
2428 gp = (G*)runtime_sched->runqhead;
2429 runtime_sched->runqhead = gp->schedlink;
2430 if(runtime_sched->runqhead == 0)
2431 runtime_sched->runqtail = 0;
2432 runtime_sched->runqsize--;
2433 runqput(runtime_allp[i%new], gp);
2436 // free unused P's
2437 for(i = new; i < old; i++) {
2438 p = runtime_allp[i];
2439 for(j = 0; j < p->deferpool.__count; j++) {
2440 ((struct _defer**)p->deferpool.__values)[j] = nil;
2442 p->deferpool.__count = 0;
2443 runtime_freemcache(p->mcache);
2444 p->mcache = nil;
2445 gfpurge(p);
2446 p->status = _Pdead;
2447 // can't free P itself because it can be referenced by an M in syscall
2450 if(g->m->p)
2451 ((P*)g->m->p)->m = 0;
2452 g->m->p = 0;
2453 g->m->mcache = nil;
2454 p = runtime_allp[0];
2455 p->m = 0;
2456 p->status = _Pidle;
2457 acquirep(p);
2458 for(i = new-1; i > 0; i--) {
2459 p = runtime_allp[i];
2460 p->status = _Pidle;
2461 pidleput(p);
2463 runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
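// Worked example of the refill above (illustrative numbers): shrinking
// from old=4 to new=2 with five runnable G's (g1..g5, FIFO order) left on
// the global queue after the drain, the loop runs i = 1..5 and does
//
//	runqput(runtime_allp[1], g1);	// i=1, 1%2 == 1
//	runqput(runtime_allp[0], g2);	// i=2, 2%2 == 0
//	runqput(runtime_allp[1], g3);	// i=3
//	runqput(runtime_allp[0], g4);	// i=4
//	runqput(runtime_allp[1], g5);	// i=5
//
// so spare work lands on allp[1] first, since the current M acquires
// allp[0] itself just above.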
2466 // Associate p and the current m.
2467 static void
2468 acquirep(P *p)
2470 M *m;
2472 m = g->m;
2473 if(m->p || m->mcache)
2474 runtime_throw("acquirep: already in go");
2475 if(p->m || p->status != _Pidle) {
2476 runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? ((M*)p->m)->id : 0, p->status);
2477 runtime_throw("acquirep: invalid p state");
2479 m->mcache = p->mcache;
2480 m->p = (uintptr)p;
2481 p->m = (uintptr)m;
2482 p->status = _Prunning;
2485 // Disassociate p and the current m.
2486 static P*
2487 releasep(void)
2489 M *m;
2490 P *p;
2492 m = g->m;
2493 if(m->p == 0 || m->mcache == nil)
2494 runtime_throw("releasep: invalid arg");
2495 p = (P*)m->p;
2496 if((M*)p->m != m || p->mcache != m->mcache || p->status != _Prunning) {
2497 runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
2498 m, m->p, p->m, m->mcache, p->mcache, p->status);
2499 runtime_throw("releasep: invalid p state");
2501 m->p = 0;
2502 m->mcache = nil;
2503 p->m = 0;
2504 p->status = _Pidle;
2505 return p;
2508 static void
2509 incidlelocked(int32 v)
2511 runtime_lock(&runtime_sched->lock);
2512 runtime_sched->nmidlelocked += v;
2513 if(v > 0)
2514 checkdead();
2515 runtime_unlock(&runtime_sched->lock);
2518 static void
2519 sysmon(void)
2521 uint32 idle, delay;
2522 int64 now, lastpoll, lasttrace;
2523 G *gp;
2525 lasttrace = 0;
2526 idle = 0; // how many cycles in succession we have not woken anybody up
2527 delay = 0;
2528 for(;;) {
2529 if(idle == 0) // start with 20us sleep...
2530 delay = 20;
2531 else if(idle > 50) // start doubling the sleep after 1ms...
2532 delay *= 2;
2533 if(delay > 10*1000) // up to 10ms
2534 delay = 10*1000;
2535 runtime_usleep(delay);
2536 if(runtime_debug.schedtrace <= 0 &&
2537 (runtime_sched->gcwaiting || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic
2538 runtime_lock(&runtime_sched->lock);
2539 if(runtime_atomicload(&runtime_sched->gcwaiting) || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs) {
2540 runtime_atomicstore(&runtime_sched->sysmonwait, 1);
2541 runtime_unlock(&runtime_sched->lock);
2542 runtime_notesleep(&runtime_sched->sysmonnote);
2543 runtime_noteclear(&runtime_sched->sysmonnote);
2544 idle = 0;
2545 delay = 20;
2546 } else
2547 runtime_unlock(&runtime_sched->lock);
2549 // poll network if not polled for more than 10ms
2550 lastpoll = runtime_atomicload64(&runtime_sched->lastpoll);
2551 now = runtime_nanotime();
2552 if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
2553 runtime_cas64(&runtime_sched->lastpoll, lastpoll, now);
2554 gp = runtime_netpoll(false); // non-blocking
2555 if(gp) {
2556 // Need to decrement number of idle locked M's
2557 // (pretending that one more is running) before injectglist.
2558 // Otherwise it can lead to the following situation:
2559 // injectglist grabs all P's but before it starts M's to run the P's,
2560 // another M returns from syscall, finishes running its G,
2561 // observes that there is no work to do and no other running M's
2562 // and reports deadlock.
2563 incidlelocked(-1);
2564 injectglist(gp);
2565 incidlelocked(1);
2568 // retake P's blocked in syscalls
2569 // and preempt long running G's
2570 if(retake(now))
2571 idle = 0;
2572 else
2573 idle++;
2575 if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) {
2576 lasttrace = now;
2577 runtime_schedtrace(runtime_debug.scheddetail);
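// The back-off above works out as follows (worked arithmetic): sysmon
// sleeps 20us per cycle for the first 50 idle cycles (about 1ms of sleep
// in total), then doubles the delay on each further idle cycle (40us,
// 80us, 160us, ...) until it is clamped at 10ms, which takes roughly nine
// more doublings (20us * 2^9 = 10.24ms, clamped to 10ms).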
2582 typedef struct Pdesc Pdesc;
2583 struct Pdesc
2585 uint32 schedtick;
2586 int64 schedwhen;
2587 uint32 syscalltick;
2588 int64 syscallwhen;
2590 static Pdesc pdesc[_MaxGomaxprocs];
2592 static uint32
2593 retake(int64 now)
2595 uint32 i, s, n;
2596 int64 t;
2597 P *p;
2598 Pdesc *pd;
2600 n = 0;
2601 for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
2602 p = runtime_allp[i];
2603 if(p==nil)
2604 continue;
2605 pd = &pdesc[i];
2606 s = p->status;
2607 if(s == _Psyscall) {
2608 // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
2609 t = p->syscalltick;
2610 if(pd->syscalltick != t) {
2611 pd->syscalltick = t;
2612 pd->syscallwhen = now;
2613 continue;
2615 // On the one hand we don't want to retake Ps if there is no other work to do,
2616 // but on the other hand we want to retake them eventually
2617 // because they can prevent the sysmon thread from deep sleep.
2618 if(p->runqhead == p->runqtail &&
2619 runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) > 0 &&
2620 pd->syscallwhen + 10*1000*1000 > now)
2621 continue;
2622 // Need to decrement number of idle locked M's
2623 // (pretending that one more is running) before the CAS.
2624 // Otherwise the M from which we retake can exit the syscall,
2625 // increment nmidle and report deadlock.
2626 incidlelocked(-1);
2627 if(runtime_cas(&p->status, s, _Pidle)) {
2628 n++;
2629 handoffp(p);
2631 incidlelocked(1);
2632 } else if(s == _Prunning) {
2633 // Preempt G if it's running for more than 10ms.
2634 t = p->schedtick;
2635 if(pd->schedtick != t) {
2636 pd->schedtick = t;
2637 pd->schedwhen = now;
2638 continue;
2640 if(pd->schedwhen + 10*1000*1000 > now)
2641 continue;
2642 // preemptone(p);
2645 return n;
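// Timing sketch for the syscall case above (illustrative): a P whose
// syscalltick has not changed since the previous sysmon pass is a
// candidate, and it is actually retaken only if it still has local work,
// or if there are no spinning M's and no idle P's to take the work, or
// once it has sat in the syscall for more than 10ms.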
2648 // Tell all goroutines that they have been preempted and they should stop.
2649 // This function is purely best-effort. It can fail to inform a goroutine if a
2650 // processor just started running it.
2651 // No locks need to be held.
2652 // Returns true if preemption request was issued to at least one goroutine.
2653 static bool
2654 preemptall(void)
2656 return false;
2659 // Put mp on midle list.
2660 // Sched must be locked.
2661 static void
2662 mput(M *mp)
2664 mp->schedlink = runtime_sched->midle;
2665 runtime_sched->midle = (uintptr)mp;
2666 runtime_sched->nmidle++;
2667 checkdead();
2670 // Try to get an m from midle list.
2671 // Sched must be locked.
2672 static M*
2673 mget(void)
2675 M *mp;
2677 if((mp = (M*)runtime_sched->midle) != nil){
2678 runtime_sched->midle = mp->schedlink;
2679 runtime_sched->nmidle--;
2681 return mp;
2684 // Put gp on the global runnable queue.
2685 // Sched must be locked.
2686 static void
2687 globrunqput(G *gp)
2689 gp->schedlink = 0;
2690 if(runtime_sched->runqtail)
2691 ((G*)runtime_sched->runqtail)->schedlink = (uintptr)gp;
2692 else
2693 runtime_sched->runqhead = (uintptr)gp;
2694 runtime_sched->runqtail = (uintptr)gp;
2695 runtime_sched->runqsize++;
2698 // Put a batch of runnable goroutines on the global runnable queue.
2699 // Sched must be locked.
2700 static void
2701 globrunqputbatch(G *ghead, G *gtail, int32 n)
2703 gtail->schedlink = 0;
2704 if(runtime_sched->runqtail)
2705 ((G*)runtime_sched->runqtail)->schedlink = (uintptr)ghead;
2706 else
2707 runtime_sched->runqhead = (uintptr)ghead;
2708 runtime_sched->runqtail = (uintptr)gtail;
2709 runtime_sched->runqsize += n;
2712 // Try to get a batch of G's from the global runnable queue.
2713 // Sched must be locked.
2714 static G*
2715 globrunqget(P *p, int32 max)
2717 G *gp, *gp1;
2718 int32 n;
2720 if(runtime_sched->runqsize == 0)
2721 return nil;
2722 n = runtime_sched->runqsize/runtime_gomaxprocs+1;
2723 if(n > runtime_sched->runqsize)
2724 n = runtime_sched->runqsize;
2725 if(max > 0 && n > max)
2726 n = max;
2727 if((uint32)n > nelem(p->runq)/2)
2728 n = nelem(p->runq)/2;
2729 runtime_sched->runqsize -= n;
2730 if(runtime_sched->runqsize == 0)
2731 runtime_sched->runqtail = 0;
2732 gp = (G*)runtime_sched->runqhead;
2733 runtime_sched->runqhead = gp->schedlink;
2734 n--;
2735 while(n--) {
2736 gp1 = (G*)runtime_sched->runqhead;
2737 runtime_sched->runqhead = gp1->schedlink;
2738 runqput(p, gp1);
2740 return gp;
2743 // Put p on the pidle list.
2744 // Sched must be locked.
2745 static void
2746 pidleput(P *p)
2748 p->link = runtime_sched->pidle;
2749 runtime_sched->pidle = (uintptr)p;
2750 runtime_xadd(&runtime_sched->npidle, 1); // TODO: fast atomic
2753 // Try to get a p from the pidle list.
2754 // Sched must be locked.
2755 static P*
2756 pidleget(void)
2758 P *p;
2760 p = (P*)runtime_sched->pidle;
2761 if(p) {
2762 runtime_sched->pidle = p->link;
2763 runtime_xadd(&runtime_sched->npidle, -1); // TODO: fast atomic
2765 return p;
2768 // Try to put g on local runnable queue.
2769 // If it's full, put onto global queue.
2770 // Executed only by the owner P.
2771 static void
2772 runqput(P *p, G *gp)
2774 uint32 h, t;
2776 retry:
2777 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
2778 t = p->runqtail;
2779 if(t - h < nelem(p->runq)) {
2780 p->runq[t%nelem(p->runq)] = (uintptr)gp;
2781 runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
2782 return;
2784 if(runqputslow(p, gp, h, t))
2785 return;
2786 // the queue is not full, so the put above must now succeed
2787 goto retry;
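// A note on the ring arithmetic used by runqput/runqget (sketch): the
// runqhead and runqtail fields are free-running 32-bit counters, and the
// occupancy is the unsigned difference t - h, which stays correct even
// when the counters wrap around, e.g.
//
//	h == 0xfffffff0, t == 0x00000010  =>  t - h == 0x20 (32 queued G's)
//
// so the t - h < nelem(p->runq) test above never needs the counters to be
// reset.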
2790 // Put g and a batch of work from local runnable queue on global queue.
2791 // Executed only by the owner P.
2792 static bool
2793 runqputslow(P *p, G *gp, uint32 h, uint32 t)
2795 G *batch[nelem(p->runq)/2+1];
2796 uint32 n, i;
2798 // First, grab a batch from local queue.
2799 n = t-h;
2800 n = n/2;
2801 if(n != nelem(p->runq)/2)
2802 runtime_throw("runqputslow: queue is not full");
2803 for(i=0; i<n; i++)
2804 batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
2805 if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
2806 return false;
2807 batch[n] = gp;
2808 // Link the goroutines.
2809 for(i=0; i<n; i++)
2810 batch[i]->schedlink = (uintptr)batch[i+1];
2811 // Now put the batch on global queue.
2812 runtime_lock(&runtime_sched->lock);
2813 globrunqputbatch(batch[0], batch[n], n+1);
2814 runtime_unlock(&runtime_sched->lock);
2815 return true;
2818 // Get g from local runnable queue.
2819 // Executed only by the owner P.
2820 static G*
2821 runqget(P *p)
2823 G *gp;
2824 uint32 t, h;
2826 for(;;) {
2827 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
2828 t = p->runqtail;
2829 if(t == h)
2830 return nil;
2831 gp = (G*)p->runq[h%nelem(p->runq)];
2832 if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume
2833 return gp;
2837 // Grabs a batch of goroutines from local runnable queue.
2838 // batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
2839 // Can be executed by any P.
2840 static uint32
2841 runqgrab(P *p, G **batch)
2843 uint32 t, h, n, i;
2845 for(;;) {
2846 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
2847 t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer
2848 n = t-h;
2849 n = n - n/2;
2850 if(n == 0)
2851 break;
2852 if(n > nelem(p->runq)/2) // read inconsistent h and t
2853 continue;
2854 for(i=0; i<n; i++)
2855 batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
2856 if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
2857 break;
2859 return n;
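// The n = n - n/2 step above grabs half of the victim's queue, rounded up
// (worked examples):
//
//	t - h == 5  =>  n == 5 - 2 == 3
//	t - h == 1  =>  n == 1 - 0 == 1
//
// so a steal always makes progress when the victim has at least one G,
// and the consistency check above keeps n within nelem(p->runq)/2.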
2862 // Steal half of the elements from the local runnable queue of p2
2863 // and put them onto the local runnable queue of p.
2864 // Returns one of the stolen elements (or nil if failed).
2865 static G*
2866 runqsteal(P *p, P *p2)
2868 G *gp;
2869 G *batch[nelem(p->runq)/2];
2870 uint32 t, h, n, i;
2872 n = runqgrab(p2, batch);
2873 if(n == 0)
2874 return nil;
2875 n--;
2876 gp = batch[n];
2877 if(n == 0)
2878 return gp;
2879 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
2880 t = p->runqtail;
2881 if(t - h + n >= nelem(p->runq))
2882 runtime_throw("runqsteal: runq overflow");
2883 for(i=0; i<n; i++, t++)
2884 p->runq[t%nelem(p->runq)] = (uintptr)batch[i];
2885 runtime_atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption
2886 return gp;
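// For example (illustrative numbers): if runqgrab returns 3 G's from p2,
// runqsteal hands the last one back to the caller to run immediately and
// copies the remaining 2 into p's ring, advancing p->runqtail by 2 with a
// single store-release.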
2889 void runtime_testSchedLocalQueue(void)
2890 __asm__(GOSYM_PREFIX "runtime.testSchedLocalQueue");
2892 void
2893 runtime_testSchedLocalQueue(void)
2895 P p;
2896 G gs[nelem(p.runq)];
2897 int32 i, j;
2899 runtime_memclr((byte*)&p, sizeof(p));
2901 for(i = 0; i < (int32)nelem(gs); i++) {
2902 if(runqget(&p) != nil)
2903 runtime_throw("runq is not empty initially");
2904 for(j = 0; j < i; j++)
2905 runqput(&p, &gs[i]);
2906 for(j = 0; j < i; j++) {
2907 if(runqget(&p) != &gs[i]) {
2908 runtime_printf("bad element at iter %d/%d\n", i, j);
2909 runtime_throw("bad element");
2912 if(runqget(&p) != nil)
2913 runtime_throw("runq is not empty afterwards");
2917 void runtime_testSchedLocalQueueSteal(void)
2918 __asm__(GOSYM_PREFIX "runtime.testSchedLocalQueueSteal");
2920 void
2921 runtime_testSchedLocalQueueSteal(void)
2923 P p1, p2;
2924 G gs[nelem(p1.runq)], *gp;
2925 int32 i, j, s;
2927 runtime_memclr((byte*)&p1, sizeof(p1));
2928 runtime_memclr((byte*)&p2, sizeof(p2));
2930 for(i = 0; i < (int32)nelem(gs); i++) {
2931 for(j = 0; j < i; j++) {
2932 gs[j].sig = 0;
2933 runqput(&p1, &gs[j]);
2935 gp = runqsteal(&p2, &p1);
2936 s = 0;
2937 if(gp) {
2938 s++;
2939 gp->sig++;
2941 while((gp = runqget(&p2)) != nil) {
2942 s++;
2943 gp->sig++;
2945 while((gp = runqget(&p1)) != nil)
2946 gp->sig++;
2947 for(j = 0; j < i; j++) {
2948 if(gs[j].sig != 1) {
2949 runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
2950 runtime_throw("bad element");
2953 if(s != i/2 && s != i/2+1) {
2954 runtime_printf("bad steal %d, want %d or %d, iter %d\n",
2955 s, i/2, i/2+1, i);
2956 runtime_throw("bad steal");
2961 intgo
2962 runtime_setmaxthreads(intgo in)
2964 intgo out;
2966 runtime_lock(&runtime_sched->lock);
2967 out = (intgo)runtime_sched->maxmcount;
2968 runtime_sched->maxmcount = (int32)in;
2969 checkmcount();
2970 runtime_unlock(&runtime_sched->lock);
2971 return out;
2974 static intgo
2975 procPin()
2977 M *mp;
2979 mp = runtime_m();
2980 mp->locks++;
2981 return (intgo)(((P*)mp->p)->id);
2984 static void
2985 procUnpin()
2987 runtime_m()->locks--;
2990 intgo sync_runtime_procPin(void)
2991 __asm__ (GOSYM_PREFIX "sync.runtime_procPin");
2993 intgo
2994 sync_runtime_procPin()
2996 return procPin();
2999 void sync_runtime_procUnpin(void)
3000 __asm__ (GOSYM_PREFIX "sync.runtime_procUnpin");
3002 void
3003 sync_runtime_procUnpin()
3005 procUnpin();
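// Expected call pattern on the Go side (hypothetical caller, in the style
// of per-P caches such as sync.Pool): pin to get a stable P index with
// preemption disabled, touch the per-P slot, then unpin.
//
//	intgo id = sync_runtime_procPin();	// mp->locks++, so no preemption
//	// ... use per-P data slot id ...
//	sync_runtime_procUnpin();		// mp->locks--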
3008 intgo sync_atomic_runtime_procPin(void)
3009 __asm__ (GOSYM_PREFIX "sync_atomic.runtime_procPin");
3011 intgo
3012 sync_atomic_runtime_procPin()
3014 return procPin();
3017 void sync_atomic_runtime_procUnpin(void)
3018 __asm__ (GOSYM_PREFIX "sync_atomic.runtime_procUnpin");
3020 void
3021 sync_atomic_runtime_procUnpin()
3023 procUnpin();
3026 // Return whether we are waiting for a GC. This gc toolchain uses
3027 // preemption instead.
3028 bool
3029 runtime_gcwaiting(void)
3031 return runtime_sched->gcwaiting;
3034 // os_beforeExit is called from os.Exit(0).
3035 //go:linkname os_beforeExit os.runtime_beforeExit
3037 extern void os_beforeExit() __asm__ (GOSYM_PREFIX "os.runtime_beforeExit");
3039 void
3040 os_beforeExit()
3044 // Active spinning for sync.Mutex.
3045 //go:linkname sync_runtime_canSpin sync.runtime_canSpin
3047 enum
3049 ACTIVE_SPIN = 4,
3050 ACTIVE_SPIN_CNT = 30,
3053 extern _Bool sync_runtime_canSpin(intgo i)
3054 __asm__ (GOSYM_PREFIX "sync.runtime_canSpin");
3056 _Bool
3057 sync_runtime_canSpin(intgo i)
3059 P *p;
3061 // sync.Mutex is cooperative, so we are conservative with spinning.
3062 // Spin only a few times, and only if we are running on a multicore machine,
3063 // GOMAXPROCS>1, there is at least one other running P, and the local runq is empty.
3064 // Unlike the runtime mutex, we don't do passive spinning here,
3065 // because there can be work on the global runq or on other Ps.
3066 if (i >= ACTIVE_SPIN || runtime_ncpu <= 1 || runtime_gomaxprocs <= (int32)(runtime_sched->npidle+runtime_sched->nmspinning)+1) {
3067 return false;
3069 p = (P*)g->m->p;
3070 return p != nil && p->runqhead == p->runqtail;
3073 //go:linkname sync_runtime_doSpin sync.runtime_doSpin
3074 //go:nosplit
3076 extern void sync_runtime_doSpin(void)
3077 __asm__ (GOSYM_PREFIX "sync.runtime_doSpin");
3079 void
3080 sync_runtime_doSpin()
3082 runtime_procyield(ACTIVE_SPIN_CNT);
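// Sketch of how the two hooks above combine on the sync.Mutex side
// (hypothetical caller; mutex_contended stands in for the real state
// check):
//
//	intgo iter = 0;
//	while(mutex_contended && sync_runtime_canSpin(iter)) {
//		sync_runtime_doSpin();	// runtime_procyield(ACTIVE_SPIN_CNT)
//		iter++;
//	}
//	// otherwise fall back to blocking on a semaphore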
3085 // For Go code to look at variables, until we port proc.go.
3087 extern M** runtime_go_allm(void)
3088 __asm__ (GOSYM_PREFIX "runtime.allm");
3090 M**
3091 runtime_go_allm()
3093 return &runtime_allm;
3096 intgo NumCPU(void) __asm__ (GOSYM_PREFIX "runtime.NumCPU");
3098 intgo
3099 NumCPU()
3101 return (intgo)(runtime_ncpu);