libgo/runtime/proc.c

   1 // Copyright 2009 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 #include <limits.h>
   6 #include <signal.h>
   7 #include <stdlib.h>
   8 #include <pthread.h>
   9 #include <unistd.h>
  10
  11 #include "config.h"
  12
  13 #ifdef HAVE_DL_ITERATE_PHDR
  14 #include <link.h>
  15 #endif
  16
  17 #include "runtime.h"
  18 #include "arch.h"
  19 #include "defs.h"
  20 #include "malloc.h"
  21 #include "go-type.h"
  22
  23 #ifdef USING_SPLIT_STACK
  24
  25 /* FIXME: These are not declared anywhere.  */
  26
  27 extern void __splitstack_getcontext(void *context[10]);
  28
  29 extern void __splitstack_setcontext(void *context[10]);
  30
  31 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
  32
  33 extern void * __splitstack_resetcontext(void *context[10], size_t *);
  34
  35 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
  36                                void **);
  37
  38 extern void __splitstack_block_signals (int *, int *);
  39
  40 extern void __splitstack_block_signals_context (void *context[10], int *,
  41                                                 int *);
  42
  43 #endif
  44
  45 #ifndef PTHREAD_STACK_MIN
  46 # define PTHREAD_STACK_MIN 8192
  47 #endif
  48
  49 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
  50 # define StackMin PTHREAD_STACK_MIN
  51 #else
  52 # define StackMin ((sizeof(char *) < 8) ? 2 * 1024 * 1024 : 4 * 1024 * 1024)
  53 #endif
  54
  55 uintptr runtime_stacks_sys;
  56
  57 static void gtraceback(G*);
  58
  59 #ifdef __rtems__
  60 #define __thread
  61 #endif
  62
  63 static __thread G *g;
  64
  65 #ifndef SETCONTEXT_CLOBBERS_TLS
  66
  67 static inline void
  68 initcontext(void)
  69 {
  70 }
  71
  72 static inline void
  73 fixcontext(ucontext_t *c __attribute__ ((unused)))
  74 {
  75 }
  76
  77 #else
  78
  79 # if defined(__x86_64__) && defined(__sun__)
  80
  81 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
  82 // register to that of the thread which called getcontext.  The effect
  83 // is that the address of all __thread variables changes.  This bug
  84 // also affects pthread_self() and pthread_getspecific.  We work
  85 // around it by clobbering the context field directly to keep %fs the
  86 // same.
  87
  88 static __thread greg_t fs;
  89
  90 static inline void
  91 initcontext(void)
  92 {
  93         ucontext_t c;
  94
  95         getcontext(&c);
  96         fs = c.uc_mcontext.gregs[REG_FSBASE];
  97 }
  98
  99 static inline void
 100 fixcontext(ucontext_t* c)
 101 {
 102         c->uc_mcontext.gregs[REG_FSBASE] = fs;
 103 }
 104
 105 # elif defined(__NetBSD__)
 106
 107 // NetBSD has a bug: setcontext clobbers tlsbase, we need to save
 108 // and restore it ourselves.
 109
 110 static __thread __greg_t tlsbase;
 111
 112 static inline void
 113 initcontext(void)
 114 {
 115         ucontext_t c;
 116
 117         getcontext(&c);
 118         tlsbase = c.uc_mcontext._mc_tlsbase;
 119 }
 120
 121 static inline void
 122 fixcontext(ucontext_t* c)
 123 {
 124         c->uc_mcontext._mc_tlsbase = tlsbase;
 125 }
 126
 127 # elif defined(__sparc__)
 128
 129 static inline void
 130 initcontext(void)
 131 {
 132 }
 133
 134 static inline void
 135 fixcontext(ucontext_t *c)
 136 {
 137         /* ??? Using
 138              register unsigned long thread __asm__("%g7");
 139              c->uc_mcontext.gregs[REG_G7] = thread;
 140            results in
 141              error: variable ‘thread’ might be clobbered by \
 142                 ‘longjmp’ or ‘vfork’ [-Werror=clobbered]
 143            which ought to be false, as %g7 is a fixed register.  */
 144
 145         if (sizeof (c->uc_mcontext.gregs[REG_G7]) == 8)
 146                 asm ("stx %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
 147         else
 148                 asm ("st %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
 149 }
 150
 151 # else
 152
 153 #  error unknown case for SETCONTEXT_CLOBBERS_TLS
 154
 155 # endif
 156
 157 #endif
 158
 159 // ucontext_arg returns a properly aligned ucontext_t value.  On some
 160 // systems a ucontext_t value must be aligned to a 16-byte boundary.
 161 // The g structure that has fields of type ucontext_t is defined in
 162 // Go, and Go has no simple way to align a field to such a boundary.
 163 // So we make the field larger in runtime2.go and pick an appropriate
 164 // offset within the field here.
 165 static ucontext_t*
 166 ucontext_arg(void** go_ucontext)
 167 {
 168         uintptr_t p = (uintptr_t)go_ucontext;
 169         size_t align = __alignof__(ucontext_t);
 170         if(align > 16) {
 171                 // We only ensured space for up to a 16 byte alignment
 172                 // in libgo/go/runtime/runtime2.go.
 173                 runtime_throw("required alignment of ucontext_t too large");
 174         }
 175         p = (p + align - 1) &~ (uintptr_t)(align - 1);
 176         return (ucontext_t*)p;
 177 }
 178
 179 // We can not always refer to the TLS variables directly.  The
 180 // compiler will call tls_get_addr to get the address of the variable,
 181 // and it may hold it in a register across a call to schedule.  When
 182 // we get back from the call we may be running in a different thread,
 183 // in which case the register now points to the TLS variable for a
 184 // different thread.  We use non-inlinable functions to avoid this
 185 // when necessary.
 186
 187 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
 188
 189 G*
 190 runtime_g(void)
 191 {
 192         return g;
 193 }
 194
 195 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
 196
 197 M*
 198 runtime_m(void)
 199 {
 200         if(g == nil)
 201                 return nil;
 202         return g->m;
 203 }
 204
 205 // Set g.
 206 void
 207 runtime_setg(G* gp)
 208 {
 209         g = gp;
 210 }
 211
 212 // Start a new thread.
 213 static void
 214 runtime_newosproc(M *mp)
 215 {
 216         pthread_attr_t attr;
 217         sigset_t clear, old;
 218         pthread_t tid;
 219         int ret;
 220
 221         if(pthread_attr_init(&attr) != 0)
 222                 runtime_throw("pthread_attr_init");
 223         if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
 224                 runtime_throw("pthread_attr_setdetachstate");
 225
 226         // Block signals during pthread_create so that the new thread
 227         // starts with signals disabled.  It will enable them in minit.
 228         sigfillset(&clear);
 229
 230 #ifdef SIGTRAP
 231         // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
 232         sigdelset(&clear, SIGTRAP);
 233 #endif
 234
 235         sigemptyset(&old);
 236         pthread_sigmask(SIG_BLOCK, &clear, &old);
 237         ret = pthread_create(&tid, &attr, runtime_mstart, mp);
 238         pthread_sigmask(SIG_SETMASK, &old, nil);
 239
 240         if (ret != 0)
 241                 runtime_throw("pthread_create");
 242 }
 243
 244 // First function run by a new goroutine.  This replaces gogocall.
 245 static void
 246 kickoff(void)
 247 {
 248         void (*fn)(void*);
 249         void *param;
 250
 251         if(g->traceback != nil)
 252                 gtraceback(g);
 253
 254         fn = (void (*)(void*))(g->entry);
 255         param = g->param;
 256         g->param = nil;
 257         fn(param);
 258         runtime_goexit1();
 259 }
 260
 261 // Switch context to a different goroutine.  This is like longjmp.
 262 void runtime_gogo(G*) __attribute__ ((noinline));
 263 void
 264 runtime_gogo(G* newg)
 265 {
 266 #ifdef USING_SPLIT_STACK
 267         __splitstack_setcontext(&newg->stackcontext[0]);
 268 #endif
 269         g = newg;
 270         newg->fromgogo = true;
 271         fixcontext(ucontext_arg(&newg->context[0]));
 272         setcontext(ucontext_arg(&newg->context[0]));
 273         runtime_throw("gogo setcontext returned");
 274 }
 275
 276 // Save context and call fn passing g as a parameter.  This is like
 277 // setjmp.  Because getcontext always returns 0, unlike setjmp, we use
 278 // g->fromgogo as a code.  It will be true if we got here via
 279 // setcontext.  g == nil the first time this is called in a new m.
 280 void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
 281 void
 282 runtime_mcall(void (*pfn)(G*))
 283 {
 284         M *mp;
 285         G *gp;
 286 #ifndef USING_SPLIT_STACK
 287         void *afterregs;
 288 #endif
 289
 290         // Ensure that all registers are on the stack for the garbage
 291         // collector.
 292         __builtin_unwind_init();
 293
 294         gp = g;
 295         mp = gp->m;
 296         if(gp == mp->g0)
 297                 runtime_throw("runtime: mcall called on m->g0 stack");
 298
 299         if(gp != nil) {
 300
 301 #ifdef USING_SPLIT_STACK
 302                 __splitstack_getcontext(&g->stackcontext[0]);
 303 #else
 304                 // We have to point to an address on the stack that is
 305                 // below the saved registers.
 306                 gp->gcnextsp = &afterregs;
 307 #endif
 308                 gp->fromgogo = false;
 309                 getcontext(ucontext_arg(&gp->context[0]));
 310
 311                 // When we return from getcontext, we may be running
 312                 // in a new thread.  That means that g may have
 313                 // changed.  It is a global variables so we will
 314                 // reload it, but the address of g may be cached in
 315                 // our local stack frame, and that address may be
 316                 // wrong.  Call the function to reload the value for
 317                 // this thread.
 318                 gp = runtime_g();
 319                 mp = gp->m;
 320
 321                 if(gp->traceback != nil)
 322                         gtraceback(gp);
 323         }
 324         if (gp == nil || !gp->fromgogo) {
 325 #ifdef USING_SPLIT_STACK
 326                 __splitstack_setcontext(&mp->g0->stackcontext[0]);
 327 #endif
 328                 mp->g0->entry = (byte*)pfn;
 329                 mp->g0->param = gp;
 330
 331                 // It's OK to set g directly here because this case
 332                 // can not occur if we got here via a setcontext to
 333                 // the getcontext call just above.
 334                 g = mp->g0;
 335
 336                 fixcontext(ucontext_arg(&mp->g0->context[0]));
 337                 setcontext(ucontext_arg(&mp->g0->context[0]));
 338                 runtime_throw("runtime: mcall function returned");
 339         }
 340 }
 341
 342 // Goroutine scheduler
 343 // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
 344 //
 345 // The main concepts are:
 346 // G - goroutine.
 347 // M - worker thread, or machine.
 348 // P - processor, a resource that is required to execute Go code.
 349 //     M must have an associated P to execute Go code, however it can be
 350 //     blocked or in a syscall w/o an associated P.
 351 //
 352 // Design doc at http://golang.org/s/go11sched.
 353
 354 enum
 355 {
 356         // Number of goroutine ids to grab from runtime_sched->goidgen to local per-P cache at once.
 357         // 16 seems to provide enough amortization, but other than that it's mostly arbitrary number.
 358         GoidCacheBatch = 16,
 359 };
 360
 361 extern Sched* runtime_getsched() __asm__ (GOSYM_PREFIX "runtime.getsched");
 362
 363 Sched*  runtime_sched;
 364 int32   runtime_gomaxprocs;
 365 uint32  runtime_needextram = 1;
 366 M       runtime_m0;
 367 G       runtime_g0;     // idle goroutine for m0
 368 G*      runtime_lastg;
 369 M*      runtime_allm;
 370 P**     runtime_allp;
 371 M*      runtime_extram;
 372 int8*   runtime_goos;
 373 int32   runtime_ncpu;
 374 bool    runtime_precisestack;
 375 static int32    newprocs;
 376
 377 static  Lock allglock;  // the following vars are protected by this lock or by stoptheworld
 378 G**     runtime_allg;
 379 uintptr runtime_allglen;
 380 static  uintptr allgcap;
 381
 382 bool    runtime_isarchive;
 383
 384 void* runtime_mstart(void*);
 385 static void runqput(P*, G*);
 386 static G* runqget(P*);
 387 static bool runqputslow(P*, G*, uint32, uint32);
 388 static G* runqsteal(P*, P*);
 389 static void mput(M*);
 390 static M* mget(void);
 391 static void mcommoninit(M*);
 392 static void schedule(void);
 393 static void procresize(int32);
 394 static void acquirep(P*);
 395 static P* releasep(void);
 396 static void newm(void(*)(void), P*);
 397 static void stopm(void);
 398 static void startm(P*, bool);
 399 static void handoffp(P*);
 400 static void wakep(void);
 401 static void stoplockedm(void);
 402 static void startlockedm(G*);
 403 static void sysmon(void);
 404 static uint32 retake(int64);
 405 static void incidlelocked(int32);
 406 static void checkdead(void);
 407 static void exitsyscall0(G*);
 408 static void park0(G*);
 409 static void goexit0(G*);
 410 static void gfput(P*, G*);
 411 static G* gfget(P*);
 412 static void gfpurge(P*);
 413 static void globrunqput(G*);
 414 static void globrunqputbatch(G*, G*, int32);
 415 static G* globrunqget(P*, int32);
 416 static P* pidleget(void);
 417 static void pidleput(P*);
 418 static void injectglist(G*);
 419 static bool preemptall(void);
 420 static bool exitsyscallfast(void);
 421 static void allgadd(G*);
 422
 423 bool runtime_isstarted;
 424
 425 // The bootstrap sequence is:
 426 //
 427 //      call osinit
 428 //      call schedinit
 429 //      make & queue new G
 430 //      call runtime_mstart
 431 //
 432 // The new G calls runtime_main.
 433 void
 434 runtime_schedinit(void)
 435 {
 436         M *m;
 437         int32 n, procs;
 438         String s;
 439         const byte *p;
 440         Eface i;
 441
 442         runtime_sched = runtime_getsched();
 443
 444         m = &runtime_m0;
 445         g = &runtime_g0;
 446         m->g0 = g;
 447         m->curg = g;
 448         g->m = m;
 449
 450         initcontext();
 451
 452         runtime_sched->maxmcount = 10000;
 453         runtime_precisestack = 0;
 454
 455         // runtime_symtabinit();
 456         runtime_mallocinit();
 457         mcommoninit(m);
 458
 459         // Initialize the itable value for newErrorCString,
 460         // so that the next time it gets called, possibly
 461         // in a fault during a garbage collection, it will not
 462         // need to allocated memory.
 463         runtime_newErrorCString(0, &i);
 464
 465         // Initialize the cached gotraceback value, since
 466         // gotraceback calls getenv, which mallocs on Plan 9.
 467         runtime_gotraceback(nil);
 468
 469         runtime_goargs();
 470         runtime_goenvs();
 471         runtime_parsedebugvars();
 472
 473         runtime_sched->lastpoll = runtime_nanotime();
 474         procs = 1;
 475         s = runtime_getenv("GOMAXPROCS");
 476         p = s.str;
 477         if(p != nil && (n = runtime_atoi(p, s.len)) > 0) {
 478                 if(n > _MaxGomaxprocs)
 479                         n = _MaxGomaxprocs;
 480                 procs = n;
 481         }
 482         runtime_allp = runtime_malloc((_MaxGomaxprocs+1)*sizeof(runtime_allp[0]));
 483         procresize(procs);
 484
 485         // Can not enable GC until all roots are registered.
 486         // mstats()->enablegc = 1;
 487 }
 488
 489 extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
 490 extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");
 491
 492 // Used to determine the field alignment.
 493
 494 struct field_align
 495 {
 496   char c;
 497   Hchan *p;
 498 };
 499
 500 // main_init_done is a signal used by cgocallbackg that initialization
 501 // has been completed.  It is made before _cgo_notify_runtime_init_done,
 502 // so all cgo calls can rely on it existing.  When main_init is
 503 // complete, it is closed, meaning cgocallbackg can reliably receive
 504 // from it.
 505 Hchan *runtime_main_init_done;
 506
 507 // The chan bool type, for runtime_main_init_done.
 508
 509 extern const struct __go_type_descriptor bool_type_descriptor
 510   __asm__ (GOSYM_PREFIX "__go_tdn_bool");
 511
 512 static struct __go_channel_type chan_bool_type_descriptor =
 513   {
 514     /* __common */
 515     {
 516       /* __code */
 517       GO_CHAN,
 518       /* __align */
 519       __alignof (Hchan *),
 520       /* __field_align */
 521       offsetof (struct field_align, p) - 1,
 522       /* __size */
 523       sizeof (Hchan *),
 524       /* __hash */
 525       0, /* This value doesn't matter.  */
 526       /* __hashfn */
 527       NULL,
 528       /* __equalfn */
 529       NULL,
 530       /* __gc */
 531       NULL, /* This value doesn't matter */
 532       /* __reflection */
 533       NULL, /* This value doesn't matter */
 534       /* __uncommon */
 535       NULL,
 536       /* __pointer_to_this */
 537       NULL
 538     },
 539     /* __element_type */
 540     &bool_type_descriptor,
 541     /* __dir */
 542     CHANNEL_BOTH_DIR
 543   };
 544
 545 extern Hchan *makechan (ChanType *, int64)
 546   __asm__ (GOSYM_PREFIX "runtime.makechan");
 547 extern void closechan(Hchan *) __asm__ (GOSYM_PREFIX "runtime.closechan");
 548
 549 static void
 550 initDone(void *arg __attribute__ ((unused))) {
 551         runtime_unlockOSThread();
 552 };
 553
 554 // The main goroutine.
 555 // Note: C frames in general are not copyable during stack growth, for two reasons:
 556 //   1) We don't know where in a frame to find pointers to other stack locations.
 557 //   2) There's no guarantee that globals or heap values do not point into the frame.
 558 //
 559 // The C frame for runtime.main is copyable, because:
 560 //   1) There are no pointers to other stack locations in the frame
 561 //      (d.fn points at a global, d.link is nil, d.argp is -1).
 562 //   2) The only pointer into this frame is from the defer chain,
 563 //      which is explicitly handled during stack copying.
 564 void
 565 runtime_main(void* dummy __attribute__((unused)))
 566 {
 567         Defer d;
 568         _Bool frame;
 569
 570         newm(sysmon, nil);
 571
 572         // Lock the main goroutine onto this, the main OS thread,
 573         // during initialization.  Most programs won't care, but a few
 574         // do require certain calls to be made by the main thread.
 575         // Those can arrange for main.main to run in the main thread
 576         // by calling runtime.LockOSThread during initialization
 577         // to preserve the lock.
 578         runtime_lockOSThread();
 579
 580         // Defer unlock so that runtime.Goexit during init does the unlock too.
 581         d.pfn = (uintptr)(void*)initDone;
 582         d.link = g->_defer;
 583         d.arg = (void*)-1;
 584         d._panic = g->_panic;
 585         d.retaddr = 0;
 586         d.makefunccanrecover = 0;
 587         d.frame = &frame;
 588         d.special = true;
 589         g->_defer = &d;
 590
 591         if(g->m != &runtime_m0)
 592                 runtime_throw("runtime_main not on m0");
 593         __go_go(runtime_MHeap_Scavenger, nil);
 594
 595         runtime_main_init_done = makechan(&chan_bool_type_descriptor, 0);
 596
 597         _cgo_notify_runtime_init_done();
 598
 599         main_init();
 600
 601         closechan(runtime_main_init_done);
 602
 603         if(g->_defer != &d || (void*)d.pfn != initDone)
 604                 runtime_throw("runtime: bad defer entry after init");
 605         g->_defer = d.link;
 606         runtime_unlockOSThread();
 607
 608         // For gccgo we have to wait until after main is initialized
 609         // to enable GC, because initializing main registers the GC
 610         // roots.
 611         mstats()->enablegc = 1;
 612
 613         if(runtime_isarchive) {
 614                 // This is not a complete program, but is instead a
 615                 // library built using -buildmode=c-archive or
 616                 // c-shared.  Now that we are initialized, there is
 617                 // nothing further to do.
 618                 return;
 619         }
 620
 621         main_main();
 622
 623         // Make racy client program work: if panicking on
 624         // another goroutine at the same time as main returns,
 625         // let the other goroutine finish printing the panic trace.
 626         // Once it does, it will exit. See issue 3934.
 627         if(runtime_panicking())
 628                 runtime_park(nil, nil, "panicwait");
 629
 630         runtime_exit(0);
 631         for(;;)
 632                 *(int32*)0 = 0;
 633 }
 634
 635 void
 636 runtime_tracebackothers(G * volatile me)
 637 {
 638         G * volatile gp;
 639         Traceback tb;
 640         int32 traceback;
 641         Slice slice;
 642         volatile uintptr i;
 643
 644         tb.gp = me;
 645         traceback = runtime_gotraceback(nil);
 646
 647         // Show the current goroutine first, if we haven't already.
 648         if((gp = g->m->curg) != nil && gp != me) {
 649                 runtime_printf("\n");
 650                 runtime_goroutineheader(gp);
 651                 gp->traceback = &tb;
 652
 653 #ifdef USING_SPLIT_STACK
 654                 __splitstack_getcontext(&me->stackcontext[0]);
 655 #endif
 656                 getcontext(ucontext_arg(&me->context[0]));
 657
 658                 if(gp->traceback != nil) {
 659                   runtime_gogo(gp);
 660                 }
 661
 662                 slice.__values = &tb.locbuf[0];
 663                 slice.__count = tb.c;
 664                 slice.__capacity = tb.c;
 665                 runtime_printtrace(slice, nil);
 666                 runtime_printcreatedby(gp);
 667         }
 668
 669         runtime_lock(&allglock);
 670         for(i = 0; i < runtime_allglen; i++) {
 671                 gp = runtime_allg[i];
 672                 if(gp == me || gp == g->m->curg || gp->atomicstatus == _Gdead)
 673                         continue;
 674                 if(gp->issystem && traceback < 2)
 675                         continue;
 676                 runtime_printf("\n");
 677                 runtime_goroutineheader(gp);
 678
 679                 // Our only mechanism for doing a stack trace is
 680                 // _Unwind_Backtrace.  And that only works for the
 681                 // current thread, not for other random goroutines.
 682                 // So we need to switch context to the goroutine, get
 683                 // the backtrace, and then switch back.
 684
 685                 // This means that if g is running or in a syscall, we
 686                 // can't reliably print a stack trace.  FIXME.
 687
 688                 if(gp->atomicstatus == _Grunning) {
 689                         runtime_printf("\tgoroutine running on other thread; stack unavailable\n");
 690                         runtime_printcreatedby(gp);
 691                 } else if(gp->atomicstatus == _Gsyscall) {
 692                         runtime_printf("\tgoroutine in C code; stack unavailable\n");
 693                         runtime_printcreatedby(gp);
 694                 } else {
 695                         gp->traceback = &tb;
 696
 697 #ifdef USING_SPLIT_STACK
 698                         __splitstack_getcontext(&me->stackcontext[0]);
 699 #endif
 700                         getcontext(ucontext_arg(&me->context[0]));
 701
 702                         if(gp->traceback != nil) {
 703                                 runtime_gogo(gp);
 704                         }
 705
 706                         slice.__values = &tb.locbuf[0];
 707                         slice.__count = tb.c;
 708                         slice.__capacity = tb.c;
 709                         runtime_printtrace(slice, nil);
 710                         runtime_printcreatedby(gp);
 711                 }
 712         }
 713         runtime_unlock(&allglock);
 714 }
 715
 716 static void
 717 checkmcount(void)
 718 {
 719         // sched lock is held
 720         if(runtime_sched->mcount > runtime_sched->maxmcount) {
 721                 runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched->maxmcount);
 722                 runtime_throw("thread exhaustion");
 723         }
 724 }
 725
 726 // Do a stack trace of gp, and then restore the context to
 727 // gp->dotraceback.
 728
 729 static void
 730 gtraceback(G* gp)
 731 {
 732         Traceback* traceback;
 733
 734         traceback = gp->traceback;
 735         gp->traceback = nil;
 736         if(gp->m != nil)
 737                 runtime_throw("gtraceback: m is not nil");
 738         gp->m = traceback->gp->m;
 739         traceback->c = runtime_callers(1, traceback->locbuf,
 740                 sizeof traceback->locbuf / sizeof traceback->locbuf[0], false);
 741         gp->m = nil;
 742         runtime_gogo(traceback->gp);
 743 }
 744
 745 static void
 746 mcommoninit(M *mp)
 747 {
 748         // If there is no mcache runtime_callers() will crash,
 749         // and we are most likely in sysmon thread so the stack is senseless anyway.
 750         if(g->m->mcache)
 751                 runtime_callers(1, mp->createstack, nelem(mp->createstack), false);
 752
 753         mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
 754
 755         runtime_lock(&runtime_sched->lock);
 756         mp->id = runtime_sched->mcount++;
 757         checkmcount();
 758         runtime_mpreinit(mp);
 759
 760         // Add to runtime_allm so garbage collector doesn't free m
 761         // when it is just in a register or thread-local storage.
 762         mp->alllink = runtime_allm;
 763         // runtime_NumCgoCall() iterates over allm w/o schedlock,
 764         // so we need to publish it safely.
 765         runtime_atomicstorep(&runtime_allm, mp);
 766         runtime_unlock(&runtime_sched->lock);
 767 }
 768
 769 // Mark gp ready to run.
 770 void
 771 runtime_ready(G *gp)
 772 {
 773         // Mark runnable.
 774         g->m->locks++;  // disable preemption because it can be holding p in a local var
 775         if(gp->atomicstatus != _Gwaiting) {
 776                 runtime_printf("goroutine %D has status %d\n", gp->goid, gp->atomicstatus);
 777                 runtime_throw("bad g->atomicstatus in ready");
 778         }
 779         gp->atomicstatus = _Grunnable;
 780         runqput((P*)g->m->p, gp);
 781         if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0)  // TODO: fast atomic
 782                 wakep();
 783         g->m->locks--;
 784 }
 785
 786 void goready(G*, int) __asm__ (GOSYM_PREFIX "runtime.goready");
 787
 788 void
 789 goready(G* gp, int traceskip __attribute__ ((unused)))
 790 {
 791         runtime_ready(gp);
 792 }
 793
 794 int32
 795 runtime_gcprocs(void)
 796 {
 797         int32 n;
 798
 799         // Figure out how many CPUs to use during GC.
 800         // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
 801         runtime_lock(&runtime_sched->lock);
 802         n = runtime_gomaxprocs;
 803         if(n > runtime_ncpu)
 804                 n = runtime_ncpu > 0 ? runtime_ncpu : 1;
 805         if(n > MaxGcproc)
 806                 n = MaxGcproc;
 807         if(n > runtime_sched->nmidle+1) // one M is currently running
 808                 n = runtime_sched->nmidle+1;
 809         runtime_unlock(&runtime_sched->lock);
 810         return n;
 811 }
 812
 813 static bool
 814 needaddgcproc(void)
 815 {
 816         int32 n;
 817
 818         runtime_lock(&runtime_sched->lock);
 819         n = runtime_gomaxprocs;
 820         if(n > runtime_ncpu)
 821                 n = runtime_ncpu;
 822         if(n > MaxGcproc)
 823                 n = MaxGcproc;
 824         n -= runtime_sched->nmidle+1; // one M is currently running
 825         runtime_unlock(&runtime_sched->lock);
 826         return n > 0;
 827 }
 828
 829 void
 830 runtime_helpgc(int32 nproc)
 831 {
 832         M *mp;
 833         int32 n, pos;
 834
 835         runtime_lock(&runtime_sched->lock);
 836         pos = 0;
 837         for(n = 1; n < nproc; n++) {  // one M is currently running
 838                 if(runtime_allp[pos]->mcache == g->m->mcache)
 839                         pos++;
 840                 mp = mget();
 841                 if(mp == nil)
 842                         runtime_throw("runtime_gcprocs inconsistency");
 843                 mp->helpgc = n;
 844                 mp->mcache = runtime_allp[pos]->mcache;
 845                 pos++;
 846                 runtime_notewakeup(&mp->park);
 847         }
 848         runtime_unlock(&runtime_sched->lock);
 849 }
 850
 851 // Similar to stoptheworld but best-effort and can be called several times.
 852 // There is no reverse operation, used during crashing.
 853 // This function must not lock any mutexes.
 854 void
 855 runtime_freezetheworld(void)
 856 {
 857         int32 i;
 858
 859         if(runtime_gomaxprocs == 1)
 860                 return;
 861         // stopwait and preemption requests can be lost
 862         // due to races with concurrently executing threads,
 863         // so try several times
 864         for(i = 0; i < 5; i++) {
 865                 // this should tell the scheduler to not start any new goroutines
 866                 runtime_sched->stopwait = 0x7fffffff;
 867                 runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1);
 868                 // this should stop running goroutines
 869                 if(!preemptall())
 870                         break;  // no running goroutines
 871                 runtime_usleep(1000);
 872         }
 873         // to be sure
 874         runtime_usleep(1000);
 875         preemptall();
 876         runtime_usleep(1000);
 877 }
 878
 879 void
 880 runtime_stopTheWorldWithSema(void)
 881 {
 882         int32 i;
 883         uint32 s;
 884         P *p;
 885         bool wait;
 886
 887         runtime_lock(&runtime_sched->lock);
 888         runtime_sched->stopwait = runtime_gomaxprocs;
 889         runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1);
 890         preemptall();
 891         // stop current P
 892         ((P*)g->m->p)->status = _Pgcstop;
 893         runtime_sched->stopwait--;
 894         // try to retake all P's in _Psyscall status
 895         for(i = 0; i < runtime_gomaxprocs; i++) {
 896                 p = runtime_allp[i];
 897                 s = p->status;
 898                 if(s == _Psyscall && runtime_cas(&p->status, s, _Pgcstop))
 899                         runtime_sched->stopwait--;
 900         }
 901         // stop idle P's
 902         while((p = pidleget()) != nil) {
 903                 p->status = _Pgcstop;
 904                 runtime_sched->stopwait--;
 905         }
 906         wait = runtime_sched->stopwait > 0;
 907         runtime_unlock(&runtime_sched->lock);
 908
 909         // wait for remaining P's to stop voluntarily
 910         if(wait) {
 911                 runtime_notesleep(&runtime_sched->stopnote);
 912                 runtime_noteclear(&runtime_sched->stopnote);
 913         }
 914         if(runtime_sched->stopwait)
 915                 runtime_throw("stoptheworld: not stopped");
 916         for(i = 0; i < runtime_gomaxprocs; i++) {
 917                 p = runtime_allp[i];
 918                 if(p->status != _Pgcstop)
 919                         runtime_throw("stoptheworld: not stopped");
 920         }
 921 }
 922
 923 static void
 924 mhelpgc(void)
 925 {
 926         g->m->helpgc = -1;
 927 }
 928
 929 void
 930 runtime_startTheWorldWithSema(void)
 931 {
 932         P *p, *p1;
 933         M *mp;
 934         G *gp;
 935         bool add;
 936
 937         g->m->locks++;  // disable preemption because it can be holding p in a local var
 938         gp = runtime_netpoll(false);  // non-blocking
 939         injectglist(gp);
 940         add = needaddgcproc();
 941         runtime_lock(&runtime_sched->lock);
 942         if(newprocs) {
 943                 procresize(newprocs);
 944                 newprocs = 0;
 945         } else
 946                 procresize(runtime_gomaxprocs);
 947         runtime_sched->gcwaiting = 0;
 948
 949         p1 = nil;
 950         while((p = pidleget()) != nil) {
 951                 // procresize() puts p's with work at the beginning of the list.
 952                 // Once we reach a p without a run queue, the rest don't have one either.
 953                 if(p->runqhead == p->runqtail) {
 954                         pidleput(p);
 955                         break;
 956                 }
 957                 p->m = (uintptr)mget();
 958                 p->link = (uintptr)p1;
 959                 p1 = p;
 960         }
 961         if(runtime_sched->sysmonwait) {
 962                 runtime_sched->sysmonwait = false;
 963                 runtime_notewakeup(&runtime_sched->sysmonnote);
 964         }
 965         runtime_unlock(&runtime_sched->lock);
 966
 967         while(p1) {
 968                 p = p1;
 969                 p1 = (P*)p1->link;
 970                 if(p->m) {
 971                         mp = (M*)p->m;
 972                         p->m = 0;
 973                         if(mp->nextp)
 974                                 runtime_throw("startTheWorldWithSema: inconsistent mp->nextp");
 975                         mp->nextp = (uintptr)p;
 976                         runtime_notewakeup(&mp->park);
 977                 } else {
 978                         // Start M to run P.  Do not start another M below.
 979                         newm(nil, p);
 980                         add = false;
 981                 }
 982         }
 983
 984         if(add) {
 985                 // If GC could have used another helper proc, start one now,
 986                 // in the hope that it will be available next time.
 987                 // It would have been even better to start it before the collection,
 988                 // but doing so requires allocating memory, so it's tricky to
 989                 // coordinate.  This lazy approach works out in practice:
 990                 // we don't mind if the first couple gc rounds don't have quite
 991                 // the maximum number of procs.
 992                 newm(mhelpgc, nil);
 993         }
 994         g->m->locks--;
 995 }
 996
 997 // Called to start an M.
 998 void*
 999 runtime_mstart(void* mp)
1000 {
1001         M *m;
1002
1003         m = (M*)mp;
1004         g = m->g0;
1005         g->m = m;
1006
1007         initcontext();
1008
1009         g->entry = nil;
1010         g->param = nil;
1011
1012         // Record top of stack for use by mcall.
1013         // Once we call schedule we're never coming back,
1014         // so other calls can reuse this stack space.
1015 #ifdef USING_SPLIT_STACK
1016         __splitstack_getcontext(&g->stackcontext[0]);
1017 #else
1018         g->gcinitialsp = &mp;
1019         // Setting gcstacksize to 0 is a marker meaning that gcinitialsp
1020         // is the top of the stack, not the bottom.
1021         g->gcstacksize = 0;
1022         g->gcnextsp = &mp;
1023 #endif
1024         getcontext(ucontext_arg(&g->context[0]));
1025
1026         if(g->entry != nil) {
1027                 // Got here from mcall.
1028                 void (*pfn)(G*) = (void (*)(G*))g->entry;
1029                 G* gp = (G*)g->param;
1030                 pfn(gp);
1031                 *(int*)0x21 = 0x21;
1032         }
1033         runtime_minit();
1034
1035 #ifdef USING_SPLIT_STACK
1036         {
1037                 int dont_block_signals = 0;
1038                 __splitstack_block_signals(&dont_block_signals, nil);
1039         }
1040 #endif
1041
1042         // Install signal handlers; after minit so that minit can
1043         // prepare the thread to be able to handle the signals.
1044         if(m == &runtime_m0) {
1045                 if(runtime_iscgo && !runtime_cgoHasExtraM) {
1046                         runtime_cgoHasExtraM = true;
1047                         runtime_newextram();
1048                         runtime_needextram = 0;
1049                 }
1050                 runtime_initsig(false);
1051         }
1052
1053         if(m->mstartfn)
1054                 ((void (*)(void))m->mstartfn)();
1055
1056         if(m->helpgc) {
1057                 m->helpgc = 0;
1058                 stopm();
1059         } else if(m != &runtime_m0) {
1060                 acquirep((P*)m->nextp);
1061                 m->nextp = 0;
1062         }
1063         schedule();
1064
1065         // TODO(brainman): This point is never reached, because scheduler
1066         // does not release os threads at the moment. But once this path
1067         // is enabled, we must remove our seh here.
1068
1069         return nil;
1070 }
1071
1072 typedef struct CgoThreadStart CgoThreadStart;
1073 struct CgoThreadStart
1074 {
1075         M *m;
1076         G *g;
1077         uintptr *tls;
1078         void (*fn)(void);
1079 };
1080
1081 // Allocate a new m unassociated with any thread.
1082 // Can use p for allocation context if needed.
1083 M*
1084 runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, uintptr* ret_g0_stacksize)
1085 {
1086         M *mp;
1087
1088         g->m->locks++;  // disable GC because it can be called from sysmon
1089         if(g->m->p == 0)
1090                 acquirep(p);  // temporarily borrow p for mallocs in this function
1091 #if 0
1092         if(mtype == nil) {
1093                 Eface e;
1094                 runtime_gc_m_ptr(&e);
1095                 mtype = ((const PtrType*)e.__type_descriptor)->__element_type;
1096         }
1097 #endif
1098
1099         mp = runtime_mal(sizeof *mp);
1100         mcommoninit(mp);
1101         mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize);
1102         mp->g0->m = mp;
1103
1104         if(p == (P*)g->m->p)
1105                 releasep();
1106         g->m->locks--;
1107
1108         return mp;
1109 }
1110
1111 static G*
1112 allocg(void)
1113 {
1114         G *gp;
1115         // static Type *gtype;
1116
1117         // if(gtype == nil) {
1118         //      Eface e;
1119         //      runtime_gc_g_ptr(&e);
1120         //      gtype = ((PtrType*)e.__type_descriptor)->__element_type;
1121         // }
1122         // gp = runtime_cnew(gtype);
1123         gp = runtime_malloc(sizeof(G));
1124         return gp;
1125 }
1126
1127 static M* lockextra(bool nilokay);
1128 static void unlockextra(M*);
1129
1130 // needm is called when a cgo callback happens on a
1131 // thread without an m (a thread not created by Go).
1132 // In this case, needm is expected to find an m to use
1133 // and return with m, g initialized correctly.
1134 // Since m and g are not set now (likely nil, but see below)
1135 // needm is limited in what routines it can call. In particular
1136 // it can only call nosplit functions (textflag 7) and cannot
1137 // do any scheduling that requires an m.
1138 //
1139 // In order to avoid needing heavy lifting here, we adopt
1140 // the following strategy: there is a stack of available m's
1141 // that can be stolen. Using compare-and-swap
1142 // to pop from the stack has ABA races, so we simulate
1143 // a lock by doing an exchange (via casp) to steal the stack
1144 // head and replace the top pointer with MLOCKED (1).
1145 // This serves as a simple spin lock that we can use even
1146 // without an m. The thread that locks the stack in this way
1147 // unlocks the stack by storing a valid stack head pointer.
1148 //
1149 // In order to make sure that there is always an m structure
1150 // available to be stolen, we maintain the invariant that there
1151 // is always one more than needed. At the beginning of the
1152 // program (if cgo is in use) the list is seeded with a single m.
1153 // If needm finds that it has taken the last m off the list, its job
1154 // is - once it has installed its own m so that it can do things like
1155 // allocate memory - to create a spare m and put it on the list.
1156 //
1157 // Each of these extra m's also has a g0 and a curg that are
1158 // pressed into service as the scheduling stack and current
1159 // goroutine for the duration of the cgo callback.
1160 //
1161 // When the callback is done with the m, it calls dropm to
1162 // put the m back on the list.
1163 //
1164 // Unlike the gc toolchain, we start running on curg, since we are
1165 // just going to return and let the caller continue.
1166 void
1167 runtime_needm(void)
1168 {
1169         M *mp;
1170
1171         if(runtime_needextram) {
1172                 // Can happen if C/C++ code calls Go from a global ctor.
1173                 // Can not throw, because scheduler is not initialized yet.
1174                 int rv __attribute__((unused));
1175                 rv = runtime_write(2, "fatal error: cgo callback before cgo call\n",
1176                         sizeof("fatal error: cgo callback before cgo call\n")-1);
1177                 runtime_exit(1);
1178         }
1179
1180         // Lock extra list, take head, unlock popped list.
1181         // nilokay=false is safe here because of the invariant above,
1182         // that the extra list always contains or will soon contain
1183         // at least one m.
1184         mp = lockextra(false);
1185
1186         // Set needextram when we've just emptied the list,
1187         // so that the eventual call into cgocallbackg will
1188         // allocate a new m for the extra list. We delay the
1189         // allocation until then so that it can be done
1190         // after exitsyscall makes sure it is okay to be
1191         // running at all (that is, there's no garbage collection
1192         // running right now).
1193         mp->needextram = mp->schedlink == 0;
1194         unlockextra((M*)mp->schedlink);
1195
1196         // Install g (= m->curg).
1197         runtime_setg(mp->curg);
1198
1199         // Initialize g's context as in mstart.
1200         initcontext();
1201         g->atomicstatus = _Gsyscall;
1202         g->entry = nil;
1203         g->param = nil;
1204 #ifdef USING_SPLIT_STACK
1205         __splitstack_getcontext(&g->stackcontext[0]);
1206 #else
1207         g->gcinitialsp = &mp;
1208         g->gcstack = nil;
1209         g->gcstacksize = 0;
1210         g->gcnextsp = &mp;
1211 #endif
1212         getcontext(ucontext_arg(&g->context[0]));
1213
1214         if(g->entry != nil) {
1215                 // Got here from mcall.
1216                 void (*pfn)(G*) = (void (*)(G*))g->entry;
1217                 G* gp = (G*)g->param;
1218                 pfn(gp);
1219                 *(int*)0x22 = 0x22;
1220         }
1221
1222         // Initialize this thread to use the m.
1223         runtime_minit();
1224
1225 #ifdef USING_SPLIT_STACK
1226         {
1227                 int dont_block_signals = 0;
1228                 __splitstack_block_signals(&dont_block_signals, nil);
1229         }
1230 #endif
1231 }
1232
1233 // newextram allocates an m and puts it on the extra list.
1234 // It is called with a working local m, so that it can do things
1235 // like call schedlock and allocate.
1236 void
1237 runtime_newextram(void)
1238 {
1239         M *mp, *mnext;
1240         G *gp;
1241         byte *g0_sp, *sp;
1242         uintptr g0_spsize, spsize;
1243         ucontext_t *uc;
1244
1245         // Create extra goroutine locked to extra m.
1246         // The goroutine is the context in which the cgo callback will run.
1247         // The sched.pc will never be returned to, but setting it to
1248         // runtime.goexit makes clear to the traceback routines where
1249         // the goroutine stack ends.
1250         mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize);
1251         gp = runtime_malg(StackMin, &sp, &spsize);
1252         gp->atomicstatus = _Gdead;
1253         gp->m = mp;
1254         mp->curg = gp;
1255         mp->locked = _LockInternal;
1256         mp->lockedg = gp;
1257         gp->lockedm = mp;
1258         gp->goid = runtime_xadd64(&runtime_sched->goidgen, 1);
1259         // put on allg for garbage collector
1260         allgadd(gp);
1261
1262         // The context for gp will be set up in runtime_needm.  But
1263         // here we need to set up the context for g0.
1264         uc = ucontext_arg(&mp->g0->context[0]);
1265         getcontext(uc);
1266         uc->uc_stack.ss_sp = g0_sp;
1267         uc->uc_stack.ss_size = (size_t)g0_spsize;
1268         makecontext(uc, kickoff, 0);
1269
1270         // Add m to the extra list.
1271         mnext = lockextra(true);
1272         mp->schedlink = (uintptr)mnext;
1273         unlockextra(mp);
1274 }
1275
1276 // dropm is called when a cgo callback has called needm but is now
1277 // done with the callback and returning back into the non-Go thread.
1278 // It puts the current m back onto the extra list.
1279 //
1280 // The main expense here is the call to signalstack to release the
1281 // m's signal stack, and then the call to needm on the next callback
1282 // from this thread. It is tempting to try to save the m for next time,
1283 // which would eliminate both these costs, but there might not be
1284 // a next time: the current thread (which Go does not control) might exit.
1285 // If we saved the m for that thread, there would be an m leak each time
1286 // such a thread exited. Instead, we acquire and release an m on each
1287 // call. These should typically not be scheduling operations, just a few
1288 // atomics, so the cost should be small.
1289 //
1290 // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
1291 // variable using pthread_key_create. Unlike the pthread keys we already use
1292 // on OS X, this dummy key would never be read by Go code. It would exist
1293 // only so that we could register at thread-exit-time destructor.
1294 // That destructor would put the m back onto the extra list.
1295 // This is purely a performance optimization. The current version,
1296 // in which dropm happens on each cgo call, is still correct too.
1297 // We may have to keep the current version on systems with cgo
1298 // but without pthreads, like Windows.
1299 void
1300 runtime_dropm(void)
1301 {
1302         M *mp, *mnext;
1303
1304         // Undo whatever initialization minit did during needm.
1305         runtime_unminit();
1306
1307         // Clear m and g, and return m to the extra list.
1308         // After the call to setg we can only call nosplit functions.
1309         mp = g->m;
1310         runtime_setg(nil);
1311
1312         mp->curg->atomicstatus = _Gdead;
1313         mp->curg->gcstack = nil;
1314         mp->curg->gcnextsp = nil;
1315
1316         mnext = lockextra(true);
1317         mp->schedlink = (uintptr)mnext;
1318         unlockextra(mp);
1319 }
1320
1321 #define MLOCKED ((M*)1)
1322
1323 // lockextra locks the extra list and returns the list head.
1324 // The caller must unlock the list by storing a new list head
1325 // to runtime.extram. If nilokay is true, then lockextra will
1326 // return a nil list head if that's what it finds. If nilokay is false,
1327 // lockextra will keep waiting until the list head is no longer nil.
1328 static M*
1329 lockextra(bool nilokay)
1330 {
1331         M *mp;
1332         void (*yield)(void);
1333
1334         for(;;) {
1335                 mp = runtime_atomicloadp(&runtime_extram);
1336                 if(mp == MLOCKED) {
1337                         yield = runtime_osyield;
1338                         yield();
1339                         continue;
1340                 }
1341                 if(mp == nil && !nilokay) {
1342                         runtime_usleep(1);
1343                         continue;
1344                 }
1345                 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1346                         yield = runtime_osyield;
1347                         yield();
1348                         continue;
1349                 }
1350                 break;
1351         }
1352         return mp;
1353 }
1354
1355 static void
1356 unlockextra(M *mp)
1357 {
1358         runtime_atomicstorep(&runtime_extram, mp);
1359 }
1360
1361 static int32
1362 countextra()
1363 {
1364         M *mp, *mc;
1365         int32 c;
1366
1367         for(;;) {
1368                 mp = runtime_atomicloadp(&runtime_extram);
1369                 if(mp == MLOCKED) {
1370                         runtime_osyield();
1371                         continue;
1372                 }
1373                 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1374                         runtime_osyield();
1375                         continue;
1376                 }
1377                 c = 0;
1378                 for(mc = mp; mc != nil; mc = (M*)mc->schedlink)
1379                         c++;
1380                 runtime_atomicstorep(&runtime_extram, mp);
1381                 return c;
1382         }
1383 }
1384
1385 // Create a new m.  It will start off with a call to fn, or else the scheduler.
1386 static void
1387 newm(void(*fn)(void), P *p)
1388 {
1389         M *mp;
1390
1391         mp = runtime_allocm(p, -1, nil, nil);
1392         mp->nextp = (uintptr)p;
1393         mp->mstartfn = (uintptr)(void*)fn;
1394
1395         runtime_newosproc(mp);
1396 }
1397
1398 // Stops execution of the current m until new work is available.
1399 // Returns with acquired P.
1400 static void
1401 stopm(void)
1402 {
1403         M* m;
1404
1405         m = g->m;
1406         if(m->locks)
1407                 runtime_throw("stopm holding locks");
1408         if(m->p)
1409                 runtime_throw("stopm holding p");
1410         if(m->spinning) {
1411                 m->spinning = false;
1412                 runtime_xadd(&runtime_sched->nmspinning, -1);
1413         }
1414
1415 retry:
1416         runtime_lock(&runtime_sched->lock);
1417         mput(m);
1418         runtime_unlock(&runtime_sched->lock);
1419         runtime_notesleep(&m->park);
1420         m = g->m;
1421         runtime_noteclear(&m->park);
1422         if(m->helpgc) {
1423                 runtime_gchelper();
1424                 m->helpgc = 0;
1425                 m->mcache = nil;
1426                 goto retry;
1427         }
1428         acquirep((P*)m->nextp);
1429         m->nextp = 0;
1430 }
1431
1432 static void
1433 mspinning(void)
1434 {
1435         g->m->spinning = true;
1436 }
1437
1438 // Schedules some M to run the p (creates an M if necessary).
1439 // If p==nil, tries to get an idle P, if no idle P's does nothing.
1440 static void
1441 startm(P *p, bool spinning)
1442 {
1443         M *mp;
1444         void (*fn)(void);
1445
1446         runtime_lock(&runtime_sched->lock);
1447         if(p == nil) {
1448                 p = pidleget();
1449                 if(p == nil) {
1450                         runtime_unlock(&runtime_sched->lock);
1451                         if(spinning)
1452                                 runtime_xadd(&runtime_sched->nmspinning, -1);
1453                         return;
1454                 }
1455         }
1456         mp = mget();
1457         runtime_unlock(&runtime_sched->lock);
1458         if(mp == nil) {
1459                 fn = nil;
1460                 if(spinning)
1461                         fn = mspinning;
1462                 newm(fn, p);
1463                 return;
1464         }
1465         if(mp->spinning)
1466                 runtime_throw("startm: m is spinning");
1467         if(mp->nextp)
1468                 runtime_throw("startm: m has p");
1469         mp->spinning = spinning;
1470         mp->nextp = (uintptr)p;
1471         runtime_notewakeup(&mp->park);
1472 }
1473
1474 // Hands off P from syscall or locked M.
1475 static void
1476 handoffp(P *p)
1477 {
1478         // if it has local work, start it straight away
1479         if(p->runqhead != p->runqtail || runtime_sched->runqsize) {
1480                 startm(p, false);
1481                 return;
1482         }
1483         // no local work, check that there are no spinning/idle M's,
1484         // otherwise our help is not required
1485         if(runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) == 0 &&  // TODO: fast atomic
1486                 runtime_cas(&runtime_sched->nmspinning, 0, 1)) {
1487                 startm(p, true);
1488                 return;
1489         }
1490         runtime_lock(&runtime_sched->lock);
1491         if(runtime_sched->gcwaiting) {
1492                 p->status = _Pgcstop;
1493                 if(--runtime_sched->stopwait == 0)
1494                         runtime_notewakeup(&runtime_sched->stopnote);
1495                 runtime_unlock(&runtime_sched->lock);
1496                 return;
1497         }
1498         if(runtime_sched->runqsize) {
1499                 runtime_unlock(&runtime_sched->lock);
1500                 startm(p, false);
1501                 return;
1502         }
1503         // If this is the last running P and nobody is polling network,
1504         // need to wakeup another M to poll network.
1505         if(runtime_sched->npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched->lastpoll) != 0) {
1506                 runtime_unlock(&runtime_sched->lock);
1507                 startm(p, false);
1508                 return;
1509         }
1510         pidleput(p);
1511         runtime_unlock(&runtime_sched->lock);
1512 }
1513
1514 // Tries to add one more P to execute G's.
1515 // Called when a G is made runnable (newproc, ready).
1516 static void
1517 wakep(void)
1518 {
1519         // be conservative about spinning threads
1520         if(!runtime_cas(&runtime_sched->nmspinning, 0, 1))
1521                 return;
1522         startm(nil, true);
1523 }
1524
1525 // Stops execution of the current m that is locked to a g until the g is runnable again.
1526 // Returns with acquired P.
1527 static void
1528 stoplockedm(void)
1529 {
1530         M *m;
1531         P *p;
1532
1533         m = g->m;
1534         if(m->lockedg == nil || m->lockedg->lockedm != m)
1535                 runtime_throw("stoplockedm: inconsistent locking");
1536         if(m->p) {
1537                 // Schedule another M to run this p.
1538                 p = releasep();
1539                 handoffp(p);
1540         }
1541         incidlelocked(1);
1542         // Wait until another thread schedules lockedg again.
1543         runtime_notesleep(&m->park);
1544         m = g->m;
1545         runtime_noteclear(&m->park);
1546         if(m->lockedg->atomicstatus != _Grunnable)
1547                 runtime_throw("stoplockedm: not runnable");
1548         acquirep((P*)m->nextp);
1549         m->nextp = 0;
1550 }
1551
1552 // Schedules the locked m to run the locked gp.
1553 static void
1554 startlockedm(G *gp)
1555 {
1556         M *mp;
1557         P *p;
1558
1559         mp = gp->lockedm;
1560         if(mp == g->m)
1561                 runtime_throw("startlockedm: locked to me");
1562         if(mp->nextp)
1563                 runtime_throw("startlockedm: m has p");
1564         // directly handoff current P to the locked m
1565         incidlelocked(-1);
1566         p = releasep();
1567         mp->nextp = (uintptr)p;
1568         runtime_notewakeup(&mp->park);
1569         stopm();
1570 }
1571
1572 // Stops the current m for stoptheworld.
1573 // Returns when the world is restarted.
1574 static void
1575 gcstopm(void)
1576 {
1577         P *p;
1578
1579         if(!runtime_sched->gcwaiting)
1580                 runtime_throw("gcstopm: not waiting for gc");
1581         if(g->m->spinning) {
1582                 g->m->spinning = false;
1583                 runtime_xadd(&runtime_sched->nmspinning, -1);
1584         }
1585         p = releasep();
1586         runtime_lock(&runtime_sched->lock);
1587         p->status = _Pgcstop;
1588         if(--runtime_sched->stopwait == 0)
1589                 runtime_notewakeup(&runtime_sched->stopnote);
1590         runtime_unlock(&runtime_sched->lock);
1591         stopm();
1592 }
1593
1594 // Schedules gp to run on the current M.
1595 // Never returns.
1596 static void
1597 execute(G *gp)
1598 {
1599         int32 hz;
1600
1601         if(gp->atomicstatus != _Grunnable) {
1602                 runtime_printf("execute: bad g status %d\n", gp->atomicstatus);
1603                 runtime_throw("execute: bad g status");
1604         }
1605         gp->atomicstatus = _Grunning;
1606         gp->waitsince = 0;
1607         ((P*)g->m->p)->schedtick++;
1608         g->m->curg = gp;
1609         gp->m = g->m;
1610
1611         // Check whether the profiler needs to be turned on or off.
1612         hz = runtime_sched->profilehz;
1613         if(g->m->profilehz != hz)
1614                 runtime_resetcpuprofiler(hz);
1615
1616         runtime_gogo(gp);
1617 }
1618
1619 // Finds a runnable goroutine to execute.
1620 // Tries to steal from other P's, get g from global queue, poll network.
1621 static G*
1622 findrunnable(void)
1623 {
1624         G *gp;
1625         P *p;
1626         int32 i;
1627
1628 top:
1629         if(runtime_sched->gcwaiting) {
1630                 gcstopm();
1631                 goto top;
1632         }
1633         if(runtime_fingwait && runtime_fingwake && (gp = runtime_wakefing()) != nil)
1634                 runtime_ready(gp);
1635         // local runq
1636         gp = runqget((P*)g->m->p);
1637         if(gp)
1638                 return gp;
1639         // global runq
1640         if(runtime_sched->runqsize) {
1641                 runtime_lock(&runtime_sched->lock);
1642                 gp = globrunqget((P*)g->m->p, 0);
1643                 runtime_unlock(&runtime_sched->lock);
1644                 if(gp)
1645                         return gp;
1646         }
1647         // poll network
1648         gp = runtime_netpoll(false);  // non-blocking
1649         if(gp) {
1650                 injectglist((G*)gp->schedlink);
1651                 gp->atomicstatus = _Grunnable;
1652                 return gp;
1653         }
1654         // If number of spinning M's >= number of busy P's, block.
1655         // This is necessary to prevent excessive CPU consumption
1656         // when GOMAXPROCS>>1 but the program parallelism is low.
1657         if(!g->m->spinning && 2 * runtime_atomicload(&runtime_sched->nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched->npidle))  // TODO: fast atomic
1658                 goto stop;
1659         if(!g->m->spinning) {
1660                 g->m->spinning = true;
1661                 runtime_xadd(&runtime_sched->nmspinning, 1);
1662         }
1663         // random steal from other P's
1664         for(i = 0; i < 2*runtime_gomaxprocs; i++) {
1665                 if(runtime_sched->gcwaiting)
1666                         goto top;
1667                 p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
1668                 if(p == (P*)g->m->p)
1669                         gp = runqget(p);
1670                 else
1671                         gp = runqsteal((P*)g->m->p, p);
1672                 if(gp)
1673                         return gp;
1674         }
1675 stop:
1676         // return P and block
1677         runtime_lock(&runtime_sched->lock);
1678         if(runtime_sched->gcwaiting) {
1679                 runtime_unlock(&runtime_sched->lock);
1680                 goto top;
1681         }
1682         if(runtime_sched->runqsize) {
1683                 gp = globrunqget((P*)g->m->p, 0);
1684                 runtime_unlock(&runtime_sched->lock);
1685                 return gp;
1686         }
1687         p = releasep();
1688         pidleput(p);
1689         runtime_unlock(&runtime_sched->lock);
1690         if(g->m->spinning) {
1691                 g->m->spinning = false;
1692                 runtime_xadd(&runtime_sched->nmspinning, -1);
1693         }
1694         // check all runqueues once again
1695         for(i = 0; i < runtime_gomaxprocs; i++) {
1696                 p = runtime_allp[i];
1697                 if(p && p->runqhead != p->runqtail) {
1698                         runtime_lock(&runtime_sched->lock);
1699                         p = pidleget();
1700                         runtime_unlock(&runtime_sched->lock);
1701                         if(p) {
1702                                 acquirep(p);
1703                                 goto top;
1704                         }
1705                         break;
1706                 }
1707         }
1708         // poll network
1709         if(runtime_xchg64(&runtime_sched->lastpoll, 0) != 0) {
1710                 if(g->m->p)
1711                         runtime_throw("findrunnable: netpoll with p");
1712                 if(g->m->spinning)
1713                         runtime_throw("findrunnable: netpoll with spinning");
1714                 gp = runtime_netpoll(true);  // block until new work is available
1715                 runtime_atomicstore64(&runtime_sched->lastpoll, runtime_nanotime());
1716                 if(gp) {
1717                         runtime_lock(&runtime_sched->lock);
1718                         p = pidleget();
1719                         runtime_unlock(&runtime_sched->lock);
1720                         if(p) {
1721                                 acquirep(p);
1722                                 injectglist((G*)gp->schedlink);
1723                                 gp->atomicstatus = _Grunnable;
1724                                 return gp;
1725                         }
1726                         injectglist(gp);
1727                 }
1728         }
1729         stopm();
1730         goto top;
1731 }
1732
1733 static void
1734 resetspinning(void)
1735 {
1736         int32 nmspinning;
1737
1738         if(g->m->spinning) {
1739                 g->m->spinning = false;
1740                 nmspinning = runtime_xadd(&runtime_sched->nmspinning, -1);
1741                 if(nmspinning < 0)
1742                         runtime_throw("findrunnable: negative nmspinning");
1743         } else
1744                 nmspinning = runtime_atomicload(&runtime_sched->nmspinning);
1745
1746         // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
1747         // so see if we need to wakeup another P here.
1748         if (nmspinning == 0 && runtime_atomicload(&runtime_sched->npidle) > 0)
1749                 wakep();
1750 }
1751
1752 // Injects the list of runnable G's into the scheduler.
1753 // Can run concurrently with GC.
1754 static void
1755 injectglist(G *glist)
1756 {
1757         int32 n;
1758         G *gp;
1759
1760         if(glist == nil)
1761                 return;
1762         runtime_lock(&runtime_sched->lock);
1763         for(n = 0; glist; n++) {
1764                 gp = glist;
1765                 glist = (G*)gp->schedlink;
1766                 gp->atomicstatus = _Grunnable;
1767                 globrunqput(gp);
1768         }
1769         runtime_unlock(&runtime_sched->lock);
1770
1771         for(; n && runtime_sched->npidle; n--)
1772                 startm(nil, false);
1773 }
1774
1775 // One round of scheduler: find a runnable goroutine and execute it.
1776 // Never returns.
1777 static void
1778 schedule(void)
1779 {
1780         G *gp;
1781         uint32 tick;
1782
1783         if(g->m->locks)
1784                 runtime_throw("schedule: holding locks");
1785
1786 top:
1787         if(runtime_sched->gcwaiting) {
1788                 gcstopm();
1789                 goto top;
1790         }
1791
1792         gp = nil;
1793         // Check the global runnable queue once in a while to ensure fairness.
1794         // Otherwise two goroutines can completely occupy the local runqueue
1795         // by constantly respawning each other.
1796         tick = ((P*)g->m->p)->schedtick;
1797         // This is a fancy way to say tick%61==0,
1798         // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
1799         if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched->runqsize > 0) {
1800                 runtime_lock(&runtime_sched->lock);
1801                 gp = globrunqget((P*)g->m->p, 1);
1802                 runtime_unlock(&runtime_sched->lock);
1803                 if(gp)
1804                         resetspinning();
1805         }
1806         if(gp == nil) {
1807                 gp = runqget((P*)g->m->p);
1808                 if(gp && g->m->spinning)
1809                         runtime_throw("schedule: spinning with local work");
1810         }
1811         if(gp == nil) {
1812                 gp = findrunnable();  // blocks until work is available
1813                 resetspinning();
1814         }
1815
1816         if(gp->lockedm) {
1817                 // Hands off own p to the locked m,
1818                 // then blocks waiting for a new p.
1819                 startlockedm(gp);
1820                 goto top;
1821         }
1822
1823         execute(gp);
1824 }
1825
1826 // Puts the current goroutine into a waiting state and calls unlockf.
1827 // If unlockf returns false, the goroutine is resumed.
1828 void
1829 runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason)
1830 {
1831         if(g->atomicstatus != _Grunning)
1832                 runtime_throw("bad g status");
1833         g->m->waitlock = lock;
1834         g->m->waitunlockf = unlockf;
1835         g->waitreason = runtime_gostringnocopy((const byte*)reason);
1836         runtime_mcall(park0);
1837 }
1838
1839 void gopark(FuncVal *, void *, String, byte, int)
1840   __asm__ ("runtime.gopark");
1841
1842 void
1843 gopark(FuncVal *unlockf, void *lock, String reason,
1844        byte traceEv __attribute__ ((unused)),
1845        int traceskip __attribute__ ((unused)))
1846 {
1847         if(g->atomicstatus != _Grunning)
1848                 runtime_throw("bad g status");
1849         g->m->waitlock = lock;
1850         g->m->waitunlockf = unlockf == nil ? nil : (void*)unlockf->fn;
1851         g->waitreason = reason;
1852         runtime_mcall(park0);
1853 }
1854
1855 static bool
1856 parkunlock(G *gp, void *lock)
1857 {
1858         USED(gp);
1859         runtime_unlock(lock);
1860         return true;
1861 }
1862
1863 // Puts the current goroutine into a waiting state and unlocks the lock.
1864 // The goroutine can be made runnable again by calling runtime_ready(gp).
1865 void
1866 runtime_parkunlock(Lock *lock, const char *reason)
1867 {
1868         runtime_park(parkunlock, lock, reason);
1869 }
1870
1871 void goparkunlock(Lock *, String, byte, int)
1872   __asm__ (GOSYM_PREFIX "runtime.goparkunlock");
1873
1874 void
1875 goparkunlock(Lock *lock, String reason, byte traceEv __attribute__ ((unused)),
1876              int traceskip __attribute__ ((unused)))
1877 {
1878         if(g->atomicstatus != _Grunning)
1879                 runtime_throw("bad g status");
1880         g->m->waitlock = lock;
1881         g->m->waitunlockf = parkunlock;
1882         g->waitreason = reason;
1883         runtime_mcall(park0);
1884 }
1885
1886 // runtime_park continuation on g0.
1887 static void
1888 park0(G *gp)
1889 {
1890         M *m;
1891         bool ok;
1892
1893         m = g->m;
1894         gp->atomicstatus = _Gwaiting;
1895         gp->m = nil;
1896         m->curg = nil;
1897         if(m->waitunlockf) {
1898                 ok = ((bool (*)(G*, void*))m->waitunlockf)(gp, m->waitlock);
1899                 m->waitunlockf = nil;
1900                 m->waitlock = nil;
1901                 if(!ok) {
1902                         gp->atomicstatus = _Grunnable;
1903                         execute(gp);  // Schedule it back, never returns.
1904                 }
1905         }
1906         if(m->lockedg) {
1907                 stoplockedm();
1908                 execute(gp);  // Never returns.
1909         }
1910         schedule();
1911 }
1912
1913 // Scheduler yield.
1914 void
1915 runtime_gosched(void)
1916 {
1917         if(g->atomicstatus != _Grunning)
1918                 runtime_throw("bad g status");
1919         runtime_mcall(runtime_gosched0);
1920 }
1921
1922 // runtime_gosched continuation on g0.
1923 void
1924 runtime_gosched0(G *gp)
1925 {
1926         M *m;
1927
1928         m = g->m;
1929         gp->atomicstatus = _Grunnable;
1930         gp->m = nil;
1931         m->curg = nil;
1932         runtime_lock(&runtime_sched->lock);
1933         globrunqput(gp);
1934         runtime_unlock(&runtime_sched->lock);
1935         if(m->lockedg) {
1936                 stoplockedm();
1937                 execute(gp);  // Never returns.
1938         }
1939         schedule();
1940 }
1941
1942 // Finishes execution of the current goroutine.
1943 // Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack).
1944 // Since it does not return it does not matter.  But if it is preempted
1945 // at the split stack check, GC will complain about inconsistent sp.
1946 void runtime_goexit1(void) __attribute__ ((noinline));
1947 void
1948 runtime_goexit1(void)
1949 {
1950         if(g->atomicstatus != _Grunning)
1951                 runtime_throw("bad g status");
1952         runtime_mcall(goexit0);
1953 }
1954
1955 // runtime_goexit1 continuation on g0.
1956 static void
1957 goexit0(G *gp)
1958 {
1959         M *m;
1960
1961         m = g->m;
1962         gp->atomicstatus = _Gdead;
1963         gp->entry = nil;
1964         gp->m = nil;
1965         gp->lockedm = nil;
1966         gp->paniconfault = 0;
1967         gp->_defer = nil; // should be true already but just in case.
1968         gp->_panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
1969         gp->writebuf.__values = nil;
1970         gp->writebuf.__count = 0;
1971         gp->writebuf.__capacity = 0;
1972         gp->waitreason = runtime_gostringnocopy(nil);
1973         gp->param = nil;
1974         m->curg = nil;
1975         m->lockedg = nil;
1976         if(m->locked & ~_LockExternal) {
1977                 runtime_printf("invalid m->locked = %d\n", m->locked);
1978                 runtime_throw("internal lockOSThread error");
1979         }
1980         m->locked = 0;
1981         gfput((P*)m->p, gp);
1982         schedule();
1983 }
1984
1985 // The goroutine g is about to enter a system call.
1986 // Record that it's not using the cpu anymore.
1987 // This is called only from the go syscall library and cgocall,
1988 // not from the low-level system calls used by the runtime.
1989 //
1990 // Entersyscall cannot split the stack: the runtime_gosave must
1991 // make g->sched refer to the caller's stack segment, because
1992 // entersyscall is going to return immediately after.
1993
1994 void runtime_entersyscall(int32) __attribute__ ((no_split_stack));
1995 static void doentersyscall(uintptr, uintptr)
1996   __attribute__ ((no_split_stack, noinline));
1997
1998 void
1999 runtime_entersyscall(int32 dummy __attribute__ ((unused)))
2000 {
2001         // Save the registers in the g structure so that any pointers
2002         // held in registers will be seen by the garbage collector.
2003         getcontext(ucontext_arg(&g->gcregs[0]));
2004
2005         // Do the work in a separate function, so that this function
2006         // doesn't save any registers on its own stack.  If this
2007         // function does save any registers, we might store the wrong
2008         // value in the call to getcontext.
2009         //
2010         // FIXME: This assumes that we do not need to save any
2011         // callee-saved registers to access the TLS variable g.  We
2012         // don't want to put the ucontext_t on the stack because it is
2013         // large and we can not split the stack here.
2014         doentersyscall((uintptr)runtime_getcallerpc(&dummy),
2015                        (uintptr)runtime_getcallersp(&dummy));
2016 }
2017
2018 static void
2019 doentersyscall(uintptr pc, uintptr sp)
2020 {
2021         // Disable preemption because during this function g is in _Gsyscall status,
2022         // but can have inconsistent g->sched, do not let GC observe it.
2023         g->m->locks++;
2024
2025         // Leave SP around for GC and traceback.
2026 #ifdef USING_SPLIT_STACK
2027         {
2028           size_t gcstacksize;
2029           g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
2030                                          &g->gcnextsegment, &g->gcnextsp,
2031                                          &g->gcinitialsp);
2032           g->gcstacksize = (uintptr)gcstacksize;
2033         }
2034 #else
2035         {
2036                 void *v;
2037
2038                 g->gcnextsp = (byte *) &v;
2039         }
2040 #endif
2041
2042         g->syscallsp = sp;
2043         g->syscallpc = pc;
2044
2045         g->atomicstatus = _Gsyscall;
2046
2047         if(runtime_atomicload(&runtime_sched->sysmonwait)) {  // TODO: fast atomic
2048                 runtime_lock(&runtime_sched->lock);
2049                 if(runtime_atomicload(&runtime_sched->sysmonwait)) {
2050                         runtime_atomicstore(&runtime_sched->sysmonwait, 0);
2051                         runtime_notewakeup(&runtime_sched->sysmonnote);
2052                 }
2053                 runtime_unlock(&runtime_sched->lock);
2054         }
2055
2056         g->m->mcache = nil;
2057         ((P*)(g->m->p))->m = 0;
2058         runtime_atomicstore(&((P*)g->m->p)->status, _Psyscall);
2059         if(runtime_atomicload(&runtime_sched->gcwaiting)) {
2060                 runtime_lock(&runtime_sched->lock);
2061                 if (runtime_sched->stopwait > 0 && runtime_cas(&((P*)g->m->p)->status, _Psyscall, _Pgcstop)) {
2062                         if(--runtime_sched->stopwait == 0)
2063                                 runtime_notewakeup(&runtime_sched->stopnote);
2064                 }
2065                 runtime_unlock(&runtime_sched->lock);
2066         }
2067
2068         g->m->locks--;
2069 }
2070
2071 // The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
2072 void
2073 runtime_entersyscallblock(int32 dummy __attribute__ ((unused)))
2074 {
2075         P *p;
2076
2077         g->m->locks++;  // see comment in entersyscall
2078
2079         // Leave SP around for GC and traceback.
2080 #ifdef USING_SPLIT_STACK
2081         {
2082           size_t gcstacksize;
2083           g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
2084                                          &g->gcnextsegment, &g->gcnextsp,
2085                                          &g->gcinitialsp);
2086           g->gcstacksize = (uintptr)gcstacksize;
2087         }
2088 #else
2089         g->gcnextsp = (byte *) &p;
2090 #endif
2091
2092         // Save the registers in the g structure so that any pointers
2093         // held in registers will be seen by the garbage collector.
2094         getcontext(ucontext_arg(&g->gcregs[0]));
2095
2096         g->syscallpc = (uintptr)runtime_getcallerpc(&dummy);
2097         g->syscallsp = (uintptr)runtime_getcallersp(&dummy);
2098
2099         g->atomicstatus = _Gsyscall;
2100
2101         p = releasep();
2102         handoffp(p);
2103         if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
2104                 incidlelocked(1);
2105
2106         g->m->locks--;
2107 }
2108
2109 // The goroutine g exited its system call.
2110 // Arrange for it to run on a cpu again.
2111 // This is called only from the go syscall library, not
2112 // from the low-level system calls used by the runtime.
2113 void
2114 runtime_exitsyscall(int32 dummy __attribute__ ((unused)))
2115 {
2116         G *gp;
2117
2118         gp = g;
2119         gp->m->locks++;  // see comment in entersyscall
2120
2121         if(gp->isbackground)  // do not consider blocked scavenger for deadlock detection
2122                 incidlelocked(-1);
2123
2124         gp->waitsince = 0;
2125         if(exitsyscallfast()) {
2126                 // There's a cpu for us, so we can run.
2127                 ((P*)gp->m->p)->syscalltick++;
2128                 gp->atomicstatus = _Grunning;
2129                 // Garbage collector isn't running (since we are),
2130                 // so okay to clear gcstack and gcsp.
2131 #ifdef USING_SPLIT_STACK
2132                 gp->gcstack = nil;
2133 #endif
2134                 gp->gcnextsp = nil;
2135                 runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
2136                 gp->syscallsp = 0;
2137                 gp->m->locks--;
2138                 return;
2139         }
2140
2141         gp->m->locks--;
2142
2143         // Call the scheduler.
2144         runtime_mcall(exitsyscall0);
2145
2146         // Scheduler returned, so we're allowed to run now.
2147         // Delete the gcstack information that we left for
2148         // the garbage collector during the system call.
2149         // Must wait until now because until gosched returns
2150         // we don't know for sure that the garbage collector
2151         // is not running.
2152 #ifdef USING_SPLIT_STACK
2153         gp->gcstack = nil;
2154 #endif
2155         gp->gcnextsp = nil;
2156         runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
2157
2158         gp->syscallsp = 0;
2159
2160         // Note that this gp->m might be different than the earlier
2161         // gp->m after returning from runtime_mcall.
2162         ((P*)gp->m->p)->syscalltick++;
2163 }
2164
2165 static bool
2166 exitsyscallfast(void)
2167 {
2168         G *gp;
2169         P *p;
2170
2171         gp = g;
2172
2173         // Freezetheworld sets stopwait but does not retake P's.
2174         if(runtime_sched->stopwait) {
2175                 gp->m->p = 0;
2176                 return false;
2177         }
2178
2179         // Try to re-acquire the last P.
2180         if(gp->m->p && ((P*)gp->m->p)->status == _Psyscall && runtime_cas(&((P*)gp->m->p)->status, _Psyscall, _Prunning)) {
2181                 // There's a cpu for us, so we can run.
2182                 gp->m->mcache = ((P*)gp->m->p)->mcache;
2183                 ((P*)gp->m->p)->m = (uintptr)gp->m;
2184                 return true;
2185         }
2186         // Try to get any other idle P.
2187         gp->m->p = 0;
2188         if(runtime_sched->pidle) {
2189                 runtime_lock(&runtime_sched->lock);
2190                 p = pidleget();
2191                 if(p && runtime_atomicload(&runtime_sched->sysmonwait)) {
2192                         runtime_atomicstore(&runtime_sched->sysmonwait, 0);
2193                         runtime_notewakeup(&runtime_sched->sysmonnote);
2194                 }
2195                 runtime_unlock(&runtime_sched->lock);
2196                 if(p) {
2197                         acquirep(p);
2198                         return true;
2199                 }
2200         }
2201         return false;
2202 }
2203
2204 // runtime_exitsyscall slow path on g0.
2205 // Failed to acquire P, enqueue gp as runnable.
2206 static void
2207 exitsyscall0(G *gp)
2208 {
2209         M *m;
2210         P *p;
2211
2212         m = g->m;
2213         gp->atomicstatus = _Grunnable;
2214         gp->m = nil;
2215         m->curg = nil;
2216         runtime_lock(&runtime_sched->lock);
2217         p = pidleget();
2218         if(p == nil)
2219                 globrunqput(gp);
2220         else if(runtime_atomicload(&runtime_sched->sysmonwait)) {
2221                 runtime_atomicstore(&runtime_sched->sysmonwait, 0);
2222                 runtime_notewakeup(&runtime_sched->sysmonnote);
2223         }
2224         runtime_unlock(&runtime_sched->lock);
2225         if(p) {
2226                 acquirep(p);
2227                 execute(gp);  // Never returns.
2228         }
2229         if(m->lockedg) {
2230                 // Wait until another thread schedules gp and so m again.
2231                 stoplockedm();
2232                 execute(gp);  // Never returns.
2233         }
2234         stopm();
2235         schedule();  // Never returns.
2236 }
2237
2238 void syscall_entersyscall(void)
2239   __asm__(GOSYM_PREFIX "syscall.Entersyscall");
2240
2241 void syscall_entersyscall(void) __attribute__ ((no_split_stack));
2242
2243 void
2244 syscall_entersyscall()
2245 {
2246   runtime_entersyscall(0);
2247 }
2248
2249 void syscall_exitsyscall(void)
2250   __asm__(GOSYM_PREFIX "syscall.Exitsyscall");
2251
2252 void syscall_exitsyscall(void) __attribute__ ((no_split_stack));
2253
2254 void
2255 syscall_exitsyscall()
2256 {
2257   runtime_exitsyscall(0);
2258 }
2259
2260 // Called from syscall package before fork.
2261 void syscall_runtime_BeforeFork(void)
2262   __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork");
2263 void
2264 syscall_runtime_BeforeFork(void)
2265 {
2266         // Fork can hang if preempted with signals frequently enough (see issue 5517).
2267         // Ensure that we stay on the same M where we disable profiling.
2268         runtime_m()->locks++;
2269         if(runtime_m()->profilehz != 0)
2270                 runtime_resetcpuprofiler(0);
2271 }
2272
2273 // Called from syscall package after fork in parent.
2274 void syscall_runtime_AfterFork(void)
2275   __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork");
2276 void
2277 syscall_runtime_AfterFork(void)
2278 {
2279         int32 hz;
2280
2281         hz = runtime_sched->profilehz;
2282         if(hz != 0)
2283                 runtime_resetcpuprofiler(hz);
2284         runtime_m()->locks--;
2285 }
2286
2287 // Allocate a new g, with a stack big enough for stacksize bytes.
2288 G*
2289 runtime_malg(int32 stacksize, byte** ret_stack, uintptr* ret_stacksize)
2290 {
2291         G *newg;
2292
2293         newg = allocg();
2294         if(stacksize >= 0) {
2295 #if USING_SPLIT_STACK
2296                 int dont_block_signals = 0;
2297                 size_t ss_stacksize;
2298
2299                 *ret_stack = __splitstack_makecontext(stacksize,
2300                                                       &newg->stackcontext[0],
2301                                                       &ss_stacksize);
2302                 *ret_stacksize = (uintptr)ss_stacksize;
2303                 __splitstack_block_signals_context(&newg->stackcontext[0],
2304                                                    &dont_block_signals, nil);
2305 #else
2306                 // In 64-bit mode, the maximum Go allocation space is
2307                 // 128G.  Our stack size is 4M, which only permits 32K
2308                 // goroutines.  In order to not limit ourselves,
2309                 // allocate the stacks out of separate memory.  In
2310                 // 32-bit mode, the Go allocation space is all of
2311                 // memory anyhow.
2312                 if(sizeof(void*) == 8) {
2313                         void *p = runtime_SysAlloc(stacksize, &mstats()->other_sys);
2314                         if(p == nil)
2315                                 runtime_throw("runtime: cannot allocate memory for goroutine stack");
2316                         *ret_stack = (byte*)p;
2317                 } else {
2318                         *ret_stack = runtime_mallocgc(stacksize, 0, FlagNoProfiling|FlagNoGC);
2319                         runtime_xadd(&runtime_stacks_sys, stacksize);
2320                 }
2321                 *ret_stacksize = (uintptr)stacksize;
2322                 newg->gcinitialsp = *ret_stack;
2323                 newg->gcstacksize = (uintptr)stacksize;
2324 #endif
2325         }
2326         return newg;
2327 }
2328
2329 G*
2330 __go_go(void (*fn)(void*), void* arg)
2331 {
2332         byte *sp;
2333         size_t spsize;
2334         G *newg;
2335         P *p;
2336
2337 //runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
2338         if(fn == nil) {
2339                 g->m->throwing = -1;  // do not dump full stacks
2340                 runtime_throw("go of nil func value");
2341         }
2342         g->m->locks++;  // disable preemption because it can be holding p in a local var
2343
2344         p = (P*)g->m->p;
2345         if((newg = gfget(p)) != nil) {
2346 #ifdef USING_SPLIT_STACK
2347                 int dont_block_signals = 0;
2348
2349                 sp = __splitstack_resetcontext(&newg->stackcontext[0],
2350                                                &spsize);
2351                 __splitstack_block_signals_context(&newg->stackcontext[0],
2352                                                    &dont_block_signals, nil);
2353 #else
2354                 sp = newg->gcinitialsp;
2355                 spsize = newg->gcstacksize;
2356                 if(spsize == 0)
2357                         runtime_throw("bad spsize in __go_go");
2358                 newg->gcnextsp = sp;
2359 #endif
2360         } else {
2361                 uintptr malsize;
2362
2363                 newg = runtime_malg(StackMin, &sp, &malsize);
2364                 spsize = (size_t)malsize;
2365                 allgadd(newg);
2366         }
2367
2368         newg->entry = (byte*)fn;
2369         newg->param = arg;
2370         newg->gopc = (uintptr)__builtin_return_address(0);
2371         newg->atomicstatus = _Grunnable;
2372         if(p->goidcache == p->goidcacheend) {
2373                 p->goidcache = runtime_xadd64(&runtime_sched->goidgen, GoidCacheBatch);
2374                 p->goidcacheend = p->goidcache + GoidCacheBatch;
2375         }
2376         newg->goid = p->goidcache++;
2377
2378         {
2379                 // Avoid warnings about variables clobbered by
2380                 // longjmp.
2381                 byte * volatile vsp = sp;
2382                 size_t volatile vspsize = spsize;
2383                 G * volatile vnewg = newg;
2384                 ucontext_t * volatile uc;
2385
2386                 uc = ucontext_arg(&vnewg->context[0]);
2387                 getcontext(uc);
2388                 uc->uc_stack.ss_sp = vsp;
2389                 uc->uc_stack.ss_size = vspsize;
2390                 makecontext(uc, kickoff, 0);
2391
2392                 runqput(p, vnewg);
2393
2394                 if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0 && fn != runtime_main)  // TODO: fast atomic
2395                         wakep();
2396                 g->m->locks--;
2397                 return vnewg;
2398         }
2399 }
2400
2401 static void
2402 allgadd(G *gp)
2403 {
2404         G **new;
2405         uintptr cap;
2406
2407         runtime_lock(&allglock);
2408         if(runtime_allglen >= allgcap) {
2409                 cap = 4096/sizeof(new[0]);
2410                 if(cap < 2*allgcap)
2411                         cap = 2*allgcap;
2412                 new = runtime_malloc(cap*sizeof(new[0]));
2413                 if(new == nil)
2414                         runtime_throw("runtime: cannot allocate memory");
2415                 if(runtime_allg != nil) {
2416                         runtime_memmove(new, runtime_allg, runtime_allglen*sizeof(new[0]));
2417                         runtime_free(runtime_allg);
2418                 }
2419                 runtime_allg = new;
2420                 allgcap = cap;
2421         }
2422         runtime_allg[runtime_allglen++] = gp;
2423         runtime_unlock(&allglock);
2424 }
2425
2426 // Put on gfree list.
2427 // If local list is too long, transfer a batch to the global list.
2428 static void
2429 gfput(P *p, G *gp)
2430 {
2431         gp->schedlink = (uintptr)p->gfree;
2432         p->gfree = gp;
2433         p->gfreecnt++;
2434         if(p->gfreecnt >= 64) {
2435                 runtime_lock(&runtime_sched->gflock);
2436                 while(p->gfreecnt >= 32) {
2437                         p->gfreecnt--;
2438                         gp = p->gfree;
2439                         p->gfree = (G*)gp->schedlink;
2440                         gp->schedlink = (uintptr)runtime_sched->gfree;
2441                         runtime_sched->gfree = gp;
2442                 }
2443                 runtime_unlock(&runtime_sched->gflock);
2444         }
2445 }
2446
2447 // Get from gfree list.
2448 // If local list is empty, grab a batch from global list.
2449 static G*
2450 gfget(P *p)
2451 {
2452         G *gp;
2453
2454 retry:
2455         gp = p->gfree;
2456         if(gp == nil && runtime_sched->gfree) {
2457                 runtime_lock(&runtime_sched->gflock);
2458                 while(p->gfreecnt < 32 && runtime_sched->gfree) {
2459                         p->gfreecnt++;
2460                         gp = runtime_sched->gfree;
2461                         runtime_sched->gfree = (G*)gp->schedlink;
2462                         gp->schedlink = (uintptr)p->gfree;
2463                         p->gfree = gp;
2464                 }
2465                 runtime_unlock(&runtime_sched->gflock);
2466                 goto retry;
2467         }
2468         if(gp) {
2469                 p->gfree = (G*)gp->schedlink;
2470                 p->gfreecnt--;
2471         }
2472         return gp;
2473 }
2474
2475 // Purge all cached G's from gfree list to the global list.
2476 static void
2477 gfpurge(P *p)
2478 {
2479         G *gp;
2480
2481         runtime_lock(&runtime_sched->gflock);
2482         while(p->gfreecnt) {
2483                 p->gfreecnt--;
2484                 gp = p->gfree;
2485                 p->gfree = (G*)gp->schedlink;
2486                 gp->schedlink = (uintptr)runtime_sched->gfree;
2487                 runtime_sched->gfree = gp;
2488         }
2489         runtime_unlock(&runtime_sched->gflock);
2490 }
2491
2492 void
2493 runtime_Breakpoint(void)
2494 {
2495         runtime_breakpoint();
2496 }
2497
2498 void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched");
2499
2500 void
2501 runtime_Gosched(void)
2502 {
2503         runtime_gosched();
2504 }
2505
2506 // Implementation of runtime.GOMAXPROCS.
2507 // delete when scheduler is even stronger
2508
2509 intgo runtime_GOMAXPROCS(intgo)
2510   __asm__(GOSYM_PREFIX "runtime.GOMAXPROCS");
2511
2512 intgo
2513 runtime_GOMAXPROCS(intgo n)
2514 {
2515         intgo ret;
2516
2517         if(n > _MaxGomaxprocs)
2518                 n = _MaxGomaxprocs;
2519         runtime_lock(&runtime_sched->lock);
2520         ret = (intgo)runtime_gomaxprocs;
2521         if(n <= 0 || n == ret) {
2522                 runtime_unlock(&runtime_sched->lock);
2523                 return ret;
2524         }
2525         runtime_unlock(&runtime_sched->lock);
2526
2527         runtime_acquireWorldsema();
2528         g->m->gcing = 1;
2529         runtime_stopTheWorldWithSema();
2530         newprocs = (int32)n;
2531         g->m->gcing = 0;
2532         runtime_releaseWorldsema();
2533         runtime_startTheWorldWithSema();
2534
2535         return ret;
2536 }
2537
2538 // lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
2539 // after they modify m->locked. Do not allow preemption during this call,
2540 // or else the m might be different in this function than in the caller.
2541 static void
2542 lockOSThread(void)
2543 {
2544         g->m->lockedg = g;
2545         g->lockedm = g->m;
2546 }
2547
2548 void    runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
2549 void
2550 runtime_LockOSThread(void)
2551 {
2552         g->m->locked |= _LockExternal;
2553         lockOSThread();
2554 }
2555
2556 void
2557 runtime_lockOSThread(void)
2558 {
2559         g->m->locked += _LockInternal;
2560         lockOSThread();
2561 }
2562
2563
2564 // unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
2565 // after they update m->locked. Do not allow preemption during this call,
2566 // or else the m might be in different in this function than in the caller.
2567 static void
2568 unlockOSThread(void)
2569 {
2570         if(g->m->locked != 0)
2571                 return;
2572         g->m->lockedg = nil;
2573         g->lockedm = nil;
2574 }
2575
2576 void    runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
2577
2578 void
2579 runtime_UnlockOSThread(void)
2580 {
2581         g->m->locked &= ~_LockExternal;
2582         unlockOSThread();
2583 }
2584
2585 void
2586 runtime_unlockOSThread(void)
2587 {
2588         if(g->m->locked < _LockInternal)
2589                 runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
2590         g->m->locked -= _LockInternal;
2591         unlockOSThread();
2592 }
2593
2594 bool
2595 runtime_lockedOSThread(void)
2596 {
2597         return g->lockedm != nil && g->m->lockedg != nil;
2598 }
2599
2600 int32
2601 runtime_gcount(void)
2602 {
2603         G *gp;
2604         int32 n, s;
2605         uintptr i;
2606
2607         n = 0;
2608         runtime_lock(&allglock);
2609         // TODO(dvyukov): runtime.NumGoroutine() is O(N).
2610         // We do not want to increment/decrement centralized counter in newproc/goexit,
2611         // just to make runtime.NumGoroutine() faster.
2612         // Compromise solution is to introduce per-P counters of active goroutines.
2613         for(i = 0; i < runtime_allglen; i++) {
2614                 gp = runtime_allg[i];
2615                 s = gp->atomicstatus;
2616                 if(s == _Grunnable || s == _Grunning || s == _Gsyscall || s == _Gwaiting)
2617                         n++;
2618         }
2619         runtime_unlock(&allglock);
2620         return n;
2621 }
2622
2623 int32
2624 runtime_mcount(void)
2625 {
2626         return runtime_sched->mcount;
2627 }
2628
2629 static struct {
2630         uint32 lock;
2631         int32 hz;
2632 } prof;
2633
2634 static void System(void) {}
2635 static void GC(void) {}
2636
2637 // Called if we receive a SIGPROF signal.
2638 void
2639 runtime_sigprof()
2640 {
2641         M *mp = g->m;
2642         int32 n, i;
2643         bool traceback;
2644         uintptr pcbuf[TracebackMaxFrames];
2645         Location locbuf[TracebackMaxFrames];
2646         Slice stk;
2647
2648         if(prof.hz == 0)
2649                 return;
2650
2651         if(mp == nil)
2652                 return;
2653
2654         // Profiling runs concurrently with GC, so it must not allocate.
2655         mp->mallocing++;
2656
2657         traceback = true;
2658
2659         if(mp->mcache == nil)
2660                 traceback = false;
2661
2662         n = 0;
2663
2664         if(runtime_atomicload(&runtime_in_callers) > 0) {
2665                 // If SIGPROF arrived while already fetching runtime
2666                 // callers we can have trouble on older systems
2667                 // because the unwind library calls dl_iterate_phdr
2668                 // which was not recursive in the past.
2669                 traceback = false;
2670         }
2671
2672         if(traceback) {
2673                 n = runtime_callers(0, locbuf, nelem(locbuf), false);
2674                 for(i = 0; i < n; i++)
2675                         pcbuf[i] = locbuf[i].pc;
2676         }
2677         if(!traceback || n <= 0) {
2678                 n = 2;
2679                 pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
2680                 if(mp->gcing || mp->helpgc)
2681                         pcbuf[1] = (uintptr)GC;
2682                 else
2683                         pcbuf[1] = (uintptr)System;
2684         }
2685
2686         if (prof.hz != 0) {
2687                 stk.__values = &pcbuf[0];
2688                 stk.__count = n;
2689                 stk.__capacity = n;
2690
2691                 // Simple cas-lock to coordinate with setcpuprofilerate.
2692                 while (!runtime_cas(&prof.lock, 0, 1)) {
2693                         runtime_osyield();
2694                 }
2695                 if (prof.hz != 0) {
2696                         runtime_cpuprofAdd(stk);
2697                 }
2698                 runtime_atomicstore(&prof.lock, 0);
2699         }
2700
2701         mp->mallocing--;
2702 }
2703
2704 // Arrange to call fn with a traceback hz times a second.
2705 void
2706 runtime_setcpuprofilerate_m(int32 hz)
2707 {
2708         // Force sane arguments.
2709         if(hz < 0)
2710                 hz = 0;
2711
2712         // Disable preemption, otherwise we can be rescheduled to another thread
2713         // that has profiling enabled.
2714         g->m->locks++;
2715
2716         // Stop profiler on this thread so that it is safe to lock prof.
2717         // if a profiling signal came in while we had prof locked,
2718         // it would deadlock.
2719         runtime_resetcpuprofiler(0);
2720
2721         while (!runtime_cas(&prof.lock, 0, 1)) {
2722                 runtime_osyield();
2723         }
2724         prof.hz = hz;
2725         runtime_atomicstore(&prof.lock, 0);
2726
2727         runtime_lock(&runtime_sched->lock);
2728         runtime_sched->profilehz = hz;
2729         runtime_unlock(&runtime_sched->lock);
2730
2731         if(hz != 0)
2732                 runtime_resetcpuprofiler(hz);
2733
2734         g->m->locks--;
2735 }
2736
2737 // Change number of processors.  The world is stopped, sched is locked.
2738 static void
2739 procresize(int32 new)
2740 {
2741         int32 i, old;
2742         bool pempty;
2743         G *gp;
2744         P *p;
2745         intgo j;
2746
2747         old = runtime_gomaxprocs;
2748         if(old < 0 || old > _MaxGomaxprocs || new <= 0 || new >_MaxGomaxprocs)
2749                 runtime_throw("procresize: invalid arg");
2750         // initialize new P's
2751         for(i = 0; i < new; i++) {
2752                 p = runtime_allp[i];
2753                 if(p == nil) {
2754                         p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
2755                         p->id = i;
2756                         p->status = _Pgcstop;
2757                         p->deferpool.__values = &p->deferpoolbuf[0];
2758                         p->deferpool.__count = 0;
2759                         p->deferpool.__capacity = nelem(p->deferpoolbuf);
2760                         runtime_atomicstorep(&runtime_allp[i], p);
2761                 }
2762                 if(p->mcache == nil) {
2763                         if(old==0 && i==0)
2764                                 p->mcache = g->m->mcache;  // bootstrap
2765                         else
2766                                 p->mcache = runtime_allocmcache();
2767                 }
2768         }
2769
2770         // redistribute runnable G's evenly
2771         // collect all runnable goroutines in global queue preserving FIFO order
2772         // FIFO order is required to ensure fairness even during frequent GCs
2773         // see http://golang.org/issue/7126
2774         pempty = false;
2775         while(!pempty) {
2776                 pempty = true;
2777                 for(i = 0; i < old; i++) {
2778                         p = runtime_allp[i];
2779                         if(p->runqhead == p->runqtail)
2780                                 continue;
2781                         pempty = false;
2782                         // pop from tail of local queue
2783                         p->runqtail--;
2784                         gp = (G*)p->runq[p->runqtail%nelem(p->runq)];
2785                         // push onto head of global queue
2786                         gp->schedlink = runtime_sched->runqhead;
2787                         runtime_sched->runqhead = (uintptr)gp;
2788                         if(runtime_sched->runqtail == 0)
2789                                 runtime_sched->runqtail = (uintptr)gp;
2790                         runtime_sched->runqsize++;
2791                 }
2792         }
2793         // fill local queues with at most nelem(p->runq)/2 goroutines
2794         // start at 1 because current M already executes some G and will acquire allp[0] below,
2795         // so if we have a spare G we want to put it into allp[1].
2796         for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched->runqsize > 0; i++) {
2797                 gp = (G*)runtime_sched->runqhead;
2798                 runtime_sched->runqhead = gp->schedlink;
2799                 if(runtime_sched->runqhead == 0)
2800                         runtime_sched->runqtail = 0;
2801                 runtime_sched->runqsize--;
2802                 runqput(runtime_allp[i%new], gp);
2803         }
2804
2805         // free unused P's
2806         for(i = new; i < old; i++) {
2807                 p = runtime_allp[i];
2808                 for(j = 0; j < p->deferpool.__count; j++) {
2809                         ((struct _defer**)p->deferpool.__values)[j] = nil;
2810                 }
2811                 p->deferpool.__count = 0;
2812                 runtime_freemcache(p->mcache);
2813                 p->mcache = nil;
2814                 gfpurge(p);
2815                 p->status = _Pdead;
2816                 // can't free P itself because it can be referenced by an M in syscall
2817         }
2818
2819         if(g->m->p)
2820                 ((P*)g->m->p)->m = 0;
2821         g->m->p = 0;
2822         g->m->mcache = nil;
2823         p = runtime_allp[0];
2824         p->m = 0;
2825         p->status = _Pidle;
2826         acquirep(p);
2827         for(i = new-1; i > 0; i--) {
2828                 p = runtime_allp[i];
2829                 p->status = _Pidle;
2830                 pidleput(p);
2831         }
2832         runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
2833 }
2834
2835 // Associate p and the current m.
2836 static void
2837 acquirep(P *p)
2838 {
2839         M *m;
2840
2841         m = g->m;
2842         if(m->p || m->mcache)
2843                 runtime_throw("acquirep: already in go");
2844         if(p->m || p->status != _Pidle) {
2845                 runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? ((M*)p->m)->id : 0, p->status);
2846                 runtime_throw("acquirep: invalid p state");
2847         }
2848         m->mcache = p->mcache;
2849         m->p = (uintptr)p;
2850         p->m = (uintptr)m;
2851         p->status = _Prunning;
2852 }
2853
2854 // Disassociate p and the current m.
2855 static P*
2856 releasep(void)
2857 {
2858         M *m;
2859         P *p;
2860
2861         m = g->m;
2862         if(m->p == 0 || m->mcache == nil)
2863                 runtime_throw("releasep: invalid arg");
2864         p = (P*)m->p;
2865         if((M*)p->m != m || p->mcache != m->mcache || p->status != _Prunning) {
2866                 runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
2867                         m, m->p, p->m, m->mcache, p->mcache, p->status);
2868                 runtime_throw("releasep: invalid p state");
2869         }
2870         m->p = 0;
2871         m->mcache = nil;
2872         p->m = 0;
2873         p->status = _Pidle;
2874         return p;
2875 }
2876
2877 static void
2878 incidlelocked(int32 v)
2879 {
2880         runtime_lock(&runtime_sched->lock);
2881         runtime_sched->nmidlelocked += v;
2882         if(v > 0)
2883                 checkdead();
2884         runtime_unlock(&runtime_sched->lock);
2885 }
2886
2887 // Check for deadlock situation.
2888 // The check is based on number of running M's, if 0 -> deadlock.
2889 static void
2890 checkdead(void)
2891 {
2892         G *gp;
2893         int32 run, grunning, s;
2894         uintptr i;
2895
2896         // For -buildmode=c-shared or -buildmode=c-archive it's OK if
2897         // there are no running goroutines.  The calling program is
2898         // assumed to be running.
2899         if(runtime_isarchive) {
2900                 return;
2901         }
2902
2903         // -1 for sysmon
2904         run = runtime_sched->mcount - runtime_sched->nmidle - runtime_sched->nmidlelocked - 1 - countextra();
2905         if(run > 0)
2906                 return;
2907         // If we are dying because of a signal caught on an already idle thread,
2908         // freezetheworld will cause all running threads to block.
2909         // And runtime will essentially enter into deadlock state,
2910         // except that there is a thread that will call runtime_exit soon.
2911         if(runtime_panicking() > 0)
2912                 return;
2913         if(run < 0) {
2914                 runtime_printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
2915                         runtime_sched->nmidle, runtime_sched->nmidlelocked, runtime_sched->mcount);
2916                 runtime_throw("checkdead: inconsistent counts");
2917         }
2918         grunning = 0;
2919         runtime_lock(&allglock);
2920         for(i = 0; i < runtime_allglen; i++) {
2921                 gp = runtime_allg[i];
2922                 if(gp->isbackground)
2923                         continue;
2924                 s = gp->atomicstatus;
2925                 if(s == _Gwaiting)
2926                         grunning++;
2927                 else if(s == _Grunnable || s == _Grunning || s == _Gsyscall) {
2928                         runtime_unlock(&allglock);
2929                         runtime_printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);
2930                         runtime_throw("checkdead: runnable g");
2931                 }
2932         }
2933         runtime_unlock(&allglock);
2934         if(grunning == 0)  // possible if main goroutine calls runtime_Goexit()
2935                 runtime_throw("no goroutines (main called runtime.Goexit) - deadlock!");
2936         g->m->throwing = -1;  // do not dump full stacks
2937         runtime_throw("all goroutines are asleep - deadlock!");
2938 }
2939
2940 static void
2941 sysmon(void)
2942 {
2943         uint32 idle, delay;
2944         int64 now, lastpoll, lasttrace;
2945         G *gp;
2946
2947         lasttrace = 0;
2948         idle = 0;  // how many cycles in succession we had not wokeup somebody
2949         delay = 0;
2950         for(;;) {
2951                 if(idle == 0)  // start with 20us sleep...
2952                         delay = 20;
2953                 else if(idle > 50)  // start doubling the sleep after 1ms...
2954                         delay *= 2;
2955                 if(delay > 10*1000)  // up to 10ms
2956                         delay = 10*1000;
2957                 runtime_usleep(delay);
2958                 if(runtime_debug.schedtrace <= 0 &&
2959                         (runtime_sched->gcwaiting || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs)) {  // TODO: fast atomic
2960                         runtime_lock(&runtime_sched->lock);
2961                         if(runtime_atomicload(&runtime_sched->gcwaiting) || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs) {
2962                                 runtime_atomicstore(&runtime_sched->sysmonwait, 1);
2963                                 runtime_unlock(&runtime_sched->lock);
2964                                 runtime_notesleep(&runtime_sched->sysmonnote);
2965                                 runtime_noteclear(&runtime_sched->sysmonnote);
2966                                 idle = 0;
2967                                 delay = 20;
2968                         } else
2969                                 runtime_unlock(&runtime_sched->lock);
2970                 }
2971                 // poll network if not polled for more than 10ms
2972                 lastpoll = runtime_atomicload64(&runtime_sched->lastpoll);
2973                 now = runtime_nanotime();
2974                 if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
2975                         runtime_cas64(&runtime_sched->lastpoll, lastpoll, now);
2976                         gp = runtime_netpoll(false);  // non-blocking
2977                         if(gp) {
2978                                 // Need to decrement number of idle locked M's
2979                                 // (pretending that one more is running) before injectglist.
2980                                 // Otherwise it can lead to the following situation:
2981                                 // injectglist grabs all P's but before it starts M's to run the P's,
2982                                 // another M returns from syscall, finishes running its G,
2983                                 // observes that there is no work to do and no other running M's
2984                                 // and reports deadlock.
2985                                 incidlelocked(-1);
2986                                 injectglist(gp);
2987                                 incidlelocked(1);
2988                         }
2989                 }
2990                 // retake P's blocked in syscalls
2991                 // and preempt long running G's
2992                 if(retake(now))
2993                         idle = 0;
2994                 else
2995                         idle++;
2996
2997                 if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) {
2998                         lasttrace = now;
2999                         runtime_schedtrace(runtime_debug.scheddetail);
3000                 }
3001         }
3002 }
3003
3004 typedef struct Pdesc Pdesc;
3005 struct Pdesc
3006 {
3007         uint32  schedtick;
3008         int64   schedwhen;
3009         uint32  syscalltick;
3010         int64   syscallwhen;
3011 };
3012 static Pdesc pdesc[_MaxGomaxprocs];
3013
3014 static uint32
3015 retake(int64 now)
3016 {
3017         uint32 i, s, n;
3018         int64 t;
3019         P *p;
3020         Pdesc *pd;
3021
3022         n = 0;
3023         for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
3024                 p = runtime_allp[i];
3025                 if(p==nil)
3026                         continue;
3027                 pd = &pdesc[i];
3028                 s = p->status;
3029                 if(s == _Psyscall) {
3030                         // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
3031                         t = p->syscalltick;
3032                         if(pd->syscalltick != t) {
3033                                 pd->syscalltick = t;
3034                                 pd->syscallwhen = now;
3035                                 continue;
3036                         }
3037                         // On the one hand we don't want to retake Ps if there is no other work to do,
3038                         // but on the other hand we want to retake them eventually
3039                         // because they can prevent the sysmon thread from deep sleep.
3040                         if(p->runqhead == p->runqtail &&
3041                                 runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) > 0 &&
3042                                 pd->syscallwhen + 10*1000*1000 > now)
3043                                 continue;
3044                         // Need to decrement number of idle locked M's
3045                         // (pretending that one more is running) before the CAS.
3046                         // Otherwise the M from which we retake can exit the syscall,
3047                         // increment nmidle and report deadlock.
3048                         incidlelocked(-1);
3049                         if(runtime_cas(&p->status, s, _Pidle)) {
3050                                 n++;
3051                                 handoffp(p);
3052                         }
3053                         incidlelocked(1);
3054                 } else if(s == _Prunning) {
3055                         // Preempt G if it's running for more than 10ms.
3056                         t = p->schedtick;
3057                         if(pd->schedtick != t) {
3058                                 pd->schedtick = t;
3059                                 pd->schedwhen = now;
3060                                 continue;
3061                         }
3062                         if(pd->schedwhen + 10*1000*1000 > now)
3063                                 continue;
3064                         // preemptone(p);
3065                 }
3066         }
3067         return n;
3068 }
3069
3070 // Tell all goroutines that they have been preempted and they should stop.
3071 // This function is purely best-effort.  It can fail to inform a goroutine if a
3072 // processor just started running it.
3073 // No locks need to be held.
3074 // Returns true if preemption request was issued to at least one goroutine.
3075 static bool
3076 preemptall(void)
3077 {
3078         return false;
3079 }
3080
3081 void
3082 runtime_schedtrace(bool detailed)
3083 {
3084         static int64 starttime;
3085         int64 now;
3086         int64 id1, id2, id3;
3087         int32 i, t, h;
3088         uintptr gi;
3089         const char *fmt;
3090         M *mp, *lockedm;
3091         G *gp, *lockedg;
3092         P *p;
3093
3094         now = runtime_nanotime();
3095         if(starttime == 0)
3096                 starttime = now;
3097
3098         runtime_lock(&runtime_sched->lock);
3099         runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
3100                 (now-starttime)/1000000, runtime_gomaxprocs, runtime_sched->npidle, runtime_sched->mcount,
3101                 runtime_sched->nmidle, runtime_sched->runqsize);
3102         if(detailed) {
3103                 runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
3104                         runtime_sched->gcwaiting, runtime_sched->nmidlelocked, runtime_sched->nmspinning,
3105                         runtime_sched->stopwait, runtime_sched->sysmonwait);
3106         }
3107         // We must be careful while reading data from P's, M's and G's.
3108         // Even if we hold schedlock, most data can be changed concurrently.
3109         // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
3110         for(i = 0; i < runtime_gomaxprocs; i++) {
3111                 p = runtime_allp[i];
3112                 if(p == nil)
3113                         continue;
3114                 mp = (M*)p->m;
3115                 h = runtime_atomicload(&p->runqhead);
3116                 t = runtime_atomicload(&p->runqtail);
3117                 if(detailed)
3118                         runtime_printf("  P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
3119                                 i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
3120                 else {
3121                         // In non-detailed mode format lengths of per-P run queues as:
3122                         // [len1 len2 len3 len4]
3123                         fmt = " %d";
3124                         if(runtime_gomaxprocs == 1)
3125                                 fmt = " [%d]\n";
3126                         else if(i == 0)
3127                                 fmt = " [%d";
3128                         else if(i == runtime_gomaxprocs-1)
3129                                 fmt = " %d]\n";
3130                         runtime_printf(fmt, t-h);
3131                 }
3132         }
3133         if(!detailed) {
3134                 runtime_unlock(&runtime_sched->lock);
3135                 return;
3136         }
3137         for(mp = runtime_allm; mp; mp = mp->alllink) {
3138                 p = (P*)mp->p;
3139                 gp = mp->curg;
3140                 lockedg = mp->lockedg;
3141                 id1 = -1;
3142                 if(p)
3143                         id1 = p->id;
3144                 id2 = -1;
3145                 if(gp)
3146                         id2 = gp->goid;
3147                 id3 = -1;
3148                 if(lockedg)
3149                         id3 = lockedg->goid;
3150                 runtime_printf("  M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
3151                         " locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
3152                         mp->id, id1, id2,
3153                         mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
3154                         mp->spinning, mp->blocked, id3);
3155         }
3156         runtime_lock(&allglock);
3157         for(gi = 0; gi < runtime_allglen; gi++) {
3158                 gp = runtime_allg[gi];
3159                 mp = gp->m;
3160                 lockedm = gp->lockedm;
3161                 runtime_printf("  G%D: status=%d(%S) m=%d lockedm=%d\n",
3162                         gp->goid, gp->atomicstatus, gp->waitreason, mp ? mp->id : -1,
3163                         lockedm ? lockedm->id : -1);
3164         }
3165         runtime_unlock(&allglock);
3166         runtime_unlock(&runtime_sched->lock);
3167 }
3168
3169 // Put mp on midle list.
3170 // Sched must be locked.
3171 static void
3172 mput(M *mp)
3173 {
3174         mp->schedlink = runtime_sched->midle;
3175         runtime_sched->midle = (uintptr)mp;
3176         runtime_sched->nmidle++;
3177         checkdead();
3178 }
3179
3180 // Try to get an m from midle list.
3181 // Sched must be locked.
3182 static M*
3183 mget(void)
3184 {
3185         M *mp;
3186
3187         if((mp = (M*)runtime_sched->midle) != nil){
3188                 runtime_sched->midle = mp->schedlink;
3189                 runtime_sched->nmidle--;
3190         }
3191         return mp;
3192 }
3193
3194 // Put gp on the global runnable queue.
3195 // Sched must be locked.
3196 static void
3197 globrunqput(G *gp)
3198 {
3199         gp->schedlink = 0;
3200         if(runtime_sched->runqtail)
3201                 ((G*)runtime_sched->runqtail)->schedlink = (uintptr)gp;
3202         else
3203                 runtime_sched->runqhead = (uintptr)gp;
3204         runtime_sched->runqtail = (uintptr)gp;
3205         runtime_sched->runqsize++;
3206 }
3207
3208 // Put a batch of runnable goroutines on the global runnable queue.
3209 // Sched must be locked.
3210 static void
3211 globrunqputbatch(G *ghead, G *gtail, int32 n)
3212 {
3213         gtail->schedlink = 0;
3214         if(runtime_sched->runqtail)
3215                 ((G*)runtime_sched->runqtail)->schedlink = (uintptr)ghead;
3216         else
3217                 runtime_sched->runqhead = (uintptr)ghead;
3218         runtime_sched->runqtail = (uintptr)gtail;
3219         runtime_sched->runqsize += n;
3220 }
3221
3222 // Try get a batch of G's from the global runnable queue.
3223 // Sched must be locked.
3224 static G*
3225 globrunqget(P *p, int32 max)
3226 {
3227         G *gp, *gp1;
3228         int32 n;
3229
3230         if(runtime_sched->runqsize == 0)
3231                 return nil;
3232         n = runtime_sched->runqsize/runtime_gomaxprocs+1;
3233         if(n > runtime_sched->runqsize)
3234                 n = runtime_sched->runqsize;
3235         if(max > 0 && n > max)
3236                 n = max;
3237         if((uint32)n > nelem(p->runq)/2)
3238                 n = nelem(p->runq)/2;
3239         runtime_sched->runqsize -= n;
3240         if(runtime_sched->runqsize == 0)
3241                 runtime_sched->runqtail = 0;
3242         gp = (G*)runtime_sched->runqhead;
3243         runtime_sched->runqhead = gp->schedlink;
3244         n--;
3245         while(n--) {
3246                 gp1 = (G*)runtime_sched->runqhead;
3247                 runtime_sched->runqhead = gp1->schedlink;
3248                 runqput(p, gp1);
3249         }
3250         return gp;
3251 }
3252
3253 // Put p to on pidle list.
3254 // Sched must be locked.
3255 static void
3256 pidleput(P *p)
3257 {
3258         p->link = runtime_sched->pidle;
3259         runtime_sched->pidle = (uintptr)p;
3260         runtime_xadd(&runtime_sched->npidle, 1);  // TODO: fast atomic
3261 }
3262
3263 // Try get a p from pidle list.
3264 // Sched must be locked.
3265 static P*
3266 pidleget(void)
3267 {
3268         P *p;
3269
3270         p = (P*)runtime_sched->pidle;
3271         if(p) {
3272                 runtime_sched->pidle = p->link;
3273                 runtime_xadd(&runtime_sched->npidle, -1);  // TODO: fast atomic
3274         }
3275         return p;
3276 }
3277
3278 // Try to put g on local runnable queue.
3279 // If it's full, put onto global queue.
3280 // Executed only by the owner P.
3281 static void
3282 runqput(P *p, G *gp)
3283 {
3284         uint32 h, t;
3285
3286 retry:
3287         h = runtime_atomicload(&p->runqhead);  // load-acquire, synchronize with consumers
3288         t = p->runqtail;
3289         if(t - h < nelem(p->runq)) {
3290                 p->runq[t%nelem(p->runq)] = (uintptr)gp;
3291                 runtime_atomicstore(&p->runqtail, t+1);  // store-release, makes the item available for consumption
3292                 return;
3293         }
3294         if(runqputslow(p, gp, h, t))
3295                 return;
3296         // the queue is not full, now the put above must suceed
3297         goto retry;
3298 }
3299
3300 // Put g and a batch of work from local runnable queue on global queue.
3301 // Executed only by the owner P.
3302 static bool
3303 runqputslow(P *p, G *gp, uint32 h, uint32 t)
3304 {
3305         G *batch[nelem(p->runq)/2+1];
3306         uint32 n, i;
3307
3308         // First, grab a batch from local queue.
3309         n = t-h;
3310         n = n/2;
3311         if(n != nelem(p->runq)/2)
3312                 runtime_throw("runqputslow: queue is not full");
3313         for(i=0; i<n; i++)
3314                 batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
3315         if(!runtime_cas(&p->runqhead, h, h+n))  // cas-release, commits consume
3316                 return false;
3317         batch[n] = gp;
3318         // Link the goroutines.
3319         for(i=0; i<n; i++)
3320                 batch[i]->schedlink = (uintptr)batch[i+1];
3321         // Now put the batch on global queue.
3322         runtime_lock(&runtime_sched->lock);
3323         globrunqputbatch(batch[0], batch[n], n+1);
3324         runtime_unlock(&runtime_sched->lock);
3325         return true;
3326 }
3327
3328 // Get g from local runnable queue.
3329 // Executed only by the owner P.
3330 static G*
3331 runqget(P *p)
3332 {
3333         G *gp;
3334         uint32 t, h;
3335
3336         for(;;) {
3337                 h = runtime_atomicload(&p->runqhead);  // load-acquire, synchronize with other consumers
3338                 t = p->runqtail;
3339                 if(t == h)
3340                         return nil;
3341                 gp = (G*)p->runq[h%nelem(p->runq)];
3342                 if(runtime_cas(&p->runqhead, h, h+1))  // cas-release, commits consume
3343                         return gp;
3344         }
3345 }
3346
3347 // Grabs a batch of goroutines from local runnable queue.
3348 // batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
3349 // Can be executed by any P.
3350 static uint32
3351 runqgrab(P *p, G **batch)
3352 {
3353         uint32 t, h, n, i;
3354
3355         for(;;) {
3356                 h = runtime_atomicload(&p->runqhead);  // load-acquire, synchronize with other consumers
3357                 t = runtime_atomicload(&p->runqtail);  // load-acquire, synchronize with the producer
3358                 n = t-h;
3359                 n = n - n/2;
3360                 if(n == 0)
3361                         break;
3362                 if(n > nelem(p->runq)/2)  // read inconsistent h and t
3363                         continue;
3364                 for(i=0; i<n; i++)
3365                         batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
3366                 if(runtime_cas(&p->runqhead, h, h+n))  // cas-release, commits consume
3367                         break;
3368         }
3369         return n;
3370 }
3371
3372 // Steal half of elements from local runnable queue of p2
3373 // and put onto local runnable queue of p.
3374 // Returns one of the stolen elements (or nil if failed).
3375 static G*
3376 runqsteal(P *p, P *p2)
3377 {
3378         G *gp;
3379         G *batch[nelem(p->runq)/2];
3380         uint32 t, h, n, i;
3381
3382         n = runqgrab(p2, batch);
3383         if(n == 0)
3384                 return nil;
3385         n--;
3386         gp = batch[n];
3387         if(n == 0)
3388                 return gp;
3389         h = runtime_atomicload(&p->runqhead);  // load-acquire, synchronize with consumers
3390         t = p->runqtail;
3391         if(t - h + n >= nelem(p->runq))
3392                 runtime_throw("runqsteal: runq overflow");
3393         for(i=0; i<n; i++, t++)
3394                 p->runq[t%nelem(p->runq)] = (uintptr)batch[i];
3395         runtime_atomicstore(&p->runqtail, t);  // store-release, makes the item available for consumption
3396         return gp;
3397 }
3398
3399 void runtime_testSchedLocalQueue(void)
3400   __asm__("runtime.testSchedLocalQueue");
3401
3402 void
3403 runtime_testSchedLocalQueue(void)
3404 {
3405         P p;
3406         G gs[nelem(p.runq)];
3407         int32 i, j;
3408
3409         runtime_memclr((byte*)&p, sizeof(p));
3410
3411         for(i = 0; i < (int32)nelem(gs); i++) {
3412                 if(runqget(&p) != nil)
3413                         runtime_throw("runq is not empty initially");
3414                 for(j = 0; j < i; j++)
3415                         runqput(&p, &gs[i]);
3416                 for(j = 0; j < i; j++) {
3417                         if(runqget(&p) != &gs[i]) {
3418                                 runtime_printf("bad element at iter %d/%d\n", i, j);
3419                                 runtime_throw("bad element");
3420                         }
3421                 }
3422                 if(runqget(&p) != nil)
3423                         runtime_throw("runq is not empty afterwards");
3424         }
3425 }
3426
3427 void runtime_testSchedLocalQueueSteal(void)
3428   __asm__("runtime.testSchedLocalQueueSteal");
3429
3430 void
3431 runtime_testSchedLocalQueueSteal(void)
3432 {
3433         P p1, p2;
3434         G gs[nelem(p1.runq)], *gp;
3435         int32 i, j, s;
3436
3437         runtime_memclr((byte*)&p1, sizeof(p1));
3438         runtime_memclr((byte*)&p2, sizeof(p2));
3439
3440         for(i = 0; i < (int32)nelem(gs); i++) {
3441                 for(j = 0; j < i; j++) {
3442                         gs[j].sig = 0;
3443                         runqput(&p1, &gs[j]);
3444                 }
3445                 gp = runqsteal(&p2, &p1);
3446                 s = 0;
3447                 if(gp) {
3448                         s++;
3449                         gp->sig++;
3450                 }
3451                 while((gp = runqget(&p2)) != nil) {
3452                         s++;
3453                         gp->sig++;
3454                 }
3455                 while((gp = runqget(&p1)) != nil)
3456                         gp->sig++;
3457                 for(j = 0; j < i; j++) {
3458                         if(gs[j].sig != 1) {
3459                                 runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
3460                                 runtime_throw("bad element");
3461                         }
3462                 }
3463                 if(s != i/2 && s != i/2+1) {
3464                         runtime_printf("bad steal %d, want %d or %d, iter %d\n",
3465                                 s, i/2, i/2+1, i);
3466                         runtime_throw("bad steal");
3467                 }
3468         }
3469 }
3470
3471 intgo
3472 runtime_setmaxthreads(intgo in)
3473 {
3474         intgo out;
3475
3476         runtime_lock(&runtime_sched->lock);
3477         out = (intgo)runtime_sched->maxmcount;
3478         runtime_sched->maxmcount = (int32)in;
3479         checkmcount();
3480         runtime_unlock(&runtime_sched->lock);
3481         return out;
3482 }
3483
3484 static intgo
3485 procPin()
3486 {
3487         M *mp;
3488
3489         mp = runtime_m();
3490         mp->locks++;
3491         return (intgo)(((P*)mp->p)->id);
3492 }
3493
3494 static void
3495 procUnpin()
3496 {
3497         runtime_m()->locks--;
3498 }
3499
3500 intgo sync_runtime_procPin(void)
3501   __asm__ (GOSYM_PREFIX "sync.runtime_procPin");
3502
3503 intgo
3504 sync_runtime_procPin()
3505 {
3506         return procPin();
3507 }
3508
3509 void sync_runtime_procUnpin(void)
3510   __asm__ (GOSYM_PREFIX  "sync.runtime_procUnpin");
3511
3512 void
3513 sync_runtime_procUnpin()
3514 {
3515         procUnpin();
3516 }
3517
3518 intgo sync_atomic_runtime_procPin(void)
3519   __asm__ (GOSYM_PREFIX "sync_atomic.runtime_procPin");
3520
3521 intgo
3522 sync_atomic_runtime_procPin()
3523 {
3524         return procPin();
3525 }
3526
3527 void sync_atomic_runtime_procUnpin(void)
3528   __asm__ (GOSYM_PREFIX  "sync_atomic.runtime_procUnpin");
3529
3530 void
3531 sync_atomic_runtime_procUnpin()
3532 {
3533         procUnpin();
3534 }
3535
3536 void
3537 runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
3538 {
3539         enqueue1(wbufp, (Obj){(byte*)&runtime_main_init_done, sizeof runtime_main_init_done, 0});
3540 }
3541
3542 // Return whether we are waiting for a GC.  This gc toolchain uses
3543 // preemption instead.
3544 bool
3545 runtime_gcwaiting(void)
3546 {
3547         return runtime_sched->gcwaiting;
3548 }
3549
3550 // os_beforeExit is called from os.Exit(0).
3551 //go:linkname os_beforeExit os.runtime_beforeExit
3552
3553 extern void os_beforeExit() __asm__ (GOSYM_PREFIX "os.runtime_beforeExit");
3554
3555 void
3556 os_beforeExit()
3557 {
3558 }
3559
3560 // Active spinning for sync.Mutex.
3561 //go:linkname sync_runtime_canSpin sync.runtime_canSpin
3562
3563 enum
3564 {
3565         ACTIVE_SPIN = 4,
3566         ACTIVE_SPIN_CNT = 30,
3567 };
3568
3569 extern _Bool sync_runtime_canSpin(intgo i)
3570   __asm__ (GOSYM_PREFIX "sync.runtime_canSpin");
3571
3572 _Bool
3573 sync_runtime_canSpin(intgo i)
3574 {
3575         P *p;
3576
3577         // sync.Mutex is cooperative, so we are conservative with spinning.
3578         // Spin only few times and only if running on a multicore machine and
3579         // GOMAXPROCS>1 and there is at least one other running P and local runq is empty.
3580         // As opposed to runtime mutex we don't do passive spinning here,
3581         // because there can be work on global runq on on other Ps.
3582         if (i >= ACTIVE_SPIN || runtime_ncpu <= 1 || runtime_gomaxprocs <= (int32)(runtime_sched->npidle+runtime_sched->nmspinning)+1) {
3583                 return false;
3584         }
3585         p = (P*)g->m->p;
3586         return p != nil && p->runqhead == p->runqtail;
3587 }
3588
3589 //go:linkname sync_runtime_doSpin sync.runtime_doSpin
3590 //go:nosplit
3591
3592 extern void sync_runtime_doSpin(void)
3593   __asm__ (GOSYM_PREFIX "sync.runtime_doSpin");
3594
3595 void
3596 sync_runtime_doSpin()
3597 {
3598         runtime_procyield(ACTIVE_SPIN_CNT);
3599 }
3600
3601 // For Go code to look at variables, until we port proc.go.
3602
3603 extern M** runtime_go_allm(void)
3604   __asm__ (GOSYM_PREFIX "runtime.allm");
3605
3606 M**
3607 runtime_go_allm()
3608 {
3609         return &runtime_allm;
3610 }
3611
3612 extern Slice runtime_go_allgs(void)
3613   __asm__ (GOSYM_PREFIX "runtime.allgs");
3614
3615 Slice
3616 runtime_go_allgs()
3617 {
3618         Slice s;
3619
3620         s.__values = runtime_allg;
3621         s.__count = runtime_allglen;
3622         s.__capacity = allgcap;
3623         return s;
3624 }
3625
3626 intgo NumCPU(void) __asm__ (GOSYM_PREFIX "runtime.NumCPU");
3627
3628 intgo
3629 NumCPU()
3630 {
3631         return (intgo)(runtime_ncpu);
3632 }