[official-gcc.git] / libgo / runtime / proc.c
blob be7e083f080be6c4b1c06f3bde8c663a492497dd
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 #include <limits.h>
6 #include <signal.h>
7 #include <stdlib.h>
8 #include <pthread.h>
9 #include <unistd.h>
11 #include "config.h"
13 #ifdef HAVE_DL_ITERATE_PHDR
14 #include <link.h>
15 #endif
17 #include "runtime.h"
18 #include "arch.h"
19 #include "defs.h"
20 #include "malloc.h"
21 #include "go-type.h"
23 #ifdef USING_SPLIT_STACK
25 /* FIXME: These are not declared anywhere. */
27 extern void __splitstack_getcontext(void *context[10]);
29 extern void __splitstack_setcontext(void *context[10]);
31 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
33 extern void * __splitstack_resetcontext(void *context[10], size_t *);
35 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
36 void **);
38 extern void __splitstack_block_signals (int *, int *);
40 extern void __splitstack_block_signals_context (void *context[10], int *,
41 int *);
43 #endif
45 #ifndef PTHREAD_STACK_MIN
46 # define PTHREAD_STACK_MIN 8192
47 #endif
49 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
50 # define StackMin PTHREAD_STACK_MIN
51 #else
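// Note: without split-stack support we fall back to large fixed stacks;
// StackMin is 2 MB on 32-bit targets and 4 MB on 64-bit targets, and it is
// the stack size passed below (e.g. in runtime_newextram) when new m's and
// g's are created.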
52 # define StackMin ((sizeof(char *) < 8) ? 2 * 1024 * 1024 : 4 * 1024 * 1024)
53 #endif
55 uintptr runtime_stacks_sys;
57 static void gtraceback(G*);
59 #ifdef __rtems__
60 #define __thread
61 #endif
63 static __thread G *g;
65 #ifndef SETCONTEXT_CLOBBERS_TLS
67 static inline void
68 initcontext(void)
72 static inline void
73 fixcontext(ucontext_t *c __attribute__ ((unused)))
77 #else
79 # if defined(__x86_64__) && defined(__sun__)
81 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
82 // register to that of the thread which called getcontext. The effect
83 // is that the address of all __thread variables changes. This bug
84 // also affects pthread_self() and pthread_getspecific. We work
85 // around it by clobbering the context field directly to keep %fs the
86 // same.
88 static __thread greg_t fs;
90 static inline void
91 initcontext(void)
93 ucontext_t c;
95 getcontext(&c);
96 fs = c.uc_mcontext.gregs[REG_FSBASE];
99 static inline void
100 fixcontext(ucontext_t* c)
102 c->uc_mcontext.gregs[REG_FSBASE] = fs;
105 # elif defined(__NetBSD__)
107 // NetBSD has a bug: setcontext clobbers tlsbase, we need to save
108 // and restore it ourselves.
110 static __thread __greg_t tlsbase;
112 static inline void
113 initcontext(void)
115 ucontext_t c;
117 getcontext(&c);
118 tlsbase = c.uc_mcontext._mc_tlsbase;
121 static inline void
122 fixcontext(ucontext_t* c)
124 c->uc_mcontext._mc_tlsbase = tlsbase;
127 # elif defined(__sparc__)
129 static inline void
130 initcontext(void)
134 static inline void
135 fixcontext(ucontext_t *c)
137 /* ??? Using
138 register unsigned long thread __asm__("%g7");
139 c->uc_mcontext.gregs[REG_G7] = thread;
140 results in
141 error: variable ‘thread’ might be clobbered by \
142 ‘longjmp’ or ‘vfork’ [-Werror=clobbered]
143 which ought to be false, as %g7 is a fixed register. */
145 if (sizeof (c->uc_mcontext.gregs[REG_G7]) == 8)
146 asm ("stx %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
147 else
148 asm ("st %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
151 # else
153 # error unknown case for SETCONTEXT_CLOBBERS_TLS
155 # endif
157 #endif
159 // ucontext_arg returns a properly aligned ucontext_t value. On some
160 // systems a ucontext_t value must be aligned to a 16-byte boundary.
161 // The g structure that has fields of type ucontext_t is defined in
162 // Go, and Go has no simple way to align a field to such a boundary.
163 // So we make the field larger in runtime2.go and pick an appropriate
164 // offset within the field here.
165 static ucontext_t*
166 ucontext_arg(void** go_ucontext)
168 uintptr_t p = (uintptr_t)go_ucontext;
169 size_t align = __alignof__(ucontext_t);
170 if(align > 16) {
171 // We only ensured space for up to a 16 byte alignment
172 // in libgo/go/runtime/runtime2.go.
173 runtime_throw("required alignment of ucontext_t too large");
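	// Round p up to the next align-byte boundary (align is a power of two).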
175 p = (p + align - 1) &~ (uintptr_t)(align - 1);
176 return (ucontext_t*)p;
179 // We can not always refer to the TLS variables directly. The
180 // compiler will call tls_get_addr to get the address of the variable,
181 // and it may hold it in a register across a call to schedule. When
182 // we get back from the call we may be running in a different thread,
183 // in which case the register now points to the TLS variable for a
184 // different thread. We use non-inlinable functions to avoid this
185 // when necessary.
187 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
189 G*
190 runtime_g(void)
192 return g;
195 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
197 M*
198 runtime_m(void)
200 if(g == nil)
201 return nil;
202 return g->m;
205 // Set g.
206 void
207 runtime_setg(G* gp)
209 g = gp;
212 // Start a new thread.
213 static void
214 runtime_newosproc(M *mp)
216 pthread_attr_t attr;
217 sigset_t clear, old;
218 pthread_t tid;
219 int ret;
221 if(pthread_attr_init(&attr) != 0)
222 runtime_throw("pthread_attr_init");
223 if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
224 runtime_throw("pthread_attr_setdetachstate");
226 // Block signals during pthread_create so that the new thread
227 // starts with signals disabled. It will enable them in minit.
228 sigfillset(&clear);
230 #ifdef SIGTRAP
231 // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
232 sigdelset(&clear, SIGTRAP);
233 #endif
235 sigemptyset(&old);
236 pthread_sigmask(SIG_BLOCK, &clear, &old);
237 ret = pthread_create(&tid, &attr, runtime_mstart, mp);
238 pthread_sigmask(SIG_SETMASK, &old, nil);
240 if (ret != 0)
241 runtime_throw("pthread_create");
244 // First function run by a new goroutine. This replaces gogocall.
245 static void
246 kickoff(void)
248 void (*fn)(void*);
249 void *param;
251 if(g->traceback != nil)
252 gtraceback(g);
254 fn = (void (*)(void*))(g->entry);
255 param = g->param;
256 g->param = nil;
257 fn(param);
258 runtime_goexit1();
261 // Switch context to a different goroutine. This is like longjmp.
262 void runtime_gogo(G*) __attribute__ ((noinline));
263 void
264 runtime_gogo(G* newg)
266 #ifdef USING_SPLIT_STACK
267 __splitstack_setcontext(&newg->stackcontext[0]);
268 #endif
269 g = newg;
270 newg->fromgogo = true;
271 fixcontext(ucontext_arg(&newg->context[0]));
272 setcontext(ucontext_arg(&newg->context[0]));
273 runtime_throw("gogo setcontext returned");
276 // Save context and call fn passing g as a parameter. This is like
277 // setjmp. Because getcontext always returns 0, unlike setjmp, we use
278 // g->fromgogo as a code. It will be true if we got here via
279 // setcontext. g == nil the first time this is called in a new m.
280 void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
281 void
282 runtime_mcall(void (*pfn)(G*))
284 M *mp;
285 G *gp;
286 #ifndef USING_SPLIT_STACK
287 void *afterregs;
288 #endif
290 // Ensure that all registers are on the stack for the garbage
291 // collector.
292 __builtin_unwind_init();
294 gp = g;
295 mp = gp->m;
296 if(gp == mp->g0)
297 runtime_throw("runtime: mcall called on m->g0 stack");
299 if(gp != nil) {
301 #ifdef USING_SPLIT_STACK
302 __splitstack_getcontext(&g->stackcontext[0]);
303 #else
304 // We have to point to an address on the stack that is
305 // below the saved registers.
306 gp->gcnextsp = &afterregs;
307 #endif
308 gp->fromgogo = false;
309 getcontext(ucontext_arg(&gp->context[0]));
311 // When we return from getcontext, we may be running
312 // in a new thread. That means that g may have
313 // changed. It is a global variable, so we will
314 // reload it, but the address of g may be cached in
315 // our local stack frame, and that address may be
316 // wrong. Call the function to reload the value for
317 // this thread.
318 gp = runtime_g();
319 mp = gp->m;
321 if(gp->traceback != nil)
322 gtraceback(gp);
324 if (gp == nil || !gp->fromgogo) {
325 #ifdef USING_SPLIT_STACK
326 __splitstack_setcontext(&mp->g0->stackcontext[0]);
327 #endif
328 mp->g0->entry = (byte*)pfn;
329 mp->g0->param = gp;
331 // It's OK to set g directly here because this case
332 // can not occur if we got here via a setcontext to
333 // the getcontext call just above.
334 g = mp->g0;
336 fixcontext(ucontext_arg(&mp->g0->context[0]));
337 setcontext(ucontext_arg(&mp->g0->context[0]));
338 runtime_throw("runtime: mcall function returned");
342 // Goroutine scheduler
343 // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
345 // The main concepts are:
346 // G - goroutine.
347 // M - worker thread, or machine.
348 // P - processor, a resource that is required to execute Go code.
349 // M must have an associated P to execute Go code; however, it can be
350 // blocked or in a syscall w/o an associated P.
352 // Design doc at http://golang.org/s/go11sched.
354 enum
356 // Number of goroutine ids to grab from runtime_sched->goidgen to local per-P cache at once.
357 // 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
358 GoidCacheBatch = 16,
361 extern Sched* runtime_getsched() __asm__ (GOSYM_PREFIX "runtime.getsched");
363 Sched* runtime_sched;
364 int32 runtime_gomaxprocs;
365 uint32 runtime_needextram = 1;
366 M runtime_m0;
367 G runtime_g0; // idle goroutine for m0
368 G* runtime_lastg;
369 M* runtime_allm;
370 P** runtime_allp;
371 M* runtime_extram;
372 int8* runtime_goos;
373 int32 runtime_ncpu;
374 bool runtime_precisestack;
375 static int32 newprocs;
377 static Lock allglock; // the following vars are protected by this lock or by stoptheworld
378 G** runtime_allg;
379 uintptr runtime_allglen;
380 static uintptr allgcap;
382 bool runtime_isarchive;
384 void* runtime_mstart(void*);
385 static void runqput(P*, G*);
386 static G* runqget(P*);
387 static bool runqputslow(P*, G*, uint32, uint32);
388 static G* runqsteal(P*, P*);
389 static void mput(M*);
390 static M* mget(void);
391 static void mcommoninit(M*);
392 static void schedule(void);
393 static void procresize(int32);
394 static void acquirep(P*);
395 static P* releasep(void);
396 static void newm(void(*)(void), P*);
397 static void stopm(void);
398 static void startm(P*, bool);
399 static void handoffp(P*);
400 static void wakep(void);
401 static void stoplockedm(void);
402 static void startlockedm(G*);
403 static void sysmon(void);
404 static uint32 retake(int64);
405 static void incidlelocked(int32);
406 static void checkdead(void);
407 static void exitsyscall0(G*);
408 static void park0(G*);
409 static void goexit0(G*);
410 static void gfput(P*, G*);
411 static G* gfget(P*);
412 static void gfpurge(P*);
413 static void globrunqput(G*);
414 static void globrunqputbatch(G*, G*, int32);
415 static G* globrunqget(P*, int32);
416 static P* pidleget(void);
417 static void pidleput(P*);
418 static void injectglist(G*);
419 static bool preemptall(void);
420 static bool exitsyscallfast(void);
421 static void allgadd(G*);
423 bool runtime_isstarted;
425 // The bootstrap sequence is:
427 // call osinit
428 // call schedinit
429 // make & queue new G
430 // call runtime_mstart
432 // The new G calls runtime_main.
433 void
434 runtime_schedinit(void)
436 M *m;
437 int32 n, procs;
438 String s;
439 const byte *p;
440 Eface i;
442 runtime_sched = runtime_getsched();
444 m = &runtime_m0;
445 g = &runtime_g0;
446 m->g0 = g;
447 m->curg = g;
448 g->m = m;
450 initcontext();
452 runtime_sched->maxmcount = 10000;
453 runtime_precisestack = 0;
455 // runtime_symtabinit();
456 runtime_mallocinit();
457 mcommoninit(m);
458 runtime_alginit(); // maps must not be used before this call
460 // Initialize the itable value for newErrorCString,
461 // so that the next time it gets called, possibly
462 // in a fault during a garbage collection, it will not
463 // need to allocate memory.
464 runtime_newErrorCString(0, &i);
466 // Initialize the cached gotraceback value, since
467 // gotraceback calls getenv, which mallocs on Plan 9.
468 runtime_gotraceback(nil);
470 runtime_goargs();
471 runtime_goenvs();
472 runtime_parsedebugvars();
474 runtime_sched->lastpoll = runtime_nanotime();
475 procs = 1;
476 s = runtime_getenv("GOMAXPROCS");
477 p = s.str;
478 if(p != nil && (n = runtime_atoi(p, s.len)) > 0) {
479 if(n > _MaxGomaxprocs)
480 n = _MaxGomaxprocs;
481 procs = n;
483 runtime_allp = runtime_malloc((_MaxGomaxprocs+1)*sizeof(runtime_allp[0]));
484 procresize(procs);
486 // Can not enable GC until all roots are registered.
487 // mstats()->enablegc = 1;
490 extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
491 extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");
493 // Used to determine the field alignment.
495 struct field_align
497 char c;
498 Hchan *p;
501 // main_init_done is a signal used by cgocallbackg that initialization
502 // has been completed. It is made before _cgo_notify_runtime_init_done,
503 // so all cgo calls can rely on it existing. When main_init is
504 // complete, it is closed, meaning cgocallbackg can reliably receive
505 // from it.
506 Hchan *runtime_main_init_done;
508 // The chan bool type, for runtime_main_init_done.
510 extern const struct __go_type_descriptor bool_type_descriptor
511 __asm__ (GOSYM_PREFIX "__go_tdn_bool");
513 static struct __go_channel_type chan_bool_type_descriptor =
515 /* __common */
517 /* __code */
518 GO_CHAN,
519 /* __align */
520 __alignof (Hchan *),
521 /* __field_align */
522 offsetof (struct field_align, p) - 1,
523 /* __size */
524 sizeof (Hchan *),
525 /* __hash */
526 0, /* This value doesn't matter. */
527 /* __hashfn */
528 NULL,
529 /* __equalfn */
530 NULL,
531 /* __gc */
532 NULL, /* This value doesn't matter */
533 /* __reflection */
534 NULL, /* This value doesn't matter */
535 /* __uncommon */
536 NULL,
537 /* __pointer_to_this */
538 NULL
540 /* __element_type */
541 &bool_type_descriptor,
542 /* __dir */
543 CHANNEL_BOTH_DIR
546 extern Hchan *makechan (ChanType *, int64)
547 __asm__ (GOSYM_PREFIX "runtime.makechan");
548 extern void closechan(Hchan *) __asm__ (GOSYM_PREFIX "runtime.closechan");
550 static void
551 initDone(void *arg __attribute__ ((unused))) {
552 runtime_unlockOSThread();
555 // The main goroutine.
556 // Note: C frames in general are not copyable during stack growth, for two reasons:
557 // 1) We don't know where in a frame to find pointers to other stack locations.
558 // 2) There's no guarantee that globals or heap values do not point into the frame.
560 // The C frame for runtime.main is copyable, because:
561 // 1) There are no pointers to other stack locations in the frame
562 // (d.fn points at a global, d.link is nil, d.argp is -1).
563 // 2) The only pointer into this frame is from the defer chain,
564 // which is explicitly handled during stack copying.
565 void
566 runtime_main(void* dummy __attribute__((unused)))
568 Defer d;
569 _Bool frame;
571 newm(sysmon, nil);
573 // Lock the main goroutine onto this, the main OS thread,
574 // during initialization. Most programs won't care, but a few
575 // do require certain calls to be made by the main thread.
576 // Those can arrange for main.main to run in the main thread
577 // by calling runtime.LockOSThread during initialization
578 // to preserve the lock.
579 runtime_lockOSThread();
581 // Defer unlock so that runtime.Goexit during init does the unlock too.
582 d.pfn = (uintptr)(void*)initDone;
583 d.link = g->_defer;
584 d.arg = (void*)-1;
585 d._panic = g->_panic;
586 d.retaddr = 0;
587 d.makefunccanrecover = 0;
588 d.frame = &frame;
589 d.special = true;
590 g->_defer = &d;
592 if(g->m != &runtime_m0)
593 runtime_throw("runtime_main not on m0");
594 __go_go(runtime_MHeap_Scavenger, nil);
596 runtime_main_init_done = makechan(&chan_bool_type_descriptor, 0);
598 _cgo_notify_runtime_init_done();
600 main_init();
602 closechan(runtime_main_init_done);
604 if(g->_defer != &d || (void*)d.pfn != initDone)
605 runtime_throw("runtime: bad defer entry after init");
606 g->_defer = d.link;
607 runtime_unlockOSThread();
609 // For gccgo we have to wait until after main is initialized
610 // to enable GC, because initializing main registers the GC
611 // roots.
612 mstats()->enablegc = 1;
614 if(runtime_isarchive) {
615 // This is not a complete program, but is instead a
616 // library built using -buildmode=c-archive or
617 // c-shared. Now that we are initialized, there is
618 // nothing further to do.
619 return;
622 main_main();
624 // Make racy client program work: if panicking on
625 // another goroutine at the same time as main returns,
626 // let the other goroutine finish printing the panic trace.
627 // Once it does, it will exit. See issue 3934.
628 if(runtime_panicking())
629 runtime_park(nil, nil, "panicwait");
631 runtime_exit(0);
632 for(;;)
633 *(int32*)0 = 0;
636 void
637 runtime_tracebackothers(G * volatile me)
639 G * volatile gp;
640 Traceback tb;
641 int32 traceback;
642 Slice slice;
643 volatile uintptr i;
645 tb.gp = me;
646 traceback = runtime_gotraceback(nil);
648 // Show the current goroutine first, if we haven't already.
649 if((gp = g->m->curg) != nil && gp != me) {
650 runtime_printf("\n");
651 runtime_goroutineheader(gp);
652 gp->traceback = &tb;
654 #ifdef USING_SPLIT_STACK
655 __splitstack_getcontext(&me->stackcontext[0]);
656 #endif
657 getcontext(ucontext_arg(&me->context[0]));
659 if(gp->traceback != nil) {
660 runtime_gogo(gp);
663 slice.__values = &tb.locbuf[0];
664 slice.__count = tb.c;
665 slice.__capacity = tb.c;
666 runtime_printtrace(slice, nil);
667 runtime_printcreatedby(gp);
670 runtime_lock(&allglock);
671 for(i = 0; i < runtime_allglen; i++) {
672 gp = runtime_allg[i];
673 if(gp == me || gp == g->m->curg || gp->atomicstatus == _Gdead)
674 continue;
675 if(gp->issystem && traceback < 2)
676 continue;
677 runtime_printf("\n");
678 runtime_goroutineheader(gp);
680 // Our only mechanism for doing a stack trace is
681 // _Unwind_Backtrace. And that only works for the
682 // current thread, not for other random goroutines.
683 // So we need to switch context to the goroutine, get
684 // the backtrace, and then switch back.
686 // This means that if g is running or in a syscall, we
687 // can't reliably print a stack trace. FIXME.
689 if(gp->atomicstatus == _Grunning) {
690 runtime_printf("\tgoroutine running on other thread; stack unavailable\n");
691 runtime_printcreatedby(gp);
692 } else if(gp->atomicstatus == _Gsyscall) {
693 runtime_printf("\tgoroutine in C code; stack unavailable\n");
694 runtime_printcreatedby(gp);
695 } else {
696 gp->traceback = &tb;
698 #ifdef USING_SPLIT_STACK
699 __splitstack_getcontext(&me->stackcontext[0]);
700 #endif
701 getcontext(ucontext_arg(&me->context[0]));
703 if(gp->traceback != nil) {
704 runtime_gogo(gp);
707 slice.__values = &tb.locbuf[0];
708 slice.__count = tb.c;
709 slice.__capacity = tb.c;
710 runtime_printtrace(slice, nil);
711 runtime_printcreatedby(gp);
714 runtime_unlock(&allglock);
717 static void
718 checkmcount(void)
720 // sched lock is held
721 if(runtime_sched->mcount > runtime_sched->maxmcount) {
722 runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched->maxmcount);
723 runtime_throw("thread exhaustion");
727 // Do a stack trace of gp, and then restore the context to
728 // gp->traceback->gp.
730 static void
731 gtraceback(G* gp)
733 Traceback* traceback;
735 traceback = gp->traceback;
736 gp->traceback = nil;
737 if(gp->m != nil)
738 runtime_throw("gtraceback: m is not nil");
739 gp->m = traceback->gp->m;
740 traceback->c = runtime_callers(1, traceback->locbuf,
741 sizeof traceback->locbuf / sizeof traceback->locbuf[0], false);
742 gp->m = nil;
743 runtime_gogo(traceback->gp);
746 static void
747 mcommoninit(M *mp)
749 // If there is no mcache runtime_callers() will crash,
750 // and we are most likely in the sysmon thread, so the stack is senseless anyway.
751 if(g->m->mcache)
752 runtime_callers(1, mp->createstack, nelem(mp->createstack), false);
754 mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
756 runtime_lock(&runtime_sched->lock);
757 mp->id = runtime_sched->mcount++;
758 checkmcount();
759 runtime_mpreinit(mp);
761 // Add to runtime_allm so garbage collector doesn't free m
762 // when it is just in a register or thread-local storage.
763 mp->alllink = runtime_allm;
764 // runtime_NumCgoCall() iterates over allm w/o schedlock,
765 // so we need to publish it safely.
766 runtime_atomicstorep(&runtime_allm, mp);
767 runtime_unlock(&runtime_sched->lock);
770 // Mark gp ready to run.
771 void
772 runtime_ready(G *gp)
774 // Mark runnable.
775 g->m->locks++; // disable preemption because it can be holding p in a local var
776 if(gp->atomicstatus != _Gwaiting) {
777 runtime_printf("goroutine %D has status %d\n", gp->goid, gp->atomicstatus);
778 runtime_throw("bad g->atomicstatus in ready");
780 gp->atomicstatus = _Grunnable;
781 runqput((P*)g->m->p, gp);
782 if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0) // TODO: fast atomic
783 wakep();
784 g->m->locks--;
787 void goready(G*, int) __asm__ (GOSYM_PREFIX "runtime.goready");
789 void
790 goready(G* gp, int traceskip __attribute__ ((unused)))
792 runtime_ready(gp);
795 int32
796 runtime_gcprocs(void)
798 int32 n;
800 // Figure out how many CPUs to use during GC.
801 // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
802 runtime_lock(&runtime_sched->lock);
803 n = runtime_gomaxprocs;
804 if(n > runtime_ncpu)
805 n = runtime_ncpu > 0 ? runtime_ncpu : 1;
806 if(n > MaxGcproc)
807 n = MaxGcproc;
808 if(n > runtime_sched->nmidle+1) // one M is currently running
809 n = runtime_sched->nmidle+1;
810 runtime_unlock(&runtime_sched->lock);
811 return n;
814 static bool
815 needaddgcproc(void)
817 int32 n;
819 runtime_lock(&runtime_sched->lock);
820 n = runtime_gomaxprocs;
821 if(n > runtime_ncpu)
822 n = runtime_ncpu;
823 if(n > MaxGcproc)
824 n = MaxGcproc;
825 n -= runtime_sched->nmidle+1; // one M is currently running
826 runtime_unlock(&runtime_sched->lock);
827 return n > 0;
830 void
831 runtime_helpgc(int32 nproc)
833 M *mp;
834 int32 n, pos;
836 runtime_lock(&runtime_sched->lock);
837 pos = 0;
838 for(n = 1; n < nproc; n++) { // one M is currently running
839 if(runtime_allp[pos]->mcache == g->m->mcache)
840 pos++;
841 mp = mget();
842 if(mp == nil)
843 runtime_throw("runtime_gcprocs inconsistency");
844 mp->helpgc = n;
845 mp->mcache = runtime_allp[pos]->mcache;
846 pos++;
847 runtime_notewakeup(&mp->park);
849 runtime_unlock(&runtime_sched->lock);
852 // Similar to stoptheworld but best-effort and can be called several times.
853 // There is no reverse operation; it is used during crashing.
854 // This function must not lock any mutexes.
855 void
856 runtime_freezetheworld(void)
858 int32 i;
860 if(runtime_gomaxprocs == 1)
861 return;
862 // stopwait and preemption requests can be lost
863 // due to races with concurrently executing threads,
864 // so try several times
865 for(i = 0; i < 5; i++) {
866 // this should tell the scheduler to not start any new goroutines
867 runtime_sched->stopwait = 0x7fffffff;
868 runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1);
869 // this should stop running goroutines
870 if(!preemptall())
871 break; // no running goroutines
872 runtime_usleep(1000);
874 // to be sure
875 runtime_usleep(1000);
876 preemptall();
877 runtime_usleep(1000);
880 void
881 runtime_stopTheWorldWithSema(void)
883 int32 i;
884 uint32 s;
885 P *p;
886 bool wait;
888 runtime_lock(&runtime_sched->lock);
889 runtime_sched->stopwait = runtime_gomaxprocs;
890 runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1);
891 preemptall();
892 // stop current P
893 ((P*)g->m->p)->status = _Pgcstop;
894 runtime_sched->stopwait--;
895 // try to retake all P's in _Psyscall status
896 for(i = 0; i < runtime_gomaxprocs; i++) {
897 p = runtime_allp[i];
898 s = p->status;
899 if(s == _Psyscall && runtime_cas(&p->status, s, _Pgcstop))
900 runtime_sched->stopwait--;
902 // stop idle P's
903 while((p = pidleget()) != nil) {
904 p->status = _Pgcstop;
905 runtime_sched->stopwait--;
907 wait = runtime_sched->stopwait > 0;
908 runtime_unlock(&runtime_sched->lock);
910 // wait for remaining P's to stop voluntarily
911 if(wait) {
912 runtime_notesleep(&runtime_sched->stopnote);
913 runtime_noteclear(&runtime_sched->stopnote);
915 if(runtime_sched->stopwait)
916 runtime_throw("stoptheworld: not stopped");
917 for(i = 0; i < runtime_gomaxprocs; i++) {
918 p = runtime_allp[i];
919 if(p->status != _Pgcstop)
920 runtime_throw("stoptheworld: not stopped");
924 static void
925 mhelpgc(void)
927 g->m->helpgc = -1;
930 void
931 runtime_startTheWorldWithSema(void)
933 P *p, *p1;
934 M *mp;
935 G *gp;
936 bool add;
938 g->m->locks++; // disable preemption because it can be holding p in a local var
939 gp = runtime_netpoll(false); // non-blocking
940 injectglist(gp);
941 add = needaddgcproc();
942 runtime_lock(&runtime_sched->lock);
943 if(newprocs) {
944 procresize(newprocs);
945 newprocs = 0;
946 } else
947 procresize(runtime_gomaxprocs);
948 runtime_sched->gcwaiting = 0;
950 p1 = nil;
951 while((p = pidleget()) != nil) {
952 // procresize() puts p's with work at the beginning of the list.
953 // Once we reach a p without a run queue, the rest don't have one either.
954 if(p->runqhead == p->runqtail) {
955 pidleput(p);
956 break;
958 p->m = (uintptr)mget();
959 p->link = (uintptr)p1;
960 p1 = p;
962 if(runtime_sched->sysmonwait) {
963 runtime_sched->sysmonwait = false;
964 runtime_notewakeup(&runtime_sched->sysmonnote);
966 runtime_unlock(&runtime_sched->lock);
968 while(p1) {
969 p = p1;
970 p1 = (P*)p1->link;
971 if(p->m) {
972 mp = (M*)p->m;
973 p->m = 0;
974 if(mp->nextp)
975 runtime_throw("startTheWorldWithSema: inconsistent mp->nextp");
976 mp->nextp = (uintptr)p;
977 runtime_notewakeup(&mp->park);
978 } else {
979 // Start M to run P. Do not start another M below.
980 newm(nil, p);
981 add = false;
985 if(add) {
986 // If GC could have used another helper proc, start one now,
987 // in the hope that it will be available next time.
988 // It would have been even better to start it before the collection,
989 // but doing so requires allocating memory, so it's tricky to
990 // coordinate. This lazy approach works out in practice:
991 // we don't mind if the first couple gc rounds don't have quite
992 // the maximum number of procs.
993 newm(mhelpgc, nil);
995 g->m->locks--;
998 // Called to start an M.
999 void*
1000 runtime_mstart(void* mp)
1002 M *m;
1004 m = (M*)mp;
1005 g = m->g0;
1006 g->m = m;
1008 initcontext();
1010 g->entry = nil;
1011 g->param = nil;
1013 // Record top of stack for use by mcall.
1014 // Once we call schedule we're never coming back,
1015 // so other calls can reuse this stack space.
1016 #ifdef USING_SPLIT_STACK
1017 __splitstack_getcontext(&g->stackcontext[0]);
1018 #else
1019 g->gcinitialsp = &mp;
1020 // Setting gcstacksize to 0 is a marker meaning that gcinitialsp
1021 // is the top of the stack, not the bottom.
1022 g->gcstacksize = 0;
1023 g->gcnextsp = &mp;
1024 #endif
1025 getcontext(ucontext_arg(&g->context[0]));
1027 if(g->entry != nil) {
1028 // Got here from mcall.
1029 void (*pfn)(G*) = (void (*)(G*))g->entry;
1030 G* gp = (G*)g->param;
1031 pfn(gp);
1032 *(int*)0x21 = 0x21;
1034 runtime_minit();
1036 #ifdef USING_SPLIT_STACK
1038 int dont_block_signals = 0;
1039 __splitstack_block_signals(&dont_block_signals, nil);
1041 #endif
1043 // Install signal handlers; after minit so that minit can
1044 // prepare the thread to be able to handle the signals.
1045 if(m == &runtime_m0) {
1046 if(runtime_iscgo && !runtime_cgoHasExtraM) {
1047 runtime_cgoHasExtraM = true;
1048 runtime_newextram();
1049 runtime_needextram = 0;
1051 runtime_initsig(false);
1054 if(m->mstartfn)
1055 ((void (*)(void))m->mstartfn)();
1057 if(m->helpgc) {
1058 m->helpgc = 0;
1059 stopm();
1060 } else if(m != &runtime_m0) {
1061 acquirep((P*)m->nextp);
1062 m->nextp = 0;
1064 schedule();
1066 // TODO(brainman): This point is never reached, because the scheduler
1067 // does not release OS threads at the moment. But once this path
1068 // is enabled, we must remove our seh here.
1070 return nil;
1073 typedef struct CgoThreadStart CgoThreadStart;
1074 struct CgoThreadStart
1076 M *m;
1077 G *g;
1078 uintptr *tls;
1079 void (*fn)(void);
1082 // Allocate a new m unassociated with any thread.
1083 // Can use p for allocation context if needed.
1084 M*
1085 runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, uintptr* ret_g0_stacksize)
1087 M *mp;
1089 g->m->locks++; // disable GC because it can be called from sysmon
1090 if(g->m->p == 0)
1091 acquirep(p); // temporarily borrow p for mallocs in this function
1092 #if 0
1093 if(mtype == nil) {
1094 Eface e;
1095 runtime_gc_m_ptr(&e);
1096 mtype = ((const PtrType*)e.__type_descriptor)->__element_type;
1098 #endif
1100 mp = runtime_mal(sizeof *mp);
1101 mcommoninit(mp);
1102 mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize);
1103 mp->g0->m = mp;
1105 if(p == (P*)g->m->p)
1106 releasep();
1107 g->m->locks--;
1109 return mp;
1112 static G*
1113 allocg(void)
1115 G *gp;
1116 // static Type *gtype;
1118 // if(gtype == nil) {
1119 // Eface e;
1120 // runtime_gc_g_ptr(&e);
1121 // gtype = ((PtrType*)e.__type_descriptor)->__element_type;
1122 // }
1123 // gp = runtime_cnew(gtype);
1124 gp = runtime_malloc(sizeof(G));
1125 return gp;
1128 static M* lockextra(bool nilokay);
1129 static void unlockextra(M*);
1131 // needm is called when a cgo callback happens on a
1132 // thread without an m (a thread not created by Go).
1133 // In this case, needm is expected to find an m to use
1134 // and return with m, g initialized correctly.
1135 // Since m and g are not set now (likely nil, but see below)
1136 // needm is limited in what routines it can call. In particular
1137 // it can only call nosplit functions (textflag 7) and cannot
1138 // do any scheduling that requires an m.
1140 // In order to avoid needing heavy lifting here, we adopt
1141 // the following strategy: there is a stack of available m's
1142 // that can be stolen. Using compare-and-swap
1143 // to pop from the stack has ABA races, so we simulate
1144 // a lock by doing an exchange (via casp) to steal the stack
1145 // head and replace the top pointer with MLOCKED (1).
1146 // This serves as a simple spin lock that we can use even
1147 // without an m. The thread that locks the stack in this way
1148 // unlocks the stack by storing a valid stack head pointer.
1150 // In order to make sure that there is always an m structure
1151 // available to be stolen, we maintain the invariant that there
1152 // is always one more than needed. At the beginning of the
1153 // program (if cgo is in use) the list is seeded with a single m.
1154 // If needm finds that it has taken the last m off the list, its job
1155 // is - once it has installed its own m so that it can do things like
1156 // allocate memory - to create a spare m and put it on the list.
1158 // Each of these extra m's also has a g0 and a curg that are
1159 // pressed into service as the scheduling stack and current
1160 // goroutine for the duration of the cgo callback.
1162 // When the callback is done with the m, it calls dropm to
1163 // put the m back on the list.
1165 // Unlike the gc toolchain, we start running on curg, since we are
1166 // just going to return and let the caller continue.
1167 void
1168 runtime_needm(void)
1170 M *mp;
1172 if(runtime_needextram) {
1173 // Can happen if C/C++ code calls Go from a global ctor.
1174 // Can not throw, because scheduler is not initialized yet.
1175 int rv __attribute__((unused));
1176 rv = runtime_write(2, "fatal error: cgo callback before cgo call\n",
1177 sizeof("fatal error: cgo callback before cgo call\n")-1);
1178 runtime_exit(1);
1181 // Lock extra list, take head, unlock popped list.
1182 // nilokay=false is safe here because of the invariant above,
1183 // that the extra list always contains or will soon contain
1184 // at least one m.
1185 mp = lockextra(false);
1187 // Set needextram when we've just emptied the list,
1188 // so that the eventual call into cgocallbackg will
1189 // allocate a new m for the extra list. We delay the
1190 // allocation until then so that it can be done
1191 // after exitsyscall makes sure it is okay to be
1192 // running at all (that is, there's no garbage collection
1193 // running right now).
1194 mp->needextram = mp->schedlink == 0;
1195 unlockextra((M*)mp->schedlink);
1197 // Install g (= m->curg).
1198 runtime_setg(mp->curg);
1200 // Initialize g's context as in mstart.
1201 initcontext();
1202 g->atomicstatus = _Gsyscall;
1203 g->entry = nil;
1204 g->param = nil;
1205 #ifdef USING_SPLIT_STACK
1206 __splitstack_getcontext(&g->stackcontext[0]);
1207 #else
1208 g->gcinitialsp = &mp;
1209 g->gcstack = nil;
1210 g->gcstacksize = 0;
1211 g->gcnextsp = &mp;
1212 #endif
1213 getcontext(ucontext_arg(&g->context[0]));
1215 if(g->entry != nil) {
1216 // Got here from mcall.
1217 void (*pfn)(G*) = (void (*)(G*))g->entry;
1218 G* gp = (G*)g->param;
1219 pfn(gp);
1220 *(int*)0x22 = 0x22;
1223 // Initialize this thread to use the m.
1224 runtime_minit();
1226 #ifdef USING_SPLIT_STACK
1228 int dont_block_signals = 0;
1229 __splitstack_block_signals(&dont_block_signals, nil);
1231 #endif
1234 // newextram allocates an m and puts it on the extra list.
1235 // It is called with a working local m, so that it can do things
1236 // like call schedlock and allocate.
1237 void
1238 runtime_newextram(void)
1240 M *mp, *mnext;
1241 G *gp;
1242 byte *g0_sp, *sp;
1243 uintptr g0_spsize, spsize;
1244 ucontext_t *uc;
1246 // Create extra goroutine locked to extra m.
1247 // The goroutine is the context in which the cgo callback will run.
1248 // The sched.pc will never be returned to, but setting it to
1249 // runtime.goexit makes clear to the traceback routines where
1250 // the goroutine stack ends.
1251 mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize);
1252 gp = runtime_malg(StackMin, &sp, &spsize);
1253 gp->atomicstatus = _Gdead;
1254 gp->m = mp;
1255 mp->curg = gp;
1256 mp->locked = _LockInternal;
1257 mp->lockedg = gp;
1258 gp->lockedm = mp;
1259 gp->goid = runtime_xadd64(&runtime_sched->goidgen, 1);
1260 // put on allg for garbage collector
1261 allgadd(gp);
1263 // The context for gp will be set up in runtime_needm. But
1264 // here we need to set up the context for g0.
1265 uc = ucontext_arg(&mp->g0->context[0]);
1266 getcontext(uc);
1267 uc->uc_stack.ss_sp = g0_sp;
1268 uc->uc_stack.ss_size = (size_t)g0_spsize;
1269 makecontext(uc, kickoff, 0);
1271 // Add m to the extra list.
1272 mnext = lockextra(true);
1273 mp->schedlink = (uintptr)mnext;
1274 unlockextra(mp);
1277 // dropm is called when a cgo callback has called needm but is now
1278 // done with the callback and returning back into the non-Go thread.
1279 // It puts the current m back onto the extra list.
1281 // The main expense here is the call to signalstack to release the
1282 // m's signal stack, and then the call to needm on the next callback
1283 // from this thread. It is tempting to try to save the m for next time,
1284 // which would eliminate both these costs, but there might not be
1285 // a next time: the current thread (which Go does not control) might exit.
1286 // If we saved the m for that thread, there would be an m leak each time
1287 // such a thread exited. Instead, we acquire and release an m on each
1288 // call. These should typically not be scheduling operations, just a few
1289 // atomics, so the cost should be small.
1291 // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
1292 // variable using pthread_key_create. Unlike the pthread keys we already use
1293 // on OS X, this dummy key would never be read by Go code. It would exist
1294 // only so that we could register at thread-exit-time destructor.
1295 // That destructor would put the m back onto the extra list.
1296 // This is purely a performance optimization. The current version,
1297 // in which dropm happens on each cgo call, is still correct too.
1298 // We may have to keep the current version on systems with cgo
1299 // but without pthreads, like Windows.
1300 void
1301 runtime_dropm(void)
1303 M *mp, *mnext;
1305 // Undo whatever initialization minit did during needm.
1306 runtime_unminit();
1308 // Clear m and g, and return m to the extra list.
1309 // After the call to setg we can only call nosplit functions.
1310 mp = g->m;
1311 runtime_setg(nil);
1313 mp->curg->atomicstatus = _Gdead;
1314 mp->curg->gcstack = nil;
1315 mp->curg->gcnextsp = nil;
1317 mnext = lockextra(true);
1318 mp->schedlink = (uintptr)mnext;
1319 unlockextra(mp);
1322 #define MLOCKED ((M*)1)
1324 // lockextra locks the extra list and returns the list head.
1325 // The caller must unlock the list by storing a new list head
1326 // to runtime.extram. If nilokay is true, then lockextra will
1327 // return a nil list head if that's what it finds. If nilokay is false,
1328 // lockextra will keep waiting until the list head is no longer nil.
1329 static M*
1330 lockextra(bool nilokay)
1332 M *mp;
1333 void (*yield)(void);
1335 for(;;) {
1336 mp = runtime_atomicloadp(&runtime_extram);
1337 if(mp == MLOCKED) {
1338 yield = runtime_osyield;
1339 yield();
1340 continue;
1342 if(mp == nil && !nilokay) {
1343 runtime_usleep(1);
1344 continue;
1346 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1347 yield = runtime_osyield;
1348 yield();
1349 continue;
1351 break;
1353 return mp;
1356 static void
1357 unlockextra(M *mp)
1359 runtime_atomicstorep(&runtime_extram, mp);
1362 static int32
1363 countextra()
1365 M *mp, *mc;
1366 int32 c;
1368 for(;;) {
1369 mp = runtime_atomicloadp(&runtime_extram);
1370 if(mp == MLOCKED) {
1371 runtime_osyield();
1372 continue;
1374 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1375 runtime_osyield();
1376 continue;
1378 c = 0;
1379 for(mc = mp; mc != nil; mc = (M*)mc->schedlink)
1380 c++;
1381 runtime_atomicstorep(&runtime_extram, mp);
1382 return c;
1386 // Create a new m. It will start off with a call to fn, or else the scheduler.
1387 static void
1388 newm(void(*fn)(void), P *p)
1390 M *mp;
1392 mp = runtime_allocm(p, -1, nil, nil);
1393 mp->nextp = (uintptr)p;
1394 mp->mstartfn = (uintptr)(void*)fn;
1396 runtime_newosproc(mp);
1399 // Stops execution of the current m until new work is available.
1400 // Returns with acquired P.
1401 static void
1402 stopm(void)
1404 M* m;
1406 m = g->m;
1407 if(m->locks)
1408 runtime_throw("stopm holding locks");
1409 if(m->p)
1410 runtime_throw("stopm holding p");
1411 if(m->spinning) {
1412 m->spinning = false;
1413 runtime_xadd(&runtime_sched->nmspinning, -1);
1416 retry:
1417 runtime_lock(&runtime_sched->lock);
1418 mput(m);
1419 runtime_unlock(&runtime_sched->lock);
1420 runtime_notesleep(&m->park);
1421 m = g->m;
1422 runtime_noteclear(&m->park);
1423 if(m->helpgc) {
1424 runtime_gchelper();
1425 m->helpgc = 0;
1426 m->mcache = nil;
1427 goto retry;
1429 acquirep((P*)m->nextp);
1430 m->nextp = 0;
1433 static void
1434 mspinning(void)
1436 g->m->spinning = true;
1439 // Schedules some M to run the p (creates an M if necessary).
1440 // If p==nil, tries to get an idle P, if no idle P's does nothing.
1441 static void
1442 startm(P *p, bool spinning)
1444 M *mp;
1445 void (*fn)(void);
1447 runtime_lock(&runtime_sched->lock);
1448 if(p == nil) {
1449 p = pidleget();
1450 if(p == nil) {
1451 runtime_unlock(&runtime_sched->lock);
1452 if(spinning)
1453 runtime_xadd(&runtime_sched->nmspinning, -1);
1454 return;
1457 mp = mget();
1458 runtime_unlock(&runtime_sched->lock);
1459 if(mp == nil) {
1460 fn = nil;
1461 if(spinning)
1462 fn = mspinning;
1463 newm(fn, p);
1464 return;
1466 if(mp->spinning)
1467 runtime_throw("startm: m is spinning");
1468 if(mp->nextp)
1469 runtime_throw("startm: m has p");
1470 mp->spinning = spinning;
1471 mp->nextp = (uintptr)p;
1472 runtime_notewakeup(&mp->park);
1475 // Hands off P from syscall or locked M.
1476 static void
1477 handoffp(P *p)
1479 // if it has local work, start it straight away
1480 if(p->runqhead != p->runqtail || runtime_sched->runqsize) {
1481 startm(p, false);
1482 return;
1484 // no local work, check that there are no spinning/idle M's,
1485 // otherwise our help is not required
1486 if(runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) == 0 && // TODO: fast atomic
1487 runtime_cas(&runtime_sched->nmspinning, 0, 1)) {
1488 startm(p, true);
1489 return;
1491 runtime_lock(&runtime_sched->lock);
1492 if(runtime_sched->gcwaiting) {
1493 p->status = _Pgcstop;
1494 if(--runtime_sched->stopwait == 0)
1495 runtime_notewakeup(&runtime_sched->stopnote);
1496 runtime_unlock(&runtime_sched->lock);
1497 return;
1499 if(runtime_sched->runqsize) {
1500 runtime_unlock(&runtime_sched->lock);
1501 startm(p, false);
1502 return;
1504 // If this is the last running P and nobody is polling the network,
1505 // we need to wake up another M to poll the network.
1506 if(runtime_sched->npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched->lastpoll) != 0) {
1507 runtime_unlock(&runtime_sched->lock);
1508 startm(p, false);
1509 return;
1511 pidleput(p);
1512 runtime_unlock(&runtime_sched->lock);
1515 // Tries to add one more P to execute G's.
1516 // Called when a G is made runnable (newproc, ready).
1517 static void
1518 wakep(void)
1520 // be conservative about spinning threads
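	// The compare-and-swap below is both the check and the claim: it
	// succeeds only when nmspinning goes from 0 to 1, so wakep starts
	// at most one spinning M at a time.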
1521 if(!runtime_cas(&runtime_sched->nmspinning, 0, 1))
1522 return;
1523 startm(nil, true);
1526 // Stops execution of the current m that is locked to a g until the g is runnable again.
1527 // Returns with acquired P.
1528 static void
1529 stoplockedm(void)
1531 M *m;
1532 P *p;
1534 m = g->m;
1535 if(m->lockedg == nil || m->lockedg->lockedm != m)
1536 runtime_throw("stoplockedm: inconsistent locking");
1537 if(m->p) {
1538 // Schedule another M to run this p.
1539 p = releasep();
1540 handoffp(p);
1542 incidlelocked(1);
1543 // Wait until another thread schedules lockedg again.
1544 runtime_notesleep(&m->park);
1545 m = g->m;
1546 runtime_noteclear(&m->park);
1547 if(m->lockedg->atomicstatus != _Grunnable)
1548 runtime_throw("stoplockedm: not runnable");
1549 acquirep((P*)m->nextp);
1550 m->nextp = 0;
1553 // Schedules the locked m to run the locked gp.
1554 static void
1555 startlockedm(G *gp)
1557 M *mp;
1558 P *p;
1560 mp = gp->lockedm;
1561 if(mp == g->m)
1562 runtime_throw("startlockedm: locked to me");
1563 if(mp->nextp)
1564 runtime_throw("startlockedm: m has p");
1565 // directly handoff current P to the locked m
1566 incidlelocked(-1);
1567 p = releasep();
1568 mp->nextp = (uintptr)p;
1569 runtime_notewakeup(&mp->park);
1570 stopm();
1573 // Stops the current m for stoptheworld.
1574 // Returns when the world is restarted.
1575 static void
1576 gcstopm(void)
1578 P *p;
1580 if(!runtime_sched->gcwaiting)
1581 runtime_throw("gcstopm: not waiting for gc");
1582 if(g->m->spinning) {
1583 g->m->spinning = false;
1584 runtime_xadd(&runtime_sched->nmspinning, -1);
1586 p = releasep();
1587 runtime_lock(&runtime_sched->lock);
1588 p->status = _Pgcstop;
1589 if(--runtime_sched->stopwait == 0)
1590 runtime_notewakeup(&runtime_sched->stopnote);
1591 runtime_unlock(&runtime_sched->lock);
1592 stopm();
1595 // Schedules gp to run on the current M.
1596 // Never returns.
1597 static void
1598 execute(G *gp)
1600 int32 hz;
1602 if(gp->atomicstatus != _Grunnable) {
1603 runtime_printf("execute: bad g status %d\n", gp->atomicstatus);
1604 runtime_throw("execute: bad g status");
1606 gp->atomicstatus = _Grunning;
1607 gp->waitsince = 0;
1608 ((P*)g->m->p)->schedtick++;
1609 g->m->curg = gp;
1610 gp->m = g->m;
1612 // Check whether the profiler needs to be turned on or off.
1613 hz = runtime_sched->profilehz;
1614 if(g->m->profilehz != hz)
1615 runtime_resetcpuprofiler(hz);
1617 runtime_gogo(gp);
1620 // Finds a runnable goroutine to execute.
1621 // Tries to steal from other P's, get g from global queue, poll network.
1622 static G*
1623 findrunnable(void)
1625 G *gp;
1626 P *p;
1627 int32 i;
1629 top:
1630 if(runtime_sched->gcwaiting) {
1631 gcstopm();
1632 goto top;
1634 if(runtime_fingwait && runtime_fingwake && (gp = runtime_wakefing()) != nil)
1635 runtime_ready(gp);
1636 // local runq
1637 gp = runqget((P*)g->m->p);
1638 if(gp)
1639 return gp;
1640 // global runq
1641 if(runtime_sched->runqsize) {
1642 runtime_lock(&runtime_sched->lock);
1643 gp = globrunqget((P*)g->m->p, 0);
1644 runtime_unlock(&runtime_sched->lock);
1645 if(gp)
1646 return gp;
1648 // poll network
1649 gp = runtime_netpoll(false); // non-blocking
1650 if(gp) {
1651 injectglist((G*)gp->schedlink);
1652 gp->atomicstatus = _Grunnable;
1653 return gp;
1655 // If number of spinning M's >= number of busy P's, block.
1656 // This is necessary to prevent excessive CPU consumption
1657 // when GOMAXPROCS>>1 but the program parallelism is low.
1658 if(!g->m->spinning && 2 * runtime_atomicload(&runtime_sched->nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched->npidle)) // TODO: fast atomic
1659 goto stop;
1660 if(!g->m->spinning) {
1661 g->m->spinning = true;
1662 runtime_xadd(&runtime_sched->nmspinning, 1);
1664 // random steal from other P's
1665 for(i = 0; i < 2*runtime_gomaxprocs; i++) {
1666 if(runtime_sched->gcwaiting)
1667 goto top;
1668 p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
1669 if(p == (P*)g->m->p)
1670 gp = runqget(p);
1671 else
1672 gp = runqsteal((P*)g->m->p, p);
1673 if(gp)
1674 return gp;
1676 stop:
1677 // return P and block
1678 runtime_lock(&runtime_sched->lock);
1679 if(runtime_sched->gcwaiting) {
1680 runtime_unlock(&runtime_sched->lock);
1681 goto top;
1683 if(runtime_sched->runqsize) {
1684 gp = globrunqget((P*)g->m->p, 0);
1685 runtime_unlock(&runtime_sched->lock);
1686 return gp;
1688 p = releasep();
1689 pidleput(p);
1690 runtime_unlock(&runtime_sched->lock);
1691 if(g->m->spinning) {
1692 g->m->spinning = false;
1693 runtime_xadd(&runtime_sched->nmspinning, -1);
1695 // check all runqueues once again
1696 for(i = 0; i < runtime_gomaxprocs; i++) {
1697 p = runtime_allp[i];
1698 if(p && p->runqhead != p->runqtail) {
1699 runtime_lock(&runtime_sched->lock);
1700 p = pidleget();
1701 runtime_unlock(&runtime_sched->lock);
1702 if(p) {
1703 acquirep(p);
1704 goto top;
1706 break;
1709 // poll network
1710 if(runtime_xchg64(&runtime_sched->lastpoll, 0) != 0) {
1711 if(g->m->p)
1712 runtime_throw("findrunnable: netpoll with p");
1713 if(g->m->spinning)
1714 runtime_throw("findrunnable: netpoll with spinning");
1715 gp = runtime_netpoll(true); // block until new work is available
1716 runtime_atomicstore64(&runtime_sched->lastpoll, runtime_nanotime());
1717 if(gp) {
1718 runtime_lock(&runtime_sched->lock);
1719 p = pidleget();
1720 runtime_unlock(&runtime_sched->lock);
1721 if(p) {
1722 acquirep(p);
1723 injectglist((G*)gp->schedlink);
1724 gp->atomicstatus = _Grunnable;
1725 return gp;
1727 injectglist(gp);
1730 stopm();
1731 goto top;
1734 static void
1735 resetspinning(void)
1737 int32 nmspinning;
1739 if(g->m->spinning) {
1740 g->m->spinning = false;
1741 nmspinning = runtime_xadd(&runtime_sched->nmspinning, -1);
1742 if(nmspinning < 0)
1743 runtime_throw("findrunnable: negative nmspinning");
1744 } else
1745 nmspinning = runtime_atomicload(&runtime_sched->nmspinning);
1747 // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
1748 // so see if we need to wakeup another P here.
1749 if (nmspinning == 0 && runtime_atomicload(&runtime_sched->npidle) > 0)
1750 wakep();
1753 // Injects the list of runnable G's into the scheduler.
1754 // Can run concurrently with GC.
1755 static void
1756 injectglist(G *glist)
1758 int32 n;
1759 G *gp;
1761 if(glist == nil)
1762 return;
1763 runtime_lock(&runtime_sched->lock);
1764 for(n = 0; glist; n++) {
1765 gp = glist;
1766 glist = (G*)gp->schedlink;
1767 gp->atomicstatus = _Grunnable;
1768 globrunqput(gp);
1770 runtime_unlock(&runtime_sched->lock);
1772 for(; n && runtime_sched->npidle; n--)
1773 startm(nil, false);
1776 // One round of scheduler: find a runnable goroutine and execute it.
1777 // Never returns.
1778 static void
1779 schedule(void)
1781 G *gp;
1782 uint32 tick;
1784 if(g->m->locks)
1785 runtime_throw("schedule: holding locks");
1787 top:
1788 if(runtime_sched->gcwaiting) {
1789 gcstopm();
1790 goto top;
1793 gp = nil;
1794 // Check the global runnable queue once in a while to ensure fairness.
1795 // Otherwise two goroutines can completely occupy the local runqueue
1796 // by constantly respawning each other.
1797 tick = ((P*)g->m->p)->schedtick;
1798 // This is a fancy way to say tick%61==0,
1799 // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
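	// 0x4325c53f is 1126548799 = (2^36+3)/61, a rounded-up fixed-point
	// reciprocal of 61, so for any 32-bit tick the expression
	// ((uint64)tick*0x4325c53f)>>36 equals tick/61 exactly and the
	// subtraction below leaves tick%61.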
1800 if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched->runqsize > 0) {
1801 runtime_lock(&runtime_sched->lock);
1802 gp = globrunqget((P*)g->m->p, 1);
1803 runtime_unlock(&runtime_sched->lock);
1804 if(gp)
1805 resetspinning();
1807 if(gp == nil) {
1808 gp = runqget((P*)g->m->p);
1809 if(gp && g->m->spinning)
1810 runtime_throw("schedule: spinning with local work");
1812 if(gp == nil) {
1813 gp = findrunnable(); // blocks until work is available
1814 resetspinning();
1817 if(gp->lockedm) {
1818 // Hands off own p to the locked m,
1819 // then blocks waiting for a new p.
1820 startlockedm(gp);
1821 goto top;
1824 execute(gp);
1827 // Puts the current goroutine into a waiting state and calls unlockf.
1828 // If unlockf returns false, the goroutine is resumed.
1829 void
1830 runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason)
1832 if(g->atomicstatus != _Grunning)
1833 runtime_throw("bad g status");
1834 g->m->waitlock = lock;
1835 g->m->waitunlockf = unlockf;
1836 g->waitreason = runtime_gostringnocopy((const byte*)reason);
1837 runtime_mcall(park0);
1840 void gopark(FuncVal *, void *, String, byte, int)
1841 __asm__ (GOSYM_PREFIX "runtime.gopark");
1843 void
1844 gopark(FuncVal *unlockf, void *lock, String reason,
1845 byte traceEv __attribute__ ((unused)),
1846 int traceskip __attribute__ ((unused)))
1848 if(g->atomicstatus != _Grunning)
1849 runtime_throw("bad g status");
1850 g->m->waitlock = lock;
1851 g->m->waitunlockf = unlockf == nil ? nil : (void*)unlockf->fn;
1852 g->waitreason = reason;
1853 runtime_mcall(park0);
1856 static bool
1857 parkunlock(G *gp, void *lock)
1859 USED(gp);
1860 runtime_unlock(lock);
1861 return true;
1864 // Puts the current goroutine into a waiting state and unlocks the lock.
1865 // The goroutine can be made runnable again by calling runtime_ready(gp).
1866 void
1867 runtime_parkunlock(Lock *lock, const char *reason)
1869 runtime_park(parkunlock, lock, reason);
1872 void goparkunlock(Lock *, String, byte, int)
1873 __asm__ (GOSYM_PREFIX "runtime.goparkunlock");
1875 void
1876 goparkunlock(Lock *lock, String reason, byte traceEv __attribute__ ((unused)),
1877 int traceskip __attribute__ ((unused)))
1879 if(g->atomicstatus != _Grunning)
1880 runtime_throw("bad g status");
1881 g->m->waitlock = lock;
1882 g->m->waitunlockf = parkunlock;
1883 g->waitreason = reason;
1884 runtime_mcall(park0);
1887 // runtime_park continuation on g0.
1888 static void
1889 park0(G *gp)
1891 M *m;
1892 bool ok;
1894 m = g->m;
1895 gp->atomicstatus = _Gwaiting;
1896 gp->m = nil;
1897 m->curg = nil;
1898 if(m->waitunlockf) {
1899 ok = ((bool (*)(G*, void*))m->waitunlockf)(gp, m->waitlock);
1900 m->waitunlockf = nil;
1901 m->waitlock = nil;
1902 if(!ok) {
1903 gp->atomicstatus = _Grunnable;
1904 execute(gp); // Schedule it back, never returns.
1907 if(m->lockedg) {
1908 stoplockedm();
1909 execute(gp); // Never returns.
1911 schedule();
1914 // Scheduler yield.
1915 void
1916 runtime_gosched(void)
1918 if(g->atomicstatus != _Grunning)
1919 runtime_throw("bad g status");
1920 runtime_mcall(runtime_gosched0);
1923 // runtime_gosched continuation on g0.
1924 void
1925 runtime_gosched0(G *gp)
1927 M *m;
1929 m = g->m;
1930 gp->atomicstatus = _Grunnable;
1931 gp->m = nil;
1932 m->curg = nil;
1933 runtime_lock(&runtime_sched->lock);
1934 globrunqput(gp);
1935 runtime_unlock(&runtime_sched->lock);
1936 if(m->lockedg) {
1937 stoplockedm();
1938 execute(gp); // Never returns.
1940 schedule();
1943 // Finishes execution of the current goroutine.
1944 // Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack).
1945 // Since it does not return it does not matter. But if it is preempted
1946 // at the split stack check, GC will complain about inconsistent sp.
1947 void runtime_goexit1(void) __attribute__ ((noinline));
1948 void
1949 runtime_goexit1(void)
1951 if(g->atomicstatus != _Grunning)
1952 runtime_throw("bad g status");
1953 runtime_mcall(goexit0);
1956 // runtime_goexit1 continuation on g0.
1957 static void
1958 goexit0(G *gp)
1960 M *m;
1962 m = g->m;
1963 gp->atomicstatus = _Gdead;
1964 gp->entry = nil;
1965 gp->m = nil;
1966 gp->lockedm = nil;
1967 gp->paniconfault = 0;
1968 gp->_defer = nil; // should be true already but just in case.
1969 gp->_panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
1970 gp->writebuf.__values = nil;
1971 gp->writebuf.__count = 0;
1972 gp->writebuf.__capacity = 0;
1973 gp->waitreason = runtime_gostringnocopy(nil);
1974 gp->param = nil;
1975 m->curg = nil;
1976 m->lockedg = nil;
1977 if(m->locked & ~_LockExternal) {
1978 runtime_printf("invalid m->locked = %d\n", m->locked);
1979 runtime_throw("internal lockOSThread error");
1981 m->locked = 0;
1982 gfput((P*)m->p, gp);
1983 schedule();
1986 // The goroutine g is about to enter a system call.
1987 // Record that it's not using the cpu anymore.
1988 // This is called only from the go syscall library and cgocall,
1989 // not from the low-level system calls used by the runtime.
1991 // Entersyscall cannot split the stack: the runtime_gosave must
1992 // make g->sched refer to the caller's stack segment, because
1993 // entersyscall is going to return immediately after.
1995 void runtime_entersyscall(int32) __attribute__ ((no_split_stack));
1996 static void doentersyscall(uintptr, uintptr)
1997 __attribute__ ((no_split_stack, noinline));
1999 void
2000 runtime_entersyscall(int32 dummy __attribute__ ((unused)))
2002 // Save the registers in the g structure so that any pointers
2003 // held in registers will be seen by the garbage collector.
2004 getcontext(ucontext_arg(&g->gcregs[0]));
2006 // Do the work in a separate function, so that this function
2007 // doesn't save any registers on its own stack. If this
2008 // function does save any registers, we might store the wrong
2009 // value in the call to getcontext.
2011 // FIXME: This assumes that we do not need to save any
2012 // callee-saved registers to access the TLS variable g. We
2013 // don't want to put the ucontext_t on the stack because it is
2014 // large and we can not split the stack here.
2015 doentersyscall((uintptr)runtime_getcallerpc(&dummy),
2016 (uintptr)runtime_getcallersp(&dummy));
2019 static void
2020 doentersyscall(uintptr pc, uintptr sp)
2022 // Disable preemption because during this function g is in _Gsyscall status,
2023 // but can have inconsistent g->sched, do not let GC observe it.
2024 g->m->locks++;
2026 // Leave SP around for GC and traceback.
2027 #ifdef USING_SPLIT_STACK
2029 size_t gcstacksize;
2030 g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
2031 &g->gcnextsegment, &g->gcnextsp,
2032 &g->gcinitialsp);
2033 g->gcstacksize = (uintptr)gcstacksize;
2035 #else
2037 void *v;
2039 g->gcnextsp = (byte *) &v;
2041 #endif
2043 g->syscallsp = sp;
2044 g->syscallpc = pc;
2046 g->atomicstatus = _Gsyscall;
2048 if(runtime_atomicload(&runtime_sched->sysmonwait)) { // TODO: fast atomic
2049 runtime_lock(&runtime_sched->lock);
2050 if(runtime_atomicload(&runtime_sched->sysmonwait)) {
2051 runtime_atomicstore(&runtime_sched->sysmonwait, 0);
2052 runtime_notewakeup(&runtime_sched->sysmonnote);
2054 runtime_unlock(&runtime_sched->lock);
2057 g->m->mcache = nil;
2058 ((P*)(g->m->p))->m = 0;
2059 runtime_atomicstore(&((P*)g->m->p)->status, _Psyscall);
2060 if(runtime_atomicload(&runtime_sched->gcwaiting)) {
2061 runtime_lock(&runtime_sched->lock);
2062 if (runtime_sched->stopwait > 0 && runtime_cas(&((P*)g->m->p)->status, _Psyscall, _Pgcstop)) {
2063 if(--runtime_sched->stopwait == 0)
2064 runtime_notewakeup(&runtime_sched->stopnote);
2066 runtime_unlock(&runtime_sched->lock);
2069 g->m->locks--;
2072 // The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
2073 void
2074 runtime_entersyscallblock(int32 dummy __attribute__ ((unused)))
2076 P *p;
2078 g->m->locks++; // see comment in entersyscall
2080 // Leave SP around for GC and traceback.
2081 #ifdef USING_SPLIT_STACK
2083 size_t gcstacksize;
2084 g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
2085 &g->gcnextsegment, &g->gcnextsp,
2086 &g->gcinitialsp);
2087 g->gcstacksize = (uintptr)gcstacksize;
2089 #else
2090 g->gcnextsp = (byte *) &p;
2091 #endif
2093 // Save the registers in the g structure so that any pointers
2094 // held in registers will be seen by the garbage collector.
2095 getcontext(ucontext_arg(&g->gcregs[0]));
2097 g->syscallpc = (uintptr)runtime_getcallerpc(&dummy);
2098 g->syscallsp = (uintptr)runtime_getcallersp(&dummy);
2100 g->atomicstatus = _Gsyscall;
2102 p = releasep();
2103 handoffp(p);
2104 if(g->isbackground) // do not consider blocked scavenger for deadlock detection
2105 incidlelocked(1);
2107 g->m->locks--;
2110 // The goroutine g exited its system call.
2111 // Arrange for it to run on a cpu again.
2112 // This is called only from the go syscall library, not
2113 // from the low-level system calls used by the runtime.
2114 void
2115 runtime_exitsyscall(int32 dummy __attribute__ ((unused)))
2117 G *gp;
2119 gp = g;
2120 gp->m->locks++; // see comment in entersyscall
2122 if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
2123 incidlelocked(-1);
2125 gp->waitsince = 0;
2126 if(exitsyscallfast()) {
2127 // There's a cpu for us, so we can run.
2128 ((P*)gp->m->p)->syscalltick++;
2129 gp->atomicstatus = _Grunning;
2130 // Garbage collector isn't running (since we are),
2131 // so okay to clear gcstack and gcsp.
2132 #ifdef USING_SPLIT_STACK
2133 gp->gcstack = nil;
2134 #endif
2135 gp->gcnextsp = nil;
2136 runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
2137 gp->syscallsp = 0;
2138 gp->m->locks--;
2139 return;
2142 gp->m->locks--;
2144 // Call the scheduler.
2145 runtime_mcall(exitsyscall0);
2147 // Scheduler returned, so we're allowed to run now.
2148 // Delete the gcstack information that we left for
2149 // the garbage collector during the system call.
2150 // Must wait until now because until gosched returns
2151 // we don't know for sure that the garbage collector
2152 // is not running.
2153 #ifdef USING_SPLIT_STACK
2154 gp->gcstack = nil;
2155 #endif
2156 gp->gcnextsp = nil;
2157 runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
2159 gp->syscallsp = 0;
2161 // Note that this gp->m might be different than the earlier
2162 // gp->m after returning from runtime_mcall.
2163 ((P*)gp->m->p)->syscalltick++;
2166 static bool
2167 exitsyscallfast(void)
2169 G *gp;
2170 P *p;
2172 gp = g;
2174 // Freezetheworld sets stopwait but does not retake P's.
2175 if(runtime_sched->stopwait) {
2176 gp->m->p = 0;
2177 return false;
2180 // Try to re-acquire the last P.
2181 if(gp->m->p && ((P*)gp->m->p)->status == _Psyscall && runtime_cas(&((P*)gp->m->p)->status, _Psyscall, _Prunning)) {
2182 // There's a cpu for us, so we can run.
2183 gp->m->mcache = ((P*)gp->m->p)->mcache;
2184 ((P*)gp->m->p)->m = (uintptr)gp->m;
2185 return true;
2187 // Try to get any other idle P.
2188 gp->m->p = 0;
2189 if(runtime_sched->pidle) {
2190 runtime_lock(&runtime_sched->lock);
2191 p = pidleget();
2192 if(p && runtime_atomicload(&runtime_sched->sysmonwait)) {
2193 runtime_atomicstore(&runtime_sched->sysmonwait, 0);
2194 runtime_notewakeup(&runtime_sched->sysmonnote);
2196 runtime_unlock(&runtime_sched->lock);
2197 if(p) {
2198 acquirep(p);
2199 return true;
2202 return false;
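/*
 * Sketch (hypothetical, using C11 atomics instead of runtime_cas): the
 * heart of the fast path above is a single compare-and-swap that
 * re-acquires the P only if its status is still the one we left it in.
 *
 *   #include <stdatomic.h>
 *
 *   enum { STATUS_IDLE, STATUS_RUNNING, STATUS_SYSCALL };
 *
 *   static _Bool
 *   reacquire(atomic_int *status)
 *   {
 *       int expect = STATUS_SYSCALL;
 *       // Succeeds only if nobody (e.g. sysmon's retake) took the P
 *       // away while we were in the system call.
 *       return atomic_compare_exchange_strong(status, &expect, STATUS_RUNNING);
 *   }
 */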
2205 // runtime_exitsyscall slow path on g0.
2206 // Failed to acquire P, enqueue gp as runnable.
2207 static void
2208 exitsyscall0(G *gp)
2210 M *m;
2211 P *p;
2213 m = g->m;
2214 gp->atomicstatus = _Grunnable;
2215 gp->m = nil;
2216 m->curg = nil;
2217 runtime_lock(&runtime_sched->lock);
2218 p = pidleget();
2219 if(p == nil)
2220 globrunqput(gp);
2221 else if(runtime_atomicload(&runtime_sched->sysmonwait)) {
2222 runtime_atomicstore(&runtime_sched->sysmonwait, 0);
2223 runtime_notewakeup(&runtime_sched->sysmonnote);
2225 runtime_unlock(&runtime_sched->lock);
2226 if(p) {
2227 acquirep(p);
2228 execute(gp); // Never returns.
2230 if(m->lockedg) {
2231 // Wait until another thread schedules gp and so m again.
2232 stoplockedm();
2233 execute(gp); // Never returns.
2235 stopm();
2236 schedule(); // Never returns.
2239 void syscall_entersyscall(void)
2240 __asm__(GOSYM_PREFIX "syscall.Entersyscall");
2242 void syscall_entersyscall(void) __attribute__ ((no_split_stack));
2244 void
2245 syscall_entersyscall()
2247 runtime_entersyscall(0);
2250 void syscall_exitsyscall(void)
2251 __asm__(GOSYM_PREFIX "syscall.Exitsyscall");
2253 void syscall_exitsyscall(void) __attribute__ ((no_split_stack));
2255 void
2256 syscall_exitsyscall()
2258 runtime_exitsyscall(0);
2261 // Called from syscall package before fork.
2262 void syscall_runtime_BeforeFork(void)
2263 __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork");
2264 void
2265 syscall_runtime_BeforeFork(void)
2267 // Fork can hang if preempted with signals frequently enough (see issue 5517).
2268 // Ensure that we stay on the same M where we disable profiling.
2269 runtime_m()->locks++;
2270 if(runtime_m()->profilehz != 0)
2271 runtime_resetcpuprofiler(0);
2274 // Called from syscall package after fork in parent.
2275 void syscall_runtime_AfterFork(void)
2276 __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork");
2277 void
2278 syscall_runtime_AfterFork(void)
2280 int32 hz;
2282 hz = runtime_sched->profilehz;
2283 if(hz != 0)
2284 runtime_resetcpuprofiler(hz);
2285 runtime_m()->locks--;
2288 // Allocate a new g, with a stack big enough for stacksize bytes.
2289 G*
2290 runtime_malg(int32 stacksize, byte** ret_stack, uintptr* ret_stacksize)
2292 G *newg;
2294 newg = allocg();
2295 if(stacksize >= 0) {
2296 #if USING_SPLIT_STACK
2297 int dont_block_signals = 0;
2298 size_t ss_stacksize;
2300 *ret_stack = __splitstack_makecontext(stacksize,
2301 &newg->stackcontext[0],
2302 &ss_stacksize);
2303 *ret_stacksize = (uintptr)ss_stacksize;
2304 __splitstack_block_signals_context(&newg->stackcontext[0],
2305 &dont_block_signals, nil);
2306 #else
2307 // In 64-bit mode, the maximum Go allocation space is
2308 // 128G. Our stack size is 4M, which only permits 32K
2309 // goroutines. In order to not limit ourselves,
2310 // allocate the stacks out of separate memory. In
2311 // 32-bit mode, the Go allocation space is all of
2312 // memory anyhow.
2313 if(sizeof(void*) == 8) {
2314 void *p = runtime_SysAlloc(stacksize, &mstats()->other_sys);
2315 if(p == nil)
2316 runtime_throw("runtime: cannot allocate memory for goroutine stack");
2317 *ret_stack = (byte*)p;
2318 } else {
2319 *ret_stack = runtime_mallocgc(stacksize, 0, FlagNoProfiling|FlagNoGC);
2320 runtime_xadd(&runtime_stacks_sys, stacksize);
2322 *ret_stacksize = (uintptr)stacksize;
2323 newg->gcinitialsp = *ret_stack;
2324 newg->gcstacksize = (uintptr)stacksize;
2325 #endif
2327 return newg;
2330 G*
2331 __go_go(void (*fn)(void*), void* arg)
2333 byte *sp;
2334 size_t spsize;
2335 G *newg;
2336 P *p;
2338 //runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
2339 if(fn == nil) {
2340 g->m->throwing = -1; // do not dump full stacks
2341 runtime_throw("go of nil func value");
2343 g->m->locks++; // disable preemption because it can be holding p in a local var
2345 p = (P*)g->m->p;
2346 if((newg = gfget(p)) != nil) {
2347 #ifdef USING_SPLIT_STACK
2348 int dont_block_signals = 0;
2350 sp = __splitstack_resetcontext(&newg->stackcontext[0],
2351 &spsize);
2352 __splitstack_block_signals_context(&newg->stackcontext[0],
2353 &dont_block_signals, nil);
2354 #else
2355 sp = newg->gcinitialsp;
2356 spsize = newg->gcstacksize;
2357 if(spsize == 0)
2358 runtime_throw("bad spsize in __go_go");
2359 newg->gcnextsp = sp;
2360 #endif
2361 } else {
2362 uintptr malsize;
2364 newg = runtime_malg(StackMin, &sp, &malsize);
2365 spsize = (size_t)malsize;
2366 allgadd(newg);
2369 newg->entry = (byte*)fn;
2370 newg->param = arg;
2371 newg->gopc = (uintptr)__builtin_return_address(0);
2372 newg->atomicstatus = _Grunnable;
2373 if(p->goidcache == p->goidcacheend) {
2374 p->goidcache = runtime_xadd64(&runtime_sched->goidgen, GoidCacheBatch);
2375 p->goidcacheend = p->goidcache + GoidCacheBatch;
2377 newg->goid = p->goidcache++;
2380 // Avoid warnings about variables clobbered by
2381 // longjmp.
2382 byte * volatile vsp = sp;
2383 size_t volatile vspsize = spsize;
2384 G * volatile vnewg = newg;
2385 ucontext_t * volatile uc;
2387 uc = ucontext_arg(&vnewg->context[0]);
2388 getcontext(uc);
2389 uc->uc_stack.ss_sp = vsp;
2390 uc->uc_stack.ss_size = vspsize;
2391 makecontext(uc, kickoff, 0);
2393 runqput(p, vnewg);
2395 if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
2396 wakep();
2397 g->m->locks--;
2398 return vnewg;
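/*
 * Sketch of the goid cache used above (hypothetical names): each
 * worker grabs a batch of ids from a shared generator with one atomic
 * add, then hands them out locally without further synchronization.
 *
 *   #include <stdatomic.h>
 *   #include <stdint.h>
 *
 *   enum { ID_BATCH = 16 };
 *
 *   static atomic_uint_least64_t idgen;        // shared generator
 *
 *   struct idcache { uint64_t next, end; };    // per-worker cache
 *
 *   static uint64_t
 *   next_id(struct idcache *c)
 *   {
 *       if(c->next == c->end) {
 *           // One atomic op refills the cache with ID_BATCH ids.
 *           uint64_t old = atomic_fetch_add(&idgen, ID_BATCH);
 *           c->next = old;
 *           c->end = old + ID_BATCH;
 *       }
 *       return c->next++;
 *   }
 */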
2402 static void
2403 allgadd(G *gp)
2405 G **new;
2406 uintptr cap;
2408 runtime_lock(&allglock);
2409 if(runtime_allglen >= allgcap) {
2410 cap = 4096/sizeof(new[0]);
2411 if(cap < 2*allgcap)
2412 cap = 2*allgcap;
2413 new = runtime_malloc(cap*sizeof(new[0]));
2414 if(new == nil)
2415 runtime_throw("runtime: cannot allocate memory");
2416 if(runtime_allg != nil) {
2417 runtime_memmove(new, runtime_allg, runtime_allglen*sizeof(new[0]));
2418 runtime_free(runtime_allg);
2420 runtime_allg = new;
2421 allgcap = cap;
2423 runtime_allg[runtime_allglen++] = gp;
2424 runtime_unlock(&allglock);
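/*
 * Sketch of the growth policy used by allgadd (hypothetical,
 * malloc-based rather than the runtime allocator): append into an
 * array that starts at one page worth of slots and doubles thereafter,
 * so the amortized cost per append stays constant.
 *
 *   #include <stdlib.h>
 *   #include <string.h>
 *
 *   struct vec { void **items; size_t len, cap; };
 *
 *   static int
 *   vec_append(struct vec *v, void *item)
 *   {
 *       if(v->len >= v->cap) {
 *           size_t ncap = v->cap ? 2*v->cap : 4096/sizeof(void*);
 *           void **n = malloc(ncap * sizeof(void*));
 *           if(n == NULL)
 *               return 0;
 *           if(v->items != NULL) {
 *               memcpy(n, v->items, v->len * sizeof(void*));
 *               free(v->items);
 *           }
 *           v->items = n;
 *           v->cap = ncap;
 *       }
 *       v->items[v->len++] = item;
 *       return 1;
 *   }
 */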
2427 // Put on gfree list.
2428 // If local list is too long, transfer a batch to the global list.
2429 static void
2430 gfput(P *p, G *gp)
2432 gp->schedlink = (uintptr)p->gfree;
2433 p->gfree = gp;
2434 p->gfreecnt++;
2435 if(p->gfreecnt >= 64) {
2436 runtime_lock(&runtime_sched->gflock);
2437 while(p->gfreecnt >= 32) {
2438 p->gfreecnt--;
2439 gp = p->gfree;
2440 p->gfree = (G*)gp->schedlink;
2441 gp->schedlink = (uintptr)runtime_sched->gfree;
2442 runtime_sched->gfree = gp;
2444 runtime_unlock(&runtime_sched->gflock);
2448 // Get from gfree list.
2449 // If local list is empty, grab a batch from global list.
2450 static G*
2451 gfget(P *p)
2453 G *gp;
2455 retry:
2456 gp = p->gfree;
2457 if(gp == nil && runtime_sched->gfree) {
2458 runtime_lock(&runtime_sched->gflock);
2459 while(p->gfreecnt < 32 && runtime_sched->gfree) {
2460 p->gfreecnt++;
2461 gp = runtime_sched->gfree;
2462 runtime_sched->gfree = (G*)gp->schedlink;
2463 gp->schedlink = (uintptr)p->gfree;
2464 p->gfree = gp;
2466 runtime_unlock(&runtime_sched->gflock);
2467 goto retry;
2469 if(gp) {
2470 p->gfree = (G*)gp->schedlink;
2471 p->gfreecnt--;
2473 return gp;
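/*
 * Sketch of the free-list shape used by gfput/gfget (hypothetical
 * types): an intrusive LIFO with a counter; a caller would move a
 * batch to a shared list under a lock once the local count passes a
 * threshold, and refill from it when the local list runs dry.
 *
 *   struct node { struct node *next; };
 *
 *   static struct node *local_free;
 *   static int local_cnt;
 *
 *   static void
 *   freelist_put(struct node *n)
 *   {
 *       n->next = local_free;
 *       local_free = n;
 *       local_cnt++;
 *       // if(local_cnt >= 64) ... move half to the shared list ...
 *   }
 *
 *   static struct node *
 *   freelist_get(void)
 *   {
 *       struct node *n = local_free;
 *       if(n != 0) {
 *           local_free = n->next;
 *           local_cnt--;
 *       }
 *       return n;
 *   }
 */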
2476 // Purge all cached G's from gfree list to the global list.
2477 static void
2478 gfpurge(P *p)
2480 G *gp;
2482 runtime_lock(&runtime_sched->gflock);
2483 while(p->gfreecnt) {
2484 p->gfreecnt--;
2485 gp = p->gfree;
2486 p->gfree = (G*)gp->schedlink;
2487 gp->schedlink = (uintptr)runtime_sched->gfree;
2488 runtime_sched->gfree = gp;
2490 runtime_unlock(&runtime_sched->gflock);
2493 void
2494 runtime_Breakpoint(void)
2496 runtime_breakpoint();
2499 void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched");
2501 void
2502 runtime_Gosched(void)
2504 runtime_gosched();
2507 // Implementation of runtime.GOMAXPROCS.
2508 // delete when scheduler is even stronger
2510 intgo runtime_GOMAXPROCS(intgo)
2511 __asm__(GOSYM_PREFIX "runtime.GOMAXPROCS");
2513 intgo
2514 runtime_GOMAXPROCS(intgo n)
2516 intgo ret;
2518 if(n > _MaxGomaxprocs)
2519 n = _MaxGomaxprocs;
2520 runtime_lock(&runtime_sched->lock);
2521 ret = (intgo)runtime_gomaxprocs;
2522 if(n <= 0 || n == ret) {
2523 runtime_unlock(&runtime_sched->lock);
2524 return ret;
2526 runtime_unlock(&runtime_sched->lock);
2528 runtime_acquireWorldsema();
2529 g->m->gcing = 1;
2530 runtime_stopTheWorldWithSema();
2531 newprocs = (int32)n;
2532 g->m->gcing = 0;
2533 runtime_releaseWorldsema();
2534 runtime_startTheWorldWithSema();
2536 return ret;
2539 // lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
2540 // after they modify m->locked. Do not allow preemption during this call,
2541 // or else the m might be different in this function than in the caller.
2542 static void
2543 lockOSThread(void)
2545 g->m->lockedg = g;
2546 g->lockedm = g->m;
2549 void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
2550 void
2551 runtime_LockOSThread(void)
2553 g->m->locked |= _LockExternal;
2554 lockOSThread();
2557 void
2558 runtime_lockOSThread(void)
2560 g->m->locked += _LockInternal;
2561 lockOSThread();
2565 // unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
2566 // after they update m->locked. Do not allow preemption during this call,
2567 // or else the m might be different in this function than in the caller.
2568 static void
2569 unlockOSThread(void)
2571 if(g->m->locked != 0)
2572 return;
2573 g->m->lockedg = nil;
2574 g->lockedm = nil;
2577 void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
2579 void
2580 runtime_UnlockOSThread(void)
2582 g->m->locked &= ~_LockExternal;
2583 unlockOSThread();
2586 void
2587 runtime_unlockOSThread(void)
2589 if(g->m->locked < _LockInternal)
2590 runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
2591 g->m->locked -= _LockInternal;
2592 unlockOSThread();
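/*
 * Sketch of how m->locked packs both kinds of locks into one word
 * (mirroring the _LockExternal / _LockInternal usage above; the
 * constants and names here are hypothetical): external locking is a
 * single bit, internal locking is a counter kept in the higher bits.
 *
 *   enum {
 *       LOCK_EXTERNAL = 1,   // set/cleared by LockOSThread/UnlockOSThread
 *       LOCK_INTERNAL = 2,   // each internal lock adds this amount
 *   };
 *
 *   static unsigned locked;
 *
 *   static void lock_external(void)   { locked |= LOCK_EXTERNAL; }
 *   static void unlock_external(void) { locked &= ~LOCK_EXTERNAL; }
 *   static void lock_internal(void)   { locked += LOCK_INTERNAL; }
 *   static void unlock_internal(void) { locked -= LOCK_INTERNAL; }
 *   static int  still_locked(void)    { return locked != 0; }
 */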
2595 bool
2596 runtime_lockedOSThread(void)
2598 return g->lockedm != nil && g->m->lockedg != nil;
2601 int32
2602 runtime_gcount(void)
2604 G *gp;
2605 int32 n, s;
2606 uintptr i;
2608 n = 0;
2609 runtime_lock(&allglock);
2610 // TODO(dvyukov): runtime.NumGoroutine() is O(N).
2611 // We do not want to increment/decrement centralized counter in newproc/goexit,
2612 // just to make runtime.NumGoroutine() faster.
2613 // Compromise solution is to introduce per-P counters of active goroutines.
2614 for(i = 0; i < runtime_allglen; i++) {
2615 gp = runtime_allg[i];
2616 s = gp->atomicstatus;
2617 if(s == _Grunnable || s == _Grunning || s == _Gsyscall || s == _Gwaiting)
2618 n++;
2620 runtime_unlock(&allglock);
2621 return n;
2624 int32
2625 runtime_mcount(void)
2627 return runtime_sched->mcount;
2630 static struct {
2631 uint32 lock;
2632 int32 hz;
2633 } prof;
2635 static void System(void) {}
2636 static void GC(void) {}
2638 // Called if we receive a SIGPROF signal.
2639 void
2640 runtime_sigprof()
2642 M *mp = g->m;
2643 int32 n, i;
2644 bool traceback;
2645 uintptr pcbuf[TracebackMaxFrames];
2646 Location locbuf[TracebackMaxFrames];
2647 Slice stk;
2649 if(prof.hz == 0)
2650 return;
2652 if(mp == nil)
2653 return;
2655 // Profiling runs concurrently with GC, so it must not allocate.
2656 mp->mallocing++;
2658 traceback = true;
2660 if(mp->mcache == nil)
2661 traceback = false;
2663 n = 0;
2665 if(runtime_atomicload(&runtime_in_callers) > 0) {
2666 // If SIGPROF arrived while already fetching runtime
2667 // callers we can have trouble on older systems
2668 // because the unwind library calls dl_iterate_phdr
2669 // which was not recursive in the past.
2670 traceback = false;
2673 if(traceback) {
2674 n = runtime_callers(0, locbuf, nelem(locbuf), false);
2675 for(i = 0; i < n; i++)
2676 pcbuf[i] = locbuf[i].pc;
2678 if(!traceback || n <= 0) {
2679 n = 2;
2680 pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
2681 if(mp->gcing || mp->helpgc)
2682 pcbuf[1] = (uintptr)GC;
2683 else
2684 pcbuf[1] = (uintptr)System;
2687 if (prof.hz != 0) {
2688 stk.__values = &pcbuf[0];
2689 stk.__count = n;
2690 stk.__capacity = n;
2692 // Simple cas-lock to coordinate with setcpuprofilerate.
2693 while (!runtime_cas(&prof.lock, 0, 1)) {
2694 runtime_osyield();
2696 if (prof.hz != 0) {
2697 runtime_cpuprofAdd(stk);
2699 runtime_atomicstore(&prof.lock, 0);
2702 mp->mallocing--;
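/*
 * Sketch of the "simple cas-lock" used above, in C11 atomics
 * (hypothetical names): a one-word lock acquired by compare-and-swap,
 * spinning with a yield on contention.
 *
 *   #include <sched.h>
 *   #include <stdatomic.h>
 *
 *   static atomic_uint proflock;
 *
 *   static void
 *   cas_lock(void)
 *   {
 *       unsigned expect = 0;
 *       while(!atomic_compare_exchange_weak(&proflock, &expect, 1)) {
 *           expect = 0;
 *           sched_yield();          // counterpart of runtime_osyield
 *       }
 *   }
 *
 *   static void
 *   cas_unlock(void)
 *   {
 *       atomic_store(&proflock, 0);
 *   }
 */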
2705 // Arrange to call fn with a traceback hz times a second.
2706 void
2707 runtime_setcpuprofilerate_m(int32 hz)
2709 // Force sane arguments.
2710 if(hz < 0)
2711 hz = 0;
2713 // Disable preemption, otherwise we can be rescheduled to another thread
2714 // that has profiling enabled.
2715 g->m->locks++;
2717 // Stop the profiler on this thread so that it is safe to lock prof:
2718 // if a profiling signal came in while we had prof locked,
2719 // it would deadlock.
2720 runtime_resetcpuprofiler(0);
2722 while (!runtime_cas(&prof.lock, 0, 1)) {
2723 runtime_osyield();
2725 prof.hz = hz;
2726 runtime_atomicstore(&prof.lock, 0);
2728 runtime_lock(&runtime_sched->lock);
2729 runtime_sched->profilehz = hz;
2730 runtime_unlock(&runtime_sched->lock);
2732 if(hz != 0)
2733 runtime_resetcpuprofiler(hz);
2735 g->m->locks--;
2738 // Change number of processors. The world is stopped, sched is locked.
2739 static void
2740 procresize(int32 new)
2742 int32 i, old;
2743 bool pempty;
2744 G *gp;
2745 P *p;
2746 intgo j;
2748 old = runtime_gomaxprocs;
2749 if(old < 0 || old > _MaxGomaxprocs || new <= 0 || new >_MaxGomaxprocs)
2750 runtime_throw("procresize: invalid arg");
2751 // initialize new P's
2752 for(i = 0; i < new; i++) {
2753 p = runtime_allp[i];
2754 if(p == nil) {
2755 p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
2756 p->id = i;
2757 p->status = _Pgcstop;
2758 p->deferpool.__values = &p->deferpoolbuf[0];
2759 p->deferpool.__count = 0;
2760 p->deferpool.__capacity = nelem(p->deferpoolbuf);
2761 runtime_atomicstorep(&runtime_allp[i], p);
2763 if(p->mcache == nil) {
2764 if(old==0 && i==0)
2765 p->mcache = g->m->mcache; // bootstrap
2766 else
2767 p->mcache = runtime_allocmcache();
2771 // redistribute runnable G's evenly
2772 // collect all runnable goroutines in global queue preserving FIFO order
2773 // FIFO order is required to ensure fairness even during frequent GCs
2774 // see http://golang.org/issue/7126
2775 pempty = false;
2776 while(!pempty) {
2777 pempty = true;
2778 for(i = 0; i < old; i++) {
2779 p = runtime_allp[i];
2780 if(p->runqhead == p->runqtail)
2781 continue;
2782 pempty = false;
2783 // pop from tail of local queue
2784 p->runqtail--;
2785 gp = (G*)p->runq[p->runqtail%nelem(p->runq)];
2786 // push onto head of global queue
2787 gp->schedlink = runtime_sched->runqhead;
2788 runtime_sched->runqhead = (uintptr)gp;
2789 if(runtime_sched->runqtail == 0)
2790 runtime_sched->runqtail = (uintptr)gp;
2791 runtime_sched->runqsize++;
2794 // fill local queues with at most nelem(p->runq)/2 goroutines
2795 // start at 1 because current M already executes some G and will acquire allp[0] below,
2796 // so if we have a spare G we want to put it into allp[1].
2797 for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched->runqsize > 0; i++) {
2798 gp = (G*)runtime_sched->runqhead;
2799 runtime_sched->runqhead = gp->schedlink;
2800 if(runtime_sched->runqhead == 0)
2801 runtime_sched->runqtail = 0;
2802 runtime_sched->runqsize--;
2803 runqput(runtime_allp[i%new], gp);
2806 // free unused P's
2807 for(i = new; i < old; i++) {
2808 p = runtime_allp[i];
2809 for(j = 0; j < p->deferpool.__count; j++) {
2810 ((struct _defer**)p->deferpool.__values)[j] = nil;
2812 p->deferpool.__count = 0;
2813 runtime_freemcache(p->mcache);
2814 p->mcache = nil;
2815 gfpurge(p);
2816 p->status = _Pdead;
2817 // can't free P itself because it can be referenced by an M in syscall
2820 if(g->m->p)
2821 ((P*)g->m->p)->m = 0;
2822 g->m->p = 0;
2823 g->m->mcache = nil;
2824 p = runtime_allp[0];
2825 p->m = 0;
2826 p->status = _Pidle;
2827 acquirep(p);
2828 for(i = new-1; i > 0; i--) {
2829 p = runtime_allp[i];
2830 p->status = _Pidle;
2831 pidleput(p);
2833 runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
2836 // Associate p and the current m.
2837 static void
2838 acquirep(P *p)
2840 M *m;
2842 m = g->m;
2843 if(m->p || m->mcache)
2844 runtime_throw("acquirep: already in go");
2845 if(p->m || p->status != _Pidle) {
2846 runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? ((M*)p->m)->id : 0, p->status);
2847 runtime_throw("acquirep: invalid p state");
2849 m->mcache = p->mcache;
2850 m->p = (uintptr)p;
2851 p->m = (uintptr)m;
2852 p->status = _Prunning;
2855 // Disassociate p and the current m.
2856 static P*
2857 releasep(void)
2859 M *m;
2860 P *p;
2862 m = g->m;
2863 if(m->p == 0 || m->mcache == nil)
2864 runtime_throw("releasep: invalid arg");
2865 p = (P*)m->p;
2866 if((M*)p->m != m || p->mcache != m->mcache || p->status != _Prunning) {
2867 runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
2868 m, m->p, p->m, m->mcache, p->mcache, p->status);
2869 runtime_throw("releasep: invalid p state");
2871 m->p = 0;
2872 m->mcache = nil;
2873 p->m = 0;
2874 p->status = _Pidle;
2875 return p;
2878 static void
2879 incidlelocked(int32 v)
2881 runtime_lock(&runtime_sched->lock);
2882 runtime_sched->nmidlelocked += v;
2883 if(v > 0)
2884 checkdead();
2885 runtime_unlock(&runtime_sched->lock);
2888 // Check for deadlock situation.
2889 // The check is based on the number of running M's; if it is 0, we have a deadlock.
2890 static void
2891 checkdead(void)
2893 G *gp;
2894 int32 run, grunning, s;
2895 uintptr i;
2897 // For -buildmode=c-shared or -buildmode=c-archive it's OK if
2898 // there are no running goroutines. The calling program is
2899 // assumed to be running.
2900 if(runtime_isarchive) {
2901 return;
2904 // -1 for sysmon
2905 run = runtime_sched->mcount - runtime_sched->nmidle - runtime_sched->nmidlelocked - 1 - countextra();
2906 if(run > 0)
2907 return;
2908 // If we are dying because of a signal caught on an already idle thread,
2909 // freezetheworld will cause all running threads to block.
2910 // And the runtime will essentially enter a deadlock state,
2911 // except that there is a thread that will call runtime_exit soon.
2912 if(runtime_panicking() > 0)
2913 return;
2914 if(run < 0) {
2915 runtime_printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
2916 runtime_sched->nmidle, runtime_sched->nmidlelocked, runtime_sched->mcount);
2917 runtime_throw("checkdead: inconsistent counts");
2919 grunning = 0;
2920 runtime_lock(&allglock);
2921 for(i = 0; i < runtime_allglen; i++) {
2922 gp = runtime_allg[i];
2923 if(gp->isbackground)
2924 continue;
2925 s = gp->atomicstatus;
2926 if(s == _Gwaiting)
2927 grunning++;
2928 else if(s == _Grunnable || s == _Grunning || s == _Gsyscall) {
2929 runtime_unlock(&allglock);
2930 runtime_printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);
2931 runtime_throw("checkdead: runnable g");
2934 runtime_unlock(&allglock);
2935 if(grunning == 0) // possible if main goroutine calls runtime_Goexit()
2936 runtime_throw("no goroutines (main called runtime.Goexit) - deadlock!");
2937 g->m->throwing = -1; // do not dump full stacks
2938 runtime_throw("all goroutines are asleep - deadlock!");
2941 static void
2942 sysmon(void)
2944 uint32 idle, delay;
2945 int64 now, lastpoll, lasttrace;
2946 G *gp;
2948 lasttrace = 0;
2949 idle = 0; // how many cycles in succession we have not woken anybody up
2950 delay = 0;
2951 for(;;) {
2952 if(idle == 0) // start with 20us sleep...
2953 delay = 20;
2954 else if(idle > 50) // start doubling the sleep after 1ms...
2955 delay *= 2;
2956 if(delay > 10*1000) // up to 10ms
2957 delay = 10*1000;
2958 runtime_usleep(delay);
2959 if(runtime_debug.schedtrace <= 0 &&
2960 (runtime_sched->gcwaiting || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic
2961 runtime_lock(&runtime_sched->lock);
2962 if(runtime_atomicload(&runtime_sched->gcwaiting) || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs) {
2963 runtime_atomicstore(&runtime_sched->sysmonwait, 1);
2964 runtime_unlock(&runtime_sched->lock);
2965 runtime_notesleep(&runtime_sched->sysmonnote);
2966 runtime_noteclear(&runtime_sched->sysmonnote);
2967 idle = 0;
2968 delay = 20;
2969 } else
2970 runtime_unlock(&runtime_sched->lock);
2972 // poll network if not polled for more than 10ms
2973 lastpoll = runtime_atomicload64(&runtime_sched->lastpoll);
2974 now = runtime_nanotime();
2975 if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
2976 runtime_cas64(&runtime_sched->lastpoll, lastpoll, now);
2977 gp = runtime_netpoll(false); // non-blocking
2978 if(gp) {
2979 // Need to decrement number of idle locked M's
2980 // (pretending that one more is running) before injectglist.
2981 // Otherwise it can lead to the following situation:
2982 // injectglist grabs all P's but before it starts M's to run the P's,
2983 // another M returns from syscall, finishes running its G,
2984 // observes that there is no work to do and no other running M's
2985 // and reports deadlock.
2986 incidlelocked(-1);
2987 injectglist(gp);
2988 incidlelocked(1);
2991 // retake P's blocked in syscalls
2992 // and preempt long running G's
2993 if(retake(now))
2994 idle = 0;
2995 else
2996 idle++;
2998 if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) {
2999 lasttrace = now;
3000 runtime_schedtrace(runtime_debug.scheddetail);
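/*
 * Sketch of sysmon's sleep backoff (the values are taken from the loop
 * above; the function name is hypothetical): 20us while there is work,
 * doubling after 50 idle iterations, capped at 10ms.
 *
 *   static unsigned
 *   next_delay_us(unsigned idle, unsigned delay)
 *   {
 *       if(idle == 0)              // we recently found work: poll fast
 *           delay = 20;
 *       else if(idle > 50)         // long idle: back off exponentially
 *           delay *= 2;
 *       if(delay > 10*1000)        // but never sleep more than 10ms
 *           delay = 10*1000;
 *       return delay;
 *   }
 */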
3005 typedef struct Pdesc Pdesc;
3006 struct Pdesc
3008 uint32 schedtick;
3009 int64 schedwhen;
3010 uint32 syscalltick;
3011 int64 syscallwhen;
3013 static Pdesc pdesc[_MaxGomaxprocs];
3015 static uint32
3016 retake(int64 now)
3018 uint32 i, s, n;
3019 int64 t;
3020 P *p;
3021 Pdesc *pd;
3023 n = 0;
3024 for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
3025 p = runtime_allp[i];
3026 if(p==nil)
3027 continue;
3028 pd = &pdesc[i];
3029 s = p->status;
3030 if(s == _Psyscall) {
3031 // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
3032 t = p->syscalltick;
3033 if(pd->syscalltick != t) {
3034 pd->syscalltick = t;
3035 pd->syscallwhen = now;
3036 continue;
3038 // On the one hand we don't want to retake Ps if there is no other work to do,
3039 // but on the other hand we want to retake them eventually
3040 // because they can prevent the sysmon thread from deep sleep.
3041 if(p->runqhead == p->runqtail &&
3042 runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) > 0 &&
3043 pd->syscallwhen + 10*1000*1000 > now)
3044 continue;
3045 // Need to decrement number of idle locked M's
3046 // (pretending that one more is running) before the CAS.
3047 // Otherwise the M from which we retake can exit the syscall,
3048 // increment nmidle and report deadlock.
3049 incidlelocked(-1);
3050 if(runtime_cas(&p->status, s, _Pidle)) {
3051 n++;
3052 handoffp(p);
3054 incidlelocked(1);
3055 } else if(s == _Prunning) {
3056 // Preempt G if it's running for more than 10ms.
3057 t = p->schedtick;
3058 if(pd->schedtick != t) {
3059 pd->schedtick = t;
3060 pd->schedwhen = now;
3061 continue;
3063 if(pd->schedwhen + 10*1000*1000 > now)
3064 continue;
3065 // preemptone(p);
3068 return n;
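/*
 * Sketch of the tick-and-timestamp watchdog that retake() applies to
 * each P (hypothetical names): remember the last tick we saw and when
 * we first saw it; the P is only considered stuck once the same tick
 * has been observed for longer than the threshold.
 *
 *   #include <stdint.h>
 *
 *   struct watchdog {
 *       uint32_t lasttick;   // tick value seen on the previous pass
 *       int64_t  when;       // time (ns) we first saw that value
 *   };
 *
 *   static int
 *   stuck(struct watchdog *w, uint32_t tick, int64_t now, int64_t threshold)
 *   {
 *       if(w->lasttick != tick) {   // the P made progress: restart timer
 *           w->lasttick = tick;
 *           w->when = now;
 *           return 0;
 *       }
 *       return w->when + threshold <= now;
 *   }
 */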
3071 // Tell all goroutines that they have been preempted and they should stop.
3072 // This function is purely best-effort. It can fail to inform a goroutine if a
3073 // processor just started running it.
3074 // No locks need to be held.
3075 // Returns true if preemption request was issued to at least one goroutine.
3076 static bool
3077 preemptall(void)
3079 return false;
3082 void
3083 runtime_schedtrace(bool detailed)
3085 static int64 starttime;
3086 int64 now;
3087 int64 id1, id2, id3;
3088 int32 i, t, h;
3089 uintptr gi;
3090 const char *fmt;
3091 M *mp, *lockedm;
3092 G *gp, *lockedg;
3093 P *p;
3095 now = runtime_nanotime();
3096 if(starttime == 0)
3097 starttime = now;
3099 runtime_lock(&runtime_sched->lock);
3100 runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
3101 (now-starttime)/1000000, runtime_gomaxprocs, runtime_sched->npidle, runtime_sched->mcount,
3102 runtime_sched->nmidle, runtime_sched->runqsize);
3103 if(detailed) {
3104 runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
3105 runtime_sched->gcwaiting, runtime_sched->nmidlelocked, runtime_sched->nmspinning,
3106 runtime_sched->stopwait, runtime_sched->sysmonwait);
3108 // We must be careful while reading data from P's, M's and G's.
3109 // Even if we hold schedlock, most data can be changed concurrently.
3110 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
3111 for(i = 0; i < runtime_gomaxprocs; i++) {
3112 p = runtime_allp[i];
3113 if(p == nil)
3114 continue;
3115 mp = (M*)p->m;
3116 h = runtime_atomicload(&p->runqhead);
3117 t = runtime_atomicload(&p->runqtail);
3118 if(detailed)
3119 runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
3120 i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
3121 else {
3122 // In non-detailed mode format lengths of per-P run queues as:
3123 // [len1 len2 len3 len4]
3124 fmt = " %d";
3125 if(runtime_gomaxprocs == 1)
3126 fmt = " [%d]\n";
3127 else if(i == 0)
3128 fmt = " [%d";
3129 else if(i == runtime_gomaxprocs-1)
3130 fmt = " %d]\n";
3131 runtime_printf(fmt, t-h);
3134 if(!detailed) {
3135 runtime_unlock(&runtime_sched->lock);
3136 return;
3138 for(mp = runtime_allm; mp; mp = mp->alllink) {
3139 p = (P*)mp->p;
3140 gp = mp->curg;
3141 lockedg = mp->lockedg;
3142 id1 = -1;
3143 if(p)
3144 id1 = p->id;
3145 id2 = -1;
3146 if(gp)
3147 id2 = gp->goid;
3148 id3 = -1;
3149 if(lockedg)
3150 id3 = lockedg->goid;
3151 runtime_printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
3152 " locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
3153 mp->id, id1, id2,
3154 mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
3155 mp->spinning, mp->blocked, id3);
3157 runtime_lock(&allglock);
3158 for(gi = 0; gi < runtime_allglen; gi++) {
3159 gp = runtime_allg[gi];
3160 mp = gp->m;
3161 lockedm = gp->lockedm;
3162 runtime_printf(" G%D: status=%d(%S) m=%d lockedm=%d\n",
3163 gp->goid, gp->atomicstatus, gp->waitreason, mp ? mp->id : -1,
3164 lockedm ? lockedm->id : -1);
3166 runtime_unlock(&allglock);
3167 runtime_unlock(&runtime_sched->lock);
3170 // Put mp on midle list.
3171 // Sched must be locked.
3172 static void
3173 mput(M *mp)
3175 mp->schedlink = runtime_sched->midle;
3176 runtime_sched->midle = (uintptr)mp;
3177 runtime_sched->nmidle++;
3178 checkdead();
3181 // Try to get an m from midle list.
3182 // Sched must be locked.
3183 static M*
3184 mget(void)
3186 M *mp;
3188 if((mp = (M*)runtime_sched->midle) != nil){
3189 runtime_sched->midle = mp->schedlink;
3190 runtime_sched->nmidle--;
3192 return mp;
3195 // Put gp on the global runnable queue.
3196 // Sched must be locked.
3197 static void
3198 globrunqput(G *gp)
3200 gp->schedlink = 0;
3201 if(runtime_sched->runqtail)
3202 ((G*)runtime_sched->runqtail)->schedlink = (uintptr)gp;
3203 else
3204 runtime_sched->runqhead = (uintptr)gp;
3205 runtime_sched->runqtail = (uintptr)gp;
3206 runtime_sched->runqsize++;
3209 // Put a batch of runnable goroutines on the global runnable queue.
3210 // Sched must be locked.
3211 static void
3212 globrunqputbatch(G *ghead, G *gtail, int32 n)
3214 gtail->schedlink = 0;
3215 if(runtime_sched->runqtail)
3216 ((G*)runtime_sched->runqtail)->schedlink = (uintptr)ghead;
3217 else
3218 runtime_sched->runqhead = (uintptr)ghead;
3219 runtime_sched->runqtail = (uintptr)gtail;
3220 runtime_sched->runqsize += n;
3223 // Try to get a batch of G's from the global runnable queue.
3224 // Sched must be locked.
3225 static G*
3226 globrunqget(P *p, int32 max)
3228 G *gp, *gp1;
3229 int32 n;
3231 if(runtime_sched->runqsize == 0)
3232 return nil;
3233 n = runtime_sched->runqsize/runtime_gomaxprocs+1;
3234 if(n > runtime_sched->runqsize)
3235 n = runtime_sched->runqsize;
3236 if(max > 0 && n > max)
3237 n = max;
3238 if((uint32)n > nelem(p->runq)/2)
3239 n = nelem(p->runq)/2;
3240 runtime_sched->runqsize -= n;
3241 if(runtime_sched->runqsize == 0)
3242 runtime_sched->runqtail = 0;
3243 gp = (G*)runtime_sched->runqhead;
3244 runtime_sched->runqhead = gp->schedlink;
3245 n--;
3246 while(n--) {
3247 gp1 = (G*)runtime_sched->runqhead;
3248 runtime_sched->runqhead = gp1->schedlink;
3249 runqput(p, gp1);
3251 return gp;
3254 // Put p on the pidle list.
3255 // Sched must be locked.
3256 static void
3257 pidleput(P *p)
3259 p->link = runtime_sched->pidle;
3260 runtime_sched->pidle = (uintptr)p;
3261 runtime_xadd(&runtime_sched->npidle, 1); // TODO: fast atomic
3264 // Try to get a P from the pidle list.
3265 // Sched must be locked.
3266 static P*
3267 pidleget(void)
3269 P *p;
3271 p = (P*)runtime_sched->pidle;
3272 if(p) {
3273 runtime_sched->pidle = p->link;
3274 runtime_xadd(&runtime_sched->npidle, -1); // TODO: fast atomic
3276 return p;
3279 // Try to put g on local runnable queue.
3280 // If it's full, put onto global queue.
3281 // Executed only by the owner P.
3282 static void
3283 runqput(P *p, G *gp)
3285 uint32 h, t;
3287 retry:
3288 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3289 t = p->runqtail;
3290 if(t - h < nelem(p->runq)) {
3291 p->runq[t%nelem(p->runq)] = (uintptr)gp;
3292 runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
3293 return;
3295 if(runqputslow(p, gp, h, t))
3296 return;
3297 // the queue is not full, so the put above must succeed
3298 goto retry;
3301 // Put g and a batch of work from local runnable queue on global queue.
3302 // Executed only by the owner P.
3303 static bool
3304 runqputslow(P *p, G *gp, uint32 h, uint32 t)
3306 G *batch[nelem(p->runq)/2+1];
3307 uint32 n, i;
3309 // First, grab a batch from local queue.
3310 n = t-h;
3311 n = n/2;
3312 if(n != nelem(p->runq)/2)
3313 runtime_throw("runqputslow: queue is not full");
3314 for(i=0; i<n; i++)
3315 batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
3316 if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3317 return false;
3318 batch[n] = gp;
3319 // Link the goroutines.
3320 for(i=0; i<n; i++)
3321 batch[i]->schedlink = (uintptr)batch[i+1];
3322 // Now put the batch on global queue.
3323 runtime_lock(&runtime_sched->lock);
3324 globrunqputbatch(batch[0], batch[n], n+1);
3325 runtime_unlock(&runtime_sched->lock);
3326 return true;
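/*
 * Sketch of the per-P run queue shape used by runqput/runqputslow
 * (hypothetical names, C11 atomics instead of the runtime helpers): a
 * fixed-size ring with a tail written only by the owner and a head
 * that consumers advance by compare-and-swap.  When the ring is full
 * the owner spills a batch to a global queue, as runqputslow does.
 *
 *   #include <stdatomic.h>
 *
 *   enum { QSIZE = 256 };                  // must stay a power of two
 *
 *   struct ring {
 *       atomic_uint head;                  // advanced by consumers
 *       atomic_uint tail;                  // written only by the owner
 *       void *buf[QSIZE];
 *   };
 *
 *   // Owner-only put; returns 0 when full so the caller can spill.
 *   static int
 *   ring_put(struct ring *r, void *item)
 *   {
 *       unsigned h = atomic_load_explicit(&r->head, memory_order_acquire);
 *       unsigned t = atomic_load_explicit(&r->tail, memory_order_relaxed);
 *       if(t - h >= QSIZE)
 *           return 0;
 *       r->buf[t % QSIZE] = item;
 *       // Publish the slot before making it visible via tail.
 *       atomic_store_explicit(&r->tail, t + 1, memory_order_release);
 *       return 1;
 *   }
 */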
3329 // Get g from local runnable queue.
3330 // Executed only by the owner P.
3331 static G*
3332 runqget(P *p)
3334 G *gp;
3335 uint32 t, h;
3337 for(;;) {
3338 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3339 t = p->runqtail;
3340 if(t == h)
3341 return nil;
3342 gp = (G*)p->runq[h%nelem(p->runq)];
3343 if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume
3344 return gp;
3348 // Grabs a batch of goroutines from local runnable queue.
3349 // batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
3350 // Can be executed by any P.
3351 static uint32
3352 runqgrab(P *p, G **batch)
3354 uint32 t, h, n, i;
3356 for(;;) {
3357 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3358 t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer
3359 n = t-h;
3360 n = n - n/2;
3361 if(n == 0)
3362 break;
3363 if(n > nelem(p->runq)/2) // read inconsistent h and t
3364 continue;
3365 for(i=0; i<n; i++)
3366 batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
3367 if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3368 break;
3370 return n;
3373 // Steal half of elements from local runnable queue of p2
3374 // and put onto local runnable queue of p.
3375 // Returns one of the stolen elements (or nil if failed).
3376 static G*
3377 runqsteal(P *p, P *p2)
3379 G *gp;
3380 G *batch[nelem(p->runq)/2];
3381 uint32 t, h, n, i;
3383 n = runqgrab(p2, batch);
3384 if(n == 0)
3385 return nil;
3386 n--;
3387 gp = batch[n];
3388 if(n == 0)
3389 return gp;
3390 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3391 t = p->runqtail;
3392 if(t - h + n >= nelem(p->runq))
3393 runtime_throw("runqsteal: runq overflow");
3394 for(i=0; i<n; i++, t++)
3395 p->runq[t%nelem(p->runq)] = (uintptr)batch[i];
3396 runtime_atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption
3397 return gp;
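/*
 * Companion sketch to the ring above (it reuses the hypothetical
 * struct ring and QSIZE from the runqput sketch): the consumer side
 * claims a slot by CAS on head, which is how runqget and runqgrab
 * commit their reads; on a failed CAS the slot is simply re-read on
 * the next iteration.
 *
 *   static void *
 *   ring_get(struct ring *r)
 *   {
 *       for(;;) {
 *           unsigned h = atomic_load_explicit(&r->head, memory_order_acquire);
 *           unsigned t = atomic_load_explicit(&r->tail, memory_order_acquire);
 *           if(t == h)
 *               return 0;                       // empty
 *           void *item = r->buf[h % QSIZE];
 *           // cas-release: commits the consume if nobody raced us.
 *           if(atomic_compare_exchange_weak(&r->head, &h, h + 1))
 *               return item;
 *       }
 *   }
 */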
3400 void runtime_testSchedLocalQueue(void)
3401 __asm__("runtime.testSchedLocalQueue");
3403 void
3404 runtime_testSchedLocalQueue(void)
3406 P p;
3407 G gs[nelem(p.runq)];
3408 int32 i, j;
3410 runtime_memclr((byte*)&p, sizeof(p));
3412 for(i = 0; i < (int32)nelem(gs); i++) {
3413 if(runqget(&p) != nil)
3414 runtime_throw("runq is not empty initially");
3415 for(j = 0; j < i; j++)
3416 runqput(&p, &gs[i]);
3417 for(j = 0; j < i; j++) {
3418 if(runqget(&p) != &gs[i]) {
3419 runtime_printf("bad element at iter %d/%d\n", i, j);
3420 runtime_throw("bad element");
3423 if(runqget(&p) != nil)
3424 runtime_throw("runq is not empty afterwards");
3428 void runtime_testSchedLocalQueueSteal(void)
3429 __asm__("runtime.testSchedLocalQueueSteal");
3431 void
3432 runtime_testSchedLocalQueueSteal(void)
3434 P p1, p2;
3435 G gs[nelem(p1.runq)], *gp;
3436 int32 i, j, s;
3438 runtime_memclr((byte*)&p1, sizeof(p1));
3439 runtime_memclr((byte*)&p2, sizeof(p2));
3441 for(i = 0; i < (int32)nelem(gs); i++) {
3442 for(j = 0; j < i; j++) {
3443 gs[j].sig = 0;
3444 runqput(&p1, &gs[j]);
3446 gp = runqsteal(&p2, &p1);
3447 s = 0;
3448 if(gp) {
3449 s++;
3450 gp->sig++;
3452 while((gp = runqget(&p2)) != nil) {
3453 s++;
3454 gp->sig++;
3456 while((gp = runqget(&p1)) != nil)
3457 gp->sig++;
3458 for(j = 0; j < i; j++) {
3459 if(gs[j].sig != 1) {
3460 runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
3461 runtime_throw("bad element");
3464 if(s != i/2 && s != i/2+1) {
3465 runtime_printf("bad steal %d, want %d or %d, iter %d\n",
3466 s, i/2, i/2+1, i);
3467 runtime_throw("bad steal");
3472 intgo
3473 runtime_setmaxthreads(intgo in)
3475 intgo out;
3477 runtime_lock(&runtime_sched->lock);
3478 out = (intgo)runtime_sched->maxmcount;
3479 runtime_sched->maxmcount = (int32)in;
3480 checkmcount();
3481 runtime_unlock(&runtime_sched->lock);
3482 return out;
3485 static intgo
3486 procPin()
3488 M *mp;
3490 mp = runtime_m();
3491 mp->locks++;
3492 return (intgo)(((P*)mp->p)->id);
3495 static void
3496 procUnpin()
3498 runtime_m()->locks--;
3501 intgo sync_runtime_procPin(void)
3502 __asm__ (GOSYM_PREFIX "sync.runtime_procPin");
3504 intgo
3505 sync_runtime_procPin()
3507 return procPin();
3510 void sync_runtime_procUnpin(void)
3511 __asm__ (GOSYM_PREFIX "sync.runtime_procUnpin");
3513 void
3514 sync_runtime_procUnpin()
3516 procUnpin();
3519 intgo sync_atomic_runtime_procPin(void)
3520 __asm__ (GOSYM_PREFIX "sync_atomic.runtime_procPin");
3522 intgo
3523 sync_atomic_runtime_procPin()
3525 return procPin();
3528 void sync_atomic_runtime_procUnpin(void)
3529 __asm__ (GOSYM_PREFIX "sync_atomic.runtime_procUnpin");
3531 void
3532 sync_atomic_runtime_procUnpin()
3534 procUnpin();
3537 void
3538 runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
3540 enqueue1(wbufp, (Obj){(byte*)&runtime_main_init_done, sizeof runtime_main_init_done, 0});
3543 // Return whether we are waiting for a GC. This gc toolchain uses
3544 // preemption instead.
3545 bool
3546 runtime_gcwaiting(void)
3548 return runtime_sched->gcwaiting;
3551 // os_beforeExit is called from os.Exit(0).
3552 //go:linkname os_beforeExit os.runtime_beforeExit
3554 extern void os_beforeExit() __asm__ (GOSYM_PREFIX "os.runtime_beforeExit");
3556 void
3557 os_beforeExit()
3561 // Active spinning for sync.Mutex.
3562 //go:linkname sync_runtime_canSpin sync.runtime_canSpin
3564 enum
3566 ACTIVE_SPIN = 4,
3567 ACTIVE_SPIN_CNT = 30,
3570 extern _Bool sync_runtime_canSpin(intgo i)
3571 __asm__ (GOSYM_PREFIX "sync.runtime_canSpin");
3573 _Bool
3574 sync_runtime_canSpin(intgo i)
3576 P *p;
3578 // sync.Mutex is cooperative, so we are conservative with spinning.
3579 // Spin only a few times and only if running on a multicore machine and
3580 // GOMAXPROCS>1 and there is at least one other running P and the local runq is empty.
3581 // As opposed to runtime mutexes we don't do passive spinning here,
3582 // because there can be work on the global runq or on other Ps.
3583 if (i >= ACTIVE_SPIN || runtime_ncpu <= 1 || runtime_gomaxprocs <= (int32)(runtime_sched->npidle+runtime_sched->nmspinning)+1) {
3584 return false;
3586 p = (P*)g->m->p;
3587 return p != nil && p->runqhead == p->runqtail;
3590 //go:linkname sync_runtime_doSpin sync.runtime_doSpin
3591 //go:nosplit
3593 extern void sync_runtime_doSpin(void)
3594 __asm__ (GOSYM_PREFIX "sync.runtime_doSpin");
3596 void
3597 sync_runtime_doSpin()
3599 runtime_procyield(ACTIVE_SPIN_CNT);
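/*
 * Sketch of a bounded active-spin loop in the spirit of the heuristic
 * above (hypothetical names; the constants mirror ACTIVE_SPIN and
 * ACTIVE_SPIN_CNT): pause the CPU briefly a few times before giving up
 * and letting the caller block instead.
 *
 *   static void
 *   cpu_relax(void)
 *   {
 *   #if defined(__x86_64__) || defined(__i386__)
 *       __builtin_ia32_pause();
 *   #endif
 *   }
 *
 *   static int
 *   try_spin(int (*is_locked)(void))
 *   {
 *       int i, j;
 *       for(i = 0; i < 4; i++) {           // ACTIVE_SPIN rounds
 *           for(j = 0; j < 30; j++)        // ACTIVE_SPIN_CNT pauses
 *               cpu_relax();
 *           if(!is_locked())
 *               return 1;                  // worth retrying the lock
 *       }
 *       return 0;                          // caller should block
 *   }
 */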
3602 // For Go code to look at variables, until we port proc.go.
3604 extern M** runtime_go_allm(void)
3605 __asm__ (GOSYM_PREFIX "runtime.allm");
3607 M**
3608 runtime_go_allm()
3610 return &runtime_allm;
3613 extern Slice runtime_go_allgs(void)
3614 __asm__ (GOSYM_PREFIX "runtime.allgs");
3616 Slice
3617 runtime_go_allgs()
3619 Slice s;
3621 s.__values = runtime_allg;
3622 s.__count = runtime_allglen;
3623 s.__capacity = allgcap;
3624 return s;
3627 intgo NumCPU(void) __asm__ (GOSYM_PREFIX "runtime.NumCPU");
3629 intgo
3630 NumCPU()
3632 return (intgo)(runtime_ncpu);