runtime: copy Go 1.7 runtime semaphore code
[official-gcc.git] / libgo / runtime / proc.c
blob eb9e6c21e44bde809b6fcf39af3378a025837aa1
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 #include <limits.h>
6 #include <signal.h>
7 #include <stdlib.h>
8 #include <pthread.h>
9 #include <unistd.h>
11 #include "config.h"
13 #ifdef HAVE_DL_ITERATE_PHDR
14 #include <link.h>
15 #endif
17 #include "runtime.h"
18 #include "arch.h"
19 #include "defs.h"
20 #include "malloc.h"
21 #include "go-type.h"
23 #ifdef USING_SPLIT_STACK
25 /* FIXME: These are not declared anywhere. */
27 extern void __splitstack_getcontext(void *context[10]);
29 extern void __splitstack_setcontext(void *context[10]);
31 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
33 extern void * __splitstack_resetcontext(void *context[10], size_t *);
35 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
36 void **);
38 extern void __splitstack_block_signals (int *, int *);
40 extern void __splitstack_block_signals_context (void *context[10], int *,
41 int *);
43 #endif
45 #ifndef PTHREAD_STACK_MIN
46 # define PTHREAD_STACK_MIN 8192
47 #endif
49 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
50 # define StackMin PTHREAD_STACK_MIN
51 #else
52 # define StackMin ((sizeof(char *) < 8) ? 2 * 1024 * 1024 : 4 * 1024 * 1024)
53 #endif
55 uintptr runtime_stacks_sys;
57 static void gtraceback(G*);
59 #ifdef __rtems__
60 #define __thread
61 #endif
63 static __thread G *g;
65 #ifndef SETCONTEXT_CLOBBERS_TLS
67 static inline void
68 initcontext(void)
72 static inline void
73 fixcontext(ucontext_t *c __attribute__ ((unused)))
77 #else
79 # if defined(__x86_64__) && defined(__sun__)
81 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
82 // register to that of the thread which called getcontext. The effect
83 // is that the address of all __thread variables changes. This bug
84 // also affects pthread_self() and pthread_getspecific. We work
85 // around it by clobbering the context field directly to keep %fs the
86 // same.
88 static __thread greg_t fs;
90 static inline void
91 initcontext(void)
93 ucontext_t c;
95 getcontext(&c);
96 fs = c.uc_mcontext.gregs[REG_FSBASE];
99 static inline void
100 fixcontext(ucontext_t* c)
102 c->uc_mcontext.gregs[REG_FSBASE] = fs;
105 # elif defined(__NetBSD__)
107 // NetBSD has a bug: setcontext clobbers tlsbase, we need to save
108 // and restore it ourselves.
110 static __thread __greg_t tlsbase;
112 static inline void
113 initcontext(void)
115 ucontext_t c;
117 getcontext(&c);
118 tlsbase = c.uc_mcontext._mc_tlsbase;
121 static inline void
122 fixcontext(ucontext_t* c)
124 c->uc_mcontext._mc_tlsbase = tlsbase;
127 # elif defined(__sparc__)
129 static inline void
130 initcontext(void)
134 static inline void
135 fixcontext(ucontext_t *c)
137 /* ??? Using
138 register unsigned long thread __asm__("%g7");
139 c->uc_mcontext.gregs[REG_G7] = thread;
140 results in
141 error: variable ‘thread’ might be clobbered by \
142 ‘longjmp’ or ‘vfork’ [-Werror=clobbered]
143 which ought to be false, as %g7 is a fixed register. */
145 if (sizeof (c->uc_mcontext.gregs[REG_G7]) == 8)
146 asm ("stx %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
147 else
148 asm ("st %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
151 # else
153 # error unknown case for SETCONTEXT_CLOBBERS_TLS
155 # endif
157 #endif
159 // ucontext_arg returns a properly aligned ucontext_t value. On some
160 // systems a ucontext_t value must be aligned to a 16-byte boundary.
161 // The g structure that has fields of type ucontext_t is defined in
162 // Go, and Go has no simple way to align a field to such a boundary.
163 // So we make the field larger in runtime2.go and pick an appropriate
164 // offset within the field here.
165 static ucontext_t*
166 ucontext_arg(void** go_ucontext)
168 uintptr_t p = (uintptr_t)go_ucontext;
169 size_t align = __alignof__(ucontext_t);
170 if(align > 16) {
171 // We only ensured space for up to a 16 byte alignment
172 // in libgo/go/runtime/runtime2.go.
173 runtime_throw("required alignment of ucontext_t too large");
175 p = (p + align - 1) &~ (uintptr_t)(align - 1);
176 return (ucontext_t*)p;
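// For example, the context save/restore calls below go through this helper
// rather than casting the field directly (a reading aid; the real calls
// appear in runtime_gogo and runtime_mcall):
//
//	getcontext(ucontext_arg(&gp->context[0]));   // save
//	setcontext(ucontext_arg(&newg->context[0])); // restore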
179 // We can not always refer to the TLS variables directly. The
180 // compiler will call tls_get_addr to get the address of the variable,
181 // and it may hold it in a register across a call to schedule. When
182 // we get back from the call we may be running in a different thread,
183 // in which case the register now points to the TLS variable for a
184 // different thread. We use non-inlinable functions to avoid this
185 // when necessary.
187 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
190 runtime_g(void)
192 return g;
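// Illustrative sketch of the hazard described above: after any call that can
// reschedule onto another thread, a previously loaded G* must not be reused;
// reload it through runtime_g() instead:
//
//	gp = runtime_g();   // correct: fresh TLS lookup after the switch
//	// not: keep using a G* that was loaded before the call to schedule
//
// runtime_mcall below does exactly this after getcontext returns.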
195 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
198 runtime_m(void)
200 if(g == nil)
201 return nil;
202 return g->m;
205 // Set g.
206 void
207 runtime_setg(G* gp)
209 g = gp;
212 // Start a new thread.
213 static void
214 runtime_newosproc(M *mp)
216 pthread_attr_t attr;
217 sigset_t clear, old;
218 pthread_t tid;
219 int ret;
221 if(pthread_attr_init(&attr) != 0)
222 runtime_throw("pthread_attr_init");
223 if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
224 runtime_throw("pthread_attr_setdetachstate");
226 // Block signals during pthread_create so that the new thread
227 // starts with signals disabled. It will enable them in minit.
228 sigfillset(&clear);
230 #ifdef SIGTRAP
231 // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
232 sigdelset(&clear, SIGTRAP);
233 #endif
235 sigemptyset(&old);
236 pthread_sigmask(SIG_BLOCK, &clear, &old);
237 ret = pthread_create(&tid, &attr, runtime_mstart, mp);
238 pthread_sigmask(SIG_SETMASK, &old, nil);
240 if (ret != 0)
241 runtime_throw("pthread_create");
244 // First function run by a new goroutine. This replaces gogocall.
245 static void
246 kickoff(void)
248 void (*fn)(void*);
249 void *param;
251 if(g->traceback != nil)
252 gtraceback(g);
254 fn = (void (*)(void*))(g->entry);
255 param = g->param;
256 g->param = nil;
257 fn(param);
258 runtime_goexit();
261 // Switch context to a different goroutine. This is like longjmp.
262 void runtime_gogo(G*) __attribute__ ((noinline));
263 void
264 runtime_gogo(G* newg)
266 #ifdef USING_SPLIT_STACK
267 __splitstack_setcontext(&newg->stackcontext[0]);
268 #endif
269 g = newg;
270 newg->fromgogo = true;
271 fixcontext(ucontext_arg(&newg->context[0]));
272 setcontext(ucontext_arg(&newg->context[0]));
273 runtime_throw("gogo setcontext returned");
276 // Save context and call fn passing g as a parameter. This is like
277 // setjmp. Because getcontext always returns 0, unlike setjmp, we use
278 // g->fromgogo as a code. It will be true if we got here via
279 // setcontext. g == nil the first time this is called in a new m.
280 void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
281 void
282 runtime_mcall(void (*pfn)(G*))
284 M *mp;
285 G *gp;
287 // Ensure that all registers are on the stack for the garbage
288 // collector.
289 __builtin_unwind_init();
291 gp = g;
292 mp = gp->m;
293 if(gp == mp->g0)
294 runtime_throw("runtime: mcall called on m->g0 stack");
296 if(gp != nil) {
298 #ifdef USING_SPLIT_STACK
299 __splitstack_getcontext(&g->stackcontext[0]);
300 #else
301 gp->gcnextsp = &pfn;
302 #endif
303 gp->fromgogo = false;
304 getcontext(ucontext_arg(&gp->context[0]));
306 // When we return from getcontext, we may be running
307 // in a new thread. That means that g may have
308 // changed. It is a global variable, so we will
309 // reload it, but the address of g may be cached in
310 // our local stack frame, and that address may be
311 // wrong. Call the function to reload the value for
312 // this thread.
313 gp = runtime_g();
314 mp = gp->m;
316 if(gp->traceback != nil)
317 gtraceback(gp);
319 if (gp == nil || !gp->fromgogo) {
320 #ifdef USING_SPLIT_STACK
321 __splitstack_setcontext(&mp->g0->stackcontext[0]);
322 #endif
323 mp->g0->entry = (byte*)pfn;
324 mp->g0->param = gp;
326 // It's OK to set g directly here because this case
327 // can not occur if we got here via a setcontext to
328 // the getcontext call just above.
329 g = mp->g0;
331 fixcontext(ucontext_arg(&mp->g0->context[0]));
332 setcontext(ucontext_arg(&mp->g0->context[0]));
333 runtime_throw("runtime: mcall function returned");
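// Typical use, as in runtime_park, runtime_gosched and runtime_goexit below:
// the continuation runs on g0 with the old goroutine as its argument and must
// never return (it ends in execute() or schedule()):
//
//	static void park0(G *gp) { ... schedule(); }   // continuation on g0
//	runtime_mcall(park0);                          // switch to g0 and run it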
337 // Goroutine scheduler
338 // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
340 // The main concepts are:
341 // G - goroutine.
342 // M - worker thread, or machine.
343 // P - processor, a resource that is required to execute Go code.
344 // M must have an associated P to execute Go code; however, it can be
345 // blocked or in a syscall without an associated P.
347 // Design doc at http://golang.org/s/go11sched.
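// A rough map of how these appear in the code below (only a reading aid; the
// struct definitions are elsewhere in the runtime):
//
//	gp->m          // M the goroutine is running on (nil while parked)
//	mp->curg       // user goroutine the M is currently executing
//	mp->g0         // per-M scheduling goroutine with its own stack
//	mp->p, pp->m   // M <-> P association, stored as uintptr here
//
// acquirep/releasep below establish and drop the M <-> P association.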
349 typedef struct Sched Sched;
350 struct Sched {
351 Lock;
353 uint64 goidgen;
354 M* midle; // idle m's waiting for work
355 int32 nmidle; // number of idle m's waiting for work
356 int32 nmidlelocked; // number of locked m's waiting for work
357 int32 mcount; // number of m's that have been created
358 int32 maxmcount; // maximum number of m's allowed (or die)
360 P* pidle; // idle P's
361 uint32 npidle;
362 uint32 nmspinning;
364 // Global runnable queue.
365 G* runqhead;
366 G* runqtail;
367 int32 runqsize;
369 // Global cache of dead G's.
370 Lock gflock;
371 G* gfree;
373 uint32 gcwaiting; // gc is waiting to run
374 int32 stopwait;
375 Note stopnote;
376 uint32 sysmonwait;
377 Note sysmonnote;
378 uint64 lastpoll;
380 int32 profilehz; // cpu profiling rate
383 enum
385 // Number of goroutine ids to grab from runtime_sched.goidgen to local per-P cache at once.
386 // 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
387 GoidCacheBatch = 16,
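// The cache itself lives on the Go side of the runtime, not in this file;
// the refill is roughly (sketch only, field names may differ):
//
//	cache = runtime_xadd64(&runtime_sched.goidgen, GoidCacheBatch);
//	// hand out cache-GoidCacheBatch+1 .. cache before refilling again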
390 Sched runtime_sched;
391 int32 runtime_gomaxprocs;
392 uint32 runtime_needextram = 1;
393 M runtime_m0;
394 G runtime_g0; // idle goroutine for m0
395 G* runtime_lastg;
396 M* runtime_allm;
397 P** runtime_allp;
398 M* runtime_extram;
399 int8* runtime_goos;
400 int32 runtime_ncpu;
401 bool runtime_precisestack;
402 static int32 newprocs;
404 static Lock allglock; // the following vars are protected by this lock or by stoptheworld
405 G** runtime_allg;
406 uintptr runtime_allglen;
407 static uintptr allgcap;
409 bool runtime_isarchive;
411 void* runtime_mstart(void*);
412 static void runqput(P*, G*);
413 static G* runqget(P*);
414 static bool runqputslow(P*, G*, uint32, uint32);
415 static G* runqsteal(P*, P*);
416 static void mput(M*);
417 static M* mget(void);
418 static void mcommoninit(M*);
419 static void schedule(void);
420 static void procresize(int32);
421 static void acquirep(P*);
422 static P* releasep(void);
423 static void newm(void(*)(void), P*);
424 static void stopm(void);
425 static void startm(P*, bool);
426 static void handoffp(P*);
427 static void wakep(void);
428 static void stoplockedm(void);
429 static void startlockedm(G*);
430 static void sysmon(void);
431 static uint32 retake(int64);
432 static void incidlelocked(int32);
433 static void checkdead(void);
434 static void exitsyscall0(G*);
435 static void park0(G*);
436 static void goexit0(G*);
437 static void gfput(P*, G*);
438 static G* gfget(P*);
439 static void gfpurge(P*);
440 static void globrunqput(G*);
441 static void globrunqputbatch(G*, G*, int32);
442 static G* globrunqget(P*, int32);
443 static P* pidleget(void);
444 static void pidleput(P*);
445 static void injectglist(G*);
446 static bool preemptall(void);
447 static bool exitsyscallfast(void);
448 static void allgadd(G*);
450 bool runtime_isstarted;
452 // The bootstrap sequence is:
454 // call osinit
455 // call schedinit
456 // make & queue new G
457 // call runtime_mstart
459 // The new G calls runtime_main.
460 void
461 runtime_schedinit(void)
463 M *m;
464 int32 n, procs;
465 String s;
466 const byte *p;
467 Eface i;
469 m = &runtime_m0;
470 g = &runtime_g0;
471 m->g0 = g;
472 m->curg = g;
473 g->m = m;
475 initcontext();
477 runtime_sched.maxmcount = 10000;
478 runtime_precisestack = 0;
480 // runtime_symtabinit();
481 runtime_mallocinit();
482 mcommoninit(m);
484 // Initialize the itable value for newErrorCString,
485 // so that the next time it gets called, possibly
486 // in a fault during a garbage collection, it will not
487 // need to allocate memory.
488 runtime_newErrorCString(0, &i);
490 // Initialize the cached gotraceback value, since
491 // gotraceback calls getenv, which mallocs on Plan 9.
492 runtime_gotraceback(nil);
494 runtime_goargs();
495 runtime_goenvs();
496 runtime_parsedebugvars();
498 runtime_sched.lastpoll = runtime_nanotime();
499 procs = 1;
500 s = runtime_getenv("GOMAXPROCS");
501 p = s.str;
502 if(p != nil && (n = runtime_atoi(p, s.len)) > 0) {
503 if(n > _MaxGomaxprocs)
504 n = _MaxGomaxprocs;
505 procs = n;
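// e.g. GOMAXPROCS=4 yields procs == 4 here; values above _MaxGomaxprocs
// are clamped, and an unset or invalid value leaves procs == 1.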
507 runtime_allp = runtime_malloc((_MaxGomaxprocs+1)*sizeof(runtime_allp[0]));
508 procresize(procs);
510 // Can not enable GC until all roots are registered.
511 // mstats.enablegc = 1;
514 extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
515 extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");
517 // Used to determine the field alignment.
519 struct field_align
521 char c;
522 Hchan *p;
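// For example, on a typical LP64 target Hchan* is 8-byte aligned, so
// offsetof(struct field_align, p) is 8 and the __field_align initializer
// in the descriptor below evaluates to 8 - 1 == 7.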
525 // main_init_done is a signal used by cgocallbackg that initialization
526 // has been completed. It is made before _cgo_notify_runtime_init_done,
527 // so all cgo calls can rely on it existing. When main_init is
528 // complete, it is closed, meaning cgocallbackg can reliably receive
529 // from it.
530 Hchan *runtime_main_init_done;
532 // The chan bool type, for runtime_main_init_done.
534 extern const struct __go_type_descriptor bool_type_descriptor
535 __asm__ (GOSYM_PREFIX "__go_tdn_bool");
537 static struct __go_channel_type chan_bool_type_descriptor =
539 /* __common */
541 /* __code */
542 GO_CHAN,
543 /* __align */
544 __alignof (Hchan *),
545 /* __field_align */
546 offsetof (struct field_align, p) - 1,
547 /* __size */
548 sizeof (Hchan *),
549 /* __hash */
550 0, /* This value doesn't matter. */
551 /* __hashfn */
552 NULL,
553 /* __equalfn */
554 NULL,
555 /* __gc */
556 NULL, /* This value doesn't matter */
557 /* __reflection */
558 NULL, /* This value doesn't matter */
559 /* __uncommon */
560 NULL,
561 /* __pointer_to_this */
562 NULL
564 /* __element_type */
565 &bool_type_descriptor,
566 /* __dir */
567 CHANNEL_BOTH_DIR
570 extern Hchan *makechan (ChanType *, int64)
571 __asm__ (GOSYM_PREFIX "runtime.makechan");
572 extern void closechan(Hchan *) __asm__ (GOSYM_PREFIX "runtime.closechan");
574 static void
575 initDone(void *arg __attribute__ ((unused))) {
576 runtime_unlockOSThread();
579 // The main goroutine.
580 // Note: C frames in general are not copyable during stack growth, for two reasons:
581 // 1) We don't know where in a frame to find pointers to other stack locations.
582 // 2) There's no guarantee that globals or heap values do not point into the frame.
584 // The C frame for runtime.main is copyable, because:
585 // 1) There are no pointers to other stack locations in the frame
586 // (d.fn points at a global, d.link is nil, d.argp is -1).
587 // 2) The only pointer into this frame is from the defer chain,
588 // which is explicitly handled during stack copying.
589 void
590 runtime_main(void* dummy __attribute__((unused)))
592 Defer d;
593 _Bool frame;
595 newm(sysmon, nil);
597 // Lock the main goroutine onto this, the main OS thread,
598 // during initialization. Most programs won't care, but a few
599 // do require certain calls to be made by the main thread.
600 // Those can arrange for main.main to run in the main thread
601 // by calling runtime.LockOSThread during initialization
602 // to preserve the lock.
603 runtime_lockOSThread();
605 // Defer unlock so that runtime.Goexit during init does the unlock too.
606 d.pfn = (uintptr)(void*)initDone;
607 d.next = g->_defer;
608 d.arg = (void*)-1;
609 d._panic = g->_panic;
610 d.retaddr = 0;
611 d.makefunccanrecover = 0;
612 d.frame = &frame;
613 d.special = true;
614 g->_defer = &d;
616 if(g->m != &runtime_m0)
617 runtime_throw("runtime_main not on m0");
618 __go_go(runtime_MHeap_Scavenger, nil);
620 runtime_main_init_done = makechan(&chan_bool_type_descriptor, 0);
622 _cgo_notify_runtime_init_done();
624 main_init();
626 closechan(runtime_main_init_done);
628 if(g->_defer != &d || (void*)d.pfn != initDone)
629 runtime_throw("runtime: bad defer entry after init");
630 g->_defer = d.next;
631 runtime_unlockOSThread();
633 // For gccgo we have to wait until after main is initialized
634 // to enable GC, because initializing main registers the GC
635 // roots.
636 mstats.enablegc = 1;
638 if(runtime_isarchive) {
639 // This is not a complete program, but is instead a
640 // library built using -buildmode=c-archive or
641 // c-shared. Now that we are initialized, there is
642 // nothing further to do.
643 return;
646 main_main();
648 // Make racy client program work: if panicking on
649 // another goroutine at the same time as main returns,
650 // let the other goroutine finish printing the panic trace.
651 // Once it does, it will exit. See issue 3934.
652 if(runtime_panicking)
653 runtime_park(nil, nil, "panicwait");
655 runtime_exit(0);
656 for(;;)
657 *(int32*)0 = 0;
660 void
661 runtime_goroutineheader(G *gp)
663 String status;
664 int64 waitfor;
666 switch(gp->atomicstatus) {
667 case _Gidle:
668 status = runtime_gostringnocopy((const byte*)"idle");
669 break;
670 case _Grunnable:
671 status = runtime_gostringnocopy((const byte*)"runnable");
672 break;
673 case _Grunning:
674 status = runtime_gostringnocopy((const byte*)"running");
675 break;
676 case _Gsyscall:
677 status = runtime_gostringnocopy((const byte*)"syscall");
678 break;
679 case _Gwaiting:
680 if(gp->waitreason.len > 0)
681 status = gp->waitreason;
682 else
683 status = runtime_gostringnocopy((const byte*)"waiting");
684 break;
685 default:
686 status = runtime_gostringnocopy((const byte*)"???");
687 break;
690 // approx time the G is blocked, in minutes
691 waitfor = 0;
692 if((gp->atomicstatus == _Gwaiting || gp->atomicstatus == _Gsyscall) && gp->waitsince != 0)
693 waitfor = (runtime_nanotime() - gp->waitsince) / (60LL*1000*1000*1000);
695 if(waitfor < 1)
696 runtime_printf("goroutine %D [%S]:\n", gp->goid, status);
697 else
698 runtime_printf("goroutine %D [%S, %D minutes]:\n", gp->goid, status, waitfor);
701 void
702 runtime_printcreatedby(G *g)
704 if(g != nil && g->gopc != 0 && g->goid != 1) {
705 String fn;
706 String file;
707 intgo line;
709 if(__go_file_line(g->gopc - 1, -1, &fn, &file, &line)) {
710 runtime_printf("created by %S\n", fn);
711 runtime_printf("\t%S:%D\n", file, (int64) line);
716 void
717 runtime_tracebackothers(G * volatile me)
719 G * volatile gp;
720 Traceback tb;
721 int32 traceback;
722 volatile uintptr i;
724 tb.gp = me;
725 traceback = runtime_gotraceback(nil);
727 // Show the current goroutine first, if we haven't already.
728 if((gp = g->m->curg) != nil && gp != me) {
729 runtime_printf("\n");
730 runtime_goroutineheader(gp);
731 gp->traceback = &tb;
733 #ifdef USING_SPLIT_STACK
734 __splitstack_getcontext(&me->stackcontext[0]);
735 #endif
736 getcontext(ucontext_arg(&me->context[0]));
738 if(gp->traceback != nil) {
739 runtime_gogo(gp);
742 runtime_printtrace(tb.locbuf, tb.c, false);
743 runtime_printcreatedby(gp);
746 runtime_lock(&allglock);
747 for(i = 0; i < runtime_allglen; i++) {
748 gp = runtime_allg[i];
749 if(gp == me || gp == g->m->curg || gp->atomicstatus == _Gdead)
750 continue;
751 if(gp->issystem && traceback < 2)
752 continue;
753 runtime_printf("\n");
754 runtime_goroutineheader(gp);
756 // Our only mechanism for doing a stack trace is
757 // _Unwind_Backtrace. And that only works for the
758 // current thread, not for other random goroutines.
759 // So we need to switch context to the goroutine, get
760 // the backtrace, and then switch back.
762 // This means that if g is running or in a syscall, we
763 // can't reliably print a stack trace. FIXME.
765 if(gp->atomicstatus == _Grunning) {
766 runtime_printf("\tgoroutine running on other thread; stack unavailable\n");
767 runtime_printcreatedby(gp);
768 } else if(gp->atomicstatus == _Gsyscall) {
769 runtime_printf("\tgoroutine in C code; stack unavailable\n");
770 runtime_printcreatedby(gp);
771 } else {
772 gp->traceback = &tb;
774 #ifdef USING_SPLIT_STACK
775 __splitstack_getcontext(&me->stackcontext[0]);
776 #endif
777 getcontext(ucontext_arg(&me->context[0]));
779 if(gp->traceback != nil) {
780 runtime_gogo(gp);
783 runtime_printtrace(tb.locbuf, tb.c, false);
784 runtime_printcreatedby(gp);
787 runtime_unlock(&allglock);
790 static void
791 checkmcount(void)
793 // sched lock is held
794 if(runtime_sched.mcount > runtime_sched.maxmcount) {
795 runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched.maxmcount);
796 runtime_throw("thread exhaustion");
800 // Do a stack trace of gp, and then restore the context to
801 // the goroutine recorded in gp->traceback (traceback->gp).
803 static void
804 gtraceback(G* gp)
806 Traceback* traceback;
808 traceback = gp->traceback;
809 gp->traceback = nil;
810 if(gp->m != nil)
811 runtime_throw("gtraceback: m is not nil");
812 gp->m = traceback->gp->m;
813 traceback->c = runtime_callers(1, traceback->locbuf,
814 sizeof traceback->locbuf / sizeof traceback->locbuf[0], false);
815 gp->m = nil;
816 runtime_gogo(traceback->gp);
819 static void
820 mcommoninit(M *mp)
822 // If there is no mcache runtime_callers() will crash,
823 // and we are most likely in the sysmon thread, so the stack is senseless anyway.
824 if(g->m->mcache)
825 runtime_callers(1, mp->createstack, nelem(mp->createstack), false);
827 mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
829 runtime_lock(&runtime_sched);
830 mp->id = runtime_sched.mcount++;
831 checkmcount();
832 runtime_mpreinit(mp);
834 // Add to runtime_allm so garbage collector doesn't free m
835 // when it is just in a register or thread-local storage.
836 mp->alllink = runtime_allm;
837 // runtime_NumCgoCall() iterates over allm w/o schedlock,
838 // so we need to publish it safely.
839 runtime_atomicstorep(&runtime_allm, mp);
840 runtime_unlock(&runtime_sched);
843 // Mark gp ready to run.
844 void
845 runtime_ready(G *gp)
847 // Mark runnable.
848 g->m->locks++; // disable preemption because it can be holding p in a local var
849 if(gp->atomicstatus != _Gwaiting) {
850 runtime_printf("goroutine %D has status %d\n", gp->goid, gp->atomicstatus);
851 runtime_throw("bad g->atomicstatus in ready");
853 gp->atomicstatus = _Grunnable;
854 runqput((P*)g->m->p, gp);
855 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0) // TODO: fast atomic
856 wakep();
857 g->m->locks--;
860 void goready(G*, int) __asm__ (GOSYM_PREFIX "runtime.goready");
862 void
863 goready(G* gp, int traceskip __attribute__ ((unused)))
865 runtime_ready(gp);
868 int32
869 runtime_gcprocs(void)
871 int32 n;
873 // Figure out how many CPUs to use during GC.
874 // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
875 runtime_lock(&runtime_sched);
876 n = runtime_gomaxprocs;
877 if(n > runtime_ncpu)
878 n = runtime_ncpu > 0 ? runtime_ncpu : 1;
879 if(n > MaxGcproc)
880 n = MaxGcproc;
881 if(n > runtime_sched.nmidle+1) // one M is currently running
882 n = runtime_sched.nmidle+1;
883 runtime_unlock(&runtime_sched);
884 return n;
887 static bool
888 needaddgcproc(void)
890 int32 n;
892 runtime_lock(&runtime_sched);
893 n = runtime_gomaxprocs;
894 if(n > runtime_ncpu)
895 n = runtime_ncpu;
896 if(n > MaxGcproc)
897 n = MaxGcproc;
898 n -= runtime_sched.nmidle+1; // one M is currently running
899 runtime_unlock(&runtime_sched);
900 return n > 0;
903 void
904 runtime_helpgc(int32 nproc)
906 M *mp;
907 int32 n, pos;
909 runtime_lock(&runtime_sched);
910 pos = 0;
911 for(n = 1; n < nproc; n++) { // one M is currently running
912 if(runtime_allp[pos]->mcache == g->m->mcache)
913 pos++;
914 mp = mget();
915 if(mp == nil)
916 runtime_throw("runtime_gcprocs inconsistency");
917 mp->helpgc = n;
918 mp->mcache = runtime_allp[pos]->mcache;
919 pos++;
920 runtime_notewakeup(&mp->park);
922 runtime_unlock(&runtime_sched);
925 // Similar to stoptheworld but best-effort and can be called several times.
926 // There is no reverse operation; it is used during crashing.
927 // This function must not lock any mutexes.
928 void
929 runtime_freezetheworld(void)
931 int32 i;
933 if(runtime_gomaxprocs == 1)
934 return;
935 // stopwait and preemption requests can be lost
936 // due to races with concurrently executing threads,
937 // so try several times
938 for(i = 0; i < 5; i++) {
939 // this should tell the scheduler to not start any new goroutines
940 runtime_sched.stopwait = 0x7fffffff;
941 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
942 // this should stop running goroutines
943 if(!preemptall())
944 break; // no running goroutines
945 runtime_usleep(1000);
947 // to be sure
948 runtime_usleep(1000);
949 preemptall();
950 runtime_usleep(1000);
953 void
954 runtime_stoptheworld(void)
956 int32 i;
957 uint32 s;
958 P *p;
959 bool wait;
961 runtime_lock(&runtime_sched);
962 runtime_sched.stopwait = runtime_gomaxprocs;
963 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
964 preemptall();
965 // stop current P
966 ((P*)g->m->p)->status = _Pgcstop;
967 runtime_sched.stopwait--;
968 // try to retake all P's in _Psyscall status
969 for(i = 0; i < runtime_gomaxprocs; i++) {
970 p = runtime_allp[i];
971 s = p->status;
972 if(s == _Psyscall && runtime_cas(&p->status, s, _Pgcstop))
973 runtime_sched.stopwait--;
975 // stop idle P's
976 while((p = pidleget()) != nil) {
977 p->status = _Pgcstop;
978 runtime_sched.stopwait--;
980 wait = runtime_sched.stopwait > 0;
981 runtime_unlock(&runtime_sched);
983 // wait for remaining P's to stop voluntarily
984 if(wait) {
985 runtime_notesleep(&runtime_sched.stopnote);
986 runtime_noteclear(&runtime_sched.stopnote);
988 if(runtime_sched.stopwait)
989 runtime_throw("stoptheworld: not stopped");
990 for(i = 0; i < runtime_gomaxprocs; i++) {
991 p = runtime_allp[i];
992 if(p->status != _Pgcstop)
993 runtime_throw("stoptheworld: not stopped");
997 static void
998 mhelpgc(void)
1000 g->m->helpgc = -1;
1003 void
1004 runtime_starttheworld(void)
1006 P *p, *p1;
1007 M *mp;
1008 G *gp;
1009 bool add;
1011 g->m->locks++; // disable preemption because it can be holding p in a local var
1012 gp = runtime_netpoll(false); // non-blocking
1013 injectglist(gp);
1014 add = needaddgcproc();
1015 runtime_lock(&runtime_sched);
1016 if(newprocs) {
1017 procresize(newprocs);
1018 newprocs = 0;
1019 } else
1020 procresize(runtime_gomaxprocs);
1021 runtime_sched.gcwaiting = 0;
1023 p1 = nil;
1024 while((p = pidleget()) != nil) {
1025 // procresize() puts p's with work at the beginning of the list.
1026 // Once we reach a p without a run queue, the rest don't have one either.
1027 if(p->runqhead == p->runqtail) {
1028 pidleput(p);
1029 break;
1031 p->m = (uintptr)mget();
1032 p->link = (uintptr)p1;
1033 p1 = p;
1035 if(runtime_sched.sysmonwait) {
1036 runtime_sched.sysmonwait = false;
1037 runtime_notewakeup(&runtime_sched.sysmonnote);
1039 runtime_unlock(&runtime_sched);
1041 while(p1) {
1042 p = p1;
1043 p1 = (P*)p1->link;
1044 if(p->m) {
1045 mp = (M*)p->m;
1046 p->m = 0;
1047 if(mp->nextp)
1048 runtime_throw("starttheworld: inconsistent mp->nextp");
1049 mp->nextp = (uintptr)p;
1050 runtime_notewakeup(&mp->park);
1051 } else {
1052 // Start M to run P. Do not start another M below.
1053 newm(nil, p);
1054 add = false;
1058 if(add) {
1059 // If GC could have used another helper proc, start one now,
1060 // in the hope that it will be available next time.
1061 // It would have been even better to start it before the collection,
1062 // but doing so requires allocating memory, so it's tricky to
1063 // coordinate. This lazy approach works out in practice:
1064 // we don't mind if the first couple gc rounds don't have quite
1065 // the maximum number of procs.
1066 newm(mhelpgc, nil);
1068 g->m->locks--;
1071 // Called to start an M.
1072 void*
1073 runtime_mstart(void* mp)
1075 M *m;
1077 m = (M*)mp;
1078 g = m->g0;
1079 g->m = m;
1081 initcontext();
1083 g->entry = nil;
1084 g->param = nil;
1086 // Record top of stack for use by mcall.
1087 // Once we call schedule we're never coming back,
1088 // so other calls can reuse this stack space.
1089 #ifdef USING_SPLIT_STACK
1090 __splitstack_getcontext(&g->stackcontext[0]);
1091 #else
1092 g->gcinitialsp = &mp;
1093 // Setting gcstacksize to 0 is a marker meaning that gcinitialsp
1094 // is the top of the stack, not the bottom.
1095 g->gcstacksize = 0;
1096 g->gcnextsp = &mp;
1097 #endif
1098 getcontext(ucontext_arg(&g->context[0]));
1100 if(g->entry != nil) {
1101 // Got here from mcall.
1102 void (*pfn)(G*) = (void (*)(G*))g->entry;
1103 G* gp = (G*)g->param;
1104 pfn(gp);
1105 *(int*)0x21 = 0x21;
1107 runtime_minit();
1109 #ifdef USING_SPLIT_STACK
1111 int dont_block_signals = 0;
1112 __splitstack_block_signals(&dont_block_signals, nil);
1114 #endif
1116 // Install signal handlers; after minit so that minit can
1117 // prepare the thread to be able to handle the signals.
1118 if(m == &runtime_m0) {
1119 if(runtime_iscgo && !runtime_cgoHasExtraM) {
1120 runtime_cgoHasExtraM = true;
1121 runtime_newextram();
1122 runtime_needextram = 0;
1124 runtime_initsig(false);
1127 if(m->mstartfn)
1128 ((void (*)(void))m->mstartfn)();
1130 if(m->helpgc) {
1131 m->helpgc = 0;
1132 stopm();
1133 } else if(m != &runtime_m0) {
1134 acquirep((P*)m->nextp);
1135 m->nextp = 0;
1137 schedule();
1139 // TODO(brainman): This point is never reached, because scheduler
1140 // does not release os threads at the moment. But once this path
1141 // is enabled, we must remove our seh here.
1143 return nil;
1146 typedef struct CgoThreadStart CgoThreadStart;
1147 struct CgoThreadStart
1149 M *m;
1150 G *g;
1151 uintptr *tls;
1152 void (*fn)(void);
1155 // Allocate a new m unassociated with any thread.
1156 // Can use p for allocation context if needed.
1158 runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, uintptr* ret_g0_stacksize)
1160 M *mp;
1162 g->m->locks++; // disable GC because it can be called from sysmon
1163 if(g->m->p == 0)
1164 acquirep(p); // temporarily borrow p for mallocs in this function
1165 #if 0
1166 if(mtype == nil) {
1167 Eface e;
1168 runtime_gc_m_ptr(&e);
1169 mtype = ((const PtrType*)e.__type_descriptor)->__element_type;
1171 #endif
1173 mp = runtime_mal(sizeof *mp);
1174 mcommoninit(mp);
1175 mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize);
1176 mp->g0->m = mp;
1178 if(p == (P*)g->m->p)
1179 releasep();
1180 g->m->locks--;
1182 return mp;
1185 static G*
1186 allocg(void)
1188 G *gp;
1189 // static Type *gtype;
1191 // if(gtype == nil) {
1192 // Eface e;
1193 // runtime_gc_g_ptr(&e);
1194 // gtype = ((PtrType*)e.__type_descriptor)->__element_type;
1195 // }
1196 // gp = runtime_cnew(gtype);
1197 gp = runtime_malloc(sizeof(G));
1198 return gp;
1201 static M* lockextra(bool nilokay);
1202 static void unlockextra(M*);
1204 // needm is called when a cgo callback happens on a
1205 // thread without an m (a thread not created by Go).
1206 // In this case, needm is expected to find an m to use
1207 // and return with m, g initialized correctly.
1208 // Since m and g are not set now (likely nil, but see below)
1209 // needm is limited in what routines it can call. In particular
1210 // it can only call nosplit functions (textflag 7) and cannot
1211 // do any scheduling that requires an m.
1213 // In order to avoid needing heavy lifting here, we adopt
1214 // the following strategy: there is a stack of available m's
1215 // that can be stolen. Using compare-and-swap
1216 // to pop from the stack has ABA races, so we simulate
1217 // a lock by doing an exchange (via casp) to steal the stack
1218 // head and replace the top pointer with MLOCKED (1).
1219 // This serves as a simple spin lock that we can use even
1220 // without an m. The thread that locks the stack in this way
1221 // unlocks the stack by storing a valid stack head pointer.
1223 // In order to make sure that there is always an m structure
1224 // available to be stolen, we maintain the invariant that there
1225 // is always one more than needed. At the beginning of the
1226 // program (if cgo is in use) the list is seeded with a single m.
1227 // If needm finds that it has taken the last m off the list, its job
1228 // is - once it has installed its own m so that it can do things like
1229 // allocate memory - to create a spare m and put it on the list.
1231 // Each of these extra m's also has a g0 and a curg that are
1232 // pressed into service as the scheduling stack and current
1233 // goroutine for the duration of the cgo callback.
1235 // When the callback is done with the m, it calls dropm to
1236 // put the m back on the list.
1238 // Unlike the gc toolchain, we start running on curg, since we are
1239 // just going to return and let the caller continue.
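// The locking protocol sketched (see lockextra/unlockextra further down;
// MLOCKED is ((M*)1)):
//
//	mp = runtime_atomicloadp(&runtime_extram);
//	if(mp != MLOCKED && runtime_casp(&runtime_extram, mp, MLOCKED)) {
//		// list is locked; mp is the old head
//	}
//	runtime_atomicstorep(&runtime_extram, newhead);  // unlock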
1240 void
1241 runtime_needm(void)
1243 M *mp;
1245 if(runtime_needextram) {
1246 // Can happen if C/C++ code calls Go from a global ctor.
1247 // Can not throw, because scheduler is not initialized yet.
1248 int rv __attribute__((unused));
1249 rv = runtime_write(2, "fatal error: cgo callback before cgo call\n",
1250 sizeof("fatal error: cgo callback before cgo call\n")-1);
1251 runtime_exit(1);
1254 // Lock extra list, take head, unlock popped list.
1255 // nilokay=false is safe here because of the invariant above,
1256 // that the extra list always contains or will soon contain
1257 // at least one m.
1258 mp = lockextra(false);
1260 // Set needextram when we've just emptied the list,
1261 // so that the eventual call into cgocallbackg will
1262 // allocate a new m for the extra list. We delay the
1263 // allocation until then so that it can be done
1264 // after exitsyscall makes sure it is okay to be
1265 // running at all (that is, there's no garbage collection
1266 // running right now).
1267 mp->needextram = mp->schedlink == 0;
1268 unlockextra((M*)mp->schedlink);
1270 // Install g (= m->curg).
1271 runtime_setg(mp->curg);
1273 // Initialize g's context as in mstart.
1274 initcontext();
1275 g->atomicstatus = _Gsyscall;
1276 g->entry = nil;
1277 g->param = nil;
1278 #ifdef USING_SPLIT_STACK
1279 __splitstack_getcontext(&g->stackcontext[0]);
1280 #else
1281 g->gcinitialsp = &mp;
1282 g->gcstack = nil;
1283 g->gcstacksize = 0;
1284 g->gcnextsp = &mp;
1285 #endif
1286 getcontext(ucontext_arg(&g->context[0]));
1288 if(g->entry != nil) {
1289 // Got here from mcall.
1290 void (*pfn)(G*) = (void (*)(G*))g->entry;
1291 G* gp = (G*)g->param;
1292 pfn(gp);
1293 *(int*)0x22 = 0x22;
1296 // Initialize this thread to use the m.
1297 runtime_minit();
1299 #ifdef USING_SPLIT_STACK
1301 int dont_block_signals = 0;
1302 __splitstack_block_signals(&dont_block_signals, nil);
1304 #endif
1307 // newextram allocates an m and puts it on the extra list.
1308 // It is called with a working local m, so that it can do things
1309 // like call schedlock and allocate.
1310 void
1311 runtime_newextram(void)
1313 M *mp, *mnext;
1314 G *gp;
1315 byte *g0_sp, *sp;
1316 uintptr g0_spsize, spsize;
1317 ucontext_t *uc;
1319 // Create extra goroutine locked to extra m.
1320 // The goroutine is the context in which the cgo callback will run.
1321 // The sched.pc will never be returned to, but setting it to
1322 // runtime.goexit makes clear to the traceback routines where
1323 // the goroutine stack ends.
1324 mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize);
1325 gp = runtime_malg(StackMin, &sp, &spsize);
1326 gp->atomicstatus = _Gdead;
1327 gp->m = mp;
1328 mp->curg = gp;
1329 mp->locked = _LockInternal;
1330 mp->lockedg = gp;
1331 gp->lockedm = mp;
1332 gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
1333 // put on allg for garbage collector
1334 allgadd(gp);
1336 // The context for gp will be set up in runtime_needm. But
1337 // here we need to set up the context for g0.
1338 uc = ucontext_arg(&mp->g0->context[0]);
1339 getcontext(uc);
1340 uc->uc_stack.ss_sp = g0_sp;
1341 uc->uc_stack.ss_size = (size_t)g0_spsize;
1342 makecontext(uc, kickoff, 0);
1344 // Add m to the extra list.
1345 mnext = lockextra(true);
1346 mp->schedlink = (uintptr)mnext;
1347 unlockextra(mp);
1350 // dropm is called when a cgo callback has called needm but is now
1351 // done with the callback and returning back into the non-Go thread.
1352 // It puts the current m back onto the extra list.
1354 // The main expense here is the call to signalstack to release the
1355 // m's signal stack, and then the call to needm on the next callback
1356 // from this thread. It is tempting to try to save the m for next time,
1357 // which would eliminate both these costs, but there might not be
1358 // a next time: the current thread (which Go does not control) might exit.
1359 // If we saved the m for that thread, there would be an m leak each time
1360 // such a thread exited. Instead, we acquire and release an m on each
1361 // call. These should typically not be scheduling operations, just a few
1362 // atomics, so the cost should be small.
1364 // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
1365 // variable using pthread_key_create. Unlike the pthread keys we already use
1366 // on OS X, this dummy key would never be read by Go code. It would exist
1367 // only so that we could register a thread-exit-time destructor.
1368 // That destructor would put the m back onto the extra list.
1369 // This is purely a performance optimization. The current version,
1370 // in which dropm happens on each cgo call, is still correct too.
1371 // We may have to keep the current version on systems with cgo
1372 // but without pthreads, like Windows.
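// A minimal sketch of that alternative, under the stated assumptions
// (hypothetical key and helper names; not used anywhere in this file):
#if 0
static pthread_key_t dropm_key;

static void
dropm_destructor(void *v __attribute__ ((unused)))
{
	runtime_dropm();
}

// The key would be created once (e.g. under pthread_once) and each
// cgo-created thread would mark it so the destructor runs at thread exit:
//	pthread_key_create(&dropm_key, dropm_destructor);
//	pthread_setspecific(dropm_key, (void*)1);
#endif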
1373 void
1374 runtime_dropm(void)
1376 M *mp, *mnext;
1378 // Undo whatever initialization minit did during needm.
1379 runtime_unminit();
1381 // Clear m and g, and return m to the extra list.
1382 // After the call to setg we can only call nosplit functions.
1383 mp = g->m;
1384 runtime_setg(nil);
1386 mp->curg->atomicstatus = _Gdead;
1387 mp->curg->gcstack = nil;
1388 mp->curg->gcnextsp = nil;
1390 mnext = lockextra(true);
1391 mp->schedlink = (uintptr)mnext;
1392 unlockextra(mp);
1395 #define MLOCKED ((M*)1)
1397 // lockextra locks the extra list and returns the list head.
1398 // The caller must unlock the list by storing a new list head
1399 // to runtime.extram. If nilokay is true, then lockextra will
1400 // return a nil list head if that's what it finds. If nilokay is false,
1401 // lockextra will keep waiting until the list head is no longer nil.
1402 static M*
1403 lockextra(bool nilokay)
1405 M *mp;
1406 void (*yield)(void);
1408 for(;;) {
1409 mp = runtime_atomicloadp(&runtime_extram);
1410 if(mp == MLOCKED) {
1411 yield = runtime_osyield;
1412 yield();
1413 continue;
1415 if(mp == nil && !nilokay) {
1416 runtime_usleep(1);
1417 continue;
1419 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1420 yield = runtime_osyield;
1421 yield();
1422 continue;
1424 break;
1426 return mp;
1429 static void
1430 unlockextra(M *mp)
1432 runtime_atomicstorep(&runtime_extram, mp);
1435 static int32
1436 countextra(void)
1438 M *mp, *mc;
1439 int32 c;
1441 for(;;) {
1442 mp = runtime_atomicloadp(&runtime_extram);
1443 if(mp == MLOCKED) {
1444 runtime_osyield();
1445 continue;
1447 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1448 runtime_osyield();
1449 continue;
1451 c = 0;
1452 for(mc = mp; mc != nil; mc = (M*)mc->schedlink)
1453 c++;
1454 runtime_atomicstorep(&runtime_extram, mp);
1455 return c;
1459 // Create a new m. It will start off with a call to fn, or else the scheduler.
1460 static void
1461 newm(void(*fn)(void), P *p)
1463 M *mp;
1465 mp = runtime_allocm(p, -1, nil, nil);
1466 mp->nextp = (uintptr)p;
1467 mp->mstartfn = (uintptr)(void*)fn;
1469 runtime_newosproc(mp);
1472 // Stops execution of the current m until new work is available.
1473 // Returns with acquired P.
1474 static void
1475 stopm(void)
1477 M* m;
1479 m = g->m;
1480 if(m->locks)
1481 runtime_throw("stopm holding locks");
1482 if(m->p)
1483 runtime_throw("stopm holding p");
1484 if(m->spinning) {
1485 m->spinning = false;
1486 runtime_xadd(&runtime_sched.nmspinning, -1);
1489 retry:
1490 runtime_lock(&runtime_sched);
1491 mput(m);
1492 runtime_unlock(&runtime_sched);
1493 runtime_notesleep(&m->park);
1494 m = g->m;
1495 runtime_noteclear(&m->park);
1496 if(m->helpgc) {
1497 runtime_gchelper();
1498 m->helpgc = 0;
1499 m->mcache = nil;
1500 goto retry;
1502 acquirep((P*)m->nextp);
1503 m->nextp = 0;
1506 static void
1507 mspinning(void)
1509 g->m->spinning = true;
1512 // Schedules some M to run the p (creates an M if necessary).
1513 // If p==nil, tries to get an idle P, if no idle P's does nothing.
1514 static void
1515 startm(P *p, bool spinning)
1517 M *mp;
1518 void (*fn)(void);
1520 runtime_lock(&runtime_sched);
1521 if(p == nil) {
1522 p = pidleget();
1523 if(p == nil) {
1524 runtime_unlock(&runtime_sched);
1525 if(spinning)
1526 runtime_xadd(&runtime_sched.nmspinning, -1);
1527 return;
1530 mp = mget();
1531 runtime_unlock(&runtime_sched);
1532 if(mp == nil) {
1533 fn = nil;
1534 if(spinning)
1535 fn = mspinning;
1536 newm(fn, p);
1537 return;
1539 if(mp->spinning)
1540 runtime_throw("startm: m is spinning");
1541 if(mp->nextp)
1542 runtime_throw("startm: m has p");
1543 mp->spinning = spinning;
1544 mp->nextp = (uintptr)p;
1545 runtime_notewakeup(&mp->park);
1548 // Hands off P from syscall or locked M.
1549 static void
1550 handoffp(P *p)
1552 // if it has local work, start it straight away
1553 if(p->runqhead != p->runqtail || runtime_sched.runqsize) {
1554 startm(p, false);
1555 return;
1557 // no local work, check that there are no spinning/idle M's,
1558 // otherwise our help is not required
1559 if(runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) == 0 && // TODO: fast atomic
1560 runtime_cas(&runtime_sched.nmspinning, 0, 1)) {
1561 startm(p, true);
1562 return;
1564 runtime_lock(&runtime_sched);
1565 if(runtime_sched.gcwaiting) {
1566 p->status = _Pgcstop;
1567 if(--runtime_sched.stopwait == 0)
1568 runtime_notewakeup(&runtime_sched.stopnote);
1569 runtime_unlock(&runtime_sched);
1570 return;
1572 if(runtime_sched.runqsize) {
1573 runtime_unlock(&runtime_sched);
1574 startm(p, false);
1575 return;
1577 // If this is the last running P and nobody is polling network,
1578 // need to wakeup another M to poll network.
1579 if(runtime_sched.npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched.lastpoll) != 0) {
1580 runtime_unlock(&runtime_sched);
1581 startm(p, false);
1582 return;
1584 pidleput(p);
1585 runtime_unlock(&runtime_sched);
1588 // Tries to add one more P to execute G's.
1589 // Called when a G is made runnable (newproc, ready).
1590 static void
1591 wakep(void)
1593 // be conservative about spinning threads
1594 if(!runtime_cas(&runtime_sched.nmspinning, 0, 1))
1595 return;
1596 startm(nil, true);
1599 // Stops execution of the current m that is locked to a g until the g is runnable again.
1600 // Returns with acquired P.
1601 static void
1602 stoplockedm(void)
1604 M *m;
1605 P *p;
1607 m = g->m;
1608 if(m->lockedg == nil || m->lockedg->lockedm != m)
1609 runtime_throw("stoplockedm: inconsistent locking");
1610 if(m->p) {
1611 // Schedule another M to run this p.
1612 p = releasep();
1613 handoffp(p);
1615 incidlelocked(1);
1616 // Wait until another thread schedules lockedg again.
1617 runtime_notesleep(&m->park);
1618 m = g->m;
1619 runtime_noteclear(&m->park);
1620 if(m->lockedg->atomicstatus != _Grunnable)
1621 runtime_throw("stoplockedm: not runnable");
1622 acquirep((P*)m->nextp);
1623 m->nextp = 0;
1626 // Schedules the locked m to run the locked gp.
1627 static void
1628 startlockedm(G *gp)
1630 M *mp;
1631 P *p;
1633 mp = gp->lockedm;
1634 if(mp == g->m)
1635 runtime_throw("startlockedm: locked to me");
1636 if(mp->nextp)
1637 runtime_throw("startlockedm: m has p");
1638 // directly handoff current P to the locked m
1639 incidlelocked(-1);
1640 p = releasep();
1641 mp->nextp = (uintptr)p;
1642 runtime_notewakeup(&mp->park);
1643 stopm();
1646 // Stops the current m for stoptheworld.
1647 // Returns when the world is restarted.
1648 static void
1649 gcstopm(void)
1651 P *p;
1653 if(!runtime_sched.gcwaiting)
1654 runtime_throw("gcstopm: not waiting for gc");
1655 if(g->m->spinning) {
1656 g->m->spinning = false;
1657 runtime_xadd(&runtime_sched.nmspinning, -1);
1659 p = releasep();
1660 runtime_lock(&runtime_sched);
1661 p->status = _Pgcstop;
1662 if(--runtime_sched.stopwait == 0)
1663 runtime_notewakeup(&runtime_sched.stopnote);
1664 runtime_unlock(&runtime_sched);
1665 stopm();
1668 // Schedules gp to run on the current M.
1669 // Never returns.
1670 static void
1671 execute(G *gp)
1673 int32 hz;
1675 if(gp->atomicstatus != _Grunnable) {
1676 runtime_printf("execute: bad g status %d\n", gp->atomicstatus);
1677 runtime_throw("execute: bad g status");
1679 gp->atomicstatus = _Grunning;
1680 gp->waitsince = 0;
1681 ((P*)g->m->p)->schedtick++;
1682 g->m->curg = gp;
1683 gp->m = g->m;
1685 // Check whether the profiler needs to be turned on or off.
1686 hz = runtime_sched.profilehz;
1687 if(g->m->profilehz != hz)
1688 runtime_resetcpuprofiler(hz);
1690 runtime_gogo(gp);
1693 // Finds a runnable goroutine to execute.
1694 // Tries to steal from other P's, get g from global queue, poll network.
1695 static G*
1696 findrunnable(void)
1698 G *gp;
1699 P *p;
1700 int32 i;
1702 top:
1703 if(runtime_sched.gcwaiting) {
1704 gcstopm();
1705 goto top;
1707 if(runtime_fingwait && runtime_fingwake && (gp = runtime_wakefing()) != nil)
1708 runtime_ready(gp);
1709 // local runq
1710 gp = runqget((P*)g->m->p);
1711 if(gp)
1712 return gp;
1713 // global runq
1714 if(runtime_sched.runqsize) {
1715 runtime_lock(&runtime_sched);
1716 gp = globrunqget((P*)g->m->p, 0);
1717 runtime_unlock(&runtime_sched);
1718 if(gp)
1719 return gp;
1721 // poll network
1722 gp = runtime_netpoll(false); // non-blocking
1723 if(gp) {
1724 injectglist((G*)gp->schedlink);
1725 gp->atomicstatus = _Grunnable;
1726 return gp;
1728 // If number of spinning M's >= number of busy P's, block.
1729 // This is necessary to prevent excessive CPU consumption
1730 // when GOMAXPROCS>>1 but the program parallelism is low.
1731 if(!g->m->spinning && 2 * runtime_atomicload(&runtime_sched.nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched.npidle)) // TODO: fast atomic
1732 goto stop;
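// Worked example: with runtime_gomaxprocs == 8 and 5 idle P's there are
// 3 busy P's, so a non-spinning M takes the goto above once nmspinning
// reaches 2 (2*2 == 4 >= 8-5 == 3).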
1733 if(!g->m->spinning) {
1734 g->m->spinning = true;
1735 runtime_xadd(&runtime_sched.nmspinning, 1);
1737 // random steal from other P's
1738 for(i = 0; i < 2*runtime_gomaxprocs; i++) {
1739 if(runtime_sched.gcwaiting)
1740 goto top;
1741 p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
1742 if(p == (P*)g->m->p)
1743 gp = runqget(p);
1744 else
1745 gp = runqsteal((P*)g->m->p, p);
1746 if(gp)
1747 return gp;
1749 stop:
1750 // return P and block
1751 runtime_lock(&runtime_sched);
1752 if(runtime_sched.gcwaiting) {
1753 runtime_unlock(&runtime_sched);
1754 goto top;
1756 if(runtime_sched.runqsize) {
1757 gp = globrunqget((P*)g->m->p, 0);
1758 runtime_unlock(&runtime_sched);
1759 return gp;
1761 p = releasep();
1762 pidleput(p);
1763 runtime_unlock(&runtime_sched);
1764 if(g->m->spinning) {
1765 g->m->spinning = false;
1766 runtime_xadd(&runtime_sched.nmspinning, -1);
1768 // check all runqueues once again
1769 for(i = 0; i < runtime_gomaxprocs; i++) {
1770 p = runtime_allp[i];
1771 if(p && p->runqhead != p->runqtail) {
1772 runtime_lock(&runtime_sched);
1773 p = pidleget();
1774 runtime_unlock(&runtime_sched);
1775 if(p) {
1776 acquirep(p);
1777 goto top;
1779 break;
1782 // poll network
1783 if(runtime_xchg64(&runtime_sched.lastpoll, 0) != 0) {
1784 if(g->m->p)
1785 runtime_throw("findrunnable: netpoll with p");
1786 if(g->m->spinning)
1787 runtime_throw("findrunnable: netpoll with spinning");
1788 gp = runtime_netpoll(true); // block until new work is available
1789 runtime_atomicstore64(&runtime_sched.lastpoll, runtime_nanotime());
1790 if(gp) {
1791 runtime_lock(&runtime_sched);
1792 p = pidleget();
1793 runtime_unlock(&runtime_sched);
1794 if(p) {
1795 acquirep(p);
1796 injectglist((G*)gp->schedlink);
1797 gp->atomicstatus = _Grunnable;
1798 return gp;
1800 injectglist(gp);
1803 stopm();
1804 goto top;
1807 static void
1808 resetspinning(void)
1810 int32 nmspinning;
1812 if(g->m->spinning) {
1813 g->m->spinning = false;
1814 nmspinning = runtime_xadd(&runtime_sched.nmspinning, -1);
1815 if(nmspinning < 0)
1816 runtime_throw("findrunnable: negative nmspinning");
1817 } else
1818 nmspinning = runtime_atomicload(&runtime_sched.nmspinning);
1820 // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
1821 // so see if we need to wakeup another P here.
1822 if (nmspinning == 0 && runtime_atomicload(&runtime_sched.npidle) > 0)
1823 wakep();
1826 // Injects the list of runnable G's into the scheduler.
1827 // Can run concurrently with GC.
1828 static void
1829 injectglist(G *glist)
1831 int32 n;
1832 G *gp;
1834 if(glist == nil)
1835 return;
1836 runtime_lock(&runtime_sched);
1837 for(n = 0; glist; n++) {
1838 gp = glist;
1839 glist = (G*)gp->schedlink;
1840 gp->atomicstatus = _Grunnable;
1841 globrunqput(gp);
1843 runtime_unlock(&runtime_sched);
1845 for(; n && runtime_sched.npidle; n--)
1846 startm(nil, false);
1849 // One round of scheduler: find a runnable goroutine and execute it.
1850 // Never returns.
1851 static void
1852 schedule(void)
1854 G *gp;
1855 uint32 tick;
1857 if(g->m->locks)
1858 runtime_throw("schedule: holding locks");
1860 top:
1861 if(runtime_sched.gcwaiting) {
1862 gcstopm();
1863 goto top;
1866 gp = nil;
1867 // Check the global runnable queue once in a while to ensure fairness.
1868 // Otherwise two goroutines can completely occupy the local runqueue
1869 // by constantly respawning each other.
1870 tick = ((P*)g->m->p)->schedtick;
1871 // This is a fancy way to say tick%61==0,
1872 // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
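// Worked example: 0x4325c53f is floor(2^36/61), so for tick == 61 the
// product shifted right by 36 is 1 and 61 - 1*61 == 0 (trigger), while
// for tick == 62 it is still 1 and 62 - 61 == 1 (no trigger).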
1873 if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched.runqsize > 0) {
1874 runtime_lock(&runtime_sched);
1875 gp = globrunqget((P*)g->m->p, 1);
1876 runtime_unlock(&runtime_sched);
1877 if(gp)
1878 resetspinning();
1880 if(gp == nil) {
1881 gp = runqget((P*)g->m->p);
1882 if(gp && g->m->spinning)
1883 runtime_throw("schedule: spinning with local work");
1885 if(gp == nil) {
1886 gp = findrunnable(); // blocks until work is available
1887 resetspinning();
1890 if(gp->lockedm) {
1891 // Hands off own p to the locked m,
1892 // then blocks waiting for a new p.
1893 startlockedm(gp);
1894 goto top;
1897 execute(gp);
1900 // Puts the current goroutine into a waiting state and calls unlockf.
1901 // If unlockf returns false, the goroutine is resumed.
1902 void
1903 runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason)
1905 if(g->atomicstatus != _Grunning)
1906 runtime_throw("bad g status");
1907 g->m->waitlock = lock;
1908 g->m->waitunlockf = unlockf;
1909 g->waitreason = runtime_gostringnocopy((const byte*)reason);
1910 runtime_mcall(park0);
1913 void gopark(FuncVal *, void *, String, byte, int)
1914 __asm__ (GOSYM_PREFIX "runtime.gopark");
1916 void
1917 gopark(FuncVal *unlockf, void *lock, String reason,
1918 byte traceEv __attribute__ ((unused)),
1919 int traceskip __attribute__ ((unused)))
1921 if(g->atomicstatus != _Grunning)
1922 runtime_throw("bad g status");
1923 g->m->waitlock = lock;
1924 g->m->waitunlockf = unlockf == nil ? nil : (void*)unlockf->fn;
1925 g->waitreason = reason;
1926 runtime_mcall(park0);
1929 static bool
1930 parkunlock(G *gp, void *lock)
1932 USED(gp);
1933 runtime_unlock(lock);
1934 return true;
1937 // Puts the current goroutine into a waiting state and unlocks the lock.
1938 // The goroutine can be made runnable again by calling runtime_ready(gp).
1939 void
1940 runtime_parkunlock(Lock *lock, const char *reason)
1942 runtime_park(parkunlock, lock, reason);
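// Typical pairing, using only functions defined in this file: the sleeper
// parks while holding its lock, and the waker calls runtime_ready:
//
//	runtime_lock(&l);
//	... decide to sleep ...
//	runtime_parkunlock(&l, "some reason");   // releases l, blocks
//
//	// elsewhere, once gp should run again:
//	runtime_ready(gp);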
1945 void goparkunlock(Lock *, String, byte, int)
1946 __asm__ (GOSYM_PREFIX "runtime.goparkunlock");
1948 void
1949 goparkunlock(Lock *lock, String reason, byte traceEv __attribute__ ((unused)),
1950 int traceskip __attribute__ ((unused)))
1952 if(g->atomicstatus != _Grunning)
1953 runtime_throw("bad g status");
1954 g->m->waitlock = lock;
1955 g->m->waitunlockf = parkunlock;
1956 g->waitreason = reason;
1957 runtime_mcall(park0);
1960 // runtime_park continuation on g0.
1961 static void
1962 park0(G *gp)
1964 M *m;
1965 bool ok;
1967 m = g->m;
1968 gp->atomicstatus = _Gwaiting;
1969 gp->m = nil;
1970 m->curg = nil;
1971 if(m->waitunlockf) {
1972 ok = ((bool (*)(G*, void*))m->waitunlockf)(gp, m->waitlock);
1973 m->waitunlockf = nil;
1974 m->waitlock = nil;
1975 if(!ok) {
1976 gp->atomicstatus = _Grunnable;
1977 execute(gp); // Schedule it back, never returns.
1980 if(m->lockedg) {
1981 stoplockedm();
1982 execute(gp); // Never returns.
1984 schedule();
1987 // Scheduler yield.
1988 void
1989 runtime_gosched(void)
1991 if(g->atomicstatus != _Grunning)
1992 runtime_throw("bad g status");
1993 runtime_mcall(runtime_gosched0);
1996 // runtime_gosched continuation on g0.
1997 void
1998 runtime_gosched0(G *gp)
2000 M *m;
2002 m = g->m;
2003 gp->atomicstatus = _Grunnable;
2004 gp->m = nil;
2005 m->curg = nil;
2006 runtime_lock(&runtime_sched);
2007 globrunqput(gp);
2008 runtime_unlock(&runtime_sched);
2009 if(m->lockedg) {
2010 stoplockedm();
2011 execute(gp); // Never returns.
2013 schedule();
2016 // Finishes execution of the current goroutine.
2017 // Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack).
2018 // Since it does not return it does not matter. But if it is preempted
2019 // at the split stack check, GC will complain about inconsistent sp.
2020 void runtime_goexit(void) __attribute__ ((noinline));
2021 void
2022 runtime_goexit(void)
2024 if(g->atomicstatus != _Grunning)
2025 runtime_throw("bad g status");
2026 runtime_mcall(goexit0);
2029 // runtime_goexit continuation on g0.
2030 static void
2031 goexit0(G *gp)
2033 M *m;
2035 m = g->m;
2036 gp->atomicstatus = _Gdead;
2037 gp->entry = nil;
2038 gp->m = nil;
2039 gp->lockedm = nil;
2040 gp->paniconfault = 0;
2041 gp->_defer = nil; // should be true already but just in case.
2042 gp->_panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
2043 gp->writebuf.__values = nil;
2044 gp->writebuf.__count = 0;
2045 gp->writebuf.__capacity = 0;
2046 gp->waitreason = runtime_gostringnocopy(nil);
2047 gp->param = nil;
2048 m->curg = nil;
2049 m->lockedg = nil;
2050 if(m->locked & ~_LockExternal) {
2051 runtime_printf("invalid m->locked = %d\n", m->locked);
2052 runtime_throw("internal lockOSThread error");
2054 m->locked = 0;
2055 gfput((P*)m->p, gp);
2056 schedule();
2059 // The goroutine g is about to enter a system call.
2060 // Record that it's not using the cpu anymore.
2061 // This is called only from the go syscall library and cgocall,
2062 // not from the low-level system calls used by the runtime.
2064 // Entersyscall cannot split the stack: the runtime_gosave must
2065 // make g->sched refer to the caller's stack segment, because
2066 // entersyscall is going to return immediately after.
2068 void runtime_entersyscall(int32) __attribute__ ((no_split_stack));
2069 static void doentersyscall(void) __attribute__ ((no_split_stack, noinline));
2071 void
2072 runtime_entersyscall(int32 dummy __attribute__ ((unused)))
2074 // Save the registers in the g structure so that any pointers
2075 // held in registers will be seen by the garbage collector.
2076 getcontext(ucontext_arg(&g->gcregs[0]));
2078 // Do the work in a separate function, so that this function
2079 // doesn't save any registers on its own stack. If this
2080 // function does save any registers, we might store the wrong
2081 // value in the call to getcontext.
2083 // FIXME: This assumes that we do not need to save any
2084 // callee-saved registers to access the TLS variable g. We
2085 // don't want to put the ucontext_t on the stack because it is
2086 // large and we can not split the stack here.
2087 doentersyscall();
2090 static void
2091 doentersyscall(void)
2093 // Disable preemption because during this function g is in _Gsyscall status,
2094 // but can have inconsistent g->sched, do not let GC observe it.
2095 g->m->locks++;
2097 // Leave SP around for GC and traceback.
2098 #ifdef USING_SPLIT_STACK
2100 size_t gcstacksize;
2101 g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
2102 &g->gcnextsegment, &g->gcnextsp,
2103 &g->gcinitialsp);
2104 g->gcstacksize = (uintptr)gcstacksize;
2106 #else
2108 void *v;
2110 g->gcnextsp = (byte *) &v;
2112 #endif
2114 g->atomicstatus = _Gsyscall;
2116 if(runtime_atomicload(&runtime_sched.sysmonwait)) { // TODO: fast atomic
2117 runtime_lock(&runtime_sched);
2118 if(runtime_atomicload(&runtime_sched.sysmonwait)) {
2119 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2120 runtime_notewakeup(&runtime_sched.sysmonnote);
2122 runtime_unlock(&runtime_sched);
2125 g->m->mcache = nil;
2126 ((P*)(g->m->p))->m = 0;
2127 runtime_atomicstore(&((P*)g->m->p)->status, _Psyscall);
2128 if(runtime_atomicload(&runtime_sched.gcwaiting)) {
2129 runtime_lock(&runtime_sched);
2130 if (runtime_sched.stopwait > 0 && runtime_cas(&((P*)g->m->p)->status, _Psyscall, _Pgcstop)) {
2131 if(--runtime_sched.stopwait == 0)
2132 runtime_notewakeup(&runtime_sched.stopnote);
2134 runtime_unlock(&runtime_sched);
2137 g->m->locks--;
2140 // The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
2141 void
2142 runtime_entersyscallblock(int32 dummy __attribute__ ((unused)))
2144 P *p;
2146 g->m->locks++; // see comment in entersyscall
2148 // Leave SP around for GC and traceback.
2149 #ifdef USING_SPLIT_STACK
2151 size_t gcstacksize;
2152 g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
2153 &g->gcnextsegment, &g->gcnextsp,
2154 &g->gcinitialsp);
2155 g->gcstacksize = (uintptr)gcstacksize;
2157 #else
2158 g->gcnextsp = (byte *) &p;
2159 #endif
2161 // Save the registers in the g structure so that any pointers
2162 // held in registers will be seen by the garbage collector.
2163 getcontext(ucontext_arg(&g->gcregs[0]));
2165 g->atomicstatus = _Gsyscall;
2167 p = releasep();
2168 handoffp(p);
2169 if(g->isbackground) // do not consider blocked scavenger for deadlock detection
2170 incidlelocked(1);
2172 g->m->locks--;
2175 // The goroutine g exited its system call.
2176 // Arrange for it to run on a cpu again.
2177 // This is called only from the go syscall library, not
2178 // from the low-level system calls used by the runtime.
2179 void
2180 runtime_exitsyscall(int32 dummy __attribute__ ((unused)))
2182 G *gp;
2184 gp = g;
2185 gp->m->locks++; // see comment in entersyscall
2187 if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
2188 incidlelocked(-1);
2190 gp->waitsince = 0;
2191 if(exitsyscallfast()) {
2192 // There's a cpu for us, so we can run.
2193 ((P*)gp->m->p)->syscalltick++;
2194 gp->atomicstatus = _Grunning;
2195 // Garbage collector isn't running (since we are),
2196 // so okay to clear gcstack and gcsp.
2197 #ifdef USING_SPLIT_STACK
2198 gp->gcstack = nil;
2199 #endif
2200 gp->gcnextsp = nil;
2201 runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
2202 gp->m->locks--;
2203 return;
2206 gp->m->locks--;
2208 // Call the scheduler.
2209 runtime_mcall(exitsyscall0);
2211 // Scheduler returned, so we're allowed to run now.
2212 // Delete the gcstack information that we left for
2213 // the garbage collector during the system call.
2214 // Must wait until now because until gosched returns
2215 // we don't know for sure that the garbage collector
2216 // is not running.
2217 #ifdef USING_SPLIT_STACK
2218 gp->gcstack = nil;
2219 #endif
2220 gp->gcnextsp = nil;
2221 runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
2223 // Note that this gp->m might be different than the earlier
2224 // gp->m after returning from runtime_mcall.
2225 ((P*)gp->m->p)->syscalltick++;
2228 static bool
2229 exitsyscallfast(void)
2231 G *gp;
2232 P *p;
2234 gp = g;
2236 // Freezetheworld sets stopwait but does not retake P's.
2237 if(runtime_sched.stopwait) {
2238 gp->m->p = 0;
2239 return false;
2242 // Try to re-acquire the last P.
2243 if(gp->m->p && ((P*)gp->m->p)->status == _Psyscall && runtime_cas(&((P*)gp->m->p)->status, _Psyscall, _Prunning)) {
2244 // There's a cpu for us, so we can run.
2245 gp->m->mcache = ((P*)gp->m->p)->mcache;
2246 ((P*)gp->m->p)->m = (uintptr)gp->m;
2247 return true;
2249 // Try to get any other idle P.
2250 gp->m->p = 0;
2251 if(runtime_sched.pidle) {
2252 runtime_lock(&runtime_sched);
2253 p = pidleget();
2254 if(p && runtime_atomicload(&runtime_sched.sysmonwait)) {
2255 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2256 runtime_notewakeup(&runtime_sched.sysmonnote);
2258 runtime_unlock(&runtime_sched);
2259 if(p) {
2260 acquirep(p);
2261 return true;
2264 return false;
2267 // runtime_exitsyscall slow path on g0.
2268 // Failed to acquire P, enqueue gp as runnable.
2269 static void
2270 exitsyscall0(G *gp)
2272 M *m;
2273 P *p;
2275 m = g->m;
2276 gp->atomicstatus = _Grunnable;
2277 gp->m = nil;
2278 m->curg = nil;
2279 runtime_lock(&runtime_sched);
2280 p = pidleget();
2281 if(p == nil)
2282 globrunqput(gp);
2283 else if(runtime_atomicload(&runtime_sched.sysmonwait)) {
2284 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2285 runtime_notewakeup(&runtime_sched.sysmonnote);
2287 runtime_unlock(&runtime_sched);
2288 if(p) {
2289 acquirep(p);
2290 execute(gp); // Never returns.
2292 if(m->lockedg) {
2293 // Wait until another thread schedules gp and so m again.
2294 stoplockedm();
2295 execute(gp); // Never returns.
2297 stopm();
2298 schedule(); // Never returns.
2301 void syscall_entersyscall(void)
2302 __asm__(GOSYM_PREFIX "syscall.Entersyscall");
2304 void syscall_entersyscall(void) __attribute__ ((no_split_stack));
2306 void
2307 syscall_entersyscall()
2309 runtime_entersyscall(0);
2312 void syscall_exitsyscall(void)
2313 __asm__(GOSYM_PREFIX "syscall.Exitsyscall");
2315 void syscall_exitsyscall(void) __attribute__ ((no_split_stack));
2317 void
2318 syscall_exitsyscall()
2320 runtime_exitsyscall(0);
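// For illustration only -- a sketch of how the wrappers above are meant
// to be used, with raw_syscall standing in for whatever kernel entry the
// syscall package actually performs:
//
//	syscall_entersyscall();   // mark g as _Gsyscall; the P may be handed off
//	r = raw_syscall(...);     // the potentially blocking kernel call
//	syscall_exitsyscall();    // fast path retakes a P, else exitsyscall0 reschedules
//
// While the thread is blocked in the kernel, handoffp or sysmon's retake
// can give the P to another M so other goroutines keep running.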
2323 // Called from syscall package before fork.
2324 void syscall_runtime_BeforeFork(void)
2325 __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork");
2326 void
2327 syscall_runtime_BeforeFork(void)
2329 // Fork can hang if preempted with signals frequently enough (see issue 5517).
2330 // Ensure that we stay on the same M where we disable profiling.
2331 runtime_m()->locks++;
2332 if(runtime_m()->profilehz != 0)
2333 runtime_resetcpuprofiler(0);
2336 // Called from syscall package after fork in parent.
2337 void syscall_runtime_AfterFork(void)
2338 __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork");
2339 void
2340 syscall_runtime_AfterFork(void)
2342 int32 hz;
2344 hz = runtime_sched.profilehz;
2345 if(hz != 0)
2346 runtime_resetcpuprofiler(hz);
2347 runtime_m()->locks--;
2350 // Allocate a new g, with a stack big enough for stacksize bytes.
2351 G*
2352 runtime_malg(int32 stacksize, byte** ret_stack, uintptr* ret_stacksize)
2354 G *newg;
2356 newg = allocg();
2357 if(stacksize >= 0) {
2358 #if USING_SPLIT_STACK
2359 int dont_block_signals = 0;
2360 size_t ss_stacksize;
2362 *ret_stack = __splitstack_makecontext(stacksize,
2363 &newg->stackcontext[0],
2364 &ss_stacksize);
2365 *ret_stacksize = (uintptr)ss_stacksize;
2366 __splitstack_block_signals_context(&newg->stackcontext[0],
2367 &dont_block_signals, nil);
2368 #else
2369 // In 64-bit mode, the maximum Go allocation space is
2370 // 128G. Our stack size is 4M, which only permits 32K
2371 // goroutines. In order to not limit ourselves,
2372 // allocate the stacks out of separate memory. In
2373 // 32-bit mode, the Go allocation space is all of
2374 // memory anyhow.
2375 if(sizeof(void*) == 8) {
2376 void *p = runtime_SysAlloc(stacksize, &mstats.other_sys);
2377 if(p == nil)
2378 runtime_throw("runtime: cannot allocate memory for goroutine stack");
2379 *ret_stack = (byte*)p;
2380 } else {
2381 *ret_stack = runtime_mallocgc(stacksize, 0, FlagNoProfiling|FlagNoGC);
2382 runtime_xadd(&runtime_stacks_sys, stacksize);
2384 *ret_stacksize = (uintptr)stacksize;
2385 newg->gcinitialsp = *ret_stack;
2386 newg->gcstacksize = (uintptr)stacksize;
2387 #endif
2389 return newg;
2392 G*
2393 __go_go(void (*fn)(void*), void* arg)
2395 byte *sp;
2396 size_t spsize;
2397 G *newg;
2398 P *p;
2400 //runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
2401 if(fn == nil) {
2402 g->m->throwing = -1; // do not dump full stacks
2403 runtime_throw("go of nil func value");
2405 g->m->locks++; // disable preemption because it can be holding p in a local var
2407 p = (P*)g->m->p;
2408 if((newg = gfget(p)) != nil) {
2409 #ifdef USING_SPLIT_STACK
2410 int dont_block_signals = 0;
2412 sp = __splitstack_resetcontext(&newg->stackcontext[0],
2413 &spsize);
2414 __splitstack_block_signals_context(&newg->stackcontext[0],
2415 &dont_block_signals, nil);
2416 #else
2417 sp = newg->gcinitialsp;
2418 spsize = newg->gcstacksize;
2419 if(spsize == 0)
2420 runtime_throw("bad spsize in __go_go");
2421 newg->gcnextsp = sp;
2422 #endif
2423 } else {
2424 uintptr malsize;
2426 newg = runtime_malg(StackMin, &sp, &malsize);
2427 spsize = (size_t)malsize;
2428 allgadd(newg);
2431 newg->entry = (byte*)fn;
2432 newg->param = arg;
2433 newg->gopc = (uintptr)__builtin_return_address(0);
2434 newg->atomicstatus = _Grunnable;
2435 if(p->goidcache == p->goidcacheend) {
2436 p->goidcache = runtime_xadd64(&runtime_sched.goidgen, GoidCacheBatch);
2437 p->goidcacheend = p->goidcache + GoidCacheBatch;
2439 newg->goid = p->goidcache++;
2442 // Avoid warnings about variables clobbered by
2443 // longjmp.
2444 byte * volatile vsp = sp;
2445 size_t volatile vspsize = spsize;
2446 G * volatile vnewg = newg;
2447 ucontext_t * volatile uc;
2449 uc = ucontext_arg(&vnewg->context[0]);
2450 getcontext(uc);
2451 uc->uc_stack.ss_sp = vsp;
2452 uc->uc_stack.ss_size = vspsize;
2453 makecontext(uc, kickoff, 0);
2455 runqput(p, vnewg);
2457 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
2458 wakep();
2459 g->m->locks--;
2460 return vnewg;
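// For illustration only -- a sketch of the lowering that reaches __go_go.
// For a Go statement such as
//
//	go f(&x)
//
// the compiler conceptually emits something like
//
//	__go_go(f_thunk, args);
//
// where f_thunk and args are placeholder names for the generated
// trampoline and its argument block.  __go_go records them in gp->entry
// and gp->param, and kickoff() later calls the entry function with the
// saved parameter once the scheduler first runs the new goroutine.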
2464 static void
2465 allgadd(G *gp)
2467 G **new;
2468 uintptr cap;
2470 runtime_lock(&allglock);
2471 if(runtime_allglen >= allgcap) {
2472 cap = 4096/sizeof(new[0]);
2473 if(cap < 2*allgcap)
2474 cap = 2*allgcap;
2475 new = runtime_malloc(cap*sizeof(new[0]));
2476 if(new == nil)
2477 runtime_throw("runtime: cannot allocate memory");
2478 if(runtime_allg != nil) {
2479 runtime_memmove(new, runtime_allg, runtime_allglen*sizeof(new[0]));
2480 runtime_free(runtime_allg);
2482 runtime_allg = new;
2483 allgcap = cap;
2485 runtime_allg[runtime_allglen++] = gp;
2486 runtime_unlock(&allglock);
2489 // Put on gfree list.
2490 // If local list is too long, transfer a batch to the global list.
2491 static void
2492 gfput(P *p, G *gp)
2494 gp->schedlink = (uintptr)p->gfree;
2495 p->gfree = gp;
2496 p->gfreecnt++;
2497 if(p->gfreecnt >= 64) {
2498 runtime_lock(&runtime_sched.gflock);
2499 while(p->gfreecnt >= 32) {
2500 p->gfreecnt--;
2501 gp = p->gfree;
2502 p->gfree = (G*)gp->schedlink;
2503 gp->schedlink = (uintptr)runtime_sched.gfree;
2504 runtime_sched.gfree = gp;
2506 runtime_unlock(&runtime_sched.gflock);
2510 // Get from gfree list.
2511 // If local list is empty, grab a batch from global list.
2512 static G*
2513 gfget(P *p)
2515 G *gp;
2517 retry:
2518 gp = p->gfree;
2519 if(gp == nil && runtime_sched.gfree) {
2520 runtime_lock(&runtime_sched.gflock);
2521 while(p->gfreecnt < 32 && runtime_sched.gfree) {
2522 p->gfreecnt++;
2523 gp = runtime_sched.gfree;
2524 runtime_sched.gfree = (G*)gp->schedlink;
2525 gp->schedlink = (uintptr)p->gfree;
2526 p->gfree = gp;
2528 runtime_unlock(&runtime_sched.gflock);
2529 goto retry;
2531 if(gp) {
2532 p->gfree = (G*)gp->schedlink;
2533 p->gfreecnt--;
2535 return gp;
2538 // Purge all cached G's from gfree list to the global list.
2539 static void
2540 gfpurge(P *p)
2542 G *gp;
2544 runtime_lock(&runtime_sched.gflock);
2545 while(p->gfreecnt) {
2546 p->gfreecnt--;
2547 gp = p->gfree;
2548 p->gfree = (G*)gp->schedlink;
2549 gp->schedlink = (uintptr)runtime_sched.gfree;
2550 runtime_sched.gfree = gp;
2552 runtime_unlock(&runtime_sched.gflock);
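// Worked numbers for the free-list batching above: a P creating and
// destroying many goroutines lets p->gfree grow until gfreecnt reaches
// 64; gfput then spills dead G's to runtime_sched.gfree until only 32
// remain locally.  A P whose local list runs dry refills up to 32
// entries in gfget, so gflock is taken roughly once per 32 recycled
// goroutines rather than once per goroutine.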
2555 void
2556 runtime_Breakpoint(void)
2558 runtime_breakpoint();
2561 void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched");
2563 void
2564 runtime_Gosched(void)
2566 runtime_gosched();
2569 // Implementation of runtime.GOMAXPROCS.
2570 // delete when scheduler is even stronger
2571 int32
2572 runtime_gomaxprocsfunc(int32 n)
2574 int32 ret;
2576 if(n > _MaxGomaxprocs)
2577 n = _MaxGomaxprocs;
2578 runtime_lock(&runtime_sched);
2579 ret = runtime_gomaxprocs;
2580 if(n <= 0 || n == ret) {
2581 runtime_unlock(&runtime_sched);
2582 return ret;
2584 runtime_unlock(&runtime_sched);
2586 runtime_semacquire(&runtime_worldsema, false);
2587 g->m->gcing = 1;
2588 runtime_stoptheworld();
2589 newprocs = n;
2590 g->m->gcing = 0;
2591 runtime_semrelease(&runtime_worldsema);
2592 runtime_starttheworld();
2594 return ret;
2597 // lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
2598 // after they modify m->locked. Do not allow preemption during this call,
2599 // or else the m might be different in this function than in the caller.
2600 static void
2601 lockOSThread(void)
2603 g->m->lockedg = g;
2604 g->lockedm = g->m;
2607 void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
2608 void
2609 runtime_LockOSThread(void)
2611 g->m->locked |= _LockExternal;
2612 lockOSThread();
2615 void
2616 runtime_lockOSThread(void)
2618 g->m->locked += _LockInternal;
2619 lockOSThread();
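// For illustration only -- how this is used from Go code.  A goroutine
// that needs thread affinity (for example, a C library keeping state in
// thread-local storage) brackets that work with
//
//	runtime.LockOSThread()
//	... thread-affine calls ...
//	runtime.UnlockOSThread()
//
// which sets m->lockedg and g->lockedm above so the scheduler keeps this
// goroutine on this OS thread until the lock count drops back to zero.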
2623 // unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
2624 // after they update m->locked. Do not allow preemption during this call,
2625 // or else the m might be different in this function than in the caller.
2626 static void
2627 unlockOSThread(void)
2629 if(g->m->locked != 0)
2630 return;
2631 g->m->lockedg = nil;
2632 g->lockedm = nil;
2635 void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
2637 void
2638 runtime_UnlockOSThread(void)
2640 g->m->locked &= ~_LockExternal;
2641 unlockOSThread();
2644 void
2645 runtime_unlockOSThread(void)
2647 if(g->m->locked < _LockInternal)
2648 runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
2649 g->m->locked -= _LockInternal;
2650 unlockOSThread();
2653 bool
2654 runtime_lockedOSThread(void)
2656 return g->lockedm != nil && g->m->lockedg != nil;
2659 int32
2660 runtime_gcount(void)
2662 G *gp;
2663 int32 n, s;
2664 uintptr i;
2666 n = 0;
2667 runtime_lock(&allglock);
2668 // TODO(dvyukov): runtime.NumGoroutine() is O(N).
2669 // We do not want to increment/decrement centralized counter in newproc/goexit,
2670 // just to make runtime.NumGoroutine() faster.
2671 // Compromise solution is to introduce per-P counters of active goroutines.
2672 for(i = 0; i < runtime_allglen; i++) {
2673 gp = runtime_allg[i];
2674 s = gp->atomicstatus;
2675 if(s == _Grunnable || s == _Grunning || s == _Gsyscall || s == _Gwaiting)
2676 n++;
2678 runtime_unlock(&allglock);
2679 return n;
2682 int32
2683 runtime_mcount(void)
2685 return runtime_sched.mcount;
2688 static struct {
2689 Lock;
2690 void (*fn)(uintptr*, int32);
2691 int32 hz;
2692 uintptr pcbuf[TracebackMaxFrames];
2693 Location locbuf[TracebackMaxFrames];
2694 } prof;
2696 static void System(void) {}
2697 static void GC(void) {}
2699 // Called if we receive a SIGPROF signal.
2700 void
2701 runtime_sigprof()
2703 M *mp = g->m;
2704 int32 n, i;
2705 bool traceback;
2707 if(prof.fn == nil || prof.hz == 0)
2708 return;
2710 if(mp == nil)
2711 return;
2713 // Profiling runs concurrently with GC, so it must not allocate.
2714 mp->mallocing++;
2716 traceback = true;
2718 if(mp->mcache == nil)
2719 traceback = false;
2721 runtime_lock(&prof);
2722 if(prof.fn == nil) {
2723 runtime_unlock(&prof);
2724 mp->mallocing--;
2725 return;
2727 n = 0;
2729 if(runtime_atomicload(&runtime_in_callers) > 0) {
2730 // If SIGPROF arrived while already fetching runtime
2731 // callers we can have trouble on older systems
2732 // because the unwind library calls dl_iterate_phdr
2733 // which was not recursive in the past.
2734 traceback = false;
2737 if(traceback) {
2738 n = runtime_callers(0, prof.locbuf, nelem(prof.locbuf), false);
2739 for(i = 0; i < n; i++)
2740 prof.pcbuf[i] = prof.locbuf[i].pc;
2742 if(!traceback || n <= 0) {
2743 n = 2;
2744 prof.pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
2745 if(mp->gcing || mp->helpgc)
2746 prof.pcbuf[1] = (uintptr)GC;
2747 else
2748 prof.pcbuf[1] = (uintptr)System;
2750 prof.fn(prof.pcbuf, n);
2751 runtime_unlock(&prof);
2752 mp->mallocing--;
2755 // Arrange to call fn with a traceback hz times a second.
2756 void
2757 runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
2759 // Force sane arguments.
2760 if(hz < 0)
2761 hz = 0;
2762 if(hz == 0)
2763 fn = nil;
2764 if(fn == nil)
2765 hz = 0;
2767 // Disable preemption, otherwise we can be rescheduled to another thread
2768 // that has profiling enabled.
2769 g->m->locks++;
2771 // Stop the profiler on this thread so that it is safe to lock prof:
2772 // if a profiling signal came in while we held prof locked,
2773 // it would deadlock.
2774 runtime_resetcpuprofiler(0);
2776 runtime_lock(&prof);
2777 prof.fn = fn;
2778 prof.hz = hz;
2779 runtime_unlock(&prof);
2780 runtime_lock(&runtime_sched);
2781 runtime_sched.profilehz = hz;
2782 runtime_unlock(&runtime_sched);
2784 if(hz != 0)
2785 runtime_resetcpuprofiler(hz);
2787 g->m->locks--;
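// For illustration only -- the usual caller is runtime/pprof's
// StartCPUProfile, which requests a rate of 100 Hz.  The current
// thread's interval timer is reprogrammed right away, other threads pick
// up the new rate from runtime_sched.profilehz, and runtime_sigprof
// above then records a stack sample on every SIGPROF delivery.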
2790 // Change number of processors. The world is stopped, sched is locked.
2791 static void
2792 procresize(int32 new)
2794 int32 i, old;
2795 bool pempty;
2796 G *gp;
2797 P *p;
2799 old = runtime_gomaxprocs;
2800 if(old < 0 || old > _MaxGomaxprocs || new <= 0 || new > _MaxGomaxprocs)
2801 runtime_throw("procresize: invalid arg");
2802 // initialize new P's
2803 for(i = 0; i < new; i++) {
2804 p = runtime_allp[i];
2805 if(p == nil) {
2806 p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
2807 p->id = i;
2808 p->status = _Pgcstop;
2809 runtime_atomicstorep(&runtime_allp[i], p);
2811 if(p->mcache == nil) {
2812 if(old==0 && i==0)
2813 p->mcache = g->m->mcache; // bootstrap
2814 else
2815 p->mcache = runtime_allocmcache();
2819 // redistribute runnable G's evenly
2820 // collect all runnable goroutines in global queue preserving FIFO order
2821 // FIFO order is required to ensure fairness even during frequent GCs
2822 // see http://golang.org/issue/7126
2823 pempty = false;
2824 while(!pempty) {
2825 pempty = true;
2826 for(i = 0; i < old; i++) {
2827 p = runtime_allp[i];
2828 if(p->runqhead == p->runqtail)
2829 continue;
2830 pempty = false;
2831 // pop from tail of local queue
2832 p->runqtail--;
2833 gp = (G*)p->runq[p->runqtail%nelem(p->runq)];
2834 // push onto head of global queue
2835 gp->schedlink = (uintptr)runtime_sched.runqhead;
2836 runtime_sched.runqhead = gp;
2837 if(runtime_sched.runqtail == nil)
2838 runtime_sched.runqtail = gp;
2839 runtime_sched.runqsize++;
2842 // fill local queues with at most nelem(p->runq)/2 goroutines
2843 // start at 1 because current M already executes some G and will acquire allp[0] below,
2844 // so if we have a spare G we want to put it into allp[1].
2845 for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched.runqsize > 0; i++) {
2846 gp = runtime_sched.runqhead;
2847 runtime_sched.runqhead = (G*)gp->schedlink;
2848 if(runtime_sched.runqhead == nil)
2849 runtime_sched.runqtail = nil;
2850 runtime_sched.runqsize--;
2851 runqput(runtime_allp[i%new], gp);
2854 // free unused P's
2855 for(i = new; i < old; i++) {
2856 p = runtime_allp[i];
2857 runtime_freemcache(p->mcache);
2858 p->mcache = nil;
2859 gfpurge(p);
2860 p->status = _Pdead;
2861 // can't free P itself because it can be referenced by an M in syscall
2864 if(g->m->p)
2865 ((P*)g->m->p)->m = 0;
2866 g->m->p = 0;
2867 g->m->mcache = nil;
2868 p = runtime_allp[0];
2869 p->m = 0;
2870 p->status = _Pidle;
2871 acquirep(p);
2872 for(i = new-1; i > 0; i--) {
2873 p = runtime_allp[i];
2874 p->status = _Pidle;
2875 pidleput(p);
2877 runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
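// Worked example of the resize above, shrinking from old=4 to new=2:
// every runnable G is popped from the tail of its local run queue and
// pushed onto the head of the global queue (preserving overall FIFO
// order), the global queue is then dealt back round-robin onto the
// surviving P's starting at allp[1] with at most nelem(p->runq)/2 G's
// per P, and allp[2] and allp[3] have their mcache freed and gfree list
// purged and are marked _Pdead.  The P structs themselves are never
// freed, because an M still in a syscall may hold a pointer to one.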
2880 // Associate p and the current m.
2881 static void
2882 acquirep(P *p)
2884 M *m;
2886 m = g->m;
2887 if(m->p || m->mcache)
2888 runtime_throw("acquirep: already in go");
2889 if(p->m || p->status != _Pidle) {
2890 runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? ((M*)p->m)->id : 0, p->status);
2891 runtime_throw("acquirep: invalid p state");
2893 m->mcache = p->mcache;
2894 m->p = (uintptr)p;
2895 p->m = (uintptr)m;
2896 p->status = _Prunning;
2899 // Disassociate p and the current m.
2900 static P*
2901 releasep(void)
2903 M *m;
2904 P *p;
2906 m = g->m;
2907 if(m->p == 0 || m->mcache == nil)
2908 runtime_throw("releasep: invalid arg");
2909 p = (P*)m->p;
2910 if((M*)p->m != m || p->mcache != m->mcache || p->status != _Prunning) {
2911 runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
2912 m, m->p, p->m, m->mcache, p->mcache, p->status);
2913 runtime_throw("releasep: invalid p state");
2915 m->p = 0;
2916 m->mcache = nil;
2917 p->m = 0;
2918 p->status = _Pidle;
2919 return p;
2922 static void
2923 incidlelocked(int32 v)
2925 runtime_lock(&runtime_sched);
2926 runtime_sched.nmidlelocked += v;
2927 if(v > 0)
2928 checkdead();
2929 runtime_unlock(&runtime_sched);
2932 // Check for deadlock situation.
2933 // The check is based on the number of running M's; if it is 0, we have a deadlock.
2934 static void
2935 checkdead(void)
2937 G *gp;
2938 int32 run, grunning, s;
2939 uintptr i;
2941 // For -buildmode=c-shared or -buildmode=c-archive it's OK if
2942 // there are no running goroutines. The calling program is
2943 // assumed to be running.
2944 if(runtime_isarchive) {
2945 return;
2948 // -1 for sysmon
2949 run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.nmidlelocked - 1 - countextra();
2950 if(run > 0)
2951 return;
2952 // If we are dying because of a signal caught on an already idle thread,
2953 // freezetheworld will cause all running threads to block.
2954 // And runtime will essentially enter into deadlock state,
2955 // except that there is a thread that will call runtime_exit soon.
2956 if(runtime_panicking > 0)
2957 return;
2958 if(run < 0) {
2959 runtime_printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
2960 runtime_sched.nmidle, runtime_sched.nmidlelocked, runtime_sched.mcount);
2961 runtime_throw("checkdead: inconsistent counts");
2963 grunning = 0;
2964 runtime_lock(&allglock);
2965 for(i = 0; i < runtime_allglen; i++) {
2966 gp = runtime_allg[i];
2967 if(gp->isbackground)
2968 continue;
2969 s = gp->atomicstatus;
2970 if(s == _Gwaiting)
2971 grunning++;
2972 else if(s == _Grunnable || s == _Grunning || s == _Gsyscall) {
2973 runtime_unlock(&allglock);
2974 runtime_printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);
2975 runtime_throw("checkdead: runnable g");
2978 runtime_unlock(&allglock);
2979 if(grunning == 0) // possible if main goroutine calls runtime_Goexit()
2980 runtime_throw("no goroutines (main called runtime.Goexit) - deadlock!");
2981 g->m->throwing = -1; // do not dump full stacks
2982 runtime_throw("all goroutines are asleep - deadlock!");
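// Worked numbers for the check above: with mcount=4, nmidle=2,
// nmidlelocked=0 and no extra C threads, run = 4-2-0-1-0 = 1, so some M
// is still running a goroutine and checkdead returns early.  Only when
// run reaches 0 is allg scanned; if every non-background goroutine is in
// _Gwaiting (channels, sleeps, and so on), nothing can ever run again
// and the "all goroutines are asleep" throw fires.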
2985 static void
2986 sysmon(void)
2988 uint32 idle, delay;
2989 int64 now, lastpoll, lasttrace;
2990 G *gp;
2992 lasttrace = 0;
2993 idle = 0; // how many cycles in succession we have gone without waking anybody up
2994 delay = 0;
2995 for(;;) {
2996 if(idle == 0) // start with 20us sleep...
2997 delay = 20;
2998 else if(idle > 50) // start doubling the sleep after 1ms...
2999 delay *= 2;
3000 if(delay > 10*1000) // up to 10ms
3001 delay = 10*1000;
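// Worked numbers for the backoff above: while work keeps being found
// (idle == 0) sysmon sleeps 20us per cycle; after 50 quiet cycles
// (about 1ms of sleeping) the delay doubles each cycle,
// 20us -> 40us -> 80us -> ... , and is clamped at 10ms, so a fully idle
// sysmon wakes up only about 100 times per second.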
3002 runtime_usleep(delay);
3003 if(runtime_debug.schedtrace <= 0 &&
3004 (runtime_sched.gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic
3005 runtime_lock(&runtime_sched);
3006 if(runtime_atomicload(&runtime_sched.gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) {
3007 runtime_atomicstore(&runtime_sched.sysmonwait, 1);
3008 runtime_unlock(&runtime_sched);
3009 runtime_notesleep(&runtime_sched.sysmonnote);
3010 runtime_noteclear(&runtime_sched.sysmonnote);
3011 idle = 0;
3012 delay = 20;
3013 } else
3014 runtime_unlock(&runtime_sched);
3016 // poll network if not polled for more than 10ms
3017 lastpoll = runtime_atomicload64(&runtime_sched.lastpoll);
3018 now = runtime_nanotime();
3019 if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
3020 runtime_cas64(&runtime_sched.lastpoll, lastpoll, now);
3021 gp = runtime_netpoll(false); // non-blocking
3022 if(gp) {
3023 // Need to decrement number of idle locked M's
3024 // (pretending that one more is running) before injectglist.
3025 // Otherwise it can lead to the following situation:
3026 // injectglist grabs all P's but before it starts M's to run the P's,
3027 // another M returns from syscall, finishes running its G,
3028 // observes that there is no work to do and no other running M's
3029 // and reports deadlock.
3030 incidlelocked(-1);
3031 injectglist(gp);
3032 incidlelocked(1);
3035 // retake P's blocked in syscalls
3036 // and preempt long running G's
3037 if(retake(now))
3038 idle = 0;
3039 else
3040 idle++;
3042 if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) {
3043 lasttrace = now;
3044 runtime_schedtrace(runtime_debug.scheddetail);
3049 typedef struct Pdesc Pdesc;
3050 struct Pdesc
3052 uint32 schedtick;
3053 int64 schedwhen;
3054 uint32 syscalltick;
3055 int64 syscallwhen;
3057 static Pdesc pdesc[_MaxGomaxprocs];
3059 static uint32
3060 retake(int64 now)
3062 uint32 i, s, n;
3063 int64 t;
3064 P *p;
3065 Pdesc *pd;
3067 n = 0;
3068 for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
3069 p = runtime_allp[i];
3070 if(p==nil)
3071 continue;
3072 pd = &pdesc[i];
3073 s = p->status;
3074 if(s == _Psyscall) {
3075 // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
3076 t = p->syscalltick;
3077 if(pd->syscalltick != t) {
3078 pd->syscalltick = t;
3079 pd->syscallwhen = now;
3080 continue;
3082 // On the one hand we don't want to retake Ps if there is no other work to do,
3083 // but on the other hand we want to retake them eventually
3084 // because they can prevent the sysmon thread from deep sleep.
3085 if(p->runqhead == p->runqtail &&
3086 runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0 &&
3087 pd->syscallwhen + 10*1000*1000 > now)
3088 continue;
3089 // Need to decrement number of idle locked M's
3090 // (pretending that one more is running) before the CAS.
3091 // Otherwise the M from which we retake can exit the syscall,
3092 // increment nmidle and report deadlock.
3093 incidlelocked(-1);
3094 if(runtime_cas(&p->status, s, _Pidle)) {
3095 n++;
3096 handoffp(p);
3098 incidlelocked(1);
3099 } else if(s == _Prunning) {
3100 // Preempt G if it's running for more than 10ms.
3101 t = p->schedtick;
3102 if(pd->schedtick != t) {
3103 pd->schedtick = t;
3104 pd->schedwhen = now;
3105 continue;
3107 if(pd->schedwhen + 10*1000*1000 > now)
3108 continue;
3109 // preemptone(p);
3112 return n;
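// Worked example of the syscall retake above: suppose allp[2] entered a
// syscall 15ms ago and its syscalltick has not changed since the last
// sysmon pass.  Even with an empty run queue and idle P's elsewhere,
// pd->syscallwhen + 10ms is already in the past, so sysmon CASes the P
// from _Psyscall to _Pidle and hands it off.  When the blocked M finally
// returns, exitsyscallfast sees that the CAS on its old P fails and
// falls back to exitsyscall0.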
3115 // Tell all goroutines that they have been preempted and they should stop.
3116 // This function is purely best-effort. It can fail to inform a goroutine if a
3117 // processor just started running it.
3118 // No locks need to be held.
3119 // Returns true if preemption request was issued to at least one goroutine.
3120 static bool
3121 preemptall(void)
3123 return false;
3126 void
3127 runtime_schedtrace(bool detailed)
3129 static int64 starttime;
3130 int64 now;
3131 int64 id1, id2, id3;
3132 int32 i, t, h;
3133 uintptr gi;
3134 const char *fmt;
3135 M *mp, *lockedm;
3136 G *gp, *lockedg;
3137 P *p;
3139 now = runtime_nanotime();
3140 if(starttime == 0)
3141 starttime = now;
3143 runtime_lock(&runtime_sched);
3144 runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
3145 (now-starttime)/1000000, runtime_gomaxprocs, runtime_sched.npidle, runtime_sched.mcount,
3146 runtime_sched.nmidle, runtime_sched.runqsize);
3147 if(detailed) {
3148 runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
3149 runtime_sched.gcwaiting, runtime_sched.nmidlelocked, runtime_sched.nmspinning,
3150 runtime_sched.stopwait, runtime_sched.sysmonwait);
3152 // We must be careful while reading data from P's, M's and G's.
3153 // Even if we hold schedlock, most data can be changed concurrently.
3154 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
3155 for(i = 0; i < runtime_gomaxprocs; i++) {
3156 p = runtime_allp[i];
3157 if(p == nil)
3158 continue;
3159 mp = (M*)p->m;
3160 h = runtime_atomicload(&p->runqhead);
3161 t = runtime_atomicload(&p->runqtail);
3162 if(detailed)
3163 runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
3164 i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
3165 else {
3166 // In non-detailed mode, format the lengths of the per-P run queues as:
3167 // [len1 len2 len3 len4]
3168 fmt = " %d";
3169 if(runtime_gomaxprocs == 1)
3170 fmt = " [%d]\n";
3171 else if(i == 0)
3172 fmt = " [%d";
3173 else if(i == runtime_gomaxprocs-1)
3174 fmt = " %d]\n";
3175 runtime_printf(fmt, t-h);
3178 if(!detailed) {
3179 runtime_unlock(&runtime_sched);
3180 return;
3182 for(mp = runtime_allm; mp; mp = mp->alllink) {
3183 p = (P*)mp->p;
3184 gp = mp->curg;
3185 lockedg = mp->lockedg;
3186 id1 = -1;
3187 if(p)
3188 id1 = p->id;
3189 id2 = -1;
3190 if(gp)
3191 id2 = gp->goid;
3192 id3 = -1;
3193 if(lockedg)
3194 id3 = lockedg->goid;
3195 runtime_printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
3196 " locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
3197 mp->id, id1, id2,
3198 mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
3199 mp->spinning, mp->blocked, id3);
3201 runtime_lock(&allglock);
3202 for(gi = 0; gi < runtime_allglen; gi++) {
3203 gp = runtime_allg[gi];
3204 mp = gp->m;
3205 lockedm = gp->lockedm;
3206 runtime_printf(" G%D: status=%d(%S) m=%d lockedm=%d\n",
3207 gp->goid, gp->atomicstatus, gp->waitreason, mp ? mp->id : -1,
3208 lockedm ? lockedm->id : -1);
3210 runtime_unlock(&allglock);
3211 runtime_unlock(&runtime_sched);
3214 // Put mp on midle list.
3215 // Sched must be locked.
3216 static void
3217 mput(M *mp)
3219 mp->schedlink = (uintptr)runtime_sched.midle;
3220 runtime_sched.midle = mp;
3221 runtime_sched.nmidle++;
3222 checkdead();
3225 // Try to get an m from midle list.
3226 // Sched must be locked.
3227 static M*
3228 mget(void)
3230 M *mp;
3232 if((mp = runtime_sched.midle) != nil){
3233 runtime_sched.midle = (M*)mp->schedlink;
3234 runtime_sched.nmidle--;
3236 return mp;
3239 // Put gp on the global runnable queue.
3240 // Sched must be locked.
3241 static void
3242 globrunqput(G *gp)
3244 gp->schedlink = 0;
3245 if(runtime_sched.runqtail)
3246 runtime_sched.runqtail->schedlink = (uintptr)gp;
3247 else
3248 runtime_sched.runqhead = gp;
3249 runtime_sched.runqtail = gp;
3250 runtime_sched.runqsize++;
3253 // Put a batch of runnable goroutines on the global runnable queue.
3254 // Sched must be locked.
3255 static void
3256 globrunqputbatch(G *ghead, G *gtail, int32 n)
3258 gtail->schedlink = 0;
3259 if(runtime_sched.runqtail)
3260 runtime_sched.runqtail->schedlink = (uintptr)ghead;
3261 else
3262 runtime_sched.runqhead = ghead;
3263 runtime_sched.runqtail = gtail;
3264 runtime_sched.runqsize += n;
3267 // Try to get a batch of G's from the global runnable queue.
3268 // Sched must be locked.
3269 static G*
3270 globrunqget(P *p, int32 max)
3272 G *gp, *gp1;
3273 int32 n;
3275 if(runtime_sched.runqsize == 0)
3276 return nil;
3277 n = runtime_sched.runqsize/runtime_gomaxprocs+1;
3278 if(n > runtime_sched.runqsize)
3279 n = runtime_sched.runqsize;
3280 if(max > 0 && n > max)
3281 n = max;
3282 if((uint32)n > nelem(p->runq)/2)
3283 n = nelem(p->runq)/2;
3284 runtime_sched.runqsize -= n;
3285 if(runtime_sched.runqsize == 0)
3286 runtime_sched.runqtail = nil;
3287 gp = runtime_sched.runqhead;
3288 runtime_sched.runqhead = (G*)gp->schedlink;
3289 n--;
3290 while(n--) {
3291 gp1 = runtime_sched.runqhead;
3292 runtime_sched.runqhead = (G*)gp1->schedlink;
3293 runqput(p, gp1);
3295 return gp;
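// Worked example of the fair-share computation above: with runqsize=10
// and gomaxprocs=4, n = 10/4+1 = 3, so this P takes three G's -- one is
// returned to run immediately and two go onto its local queue.  The
// nelem(p->runq)/2 cap keeps one P from draining the whole global queue
// into its local queue.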
3298 // Put p on the pidle list.
3299 // Sched must be locked.
3300 static void
3301 pidleput(P *p)
3303 p->link = (uintptr)runtime_sched.pidle;
3304 runtime_sched.pidle = p;
3305 runtime_xadd(&runtime_sched.npidle, 1); // TODO: fast atomic
3308 // Try to get a p from the pidle list.
3309 // Sched must be locked.
3310 static P*
3311 pidleget(void)
3313 P *p;
3315 p = runtime_sched.pidle;
3316 if(p) {
3317 runtime_sched.pidle = (P*)p->link;
3318 runtime_xadd(&runtime_sched.npidle, -1); // TODO: fast atomic
3320 return p;
3323 // Try to put g on local runnable queue.
3324 // If it's full, put onto global queue.
3325 // Executed only by the owner P.
3326 static void
3327 runqput(P *p, G *gp)
3329 uint32 h, t;
3331 retry:
3332 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3333 t = p->runqtail;
3334 if(t - h < nelem(p->runq)) {
3335 p->runq[t%nelem(p->runq)] = (uintptr)gp;
3336 runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
3337 return;
3339 if(runqputslow(p, gp, h, t))
3340 return;
3341 // the queue is not full, so now the put above must succeed
3342 goto retry;
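// Worked example of the ring-buffer arithmetic above: runqhead and
// runqtail are free-running uint32 counters, and only their value modulo
// nelem(p->runq) indexes the array.  Assuming a 256-entry queue, h=300
// and t=430 give t-h = 130 < 256, so there is room; the new g is stored
// at slot 430%256 = 174 and runqtail becomes 431.  Because the
// subtraction is unsigned, the test stays correct when the counters wrap
// around 2^32.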
3345 // Put g and a batch of work from local runnable queue on global queue.
3346 // Executed only by the owner P.
3347 static bool
3348 runqputslow(P *p, G *gp, uint32 h, uint32 t)
3350 G *batch[nelem(p->runq)/2+1];
3351 uint32 n, i;
3353 // First, grab a batch from local queue.
3354 n = t-h;
3355 n = n/2;
3356 if(n != nelem(p->runq)/2)
3357 runtime_throw("runqputslow: queue is not full");
3358 for(i=0; i<n; i++)
3359 batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
3360 if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3361 return false;
3362 batch[n] = gp;
3363 // Link the goroutines.
3364 for(i=0; i<n; i++)
3365 batch[i]->schedlink = (uintptr)batch[i+1];
3366 // Now put the batch on global queue.
3367 runtime_lock(&runtime_sched);
3368 globrunqputbatch(batch[0], batch[n], n+1);
3369 runtime_unlock(&runtime_sched);
3370 return true;
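// Worked numbers for the slow path above, again assuming a 256-entry
// queue: when the local queue is full, t-h = 256, so n = 128 G's are
// grabbed from the head, gp is appended as batch[128], the 129
// goroutines are linked through their schedlink fields, and a single
// globrunqputbatch call moves them all under one sched lock acquisition,
// leaving the local queue half empty for runqput's retry.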
3373 // Get g from local runnable queue.
3374 // Executed only by the owner P.
3375 static G*
3376 runqget(P *p)
3378 G *gp;
3379 uint32 t, h;
3381 for(;;) {
3382 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3383 t = p->runqtail;
3384 if(t == h)
3385 return nil;
3386 gp = (G*)p->runq[h%nelem(p->runq)];
3387 if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume
3388 return gp;
3392 // Grabs a batch of goroutines from local runnable queue.
3393 // batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
3394 // Can be executed by any P.
3395 static uint32
3396 runqgrab(P *p, G **batch)
3398 uint32 t, h, n, i;
3400 for(;;) {
3401 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3402 t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer
3403 n = t-h;
3404 n = n - n/2;
3405 if(n == 0)
3406 break;
3407 if(n > nelem(p->runq)/2) // read inconsistent h and t
3408 continue;
3409 for(i=0; i<n; i++)
3410 batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
3411 if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3412 break;
3414 return n;
3417 // Steal half of elements from local runnable queue of p2
3418 // and put onto local runnable queue of p.
3419 // Returns one of the stolen elements (or nil if failed).
3420 static G*
3421 runqsteal(P *p, P *p2)
3423 G *gp;
3424 G *batch[nelem(p->runq)/2];
3425 uint32 t, h, n, i;
3427 n = runqgrab(p2, batch);
3428 if(n == 0)
3429 return nil;
3430 n--;
3431 gp = batch[n];
3432 if(n == 0)
3433 return gp;
3434 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3435 t = p->runqtail;
3436 if(t - h + n >= nelem(p->runq))
3437 runtime_throw("runqsteal: runq overflow");
3438 for(i=0; i<n; i++, t++)
3439 p->runq[t%nelem(p->runq)] = (uintptr)batch[i];
3440 runtime_atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption
3441 return gp;
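// Worked numbers for the steal above: if the victim p2 holds 7 runnable
// G's, runqgrab takes n = 7 - 7/2 = 4 of them from the head of p2's
// queue; runqsteal returns the last grabbed G for immediate execution
// and copies the remaining 3 into p's own queue, publishing them with
// the store-release of runqtail.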
3444 void runtime_testSchedLocalQueue(void)
3445 __asm__("runtime.testSchedLocalQueue");
3447 void
3448 runtime_testSchedLocalQueue(void)
3450 P p;
3451 G gs[nelem(p.runq)];
3452 int32 i, j;
3454 runtime_memclr((byte*)&p, sizeof(p));
3456 for(i = 0; i < (int32)nelem(gs); i++) {
3457 if(runqget(&p) != nil)
3458 runtime_throw("runq is not empty initially");
3459 for(j = 0; j < i; j++)
3460 runqput(&p, &gs[i]);
3461 for(j = 0; j < i; j++) {
3462 if(runqget(&p) != &gs[i]) {
3463 runtime_printf("bad element at iter %d/%d\n", i, j);
3464 runtime_throw("bad element");
3467 if(runqget(&p) != nil)
3468 runtime_throw("runq is not empty afterwards");
3472 void runtime_testSchedLocalQueueSteal(void)
3473 __asm__("runtime.testSchedLocalQueueSteal");
3475 void
3476 runtime_testSchedLocalQueueSteal(void)
3478 P p1, p2;
3479 G gs[nelem(p1.runq)], *gp;
3480 int32 i, j, s;
3482 runtime_memclr((byte*)&p1, sizeof(p1));
3483 runtime_memclr((byte*)&p2, sizeof(p2));
3485 for(i = 0; i < (int32)nelem(gs); i++) {
3486 for(j = 0; j < i; j++) {
3487 gs[j].sig = 0;
3488 runqput(&p1, &gs[j]);
3490 gp = runqsteal(&p2, &p1);
3491 s = 0;
3492 if(gp) {
3493 s++;
3494 gp->sig++;
3496 while((gp = runqget(&p2)) != nil) {
3497 s++;
3498 gp->sig++;
3500 while((gp = runqget(&p1)) != nil)
3501 gp->sig++;
3502 for(j = 0; j < i; j++) {
3503 if(gs[j].sig != 1) {
3504 runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
3505 runtime_throw("bad element");
3508 if(s != i/2 && s != i/2+1) {
3509 runtime_printf("bad steal %d, want %d or %d, iter %d\n",
3510 s, i/2, i/2+1, i);
3511 runtime_throw("bad steal");
3516 int32
3517 runtime_setmaxthreads(int32 in)
3519 int32 out;
3521 runtime_lock(&runtime_sched);
3522 out = runtime_sched.maxmcount;
3523 runtime_sched.maxmcount = in;
3524 checkmcount();
3525 runtime_unlock(&runtime_sched);
3526 return out;
3529 void
3530 runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
3532 enqueue1(wbufp, (Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0});
3533 enqueue1(wbufp, (Obj){(byte*)&runtime_main_init_done, sizeof runtime_main_init_done, 0});
3536 // Return whether we are waiting for a GC. This gc toolchain uses
3537 // preemption instead.
3538 bool
3539 runtime_gcwaiting(void)
3541 return runtime_sched.gcwaiting;
3544 // os_beforeExit is called from os.Exit(0).
3545 //go:linkname os_beforeExit os.runtime_beforeExit
3547 extern void os_beforeExit() __asm__ (GOSYM_PREFIX "os.runtime_beforeExit");
3549 void
3550 os_beforeExit()
3554 // Active spinning for sync.Mutex.
3555 //go:linkname sync_runtime_canSpin sync.runtime_canSpin
3557 enum
3559 ACTIVE_SPIN = 4,
3560 ACTIVE_SPIN_CNT = 30,
3563 extern _Bool sync_runtime_canSpin(intgo i)
3564 __asm__ (GOSYM_PREFIX "sync.runtime_canSpin");
3566 _Bool
3567 sync_runtime_canSpin(intgo i)
3569 P *p;
3571 // sync.Mutex is cooperative, so we are conservative with spinning.
3572 // Spin only a few times, and only if we are running on a multicore machine,
3573 // GOMAXPROCS>1, there is at least one other running P, and the local runq is empty.
3574 // As opposed to the runtime mutex we don't do passive spinning here,
3575 // because there can be work on the global runq or on other Ps.
3576 if (i >= ACTIVE_SPIN || runtime_ncpu <= 1 || runtime_gomaxprocs <= (int32)(runtime_sched.npidle+runtime_sched.nmspinning)+1) {
3577 return false;
3579 p = (P*)g->m->p;
3580 return p != nil && p->runqhead == p->runqtail;
3583 //go:linkname sync_runtime_doSpin sync.runtime_doSpin
3584 //go:nosplit
3586 extern void sync_runtime_doSpin(void)
3587 __asm__ (GOSYM_PREFIX "sync.runtime_doSpin");
3589 void
3590 sync_runtime_doSpin()
3592 runtime_procyield(ACTIVE_SPIN_CNT);
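// For illustration only -- a sketch of the caller on the Go side.  The
// contended path of sync.Mutex conceptually runs
//
//	iter := 0
//	for sync_runtime_canSpin(iter) {
//		sync_runtime_doSpin()   // ACTIVE_SPIN_CNT procyield iterations
//		iter++
//		// re-check the mutex word before giving up and sleeping
//	}
//
// so a mutex that is held only briefly can be acquired without paying
// for a semaphore sleep and wakeup.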