libgo/runtime/proc.c (official-gcc.git)
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 #include <limits.h>
6 #include <signal.h>
7 #include <stdlib.h>
8 #include <pthread.h>
9 #include <unistd.h>
11 #include "config.h"
13 #ifdef HAVE_DL_ITERATE_PHDR
14 #include <link.h>
15 #endif
17 #include "runtime.h"
18 #include "arch.h"
19 #include "defs.h"
20 #include "malloc.h"
21 #include "go-type.h"
23 #ifdef USING_SPLIT_STACK
25 /* FIXME: These are not declared anywhere. */
27 extern void __splitstack_getcontext(void *context[10]);
29 extern void __splitstack_setcontext(void *context[10]);
31 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
33 extern void * __splitstack_resetcontext(void *context[10], size_t *);
35 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
36 void **);
38 extern void __splitstack_block_signals (int *, int *);
40 extern void __splitstack_block_signals_context (void *context[10], int *,
41 int *);
43 #endif
45 #ifndef PTHREAD_STACK_MIN
46 # define PTHREAD_STACK_MIN 8192
47 #endif
49 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
50 # define StackMin PTHREAD_STACK_MIN
51 #else
52 # define StackMin ((sizeof(char *) < 8) ? 2 * 1024 * 1024 : 4 * 1024 * 1024)
53 #endif
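// Without split-stack support a goroutine stack cannot grow a segment at
// a time, so StackMin above has to be a generous fixed size: 2 MB when
// pointers are 32-bit and 4 MB when they are 64-bit.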
55 uintptr runtime_stacks_sys;
57 static void gtraceback(G*);
59 #ifdef __rtems__
60 #define __thread
61 #endif
63 static __thread G *g;
65 #ifndef SETCONTEXT_CLOBBERS_TLS
67 static inline void
68 initcontext(void)
72 static inline void
73 fixcontext(ucontext_t *c __attribute__ ((unused)))
77 #else
79 # if defined(__x86_64__) && defined(__sun__)
81 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
82 // register to that of the thread which called getcontext. The effect
83 // is that the address of all __thread variables changes. This bug
84 // also affects pthread_self() and pthread_getspecific. We work
85 // around it by clobbering the context field directly to keep %fs the
86 // same.
88 static __thread greg_t fs;
90 static inline void
91 initcontext(void)
93 ucontext_t c;
95 getcontext(&c);
96 fs = c.uc_mcontext.gregs[REG_FSBASE];
99 static inline void
100 fixcontext(ucontext_t* c)
102 c->uc_mcontext.gregs[REG_FSBASE] = fs;
105 # elif defined(__NetBSD__)
107 // NetBSD has a bug: setcontext clobbers tlsbase, we need to save
108 // and restore it ourselves.
110 static __thread __greg_t tlsbase;
112 static inline void
113 initcontext(void)
115 ucontext_t c;
117 getcontext(&c);
118 tlsbase = c.uc_mcontext._mc_tlsbase;
121 static inline void
122 fixcontext(ucontext_t* c)
124 c->uc_mcontext._mc_tlsbase = tlsbase;
127 # elif defined(__sparc__)
129 static inline void
130 initcontext(void)
134 static inline void
135 fixcontext(ucontext_t *c)
137 /* ??? Using
138 register unsigned long thread __asm__("%g7");
139 c->uc_mcontext.gregs[REG_G7] = thread;
140 results in
141 error: variable ‘thread’ might be clobbered by \
142 ‘longjmp’ or ‘vfork’ [-Werror=clobbered]
143 which ought to be false, as %g7 is a fixed register. */
145 if (sizeof (c->uc_mcontext.gregs[REG_G7]) == 8)
146 asm ("stx %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
147 else
148 asm ("st %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
151 # else
153 # error unknown case for SETCONTEXT_CLOBBERS_TLS
155 # endif
157 #endif
159 // ucontext_arg returns a properly aligned ucontext_t value. On some
160 // systems a ucontext_t value must be aligned to a 16-byte boundary.
161 // The g structure that has fields of type ucontext_t is defined in
162 // Go, and Go has no simple way to align a field to such a boundary.
163 // So we make the field larger in runtime2.go and pick an appropriate
164 // offset within the field here.
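// For example, if __alignof__(ucontext_t) is 16 and the field starts at
// address 0x1008, the rounding below yields 0x1010, which still lies
// inside the oversized field reserved in runtime2.go.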
165 static ucontext_t*
166 ucontext_arg(void** go_ucontext)
168 uintptr_t p = (uintptr_t)go_ucontext;
169 size_t align = __alignof__(ucontext_t);
170 if(align > 16) {
171 // We only ensured space for up to a 16 byte alignment
172 // in libgo/go/runtime/runtime2.go.
173 runtime_throw("required alignment of ucontext_t too large");
175 p = (p + align - 1) &~ (uintptr_t)(align - 1);
176 return (ucontext_t*)p;
179 // We can not always refer to the TLS variables directly. The
180 // compiler will call tls_get_addr to get the address of the variable,
181 // and it may hold it in a register across a call to schedule. When
182 // we get back from the call we may be running in a different thread,
183 // in which case the register now points to the TLS variable for a
184 // different thread. We use non-inlinable functions to avoid this
185 // when necessary.
187 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
190 runtime_g(void)
192 return g;
195 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
198 runtime_m(void)
200 if(g == nil)
201 return nil;
202 return g->m;
205 // Set g.
206 void
207 runtime_setg(G* gp)
209 g = gp;
212 // Start a new thread.
213 static void
214 runtime_newosproc(M *mp)
216 pthread_attr_t attr;
217 sigset_t clear, old;
218 pthread_t tid;
219 int ret;
221 if(pthread_attr_init(&attr) != 0)
222 runtime_throw("pthread_attr_init");
223 if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
224 runtime_throw("pthread_attr_setdetachstate");
226 // Block signals during pthread_create so that the new thread
227 // starts with signals disabled. It will enable them in minit.
228 sigfillset(&clear);
230 #ifdef SIGTRAP
231 // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
232 sigdelset(&clear, SIGTRAP);
233 #endif
235 sigemptyset(&old);
236 pthread_sigmask(SIG_BLOCK, &clear, &old);
237 ret = pthread_create(&tid, &attr, runtime_mstart, mp);
238 pthread_sigmask(SIG_SETMASK, &old, nil);
240 if (ret != 0)
241 runtime_throw("pthread_create");
244 // First function run by a new goroutine. This replaces gogocall.
245 static void
246 kickoff(void)
248 void (*fn)(void*);
249 void *param;
251 if(g->traceback != nil)
252 gtraceback(g);
254 fn = (void (*)(void*))(g->entry);
255 param = g->param;
256 g->param = nil;
257 fn(param);
258 runtime_goexit();
261 // Switch context to a different goroutine. This is like longjmp.
262 void runtime_gogo(G*) __attribute__ ((noinline));
263 void
264 runtime_gogo(G* newg)
266 #ifdef USING_SPLIT_STACK
267 __splitstack_setcontext(&newg->stackcontext[0]);
268 #endif
269 g = newg;
270 newg->fromgogo = true;
271 fixcontext(ucontext_arg(&newg->context[0]));
272 setcontext(ucontext_arg(&newg->context[0]));
273 runtime_throw("gogo setcontext returned");
276 // Save context and call fn passing g as a parameter. This is like
277 // setjmp. Because getcontext always returns 0, unlike setjmp, we use
278 // g->fromgogo as a flag. It will be true if we got here via
279 // setcontext. g == nil the first time this is called in a new m.
280 void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
281 void
282 runtime_mcall(void (*pfn)(G*))
284 M *mp;
285 G *gp;
286 #ifndef USING_SPLIT_STACK
287 void *afterregs;
288 #endif
290 // Ensure that all registers are on the stack for the garbage
291 // collector.
292 __builtin_unwind_init();
294 gp = g;
295 mp = gp->m;
296 if(gp == mp->g0)
297 runtime_throw("runtime: mcall called on m->g0 stack");
299 if(gp != nil) {
301 #ifdef USING_SPLIT_STACK
302 __splitstack_getcontext(&g->stackcontext[0]);
303 #else
304 // We have to point to an address on the stack that is
305 // below the saved registers.
306 gp->gcnextsp = &afterregs;
307 #endif
308 gp->fromgogo = false;
309 getcontext(ucontext_arg(&gp->context[0]));
311 // When we return from getcontext, we may be running
312 // in a new thread. That means that g may have
313 // changed. It is a global variable, so we will
314 // reload it, but the address of g may be cached in
315 // our local stack frame, and that address may be
316 // wrong. Call the function to reload the value for
317 // this thread.
318 gp = runtime_g();
319 mp = gp->m;
321 if(gp->traceback != nil)
322 gtraceback(gp);
324 if (gp == nil || !gp->fromgogo) {
325 #ifdef USING_SPLIT_STACK
326 __splitstack_setcontext(&mp->g0->stackcontext[0]);
327 #endif
328 mp->g0->entry = (byte*)pfn;
329 mp->g0->param = gp;
331 // It's OK to set g directly here because this case
332 // can not occur if we got here via a setcontext to
333 // the getcontext call just above.
334 g = mp->g0;
336 fixcontext(ucontext_arg(&mp->g0->context[0]));
337 setcontext(ucontext_arg(&mp->g0->context[0]));
338 runtime_throw("runtime: mcall function returned");
342 // Goroutine scheduler
343 // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
345 // The main concepts are:
346 // G - goroutine.
347 // M - worker thread, or machine.
348 // P - processor, a resource that is required to execute Go code.
349 // M must have an associated P to execute Go code, but it can be
350 // blocked or in a syscall without an associated P.
352 // Design doc at http://golang.org/s/go11sched.
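// For example, with GOMAXPROCS=4 there are 4 P's, so at most 4 M's run
// Go code at any instant; additional M's may exist, blocked in syscalls
// or cgo calls, without holding a P.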
354 typedef struct Sched Sched;
355 struct Sched {
356 Lock;
358 uint64 goidgen;
359 M* midle; // idle m's waiting for work
360 int32 nmidle; // number of idle m's waiting for work
361 int32 nmidlelocked; // number of locked m's waiting for work
362 int32 mcount; // number of m's that have been created
363 int32 maxmcount; // maximum number of m's allowed (or die)
365 P* pidle; // idle P's
366 uint32 npidle;
367 uint32 nmspinning;
369 // Global runnable queue.
370 G* runqhead;
371 G* runqtail;
372 int32 runqsize;
374 // Global cache of dead G's.
375 Lock gflock;
376 G* gfree;
378 uint32 gcwaiting; // gc is waiting to run
379 int32 stopwait;
380 Note stopnote;
381 uint32 sysmonwait;
382 Note sysmonnote;
383 uint64 lastpoll;
385 int32 profilehz; // cpu profiling rate
388 enum
390 // Number of goroutine ids to grab from runtime_sched.goidgen to local per-P cache at once.
391 // 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
392 GoidCacheBatch = 16,
395 Sched runtime_sched;
396 int32 runtime_gomaxprocs;
397 uint32 runtime_needextram = 1;
398 M runtime_m0;
399 G runtime_g0; // idle goroutine for m0
400 G* runtime_lastg;
401 M* runtime_allm;
402 P** runtime_allp;
403 M* runtime_extram;
404 int8* runtime_goos;
405 int32 runtime_ncpu;
406 bool runtime_precisestack;
407 static int32 newprocs;
409 static Lock allglock; // the following vars are protected by this lock or by stoptheworld
410 G** runtime_allg;
411 uintptr runtime_allglen;
412 static uintptr allgcap;
414 bool runtime_isarchive;
416 void* runtime_mstart(void*);
417 static void runqput(P*, G*);
418 static G* runqget(P*);
419 static bool runqputslow(P*, G*, uint32, uint32);
420 static G* runqsteal(P*, P*);
421 static void mput(M*);
422 static M* mget(void);
423 static void mcommoninit(M*);
424 static void schedule(void);
425 static void procresize(int32);
426 static void acquirep(P*);
427 static P* releasep(void);
428 static void newm(void(*)(void), P*);
429 static void stopm(void);
430 static void startm(P*, bool);
431 static void handoffp(P*);
432 static void wakep(void);
433 static void stoplockedm(void);
434 static void startlockedm(G*);
435 static void sysmon(void);
436 static uint32 retake(int64);
437 static void incidlelocked(int32);
438 static void checkdead(void);
439 static void exitsyscall0(G*);
440 static void park0(G*);
441 static void goexit0(G*);
442 static void gfput(P*, G*);
443 static G* gfget(P*);
444 static void gfpurge(P*);
445 static void globrunqput(G*);
446 static void globrunqputbatch(G*, G*, int32);
447 static G* globrunqget(P*, int32);
448 static P* pidleget(void);
449 static void pidleput(P*);
450 static void injectglist(G*);
451 static bool preemptall(void);
452 static bool exitsyscallfast(void);
453 static void allgadd(G*);
455 bool runtime_isstarted;
457 // The bootstrap sequence is:
459 // call osinit
460 // call schedinit
461 // make & queue new G
462 // call runtime_mstart
464 // The new G calls runtime_main.
465 void
466 runtime_schedinit(void)
468 M *m;
469 int32 n, procs;
470 String s;
471 const byte *p;
472 Eface i;
474 m = &runtime_m0;
475 g = &runtime_g0;
476 m->g0 = g;
477 m->curg = g;
478 g->m = m;
480 initcontext();
482 runtime_sched.maxmcount = 10000;
483 runtime_precisestack = 0;
485 // runtime_symtabinit();
486 runtime_mallocinit();
487 mcommoninit(m);
489 // Initialize the itable value for newErrorCString,
490 // so that the next time it gets called, possibly
491 // in a fault during a garbage collection, it will not
492 // need to allocate memory.
493 runtime_newErrorCString(0, &i);
495 // Initialize the cached gotraceback value, since
496 // gotraceback calls getenv, which mallocs on Plan 9.
497 runtime_gotraceback(nil);
499 runtime_goargs();
500 runtime_goenvs();
501 runtime_parsedebugvars();
503 runtime_sched.lastpoll = runtime_nanotime();
504 procs = 1;
505 s = runtime_getenv("GOMAXPROCS");
506 p = s.str;
507 if(p != nil && (n = runtime_atoi(p, s.len)) > 0) {
508 if(n > _MaxGomaxprocs)
509 n = _MaxGomaxprocs;
510 procs = n;
512 runtime_allp = runtime_malloc((_MaxGomaxprocs+1)*sizeof(runtime_allp[0]));
513 procresize(procs);
515 // Can not enable GC until all roots are registered.
516 // mstats()->enablegc = 1;
519 extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
520 extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");
522 // Used to determine the field alignment.
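// Because the Hchan* member of the struct below follows a single char,
// offsetof(struct field_align, p) equals the alignment requirement of
// Hchan*; the chan_bool_type_descriptor initializer below derives its
// __field_align entry from that offset.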
524 struct field_align
526 char c;
527 Hchan *p;
530 // main_init_done is a signal used by cgocallbackg that initialization
531 // has been completed. It is made before _cgo_notify_runtime_init_done,
532 // so all cgo calls can rely on it existing. When main_init is
533 // complete, it is closed, meaning cgocallbackg can reliably receive
534 // from it.
535 Hchan *runtime_main_init_done;
537 // The chan bool type, for runtime_main_init_done.
539 extern const struct __go_type_descriptor bool_type_descriptor
540 __asm__ (GOSYM_PREFIX "__go_tdn_bool");
542 static struct __go_channel_type chan_bool_type_descriptor =
544 /* __common */
546 /* __code */
547 GO_CHAN,
548 /* __align */
549 __alignof (Hchan *),
550 /* __field_align */
551 offsetof (struct field_align, p) - 1,
552 /* __size */
553 sizeof (Hchan *),
554 /* __hash */
555 0, /* This value doesn't matter. */
556 /* __hashfn */
557 NULL,
558 /* __equalfn */
559 NULL,
560 /* __gc */
561 NULL, /* This value doesn't matter */
562 /* __reflection */
563 NULL, /* This value doesn't matter */
564 /* __uncommon */
565 NULL,
566 /* __pointer_to_this */
567 NULL
569 /* __element_type */
570 &bool_type_descriptor,
571 /* __dir */
572 CHANNEL_BOTH_DIR
575 extern Hchan *makechan (ChanType *, int64)
576 __asm__ (GOSYM_PREFIX "runtime.makechan");
577 extern void closechan(Hchan *) __asm__ (GOSYM_PREFIX "runtime.closechan");
579 static void
580 initDone(void *arg __attribute__ ((unused))) {
581 runtime_unlockOSThread();
584 // The main goroutine.
585 // Note: C frames in general are not copyable during stack growth, for two reasons:
586 // 1) We don't know where in a frame to find pointers to other stack locations.
587 // 2) There's no guarantee that globals or heap values do not point into the frame.
589 // The C frame for runtime.main is copyable, because:
590 // 1) There are no pointers to other stack locations in the frame
591 // (d.fn points at a global, d.link is nil, d.argp is -1).
592 // 2) The only pointer into this frame is from the defer chain,
593 // which is explicitly handled during stack copying.
594 void
595 runtime_main(void* dummy __attribute__((unused)))
597 Defer d;
598 _Bool frame;
600 newm(sysmon, nil);
602 // Lock the main goroutine onto this, the main OS thread,
603 // during initialization. Most programs won't care, but a few
604 // do require certain calls to be made by the main thread.
605 // Those can arrange for main.main to run in the main thread
606 // by calling runtime.LockOSThread during initialization
607 // to preserve the lock.
608 runtime_lockOSThread();
610 // Defer unlock so that runtime.Goexit during init does the unlock too.
611 d.pfn = (uintptr)(void*)initDone;
612 d.next = g->_defer;
613 d.arg = (void*)-1;
614 d._panic = g->_panic;
615 d.retaddr = 0;
616 d.makefunccanrecover = 0;
617 d.frame = &frame;
618 d.special = true;
619 g->_defer = &d;
621 if(g->m != &runtime_m0)
622 runtime_throw("runtime_main not on m0");
623 __go_go(runtime_MHeap_Scavenger, nil);
625 runtime_main_init_done = makechan(&chan_bool_type_descriptor, 0);
627 _cgo_notify_runtime_init_done();
629 main_init();
631 closechan(runtime_main_init_done);
633 if(g->_defer != &d || (void*)d.pfn != initDone)
634 runtime_throw("runtime: bad defer entry after init");
635 g->_defer = d.next;
636 runtime_unlockOSThread();
638 // For gccgo we have to wait until after main is initialized
639 // to enable GC, because initializing main registers the GC
640 // roots.
641 mstats()->enablegc = 1;
643 if(runtime_isarchive) {
644 // This is not a complete program, but is instead a
645 // library built using -buildmode=c-archive or
646 // c-shared. Now that we are initialized, there is
647 // nothing further to do.
648 return;
651 main_main();
653 // Make racy client program work: if panicking on
654 // another goroutine at the same time as main returns,
655 // let the other goroutine finish printing the panic trace.
656 // Once it does, it will exit. See issue 3934.
657 if(runtime_panicking)
658 runtime_park(nil, nil, "panicwait");
660 runtime_exit(0);
661 for(;;)
662 *(int32*)0 = 0;
665 void
666 runtime_tracebackothers(G * volatile me)
668 G * volatile gp;
669 Traceback tb;
670 int32 traceback;
671 Slice slice;
672 volatile uintptr i;
674 tb.gp = me;
675 traceback = runtime_gotraceback(nil);
677 // Show the current goroutine first, if we haven't already.
678 if((gp = g->m->curg) != nil && gp != me) {
679 runtime_printf("\n");
680 runtime_goroutineheader(gp);
681 gp->traceback = &tb;
683 #ifdef USING_SPLIT_STACK
684 __splitstack_getcontext(&me->stackcontext[0]);
685 #endif
686 getcontext(ucontext_arg(&me->context[0]));
688 if(gp->traceback != nil) {
689 runtime_gogo(gp);
692 slice.__values = &tb.locbuf[0];
693 slice.__count = tb.c;
694 slice.__capacity = tb.c;
695 runtime_printtrace(slice, nil);
696 runtime_printcreatedby(gp);
699 runtime_lock(&allglock);
700 for(i = 0; i < runtime_allglen; i++) {
701 gp = runtime_allg[i];
702 if(gp == me || gp == g->m->curg || gp->atomicstatus == _Gdead)
703 continue;
704 if(gp->issystem && traceback < 2)
705 continue;
706 runtime_printf("\n");
707 runtime_goroutineheader(gp);
709 // Our only mechanism for doing a stack trace is
710 // _Unwind_Backtrace. And that only works for the
711 // current thread, not for other random goroutines.
712 // So we need to switch context to the goroutine, get
713 // the backtrace, and then switch back.
715 // This means that if g is running or in a syscall, we
716 // can't reliably print a stack trace. FIXME.
718 if(gp->atomicstatus == _Grunning) {
719 runtime_printf("\tgoroutine running on other thread; stack unavailable\n");
720 runtime_printcreatedby(gp);
721 } else if(gp->atomicstatus == _Gsyscall) {
722 runtime_printf("\tgoroutine in C code; stack unavailable\n");
723 runtime_printcreatedby(gp);
724 } else {
725 gp->traceback = &tb;
727 #ifdef USING_SPLIT_STACK
728 __splitstack_getcontext(&me->stackcontext[0]);
729 #endif
730 getcontext(ucontext_arg(&me->context[0]));
732 if(gp->traceback != nil) {
733 runtime_gogo(gp);
736 slice.__values = &tb.locbuf[0];
737 slice.__count = tb.c;
738 slice.__capacity = tb.c;
739 runtime_printtrace(slice, nil);
740 runtime_printcreatedby(gp);
743 runtime_unlock(&allglock);
746 static void
747 checkmcount(void)
749 // sched lock is held
750 if(runtime_sched.mcount > runtime_sched.maxmcount) {
751 runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched.maxmcount);
752 runtime_throw("thread exhaustion");
756 // Do a stack trace of gp, and then restore the context to
757 // the goroutine that requested the traceback (gp->traceback->gp).
759 static void
760 gtraceback(G* gp)
762 Traceback* traceback;
764 traceback = gp->traceback;
765 gp->traceback = nil;
766 if(gp->m != nil)
767 runtime_throw("gtraceback: m is not nil");
768 gp->m = traceback->gp->m;
769 traceback->c = runtime_callers(1, traceback->locbuf,
770 sizeof traceback->locbuf / sizeof traceback->locbuf[0], false);
771 gp->m = nil;
772 runtime_gogo(traceback->gp);
775 static void
776 mcommoninit(M *mp)
778 // If there is no mcache runtime_callers() will crash,
779 // and we are most likely in sysmon thread so the stack is senseless anyway.
780 if(g->m->mcache)
781 runtime_callers(1, mp->createstack, nelem(mp->createstack), false);
783 mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
785 runtime_lock(&runtime_sched);
786 mp->id = runtime_sched.mcount++;
787 checkmcount();
788 runtime_mpreinit(mp);
790 // Add to runtime_allm so garbage collector doesn't free m
791 // when it is just in a register or thread-local storage.
792 mp->alllink = runtime_allm;
793 // runtime_NumCgoCall() iterates over allm w/o schedlock,
794 // so we need to publish it safely.
795 runtime_atomicstorep(&runtime_allm, mp);
796 runtime_unlock(&runtime_sched);
799 // Mark gp ready to run.
800 void
801 runtime_ready(G *gp)
803 // Mark runnable.
804 g->m->locks++; // disable preemption because it can be holding p in a local var
805 if(gp->atomicstatus != _Gwaiting) {
806 runtime_printf("goroutine %D has status %d\n", gp->goid, gp->atomicstatus);
807 runtime_throw("bad g->atomicstatus in ready");
809 gp->atomicstatus = _Grunnable;
810 runqput((P*)g->m->p, gp);
811 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0) // TODO: fast atomic
812 wakep();
813 g->m->locks--;
816 void goready(G*, int) __asm__ (GOSYM_PREFIX "runtime.goready");
818 void
819 goready(G* gp, int traceskip __attribute__ ((unused)))
821 runtime_ready(gp);
824 int32
825 runtime_gcprocs(void)
827 int32 n;
829 // Figure out how many CPUs to use during GC.
830 // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
831 runtime_lock(&runtime_sched);
832 n = runtime_gomaxprocs;
833 if(n > runtime_ncpu)
834 n = runtime_ncpu > 0 ? runtime_ncpu : 1;
835 if(n > MaxGcproc)
836 n = MaxGcproc;
837 if(n > runtime_sched.nmidle+1) // one M is currently running
838 n = runtime_sched.nmidle+1;
839 runtime_unlock(&runtime_sched);
840 return n;
843 static bool
844 needaddgcproc(void)
846 int32 n;
848 runtime_lock(&runtime_sched);
849 n = runtime_gomaxprocs;
850 if(n > runtime_ncpu)
851 n = runtime_ncpu;
852 if(n > MaxGcproc)
853 n = MaxGcproc;
854 n -= runtime_sched.nmidle+1; // one M is currently running
855 runtime_unlock(&runtime_sched);
856 return n > 0;
859 void
860 runtime_helpgc(int32 nproc)
862 M *mp;
863 int32 n, pos;
865 runtime_lock(&runtime_sched);
866 pos = 0;
867 for(n = 1; n < nproc; n++) { // one M is currently running
868 if(runtime_allp[pos]->mcache == g->m->mcache)
869 pos++;
870 mp = mget();
871 if(mp == nil)
872 runtime_throw("runtime_gcprocs inconsistency");
873 mp->helpgc = n;
874 mp->mcache = runtime_allp[pos]->mcache;
875 pos++;
876 runtime_notewakeup(&mp->park);
878 runtime_unlock(&runtime_sched);
881 // Similar to stoptheworld but best-effort and can be called several times.
882 // There is no reverse operation; it is used during crashing.
883 // This function must not lock any mutexes.
884 void
885 runtime_freezetheworld(void)
887 int32 i;
889 if(runtime_gomaxprocs == 1)
890 return;
891 // stopwait and preemption requests can be lost
892 // due to races with concurrently executing threads,
893 // so try several times
894 for(i = 0; i < 5; i++) {
895 // this should tell the scheduler to not start any new goroutines
896 runtime_sched.stopwait = 0x7fffffff;
897 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
898 // this should stop running goroutines
899 if(!preemptall())
900 break; // no running goroutines
901 runtime_usleep(1000);
903 // to be sure
904 runtime_usleep(1000);
905 preemptall();
906 runtime_usleep(1000);
909 void
910 runtime_stopTheWorldWithSema(void)
912 int32 i;
913 uint32 s;
914 P *p;
915 bool wait;
917 runtime_lock(&runtime_sched);
918 runtime_sched.stopwait = runtime_gomaxprocs;
919 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
920 preemptall();
921 // stop current P
922 ((P*)g->m->p)->status = _Pgcstop;
923 runtime_sched.stopwait--;
924 // try to retake all P's in _Psyscall status
925 for(i = 0; i < runtime_gomaxprocs; i++) {
926 p = runtime_allp[i];
927 s = p->status;
928 if(s == _Psyscall && runtime_cas(&p->status, s, _Pgcstop))
929 runtime_sched.stopwait--;
931 // stop idle P's
932 while((p = pidleget()) != nil) {
933 p->status = _Pgcstop;
934 runtime_sched.stopwait--;
936 wait = runtime_sched.stopwait > 0;
937 runtime_unlock(&runtime_sched);
939 // wait for remaining P's to stop voluntarily
940 if(wait) {
941 runtime_notesleep(&runtime_sched.stopnote);
942 runtime_noteclear(&runtime_sched.stopnote);
944 if(runtime_sched.stopwait)
945 runtime_throw("stoptheworld: not stopped");
946 for(i = 0; i < runtime_gomaxprocs; i++) {
947 p = runtime_allp[i];
948 if(p->status != _Pgcstop)
949 runtime_throw("stoptheworld: not stopped");
953 static void
954 mhelpgc(void)
956 g->m->helpgc = -1;
959 void
960 runtime_startTheWorldWithSema(void)
962 P *p, *p1;
963 M *mp;
964 G *gp;
965 bool add;
967 g->m->locks++; // disable preemption because it can be holding p in a local var
968 gp = runtime_netpoll(false); // non-blocking
969 injectglist(gp);
970 add = needaddgcproc();
971 runtime_lock(&runtime_sched);
972 if(newprocs) {
973 procresize(newprocs);
974 newprocs = 0;
975 } else
976 procresize(runtime_gomaxprocs);
977 runtime_sched.gcwaiting = 0;
979 p1 = nil;
980 while((p = pidleget()) != nil) {
981 // procresize() puts p's with work at the beginning of the list.
982 // Once we reach a p without a run queue, the rest don't have one either.
983 if(p->runqhead == p->runqtail) {
984 pidleput(p);
985 break;
987 p->m = (uintptr)mget();
988 p->link = (uintptr)p1;
989 p1 = p;
991 if(runtime_sched.sysmonwait) {
992 runtime_sched.sysmonwait = false;
993 runtime_notewakeup(&runtime_sched.sysmonnote);
995 runtime_unlock(&runtime_sched);
997 while(p1) {
998 p = p1;
999 p1 = (P*)p1->link;
1000 if(p->m) {
1001 mp = (M*)p->m;
1002 p->m = 0;
1003 if(mp->nextp)
1004 runtime_throw("startTheWorldWithSema: inconsistent mp->nextp");
1005 mp->nextp = (uintptr)p;
1006 runtime_notewakeup(&mp->park);
1007 } else {
1008 // Start M to run P. Do not start another M below.
1009 newm(nil, p);
1010 add = false;
1014 if(add) {
1015 // If GC could have used another helper proc, start one now,
1016 // in the hope that it will be available next time.
1017 // It would have been even better to start it before the collection,
1018 // but doing so requires allocating memory, so it's tricky to
1019 // coordinate. This lazy approach works out in practice:
1020 // we don't mind if the first couple gc rounds don't have quite
1021 // the maximum number of procs.
1022 newm(mhelpgc, nil);
1024 g->m->locks--;
1027 // Called to start an M.
1028 void*
1029 runtime_mstart(void* mp)
1031 M *m;
1033 m = (M*)mp;
1034 g = m->g0;
1035 g->m = m;
1037 initcontext();
1039 g->entry = nil;
1040 g->param = nil;
1042 // Record top of stack for use by mcall.
1043 // Once we call schedule we're never coming back,
1044 // so other calls can reuse this stack space.
1045 #ifdef USING_SPLIT_STACK
1046 __splitstack_getcontext(&g->stackcontext[0]);
1047 #else
1048 g->gcinitialsp = &mp;
1049 // Setting gcstacksize to 0 is a marker meaning that gcinitialsp
1050 // is the top of the stack, not the bottom.
1051 g->gcstacksize = 0;
1052 g->gcnextsp = &mp;
1053 #endif
1054 getcontext(ucontext_arg(&g->context[0]));
1056 if(g->entry != nil) {
1057 // Got here from mcall.
1058 void (*pfn)(G*) = (void (*)(G*))g->entry;
1059 G* gp = (G*)g->param;
1060 pfn(gp);
1061 *(int*)0x21 = 0x21;
1063 runtime_minit();
1065 #ifdef USING_SPLIT_STACK
1067 int dont_block_signals = 0;
1068 __splitstack_block_signals(&dont_block_signals, nil);
1070 #endif
1072 // Install signal handlers; after minit so that minit can
1073 // prepare the thread to be able to handle the signals.
1074 if(m == &runtime_m0) {
1075 if(runtime_iscgo && !runtime_cgoHasExtraM) {
1076 runtime_cgoHasExtraM = true;
1077 runtime_newextram();
1078 runtime_needextram = 0;
1080 runtime_initsig(false);
1083 if(m->mstartfn)
1084 ((void (*)(void))m->mstartfn)();
1086 if(m->helpgc) {
1087 m->helpgc = 0;
1088 stopm();
1089 } else if(m != &runtime_m0) {
1090 acquirep((P*)m->nextp);
1091 m->nextp = 0;
1093 schedule();
1095 // TODO(brainman): This point is never reached, because scheduler
1096 // does not release os threads at the moment. But once this path
1097 // is enabled, we must remove our seh here.
1099 return nil;
1102 typedef struct CgoThreadStart CgoThreadStart;
1103 struct CgoThreadStart
1105 M *m;
1106 G *g;
1107 uintptr *tls;
1108 void (*fn)(void);
1111 // Allocate a new m unassociated with any thread.
1112 // Can use p for allocation context if needed.
1114 runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, uintptr* ret_g0_stacksize)
1116 M *mp;
1118 g->m->locks++; // disable GC because it can be called from sysmon
1119 if(g->m->p == 0)
1120 acquirep(p); // temporarily borrow p for mallocs in this function
1121 #if 0
1122 if(mtype == nil) {
1123 Eface e;
1124 runtime_gc_m_ptr(&e);
1125 mtype = ((const PtrType*)e.__type_descriptor)->__element_type;
1127 #endif
1129 mp = runtime_mal(sizeof *mp);
1130 mcommoninit(mp);
1131 mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize);
1132 mp->g0->m = mp;
1134 if(p == (P*)g->m->p)
1135 releasep();
1136 g->m->locks--;
1138 return mp;
1141 static G*
1142 allocg(void)
1144 G *gp;
1145 // static Type *gtype;
1147 // if(gtype == nil) {
1148 // Eface e;
1149 // runtime_gc_g_ptr(&e);
1150 // gtype = ((PtrType*)e.__type_descriptor)->__element_type;
1151 // }
1152 // gp = runtime_cnew(gtype);
1153 gp = runtime_malloc(sizeof(G));
1154 return gp;
1157 static M* lockextra(bool nilokay);
1158 static void unlockextra(M*);
1160 // needm is called when a cgo callback happens on a
1161 // thread without an m (a thread not created by Go).
1162 // In this case, needm is expected to find an m to use
1163 // and return with m, g initialized correctly.
1164 // Since m and g are not set now (likely nil, but see below)
1165 // needm is limited in what routines it can call. In particular
1166 // it can only call nosplit functions (textflag 7) and cannot
1167 // do any scheduling that requires an m.
1169 // In order to avoid needing heavy lifting here, we adopt
1170 // the following strategy: there is a stack of available m's
1171 // that can be stolen. Using compare-and-swap
1172 // to pop from the stack has ABA races, so we simulate
1173 // a lock by doing an exchange (via casp) to steal the stack
1174 // head and replace the top pointer with MLOCKED (1).
1175 // This serves as a simple spin lock that we can use even
1176 // without an m. The thread that locks the stack in this way
1177 // unlocks the stack by storing a valid stack head pointer.
1179 // In order to make sure that there is always an m structure
1180 // available to be stolen, we maintain the invariant that there
1181 // is always one more than needed. At the beginning of the
1182 // program (if cgo is in use) the list is seeded with a single m.
1183 // If needm finds that it has taken the last m off the list, its job
1184 // is - once it has installed its own m so that it can do things like
1185 // allocate memory - to create a spare m and put it on the list.
1187 // Each of these extra m's also has a g0 and a curg that are
1188 // pressed into service as the scheduling stack and current
1189 // goroutine for the duration of the cgo callback.
1191 // When the callback is done with the m, it calls dropm to
1192 // put the m back on the list.
1194 // Unlike the gc toolchain, we start running on curg, since we are
1195 // just going to return and let the caller continue.
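// The MLOCKED sentinel and the lockextra/unlockextra helpers that
// implement this simulated spin lock are defined later in this file.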
1196 void
1197 runtime_needm(void)
1199 M *mp;
1201 if(runtime_needextram) {
1202 // Can happen if C/C++ code calls Go from a global ctor.
1203 // Can not throw, because scheduler is not initialized yet.
1204 int rv __attribute__((unused));
1205 rv = runtime_write(2, "fatal error: cgo callback before cgo call\n",
1206 sizeof("fatal error: cgo callback before cgo call\n")-1);
1207 runtime_exit(1);
1210 // Lock extra list, take head, unlock popped list.
1211 // nilokay=false is safe here because of the invariant above,
1212 // that the extra list always contains or will soon contain
1213 // at least one m.
1214 mp = lockextra(false);
1216 // Set needextram when we've just emptied the list,
1217 // so that the eventual call into cgocallbackg will
1218 // allocate a new m for the extra list. We delay the
1219 // allocation until then so that it can be done
1220 // after exitsyscall makes sure it is okay to be
1221 // running at all (that is, there's no garbage collection
1222 // running right now).
1223 mp->needextram = mp->schedlink == 0;
1224 unlockextra((M*)mp->schedlink);
1226 // Install g (= m->curg).
1227 runtime_setg(mp->curg);
1229 // Initialize g's context as in mstart.
1230 initcontext();
1231 g->atomicstatus = _Gsyscall;
1232 g->entry = nil;
1233 g->param = nil;
1234 #ifdef USING_SPLIT_STACK
1235 __splitstack_getcontext(&g->stackcontext[0]);
1236 #else
1237 g->gcinitialsp = &mp;
1238 g->gcstack = nil;
1239 g->gcstacksize = 0;
1240 g->gcnextsp = &mp;
1241 #endif
1242 getcontext(ucontext_arg(&g->context[0]));
1244 if(g->entry != nil) {
1245 // Got here from mcall.
1246 void (*pfn)(G*) = (void (*)(G*))g->entry;
1247 G* gp = (G*)g->param;
1248 pfn(gp);
1249 *(int*)0x22 = 0x22;
1252 // Initialize this thread to use the m.
1253 runtime_minit();
1255 #ifdef USING_SPLIT_STACK
1257 int dont_block_signals = 0;
1258 __splitstack_block_signals(&dont_block_signals, nil);
1260 #endif
1263 // newextram allocates an m and puts it on the extra list.
1264 // It is called with a working local m, so that it can do things
1265 // like call schedlock and allocate.
1266 void
1267 runtime_newextram(void)
1269 M *mp, *mnext;
1270 G *gp;
1271 byte *g0_sp, *sp;
1272 uintptr g0_spsize, spsize;
1273 ucontext_t *uc;
1275 // Create extra goroutine locked to extra m.
1276 // The goroutine is the context in which the cgo callback will run.
1277 // The sched.pc will never be returned to, but setting it to
1278 // runtime.goexit makes clear to the traceback routines where
1279 // the goroutine stack ends.
1280 mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize);
1281 gp = runtime_malg(StackMin, &sp, &spsize);
1282 gp->atomicstatus = _Gdead;
1283 gp->m = mp;
1284 mp->curg = gp;
1285 mp->locked = _LockInternal;
1286 mp->lockedg = gp;
1287 gp->lockedm = mp;
1288 gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
1289 // put on allg for garbage collector
1290 allgadd(gp);
1292 // The context for gp will be set up in runtime_needm. But
1293 // here we need to set up the context for g0.
1294 uc = ucontext_arg(&mp->g0->context[0]);
1295 getcontext(uc);
1296 uc->uc_stack.ss_sp = g0_sp;
1297 uc->uc_stack.ss_size = (size_t)g0_spsize;
1298 makecontext(uc, kickoff, 0);
1300 // Add m to the extra list.
1301 mnext = lockextra(true);
1302 mp->schedlink = (uintptr)mnext;
1303 unlockextra(mp);
1306 // dropm is called when a cgo callback has called needm but is now
1307 // done with the callback and returning back into the non-Go thread.
1308 // It puts the current m back onto the extra list.
1310 // The main expense here is the call to signalstack to release the
1311 // m's signal stack, and then the call to needm on the next callback
1312 // from this thread. It is tempting to try to save the m for next time,
1313 // which would eliminate both these costs, but there might not be
1314 // a next time: the current thread (which Go does not control) might exit.
1315 // If we saved the m for that thread, there would be an m leak each time
1316 // such a thread exited. Instead, we acquire and release an m on each
1317 // call. These should typically not be scheduling operations, just a few
1318 // atomics, so the cost should be small.
1320 // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
1321 // variable using pthread_key_create. Unlike the pthread keys we already use
1322 // on OS X, this dummy key would never be read by Go code. It would exist
1323 // only so that we could register at thread-exit-time destructor.
1324 // That destructor would put the m back onto the extra list.
1325 // This is purely a performance optimization. The current version,
1326 // in which dropm happens on each cgo call, is still correct too.
1327 // We may have to keep the current version on systems with cgo
1328 // but without pthreads, like Windows.
1329 void
1330 runtime_dropm(void)
1332 M *mp, *mnext;
1334 // Undo whatever initialization minit did during needm.
1335 runtime_unminit();
1337 // Clear m and g, and return m to the extra list.
1338 // After the call to setg we can only call nosplit functions.
1339 mp = g->m;
1340 runtime_setg(nil);
1342 mp->curg->atomicstatus = _Gdead;
1343 mp->curg->gcstack = nil;
1344 mp->curg->gcnextsp = nil;
1346 mnext = lockextra(true);
1347 mp->schedlink = (uintptr)mnext;
1348 unlockextra(mp);
1351 #define MLOCKED ((M*)1)
1353 // lockextra locks the extra list and returns the list head.
1354 // The caller must unlock the list by storing a new list head
1355 // to runtime_extram. If nilokay is true, then lockextra will
1356 // return a nil list head if that's what it finds. If nilokay is false,
1357 // lockextra will keep waiting until the list head is no longer nil.
1358 static M*
1359 lockextra(bool nilokay)
1361 M *mp;
1362 void (*yield)(void);
1364 for(;;) {
1365 mp = runtime_atomicloadp(&runtime_extram);
1366 if(mp == MLOCKED) {
1367 yield = runtime_osyield;
1368 yield();
1369 continue;
1371 if(mp == nil && !nilokay) {
1372 runtime_usleep(1);
1373 continue;
1375 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1376 yield = runtime_osyield;
1377 yield();
1378 continue;
1380 break;
1382 return mp;
1385 static void
1386 unlockextra(M *mp)
1388 runtime_atomicstorep(&runtime_extram, mp);
1391 static int32
1392 countextra()
1394 M *mp, *mc;
1395 int32 c;
1397 for(;;) {
1398 mp = runtime_atomicloadp(&runtime_extram);
1399 if(mp == MLOCKED) {
1400 runtime_osyield();
1401 continue;
1403 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1404 runtime_osyield();
1405 continue;
1407 c = 0;
1408 for(mc = mp; mc != nil; mc = (M*)mc->schedlink)
1409 c++;
1410 runtime_atomicstorep(&runtime_extram, mp);
1411 return c;
1415 // Create a new m. It will start off with a call to fn, or else the scheduler.
1416 static void
1417 newm(void(*fn)(void), P *p)
1419 M *mp;
1421 mp = runtime_allocm(p, -1, nil, nil);
1422 mp->nextp = (uintptr)p;
1423 mp->mstartfn = (uintptr)(void*)fn;
1425 runtime_newosproc(mp);
1428 // Stops execution of the current m until new work is available.
1429 // Returns with acquired P.
1430 static void
1431 stopm(void)
1433 M* m;
1435 m = g->m;
1436 if(m->locks)
1437 runtime_throw("stopm holding locks");
1438 if(m->p)
1439 runtime_throw("stopm holding p");
1440 if(m->spinning) {
1441 m->spinning = false;
1442 runtime_xadd(&runtime_sched.nmspinning, -1);
1445 retry:
1446 runtime_lock(&runtime_sched);
1447 mput(m);
1448 runtime_unlock(&runtime_sched);
1449 runtime_notesleep(&m->park);
1450 m = g->m;
1451 runtime_noteclear(&m->park);
1452 if(m->helpgc) {
1453 runtime_gchelper();
1454 m->helpgc = 0;
1455 m->mcache = nil;
1456 goto retry;
1458 acquirep((P*)m->nextp);
1459 m->nextp = 0;
1462 static void
1463 mspinning(void)
1465 g->m->spinning = true;
1468 // Schedules some M to run the p (creates an M if necessary).
1469 // If p==nil, tries to get an idle P; if there are no idle P's, does nothing.
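// When spinning is true, the caller has already bumped
// runtime_sched.nmspinning to account for the M that will spin (see
// wakep and handoffp), which is why startm undoes that increment below
// if it cannot find a P to run.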
1470 static void
1471 startm(P *p, bool spinning)
1473 M *mp;
1474 void (*fn)(void);
1476 runtime_lock(&runtime_sched);
1477 if(p == nil) {
1478 p = pidleget();
1479 if(p == nil) {
1480 runtime_unlock(&runtime_sched);
1481 if(spinning)
1482 runtime_xadd(&runtime_sched.nmspinning, -1);
1483 return;
1486 mp = mget();
1487 runtime_unlock(&runtime_sched);
1488 if(mp == nil) {
1489 fn = nil;
1490 if(spinning)
1491 fn = mspinning;
1492 newm(fn, p);
1493 return;
1495 if(mp->spinning)
1496 runtime_throw("startm: m is spinning");
1497 if(mp->nextp)
1498 runtime_throw("startm: m has p");
1499 mp->spinning = spinning;
1500 mp->nextp = (uintptr)p;
1501 runtime_notewakeup(&mp->park);
1504 // Hands off P from syscall or locked M.
1505 static void
1506 handoffp(P *p)
1508 // if it has local work, start it straight away
1509 if(p->runqhead != p->runqtail || runtime_sched.runqsize) {
1510 startm(p, false);
1511 return;
1513 // no local work, check that there are no spinning/idle M's,
1514 // otherwise our help is not required
1515 if(runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) == 0 && // TODO: fast atomic
1516 runtime_cas(&runtime_sched.nmspinning, 0, 1)) {
1517 startm(p, true);
1518 return;
1520 runtime_lock(&runtime_sched);
1521 if(runtime_sched.gcwaiting) {
1522 p->status = _Pgcstop;
1523 if(--runtime_sched.stopwait == 0)
1524 runtime_notewakeup(&runtime_sched.stopnote);
1525 runtime_unlock(&runtime_sched);
1526 return;
1528 if(runtime_sched.runqsize) {
1529 runtime_unlock(&runtime_sched);
1530 startm(p, false);
1531 return;
1533 // If this is the last running P and nobody is polling network,
1534 // need to wakeup another M to poll network.
1535 if(runtime_sched.npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched.lastpoll) != 0) {
1536 runtime_unlock(&runtime_sched);
1537 startm(p, false);
1538 return;
1540 pidleput(p);
1541 runtime_unlock(&runtime_sched);
1544 // Tries to add one more P to execute G's.
1545 // Called when a G is made runnable (newproc, ready).
1546 static void
1547 wakep(void)
1549 // be conservative about spinning threads
1550 if(!runtime_cas(&runtime_sched.nmspinning, 0, 1))
1551 return;
1552 startm(nil, true);
1555 // Stops execution of the current m that is locked to a g until the g is runnable again.
1556 // Returns with acquired P.
1557 static void
1558 stoplockedm(void)
1560 M *m;
1561 P *p;
1563 m = g->m;
1564 if(m->lockedg == nil || m->lockedg->lockedm != m)
1565 runtime_throw("stoplockedm: inconsistent locking");
1566 if(m->p) {
1567 // Schedule another M to run this p.
1568 p = releasep();
1569 handoffp(p);
1571 incidlelocked(1);
1572 // Wait until another thread schedules lockedg again.
1573 runtime_notesleep(&m->park);
1574 m = g->m;
1575 runtime_noteclear(&m->park);
1576 if(m->lockedg->atomicstatus != _Grunnable)
1577 runtime_throw("stoplockedm: not runnable");
1578 acquirep((P*)m->nextp);
1579 m->nextp = 0;
1582 // Schedules the locked m to run the locked gp.
1583 static void
1584 startlockedm(G *gp)
1586 M *mp;
1587 P *p;
1589 mp = gp->lockedm;
1590 if(mp == g->m)
1591 runtime_throw("startlockedm: locked to me");
1592 if(mp->nextp)
1593 runtime_throw("startlockedm: m has p");
1594 // directly handoff current P to the locked m
1595 incidlelocked(-1);
1596 p = releasep();
1597 mp->nextp = (uintptr)p;
1598 runtime_notewakeup(&mp->park);
1599 stopm();
1602 // Stops the current m for stoptheworld.
1603 // Returns when the world is restarted.
1604 static void
1605 gcstopm(void)
1607 P *p;
1609 if(!runtime_sched.gcwaiting)
1610 runtime_throw("gcstopm: not waiting for gc");
1611 if(g->m->spinning) {
1612 g->m->spinning = false;
1613 runtime_xadd(&runtime_sched.nmspinning, -1);
1615 p = releasep();
1616 runtime_lock(&runtime_sched);
1617 p->status = _Pgcstop;
1618 if(--runtime_sched.stopwait == 0)
1619 runtime_notewakeup(&runtime_sched.stopnote);
1620 runtime_unlock(&runtime_sched);
1621 stopm();
1624 // Schedules gp to run on the current M.
1625 // Never returns.
1626 static void
1627 execute(G *gp)
1629 int32 hz;
1631 if(gp->atomicstatus != _Grunnable) {
1632 runtime_printf("execute: bad g status %d\n", gp->atomicstatus);
1633 runtime_throw("execute: bad g status");
1635 gp->atomicstatus = _Grunning;
1636 gp->waitsince = 0;
1637 ((P*)g->m->p)->schedtick++;
1638 g->m->curg = gp;
1639 gp->m = g->m;
1641 // Check whether the profiler needs to be turned on or off.
1642 hz = runtime_sched.profilehz;
1643 if(g->m->profilehz != hz)
1644 runtime_resetcpuprofiler(hz);
1646 runtime_gogo(gp);
1649 // Finds a runnable goroutine to execute.
1650 // Tries to steal from other P's, get g from global queue, poll network.
1651 static G*
1652 findrunnable(void)
1654 G *gp;
1655 P *p;
1656 int32 i;
1658 top:
1659 if(runtime_sched.gcwaiting) {
1660 gcstopm();
1661 goto top;
1663 if(runtime_fingwait && runtime_fingwake && (gp = runtime_wakefing()) != nil)
1664 runtime_ready(gp);
1665 // local runq
1666 gp = runqget((P*)g->m->p);
1667 if(gp)
1668 return gp;
1669 // global runq
1670 if(runtime_sched.runqsize) {
1671 runtime_lock(&runtime_sched);
1672 gp = globrunqget((P*)g->m->p, 0);
1673 runtime_unlock(&runtime_sched);
1674 if(gp)
1675 return gp;
1677 // poll network
1678 gp = runtime_netpoll(false); // non-blocking
1679 if(gp) {
1680 injectglist((G*)gp->schedlink);
1681 gp->atomicstatus = _Grunnable;
1682 return gp;
1684 // If number of spinning M's >= number of busy P's, block.
1685 // This is necessary to prevent excessive CPU consumption
1686 // when GOMAXPROCS>>1 but the program parallelism is low.
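// For example, with runtime_gomaxprocs==8 and 6 idle P's there are only
// 2 busy P's, so once one M is already spinning (2*1 >= 2) a second
// would-be spinner blocks here instead of burning a CPU looking for work.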
1687 if(!g->m->spinning && 2 * runtime_atomicload(&runtime_sched.nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched.npidle)) // TODO: fast atomic
1688 goto stop;
1689 if(!g->m->spinning) {
1690 g->m->spinning = true;
1691 runtime_xadd(&runtime_sched.nmspinning, 1);
1693 // random steal from other P's
1694 for(i = 0; i < 2*runtime_gomaxprocs; i++) {
1695 if(runtime_sched.gcwaiting)
1696 goto top;
1697 p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
1698 if(p == (P*)g->m->p)
1699 gp = runqget(p);
1700 else
1701 gp = runqsteal((P*)g->m->p, p);
1702 if(gp)
1703 return gp;
1705 stop:
1706 // return P and block
1707 runtime_lock(&runtime_sched);
1708 if(runtime_sched.gcwaiting) {
1709 runtime_unlock(&runtime_sched);
1710 goto top;
1712 if(runtime_sched.runqsize) {
1713 gp = globrunqget((P*)g->m->p, 0);
1714 runtime_unlock(&runtime_sched);
1715 return gp;
1717 p = releasep();
1718 pidleput(p);
1719 runtime_unlock(&runtime_sched);
1720 if(g->m->spinning) {
1721 g->m->spinning = false;
1722 runtime_xadd(&runtime_sched.nmspinning, -1);
1724 // check all runqueues once again
1725 for(i = 0; i < runtime_gomaxprocs; i++) {
1726 p = runtime_allp[i];
1727 if(p && p->runqhead != p->runqtail) {
1728 runtime_lock(&runtime_sched);
1729 p = pidleget();
1730 runtime_unlock(&runtime_sched);
1731 if(p) {
1732 acquirep(p);
1733 goto top;
1735 break;
1738 // poll network
1739 if(runtime_xchg64(&runtime_sched.lastpoll, 0) != 0) {
1740 if(g->m->p)
1741 runtime_throw("findrunnable: netpoll with p");
1742 if(g->m->spinning)
1743 runtime_throw("findrunnable: netpoll with spinning");
1744 gp = runtime_netpoll(true); // block until new work is available
1745 runtime_atomicstore64(&runtime_sched.lastpoll, runtime_nanotime());
1746 if(gp) {
1747 runtime_lock(&runtime_sched);
1748 p = pidleget();
1749 runtime_unlock(&runtime_sched);
1750 if(p) {
1751 acquirep(p);
1752 injectglist((G*)gp->schedlink);
1753 gp->atomicstatus = _Grunnable;
1754 return gp;
1756 injectglist(gp);
1759 stopm();
1760 goto top;
1763 static void
1764 resetspinning(void)
1766 int32 nmspinning;
1768 if(g->m->spinning) {
1769 g->m->spinning = false;
1770 nmspinning = runtime_xadd(&runtime_sched.nmspinning, -1);
1771 if(nmspinning < 0)
1772 runtime_throw("findrunnable: negative nmspinning");
1773 } else
1774 nmspinning = runtime_atomicload(&runtime_sched.nmspinning);
1776 // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
1777 // so see if we need to wakeup another P here.
1778 if (nmspinning == 0 && runtime_atomicload(&runtime_sched.npidle) > 0)
1779 wakep();
1782 // Injects the list of runnable G's into the scheduler.
1783 // Can run concurrently with GC.
1784 static void
1785 injectglist(G *glist)
1787 int32 n;
1788 G *gp;
1790 if(glist == nil)
1791 return;
1792 runtime_lock(&runtime_sched);
1793 for(n = 0; glist; n++) {
1794 gp = glist;
1795 glist = (G*)gp->schedlink;
1796 gp->atomicstatus = _Grunnable;
1797 globrunqput(gp);
1799 runtime_unlock(&runtime_sched);
1801 for(; n && runtime_sched.npidle; n--)
1802 startm(nil, false);
1805 // One round of scheduler: find a runnable goroutine and execute it.
1806 // Never returns.
1807 static void
1808 schedule(void)
1810 G *gp;
1811 uint32 tick;
1813 if(g->m->locks)
1814 runtime_throw("schedule: holding locks");
1816 top:
1817 if(runtime_sched.gcwaiting) {
1818 gcstopm();
1819 goto top;
1822 gp = nil;
1823 // Check the global runnable queue once in a while to ensure fairness.
1824 // Otherwise two goroutines can completely occupy the local runqueue
1825 // by constantly respawning each other.
1826 tick = ((P*)g->m->p)->schedtick;
1827 // This is a fancy way to say tick%61==0;
1828 // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
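// (0x4325c53f is 2^36/61 rounded up, so for 32-bit tick values
// (tick*0x4325c53f)>>36 computes tick/61, and the subtraction below
// leaves tick%61.)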
1829 if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched.runqsize > 0) {
1830 runtime_lock(&runtime_sched);
1831 gp = globrunqget((P*)g->m->p, 1);
1832 runtime_unlock(&runtime_sched);
1833 if(gp)
1834 resetspinning();
1836 if(gp == nil) {
1837 gp = runqget((P*)g->m->p);
1838 if(gp && g->m->spinning)
1839 runtime_throw("schedule: spinning with local work");
1841 if(gp == nil) {
1842 gp = findrunnable(); // blocks until work is available
1843 resetspinning();
1846 if(gp->lockedm) {
1847 // Hands off own p to the locked m,
1848 // then blocks waiting for a new p.
1849 startlockedm(gp);
1850 goto top;
1853 execute(gp);
1856 // Puts the current goroutine into a waiting state and calls unlockf.
1857 // If unlockf returns false, the goroutine is resumed.
1858 void
1859 runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason)
1861 if(g->atomicstatus != _Grunning)
1862 runtime_throw("bad g status");
1863 g->m->waitlock = lock;
1864 g->m->waitunlockf = unlockf;
1865 g->waitreason = runtime_gostringnocopy((const byte*)reason);
1866 runtime_mcall(park0);
1869 void gopark(FuncVal *, void *, String, byte, int)
1870 __asm__ (GOSYM_PREFIX "runtime.gopark");
1872 void
1873 gopark(FuncVal *unlockf, void *lock, String reason,
1874 byte traceEv __attribute__ ((unused)),
1875 int traceskip __attribute__ ((unused)))
1877 if(g->atomicstatus != _Grunning)
1878 runtime_throw("bad g status");
1879 g->m->waitlock = lock;
1880 g->m->waitunlockf = unlockf == nil ? nil : (void*)unlockf->fn;
1881 g->waitreason = reason;
1882 runtime_mcall(park0);
1885 static bool
1886 parkunlock(G *gp, void *lock)
1888 USED(gp);
1889 runtime_unlock(lock);
1890 return true;
1893 // Puts the current goroutine into a waiting state and unlocks the lock.
1894 // The goroutine can be made runnable again by calling runtime_ready(gp).
1895 void
1896 runtime_parkunlock(Lock *lock, const char *reason)
1898 runtime_park(parkunlock, lock, reason);
1901 void goparkunlock(Lock *, String, byte, int)
1902 __asm__ (GOSYM_PREFIX "runtime.goparkunlock");
1904 void
1905 goparkunlock(Lock *lock, String reason, byte traceEv __attribute__ ((unused)),
1906 int traceskip __attribute__ ((unused)))
1908 if(g->atomicstatus != _Grunning)
1909 runtime_throw("bad g status");
1910 g->m->waitlock = lock;
1911 g->m->waitunlockf = parkunlock;
1912 g->waitreason = reason;
1913 runtime_mcall(park0);
1916 // runtime_park continuation on g0.
1917 static void
1918 park0(G *gp)
1920 M *m;
1921 bool ok;
1923 m = g->m;
1924 gp->atomicstatus = _Gwaiting;
1925 gp->m = nil;
1926 m->curg = nil;
1927 if(m->waitunlockf) {
1928 ok = ((bool (*)(G*, void*))m->waitunlockf)(gp, m->waitlock);
1929 m->waitunlockf = nil;
1930 m->waitlock = nil;
1931 if(!ok) {
1932 gp->atomicstatus = _Grunnable;
1933 execute(gp); // Schedule it back, never returns.
1936 if(m->lockedg) {
1937 stoplockedm();
1938 execute(gp); // Never returns.
1940 schedule();
1943 // Scheduler yield.
1944 void
1945 runtime_gosched(void)
1947 if(g->atomicstatus != _Grunning)
1948 runtime_throw("bad g status");
1949 runtime_mcall(runtime_gosched0);
1952 // runtime_gosched continuation on g0.
1953 void
1954 runtime_gosched0(G *gp)
1956 M *m;
1958 m = g->m;
1959 gp->atomicstatus = _Grunnable;
1960 gp->m = nil;
1961 m->curg = nil;
1962 runtime_lock(&runtime_sched);
1963 globrunqput(gp);
1964 runtime_unlock(&runtime_sched);
1965 if(m->lockedg) {
1966 stoplockedm();
1967 execute(gp); // Never returns.
1969 schedule();
1972 // Finishes execution of the current goroutine.
1973 // Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack).
1974 // Since it does not return it does not matter. But if it is preempted
1975 // at the split stack check, GC will complain about inconsistent sp.
1976 void runtime_goexit(void) __attribute__ ((noinline));
1977 void
1978 runtime_goexit(void)
1980 if(g->atomicstatus != _Grunning)
1981 runtime_throw("bad g status");
1982 runtime_mcall(goexit0);
1985 // runtime_goexit continuation on g0.
1986 static void
1987 goexit0(G *gp)
1989 M *m;
1991 m = g->m;
1992 gp->atomicstatus = _Gdead;
1993 gp->entry = nil;
1994 gp->m = nil;
1995 gp->lockedm = nil;
1996 gp->paniconfault = 0;
1997 gp->_defer = nil; // should be true already but just in case.
1998 gp->_panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
1999 gp->writebuf.__values = nil;
2000 gp->writebuf.__count = 0;
2001 gp->writebuf.__capacity = 0;
2002 gp->waitreason = runtime_gostringnocopy(nil);
2003 gp->param = nil;
2004 m->curg = nil;
2005 m->lockedg = nil;
2006 if(m->locked & ~_LockExternal) {
2007 runtime_printf("invalid m->locked = %d\n", m->locked);
2008 runtime_throw("internal lockOSThread error");
2010 m->locked = 0;
2011 gfput((P*)m->p, gp);
2012 schedule();
2015 // The goroutine g is about to enter a system call.
2016 // Record that it's not using the cpu anymore.
2017 // This is called only from the go syscall library and cgocall,
2018 // not from the low-level system calls used by the runtime.
2020 // Entersyscall cannot split the stack: the saved context must
2021 // refer to the caller's stack segment, because
2022 // entersyscall is going to return immediately after.
2024 void runtime_entersyscall(int32) __attribute__ ((no_split_stack));
2025 static void doentersyscall(uintptr, uintptr)
2026 __attribute__ ((no_split_stack, noinline));
2028 void
2029 runtime_entersyscall(int32 dummy __attribute__ ((unused)))
2031 // Save the registers in the g structure so that any pointers
2032 // held in registers will be seen by the garbage collector.
2033 getcontext(ucontext_arg(&g->gcregs[0]));
2035 // Do the work in a separate function, so that this function
2036 // doesn't save any registers on its own stack. If this
2037 // function does save any registers, we might store the wrong
2038 // value in the call to getcontext.
2040 // FIXME: This assumes that we do not need to save any
2041 // callee-saved registers to access the TLS variable g. We
2042 // don't want to put the ucontext_t on the stack because it is
2043 // large and we can not split the stack here.
2044 doentersyscall((uintptr)runtime_getcallerpc(&dummy),
2045 (uintptr)runtime_getcallersp(&dummy));
2048 static void
2049 doentersyscall(uintptr pc, uintptr sp)
2051 // Disable preemption because during this function g is in _Gsyscall status,
2052 // but can have inconsistent g->sched, do not let GC observe it.
2053 g->m->locks++;
2055 // Leave SP around for GC and traceback.
2056 #ifdef USING_SPLIT_STACK
2058 size_t gcstacksize;
2059 g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
2060 &g->gcnextsegment, &g->gcnextsp,
2061 &g->gcinitialsp);
2062 g->gcstacksize = (uintptr)gcstacksize;
2064 #else
2066 void *v;
2068 g->gcnextsp = (byte *) &v;
2070 #endif
2072 g->syscallsp = sp;
2073 g->syscallpc = pc;
2075 g->atomicstatus = _Gsyscall;
2077 if(runtime_atomicload(&runtime_sched.sysmonwait)) { // TODO: fast atomic
2078 runtime_lock(&runtime_sched);
2079 if(runtime_atomicload(&runtime_sched.sysmonwait)) {
2080 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2081 runtime_notewakeup(&runtime_sched.sysmonnote);
2083 runtime_unlock(&runtime_sched);
2086 g->m->mcache = nil;
2087 ((P*)(g->m->p))->m = 0;
2088 runtime_atomicstore(&((P*)g->m->p)->status, _Psyscall);
2089 if(runtime_atomicload(&runtime_sched.gcwaiting)) {
2090 runtime_lock(&runtime_sched);
2091 if (runtime_sched.stopwait > 0 && runtime_cas(&((P*)g->m->p)->status, _Psyscall, _Pgcstop)) {
2092 if(--runtime_sched.stopwait == 0)
2093 runtime_notewakeup(&runtime_sched.stopnote);
2095 runtime_unlock(&runtime_sched);
2098 g->m->locks--;
2101 // The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
2102 void
2103 runtime_entersyscallblock(int32 dummy __attribute__ ((unused)))
2105 P *p;
2107 g->m->locks++; // see comment in entersyscall
2109 // Leave SP around for GC and traceback.
2110 #ifdef USING_SPLIT_STACK
2112 size_t gcstacksize;
2113 g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
2114 &g->gcnextsegment, &g->gcnextsp,
2115 &g->gcinitialsp);
2116 g->gcstacksize = (uintptr)gcstacksize;
2118 #else
2119 g->gcnextsp = (byte *) &p;
2120 #endif
2122 // Save the registers in the g structure so that any pointers
2123 // held in registers will be seen by the garbage collector.
2124 getcontext(ucontext_arg(&g->gcregs[0]));
2126 g->syscallpc = (uintptr)runtime_getcallerpc(&dummy);
2127 g->syscallsp = (uintptr)runtime_getcallersp(&dummy);
2129 g->atomicstatus = _Gsyscall;
2131 p = releasep();
2132 handoffp(p);
2133 if(g->isbackground) // do not consider blocked scavenger for deadlock detection
2134 incidlelocked(1);
2136 g->m->locks--;
2139 // The goroutine g exited its system call.
2140 // Arrange for it to run on a cpu again.
2141 // This is called only from the go syscall library, not
2142 // from the low-level system calls used by the runtime.
2143 void
2144 runtime_exitsyscall(int32 dummy __attribute__ ((unused)))
2146 G *gp;
2148 gp = g;
2149 gp->m->locks++; // see comment in entersyscall
2151 if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
2152 incidlelocked(-1);
2154 gp->waitsince = 0;
2155 if(exitsyscallfast()) {
2156 // There's a cpu for us, so we can run.
2157 ((P*)gp->m->p)->syscalltick++;
2158 gp->atomicstatus = _Grunning;
2159 // Garbage collector isn't running (since we are),
2160 // so okay to clear gcstack and gcsp.
2161 #ifdef USING_SPLIT_STACK
2162 gp->gcstack = nil;
2163 #endif
2164 gp->gcnextsp = nil;
2165 runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
2166 gp->syscallsp = 0;
2167 gp->m->locks--;
2168 return;
2171 gp->m->locks--;
2173 // Call the scheduler.
2174 runtime_mcall(exitsyscall0);
2176 // Scheduler returned, so we're allowed to run now.
2177 // Delete the gcstack information that we left for
2178 // the garbage collector during the system call.
2179 // Must wait until now because until gosched returns
2180 // we don't know for sure that the garbage collector
2181 // is not running.
2182 #ifdef USING_SPLIT_STACK
2183 gp->gcstack = nil;
2184 #endif
2185 gp->gcnextsp = nil;
2186 runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
2188 gp->syscallsp = 0;
2190 // Note that this gp->m might be different than the earlier
2191 // gp->m after returning from runtime_mcall.
2192 ((P*)gp->m->p)->syscalltick++;
2195 static bool
2196 exitsyscallfast(void)
2198 G *gp;
2199 P *p;
2201 gp = g;
2203 // Freezetheworld sets stopwait but does not retake P's.
2204 if(runtime_sched.stopwait) {
2205 gp->m->p = 0;
2206 return false;
2209 // Try to re-acquire the last P.
2210 if(gp->m->p && ((P*)gp->m->p)->status == _Psyscall && runtime_cas(&((P*)gp->m->p)->status, _Psyscall, _Prunning)) {
2211 // There's a cpu for us, so we can run.
2212 gp->m->mcache = ((P*)gp->m->p)->mcache;
2213 ((P*)gp->m->p)->m = (uintptr)gp->m;
2214 return true;
2216 // Try to get any other idle P.
2217 gp->m->p = 0;
2218 if(runtime_sched.pidle) {
2219 runtime_lock(&runtime_sched);
2220 p = pidleget();
2221 if(p && runtime_atomicload(&runtime_sched.sysmonwait)) {
2222 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2223 runtime_notewakeup(&runtime_sched.sysmonnote);
2225 runtime_unlock(&runtime_sched);
2226 if(p) {
2227 acquirep(p);
2228 return true;
2231 return false;
2234 // runtime_exitsyscall slow path on g0.
2235 // Failed to acquire P, enqueue gp as runnable.
2236 static void
2237 exitsyscall0(G *gp)
2239 M *m;
2240 P *p;
2242 m = g->m;
2243 gp->atomicstatus = _Grunnable;
2244 gp->m = nil;
2245 m->curg = nil;
2246 runtime_lock(&runtime_sched);
2247 p = pidleget();
2248 if(p == nil)
2249 globrunqput(gp);
2250 else if(runtime_atomicload(&runtime_sched.sysmonwait)) {
2251 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2252 runtime_notewakeup(&runtime_sched.sysmonnote);
2254 runtime_unlock(&runtime_sched);
2255 if(p) {
2256 acquirep(p);
2257 execute(gp); // Never returns.
2259 if(m->lockedg) {
2260 // Wait until another thread schedules gp and so m again.
2261 stoplockedm();
2262 execute(gp); // Never returns.
2264 stopm();
2265 schedule(); // Never returns.
2268 void syscall_entersyscall(void)
2269 __asm__(GOSYM_PREFIX "syscall.Entersyscall");
2271 void syscall_entersyscall(void) __attribute__ ((no_split_stack));
2273 void
2274 syscall_entersyscall()
2276 runtime_entersyscall(0);
2279 void syscall_exitsyscall(void)
2280 __asm__(GOSYM_PREFIX "syscall.Exitsyscall");
2282 void syscall_exitsyscall(void) __attribute__ ((no_split_stack));
2284 void
2285 syscall_exitsyscall()
2287 runtime_exitsyscall(0);
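// Illustrative sketch, not part of this file: how a potentially blocking
// call is expected to be bracketed by the two entry points above.  The
// real wrappers are generated Go code in the syscall package; only the
// entersyscall/exitsyscall pairing is the point here, and
// example_blocking_read is a hypothetical name.
#if 0
static ssize_t
example_blocking_read(int fd, void *buf, size_t count)
{
	ssize_t r;

	runtime_entersyscall(0);	// record that this G has stopped using the CPU
	r = read(fd, buf, count);	// may sleep in the kernel
	runtime_exitsyscall(0);		// reacquire a P before running Go code again
	return r;
}
#endif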
2290 // Called from syscall package before fork.
2291 void syscall_runtime_BeforeFork(void)
2292 __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork");
2293 void
2294 syscall_runtime_BeforeFork(void)
2296 // Fork can hang if preempted with signals frequently enough (see issue 5517).
2297 // Ensure that we stay on the same M where we disable profiling.
2298 runtime_m()->locks++;
2299 if(runtime_m()->profilehz != 0)
2300 runtime_resetcpuprofiler(0);
2303 // Called from syscall package after fork in parent.
2304 void syscall_runtime_AfterFork(void)
2305 __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork");
2306 void
2307 syscall_runtime_AfterFork(void)
2309 int32 hz;
2311 hz = runtime_sched.profilehz;
2312 if(hz != 0)
2313 runtime_resetcpuprofiler(hz);
2314 runtime_m()->locks--;
2317 // Allocate a new g, with a stack big enough for stacksize bytes.
2319 runtime_malg(int32 stacksize, byte** ret_stack, uintptr* ret_stacksize)
2321 G *newg;
2323 newg = allocg();
2324 if(stacksize >= 0) {
2325 #if USING_SPLIT_STACK
2326 int dont_block_signals = 0;
2327 size_t ss_stacksize;
2329 *ret_stack = __splitstack_makecontext(stacksize,
2330 &newg->stackcontext[0],
2331 &ss_stacksize);
2332 *ret_stacksize = (uintptr)ss_stacksize;
2333 __splitstack_block_signals_context(&newg->stackcontext[0],
2334 &dont_block_signals, nil);
2335 #else
2336 // In 64-bit mode, the maximum Go allocation space is
2337 // 128G. Our stack size is 4M, which only permits 32K
2338 // goroutines. In order to not limit ourselves,
2339 // allocate the stacks out of separate memory. In
2340 // 32-bit mode, the Go allocation space is all of
2341 // memory anyhow.
2342 if(sizeof(void*) == 8) {
2343 void *p = runtime_SysAlloc(stacksize, &mstats()->other_sys);
2344 if(p == nil)
2345 runtime_throw("runtime: cannot allocate memory for goroutine stack");
2346 *ret_stack = (byte*)p;
2347 } else {
2348 *ret_stack = runtime_mallocgc(stacksize, 0, FlagNoProfiling|FlagNoGC);
2349 runtime_xadd(&runtime_stacks_sys, stacksize);
2351 *ret_stacksize = (uintptr)stacksize;
2352 newg->gcinitialsp = *ret_stack;
2353 newg->gcstacksize = (uintptr)stacksize;
2354 #endif
2356 return newg;
2360 __go_go(void (*fn)(void*), void* arg)
2362 byte *sp;
2363 size_t spsize;
2364 G *newg;
2365 P *p;
2367 //runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
2368 if(fn == nil) {
2369 g->m->throwing = -1; // do not dump full stacks
2370 runtime_throw("go of nil func value");
2372 g->m->locks++; // disable preemption because it can be holding p in a local var
2374 p = (P*)g->m->p;
2375 if((newg = gfget(p)) != nil) {
2376 #ifdef USING_SPLIT_STACK
2377 int dont_block_signals = 0;
2379 sp = __splitstack_resetcontext(&newg->stackcontext[0],
2380 &spsize);
2381 __splitstack_block_signals_context(&newg->stackcontext[0],
2382 &dont_block_signals, nil);
2383 #else
2384 sp = newg->gcinitialsp;
2385 spsize = newg->gcstacksize;
2386 if(spsize == 0)
2387 runtime_throw("bad spsize in __go_go");
2388 newg->gcnextsp = sp;
2389 #endif
2390 } else {
2391 uintptr malsize;
2393 newg = runtime_malg(StackMin, &sp, &malsize);
2394 spsize = (size_t)malsize;
2395 allgadd(newg);
2398 newg->entry = (byte*)fn;
2399 newg->param = arg;
2400 newg->gopc = (uintptr)__builtin_return_address(0);
2401 newg->atomicstatus = _Grunnable;
2402 if(p->goidcache == p->goidcacheend) {
2403 p->goidcache = runtime_xadd64(&runtime_sched.goidgen, GoidCacheBatch);
2404 p->goidcacheend = p->goidcache + GoidCacheBatch;
2406 newg->goid = p->goidcache++;
2409 // Avoid warnings about variables clobbered by
2410 // longjmp.
2411 byte * volatile vsp = sp;
2412 size_t volatile vspsize = spsize;
2413 G * volatile vnewg = newg;
2414 ucontext_t * volatile uc;
2416 uc = ucontext_arg(&vnewg->context[0]);
2417 getcontext(uc);
2418 uc->uc_stack.ss_sp = vsp;
2419 uc->uc_stack.ss_size = vspsize;
2420 makecontext(uc, kickoff, 0);
2422 runqput(p, vnewg);
2424 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
2425 wakep();
2426 g->m->locks--;
2427 return vnewg;
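// Illustrative sketch, not compiled: roughly the shape of what the
// compiler emits for a statement like `go worker(42)`.  Only __go_go is
// real here; worker, worker_args, worker_trampoline and spawn_worker are
// hypothetical, and the real argument/closure layout is more involved.
#if 0
struct worker_args { int n; };

static void
worker_trampoline(void *arg)
{
	struct worker_args *a = (struct worker_args *)arg;
	worker(a->n);	// the user's function body runs on the new G
}

static void
spawn_worker(void)
{
	struct worker_args *a;

	a = runtime_mallocgc(sizeof *a, 0, 0);	// argument block must outlive the caller's frame
	a->n = 42;
	__go_go(worker_trampoline, a);		// new G becomes _Grunnable on this P's runq
}
#endif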
2431 static void
2432 allgadd(G *gp)
2434 G **new;
2435 uintptr cap;
2437 runtime_lock(&allglock);
2438 if(runtime_allglen >= allgcap) {
2439 cap = 4096/sizeof(new[0]);
2440 if(cap < 2*allgcap)
2441 cap = 2*allgcap;
2442 new = runtime_malloc(cap*sizeof(new[0]));
2443 if(new == nil)
2444 runtime_throw("runtime: cannot allocate memory");
2445 if(runtime_allg != nil) {
2446 runtime_memmove(new, runtime_allg, runtime_allglen*sizeof(new[0]));
2447 runtime_free(runtime_allg);
2449 runtime_allg = new;
2450 allgcap = cap;
2452 runtime_allg[runtime_allglen++] = gp;
2453 runtime_unlock(&allglock);
2456 // Put on gfree list.
2457 // If local list is too long, transfer a batch to the global list.
2458 static void
2459 gfput(P *p, G *gp)
2461 gp->schedlink = (uintptr)p->gfree;
2462 p->gfree = gp;
2463 p->gfreecnt++;
2464 if(p->gfreecnt >= 64) {
2465 runtime_lock(&runtime_sched.gflock);
2466 while(p->gfreecnt >= 32) {
2467 p->gfreecnt--;
2468 gp = p->gfree;
2469 p->gfree = (G*)gp->schedlink;
2470 gp->schedlink = (uintptr)runtime_sched.gfree;
2471 runtime_sched.gfree = gp;
2473 runtime_unlock(&runtime_sched.gflock);
2477 // Get from gfree list.
2478 // If local list is empty, grab a batch from global list.
2479 static G*
2480 gfget(P *p)
2482 G *gp;
2484 retry:
2485 gp = p->gfree;
2486 if(gp == nil && runtime_sched.gfree) {
2487 runtime_lock(&runtime_sched.gflock);
2488 while(p->gfreecnt < 32 && runtime_sched.gfree) {
2489 p->gfreecnt++;
2490 gp = runtime_sched.gfree;
2491 runtime_sched.gfree = (G*)gp->schedlink;
2492 gp->schedlink = (uintptr)p->gfree;
2493 p->gfree = gp;
2495 runtime_unlock(&runtime_sched.gflock);
2496 goto retry;
2498 if(gp) {
2499 p->gfree = (G*)gp->schedlink;
2500 p->gfreecnt--;
2502 return gp;
2505 // Purge all cached G's from gfree list to the global list.
2506 static void
2507 gfpurge(P *p)
2509 G *gp;
2511 runtime_lock(&runtime_sched.gflock);
2512 while(p->gfreecnt) {
2513 p->gfreecnt--;
2514 gp = p->gfree;
2515 p->gfree = (G*)gp->schedlink;
2516 gp->schedlink = (uintptr)runtime_sched.gfree;
2517 runtime_sched.gfree = gp;
2519 runtime_unlock(&runtime_sched.gflock);
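// Sketch tying the free-list functions above to their callers: goexit0
// parks a dead G with gfput, and __go_go prefers gfget over allocating a
// fresh G with runtime_malg.  example_recycle is hypothetical glue that
// only shows the two halves of the lifecycle side by side.
#if 0
static G*
example_recycle(P *p, G *deadg)
{
	byte *sp;
	uintptr spsize;
	G *gp;

	gfput(p, deadg);	// dead G goes on p's local free list (as in goexit0)
	gp = gfget(p);		// reuse a cached G if one is available (as in __go_go)
	if(gp == nil)
		gp = runtime_malg(StackMin, &sp, &spsize);	// otherwise allocate a fresh one
	return gp;
}
#endif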
2522 void
2523 runtime_Breakpoint(void)
2525 runtime_breakpoint();
2528 void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched");
2530 void
2531 runtime_Gosched(void)
2533 runtime_gosched();
2536 // Implementation of runtime.GOMAXPROCS.
2537 // delete when scheduler is even stronger
2538 int32
2539 runtime_gomaxprocsfunc(int32 n)
2541 int32 ret;
2543 if(n > _MaxGomaxprocs)
2544 n = _MaxGomaxprocs;
2545 runtime_lock(&runtime_sched);
2546 ret = runtime_gomaxprocs;
2547 if(n <= 0 || n == ret) {
2548 runtime_unlock(&runtime_sched);
2549 return ret;
2551 runtime_unlock(&runtime_sched);
2553 runtime_acquireWorldsema();
2554 g->m->gcing = 1;
2555 runtime_stopTheWorldWithSema();
2556 newprocs = n;
2557 g->m->gcing = 0;
2558 runtime_releaseWorldsema();
2559 runtime_startTheWorldWithSema();
2561 return ret;
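// Illustrative sketch: runtime.GOMAXPROCS lands here.  Passing n <= 0
// only reports the current value; a positive n stops the world, records
// the request in newprocs, and the restart path is expected to apply it
// via procresize below.  example_double_procs is a hypothetical caller.
#if 0
static int32
example_double_procs(void)
{
	int32 old;

	old = runtime_gomaxprocsfunc(0);	// query without changing anything
	return runtime_gomaxprocsfunc(2*old);	// returns the previous setting
}
#endif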
2564 // lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
2565 // after they modify m->locked. Do not allow preemption during this call,
2566 // or else the m might be different in this function than in the caller.
2567 static void
2568 lockOSThread(void)
2570 g->m->lockedg = g;
2571 g->lockedm = g->m;
2574 void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
2575 void
2576 runtime_LockOSThread(void)
2578 g->m->locked |= _LockExternal;
2579 lockOSThread();
2582 void
2583 runtime_lockOSThread(void)
2585 g->m->locked += _LockInternal;
2586 lockOSThread();
2590 // unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
2591 // after they update m->locked. Do not allow preemption during this call,
2592 // or else the m might be different in this function than in the caller.
2593 static void
2594 unlockOSThread(void)
2596 if(g->m->locked != 0)
2597 return;
2598 g->m->lockedg = nil;
2599 g->lockedm = nil;
2602 void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
2604 void
2605 runtime_UnlockOSThread(void)
2607 g->m->locked &= ~_LockExternal;
2608 unlockOSThread();
2611 void
2612 runtime_unlockOSThread(void)
2614 if(g->m->locked < _LockInternal)
2615 runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
2616 g->m->locked -= _LockInternal;
2617 unlockOSThread();
2620 bool
2621 runtime_lockedOSThread(void)
2623 return g->lockedm != nil && g->m->lockedg != nil;
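// Sketch of how the two parts of m->locked combine: runtime.LockOSThread
// sets the single _LockExternal bit, while the internal variant counts
// nested _LockInternal uses; unlockOSThread only releases the thread once
// both are zero.  example_nested_internal_lock is hypothetical.
#if 0
static void
example_nested_internal_lock(void)
{
	runtime_lockOSThread();		// m->locked += _LockInternal (now 1)
	runtime_lockOSThread();		// nested use, counter goes to 2
	runtime_unlockOSThread();	// back to 1, g stays wired to this M
	runtime_unlockOSThread();	// 0: lockedg/lockedm are cleared
}
#endif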
2626 int32
2627 runtime_gcount(void)
2629 G *gp;
2630 int32 n, s;
2631 uintptr i;
2633 n = 0;
2634 runtime_lock(&allglock);
2635 // TODO(dvyukov): runtime.NumGoroutine() is O(N).
2636 // We do not want to increment/decrement centralized counter in newproc/goexit,
2637 // just to make runtime.NumGoroutine() faster.
2638 // Compromise solution is to introduce per-P counters of active goroutines.
2639 for(i = 0; i < runtime_allglen; i++) {
2640 gp = runtime_allg[i];
2641 s = gp->atomicstatus;
2642 if(s == _Grunnable || s == _Grunning || s == _Gsyscall || s == _Gwaiting)
2643 n++;
2645 runtime_unlock(&allglock);
2646 return n;
2649 int32
2650 runtime_mcount(void)
2652 return runtime_sched.mcount;
2655 static struct {
2656 uint32 lock;
2657 int32 hz;
2658 } prof;
2660 static void System(void) {}
2661 static void GC(void) {}
2663 // Called if we receive a SIGPROF signal.
2664 void
2665 runtime_sigprof()
2667 M *mp = g->m;
2668 int32 n, i;
2669 bool traceback;
2670 uintptr pcbuf[TracebackMaxFrames];
2671 Location locbuf[TracebackMaxFrames];
2672 Slice stk;
2674 if(prof.hz == 0)
2675 return;
2677 if(mp == nil)
2678 return;
2680 // Profiling runs concurrently with GC, so it must not allocate.
2681 mp->mallocing++;
2683 traceback = true;
2685 if(mp->mcache == nil)
2686 traceback = false;
2688 n = 0;
2690 if(runtime_atomicload(&runtime_in_callers) > 0) {
2691 // If SIGPROF arrived while already fetching runtime
2692 // callers we can have trouble on older systems
2693 // because the unwind library calls dl_iterate_phdr
2694 // which was not recursive in the past.
2695 traceback = false;
2698 if(traceback) {
2699 n = runtime_callers(0, locbuf, nelem(locbuf), false);
2700 for(i = 0; i < n; i++)
2701 pcbuf[i] = locbuf[i].pc;
2703 if(!traceback || n <= 0) {
2704 n = 2;
2705 pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
2706 if(mp->gcing || mp->helpgc)
2707 pcbuf[1] = (uintptr)GC;
2708 else
2709 pcbuf[1] = (uintptr)System;
2712 if (prof.hz != 0) {
2713 stk.__values = &pcbuf[0];
2714 stk.__count = n;
2715 stk.__capacity = n;
2717 // Simple cas-lock to coordinate with setcpuprofilerate.
2718 while (!runtime_cas(&prof.lock, 0, 1)) {
2719 runtime_osyield();
2721 if (prof.hz != 0) {
2722 runtime_cpuprofAdd(stk);
2724 runtime_atomicstore(&prof.lock, 0);
2727 mp->mallocing--;
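// The small cas-based lock used around prof above, in isolation: spin
// with runtime_osyield until the word flips 0 -> 1, then store 0 to
// release.  sigprof and setcpuprofilerate_m both take it this way so a
// profiling signal cannot observe prof.hz mid-update.  The helper names
// are hypothetical.
#if 0
static void
example_proflock(uint32 *l)
{
	while(!runtime_cas(l, 0, 1))
		runtime_osyield();
}

static void
example_profunlock(uint32 *l)
{
	runtime_atomicstore(l, 0);
}
#endif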
2730 // Arrange for the profiler to take a traceback hz times a second.
2731 void
2732 runtime_setcpuprofilerate_m(int32 hz)
2734 // Force sane arguments.
2735 if(hz < 0)
2736 hz = 0;
2738 // Disable preemption, otherwise we can be rescheduled to another thread
2739 // that has profiling enabled.
2740 g->m->locks++;
2742 // Stop profiler on this thread so that it is safe to lock prof.
2743 // If a profiling signal came in while we had prof locked,
2744 // it would deadlock.
2745 runtime_resetcpuprofiler(0);
2747 while (!runtime_cas(&prof.lock, 0, 1)) {
2748 runtime_osyield();
2750 prof.hz = hz;
2751 runtime_atomicstore(&prof.lock, 0);
2753 runtime_lock(&runtime_sched);
2754 runtime_sched.profilehz = hz;
2755 runtime_unlock(&runtime_sched);
2757 if(hz != 0)
2758 runtime_resetcpuprofiler(hz);
2760 g->m->locks--;
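// Minimal sketch, under the assumption that the platform profiler is an
// interval timer: arming ITIMER_PROF so the kernel delivers SIGPROF
// roughly hz times per second of CPU time, which ends up in
// runtime_sigprof above.  The real hookup lives in
// runtime_resetcpuprofiler elsewhere in the runtime; the names below are
// hypothetical.
#if 0
#include <sys/time.h>

static void
example_arm_profiler(int32 hz)
{
	struct itimerval it;

	runtime_memclr((byte*)&it, sizeof it);
	if(hz > 0) {
		it.it_interval.tv_usec = 1000000 / hz;
		it.it_value = it.it_interval;
	}
	setitimer(ITIMER_PROF, &it, nil);	// hz == 0 disarms the timer
}
#endif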
2763 // Change number of processors. The world is stopped, sched is locked.
2764 static void
2765 procresize(int32 new)
2767 int32 i, old;
2768 bool pempty;
2769 G *gp;
2770 P *p;
2772 old = runtime_gomaxprocs;
2773 if(old < 0 || old > _MaxGomaxprocs || new <= 0 || new >_MaxGomaxprocs)
2774 runtime_throw("procresize: invalid arg");
2775 // initialize new P's
2776 for(i = 0; i < new; i++) {
2777 p = runtime_allp[i];
2778 if(p == nil) {
2779 p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
2780 p->id = i;
2781 p->status = _Pgcstop;
2782 runtime_atomicstorep(&runtime_allp[i], p);
2784 if(p->mcache == nil) {
2785 if(old==0 && i==0)
2786 p->mcache = g->m->mcache; // bootstrap
2787 else
2788 p->mcache = runtime_allocmcache();
2792 // redistribute runnable G's evenly
2793 // collect all runnable goroutines in global queue preserving FIFO order
2794 // FIFO order is required to ensure fairness even during frequent GCs
2795 // see http://golang.org/issue/7126
2796 pempty = false;
2797 while(!pempty) {
2798 pempty = true;
2799 for(i = 0; i < old; i++) {
2800 p = runtime_allp[i];
2801 if(p->runqhead == p->runqtail)
2802 continue;
2803 pempty = false;
2804 // pop from tail of local queue
2805 p->runqtail--;
2806 gp = (G*)p->runq[p->runqtail%nelem(p->runq)];
2807 // push onto head of global queue
2808 gp->schedlink = (uintptr)runtime_sched.runqhead;
2809 runtime_sched.runqhead = gp;
2810 if(runtime_sched.runqtail == nil)
2811 runtime_sched.runqtail = gp;
2812 runtime_sched.runqsize++;
2815 // fill local queues with at most nelem(p->runq)/2 goroutines
2816 // start at 1 because current M already executes some G and will acquire allp[0] below,
2817 // so if we have a spare G we want to put it into allp[1].
2818 for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched.runqsize > 0; i++) {
2819 gp = runtime_sched.runqhead;
2820 runtime_sched.runqhead = (G*)gp->schedlink;
2821 if(runtime_sched.runqhead == nil)
2822 runtime_sched.runqtail = nil;
2823 runtime_sched.runqsize--;
2824 runqput(runtime_allp[i%new], gp);
2827 // free unused P's
2828 for(i = new; i < old; i++) {
2829 p = runtime_allp[i];
2830 runtime_freemcache(p->mcache);
2831 p->mcache = nil;
2832 gfpurge(p);
2833 p->status = _Pdead;
2834 // can't free P itself because it can be referenced by an M in syscall
2837 if(g->m->p)
2838 ((P*)g->m->p)->m = 0;
2839 g->m->p = 0;
2840 g->m->mcache = nil;
2841 p = runtime_allp[0];
2842 p->m = 0;
2843 p->status = _Pidle;
2844 acquirep(p);
2845 for(i = new-1; i > 0; i--) {
2846 p = runtime_allp[i];
2847 p->status = _Pidle;
2848 pidleput(p);
2850 runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
2853 // Associate p and the current m.
2854 static void
2855 acquirep(P *p)
2857 M *m;
2859 m = g->m;
2860 if(m->p || m->mcache)
2861 runtime_throw("acquirep: already in go");
2862 if(p->m || p->status != _Pidle) {
2863 runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? ((M*)p->m)->id : 0, p->status);
2864 runtime_throw("acquirep: invalid p state");
2866 m->mcache = p->mcache;
2867 m->p = (uintptr)p;
2868 p->m = (uintptr)m;
2869 p->status = _Prunning;
2872 // Disassociate p and the current m.
2873 static P*
2874 releasep(void)
2876 M *m;
2877 P *p;
2879 m = g->m;
2880 if(m->p == 0 || m->mcache == nil)
2881 runtime_throw("releasep: invalid arg");
2882 p = (P*)m->p;
2883 if((M*)p->m != m || p->mcache != m->mcache || p->status != _Prunning) {
2884 runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
2885 m, m->p, p->m, m->mcache, p->mcache, p->status);
2886 runtime_throw("releasep: invalid p state");
2888 m->p = 0;
2889 m->mcache = nil;
2890 p->m = 0;
2891 p->status = _Pidle;
2892 return p;
2895 static void
2896 incidlelocked(int32 v)
2898 runtime_lock(&runtime_sched);
2899 runtime_sched.nmidlelocked += v;
2900 if(v > 0)
2901 checkdead();
2902 runtime_unlock(&runtime_sched);
2905 // Check for deadlock situation.
2906 // The check is based on the number of running M's; if it is 0, we have a deadlock.
2907 static void
2908 checkdead(void)
2910 G *gp;
2911 int32 run, grunning, s;
2912 uintptr i;
2914 // For -buildmode=c-shared or -buildmode=c-archive it's OK if
2915 // there are no running goroutines. The calling program is
2916 // assumed to be running.
2917 if(runtime_isarchive) {
2918 return;
2921 // -1 for sysmon
2922 run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.nmidlelocked - 1 - countextra();
2923 if(run > 0)
2924 return;
2925 // If we are dying because of a signal caught on an already idle thread,
2926 // freezetheworld will cause all running threads to block.
2927 // And runtime will essentially enter into deadlock state,
2928 // except that there is a thread that will call runtime_exit soon.
2929 if(runtime_panicking > 0)
2930 return;
2931 if(run < 0) {
2932 runtime_printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
2933 runtime_sched.nmidle, runtime_sched.nmidlelocked, runtime_sched.mcount);
2934 runtime_throw("checkdead: inconsistent counts");
2936 grunning = 0;
2937 runtime_lock(&allglock);
2938 for(i = 0; i < runtime_allglen; i++) {
2939 gp = runtime_allg[i];
2940 if(gp->isbackground)
2941 continue;
2942 s = gp->atomicstatus;
2943 if(s == _Gwaiting)
2944 grunning++;
2945 else if(s == _Grunnable || s == _Grunning || s == _Gsyscall) {
2946 runtime_unlock(&allglock);
2947 runtime_printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);
2948 runtime_throw("checkdead: runnable g");
2951 runtime_unlock(&allglock);
2952 if(grunning == 0) // possible if main goroutine calls runtime_Goexit()
2953 runtime_throw("no goroutines (main called runtime.Goexit) - deadlock!");
2954 g->m->throwing = -1; // do not dump full stacks
2955 runtime_throw("all goroutines are asleep - deadlock!");
2958 static void
2959 sysmon(void)
2961 uint32 idle, delay;
2962 int64 now, lastpoll, lasttrace;
2963 G *gp;
2965 lasttrace = 0;
2966 idle = 0; // how many cycles in succession we have not woken anybody up
2967 delay = 0;
2968 for(;;) {
2969 if(idle == 0) // start with 20us sleep...
2970 delay = 20;
2971 else if(idle > 50) // start doubling the sleep after 1ms...
2972 delay *= 2;
2973 if(delay > 10*1000) // up to 10ms
2974 delay = 10*1000;
2975 runtime_usleep(delay);
2976 if(runtime_debug.schedtrace <= 0 &&
2977 (runtime_sched.gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic
2978 runtime_lock(&runtime_sched);
2979 if(runtime_atomicload(&runtime_sched.gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) {
2980 runtime_atomicstore(&runtime_sched.sysmonwait, 1);
2981 runtime_unlock(&runtime_sched);
2982 runtime_notesleep(&runtime_sched.sysmonnote);
2983 runtime_noteclear(&runtime_sched.sysmonnote);
2984 idle = 0;
2985 delay = 20;
2986 } else
2987 runtime_unlock(&runtime_sched);
2989 // poll network if not polled for more than 10ms
2990 lastpoll = runtime_atomicload64(&runtime_sched.lastpoll);
2991 now = runtime_nanotime();
2992 if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
2993 runtime_cas64(&runtime_sched.lastpoll, lastpoll, now);
2994 gp = runtime_netpoll(false); // non-blocking
2995 if(gp) {
2996 // Need to decrement number of idle locked M's
2997 // (pretending that one more is running) before injectglist.
2998 // Otherwise it can lead to the following situation:
2999 // injectglist grabs all P's but before it starts M's to run the P's,
3000 // another M returns from syscall, finishes running its G,
3001 // observes that there is no work to do and no other running M's
3002 // and reports deadlock.
3003 incidlelocked(-1);
3004 injectglist(gp);
3005 incidlelocked(1);
3008 // retake P's blocked in syscalls
3009 // and preempt long running G's
3010 if(retake(now))
3011 idle = 0;
3012 else
3013 idle++;
3015 if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) {
3016 lasttrace = now;
3017 runtime_schedtrace(runtime_debug.scheddetail);
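// The sleep schedule sysmon uses above, pulled out for clarity: 20us
// while work keeps turning up, doubling once 50 consecutive idle cycles
// have passed, capped at 10ms.  sysmon_next_delay is a hypothetical
// helper, not part of the runtime.
#if 0
static uint32
sysmon_next_delay(uint32 idle, uint32 delay)
{
	if(idle == 0)		// found work last cycle: reset to 20us
		delay = 20;
	else if(idle > 50)	// long idle streak: back off exponentially
		delay *= 2;
	if(delay > 10*1000)	// never sleep longer than 10ms
		delay = 10*1000;
	return delay;		// microseconds, fed to runtime_usleep
}
#endif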
3022 typedef struct Pdesc Pdesc;
3023 struct Pdesc
3025 uint32 schedtick;
3026 int64 schedwhen;
3027 uint32 syscalltick;
3028 int64 syscallwhen;
3030 static Pdesc pdesc[_MaxGomaxprocs];
3032 static uint32
3033 retake(int64 now)
3035 uint32 i, s, n;
3036 int64 t;
3037 P *p;
3038 Pdesc *pd;
3040 n = 0;
3041 for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
3042 p = runtime_allp[i];
3043 if(p==nil)
3044 continue;
3045 pd = &pdesc[i];
3046 s = p->status;
3047 if(s == _Psyscall) {
3048 // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
3049 t = p->syscalltick;
3050 if(pd->syscalltick != t) {
3051 pd->syscalltick = t;
3052 pd->syscallwhen = now;
3053 continue;
3055 // On the one hand we don't want to retake Ps if there is no other work to do,
3056 // but on the other hand we want to retake them eventually
3057 // because they can prevent the sysmon thread from deep sleep.
3058 if(p->runqhead == p->runqtail &&
3059 runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0 &&
3060 pd->syscallwhen + 10*1000*1000 > now)
3061 continue;
3062 // Need to decrement number of idle locked M's
3063 // (pretending that one more is running) before the CAS.
3064 // Otherwise the M from which we retake can exit the syscall,
3065 // increment nmidle and report deadlock.
3066 incidlelocked(-1);
3067 if(runtime_cas(&p->status, s, _Pidle)) {
3068 n++;
3069 handoffp(p);
3071 incidlelocked(1);
3072 } else if(s == _Prunning) {
3073 // Preempt G if it's running for more than 10ms.
3074 t = p->schedtick;
3075 if(pd->schedtick != t) {
3076 pd->schedtick = t;
3077 pd->schedwhen = now;
3078 continue;
3080 if(pd->schedwhen + 10*1000*1000 > now)
3081 continue;
3082 // preemptone(p);
3085 return n;
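// The tick/timestamp pattern retake relies on, in isolation: remember
// the last tick we observed and when we first saw it, and only act once
// the same tick has stayed visible for longer than the threshold.  The
// helper below is hypothetical.
#if 0
static bool
example_stuck_longer_than(uint32 tick, uint32 *lasttick, int64 *lastwhen,
			  int64 now, int64 threshold)
{
	if(*lasttick != tick) {		// progress was made: restart the clock
		*lasttick = tick;
		*lastwhen = now;
		return false;
	}
	return *lastwhen + threshold <= now;
}
#endif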
3088 // Tell all goroutines that they have been preempted and they should stop.
3089 // This function is purely best-effort. It can fail to inform a goroutine if a
3090 // processor just started running it.
3091 // No locks need to be held.
3092 // Returns true if preemption request was issued to at least one goroutine.
3093 static bool
3094 preemptall(void)
3096 return false;
3099 void
3100 runtime_schedtrace(bool detailed)
3102 static int64 starttime;
3103 int64 now;
3104 int64 id1, id2, id3;
3105 int32 i, t, h;
3106 uintptr gi;
3107 const char *fmt;
3108 M *mp, *lockedm;
3109 G *gp, *lockedg;
3110 P *p;
3112 now = runtime_nanotime();
3113 if(starttime == 0)
3114 starttime = now;
3116 runtime_lock(&runtime_sched);
3117 runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
3118 (now-starttime)/1000000, runtime_gomaxprocs, runtime_sched.npidle, runtime_sched.mcount,
3119 runtime_sched.nmidle, runtime_sched.runqsize);
3120 if(detailed) {
3121 runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
3122 runtime_sched.gcwaiting, runtime_sched.nmidlelocked, runtime_sched.nmspinning,
3123 runtime_sched.stopwait, runtime_sched.sysmonwait);
3125 // We must be careful while reading data from P's, M's and G's.
3126 // Even if we hold schedlock, most data can be changed concurrently.
3127 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
3128 for(i = 0; i < runtime_gomaxprocs; i++) {
3129 p = runtime_allp[i];
3130 if(p == nil)
3131 continue;
3132 mp = (M*)p->m;
3133 h = runtime_atomicload(&p->runqhead);
3134 t = runtime_atomicload(&p->runqtail);
3135 if(detailed)
3136 runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
3137 i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
3138 else {
3139 // In non-detailed mode format lengths of per-P run queues as:
3140 // [len1 len2 len3 len4]
3141 fmt = " %d";
3142 if(runtime_gomaxprocs == 1)
3143 fmt = " [%d]\n";
3144 else if(i == 0)
3145 fmt = " [%d";
3146 else if(i == runtime_gomaxprocs-1)
3147 fmt = " %d]\n";
3148 runtime_printf(fmt, t-h);
3151 if(!detailed) {
3152 runtime_unlock(&runtime_sched);
3153 return;
3155 for(mp = runtime_allm; mp; mp = mp->alllink) {
3156 p = (P*)mp->p;
3157 gp = mp->curg;
3158 lockedg = mp->lockedg;
3159 id1 = -1;
3160 if(p)
3161 id1 = p->id;
3162 id2 = -1;
3163 if(gp)
3164 id2 = gp->goid;
3165 id3 = -1;
3166 if(lockedg)
3167 id3 = lockedg->goid;
3168 runtime_printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
3169 " locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
3170 mp->id, id1, id2,
3171 mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
3172 mp->spinning, mp->blocked, id3);
3174 runtime_lock(&allglock);
3175 for(gi = 0; gi < runtime_allglen; gi++) {
3176 gp = runtime_allg[gi];
3177 mp = gp->m;
3178 lockedm = gp->lockedm;
3179 runtime_printf(" G%D: status=%d(%S) m=%d lockedm=%d\n",
3180 gp->goid, gp->atomicstatus, gp->waitreason, mp ? mp->id : -1,
3181 lockedm ? lockedm->id : -1);
3183 runtime_unlock(&allglock);
3184 runtime_unlock(&runtime_sched);
3187 // Put mp on midle list.
3188 // Sched must be locked.
3189 static void
3190 mput(M *mp)
3192 mp->schedlink = (uintptr)runtime_sched.midle;
3193 runtime_sched.midle = mp;
3194 runtime_sched.nmidle++;
3195 checkdead();
3198 // Try to get an m from midle list.
3199 // Sched must be locked.
3200 static M*
3201 mget(void)
3203 M *mp;
3205 if((mp = runtime_sched.midle) != nil){
3206 runtime_sched.midle = (M*)mp->schedlink;
3207 runtime_sched.nmidle--;
3209 return mp;
3212 // Put gp on the global runnable queue.
3213 // Sched must be locked.
3214 static void
3215 globrunqput(G *gp)
3217 gp->schedlink = 0;
3218 if(runtime_sched.runqtail)
3219 runtime_sched.runqtail->schedlink = (uintptr)gp;
3220 else
3221 runtime_sched.runqhead = gp;
3222 runtime_sched.runqtail = gp;
3223 runtime_sched.runqsize++;
3226 // Put a batch of runnable goroutines on the global runnable queue.
3227 // Sched must be locked.
3228 static void
3229 globrunqputbatch(G *ghead, G *gtail, int32 n)
3231 gtail->schedlink = 0;
3232 if(runtime_sched.runqtail)
3233 runtime_sched.runqtail->schedlink = (uintptr)ghead;
3234 else
3235 runtime_sched.runqhead = ghead;
3236 runtime_sched.runqtail = gtail;
3237 runtime_sched.runqsize += n;
3240 // Try to get a batch of G's from the global runnable queue.
3241 // Sched must be locked.
3242 static G*
3243 globrunqget(P *p, int32 max)
3245 G *gp, *gp1;
3246 int32 n;
3248 if(runtime_sched.runqsize == 0)
3249 return nil;
3250 n = runtime_sched.runqsize/runtime_gomaxprocs+1;
3251 if(n > runtime_sched.runqsize)
3252 n = runtime_sched.runqsize;
3253 if(max > 0 && n > max)
3254 n = max;
3255 if((uint32)n > nelem(p->runq)/2)
3256 n = nelem(p->runq)/2;
3257 runtime_sched.runqsize -= n;
3258 if(runtime_sched.runqsize == 0)
3259 runtime_sched.runqtail = nil;
3260 gp = runtime_sched.runqhead;
3261 runtime_sched.runqhead = (G*)gp->schedlink;
3262 n--;
3263 while(n--) {
3264 gp1 = runtime_sched.runqhead;
3265 runtime_sched.runqhead = (G*)gp1->schedlink;
3266 runqput(p, gp1);
3268 return gp;
3271 // Put p on the pidle list.
3272 // Sched must be locked.
3273 static void
3274 pidleput(P *p)
3276 p->link = (uintptr)runtime_sched.pidle;
3277 runtime_sched.pidle = p;
3278 runtime_xadd(&runtime_sched.npidle, 1); // TODO: fast atomic
3281 // Try to get a p from the pidle list.
3282 // Sched must be locked.
3283 static P*
3284 pidleget(void)
3286 P *p;
3288 p = runtime_sched.pidle;
3289 if(p) {
3290 runtime_sched.pidle = (P*)p->link;
3291 runtime_xadd(&runtime_sched.npidle, -1); // TODO: fast atomic
3293 return p;
3296 // Try to put g on local runnable queue.
3297 // If it's full, put onto global queue.
3298 // Executed only by the owner P.
3299 static void
3300 runqput(P *p, G *gp)
3302 uint32 h, t;
3304 retry:
3305 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3306 t = p->runqtail;
3307 if(t - h < nelem(p->runq)) {
3308 p->runq[t%nelem(p->runq)] = (uintptr)gp;
3309 runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
3310 return;
3312 if(runqputslow(p, gp, h, t))
3313 return;
3314 // the queue is not full, so now the put above must succeed
3315 goto retry;
3318 // Put g and a batch of work from local runnable queue on global queue.
3319 // Executed only by the owner P.
3320 static bool
3321 runqputslow(P *p, G *gp, uint32 h, uint32 t)
3323 G *batch[nelem(p->runq)/2+1];
3324 uint32 n, i;
3326 // First, grab a batch from local queue.
3327 n = t-h;
3328 n = n/2;
3329 if(n != nelem(p->runq)/2)
3330 runtime_throw("runqputslow: queue is not full");
3331 for(i=0; i<n; i++)
3332 batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
3333 if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3334 return false;
3335 batch[n] = gp;
3336 // Link the goroutines.
3337 for(i=0; i<n; i++)
3338 batch[i]->schedlink = (uintptr)batch[i+1];
3339 // Now put the batch on global queue.
3340 runtime_lock(&runtime_sched);
3341 globrunqputbatch(batch[0], batch[n], n+1);
3342 runtime_unlock(&runtime_sched);
3343 return true;
3346 // Get g from local runnable queue.
3347 // Executed only by the owner P.
3348 static G*
3349 runqget(P *p)
3351 G *gp;
3352 uint32 t, h;
3354 for(;;) {
3355 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3356 t = p->runqtail;
3357 if(t == h)
3358 return nil;
3359 gp = (G*)p->runq[h%nelem(p->runq)];
3360 if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume
3361 return gp;
3365 // Grabs a batch of goroutines from local runnable queue.
3366 // batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
3367 // Can be executed by any P.
3368 static uint32
3369 runqgrab(P *p, G **batch)
3371 uint32 t, h, n, i;
3373 for(;;) {
3374 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3375 t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer
3376 n = t-h;
3377 n = n - n/2;
3378 if(n == 0)
3379 break;
3380 if(n > nelem(p->runq)/2) // read inconsistent h and t
3381 continue;
3382 for(i=0; i<n; i++)
3383 batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
3384 if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3385 break;
3387 return n;
3390 // Steal half of elements from local runnable queue of p2
3391 // and put onto local runnable queue of p.
3392 // Returns one of the stolen elements (or nil if failed).
3393 static G*
3394 runqsteal(P *p, P *p2)
3396 G *gp;
3397 G *batch[nelem(p->runq)/2];
3398 uint32 t, h, n, i;
3400 n = runqgrab(p2, batch);
3401 if(n == 0)
3402 return nil;
3403 n--;
3404 gp = batch[n];
3405 if(n == 0)
3406 return gp;
3407 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3408 t = p->runqtail;
3409 if(t - h + n >= nelem(p->runq))
3410 runtime_throw("runqsteal: runq overflow");
3411 for(i=0; i<n; i++, t++)
3412 p->runq[t%nelem(p->runq)] = (uintptr)batch[i];
3413 runtime_atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption
3414 return gp;
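// Self-contained sketch of the runq protocol above, rewritten with C11
// atomics instead of runtime_atomicload/runtime_atomicstore/runtime_cas.
// The owner P is the only producer and bumps tail with a release store;
// any P may consume by advancing head with a CAS.  All names below are
// hypothetical; example_runq is not the runtime's actual type.
#if 0
#include <stdatomic.h>
#include <stdint.h>
#include <stddef.h>

#define EXAMPLE_RUNQ_SIZE 256

struct example_runq {
	_Atomic uint32_t head;			// advanced by any consumer
	_Atomic uint32_t tail;			// advanced only by the owner
	void *slot[EXAMPLE_RUNQ_SIZE];
};

static int
example_runq_put(struct example_runq *q, void *gp)	// owner only
{
	uint32_t h = atomic_load_explicit(&q->head, memory_order_acquire);
	uint32_t t = atomic_load_explicit(&q->tail, memory_order_relaxed);

	if(t - h >= EXAMPLE_RUNQ_SIZE)
		return 0;			// full: caller spills to the global queue
	q->slot[t % EXAMPLE_RUNQ_SIZE] = gp;
	atomic_store_explicit(&q->tail, t + 1, memory_order_release);
	return 1;
}

static void *
example_runq_get(struct example_runq *q)		// any consumer
{
	for(;;) {
		uint32_t h = atomic_load_explicit(&q->head, memory_order_acquire);
		uint32_t t = atomic_load_explicit(&q->tail, memory_order_acquire);
		if(t == h)
			return NULL;			// empty
		void *gp = q->slot[h % EXAMPLE_RUNQ_SIZE];
		// The CAS commits the consume; losing it means another
		// consumer took the slot, so reload and retry.
		if(atomic_compare_exchange_weak_explicit(&q->head, &h, h + 1,
		    memory_order_acq_rel, memory_order_acquire))
			return gp;
	}
}
#endif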
3417 void runtime_testSchedLocalQueue(void)
3418 __asm__("runtime.testSchedLocalQueue");
3420 void
3421 runtime_testSchedLocalQueue(void)
3423 P p;
3424 G gs[nelem(p.runq)];
3425 int32 i, j;
3427 runtime_memclr((byte*)&p, sizeof(p));
3429 for(i = 0; i < (int32)nelem(gs); i++) {
3430 if(runqget(&p) != nil)
3431 runtime_throw("runq is not empty initially");
3432 for(j = 0; j < i; j++)
3433 runqput(&p, &gs[i]);
3434 for(j = 0; j < i; j++) {
3435 if(runqget(&p) != &gs[i]) {
3436 runtime_printf("bad element at iter %d/%d\n", i, j);
3437 runtime_throw("bad element");
3440 if(runqget(&p) != nil)
3441 runtime_throw("runq is not empty afterwards");
3445 void runtime_testSchedLocalQueueSteal(void)
3446 __asm__("runtime.testSchedLocalQueueSteal");
3448 void
3449 runtime_testSchedLocalQueueSteal(void)
3451 P p1, p2;
3452 G gs[nelem(p1.runq)], *gp;
3453 int32 i, j, s;
3455 runtime_memclr((byte*)&p1, sizeof(p1));
3456 runtime_memclr((byte*)&p2, sizeof(p2));
3458 for(i = 0; i < (int32)nelem(gs); i++) {
3459 for(j = 0; j < i; j++) {
3460 gs[j].sig = 0;
3461 runqput(&p1, &gs[j]);
3463 gp = runqsteal(&p2, &p1);
3464 s = 0;
3465 if(gp) {
3466 s++;
3467 gp->sig++;
3469 while((gp = runqget(&p2)) != nil) {
3470 s++;
3471 gp->sig++;
3473 while((gp = runqget(&p1)) != nil)
3474 gp->sig++;
3475 for(j = 0; j < i; j++) {
3476 if(gs[j].sig != 1) {
3477 runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
3478 runtime_throw("bad element");
3481 if(s != i/2 && s != i/2+1) {
3482 runtime_printf("bad steal %d, want %d or %d, iter %d\n",
3483 s, i/2, i/2+1, i);
3484 runtime_throw("bad steal");
3489 intgo
3490 runtime_setmaxthreads(intgo in)
3492 intgo out;
3494 runtime_lock(&runtime_sched);
3495 out = (intgo)runtime_sched.maxmcount;
3496 runtime_sched.maxmcount = (int32)in;
3497 checkmcount();
3498 runtime_unlock(&runtime_sched);
3499 return out;
3502 void
3503 runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
3505 enqueue1(wbufp, (Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0});
3506 enqueue1(wbufp, (Obj){(byte*)&runtime_main_init_done, sizeof runtime_main_init_done, 0});
3509 // Return whether we are waiting for a GC. This gc toolchain uses
3510 // preemption instead.
3511 bool
3512 runtime_gcwaiting(void)
3514 return runtime_sched.gcwaiting;
3517 // os_beforeExit is called from os.Exit(0).
3518 //go:linkname os_beforeExit os.runtime_beforeExit
3520 extern void os_beforeExit() __asm__ (GOSYM_PREFIX "os.runtime_beforeExit");
3522 void
3523 os_beforeExit()
3527 // Active spinning for sync.Mutex.
3528 //go:linkname sync_runtime_canSpin sync.runtime_canSpin
3530 enum
3532 ACTIVE_SPIN = 4,
3533 ACTIVE_SPIN_CNT = 30,
3536 extern _Bool sync_runtime_canSpin(intgo i)
3537 __asm__ (GOSYM_PREFIX "sync.runtime_canSpin");
3539 _Bool
3540 sync_runtime_canSpin(intgo i)
3542 P *p;
3544 // sync.Mutex is cooperative, so we are conservative with spinning.
3545 // Spin only a few times and only if running on a multicore machine and
3546 // GOMAXPROCS>1 and there is at least one other running P and local runq is empty.
3547 // As opposed to runtime mutex we don't do passive spinning here,
3548 // because there can be work on the global runq or on other Ps.
3549 if (i >= ACTIVE_SPIN || runtime_ncpu <= 1 || runtime_gomaxprocs <= (int32)(runtime_sched.npidle+runtime_sched.nmspinning)+1) {
3550 return false;
3552 p = (P*)g->m->p;
3553 return p != nil && p->runqhead == p->runqtail;
3556 //go:linkname sync_runtime_doSpin sync.runtime_doSpin
3557 //go:nosplit
3559 extern void sync_runtime_doSpin(void)
3560 __asm__ (GOSYM_PREFIX "sync.runtime_doSpin");
3562 void
3563 sync_runtime_doSpin()
3565 runtime_procyield(ACTIVE_SPIN_CNT);
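// Sketch of the intended call pattern from sync.Mutex (normally Go
// code): spin a bounded number of times while the lock word stays
// contended, then give up and park.  Only sync_runtime_canSpin,
// sync_runtime_doSpin and runtime_cas are real; the lock word and
// example_try_spin are hypothetical.
#if 0
static bool
example_try_spin(uint32 *lockword)
{
	intgo iter;

	for(iter = 0; sync_runtime_canSpin(iter); iter++) {
		if(runtime_cas(lockword, 0, 1))		// try to grab the lock
			return true;
		sync_runtime_doSpin();			// ACTIVE_SPIN_CNT procyield iterations
	}
	return false;					// caller falls back to blocking
}
#endif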
3568 // For Go code to look at variables, until we port proc.go.
3570 extern M** runtime_go_allm(void)
3571 __asm__ (GOSYM_PREFIX "runtime.allm");
3574 runtime_go_allm()
3576 return &runtime_allm;
3579 extern Slice runtime_go_allgs(void)
3580 __asm__ (GOSYM_PREFIX "runtime.allgs");
3582 Slice
3583 runtime_go_allgs()
3585 Slice s;
3587 s.__values = runtime_allg;
3588 s.__count = runtime_allglen;
3589 s.__capacity = allgcap;
3590 return s;