libgo/runtime/proc.c (official-gcc.git)
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 #include <limits.h>
6 #include <signal.h>
7 #include <stdlib.h>
8 #include <pthread.h>
9 #include <unistd.h>
11 #include "config.h"
13 #ifdef HAVE_DL_ITERATE_PHDR
14 #include <link.h>
15 #endif
17 #include "runtime.h"
18 #include "arch.h"
19 #include "defs.h"
20 #include "malloc.h"
21 #include "go-type.h"
22 #include "go-defer.h"
24 #ifdef USING_SPLIT_STACK
26 /* FIXME: These are not declared anywhere. */
28 extern void __splitstack_getcontext(void *context[10]);
30 extern void __splitstack_setcontext(void *context[10]);
32 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
34 extern void * __splitstack_resetcontext(void *context[10], size_t *);
36 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
37 void **);
39 extern void __splitstack_block_signals (int *, int *);
41 extern void __splitstack_block_signals_context (void *context[10], int *,
42 int *);
44 #endif
46 #ifndef PTHREAD_STACK_MIN
47 # define PTHREAD_STACK_MIN 8192
48 #endif
50 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
51 # define StackMin PTHREAD_STACK_MIN
52 #else
53 # define StackMin ((sizeof(char *) < 8) ? 2 * 1024 * 1024 : 4 * 1024 * 1024)
54 #endif
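// In other words, when split stacks cannot be used, StackMin is 2 MB on
// targets with 4-byte pointers and 4 MB on targets with 8-byte pointers;
// it is the stack size used below when runtime_newextram allocates an m
// and its goroutine.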
56 uintptr runtime_stacks_sys;
58 static void gtraceback(G*);
60 #ifdef __rtems__
61 #define __thread
62 #endif
64 static __thread G *g;
65 static __thread M *m;
67 #ifndef SETCONTEXT_CLOBBERS_TLS
69 static inline void
70 initcontext(void)
74 static inline void
75 fixcontext(ucontext_t *c __attribute__ ((unused)))
79 #else
81 # if defined(__x86_64__) && defined(__sun__)
83 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
84 // register to that of the thread which called getcontext. The effect
85 // is that the address of all __thread variables changes. This bug
86 // also affects pthread_self() and pthread_getspecific. We work
87 // around it by clobbering the context field directly to keep %fs the
88 // same.
90 static __thread greg_t fs;
92 static inline void
93 initcontext(void)
95 ucontext_t c;
97 getcontext(&c);
98 fs = c.uc_mcontext.gregs[REG_FSBASE];
101 static inline void
102 fixcontext(ucontext_t* c)
104 c->uc_mcontext.gregs[REG_FSBASE] = fs;
107 # elif defined(__NetBSD__)
109 // NetBSD has a bug: setcontext clobbers tlsbase, we need to save
110 // and restore it ourselves.
112 static __thread __greg_t tlsbase;
114 static inline void
115 initcontext(void)
117 ucontext_t c;
119 getcontext(&c);
120 tlsbase = c.uc_mcontext._mc_tlsbase;
123 static inline void
124 fixcontext(ucontext_t* c)
126 c->uc_mcontext._mc_tlsbase = tlsbase;
129 # elif defined(__sparc__)
131 static inline void
132 initcontext(void)
136 static inline void
137 fixcontext(ucontext_t *c)
139 /* ??? Using
140 register unsigned long thread __asm__("%g7");
141 c->uc_mcontext.gregs[REG_G7] = thread;
142 results in
143 error: variable ‘thread’ might be clobbered by \
144 ‘longjmp’ or ‘vfork’ [-Werror=clobbered]
145 which ought to be false, as %g7 is a fixed register. */
147 if (sizeof (c->uc_mcontext.gregs[REG_G7]) == 8)
148 asm ("stx %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
149 else
150 asm ("st %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
153 # else
155 # error unknown case for SETCONTEXT_CLOBBERS_TLS
157 # endif
159 #endif
161 // We can not always refer to the TLS variables directly. The
162 // compiler will call tls_get_addr to get the address of the variable,
163 // and it may hold it in a register across a call to schedule. When
164 // we get back from the call we may be running in a different thread,
165 // in which case the register now points to the TLS variable for a
166 // different thread. We use non-inlinable functions to avoid this
167 // when necessary.
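// For example, code that may reschedule (see runtime_mcall below) must
// reload m and g through runtime_m()/runtime_g() afterwards rather than
// trust values the compiler may have cached before the call.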
169 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
171 G*
172 runtime_g(void)
174 return g;
177 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
179 M*
180 runtime_m(void)
182 return m;
185 // Set m and g.
186 void
187 runtime_setmg(M* mp, G* gp)
189 m = mp;
190 g = gp;
193 // Start a new thread.
194 static void
195 runtime_newosproc(M *mp)
197 pthread_attr_t attr;
198 sigset_t clear, old;
199 pthread_t tid;
200 int ret;
202 if(pthread_attr_init(&attr) != 0)
203 runtime_throw("pthread_attr_init");
204 if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
205 runtime_throw("pthread_attr_setdetachstate");
207 // Block signals during pthread_create so that the new thread
208 // starts with signals disabled. It will enable them in minit.
209 sigfillset(&clear);
211 #ifdef SIGTRAP
212 // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
213 sigdelset(&clear, SIGTRAP);
214 #endif
216 sigemptyset(&old);
217 pthread_sigmask(SIG_BLOCK, &clear, &old);
218 ret = pthread_create(&tid, &attr, runtime_mstart, mp);
219 pthread_sigmask(SIG_SETMASK, &old, nil);
221 if (ret != 0)
222 runtime_throw("pthread_create");
225 // First function run by a new goroutine. This replaces gogocall.
226 static void
227 kickoff(void)
229 void (*fn)(void*);
231 if(g->traceback != nil)
232 gtraceback(g);
234 fn = (void (*)(void*))(g->entry);
235 fn(g->param);
236 runtime_goexit();
239 // Switch context to a different goroutine. This is like longjmp.
240 void runtime_gogo(G*) __attribute__ ((noinline));
241 void
242 runtime_gogo(G* newg)
244 #ifdef USING_SPLIT_STACK
245 __splitstack_setcontext(&newg->stack_context[0]);
246 #endif
247 g = newg;
248 newg->fromgogo = true;
249 fixcontext(&newg->context);
250 setcontext(&newg->context);
251 runtime_throw("gogo setcontext returned");
254 // Save context and call fn passing g as a parameter. This is like
255 // setjmp. Because getcontext always returns 0, unlike setjmp, we use
256 // g->fromgogo as a code. It will be true if we got here via
257 // setcontext. g == nil the first time this is called in a new m.
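// For example, runtime_park below sets m->waitlock and m->waitunlockf and
// then calls runtime_mcall(park0); park0 runs on g0's stack and receives
// the parked g as its argument.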
258 void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
259 void
260 runtime_mcall(void (*pfn)(G*))
262 M *mp;
263 G *gp;
265 // Ensure that all registers are on the stack for the garbage
266 // collector.
267 __builtin_unwind_init();
269 mp = m;
270 gp = g;
271 if(gp == mp->g0)
272 runtime_throw("runtime: mcall called on m->g0 stack");
274 if(gp != nil) {
276 #ifdef USING_SPLIT_STACK
277 __splitstack_getcontext(&g->stack_context[0]);
278 #else
279 gp->gcnext_sp = &pfn;
280 #endif
281 gp->fromgogo = false;
282 getcontext(&gp->context);
284 // When we return from getcontext, we may be running
285 // in a new thread. That means that m and g may have
286 // changed. They are global variables so we will
287 // reload them, but the addresses of m and g may be
288 // cached in our local stack frame, and those
289 // addresses may be wrong. Call functions to reload
290 // the values for this thread.
291 mp = runtime_m();
292 gp = runtime_g();
294 if(gp->traceback != nil)
295 gtraceback(gp);
297 if (gp == nil || !gp->fromgogo) {
298 #ifdef USING_SPLIT_STACK
299 __splitstack_setcontext(&mp->g0->stack_context[0]);
300 #endif
301 mp->g0->entry = (byte*)pfn;
302 mp->g0->param = gp;
304 // It's OK to set g directly here because this case
305 // can not occur if we got here via a setcontext to
306 // the getcontext call just above.
307 g = mp->g0;
309 fixcontext(&mp->g0->context);
310 setcontext(&mp->g0->context);
311 runtime_throw("runtime: mcall function returned");
315 // Goroutine scheduler
316 // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
318 // The main concepts are:
319 // G - goroutine.
320 // M - worker thread, or machine.
321 // P - processor, a resource that is required to execute Go code.
322 // M must have an associated P to execute Go code; however, it can be
323 // blocked or in a syscall w/o an associated P.
325 // Design doc at http://golang.org/s/go11sched.
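// In this file the M/P association is managed explicitly: an M binds a P
// with acquirep() and drops it with releasep(); an idle M looks for work
// in findrunnable(); and handoffp() passes a P on to another M when the
// current one blocks in a syscall or is locked to a g.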
327 typedef struct Sched Sched;
328 struct Sched {
329 Lock;
331 uint64 goidgen;
332 M* midle; // idle m's waiting for work
333 int32 nmidle; // number of idle m's waiting for work
334 int32 nmidlelocked; // number of locked m's waiting for work
335 int32 mcount; // number of m's that have been created
336 int32 maxmcount; // maximum number of m's allowed (or die)
338 P* pidle; // idle P's
339 uint32 npidle;
340 uint32 nmspinning;
342 // Global runnable queue.
343 G* runqhead;
344 G* runqtail;
345 int32 runqsize;
347 // Global cache of dead G's.
348 Lock gflock;
349 G* gfree;
351 uint32 gcwaiting; // gc is waiting to run
352 int32 stopwait;
353 Note stopnote;
354 uint32 sysmonwait;
355 Note sysmonnote;
356 uint64 lastpoll;
358 int32 profilehz; // cpu profiling rate
361 enum
363 // The max value of GOMAXPROCS.
364 // There are no fundamental restrictions on the value.
365 MaxGomaxprocs = 1<<8,
367 // Number of goroutine ids to grab from runtime_sched.goidgen to the local per-P cache at once.
368 // 16 seems to provide enough amortization, but other than that it's mostly an arbitrary number.
369 GoidCacheBatch = 16,
372 Sched runtime_sched;
373 int32 runtime_gomaxprocs;
374 uint32 runtime_needextram = 1;
375 M runtime_m0;
376 G runtime_g0; // idle goroutine for m0
377 G* runtime_lastg;
378 M* runtime_allm;
379 P** runtime_allp;
380 M* runtime_extram;
381 int8* runtime_goos;
382 int32 runtime_ncpu;
383 bool runtime_precisestack;
384 static int32 newprocs;
386 static Lock allglock; // the following vars are protected by this lock or by stoptheworld
387 G** runtime_allg;
388 uintptr runtime_allglen;
389 static uintptr allgcap;
391 bool runtime_isarchive;
393 void* runtime_mstart(void*);
394 static void runqput(P*, G*);
395 static G* runqget(P*);
396 static bool runqputslow(P*, G*, uint32, uint32);
397 static G* runqsteal(P*, P*);
398 static void mput(M*);
399 static M* mget(void);
400 static void mcommoninit(M*);
401 static void schedule(void);
402 static void procresize(int32);
403 static void acquirep(P*);
404 static P* releasep(void);
405 static void newm(void(*)(void), P*);
406 static void stopm(void);
407 static void startm(P*, bool);
408 static void handoffp(P*);
409 static void wakep(void);
410 static void stoplockedm(void);
411 static void startlockedm(G*);
412 static void sysmon(void);
413 static uint32 retake(int64);
414 static void incidlelocked(int32);
415 static void checkdead(void);
416 static void exitsyscall0(G*);
417 static void park0(G*);
418 static void goexit0(G*);
419 static void gfput(P*, G*);
420 static G* gfget(P*);
421 static void gfpurge(P*);
422 static void globrunqput(G*);
423 static void globrunqputbatch(G*, G*, int32);
424 static G* globrunqget(P*, int32);
425 static P* pidleget(void);
426 static void pidleput(P*);
427 static void injectglist(G*);
428 static bool preemptall(void);
429 static bool exitsyscallfast(void);
430 static void allgadd(G*);
432 bool runtime_isstarted;
434 // The bootstrap sequence is:
436 // call osinit
437 // call schedinit
438 // make & queue new G
439 // call runtime_mstart
441 // The new G calls runtime_main.
442 void
443 runtime_schedinit(void)
445 int32 n, procs;
446 String s;
447 const byte *p;
448 Eface i;
450 m = &runtime_m0;
451 g = &runtime_g0;
452 m->g0 = g;
453 m->curg = g;
454 g->m = m;
456 initcontext();
458 runtime_sched.maxmcount = 10000;
459 runtime_precisestack = 0;
461 // runtime_symtabinit();
462 runtime_mallocinit();
463 mcommoninit(m);
465 // Initialize the itable value for newErrorCString,
466 // so that the next time it gets called, possibly
467 // in a fault during a garbage collection, it will not
468 // need to allocate memory.
469 runtime_newErrorCString(0, &i);
471 // Initialize the cached gotraceback value, since
472 // gotraceback calls getenv, which mallocs on Plan 9.
473 runtime_gotraceback(nil);
475 runtime_goargs();
476 runtime_goenvs();
477 runtime_parsedebugvars();
479 runtime_sched.lastpoll = runtime_nanotime();
480 procs = 1;
481 s = runtime_getenv("GOMAXPROCS");
482 p = s.str;
483 if(p != nil && (n = runtime_atoi(p, s.len)) > 0) {
484 if(n > MaxGomaxprocs)
485 n = MaxGomaxprocs;
486 procs = n;
488 runtime_allp = runtime_malloc((MaxGomaxprocs+1)*sizeof(runtime_allp[0]));
489 procresize(procs);
491 // Can not enable GC until all roots are registered.
492 // mstats.enablegc = 1;
495 extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
496 extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");
498 // Used to determine the field alignment.
500 struct field_align
502 char c;
503 Hchan *p;
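// (In a struct { char c; Hchan *p; }, p sits at the first properly aligned
// offset after the one-byte c, so offsetof (struct field_align, p) yields
// the field alignment of Hchan *; chan_bool_type_descriptor below stores
// that value minus one as its __field_align.)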
506 // main_init_done is a signal used by cgocallbackg that initialization
507 // has been completed. It is made before _cgo_notify_runtime_init_done,
508 // so all cgo calls can rely on it existing. When main_init is
509 // complete, it is closed, meaning cgocallbackg can reliably receive
510 // from it.
511 Hchan *runtime_main_init_done;
513 // The chan bool type, for runtime_main_init_done.
515 extern const struct __go_type_descriptor bool_type_descriptor
516 __asm__ (GOSYM_PREFIX "__go_tdn_bool");
518 static struct __go_channel_type chan_bool_type_descriptor =
520 /* __common */
522 /* __code */
523 GO_CHAN,
524 /* __align */
525 __alignof (Hchan *),
526 /* __field_align */
527 offsetof (struct field_align, p) - 1,
528 /* __size */
529 sizeof (Hchan *),
530 /* __hash */
531 0, /* This value doesn't matter. */
532 /* __hashfn */
533 &__go_type_hash_error_descriptor,
534 /* __equalfn */
535 &__go_type_equal_error_descriptor,
536 /* __gc */
537 NULL, /* This value doesn't matter */
538 /* __reflection */
539 NULL, /* This value doesn't matter */
540 /* __uncommon */
541 NULL,
542 /* __pointer_to_this */
543 NULL
545 /* __element_type */
546 &bool_type_descriptor,
547 /* __dir */
548 CHANNEL_BOTH_DIR
551 extern Hchan *__go_new_channel (ChanType *, uintptr);
552 extern void closechan(Hchan *) __asm__ (GOSYM_PREFIX "runtime.closechan");
554 static void
555 initDone(void *arg __attribute__ ((unused))) {
556 runtime_unlockOSThread();
559 // The main goroutine.
560 // Note: C frames in general are not copyable during stack growth, for two reasons:
561 // 1) We don't know where in a frame to find pointers to other stack locations.
562 // 2) There's no guarantee that globals or heap values do not point into the frame.
564 // The C frame for runtime.main is copyable, because:
565 // 1) There are no pointers to other stack locations in the frame
566 // (d.fn points at a global, d.link is nil, d.argp is -1).
567 // 2) The only pointer into this frame is from the defer chain,
568 // which is explicitly handled during stack copying.
569 void
570 runtime_main(void* dummy __attribute__((unused)))
572 Defer d;
573 _Bool frame;
575 newm(sysmon, nil);
577 // Lock the main goroutine onto this, the main OS thread,
578 // during initialization. Most programs won't care, but a few
579 // do require certain calls to be made by the main thread.
580 // Those can arrange for main.main to run in the main thread
581 // by calling runtime.LockOSThread during initialization
582 // to preserve the lock.
583 runtime_lockOSThread();
585 // Defer unlock so that runtime.Goexit during init does the unlock too.
586 d.__pfn = initDone;
587 d.__next = g->defer;
588 d.__arg = (void*)-1;
589 d.__panic = g->panic;
590 d.__retaddr = nil;
591 d.__makefunc_can_recover = 0;
592 d.__frame = &frame;
593 d.__special = true;
594 g->defer = &d;
596 if(m != &runtime_m0)
597 runtime_throw("runtime_main not on m0");
598 __go_go(runtime_MHeap_Scavenger, nil);
600 runtime_main_init_done = __go_new_channel(&chan_bool_type_descriptor, 0);
602 _cgo_notify_runtime_init_done();
604 main_init();
606 closechan(runtime_main_init_done);
608 if(g->defer != &d || d.__pfn != initDone)
609 runtime_throw("runtime: bad defer entry after init");
610 g->defer = d.__next;
611 runtime_unlockOSThread();
613 // For gccgo we have to wait until after main is initialized
614 // to enable GC, because initializing main registers the GC
615 // roots.
616 mstats.enablegc = 1;
618 if(runtime_isarchive) {
619 // This is not a complete program, but is instead a
620 // library built using -buildmode=c-archive or
621 // c-shared. Now that we are initialized, there is
622 // nothing further to do.
623 return;
626 main_main();
628 // Make racy client program work: if panicking on
629 // another goroutine at the same time as main returns,
630 // let the other goroutine finish printing the panic trace.
631 // Once it does, it will exit. See issue 3934.
632 if(runtime_panicking)
633 runtime_park(nil, nil, "panicwait");
635 runtime_exit(0);
636 for(;;)
637 *(int32*)0 = 0;
640 void
641 runtime_goroutineheader(G *gp)
643 const char *status;
644 int64 waitfor;
646 switch(gp->status) {
647 case Gidle:
648 status = "idle";
649 break;
650 case Grunnable:
651 status = "runnable";
652 break;
653 case Grunning:
654 status = "running";
655 break;
656 case Gsyscall:
657 status = "syscall";
658 break;
659 case Gwaiting:
660 if(gp->waitreason)
661 status = gp->waitreason;
662 else
663 status = "waiting";
664 break;
665 default:
666 status = "???";
667 break;
670 // approx time the G is blocked, in minutes
671 waitfor = 0;
672 if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince != 0)
673 waitfor = (runtime_nanotime() - gp->waitsince) / (60LL*1000*1000*1000);
675 if(waitfor < 1)
676 runtime_printf("goroutine %D [%s]:\n", gp->goid, status);
677 else
678 runtime_printf("goroutine %D [%s, %D minutes]:\n", gp->goid, status, waitfor);
681 void
682 runtime_printcreatedby(G *g)
684 if(g != nil && g->gopc != 0 && g->goid != 1) {
685 String fn;
686 String file;
687 intgo line;
689 if(__go_file_line(g->gopc - 1, &fn, &file, &line)) {
690 runtime_printf("created by %S\n", fn);
691 runtime_printf("\t%S:%D\n", file, (int64) line);
696 struct Traceback
698 G* gp;
699 Location locbuf[TracebackMaxFrames];
700 int32 c;
703 void
704 runtime_tracebackothers(G * volatile me)
706 G * volatile gp;
707 Traceback tb;
708 int32 traceback;
709 volatile uintptr i;
711 tb.gp = me;
712 traceback = runtime_gotraceback(nil);
714 // Show the current goroutine first, if we haven't already.
715 if((gp = m->curg) != nil && gp != me) {
716 runtime_printf("\n");
717 runtime_goroutineheader(gp);
718 gp->traceback = &tb;
720 #ifdef USING_SPLIT_STACK
721 __splitstack_getcontext(&me->stack_context[0]);
722 #endif
723 getcontext(&me->context);
725 if(gp->traceback != nil) {
726 runtime_gogo(gp);
729 runtime_printtrace(tb.locbuf, tb.c, false);
730 runtime_printcreatedby(gp);
733 runtime_lock(&allglock);
734 for(i = 0; i < runtime_allglen; i++) {
735 gp = runtime_allg[i];
736 if(gp == me || gp == m->curg || gp->status == Gdead)
737 continue;
738 if(gp->issystem && traceback < 2)
739 continue;
740 runtime_printf("\n");
741 runtime_goroutineheader(gp);
743 // Our only mechanism for doing a stack trace is
744 // _Unwind_Backtrace. And that only works for the
745 // current thread, not for other random goroutines.
746 // So we need to switch context to the goroutine, get
747 // the backtrace, and then switch back.
749 // This means that if g is running or in a syscall, we
750 // can't reliably print a stack trace. FIXME.
752 if(gp->status == Grunning) {
753 runtime_printf("\tgoroutine running on other thread; stack unavailable\n");
754 runtime_printcreatedby(gp);
755 } else if(gp->status == Gsyscall) {
756 runtime_printf("\tgoroutine in C code; stack unavailable\n");
757 runtime_printcreatedby(gp);
758 } else {
759 gp->traceback = &tb;
761 #ifdef USING_SPLIT_STACK
762 __splitstack_getcontext(&me->stack_context[0]);
763 #endif
764 getcontext(&me->context);
766 if(gp->traceback != nil) {
767 runtime_gogo(gp);
770 runtime_printtrace(tb.locbuf, tb.c, false);
771 runtime_printcreatedby(gp);
774 runtime_unlock(&allglock);
777 static void
778 checkmcount(void)
780 // sched lock is held
781 if(runtime_sched.mcount > runtime_sched.maxmcount) {
782 runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched.maxmcount);
783 runtime_throw("thread exhaustion");
787 // Do a stack trace of gp, and then restore the context to
788 // gp->dotraceback.
790 static void
791 gtraceback(G* gp)
793 Traceback* traceback;
795 traceback = gp->traceback;
796 gp->traceback = nil;
797 traceback->c = runtime_callers(1, traceback->locbuf,
798 sizeof traceback->locbuf / sizeof traceback->locbuf[0], false);
799 runtime_gogo(traceback->gp);
802 static void
803 mcommoninit(M *mp)
805 // If there is no mcache, runtime_callers() will crash,
806 // and we are most likely in the sysmon thread, so the stack is senseless anyway.
807 if(m->mcache)
808 runtime_callers(1, mp->createstack, nelem(mp->createstack), false);
810 mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
812 runtime_lock(&runtime_sched);
813 mp->id = runtime_sched.mcount++;
814 checkmcount();
815 runtime_mpreinit(mp);
817 // Add to runtime_allm so garbage collector doesn't free m
818 // when it is just in a register or thread-local storage.
819 mp->alllink = runtime_allm;
820 // runtime_NumCgoCall() iterates over allm w/o schedlock,
821 // so we need to publish it safely.
822 runtime_atomicstorep(&runtime_allm, mp);
823 runtime_unlock(&runtime_sched);
826 // Mark gp ready to run.
827 void
828 runtime_ready(G *gp)
830 // Mark runnable.
831 m->locks++; // disable preemption because it can be holding p in a local var
832 if(gp->status != Gwaiting) {
833 runtime_printf("goroutine %D has status %d\n", gp->goid, gp->status);
834 runtime_throw("bad g->status in ready");
836 gp->status = Grunnable;
837 runqput(m->p, gp);
838 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0) // TODO: fast atomic
839 wakep();
840 m->locks--;
843 int32
844 runtime_gcprocs(void)
846 int32 n;
848 // Figure out how many CPUs to use during GC.
849 // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
850 runtime_lock(&runtime_sched);
851 n = runtime_gomaxprocs;
852 if(n > runtime_ncpu)
853 n = runtime_ncpu > 0 ? runtime_ncpu : 1;
854 if(n > MaxGcproc)
855 n = MaxGcproc;
856 if(n > runtime_sched.nmidle+1) // one M is currently running
857 n = runtime_sched.nmidle+1;
858 runtime_unlock(&runtime_sched);
859 return n;
862 static bool
863 needaddgcproc(void)
865 int32 n;
867 runtime_lock(&runtime_sched);
868 n = runtime_gomaxprocs;
869 if(n > runtime_ncpu)
870 n = runtime_ncpu;
871 if(n > MaxGcproc)
872 n = MaxGcproc;
873 n -= runtime_sched.nmidle+1; // one M is currently running
874 runtime_unlock(&runtime_sched);
875 return n > 0;
878 void
879 runtime_helpgc(int32 nproc)
881 M *mp;
882 int32 n, pos;
884 runtime_lock(&runtime_sched);
885 pos = 0;
886 for(n = 1; n < nproc; n++) { // one M is currently running
887 if(runtime_allp[pos]->mcache == m->mcache)
888 pos++;
889 mp = mget();
890 if(mp == nil)
891 runtime_throw("runtime_gcprocs inconsistency");
892 mp->helpgc = n;
893 mp->mcache = runtime_allp[pos]->mcache;
894 pos++;
895 runtime_notewakeup(&mp->park);
897 runtime_unlock(&runtime_sched);
900 // Similar to stoptheworld but best-effort and can be called several times.
902 // There is no reverse operation; it is used during crashing.
902 // This function must not lock any mutexes.
903 void
904 runtime_freezetheworld(void)
906 int32 i;
908 if(runtime_gomaxprocs == 1)
909 return;
910 // stopwait and preemption requests can be lost
911 // due to races with concurrently executing threads,
912 // so try several times
913 for(i = 0; i < 5; i++) {
914 // this should tell the scheduler to not start any new goroutines
915 runtime_sched.stopwait = 0x7fffffff;
916 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
917 // this should stop running goroutines
918 if(!preemptall())
919 break; // no running goroutines
920 runtime_usleep(1000);
922 // to be sure
923 runtime_usleep(1000);
924 preemptall();
925 runtime_usleep(1000);
928 void
929 runtime_stoptheworld(void)
931 int32 i;
932 uint32 s;
933 P *p;
934 bool wait;
936 runtime_lock(&runtime_sched);
937 runtime_sched.stopwait = runtime_gomaxprocs;
938 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
939 preemptall();
940 // stop current P
941 m->p->status = Pgcstop;
942 runtime_sched.stopwait--;
943 // try to retake all P's in Psyscall status
944 for(i = 0; i < runtime_gomaxprocs; i++) {
945 p = runtime_allp[i];
946 s = p->status;
947 if(s == Psyscall && runtime_cas(&p->status, s, Pgcstop))
948 runtime_sched.stopwait--;
950 // stop idle P's
951 while((p = pidleget()) != nil) {
952 p->status = Pgcstop;
953 runtime_sched.stopwait--;
955 wait = runtime_sched.stopwait > 0;
956 runtime_unlock(&runtime_sched);
958 // wait for remaining P's to stop voluntarily
959 if(wait) {
960 runtime_notesleep(&runtime_sched.stopnote);
961 runtime_noteclear(&runtime_sched.stopnote);
963 if(runtime_sched.stopwait)
964 runtime_throw("stoptheworld: not stopped");
965 for(i = 0; i < runtime_gomaxprocs; i++) {
966 p = runtime_allp[i];
967 if(p->status != Pgcstop)
968 runtime_throw("stoptheworld: not stopped");
972 static void
973 mhelpgc(void)
975 m->helpgc = -1;
978 void
979 runtime_starttheworld(void)
981 P *p, *p1;
982 M *mp;
983 G *gp;
984 bool add;
986 m->locks++; // disable preemption because it can be holding p in a local var
987 gp = runtime_netpoll(false); // non-blocking
988 injectglist(gp);
989 add = needaddgcproc();
990 runtime_lock(&runtime_sched);
991 if(newprocs) {
992 procresize(newprocs);
993 newprocs = 0;
994 } else
995 procresize(runtime_gomaxprocs);
996 runtime_sched.gcwaiting = 0;
998 p1 = nil;
999 while((p = pidleget()) != nil) {
1000 // procresize() puts p's with work at the beginning of the list.
1001 // Once we reach a p without a run queue, the rest don't have one either.
1002 if(p->runqhead == p->runqtail) {
1003 pidleput(p);
1004 break;
1006 p->m = mget();
1007 p->link = p1;
1008 p1 = p;
1010 if(runtime_sched.sysmonwait) {
1011 runtime_sched.sysmonwait = false;
1012 runtime_notewakeup(&runtime_sched.sysmonnote);
1014 runtime_unlock(&runtime_sched);
1016 while(p1) {
1017 p = p1;
1018 p1 = p1->link;
1019 if(p->m) {
1020 mp = p->m;
1021 p->m = nil;
1022 if(mp->nextp)
1023 runtime_throw("starttheworld: inconsistent mp->nextp");
1024 mp->nextp = p;
1025 runtime_notewakeup(&mp->park);
1026 } else {
1027 // Start M to run P. Do not start another M below.
1028 newm(nil, p);
1029 add = false;
1033 if(add) {
1034 // If GC could have used another helper proc, start one now,
1035 // in the hope that it will be available next time.
1036 // It would have been even better to start it before the collection,
1037 // but doing so requires allocating memory, so it's tricky to
1038 // coordinate. This lazy approach works out in practice:
1039 // we don't mind if the first couple gc rounds don't have quite
1040 // the maximum number of procs.
1041 newm(mhelpgc, nil);
1043 m->locks--;
1046 // Called to start an M.
1047 void*
1048 runtime_mstart(void* mp)
1050 m = (M*)mp;
1051 g = m->g0;
1053 initcontext();
1055 g->entry = nil;
1056 g->param = nil;
1058 // Record top of stack for use by mcall.
1059 // Once we call schedule we're never coming back,
1060 // so other calls can reuse this stack space.
1061 #ifdef USING_SPLIT_STACK
1062 __splitstack_getcontext(&g->stack_context[0]);
1063 #else
1064 g->gcinitial_sp = &mp;
1065 // Setting gcstack_size to 0 is a marker meaning that gcinitial_sp
1066 // is the top of the stack, not the bottom.
1067 g->gcstack_size = 0;
1068 g->gcnext_sp = &mp;
1069 #endif
1070 getcontext(&g->context);
1072 if(g->entry != nil) {
1073 // Got here from mcall.
1074 void (*pfn)(G*) = (void (*)(G*))g->entry;
1075 G* gp = (G*)g->param;
1076 pfn(gp);
1077 *(int*)0x21 = 0x21;
1079 runtime_minit();
1081 #ifdef USING_SPLIT_STACK
1083 int dont_block_signals = 0;
1084 __splitstack_block_signals(&dont_block_signals, nil);
1086 #endif
1088 // Install signal handlers; after minit so that minit can
1089 // prepare the thread to be able to handle the signals.
1090 if(m == &runtime_m0) {
1091 if(runtime_iscgo && !runtime_cgoHasExtraM) {
1092 runtime_cgoHasExtraM = true;
1093 runtime_newextram();
1094 runtime_needextram = 0;
1096 runtime_initsig(false);
1099 if(m->mstartfn)
1100 m->mstartfn();
1102 if(m->helpgc) {
1103 m->helpgc = 0;
1104 stopm();
1105 } else if(m != &runtime_m0) {
1106 acquirep(m->nextp);
1107 m->nextp = nil;
1109 schedule();
1111 // TODO(brainman): This point is never reached, because scheduler
1112 // does not release os threads at the moment. But once this path
1113 // is enabled, we must remove our seh here.
1115 return nil;
1118 typedef struct CgoThreadStart CgoThreadStart;
1119 struct CgoThreadStart
1121 M *m;
1122 G *g;
1123 uintptr *tls;
1124 void (*fn)(void);
1127 // Allocate a new m unassociated with any thread.
1128 // Can use p for allocation context if needed.
1129 M*
1130 runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, size_t* ret_g0_stacksize)
1132 M *mp;
1134 m->locks++; // disable GC because it can be called from sysmon
1135 if(m->p == nil)
1136 acquirep(p); // temporarily borrow p for mallocs in this function
1137 #if 0
1138 if(mtype == nil) {
1139 Eface e;
1140 runtime_gc_m_ptr(&e);
1141 mtype = ((const PtrType*)e.__type_descriptor)->__element_type;
1143 #endif
1145 mp = runtime_mal(sizeof *mp);
1146 mcommoninit(mp);
1147 mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize);
1149 if(p == m->p)
1150 releasep();
1151 m->locks--;
1153 return mp;
1156 static G*
1157 allocg(void)
1159 G *gp;
1160 // static Type *gtype;
1162 // if(gtype == nil) {
1163 // Eface e;
1164 // runtime_gc_g_ptr(&e);
1165 // gtype = ((PtrType*)e.__type_descriptor)->__element_type;
1166 // }
1167 // gp = runtime_cnew(gtype);
1168 gp = runtime_malloc(sizeof(G));
1169 return gp;
1172 static M* lockextra(bool nilokay);
1173 static void unlockextra(M*);
1175 // needm is called when a cgo callback happens on a
1176 // thread without an m (a thread not created by Go).
1177 // In this case, needm is expected to find an m to use
1178 // and return with m, g initialized correctly.
1179 // Since m and g are not set now (likely nil, but see below)
1180 // needm is limited in what routines it can call. In particular
1181 // it can only call nosplit functions (textflag 7) and cannot
1182 // do any scheduling that requires an m.
1184 // In order to avoid needing heavy lifting here, we adopt
1185 // the following strategy: there is a stack of available m's
1186 // that can be stolen. Using compare-and-swap
1187 // to pop from the stack has ABA races, so we simulate
1188 // a lock by doing an exchange (via casp) to steal the stack
1189 // head and replace the top pointer with MLOCKED (1).
1190 // This serves as a simple spin lock that we can use even
1191 // without an m. The thread that locks the stack in this way
1192 // unlocks the stack by storing a valid stack head pointer.
1194 // In order to make sure that there is always an m structure
1195 // available to be stolen, we maintain the invariant that there
1196 // is always one more than needed. At the beginning of the
1197 // program (if cgo is in use) the list is seeded with a single m.
1198 // If needm finds that it has taken the last m off the list, its job
1199 // is - once it has installed its own m so that it can do things like
1200 // allocate memory - to create a spare m and put it on the list.
1202 // Each of these extra m's also has a g0 and a curg that are
1203 // pressed into service as the scheduling stack and current
1204 // goroutine for the duration of the cgo callback.
1206 // When the callback is done with the m, it calls dropm to
1207 // put the m back on the list.
1209 // Unlike the gc toolchain, we start running on curg, since we are
1210 // just going to return and let the caller continue.
1211 void
1212 runtime_needm(void)
1214 M *mp;
1216 if(runtime_needextram) {
1217 // Can happen if C/C++ code calls Go from a global ctor.
1218 // Can not throw, because scheduler is not initialized yet.
1219 int rv __attribute__((unused));
1220 rv = runtime_write(2, "fatal error: cgo callback before cgo call\n",
1221 sizeof("fatal error: cgo callback before cgo call\n")-1);
1222 runtime_exit(1);
1225 // Lock extra list, take head, unlock popped list.
1226 // nilokay=false is safe here because of the invariant above,
1227 // that the extra list always contains or will soon contain
1228 // at least one m.
1229 mp = lockextra(false);
1231 // Set needextram when we've just emptied the list,
1232 // so that the eventual call into cgocallbackg will
1233 // allocate a new m for the extra list. We delay the
1234 // allocation until then so that it can be done
1235 // after exitsyscall makes sure it is okay to be
1236 // running at all (that is, there's no garbage collection
1237 // running right now).
1238 mp->needextram = mp->schedlink == nil;
1239 unlockextra(mp->schedlink);
1241 // Install m and g (= m->curg).
1242 runtime_setmg(mp, mp->curg);
1244 // Initialize g's context as in mstart.
1245 initcontext();
1246 g->status = Gsyscall;
1247 g->entry = nil;
1248 g->param = nil;
1249 #ifdef USING_SPLIT_STACK
1250 __splitstack_getcontext(&g->stack_context[0]);
1251 #else
1252 g->gcinitial_sp = &mp;
1253 g->gcstack = nil;
1254 g->gcstack_size = 0;
1255 g->gcnext_sp = &mp;
1256 #endif
1257 getcontext(&g->context);
1259 if(g->entry != nil) {
1260 // Got here from mcall.
1261 void (*pfn)(G*) = (void (*)(G*))g->entry;
1262 G* gp = (G*)g->param;
1263 pfn(gp);
1264 *(int*)0x22 = 0x22;
1267 // Initialize this thread to use the m.
1268 runtime_minit();
1270 #ifdef USING_SPLIT_STACK
1272 int dont_block_signals = 0;
1273 __splitstack_block_signals(&dont_block_signals, nil);
1275 #endif
1278 // newextram allocates an m and puts it on the extra list.
1279 // It is called with a working local m, so that it can do things
1280 // like call schedlock and allocate.
1281 void
1282 runtime_newextram(void)
1284 M *mp, *mnext;
1285 G *gp;
1286 byte *g0_sp, *sp;
1287 size_t g0_spsize, spsize;
1289 // Create extra goroutine locked to extra m.
1290 // The goroutine is the context in which the cgo callback will run.
1291 // The sched.pc will never be returned to, but setting it to
1292 // runtime.goexit makes clear to the traceback routines where
1293 // the goroutine stack ends.
1294 mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize);
1295 gp = runtime_malg(StackMin, &sp, &spsize);
1296 gp->status = Gdead;
1297 mp->curg = gp;
1298 mp->locked = LockInternal;
1299 mp->lockedg = gp;
1300 gp->lockedm = mp;
1301 gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
1302 // put on allg for garbage collector
1303 allgadd(gp);
1305 // The context for gp will be set up in runtime_needm. But
1306 // here we need to set up the context for g0.
1307 getcontext(&mp->g0->context);
1308 mp->g0->context.uc_stack.ss_sp = g0_sp;
1309 mp->g0->context.uc_stack.ss_size = g0_spsize;
1310 makecontext(&mp->g0->context, kickoff, 0);
1312 // Add m to the extra list.
1313 mnext = lockextra(true);
1314 mp->schedlink = mnext;
1315 unlockextra(mp);
1318 // dropm is called when a cgo callback has called needm but is now
1319 // done with the callback and returning back into the non-Go thread.
1320 // It puts the current m back onto the extra list.
1322 // The main expense here is the call to signalstack to release the
1323 // m's signal stack, and then the call to needm on the next callback
1324 // from this thread. It is tempting to try to save the m for next time,
1325 // which would eliminate both these costs, but there might not be
1326 // a next time: the current thread (which Go does not control) might exit.
1327 // If we saved the m for that thread, there would be an m leak each time
1328 // such a thread exited. Instead, we acquire and release an m on each
1329 // call. These should typically not be scheduling operations, just a few
1330 // atomics, so the cost should be small.
1332 // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
1333 // variable using pthread_key_create. Unlike the pthread keys we already use
1334 // on OS X, this dummy key would never be read by Go code. It would exist
1335 // only so that we could register a thread-exit-time destructor.
1336 // That destructor would put the m back onto the extra list.
1337 // This is purely a performance optimization. The current version,
1338 // in which dropm happens on each cgo call, is still correct too.
1339 // We may have to keep the current version on systems with cgo
1340 // but without pthreads, like Windows.
1341 void
1342 runtime_dropm(void)
1344 M *mp, *mnext;
1346 // Undo whatever initialization minit did during needm.
1347 runtime_unminit();
1349 // Clear m and g, and return m to the extra list.
1350 // After the call to setmg we can only call nosplit functions.
1351 mp = m;
1352 runtime_setmg(nil, nil);
1354 mp->curg->status = Gdead;
1355 mp->curg->gcstack = nil;
1356 mp->curg->gcnext_sp = nil;
1358 mnext = lockextra(true);
1359 mp->schedlink = mnext;
1360 unlockextra(mp);
1363 #define MLOCKED ((M*)1)
1365 // lockextra locks the extra list and returns the list head.
1366 // The caller must unlock the list by storing a new list head
1367 // to runtime.extram. If nilokay is true, then lockextra will
1368 // return a nil list head if that's what it finds. If nilokay is false,
1369 // lockextra will keep waiting until the list head is no longer nil.
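// Typical use, popping one m as runtime_needm does below:
//	mp = lockextra(false);       // spin until a non-nil head can be stolen
//	unlockextra(mp->schedlink);  // publish the new head, releasing the lock
// runtime_newextram pushes instead: it links the new m to the old head
// returned by lockextra(true) and then calls unlockextra(mp).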
1370 static M*
1371 lockextra(bool nilokay)
1373 M *mp;
1374 void (*yield)(void);
1376 for(;;) {
1377 mp = runtime_atomicloadp(&runtime_extram);
1378 if(mp == MLOCKED) {
1379 yield = runtime_osyield;
1380 yield();
1381 continue;
1383 if(mp == nil && !nilokay) {
1384 runtime_usleep(1);
1385 continue;
1387 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1388 yield = runtime_osyield;
1389 yield();
1390 continue;
1392 break;
1394 return mp;
1397 static void
1398 unlockextra(M *mp)
1400 runtime_atomicstorep(&runtime_extram, mp);
1403 static int32
1404 countextra()
1406 M *mp, *mc;
1407 int32 c;
1409 for(;;) {
1410 mp = runtime_atomicloadp(&runtime_extram);
1411 if(mp == MLOCKED) {
1412 runtime_osyield();
1413 continue;
1415 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1416 runtime_osyield();
1417 continue;
1419 c = 0;
1420 for(mc = mp; mc != nil; mc = mc->schedlink)
1421 c++;
1422 runtime_atomicstorep(&runtime_extram, mp);
1423 return c;
1427 // Create a new m. It will start off with a call to fn, or else the scheduler.
1428 static void
1429 newm(void(*fn)(void), P *p)
1431 M *mp;
1433 mp = runtime_allocm(p, -1, nil, nil);
1434 mp->nextp = p;
1435 mp->mstartfn = fn;
1437 runtime_newosproc(mp);
1440 // Stops execution of the current m until new work is available.
1441 // Returns with acquired P.
1442 static void
1443 stopm(void)
1445 if(m->locks)
1446 runtime_throw("stopm holding locks");
1447 if(m->p)
1448 runtime_throw("stopm holding p");
1449 if(m->spinning) {
1450 m->spinning = false;
1451 runtime_xadd(&runtime_sched.nmspinning, -1);
1454 retry:
1455 runtime_lock(&runtime_sched);
1456 mput(m);
1457 runtime_unlock(&runtime_sched);
1458 runtime_notesleep(&m->park);
1459 runtime_noteclear(&m->park);
1460 if(m->helpgc) {
1461 runtime_gchelper();
1462 m->helpgc = 0;
1463 m->mcache = nil;
1464 goto retry;
1466 acquirep(m->nextp);
1467 m->nextp = nil;
1470 static void
1471 mspinning(void)
1473 m->spinning = true;
1476 // Schedules some M to run the p (creates an M if necessary).
1477 // If p==nil, tries to get an idle P, if no idle P's does nothing.
1478 static void
1479 startm(P *p, bool spinning)
1481 M *mp;
1482 void (*fn)(void);
1484 runtime_lock(&runtime_sched);
1485 if(p == nil) {
1486 p = pidleget();
1487 if(p == nil) {
1488 runtime_unlock(&runtime_sched);
1489 if(spinning)
1490 runtime_xadd(&runtime_sched.nmspinning, -1);
1491 return;
1494 mp = mget();
1495 runtime_unlock(&runtime_sched);
1496 if(mp == nil) {
1497 fn = nil;
1498 if(spinning)
1499 fn = mspinning;
1500 newm(fn, p);
1501 return;
1503 if(mp->spinning)
1504 runtime_throw("startm: m is spinning");
1505 if(mp->nextp)
1506 runtime_throw("startm: m has p");
1507 mp->spinning = spinning;
1508 mp->nextp = p;
1509 runtime_notewakeup(&mp->park);
1512 // Hands off P from syscall or locked M.
1513 static void
1514 handoffp(P *p)
1516 // if it has local work, start it straight away
1517 if(p->runqhead != p->runqtail || runtime_sched.runqsize) {
1518 startm(p, false);
1519 return;
1521 // no local work, check that there are no spinning/idle M's,
1522 // otherwise our help is not required
1523 if(runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) == 0 && // TODO: fast atomic
1524 runtime_cas(&runtime_sched.nmspinning, 0, 1)) {
1525 startm(p, true);
1526 return;
1528 runtime_lock(&runtime_sched);
1529 if(runtime_sched.gcwaiting) {
1530 p->status = Pgcstop;
1531 if(--runtime_sched.stopwait == 0)
1532 runtime_notewakeup(&runtime_sched.stopnote);
1533 runtime_unlock(&runtime_sched);
1534 return;
1536 if(runtime_sched.runqsize) {
1537 runtime_unlock(&runtime_sched);
1538 startm(p, false);
1539 return;
1541 // If this is the last running P and nobody is polling the network,
1542 // we need to wake up another M to poll the network.
1543 if(runtime_sched.npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched.lastpoll) != 0) {
1544 runtime_unlock(&runtime_sched);
1545 startm(p, false);
1546 return;
1548 pidleput(p);
1549 runtime_unlock(&runtime_sched);
1552 // Tries to add one more P to execute G's.
1553 // Called when a G is made runnable (newproc, ready).
1554 static void
1555 wakep(void)
1557 // be conservative about spinning threads
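// The cas leaves at most one new spinning M in flight; if startm finds no
// idle P it undoes the nmspinning increment itself (see startm above).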
1558 if(!runtime_cas(&runtime_sched.nmspinning, 0, 1))
1559 return;
1560 startm(nil, true);
1563 // Stops execution of the current m that is locked to a g until the g is runnable again.
1564 // Returns with acquired P.
1565 static void
1566 stoplockedm(void)
1568 P *p;
1570 if(m->lockedg == nil || m->lockedg->lockedm != m)
1571 runtime_throw("stoplockedm: inconsistent locking");
1572 if(m->p) {
1573 // Schedule another M to run this p.
1574 p = releasep();
1575 handoffp(p);
1577 incidlelocked(1);
1578 // Wait until another thread schedules lockedg again.
1579 runtime_notesleep(&m->park);
1580 runtime_noteclear(&m->park);
1581 if(m->lockedg->status != Grunnable)
1582 runtime_throw("stoplockedm: not runnable");
1583 acquirep(m->nextp);
1584 m->nextp = nil;
1587 // Schedules the locked m to run the locked gp.
1588 static void
1589 startlockedm(G *gp)
1591 M *mp;
1592 P *p;
1594 mp = gp->lockedm;
1595 if(mp == m)
1596 runtime_throw("startlockedm: locked to me");
1597 if(mp->nextp)
1598 runtime_throw("startlockedm: m has p");
1599 // directly handoff current P to the locked m
1600 incidlelocked(-1);
1601 p = releasep();
1602 mp->nextp = p;
1603 runtime_notewakeup(&mp->park);
1604 stopm();
1607 // Stops the current m for stoptheworld.
1608 // Returns when the world is restarted.
1609 static void
1610 gcstopm(void)
1612 P *p;
1614 if(!runtime_sched.gcwaiting)
1615 runtime_throw("gcstopm: not waiting for gc");
1616 if(m->spinning) {
1617 m->spinning = false;
1618 runtime_xadd(&runtime_sched.nmspinning, -1);
1620 p = releasep();
1621 runtime_lock(&runtime_sched);
1622 p->status = Pgcstop;
1623 if(--runtime_sched.stopwait == 0)
1624 runtime_notewakeup(&runtime_sched.stopnote);
1625 runtime_unlock(&runtime_sched);
1626 stopm();
1629 // Schedules gp to run on the current M.
1630 // Never returns.
1631 static void
1632 execute(G *gp)
1634 int32 hz;
1636 if(gp->status != Grunnable) {
1637 runtime_printf("execute: bad g status %d\n", gp->status);
1638 runtime_throw("execute: bad g status");
1640 gp->status = Grunning;
1641 gp->waitsince = 0;
1642 m->p->schedtick++;
1643 m->curg = gp;
1644 gp->m = m;
1646 // Check whether the profiler needs to be turned on or off.
1647 hz = runtime_sched.profilehz;
1648 if(m->profilehz != hz)
1649 runtime_resetcpuprofiler(hz);
1651 runtime_gogo(gp);
1654 // Finds a runnable goroutine to execute.
1655 // Tries to steal from other P's, get g from global queue, poll network.
1656 static G*
1657 findrunnable(void)
1659 G *gp;
1660 P *p;
1661 int32 i;
1663 top:
1664 if(runtime_sched.gcwaiting) {
1665 gcstopm();
1666 goto top;
1668 if(runtime_fingwait && runtime_fingwake && (gp = runtime_wakefing()) != nil)
1669 runtime_ready(gp);
1670 // local runq
1671 gp = runqget(m->p);
1672 if(gp)
1673 return gp;
1674 // global runq
1675 if(runtime_sched.runqsize) {
1676 runtime_lock(&runtime_sched);
1677 gp = globrunqget(m->p, 0);
1678 runtime_unlock(&runtime_sched);
1679 if(gp)
1680 return gp;
1682 // poll network
1683 gp = runtime_netpoll(false); // non-blocking
1684 if(gp) {
1685 injectglist(gp->schedlink);
1686 gp->status = Grunnable;
1687 return gp;
1689 // If number of spinning M's >= number of busy P's, block.
1690 // This is necessary to prevent excessive CPU consumption
1691 // when GOMAXPROCS>>1 but the program parallelism is low.
1692 if(!m->spinning && 2 * runtime_atomicload(&runtime_sched.nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched.npidle)) // TODO: fast atomic
1693 goto stop;
1694 if(!m->spinning) {
1695 m->spinning = true;
1696 runtime_xadd(&runtime_sched.nmspinning, 1);
1698 // random steal from other P's
1699 for(i = 0; i < 2*runtime_gomaxprocs; i++) {
1700 if(runtime_sched.gcwaiting)
1701 goto top;
1702 p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
1703 if(p == m->p)
1704 gp = runqget(p);
1705 else
1706 gp = runqsteal(m->p, p);
1707 if(gp)
1708 return gp;
1710 stop:
1711 // return P and block
1712 runtime_lock(&runtime_sched);
1713 if(runtime_sched.gcwaiting) {
1714 runtime_unlock(&runtime_sched);
1715 goto top;
1717 if(runtime_sched.runqsize) {
1718 gp = globrunqget(m->p, 0);
1719 runtime_unlock(&runtime_sched);
1720 return gp;
1722 p = releasep();
1723 pidleput(p);
1724 runtime_unlock(&runtime_sched);
1725 if(m->spinning) {
1726 m->spinning = false;
1727 runtime_xadd(&runtime_sched.nmspinning, -1);
1729 // check all runqueues once again
1730 for(i = 0; i < runtime_gomaxprocs; i++) {
1731 p = runtime_allp[i];
1732 if(p && p->runqhead != p->runqtail) {
1733 runtime_lock(&runtime_sched);
1734 p = pidleget();
1735 runtime_unlock(&runtime_sched);
1736 if(p) {
1737 acquirep(p);
1738 goto top;
1740 break;
1743 // poll network
1744 if(runtime_xchg64(&runtime_sched.lastpoll, 0) != 0) {
1745 if(m->p)
1746 runtime_throw("findrunnable: netpoll with p");
1747 if(m->spinning)
1748 runtime_throw("findrunnable: netpoll with spinning");
1749 gp = runtime_netpoll(true); // block until new work is available
1750 runtime_atomicstore64(&runtime_sched.lastpoll, runtime_nanotime());
1751 if(gp) {
1752 runtime_lock(&runtime_sched);
1753 p = pidleget();
1754 runtime_unlock(&runtime_sched);
1755 if(p) {
1756 acquirep(p);
1757 injectglist(gp->schedlink);
1758 gp->status = Grunnable;
1759 return gp;
1761 injectglist(gp);
1764 stopm();
1765 goto top;
1768 static void
1769 resetspinning(void)
1771 int32 nmspinning;
1773 if(m->spinning) {
1774 m->spinning = false;
1775 nmspinning = runtime_xadd(&runtime_sched.nmspinning, -1);
1776 if(nmspinning < 0)
1777 runtime_throw("findrunnable: negative nmspinning");
1778 } else
1779 nmspinning = runtime_atomicload(&runtime_sched.nmspinning);
1781 // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
1782 // so see if we need to wake up another P here.
1783 if (nmspinning == 0 && runtime_atomicload(&runtime_sched.npidle) > 0)
1784 wakep();
1787 // Injects the list of runnable G's into the scheduler.
1788 // Can run concurrently with GC.
1789 static void
1790 injectglist(G *glist)
1792 int32 n;
1793 G *gp;
1795 if(glist == nil)
1796 return;
1797 runtime_lock(&runtime_sched);
1798 for(n = 0; glist; n++) {
1799 gp = glist;
1800 glist = gp->schedlink;
1801 gp->status = Grunnable;
1802 globrunqput(gp);
1804 runtime_unlock(&runtime_sched);
1806 for(; n && runtime_sched.npidle; n--)
1807 startm(nil, false);
1810 // One round of scheduler: find a runnable goroutine and execute it.
1811 // Never returns.
1812 static void
1813 schedule(void)
1815 G *gp;
1816 uint32 tick;
1818 if(m->locks)
1819 runtime_throw("schedule: holding locks");
1821 top:
1822 if(runtime_sched.gcwaiting) {
1823 gcstopm();
1824 goto top;
1827 gp = nil;
1828 // Check the global runnable queue once in a while to ensure fairness.
1829 // Otherwise two goroutines can completely occupy the local runqueue
1830 // by constantly respawning each other.
1831 tick = m->p->schedtick;
1832 // This is a fancy way to say tick%61==0,
1833 // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
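// For example, with tick == 61: 61*0x4325c53f == (1ULL<<36) + 3, so the
// shifted product is 1 and the expression below yields 61 - 1*61 == 0,
// matching 61 % 61 == 0.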
1834 if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched.runqsize > 0) {
1835 runtime_lock(&runtime_sched);
1836 gp = globrunqget(m->p, 1);
1837 runtime_unlock(&runtime_sched);
1838 if(gp)
1839 resetspinning();
1841 if(gp == nil) {
1842 gp = runqget(m->p);
1843 if(gp && m->spinning)
1844 runtime_throw("schedule: spinning with local work");
1846 if(gp == nil) {
1847 gp = findrunnable(); // blocks until work is available
1848 resetspinning();
1851 if(gp->lockedm) {
1852 // Hands off own p to the locked m,
1853 // then blocks waiting for a new p.
1854 startlockedm(gp);
1855 goto top;
1858 execute(gp);
1861 // Puts the current goroutine into a waiting state and calls unlockf.
1862 // If unlockf returns false, the goroutine is resumed.
1863 void
1864 runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason)
1866 if(g->status != Grunning)
1867 runtime_throw("bad g status");
1868 m->waitlock = lock;
1869 m->waitunlockf = unlockf;
1870 g->waitreason = reason;
1871 runtime_mcall(park0);
1874 static bool
1875 parkunlock(G *gp, void *lock)
1877 USED(gp);
1878 runtime_unlock(lock);
1879 return true;
1882 // Puts the current goroutine into a waiting state and unlocks the lock.
1883 // The goroutine can be made runnable again by calling runtime_ready(gp).
1884 void
1885 runtime_parkunlock(Lock *lock, const char *reason)
1887 runtime_park(parkunlock, lock, reason);
1890 // runtime_park continuation on g0.
1891 static void
1892 park0(G *gp)
1894 bool ok;
1896 gp->status = Gwaiting;
1897 gp->m = nil;
1898 m->curg = nil;
1899 if(m->waitunlockf) {
1900 ok = m->waitunlockf(gp, m->waitlock);
1901 m->waitunlockf = nil;
1902 m->waitlock = nil;
1903 if(!ok) {
1904 gp->status = Grunnable;
1905 execute(gp); // Schedule it back, never returns.
1908 if(m->lockedg) {
1909 stoplockedm();
1910 execute(gp); // Never returns.
1912 schedule();
1915 // Scheduler yield.
1916 void
1917 runtime_gosched(void)
1919 if(g->status != Grunning)
1920 runtime_throw("bad g status");
1921 runtime_mcall(runtime_gosched0);
1924 // runtime_gosched continuation on g0.
1925 void
1926 runtime_gosched0(G *gp)
1928 gp->status = Grunnable;
1929 gp->m = nil;
1930 m->curg = nil;
1931 runtime_lock(&runtime_sched);
1932 globrunqput(gp);
1933 runtime_unlock(&runtime_sched);
1934 if(m->lockedg) {
1935 stoplockedm();
1936 execute(gp); // Never returns.
1938 schedule();
1941 // Finishes execution of the current goroutine.
1942 // Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack).
1943 // Since it does not return, it does not matter. But if it is preempted
1944 // at the split stack check, GC will complain about inconsistent sp.
1945 void runtime_goexit(void) __attribute__ ((noinline));
1946 void
1947 runtime_goexit(void)
1949 if(g->status != Grunning)
1950 runtime_throw("bad g status");
1951 runtime_mcall(goexit0);
1954 // runtime_goexit continuation on g0.
1955 static void
1956 goexit0(G *gp)
1958 gp->status = Gdead;
1959 gp->entry = nil;
1960 gp->m = nil;
1961 gp->lockedm = nil;
1962 gp->paniconfault = 0;
1963 gp->defer = nil; // should be true already but just in case.
1964 gp->panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
1965 gp->writenbuf = 0;
1966 gp->writebuf = nil;
1967 gp->waitreason = nil;
1968 gp->param = nil;
1969 m->curg = nil;
1970 m->lockedg = nil;
1971 if(m->locked & ~LockExternal) {
1972 runtime_printf("invalid m->locked = %d\n", m->locked);
1973 runtime_throw("internal lockOSThread error");
1975 m->locked = 0;
1976 gfput(m->p, gp);
1977 schedule();
1980 // The goroutine g is about to enter a system call.
1981 // Record that it's not using the cpu anymore.
1982 // This is called only from the go syscall library and cgocall,
1983 // not from the low-level system calls used by the runtime.
1985 // Entersyscall cannot split the stack: the runtime_gosave must
1986 // make g->sched refer to the caller's stack segment, because
1987 // entersyscall is going to return immediately after.
1989 void runtime_entersyscall(void) __attribute__ ((no_split_stack));
1990 static void doentersyscall(void) __attribute__ ((no_split_stack, noinline));
1992 void
1993 runtime_entersyscall()
1995 // Save the registers in the g structure so that any pointers
1996 // held in registers will be seen by the garbage collector.
1997 getcontext(&g->gcregs);
1999 // Do the work in a separate function, so that this function
2000 // doesn't save any registers on its own stack. If this
2001 // function does save any registers, we might store the wrong
2002 // value in the call to getcontext.
2004 // FIXME: This assumes that we do not need to save any
2005 // callee-saved registers to access the TLS variable g. We
2006 // don't want to put the ucontext_t on the stack because it is
2007 // large and we can not split the stack here.
2008 doentersyscall();
2011 static void
2012 doentersyscall()
2014 // Disable preemption because during this function g is in Gsyscall status,
2015 // but can have inconsistent g->sched, do not let GC observe it.
2016 m->locks++;
2018 // Leave SP around for GC and traceback.
2019 #ifdef USING_SPLIT_STACK
2020 g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
2021 &g->gcnext_segment, &g->gcnext_sp,
2022 &g->gcinitial_sp);
2023 #else
2025 void *v;
2027 g->gcnext_sp = (byte *) &v;
2029 #endif
2031 g->status = Gsyscall;
2033 if(runtime_atomicload(&runtime_sched.sysmonwait)) { // TODO: fast atomic
2034 runtime_lock(&runtime_sched);
2035 if(runtime_atomicload(&runtime_sched.sysmonwait)) {
2036 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2037 runtime_notewakeup(&runtime_sched.sysmonnote);
2039 runtime_unlock(&runtime_sched);
2042 m->mcache = nil;
2043 m->p->m = nil;
2044 runtime_atomicstore(&m->p->status, Psyscall);
2045 if(runtime_atomicload(&runtime_sched.gcwaiting)) {
2046 runtime_lock(&runtime_sched);
2047 if (runtime_sched.stopwait > 0 && runtime_cas(&m->p->status, Psyscall, Pgcstop)) {
2048 if(--runtime_sched.stopwait == 0)
2049 runtime_notewakeup(&runtime_sched.stopnote);
2051 runtime_unlock(&runtime_sched);
2054 m->locks--;
2057 // The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
2058 void
2059 runtime_entersyscallblock(void)
2061 P *p;
2063 m->locks++; // see comment in entersyscall
2065 // Leave SP around for GC and traceback.
2066 #ifdef USING_SPLIT_STACK
2067 g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
2068 &g->gcnext_segment, &g->gcnext_sp,
2069 &g->gcinitial_sp);
2070 #else
2071 g->gcnext_sp = (byte *) &p;
2072 #endif
2074 // Save the registers in the g structure so that any pointers
2075 // held in registers will be seen by the garbage collector.
2076 getcontext(&g->gcregs);
2078 g->status = Gsyscall;
2080 p = releasep();
2081 handoffp(p);
2082 if(g->isbackground) // do not consider blocked scavenger for deadlock detection
2083 incidlelocked(1);
2085 m->locks--;
2088 // The goroutine g exited its system call.
2089 // Arrange for it to run on a cpu again.
2090 // This is called only from the go syscall library, not
2091 // from the low-level system calls used by the runtime.
2092 void
2093 runtime_exitsyscall(void)
2095 G *gp;
2097 m->locks++; // see comment in entersyscall
2099 gp = g;
2100 if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
2101 incidlelocked(-1);
2103 g->waitsince = 0;
2104 if(exitsyscallfast()) {
2105 // There's a cpu for us, so we can run.
2106 m->p->syscalltick++;
2107 gp->status = Grunning;
2108 // Garbage collector isn't running (since we are),
2109 // so okay to clear gcstack and gcsp.
2110 #ifdef USING_SPLIT_STACK
2111 gp->gcstack = nil;
2112 #endif
2113 gp->gcnext_sp = nil;
2114 runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
2115 m->locks--;
2116 return;
2119 m->locks--;
2121 // Call the scheduler.
2122 runtime_mcall(exitsyscall0);
2124 // Scheduler returned, so we're allowed to run now.
2125 // Delete the gcstack information that we left for
2126 // the garbage collector during the system call.
2127 // Must wait until now because until gosched returns
2128 // we don't know for sure that the garbage collector
2129 // is not running.
2130 #ifdef USING_SPLIT_STACK
2131 gp->gcstack = nil;
2132 #endif
2133 gp->gcnext_sp = nil;
2134 runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
2136 // Don't refer to m again, we might be running on a different
2137 // thread after returning from runtime_mcall.
2138 runtime_m()->p->syscalltick++;
2141 static bool
2142 exitsyscallfast(void)
2144 P *p;
2146 // Freezetheworld sets stopwait but does not retake P's.
2147 if(runtime_sched.stopwait) {
2148 m->p = nil;
2149 return false;
2152 // Try to re-acquire the last P.
2153 if(m->p && m->p->status == Psyscall && runtime_cas(&m->p->status, Psyscall, Prunning)) {
2154 // There's a cpu for us, so we can run.
2155 m->mcache = m->p->mcache;
2156 m->p->m = m;
2157 return true;
2159 // Try to get any other idle P.
2160 m->p = nil;
2161 if(runtime_sched.pidle) {
2162 runtime_lock(&runtime_sched);
2163 p = pidleget();
2164 if(p && runtime_atomicload(&runtime_sched.sysmonwait)) {
2165 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2166 runtime_notewakeup(&runtime_sched.sysmonnote);
2168 runtime_unlock(&runtime_sched);
2169 if(p) {
2170 acquirep(p);
2171 return true;
2174 return false;
2177 // runtime_exitsyscall slow path on g0.
2178 // Failed to acquire P, enqueue gp as runnable.
2179 static void
2180 exitsyscall0(G *gp)
2182 P *p;
2184 gp->status = Grunnable;
2185 gp->m = nil;
2186 m->curg = nil;
2187 runtime_lock(&runtime_sched);
2188 p = pidleget();
2189 if(p == nil)
2190 globrunqput(gp);
2191 else if(runtime_atomicload(&runtime_sched.sysmonwait)) {
2192 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2193 runtime_notewakeup(&runtime_sched.sysmonnote);
2195 runtime_unlock(&runtime_sched);
2196 if(p) {
2197 acquirep(p);
2198 execute(gp); // Never returns.
2200 if(m->lockedg) {
2201 // Wait until another thread schedules gp and so m again.
2202 stoplockedm();
2203 execute(gp); // Never returns.
2205 stopm();
2206 schedule(); // Never returns.
2209 // Called from syscall package before fork.
2210 void syscall_runtime_BeforeFork(void)
2211 __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork");
2212 void
2213 syscall_runtime_BeforeFork(void)
2215 // Fork can hang if preempted with signals frequently enough (see issue 5517).
2216 // Ensure that we stay on the same M where we disable profiling.
2217 runtime_m()->locks++;
2218 if(runtime_m()->profilehz != 0)
2219 runtime_resetcpuprofiler(0);
2222 // Called from syscall package after fork in parent.
2223 void syscall_runtime_AfterFork(void)
2224 __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork");
2225 void
2226 syscall_runtime_AfterFork(void)
2228 int32 hz;
2230 hz = runtime_sched.profilehz;
2231 if(hz != 0)
2232 runtime_resetcpuprofiler(hz);
2233 runtime_m()->locks--;
2236 // Allocate a new g, with a stack big enough for stacksize bytes.
2237 G*
2238 runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
2240 G *newg;
2242 newg = allocg();
2243 if(stacksize >= 0) {
2244 #if USING_SPLIT_STACK
2245 int dont_block_signals = 0;
2247 *ret_stack = __splitstack_makecontext(stacksize,
2248 &newg->stack_context[0],
2249 ret_stacksize);
2250 __splitstack_block_signals_context(&newg->stack_context[0],
2251 &dont_block_signals, nil);
2252 #else
2253 // In 64-bit mode, the maximum Go allocation space is
2254 // 128G. Our stack size is 4M, which only permits 32K
2255 // goroutines. In order to not limit ourselves,
2256 // allocate the stacks out of separate memory. In
2257 // 32-bit mode, the Go allocation space is all of
2258 // memory anyhow.
2259 if(sizeof(void*) == 8) {
2260 void *p = runtime_SysAlloc(stacksize, &mstats.other_sys);
2261 if(p == nil)
2262 runtime_throw("runtime: cannot allocate memory for goroutine stack");
2263 *ret_stack = (byte*)p;
2264 } else {
2265 *ret_stack = runtime_mallocgc(stacksize, 0, FlagNoProfiling|FlagNoGC);
2266 runtime_xadd(&runtime_stacks_sys, stacksize);
2268 *ret_stacksize = stacksize;
2269 newg->gcinitial_sp = *ret_stack;
2270 newg->gcstack_size = (size_t)stacksize;
2271 #endif
2273 return newg;
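// Illustrative sketch (not part of this file): the arithmetic behind the
// 64-bit branch above. A 128G allocation space divided into 4M stacks gives
// only 32768 goroutines, which is why stacks are allocated separately on
// 64-bit targets. The function name is hypothetical.

#include <stdint.h>

static uint64_t max_goroutines_in_arena(void) {
	uint64_t arena = 128ull * 1024 * 1024 * 1024;	// 128G Go allocation space
	uint64_t stack = 4ull * 1024 * 1024;		// 4M default 64-bit stack (StackMin)
	return arena / stack;				// == 32768, i.e. "32K goroutines"
}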
2276 /* For runtime package testing. */
2279 // Create a new g running fn with siz bytes of arguments.
2280 // Put it on the queue of g's waiting to run.
2281 // The compiler turns a go statement into a call to this.
2282 // Cannot split the stack because it assumes that the arguments
2283 // are available sequentially after &fn; they would not be
2284 // copied if a stack split occurred. It's OK for this to call
2285 // functions that split the stack.
2286 void runtime_testing_entersyscall(int32)
2287 __asm__ (GOSYM_PREFIX "runtime.entersyscall");
2288 void
2289 runtime_testing_entersyscall(int32 dummy __attribute__ ((unused)))
2291 runtime_entersyscall();
2294 void runtime_testing_exitsyscall(int32)
2295 __asm__ (GOSYM_PREFIX "runtime.exitsyscall");
2297 void
2298 runtime_testing_exitsyscall(int32 dummy __attribute__ ((unused)))
2300 runtime_exitsyscall();
2303 G*
2304 __go_go(void (*fn)(void*), void* arg)
2306 byte *sp;
2307 size_t spsize;
2308 G *newg;
2309 P *p;
2311 //runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
2312 if(fn == nil) {
2313 m->throwing = -1; // do not dump full stacks
2314 runtime_throw("go of nil func value");
2316 m->locks++; // disable preemption because it can be holding p in a local var
2318 p = m->p;
2319 if((newg = gfget(p)) != nil) {
2320 #ifdef USING_SPLIT_STACK
2321 int dont_block_signals = 0;
2323 sp = __splitstack_resetcontext(&newg->stack_context[0],
2324 &spsize);
2325 __splitstack_block_signals_context(&newg->stack_context[0],
2326 &dont_block_signals, nil);
2327 #else
2328 sp = newg->gcinitial_sp;
2329 spsize = newg->gcstack_size;
2330 if(spsize == 0)
2331 runtime_throw("bad spsize in __go_go");
2332 newg->gcnext_sp = sp;
2333 #endif
2334 } else {
2335 newg = runtime_malg(StackMin, &sp, &spsize);
2336 allgadd(newg);
2339 newg->entry = (byte*)fn;
2340 newg->param = arg;
2341 newg->gopc = (uintptr)__builtin_return_address(0);
2342 newg->status = Grunnable;
2343 if(p->goidcache == p->goidcacheend) {
2344 p->goidcache = runtime_xadd64(&runtime_sched.goidgen, GoidCacheBatch);
2345 p->goidcacheend = p->goidcache + GoidCacheBatch;
2347 newg->goid = p->goidcache++;
2350 // Avoid warnings about variables clobbered by
2351 // longjmp.
2352 byte * volatile vsp = sp;
2353 size_t volatile vspsize = spsize;
2354 G * volatile vnewg = newg;
2356 getcontext(&vnewg->context);
2357 vnewg->context.uc_stack.ss_sp = vsp;
2358 #ifdef MAKECONTEXT_STACK_TOP
2359 vnewg->context.uc_stack.ss_sp += vspsize;
2360 #endif
2361 vnewg->context.uc_stack.ss_size = vspsize;
2362 makecontext(&vnewg->context, kickoff, 0);
2364 runqput(p, vnewg);
2366 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
2367 wakep();
2368 m->locks--;
2369 return vnewg;
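// Illustrative sketch (not part of this file): the goidcache logic above lets
// each P claim a batch of goroutine ids from the shared generator with a
// single atomic add and then hand them out without further synchronization.
// A standalone model assuming C11 <stdatomic.h>; ID_BATCH, id_cache and
// next_id are hypothetical names. Note that C11 atomic_fetch_add returns the
// old value, whereas runtime_xadd64 returns the new one.

#include <stdatomic.h>
#include <stdint.h>

#define ID_BATCH 16	// plays the role of GoidCacheBatch

struct id_cache { uint64_t next, end; };

static uint64_t next_id(struct id_cache *c, _Atomic uint64_t *gen) {
	if (c->next == c->end) {
		// Refill: claim [base, base+ID_BATCH) in one atomic step.
		uint64_t base = atomic_fetch_add_explicit(gen, ID_BATCH,
							  memory_order_relaxed);
		c->next = base;
		c->end = base + ID_BATCH;
	}
	return c->next++;
}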
2373 static void
2374 allgadd(G *gp)
2376 G **new;
2377 uintptr cap;
2379 runtime_lock(&allglock);
2380 if(runtime_allglen >= allgcap) {
2381 cap = 4096/sizeof(new[0]);
2382 if(cap < 2*allgcap)
2383 cap = 2*allgcap;
2384 new = runtime_malloc(cap*sizeof(new[0]));
2385 if(new == nil)
2386 runtime_throw("runtime: cannot allocate memory");
2387 if(runtime_allg != nil) {
2388 runtime_memmove(new, runtime_allg, runtime_allglen*sizeof(new[0]));
2389 runtime_free(runtime_allg);
2391 runtime_allg = new;
2392 allgcap = cap;
2394 runtime_allg[runtime_allglen++] = gp;
2395 runtime_unlock(&allglock);
2398 // Put on gfree list.
2399 // If local list is too long, transfer a batch to the global list.
2400 static void
2401 gfput(P *p, G *gp)
2403 gp->schedlink = p->gfree;
2404 p->gfree = gp;
2405 p->gfreecnt++;
2406 if(p->gfreecnt >= 64) {
2407 runtime_lock(&runtime_sched.gflock);
2408 while(p->gfreecnt >= 32) {
2409 p->gfreecnt--;
2410 gp = p->gfree;
2411 p->gfree = gp->schedlink;
2412 gp->schedlink = runtime_sched.gfree;
2413 runtime_sched.gfree = gp;
2415 runtime_unlock(&runtime_sched.gflock);
2419 // Get from gfree list.
2420 // If local list is empty, grab a batch from global list.
2421 static G*
2422 gfget(P *p)
2424 G *gp;
2426 retry:
2427 gp = p->gfree;
2428 if(gp == nil && runtime_sched.gfree) {
2429 runtime_lock(&runtime_sched.gflock);
2430 while(p->gfreecnt < 32 && runtime_sched.gfree) {
2431 p->gfreecnt++;
2432 gp = runtime_sched.gfree;
2433 runtime_sched.gfree = gp->schedlink;
2434 gp->schedlink = p->gfree;
2435 p->gfree = gp;
2437 runtime_unlock(&runtime_sched.gflock);
2438 goto retry;
2440 if(gp) {
2441 p->gfree = gp->schedlink;
2442 p->gfreecnt--;
2444 return gp;
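// Illustrative sketch (not part of this file): gfput/gfget above implement a
// two-level free list: frees go to a per-P list first and spill to a global,
// lock-protected list in batches of 32 once the local list reaches 64
// entries; gets refill from the global list in the same batch size. A
// standalone model of the put side with the lock elided; all names here are
// hypothetical.

struct node { struct node *next; };
struct local_cache { struct node *head; int count; };
struct global_cache { struct node *head; };

static void cache_put(struct local_cache *lc, struct global_cache *gc,
		      struct node *n) {
	n->next = lc->head;
	lc->head = n;
	lc->count++;
	if (lc->count >= 64) {
		// Transfer a batch to the global list (take gc's lock in real code).
		while (lc->count >= 32) {
			struct node *t = lc->head;
			lc->head = t->next;
			lc->count--;
			t->next = gc->head;
			gc->head = t;
		}
	}
}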
2447 // Purge all cached G's from gfree list to the global list.
2448 static void
2449 gfpurge(P *p)
2451 G *gp;
2453 runtime_lock(&runtime_sched.gflock);
2454 while(p->gfreecnt) {
2455 p->gfreecnt--;
2456 gp = p->gfree;
2457 p->gfree = gp->schedlink;
2458 gp->schedlink = runtime_sched.gfree;
2459 runtime_sched.gfree = gp;
2461 runtime_unlock(&runtime_sched.gflock);
2464 void
2465 runtime_Breakpoint(void)
2467 runtime_breakpoint();
2470 void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched");
2472 void
2473 runtime_Gosched(void)
2475 runtime_gosched();
2478 // Implementation of runtime.GOMAXPROCS.
2479 // delete when scheduler is even stronger
2480 int32
2481 runtime_gomaxprocsfunc(int32 n)
2483 int32 ret;
2485 if(n > MaxGomaxprocs)
2486 n = MaxGomaxprocs;
2487 runtime_lock(&runtime_sched);
2488 ret = runtime_gomaxprocs;
2489 if(n <= 0 || n == ret) {
2490 runtime_unlock(&runtime_sched);
2491 return ret;
2493 runtime_unlock(&runtime_sched);
2495 runtime_semacquire(&runtime_worldsema, false);
2496 m->gcing = 1;
2497 runtime_stoptheworld();
2498 newprocs = n;
2499 m->gcing = 0;
2500 runtime_semrelease(&runtime_worldsema);
2501 runtime_starttheworld();
2503 return ret;
2506 // lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
2507 // after they modify m->locked. Do not allow preemption during this call,
2508 // or else the m might be different in this function than in the caller.
2509 static void
2510 lockOSThread(void)
2512 m->lockedg = g;
2513 g->lockedm = m;
2516 void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
2517 void
2518 runtime_LockOSThread(void)
2520 m->locked |= LockExternal;
2521 lockOSThread();
2524 void
2525 runtime_lockOSThread(void)
2527 m->locked += LockInternal;
2528 lockOSThread();
2532 // unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
2533 // after they update m->locked. Do not allow preemption during this call,
2534 // or else the m might be different in this function than in the caller.
2535 static void
2536 unlockOSThread(void)
2538 if(m->locked != 0)
2539 return;
2540 m->lockedg = nil;
2541 g->lockedm = nil;
2544 void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
2546 void
2547 runtime_UnlockOSThread(void)
2549 m->locked &= ~LockExternal;
2550 unlockOSThread();
2553 void
2554 runtime_unlockOSThread(void)
2556 if(m->locked < LockInternal)
2557 runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
2558 m->locked -= LockInternal;
2559 unlockOSThread();
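// Illustrative sketch (not part of this file): the m->locked bookkeeping used
// above treats the external API (runtime.LockOSThread) as an idempotent flag
// and the runtime-internal variant as a nestable count packed into the same
// word. The concrete bit layout below is an assumption for illustration.

#include <stdbool.h>
#include <stdint.h>

enum { LOCK_EXTERNAL = 1, LOCK_INTERNAL = 2 };	// internal locks count in steps of 2

static uint32_t lock_external(uint32_t locked)   { return locked | LOCK_EXTERNAL; }
static uint32_t unlock_external(uint32_t locked) { return locked & ~LOCK_EXTERNAL; }
static uint32_t lock_internal(uint32_t locked)   { return locked + LOCK_INTERNAL; }
static uint32_t unlock_internal(uint32_t locked) { return locked - LOCK_INTERNAL; }

static bool still_wired(uint32_t locked) {
	// The thread stays wired to its goroutine while either the external
	// flag is set or at least one internal lock is outstanding.
	return locked != 0;
}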
2562 bool
2563 runtime_lockedOSThread(void)
2565 return g->lockedm != nil && m->lockedg != nil;
2568 int32
2569 runtime_gcount(void)
2571 G *gp;
2572 int32 n, s;
2573 uintptr i;
2575 n = 0;
2576 runtime_lock(&allglock);
2577 // TODO(dvyukov): runtime.NumGoroutine() is O(N).
2578 // We do not want to increment/decrement a centralized counter in newproc/goexit
2579 // just to make runtime.NumGoroutine() faster.
2580 // A compromise solution would be to introduce per-P counters of active goroutines.
2581 for(i = 0; i < runtime_allglen; i++) {
2582 gp = runtime_allg[i];
2583 s = gp->status;
2584 if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
2585 n++;
2587 runtime_unlock(&allglock);
2588 return n;
2591 int32
2592 runtime_mcount(void)
2594 return runtime_sched.mcount;
2597 static struct {
2598 Lock;
2599 void (*fn)(uintptr*, int32);
2600 int32 hz;
2601 uintptr pcbuf[TracebackMaxFrames];
2602 Location locbuf[TracebackMaxFrames];
2603 } prof;
2605 static void System(void) {}
2606 static void GC(void) {}
2608 // Called if we receive a SIGPROF signal.
2609 void
2610 runtime_sigprof()
2612 M *mp = m;
2613 int32 n, i;
2614 bool traceback;
2616 if(prof.fn == nil || prof.hz == 0)
2617 return;
2619 if(mp == nil)
2620 return;
2622 // Profiling runs concurrently with GC, so it must not allocate.
2623 mp->mallocing++;
2625 traceback = true;
2627 if(mp->mcache == nil)
2628 traceback = false;
2630 runtime_lock(&prof);
2631 if(prof.fn == nil) {
2632 runtime_unlock(&prof);
2633 mp->mallocing--;
2634 return;
2636 n = 0;
2638 if(runtime_atomicload(&runtime_in_callers) > 0) {
2639 // If SIGPROF arrived while already fetching runtime
2640 // callers we can have trouble on older systems
2641 // because the unwind library calls dl_iterate_phdr
2642 // which was not recursive in the past.
2643 traceback = false;
2646 if(traceback) {
2647 n = runtime_callers(0, prof.locbuf, nelem(prof.locbuf), false);
2648 for(i = 0; i < n; i++)
2649 prof.pcbuf[i] = prof.locbuf[i].pc;
2651 if(!traceback || n <= 0) {
2652 n = 2;
2653 prof.pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
2654 if(mp->gcing || mp->helpgc)
2655 prof.pcbuf[1] = (uintptr)GC;
2656 else
2657 prof.pcbuf[1] = (uintptr)System;
2659 prof.fn(prof.pcbuf, n);
2660 runtime_unlock(&prof);
2661 mp->mallocing--;
2664 // Arrange to call fn with a traceback hz times a second.
2665 void
2666 runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
2668 // Force sane arguments.
2669 if(hz < 0)
2670 hz = 0;
2671 if(hz == 0)
2672 fn = nil;
2673 if(fn == nil)
2674 hz = 0;
2676 // Disable preemption, otherwise we can be rescheduled to another thread
2677 // that has profiling enabled.
2678 m->locks++;
2680 // Stop the profiler on this thread so that it is safe to lock prof;
2681 // if a profiling signal came in while we had prof locked,
2682 // it would deadlock.
2683 runtime_resetcpuprofiler(0);
2685 runtime_lock(&prof);
2686 prof.fn = fn;
2687 prof.hz = hz;
2688 runtime_unlock(&prof);
2689 runtime_lock(&runtime_sched);
2690 runtime_sched.profilehz = hz;
2691 runtime_unlock(&runtime_sched);
2693 if(hz != 0)
2694 runtime_resetcpuprofiler(hz);
2696 m->locks--;
2699 // Change number of processors. The world is stopped, sched is locked.
2700 static void
2701 procresize(int32 new)
2703 int32 i, old;
2704 bool empty;
2705 G *gp;
2706 P *p;
2708 old = runtime_gomaxprocs;
2709 if(old < 0 || old > MaxGomaxprocs || new <= 0 || new > MaxGomaxprocs)
2710 runtime_throw("procresize: invalid arg");
2711 // initialize new P's
2712 for(i = 0; i < new; i++) {
2713 p = runtime_allp[i];
2714 if(p == nil) {
2715 p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
2716 p->id = i;
2717 p->status = Pgcstop;
2718 runtime_atomicstorep(&runtime_allp[i], p);
2720 if(p->mcache == nil) {
2721 if(old==0 && i==0)
2722 p->mcache = m->mcache; // bootstrap
2723 else
2724 p->mcache = runtime_allocmcache();
2728 // redistribute runnable G's evenly
2729 // collect all runnable goroutines in global queue preserving FIFO order
2730 // FIFO order is required to ensure fairness even during frequent GCs
2731 // see http://golang.org/issue/7126
2732 empty = false;
2733 while(!empty) {
2734 empty = true;
2735 for(i = 0; i < old; i++) {
2736 p = runtime_allp[i];
2737 if(p->runqhead == p->runqtail)
2738 continue;
2739 empty = false;
2740 // pop from tail of local queue
2741 p->runqtail--;
2742 gp = p->runq[p->runqtail%nelem(p->runq)];
2743 // push onto head of global queue
2744 gp->schedlink = runtime_sched.runqhead;
2745 runtime_sched.runqhead = gp;
2746 if(runtime_sched.runqtail == nil)
2747 runtime_sched.runqtail = gp;
2748 runtime_sched.runqsize++;
2751 // fill local queues with at most nelem(p->runq)/2 goroutines
2752 // start at 1 because current M already executes some G and will acquire allp[0] below,
2753 // so if we have a spare G we want to put it into allp[1].
2754 for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched.runqsize > 0; i++) {
2755 gp = runtime_sched.runqhead;
2756 runtime_sched.runqhead = gp->schedlink;
2757 if(runtime_sched.runqhead == nil)
2758 runtime_sched.runqtail = nil;
2759 runtime_sched.runqsize--;
2760 runqput(runtime_allp[i%new], gp);
2763 // free unused P's
2764 for(i = new; i < old; i++) {
2765 p = runtime_allp[i];
2766 runtime_freemcache(p->mcache);
2767 p->mcache = nil;
2768 gfpurge(p);
2769 p->status = Pdead;
2770 // can't free P itself because it can be referenced by an M in syscall
2773 if(m->p)
2774 m->p->m = nil;
2775 m->p = nil;
2776 m->mcache = nil;
2777 p = runtime_allp[0];
2778 p->m = nil;
2779 p->status = Pidle;
2780 acquirep(p);
2781 for(i = new-1; i > 0; i--) {
2782 p = runtime_allp[i];
2783 p->status = Pidle;
2784 pidleput(p);
2786 runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
2789 // Associate p and the current m.
2790 static void
2791 acquirep(P *p)
2793 if(m->p || m->mcache)
2794 runtime_throw("acquirep: already in go");
2795 if(p->m || p->status != Pidle) {
2796 runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
2797 runtime_throw("acquirep: invalid p state");
2799 m->mcache = p->mcache;
2800 m->p = p;
2801 p->m = m;
2802 p->status = Prunning;
2805 // Disassociate p and the current m.
2806 static P*
2807 releasep(void)
2809 P *p;
2811 if(m->p == nil || m->mcache == nil)
2812 runtime_throw("releasep: invalid arg");
2813 p = m->p;
2814 if(p->m != m || p->mcache != m->mcache || p->status != Prunning) {
2815 runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
2816 m, m->p, p->m, m->mcache, p->mcache, p->status);
2817 runtime_throw("releasep: invalid p state");
2819 m->p = nil;
2820 m->mcache = nil;
2821 p->m = nil;
2822 p->status = Pidle;
2823 return p;
2826 static void
2827 incidlelocked(int32 v)
2829 runtime_lock(&runtime_sched);
2830 runtime_sched.nmidlelocked += v;
2831 if(v > 0)
2832 checkdead();
2833 runtime_unlock(&runtime_sched);
2836 // Check for a deadlock situation.
2837 // The check is based on the number of running M's; if it is 0, the process is deadlocked.
2838 static void
2839 checkdead(void)
2841 G *gp;
2842 int32 run, grunning, s;
2843 uintptr i;
2845 // For -buildmode=c-shared or -buildmode=c-archive it's OK if
2846 // there are no running goroutines. The calling program is
2847 // assumed to be running.
2848 if(runtime_isarchive) {
2849 return;
2852 // -1 for sysmon
2853 run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.nmidlelocked - 1 - countextra();
2854 if(run > 0)
2855 return;
2856 // If we are dying because of a signal caught on an already idle thread,
2857 // freezetheworld will cause all running threads to block.
2858 // The runtime will then essentially be in a deadlock state,
2859 // except that there is a thread that will call runtime_exit soon.
2860 if(runtime_panicking > 0)
2861 return;
2862 if(run < 0) {
2863 runtime_printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
2864 runtime_sched.nmidle, runtime_sched.nmidlelocked, runtime_sched.mcount);
2865 runtime_throw("checkdead: inconsistent counts");
2867 grunning = 0;
2868 runtime_lock(&allglock);
2869 for(i = 0; i < runtime_allglen; i++) {
2870 gp = runtime_allg[i];
2871 if(gp->isbackground)
2872 continue;
2873 s = gp->status;
2874 if(s == Gwaiting)
2875 grunning++;
2876 else if(s == Grunnable || s == Grunning || s == Gsyscall) {
2877 runtime_unlock(&allglock);
2878 runtime_printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);
2879 runtime_throw("checkdead: runnable g");
2882 runtime_unlock(&allglock);
2883 if(grunning == 0) // possible if main goroutine calls runtime_Goexit()
2884 runtime_throw("no goroutines (main called runtime.Goexit) - deadlock!");
2885 m->throwing = -1; // do not dump full stacks
2886 runtime_throw("all goroutines are asleep - deadlock!");
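// Illustrative sketch (not part of this file): the decision made by
// checkdead() above, reduced to its counters. All parameter names are
// hypothetical stand-ins for the scheduler fields used above.

#include <stddef.h>
#include <stdint.h>

static const char *deadlock_verdict(int32_t running_ms, int32_t runnable_gs,
				    int32_t waiting_gs) {
	if (running_ms > 0)
		return NULL;				// progress is still possible
	if (runnable_gs > 0)
		return "checkdead: runnable g";		// work exists but nothing can run it
	if (waiting_gs == 0)
		return "no goroutines";			// e.g. main called runtime.Goexit
	return "all goroutines are asleep - deadlock!";
}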
2889 static void
2890 sysmon(void)
2892 uint32 idle, delay;
2893 int64 now, lastpoll, lasttrace;
2894 G *gp;
2896 lasttrace = 0;
2897 idle = 0; // how many cycles in succession we have not woken anybody up
2898 delay = 0;
2899 for(;;) {
2900 if(idle == 0) // start with 20us sleep...
2901 delay = 20;
2902 else if(idle > 50) // start doubling the sleep after 1ms...
2903 delay *= 2;
2904 if(delay > 10*1000) // up to 10ms
2905 delay = 10*1000;
2906 runtime_usleep(delay);
2907 if(runtime_debug.schedtrace <= 0 &&
2908 (runtime_sched.gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic
2909 runtime_lock(&runtime_sched);
2910 if(runtime_atomicload(&runtime_sched.gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) {
2911 runtime_atomicstore(&runtime_sched.sysmonwait, 1);
2912 runtime_unlock(&runtime_sched);
2913 runtime_notesleep(&runtime_sched.sysmonnote);
2914 runtime_noteclear(&runtime_sched.sysmonnote);
2915 idle = 0;
2916 delay = 20;
2917 } else
2918 runtime_unlock(&runtime_sched);
2920 // poll network if not polled for more than 10ms
2921 lastpoll = runtime_atomicload64(&runtime_sched.lastpoll);
2922 now = runtime_nanotime();
2923 if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
2924 runtime_cas64(&runtime_sched.lastpoll, lastpoll, now);
2925 gp = runtime_netpoll(false); // non-blocking
2926 if(gp) {
2927 // Need to decrement number of idle locked M's
2928 // (pretending that one more is running) before injectglist.
2929 // Otherwise it can lead to the following situation:
2930 // injectglist grabs all P's but before it starts M's to run the P's,
2931 // another M returns from syscall, finishes running its G,
2932 // observes that there is no work to do and no other running M's
2933 // and reports deadlock.
2934 incidlelocked(-1);
2935 injectglist(gp);
2936 incidlelocked(1);
2939 // retake P's blocked in syscalls
2940 // and preempt long running G's
2941 if(retake(now))
2942 idle = 0;
2943 else
2944 idle++;
2946 if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) {
2947 lasttrace = now;
2948 runtime_schedtrace(runtime_debug.scheddetail);
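// Illustrative sketch (not part of this file): the sleep back-off sysmon uses
// above, as a pure function: 20us while there is work, doubling after roughly
// 50 idle cycles, capped at 10ms. next_delay is a hypothetical name; the
// result is in microseconds, as passed to runtime_usleep.

#include <stdint.h>

static uint32_t next_delay(uint32_t idle, uint32_t delay) {
	if (idle == 0)			// we woke somebody up last cycle: reset
		delay = 20;
	else if (idle > 50)		// long idle streak: start doubling
		delay *= 2;
	if (delay > 10*1000)		// never sleep longer than 10ms
		delay = 10*1000;
	return delay;
}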
2953 typedef struct Pdesc Pdesc;
2954 struct Pdesc
2956 uint32 schedtick;
2957 int64 schedwhen;
2958 uint32 syscalltick;
2959 int64 syscallwhen;
2961 static Pdesc pdesc[MaxGomaxprocs];
2963 static uint32
2964 retake(int64 now)
2966 uint32 i, s, n;
2967 int64 t;
2968 P *p;
2969 Pdesc *pd;
2971 n = 0;
2972 for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
2973 p = runtime_allp[i];
2974 if(p==nil)
2975 continue;
2976 pd = &pdesc[i];
2977 s = p->status;
2978 if(s == Psyscall) {
2979 // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
2980 t = p->syscalltick;
2981 if(pd->syscalltick != t) {
2982 pd->syscalltick = t;
2983 pd->syscallwhen = now;
2984 continue;
2986 // On the one hand we don't want to retake Ps if there is no other work to do,
2987 // but on the other hand we want to retake them eventually
2988 // because they can prevent the sysmon thread from going into a deep sleep.
2989 if(p->runqhead == p->runqtail &&
2990 runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0 &&
2991 pd->syscallwhen + 10*1000*1000 > now)
2992 continue;
2993 // Need to decrement number of idle locked M's
2994 // (pretending that one more is running) before the CAS.
2995 // Otherwise the M from which we retake can exit the syscall,
2996 // increment nmidle and report deadlock.
2997 incidlelocked(-1);
2998 if(runtime_cas(&p->status, s, Pidle)) {
2999 n++;
3000 handoffp(p);
3002 incidlelocked(1);
3003 } else if(s == Prunning) {
3004 // Preempt G if it's running for more than 10ms.
3005 t = p->schedtick;
3006 if(pd->schedtick != t) {
3007 pd->schedtick = t;
3008 pd->schedwhen = now;
3009 continue;
3011 if(pd->schedwhen + 10*1000*1000 > now)
3012 continue;
3013 // preemptone(p);
3016 return n;
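// Illustrative sketch (not part of this file): the observation scheme used by
// retake() above. sysmon only acts on a P whose tick counter is unchanged
// since the previous observation and which has been in that state past a
// deadline (10ms for preemption). All names below are hypothetical.

#include <stdbool.h>
#include <stdint.h>

struct watch { uint32_t last_tick; int64_t when; };

static bool stuck_past(struct watch *w, uint32_t tick, int64_t now, int64_t limit_ns) {
	if (w->last_tick != tick) {
		// The P made progress since we last looked: restart the clock.
		w->last_tick = tick;
		w->when = now;
		return false;
	}
	return w->when + limit_ns <= now;	// unchanged for longer than the limit
}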
3019 // Tell all goroutines that they have been preempted and they should stop.
3020 // This function is purely best-effort. It can fail to inform a goroutine if a
3021 // processor just started running it.
3022 // No locks need to be held.
3023 // Returns true if preemption request was issued to at least one goroutine.
3024 static bool
3025 preemptall(void)
3027 return false;
3030 void
3031 runtime_schedtrace(bool detailed)
3033 static int64 starttime;
3034 int64 now;
3035 int64 id1, id2, id3;
3036 int32 i, t, h;
3037 uintptr gi;
3038 const char *fmt;
3039 M *mp, *lockedm;
3040 G *gp, *lockedg;
3041 P *p;
3043 now = runtime_nanotime();
3044 if(starttime == 0)
3045 starttime = now;
3047 runtime_lock(&runtime_sched);
3048 runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
3049 (now-starttime)/1000000, runtime_gomaxprocs, runtime_sched.npidle, runtime_sched.mcount,
3050 runtime_sched.nmidle, runtime_sched.runqsize);
3051 if(detailed) {
3052 runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
3053 runtime_sched.gcwaiting, runtime_sched.nmidlelocked, runtime_sched.nmspinning,
3054 runtime_sched.stopwait, runtime_sched.sysmonwait);
3056 // We must be careful while reading data from P's, M's and G's.
3057 // Even if we hold schedlock, most data can be changed concurrently.
3058 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
3059 for(i = 0; i < runtime_gomaxprocs; i++) {
3060 p = runtime_allp[i];
3061 if(p == nil)
3062 continue;
3063 mp = p->m;
3064 h = runtime_atomicload(&p->runqhead);
3065 t = runtime_atomicload(&p->runqtail);
3066 if(detailed)
3067 runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
3068 i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
3069 else {
3070 // In non-detailed mode format lengths of per-P run queues as:
3071 // [len1 len2 len3 len4]
3072 fmt = " %d";
3073 if(runtime_gomaxprocs == 1)
3074 fmt = " [%d]\n";
3075 else if(i == 0)
3076 fmt = " [%d";
3077 else if(i == runtime_gomaxprocs-1)
3078 fmt = " %d]\n";
3079 runtime_printf(fmt, t-h);
3082 if(!detailed) {
3083 runtime_unlock(&runtime_sched);
3084 return;
3086 for(mp = runtime_allm; mp; mp = mp->alllink) {
3087 p = mp->p;
3088 gp = mp->curg;
3089 lockedg = mp->lockedg;
3090 id1 = -1;
3091 if(p)
3092 id1 = p->id;
3093 id2 = -1;
3094 if(gp)
3095 id2 = gp->goid;
3096 id3 = -1;
3097 if(lockedg)
3098 id3 = lockedg->goid;
3099 runtime_printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
3100 " locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
3101 mp->id, id1, id2,
3102 mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
3103 mp->spinning, mp->blocked, id3);
3105 runtime_lock(&allglock);
3106 for(gi = 0; gi < runtime_allglen; gi++) {
3107 gp = runtime_allg[gi];
3108 mp = gp->m;
3109 lockedm = gp->lockedm;
3110 runtime_printf(" G%D: status=%d(%s) m=%d lockedm=%d\n",
3111 gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1,
3112 lockedm ? lockedm->id : -1);
3114 runtime_unlock(&allglock);
3115 runtime_unlock(&runtime_sched);
3118 // Put mp on midle list.
3119 // Sched must be locked.
3120 static void
3121 mput(M *mp)
3123 mp->schedlink = runtime_sched.midle;
3124 runtime_sched.midle = mp;
3125 runtime_sched.nmidle++;
3126 checkdead();
3129 // Try to get an m from midle list.
3130 // Sched must be locked.
3131 static M*
3132 mget(void)
3134 M *mp;
3136 if((mp = runtime_sched.midle) != nil){
3137 runtime_sched.midle = mp->schedlink;
3138 runtime_sched.nmidle--;
3140 return mp;
3143 // Put gp on the global runnable queue.
3144 // Sched must be locked.
3145 static void
3146 globrunqput(G *gp)
3148 gp->schedlink = nil;
3149 if(runtime_sched.runqtail)
3150 runtime_sched.runqtail->schedlink = gp;
3151 else
3152 runtime_sched.runqhead = gp;
3153 runtime_sched.runqtail = gp;
3154 runtime_sched.runqsize++;
3157 // Put a batch of runnable goroutines on the global runnable queue.
3158 // Sched must be locked.
3159 static void
3160 globrunqputbatch(G *ghead, G *gtail, int32 n)
3162 gtail->schedlink = nil;
3163 if(runtime_sched.runqtail)
3164 runtime_sched.runqtail->schedlink = ghead;
3165 else
3166 runtime_sched.runqhead = ghead;
3167 runtime_sched.runqtail = gtail;
3168 runtime_sched.runqsize += n;
3171 // Try to get a batch of G's from the global runnable queue.
3172 // Sched must be locked.
3173 static G*
3174 globrunqget(P *p, int32 max)
3176 G *gp, *gp1;
3177 int32 n;
3179 if(runtime_sched.runqsize == 0)
3180 return nil;
3181 n = runtime_sched.runqsize/runtime_gomaxprocs+1;
3182 if(n > runtime_sched.runqsize)
3183 n = runtime_sched.runqsize;
3184 if(max > 0 && n > max)
3185 n = max;
3186 if((uint32)n > nelem(p->runq)/2)
3187 n = nelem(p->runq)/2;
3188 runtime_sched.runqsize -= n;
3189 if(runtime_sched.runqsize == 0)
3190 runtime_sched.runqtail = nil;
3191 gp = runtime_sched.runqhead;
3192 runtime_sched.runqhead = gp->schedlink;
3193 n--;
3194 while(n--) {
3195 gp1 = runtime_sched.runqhead;
3196 runtime_sched.runqhead = gp1->schedlink;
3197 runqput(p, gp1);
3199 return gp;
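// Illustrative sketch (not part of this file): the batch-size computation in
// globrunqget() above, isolated: take roughly a 1/gomaxprocs share of the
// global queue, clamped by the caller's limit and by half of the local ring
// so the local queue cannot overflow. global_batch is a hypothetical name.

#include <stdint.h>

static int32_t global_batch(int32_t runqsize, int32_t gomaxprocs,
			    int32_t max, uint32_t local_cap) {
	int32_t n = runqsize/gomaxprocs + 1;	// fair share, at least one
	if (n > runqsize)
		n = runqsize;			// cannot take more than exists
	if (max > 0 && n > max)
		n = max;			// respect the caller's limit
	if ((uint32_t)n > local_cap/2)
		n = (int32_t)(local_cap/2);	// leave room in the local ring
	return n;
}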
3202 // Put p on the pidle list.
3203 // Sched must be locked.
3204 static void
3205 pidleput(P *p)
3207 p->link = runtime_sched.pidle;
3208 runtime_sched.pidle = p;
3209 runtime_xadd(&runtime_sched.npidle, 1); // TODO: fast atomic
3212 // Try to get a p from the pidle list.
3213 // Sched must be locked.
3214 static P*
3215 pidleget(void)
3217 P *p;
3219 p = runtime_sched.pidle;
3220 if(p) {
3221 runtime_sched.pidle = p->link;
3222 runtime_xadd(&runtime_sched.npidle, -1); // TODO: fast atomic
3224 return p;
3227 // Try to put g on local runnable queue.
3228 // If it's full, put onto global queue.
3229 // Executed only by the owner P.
3230 static void
3231 runqput(P *p, G *gp)
3233 uint32 h, t;
3235 retry:
3236 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3237 t = p->runqtail;
3238 if(t - h < nelem(p->runq)) {
3239 p->runq[t%nelem(p->runq)] = gp;
3240 runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
3241 return;
3243 if(runqputslow(p, gp, h, t))
3244 return;
3245 // the queue is not full, now the put above must succeed
3246 goto retry;
3249 // Put g and a batch of work from local runnable queue on global queue.
3250 // Executed only by the owner P.
3251 static bool
3252 runqputslow(P *p, G *gp, uint32 h, uint32 t)
3254 G *batch[nelem(p->runq)/2+1];
3255 uint32 n, i;
3257 // First, grab a batch from local queue.
3258 n = t-h;
3259 n = n/2;
3260 if(n != nelem(p->runq)/2)
3261 runtime_throw("runqputslow: queue is not full");
3262 for(i=0; i<n; i++)
3263 batch[i] = p->runq[(h+i)%nelem(p->runq)];
3264 if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3265 return false;
3266 batch[n] = gp;
3267 // Link the goroutines.
3268 for(i=0; i<n; i++)
3269 batch[i]->schedlink = batch[i+1];
3270 // Now put the batch on global queue.
3271 runtime_lock(&runtime_sched);
3272 globrunqputbatch(batch[0], batch[n], n+1);
3273 runtime_unlock(&runtime_sched);
3274 return true;
3277 // Get g from local runnable queue.
3278 // Executed only by the owner P.
3279 static G*
3280 runqget(P *p)
3282 G *gp;
3283 uint32 t, h;
3285 for(;;) {
3286 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3287 t = p->runqtail;
3288 if(t == h)
3289 return nil;
3290 gp = p->runq[h%nelem(p->runq)];
3291 if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume
3292 return gp;
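// Illustrative sketch (not part of this file): runqput/runqget above form a
// bounded single-producer, multi-consumer ring ordered by acquire/release
// atomics, with a CAS on the head committing each consume. A standalone model
// assuming C11 <stdatomic.h>; RING_CAP, ring, ring_put and ring_get are
// hypothetical names.

#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define RING_CAP 256u	// a power of two, like nelem(p->runq)

struct ring {
	_Atomic uint32_t head;		// next slot to consume
	_Atomic uint32_t tail;		// next slot to fill; advanced only by the owner
	void *slots[RING_CAP];
};

// Owner-only producer: mirrors runqput's store-release on runqtail.
static bool ring_put(struct ring *r, void *item) {
	uint32_t h = atomic_load_explicit(&r->head, memory_order_acquire);
	uint32_t t = atomic_load_explicit(&r->tail, memory_order_relaxed);
	if (t - h >= RING_CAP)
		return false;		// full: the caller falls back to a global queue
	r->slots[t % RING_CAP] = item;
	atomic_store_explicit(&r->tail, t + 1, memory_order_release);
	return true;
}

// Any-thread consumer: mirrors runqget's CAS on runqhead ("commits consume").
static void *ring_get(struct ring *r) {
	for (;;) {
		uint32_t h = atomic_load_explicit(&r->head, memory_order_acquire);
		uint32_t t = atomic_load_explicit(&r->tail, memory_order_acquire);
		if (t == h)
			return NULL;	// empty
		void *item = r->slots[h % RING_CAP];
		if (atomic_compare_exchange_weak_explicit(&r->head, &h, h + 1,
							  memory_order_acq_rel,
							  memory_order_relaxed))
			return item;	// we now own the slot we just read
	}
}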
3296 // Grabs a batch of goroutines from local runnable queue.
3297 // The batch array must be of size nelem(p->runq)/2. Returns the number of grabbed goroutines.
3298 // Can be executed by any P.
3299 static uint32
3300 runqgrab(P *p, G **batch)
3302 uint32 t, h, n, i;
3304 for(;;) {
3305 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3306 t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer
3307 n = t-h;
3308 n = n - n/2;
3309 if(n == 0)
3310 break;
3311 if(n > nelem(p->runq)/2) // read inconsistent h and t
3312 continue;
3313 for(i=0; i<n; i++)
3314 batch[i] = p->runq[(h+i)%nelem(p->runq)];
3315 if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3316 break;
3318 return n;
3321 // Steal half of the elements from the local runnable queue of p2
3322 // and put them onto the local runnable queue of p.
3323 // Returns one of the stolen elements (or nil if failed).
3324 static G*
3325 runqsteal(P *p, P *p2)
3327 G *gp;
3328 G *batch[nelem(p->runq)/2];
3329 uint32 t, h, n, i;
3331 n = runqgrab(p2, batch);
3332 if(n == 0)
3333 return nil;
3334 n--;
3335 gp = batch[n];
3336 if(n == 0)
3337 return gp;
3338 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3339 t = p->runqtail;
3340 if(t - h + n >= nelem(p->runq))
3341 runtime_throw("runqsteal: runq overflow");
3342 for(i=0; i<n; i++, t++)
3343 p->runq[t%nelem(p->runq)] = batch[i];
3344 runtime_atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption
3345 return gp;
3348 void runtime_testSchedLocalQueue(void)
3349 __asm__("runtime.testSchedLocalQueue");
3351 void
3352 runtime_testSchedLocalQueue(void)
3354 P p;
3355 G gs[nelem(p.runq)];
3356 int32 i, j;
3358 runtime_memclr((byte*)&p, sizeof(p));
3360 for(i = 0; i < (int32)nelem(gs); i++) {
3361 if(runqget(&p) != nil)
3362 runtime_throw("runq is not empty initially");
3363 for(j = 0; j < i; j++)
3364 runqput(&p, &gs[i]);
3365 for(j = 0; j < i; j++) {
3366 if(runqget(&p) != &gs[i]) {
3367 runtime_printf("bad element at iter %d/%d\n", i, j);
3368 runtime_throw("bad element");
3371 if(runqget(&p) != nil)
3372 runtime_throw("runq is not empty afterwards");
3376 void runtime_testSchedLocalQueueSteal(void)
3377 __asm__("runtime.testSchedLocalQueueSteal");
3379 void
3380 runtime_testSchedLocalQueueSteal(void)
3382 P p1, p2;
3383 G gs[nelem(p1.runq)], *gp;
3384 int32 i, j, s;
3386 runtime_memclr((byte*)&p1, sizeof(p1));
3387 runtime_memclr((byte*)&p2, sizeof(p2));
3389 for(i = 0; i < (int32)nelem(gs); i++) {
3390 for(j = 0; j < i; j++) {
3391 gs[j].sig = 0;
3392 runqput(&p1, &gs[j]);
3394 gp = runqsteal(&p2, &p1);
3395 s = 0;
3396 if(gp) {
3397 s++;
3398 gp->sig++;
3400 while((gp = runqget(&p2)) != nil) {
3401 s++;
3402 gp->sig++;
3404 while((gp = runqget(&p1)) != nil)
3405 gp->sig++;
3406 for(j = 0; j < i; j++) {
3407 if(gs[j].sig != 1) {
3408 runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
3409 runtime_throw("bad element");
3412 if(s != i/2 && s != i/2+1) {
3413 runtime_printf("bad steal %d, want %d or %d, iter %d\n",
3414 s, i/2, i/2+1, i);
3415 runtime_throw("bad steal");
3420 int32
3421 runtime_setmaxthreads(int32 in)
3423 int32 out;
3425 runtime_lock(&runtime_sched);
3426 out = runtime_sched.maxmcount;
3427 runtime_sched.maxmcount = in;
3428 checkmcount();
3429 runtime_unlock(&runtime_sched);
3430 return out;
3433 void
3434 runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
3436 enqueue1(wbufp, (Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0});
3437 enqueue1(wbufp, (Obj){(byte*)&runtime_main_init_done, sizeof runtime_main_init_done, 0});
3440 // Return whether we are waiting for a GC. This gc toolchain uses
3441 // preemption instead.
3442 bool
3443 runtime_gcwaiting(void)
3445 return runtime_sched.gcwaiting;
3448 // os_beforeExit is called from os.Exit(0).
3449 //go:linkname os_beforeExit os.runtime_beforeExit
3451 extern void os_beforeExit() __asm__ (GOSYM_PREFIX "os.runtime_beforeExit");
3453 void
3454 os_beforeExit()
3458 // Active spinning for sync.Mutex.
3459 //go:linkname sync_runtime_canSpin sync.runtime_canSpin
3461 enum
3463 ACTIVE_SPIN = 4,
3464 ACTIVE_SPIN_CNT = 30,
3467 extern _Bool sync_runtime_canSpin(intgo i)
3468 __asm__ (GOSYM_PREFIX "sync.runtime_canSpin");
3470 _Bool
3471 sync_runtime_canSpin(intgo i)
3473 P *p;
3475 // sync.Mutex is cooperative, so we are conservative with spinning.
3476 // Spin only a few times and only if we are running on a multicore machine,
3477 // GOMAXPROCS>1, there is at least one other running P and the local runq is empty.
3478 // Unlike the runtime mutex we don't do passive spinning here,
3479 // because there can be work on the global runq or on other Ps.
3480 if (i >= ACTIVE_SPIN || runtime_ncpu <= 1 || runtime_gomaxprocs <= (int32)(runtime_sched.npidle+runtime_sched.nmspinning)+1) {
3481 return false;
3483 p = m->p;
3484 return p != nil && p->runqhead == p->runqtail;
3487 //go:linkname sync_runtime_doSpin sync.runtime_doSpin
3488 //go:nosplit
3490 extern void sync_runtime_doSpin(void)
3491 __asm__ (GOSYM_PREFIX "sync.runtime_doSpin");
3493 void
3494 sync_runtime_doSpin()
3496 runtime_procyield(ACTIVE_SPIN_CNT);
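// Illustrative sketch (not part of this file): the spin policy implemented by
// sync_runtime_canSpin/sync_runtime_doSpin above: spin only a handful of
// times, only on a multicore machine with a spare P, and only while the local
// run queue is empty; each spin round is a short busy loop. All names below
// are hypothetical.

#include <stdbool.h>

enum { SPIN_ROUNDS = 4, SPIN_PAUSES = 30 };	// mirror ACTIVE_SPIN / ACTIVE_SPIN_CNT

static bool can_spin(int iter, int ncpu, int nprocs, int nidle, int nspinning,
		     bool local_runq_empty) {
	if (iter >= SPIN_ROUNDS || ncpu <= 1)
		return false;			// spinning cannot help here
	if (nprocs <= nidle + nspinning + 1)
		return false;			// no other P is making progress
	return local_runq_empty;		// don't starve our own runnable work
}

static void do_spin(void) {
	// Stands in for runtime_procyield's PAUSE loop.
	for (volatile int i = 0; i < SPIN_PAUSES; i++)
		;
}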