[official-gcc.git] / libgo / runtime / proc.c
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 #include <limits.h>
6 #include <signal.h>
7 #include <stdlib.h>
8 #include <pthread.h>
9 #include <unistd.h>
11 #include "config.h"
13 #ifdef HAVE_DL_ITERATE_PHDR
14 #include <link.h>
15 #endif
17 #include "runtime.h"
18 #include "arch.h"
19 #include "defs.h"
20 #include "malloc.h"
21 #include "go-type.h"
23 #ifdef USING_SPLIT_STACK
25 /* FIXME: These are not declared anywhere. */
27 extern void __splitstack_getcontext(void *context[10]);
29 extern void __splitstack_setcontext(void *context[10]);
31 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
33 extern void * __splitstack_resetcontext(void *context[10], size_t *);
35 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
36 void **);
38 extern void __splitstack_block_signals (int *, int *);
40 extern void __splitstack_block_signals_context (void *context[10], int *,
41 int *);
43 #endif
45 #ifndef PTHREAD_STACK_MIN
46 # define PTHREAD_STACK_MIN 8192
47 #endif
49 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
50 # define StackMin PTHREAD_STACK_MIN
51 #else
52 # define StackMin ((sizeof(char *) < 8) ? 2 * 1024 * 1024 : 4 * 1024 * 1024)
53 #endif
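// When split stacks cannot be used, each goroutine stack is allocated at
// its full size up front and cannot grow, which is presumably why StackMin
// jumps to 2 MiB (32-bit) or 4 MiB (64-bit) here instead of staying at the
// small PTHREAD_STACK_MIN-based value used with split stacks.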
55 uintptr runtime_stacks_sys;
57 static void gtraceback(G*);
59 #ifdef __rtems__
60 #define __thread
61 #endif
63 static __thread G *g;
65 #ifndef SETCONTEXT_CLOBBERS_TLS
67 static inline void
68 initcontext(void)
72 static inline void
73 fixcontext(ucontext_t *c __attribute__ ((unused)))
77 #else
79 # if defined(__x86_64__) && defined(__sun__)
81 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
82 // register to that of the thread which called getcontext. The effect
83 // is that the address of all __thread variables changes. This bug
84 // also affects pthread_self() and pthread_getspecific. We work
85 // around it by clobbering the context field directly to keep %fs the
86 // same.
88 static __thread greg_t fs;
90 static inline void
91 initcontext(void)
93 ucontext_t c;
95 getcontext(&c);
96 fs = c.uc_mcontext.gregs[REG_FSBASE];
99 static inline void
100 fixcontext(ucontext_t* c)
102 c->uc_mcontext.gregs[REG_FSBASE] = fs;
105 # elif defined(__NetBSD__)
107 // NetBSD has a bug: setcontext clobbers tlsbase, so we need to save
108 // and restore it ourselves.
110 static __thread __greg_t tlsbase;
112 static inline void
113 initcontext(void)
115 ucontext_t c;
117 getcontext(&c);
118 tlsbase = c.uc_mcontext._mc_tlsbase;
121 static inline void
122 fixcontext(ucontext_t* c)
124 c->uc_mcontext._mc_tlsbase = tlsbase;
127 # elif defined(__sparc__)
129 static inline void
130 initcontext(void)
134 static inline void
135 fixcontext(ucontext_t *c)
137 /* ??? Using
138 register unsigned long thread __asm__("%g7");
139 c->uc_mcontext.gregs[REG_G7] = thread;
140 results in
141 error: variable ‘thread’ might be clobbered by \
142 ‘longjmp’ or ‘vfork’ [-Werror=clobbered]
143 which ought to be false, as %g7 is a fixed register. */
145 if (sizeof (c->uc_mcontext.gregs[REG_G7]) == 8)
146 asm ("stx %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
147 else
148 asm ("st %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
151 # else
153 # error unknown case for SETCONTEXT_CLOBBERS_TLS
155 # endif
157 #endif
159 // ucontext_arg returns a properly aligned ucontext_t value. On some
160 // systems a ucontext_t value must be aligned to a 16-byte boundary.
161 // The g structure that has fields of type ucontext_t is defined in
162 // Go, and Go has no simple way to align a field to such a boundary.
163 // So we make the field larger in runtime2.go and pick an appropriate
164 // offset within the field here.
165 static ucontext_t*
166 ucontext_arg(void** go_ucontext)
168 uintptr_t p = (uintptr_t)go_ucontext;
169 size_t align = __alignof__(ucontext_t);
170 if(align > 16) {
171 // We only ensured space for up to a 16 byte alignment
172 // in libgo/go/runtime/runtime2.go.
173 runtime_throw("required alignment of ucontext_t too large");
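	// Round p up to the next multiple of align (a power of two).
	// For example, with align == 16 a p ending in ...8 moves up to the
	// next ...0 boundary, while an already-aligned p is left unchanged.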
175 p = (p + align - 1) &~ (uintptr_t)(align - 1);
176 return (ucontext_t*)p;
179 // We can not always refer to the TLS variables directly. The
180 // compiler will call tls_get_addr to get the address of the variable,
181 // and it may hold it in a register across a call to schedule. When
182 // we get back from the call we may be running in a different thread,
183 // in which case the register now points to the TLS variable for a
184 // different thread. We use non-inlinable functions to avoid this
185 // when necessary.
187 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
190 runtime_g(void)
192 return g;
195 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
198 runtime_m(void)
200 if(g == nil)
201 return nil;
202 return g->m;
205 // Set g.
206 void
207 runtime_setg(G* gp)
209 g = gp;
212 // Start a new thread.
213 static void
214 runtime_newosproc(M *mp)
216 pthread_attr_t attr;
217 sigset_t clear, old;
218 pthread_t tid;
219 int ret;
221 if(pthread_attr_init(&attr) != 0)
222 runtime_throw("pthread_attr_init");
223 if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
224 runtime_throw("pthread_attr_setdetachstate");
226 // Block signals during pthread_create so that the new thread
227 // starts with signals disabled. It will enable them in minit.
228 sigfillset(&clear);
230 #ifdef SIGTRAP
231 // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
232 sigdelset(&clear, SIGTRAP);
233 #endif
235 sigemptyset(&old);
236 pthread_sigmask(SIG_BLOCK, &clear, &old);
237 ret = pthread_create(&tid, &attr, runtime_mstart, mp);
238 pthread_sigmask(SIG_SETMASK, &old, nil);
240 if (ret != 0)
241 runtime_throw("pthread_create");
244 // First function run by a new goroutine. This replaces gogocall.
245 static void
246 kickoff(void)
248 void (*fn)(void*);
250 if(g->traceback != nil)
251 gtraceback(g);
253 fn = (void (*)(void*))(g->entry);
254 fn(g->param);
255 runtime_goexit();
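// (kickoff is the entry point installed with makecontext for a goroutine's
// context, as in runtime_newextram below, so a newly created goroutine
// begins executing here the first time it is switched to.)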
258 // Switch context to a different goroutine. This is like longjmp.
259 void runtime_gogo(G*) __attribute__ ((noinline));
260 void
261 runtime_gogo(G* newg)
263 #ifdef USING_SPLIT_STACK
264 __splitstack_setcontext(&newg->stackcontext[0]);
265 #endif
266 g = newg;
267 newg->fromgogo = true;
268 fixcontext(ucontext_arg(&newg->context[0]));
269 setcontext(ucontext_arg(&newg->context[0]));
270 runtime_throw("gogo setcontext returned");
273 // Save context and call fn passing g as a parameter. This is like
274 // setjmp. Because getcontext always returns 0, unlike setjmp, we use
275 // g->fromgogo as a flag. It will be true if we got here via
276 // setcontext. g == nil the first time this is called in a new m.
277 void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
278 void
279 runtime_mcall(void (*pfn)(G*))
281 M *mp;
282 G *gp;
284 // Ensure that all registers are on the stack for the garbage
285 // collector.
286 __builtin_unwind_init();
288 gp = g;
289 mp = gp->m;
290 if(gp == mp->g0)
291 runtime_throw("runtime: mcall called on m->g0 stack");
293 if(gp != nil) {
295 #ifdef USING_SPLIT_STACK
296 __splitstack_getcontext(&g->stackcontext[0]);
297 #else
298 gp->gcnextsp = &pfn;
299 #endif
300 gp->fromgogo = false;
301 getcontext(ucontext_arg(&gp->context[0]));
303 // When we return from getcontext, we may be running
304 // in a new thread. That means that g may have
305 // changed. It is a global variable, so we will
306 // reload it, but the address of g may be cached in
307 // our local stack frame, and that address may be
308 // wrong. Call the function to reload the value for
309 // this thread.
310 gp = runtime_g();
311 mp = gp->m;
313 if(gp->traceback != nil)
314 gtraceback(gp);
316 if (gp == nil || !gp->fromgogo) {
317 #ifdef USING_SPLIT_STACK
318 __splitstack_setcontext(&mp->g0->stackcontext[0]);
319 #endif
320 mp->g0->entry = (byte*)pfn;
321 mp->g0->param = gp;
323 // It's OK to set g directly here because this case
324 // can not occur if we got here via a setcontext to
325 // the getcontext call just above.
326 g = mp->g0;
328 fixcontext(ucontext_arg(&mp->g0->context[0]));
329 setcontext(ucontext_arg(&mp->g0->context[0]));
330 runtime_throw("runtime: mcall function returned");
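// To summarize the two paths above: on the first pass getcontext falls
// through with fromgogo false, so runtime_mcall switches to g0's saved
// context and the code sitting after g0's getcontext (in runtime_mstart or
// runtime_needm) calls pfn(gp). When the scheduler later does a
// runtime_gogo(gp), control resumes at the getcontext call above with
// fromgogo true, and runtime_mcall simply returns to its caller on gp.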
334 // Goroutine scheduler
335 // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
337 // The main concepts are:
338 // G - goroutine.
339 // M - worker thread, or machine.
340 // P - processor, a resource that is required to execute Go code.
341 // M must have an associated P to execute Go code; however, it can be
342 // blocked or in a syscall without an associated P.
344 // Design doc at http://golang.org/s/go11sched.
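//
// Roughly, each M runs a loop: acquire a P, take a G from that P's local
// run queue (or from the global queue, by stealing from another P, or from
// the network poller), run it until it blocks, yields or exits, and repeat.
// A schematic of that loop (the real code is schedule(), findrunnable()
// and execute() below):
//
//	for(;;) {
//		gp = runqget(p);         // P's local run queue
//		if(gp == nil)
//			gp = findrunnable(); // global queue, stealing, netpoll
//		// execute() never returns; schedule() is re-entered via
//		// runtime_mcall when gp blocks, yields or exits.
//		execute(gp);
//	}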
346 typedef struct Sched Sched;
347 struct Sched {
348 Lock;
350 uint64 goidgen;
351 M* midle; // idle m's waiting for work
352 int32 nmidle; // number of idle m's waiting for work
353 int32 nmidlelocked; // number of locked m's waiting for work
354 int32 mcount; // number of m's that have been created
355 int32 maxmcount; // maximum number of m's allowed (or die)
357 P* pidle; // idle P's
358 uint32 npidle;
359 uint32 nmspinning;
361 // Global runnable queue.
362 G* runqhead;
363 G* runqtail;
364 int32 runqsize;
366 // Global cache of dead G's.
367 Lock gflock;
368 G* gfree;
370 uint32 gcwaiting; // gc is waiting to run
371 int32 stopwait;
372 Note stopnote;
373 uint32 sysmonwait;
374 Note sysmonnote;
375 uint64 lastpoll;
377 int32 profilehz; // cpu profiling rate
380 enum
382 // Number of goroutine ids to grab from runtime_sched.goidgen to local per-P cache at once.
383 // 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
384 GoidCacheBatch = 16,
387 Sched runtime_sched;
388 int32 runtime_gomaxprocs;
389 uint32 runtime_needextram = 1;
390 M runtime_m0;
391 G runtime_g0; // idle goroutine for m0
392 G* runtime_lastg;
393 M* runtime_allm;
394 P** runtime_allp;
395 M* runtime_extram;
396 int8* runtime_goos;
397 int32 runtime_ncpu;
398 bool runtime_precisestack;
399 static int32 newprocs;
401 static Lock allglock; // the following vars are protected by this lock or by stoptheworld
402 G** runtime_allg;
403 uintptr runtime_allglen;
404 static uintptr allgcap;
406 bool runtime_isarchive;
408 void* runtime_mstart(void*);
409 static void runqput(P*, G*);
410 static G* runqget(P*);
411 static bool runqputslow(P*, G*, uint32, uint32);
412 static G* runqsteal(P*, P*);
413 static void mput(M*);
414 static M* mget(void);
415 static void mcommoninit(M*);
416 static void schedule(void);
417 static void procresize(int32);
418 static void acquirep(P*);
419 static P* releasep(void);
420 static void newm(void(*)(void), P*);
421 static void stopm(void);
422 static void startm(P*, bool);
423 static void handoffp(P*);
424 static void wakep(void);
425 static void stoplockedm(void);
426 static void startlockedm(G*);
427 static void sysmon(void);
428 static uint32 retake(int64);
429 static void incidlelocked(int32);
430 static void checkdead(void);
431 static void exitsyscall0(G*);
432 static void park0(G*);
433 static void goexit0(G*);
434 static void gfput(P*, G*);
435 static G* gfget(P*);
436 static void gfpurge(P*);
437 static void globrunqput(G*);
438 static void globrunqputbatch(G*, G*, int32);
439 static G* globrunqget(P*, int32);
440 static P* pidleget(void);
441 static void pidleput(P*);
442 static void injectglist(G*);
443 static bool preemptall(void);
444 static bool exitsyscallfast(void);
445 static void allgadd(G*);
447 bool runtime_isstarted;
449 // The bootstrap sequence is:
451 // call osinit
452 // call schedinit
453 // make & queue new G
454 // call runtime_mstart
456 // The new G calls runtime_main.
457 void
458 runtime_schedinit(void)
460 M *m;
461 int32 n, procs;
462 String s;
463 const byte *p;
464 Eface i;
466 m = &runtime_m0;
467 g = &runtime_g0;
468 m->g0 = g;
469 m->curg = g;
470 g->m = m;
472 initcontext();
474 runtime_sched.maxmcount = 10000;
475 runtime_precisestack = 0;
477 // runtime_symtabinit();
478 runtime_mallocinit();
479 mcommoninit(m);
481 // Initialize the itable value for newErrorCString,
482 // so that the next time it gets called, possibly
483 // in a fault during a garbage collection, it will not
484 // need to allocate memory.
485 runtime_newErrorCString(0, &i);
487 // Initialize the cached gotraceback value, since
488 // gotraceback calls getenv, which mallocs on Plan 9.
489 runtime_gotraceback(nil);
491 runtime_goargs();
492 runtime_goenvs();
493 runtime_parsedebugvars();
495 runtime_sched.lastpoll = runtime_nanotime();
496 procs = 1;
497 s = runtime_getenv("GOMAXPROCS");
498 p = s.str;
499 if(p != nil && (n = runtime_atoi(p, s.len)) > 0) {
500 if(n > _MaxGomaxprocs)
501 n = _MaxGomaxprocs;
502 procs = n;
504 runtime_allp = runtime_malloc((_MaxGomaxprocs+1)*sizeof(runtime_allp[0]));
505 procresize(procs);
507 // Can not enable GC until all roots are registered.
508 // mstats.enablegc = 1;
511 extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
512 extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");
514 // Used to determine the field alignment.
516 struct field_align
518 char c;
519 Hchan *p;
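// (In struct field_align the offset of p, offsetof(struct field_align, p),
// is exactly the alignment the compiler requires for an Hchan* struct
// field; the __field_align entry of the channel type descriptor below is
// computed from it.)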
522 // main_init_done is a signal used by cgocallbackg that initialization
523 // has been completed. It is made before _cgo_notify_runtime_init_done,
524 // so all cgo calls can rely on it existing. When main_init is
525 // complete, it is closed, meaning cgocallbackg can reliably receive
526 // from it.
527 Hchan *runtime_main_init_done;
529 // The chan bool type, for runtime_main_init_done.
531 extern const struct __go_type_descriptor bool_type_descriptor
532 __asm__ (GOSYM_PREFIX "__go_tdn_bool");
534 static struct __go_channel_type chan_bool_type_descriptor =
536 /* __common */
538 /* __code */
539 GO_CHAN,
540 /* __align */
541 __alignof (Hchan *),
542 /* __field_align */
543 offsetof (struct field_align, p) - 1,
544 /* __size */
545 sizeof (Hchan *),
546 /* __hash */
547 0, /* This value doesn't matter. */
548 /* __hashfn */
549 NULL,
550 /* __equalfn */
551 NULL,
552 /* __gc */
553 NULL, /* This value doesn't matter */
554 /* __reflection */
555 NULL, /* This value doesn't matter */
556 /* __uncommon */
557 NULL,
558 /* __pointer_to_this */
559 NULL
561 /* __element_type */
562 &bool_type_descriptor,
563 /* __dir */
564 CHANNEL_BOTH_DIR
567 extern Hchan *__go_new_channel (ChanType *, uintptr);
568 extern void closechan(Hchan *) __asm__ (GOSYM_PREFIX "runtime.closechan");
570 static void
571 initDone(void *arg __attribute__ ((unused))) {
572 runtime_unlockOSThread();
575 // The main goroutine.
576 // Note: C frames in general are not copyable during stack growth, for two reasons:
577 // 1) We don't know where in a frame to find pointers to other stack locations.
578 // 2) There's no guarantee that globals or heap values do not point into the frame.
580 // The C frame for runtime.main is copyable, because:
581 // 1) There are no pointers to other stack locations in the frame
582 // (d.fn points at a global, d.link is nil, d.argp is -1).
583 // 2) The only pointer into this frame is from the defer chain,
584 // which is explicitly handled during stack copying.
585 void
586 runtime_main(void* dummy __attribute__((unused)))
588 Defer d;
589 _Bool frame;
591 newm(sysmon, nil);
593 // Lock the main goroutine onto this, the main OS thread,
594 // during initialization. Most programs won't care, but a few
595 // do require certain calls to be made by the main thread.
596 // Those can arrange for main.main to run in the main thread
597 // by calling runtime.LockOSThread during initialization
598 // to preserve the lock.
599 runtime_lockOSThread();
601 // Defer unlock so that runtime.Goexit during init does the unlock too.
602 d.pfn = (uintptr)(void*)initDone;
603 d.next = g->_defer;
604 d.arg = (void*)-1;
605 d._panic = g->_panic;
606 d.retaddr = 0;
607 d.makefunccanrecover = 0;
608 d.frame = &frame;
609 d.special = true;
610 g->_defer = &d;
612 if(g->m != &runtime_m0)
613 runtime_throw("runtime_main not on m0");
614 __go_go(runtime_MHeap_Scavenger, nil);
616 runtime_main_init_done = __go_new_channel(&chan_bool_type_descriptor, 0);
618 _cgo_notify_runtime_init_done();
620 main_init();
622 closechan(runtime_main_init_done);
624 if(g->_defer != &d || (void*)d.pfn != initDone)
625 runtime_throw("runtime: bad defer entry after init");
626 g->_defer = d.next;
627 runtime_unlockOSThread();
629 // For gccgo we have to wait until after main is initialized
630 // to enable GC, because initializing main registers the GC
631 // roots.
632 mstats.enablegc = 1;
634 if(runtime_isarchive) {
635 // This is not a complete program, but is instead a
636 // library built using -buildmode=c-archive or
637 // c-shared. Now that we are initialized, there is
638 // nothing further to do.
639 return;
642 main_main();
644 // Make racy client program work: if panicking on
645 // another goroutine at the same time as main returns,
646 // let the other goroutine finish printing the panic trace.
647 // Once it does, it will exit. See issue 3934.
648 if(runtime_panicking)
649 runtime_park(nil, nil, "panicwait");
651 runtime_exit(0);
652 for(;;)
653 *(int32*)0 = 0;
656 void
657 runtime_goroutineheader(G *gp)
659 String status;
660 int64 waitfor;
662 switch(gp->atomicstatus) {
663 case _Gidle:
664 status = runtime_gostringnocopy((const byte*)"idle");
665 break;
666 case _Grunnable:
667 status = runtime_gostringnocopy((const byte*)"runnable");
668 break;
669 case _Grunning:
670 status = runtime_gostringnocopy((const byte*)"running");
671 break;
672 case _Gsyscall:
673 status = runtime_gostringnocopy((const byte*)"syscall");
674 break;
675 case _Gwaiting:
676 if(gp->waitreason.len > 0)
677 status = gp->waitreason;
678 else
679 status = runtime_gostringnocopy((const byte*)"waiting");
680 break;
681 default:
682 status = runtime_gostringnocopy((const byte*)"???");
683 break;
686 // approx time the G is blocked, in minutes
687 waitfor = 0;
688 if((gp->atomicstatus == _Gwaiting || gp->atomicstatus == _Gsyscall) && gp->waitsince != 0)
689 waitfor = (runtime_nanotime() - gp->waitsince) / (60LL*1000*1000*1000);
691 if(waitfor < 1)
692 runtime_printf("goroutine %D [%S]:\n", gp->goid, status);
693 else
694 runtime_printf("goroutine %D [%S, %D minutes]:\n", gp->goid, status, waitfor);
697 void
698 runtime_printcreatedby(G *g)
700 if(g != nil && g->gopc != 0 && g->goid != 1) {
701 String fn;
702 String file;
703 intgo line;
705 if(__go_file_line(g->gopc - 1, -1, &fn, &file, &line)) {
706 runtime_printf("created by %S\n", fn);
707 runtime_printf("\t%S:%D\n", file, (int64) line);
712 void
713 runtime_tracebackothers(G * volatile me)
715 G * volatile gp;
716 Traceback tb;
717 int32 traceback;
718 volatile uintptr i;
720 tb.gp = me;
721 traceback = runtime_gotraceback(nil);
723 // Show the current goroutine first, if we haven't already.
724 if((gp = g->m->curg) != nil && gp != me) {
725 runtime_printf("\n");
726 runtime_goroutineheader(gp);
727 gp->traceback = &tb;
729 #ifdef USING_SPLIT_STACK
730 __splitstack_getcontext(&me->stackcontext[0]);
731 #endif
732 getcontext(ucontext_arg(&me->context[0]));
734 if(gp->traceback != nil) {
735 runtime_gogo(gp);
738 runtime_printtrace(tb.locbuf, tb.c, false);
739 runtime_printcreatedby(gp);
742 runtime_lock(&allglock);
743 for(i = 0; i < runtime_allglen; i++) {
744 gp = runtime_allg[i];
745 if(gp == me || gp == g->m->curg || gp->atomicstatus == _Gdead)
746 continue;
747 if(gp->issystem && traceback < 2)
748 continue;
749 runtime_printf("\n");
750 runtime_goroutineheader(gp);
752 // Our only mechanism for doing a stack trace is
753 // _Unwind_Backtrace. And that only works for the
754 // current thread, not for other random goroutines.
755 // So we need to switch context to the goroutine, get
756 // the backtrace, and then switch back.
758 // This means that if g is running or in a syscall, we
759 // can't reliably print a stack trace. FIXME.
761 if(gp->atomicstatus == _Grunning) {
762 runtime_printf("\tgoroutine running on other thread; stack unavailable\n");
763 runtime_printcreatedby(gp);
764 } else if(gp->atomicstatus == _Gsyscall) {
765 runtime_printf("\tgoroutine in C code; stack unavailable\n");
766 runtime_printcreatedby(gp);
767 } else {
768 gp->traceback = &tb;
770 #ifdef USING_SPLIT_STACK
771 __splitstack_getcontext(&me->stackcontext[0]);
772 #endif
773 getcontext(ucontext_arg(&me->context[0]));
775 if(gp->traceback != nil) {
776 runtime_gogo(gp);
779 runtime_printtrace(tb.locbuf, tb.c, false);
780 runtime_printcreatedby(gp);
783 runtime_unlock(&allglock);
786 static void
787 checkmcount(void)
789 // sched lock is held
790 if(runtime_sched.mcount > runtime_sched.maxmcount) {
791 runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched.maxmcount);
792 runtime_throw("thread exhaustion");
796 // Do a stack trace of gp, and then restore the context to
797 // gp->traceback->gp.
799 static void
800 gtraceback(G* gp)
802 Traceback* traceback;
804 traceback = gp->traceback;
805 gp->traceback = nil;
806 if(gp->m != nil)
807 runtime_throw("gtraceback: m is not nil");
808 gp->m = traceback->gp->m;
809 traceback->c = runtime_callers(1, traceback->locbuf,
810 sizeof traceback->locbuf / sizeof traceback->locbuf[0], false);
811 gp->m = nil;
812 runtime_gogo(traceback->gp);
815 static void
816 mcommoninit(M *mp)
818 // If there is no mcache, runtime_callers() will crash,
819 // and we are most likely in the sysmon thread, so the stack is senseless anyway.
820 if(g->m->mcache)
821 runtime_callers(1, mp->createstack, nelem(mp->createstack), false);
823 mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
825 runtime_lock(&runtime_sched);
826 mp->id = runtime_sched.mcount++;
827 checkmcount();
828 runtime_mpreinit(mp);
830 // Add to runtime_allm so garbage collector doesn't free m
831 // when it is just in a register or thread-local storage.
832 mp->alllink = runtime_allm;
833 // runtime_NumCgoCall() iterates over allm w/o schedlock,
834 // so we need to publish it safely.
835 runtime_atomicstorep(&runtime_allm, mp);
836 runtime_unlock(&runtime_sched);
839 // Mark gp ready to run.
840 void
841 runtime_ready(G *gp)
843 // Mark runnable.
844 g->m->locks++; // disable preemption because it can be holding p in a local var
845 if(gp->atomicstatus != _Gwaiting) {
846 runtime_printf("goroutine %D has status %d\n", gp->goid, gp->atomicstatus);
847 runtime_throw("bad g->atomicstatus in ready");
849 gp->atomicstatus = _Grunnable;
850 runqput((P*)g->m->p, gp);
851 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0) // TODO: fast atomic
852 wakep();
853 g->m->locks--;
856 int32
857 runtime_gcprocs(void)
859 int32 n;
861 // Figure out how many CPUs to use during GC.
862 // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
863 runtime_lock(&runtime_sched);
864 n = runtime_gomaxprocs;
865 if(n > runtime_ncpu)
866 n = runtime_ncpu > 0 ? runtime_ncpu : 1;
867 if(n > MaxGcproc)
868 n = MaxGcproc;
869 if(n > runtime_sched.nmidle+1) // one M is currently running
870 n = runtime_sched.nmidle+1;
871 runtime_unlock(&runtime_sched);
872 return n;
875 static bool
876 needaddgcproc(void)
878 int32 n;
880 runtime_lock(&runtime_sched);
881 n = runtime_gomaxprocs;
882 if(n > runtime_ncpu)
883 n = runtime_ncpu;
884 if(n > MaxGcproc)
885 n = MaxGcproc;
886 n -= runtime_sched.nmidle+1; // one M is currently running
887 runtime_unlock(&runtime_sched);
888 return n > 0;
891 void
892 runtime_helpgc(int32 nproc)
894 M *mp;
895 int32 n, pos;
897 runtime_lock(&runtime_sched);
898 pos = 0;
899 for(n = 1; n < nproc; n++) { // one M is currently running
900 if(runtime_allp[pos]->mcache == g->m->mcache)
901 pos++;
902 mp = mget();
903 if(mp == nil)
904 runtime_throw("runtime_gcprocs inconsistency");
905 mp->helpgc = n;
906 mp->mcache = runtime_allp[pos]->mcache;
907 pos++;
908 runtime_notewakeup(&mp->park);
910 runtime_unlock(&runtime_sched);
913 // Similar to stoptheworld but best-effort and can be called several times.
914 // There is no reverse operation; it is used during crashing.
915 // This function must not lock any mutexes.
916 void
917 runtime_freezetheworld(void)
919 int32 i;
921 if(runtime_gomaxprocs == 1)
922 return;
923 // stopwait and preemption requests can be lost
924 // due to races with concurrently executing threads,
925 // so try several times
926 for(i = 0; i < 5; i++) {
927 // this should tell the scheduler to not start any new goroutines
928 runtime_sched.stopwait = 0x7fffffff;
929 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
930 // this should stop running goroutines
931 if(!preemptall())
932 break; // no running goroutines
933 runtime_usleep(1000);
935 // to be sure
936 runtime_usleep(1000);
937 preemptall();
938 runtime_usleep(1000);
941 void
942 runtime_stoptheworld(void)
944 int32 i;
945 uint32 s;
946 P *p;
947 bool wait;
949 runtime_lock(&runtime_sched);
950 runtime_sched.stopwait = runtime_gomaxprocs;
951 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
952 preemptall();
953 // stop current P
954 ((P*)g->m->p)->status = _Pgcstop;
955 runtime_sched.stopwait--;
956 // try to retake all P's in _Psyscall status
957 for(i = 0; i < runtime_gomaxprocs; i++) {
958 p = runtime_allp[i];
959 s = p->status;
960 if(s == _Psyscall && runtime_cas(&p->status, s, _Pgcstop))
961 runtime_sched.stopwait--;
963 // stop idle P's
964 while((p = pidleget()) != nil) {
965 p->status = _Pgcstop;
966 runtime_sched.stopwait--;
968 wait = runtime_sched.stopwait > 0;
969 runtime_unlock(&runtime_sched);
971 // wait for remaining P's to stop voluntarily
972 if(wait) {
973 runtime_notesleep(&runtime_sched.stopnote);
974 runtime_noteclear(&runtime_sched.stopnote);
976 if(runtime_sched.stopwait)
977 runtime_throw("stoptheworld: not stopped");
978 for(i = 0; i < runtime_gomaxprocs; i++) {
979 p = runtime_allp[i];
980 if(p->status != _Pgcstop)
981 runtime_throw("stoptheworld: not stopped");
985 static void
986 mhelpgc(void)
988 g->m->helpgc = -1;
991 void
992 runtime_starttheworld(void)
994 P *p, *p1;
995 M *mp;
996 G *gp;
997 bool add;
999 g->m->locks++; // disable preemption because it can be holding p in a local var
1000 gp = runtime_netpoll(false); // non-blocking
1001 injectglist(gp);
1002 add = needaddgcproc();
1003 runtime_lock(&runtime_sched);
1004 if(newprocs) {
1005 procresize(newprocs);
1006 newprocs = 0;
1007 } else
1008 procresize(runtime_gomaxprocs);
1009 runtime_sched.gcwaiting = 0;
1011 p1 = nil;
1012 while((p = pidleget()) != nil) {
1013 // procresize() puts p's with work at the beginning of the list.
1014 // Once we reach a p without a run queue, the rest don't have one either.
1015 if(p->runqhead == p->runqtail) {
1016 pidleput(p);
1017 break;
1019 p->m = (uintptr)mget();
1020 p->link = (uintptr)p1;
1021 p1 = p;
1023 if(runtime_sched.sysmonwait) {
1024 runtime_sched.sysmonwait = false;
1025 runtime_notewakeup(&runtime_sched.sysmonnote);
1027 runtime_unlock(&runtime_sched);
1029 while(p1) {
1030 p = p1;
1031 p1 = (P*)p1->link;
1032 if(p->m) {
1033 mp = (M*)p->m;
1034 p->m = 0;
1035 if(mp->nextp)
1036 runtime_throw("starttheworld: inconsistent mp->nextp");
1037 mp->nextp = (uintptr)p;
1038 runtime_notewakeup(&mp->park);
1039 } else {
1040 // Start M to run P. Do not start another M below.
1041 newm(nil, p);
1042 add = false;
1046 if(add) {
1047 // If GC could have used another helper proc, start one now,
1048 // in the hope that it will be available next time.
1049 // It would have been even better to start it before the collection,
1050 // but doing so requires allocating memory, so it's tricky to
1051 // coordinate. This lazy approach works out in practice:
1052 // we don't mind if the first couple gc rounds don't have quite
1053 // the maximum number of procs.
1054 newm(mhelpgc, nil);
1056 g->m->locks--;
1059 // Called to start an M.
1060 void*
1061 runtime_mstart(void* mp)
1063 M *m;
1065 m = (M*)mp;
1066 g = m->g0;
1067 g->m = m;
1069 initcontext();
1071 g->entry = nil;
1072 g->param = nil;
1074 // Record top of stack for use by mcall.
1075 // Once we call schedule we're never coming back,
1076 // so other calls can reuse this stack space.
1077 #ifdef USING_SPLIT_STACK
1078 __splitstack_getcontext(&g->stackcontext[0]);
1079 #else
1080 g->gcinitialsp = &mp;
1081 // Setting gcstacksize to 0 is a marker meaning that gcinitialsp
1082 // is the top of the stack, not the bottom.
1083 g->gcstacksize = 0;
1084 g->gcnextsp = &mp;
1085 #endif
1086 getcontext(ucontext_arg(&g->context[0]));
1088 if(g->entry != nil) {
1089 // Got here from mcall.
1090 void (*pfn)(G*) = (void (*)(G*))g->entry;
1091 G* gp = (G*)g->param;
1092 pfn(gp);
1093 *(int*)0x21 = 0x21;
1095 runtime_minit();
1097 #ifdef USING_SPLIT_STACK
1099 int dont_block_signals = 0;
1100 __splitstack_block_signals(&dont_block_signals, nil);
1102 #endif
1104 // Install signal handlers; after minit so that minit can
1105 // prepare the thread to be able to handle the signals.
1106 if(m == &runtime_m0) {
1107 if(runtime_iscgo && !runtime_cgoHasExtraM) {
1108 runtime_cgoHasExtraM = true;
1109 runtime_newextram();
1110 runtime_needextram = 0;
1112 runtime_initsig(false);
1115 if(m->mstartfn)
1116 ((void (*)(void))m->mstartfn)();
1118 if(m->helpgc) {
1119 m->helpgc = 0;
1120 stopm();
1121 } else if(m != &runtime_m0) {
1122 acquirep((P*)m->nextp);
1123 m->nextp = 0;
1125 schedule();
1127 // TODO(brainman): This point is never reached, because scheduler
1128 // does not release os threads at the moment. But once this path
1129 // is enabled, we must remove our seh here.
1131 return nil;
1134 typedef struct CgoThreadStart CgoThreadStart;
1135 struct CgoThreadStart
1137 M *m;
1138 G *g;
1139 uintptr *tls;
1140 void (*fn)(void);
1143 // Allocate a new m unassociated with any thread.
1144 // Can use p for allocation context if needed.
1146 runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, uintptr* ret_g0_stacksize)
1148 M *mp;
1150 g->m->locks++; // disable GC because it can be called from sysmon
1151 if(g->m->p == 0)
1152 acquirep(p); // temporarily borrow p for mallocs in this function
1153 #if 0
1154 if(mtype == nil) {
1155 Eface e;
1156 runtime_gc_m_ptr(&e);
1157 mtype = ((const PtrType*)e.__type_descriptor)->__element_type;
1159 #endif
1161 mp = runtime_mal(sizeof *mp);
1162 mcommoninit(mp);
1163 mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize);
1164 mp->g0->m = mp;
1166 if(p == (P*)g->m->p)
1167 releasep();
1168 g->m->locks--;
1170 return mp;
1173 static G*
1174 allocg(void)
1176 G *gp;
1177 // static Type *gtype;
1179 // if(gtype == nil) {
1180 // Eface e;
1181 // runtime_gc_g_ptr(&e);
1182 // gtype = ((PtrType*)e.__type_descriptor)->__element_type;
1183 // }
1184 // gp = runtime_cnew(gtype);
1185 gp = runtime_malloc(sizeof(G));
1186 return gp;
1189 static M* lockextra(bool nilokay);
1190 static void unlockextra(M*);
1192 // needm is called when a cgo callback happens on a
1193 // thread without an m (a thread not created by Go).
1194 // In this case, needm is expected to find an m to use
1195 // and return with m, g initialized correctly.
1196 // Since m and g are not set now (likely nil, but see below)
1197 // needm is limited in what routines it can call. In particular
1198 // it can only call nosplit functions (textflag 7) and cannot
1199 // do any scheduling that requires an m.
1201 // In order to avoid needing heavy lifting here, we adopt
1202 // the following strategy: there is a stack of available m's
1203 // that can be stolen. Using compare-and-swap
1204 // to pop from the stack has ABA races, so we simulate
1205 // a lock by doing an exchange (via casp) to steal the stack
1206 // head and replace the top pointer with MLOCKED (1).
1207 // This serves as a simple spin lock that we can use even
1208 // without an m. The thread that locks the stack in this way
1209 // unlocks the stack by storing a valid stack head pointer.
1211 // In order to make sure that there is always an m structure
1212 // available to be stolen, we maintain the invariant that there
1213 // is always one more than needed. At the beginning of the
1214 // program (if cgo is in use) the list is seeded with a single m.
1215 // If needm finds that it has taken the last m off the list, its job
1216 // is - once it has installed its own m so that it can do things like
1217 // allocate memory - to create a spare m and put it on the list.
1219 // Each of these extra m's also has a g0 and a curg that are
1220 // pressed into service as the scheduling stack and current
1221 // goroutine for the duration of the cgo callback.
1223 // When the callback is done with the m, it calls dropm to
1224 // put the m back on the list.
1226 // Unlike the gc toolchain, we start running on curg, since we are
1227 // just going to return and let the caller continue.
1228 void
1229 runtime_needm(void)
1231 M *mp;
1233 if(runtime_needextram) {
1234 // Can happen if C/C++ code calls Go from a global ctor.
1235 // Can not throw, because scheduler is not initialized yet.
1236 int rv __attribute__((unused));
1237 rv = runtime_write(2, "fatal error: cgo callback before cgo call\n",
1238 sizeof("fatal error: cgo callback before cgo call\n")-1);
1239 runtime_exit(1);
1242 // Lock extra list, take head, unlock popped list.
1243 // nilokay=false is safe here because of the invariant above,
1244 // that the extra list always contains or will soon contain
1245 // at least one m.
1246 mp = lockextra(false);
1248 // Set needextram when we've just emptied the list,
1249 // so that the eventual call into cgocallbackg will
1250 // allocate a new m for the extra list. We delay the
1251 // allocation until then so that it can be done
1252 // after exitsyscall makes sure it is okay to be
1253 // running at all (that is, there's no garbage collection
1254 // running right now).
1255 mp->needextram = mp->schedlink == 0;
1256 unlockextra((M*)mp->schedlink);
1258 // Install g (= m->curg).
1259 runtime_setg(mp->curg);
1261 // Initialize g's context as in mstart.
1262 initcontext();
1263 g->atomicstatus = _Gsyscall;
1264 g->entry = nil;
1265 g->param = nil;
1266 #ifdef USING_SPLIT_STACK
1267 __splitstack_getcontext(&g->stackcontext[0]);
1268 #else
1269 g->gcinitialsp = &mp;
1270 g->gcstack = nil;
1271 g->gcstacksize = 0;
1272 g->gcnextsp = &mp;
1273 #endif
1274 getcontext(ucontext_arg(&g->context[0]));
1276 if(g->entry != nil) {
1277 // Got here from mcall.
1278 void (*pfn)(G*) = (void (*)(G*))g->entry;
1279 G* gp = (G*)g->param;
1280 pfn(gp);
1281 *(int*)0x22 = 0x22;
1284 // Initialize this thread to use the m.
1285 runtime_minit();
1287 #ifdef USING_SPLIT_STACK
1289 int dont_block_signals = 0;
1290 __splitstack_block_signals(&dont_block_signals, nil);
1292 #endif
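// Schematically, a thread not created by Go brackets a cgo callback with
// these two calls (the actual sequence lives in the cgo callback glue, not
// here):
//
//	runtime_needm();   // borrow an m (with its g0 and curg) from the extra list
//	... run the Go callback on the borrowed m ...
//	runtime_dropm();   // return the m to the extra list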
1295 // newextram allocates an m and puts it on the extra list.
1296 // It is called with a working local m, so that it can do things
1297 // like call schedlock and allocate.
1298 void
1299 runtime_newextram(void)
1301 M *mp, *mnext;
1302 G *gp;
1303 byte *g0_sp, *sp;
1304 uintptr g0_spsize, spsize;
1305 ucontext_t *uc;
1307 // Create extra goroutine locked to extra m.
1308 // The goroutine is the context in which the cgo callback will run.
1309 // The sched.pc will never be returned to, but setting it to
1310 // runtime.goexit makes clear to the traceback routines where
1311 // the goroutine stack ends.
1312 mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize);
1313 gp = runtime_malg(StackMin, &sp, &spsize);
1314 gp->atomicstatus = _Gdead;
1315 gp->m = mp;
1316 mp->curg = gp;
1317 mp->locked = _LockInternal;
1318 mp->lockedg = gp;
1319 gp->lockedm = mp;
1320 gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
1321 // put on allg for garbage collector
1322 allgadd(gp);
1324 // The context for gp will be set up in runtime_needm. But
1325 // here we need to set up the context for g0.
1326 uc = ucontext_arg(&mp->g0->context[0]);
1327 getcontext(uc);
1328 uc->uc_stack.ss_sp = g0_sp;
1329 uc->uc_stack.ss_size = (size_t)g0_spsize;
1330 makecontext(uc, kickoff, 0);
1332 // Add m to the extra list.
1333 mnext = lockextra(true);
1334 mp->schedlink = (uintptr)mnext;
1335 unlockextra(mp);
1338 // dropm is called when a cgo callback has called needm but is now
1339 // done with the callback and returning back into the non-Go thread.
1340 // It puts the current m back onto the extra list.
1342 // The main expense here is the call to signalstack to release the
1343 // m's signal stack, and then the call to needm on the next callback
1344 // from this thread. It is tempting to try to save the m for next time,
1345 // which would eliminate both these costs, but there might not be
1346 // a next time: the current thread (which Go does not control) might exit.
1347 // If we saved the m for that thread, there would be an m leak each time
1348 // such a thread exited. Instead, we acquire and release an m on each
1349 // call. These should typically not be scheduling operations, just a few
1350 // atomics, so the cost should be small.
1352 // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
1353 // variable using pthread_key_create. Unlike the pthread keys we already use
1354 // on OS X, this dummy key would never be read by Go code. It would exist
1355 // only so that we could register a thread-exit-time destructor.
1356 // That destructor would put the m back onto the extra list.
1357 // This is purely a performance optimization. The current version,
1358 // in which dropm happens on each cgo call, is still correct too.
1359 // We may have to keep the current version on systems with cgo
1360 // but without pthreads, like Windows.
1361 void
1362 runtime_dropm(void)
1364 M *mp, *mnext;
1366 // Undo whatever initialization minit did during needm.
1367 runtime_unminit();
1369 // Clear m and g, and return m to the extra list.
1370 // After the call to setg we can only call nosplit functions.
1371 mp = g->m;
1372 runtime_setg(nil);
1374 mp->curg->atomicstatus = _Gdead;
1375 mp->curg->gcstack = nil;
1376 mp->curg->gcnextsp = nil;
1378 mnext = lockextra(true);
1379 mp->schedlink = (uintptr)mnext;
1380 unlockextra(mp);
1383 #define MLOCKED ((M*)1)
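// MLOCKED is a sentinel rather than a real M pointer: address 1 can never
// be a valid M, so storing it in runtime_extram marks the list as locked.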
1385 // lockextra locks the extra list and returns the list head.
1386 // The caller must unlock the list by storing a new list head
1387 // to runtime.extram. If nilokay is true, then lockextra will
1388 // return a nil list head if that's what it finds. If nilokay is false,
1389 // lockextra will keep waiting until the list head is no longer nil.
1390 static M*
1391 lockextra(bool nilokay)
1393 M *mp;
1394 void (*yield)(void);
1396 for(;;) {
1397 mp = runtime_atomicloadp(&runtime_extram);
1398 if(mp == MLOCKED) {
1399 yield = runtime_osyield;
1400 yield();
1401 continue;
1403 if(mp == nil && !nilokay) {
1404 runtime_usleep(1);
1405 continue;
1407 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1408 yield = runtime_osyield;
1409 yield();
1410 continue;
1412 break;
1414 return mp;
1417 static void
1418 unlockextra(M *mp)
1420 runtime_atomicstorep(&runtime_extram, mp);
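// The resulting protocol, as used by needm, newextram and dropm, is:
//
//	mp = lockextra(false);      // spin until the head is not MLOCKED,
//	                            // then swap MLOCKED in
//	... pop the head, or link a new m in front of it ...
//	unlockextra(newhead);       // storing a valid head releases the lock
//
// where newhead is mp->schedlink when popping or the newly linked m when
// pushing.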
1423 static int32
1424 countextra()
1426 M *mp, *mc;
1427 int32 c;
1429 for(;;) {
1430 mp = runtime_atomicloadp(&runtime_extram);
1431 if(mp == MLOCKED) {
1432 runtime_osyield();
1433 continue;
1435 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1436 runtime_osyield();
1437 continue;
1439 c = 0;
1440 for(mc = mp; mc != nil; mc = (M*)mc->schedlink)
1441 c++;
1442 runtime_atomicstorep(&runtime_extram, mp);
1443 return c;
1447 // Create a new m. It will start off with a call to fn, or else the scheduler.
1448 static void
1449 newm(void(*fn)(void), P *p)
1451 M *mp;
1453 mp = runtime_allocm(p, -1, nil, nil);
1454 mp->nextp = (uintptr)p;
1455 mp->mstartfn = (uintptr)(void*)fn;
1457 runtime_newosproc(mp);
1460 // Stops execution of the current m until new work is available.
1461 // Returns with acquired P.
1462 static void
1463 stopm(void)
1465 M* m;
1467 m = g->m;
1468 if(m->locks)
1469 runtime_throw("stopm holding locks");
1470 if(m->p)
1471 runtime_throw("stopm holding p");
1472 if(m->spinning) {
1473 m->spinning = false;
1474 runtime_xadd(&runtime_sched.nmspinning, -1);
1477 retry:
1478 runtime_lock(&runtime_sched);
1479 mput(m);
1480 runtime_unlock(&runtime_sched);
1481 runtime_notesleep(&m->park);
1482 m = g->m;
1483 runtime_noteclear(&m->park);
1484 if(m->helpgc) {
1485 runtime_gchelper();
1486 m->helpgc = 0;
1487 m->mcache = nil;
1488 goto retry;
1490 acquirep((P*)m->nextp);
1491 m->nextp = 0;
1494 static void
1495 mspinning(void)
1497 g->m->spinning = true;
1500 // Schedules some M to run the p (creates an M if necessary).
1501 // If p==nil, tries to get an idle P; if there are no idle P's, does nothing.
1502 static void
1503 startm(P *p, bool spinning)
1505 M *mp;
1506 void (*fn)(void);
1508 runtime_lock(&runtime_sched);
1509 if(p == nil) {
1510 p = pidleget();
1511 if(p == nil) {
1512 runtime_unlock(&runtime_sched);
1513 if(spinning)
1514 runtime_xadd(&runtime_sched.nmspinning, -1);
1515 return;
1518 mp = mget();
1519 runtime_unlock(&runtime_sched);
1520 if(mp == nil) {
1521 fn = nil;
1522 if(spinning)
1523 fn = mspinning;
1524 newm(fn, p);
1525 return;
1527 if(mp->spinning)
1528 runtime_throw("startm: m is spinning");
1529 if(mp->nextp)
1530 runtime_throw("startm: m has p");
1531 mp->spinning = spinning;
1532 mp->nextp = (uintptr)p;
1533 runtime_notewakeup(&mp->park);
1536 // Hands off P from syscall or locked M.
1537 static void
1538 handoffp(P *p)
1540 // if it has local work, start it straight away
1541 if(p->runqhead != p->runqtail || runtime_sched.runqsize) {
1542 startm(p, false);
1543 return;
1545 // no local work, check that there are no spinning/idle M's,
1546 // otherwise our help is not required
1547 if(runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) == 0 && // TODO: fast atomic
1548 runtime_cas(&runtime_sched.nmspinning, 0, 1)) {
1549 startm(p, true);
1550 return;
1552 runtime_lock(&runtime_sched);
1553 if(runtime_sched.gcwaiting) {
1554 p->status = _Pgcstop;
1555 if(--runtime_sched.stopwait == 0)
1556 runtime_notewakeup(&runtime_sched.stopnote);
1557 runtime_unlock(&runtime_sched);
1558 return;
1560 if(runtime_sched.runqsize) {
1561 runtime_unlock(&runtime_sched);
1562 startm(p, false);
1563 return;
1565 // If this is the last running P and nobody is polling the network,
1566 // we need to wake up another M to poll the network.
1567 if(runtime_sched.npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched.lastpoll) != 0) {
1568 runtime_unlock(&runtime_sched);
1569 startm(p, false);
1570 return;
1572 pidleput(p);
1573 runtime_unlock(&runtime_sched);
1576 // Tries to add one more P to execute G's.
1577 // Called when a G is made runnable (newproc, ready).
1578 static void
1579 wakep(void)
1581 // be conservative about spinning threads
1582 if(!runtime_cas(&runtime_sched.nmspinning, 0, 1))
1583 return;
1584 startm(nil, true);
1587 // Stops execution of the current m that is locked to a g until the g is runnable again.
1588 // Returns with acquired P.
1589 static void
1590 stoplockedm(void)
1592 M *m;
1593 P *p;
1595 m = g->m;
1596 if(m->lockedg == nil || m->lockedg->lockedm != m)
1597 runtime_throw("stoplockedm: inconsistent locking");
1598 if(m->p) {
1599 // Schedule another M to run this p.
1600 p = releasep();
1601 handoffp(p);
1603 incidlelocked(1);
1604 // Wait until another thread schedules lockedg again.
1605 runtime_notesleep(&m->park);
1606 m = g->m;
1607 runtime_noteclear(&m->park);
1608 if(m->lockedg->atomicstatus != _Grunnable)
1609 runtime_throw("stoplockedm: not runnable");
1610 acquirep((P*)m->nextp);
1611 m->nextp = 0;
1614 // Schedules the locked m to run the locked gp.
1615 static void
1616 startlockedm(G *gp)
1618 M *mp;
1619 P *p;
1621 mp = gp->lockedm;
1622 if(mp == g->m)
1623 runtime_throw("startlockedm: locked to me");
1624 if(mp->nextp)
1625 runtime_throw("startlockedm: m has p");
1626 // directly handoff current P to the locked m
1627 incidlelocked(-1);
1628 p = releasep();
1629 mp->nextp = (uintptr)p;
1630 runtime_notewakeup(&mp->park);
1631 stopm();
1634 // Stops the current m for stoptheworld.
1635 // Returns when the world is restarted.
1636 static void
1637 gcstopm(void)
1639 P *p;
1641 if(!runtime_sched.gcwaiting)
1642 runtime_throw("gcstopm: not waiting for gc");
1643 if(g->m->spinning) {
1644 g->m->spinning = false;
1645 runtime_xadd(&runtime_sched.nmspinning, -1);
1647 p = releasep();
1648 runtime_lock(&runtime_sched);
1649 p->status = _Pgcstop;
1650 if(--runtime_sched.stopwait == 0)
1651 runtime_notewakeup(&runtime_sched.stopnote);
1652 runtime_unlock(&runtime_sched);
1653 stopm();
1656 // Schedules gp to run on the current M.
1657 // Never returns.
1658 static void
1659 execute(G *gp)
1661 int32 hz;
1663 if(gp->atomicstatus != _Grunnable) {
1664 runtime_printf("execute: bad g status %d\n", gp->atomicstatus);
1665 runtime_throw("execute: bad g status");
1667 gp->atomicstatus = _Grunning;
1668 gp->waitsince = 0;
1669 ((P*)g->m->p)->schedtick++;
1670 g->m->curg = gp;
1671 gp->m = g->m;
1673 // Check whether the profiler needs to be turned on or off.
1674 hz = runtime_sched.profilehz;
1675 if(g->m->profilehz != hz)
1676 runtime_resetcpuprofiler(hz);
1678 runtime_gogo(gp);
1681 // Finds a runnable goroutine to execute.
1682 // Tries to steal from other P's, get g from global queue, poll network.
1683 static G*
1684 findrunnable(void)
1686 G *gp;
1687 P *p;
1688 int32 i;
1690 top:
1691 if(runtime_sched.gcwaiting) {
1692 gcstopm();
1693 goto top;
1695 if(runtime_fingwait && runtime_fingwake && (gp = runtime_wakefing()) != nil)
1696 runtime_ready(gp);
1697 // local runq
1698 gp = runqget((P*)g->m->p);
1699 if(gp)
1700 return gp;
1701 // global runq
1702 if(runtime_sched.runqsize) {
1703 runtime_lock(&runtime_sched);
1704 gp = globrunqget((P*)g->m->p, 0);
1705 runtime_unlock(&runtime_sched);
1706 if(gp)
1707 return gp;
1709 // poll network
1710 gp = runtime_netpoll(false); // non-blocking
1711 if(gp) {
1712 injectglist((G*)gp->schedlink);
1713 gp->atomicstatus = _Grunnable;
1714 return gp;
1716 // If number of spinning M's >= number of busy P's, block.
1717 // This is necessary to prevent excessive CPU consumption
1718 // when GOMAXPROCS>>1 but the program parallelism is low.
1719 if(!g->m->spinning && 2 * runtime_atomicload(&runtime_sched.nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched.npidle)) // TODO: fast atomic
1720 goto stop;
1721 if(!g->m->spinning) {
1722 g->m->spinning = true;
1723 runtime_xadd(&runtime_sched.nmspinning, 1);
1725 // random steal from other P's
1726 for(i = 0; i < 2*runtime_gomaxprocs; i++) {
1727 if(runtime_sched.gcwaiting)
1728 goto top;
1729 p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
1730 if(p == (P*)g->m->p)
1731 gp = runqget(p);
1732 else
1733 gp = runqsteal((P*)g->m->p, p);
1734 if(gp)
1735 return gp;
1737 stop:
1738 // return P and block
1739 runtime_lock(&runtime_sched);
1740 if(runtime_sched.gcwaiting) {
1741 runtime_unlock(&runtime_sched);
1742 goto top;
1744 if(runtime_sched.runqsize) {
1745 gp = globrunqget((P*)g->m->p, 0);
1746 runtime_unlock(&runtime_sched);
1747 return gp;
1749 p = releasep();
1750 pidleput(p);
1751 runtime_unlock(&runtime_sched);
1752 if(g->m->spinning) {
1753 g->m->spinning = false;
1754 runtime_xadd(&runtime_sched.nmspinning, -1);
1756 // check all runqueues once again
1757 for(i = 0; i < runtime_gomaxprocs; i++) {
1758 p = runtime_allp[i];
1759 if(p && p->runqhead != p->runqtail) {
1760 runtime_lock(&runtime_sched);
1761 p = pidleget();
1762 runtime_unlock(&runtime_sched);
1763 if(p) {
1764 acquirep(p);
1765 goto top;
1767 break;
1770 // poll network
1771 if(runtime_xchg64(&runtime_sched.lastpoll, 0) != 0) {
1772 if(g->m->p)
1773 runtime_throw("findrunnable: netpoll with p");
1774 if(g->m->spinning)
1775 runtime_throw("findrunnable: netpoll with spinning");
1776 gp = runtime_netpoll(true); // block until new work is available
1777 runtime_atomicstore64(&runtime_sched.lastpoll, runtime_nanotime());
1778 if(gp) {
1779 runtime_lock(&runtime_sched);
1780 p = pidleget();
1781 runtime_unlock(&runtime_sched);
1782 if(p) {
1783 acquirep(p);
1784 injectglist((G*)gp->schedlink);
1785 gp->atomicstatus = _Grunnable;
1786 return gp;
1788 injectglist(gp);
1791 stopm();
1792 goto top;
1795 static void
1796 resetspinning(void)
1798 int32 nmspinning;
1800 if(g->m->spinning) {
1801 g->m->spinning = false;
1802 nmspinning = runtime_xadd(&runtime_sched.nmspinning, -1);
1803 if(nmspinning < 0)
1804 runtime_throw("findrunnable: negative nmspinning");
1805 } else
1806 nmspinning = runtime_atomicload(&runtime_sched.nmspinning);
1808 // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
1809 // so see if we need to wakeup another P here.
1810 if (nmspinning == 0 && runtime_atomicload(&runtime_sched.npidle) > 0)
1811 wakep();
1814 // Injects the list of runnable G's into the scheduler.
1815 // Can run concurrently with GC.
1816 static void
1817 injectglist(G *glist)
1819 int32 n;
1820 G *gp;
1822 if(glist == nil)
1823 return;
1824 runtime_lock(&runtime_sched);
1825 for(n = 0; glist; n++) {
1826 gp = glist;
1827 glist = (G*)gp->schedlink;
1828 gp->atomicstatus = _Grunnable;
1829 globrunqput(gp);
1831 runtime_unlock(&runtime_sched);
1833 for(; n && runtime_sched.npidle; n--)
1834 startm(nil, false);
1837 // One round of scheduler: find a runnable goroutine and execute it.
1838 // Never returns.
1839 static void
1840 schedule(void)
1842 G *gp;
1843 uint32 tick;
1845 if(g->m->locks)
1846 runtime_throw("schedule: holding locks");
1848 top:
1849 if(runtime_sched.gcwaiting) {
1850 gcstopm();
1851 goto top;
1854 gp = nil;
1855 // Check the global runnable queue once in a while to ensure fairness.
1856 // Otherwise two goroutines can completely occupy the local runqueue
1857 // by constantly respawning each other.
1858 tick = ((P*)g->m->p)->schedtick;
1859 // This is a fancy way to say tick%61==0,
1860 // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
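	// (0x4325c53f is (1<<36)/61 rounded up, so ((uint64)tick*0x4325c53f)>>36
	// is exactly tick/61 for any 32-bit tick; subtracting 61 times that
	// quotient from tick leaves tick%61.)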
1861 if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched.runqsize > 0) {
1862 runtime_lock(&runtime_sched);
1863 gp = globrunqget((P*)g->m->p, 1);
1864 runtime_unlock(&runtime_sched);
1865 if(gp)
1866 resetspinning();
1868 if(gp == nil) {
1869 gp = runqget((P*)g->m->p);
1870 if(gp && g->m->spinning)
1871 runtime_throw("schedule: spinning with local work");
1873 if(gp == nil) {
1874 gp = findrunnable(); // blocks until work is available
1875 resetspinning();
1878 if(gp->lockedm) {
1879 // Hands off own p to the locked m,
1880 // then blocks waiting for a new p.
1881 startlockedm(gp);
1882 goto top;
1885 execute(gp);
1888 // Puts the current goroutine into a waiting state and calls unlockf.
1889 // If unlockf returns false, the goroutine is resumed.
1890 void
1891 runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason)
1893 if(g->atomicstatus != _Grunning)
1894 runtime_throw("bad g status");
1895 g->m->waitlock = lock;
1896 g->m->waitunlockf = unlockf;
1897 g->waitreason = runtime_gostringnocopy((const byte*)reason);
1898 runtime_mcall(park0);
1901 static bool
1902 parkunlock(G *gp, void *lock)
1904 USED(gp);
1905 runtime_unlock(lock);
1906 return true;
1909 // Puts the current goroutine into a waiting state and unlocks the lock.
1910 // The goroutine can be made runnable again by calling runtime_ready(gp).
1911 void
1912 runtime_parkunlock(Lock *lock, const char *reason)
1914 runtime_park(parkunlock, lock, reason);
1917 // runtime_park continuation on g0.
1918 static void
1919 park0(G *gp)
1921 M *m;
1922 bool ok;
1924 m = g->m;
1925 gp->atomicstatus = _Gwaiting;
1926 gp->m = nil;
1927 m->curg = nil;
1928 if(m->waitunlockf) {
1929 ok = ((bool (*)(G*, void*))m->waitunlockf)(gp, m->waitlock);
1930 m->waitunlockf = nil;
1931 m->waitlock = nil;
1932 if(!ok) {
1933 gp->atomicstatus = _Grunnable;
1934 execute(gp); // Schedule it back, never returns.
1937 if(m->lockedg) {
1938 stoplockedm();
1939 execute(gp); // Never returns.
1941 schedule();
1944 // Scheduler yield.
1945 void
1946 runtime_gosched(void)
1948 if(g->atomicstatus != _Grunning)
1949 runtime_throw("bad g status");
1950 runtime_mcall(runtime_gosched0);
1953 // runtime_gosched continuation on g0.
1954 void
1955 runtime_gosched0(G *gp)
1957 M *m;
1959 m = g->m;
1960 gp->atomicstatus = _Grunnable;
1961 gp->m = nil;
1962 m->curg = nil;
1963 runtime_lock(&runtime_sched);
1964 globrunqput(gp);
1965 runtime_unlock(&runtime_sched);
1966 if(m->lockedg) {
1967 stoplockedm();
1968 execute(gp); // Never returns.
1970 schedule();
1973 // Finishes execution of the current goroutine.
1974 // Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack).
1975 // Since it does not return, it does not matter. But if it is preempted
1976 // at the split stack check, GC will complain about inconsistent sp.
1977 void runtime_goexit(void) __attribute__ ((noinline));
1978 void
1979 runtime_goexit(void)
1981 if(g->atomicstatus != _Grunning)
1982 runtime_throw("bad g status");
1983 runtime_mcall(goexit0);
1986 // runtime_goexit continuation on g0.
1987 static void
1988 goexit0(G *gp)
1990 M *m;
1992 m = g->m;
1993 gp->atomicstatus = _Gdead;
1994 gp->entry = nil;
1995 gp->m = nil;
1996 gp->lockedm = nil;
1997 gp->paniconfault = 0;
1998 gp->_defer = nil; // should be true already but just in case.
1999 gp->_panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
2000 gp->writenbuf = 0;
2001 gp->writebuf = nil;
2002 gp->waitreason = runtime_gostringnocopy(nil);
2003 gp->param = nil;
2004 m->curg = nil;
2005 m->lockedg = nil;
2006 if(m->locked & ~_LockExternal) {
2007 runtime_printf("invalid m->locked = %d\n", m->locked);
2008 runtime_throw("internal lockOSThread error");
2010 m->locked = 0;
2011 gfput((P*)m->p, gp);
2012 schedule();
2015 // The goroutine g is about to enter a system call.
2016 // Record that it's not using the cpu anymore.
2017 // This is called only from the go syscall library and cgocall,
2018 // not from the low-level system calls used by the runtime.
2020 // Entersyscall cannot split the stack: the runtime_gosave must
2021 // make g->sched refer to the caller's stack segment, because
2022 // entersyscall is going to return immediately after.
2024 void runtime_entersyscall(void) __attribute__ ((no_split_stack));
2025 static void doentersyscall(void) __attribute__ ((no_split_stack, noinline));
2027 void
2028 runtime_entersyscall()
2030 // Save the registers in the g structure so that any pointers
2031 // held in registers will be seen by the garbage collector.
2032 getcontext(ucontext_arg(&g->gcregs[0]));
2034 // Do the work in a separate function, so that this function
2035 // doesn't save any registers on its own stack. If this
2036 // function does save any registers, we might store the wrong
2037 // value in the call to getcontext.
2039 // FIXME: This assumes that we do not need to save any
2040 // callee-saved registers to access the TLS variable g. We
2041 // don't want to put the ucontext_t on the stack because it is
2042 // large and we can not split the stack here.
2043 doentersyscall();
2046 static void
2047 doentersyscall()
2049 // Disable preemption because during this function g is in _Gsyscall status,
2050 // but can have inconsistent g->sched; do not let GC observe it.
2051 g->m->locks++;
2053 // Leave SP around for GC and traceback.
2054 #ifdef USING_SPLIT_STACK
2056 size_t gcstacksize;
2057 g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
2058 &g->gcnextsegment, &g->gcnextsp,
2059 &g->gcinitialsp);
2060 g->gcstacksize = (uintptr)gcstacksize;
2062 #else
2064 void *v;
2066 g->gcnextsp = (byte *) &v;
2068 #endif
2070 g->atomicstatus = _Gsyscall;
2072 if(runtime_atomicload(&runtime_sched.sysmonwait)) { // TODO: fast atomic
2073 runtime_lock(&runtime_sched);
2074 if(runtime_atomicload(&runtime_sched.sysmonwait)) {
2075 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2076 runtime_notewakeup(&runtime_sched.sysmonnote);
2078 runtime_unlock(&runtime_sched);
2081 g->m->mcache = nil;
2082 ((P*)(g->m->p))->m = 0;
2083 runtime_atomicstore(&((P*)g->m->p)->status, _Psyscall);
2084 if(runtime_atomicload(&runtime_sched.gcwaiting)) {
2085 runtime_lock(&runtime_sched);
2086 if (runtime_sched.stopwait > 0 && runtime_cas(&((P*)g->m->p)->status, _Psyscall, _Pgcstop)) {
2087 if(--runtime_sched.stopwait == 0)
2088 runtime_notewakeup(&runtime_sched.stopnote);
2090 runtime_unlock(&runtime_sched);
2093 g->m->locks--;
2096 // The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
2097 void
2098 runtime_entersyscallblock(void)
2100 P *p;
2102 g->m->locks++; // see comment in entersyscall
2104 // Leave SP around for GC and traceback.
2105 #ifdef USING_SPLIT_STACK
2107 size_t gcstacksize;
2108 g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
2109 &g->gcnextsegment, &g->gcnextsp,
2110 &g->gcinitialsp);
2111 g->gcstacksize = (uintptr)gcstacksize;
2113 #else
2114 g->gcnextsp = (byte *) &p;
2115 #endif
2117 // Save the registers in the g structure so that any pointers
2118 // held in registers will be seen by the garbage collector.
2119 getcontext(ucontext_arg(&g->gcregs[0]));
2121 g->atomicstatus = _Gsyscall;
2123 p = releasep();
2124 handoffp(p);
2125 if(g->isbackground) // do not consider blocked scavenger for deadlock detection
2126 incidlelocked(1);
2128 g->m->locks--;
2131 // The goroutine g exited its system call.
2132 // Arrange for it to run on a cpu again.
2133 // This is called only from the go syscall library, not
2134 // from the low-level system calls used by the runtime.
2135 void
2136 runtime_exitsyscall(void)
2138 G *gp;
2140 gp = g;
2141 gp->m->locks++; // see comment in entersyscall
2143 if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
2144 incidlelocked(-1);
2146 gp->waitsince = 0;
2147 if(exitsyscallfast()) {
2148 // There's a cpu for us, so we can run.
2149 ((P*)gp->m->p)->syscalltick++;
2150 gp->atomicstatus = _Grunning;
2151 // Garbage collector isn't running (since we are),
2152 // so okay to clear gcstack and gcsp.
2153 #ifdef USING_SPLIT_STACK
2154 gp->gcstack = nil;
2155 #endif
2156 gp->gcnextsp = nil;
2157 runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
2158 gp->m->locks--;
2159 return;
2162 gp->m->locks--;
2164 // Call the scheduler.
2165 runtime_mcall(exitsyscall0);
2167 // Scheduler returned, so we're allowed to run now.
2168 // Delete the gcstack information that we left for
2169 // the garbage collector during the system call.
2170 // Must wait until now because until gosched returns
2171 // we don't know for sure that the garbage collector
2172 // is not running.
2173 #ifdef USING_SPLIT_STACK
2174 gp->gcstack = nil;
2175 #endif
2176 gp->gcnextsp = nil;
2177 runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
2179 // Note that this gp->m might be different than the earlier
2180 // gp->m after returning from runtime_mcall.
2181 ((P*)gp->m->p)->syscalltick++;
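// A minimal illustrative sketch (not part of this file) of how the Go
// syscall library is expected to bracket a potentially blocking call
// with the entry points above.  The wrapper name and the read() call
// are hypothetical; only runtime_entersyscall and runtime_exitsyscall
// are taken from this file:
//
//	ssize_t
//	example_read(int fd, void *buf, size_t n)
//	{
//		ssize_t r;
//
//		runtime_entersyscall();  // the P may be handed off while we block
//		r = read(fd, buf, n);
//		runtime_exitsyscall();   // reacquire a P before running Go code again
//		return r;
//	}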
2184 static bool
2185 exitsyscallfast(void)
2187 G *gp;
2188 P *p;
2190 gp = g;
2192 // Freezetheworld sets stopwait but does not retake P's.
2193 if(runtime_sched.stopwait) {
2194 gp->m->p = 0;
2195 return false;
2198 // Try to re-acquire the last P.
2199 if(gp->m->p && ((P*)gp->m->p)->status == _Psyscall && runtime_cas(&((P*)gp->m->p)->status, _Psyscall, _Prunning)) {
2200 // There's a cpu for us, so we can run.
2201 gp->m->mcache = ((P*)gp->m->p)->mcache;
2202 ((P*)gp->m->p)->m = (uintptr)gp->m;
2203 return true;
2205 // Try to get any other idle P.
2206 gp->m->p = 0;
2207 if(runtime_sched.pidle) {
2208 runtime_lock(&runtime_sched);
2209 p = pidleget();
2210 if(p && runtime_atomicload(&runtime_sched.sysmonwait)) {
2211 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2212 runtime_notewakeup(&runtime_sched.sysmonnote);
2214 runtime_unlock(&runtime_sched);
2215 if(p) {
2216 acquirep(p);
2217 return true;
2220 return false;
2223 // runtime_exitsyscall slow path on g0.
2224 // Failed to acquire P, enqueue gp as runnable.
2225 static void
2226 exitsyscall0(G *gp)
2228 M *m;
2229 P *p;
2231 m = g->m;
2232 gp->atomicstatus = _Grunnable;
2233 gp->m = nil;
2234 m->curg = nil;
2235 runtime_lock(&runtime_sched);
2236 p = pidleget();
2237 if(p == nil)
2238 globrunqput(gp);
2239 else if(runtime_atomicload(&runtime_sched.sysmonwait)) {
2240 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2241 runtime_notewakeup(&runtime_sched.sysmonnote);
2243 runtime_unlock(&runtime_sched);
2244 if(p) {
2245 acquirep(p);
2246 execute(gp); // Never returns.
2248 if(m->lockedg) {
2249 // Wait until another thread schedules gp and so m again.
2250 stoplockedm();
2251 execute(gp); // Never returns.
2253 stopm();
2254 schedule(); // Never returns.
2257 // Called from syscall package before fork.
2258 void syscall_runtime_BeforeFork(void)
2259 __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork");
2260 void
2261 syscall_runtime_BeforeFork(void)
2263 // Fork can hang if preempted with signals frequently enough (see issue 5517).
2264 // Ensure that we stay on the same M where we disable profiling.
2265 runtime_m()->locks++;
2266 if(runtime_m()->profilehz != 0)
2267 runtime_resetcpuprofiler(0);
2270 // Called from syscall package after fork in parent.
2271 void syscall_runtime_AfterFork(void)
2272 __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork");
2273 void
2274 syscall_runtime_AfterFork(void)
2276 int32 hz;
2278 hz = runtime_sched.profilehz;
2279 if(hz != 0)
2280 runtime_resetcpuprofiler(hz);
2281 runtime_m()->locks--;
2284 // Allocate a new g, with a stack big enough for stacksize bytes.
2285 G*
2286 runtime_malg(int32 stacksize, byte** ret_stack, uintptr* ret_stacksize)
2288 G *newg;
2290 newg = allocg();
2291 if(stacksize >= 0) {
2292 #if USING_SPLIT_STACK
2293 int dont_block_signals = 0;
2294 size_t ss_stacksize;
2296 *ret_stack = __splitstack_makecontext(stacksize,
2297 &newg->stackcontext[0],
2298 &ss_stacksize);
2299 *ret_stacksize = (uintptr)ss_stacksize;
2300 __splitstack_block_signals_context(&newg->stackcontext[0],
2301 &dont_block_signals, nil);
2302 #else
2303 // In 64-bit mode, the maximum Go allocation space is
2304 // 128G. Our stack size is 4M, which only permits 32K
2305 // goroutines. In order to not limit ourselves,
2306 // allocate the stacks out of separate memory. In
2307 // 32-bit mode, the Go allocation space is all of
2308 // memory anyhow.
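// Worked figure for the comment above (illustrative arithmetic only):
// 128G / 4M = 32768, i.e. the "32K goroutines" limit being avoided here.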
2309 if(sizeof(void*) == 8) {
2310 void *p = runtime_SysAlloc(stacksize, &mstats.other_sys);
2311 if(p == nil)
2312 runtime_throw("runtime: cannot allocate memory for goroutine stack");
2313 *ret_stack = (byte*)p;
2314 } else {
2315 *ret_stack = runtime_mallocgc(stacksize, 0, FlagNoProfiling|FlagNoGC);
2316 runtime_xadd(&runtime_stacks_sys, stacksize);
2318 *ret_stacksize = (uintptr)stacksize;
2319 newg->gcinitialsp = *ret_stack;
2320 newg->gcstacksize = (uintptr)stacksize;
2321 #endif
2323 return newg;
2326 /* For runtime package testing. */
2329 // Create a new g running fn with siz bytes of arguments.
2330 // Put it on the queue of g's waiting to run.
2331 // The compiler turns a go statement into a call to this.
2332 // Cannot split the stack because it assumes that the arguments
2333 // are available sequentially after &fn; they would not be
2334 // copied if a stack split occurred. It's OK for this to call
2335 // functions that split the stack.
2336 void runtime_testing_entersyscall(int32)
2337 __asm__ (GOSYM_PREFIX "runtime.entersyscall");
2338 void
2339 runtime_testing_entersyscall(int32 dummy __attribute__ ((unused)))
2341 runtime_entersyscall();
2344 void runtime_testing_exitsyscall(int32)
2345 __asm__ (GOSYM_PREFIX "runtime.exitsyscall");
2347 void
2348 runtime_testing_exitsyscall(int32 dummy __attribute__ ((unused)))
2350 runtime_exitsyscall();
2353 G*
2354 __go_go(void (*fn)(void*), void* arg)
2356 byte *sp;
2357 size_t spsize;
2358 G *newg;
2359 P *p;
2361 //runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
2362 if(fn == nil) {
2363 g->m->throwing = -1; // do not dump full stacks
2364 runtime_throw("go of nil func value");
2366 g->m->locks++; // disable preemption because it can be holding p in a local var
2368 p = (P*)g->m->p;
2369 if((newg = gfget(p)) != nil) {
2370 #ifdef USING_SPLIT_STACK
2371 int dont_block_signals = 0;
2373 sp = __splitstack_resetcontext(&newg->stackcontext[0],
2374 &spsize);
2375 __splitstack_block_signals_context(&newg->stackcontext[0],
2376 &dont_block_signals, nil);
2377 #else
2378 sp = newg->gcinitialsp;
2379 spsize = newg->gcstacksize;
2380 if(spsize == 0)
2381 runtime_throw("bad spsize in __go_go");
2382 newg->gcnextsp = sp;
2383 #endif
2384 } else {
2385 uintptr malsize;
2387 newg = runtime_malg(StackMin, &sp, &malsize);
2388 spsize = (size_t)malsize;
2389 allgadd(newg);
2392 newg->entry = (byte*)fn;
2393 newg->param = arg;
2394 newg->gopc = (uintptr)__builtin_return_address(0);
2395 newg->atomicstatus = _Grunnable;
2396 if(p->goidcache == p->goidcacheend) {
2397 p->goidcache = runtime_xadd64(&runtime_sched.goidgen, GoidCacheBatch);
2398 p->goidcacheend = p->goidcache + GoidCacheBatch;
2400 newg->goid = p->goidcache++;
2403 // Avoid warnings about variables clobbered by
2404 // longjmp.
2405 byte * volatile vsp = sp;
2406 size_t volatile vspsize = spsize;
2407 G * volatile vnewg = newg;
2408 ucontext_t * volatile uc;
2410 uc = ucontext_arg(&vnewg->context[0]);
2411 getcontext(uc);
2412 uc->uc_stack.ss_sp = vsp;
2413 uc->uc_stack.ss_size = vspsize;
2414 makecontext(uc, kickoff, 0);
2416 runqput(p, vnewg);
2418 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
2419 wakep();
2420 g->m->locks--;
2421 return vnewg;
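// An illustrative sketch (assumed lowering, not taken from the
// compiler) of the call a "go" statement turns into.  The thunk name
// is hypothetical:
//
//	// Go source:        go f(p)
//	// becomes roughly:  __go_go(f_thunk, p);
//	// where f_thunk is a compiler-generated void (*)(void*) wrapper
//	// that unpacks its argument and calls f(p) on the new goroutine.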
2425 static void
2426 allgadd(G *gp)
2428 G **new;
2429 uintptr cap;
2431 runtime_lock(&allglock);
2432 if(runtime_allglen >= allgcap) {
2433 cap = 4096/sizeof(new[0]);
2434 if(cap < 2*allgcap)
2435 cap = 2*allgcap;
2436 new = runtime_malloc(cap*sizeof(new[0]));
2437 if(new == nil)
2438 runtime_throw("runtime: cannot allocate memory");
2439 if(runtime_allg != nil) {
2440 runtime_memmove(new, runtime_allg, runtime_allglen*sizeof(new[0]));
2441 runtime_free(runtime_allg);
2443 runtime_allg = new;
2444 allgcap = cap;
2446 runtime_allg[runtime_allglen++] = gp;
2447 runtime_unlock(&allglock);
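// The growth policy above is ordinary amortized doubling.  A
// self-contained sketch of the same idea outside the runtime
// (illustrative names; uses <stdlib.h> and <string.h>):
//
//	static void **vec;
//	static size_t veclen, veccap;
//
//	static void
//	vec_append(void *x)
//	{
//		if(veclen >= veccap) {
//			size_t cap = veccap ? 2*veccap : 4096/sizeof(vec[0]);
//			void **p = malloc(cap*sizeof(vec[0]));
//			if(p == NULL)
//				abort();
//			if(vec != NULL) {
//				memcpy(p, vec, veclen*sizeof(vec[0]));
//				free(vec);
//			}
//			vec = p;
//			veccap = cap;
//		}
//		vec[veclen++] = x;
//	}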
2450 // Put on gfree list.
2451 // If local list is too long, transfer a batch to the global list.
2452 static void
2453 gfput(P *p, G *gp)
2455 gp->schedlink = (uintptr)p->gfree;
2456 p->gfree = gp;
2457 p->gfreecnt++;
2458 if(p->gfreecnt >= 64) {
2459 runtime_lock(&runtime_sched.gflock);
2460 while(p->gfreecnt >= 32) {
2461 p->gfreecnt--;
2462 gp = p->gfree;
2463 p->gfree = (G*)gp->schedlink;
2464 gp->schedlink = (uintptr)runtime_sched.gfree;
2465 runtime_sched.gfree = gp;
2467 runtime_unlock(&runtime_sched.gflock);
2471 // Get from gfree list.
2472 // If local list is empty, grab a batch from global list.
2473 static G*
2474 gfget(P *p)
2476 G *gp;
2478 retry:
2479 gp = p->gfree;
2480 if(gp == nil && runtime_sched.gfree) {
2481 runtime_lock(&runtime_sched.gflock);
2482 while(p->gfreecnt < 32 && runtime_sched.gfree) {
2483 p->gfreecnt++;
2484 gp = runtime_sched.gfree;
2485 runtime_sched.gfree = (G*)gp->schedlink;
2486 gp->schedlink = (uintptr)p->gfree;
2487 p->gfree = gp;
2489 runtime_unlock(&runtime_sched.gflock);
2490 goto retry;
2492 if(gp) {
2493 p->gfree = (G*)gp->schedlink;
2494 p->gfreecnt--;
2496 return gp;
2499 // Purge all cached G's from gfree list to the global list.
2500 static void
2501 gfpurge(P *p)
2503 G *gp;
2505 runtime_lock(&runtime_sched.gflock);
2506 while(p->gfreecnt) {
2507 p->gfreecnt--;
2508 gp = p->gfree;
2509 p->gfree = (G*)gp->schedlink;
2510 gp->schedlink = (uintptr)runtime_sched.gfree;
2511 runtime_sched.gfree = gp;
2513 runtime_unlock(&runtime_sched.gflock);
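// gfput/gfget/gfpurge above treat G.schedlink as an intrusive singly
// linked stack, moving G's between the per-P list and the global list
// in batches (refill up to 32 locally, spill once the local list
// exceeds 64).  A minimal sketch of the push/pop pattern with
// illustrative types:
//
//	struct node { struct node *next; };
//	static struct node *head;
//
//	static void
//	push(struct node *n) { n->next = head; head = n; }
//
//	static struct node*
//	pop(void)
//	{
//		struct node *n = head;
//		if(n != NULL)
//			head = n->next;
//		return n;
//	}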
2516 void
2517 runtime_Breakpoint(void)
2519 runtime_breakpoint();
2522 void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched");
2524 void
2525 runtime_Gosched(void)
2527 runtime_gosched();
2530 // Implementation of runtime.GOMAXPROCS.
2531 // delete when scheduler is even stronger
2532 int32
2533 runtime_gomaxprocsfunc(int32 n)
2535 int32 ret;
2537 if(n > _MaxGomaxprocs)
2538 n = _MaxGomaxprocs;
2539 runtime_lock(&runtime_sched);
2540 ret = runtime_gomaxprocs;
2541 if(n <= 0 || n == ret) {
2542 runtime_unlock(&runtime_sched);
2543 return ret;
2545 runtime_unlock(&runtime_sched);
2547 runtime_semacquire(&runtime_worldsema, false);
2548 g->m->gcing = 1;
2549 runtime_stoptheworld();
2550 newprocs = n;
2551 g->m->gcing = 0;
2552 runtime_semrelease(&runtime_worldsema);
2553 runtime_starttheworld();
2555 return ret;
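// Illustrative use of the early return above (the caller shown is
// hypothetical): passing n <= 0 only queries the current value, which
// is how runtime.GOMAXPROCS(0) behaves.
//
//	old = runtime_gomaxprocsfunc(0);   // read without changing
//	runtime_gomaxprocsfunc(old);       // n == ret, also a no-op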
2558 // lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
2559 // after they modify m->locked. Do not allow preemption during this call,
2560 // or else the m might be different in this function than in the caller.
2561 static void
2562 lockOSThread(void)
2564 g->m->lockedg = g;
2565 g->lockedm = g->m;
2568 void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
2569 void
2570 runtime_LockOSThread(void)
2572 g->m->locked |= _LockExternal;
2573 lockOSThread();
2576 void
2577 runtime_lockOSThread(void)
2579 g->m->locked += _LockInternal;
2580 lockOSThread();
2584 // unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
2585 // after they update m->locked. Do not allow preemption during this call,
2586 // or else the m might be different in this function than in the caller.
2587 static void
2588 unlockOSThread(void)
2590 if(g->m->locked != 0)
2591 return;
2592 g->m->lockedg = nil;
2593 g->lockedm = nil;
2596 void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
2598 void
2599 runtime_UnlockOSThread(void)
2601 g->m->locked &= ~_LockExternal;
2602 unlockOSThread();
2605 void
2606 runtime_unlockOSThread(void)
2608 if(g->m->locked < _LockInternal)
2609 runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
2610 g->m->locked -= _LockInternal;
2611 unlockOSThread();
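// How the pieces above combine (an illustrative sequence; the values
// shown assume _LockExternal is a single bit and _LockInternal a
// counting increment, as used in this file):
//
//	runtime_LockOSThread();    // m->locked == _LockExternal
//	runtime_lockOSThread();    // m->locked == _LockExternal + _LockInternal
//	runtime_unlockOSThread();  // m->locked == _LockExternal; still wired
//	runtime_UnlockOSThread();  // m->locked == 0; unlockOSThread now
//	                           // clears m->lockedg and g->lockedm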
2614 bool
2615 runtime_lockedOSThread(void)
2617 return g->lockedm != nil && g->m->lockedg != nil;
2620 int32
2621 runtime_gcount(void)
2623 G *gp;
2624 int32 n, s;
2625 uintptr i;
2627 n = 0;
2628 runtime_lock(&allglock);
2629 // TODO(dvyukov): runtime.NumGoroutine() is O(N).
2630 // We do not want to increment/decrement centralized counter in newproc/goexit,
2631 // just to make runtime.NumGoroutine() faster.
2632 // Compromise solution is to introduce per-P counters of active goroutines.
2633 for(i = 0; i < runtime_allglen; i++) {
2634 gp = runtime_allg[i];
2635 s = gp->atomicstatus;
2636 if(s == _Grunnable || s == _Grunning || s == _Gsyscall || s == _Gwaiting)
2637 n++;
2639 runtime_unlock(&allglock);
2640 return n;
2643 int32
2644 runtime_mcount(void)
2646 return runtime_sched.mcount;
2649 static struct {
2650 Lock;
2651 void (*fn)(uintptr*, int32);
2652 int32 hz;
2653 uintptr pcbuf[TracebackMaxFrames];
2654 Location locbuf[TracebackMaxFrames];
2655 } prof;
2657 static void System(void) {}
2658 static void GC(void) {}
2660 // Called if we receive a SIGPROF signal.
2661 void
2662 runtime_sigprof()
2664 M *mp = g->m;
2665 int32 n, i;
2666 bool traceback;
2668 if(prof.fn == nil || prof.hz == 0)
2669 return;
2671 if(mp == nil)
2672 return;
2674 // Profiling runs concurrently with GC, so it must not allocate.
2675 mp->mallocing++;
2677 traceback = true;
2679 if(mp->mcache == nil)
2680 traceback = false;
2682 runtime_lock(&prof);
2683 if(prof.fn == nil) {
2684 runtime_unlock(&prof);
2685 mp->mallocing--;
2686 return;
2688 n = 0;
2690 if(runtime_atomicload(&runtime_in_callers) > 0) {
2691 // If SIGPROF arrived while we were already fetching runtime
2692 // callers, we can have trouble on older systems,
2693 // because the unwind library calls dl_iterate_phdr,
2694 // which could not be called recursively in the past.
2695 traceback = false;
2698 if(traceback) {
2699 n = runtime_callers(0, prof.locbuf, nelem(prof.locbuf), false);
2700 for(i = 0; i < n; i++)
2701 prof.pcbuf[i] = prof.locbuf[i].pc;
2703 if(!traceback || n <= 0) {
2704 n = 2;
2705 prof.pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
2706 if(mp->gcing || mp->helpgc)
2707 prof.pcbuf[1] = (uintptr)GC;
2708 else
2709 prof.pcbuf[1] = (uintptr)System;
2711 prof.fn(prof.pcbuf, n);
2712 runtime_unlock(&prof);
2713 mp->mallocing--;
2716 // Arrange to call fn with a traceback hz times a second.
2717 void
2718 runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
2720 // Force sane arguments.
2721 if(hz < 0)
2722 hz = 0;
2723 if(hz == 0)
2724 fn = nil;
2725 if(fn == nil)
2726 hz = 0;
2728 // Disable preemption; otherwise we can be rescheduled to another thread
2729 // that has profiling enabled.
2730 g->m->locks++;
2732 // Stop the profiler on this thread so that it is safe to lock prof;
2733 // if a profiling signal came in while we had prof locked,
2734 // it would deadlock.
2735 runtime_resetcpuprofiler(0);
2737 runtime_lock(&prof);
2738 prof.fn = fn;
2739 prof.hz = hz;
2740 runtime_unlock(&prof);
2741 runtime_lock(&runtime_sched);
2742 runtime_sched.profilehz = hz;
2743 runtime_unlock(&runtime_sched);
2745 if(hz != 0)
2746 runtime_resetcpuprofiler(hz);
2748 g->m->locks--;
2751 // Change number of processors. The world is stopped, sched is locked.
2752 static void
2753 procresize(int32 new)
2755 int32 i, old;
2756 bool pempty;
2757 G *gp;
2758 P *p;
2760 old = runtime_gomaxprocs;
2761 if(old < 0 || old > _MaxGomaxprocs || new <= 0 || new >_MaxGomaxprocs)
2762 runtime_throw("procresize: invalid arg");
2763 // initialize new P's
2764 for(i = 0; i < new; i++) {
2765 p = runtime_allp[i];
2766 if(p == nil) {
2767 p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
2768 p->id = i;
2769 p->status = _Pgcstop;
2770 runtime_atomicstorep(&runtime_allp[i], p);
2772 if(p->mcache == nil) {
2773 if(old==0 && i==0)
2774 p->mcache = g->m->mcache; // bootstrap
2775 else
2776 p->mcache = runtime_allocmcache();
2780 // redistribute runnable G's evenly
2781 // collect all runnable goroutines in global queue preserving FIFO order
2782 // FIFO order is required to ensure fairness even during frequent GCs
2783 // see http://golang.org/issue/7126
2784 pempty = false;
2785 while(!pempty) {
2786 pempty = true;
2787 for(i = 0; i < old; i++) {
2788 p = runtime_allp[i];
2789 if(p->runqhead == p->runqtail)
2790 continue;
2791 pempty = false;
2792 // pop from tail of local queue
2793 p->runqtail--;
2794 gp = (G*)p->runq[p->runqtail%nelem(p->runq)];
2795 // push onto head of global queue
2796 gp->schedlink = (uintptr)runtime_sched.runqhead;
2797 runtime_sched.runqhead = gp;
2798 if(runtime_sched.runqtail == nil)
2799 runtime_sched.runqtail = gp;
2800 runtime_sched.runqsize++;
2803 // fill local queues with at most nelem(p->runq)/2 goroutines
2804 // start at 1 because current M already executes some G and will acquire allp[0] below,
2805 // so if we have a spare G we want to put it into allp[1].
2806 for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched.runqsize > 0; i++) {
2807 gp = runtime_sched.runqhead;
2808 runtime_sched.runqhead = (G*)gp->schedlink;
2809 if(runtime_sched.runqhead == nil)
2810 runtime_sched.runqtail = nil;
2811 runtime_sched.runqsize--;
2812 runqput(runtime_allp[i%new], gp);
2815 // free unused P's
2816 for(i = new; i < old; i++) {
2817 p = runtime_allp[i];
2818 runtime_freemcache(p->mcache);
2819 p->mcache = nil;
2820 gfpurge(p);
2821 p->status = _Pdead;
2822 // can't free P itself because it can be referenced by an M in syscall
2825 if(g->m->p)
2826 ((P*)g->m->p)->m = 0;
2827 g->m->p = 0;
2828 g->m->mcache = nil;
2829 p = runtime_allp[0];
2830 p->m = 0;
2831 p->status = _Pidle;
2832 acquirep(p);
2833 for(i = new-1; i > 0; i--) {
2834 p = runtime_allp[i];
2835 p->status = _Pidle;
2836 pidleput(p);
2838 runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
2841 // Associate p and the current m.
2842 static void
2843 acquirep(P *p)
2845 M *m;
2847 m = g->m;
2848 if(m->p || m->mcache)
2849 runtime_throw("acquirep: already in go");
2850 if(p->m || p->status != _Pidle) {
2851 runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? ((M*)p->m)->id : 0, p->status);
2852 runtime_throw("acquirep: invalid p state");
2854 m->mcache = p->mcache;
2855 m->p = (uintptr)p;
2856 p->m = (uintptr)m;
2857 p->status = _Prunning;
2860 // Disassociate p and the current m.
2861 static P*
2862 releasep(void)
2864 M *m;
2865 P *p;
2867 m = g->m;
2868 if(m->p == 0 || m->mcache == nil)
2869 runtime_throw("releasep: invalid arg");
2870 p = (P*)m->p;
2871 if((M*)p->m != m || p->mcache != m->mcache || p->status != _Prunning) {
2872 runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
2873 m, m->p, p->m, m->mcache, p->mcache, p->status);
2874 runtime_throw("releasep: invalid p state");
2876 m->p = 0;
2877 m->mcache = nil;
2878 p->m = 0;
2879 p->status = _Pidle;
2880 return p;
2883 static void
2884 incidlelocked(int32 v)
2886 runtime_lock(&runtime_sched);
2887 runtime_sched.nmidlelocked += v;
2888 if(v > 0)
2889 checkdead();
2890 runtime_unlock(&runtime_sched);
2893 // Check for deadlock situation.
2894 // The check is based on the number of running M's; if it is 0, the program is deadlocked.
2895 static void
2896 checkdead(void)
2898 G *gp;
2899 int32 run, grunning, s;
2900 uintptr i;
2902 // For -buildmode=c-shared or -buildmode=c-archive it's OK if
2903 // there are no running goroutines. The calling program is
2904 // assumed to be running.
2905 if(runtime_isarchive) {
2906 return;
2909 // -1 for sysmon
2910 run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.nmidlelocked - 1 - countextra();
2911 if(run > 0)
2912 return;
2913 // If we are dying because of a signal caught on an already idle thread,
2914 // freezetheworld will cause all running threads to block.
2915 // And runtime will essentially enter into deadlock state,
2916 // except that there is a thread that will call runtime_exit soon.
2917 if(runtime_panicking > 0)
2918 return;
2919 if(run < 0) {
2920 runtime_printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
2921 runtime_sched.nmidle, runtime_sched.nmidlelocked, runtime_sched.mcount);
2922 runtime_throw("checkdead: inconsistent counts");
2924 grunning = 0;
2925 runtime_lock(&allglock);
2926 for(i = 0; i < runtime_allglen; i++) {
2927 gp = runtime_allg[i];
2928 if(gp->isbackground)
2929 continue;
2930 s = gp->atomicstatus;
2931 if(s == _Gwaiting)
2932 grunning++;
2933 else if(s == _Grunnable || s == _Grunning || s == _Gsyscall) {
2934 runtime_unlock(&allglock);
2935 runtime_printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);
2936 runtime_throw("checkdead: runnable g");
2939 runtime_unlock(&allglock);
2940 if(grunning == 0) // possible if main goroutine calls runtime_Goexit()
2941 runtime_throw("no goroutines (main called runtime.Goexit) - deadlock!");
2942 g->m->throwing = -1; // do not dump full stacks
2943 runtime_throw("all goroutines are asleep - deadlock!");
2946 static void
2947 sysmon(void)
2949 uint32 idle, delay;
2950 int64 now, lastpoll, lasttrace;
2951 G *gp;
2953 lasttrace = 0;
2954 idle = 0; // how many cycles in succession we have gone without waking somebody up
2955 delay = 0;
2956 for(;;) {
2957 if(idle == 0) // start with 20us sleep...
2958 delay = 20;
2959 else if(idle > 50) // start doubling the sleep after 1ms...
2960 delay *= 2;
2961 if(delay > 10*1000) // up to 10ms
2962 delay = 10*1000;
2963 runtime_usleep(delay);
2964 if(runtime_debug.schedtrace <= 0 &&
2965 (runtime_sched.gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic
2966 runtime_lock(&runtime_sched);
2967 if(runtime_atomicload(&runtime_sched.gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) {
2968 runtime_atomicstore(&runtime_sched.sysmonwait, 1);
2969 runtime_unlock(&runtime_sched);
2970 runtime_notesleep(&runtime_sched.sysmonnote);
2971 runtime_noteclear(&runtime_sched.sysmonnote);
2972 idle = 0;
2973 delay = 20;
2974 } else
2975 runtime_unlock(&runtime_sched);
2977 // poll network if not polled for more than 10ms
2978 lastpoll = runtime_atomicload64(&runtime_sched.lastpoll);
2979 now = runtime_nanotime();
2980 if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
2981 runtime_cas64(&runtime_sched.lastpoll, lastpoll, now);
2982 gp = runtime_netpoll(false); // non-blocking
2983 if(gp) {
2984 // Need to decrement number of idle locked M's
2985 // (pretending that one more is running) before injectglist.
2986 // Otherwise it can lead to the following situation:
2987 // injectglist grabs all P's but before it starts M's to run the P's,
2988 // another M returns from syscall, finishes running its G,
2989 // observes that there is no work to do and no other running M's
2990 // and reports deadlock.
2991 incidlelocked(-1);
2992 injectglist(gp);
2993 incidlelocked(1);
2996 // retake P's blocked in syscalls
2997 // and preempt long running G's
2998 if(retake(now))
2999 idle = 0;
3000 else
3001 idle++;
3003 if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) {
3004 lasttrace = now;
3005 runtime_schedtrace(runtime_debug.scheddetail);
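// Worked backoff schedule for the loop above (illustrative arithmetic):
// the first pass sleeps 20us; passes with 1 <= idle <= 50 keep the
// 20us delay (roughly 1ms in total); once idle > 50 the delay doubles
// each pass (20, 40, 80, ... us) and hits the 10ms clamp after about
// nine doublings, since 20us * 2^9 = 10240us > 10ms.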
3010 typedef struct Pdesc Pdesc;
3011 struct Pdesc
3013 uint32 schedtick;
3014 int64 schedwhen;
3015 uint32 syscalltick;
3016 int64 syscallwhen;
3018 static Pdesc pdesc[_MaxGomaxprocs];
3020 static uint32
3021 retake(int64 now)
3023 uint32 i, s, n;
3024 int64 t;
3025 P *p;
3026 Pdesc *pd;
3028 n = 0;
3029 for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
3030 p = runtime_allp[i];
3031 if(p==nil)
3032 continue;
3033 pd = &pdesc[i];
3034 s = p->status;
3035 if(s == _Psyscall) {
3036 // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
3037 t = p->syscalltick;
3038 if(pd->syscalltick != t) {
3039 pd->syscalltick = t;
3040 pd->syscallwhen = now;
3041 continue;
3043 // On the one hand we don't want to retake Ps if there is no other work to do,
3044 // but on the other hand we want to retake them eventually
3045 // because they can prevent the sysmon thread from deep sleep.
3046 if(p->runqhead == p->runqtail &&
3047 runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0 &&
3048 pd->syscallwhen + 10*1000*1000 > now)
3049 continue;
3050 // Need to decrement number of idle locked M's
3051 // (pretending that one more is running) before the CAS.
3052 // Otherwise the M from which we retake can exit the syscall,
3053 // increment nmidle and report deadlock.
3054 incidlelocked(-1);
3055 if(runtime_cas(&p->status, s, _Pidle)) {
3056 n++;
3057 handoffp(p);
3059 incidlelocked(1);
3060 } else if(s == _Prunning) {
3061 // Preempt G if it's running for more than 10ms.
3062 t = p->schedtick;
3063 if(pd->schedtick != t) {
3064 pd->schedtick = t;
3065 pd->schedwhen = now;
3066 continue;
3068 if(pd->schedwhen + 10*1000*1000 > now)
3069 continue;
3070 // preemptone(p);
3073 return n;
3076 // Tell all goroutines that they have been preempted and they should stop.
3077 // This function is purely best-effort. It can fail to inform a goroutine if a
3078 // processor just started running it.
3079 // No locks need to be held.
3080 // Returns true if preemption request was issued to at least one goroutine.
3081 static bool
3082 preemptall(void)
3084 return false;
3087 void
3088 runtime_schedtrace(bool detailed)
3090 static int64 starttime;
3091 int64 now;
3092 int64 id1, id2, id3;
3093 int32 i, t, h;
3094 uintptr gi;
3095 const char *fmt;
3096 M *mp, *lockedm;
3097 G *gp, *lockedg;
3098 P *p;
3100 now = runtime_nanotime();
3101 if(starttime == 0)
3102 starttime = now;
3104 runtime_lock(&runtime_sched);
3105 runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
3106 (now-starttime)/1000000, runtime_gomaxprocs, runtime_sched.npidle, runtime_sched.mcount,
3107 runtime_sched.nmidle, runtime_sched.runqsize);
3108 if(detailed) {
3109 runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
3110 runtime_sched.gcwaiting, runtime_sched.nmidlelocked, runtime_sched.nmspinning,
3111 runtime_sched.stopwait, runtime_sched.sysmonwait);
3113 // We must be careful while reading data from P's, M's and G's.
3114 // Even if we hold schedlock, most data can be changed concurrently.
3115 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
3116 for(i = 0; i < runtime_gomaxprocs; i++) {
3117 p = runtime_allp[i];
3118 if(p == nil)
3119 continue;
3120 mp = (M*)p->m;
3121 h = runtime_atomicload(&p->runqhead);
3122 t = runtime_atomicload(&p->runqtail);
3123 if(detailed)
3124 runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
3125 i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
3126 else {
3127 // In non-detailed mode format lengths of per-P run queues as:
3128 // [len1 len2 len3 len4]
3129 fmt = " %d";
3130 if(runtime_gomaxprocs == 1)
3131 fmt = " [%d]\n";
3132 else if(i == 0)
3133 fmt = " [%d";
3134 else if(i == runtime_gomaxprocs-1)
3135 fmt = " %d]\n";
3136 runtime_printf(fmt, t-h);
3139 if(!detailed) {
3140 runtime_unlock(&runtime_sched);
3141 return;
3143 for(mp = runtime_allm; mp; mp = mp->alllink) {
3144 p = (P*)mp->p;
3145 gp = mp->curg;
3146 lockedg = mp->lockedg;
3147 id1 = -1;
3148 if(p)
3149 id1 = p->id;
3150 id2 = -1;
3151 if(gp)
3152 id2 = gp->goid;
3153 id3 = -1;
3154 if(lockedg)
3155 id3 = lockedg->goid;
3156 runtime_printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
3157 " locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
3158 mp->id, id1, id2,
3159 mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
3160 mp->spinning, mp->blocked, id3);
3162 runtime_lock(&allglock);
3163 for(gi = 0; gi < runtime_allglen; gi++) {
3164 gp = runtime_allg[gi];
3165 mp = gp->m;
3166 lockedm = gp->lockedm;
3167 runtime_printf(" G%D: status=%d(%S) m=%d lockedm=%d\n",
3168 gp->goid, gp->atomicstatus, gp->waitreason, mp ? mp->id : -1,
3169 lockedm ? lockedm->id : -1);
3171 runtime_unlock(&allglock);
3172 runtime_unlock(&runtime_sched);
3175 // Put mp on midle list.
3176 // Sched must be locked.
3177 static void
3178 mput(M *mp)
3180 mp->schedlink = (uintptr)runtime_sched.midle;
3181 runtime_sched.midle = mp;
3182 runtime_sched.nmidle++;
3183 checkdead();
3186 // Try to get an m from midle list.
3187 // Sched must be locked.
3188 static M*
3189 mget(void)
3191 M *mp;
3193 if((mp = runtime_sched.midle) != nil){
3194 runtime_sched.midle = (M*)mp->schedlink;
3195 runtime_sched.nmidle--;
3197 return mp;
3200 // Put gp on the global runnable queue.
3201 // Sched must be locked.
3202 static void
3203 globrunqput(G *gp)
3205 gp->schedlink = 0;
3206 if(runtime_sched.runqtail)
3207 runtime_sched.runqtail->schedlink = (uintptr)gp;
3208 else
3209 runtime_sched.runqhead = gp;
3210 runtime_sched.runqtail = gp;
3211 runtime_sched.runqsize++;
3214 // Put a batch of runnable goroutines on the global runnable queue.
3215 // Sched must be locked.
3216 static void
3217 globrunqputbatch(G *ghead, G *gtail, int32 n)
3219 gtail->schedlink = 0;
3220 if(runtime_sched.runqtail)
3221 runtime_sched.runqtail->schedlink = (uintptr)ghead;
3222 else
3223 runtime_sched.runqhead = ghead;
3224 runtime_sched.runqtail = gtail;
3225 runtime_sched.runqsize += n;
3228 // Try to get a batch of G's from the global runnable queue.
3229 // Sched must be locked.
3230 static G*
3231 globrunqget(P *p, int32 max)
3233 G *gp, *gp1;
3234 int32 n;
3236 if(runtime_sched.runqsize == 0)
3237 return nil;
3238 n = runtime_sched.runqsize/runtime_gomaxprocs+1;
3239 if(n > runtime_sched.runqsize)
3240 n = runtime_sched.runqsize;
3241 if(max > 0 && n > max)
3242 n = max;
3243 if((uint32)n > nelem(p->runq)/2)
3244 n = nelem(p->runq)/2;
3245 runtime_sched.runqsize -= n;
3246 if(runtime_sched.runqsize == 0)
3247 runtime_sched.runqtail = nil;
3248 gp = runtime_sched.runqhead;
3249 runtime_sched.runqhead = (G*)gp->schedlink;
3250 n--;
3251 while(n--) {
3252 gp1 = runtime_sched.runqhead;
3253 runtime_sched.runqhead = (G*)gp1->schedlink;
3254 runqput(p, gp1);
3256 return gp;
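// Worked example of the batch size above (integer arithmetic,
// illustrative values): with runqsize == 10 and gomaxprocs == 4,
// n = 10/4 + 1 = 3, so a P draining the global queue takes a
// proportional share rather than the whole queue, further capped by
// max and by nelem(p->runq)/2.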
3259 // Put p on the pidle list.
3260 // Sched must be locked.
3261 static void
3262 pidleput(P *p)
3264 p->link = (uintptr)runtime_sched.pidle;
3265 runtime_sched.pidle = p;
3266 runtime_xadd(&runtime_sched.npidle, 1); // TODO: fast atomic
3269 // Try to get a p from the pidle list.
3270 // Sched must be locked.
3271 static P*
3272 pidleget(void)
3274 P *p;
3276 p = runtime_sched.pidle;
3277 if(p) {
3278 runtime_sched.pidle = (P*)p->link;
3279 runtime_xadd(&runtime_sched.npidle, -1); // TODO: fast atomic
3281 return p;
3284 // Try to put g on local runnable queue.
3285 // If it's full, put onto global queue.
3286 // Executed only by the owner P.
3287 static void
3288 runqput(P *p, G *gp)
3290 uint32 h, t;
3292 retry:
3293 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3294 t = p->runqtail;
3295 if(t - h < nelem(p->runq)) {
3296 p->runq[t%nelem(p->runq)] = (uintptr)gp;
3297 runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
3298 return;
3300 if(runqputslow(p, gp, h, t))
3301 return;
3302 // the queue is not full, so now the put above must succeed
3303 goto retry;
3306 // Put g and a batch of work from local runnable queue on global queue.
3307 // Executed only by the owner P.
3308 static bool
3309 runqputslow(P *p, G *gp, uint32 h, uint32 t)
3311 G *batch[nelem(p->runq)/2+1];
3312 uint32 n, i;
3314 // First, grab a batch from local queue.
3315 n = t-h;
3316 n = n/2;
3317 if(n != nelem(p->runq)/2)
3318 runtime_throw("runqputslow: queue is not full");
3319 for(i=0; i<n; i++)
3320 batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
3321 if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3322 return false;
3323 batch[n] = gp;
3324 // Link the goroutines.
3325 for(i=0; i<n; i++)
3326 batch[i]->schedlink = (uintptr)batch[i+1];
3327 // Now put the batch on global queue.
3328 runtime_lock(&runtime_sched);
3329 globrunqputbatch(batch[0], batch[n], n+1);
3330 runtime_unlock(&runtime_sched);
3331 return true;
3334 // Get g from local runnable queue.
3335 // Executed only by the owner P.
3336 static G*
3337 runqget(P *p)
3339 G *gp;
3340 uint32 t, h;
3342 for(;;) {
3343 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3344 t = p->runqtail;
3345 if(t == h)
3346 return nil;
3347 gp = (G*)p->runq[h%nelem(p->runq)];
3348 if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume
3349 return gp;
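// p->runq above is a fixed-size ring buffer indexed by free-running
// 32-bit head/tail counters: only h%nelem(p->runq) and
// t%nelem(p->runq) touch the array, and t-h is the queue length even
// across counter wraparound (which requires nelem to be a power of
// two, as it is here).  A self-contained sketch of the producer side
// with illustrative names (uses <stdint.h>):
//
//	#define QLEN 256		// must be a power of two
//	static void *ring[QLEN];
//	static uint32_t rhead, rtail;	// rtail written by the owner only
//
//	static int
//	ring_put(void *x)
//	{
//		if(rtail - rhead >= QLEN)
//			return 0;		// full
//		ring[rtail % QLEN] = x;
//		rtail++;			// publish; the real code uses a store-release
//		return 1;
//	}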
3353 // Grabs a batch of goroutines from local runnable queue.
3354 // batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
3355 // Can be executed by any P.
3356 static uint32
3357 runqgrab(P *p, G **batch)
3359 uint32 t, h, n, i;
3361 for(;;) {
3362 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3363 t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer
3364 n = t-h;
3365 n = n - n/2;
3366 if(n == 0)
3367 break;
3368 if(n > nelem(p->runq)/2) // read inconsistent h and t
3369 continue;
3370 for(i=0; i<n; i++)
3371 batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
3372 if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3373 break;
3375 return n;
3378 // Steal half of elements from local runnable queue of p2
3379 // and put onto local runnable queue of p.
3380 // Returns one of the stolen elements (or nil if failed).
3381 static G*
3382 runqsteal(P *p, P *p2)
3384 G *gp;
3385 G *batch[nelem(p->runq)/2];
3386 uint32 t, h, n, i;
3388 n = runqgrab(p2, batch);
3389 if(n == 0)
3390 return nil;
3391 n--;
3392 gp = batch[n];
3393 if(n == 0)
3394 return gp;
3395 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3396 t = p->runqtail;
3397 if(t - h + n >= nelem(p->runq))
3398 runtime_throw("runqsteal: runq overflow");
3399 for(i=0; i<n; i++, t++)
3400 p->runq[t%nelem(p->runq)] = (uintptr)batch[i];
3401 runtime_atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption
3402 return gp;
3405 void runtime_testSchedLocalQueue(void)
3406 __asm__("runtime.testSchedLocalQueue");
3408 void
3409 runtime_testSchedLocalQueue(void)
3411 P p;
3412 G gs[nelem(p.runq)];
3413 int32 i, j;
3415 runtime_memclr((byte*)&p, sizeof(p));
3417 for(i = 0; i < (int32)nelem(gs); i++) {
3418 if(runqget(&p) != nil)
3419 runtime_throw("runq is not empty initially");
3420 for(j = 0; j < i; j++)
3421 runqput(&p, &gs[i]);
3422 for(j = 0; j < i; j++) {
3423 if(runqget(&p) != &gs[i]) {
3424 runtime_printf("bad element at iter %d/%d\n", i, j);
3425 runtime_throw("bad element");
3428 if(runqget(&p) != nil)
3429 runtime_throw("runq is not empty afterwards");
3433 void runtime_testSchedLocalQueueSteal(void)
3434 __asm__("runtime.testSchedLocalQueueSteal");
3436 void
3437 runtime_testSchedLocalQueueSteal(void)
3439 P p1, p2;
3440 G gs[nelem(p1.runq)], *gp;
3441 int32 i, j, s;
3443 runtime_memclr((byte*)&p1, sizeof(p1));
3444 runtime_memclr((byte*)&p2, sizeof(p2));
3446 for(i = 0; i < (int32)nelem(gs); i++) {
3447 for(j = 0; j < i; j++) {
3448 gs[j].sig = 0;
3449 runqput(&p1, &gs[j]);
3451 gp = runqsteal(&p2, &p1);
3452 s = 0;
3453 if(gp) {
3454 s++;
3455 gp->sig++;
3457 while((gp = runqget(&p2)) != nil) {
3458 s++;
3459 gp->sig++;
3461 while((gp = runqget(&p1)) != nil)
3462 gp->sig++;
3463 for(j = 0; j < i; j++) {
3464 if(gs[j].sig != 1) {
3465 runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
3466 runtime_throw("bad element");
3469 if(s != i/2 && s != i/2+1) {
3470 runtime_printf("bad steal %d, want %d or %d, iter %d\n",
3471 s, i/2, i/2+1, i);
3472 runtime_throw("bad steal");
3477 int32
3478 runtime_setmaxthreads(int32 in)
3480 int32 out;
3482 runtime_lock(&runtime_sched);
3483 out = runtime_sched.maxmcount;
3484 runtime_sched.maxmcount = in;
3485 checkmcount();
3486 runtime_unlock(&runtime_sched);
3487 return out;
3490 void
3491 runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
3493 enqueue1(wbufp, (Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0});
3494 enqueue1(wbufp, (Obj){(byte*)&runtime_main_init_done, sizeof runtime_main_init_done, 0});
3497 // Return whether we are waiting for a GC. This gc toolchain uses
3498 // preemption instead.
3499 bool
3500 runtime_gcwaiting(void)
3502 return runtime_sched.gcwaiting;
3505 // os_beforeExit is called from os.Exit(0).
3506 //go:linkname os_beforeExit os.runtime_beforeExit
3508 extern void os_beforeExit() __asm__ (GOSYM_PREFIX "os.runtime_beforeExit");
3510 void
3511 os_beforeExit()
3515 // Active spinning for sync.Mutex.
3516 //go:linkname sync_runtime_canSpin sync.runtime_canSpin
3518 enum
3520 ACTIVE_SPIN = 4,
3521 ACTIVE_SPIN_CNT = 30,
3524 extern _Bool sync_runtime_canSpin(intgo i)
3525 __asm__ (GOSYM_PREFIX "sync.runtime_canSpin");
3527 _Bool
3528 sync_runtime_canSpin(intgo i)
3530 P *p;
3532 // sync.Mutex is cooperative, so we are conservative with spinning.
3533 // Spin only a few times, and only if running on a multicore machine and
3534 // GOMAXPROCS>1 and there is at least one other running P and local runq is empty.
3535 // As opposed to runtime mutex we don't do passive spinning here,
3536 // because there can be work on the global runq or on other Ps.
3537 if (i >= ACTIVE_SPIN || runtime_ncpu <= 1 || runtime_gomaxprocs <= (int32)(runtime_sched.npidle+runtime_sched.nmspinning)+1) {
3538 return false;
3540 p = (P*)g->m->p;
3541 return p != nil && p->runqhead == p->runqtail;
3544 //go:linkname sync_runtime_doSpin sync.runtime_doSpin
3545 //go:nosplit
3547 extern void sync_runtime_doSpin(void)
3548 __asm__ (GOSYM_PREFIX "sync.runtime_doSpin");
3550 void
3551 sync_runtime_doSpin()
3553 runtime_procyield(ACTIVE_SPIN_CNT);
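// A minimal sketch (not the actual sync.Mutex code) of how the two
// hooks above are intended to be used together by a contended lock;
// mutex_is_locked and iter are illustrative:
//
//	iter = 0;
//	while(mutex_is_locked && sync_runtime_canSpin(iter)) {
//		sync_runtime_doSpin();	// ~ACTIVE_SPIN_CNT processor yields
//		iter++;
//	}
//	// otherwise fall back to blocking on a semaphore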