[official-gcc.git] / libgo / runtime / proc.c
blob be7e083f080be6c4b1c06f3bde8c663a492497dd
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 #include <limits.h>
6 #include <signal.h>
7 #include <stdlib.h>
8 #include <pthread.h>
9 #include <unistd.h>
11 #include "config.h"
13 #ifdef HAVE_DL_ITERATE_PHDR
14 #include <link.h>
15 #endif
17 #include "runtime.h"
18 #include "arch.h"
19 #include "defs.h"
20 #include "malloc.h"
21 #include "go-type.h"
23 #ifdef USING_SPLIT_STACK
25 /* FIXME: These are not declared anywhere. */
27 extern void __splitstack_getcontext(void *context[10]);
29 extern void __splitstack_setcontext(void *context[10]);
31 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
33 extern void * __splitstack_resetcontext(void *context[10], size_t *);
35 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
36 void **);
38 extern void __splitstack_block_signals (int *, int *);
40 extern void __splitstack_block_signals_context (void *context[10], int *,
41 int *);
43 #endif
45 #ifndef PTHREAD_STACK_MIN
46 # define PTHREAD_STACK_MIN 8192
47 #endif
49 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
50 # define StackMin PTHREAD_STACK_MIN
51 #else
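// Note: without split-stack support we fall back to large fixed stacks;
// StackMin is 2 MB on 32-bit targets and 4 MB on 64-bit targets, and it is
// the stack size passed below (e.g. in runtime_newextram) when new m's and
// g's are created.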
52 # define StackMin ((sizeof(char *) < 8) ? 2 * 1024 * 1024 : 4 * 1024 * 1024)
53 #endif
55 uintptr runtime_stacks_sys;
57 static void gtraceback(G*);
59 #ifdef __rtems__
60 #define __thread
61 #endif
63 static __thread G *g;
65 #ifndef SETCONTEXT_CLOBBERS_TLS
67 static inline void
68 initcontext(void)
72 static inline void
73 fixcontext(ucontext_t *c __attribute__ ((unused)))
77 #else
79 # if defined(__x86_64__) && defined(__sun__)
81 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
82 // register to that of the thread which called getcontext. The effect
83 // is that the address of all __thread variables changes. This bug
84 // also affects pthread_self() and pthread_getspecific. We work
85 // around it by clobbering the context field directly to keep %fs the
86 // same.
88 static __thread greg_t fs;
90 static inline void
91 initcontext(void)
93 ucontext_t c;
95 getcontext(&c);
96 fs = c.uc_mcontext.gregs[REG_FSBASE];
99 static inline void
100 fixcontext(ucontext_t* c)
102 c->uc_mcontext.gregs[REG_FSBASE] = fs;
105 # elif defined(__NetBSD__)
107 // NetBSD has a bug: setcontext clobbers tlsbase, we need to save
108 // and restore it ourselves.
110 static __thread __greg_t tlsbase;
112 static inline void
113 initcontext(void)
115 ucontext_t c;
117 getcontext(&c);
118 tlsbase = c.uc_mcontext._mc_tlsbase;
121 static inline void
122 fixcontext(ucontext_t* c)
124 c->uc_mcontext._mc_tlsbase = tlsbase;
127 # elif defined(__sparc__)
129 static inline void
130 initcontext(void)
134 static inline void
135 fixcontext(ucontext_t *c)
137 /* ??? Using
138 register unsigned long thread __asm__("%g7");
139 c->uc_mcontext.gregs[REG_G7] = thread;
140 results in
141 error: variable ‘thread’ might be clobbered by \
142 ‘longjmp’ or ‘vfork’ [-Werror=clobbered]
143 which ought to be false, as %g7 is a fixed register. */
145 if (sizeof (c->uc_mcontext.gregs[REG_G7]) == 8)
146 asm ("stx %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
147 else
148 asm ("st %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
151 # else
153 # error unknown case for SETCONTEXT_CLOBBERS_TLS
155 # endif
157 #endif
159 // ucontext_arg returns a properly aligned ucontext_t value. On some
160 // systems a ucontext_t value must be aligned to a 16-byte boundary.
161 // The g structure that has fields of type ucontext_t is defined in
162 // Go, and Go has no simple way to align a field to such a boundary.
163 // So we make the field larger in runtime2.go and pick an appropriate
164 // offset within the field here.
165 static ucontext_t*
166 ucontext_arg(void** go_ucontext)
168 uintptr_t p = (uintptr_t)go_ucontext;
169 size_t align = __alignof__(ucontext_t);
170 if(align > 16) {
171 // We only ensured space for up to a 16 byte alignment
172 // in libgo/go/runtime/runtime2.go.
173 runtime_throw("required alignment of ucontext_t too large");
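	// Round p up to the next align-byte boundary (align is a power of two).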
175 p = (p + align - 1) &~ (uintptr_t)(align - 1);
176 return (ucontext_t*)p;
179 // We can not always refer to the TLS variables directly. The
180 // compiler will call tls_get_addr to get the address of the variable,
181 // and it may hold it in a register across a call to schedule. When
182 // we get back from the call we may be running in a different thread,
183 // in which case the register now points to the TLS variable for a
184 // different thread. We use non-inlinable functions to avoid this
185 // when necessary.
187 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
189 G*
190 runtime_g(void)
192 return g;
195 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
197 M*
198 runtime_m(void)
200 if(g == nil)
201 return nil;
202 return g->m;
205 // Set g.
206 void
207 runtime_setg(G* gp)
209 g = gp;
212 // Start a new thread.
213 static void
214 runtime_newosproc(M *mp)
216 pthread_attr_t attr;
217 sigset_t clear, old;
218 pthread_t tid;
219 int ret;
221 if(pthread_attr_init(&attr) != 0)
222 runtime_throw("pthread_attr_init");
223 if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
224 runtime_throw("pthread_attr_setdetachstate");
226 // Block signals during pthread_create so that the new thread
227 // starts with signals disabled. It will enable them in minit.
228 sigfillset(&clear);
230 #ifdef SIGTRAP
231 // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
232 sigdelset(&clear, SIGTRAP);
233 #endif
235 sigemptyset(&old);
236 pthread_sigmask(SIG_BLOCK, &clear, &old);
237 ret = pthread_create(&tid, &attr, runtime_mstart, mp);
238 pthread_sigmask(SIG_SETMASK, &old, nil);
240 if (ret != 0)
241 runtime_throw("pthread_create");
244 // First function run by a new goroutine. This replaces gogocall.
245 static void
246 kickoff(void)
248 void (*fn)(void*);
249 void *param;
251 if(g->traceback != nil)
252 gtraceback(g);
254 fn = (void (*)(void*))(g->entry);
255 param = g->param;
256 g->param = nil;
257 fn(param);
258 runtime_goexit1();
261 // Switch context to a different goroutine. This is like longjmp.
262 void runtime_gogo(G*) __attribute__ ((noinline));
263 void
264 runtime_gogo(G* newg)
266 #ifdef USING_SPLIT_STACK
267 __splitstack_setcontext(&newg->stackcontext[0]);
268 #endif
269 g = newg;
270 newg->fromgogo = true;
271 fixcontext(ucontext_arg(&newg->context[0]));
272 setcontext(ucontext_arg(&newg->context[0]));
273 runtime_throw("gogo setcontext returned");
276 // Save context and call fn passing g as a parameter. This is like
277 // setjmp. Because getcontext always returns 0, unlike setjmp, we use
278 // g->fromgogo as a code. It will be true if we got here via
279 // setcontext. g == nil the first time this is called in a new m.
280 void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
281 void
282 runtime_mcall(void (*pfn)(G*))
284 M *mp;
285 G *gp;
286 #ifndef USING_SPLIT_STACK
287 void *afterregs;
288 #endif
290 // Ensure that all registers are on the stack for the garbage
291 // collector.
292 __builtin_unwind_init();
294 gp = g;
295 mp = gp->m;
296 if(gp == mp->g0)
297 runtime_throw("runtime: mcall called on m->g0 stack");
299 if(gp != nil) {
301 #ifdef USING_SPLIT_STACK
302 __splitstack_getcontext(&g->stackcontext[0]);
303 #else
304 // We have to point to an address on the stack that is
305 // below the saved registers.
306 gp->gcnextsp = &afterregs;
307 #endif
308 gp->fromgogo = false;
309 getcontext(ucontext_arg(&gp->context[0]));
311 // When we return from getcontext, we may be running
312 // in a new thread. That means that g may have
313 // changed. It is a global variable, so we will
314 // reload it, but the address of g may be cached in
315 // our local stack frame, and that address may be
316 // wrong. Call the function to reload the value for
317 // this thread.
318 gp = runtime_g();
319 mp = gp->m;
321 if(gp->traceback != nil)
322 gtraceback(gp);
324 if (gp == nil || !gp->fromgogo) {
325 #ifdef USING_SPLIT_STACK
326 __splitstack_setcontext(&mp->g0->stackcontext[0]);
327 #endif
328 mp->g0->entry = (byte*)pfn;
329 mp->g0->param = gp;
331 // It's OK to set g directly here because this case
332 // can not occur if we got here via a setcontext to
333 // the getcontext call just above.
334 g = mp->g0;
336 fixcontext(ucontext_arg(&mp->g0->context[0]));
337 setcontext(ucontext_arg(&mp->g0->context[0]));
338 runtime_throw("runtime: mcall function returned");
342 // Goroutine scheduler
343 // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
345 // The main concepts are:
346 // G - goroutine.
347 // M - worker thread, or machine.
348 // P - processor, a resource that is required to execute Go code.
349 // M must have an associated P to execute Go code; however, it can be
350 // blocked or in a syscall w/o an associated P.
352 // Design doc at http://golang.org/s/go11sched.
354 enum
356 // Number of goroutine ids to grab from runtime_sched->goidgen to local per-P cache at once.
357 // 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
358 GoidCacheBatch = 16,
361 extern Sched* runtime_getsched() __asm__ (GOSYM_PREFIX "runtime.getsched");
363 Sched* runtime_sched;
364 int32 runtime_gomaxprocs;
365 uint32 runtime_needextram = 1;
366 M runtime_m0;
367 G runtime_g0; // idle goroutine for m0
368 G* runtime_lastg;
369 M* runtime_allm;
370 P** runtime_allp;
371 M* runtime_extram;
372 int8* runtime_goos;
373 int32 runtime_ncpu;
374 bool runtime_precisestack;
375 static int32 newprocs;
377 static Lock allglock; // the following vars are protected by this lock or by stoptheworld
378 G** runtime_allg;
379 uintptr runtime_allglen;
380 static uintptr allgcap;
382 bool runtime_isarchive;
384 void* runtime_mstart(void*);
385 static void runqput(P*, G*);
386 static G* runqget(P*);
387 static bool runqputslow(P*, G*, uint32, uint32);
388 static G* runqsteal(P*, P*);
389 static void mput(M*);
390 static M* mget(void);
391 static void mcommoninit(M*);
392 static void schedule(void);
393 static void procresize(int32);
394 static void acquirep(P*);
395 static P* releasep(void);
396 static void newm(void(*)(void), P*);
397 static void stopm(void);
398 static void startm(P*, bool);
399 static void handoffp(P*);
400 static void wakep(void);
401 static void stoplockedm(void);
402 static void startlockedm(G*);
403 static void sysmon(void);
404 static uint32 retake(int64);
405 static void incidlelocked(int32);
406 static void checkdead(void);
407 static void exitsyscall0(G*);
408 static void park0(G*);
409 static void goexit0(G*);
410 static void gfput(P*, G*);
411 static G* gfget(P*);
412 static void gfpurge(P*);
413 static void globrunqput(G*);
414 static void globrunqputbatch(G*, G*, int32);
415 static G* globrunqget(P*, int32);
416 static P* pidleget(void);
417 static void pidleput(P*);
418 static void injectglist(G*);
419 static bool preemptall(void);
420 static bool exitsyscallfast(void);
421 static void allgadd(G*);
423 bool runtime_isstarted;
425 // The bootstrap sequence is:
427 // call osinit
428 // call schedinit
429 // make & queue new G
430 // call runtime_mstart
432 // The new G calls runtime_main.
433 void
434 runtime_schedinit(void)
436 M *m;
437 int32 n, procs;
438 String s;
439 const byte *p;
440 Eface i;
442 runtime_sched = runtime_getsched();
444 m = &runtime_m0;
445 g = &runtime_g0;
446 m->g0 = g;
447 m->curg = g;
448 g->m = m;
450 initcontext();
452 runtime_sched->maxmcount = 10000;
453 runtime_precisestack = 0;
455 // runtime_symtabinit();
456 runtime_mallocinit();
457 mcommoninit(m);
458 runtime_alginit(); // maps must not be used before this call
460 // Initialize the itable value for newErrorCString,
461 // so that the next time it gets called, possibly
462 // in a fault during a garbage collection, it will not
463 // need to allocate memory.
464 runtime_newErrorCString(0, &i);
466 // Initialize the cached gotraceback value, since
467 // gotraceback calls getenv, which mallocs on Plan 9.
468 runtime_gotraceback(nil);
470 runtime_goargs();
471 runtime_goenvs();
472 runtime_parsedebugvars();
474 runtime_sched->lastpoll = runtime_nanotime();
475 procs = 1;
476 s = runtime_getenv("GOMAXPROCS");
477 p = s.str;
478 if(p != nil && (n = runtime_atoi(p, s.len)) > 0) {
479 if(n > _MaxGomaxprocs)
480 n = _MaxGomaxprocs;
481 procs = n;
483 runtime_allp = runtime_malloc((_MaxGomaxprocs+1)*sizeof(runtime_allp[0]));
484 procresize(procs);
486 // Can not enable GC until all roots are registered.
487 // mstats()->enablegc = 1;
490 extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
491 extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");
493 // Used to determine the field alignment.
495 struct field_align
497 char c;
498 Hchan *p;
501 // main_init_done is a signal used by cgocallbackg that initialization
502 // has been completed. It is made before _cgo_notify_runtime_init_done,
503 // so all cgo calls can rely on it existing. When main_init is
504 // complete, it is closed, meaning cgocallbackg can reliably receive
505 // from it.
506 Hchan *runtime_main_init_done;
508 // The chan bool type, for runtime_main_init_done.
510 extern const struct __go_type_descriptor bool_type_descriptor
511 __asm__ (GOSYM_PREFIX "__go_tdn_bool");
513 static struct __go_channel_type chan_bool_type_descriptor =
515 /* __common */
517 /* __code */
518 GO_CHAN,
519 /* __align */
520 __alignof (Hchan *),
521 /* __field_align */
522 offsetof (struct field_align, p) - 1,
523 /* __size */
524 sizeof (Hchan *),
525 /* __hash */
526 0, /* This value doesn't matter. */
527 /* __hashfn */
528 NULL,
529 /* __equalfn */
530 NULL,
531 /* __gc */
532 NULL, /* This value doesn't matter */
533 /* __reflection */
534 NULL, /* This value doesn't matter */
535 /* __uncommon */
536 NULL,
537 /* __pointer_to_this */
538 NULL
540 /* __element_type */
541 &bool_type_descriptor,
542 /* __dir */
543 CHANNEL_BOTH_DIR
546 extern Hchan *makechan (ChanType *, int64)
547 __asm__ (GOSYM_PREFIX "runtime.makechan");
548 extern void closechan(Hchan *) __asm__ (GOSYM_PREFIX "runtime.closechan");
550 static void
551 initDone(void *arg __attribute__ ((unused))) {
552 runtime_unlockOSThread();
555 // The main goroutine.
556 // Note: C frames in general are not copyable during stack growth, for two reasons:
557 // 1) We don't know where in a frame to find pointers to other stack locations.
558 // 2) There's no guarantee that globals or heap values do not point into the frame.
560 // The C frame for runtime.main is copyable, because:
561 // 1) There are no pointers to other stack locations in the frame
562 // (d.fn points at a global, d.link is nil, d.argp is -1).
563 // 2) The only pointer into this frame is from the defer chain,
564 // which is explicitly handled during stack copying.
565 void
566 runtime_main(void* dummy __attribute__((unused)))
568 Defer d;
569 _Bool frame;
571 newm(sysmon, nil);
573 // Lock the main goroutine onto this, the main OS thread,
574 // during initialization. Most programs won't care, but a few
575 // do require certain calls to be made by the main thread.
576 // Those can arrange for main.main to run in the main thread
577 // by calling runtime.LockOSThread during initialization
578 // to preserve the lock.
579 runtime_lockOSThread();
581 // Defer unlock so that runtime.Goexit during init does the unlock too.
582 d.pfn = (uintptr)(void*)initDone;
583 d.link = g->_defer;
584 d.arg = (void*)-1;
585 d._panic = g->_panic;
586 d.retaddr = 0;
587 d.makefunccanrecover = 0;
588 d.frame = &frame;
589 d.special = true;
590 g->_defer = &d;
592 if(g->m != &runtime_m0)
593 runtime_throw("runtime_main not on m0");
594 __go_go(runtime_MHeap_Scavenger, nil);
596 runtime_main_init_done = makechan(&chan_bool_type_descriptor, 0);
598 _cgo_notify_runtime_init_done();
600 main_init();
602 closechan(runtime_main_init_done);
604 if(g->_defer != &d || (void*)d.pfn != initDone)
605 runtime_throw("runtime: bad defer entry after init");
606 g->_defer = d.link;
607 runtime_unlockOSThread();
609 // For gccgo we have to wait until after main is initialized
610 // to enable GC, because initializing main registers the GC
611 // roots.
612 mstats()->enablegc = 1;
614 if(runtime_isarchive) {
615 // This is not a complete program, but is instead a
616 // library built using -buildmode=c-archive or
617 // c-shared. Now that we are initialized, there is
618 // nothing further to do.
619 return;
622 main_main();
624 // Make racy client program work: if panicking on
625 // another goroutine at the same time as main returns,
626 // let the other goroutine finish printing the panic trace.
627 // Once it does, it will exit. See issue 3934.
628 if(runtime_panicking())
629 runtime_park(nil, nil, "panicwait");
631 runtime_exit(0);
632 for(;;)
633 *(int32*)0 = 0;
636 void
637 runtime_tracebackothers(G * volatile me)
639 G * volatile gp;
640 Traceback tb;
641 int32 traceback;
642 Slice slice;
643 volatile uintptr i;
645 tb.gp = me;
646 traceback = runtime_gotraceback(nil);
648 // Show the current goroutine first, if we haven't already.
649 if((gp = g->m->curg) != nil && gp != me) {
650 runtime_printf("\n");
651 runtime_goroutineheader(gp);
652 gp->traceback = &tb;
654 #ifdef USING_SPLIT_STACK
655 __splitstack_getcontext(&me->stackcontext[0]);
656 #endif
657 getcontext(ucontext_arg(&me->context[0]));
659 if(gp->traceback != nil) {
660 runtime_gogo(gp);
663 slice.__values = &tb.locbuf[0];
664 slice.__count = tb.c;
665 slice.__capacity = tb.c;
666 runtime_printtrace(slice, nil);
667 runtime_printcreatedby(gp);
670 runtime_lock(&allglock);
671 for(i = 0; i < runtime_allglen; i++) {
672 gp = runtime_allg[i];
673 if(gp == me || gp == g->m->curg || gp->atomicstatus == _Gdead)
674 continue;
675 if(gp->issystem && traceback < 2)
676 continue;
677 runtime_printf("\n");
678 runtime_goroutineheader(gp);
680 // Our only mechanism for doing a stack trace is
681 // _Unwind_Backtrace. And that only works for the
682 // current thread, not for other random goroutines.
683 // So we need to switch context to the goroutine, get
684 // the backtrace, and then switch back.
686 // This means that if g is running or in a syscall, we
687 // can't reliably print a stack trace. FIXME.
689 if(gp->atomicstatus == _Grunning) {
690 runtime_printf("\tgoroutine running on other thread; stack unavailable\n");
691 runtime_printcreatedby(gp);
692 } else if(gp->atomicstatus == _Gsyscall) {
693 runtime_printf("\tgoroutine in C code; stack unavailable\n");
694 runtime_printcreatedby(gp);
695 } else {
696 gp->traceback = &tb;
698 #ifdef USING_SPLIT_STACK
699 __splitstack_getcontext(&me->stackcontext[0]);
700 #endif
701 getcontext(ucontext_arg(&me->context[0]));
703 if(gp->traceback != nil) {
704 runtime_gogo(gp);
707 slice.__values = &tb.locbuf[0];
708 slice.__count = tb.c;
709 slice.__capacity = tb.c;
710 runtime_printtrace(slice, nil);
711 runtime_printcreatedby(gp);
714 runtime_unlock(&allglock);
717 static void
718 checkmcount(void)
720 // sched lock is held
721 if(runtime_sched->mcount > runtime_sched->maxmcount) {
722 runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched->maxmcount);
723 runtime_throw("thread exhaustion");
727 // Do a stack trace of gp, and then restore the context to
728 // gp->traceback->gp.
730 static void
731 gtraceback(G* gp)
733 Traceback* traceback;
735 traceback = gp->traceback;
736 gp->traceback = nil;
737 if(gp->m != nil)
738 runtime_throw("gtraceback: m is not nil");
739 gp->m = traceback->gp->m;
740 traceback->c = runtime_callers(1, traceback->locbuf,
741 sizeof traceback->locbuf / sizeof traceback->locbuf[0], false);
742 gp->m = nil;
743 runtime_gogo(traceback->gp);
746 static void
747 mcommoninit(M *mp)
749 // If there is no mcache runtime_callers() will crash,
750 // and we are most likely in the sysmon thread, so the stack is senseless anyway.
751 if(g->m->mcache)
752 runtime_callers(1, mp->createstack, nelem(mp->createstack), false);
754 mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
756 runtime_lock(&runtime_sched->lock);
757 mp->id = runtime_sched->mcount++;
758 checkmcount();
759 runtime_mpreinit(mp);
761 // Add to runtime_allm so garbage collector doesn't free m
762 // when it is just in a register or thread-local storage.
763 mp->alllink = runtime_allm;
764 // runtime_NumCgoCall() iterates over allm w/o schedlock,
765 // so we need to publish it safely.
766 runtime_atomicstorep(&runtime_allm, mp);
767 runtime_unlock(&runtime_sched->lock);
770 // Mark gp ready to run.
771 void
772 runtime_ready(G *gp)
774 // Mark runnable.
775 g->m->locks++; // disable preemption because it can be holding p in a local var
776 if(gp->atomicstatus != _Gwaiting) {
777 runtime_printf("goroutine %D has status %d\n", gp->goid, gp->atomicstatus);
778 runtime_throw("bad g->atomicstatus in ready");
780 gp->atomicstatus = _Grunnable;
781 runqput((P*)g->m->p, gp);
782 if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0) // TODO: fast atomic
783 wakep();
784 g->m->locks--;
787 void goready(G*, int) __asm__ (GOSYM_PREFIX "runtime.goready");
789 void
790 goready(G* gp, int traceskip __attribute__ ((unused)))
792 runtime_ready(gp);
795 int32
796 runtime_gcprocs(void)
798 int32 n;
800 // Figure out how many CPUs to use during GC.
801 // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
802 runtime_lock(&runtime_sched->lock);
803 n = runtime_gomaxprocs;
804 if(n > runtime_ncpu)
805 n = runtime_ncpu > 0 ? runtime_ncpu : 1;
806 if(n > MaxGcproc)
807 n = MaxGcproc;
808 if(n > runtime_sched->nmidle+1) // one M is currently running
809 n = runtime_sched->nmidle+1;
810 runtime_unlock(&runtime_sched->lock);
811 return n;
814 static bool
815 needaddgcproc(void)
817 int32 n;
819 runtime_lock(&runtime_sched->lock);
820 n = runtime_gomaxprocs;
821 if(n > runtime_ncpu)
822 n = runtime_ncpu;
823 if(n > MaxGcproc)
824 n = MaxGcproc;
825 n -= runtime_sched->nmidle+1; // one M is currently running
826 runtime_unlock(&runtime_sched->lock);
827 return n > 0;
830 void
831 runtime_helpgc(int32 nproc)
833 M *mp;
834 int32 n, pos;
836 runtime_lock(&runtime_sched->lock);
837 pos = 0;
838 for(n = 1; n < nproc; n++) { // one M is currently running
839 if(runtime_allp[pos]->mcache == g->m->mcache)
840 pos++;
841 mp = mget();
842 if(mp == nil)
843 runtime_throw("runtime_gcprocs inconsistency");
844 mp->helpgc = n;
845 mp->mcache = runtime_allp[pos]->mcache;
846 pos++;
847 runtime_notewakeup(&mp->park);
849 runtime_unlock(&runtime_sched->lock);
852 // Similar to stoptheworld but best-effort and can be called several times.
853 // There is no reverse operation; it is used during crashing.
854 // This function must not lock any mutexes.
855 void
856 runtime_freezetheworld(void)
858 int32 i;
860 if(runtime_gomaxprocs == 1)
861 return;
862 // stopwait and preemption requests can be lost
863 // due to races with concurrently executing threads,
864 // so try several times
865 for(i = 0; i < 5; i++) {
866 // this should tell the scheduler to not start any new goroutines
867 runtime_sched->stopwait = 0x7fffffff;
868 runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1);
869 // this should stop running goroutines
870 if(!preemptall())
871 break; // no running goroutines
872 runtime_usleep(1000);
874 // to be sure
875 runtime_usleep(1000);
876 preemptall();
877 runtime_usleep(1000);
880 void
881 runtime_stopTheWorldWithSema(void)
883 int32 i;
884 uint32 s;
885 P *p;
886 bool wait;
888 runtime_lock(&runtime_sched->lock);
889 runtime_sched->stopwait = runtime_gomaxprocs;
890 runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1);
891 preemptall();
892 // stop current P
893 ((P*)g->m->p)->status = _Pgcstop;
894 runtime_sched->stopwait--;
895 // try to retake all P's in _Psyscall status
896 for(i = 0; i < runtime_gomaxprocs; i++) {
897 p = runtime_allp[i];
898 s = p->status;
899 if(s == _Psyscall && runtime_cas(&p->status, s, _Pgcstop))
900 runtime_sched->stopwait--;
902 // stop idle P's
903 while((p = pidleget()) != nil) {
904 p->status = _Pgcstop;
905 runtime_sched->stopwait--;
907 wait = runtime_sched->stopwait > 0;
908 runtime_unlock(&runtime_sched->lock);
910 // wait for remaining P's to stop voluntarily
911 if(wait) {
912 runtime_notesleep(&runtime_sched->stopnote);
913 runtime_noteclear(&runtime_sched->stopnote);
915 if(runtime_sched->stopwait)
916 runtime_throw("stoptheworld: not stopped");
917 for(i = 0; i < runtime_gomaxprocs; i++) {
918 p = runtime_allp[i];
919 if(p->status != _Pgcstop)
920 runtime_throw("stoptheworld: not stopped");
924 static void
925 mhelpgc(void)
927 g->m->helpgc = -1;
930 void
931 runtime_startTheWorldWithSema(void)
933 P *p, *p1;
934 M *mp;
935 G *gp;
936 bool add;
938 g->m->locks++; // disable preemption because it can be holding p in a local var
939 gp = runtime_netpoll(false); // non-blocking
940 injectglist(gp);
941 add = needaddgcproc();
942 runtime_lock(&runtime_sched->lock);
943 if(newprocs) {
944 procresize(newprocs);
945 newprocs = 0;
946 } else
947 procresize(runtime_gomaxprocs);
948 runtime_sched->gcwaiting = 0;
950 p1 = nil;
951 while((p = pidleget()) != nil) {
952 // procresize() puts p's with work at the beginning of the list.
953 // Once we reach a p without a run queue, the rest don't have one either.
954 if(p->runqhead == p->runqtail) {
955 pidleput(p);
956 break;
958 p->m = (uintptr)mget();
959 p->link = (uintptr)p1;
960 p1 = p;
962 if(runtime_sched->sysmonwait) {
963 runtime_sched->sysmonwait = false;
964 runtime_notewakeup(&runtime_sched->sysmonnote);
966 runtime_unlock(&runtime_sched->lock);
968 while(p1) {
969 p = p1;
970 p1 = (P*)p1->link;
971 if(p->m) {
972 mp = (M*)p->m;
973 p->m = 0;
974 if(mp->nextp)
975 runtime_throw("startTheWorldWithSema: inconsistent mp->nextp");
976 mp->nextp = (uintptr)p;
977 runtime_notewakeup(&mp->park);
978 } else {
979 // Start M to run P. Do not start another M below.
980 newm(nil, p);
981 add = false;
985 if(add) {
986 // If GC could have used another helper proc, start one now,
987 // in the hope that it will be available next time.
988 // It would have been even better to start it before the collection,
989 // but doing so requires allocating memory, so it's tricky to
990 // coordinate. This lazy approach works out in practice:
991 // we don't mind if the first couple gc rounds don't have quite
992 // the maximum number of procs.
993 newm(mhelpgc, nil);
995 g->m->locks--;
998 // Called to start an M.
999 void*
1000 runtime_mstart(void* mp)
1002 M *m;
1004 m = (M*)mp;
1005 g = m->g0;
1006 g->m = m;
1008 initcontext();
1010 g->entry = nil;
1011 g->param = nil;
1013 // Record top of stack for use by mcall.
1014 // Once we call schedule we're never coming back,
1015 // so other calls can reuse this stack space.
1016 #ifdef USING_SPLIT_STACK
1017 __splitstack_getcontext(&g->stackcontext[0]);
1018 #else
1019 g->gcinitialsp = &mp;
1020 // Setting gcstacksize to 0 is a marker meaning that gcinitialsp
1021 // is the top of the stack, not the bottom.
1022 g->gcstacksize = 0;
1023 g->gcnextsp = &mp;
1024 #endif
1025 getcontext(ucontext_arg(&g->context[0]));
1027 if(g->entry != nil) {
1028 // Got here from mcall.
1029 void (*pfn)(G*) = (void (*)(G*))g->entry;
1030 G* gp = (G*)g->param;
1031 pfn(gp);
1032 *(int*)0x21 = 0x21;
1034 runtime_minit();
1036 #ifdef USING_SPLIT_STACK
1038 int dont_block_signals = 0;
1039 __splitstack_block_signals(&dont_block_signals, nil);
1041 #endif
1043 // Install signal handlers; after minit so that minit can
1044 // prepare the thread to be able to handle the signals.
1045 if(m == &runtime_m0) {
1046 if(runtime_iscgo && !runtime_cgoHasExtraM) {
1047 runtime_cgoHasExtraM = true;
1048 runtime_newextram();
1049 runtime_needextram = 0;
1051 runtime_initsig(false);
1054 if(m->mstartfn)
1055 ((void (*)(void))m->mstartfn)();
1057 if(m->helpgc) {
1058 m->helpgc = 0;
1059 stopm();
1060 } else if(m != &runtime_m0) {
1061 acquirep((P*)m->nextp);
1062 m->nextp = 0;
1064 schedule();
1066 // TODO(brainman): This point is never reached, because the scheduler
1067 // does not release OS threads at the moment. But once this path
1068 // is enabled, we must remove our seh here.
1070 return nil;
1073 typedef struct CgoThreadStart CgoThreadStart;
1074 struct CgoThreadStart
1076 M *m;
1077 G *g;
1078 uintptr *tls;
1079 void (*fn)(void);
1082 // Allocate a new m unassociated with any thread.
1083 // Can use p for allocation context if needed.
1084 M*
1085 runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, uintptr* ret_g0_stacksize)
1087 M *mp;
1089 g->m->locks++; // disable GC because it can be called from sysmon
1090 if(g->m->p == 0)
1091 acquirep(p); // temporarily borrow p for mallocs in this function
1092 #if 0
1093 if(mtype == nil) {
1094 Eface e;
1095 runtime_gc_m_ptr(&e);
1096 mtype = ((const PtrType*)e.__type_descriptor)->__element_type;
1098 #endif
1100 mp = runtime_mal(sizeof *mp);
1101 mcommoninit(mp);
1102 mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize);
1103 mp->g0->m = mp;
1105 if(p == (P*)g->m->p)
1106 releasep();
1107 g->m->locks--;
1109 return mp;
1112 static G*
1113 allocg(void)
1115 G *gp;
1116 // static Type *gtype;
1118 // if(gtype == nil) {
1119 // Eface e;
1120 // runtime_gc_g_ptr(&e);
1121 // gtype = ((PtrType*)e.__type_descriptor)->__element_type;
1122 // }
1123 // gp = runtime_cnew(gtype);
1124 gp = runtime_malloc(sizeof(G));
1125 return gp;
1128 static M* lockextra(bool nilokay);
1129 static void unlockextra(M*);
1131 // needm is called when a cgo callback happens on a
1132 // thread without an m (a thread not created by Go).
1133 // In this case, needm is expected to find an m to use
1134 // and return with m, g initialized correctly.
1135 // Since m and g are not set now (likely nil, but see below)
1136 // needm is limited in what routines it can call. In particular
1137 // it can only call nosplit functions (textflag 7) and cannot
1138 // do any scheduling that requires an m.
1140 // In order to avoid needing heavy lifting here, we adopt
1141 // the following strategy: there is a stack of available m's
1142 // that can be stolen. Using compare-and-swap
1143 // to pop from the stack has ABA races, so we simulate
1144 // a lock by doing an exchange (via casp) to steal the stack
1145 // head and replace the top pointer with MLOCKED (1).
1146 // This serves as a simple spin lock that we can use even
1147 // without an m. The thread that locks the stack in this way
1148 // unlocks the stack by storing a valid stack head pointer.
1150 // In order to make sure that there is always an m structure
1151 // available to be stolen, we maintain the invariant that there
1152 // is always one more than needed. At the beginning of the
1153 // program (if cgo is in use) the list is seeded with a single m.
1154 // If needm finds that it has taken the last m off the list, its job
1155 // is - once it has installed its own m so that it can do things like
1156 // allocate memory - to create a spare m and put it on the list.
1158 // Each of these extra m's also has a g0 and a curg that are
1159 // pressed into service as the scheduling stack and current
1160 // goroutine for the duration of the cgo callback.
1162 // When the callback is done with the m, it calls dropm to
1163 // put the m back on the list.
1165 // Unlike the gc toolchain, we start running on curg, since we are
1166 // just going to return and let the caller continue.
1167 void
1168 runtime_needm(void)
1170 M *mp;
1172 if(runtime_needextram) {
1173 // Can happen if C/C++ code calls Go from a global ctor.
1174 // Can not throw, because scheduler is not initialized yet.
1175 int rv __attribute__((unused));
1176 rv = runtime_write(2, "fatal error: cgo callback before cgo call\n",
1177 sizeof("fatal error: cgo callback before cgo call\n")-1);
1178 runtime_exit(1);
1181 // Lock extra list, take head, unlock popped list.
1182 // nilokay=false is safe here because of the invariant above,
1183 // that the extra list always contains or will soon contain
1184 // at least one m.
1185 mp = lockextra(false);
1187 // Set needextram when we've just emptied the list,
1188 // so that the eventual call into cgocallbackg will
1189 // allocate a new m for the extra list. We delay the
1190 // allocation until then so that it can be done
1191 // after exitsyscall makes sure it is okay to be
1192 // running at all (that is, there's no garbage collection
1193 // running right now).
1194 mp->needextram = mp->schedlink == 0;
1195 unlockextra((M*)mp->schedlink);
1197 // Install g (= m->curg).
1198 runtime_setg(mp->curg);
1200 // Initialize g's context as in mstart.
1201 initcontext();
1202 g->atomicstatus = _Gsyscall;
1203 g->entry = nil;
1204 g->param = nil;
1205 #ifdef USING_SPLIT_STACK
1206 __splitstack_getcontext(&g->stackcontext[0]);
1207 #else
1208 g->gcinitialsp = &mp;
1209 g->gcstack = nil;
1210 g->gcstacksize = 0;
1211 g->gcnextsp = &mp;
1212 #endif
1213 getcontext(ucontext_arg(&g->context[0]));
1215 if(g->entry != nil) {
1216 // Got here from mcall.
1217 void (*pfn)(G*) = (void (*)(G*))g->entry;
1218 G* gp = (G*)g->param;
1219 pfn(gp);
1220 *(int*)0x22 = 0x22;
1223 // Initialize this thread to use the m.
1224 runtime_minit();
1226 #ifdef USING_SPLIT_STACK
1228 int dont_block_signals = 0;
1229 __splitstack_block_signals(&dont_block_signals, nil);
1231 #endif
1234 // newextram allocates an m and puts it on the extra list.
1235 // It is called with a working local m, so that it can do things
1236 // like call schedlock and allocate.
1237 void
1238 runtime_newextram(void)
1240 M *mp, *mnext;
1241 G *gp;
1242 byte *g0_sp, *sp;
1243 uintptr g0_spsize, spsize;
1244 ucontext_t *uc;
1246 // Create extra goroutine locked to extra m.
1247 // The goroutine is the context in which the cgo callback will run.
1248 // The sched.pc will never be returned to, but setting it to
1249 // runtime.goexit makes clear to the traceback routines where
1250 // the goroutine stack ends.
1251 mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize);
1252 gp = runtime_malg(StackMin, &sp, &spsize);
1253 gp->atomicstatus = _Gdead;
1254 gp->m = mp;
1255 mp->curg = gp;
1256 mp->locked = _LockInternal;
1257 mp->lockedg = gp;
1258 gp->lockedm = mp;
1259 gp->goid = runtime_xadd64(&runtime_sched->goidgen, 1);
1260 // put on allg for garbage collector
1261 allgadd(gp);
1263 // The context for gp will be set up in runtime_needm. But
1264 // here we need to set up the context for g0.
1265 uc = ucontext_arg(&mp->g0->context[0]);
1266 getcontext(uc);
1267 uc->uc_stack.ss_sp = g0_sp;
1268 uc->uc_stack.ss_size = (size_t)g0_spsize;
1269 makecontext(uc, kickoff, 0);
1271 // Add m to the extra list.
1272 mnext = lockextra(true);
1273 mp->schedlink = (uintptr)mnext;
1274 unlockextra(mp);
1277 // dropm is called when a cgo callback has called needm but is now
1278 // done with the callback and returning back into the non-Go thread.
1279 // It puts the current m back onto the extra list.
1281 // The main expense here is the call to signalstack to release the
1282 // m's signal stack, and then the call to needm on the next callback
1283 // from this thread. It is tempting to try to save the m for next time,
1284 // which would eliminate both these costs, but there might not be
1285 // a next time: the current thread (which Go does not control) might exit.
1286 // If we saved the m for that thread, there would be an m leak each time
1287 // such a thread exited. Instead, we acquire and release an m on each
1288 // call. These should typically not be scheduling operations, just a few
1289 // atomics, so the cost should be small.
1291 // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
1292 // variable using pthread_key_create. Unlike the pthread keys we already use
1293 // on OS X, this dummy key would never be read by Go code. It would exist
1294 // only so that we could register at thread-exit-time destructor.
1295 // That destructor would put the m back onto the extra list.
1296 // This is purely a performance optimization. The current version,
1297 // in which dropm happens on each cgo call, is still correct too.
1298 // We may have to keep the current version on systems with cgo
1299 // but without pthreads, like Windows.
1300 void
1301 runtime_dropm(void)
1303 M *mp, *mnext;
1305 // Undo whatever initialization minit did during needm.
1306 runtime_unminit();
1308 // Clear m and g, and return m to the extra list.
1309 // After the call to setg we can only call nosplit functions.
1310 mp = g->m;
1311 runtime_setg(nil);
1313 mp->curg->atomicstatus = _Gdead;
1314 mp->curg->gcstack = nil;
1315 mp->curg->gcnextsp = nil;
1317 mnext = lockextra(true);
1318 mp->schedlink = (uintptr)mnext;
1319 unlockextra(mp);
1322 #define MLOCKED ((M*)1)
1324 // lockextra locks the extra list and returns the list head.
1325 // The caller must unlock the list by storing a new list head
1326 // to runtime.extram. If nilokay is true, then lockextra will
1327 // return a nil list head if that's what it finds. If nilokay is false,
1328 // lockextra will keep waiting until the list head is no longer nil.
1329 static M*
1330 lockextra(bool nilokay)
1332 M *mp;
1333 void (*yield)(void);
1335 for(;;) {
1336 mp = runtime_atomicloadp(&runtime_extram);
1337 if(mp == MLOCKED) {
1338 yield = runtime_osyield;
1339 yield();
1340 continue;
1342 if(mp == nil && !nilokay) {
1343 runtime_usleep(1);
1344 continue;
1346 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1347 yield = runtime_osyield;
1348 yield();
1349 continue;
1351 break;
1353 return mp;
1356 static void
1357 unlockextra(M *mp)
1359 runtime_atomicstorep(&runtime_extram, mp);
1362 static int32
1363 countextra()
1365 M *mp, *mc;
1366 int32 c;
1368 for(;;) {
1369 mp = runtime_atomicloadp(&runtime_extram);
1370 if(mp == MLOCKED) {
1371 runtime_osyield();
1372 continue;
1374 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1375 runtime_osyield();
1376 continue;
1378 c = 0;
1379 for(mc = mp; mc != nil; mc = (M*)mc->schedlink)
1380 c++;
1381 runtime_atomicstorep(&runtime_extram, mp);
1382 return c;
1386 // Create a new m. It will start off with a call to fn, or else the scheduler.
1387 static void
1388 newm(void(*fn)(void), P *p)
1390 M *mp;
1392 mp = runtime_allocm(p, -1, nil, nil);
1393 mp->nextp = (uintptr)p;
1394 mp->mstartfn = (uintptr)(void*)fn;
1396 runtime_newosproc(mp);
1399 // Stops execution of the current m until new work is available.
1400 // Returns with acquired P.
1401 static void
1402 stopm(void)
1404 M* m;
1406 m = g->m;
1407 if(m->locks)
1408 runtime_throw("stopm holding locks");
1409 if(m->p)
1410 runtime_throw("stopm holding p");
1411 if(m->spinning) {
1412 m->spinning = false;
1413 runtime_xadd(&runtime_sched->nmspinning, -1);
1416 retry:
1417 runtime_lock(&runtime_sched->lock);
1418 mput(m);
1419 runtime_unlock(&runtime_sched->lock);
1420 runtime_notesleep(&m->park);
1421 m = g->m;
1422 runtime_noteclear(&m->park);
1423 if(m->helpgc) {
1424 runtime_gchelper();
1425 m->helpgc = 0;
1426 m->mcache = nil;
1427 goto retry;
1429 acquirep((P*)m->nextp);
1430 m->nextp = 0;
1433 static void
1434 mspinning(void)
1436 g->m->spinning = true;
1439 // Schedules some M to run the p (creates an M if necessary).
1440 // If p==nil, tries to get an idle P, if no idle P's does nothing.
1441 static void
1442 startm(P *p, bool spinning)
1444 M *mp;
1445 void (*fn)(void);
1447 runtime_lock(&runtime_sched->lock);
1448 if(p == nil) {
1449 p = pidleget();
1450 if(p == nil) {
1451 runtime_unlock(&runtime_sched->lock);
1452 if(spinning)
1453 runtime_xadd(&runtime_sched->nmspinning, -1);
1454 return;
1457 mp = mget();
1458 runtime_unlock(&runtime_sched->lock);
1459 if(mp == nil) {
1460 fn = nil;
1461 if(spinning)
1462 fn = mspinning;
1463 newm(fn, p);
1464 return;
1466 if(mp->spinning)
1467 runtime_throw("startm: m is spinning");
1468 if(mp->nextp)
1469 runtime_throw("startm: m has p");
1470 mp->spinning = spinning;
1471 mp->nextp = (uintptr)p;
1472 runtime_notewakeup(&mp->park);
1475 // Hands off P from syscall or locked M.
1476 static void
1477 handoffp(P *p)
1479 // if it has local work, start it straight away
1480 if(p->runqhead != p->runqtail || runtime_sched->runqsize) {
1481 startm(p, false);
1482 return;
1484 // no local work, check that there are no spinning/idle M's,
1485 // otherwise our help is not required
1486 if(runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) == 0 && // TODO: fast atomic
1487 runtime_cas(&runtime_sched->nmspinning, 0, 1)) {
1488 startm(p, true);
1489 return;
1491 runtime_lock(&runtime_sched->lock);
1492 if(runtime_sched->gcwaiting) {
1493 p->status = _Pgcstop;
1494 if(--runtime_sched->stopwait == 0)
1495 runtime_notewakeup(&runtime_sched->stopnote);
1496 runtime_unlock(&runtime_sched->lock);
1497 return;
1499 if(runtime_sched->runqsize) {
1500 runtime_unlock(&runtime_sched->lock);
1501 startm(p, false);
1502 return;
1504 // If this is the last running P and nobody is polling the network,
1505 // we need to wake up another M to poll the network.
1506 if(runtime_sched->npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched->lastpoll) != 0) {
1507 runtime_unlock(&runtime_sched->lock);
1508 startm(p, false);
1509 return;
1511 pidleput(p);
1512 runtime_unlock(&runtime_sched->lock);
1515 // Tries to add one more P to execute G's.
1516 // Called when a G is made runnable (newproc, ready).
1517 static void
1518 wakep(void)
1520 // be conservative about spinning threads
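	// The compare-and-swap below is both the check and the claim: it
	// succeeds only when nmspinning goes from 0 to 1, so wakep starts
	// at most one spinning M at a time.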
1521 if(!runtime_cas(&runtime_sched->nmspinning, 0, 1))
1522 return;
1523 startm(nil, true);
1526 // Stops execution of the current m that is locked to a g until the g is runnable again.
1527 // Returns with acquired P.
1528 static void
1529 stoplockedm(void)
1531 M *m;
1532 P *p;
1534 m = g->m;
1535 if(m->lockedg == nil || m->lockedg->lockedm != m)
1536 runtime_throw("stoplockedm: inconsistent locking");
1537 if(m->p) {
1538 // Schedule another M to run this p.
1539 p = releasep();
1540 handoffp(p);
1542 incidlelocked(1);
1543 // Wait until another thread schedules lockedg again.
1544 runtime_notesleep(&m->park);
1545 m = g->m;
1546 runtime_noteclear(&m->park);
1547 if(m->lockedg->atomicstatus != _Grunnable)
1548 runtime_throw("stoplockedm: not runnable");
1549 acquirep((P*)m->nextp);
1550 m->nextp = 0;
1553 // Schedules the locked m to run the locked gp.
1554 static void
1555 startlockedm(G *gp)
1557 M *mp;
1558 P *p;
1560 mp = gp->lockedm;
1561 if(mp == g->m)
1562 runtime_throw("startlockedm: locked to me");
1563 if(mp->nextp)
1564 runtime_throw("startlockedm: m has p");
1565 // directly handoff current P to the locked m
1566 incidlelocked(-1);
1567 p = releasep();
1568 mp->nextp = (uintptr)p;
1569 runtime_notewakeup(&mp->park);
1570 stopm();
1573 // Stops the current m for stoptheworld.
1574 // Returns when the world is restarted.
1575 static void
1576 gcstopm(void)
1578 P *p;
1580 if(!runtime_sched->gcwaiting)
1581 runtime_throw("gcstopm: not waiting for gc");
1582 if(g->m->spinning) {
1583 g->m->spinning = false;
1584 runtime_xadd(&runtime_sched->nmspinning, -1);
1586 p = releasep();
1587 runtime_lock(&runtime_sched->lock);
1588 p->status = _Pgcstop;
1589 if(--runtime_sched->stopwait == 0)
1590 runtime_notewakeup(&runtime_sched->stopnote);
1591 runtime_unlock(&runtime_sched->lock);
1592 stopm();
1595 // Schedules gp to run on the current M.
1596 // Never returns.
1597 static void
1598 execute(G *gp)
1600 int32 hz;
1602 if(gp->atomicstatus != _Grunnable) {
1603 runtime_printf("execute: bad g status %d\n", gp->atomicstatus);
1604 runtime_throw("execute: bad g status");
1606 gp->atomicstatus = _Grunning;
1607 gp->waitsince = 0;
1608 ((P*)g->m->p)->schedtick++;
1609 g->m->curg = gp;
1610 gp->m = g->m;
1612 // Check whether the profiler needs to be turned on or off.
1613 hz = runtime_sched->profilehz;
1614 if(g->m->profilehz != hz)
1615 runtime_resetcpuprofiler(hz);
1617 runtime_gogo(gp);
1620 // Finds a runnable goroutine to execute.
1621 // Tries to steal from other P's, get g from global queue, poll network.
1622 static G*
1623 findrunnable(void)
1625 G *gp;
1626 P *p;
1627 int32 i;
1629 top:
1630 if(runtime_sched->gcwaiting) {
1631 gcstopm();
1632 goto top;
1634 if(runtime_fingwait && runtime_fingwake && (gp = runtime_wakefing()) != nil)
1635 runtime_ready(gp);
1636 // local runq
1637 gp = runqget((P*)g->m->p);
1638 if(gp)
1639 return gp;
1640 // global runq
1641 if(runtime_sched->runqsize) {
1642 runtime_lock(&runtime_sched->lock);
1643 gp = globrunqget((P*)g->m->p, 0);
1644 runtime_unlock(&runtime_sched->lock);
1645 if(gp)
1646 return gp;
1648 // poll network
1649 gp = runtime_netpoll(false); // non-blocking
1650 if(gp) {
1651 injectglist((G*)gp->schedlink);
1652 gp->atomicstatus = _Grunnable;
1653 return gp;
1655 // If number of spinning M's >= number of busy P's, block.
1656 // This is necessary to prevent excessive CPU consumption
1657 // when GOMAXPROCS>>1 but the program parallelism is low.
1658 if(!g->m->spinning && 2 * runtime_atomicload(&runtime_sched->nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched->npidle)) // TODO: fast atomic
1659 goto stop;
1660 if(!g->m->spinning) {
1661 g->m->spinning = true;
1662 runtime_xadd(&runtime_sched->nmspinning, 1);
1664 // random steal from other P's
1665 for(i = 0; i < 2*runtime_gomaxprocs; i++) {
1666 if(runtime_sched->gcwaiting)
1667 goto top;
1668 p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
1669 if(p == (P*)g->m->p)
1670 gp = runqget(p);
1671 else
1672 gp = runqsteal((P*)g->m->p, p);
1673 if(gp)
1674 return gp;
1676 stop:
1677 // return P and block
1678 runtime_lock(&runtime_sched->lock);
1679 if(runtime_sched->gcwaiting) {
1680 runtime_unlock(&runtime_sched->lock);
1681 goto top;
1683 if(runtime_sched->runqsize) {
1684 gp = globrunqget((P*)g->m->p, 0);
1685 runtime_unlock(&runtime_sched->lock);
1686 return gp;
1688 p = releasep();
1689 pidleput(p);
1690 runtime_unlock(&runtime_sched->lock);
1691 if(g->m->spinning) {
1692 g->m->spinning = false;
1693 runtime_xadd(&runtime_sched->nmspinning, -1);
1695 // check all runqueues once again
1696 for(i = 0; i < runtime_gomaxprocs; i++) {
1697 p = runtime_allp[i];
1698 if(p && p->runqhead != p->runqtail) {
1699 runtime_lock(&runtime_sched->lock);
1700 p = pidleget();
1701 runtime_unlock(&runtime_sched->lock);
1702 if(p) {
1703 acquirep(p);
1704 goto top;
1706 break;
1709 // poll network
1710 if(runtime_xchg64(&runtime_sched->lastpoll, 0) != 0) {
1711 if(g->m->p)
1712 runtime_throw("findrunnable: netpoll with p");
1713 if(g->m->spinning)
1714 runtime_throw("findrunnable: netpoll with spinning");
1715 gp = runtime_netpoll(true); // block until new work is available
1716 runtime_atomicstore64(&runtime_sched->lastpoll, runtime_nanotime());
1717 if(gp) {
1718 runtime_lock(&runtime_sched->lock);
1719 p = pidleget();
1720 runtime_unlock(&runtime_sched->lock);
1721 if(p) {
1722 acquirep(p);
1723 injectglist((G*)gp->schedlink);
1724 gp->atomicstatus = _Grunnable;
1725 return gp;
1727 injectglist(gp);
1730 stopm();
1731 goto top;
1734 static void
1735 resetspinning(void)
1737 int32 nmspinning;
1739 if(g->m->spinning) {
1740 g->m->spinning = false;
1741 nmspinning = runtime_xadd(&runtime_sched->nmspinning, -1);
1742 if(nmspinning < 0)
1743 runtime_throw("findrunnable: negative nmspinning");
1744 } else
1745 nmspinning = runtime_atomicload(&runtime_sched->nmspinning);
1747 // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
1748 // so see if we need to wakeup another P here.
1749 if (nmspinning == 0 && runtime_atomicload(&runtime_sched->npidle) > 0)
1750 wakep();
1753 // Injects the list of runnable G's into the scheduler.
1754 // Can run concurrently with GC.
1755 static void
1756 injectglist(G *glist)
1758 int32 n;
1759 G *gp;
1761 if(glist == nil)
1762 return;
1763 runtime_lock(&runtime_sched->lock);
1764 for(n = 0; glist; n++) {
1765 gp = glist;
1766 glist = (G*)gp->schedlink;
1767 gp->atomicstatus = _Grunnable;
1768 globrunqput(gp);
1770 runtime_unlock(&runtime_sched->lock);
1772 for(; n && runtime_sched->npidle; n--)
1773 startm(nil, false);
1776 // One round of scheduler: find a runnable goroutine and execute it.
1777 // Never returns.
1778 static void
1779 schedule(void)
1781 G *gp;
1782 uint32 tick;
1784 if(g->m->locks)
1785 runtime_throw("schedule: holding locks");
1787 top:
1788 if(runtime_sched->gcwaiting) {
1789 gcstopm();
1790 goto top;
1793 gp = nil;
1794 // Check the global runnable queue once in a while to ensure fairness.
1795 // Otherwise two goroutines can completely occupy the local runqueue
1796 // by constantly respawning each other.
1797 tick = ((P*)g->m->p)->schedtick;
1798 // This is a fancy way to say tick%61==0,
1799 // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
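	// 0x4325c53f is 1126548799 = (2^36+3)/61, a rounded-up fixed-point
	// reciprocal of 61, so for any 32-bit tick the expression
	// ((uint64)tick*0x4325c53f)>>36 equals tick/61 exactly and the
	// subtraction below leaves tick%61.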
1800 if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched->runqsize > 0) {
1801 runtime_lock(&runtime_sched->lock);
1802 gp = globrunqget((P*)g->m->p, 1);
1803 runtime_unlock(&runtime_sched->lock);
1804 if(gp)
1805 resetspinning();
1807 if(gp == nil) {
1808 gp = runqget((P*)g->m->p);
1809 if(gp && g->m->spinning)
1810 runtime_throw("schedule: spinning with local work");
1812 if(gp == nil) {
1813 gp = findrunnable(); // blocks until work is available
1814 resetspinning();
1817 if(gp->lockedm) {
1818 // Hands off own p to the locked m,
1819 // then blocks waiting for a new p.
1820 startlockedm(gp);
1821 goto top;
1824 execute(gp);
1827 // Puts the current goroutine into a waiting state and calls unlockf.
1828 // If unlockf returns false, the goroutine is resumed.
1829 void
1830 runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason)
1832 if(g->atomicstatus != _Grunning)
1833 runtime_throw("bad g status");
1834 g->m->waitlock = lock;
1835 g->m->waitunlockf = unlockf;
1836 g->waitreason = runtime_gostringnocopy((const byte*)reason);
1837 runtime_mcall(park0);
1840 void gopark(FuncVal *, void *, String, byte, int)
1841 __asm__ (GOSYM_PREFIX "runtime.gopark");
1843 void
1844 gopark(FuncVal *unlockf, void *lock, String reason,
1845 byte traceEv __attribute__ ((unused)),
1846 int traceskip __attribute__ ((unused)))
1848 if(g->atomicstatus != _Grunning)
1849 runtime_throw("bad g status");
1850 g->m->waitlock = lock;
1851 g->m->waitunlockf = unlockf == nil ? nil : (void*)unlockf->fn;
1852 g->waitreason = reason;
1853 runtime_mcall(park0);
1856 static bool
1857 parkunlock(G *gp, void *lock)
1859 USED(gp);
1860 runtime_unlock(lock);
1861 return true;
1864 // Puts the current goroutine into a waiting state and unlocks the lock.
1865 // The goroutine can be made runnable again by calling runtime_ready(gp).
1866 void
1867 runtime_parkunlock(Lock *lock, const char *reason)
1869 runtime_park(parkunlock, lock, reason);
1872 void goparkunlock(Lock *, String, byte, int)
1873 __asm__ (GOSYM_PREFIX "runtime.goparkunlock");
1875 void
1876 goparkunlock(Lock *lock, String reason, byte traceEv __attribute__ ((unused)),
1877 int traceskip __attribute__ ((unused)))
1879 if(g->atomicstatus != _Grunning)
1880 runtime_throw("bad g status");
1881 g->m->waitlock = lock;
1882 g->m->waitunlockf = parkunlock;
1883 g->waitreason = reason;
1884 runtime_mcall(park0);
1887 // runtime_park continuation on g0.
1888 static void
1889 park0(G *gp)
1891 M *m;
1892 bool ok;
1894 m = g->m;
1895 gp->atomicstatus = _Gwaiting;
1896 gp->m = nil;
1897 m->curg = nil;
1898 if(m->waitunlockf) {
1899 ok = ((bool (*)(G*, void*))m->waitunlockf)(gp, m->waitlock);
1900 m->waitunlockf = nil;
1901 m->waitlock = nil;
1902 if(!ok) {
1903 gp->atomicstatus = _Grunnable;
1904 execute(gp); // Schedule it back, never returns.
1907 if(m->lockedg) {
1908 stoplockedm();
1909 execute(gp); // Never returns.
1911 schedule();
1914 // Scheduler yield.
1915 void
1916 runtime_gosched(void)
1918 if(g->atomicstatus != _Grunning)
1919 runtime_throw("bad g status");
1920 runtime_mcall(runtime_gosched0);
1923 // runtime_gosched continuation on g0.
1924 void
1925 runtime_gosched0(G *gp)
1927 M *m;
1929 m = g->m;
1930 gp->atomicstatus = _Grunnable;
1931 gp->m = nil;
1932 m->curg = nil;
1933 runtime_lock(&runtime_sched->lock);
1934 globrunqput(gp);
1935 runtime_unlock(&runtime_sched->lock);
1936 if(m->lockedg) {
1937 stoplockedm();
1938 execute(gp); // Never returns.
1940 schedule();
1943 // Finishes execution of the current goroutine.
1944 // Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack).
1945 // Since it does not return it does not matter. But if it is preempted
1946 // at the split stack check, GC will complain about inconsistent sp.
1947 void runtime_goexit1(void) __attribute__ ((noinline));
1948 void
1949 runtime_goexit1(void)
1951 if(g->atomicstatus != _Grunning)
1952 runtime_throw("bad g status");
1953 runtime_mcall(goexit0);
1956 // runtime_goexit1 continuation on g0.
1957 static void
1958 goexit0(G *gp)
1960 M *m;
1962 m = g->m;
1963 gp->atomicstatus = _Gdead;
1964 gp->entry = nil;
1965 gp->m = nil;
1966 gp->lockedm = nil;
1967 gp->paniconfault = 0;
1968 gp->_defer = nil; // should be true already but just in case.
1969 gp->_panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
1970 gp->writebuf.__values = nil;
1971 gp->writebuf.__count = 0;
1972 gp->writebuf.__capacity = 0;
1973 gp->waitreason = runtime_gostringnocopy(nil);
1974 gp->param = nil;
1975 m->curg = nil;
1976 m->lockedg = nil;
1977 if(m->locked & ~_LockExternal) {
1978 runtime_printf("invalid m->locked = %d\n", m->locked);
1979 runtime_throw("internal lockOSThread error");
1981 m->locked = 0;
1982 gfput((P*)m->p, gp);
1983 schedule();
1986 // The goroutine g is about to enter a system call.
1987 // Record that it's not using the cpu anymore.
1988 // This is called only from the go syscall library and cgocall,
1989 // not from the low-level system calls used by the runtime.
1991 // Entersyscall cannot split the stack: the runtime_gosave must
1992 // make g->sched refer to the caller's stack segment, because
1993 // entersyscall is going to return immediately after.
1995 void runtime_entersyscall(int32) __attribute__ ((no_split_stack));
1996 static void doentersyscall(uintptr, uintptr)
1997 __attribute__ ((no_split_stack, noinline));
1999 void
2000 runtime_entersyscall(int32 dummy __attribute__ ((unused)))
2002 // Save the registers in the g structure so that any pointers
2003 // held in registers will be seen by the garbage collector.
2004 getcontext(ucontext_arg(&g->gcregs[0]));
2006 // Do the work in a separate function, so that this function
2007 // doesn't save any registers on its own stack. If this
2008 // function does save any registers, we might store the wrong
2009 // value in the call to getcontext.
2011 // FIXME: This assumes that we do not need to save any
2012 // callee-saved registers to access the TLS variable g. We
2013 // don't want to put the ucontext_t on the stack because it is
2014 // large and we can not split the stack here.
2015 doentersyscall((uintptr)runtime_getcallerpc(&dummy),
2016 (uintptr)runtime_getcallersp(&dummy));
2019 static void
2020 doentersyscall(uintptr pc, uintptr sp)
2022 // Disable preemption because during this function g is in _Gsyscall status,
2023 // but can have inconsistent g->sched, do not let GC observe it.
2024 g->m->locks++;
2026 // Leave SP around for GC and traceback.
2027 #ifdef USING_SPLIT_STACK
2029 size_t gcstacksize;
2030 g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
2031 &g->gcnextsegment, &g->gcnextsp,
2032 &g->gcinitialsp);
2033 g->gcstacksize = (uintptr)gcstacksize;
2035 #else
2037 void *v;
2039 g->gcnextsp = (byte *) &v;
2041 #endif
2043 g->syscallsp = sp;
2044 g->syscallpc = pc;
2046 g->atomicstatus = _Gsyscall;
2048 if(runtime_atomicload(&runtime_sched->sysmonwait)) { // TODO: fast atomic
2049 runtime_lock(&runtime_sched->lock);
2050 if(runtime_atomicload(&runtime_sched->sysmonwait)) {
2051 runtime_atomicstore(&runtime_sched->sysmonwait, 0);
2052 runtime_notewakeup(&runtime_sched->sysmonnote);
2054 runtime_unlock(&runtime_sched->lock);
2057 g->m->mcache = nil;
2058 ((P*)(g->m->p))->m = 0;
2059 runtime_atomicstore(&((P*)g->m->p)->status, _Psyscall);
2060 if(runtime_atomicload(&runtime_sched->gcwaiting)) {
2061 runtime_lock(&runtime_sched->lock);
2062 if (runtime_sched->stopwait > 0 && runtime_cas(&((P*)g->m->p)->status, _Psyscall, _Pgcstop)) {
2063 if(--runtime_sched->stopwait == 0)
2064 runtime_notewakeup(&runtime_sched->stopnote);
2066 runtime_unlock(&runtime_sched->lock);
2069 g->m->locks--;
2072 // The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
2073 void
2074 runtime_entersyscallblock(int32 dummy __attribute__ ((unused)))
2076 P *p;
2078 g->m->locks++; // see comment in entersyscall
2080 // Leave SP around for GC and traceback.
2081 #ifdef USING_SPLIT_STACK
2083 size_t gcstacksize;
2084 g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
2085 &g->gcnextsegment, &g->gcnextsp,
2086 &g->gcinitialsp);
2087 g->gcstacksize = (uintptr)gcstacksize;
2089 #else
2090 g->gcnextsp = (byte *) &p;
2091 #endif
2093 // Save the registers in the g structure so that any pointers
2094 // held in registers will be seen by the garbage collector.
2095 getcontext(ucontext_arg(&g->gcregs[0]));
2097 g->syscallpc = (uintptr)runtime_getcallerpc(&dummy);
2098 g->syscallsp = (uintptr)runtime_getcallersp(&dummy);
2100 g->atomicstatus = _Gsyscall;
2102 p = releasep();
2103 handoffp(p);
2104 if(g->isbackground) // do not consider blocked scavenger for deadlock detection
2105 incidlelocked(1);
2107 g->m->locks--;
2110 // The goroutine g exited its system call.
2111 // Arrange for it to run on a cpu again.
2112 // This is called only from the go syscall library, not
2113 // from the low-level system calls used by the runtime.
2114 void
2115 runtime_exitsyscall(int32 dummy __attribute__ ((unused)))
2117 G *gp;
2119 gp = g;
2120 gp->m->locks++; // see comment in entersyscall
2122 if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
2123 incidlelocked(-1);
2125 gp->waitsince = 0;
2126 if(exitsyscallfast()) {
2127 // There's a cpu for us, so we can run.
2128 ((P*)gp->m->p)->syscalltick++;
2129 gp->atomicstatus = _Grunning;
2130 // Garbage collector isn't running (since we are),
2131 // so okay to clear gcstack and gcsp.
2132 #ifdef USING_SPLIT_STACK
2133 gp->gcstack = nil;
2134 #endif
2135 gp->gcnextsp = nil;
2136 runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
2137 gp->syscallsp = 0;
2138 gp->m->locks--;
2139 return;
2142 gp->m->locks--;
2144 // Call the scheduler.
2145 runtime_mcall(exitsyscall0);
2147 // Scheduler returned, so we're allowed to run now.
2148 // Delete the gcstack information that we left for
2149 // the garbage collector during the system call.
2150 // Must wait until now because until gosched returns
2151 // we don't know for sure that the garbage collector
2152 // is not running.
2153 #ifdef USING_SPLIT_STACK
2154 gp->gcstack = nil;
2155 #endif
2156 gp->gcnextsp = nil;
2157 runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
2159 gp->syscallsp = 0;
2161 // Note that this gp->m might be different than the earlier
2162 // gp->m after returning from runtime_mcall.
2163 ((P*)gp->m->p)->syscalltick++;
2166 static bool
2167 exitsyscallfast(void)
2169 G *gp;
2170 P *p;
2172 gp = g;
2174 // Freezetheworld sets stopwait but does not retake P's.
2175 if(runtime_sched->stopwait) {
2176 gp->m->p = 0;
2177 return false;
2180 // Try to re-acquire the last P.
2181 if(gp->m->p && ((P*)gp->m->p)->status == _Psyscall && runtime_cas(&((P*)gp->m->p)->status, _Psyscall, _Prunning)) {
2182 // There's a cpu for us, so we can run.
2183 gp->m->mcache = ((P*)gp->m->p)->mcache;
2184 ((P*)gp->m->p)->m = (uintptr)gp->m;
2185 return true;
2187 // Try to get any other idle P.
2188 gp->m->p = 0;
2189 if(runtime_sched->pidle) {
2190 runtime_lock(&runtime_sched->lock);
2191 p = pidleget();
2192 if(p && runtime_atomicload(&runtime_sched->sysmonwait)) {
2193 runtime_atomicstore(&runtime_sched->sysmonwait, 0);
2194 runtime_notewakeup(&runtime_sched->sysmonnote);
2196 runtime_unlock(&runtime_sched->lock);
2197 if(p) {
2198 acquirep(p);
2199 return true;
2202 return false;
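/*
 * Sketch (hypothetical, using C11 atomics instead of runtime_cas): the
 * heart of the fast path above is a single compare-and-swap that
 * re-acquires the P only if its status is still the one we left it in.
 *
 *   #include <stdatomic.h>
 *
 *   enum { STATUS_IDLE, STATUS_RUNNING, STATUS_SYSCALL };
 *
 *   static _Bool
 *   reacquire(atomic_int *status)
 *   {
 *       int expect = STATUS_SYSCALL;
 *       // Succeeds only if nobody (e.g. sysmon's retake) took the P
 *       // away while we were in the system call.
 *       return atomic_compare_exchange_strong(status, &expect, STATUS_RUNNING);
 *   }
 */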
2205 // runtime_exitsyscall slow path on g0.
2206 // Failed to acquire P, enqueue gp as runnable.
2207 static void
2208 exitsyscall0(G *gp)
2210 M *m;
2211 P *p;
2213 m = g->m;
2214 gp->atomicstatus = _Grunnable;
2215 gp->m = nil;
2216 m->curg = nil;
2217 runtime_lock(&runtime_sched->lock);
2218 p = pidleget();
2219 if(p == nil)
2220 globrunqput(gp);
2221 else if(runtime_atomicload(&runtime_sched->sysmonwait)) {
2222 runtime_atomicstore(&runtime_sched->sysmonwait, 0);
2223 runtime_notewakeup(&runtime_sched->sysmonnote);
2225 runtime_unlock(&runtime_sched->lock);
2226 if(p) {
2227 acquirep(p);
2228 execute(gp); // Never returns.
2230 if(m->lockedg) {
2231 // Wait until another thread schedules gp and so m again.
2232 stoplockedm();
2233 execute(gp); // Never returns.
2235 stopm();
2236 schedule(); // Never returns.
2239 void syscall_entersyscall(void)
2240 __asm__(GOSYM_PREFIX "syscall.Entersyscall");
2242 void syscall_entersyscall(void) __attribute__ ((no_split_stack));
2244 void
2245 syscall_entersyscall()
2247 runtime_entersyscall(0);
2250 void syscall_exitsyscall(void)
2251 __asm__(GOSYM_PREFIX "syscall.Exitsyscall");
2253 void syscall_exitsyscall(void) __attribute__ ((no_split_stack));
2255 void
2256 syscall_exitsyscall()
2258 runtime_exitsyscall(0);
2261 // Called from syscall package before fork.
2262 void syscall_runtime_BeforeFork(void)
2263 __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork");
2264 void
2265 syscall_runtime_BeforeFork(void)
2267 // Fork can hang if preempted with signals frequently enough (see issue 5517).
2268 // Ensure that we stay on the same M where we disable profiling.
2269 runtime_m()->locks++;
2270 if(runtime_m()->profilehz != 0)
2271 runtime_resetcpuprofiler(0);
2274 // Called from syscall package after fork in parent.
2275 void syscall_runtime_AfterFork(void)
2276 __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork");
2277 void
2278 syscall_runtime_AfterFork(void)
2280 int32 hz;
2282 hz = runtime_sched->profilehz;
2283 if(hz != 0)
2284 runtime_resetcpuprofiler(hz);
2285 runtime_m()->locks--;
2288 // Allocate a new g, with a stack big enough for stacksize bytes.
2289 G*
2290 runtime_malg(int32 stacksize, byte** ret_stack, uintptr* ret_stacksize)
2292 G *newg;
2294 newg = allocg();
2295 if(stacksize >= 0) {
2296 #if USING_SPLIT_STACK
2297 int dont_block_signals = 0;
2298 size_t ss_stacksize;
2300 *ret_stack = __splitstack_makecontext(stacksize,
2301 &newg->stackcontext[0],
2302 &ss_stacksize);
2303 *ret_stacksize = (uintptr)ss_stacksize;
2304 __splitstack_block_signals_context(&newg->stackcontext[0],
2305 &dont_block_signals, nil);
2306 #else
2307 // In 64-bit mode, the maximum Go allocation space is
2308 // 128G. Our stack size is 4M, which only permits 32K
2309 // goroutines. In order to not limit ourselves,
2310 // allocate the stacks out of separate memory. In
2311 // 32-bit mode, the Go allocation space is all of
2312 // memory anyhow.
2313 if(sizeof(void*) == 8) {
2314 void *p = runtime_SysAlloc(stacksize, &mstats()->other_sys);
2315 if(p == nil)
2316 runtime_throw("runtime: cannot allocate memory for goroutine stack");
2317 *ret_stack = (byte*)p;
2318 } else {
2319 *ret_stack = runtime_mallocgc(stacksize, 0, FlagNoProfiling|FlagNoGC);
2320 runtime_xadd(&runtime_stacks_sys, stacksize);
2322 *ret_stacksize = (uintptr)stacksize;
2323 newg->gcinitialsp = *ret_stack;
2324 newg->gcstacksize = (uintptr)stacksize;
2325 #endif
2327 return newg;
2330 G*
2331 __go_go(void (*fn)(void*), void* arg)
2333 byte *sp;
2334 size_t spsize;
2335 G *newg;
2336 P *p;
2338 //runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
2339 if(fn == nil) {
2340 g->m->throwing = -1; // do not dump full stacks
2341 runtime_throw("go of nil func value");
2343 g->m->locks++; // disable preemption because it can be holding p in a local var
2345 p = (P*)g->m->p;
2346 if((newg = gfget(p)) != nil) {
2347 #ifdef USING_SPLIT_STACK
2348 int dont_block_signals = 0;
2350 sp = __splitstack_resetcontext(&newg->stackcontext[0],
2351 &spsize);
2352 __splitstack_block_signals_context(&newg->stackcontext[0],
2353 &dont_block_signals, nil);
2354 #else
2355 sp = newg->gcinitialsp;
2356 spsize = newg->gcstacksize;
2357 if(spsize == 0)
2358 runtime_throw("bad spsize in __go_go");
2359 newg->gcnextsp = sp;
2360 #endif
2361 } else {
2362 uintptr malsize;
2364 newg = runtime_malg(StackMin, &sp, &malsize);
2365 spsize = (size_t)malsize;
2366 allgadd(newg);
2369 newg->entry = (byte*)fn;
2370 newg->param = arg;
2371 newg->gopc = (uintptr)__builtin_return_address(0);
2372 newg->atomicstatus = _Grunnable;
2373 if(p->goidcache == p->goidcacheend) {
2374 p->goidcache = runtime_xadd64(&runtime_sched->goidgen, GoidCacheBatch);
2375 p->goidcacheend = p->goidcache + GoidCacheBatch;
2377 newg->goid = p->goidcache++;
2380 // Avoid warnings about variables clobbered by
2381 // longjmp.
2382 byte * volatile vsp = sp;
2383 size_t volatile vspsize = spsize;
2384 G * volatile vnewg = newg;
2385 ucontext_t * volatile uc;
2387 uc = ucontext_arg(&vnewg->context[0]);
2388 getcontext(uc);
2389 uc->uc_stack.ss_sp = vsp;
2390 uc->uc_stack.ss_size = vspsize;
2391 makecontext(uc, kickoff, 0);
2393 runqput(p, vnewg);
2395 if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
2396 wakep();
2397 g->m->locks--;
2398 return vnewg;
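/*
 * Sketch of the goid cache used above (hypothetical names): each
 * worker grabs a batch of ids from a shared generator with one atomic
 * add, then hands them out locally without further synchronization.
 *
 *   #include <stdatomic.h>
 *   #include <stdint.h>
 *
 *   enum { ID_BATCH = 16 };
 *
 *   static atomic_uint_least64_t idgen;        // shared generator
 *
 *   struct idcache { uint64_t next, end; };    // per-worker cache
 *
 *   static uint64_t
 *   next_id(struct idcache *c)
 *   {
 *       if(c->next == c->end) {
 *           // One atomic op refills the cache with ID_BATCH ids.
 *           uint64_t old = atomic_fetch_add(&idgen, ID_BATCH);
 *           c->next = old;
 *           c->end = old + ID_BATCH;
 *       }
 *       return c->next++;
 *   }
 */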
2402 static void
2403 allgadd(G *gp)
2405 G **new;
2406 uintptr cap;
2408 runtime_lock(&allglock);
2409 if(runtime_allglen >= allgcap) {
2410 cap = 4096/sizeof(new[0]);
2411 if(cap < 2*allgcap)
2412 cap = 2*allgcap;
2413 new = runtime_malloc(cap*sizeof(new[0]));
2414 if(new == nil)
2415 runtime_throw("runtime: cannot allocate memory");
2416 if(runtime_allg != nil) {
2417 runtime_memmove(new, runtime_allg, runtime_allglen*sizeof(new[0]));
2418 runtime_free(runtime_allg);
2420 runtime_allg = new;
2421 allgcap = cap;
2423 runtime_allg[runtime_allglen++] = gp;
2424 runtime_unlock(&allglock);
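/*
 * Sketch of the growth policy used by allgadd (hypothetical,
 * malloc-based rather than the runtime allocator): append into an
 * array that starts at one page worth of slots and doubles thereafter,
 * so the amortized cost per append stays constant.
 *
 *   #include <stdlib.h>
 *   #include <string.h>
 *
 *   struct vec { void **items; size_t len, cap; };
 *
 *   static int
 *   vec_append(struct vec *v, void *item)
 *   {
 *       if(v->len >= v->cap) {
 *           size_t ncap = v->cap ? 2*v->cap : 4096/sizeof(void*);
 *           void **n = malloc(ncap * sizeof(void*));
 *           if(n == NULL)
 *               return 0;
 *           if(v->items != NULL) {
 *               memcpy(n, v->items, v->len * sizeof(void*));
 *               free(v->items);
 *           }
 *           v->items = n;
 *           v->cap = ncap;
 *       }
 *       v->items[v->len++] = item;
 *       return 1;
 *   }
 */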
2427 // Put on gfree list.
2428 // If local list is too long, transfer a batch to the global list.
2429 static void
2430 gfput(P *p, G *gp)
2432 gp->schedlink = (uintptr)p->gfree;
2433 p->gfree = gp;
2434 p->gfreecnt++;
2435 if(p->gfreecnt >= 64) {
2436 runtime_lock(&runtime_sched->gflock);
2437 while(p->gfreecnt >= 32) {
2438 p->gfreecnt--;
2439 gp = p->gfree;
2440 p->gfree = (G*)gp->schedlink;
2441 gp->schedlink = (uintptr)runtime_sched->gfree;
2442 runtime_sched->gfree = gp;
2444 runtime_unlock(&runtime_sched->gflock);
2448 // Get from gfree list.
2449 // If local list is empty, grab a batch from global list.
2450 static G*
2451 gfget(P *p)
2453 G *gp;
2455 retry:
2456 gp = p->gfree;
2457 if(gp == nil && runtime_sched->gfree) {
2458 runtime_lock(&runtime_sched->gflock);
2459 while(p->gfreecnt < 32 && runtime_sched->gfree) {
2460 p->gfreecnt++;
2461 gp = runtime_sched->gfree;
2462 runtime_sched->gfree = (G*)gp->schedlink;
2463 gp->schedlink = (uintptr)p->gfree;
2464 p->gfree = gp;
2466 runtime_unlock(&runtime_sched->gflock);
2467 goto retry;
2469 if(gp) {
2470 p->gfree = (G*)gp->schedlink;
2471 p->gfreecnt--;
2473 return gp;
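/*
 * Sketch of the free-list shape used by gfput/gfget (hypothetical
 * types): an intrusive LIFO with a counter; a caller would move a
 * batch to a shared list under a lock once the local count passes a
 * threshold, and refill from it when the local list runs dry.
 *
 *   struct node { struct node *next; };
 *
 *   static struct node *local_free;
 *   static int local_cnt;
 *
 *   static void
 *   freelist_put(struct node *n)
 *   {
 *       n->next = local_free;
 *       local_free = n;
 *       local_cnt++;
 *       // if(local_cnt >= 64) ... move half to the shared list ...
 *   }
 *
 *   static struct node *
 *   freelist_get(void)
 *   {
 *       struct node *n = local_free;
 *       if(n != 0) {
 *           local_free = n->next;
 *           local_cnt--;
 *       }
 *       return n;
 *   }
 */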
2476 // Purge all cached G's from gfree list to the global list.
2477 static void
2478 gfpurge(P *p)
2480 G *gp;
2482 runtime_lock(&runtime_sched->gflock);
2483 while(p->gfreecnt) {
2484 p->gfreecnt--;
2485 gp = p->gfree;
2486 p->gfree = (G*)gp->schedlink;
2487 gp->schedlink = (uintptr)runtime_sched->gfree;
2488 runtime_sched->gfree = gp;
2490 runtime_unlock(&runtime_sched->gflock);
2493 void
2494 runtime_Breakpoint(void)
2496 runtime_breakpoint();
2499 void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched");
2501 void
2502 runtime_Gosched(void)
2504 runtime_gosched();
2507 // Implementation of runtime.GOMAXPROCS.
2508 // delete when scheduler is even stronger
2510 intgo runtime_GOMAXPROCS(intgo)
2511 __asm__(GOSYM_PREFIX "runtime.GOMAXPROCS");
2513 intgo
2514 runtime_GOMAXPROCS(intgo n)
2516 intgo ret;
2518 if(n > _MaxGomaxprocs)
2519 n = _MaxGomaxprocs;
2520 runtime_lock(&runtime_sched->lock);
2521 ret = (intgo)runtime_gomaxprocs;
2522 if(n <= 0 || n == ret) {
2523 runtime_unlock(&runtime_sched->lock);
2524 return ret;
2526 runtime_unlock(&runtime_sched->lock);
2528 runtime_acquireWorldsema();
2529 g->m->gcing = 1;
2530 runtime_stopTheWorldWithSema();
2531 newprocs = (int32)n;
2532 g->m->gcing = 0;
2533 runtime_releaseWorldsema();
2534 runtime_startTheWorldWithSema();
2536 return ret;
2539 // lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
2540 // after they modify m->locked. Do not allow preemption during this call,
2541 // or else the m might be different in this function than in the caller.
2542 static void
2543 lockOSThread(void)
2545 g->m->lockedg = g;
2546 g->lockedm = g->m;
2549 void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
2550 void
2551 runtime_LockOSThread(void)
2553 g->m->locked |= _LockExternal;
2554 lockOSThread();
2557 void
2558 runtime_lockOSThread(void)
2560 g->m->locked += _LockInternal;
2561 lockOSThread();
2565 // unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
2566 // after they update m->locked. Do not allow preemption during this call,
2567 // or else the m might be different in this function than in the caller.
2568 static void
2569 unlockOSThread(void)
2571 if(g->m->locked != 0)
2572 return;
2573 g->m->lockedg = nil;
2574 g->lockedm = nil;
2577 void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
2579 void
2580 runtime_UnlockOSThread(void)
2582 g->m->locked &= ~_LockExternal;
2583 unlockOSThread();
2586 void
2587 runtime_unlockOSThread(void)
2589 if(g->m->locked < _LockInternal)
2590 runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
2591 g->m->locked -= _LockInternal;
2592 unlockOSThread();
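/*
 * Sketch of how m->locked packs both kinds of locks into one word
 * (mirroring the _LockExternal / _LockInternal usage above; the
 * constants and names here are hypothetical): external locking is a
 * single bit, internal locking is a counter kept in the higher bits.
 *
 *   enum {
 *       LOCK_EXTERNAL = 1,   // set/cleared by LockOSThread/UnlockOSThread
 *       LOCK_INTERNAL = 2,   // each internal lock adds this amount
 *   };
 *
 *   static unsigned locked;
 *
 *   static void lock_external(void)   { locked |= LOCK_EXTERNAL; }
 *   static void unlock_external(void) { locked &= ~LOCK_EXTERNAL; }
 *   static void lock_internal(void)   { locked += LOCK_INTERNAL; }
 *   static void unlock_internal(void) { locked -= LOCK_INTERNAL; }
 *   static int  still_locked(void)    { return locked != 0; }
 */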
2595 bool
2596 runtime_lockedOSThread(void)
2598 return g->lockedm != nil && g->m->lockedg != nil;
2601 int32
2602 runtime_gcount(void)
2604 G *gp;
2605 int32 n, s;
2606 uintptr i;
2608 n = 0;
2609 runtime_lock(&allglock);
2610 // TODO(dvyukov): runtime.NumGoroutine() is O(N).
2611 // We do not want to increment/decrement centralized counter in newproc/goexit,
2612 // just to make runtime.NumGoroutine() faster.
2613 // Compromise solution is to introduce per-P counters of active goroutines.
2614 for(i = 0; i < runtime_allglen; i++) {
2615 gp = runtime_allg[i];
2616 s = gp->atomicstatus;
2617 if(s == _Grunnable || s == _Grunning || s == _Gsyscall || s == _Gwaiting)
2618 n++;
2620 runtime_unlock(&allglock);
2621 return n;
2624 int32
2625 runtime_mcount(void)
2627 return runtime_sched->mcount;
2630 static struct {
2631 uint32 lock;
2632 int32 hz;
2633 } prof;
2635 static void System(void) {}
2636 static void GC(void) {}
2638 // Called if we receive a SIGPROF signal.
2639 void
2640 runtime_sigprof()
2642 M *mp = g->m;
2643 int32 n, i;
2644 bool traceback;
2645 uintptr pcbuf[TracebackMaxFrames];
2646 Location locbuf[TracebackMaxFrames];
2647 Slice stk;
2649 if(prof.hz == 0)
2650 return;
2652 if(mp == nil)
2653 return;
2655 // Profiling runs concurrently with GC, so it must not allocate.
2656 mp->mallocing++;
2658 traceback = true;
2660 if(mp->mcache == nil)
2661 traceback = false;
2663 n = 0;
2665 if(runtime_atomicload(&runtime_in_callers) > 0) {
2666 // If SIGPROF arrived while already fetching runtime
2667 // callers we can have trouble on older systems
2668 // because the unwind library calls dl_iterate_phdr
2669 // which was not recursive in the past.
2670 traceback = false;
2673 if(traceback) {
2674 n = runtime_callers(0, locbuf, nelem(locbuf), false);
2675 for(i = 0; i < n; i++)
2676 pcbuf[i] = locbuf[i].pc;
2678 if(!traceback || n <= 0) {
2679 n = 2;
2680 pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
2681 if(mp->gcing || mp->helpgc)
2682 pcbuf[1] = (uintptr)GC;
2683 else
2684 pcbuf[1] = (uintptr)System;
2687 if (prof.hz != 0) {
2688 stk.__values = &pcbuf[0];
2689 stk.__count = n;
2690 stk.__capacity = n;
2692 // Simple cas-lock to coordinate with setcpuprofilerate.
2693 while (!runtime_cas(&prof.lock, 0, 1)) {
2694 runtime_osyield();
2696 if (prof.hz != 0) {
2697 runtime_cpuprofAdd(stk);
2699 runtime_atomicstore(&prof.lock, 0);
2702 mp->mallocing--;
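/*
 * Sketch of the "simple cas-lock" used above, in C11 atomics
 * (hypothetical names): a one-word lock acquired by compare-and-swap,
 * spinning with a yield on contention.
 *
 *   #include <sched.h>
 *   #include <stdatomic.h>
 *
 *   static atomic_uint proflock;
 *
 *   static void
 *   cas_lock(void)
 *   {
 *       unsigned expect = 0;
 *       while(!atomic_compare_exchange_weak(&proflock, &expect, 1)) {
 *           expect = 0;
 *           sched_yield();          // counterpart of runtime_osyield
 *       }
 *   }
 *
 *   static void
 *   cas_unlock(void)
 *   {
 *       atomic_store(&proflock, 0);
 *   }
 */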
2705 // Arrange to call fn with a traceback hz times a second.
2706 void
2707 runtime_setcpuprofilerate_m(int32 hz)
2709 // Force sane arguments.
2710 if(hz < 0)
2711 hz = 0;
2713 // Disable preemption, otherwise we can be rescheduled to another thread
2714 // that has profiling enabled.
2715 g->m->locks++;
2717 // Stop the profiler on this thread so that it is safe to lock prof:
2718 // if a profiling signal came in while we had prof locked,
2719 // it would deadlock.
2720 runtime_resetcpuprofiler(0);
2722 while (!runtime_cas(&prof.lock, 0, 1)) {
2723 runtime_osyield();
2725 prof.hz = hz;
2726 runtime_atomicstore(&prof.lock, 0);
2728 runtime_lock(&runtime_sched->lock);
2729 runtime_sched->profilehz = hz;
2730 runtime_unlock(&runtime_sched->lock);
2732 if(hz != 0)
2733 runtime_resetcpuprofiler(hz);
2735 g->m->locks--;
2738 // Change number of processors. The world is stopped, sched is locked.
2739 static void
2740 procresize(int32 new)
2742 int32 i, old;
2743 bool pempty;
2744 G *gp;
2745 P *p;
2746 intgo j;
2748 old = runtime_gomaxprocs;
2749 if(old < 0 || old > _MaxGomaxprocs || new <= 0 || new >_MaxGomaxprocs)
2750 runtime_throw("procresize: invalid arg");
2751 // initialize new P's
2752 for(i = 0; i < new; i++) {
2753 p = runtime_allp[i];
2754 if(p == nil) {
2755 p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
2756 p->id = i;
2757 p->status = _Pgcstop;
2758 p->deferpool.__values = &p->deferpoolbuf[0];
2759 p->deferpool.__count = 0;
2760 p->deferpool.__capacity = nelem(p->deferpoolbuf);
2761 runtime_atomicstorep(&runtime_allp[i], p);
2763 if(p->mcache == nil) {
2764 if(old==0 && i==0)
2765 p->mcache = g->m->mcache; // bootstrap
2766 else
2767 p->mcache = runtime_allocmcache();
2771 // redistribute runnable G's evenly
2772 // collect all runnable goroutines in global queue preserving FIFO order
2773 // FIFO order is required to ensure fairness even during frequent GCs
2774 // see http://golang.org/issue/7126
2775 pempty = false;
2776 while(!pempty) {
2777 pempty = true;
2778 for(i = 0; i < old; i++) {
2779 p = runtime_allp[i];
2780 if(p->runqhead == p->runqtail)
2781 continue;
2782 pempty = false;
2783 // pop from tail of local queue
2784 p->runqtail--;
2785 gp = (G*)p->runq[p->runqtail%nelem(p->runq)];
2786 // push onto head of global queue
2787 gp->schedlink = runtime_sched->runqhead;
2788 runtime_sched->runqhead = (uintptr)gp;
2789 if(runtime_sched->runqtail == 0)
2790 runtime_sched->runqtail = (uintptr)gp;
2791 runtime_sched->runqsize++;
2794 // fill local queues with at most nelem(p->runq)/2 goroutines
2795 // start at 1 because current M already executes some G and will acquire allp[0] below,
2796 // so if we have a spare G we want to put it into allp[1].
2797 for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched->runqsize > 0; i++) {
2798 gp = (G*)runtime_sched->runqhead;
2799 runtime_sched->runqhead = gp->schedlink;
2800 if(runtime_sched->runqhead == 0)
2801 runtime_sched->runqtail = 0;
2802 runtime_sched->runqsize--;
2803 runqput(runtime_allp[i%new], gp);
2806 // free unused P's
2807 for(i = new; i < old; i++) {
2808 p = runtime_allp[i];
2809 for(j = 0; j < p->deferpool.__count; j++) {
2810 ((struct _defer**)p->deferpool.__values)[j] = nil;
2812 p->deferpool.__count = 0;
2813 runtime_freemcache(p->mcache);
2814 p->mcache = nil;
2815 gfpurge(p);
2816 p->status = _Pdead;
2817 // can't free P itself because it can be referenced by an M in syscall
2820 if(g->m->p)
2821 ((P*)g->m->p)->m = 0;
2822 g->m->p = 0;
2823 g->m->mcache = nil;
2824 p = runtime_allp[0];
2825 p->m = 0;
2826 p->status = _Pidle;
2827 acquirep(p);
2828 for(i = new-1; i > 0; i--) {
2829 p = runtime_allp[i];
2830 p->status = _Pidle;
2831 pidleput(p);
2833 runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
2836 // Associate p and the current m.
2837 static void
2838 acquirep(P *p)
2840 M *m;
2842 m = g->m;
2843 if(m->p || m->mcache)
2844 runtime_throw("acquirep: already in go");
2845 if(p->m || p->status != _Pidle) {
2846 runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? ((M*)p->m)->id : 0, p->status);
2847 runtime_throw("acquirep: invalid p state");
2849 m->mcache = p->mcache;
2850 m->p = (uintptr)p;
2851 p->m = (uintptr)m;
2852 p->status = _Prunning;
2855 // Disassociate p and the current m.
2856 static P*
2857 releasep(void)
2859 M *m;
2860 P *p;
2862 m = g->m;
2863 if(m->p == 0 || m->mcache == nil)
2864 runtime_throw("releasep: invalid arg");
2865 p = (P*)m->p;
2866 if((M*)p->m != m || p->mcache != m->mcache || p->status != _Prunning) {
2867 runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
2868 m, m->p, p->m, m->mcache, p->mcache, p->status);
2869 runtime_throw("releasep: invalid p state");
2871 m->p = 0;
2872 m->mcache = nil;
2873 p->m = 0;
2874 p->status = _Pidle;
2875 return p;
2878 static void
2879 incidlelocked(int32 v)
2881 runtime_lock(&runtime_sched->lock);
2882 runtime_sched->nmidlelocked += v;
2883 if(v > 0)
2884 checkdead();
2885 runtime_unlock(&runtime_sched->lock);
2888 // Check for deadlock situation.
2889 // The check is based on the number of running M's; if it is 0, we have a deadlock.
2890 static void
2891 checkdead(void)
2893 G *gp;
2894 int32 run, grunning, s;
2895 uintptr i;
2897 // For -buildmode=c-shared or -buildmode=c-archive it's OK if
2898 // there are no running goroutines. The calling program is
2899 // assumed to be running.
2900 if(runtime_isarchive) {
2901 return;
2904 // -1 for sysmon
2905 run = runtime_sched->mcount - runtime_sched->nmidle - runtime_sched->nmidlelocked - 1 - countextra();
2906 if(run > 0)
2907 return;
2908 // If we are dying because of a signal caught on an already idle thread,
2909 // freezetheworld will cause all running threads to block.
2910 // And the runtime will essentially enter a deadlock state,
2911 // except that there is a thread that will call runtime_exit soon.
2912 if(runtime_panicking() > 0)
2913 return;
2914 if(run < 0) {
2915 runtime_printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
2916 runtime_sched->nmidle, runtime_sched->nmidlelocked, runtime_sched->mcount);
2917 runtime_throw("checkdead: inconsistent counts");
2919 grunning = 0;
2920 runtime_lock(&allglock);
2921 for(i = 0; i < runtime_allglen; i++) {
2922 gp = runtime_allg[i];
2923 if(gp->isbackground)
2924 continue;
2925 s = gp->atomicstatus;
2926 if(s == _Gwaiting)
2927 grunning++;
2928 else if(s == _Grunnable || s == _Grunning || s == _Gsyscall) {
2929 runtime_unlock(&allglock);
2930 runtime_printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);
2931 runtime_throw("checkdead: runnable g");
2934 runtime_unlock(&allglock);
2935 if(grunning == 0) // possible if main goroutine calls runtime_Goexit()
2936 runtime_throw("no goroutines (main called runtime.Goexit) - deadlock!");
2937 g->m->throwing = -1; // do not dump full stacks
2938 runtime_throw("all goroutines are asleep - deadlock!");
2941 static void
2942 sysmon(void)
2944 uint32 idle, delay;
2945 int64 now, lastpoll, lasttrace;
2946 G *gp;
2948 lasttrace = 0;
2949 idle = 0; // how many cycles in succession we have not woken anybody up
2950 delay = 0;
2951 for(;;) {
2952 if(idle == 0) // start with 20us sleep...
2953 delay = 20;
2954 else if(idle > 50) // start doubling the sleep after 1ms...
2955 delay *= 2;
2956 if(delay > 10*1000) // up to 10ms
2957 delay = 10*1000;
2958 runtime_usleep(delay);
2959 if(runtime_debug.schedtrace <= 0 &&
2960 (runtime_sched->gcwaiting || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic
2961 runtime_lock(&runtime_sched->lock);
2962 if(runtime_atomicload(&runtime_sched->gcwaiting) || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs) {
2963 runtime_atomicstore(&runtime_sched->sysmonwait, 1);
2964 runtime_unlock(&runtime_sched->lock);
2965 runtime_notesleep(&runtime_sched->sysmonnote);
2966 runtime_noteclear(&runtime_sched->sysmonnote);
2967 idle = 0;
2968 delay = 20;
2969 } else
2970 runtime_unlock(&runtime_sched->lock);
2972 // poll network if not polled for more than 10ms
2973 lastpoll = runtime_atomicload64(&runtime_sched->lastpoll);
2974 now = runtime_nanotime();
2975 if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
2976 runtime_cas64(&runtime_sched->lastpoll, lastpoll, now);
2977 gp = runtime_netpoll(false); // non-blocking
2978 if(gp) {
2979 // Need to decrement number of idle locked M's
2980 // (pretending that one more is running) before injectglist.
2981 // Otherwise it can lead to the following situation:
2982 // injectglist grabs all P's but before it starts M's to run the P's,
2983 // another M returns from syscall, finishes running its G,
2984 // observes that there is no work to do and no other running M's
2985 // and reports deadlock.
2986 incidlelocked(-1);
2987 injectglist(gp);
2988 incidlelocked(1);
2991 // retake P's blocked in syscalls
2992 // and preempt long running G's
2993 if(retake(now))
2994 idle = 0;
2995 else
2996 idle++;
2998 if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) {
2999 lasttrace = now;
3000 runtime_schedtrace(runtime_debug.scheddetail);
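/*
 * Sketch of sysmon's sleep backoff (the values are taken from the loop
 * above; the function name is hypothetical): 20us while there is work,
 * doubling after 50 idle iterations, capped at 10ms.
 *
 *   static unsigned
 *   next_delay_us(unsigned idle, unsigned delay)
 *   {
 *       if(idle == 0)              // we recently found work: poll fast
 *           delay = 20;
 *       else if(idle > 50)         // long idle: back off exponentially
 *           delay *= 2;
 *       if(delay > 10*1000)        // but never sleep more than 10ms
 *           delay = 10*1000;
 *       return delay;
 *   }
 */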
3005 typedef struct Pdesc Pdesc;
3006 struct Pdesc
3008 uint32 schedtick;
3009 int64 schedwhen;
3010 uint32 syscalltick;
3011 int64 syscallwhen;
3013 static Pdesc pdesc[_MaxGomaxprocs];
3015 static uint32
3016 retake(int64 now)
3018 uint32 i, s, n;
3019 int64 t;
3020 P *p;
3021 Pdesc *pd;
3023 n = 0;
3024 for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
3025 p = runtime_allp[i];
3026 if(p==nil)
3027 continue;
3028 pd = &pdesc[i];
3029 s = p->status;
3030 if(s == _Psyscall) {
3031 // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
3032 t = p->syscalltick;
3033 if(pd->syscalltick != t) {
3034 pd->syscalltick = t;
3035 pd->syscallwhen = now;
3036 continue;
3038 // On the one hand we don't want to retake Ps if there is no other work to do,
3039 // but on the other hand we want to retake them eventually
3040 // because they can prevent the sysmon thread from deep sleep.
3041 if(p->runqhead == p->runqtail &&
3042 runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) > 0 &&
3043 pd->syscallwhen + 10*1000*1000 > now)
3044 continue;
3045 // Need to decrement number of idle locked M's
3046 // (pretending that one more is running) before the CAS.
3047 // Otherwise the M from which we retake can exit the syscall,
3048 // increment nmidle and report deadlock.
3049 incidlelocked(-1);
3050 if(runtime_cas(&p->status, s, _Pidle)) {
3051 n++;
3052 handoffp(p);
3054 incidlelocked(1);
3055 } else if(s == _Prunning) {
3056 // Preempt G if it's running for more than 10ms.
3057 t = p->schedtick;
3058 if(pd->schedtick != t) {
3059 pd->schedtick = t;
3060 pd->schedwhen = now;
3061 continue;
3063 if(pd->schedwhen + 10*1000*1000 > now)
3064 continue;
3065 // preemptone(p);
3068 return n;
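/*
 * Sketch of the tick-and-timestamp watchdog that retake() applies to
 * each P (hypothetical names): remember the last tick we saw and when
 * we first saw it; the P is only considered stuck once the same tick
 * has been observed for longer than the threshold.
 *
 *   #include <stdint.h>
 *
 *   struct watchdog {
 *       uint32_t lasttick;   // tick value seen on the previous pass
 *       int64_t  when;       // time (ns) we first saw that value
 *   };
 *
 *   static int
 *   stuck(struct watchdog *w, uint32_t tick, int64_t now, int64_t threshold)
 *   {
 *       if(w->lasttick != tick) {   // the P made progress: restart timer
 *           w->lasttick = tick;
 *           w->when = now;
 *           return 0;
 *       }
 *       return w->when + threshold <= now;
 *   }
 */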
3071 // Tell all goroutines that they have been preempted and they should stop.
3072 // This function is purely best-effort. It can fail to inform a goroutine if a
3073 // processor just started running it.
3074 // No locks need to be held.
3075 // Returns true if preemption request was issued to at least one goroutine.
3076 static bool
3077 preemptall(void)
3079 return false;
3082 void
3083 runtime_schedtrace(bool detailed)
3085 static int64 starttime;
3086 int64 now;
3087 int64 id1, id2, id3;
3088 int32 i, t, h;
3089 uintptr gi;
3090 const char *fmt;
3091 M *mp, *lockedm;
3092 G *gp, *lockedg;
3093 P *p;
3095 now = runtime_nanotime();
3096 if(starttime == 0)
3097 starttime = now;
3099 runtime_lock(&runtime_sched->lock);
3100 runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
3101 (now-starttime)/1000000, runtime_gomaxprocs, runtime_sched->npidle, runtime_sched->mcount,
3102 runtime_sched->nmidle, runtime_sched->runqsize);
3103 if(detailed) {
3104 runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
3105 runtime_sched->gcwaiting, runtime_sched->nmidlelocked, runtime_sched->nmspinning,
3106 runtime_sched->stopwait, runtime_sched->sysmonwait);
3108 // We must be careful while reading data from P's, M's and G's.
3109 // Even if we hold schedlock, most data can be changed concurrently.
3110 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
3111 for(i = 0; i < runtime_gomaxprocs; i++) {
3112 p = runtime_allp[i];
3113 if(p == nil)
3114 continue;
3115 mp = (M*)p->m;
3116 h = runtime_atomicload(&p->runqhead);
3117 t = runtime_atomicload(&p->runqtail);
3118 if(detailed)
3119 runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
3120 i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
3121 else {
3122 // In non-detailed mode format lengths of per-P run queues as:
3123 // [len1 len2 len3 len4]
3124 fmt = " %d";
3125 if(runtime_gomaxprocs == 1)
3126 fmt = " [%d]\n";
3127 else if(i == 0)
3128 fmt = " [%d";
3129 else if(i == runtime_gomaxprocs-1)
3130 fmt = " %d]\n";
3131 runtime_printf(fmt, t-h);
3134 if(!detailed) {
3135 runtime_unlock(&runtime_sched->lock);
3136 return;
3138 for(mp = runtime_allm; mp; mp = mp->alllink) {
3139 p = (P*)mp->p;
3140 gp = mp->curg;
3141 lockedg = mp->lockedg;
3142 id1 = -1;
3143 if(p)
3144 id1 = p->id;
3145 id2 = -1;
3146 if(gp)
3147 id2 = gp->goid;
3148 id3 = -1;
3149 if(lockedg)
3150 id3 = lockedg->goid;
3151 runtime_printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
3152 " locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
3153 mp->id, id1, id2,
3154 mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
3155 mp->spinning, mp->blocked, id3);
3157 runtime_lock(&allglock);
3158 for(gi = 0; gi < runtime_allglen; gi++) {
3159 gp = runtime_allg[gi];
3160 mp = gp->m;
3161 lockedm = gp->lockedm;
3162 runtime_printf(" G%D: status=%d(%S) m=%d lockedm=%d\n",
3163 gp->goid, gp->atomicstatus, gp->waitreason, mp ? mp->id : -1,
3164 lockedm ? lockedm->id : -1);
3166 runtime_unlock(&allglock);
3167 runtime_unlock(&runtime_sched->lock);
3170 // Put mp on midle list.
3171 // Sched must be locked.
3172 static void
3173 mput(M *mp)
3175 mp->schedlink = runtime_sched->midle;
3176 runtime_sched->midle = (uintptr)mp;
3177 runtime_sched->nmidle++;
3178 checkdead();
3181 // Try to get an m from midle list.
3182 // Sched must be locked.
3183 static M*
3184 mget(void)
3186 M *mp;
3188 if((mp = (M*)runtime_sched->midle) != nil){
3189 runtime_sched->midle = mp->schedlink;
3190 runtime_sched->nmidle--;
3192 return mp;
3195 // Put gp on the global runnable queue.
3196 // Sched must be locked.
3197 static void
3198 globrunqput(G *gp)
3200 gp->schedlink = 0;
3201 if(runtime_sched->runqtail)
3202 ((G*)runtime_sched->runqtail)->schedlink = (uintptr)gp;
3203 else
3204 runtime_sched->runqhead = (uintptr)gp;
3205 runtime_sched->runqtail = (uintptr)gp;
3206 runtime_sched->runqsize++;
3209 // Put a batch of runnable goroutines on the global runnable queue.
3210 // Sched must be locked.
3211 static void
3212 globrunqputbatch(G *ghead, G *gtail, int32 n)
3214 gtail->schedlink = 0;
3215 if(runtime_sched->runqtail)
3216 ((G*)runtime_sched->runqtail)->schedlink = (uintptr)ghead;
3217 else
3218 runtime_sched->runqhead = (uintptr)ghead;
3219 runtime_sched->runqtail = (uintptr)gtail;
3220 runtime_sched->runqsize += n;
3223 // Try to get a batch of G's from the global runnable queue.
3224 // Sched must be locked.
3225 static G*
3226 globrunqget(P *p, int32 max)
3228 G *gp, *gp1;
3229 int32 n;
3231 if(runtime_sched->runqsize == 0)
3232 return nil;
3233 n = runtime_sched->runqsize/runtime_gomaxprocs+1;
3234 if(n > runtime_sched->runqsize)
3235 n = runtime_sched->runqsize;
3236 if(max > 0 && n > max)
3237 n = max;
3238 if((uint32)n > nelem(p->runq)/2)
3239 n = nelem(p->runq)/2;
3240 runtime_sched->runqsize -= n;
3241 if(runtime_sched->runqsize == 0)
3242 runtime_sched->runqtail = 0;
3243 gp = (G*)runtime_sched->runqhead;
3244 runtime_sched->runqhead = gp->schedlink;
3245 n--;
3246 while(n--) {
3247 gp1 = (G*)runtime_sched->runqhead;
3248 runtime_sched->runqhead = gp1->schedlink;
3249 runqput(p, gp1);
3251 return gp;
3254 // Put p on the pidle list.
3255 // Sched must be locked.
3256 static void
3257 pidleput(P *p)
3259 p->link = runtime_sched->pidle;
3260 runtime_sched->pidle = (uintptr)p;
3261 runtime_xadd(&runtime_sched->npidle, 1); // TODO: fast atomic
3264 // Try to get a P from the pidle list.
3265 // Sched must be locked.
3266 static P*
3267 pidleget(void)
3269 P *p;
3271 p = (P*)runtime_sched->pidle;
3272 if(p) {
3273 runtime_sched->pidle = p->link;
3274 runtime_xadd(&runtime_sched->npidle, -1); // TODO: fast atomic
3276 return p;
3279 // Try to put g on local runnable queue.
3280 // If it's full, put onto global queue.
3281 // Executed only by the owner P.
3282 static void
3283 runqput(P *p, G *gp)
3285 uint32 h, t;
3287 retry:
3288 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3289 t = p->runqtail;
3290 if(t - h < nelem(p->runq)) {
3291 p->runq[t%nelem(p->runq)] = (uintptr)gp;
3292 runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
3293 return;
3295 if(runqputslow(p, gp, h, t))
3296 return;
3297 // the queue is not full, so the put above must succeed
3298 goto retry;
3301 // Put g and a batch of work from local runnable queue on global queue.
3302 // Executed only by the owner P.
3303 static bool
3304 runqputslow(P *p, G *gp, uint32 h, uint32 t)
3306 G *batch[nelem(p->runq)/2+1];
3307 uint32 n, i;
3309 // First, grab a batch from local queue.
3310 n = t-h;
3311 n = n/2;
3312 if(n != nelem(p->runq)/2)
3313 runtime_throw("runqputslow: queue is not full");
3314 for(i=0; i<n; i++)
3315 batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
3316 if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3317 return false;
3318 batch[n] = gp;
3319 // Link the goroutines.
3320 for(i=0; i<n; i++)
3321 batch[i]->schedlink = (uintptr)batch[i+1];
3322 // Now put the batch on global queue.
3323 runtime_lock(&runtime_sched->lock);
3324 globrunqputbatch(batch[0], batch[n], n+1);
3325 runtime_unlock(&runtime_sched->lock);
3326 return true;
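/*
 * Sketch of the per-P run queue shape used by runqput/runqputslow
 * (hypothetical names, C11 atomics instead of the runtime helpers): a
 * fixed-size ring with a tail written only by the owner and a head
 * that consumers advance by compare-and-swap.  When the ring is full
 * the owner spills a batch to a global queue, as runqputslow does.
 *
 *   #include <stdatomic.h>
 *
 *   enum { QSIZE = 256 };                  // must stay a power of two
 *
 *   struct ring {
 *       atomic_uint head;                  // advanced by consumers
 *       atomic_uint tail;                  // written only by the owner
 *       void *buf[QSIZE];
 *   };
 *
 *   // Owner-only put; returns 0 when full so the caller can spill.
 *   static int
 *   ring_put(struct ring *r, void *item)
 *   {
 *       unsigned h = atomic_load_explicit(&r->head, memory_order_acquire);
 *       unsigned t = atomic_load_explicit(&r->tail, memory_order_relaxed);
 *       if(t - h >= QSIZE)
 *           return 0;
 *       r->buf[t % QSIZE] = item;
 *       // Publish the slot before making it visible via tail.
 *       atomic_store_explicit(&r->tail, t + 1, memory_order_release);
 *       return 1;
 *   }
 */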
3329 // Get g from local runnable queue.
3330 // Executed only by the owner P.
3331 static G*
3332 runqget(P *p)
3334 G *gp;
3335 uint32 t, h;
3337 for(;;) {
3338 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3339 t = p->runqtail;
3340 if(t == h)
3341 return nil;
3342 gp = (G*)p->runq[h%nelem(p->runq)];
3343 if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume
3344 return gp;
3348 // Grabs a batch of goroutines from local runnable queue.
3349 // batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
3350 // Can be executed by any P.
3351 static uint32
3352 runqgrab(P *p, G **batch)
3354 uint32 t, h, n, i;
3356 for(;;) {
3357 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3358 t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer
3359 n = t-h;
3360 n = n - n/2;
3361 if(n == 0)
3362 break;
3363 if(n > nelem(p->runq)/2) // read inconsistent h and t
3364 continue;
3365 for(i=0; i<n; i++)
3366 batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
3367 if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3368 break;
3370 return n;
3373 // Steal half of elements from local runnable queue of p2
3374 // and put onto local runnable queue of p.
3375 // Returns one of the stolen elements (or nil if failed).
3376 static G*
3377 runqsteal(P *p, P *p2)
3379 G *gp;
3380 G *batch[nelem(p->runq)/2];
3381 uint32 t, h, n, i;
3383 n = runqgrab(p2, batch);
3384 if(n == 0)
3385 return nil;
3386 n--;
3387 gp = batch[n];
3388 if(n == 0)
3389 return gp;
3390 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3391 t = p->runqtail;
3392 if(t - h + n >= nelem(p->runq))
3393 runtime_throw("runqsteal: runq overflow");
3394 for(i=0; i<n; i++, t++)
3395 p->runq[t%nelem(p->runq)] = (uintptr)batch[i];
3396 runtime_atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption
3397 return gp;
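/*
 * Companion sketch to the ring above (it reuses the hypothetical
 * struct ring and QSIZE from the runqput sketch): the consumer side
 * claims a slot by CAS on head, which is how runqget and runqgrab
 * commit their reads; on a failed CAS the slot is simply re-read on
 * the next iteration.
 *
 *   static void *
 *   ring_get(struct ring *r)
 *   {
 *       for(;;) {
 *           unsigned h = atomic_load_explicit(&r->head, memory_order_acquire);
 *           unsigned t = atomic_load_explicit(&r->tail, memory_order_acquire);
 *           if(t == h)
 *               return 0;                       // empty
 *           void *item = r->buf[h % QSIZE];
 *           // cas-release: commits the consume if nobody raced us.
 *           if(atomic_compare_exchange_weak(&r->head, &h, h + 1))
 *               return item;
 *       }
 *   }
 */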
3400 void runtime_testSchedLocalQueue(void)
3401 __asm__("runtime.testSchedLocalQueue");
3403 void
3404 runtime_testSchedLocalQueue(void)
3406 P p;
3407 G gs[nelem(p.runq)];
3408 int32 i, j;
3410 runtime_memclr((byte*)&p, sizeof(p));
3412 for(i = 0; i < (int32)nelem(gs); i++) {
3413 if(runqget(&p) != nil)
3414 runtime_throw("runq is not empty initially");
3415 for(j = 0; j < i; j++)
3416 runqput(&p, &gs[i]);
3417 for(j = 0; j < i; j++) {
3418 if(runqget(&p) != &gs[i]) {
3419 runtime_printf("bad element at iter %d/%d\n", i, j);
3420 runtime_throw("bad element");
3423 if(runqget(&p) != nil)
3424 runtime_throw("runq is not empty afterwards");
3428 void runtime_testSchedLocalQueueSteal(void)
3429 __asm__("runtime.testSchedLocalQueueSteal");
3431 void
3432 runtime_testSchedLocalQueueSteal(void)
3434 P p1, p2;
3435 G gs[nelem(p1.runq)], *gp;
3436 int32 i, j, s;
3438 runtime_memclr((byte*)&p1, sizeof(p1));
3439 runtime_memclr((byte*)&p2, sizeof(p2));
3441 for(i = 0; i < (int32)nelem(gs); i++) {
3442 for(j = 0; j < i; j++) {
3443 gs[j].sig = 0;
3444 runqput(&p1, &gs[j]);
3446 gp = runqsteal(&p2, &p1);
3447 s = 0;
3448 if(gp) {
3449 s++;
3450 gp->sig++;
3452 while((gp = runqget(&p2)) != nil) {
3453 s++;
3454 gp->sig++;
3456 while((gp = runqget(&p1)) != nil)
3457 gp->sig++;
3458 for(j = 0; j < i; j++) {
3459 if(gs[j].sig != 1) {
3460 runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
3461 runtime_throw("bad element");
3464 if(s != i/2 && s != i/2+1) {
3465 runtime_printf("bad steal %d, want %d or %d, iter %d\n",
3466 s, i/2, i/2+1, i);
3467 runtime_throw("bad steal");
3472 intgo
3473 runtime_setmaxthreads(intgo in)
3475 intgo out;
3477 runtime_lock(&runtime_sched->lock);
3478 out = (intgo)runtime_sched->maxmcount;
3479 runtime_sched->maxmcount = (int32)in;
3480 checkmcount();
3481 runtime_unlock(&runtime_sched->lock);
3482 return out;
3485 static intgo
3486 procPin()
3488 M *mp;
3490 mp = runtime_m();
3491 mp->locks++;
3492 return (intgo)(((P*)mp->p)->id);
3495 static void
3496 procUnpin()
3498 runtime_m()->locks--;
3501 intgo sync_runtime_procPin(void)
3502 __asm__ (GOSYM_PREFIX "sync.runtime_procPin");
3504 intgo
3505 sync_runtime_procPin()
3507 return procPin();
3510 void sync_runtime_procUnpin(void)
3511 __asm__ (GOSYM_PREFIX "sync.runtime_procUnpin");
3513 void
3514 sync_runtime_procUnpin()
3516 procUnpin();
3519 intgo sync_atomic_runtime_procPin(void)
3520 __asm__ (GOSYM_PREFIX "sync_atomic.runtime_procPin");
3522 intgo
3523 sync_atomic_runtime_procPin()
3525 return procPin();
3528 void sync_atomic_runtime_procUnpin(void)
3529 __asm__ (GOSYM_PREFIX "sync_atomic.runtime_procUnpin");
3531 void
3532 sync_atomic_runtime_procUnpin()
3534 procUnpin();
3537 void
3538 runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
3540 enqueue1(wbufp, (Obj){(byte*)&runtime_main_init_done, sizeof runtime_main_init_done, 0});
3543 // Return whether we are waiting for a GC. This gc toolchain uses
3544 // preemption instead.
3545 bool
3546 runtime_gcwaiting(void)
3548 return runtime_sched->gcwaiting;
3551 // os_beforeExit is called from os.Exit(0).
3552 //go:linkname os_beforeExit os.runtime_beforeExit
3554 extern void os_beforeExit() __asm__ (GOSYM_PREFIX "os.runtime_beforeExit");
3556 void
3557 os_beforeExit()
3561 // Active spinning for sync.Mutex.
3562 //go:linkname sync_runtime_canSpin sync.runtime_canSpin
3564 enum
3566 ACTIVE_SPIN = 4,
3567 ACTIVE_SPIN_CNT = 30,
3570 extern _Bool sync_runtime_canSpin(intgo i)
3571 __asm__ (GOSYM_PREFIX "sync.runtime_canSpin");
3573 _Bool
3574 sync_runtime_canSpin(intgo i)
3576 P *p;
3578 // sync.Mutex is cooperative, so we are conservative with spinning.
3579 // Spin only a few times and only if running on a multicore machine and
3580 // GOMAXPROCS>1 and there is at least one other running P and the local runq is empty.
3581 // As opposed to runtime mutexes we don't do passive spinning here,
3582 // because there can be work on the global runq or on other Ps.
3583 if (i >= ACTIVE_SPIN || runtime_ncpu <= 1 || runtime_gomaxprocs <= (int32)(runtime_sched->npidle+runtime_sched->nmspinning)+1) {
3584 return false;
3586 p = (P*)g->m->p;
3587 return p != nil && p->runqhead == p->runqtail;
3590 //go:linkname sync_runtime_doSpin sync.runtime_doSpin
3591 //go:nosplit
3593 extern void sync_runtime_doSpin(void)
3594 __asm__ (GOSYM_PREFIX "sync.runtime_doSpin");
3596 void
3597 sync_runtime_doSpin()
3599 runtime_procyield(ACTIVE_SPIN_CNT);
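/*
 * Sketch of a bounded active-spin loop in the spirit of the heuristic
 * above (hypothetical names; the constants mirror ACTIVE_SPIN and
 * ACTIVE_SPIN_CNT): pause the CPU briefly a few times before giving up
 * and letting the caller block instead.
 *
 *   static void
 *   cpu_relax(void)
 *   {
 *   #if defined(__x86_64__) || defined(__i386__)
 *       __builtin_ia32_pause();
 *   #endif
 *   }
 *
 *   static int
 *   try_spin(int (*is_locked)(void))
 *   {
 *       int i, j;
 *       for(i = 0; i < 4; i++) {           // ACTIVE_SPIN rounds
 *           for(j = 0; j < 30; j++)        // ACTIVE_SPIN_CNT pauses
 *               cpu_relax();
 *           if(!is_locked())
 *               return 1;                  // worth retrying the lock
 *       }
 *       return 0;                          // caller should block
 *   }
 */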
3602 // For Go code to look at variables, until we port proc.go.
3604 extern M** runtime_go_allm(void)
3605 __asm__ (GOSYM_PREFIX "runtime.allm");
3607 M**
3608 runtime_go_allm()
3610 return &runtime_allm;
3613 extern Slice runtime_go_allgs(void)
3614 __asm__ (GOSYM_PREFIX "runtime.allgs");
3616 Slice
3617 runtime_go_allgs()
3619 Slice s;
3621 s.__values = runtime_allg;
3622 s.__count = runtime_allglen;
3623 s.__capacity = allgcap;
3624 return s;
3627 intgo NumCPU(void) __asm__ (GOSYM_PREFIX "runtime.NumCPU");
3629 intgo
3630 NumCPU()
3632 return (intgo)(runtime_ncpu);