libgo/runtime/proc.c
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 #include <limits.h>
6 #include <signal.h>
7 #include <stdlib.h>
8 #include <pthread.h>
9 #include <unistd.h>
11 #include "config.h"
13 #ifdef HAVE_DL_ITERATE_PHDR
14 #include <link.h>
15 #endif
17 #include "runtime.h"
18 #include "arch.h"
19 #include "defs.h"
20 #include "malloc.h"
21 #include "go-type.h"
23 #ifdef USING_SPLIT_STACK
25 /* FIXME: These are not declared anywhere. */
27 extern void __splitstack_getcontext(void *context[10]);
29 extern void __splitstack_setcontext(void *context[10]);
31 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
33 extern void * __splitstack_resetcontext(void *context[10], size_t *);
35 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
36 void **);
38 extern void __splitstack_block_signals (int *, int *);
40 extern void __splitstack_block_signals_context (void *context[10], int *,
41 int *);
43 #endif
45 #ifndef PTHREAD_STACK_MIN
46 # define PTHREAD_STACK_MIN 8192
47 #endif
49 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
50 # define StackMin PTHREAD_STACK_MIN
51 #else
52 # define StackMin ((sizeof(char *) < 8) ? 2 * 1024 * 1024 : 4 * 1024 * 1024)
53 #endif
55 uintptr runtime_stacks_sys;
57 static void gtraceback(G*);
59 #ifdef __rtems__
60 #define __thread
61 #endif
63 static __thread G *g;
65 #ifndef SETCONTEXT_CLOBBERS_TLS
67 static inline void
68 initcontext(void)
72 static inline void
73 fixcontext(ucontext_t *c __attribute__ ((unused)))
77 #else
79 # if defined(__x86_64__) && defined(__sun__)
81 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
82 // register to that of the thread which called getcontext. The effect
83 // is that the address of all __thread variables changes. This bug
84 // also affects pthread_self() and pthread_getspecific. We work
85 // around it by clobbering the context field directly to keep %fs the
86 // same.
88 static __thread greg_t fs;
90 static inline void
91 initcontext(void)
93 ucontext_t c;
95 getcontext(&c);
96 fs = c.uc_mcontext.gregs[REG_FSBASE];
99 static inline void
100 fixcontext(ucontext_t* c)
102 c->uc_mcontext.gregs[REG_FSBASE] = fs;
105 # elif defined(__NetBSD__)
107 // NetBSD has a bug: setcontext clobbers tlsbase, we need to save
108 // and restore it ourselves.
110 static __thread __greg_t tlsbase;
112 static inline void
113 initcontext(void)
115 ucontext_t c;
117 getcontext(&c);
118 tlsbase = c.uc_mcontext._mc_tlsbase;
121 static inline void
122 fixcontext(ucontext_t* c)
124 c->uc_mcontext._mc_tlsbase = tlsbase;
127 # elif defined(__sparc__)
129 static inline void
130 initcontext(void)
134 static inline void
135 fixcontext(ucontext_t *c)
137 /* ??? Using
138 register unsigned long thread __asm__("%g7");
139 c->uc_mcontext.gregs[REG_G7] = thread;
140 results in
141 error: variable ‘thread’ might be clobbered by \
142 ‘longjmp’ or ‘vfork’ [-Werror=clobbered]
143 which ought to be false, as %g7 is a fixed register. */
145 if (sizeof (c->uc_mcontext.gregs[REG_G7]) == 8)
146 asm ("stx %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
147 else
148 asm ("st %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
151 # else
153 # error unknown case for SETCONTEXT_CLOBBERS_TLS
155 # endif
157 #endif
159 // ucontext_arg returns a properly aligned ucontext_t value. On some
160 // systems a ucontext_t value must be aligned to a 16-byte boundary.
161 // The g structure that has fields of type ucontext_t is defined in
162 // Go, and Go has no simple way to align a field to such a boundary.
163 // So we make the field larger in runtime2.go and pick an appropriate
164 // offset within the field here.
165 static ucontext_t*
166 ucontext_arg(void** go_ucontext)
168 uintptr_t p = (uintptr_t)go_ucontext;
169 size_t align = __alignof__(ucontext_t);
170 if(align > 16) {
171 // We only ensured space for up to a 16 byte alignment
172 // in libgo/go/runtime/runtime2.go.
173 runtime_throw("required alignment of ucontext_t too large");
175 p = (p + align - 1) &~ (uintptr_t)(align - 1);
176 return (ucontext_t*)p;
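// Illustrative sketch (not from proc.c): the rounding above keeps the result
// inside the oversized field that runtime2.go reserves.  Assuming a
// hypothetical 64-bit system where __alignof__(ucontext_t) == 16:
//
//   void *ctx[(sizeof(ucontext_t) + 16) / sizeof(void*) + 1]; // what Go reserves
//   ucontext_t *uc = ucontext_arg(&ctx[0]);
//   // (uintptr_t)uc % 16 == 0, and at most 15 bytes were skipped, so uc
//   // plus sizeof(ucontext_t) still fits inside ctx.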
179 // We can not always refer to the TLS variables directly. The
180 // compiler will call tls_get_addr to get the address of the variable,
181 // and it may hold it in a register across a call to schedule. When
182 // we get back from the call we may be running in a different thread,
183 // in which case the register now points to the TLS variable for a
184 // different thread. We use non-inlinable functions to avoid this
185 // when necessary.
187 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
189 G*
190 runtime_g(void)
192 return g;
195 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
197 M*
198 runtime_m(void)
200 if(g == nil)
201 return nil;
202 return g->m;
205 // Set g.
206 void
207 runtime_setg(G* gp)
209 g = gp;
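// Illustrative sketch (not from proc.c) of the hazard the comment above
// describes.  If g were read directly and the accessor inlined, the compiler
// could legally cache the computed TLS address across a reschedule:
//
//   G *gp = g;              // TLS address of g computed on thread A
//   runtime_mcall(park0);   // goroutine may resume on thread B
//   gp = g;                 // an inlined read may reuse thread A's cached
//                           // address; runtime_g() forces a fresh lookup.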
212 // Start a new thread.
213 static void
214 runtime_newosproc(M *mp)
216 pthread_attr_t attr;
217 sigset_t clear, old;
218 pthread_t tid;
219 int ret;
221 if(pthread_attr_init(&attr) != 0)
222 runtime_throw("pthread_attr_init");
223 if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
224 runtime_throw("pthread_attr_setdetachstate");
226 // Block signals during pthread_create so that the new thread
227 // starts with signals disabled. It will enable them in minit.
228 sigfillset(&clear);
230 #ifdef SIGTRAP
231 // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
232 sigdelset(&clear, SIGTRAP);
233 #endif
235 sigemptyset(&old);
236 pthread_sigmask(SIG_BLOCK, &clear, &old);
237 ret = pthread_create(&tid, &attr, runtime_mstart, mp);
238 pthread_sigmask(SIG_SETMASK, &old, nil);
240 if (ret != 0)
241 runtime_throw("pthread_create");
244 // First function run by a new goroutine. This replaces gogocall.
245 static void
246 kickoff(void)
248 void (*fn)(void*);
249 void *param;
251 if(g->traceback != nil)
252 gtraceback(g);
254 fn = (void (*)(void*))(g->entry);
255 param = g->param;
256 g->param = nil;
257 fn(param);
258 runtime_goexit1();
261 // Switch context to a different goroutine. This is like longjmp.
262 void runtime_gogo(G*) __attribute__ ((noinline));
263 void
264 runtime_gogo(G* newg)
266 #ifdef USING_SPLIT_STACK
267 __splitstack_setcontext(&newg->stackcontext[0]);
268 #endif
269 g = newg;
270 newg->fromgogo = true;
271 fixcontext(ucontext_arg(&newg->context[0]));
272 setcontext(ucontext_arg(&newg->context[0]));
273 runtime_throw("gogo setcontext returned");
276 // Save context and call fn passing g as a parameter. This is like
277 // setjmp. Because getcontext always returns 0, unlike setjmp, we use
278 // g->fromgogo as a code. It will be true if we got here via
279 // setcontext. g == nil the first time this is called in a new m.
280 void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
281 void
282 runtime_mcall(void (*pfn)(G*))
284 M *mp;
285 G *gp;
286 #ifndef USING_SPLIT_STACK
287 void *afterregs;
288 #endif
290 // Ensure that all registers are on the stack for the garbage
291 // collector.
292 __builtin_unwind_init();
294 gp = g;
295 mp = gp->m;
296 if(gp == mp->g0)
297 runtime_throw("runtime: mcall called on m->g0 stack");
299 if(gp != nil) {
301 #ifdef USING_SPLIT_STACK
302 __splitstack_getcontext(&g->stackcontext[0]);
303 #else
304 // We have to point to an address on the stack that is
305 // below the saved registers.
306 gp->gcnextsp = &afterregs;
307 #endif
308 gp->fromgogo = false;
309 getcontext(ucontext_arg(&gp->context[0]));
311 // When we return from getcontext, we may be running
312 // in a new thread. That means that g may have
313 // changed. It is a global variable, so we will
314 // reload it, but the address of g may be cached in
315 // our local stack frame, and that address may be
316 // wrong. Call the function to reload the value for
317 // this thread.
318 gp = runtime_g();
319 mp = gp->m;
321 if(gp->traceback != nil)
322 gtraceback(gp);
324 if (gp == nil || !gp->fromgogo) {
325 #ifdef USING_SPLIT_STACK
326 __splitstack_setcontext(&mp->g0->stackcontext[0]);
327 #endif
328 mp->g0->entry = (byte*)pfn;
329 mp->g0->param = gp;
331 // It's OK to set g directly here because this case
332 // can not occur if we got here via a setcontext to
333 // the getcontext call just above.
334 g = mp->g0;
336 fixcontext(ucontext_arg(&mp->g0->context[0]));
337 setcontext(ucontext_arg(&mp->g0->context[0]));
338 runtime_throw("runtime: mcall function returned");
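// Illustrative summary (not from proc.c) of the save/switch protocol shared
// by runtime_mcall and runtime_gogo above, for a goroutine gp parked by
// mcall and resumed later by gogo:
//
//   mcall(pfn): gp->fromgogo = false; getcontext(&gp->context);
//               if(!gp->fromgogo) switch to m->g0 and run pfn(gp);
//   gogo(gp):   gp->fromgogo = true;  setcontext(&gp->context);
//               // execution reappears after gp's getcontext with
//               // fromgogo == true, so mcall simply falls through and
//               // returns to the parked goroutine.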
342 // Goroutine scheduler
343 // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
345 // The main concepts are:
346 // G - goroutine.
347 // M - worker thread, or machine.
348 // P - processor, a resource that is required to execute Go code.
349 // An M must have an associated P to execute Go code; however, it can be
350 // blocked or in a syscall without an associated P.
352 // Design doc at http://golang.org/s/go11sched.
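// Rough picture (informal, not from the design doc) of how the pieces relate
// while Go code is running:
//
//   M (OS thread) --m->p--> P (runq, mcache, goidcache)
//        |                      |
//        +--m->curg--> G        +-- local run queue of runnable G's
//
// An M without a P can only be idle, spinning, or blocked in a syscall; it
// must reacquire a P (acquirep) before executing Go code again.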
354 enum
356 // Number of goroutine ids to grab from runtime_sched->goidgen to local per-P cache at once.
357 // 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
358 GoidCacheBatch = 16,
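// Goroutine ids are then handed out locally from the cached window; see the
// corresponding code in __go_go below:
//
//   if(p->goidcache == p->goidcacheend) {
//           p->goidcache = runtime_xadd64(&runtime_sched->goidgen, GoidCacheBatch);
//           p->goidcacheend = p->goidcache + GoidCacheBatch;
//   }
//   newg->goid = p->goidcache++;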
361 extern Sched* runtime_getsched() __asm__ (GOSYM_PREFIX "runtime.getsched");
362 extern bool* runtime_getCgoHasExtraM()
363 __asm__ (GOSYM_PREFIX "runtime.getCgoHasExtraM");
364 extern P** runtime_getAllP()
365 __asm__ (GOSYM_PREFIX "runtime.getAllP");
366 extern G* allocg(void)
367 __asm__ (GOSYM_PREFIX "runtime.allocg");
369 Sched* runtime_sched;
370 int32 runtime_gomaxprocs;
371 M runtime_m0;
372 G runtime_g0; // idle goroutine for m0
373 G* runtime_lastg;
374 M* runtime_allm;
375 P** runtime_allp;
376 int8* runtime_goos;
377 int32 runtime_ncpu;
378 bool runtime_precisestack;
379 static int32 newprocs;
381 bool runtime_isarchive;
383 void* runtime_mstart(void*);
384 static void runqput(P*, G*);
385 static G* runqget(P*);
386 static bool runqputslow(P*, G*, uint32, uint32);
387 static G* runqsteal(P*, P*);
388 static void mput(M*);
389 static M* mget(void);
390 static void mcommoninit(M*);
391 static void schedule(void);
392 static void procresize(int32);
393 static void acquirep(P*);
394 static P* releasep(void);
395 static void newm(void(*)(void), P*);
396 static void stopm(void);
397 static void startm(P*, bool);
398 static void handoffp(P*);
399 static void wakep(void);
400 static void stoplockedm(void);
401 static void startlockedm(G*);
402 static void sysmon(void);
403 static uint32 retake(int64);
404 static void incidlelocked(int32);
405 static void exitsyscall0(G*);
406 static void park0(G*);
407 static void goexit0(G*);
408 static void gfput(P*, G*);
409 static G* gfget(P*);
410 static void gfpurge(P*);
411 static void globrunqput(G*);
412 static void globrunqputbatch(G*, G*, int32);
413 static G* globrunqget(P*, int32);
414 static P* pidleget(void);
415 static void pidleput(P*);
416 static void injectglist(G*);
417 static bool preemptall(void);
418 static bool exitsyscallfast(void);
420 void allgadd(G*)
421 __asm__(GOSYM_PREFIX "runtime.allgadd");
422 void checkdead(void)
423 __asm__(GOSYM_PREFIX "runtime.checkdead");
425 bool runtime_isstarted;
427 // The bootstrap sequence is:
429 // call osinit
430 // call schedinit
431 // make & queue new G
432 // call runtime_mstart
434 // The new G calls runtime_main.
435 void
436 runtime_schedinit(void)
438 M *m;
439 int32 n, procs;
440 String s;
441 const byte *p;
442 Eface i;
444 runtime_sched = runtime_getsched();
446 m = &runtime_m0;
447 g = &runtime_g0;
448 m->g0 = g;
449 m->curg = g;
450 g->m = m;
452 initcontext();
454 runtime_sched->maxmcount = 10000;
455 runtime_precisestack = 0;
457 // runtime_symtabinit();
458 runtime_mallocinit();
459 mcommoninit(m);
460 runtime_alginit(); // maps must not be used before this call
462 // Initialize the itable value for newErrorCString,
463 // so that the next time it gets called, possibly
464 // in a fault during a garbage collection, it will not
465 // need to allocate memory.
466 runtime_newErrorCString(0, &i);
468 // Initialize the cached gotraceback value, since
469 // gotraceback calls getenv, which mallocs on Plan 9.
470 runtime_gotraceback(nil);
472 runtime_goargs();
473 runtime_goenvs();
474 runtime_parsedebugvars();
476 runtime_sched->lastpoll = runtime_nanotime();
477 procs = 1;
478 s = runtime_getenv("GOMAXPROCS");
479 p = s.str;
480 if(p != nil && (n = runtime_atoi(p, s.len)) > 0) {
481 if(n > _MaxGomaxprocs)
482 n = _MaxGomaxprocs;
483 procs = n;
485 runtime_allp = runtime_getAllP();
486 procresize(procs);
488 // Can not enable GC until all roots are registered.
489 // mstats()->enablegc = 1;
492 extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
493 extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");
495 // Used to determine the field alignment.
497 struct field_align
499 char c;
500 Hchan *p;
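// Note (illustrative, not from proc.c): with this struct the required
// alignment of Hchan* can be read off as offsetof(struct field_align, p),
// since on typical ABIs the compiler pads the single char with exactly
// enough bytes to align the pointer member.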
503 static void
504 initDone(void *arg __attribute__ ((unused))) {
505 runtime_unlockOSThread();
508 // The main goroutine.
509 // Note: C frames in general are not copyable during stack growth, for two reasons:
510 // 1) We don't know where in a frame to find pointers to other stack locations.
511 // 2) There's no guarantee that globals or heap values do not point into the frame.
513 // The C frame for runtime.main is copyable, because:
514 // 1) There are no pointers to other stack locations in the frame
515 // (d.fn points at a global, d.link is nil, d.argp is -1).
516 // 2) The only pointer into this frame is from the defer chain,
517 // which is explicitly handled during stack copying.
518 void
519 runtime_main(void* dummy __attribute__((unused)))
521 Defer d;
522 _Bool frame;
524 newm(sysmon, nil);
526 // Lock the main goroutine onto this, the main OS thread,
527 // during initialization. Most programs won't care, but a few
528 // do require certain calls to be made by the main thread.
529 // Those can arrange for main.main to run in the main thread
530 // by calling runtime.LockOSThread during initialization
531 // to preserve the lock.
532 runtime_lockOSThread();
534 // Defer unlock so that runtime.Goexit during init does the unlock too.
535 d.pfn = (uintptr)(void*)initDone;
536 d.link = g->_defer;
537 d.arg = (void*)-1;
538 d._panic = g->_panic;
539 d.retaddr = 0;
540 d.makefunccanrecover = 0;
541 d.frame = &frame;
542 d.special = true;
543 g->_defer = &d;
545 if(g->m != &runtime_m0)
546 runtime_throw("runtime_main not on m0");
547 __go_go(runtime_MHeap_Scavenger, nil);
549 makeMainInitDone();
551 _cgo_notify_runtime_init_done();
553 main_init();
555 closeMainInitDone();
557 if(g->_defer != &d || (void*)d.pfn != initDone)
558 runtime_throw("runtime: bad defer entry after init");
559 g->_defer = d.link;
560 runtime_unlockOSThread();
562 // For gccgo we have to wait until after main is initialized
563 // to enable GC, because initializing main registers the GC
564 // roots.
565 mstats()->enablegc = 1;
567 if(runtime_isarchive) {
568 // This is not a complete program, but is instead a
569 // library built using -buildmode=c-archive or
570 // c-shared. Now that we are initialized, there is
571 // nothing further to do.
572 return;
575 main_main();
577 // Make racy client program work: if panicking on
578 // another goroutine at the same time as main returns,
579 // let the other goroutine finish printing the panic trace.
580 // Once it does, it will exit. See issue 3934.
581 if(runtime_panicking())
582 runtime_park(nil, nil, "panicwait");
584 runtime_exit(0);
585 for(;;)
586 *(int32*)0 = 0;
589 void getTraceback(G*, G*) __asm__(GOSYM_PREFIX "runtime.getTraceback");
591 // getTraceback stores a traceback of gp in the g's traceback field
592 // and then returns to me. We expect that gp's traceback is not nil.
593 // It works by saving me's current context, and checking gp's traceback field.
594 // If gp's traceback field is not nil, it starts running gp.
595 // In places where we call getcontext, we check the traceback field.
596 // If it is not nil, we collect a traceback, and then return to the
597 // goroutine stored in the traceback field, which is me.
598 void getTraceback(G* me, G* gp)
600 #ifdef USING_SPLIT_STACK
601 __splitstack_getcontext(&me->stackcontext[0]);
602 #endif
603 getcontext(ucontext_arg(&me->stackcontext[0]));
605 if (gp->traceback != nil) {
606 runtime_gogo(gp);
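// Illustrative summary (not from proc.c) of the handshake above, assuming
// the Go caller has already stored a Traceback in gp->traceback with
// traceback->gp == me:
//
//   getTraceback(me, gp): save me's context, then runtime_gogo(gp).
//   gp resumes at one of its own getcontext calls, sees gp->traceback != nil,
//   and calls gtraceback(gp), which fills traceback->locbuf and finishes
//   with runtime_gogo(traceback->gp), i.e. control returns to me here.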
610 static void
611 checkmcount(void)
613 // sched lock is held
614 if(runtime_sched->mcount > runtime_sched->maxmcount) {
615 runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched->maxmcount);
616 runtime_throw("thread exhaustion");
620 // Do a stack trace of gp, and then restore the context to
621 // gp->dotraceback.
623 static void
624 gtraceback(G* gp)
626 Traceback* traceback;
628 traceback = gp->traceback;
629 gp->traceback = nil;
630 if(gp->m != nil)
631 runtime_throw("gtraceback: m is not nil");
632 gp->m = traceback->gp->m;
633 traceback->c = runtime_callers(1, traceback->locbuf,
634 sizeof traceback->locbuf / sizeof traceback->locbuf[0], false);
635 gp->m = nil;
636 runtime_gogo(traceback->gp);
639 static void
640 mcommoninit(M *mp)
642 // If there is no mcache, runtime_callers() will crash,
643 // and we are most likely in the sysmon thread, so the stack is senseless anyway.
644 if(g->m->mcache)
645 runtime_callers(1, mp->createstack, nelem(mp->createstack), false);
647 mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
649 runtime_lock(&runtime_sched->lock);
650 mp->id = runtime_sched->mcount++;
651 checkmcount();
652 runtime_mpreinit(mp);
654 // Add to runtime_allm so garbage collector doesn't free m
655 // when it is just in a register or thread-local storage.
656 mp->alllink = runtime_allm;
657 // runtime_NumCgoCall() iterates over allm w/o schedlock,
658 // so we need to publish it safely.
659 runtime_atomicstorep(&runtime_allm, mp);
660 runtime_unlock(&runtime_sched->lock);
663 // Mark gp ready to run.
664 void
665 runtime_ready(G *gp)
667 // Mark runnable.
668 g->m->locks++; // disable preemption because it can be holding p in a local var
669 if(gp->atomicstatus != _Gwaiting) {
670 runtime_printf("goroutine %D has status %d\n", gp->goid, gp->atomicstatus);
671 runtime_throw("bad g->atomicstatus in ready");
673 gp->atomicstatus = _Grunnable;
674 runqput((P*)g->m->p, gp);
675 if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0) // TODO: fast atomic
676 wakep();
677 g->m->locks--;
680 void goready(G*, int) __asm__ (GOSYM_PREFIX "runtime.goready");
682 void
683 goready(G* gp, int traceskip __attribute__ ((unused)))
685 runtime_ready(gp);
688 int32
689 runtime_gcprocs(void)
691 int32 n;
693 // Figure out how many CPUs to use during GC.
694 // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
695 runtime_lock(&runtime_sched->lock);
696 n = runtime_gomaxprocs;
697 if(n > runtime_ncpu)
698 n = runtime_ncpu > 0 ? runtime_ncpu : 1;
699 if(n > MaxGcproc)
700 n = MaxGcproc;
701 if(n > runtime_sched->nmidle+1) // one M is currently running
702 n = runtime_sched->nmidle+1;
703 runtime_unlock(&runtime_sched->lock);
704 return n;
707 static bool
708 needaddgcproc(void)
710 int32 n;
712 runtime_lock(&runtime_sched->lock);
713 n = runtime_gomaxprocs;
714 if(n > runtime_ncpu)
715 n = runtime_ncpu;
716 if(n > MaxGcproc)
717 n = MaxGcproc;
718 n -= runtime_sched->nmidle+1; // one M is currently running
719 runtime_unlock(&runtime_sched->lock);
720 return n > 0;
723 void
724 runtime_helpgc(int32 nproc)
726 M *mp;
727 int32 n, pos;
729 runtime_lock(&runtime_sched->lock);
730 pos = 0;
731 for(n = 1; n < nproc; n++) { // one M is currently running
732 if(runtime_allp[pos]->mcache == g->m->mcache)
733 pos++;
734 mp = mget();
735 if(mp == nil)
736 runtime_throw("runtime_gcprocs inconsistency");
737 mp->helpgc = n;
738 mp->mcache = runtime_allp[pos]->mcache;
739 pos++;
740 runtime_notewakeup(&mp->park);
742 runtime_unlock(&runtime_sched->lock);
745 // Similar to stoptheworld but best-effort and can be called several times.
746 // There is no reverse operation; it is used during crashing.
747 // This function must not lock any mutexes.
748 void
749 runtime_freezetheworld(void)
751 int32 i;
753 if(runtime_gomaxprocs == 1)
754 return;
755 // stopwait and preemption requests can be lost
756 // due to races with concurrently executing threads,
757 // so try several times
758 for(i = 0; i < 5; i++) {
759 // this should tell the scheduler to not start any new goroutines
760 runtime_sched->stopwait = 0x7fffffff;
761 runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1);
762 // this should stop running goroutines
763 if(!preemptall())
764 break; // no running goroutines
765 runtime_usleep(1000);
767 // to be sure
768 runtime_usleep(1000);
769 preemptall();
770 runtime_usleep(1000);
773 void
774 runtime_stopTheWorldWithSema(void)
776 int32 i;
777 uint32 s;
778 P *p;
779 bool wait;
781 runtime_lock(&runtime_sched->lock);
782 runtime_sched->stopwait = runtime_gomaxprocs;
783 runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1);
784 preemptall();
785 // stop current P
786 ((P*)g->m->p)->status = _Pgcstop;
787 runtime_sched->stopwait--;
788 // try to retake all P's in _Psyscall status
789 for(i = 0; i < runtime_gomaxprocs; i++) {
790 p = runtime_allp[i];
791 s = p->status;
792 if(s == _Psyscall && runtime_cas(&p->status, s, _Pgcstop))
793 runtime_sched->stopwait--;
795 // stop idle P's
796 while((p = pidleget()) != nil) {
797 p->status = _Pgcstop;
798 runtime_sched->stopwait--;
800 wait = runtime_sched->stopwait > 0;
801 runtime_unlock(&runtime_sched->lock);
803 // wait for remaining P's to stop voluntarily
804 if(wait) {
805 runtime_notesleep(&runtime_sched->stopnote);
806 runtime_noteclear(&runtime_sched->stopnote);
808 if(runtime_sched->stopwait)
809 runtime_throw("stoptheworld: not stopped");
810 for(i = 0; i < runtime_gomaxprocs; i++) {
811 p = runtime_allp[i];
812 if(p->status != _Pgcstop)
813 runtime_throw("stoptheworld: not stopped");
817 static void
818 mhelpgc(void)
820 g->m->helpgc = -1;
823 void
824 runtime_startTheWorldWithSema(void)
826 P *p, *p1;
827 M *mp;
828 G *gp;
829 bool add;
831 g->m->locks++; // disable preemption because it can be holding p in a local var
832 gp = runtime_netpoll(false); // non-blocking
833 injectglist(gp);
834 add = needaddgcproc();
835 runtime_lock(&runtime_sched->lock);
836 if(newprocs) {
837 procresize(newprocs);
838 newprocs = 0;
839 } else
840 procresize(runtime_gomaxprocs);
841 runtime_sched->gcwaiting = 0;
843 p1 = nil;
844 while((p = pidleget()) != nil) {
845 // procresize() puts p's with work at the beginning of the list.
846 // Once we reach a p without a run queue, the rest don't have one either.
847 if(p->runqhead == p->runqtail) {
848 pidleput(p);
849 break;
851 p->m = (uintptr)mget();
852 p->link = (uintptr)p1;
853 p1 = p;
855 if(runtime_sched->sysmonwait) {
856 runtime_sched->sysmonwait = false;
857 runtime_notewakeup(&runtime_sched->sysmonnote);
859 runtime_unlock(&runtime_sched->lock);
861 while(p1) {
862 p = p1;
863 p1 = (P*)p1->link;
864 if(p->m) {
865 mp = (M*)p->m;
866 p->m = 0;
867 if(mp->nextp)
868 runtime_throw("startTheWorldWithSema: inconsistent mp->nextp");
869 mp->nextp = (uintptr)p;
870 runtime_notewakeup(&mp->park);
871 } else {
872 // Start M to run P. Do not start another M below.
873 newm(nil, p);
874 add = false;
878 if(add) {
879 // If GC could have used another helper proc, start one now,
880 // in the hope that it will be available next time.
881 // It would have been even better to start it before the collection,
882 // but doing so requires allocating memory, so it's tricky to
883 // coordinate. This lazy approach works out in practice:
884 // we don't mind if the first couple of GC rounds don't have quite
885 // the maximum number of procs.
886 newm(mhelpgc, nil);
888 g->m->locks--;
891 // Called to start an M.
892 void*
893 runtime_mstart(void* mp)
895 M *m;
897 m = (M*)mp;
898 g = m->g0;
899 g->m = m;
901 initcontext();
903 g->entry = nil;
904 g->param = nil;
906 // Record top of stack for use by mcall.
907 // Once we call schedule we're never coming back,
908 // so other calls can reuse this stack space.
909 #ifdef USING_SPLIT_STACK
910 __splitstack_getcontext(&g->stackcontext[0]);
911 #else
912 g->gcinitialsp = &mp;
913 // Setting gcstacksize to 0 is a marker meaning that gcinitialsp
914 // is the top of the stack, not the bottom.
915 g->gcstacksize = 0;
916 g->gcnextsp = &mp;
917 #endif
918 getcontext(ucontext_arg(&g->context[0]));
920 if(g->entry != nil) {
921 // Got here from mcall.
922 void (*pfn)(G*) = (void (*)(G*))g->entry;
923 G* gp = (G*)g->param;
924 pfn(gp);
925 *(int*)0x21 = 0x21;
927 runtime_minit();
929 #ifdef USING_SPLIT_STACK
931 int dont_block_signals = 0;
932 __splitstack_block_signals(&dont_block_signals, nil);
934 #endif
936 // Install signal handlers; after minit so that minit can
937 // prepare the thread to be able to handle the signals.
938 if(m == &runtime_m0) {
939 if(runtime_iscgo) {
940 bool* cgoHasExtraM = runtime_getCgoHasExtraM();
941 if(!*cgoHasExtraM) {
942 *cgoHasExtraM = true;
943 runtime_newextram();
946 runtime_initsig(false);
949 if(m->mstartfn)
950 ((void (*)(void))m->mstartfn)();
952 if(m->helpgc) {
953 m->helpgc = 0;
954 stopm();
955 } else if(m != &runtime_m0) {
956 acquirep((P*)m->nextp);
957 m->nextp = 0;
959 schedule();
961 // TODO(brainman): This point is never reached, because scheduler
962 // does not release os threads at the moment. But once this path
963 // is enabled, we must remove our seh here.
965 return nil;
968 typedef struct CgoThreadStart CgoThreadStart;
969 struct CgoThreadStart
971 M *m;
972 G *g;
973 uintptr *tls;
974 void (*fn)(void);
977 M* runtime_allocm(P*, bool, byte**, uintptr*)
978 __asm__(GOSYM_PREFIX "runtime.allocm");
980 // Allocate a new m unassociated with any thread.
981 // Can use p for allocation context if needed.
982 M*
983 runtime_allocm(P *p, bool allocatestack, byte** ret_g0_stack, uintptr* ret_g0_stacksize)
985 M *mp;
987 g->m->locks++; // disable GC because it can be called from sysmon
988 if(g->m->p == 0)
989 acquirep(p); // temporarily borrow p for mallocs in this function
990 #if 0
991 if(mtype == nil) {
992 Eface e;
993 runtime_gc_m_ptr(&e);
994 mtype = ((const PtrType*)e.__type_descriptor)->__element_type;
996 #endif
998 mp = runtime_mal(sizeof *mp);
999 mcommoninit(mp);
1000 mp->g0 = runtime_malg(allocatestack, false, ret_g0_stack, ret_g0_stacksize);
1001 mp->g0->m = mp;
1003 if(p == (P*)g->m->p)
1004 releasep();
1005 g->m->locks--;
1007 return mp;
1010 void setGContext(void) __asm__ (GOSYM_PREFIX "runtime.setGContext");
1012 // setGContext sets up a new goroutine context for the current g.
1013 void
1014 setGContext()
1016 int val;
1018 initcontext();
1019 g->entry = nil;
1020 g->param = nil;
1021 #ifdef USING_SPLIT_STACK
1022 __splitstack_getcontext(&g->stackcontext[0]);
1023 val = 0;
1024 __splitstack_block_signals(&val, nil);
1025 #else
1026 g->gcinitialsp = &val;
1027 g->gcstack = nil;
1028 g->gcstacksize = 0;
1029 g->gcnextsp = &val;
1030 #endif
1031 getcontext(ucontext_arg(&g->context[0]));
1033 if(g->entry != nil) {
1034 // Got here from mcall.
1035 void (*pfn)(G*) = (void (*)(G*))g->entry;
1036 G* gp = (G*)g->param;
1037 pfn(gp);
1038 *(int*)0x22 = 0x22;
1042 void makeGContext(G*, byte*, uintptr)
1043 __asm__(GOSYM_PREFIX "runtime.makeGContext");
1045 // makeGContext makes a new context for a g.
1046 void
1047 makeGContext(G* gp, byte* sp, uintptr spsize) {
1048 ucontext_t *uc;
1050 uc = ucontext_arg(&gp->context[0]);
1051 getcontext(uc);
1052 uc->uc_stack.ss_sp = sp;
1053 uc->uc_stack.ss_size = (size_t)spsize;
1054 makecontext(uc, kickoff, 0);
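// Illustrative usage (mirroring __go_go below): after a stack is allocated
// for a new goroutine, its initial context is built so that the first switch
// into it lands in kickoff:
//
//   newg = runtime_malg(true, false, &sp, &malsize);
//   makeGContext(newg, sp, (uintptr)malsize);
//   // a later runtime_gogo(newg) setcontext's into kickoff, which then
//   // calls newg->entry(newg->param).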
1057 // Create a new m. It will start off with a call to fn, or else the scheduler.
1058 static void
1059 newm(void(*fn)(void), P *p)
1061 M *mp;
1063 mp = runtime_allocm(p, false, nil, nil);
1064 mp->nextp = (uintptr)p;
1065 mp->mstartfn = (uintptr)(void*)fn;
1067 runtime_newosproc(mp);
1070 // Stops execution of the current m until new work is available.
1071 // Returns with acquired P.
1072 static void
1073 stopm(void)
1075 M* m;
1077 m = g->m;
1078 if(m->locks)
1079 runtime_throw("stopm holding locks");
1080 if(m->p)
1081 runtime_throw("stopm holding p");
1082 if(m->spinning) {
1083 m->spinning = false;
1084 runtime_xadd(&runtime_sched->nmspinning, -1);
1087 retry:
1088 runtime_lock(&runtime_sched->lock);
1089 mput(m);
1090 runtime_unlock(&runtime_sched->lock);
1091 runtime_notesleep(&m->park);
1092 m = g->m;
1093 runtime_noteclear(&m->park);
1094 if(m->helpgc) {
1095 runtime_gchelper();
1096 m->helpgc = 0;
1097 m->mcache = nil;
1098 goto retry;
1100 acquirep((P*)m->nextp);
1101 m->nextp = 0;
1104 static void
1105 mspinning(void)
1107 g->m->spinning = true;
1110 // Schedules some M to run the p (creates an M if necessary).
1111 // If p==nil, tries to get an idle P, if no idle P's does nothing.
1112 static void
1113 startm(P *p, bool spinning)
1115 M *mp;
1116 void (*fn)(void);
1118 runtime_lock(&runtime_sched->lock);
1119 if(p == nil) {
1120 p = pidleget();
1121 if(p == nil) {
1122 runtime_unlock(&runtime_sched->lock);
1123 if(spinning)
1124 runtime_xadd(&runtime_sched->nmspinning, -1);
1125 return;
1128 mp = mget();
1129 runtime_unlock(&runtime_sched->lock);
1130 if(mp == nil) {
1131 fn = nil;
1132 if(spinning)
1133 fn = mspinning;
1134 newm(fn, p);
1135 return;
1137 if(mp->spinning)
1138 runtime_throw("startm: m is spinning");
1139 if(mp->nextp)
1140 runtime_throw("startm: m has p");
1141 mp->spinning = spinning;
1142 mp->nextp = (uintptr)p;
1143 runtime_notewakeup(&mp->park);
1146 // Hands off P from syscall or locked M.
1147 static void
1148 handoffp(P *p)
1150 // if it has local work, start it straight away
1151 if(p->runqhead != p->runqtail || runtime_sched->runqsize) {
1152 startm(p, false);
1153 return;
1155 // no local work, check that there are no spinning/idle M's,
1156 // otherwise our help is not required
1157 if(runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) == 0 && // TODO: fast atomic
1158 runtime_cas(&runtime_sched->nmspinning, 0, 1)) {
1159 startm(p, true);
1160 return;
1162 runtime_lock(&runtime_sched->lock);
1163 if(runtime_sched->gcwaiting) {
1164 p->status = _Pgcstop;
1165 if(--runtime_sched->stopwait == 0)
1166 runtime_notewakeup(&runtime_sched->stopnote);
1167 runtime_unlock(&runtime_sched->lock);
1168 return;
1170 if(runtime_sched->runqsize) {
1171 runtime_unlock(&runtime_sched->lock);
1172 startm(p, false);
1173 return;
1175 // If this is the last running P and nobody is polling network,
1176 // need to wakeup another M to poll network.
1177 if(runtime_sched->npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched->lastpoll) != 0) {
1178 runtime_unlock(&runtime_sched->lock);
1179 startm(p, false);
1180 return;
1182 pidleput(p);
1183 runtime_unlock(&runtime_sched->lock);
1186 // Tries to add one more P to execute G's.
1187 // Called when a G is made runnable (newproc, ready).
1188 static void
1189 wakep(void)
1191 // be conservative about spinning threads
1192 if(!runtime_cas(&runtime_sched->nmspinning, 0, 1))
1193 return;
1194 startm(nil, true);
1197 // Stops execution of the current m that is locked to a g until the g is runnable again.
1198 // Returns with acquired P.
1199 static void
1200 stoplockedm(void)
1202 M *m;
1203 P *p;
1205 m = g->m;
1206 if(m->lockedg == nil || m->lockedg->lockedm != m)
1207 runtime_throw("stoplockedm: inconsistent locking");
1208 if(m->p) {
1209 // Schedule another M to run this p.
1210 p = releasep();
1211 handoffp(p);
1213 incidlelocked(1);
1214 // Wait until another thread schedules lockedg again.
1215 runtime_notesleep(&m->park);
1216 m = g->m;
1217 runtime_noteclear(&m->park);
1218 if(m->lockedg->atomicstatus != _Grunnable)
1219 runtime_throw("stoplockedm: not runnable");
1220 acquirep((P*)m->nextp);
1221 m->nextp = 0;
1224 // Schedules the locked m to run the locked gp.
1225 static void
1226 startlockedm(G *gp)
1228 M *mp;
1229 P *p;
1231 mp = gp->lockedm;
1232 if(mp == g->m)
1233 runtime_throw("startlockedm: locked to me");
1234 if(mp->nextp)
1235 runtime_throw("startlockedm: m has p");
1236 // directly handoff current P to the locked m
1237 incidlelocked(-1);
1238 p = releasep();
1239 mp->nextp = (uintptr)p;
1240 runtime_notewakeup(&mp->park);
1241 stopm();
1244 // Stops the current m for stoptheworld.
1245 // Returns when the world is restarted.
1246 static void
1247 gcstopm(void)
1249 P *p;
1251 if(!runtime_sched->gcwaiting)
1252 runtime_throw("gcstopm: not waiting for gc");
1253 if(g->m->spinning) {
1254 g->m->spinning = false;
1255 runtime_xadd(&runtime_sched->nmspinning, -1);
1257 p = releasep();
1258 runtime_lock(&runtime_sched->lock);
1259 p->status = _Pgcstop;
1260 if(--runtime_sched->stopwait == 0)
1261 runtime_notewakeup(&runtime_sched->stopnote);
1262 runtime_unlock(&runtime_sched->lock);
1263 stopm();
1266 // Schedules gp to run on the current M.
1267 // Never returns.
1268 static void
1269 execute(G *gp)
1271 int32 hz;
1273 if(gp->atomicstatus != _Grunnable) {
1274 runtime_printf("execute: bad g status %d\n", gp->atomicstatus);
1275 runtime_throw("execute: bad g status");
1277 gp->atomicstatus = _Grunning;
1278 gp->waitsince = 0;
1279 ((P*)g->m->p)->schedtick++;
1280 g->m->curg = gp;
1281 gp->m = g->m;
1283 // Check whether the profiler needs to be turned on or off.
1284 hz = runtime_sched->profilehz;
1285 if(g->m->profilehz != hz)
1286 runtime_resetcpuprofiler(hz);
1288 runtime_gogo(gp);
1291 // Finds a runnable goroutine to execute.
1292 // Tries to steal from other P's, get g from global queue, poll network.
1293 static G*
1294 findrunnable(void)
1296 G *gp;
1297 P *p;
1298 int32 i;
1300 top:
1301 if(runtime_sched->gcwaiting) {
1302 gcstopm();
1303 goto top;
1305 if(runtime_fingwait && runtime_fingwake && (gp = runtime_wakefing()) != nil)
1306 runtime_ready(gp);
1307 // local runq
1308 gp = runqget((P*)g->m->p);
1309 if(gp)
1310 return gp;
1311 // global runq
1312 if(runtime_sched->runqsize) {
1313 runtime_lock(&runtime_sched->lock);
1314 gp = globrunqget((P*)g->m->p, 0);
1315 runtime_unlock(&runtime_sched->lock);
1316 if(gp)
1317 return gp;
1319 // poll network
1320 gp = runtime_netpoll(false); // non-blocking
1321 if(gp) {
1322 injectglist((G*)gp->schedlink);
1323 gp->atomicstatus = _Grunnable;
1324 return gp;
1326 // If number of spinning M's >= number of busy P's, block.
1327 // This is necessary to prevent excessive CPU consumption
1328 // when GOMAXPROCS>>1 but the program parallelism is low.
1329 if(!g->m->spinning && 2 * runtime_atomicload(&runtime_sched->nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched->npidle)) // TODO: fast atomic
1330 goto stop;
1331 if(!g->m->spinning) {
1332 g->m->spinning = true;
1333 runtime_xadd(&runtime_sched->nmspinning, 1);
1335 // random steal from other P's
1336 for(i = 0; i < 2*runtime_gomaxprocs; i++) {
1337 if(runtime_sched->gcwaiting)
1338 goto top;
1339 p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
1340 if(p == (P*)g->m->p)
1341 gp = runqget(p);
1342 else
1343 gp = runqsteal((P*)g->m->p, p);
1344 if(gp)
1345 return gp;
1347 stop:
1348 // return P and block
1349 runtime_lock(&runtime_sched->lock);
1350 if(runtime_sched->gcwaiting) {
1351 runtime_unlock(&runtime_sched->lock);
1352 goto top;
1354 if(runtime_sched->runqsize) {
1355 gp = globrunqget((P*)g->m->p, 0);
1356 runtime_unlock(&runtime_sched->lock);
1357 return gp;
1359 p = releasep();
1360 pidleput(p);
1361 runtime_unlock(&runtime_sched->lock);
1362 if(g->m->spinning) {
1363 g->m->spinning = false;
1364 runtime_xadd(&runtime_sched->nmspinning, -1);
1366 // check all runqueues once again
1367 for(i = 0; i < runtime_gomaxprocs; i++) {
1368 p = runtime_allp[i];
1369 if(p && p->runqhead != p->runqtail) {
1370 runtime_lock(&runtime_sched->lock);
1371 p = pidleget();
1372 runtime_unlock(&runtime_sched->lock);
1373 if(p) {
1374 acquirep(p);
1375 goto top;
1377 break;
1380 // poll network
1381 if(runtime_xchg64(&runtime_sched->lastpoll, 0) != 0) {
1382 if(g->m->p)
1383 runtime_throw("findrunnable: netpoll with p");
1384 if(g->m->spinning)
1385 runtime_throw("findrunnable: netpoll with spinning");
1386 gp = runtime_netpoll(true); // block until new work is available
1387 runtime_atomicstore64(&runtime_sched->lastpoll, runtime_nanotime());
1388 if(gp) {
1389 runtime_lock(&runtime_sched->lock);
1390 p = pidleget();
1391 runtime_unlock(&runtime_sched->lock);
1392 if(p) {
1393 acquirep(p);
1394 injectglist((G*)gp->schedlink);
1395 gp->atomicstatus = _Grunnable;
1396 return gp;
1398 injectglist(gp);
1401 stopm();
1402 goto top;
1405 static void
1406 resetspinning(void)
1408 int32 nmspinning;
1410 if(g->m->spinning) {
1411 g->m->spinning = false;
1412 nmspinning = runtime_xadd(&runtime_sched->nmspinning, -1);
1413 if(nmspinning < 0)
1414 runtime_throw("findrunnable: negative nmspinning");
1415 } else
1416 nmspinning = runtime_atomicload(&runtime_sched->nmspinning);
1418 // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
1419 // so see if we need to wakeup another P here.
1420 if (nmspinning == 0 && runtime_atomicload(&runtime_sched->npidle) > 0)
1421 wakep();
1424 // Injects the list of runnable G's into the scheduler.
1425 // Can run concurrently with GC.
1426 static void
1427 injectglist(G *glist)
1429 int32 n;
1430 G *gp;
1432 if(glist == nil)
1433 return;
1434 runtime_lock(&runtime_sched->lock);
1435 for(n = 0; glist; n++) {
1436 gp = glist;
1437 glist = (G*)gp->schedlink;
1438 gp->atomicstatus = _Grunnable;
1439 globrunqput(gp);
1441 runtime_unlock(&runtime_sched->lock);
1443 for(; n && runtime_sched->npidle; n--)
1444 startm(nil, false);
1447 // One round of scheduler: find a runnable goroutine and execute it.
1448 // Never returns.
1449 static void
1450 schedule(void)
1452 G *gp;
1453 uint32 tick;
1455 if(g->m->locks)
1456 runtime_throw("schedule: holding locks");
1458 top:
1459 if(runtime_sched->gcwaiting) {
1460 gcstopm();
1461 goto top;
1464 gp = nil;
1465 // Check the global runnable queue once in a while to ensure fairness.
1466 // Otherwise two goroutines can completely occupy the local runqueue
1467 // by constantly respawning each other.
1468 tick = ((P*)g->m->p)->schedtick;
1469 // This is a fancy way to say tick%61==0,
1470 // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
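// Concretely: 0x4325c53f == 1126548799 == ceil(2^36/61), so for a 32-bit
// tick the expression ((uint64)tick*0x4325c53f)>>36 equals tick/61, and
// tick - (tick/61)*61 is exactly tick%61.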
1471 if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched->runqsize > 0) {
1472 runtime_lock(&runtime_sched->lock);
1473 gp = globrunqget((P*)g->m->p, 1);
1474 runtime_unlock(&runtime_sched->lock);
1475 if(gp)
1476 resetspinning();
1478 if(gp == nil) {
1479 gp = runqget((P*)g->m->p);
1480 if(gp && g->m->spinning)
1481 runtime_throw("schedule: spinning with local work");
1483 if(gp == nil) {
1484 gp = findrunnable(); // blocks until work is available
1485 resetspinning();
1488 if(gp->lockedm) {
1489 // Hands off own p to the locked m,
1490 // then blocks waiting for a new p.
1491 startlockedm(gp);
1492 goto top;
1495 execute(gp);
1498 // Puts the current goroutine into a waiting state and calls unlockf.
1499 // If unlockf returns false, the goroutine is resumed.
1500 void
1501 runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason)
1503 if(g->atomicstatus != _Grunning)
1504 runtime_throw("bad g status");
1505 g->m->waitlock = lock;
1506 g->m->waitunlockf = unlockf;
1507 g->waitreason = runtime_gostringnocopy((const byte*)reason);
1508 runtime_mcall(park0);
1511 void gopark(FuncVal *, void *, String, byte, int)
1512 __asm__ (GOSYM_PREFIX "runtime.gopark");
1514 void
1515 gopark(FuncVal *unlockf, void *lock, String reason,
1516 byte traceEv __attribute__ ((unused)),
1517 int traceskip __attribute__ ((unused)))
1519 if(g->atomicstatus != _Grunning)
1520 runtime_throw("bad g status");
1521 g->m->waitlock = lock;
1522 g->m->waitunlockf = unlockf == nil ? nil : (void*)unlockf->fn;
1523 g->waitreason = reason;
1524 runtime_mcall(park0);
1527 static bool
1528 parkunlock(G *gp, void *lock)
1530 USED(gp);
1531 runtime_unlock(lock);
1532 return true;
1535 // Puts the current goroutine into a waiting state and unlocks the lock.
1536 // The goroutine can be made runnable again by calling runtime_ready(gp).
1537 void
1538 runtime_parkunlock(Lock *lock, const char *reason)
1540 runtime_park(parkunlock, lock, reason);
1543 void goparkunlock(Lock *, String, byte, int)
1544 __asm__ (GOSYM_PREFIX "runtime.goparkunlock");
1546 void
1547 goparkunlock(Lock *lock, String reason, byte traceEv __attribute__ ((unused)),
1548 int traceskip __attribute__ ((unused)))
1550 if(g->atomicstatus != _Grunning)
1551 runtime_throw("bad g status");
1552 g->m->waitlock = lock;
1553 g->m->waitunlockf = parkunlock;
1554 g->waitreason = reason;
1555 runtime_mcall(park0);
1558 // runtime_park continuation on g0.
1559 static void
1560 park0(G *gp)
1562 M *m;
1563 bool ok;
1565 m = g->m;
1566 gp->atomicstatus = _Gwaiting;
1567 gp->m = nil;
1568 m->curg = nil;
1569 if(m->waitunlockf) {
1570 ok = ((bool (*)(G*, void*))m->waitunlockf)(gp, m->waitlock);
1571 m->waitunlockf = nil;
1572 m->waitlock = nil;
1573 if(!ok) {
1574 gp->atomicstatus = _Grunnable;
1575 execute(gp); // Schedule it back, never returns.
1578 if(m->lockedg) {
1579 stoplockedm();
1580 execute(gp); // Never returns.
1582 schedule();
1585 // Scheduler yield.
1586 void
1587 runtime_gosched(void)
1589 if(g->atomicstatus != _Grunning)
1590 runtime_throw("bad g status");
1591 runtime_mcall(runtime_gosched0);
1594 // runtime_gosched continuation on g0.
1595 void
1596 runtime_gosched0(G *gp)
1598 M *m;
1600 m = g->m;
1601 gp->atomicstatus = _Grunnable;
1602 gp->m = nil;
1603 m->curg = nil;
1604 runtime_lock(&runtime_sched->lock);
1605 globrunqput(gp);
1606 runtime_unlock(&runtime_sched->lock);
1607 if(m->lockedg) {
1608 stoplockedm();
1609 execute(gp); // Never returns.
1611 schedule();
1614 // Finishes execution of the current goroutine.
1615 // Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack).
1616 // Since it does not return it does not matter. But if it is preempted
1617 // at the split stack check, GC will complain about inconsistent sp.
1618 void runtime_goexit1(void) __attribute__ ((noinline));
1619 void
1620 runtime_goexit1(void)
1622 if(g->atomicstatus != _Grunning)
1623 runtime_throw("bad g status");
1624 runtime_mcall(goexit0);
1627 // runtime_goexit1 continuation on g0.
1628 static void
1629 goexit0(G *gp)
1631 M *m;
1633 m = g->m;
1634 gp->atomicstatus = _Gdead;
1635 gp->entry = nil;
1636 gp->m = nil;
1637 gp->lockedm = nil;
1638 gp->paniconfault = 0;
1639 gp->_defer = nil; // should be true already but just in case.
1640 gp->_panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
1641 gp->writebuf.__values = nil;
1642 gp->writebuf.__count = 0;
1643 gp->writebuf.__capacity = 0;
1644 gp->waitreason = runtime_gostringnocopy(nil);
1645 gp->param = nil;
1646 m->curg = nil;
1647 m->lockedg = nil;
1648 if(m->locked & ~_LockExternal) {
1649 runtime_printf("invalid m->locked = %d\n", m->locked);
1650 runtime_throw("internal lockOSThread error");
1652 m->locked = 0;
1653 gfput((P*)m->p, gp);
1654 schedule();
1657 // The goroutine g is about to enter a system call.
1658 // Record that it's not using the cpu anymore.
1659 // This is called only from the go syscall library and cgocall,
1660 // not from the low-level system calls used by the runtime.
1662 // Entersyscall cannot split the stack: the runtime_gosave must
1663 // make g->sched refer to the caller's stack segment, because
1664 // entersyscall is going to return immediately after.
1666 void runtime_entersyscall(int32) __attribute__ ((no_split_stack));
1667 static void doentersyscall(uintptr, uintptr)
1668 __attribute__ ((no_split_stack, noinline));
1670 void
1671 runtime_entersyscall(int32 dummy __attribute__ ((unused)))
1673 // Save the registers in the g structure so that any pointers
1674 // held in registers will be seen by the garbage collector.
1675 getcontext(ucontext_arg(&g->gcregs[0]));
1677 // Do the work in a separate function, so that this function
1678 // doesn't save any registers on its own stack. If this
1679 // function does save any registers, we might store the wrong
1680 // value in the call to getcontext.
1682 // FIXME: This assumes that we do not need to save any
1683 // callee-saved registers to access the TLS variable g. We
1684 // don't want to put the ucontext_t on the stack because it is
1685 // large and we can not split the stack here.
1686 doentersyscall((uintptr)runtime_getcallerpc(&dummy),
1687 (uintptr)runtime_getcallersp(&dummy));
1690 static void
1691 doentersyscall(uintptr pc, uintptr sp)
1693 // Disable preemption because during this function g is in _Gsyscall status,
1694 // but can have inconsistent g->sched, do not let GC observe it.
1695 g->m->locks++;
1697 // Leave SP around for GC and traceback.
1698 #ifdef USING_SPLIT_STACK
1700 size_t gcstacksize;
1701 g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
1702 &g->gcnextsegment, &g->gcnextsp,
1703 &g->gcinitialsp);
1704 g->gcstacksize = (uintptr)gcstacksize;
1706 #else
1708 void *v;
1710 g->gcnextsp = (byte *) &v;
1712 #endif
1714 g->syscallsp = sp;
1715 g->syscallpc = pc;
1717 g->atomicstatus = _Gsyscall;
1719 if(runtime_atomicload(&runtime_sched->sysmonwait)) { // TODO: fast atomic
1720 runtime_lock(&runtime_sched->lock);
1721 if(runtime_atomicload(&runtime_sched->sysmonwait)) {
1722 runtime_atomicstore(&runtime_sched->sysmonwait, 0);
1723 runtime_notewakeup(&runtime_sched->sysmonnote);
1725 runtime_unlock(&runtime_sched->lock);
1728 g->m->mcache = nil;
1729 ((P*)(g->m->p))->m = 0;
1730 runtime_atomicstore(&((P*)g->m->p)->status, _Psyscall);
1731 if(runtime_atomicload(&runtime_sched->gcwaiting)) {
1732 runtime_lock(&runtime_sched->lock);
1733 if (runtime_sched->stopwait > 0 && runtime_cas(&((P*)g->m->p)->status, _Psyscall, _Pgcstop)) {
1734 if(--runtime_sched->stopwait == 0)
1735 runtime_notewakeup(&runtime_sched->stopnote);
1737 runtime_unlock(&runtime_sched->lock);
1740 g->m->locks--;
1743 // The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
1744 void
1745 runtime_entersyscallblock(int32 dummy __attribute__ ((unused)))
1747 P *p;
1749 g->m->locks++; // see comment in entersyscall
1751 // Leave SP around for GC and traceback.
1752 #ifdef USING_SPLIT_STACK
1754 size_t gcstacksize;
1755 g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
1756 &g->gcnextsegment, &g->gcnextsp,
1757 &g->gcinitialsp);
1758 g->gcstacksize = (uintptr)gcstacksize;
1760 #else
1761 g->gcnextsp = (byte *) &p;
1762 #endif
1764 // Save the registers in the g structure so that any pointers
1765 // held in registers will be seen by the garbage collector.
1766 getcontext(ucontext_arg(&g->gcregs[0]));
1768 g->syscallpc = (uintptr)runtime_getcallerpc(&dummy);
1769 g->syscallsp = (uintptr)runtime_getcallersp(&dummy);
1771 g->atomicstatus = _Gsyscall;
1773 p = releasep();
1774 handoffp(p);
1775 if(g->isbackground) // do not consider blocked scavenger for deadlock detection
1776 incidlelocked(1);
1778 g->m->locks--;
1781 // The goroutine g exited its system call.
1782 // Arrange for it to run on a cpu again.
1783 // This is called only from the go syscall library, not
1784 // from the low-level system calls used by the runtime.
1785 void
1786 runtime_exitsyscall(int32 dummy __attribute__ ((unused)))
1788 G *gp;
1790 gp = g;
1791 gp->m->locks++; // see comment in entersyscall
1793 if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
1794 incidlelocked(-1);
1796 gp->waitsince = 0;
1797 if(exitsyscallfast()) {
1798 // There's a cpu for us, so we can run.
1799 ((P*)gp->m->p)->syscalltick++;
1800 gp->atomicstatus = _Grunning;
1801 // Garbage collector isn't running (since we are),
1802 // so okay to clear gcstack and gcsp.
1803 #ifdef USING_SPLIT_STACK
1804 gp->gcstack = nil;
1805 #endif
1806 gp->gcnextsp = nil;
1807 runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
1808 gp->syscallsp = 0;
1809 gp->m->locks--;
1810 return;
1813 gp->m->locks--;
1815 // Call the scheduler.
1816 runtime_mcall(exitsyscall0);
1818 // Scheduler returned, so we're allowed to run now.
1819 // Delete the gcstack information that we left for
1820 // the garbage collector during the system call.
1821 // Must wait until now because until gosched returns
1822 // we don't know for sure that the garbage collector
1823 // is not running.
1824 #ifdef USING_SPLIT_STACK
1825 gp->gcstack = nil;
1826 #endif
1827 gp->gcnextsp = nil;
1828 runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
1830 gp->syscallsp = 0;
1832 // Note that this gp->m might be different than the earlier
1833 // gp->m after returning from runtime_mcall.
1834 ((P*)gp->m->p)->syscalltick++;
1837 static bool
1838 exitsyscallfast(void)
1840 G *gp;
1841 P *p;
1843 gp = g;
1845 // Freezetheworld sets stopwait but does not retake P's.
1846 if(runtime_sched->stopwait) {
1847 gp->m->p = 0;
1848 return false;
1851 // Try to re-acquire the last P.
1852 if(gp->m->p && ((P*)gp->m->p)->status == _Psyscall && runtime_cas(&((P*)gp->m->p)->status, _Psyscall, _Prunning)) {
1853 // There's a cpu for us, so we can run.
1854 gp->m->mcache = ((P*)gp->m->p)->mcache;
1855 ((P*)gp->m->p)->m = (uintptr)gp->m;
1856 return true;
1858 // Try to get any other idle P.
1859 gp->m->p = 0;
1860 if(runtime_sched->pidle) {
1861 runtime_lock(&runtime_sched->lock);
1862 p = pidleget();
1863 if(p && runtime_atomicload(&runtime_sched->sysmonwait)) {
1864 runtime_atomicstore(&runtime_sched->sysmonwait, 0);
1865 runtime_notewakeup(&runtime_sched->sysmonnote);
1867 runtime_unlock(&runtime_sched->lock);
1868 if(p) {
1869 acquirep(p);
1870 return true;
1873 return false;
1876 // runtime_exitsyscall slow path on g0.
1877 // Failed to acquire P, enqueue gp as runnable.
1878 static void
1879 exitsyscall0(G *gp)
1881 M *m;
1882 P *p;
1884 m = g->m;
1885 gp->atomicstatus = _Grunnable;
1886 gp->m = nil;
1887 m->curg = nil;
1888 runtime_lock(&runtime_sched->lock);
1889 p = pidleget();
1890 if(p == nil)
1891 globrunqput(gp);
1892 else if(runtime_atomicload(&runtime_sched->sysmonwait)) {
1893 runtime_atomicstore(&runtime_sched->sysmonwait, 0);
1894 runtime_notewakeup(&runtime_sched->sysmonnote);
1896 runtime_unlock(&runtime_sched->lock);
1897 if(p) {
1898 acquirep(p);
1899 execute(gp); // Never returns.
1901 if(m->lockedg) {
1902 // Wait until another thread schedules gp and so m again.
1903 stoplockedm();
1904 execute(gp); // Never returns.
1906 stopm();
1907 schedule(); // Never returns.
1910 void syscall_entersyscall(void)
1911 __asm__(GOSYM_PREFIX "syscall.Entersyscall");
1913 void syscall_entersyscall(void) __attribute__ ((no_split_stack));
1915 void
1916 syscall_entersyscall()
1918 runtime_entersyscall(0);
1921 void syscall_exitsyscall(void)
1922 __asm__(GOSYM_PREFIX "syscall.Exitsyscall");
1924 void syscall_exitsyscall(void) __attribute__ ((no_split_stack));
1926 void
1927 syscall_exitsyscall()
1929 runtime_exitsyscall(0);
1932 // Called from syscall package before fork.
1933 void syscall_runtime_BeforeFork(void)
1934 __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork");
1935 void
1936 syscall_runtime_BeforeFork(void)
1938 // Fork can hang if preempted with signals frequently enough (see issue 5517).
1939 // Ensure that we stay on the same M where we disable profiling.
1940 runtime_m()->locks++;
1941 if(runtime_m()->profilehz != 0)
1942 runtime_resetcpuprofiler(0);
1945 // Called from syscall package after fork in parent.
1946 void syscall_runtime_AfterFork(void)
1947 __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork");
1948 void
1949 syscall_runtime_AfterFork(void)
1951 int32 hz;
1953 hz = runtime_sched->profilehz;
1954 if(hz != 0)
1955 runtime_resetcpuprofiler(hz);
1956 runtime_m()->locks--;
1959 // Allocate a new g, with a stack big enough for stacksize bytes.
1960 G*
1961 runtime_malg(bool allocatestack, bool signalstack, byte** ret_stack, uintptr* ret_stacksize)
1963 uintptr stacksize;
1964 G *newg;
1965 byte* unused_stack;
1966 uintptr unused_stacksize;
1967 #if USING_SPLIT_STACK
1968 int dont_block_signals = 0;
1969 size_t ss_stacksize;
1970 #endif
1972 if (ret_stack == nil) {
1973 ret_stack = &unused_stack;
1975 if (ret_stacksize == nil) {
1976 ret_stacksize = &unused_stacksize;
1978 newg = allocg();
1979 if(allocatestack) {
1980 stacksize = StackMin;
1981 if(signalstack) {
1982 stacksize = 32 * 1024; // OS X wants >= 8K, GNU/Linux >= 2K
1983 #ifdef SIGSTKSZ
1984 if(stacksize < SIGSTKSZ)
1985 stacksize = SIGSTKSZ;
1986 #endif
1989 #if USING_SPLIT_STACK
1990 *ret_stack = __splitstack_makecontext(stacksize,
1991 &newg->stackcontext[0],
1992 &ss_stacksize);
1993 *ret_stacksize = (uintptr)ss_stacksize;
1994 __splitstack_block_signals_context(&newg->stackcontext[0],
1995 &dont_block_signals, nil);
1996 #else
1997 // In 64-bit mode, the maximum Go allocation space is
1998 // 128G. Our stack size is 4M, which only permits 32K
1999 // goroutines. In order to not limit ourselves,
2000 // allocate the stacks out of separate memory. In
2001 // 32-bit mode, the Go allocation space is all of
2002 // memory anyhow.
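// (Arithmetic behind the figure above: 128G / 4M = 32768, i.e. about 32K
// goroutine stacks if they had to come out of the Go heap.)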
2003 if(sizeof(void*) == 8) {
2004 void *p = runtime_SysAlloc(stacksize, &mstats()->other_sys);
2005 if(p == nil)
2006 runtime_throw("runtime: cannot allocate memory for goroutine stack");
2007 *ret_stack = (byte*)p;
2008 } else {
2009 *ret_stack = runtime_mallocgc(stacksize, 0, FlagNoProfiling|FlagNoGC);
2010 runtime_xadd(&runtime_stacks_sys, stacksize);
2012 *ret_stacksize = (uintptr)stacksize;
2013 newg->gcinitialsp = *ret_stack;
2014 newg->gcstacksize = (uintptr)stacksize;
2015 #endif
2017 return newg;
2020 G*
2021 __go_go(void (*fn)(void*), void* arg)
2023 byte *sp;
2024 size_t spsize;
2025 G *newg;
2026 P *p;
2028 //runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
2029 if(fn == nil) {
2030 g->m->throwing = -1; // do not dump full stacks
2031 runtime_throw("go of nil func value");
2033 g->m->locks++; // disable preemption because it can be holding p in a local var
2035 p = (P*)g->m->p;
2036 if((newg = gfget(p)) != nil) {
2037 #ifdef USING_SPLIT_STACK
2038 int dont_block_signals = 0;
2040 sp = __splitstack_resetcontext(&newg->stackcontext[0],
2041 &spsize);
2042 __splitstack_block_signals_context(&newg->stackcontext[0],
2043 &dont_block_signals, nil);
2044 #else
2045 sp = newg->gcinitialsp;
2046 spsize = newg->gcstacksize;
2047 if(spsize == 0)
2048 runtime_throw("bad spsize in __go_go");
2049 newg->gcnextsp = sp;
2050 #endif
2051 } else {
2052 uintptr malsize;
2054 newg = runtime_malg(true, false, &sp, &malsize);
2055 spsize = (size_t)malsize;
2056 newg->atomicstatus = _Gdead;
2057 allgadd(newg);
2060 newg->entry = (byte*)fn;
2061 newg->param = arg;
2062 newg->gopc = (uintptr)__builtin_return_address(0);
2063 newg->atomicstatus = _Grunnable;
2064 if(p->goidcache == p->goidcacheend) {
2065 p->goidcache = runtime_xadd64(&runtime_sched->goidgen, GoidCacheBatch);
2066 p->goidcacheend = p->goidcache + GoidCacheBatch;
2068 newg->goid = p->goidcache++;
2070 makeGContext(newg, sp, (uintptr)spsize);
2072 runqput(p, newg);
2074 if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
2075 wakep();
2076 g->m->locks--;
2077 return newg;
2080 // Put on gfree list.
2081 // If local list is too long, transfer a batch to the global list.
2082 static void
2083 gfput(P *p, G *gp)
2085 gp->schedlink = (uintptr)p->gfree;
2086 p->gfree = gp;
2087 p->gfreecnt++;
2088 if(p->gfreecnt >= 64) {
2089 runtime_lock(&runtime_sched->gflock);
2090 while(p->gfreecnt >= 32) {
2091 p->gfreecnt--;
2092 gp = p->gfree;
2093 p->gfree = (G*)gp->schedlink;
2094 gp->schedlink = (uintptr)runtime_sched->gfree;
2095 runtime_sched->gfree = gp;
2097 runtime_unlock(&runtime_sched->gflock);
2101 // Get from gfree list.
2102 // If local list is empty, grab a batch from global list.
2103 static G*
2104 gfget(P *p)
2106 G *gp;
2108 retry:
2109 gp = p->gfree;
2110 if(gp == nil && runtime_sched->gfree) {
2111 runtime_lock(&runtime_sched->gflock);
2112 while(p->gfreecnt < 32 && runtime_sched->gfree) {
2113 p->gfreecnt++;
2114 gp = runtime_sched->gfree;
2115 runtime_sched->gfree = (G*)gp->schedlink;
2116 gp->schedlink = (uintptr)p->gfree;
2117 p->gfree = gp;
2119 runtime_unlock(&runtime_sched->gflock);
2120 goto retry;
2122 if(gp) {
2123 p->gfree = (G*)gp->schedlink;
2124 p->gfreecnt--;
2126 return gp;
2129 // Purge all cached G's from gfree list to the global list.
2130 static void
2131 gfpurge(P *p)
2133 G *gp;
2135 runtime_lock(&runtime_sched->gflock);
2136 while(p->gfreecnt) {
2137 p->gfreecnt--;
2138 gp = p->gfree;
2139 p->gfree = (G*)gp->schedlink;
2140 gp->schedlink = (uintptr)runtime_sched->gfree;
2141 runtime_sched->gfree = gp;
2143 runtime_unlock(&runtime_sched->gflock);
2146 void
2147 runtime_Breakpoint(void)
2149 runtime_breakpoint();
2152 void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched");
2154 void
2155 runtime_Gosched(void)
2157 runtime_gosched();
2160 // Implementation of runtime.GOMAXPROCS.
2161 // delete when scheduler is even stronger
2163 intgo runtime_GOMAXPROCS(intgo)
2164 __asm__(GOSYM_PREFIX "runtime.GOMAXPROCS");
2166 intgo
2167 runtime_GOMAXPROCS(intgo n)
2169 intgo ret;
2171 if(n > _MaxGomaxprocs)
2172 n = _MaxGomaxprocs;
2173 runtime_lock(&runtime_sched->lock);
2174 ret = (intgo)runtime_gomaxprocs;
2175 if(n <= 0 || n == ret) {
2176 runtime_unlock(&runtime_sched->lock);
2177 return ret;
2179 runtime_unlock(&runtime_sched->lock);
2181 runtime_acquireWorldsema();
2182 g->m->gcing = 1;
2183 runtime_stopTheWorldWithSema();
2184 newprocs = (int32)n;
2185 g->m->gcing = 0;
2186 runtime_releaseWorldsema();
2187 runtime_startTheWorldWithSema();
2189 return ret;
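// Illustrative call pattern (hypothetical caller): a non-positive n only
// queries the current setting, so it can be saved and restored, and only
// an actual change pays for the stop-the-world.
//
//	intgo old = runtime_GOMAXPROCS(0);	// query; no stop-the-world
//	runtime_GOMAXPROCS(1);			// change; briefly stops the world
//	// ... run with a single P ...
//	runtime_GOMAXPROCS(old);		// restore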
2192 // lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
2193 // after they modify m->locked. Do not allow preemption during this call,
2194 // or else the m might be different in this function than in the caller.
2195 static void
2196 lockOSThread(void)
2198 g->m->lockedg = g;
2199 g->lockedm = g->m;
2202 void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
2203 void
2204 runtime_LockOSThread(void)
2206 g->m->locked |= _LockExternal;
2207 lockOSThread();
2210 void
2211 runtime_lockOSThread(void)
2213 g->m->locked += _LockInternal;
2214 lockOSThread();
2218 // unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
2219 // after they update m->locked. Do not allow preemption during this call,
2220 // or else the m might be different in this function than in the caller.
2221 static void
2222 unlockOSThread(void)
2224 if(g->m->locked != 0)
2225 return;
2226 g->m->lockedg = nil;
2227 g->lockedm = nil;
2230 void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
2232 void
2233 runtime_UnlockOSThread(void)
2235 g->m->locked &= ~_LockExternal;
2236 unlockOSThread();
2239 void
2240 runtime_unlockOSThread(void)
2242 if(g->m->locked < _LockInternal)
2243 runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
2244 g->m->locked -= _LockInternal;
2245 unlockOSThread();
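// Accounting note on m->locked (illustrative): _LockExternal is set and
// cleared as a flag (|= and &= ~), so repeated runtime_LockOSThread calls
// are idempotent and a single runtime_UnlockOSThread undoes them, while
// _LockInternal is added and subtracted as a count, so runtime_lockOSThread
// and runtime_unlockOSThread must be strictly balanced; lockedg/lockedm
// are only cleared once m->locked drops back to zero.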
2248 bool
2249 runtime_lockedOSThread(void)
2251 return g->lockedm != nil && g->m->lockedg != nil;
2254 int32
2255 runtime_mcount(void)
2257 return runtime_sched->mcount;
2260 static struct {
2261 uint32 lock;
2262 int32 hz;
2263 } prof;
2265 static void System(void) {}
2266 static void GC(void) {}
2268 // Called if we receive a SIGPROF signal.
2269 void
2270 runtime_sigprof()
2272 M *mp = g->m;
2273 int32 n, i;
2274 bool traceback;
2275 uintptr pcbuf[TracebackMaxFrames];
2276 Location locbuf[TracebackMaxFrames];
2277 Slice stk;
2279 if(prof.hz == 0)
2280 return;
2282 if(mp == nil)
2283 return;
2285 // Profiling runs concurrently with GC, so it must not allocate.
2286 mp->mallocing++;
2288 traceback = true;
2290 if(mp->mcache == nil)
2291 traceback = false;
2293 n = 0;
2295 if(runtime_atomicload(&runtime_in_callers) > 0) {
2296 // If SIGPROF arrived while we were already fetching runtime
2297 // callers, we can have trouble on older systems,
2298 // because the unwind library calls dl_iterate_phdr,
2299 // which in the past was not safe to call recursively.
2300 traceback = false;
2303 if(traceback) {
2304 n = runtime_callers(0, locbuf, nelem(locbuf), false);
2305 for(i = 0; i < n; i++)
2306 pcbuf[i] = locbuf[i].pc;
2308 if(!traceback || n <= 0) {
2309 n = 2;
2310 pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
2311 if(mp->gcing || mp->helpgc)
2312 pcbuf[1] = (uintptr)GC;
2313 else
2314 pcbuf[1] = (uintptr)System;
2317 if (prof.hz != 0) {
2318 stk.__values = &pcbuf[0];
2319 stk.__count = n;
2320 stk.__capacity = n;
2322 // Simple cas-lock to coordinate with setcpuprofilerate.
2323 while (!runtime_cas(&prof.lock, 0, 1)) {
2324 runtime_osyield();
2326 if (prof.hz != 0) {
2327 runtime_cpuprofAdd(stk);
2329 runtime_atomicstore(&prof.lock, 0);
2332 mp->mallocing--;
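// The prof.lock word is used as a tiny test-and-set spin lock; the same
// idiom written out on its own (sketch of the pattern only):
//
//	while(!runtime_cas(&prof.lock, 0, 1))	// acquire: 0 -> 1, else spin
//		runtime_osyield();
//	// ... critical section (here: signal context, so no allocation) ...
//	runtime_atomicstore(&prof.lock, 0);	// release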
2335 // Arrange for the CPU profiler to collect a traceback hz times a second.
2336 void
2337 runtime_setcpuprofilerate_m(int32 hz)
2339 // Force sane arguments.
2340 if(hz < 0)
2341 hz = 0;
2343 // Disable preemption, otherwise we can be rescheduled to another thread
2344 // that has profiling enabled.
2345 g->m->locks++;
2347 // Stop the profiler on this thread so that it is safe to lock prof:
2348 // if a profiling signal came in while we had prof locked,
2349 // it would deadlock.
2350 runtime_resetcpuprofiler(0);
2352 while (!runtime_cas(&prof.lock, 0, 1)) {
2353 runtime_osyield();
2355 prof.hz = hz;
2356 runtime_atomicstore(&prof.lock, 0);
2358 runtime_lock(&runtime_sched->lock);
2359 runtime_sched->profilehz = hz;
2360 runtime_unlock(&runtime_sched->lock);
2362 if(hz != 0)
2363 runtime_resetcpuprofiler(hz);
2365 g->m->locks--;
2368 // Change number of processors. The world is stopped, sched is locked.
2369 static void
2370 procresize(int32 new)
2372 int32 i, old;
2373 bool pempty;
2374 G *gp;
2375 P *p;
2376 intgo j;
2378 old = runtime_gomaxprocs;
2379 if(old < 0 || old > _MaxGomaxprocs || new <= 0 || new >_MaxGomaxprocs)
2380 runtime_throw("procresize: invalid arg");
2381 // initialize new P's
2382 for(i = 0; i < new; i++) {
2383 p = runtime_allp[i];
2384 if(p == nil) {
2385 p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
2386 p->id = i;
2387 p->status = _Pgcstop;
2388 p->deferpool.__values = &p->deferpoolbuf[0];
2389 p->deferpool.__count = 0;
2390 p->deferpool.__capacity = nelem(p->deferpoolbuf);
2391 runtime_atomicstorep(&runtime_allp[i], p);
2393 if(p->mcache == nil) {
2394 if(old==0 && i==0)
2395 p->mcache = g->m->mcache; // bootstrap
2396 else
2397 p->mcache = runtime_allocmcache();
2401 // redistribute runnable G's evenly
2402 // collect all runnable goroutines in global queue preserving FIFO order
2403 // FIFO order is required to ensure fairness even during frequent GCs
2404 // see http://golang.org/issue/7126
2405 pempty = false;
2406 while(!pempty) {
2407 pempty = true;
2408 for(i = 0; i < old; i++) {
2409 p = runtime_allp[i];
2410 if(p->runqhead == p->runqtail)
2411 continue;
2412 pempty = false;
2413 // pop from tail of local queue
2414 p->runqtail--;
2415 gp = (G*)p->runq[p->runqtail%nelem(p->runq)];
2416 // push onto head of global queue
2417 gp->schedlink = runtime_sched->runqhead;
2418 runtime_sched->runqhead = (uintptr)gp;
2419 if(runtime_sched->runqtail == 0)
2420 runtime_sched->runqtail = (uintptr)gp;
2421 runtime_sched->runqsize++;
2424 // fill local queues with at most nelem(p->runq)/2 goroutines
2425 // start at 1 because current M already executes some G and will acquire allp[0] below,
2426 // so if we have a spare G we want to put it into allp[1].
2427 for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched->runqsize > 0; i++) {
2428 gp = (G*)runtime_sched->runqhead;
2429 runtime_sched->runqhead = gp->schedlink;
2430 if(runtime_sched->runqhead == 0)
2431 runtime_sched->runqtail = 0;
2432 runtime_sched->runqsize--;
2433 runqput(runtime_allp[i%new], gp);
2436 // free unused P's
2437 for(i = new; i < old; i++) {
2438 p = runtime_allp[i];
2439 for(j = 0; j < p->deferpool.__count; j++) {
2440 ((struct _defer**)p->deferpool.__values)[j] = nil;
2442 p->deferpool.__count = 0;
2443 runtime_freemcache(p->mcache);
2444 p->mcache = nil;
2445 gfpurge(p);
2446 p->status = _Pdead;
2447 // can't free P itself because it can be referenced by an M in syscall
2450 if(g->m->p)
2451 ((P*)g->m->p)->m = 0;
2452 g->m->p = 0;
2453 g->m->mcache = nil;
2454 p = runtime_allp[0];
2455 p->m = 0;
2456 p->status = _Pidle;
2457 acquirep(p);
2458 for(i = new-1; i > 0; i--) {
2459 p = runtime_allp[i];
2460 p->status = _Pidle;
2461 pidleput(p);
2463 runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
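// Worked example of the refill above (illustrative numbers): shrinking
// from old=4 to new=2 with five runnable G's (g1..g5, FIFO order) left on
// the global queue after the drain, the loop runs i = 1..5 and does
//
//	runqput(runtime_allp[1], g1);	// i=1, 1%2 == 1
//	runqput(runtime_allp[0], g2);	// i=2, 2%2 == 0
//	runqput(runtime_allp[1], g3);	// i=3
//	runqput(runtime_allp[0], g4);	// i=4
//	runqput(runtime_allp[1], g5);	// i=5
//
// so spare work lands on allp[1] first, since the current M acquires
// allp[0] itself just above.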
2466 // Associate p and the current m.
2467 static void
2468 acquirep(P *p)
2470 M *m;
2472 m = g->m;
2473 if(m->p || m->mcache)
2474 runtime_throw("acquirep: already in go");
2475 if(p->m || p->status != _Pidle) {
2476 runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? ((M*)p->m)->id : 0, p->status);
2477 runtime_throw("acquirep: invalid p state");
2479 m->mcache = p->mcache;
2480 m->p = (uintptr)p;
2481 p->m = (uintptr)m;
2482 p->status = _Prunning;
2485 // Disassociate p and the current m.
2486 static P*
2487 releasep(void)
2489 M *m;
2490 P *p;
2492 m = g->m;
2493 if(m->p == 0 || m->mcache == nil)
2494 runtime_throw("releasep: invalid arg");
2495 p = (P*)m->p;
2496 if((M*)p->m != m || p->mcache != m->mcache || p->status != _Prunning) {
2497 runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
2498 m, m->p, p->m, m->mcache, p->mcache, p->status);
2499 runtime_throw("releasep: invalid p state");
2501 m->p = 0;
2502 m->mcache = nil;
2503 p->m = 0;
2504 p->status = _Pidle;
2505 return p;
2508 static void
2509 incidlelocked(int32 v)
2511 runtime_lock(&runtime_sched->lock);
2512 runtime_sched->nmidlelocked += v;
2513 if(v > 0)
2514 checkdead();
2515 runtime_unlock(&runtime_sched->lock);
2518 static void
2519 sysmon(void)
2521 uint32 idle, delay;
2522 int64 now, lastpoll, lasttrace;
2523 G *gp;
2525 lasttrace = 0;
2526 idle = 0; // how many cycles in succession we have not woken anybody up
2527 delay = 0;
2528 for(;;) {
2529 if(idle == 0) // start with 20us sleep...
2530 delay = 20;
2531 else if(idle > 50) // start doubling the sleep after 1ms...
2532 delay *= 2;
2533 if(delay > 10*1000) // up to 10ms
2534 delay = 10*1000;
2535 runtime_usleep(delay);
2536 if(runtime_debug.schedtrace <= 0 &&
2537 (runtime_sched->gcwaiting || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic
2538 runtime_lock(&runtime_sched->lock);
2539 if(runtime_atomicload(&runtime_sched->gcwaiting) || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs) {
2540 runtime_atomicstore(&runtime_sched->sysmonwait, 1);
2541 runtime_unlock(&runtime_sched->lock);
2542 runtime_notesleep(&runtime_sched->sysmonnote);
2543 runtime_noteclear(&runtime_sched->sysmonnote);
2544 idle = 0;
2545 delay = 20;
2546 } else
2547 runtime_unlock(&runtime_sched->lock);
2549 // poll network if not polled for more than 10ms
2550 lastpoll = runtime_atomicload64(&runtime_sched->lastpoll);
2551 now = runtime_nanotime();
2552 if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
2553 runtime_cas64(&runtime_sched->lastpoll, lastpoll, now);
2554 gp = runtime_netpoll(false); // non-blocking
2555 if(gp) {
2556 // Need to decrement number of idle locked M's
2557 // (pretending that one more is running) before injectglist.
2558 // Otherwise it can lead to the following situation:
2559 // injectglist grabs all P's but before it starts M's to run the P's,
2560 // another M returns from syscall, finishes running its G,
2561 // observes that there is no work to do and no other running M's
2562 // and reports deadlock.
2563 incidlelocked(-1);
2564 injectglist(gp);
2565 incidlelocked(1);
2568 // retake P's blocked in syscalls
2569 // and preempt long running G's
2570 if(retake(now))
2571 idle = 0;
2572 else
2573 idle++;
2575 if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) {
2576 lasttrace = now;
2577 runtime_schedtrace(runtime_debug.scheddetail);
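// The back-off above works out as follows (worked arithmetic): sysmon
// sleeps 20us per cycle for the first 50 idle cycles (about 1ms of sleep
// in total), then doubles the delay on each further idle cycle (40us,
// 80us, 160us, ...) until it is clamped at 10ms, which takes roughly nine
// more doublings (20us * 2^9 = 10.24ms, clamped to 10ms).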
2582 typedef struct Pdesc Pdesc;
2583 struct Pdesc
2585 uint32 schedtick;
2586 int64 schedwhen;
2587 uint32 syscalltick;
2588 int64 syscallwhen;
2590 static Pdesc pdesc[_MaxGomaxprocs];
2592 static uint32
2593 retake(int64 now)
2595 uint32 i, s, n;
2596 int64 t;
2597 P *p;
2598 Pdesc *pd;
2600 n = 0;
2601 for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
2602 p = runtime_allp[i];
2603 if(p==nil)
2604 continue;
2605 pd = &pdesc[i];
2606 s = p->status;
2607 if(s == _Psyscall) {
2608 // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
2609 t = p->syscalltick;
2610 if(pd->syscalltick != t) {
2611 pd->syscalltick = t;
2612 pd->syscallwhen = now;
2613 continue;
2615 // On the one hand we don't want to retake Ps if there is no other work to do,
2616 // but on the other hand we want to retake them eventually
2617 // because they can prevent the sysmon thread from deep sleep.
2618 if(p->runqhead == p->runqtail &&
2619 runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) > 0 &&
2620 pd->syscallwhen + 10*1000*1000 > now)
2621 continue;
2622 // Need to decrement number of idle locked M's
2623 // (pretending that one more is running) before the CAS.
2624 // Otherwise the M from which we retake can exit the syscall,
2625 // increment nmidle and report deadlock.
2626 incidlelocked(-1);
2627 if(runtime_cas(&p->status, s, _Pidle)) {
2628 n++;
2629 handoffp(p);
2631 incidlelocked(1);
2632 } else if(s == _Prunning) {
2633 // Preempt G if it's running for more than 10ms.
2634 t = p->schedtick;
2635 if(pd->schedtick != t) {
2636 pd->schedtick = t;
2637 pd->schedwhen = now;
2638 continue;
2640 if(pd->schedwhen + 10*1000*1000 > now)
2641 continue;
2642 // preemptone(p);
2645 return n;
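// Timing sketch for the syscall case above (illustrative): a P whose
// syscalltick has not changed since the previous sysmon pass is a
// candidate, and it is actually retaken only if it still has local work,
// or if there are no spinning M's and no idle P's to take the work, or
// once it has sat in the syscall for more than 10ms.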
2648 // Tell all goroutines that they have been preempted and they should stop.
2649 // This function is purely best-effort. It can fail to inform a goroutine if a
2650 // processor just started running it.
2651 // No locks need to be held.
2652 // Returns true if preemption request was issued to at least one goroutine.
2653 static bool
2654 preemptall(void)
2656 return false;
2659 // Put mp on midle list.
2660 // Sched must be locked.
2661 static void
2662 mput(M *mp)
2664 mp->schedlink = runtime_sched->midle;
2665 runtime_sched->midle = (uintptr)mp;
2666 runtime_sched->nmidle++;
2667 checkdead();
2670 // Try to get an m from midle list.
2671 // Sched must be locked.
2672 static M*
2673 mget(void)
2675 M *mp;
2677 if((mp = (M*)runtime_sched->midle) != nil){
2678 runtime_sched->midle = mp->schedlink;
2679 runtime_sched->nmidle--;
2681 return mp;
2684 // Put gp on the global runnable queue.
2685 // Sched must be locked.
2686 static void
2687 globrunqput(G *gp)
2689 gp->schedlink = 0;
2690 if(runtime_sched->runqtail)
2691 ((G*)runtime_sched->runqtail)->schedlink = (uintptr)gp;
2692 else
2693 runtime_sched->runqhead = (uintptr)gp;
2694 runtime_sched->runqtail = (uintptr)gp;
2695 runtime_sched->runqsize++;
2698 // Put a batch of runnable goroutines on the global runnable queue.
2699 // Sched must be locked.
2700 static void
2701 globrunqputbatch(G *ghead, G *gtail, int32 n)
2703 gtail->schedlink = 0;
2704 if(runtime_sched->runqtail)
2705 ((G*)runtime_sched->runqtail)->schedlink = (uintptr)ghead;
2706 else
2707 runtime_sched->runqhead = (uintptr)ghead;
2708 runtime_sched->runqtail = (uintptr)gtail;
2709 runtime_sched->runqsize += n;
2712 // Try to get a batch of G's from the global runnable queue.
2713 // Sched must be locked.
2714 static G*
2715 globrunqget(P *p, int32 max)
2717 G *gp, *gp1;
2718 int32 n;
2720 if(runtime_sched->runqsize == 0)
2721 return nil;
2722 n = runtime_sched->runqsize/runtime_gomaxprocs+1;
2723 if(n > runtime_sched->runqsize)
2724 n = runtime_sched->runqsize;
2725 if(max > 0 && n > max)
2726 n = max;
2727 if((uint32)n > nelem(p->runq)/2)
2728 n = nelem(p->runq)/2;
2729 runtime_sched->runqsize -= n;
2730 if(runtime_sched->runqsize == 0)
2731 runtime_sched->runqtail = 0;
2732 gp = (G*)runtime_sched->runqhead;
2733 runtime_sched->runqhead = gp->schedlink;
2734 n--;
2735 while(n--) {
2736 gp1 = (G*)runtime_sched->runqhead;
2737 runtime_sched->runqhead = gp1->schedlink;
2738 runqput(p, gp1);
2740 return gp;
2743 // Put p on the pidle list.
2744 // Sched must be locked.
2745 static void
2746 pidleput(P *p)
2748 p->link = runtime_sched->pidle;
2749 runtime_sched->pidle = (uintptr)p;
2750 runtime_xadd(&runtime_sched->npidle, 1); // TODO: fast atomic
2753 // Try to get a p from the pidle list.
2754 // Sched must be locked.
2755 static P*
2756 pidleget(void)
2758 P *p;
2760 p = (P*)runtime_sched->pidle;
2761 if(p) {
2762 runtime_sched->pidle = p->link;
2763 runtime_xadd(&runtime_sched->npidle, -1); // TODO: fast atomic
2765 return p;
2768 // Try to put g on local runnable queue.
2769 // If it's full, put onto global queue.
2770 // Executed only by the owner P.
2771 static void
2772 runqput(P *p, G *gp)
2774 uint32 h, t;
2776 retry:
2777 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
2778 t = p->runqtail;
2779 if(t - h < nelem(p->runq)) {
2780 p->runq[t%nelem(p->runq)] = (uintptr)gp;
2781 runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
2782 return;
2784 if(runqputslow(p, gp, h, t))
2785 return;
2786 // the queue is not full, so the put above must now succeed
2787 goto retry;
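// A note on the ring arithmetic used by runqput/runqget (sketch): the
// runqhead and runqtail fields are free-running 32-bit counters, and the
// occupancy is the unsigned difference t - h, which stays correct even
// when the counters wrap around, e.g.
//
//	h == 0xfffffff0, t == 0x00000010  =>  t - h == 0x20 (32 queued G's)
//
// so the t - h < nelem(p->runq) test above never needs the counters to be
// reset.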
2790 // Put g and a batch of work from local runnable queue on global queue.
2791 // Executed only by the owner P.
2792 static bool
2793 runqputslow(P *p, G *gp, uint32 h, uint32 t)
2795 G *batch[nelem(p->runq)/2+1];
2796 uint32 n, i;
2798 // First, grab a batch from local queue.
2799 n = t-h;
2800 n = n/2;
2801 if(n != nelem(p->runq)/2)
2802 runtime_throw("runqputslow: queue is not full");
2803 for(i=0; i<n; i++)
2804 batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
2805 if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
2806 return false;
2807 batch[n] = gp;
2808 // Link the goroutines.
2809 for(i=0; i<n; i++)
2810 batch[i]->schedlink = (uintptr)batch[i+1];
2811 // Now put the batch on global queue.
2812 runtime_lock(&runtime_sched->lock);
2813 globrunqputbatch(batch[0], batch[n], n+1);
2814 runtime_unlock(&runtime_sched->lock);
2815 return true;
2818 // Get g from local runnable queue.
2819 // Executed only by the owner P.
2820 static G*
2821 runqget(P *p)
2823 G *gp;
2824 uint32 t, h;
2826 for(;;) {
2827 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
2828 t = p->runqtail;
2829 if(t == h)
2830 return nil;
2831 gp = (G*)p->runq[h%nelem(p->runq)];
2832 if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume
2833 return gp;
2837 // Grabs a batch of goroutines from local runnable queue.
2838 // batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
2839 // Can be executed by any P.
2840 static uint32
2841 runqgrab(P *p, G **batch)
2843 uint32 t, h, n, i;
2845 for(;;) {
2846 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
2847 t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer
2848 n = t-h;
2849 n = n - n/2;
2850 if(n == 0)
2851 break;
2852 if(n > nelem(p->runq)/2) // read inconsistent h and t
2853 continue;
2854 for(i=0; i<n; i++)
2855 batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
2856 if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
2857 break;
2859 return n;
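// The n = n - n/2 step above grabs half of the victim's queue, rounded up
// (worked examples):
//
//	t - h == 5  =>  n == 5 - 2 == 3
//	t - h == 1  =>  n == 1 - 0 == 1
//
// so a steal always makes progress when the victim has at least one G,
// and the consistency check above keeps n within nelem(p->runq)/2.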
2862 // Steal half of the elements from the local runnable queue of p2
2863 // and put them onto the local runnable queue of p.
2864 // Returns one of the stolen elements (or nil if failed).
2865 static G*
2866 runqsteal(P *p, P *p2)
2868 G *gp;
2869 G *batch[nelem(p->runq)/2];
2870 uint32 t, h, n, i;
2872 n = runqgrab(p2, batch);
2873 if(n == 0)
2874 return nil;
2875 n--;
2876 gp = batch[n];
2877 if(n == 0)
2878 return gp;
2879 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
2880 t = p->runqtail;
2881 if(t - h + n >= nelem(p->runq))
2882 runtime_throw("runqsteal: runq overflow");
2883 for(i=0; i<n; i++, t++)
2884 p->runq[t%nelem(p->runq)] = (uintptr)batch[i];
2885 runtime_atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption
2886 return gp;
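// For example (illustrative numbers): if runqgrab returns 3 G's from p2,
// runqsteal hands the last one back to the caller to run immediately and
// copies the remaining 2 into p's ring, advancing p->runqtail by 2 with a
// single store-release.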
2889 void runtime_testSchedLocalQueue(void)
2890 __asm__(GOSYM_PREFIX "runtime.testSchedLocalQueue");
2892 void
2893 runtime_testSchedLocalQueue(void)
2895 P p;
2896 G gs[nelem(p.runq)];
2897 int32 i, j;
2899 runtime_memclr((byte*)&p, sizeof(p));
2901 for(i = 0; i < (int32)nelem(gs); i++) {
2902 if(runqget(&p) != nil)
2903 runtime_throw("runq is not empty initially");
2904 for(j = 0; j < i; j++)
2905 runqput(&p, &gs[i]);
2906 for(j = 0; j < i; j++) {
2907 if(runqget(&p) != &gs[i]) {
2908 runtime_printf("bad element at iter %d/%d\n", i, j);
2909 runtime_throw("bad element");
2912 if(runqget(&p) != nil)
2913 runtime_throw("runq is not empty afterwards");
2917 void runtime_testSchedLocalQueueSteal(void)
2918 __asm__(GOSYM_PREFIX "runtime.testSchedLocalQueueSteal");
2920 void
2921 runtime_testSchedLocalQueueSteal(void)
2923 P p1, p2;
2924 G gs[nelem(p1.runq)], *gp;
2925 int32 i, j, s;
2927 runtime_memclr((byte*)&p1, sizeof(p1));
2928 runtime_memclr((byte*)&p2, sizeof(p2));
2930 for(i = 0; i < (int32)nelem(gs); i++) {
2931 for(j = 0; j < i; j++) {
2932 gs[j].sig = 0;
2933 runqput(&p1, &gs[j]);
2935 gp = runqsteal(&p2, &p1);
2936 s = 0;
2937 if(gp) {
2938 s++;
2939 gp->sig++;
2941 while((gp = runqget(&p2)) != nil) {
2942 s++;
2943 gp->sig++;
2945 while((gp = runqget(&p1)) != nil)
2946 gp->sig++;
2947 for(j = 0; j < i; j++) {
2948 if(gs[j].sig != 1) {
2949 runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
2950 runtime_throw("bad element");
2953 if(s != i/2 && s != i/2+1) {
2954 runtime_printf("bad steal %d, want %d or %d, iter %d\n",
2955 s, i/2, i/2+1, i);
2956 runtime_throw("bad steal");
2961 intgo
2962 runtime_setmaxthreads(intgo in)
2964 intgo out;
2966 runtime_lock(&runtime_sched->lock);
2967 out = (intgo)runtime_sched->maxmcount;
2968 runtime_sched->maxmcount = (int32)in;
2969 checkmcount();
2970 runtime_unlock(&runtime_sched->lock);
2971 return out;
2974 static intgo
2975 procPin()
2977 M *mp;
2979 mp = runtime_m();
2980 mp->locks++;
2981 return (intgo)(((P*)mp->p)->id);
2984 static void
2985 procUnpin()
2987 runtime_m()->locks--;
2990 intgo sync_runtime_procPin(void)
2991 __asm__ (GOSYM_PREFIX "sync.runtime_procPin");
2993 intgo
2994 sync_runtime_procPin()
2996 return procPin();
2999 void sync_runtime_procUnpin(void)
3000 __asm__ (GOSYM_PREFIX "sync.runtime_procUnpin");
3002 void
3003 sync_runtime_procUnpin()
3005 procUnpin();
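// Expected call pattern on the Go side (hypothetical caller, in the style
// of per-P caches such as sync.Pool): pin to get a stable P index with
// preemption disabled, touch the per-P slot, then unpin.
//
//	intgo id = sync_runtime_procPin();	// mp->locks++, so no preemption
//	// ... use per-P data slot id ...
//	sync_runtime_procUnpin();		// mp->locks--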
3008 intgo sync_atomic_runtime_procPin(void)
3009 __asm__ (GOSYM_PREFIX "sync_atomic.runtime_procPin");
3011 intgo
3012 sync_atomic_runtime_procPin()
3014 return procPin();
3017 void sync_atomic_runtime_procUnpin(void)
3018 __asm__ (GOSYM_PREFIX "sync_atomic.runtime_procUnpin");
3020 void
3021 sync_atomic_runtime_procUnpin()
3023 procUnpin();
3026 // Return whether we are waiting for a GC. This gc toolchain uses
3027 // preemption instead.
3028 bool
3029 runtime_gcwaiting(void)
3031 return runtime_sched->gcwaiting;
3034 // os_beforeExit is called from os.Exit(0).
3035 //go:linkname os_beforeExit os.runtime_beforeExit
3037 extern void os_beforeExit() __asm__ (GOSYM_PREFIX "os.runtime_beforeExit");
3039 void
3040 os_beforeExit()
3044 // Active spinning for sync.Mutex.
3045 //go:linkname sync_runtime_canSpin sync.runtime_canSpin
3047 enum
3049 ACTIVE_SPIN = 4,
3050 ACTIVE_SPIN_CNT = 30,
3053 extern _Bool sync_runtime_canSpin(intgo i)
3054 __asm__ (GOSYM_PREFIX "sync.runtime_canSpin");
3056 _Bool
3057 sync_runtime_canSpin(intgo i)
3059 P *p;
3061 // sync.Mutex is cooperative, so we are conservative with spinning.
3062 // Spin only a few times, and only if we are running on a multicore machine,
3063 // GOMAXPROCS>1, there is at least one other running P, and the local runq is empty.
3064 // Unlike the runtime mutex, we don't do passive spinning here,
3065 // because there can be work on the global runq or on other Ps.
3066 if (i >= ACTIVE_SPIN || runtime_ncpu <= 1 || runtime_gomaxprocs <= (int32)(runtime_sched->npidle+runtime_sched->nmspinning)+1) {
3067 return false;
3069 p = (P*)g->m->p;
3070 return p != nil && p->runqhead == p->runqtail;
3073 //go:linkname sync_runtime_doSpin sync.runtime_doSpin
3074 //go:nosplit
3076 extern void sync_runtime_doSpin(void)
3077 __asm__ (GOSYM_PREFIX "sync.runtime_doSpin");
3079 void
3080 sync_runtime_doSpin()
3082 runtime_procyield(ACTIVE_SPIN_CNT);
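// Sketch of how the two hooks above combine on the sync.Mutex side
// (hypothetical caller; mutex_contended stands in for the real state
// check):
//
//	intgo iter = 0;
//	while(mutex_contended && sync_runtime_canSpin(iter)) {
//		sync_runtime_doSpin();	// runtime_procyield(ACTIVE_SPIN_CNT)
//		iter++;
//	}
//	// otherwise fall back to blocking on a semaphore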
3085 // For Go code to look at variables, until we port proc.go.
3087 extern M** runtime_go_allm(void)
3088 __asm__ (GOSYM_PREFIX "runtime.allm");
3090 M**
3091 runtime_go_allm()
3093 return &runtime_allm;
3096 intgo NumCPU(void) __asm__ (GOSYM_PREFIX "runtime.NumCPU");
3098 intgo
3099 NumCPU()
3101 return (intgo)(runtime_ncpu);