libgo/runtime/proc.c (official-gcc.git)
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 #include <limits.h>
6 #include <signal.h>
7 #include <stdlib.h>
8 #include <pthread.h>
9 #include <unistd.h>
11 #include "config.h"
13 #ifdef HAVE_DL_ITERATE_PHDR
14 #include <link.h>
15 #endif
17 #include "runtime.h"
18 #include "arch.h"
19 #include "defs.h"
20 #include "malloc.h"
21 #include "go-type.h"
23 #ifdef USING_SPLIT_STACK
25 /* FIXME: These are not declared anywhere. */
27 extern void __splitstack_getcontext(void *context[10]);
29 extern void __splitstack_setcontext(void *context[10]);
31 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
33 extern void * __splitstack_resetcontext(void *context[10], size_t *);
35 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
36 void **);
38 extern void __splitstack_block_signals (int *, int *);
40 extern void __splitstack_block_signals_context (void *context[10], int *,
41 int *);
43 #endif
45 #ifndef PTHREAD_STACK_MIN
46 # define PTHREAD_STACK_MIN 8192
47 #endif
49 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
50 # define StackMin PTHREAD_STACK_MIN
51 #else
52 # define StackMin ((sizeof(char *) < 8) ? 2 * 1024 * 1024 : 4 * 1024 * 1024)
53 #endif
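// Without split-stack support a goroutine stack cannot grow a segment at
// a time, so StackMin above has to be a generous fixed size: 2 MB when
// pointers are 32-bit and 4 MB when they are 64-bit.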
55 uintptr runtime_stacks_sys;
57 static void gtraceback(G*);
59 #ifdef __rtems__
60 #define __thread
61 #endif
63 static __thread G *g;
65 #ifndef SETCONTEXT_CLOBBERS_TLS
67 static inline void
68 initcontext(void)
72 static inline void
73 fixcontext(ucontext_t *c __attribute__ ((unused)))
77 #else
79 # if defined(__x86_64__) && defined(__sun__)
81 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
82 // register to that of the thread which called getcontext. The effect
83 // is that the address of all __thread variables changes. This bug
84 // also affects pthread_self() and pthread_getspecific. We work
85 // around it by clobbering the context field directly to keep %fs the
86 // same.
88 static __thread greg_t fs;
90 static inline void
91 initcontext(void)
93 ucontext_t c;
95 getcontext(&c);
96 fs = c.uc_mcontext.gregs[REG_FSBASE];
99 static inline void
100 fixcontext(ucontext_t* c)
102 c->uc_mcontext.gregs[REG_FSBASE] = fs;
105 # elif defined(__NetBSD__)
107 // NetBSD has a bug: setcontext clobbers tlsbase, we need to save
108 // and restore it ourselves.
110 static __thread __greg_t tlsbase;
112 static inline void
113 initcontext(void)
115 ucontext_t c;
117 getcontext(&c);
118 tlsbase = c.uc_mcontext._mc_tlsbase;
121 static inline void
122 fixcontext(ucontext_t* c)
124 c->uc_mcontext._mc_tlsbase = tlsbase;
127 # elif defined(__sparc__)
129 static inline void
130 initcontext(void)
134 static inline void
135 fixcontext(ucontext_t *c)
137 /* ??? Using
138 register unsigned long thread __asm__("%g7");
139 c->uc_mcontext.gregs[REG_G7] = thread;
140 results in
141 error: variable ‘thread’ might be clobbered by \
142 ‘longjmp’ or ‘vfork’ [-Werror=clobbered]
143 which ought to be false, as %g7 is a fixed register. */
145 if (sizeof (c->uc_mcontext.gregs[REG_G7]) == 8)
146 asm ("stx %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
147 else
148 asm ("st %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
151 # else
153 # error unknown case for SETCONTEXT_CLOBBERS_TLS
155 # endif
157 #endif
159 // ucontext_arg returns a properly aligned ucontext_t value. On some
160 // systems a ucontext_t value must be aligned to a 16-byte boundary.
161 // The g structure that has fields of type ucontext_t is defined in
162 // Go, and Go has no simple way to align a field to such a boundary.
163 // So we make the field larger in runtime2.go and pick an appropriate
164 // offset within the field here.
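// For example, if __alignof__(ucontext_t) is 16 and the field starts at
// address 0x1008, the rounding below yields 0x1010, which still lies
// inside the oversized field reserved in runtime2.go.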
165 static ucontext_t*
166 ucontext_arg(void** go_ucontext)
168 uintptr_t p = (uintptr_t)go_ucontext;
169 size_t align = __alignof__(ucontext_t);
170 if(align > 16) {
171 // We only ensured space for up to a 16 byte alignment
172 // in libgo/go/runtime/runtime2.go.
173 runtime_throw("required alignment of ucontext_t too large");
175 p = (p + align - 1) &~ (uintptr_t)(align - 1);
176 return (ucontext_t*)p;
179 // We can not always refer to the TLS variables directly. The
180 // compiler will call tls_get_addr to get the address of the variable,
181 // and it may hold it in a register across a call to schedule. When
182 // we get back from the call we may be running in a different thread,
183 // in which case the register now points to the TLS variable for a
184 // different thread. We use non-inlinable functions to avoid this
185 // when necessary.
187 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
190 runtime_g(void)
192 return g;
195 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
198 runtime_m(void)
200 if(g == nil)
201 return nil;
202 return g->m;
205 // Set g.
206 void
207 runtime_setg(G* gp)
209 g = gp;
212 // Start a new thread.
213 static void
214 runtime_newosproc(M *mp)
216 pthread_attr_t attr;
217 sigset_t clear, old;
218 pthread_t tid;
219 int ret;
221 if(pthread_attr_init(&attr) != 0)
222 runtime_throw("pthread_attr_init");
223 if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
224 runtime_throw("pthread_attr_setdetachstate");
226 // Block signals during pthread_create so that the new thread
227 // starts with signals disabled. It will enable them in minit.
228 sigfillset(&clear);
230 #ifdef SIGTRAP
231 // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
232 sigdelset(&clear, SIGTRAP);
233 #endif
235 sigemptyset(&old);
236 pthread_sigmask(SIG_BLOCK, &clear, &old);
237 ret = pthread_create(&tid, &attr, runtime_mstart, mp);
238 pthread_sigmask(SIG_SETMASK, &old, nil);
240 if (ret != 0)
241 runtime_throw("pthread_create");
244 // First function run by a new goroutine. This replaces gogocall.
245 static void
246 kickoff(void)
248 void (*fn)(void*);
249 void *param;
251 if(g->traceback != nil)
252 gtraceback(g);
254 fn = (void (*)(void*))(g->entry);
255 param = g->param;
256 g->param = nil;
257 fn(param);
258 runtime_goexit();
261 // Switch context to a different goroutine. This is like longjmp.
262 void runtime_gogo(G*) __attribute__ ((noinline));
263 void
264 runtime_gogo(G* newg)
266 #ifdef USING_SPLIT_STACK
267 __splitstack_setcontext(&newg->stackcontext[0]);
268 #endif
269 g = newg;
270 newg->fromgogo = true;
271 fixcontext(ucontext_arg(&newg->context[0]));
272 setcontext(ucontext_arg(&newg->context[0]));
273 runtime_throw("gogo setcontext returned");
276 // Save context and call fn passing g as a parameter. This is like
277 // setjmp. Because getcontext always returns 0, unlike setjmp, we use
278 // g->fromgogo as a flag. It will be true if we got here via
279 // setcontext. g == nil the first time this is called in a new m.
280 void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
281 void
282 runtime_mcall(void (*pfn)(G*))
284 M *mp;
285 G *gp;
286 #ifndef USING_SPLIT_STACK
287 void *afterregs;
288 #endif
290 // Ensure that all registers are on the stack for the garbage
291 // collector.
292 __builtin_unwind_init();
294 gp = g;
295 mp = gp->m;
296 if(gp == mp->g0)
297 runtime_throw("runtime: mcall called on m->g0 stack");
299 if(gp != nil) {
301 #ifdef USING_SPLIT_STACK
302 __splitstack_getcontext(&g->stackcontext[0]);
303 #else
304 // We have to point to an address on the stack that is
305 // below the saved registers.
306 gp->gcnextsp = &afterregs;
307 #endif
308 gp->fromgogo = false;
309 getcontext(ucontext_arg(&gp->context[0]));
311 // When we return from getcontext, we may be running
312 // in a new thread. That means that g may have
313 // changed. It is a global variable, so we will
314 // reload it, but the address of g may be cached in
315 // our local stack frame, and that address may be
316 // wrong. Call the function to reload the value for
317 // this thread.
318 gp = runtime_g();
319 mp = gp->m;
321 if(gp->traceback != nil)
322 gtraceback(gp);
324 if (gp == nil || !gp->fromgogo) {
325 #ifdef USING_SPLIT_STACK
326 __splitstack_setcontext(&mp->g0->stackcontext[0]);
327 #endif
328 mp->g0->entry = (byte*)pfn;
329 mp->g0->param = gp;
331 // It's OK to set g directly here because this case
332 // can not occur if we got here via a setcontext to
333 // the getcontext call just above.
334 g = mp->g0;
336 fixcontext(ucontext_arg(&mp->g0->context[0]));
337 setcontext(ucontext_arg(&mp->g0->context[0]));
338 runtime_throw("runtime: mcall function returned");
342 // Goroutine scheduler
343 // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
345 // The main concepts are:
346 // G - goroutine.
347 // M - worker thread, or machine.
348 // P - processor, a resource that is required to execute Go code.
349 // M must have an associated P to execute Go code, but it can be
350 // blocked or in a syscall without an associated P.
352 // Design doc at http://golang.org/s/go11sched.
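// For example, with GOMAXPROCS=4 there are 4 P's, so at most 4 M's run
// Go code at any instant; additional M's may exist, blocked in syscalls
// or cgo calls, without holding a P.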
354 typedef struct Sched Sched;
355 struct Sched {
356 Lock;
358 uint64 goidgen;
359 M* midle; // idle m's waiting for work
360 int32 nmidle; // number of idle m's waiting for work
361 int32 nmidlelocked; // number of locked m's waiting for work
362 int32 mcount; // number of m's that have been created
363 int32 maxmcount; // maximum number of m's allowed (or die)
365 P* pidle; // idle P's
366 uint32 npidle;
367 uint32 nmspinning;
369 // Global runnable queue.
370 G* runqhead;
371 G* runqtail;
372 int32 runqsize;
374 // Global cache of dead G's.
375 Lock gflock;
376 G* gfree;
378 uint32 gcwaiting; // gc is waiting to run
379 int32 stopwait;
380 Note stopnote;
381 uint32 sysmonwait;
382 Note sysmonnote;
383 uint64 lastpoll;
385 int32 profilehz; // cpu profiling rate
388 enum
390 // Number of goroutine ids to grab from runtime_sched.goidgen to local per-P cache at once.
391 // 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
392 GoidCacheBatch = 16,
395 Sched runtime_sched;
396 int32 runtime_gomaxprocs;
397 uint32 runtime_needextram = 1;
398 M runtime_m0;
399 G runtime_g0; // idle goroutine for m0
400 G* runtime_lastg;
401 M* runtime_allm;
402 P** runtime_allp;
403 M* runtime_extram;
404 int8* runtime_goos;
405 int32 runtime_ncpu;
406 bool runtime_precisestack;
407 static int32 newprocs;
409 static Lock allglock; // the following vars are protected by this lock or by stoptheworld
410 G** runtime_allg;
411 uintptr runtime_allglen;
412 static uintptr allgcap;
414 bool runtime_isarchive;
416 void* runtime_mstart(void*);
417 static void runqput(P*, G*);
418 static G* runqget(P*);
419 static bool runqputslow(P*, G*, uint32, uint32);
420 static G* runqsteal(P*, P*);
421 static void mput(M*);
422 static M* mget(void);
423 static void mcommoninit(M*);
424 static void schedule(void);
425 static void procresize(int32);
426 static void acquirep(P*);
427 static P* releasep(void);
428 static void newm(void(*)(void), P*);
429 static void stopm(void);
430 static void startm(P*, bool);
431 static void handoffp(P*);
432 static void wakep(void);
433 static void stoplockedm(void);
434 static void startlockedm(G*);
435 static void sysmon(void);
436 static uint32 retake(int64);
437 static void incidlelocked(int32);
438 static void checkdead(void);
439 static void exitsyscall0(G*);
440 static void park0(G*);
441 static void goexit0(G*);
442 static void gfput(P*, G*);
443 static G* gfget(P*);
444 static void gfpurge(P*);
445 static void globrunqput(G*);
446 static void globrunqputbatch(G*, G*, int32);
447 static G* globrunqget(P*, int32);
448 static P* pidleget(void);
449 static void pidleput(P*);
450 static void injectglist(G*);
451 static bool preemptall(void);
452 static bool exitsyscallfast(void);
453 static void allgadd(G*);
455 bool runtime_isstarted;
457 // The bootstrap sequence is:
459 // call osinit
460 // call schedinit
461 // make & queue new G
462 // call runtime_mstart
464 // The new G calls runtime_main.
465 void
466 runtime_schedinit(void)
468 M *m;
469 int32 n, procs;
470 String s;
471 const byte *p;
472 Eface i;
474 m = &runtime_m0;
475 g = &runtime_g0;
476 m->g0 = g;
477 m->curg = g;
478 g->m = m;
480 initcontext();
482 runtime_sched.maxmcount = 10000;
483 runtime_precisestack = 0;
485 // runtime_symtabinit();
486 runtime_mallocinit();
487 mcommoninit(m);
489 // Initialize the itable value for newErrorCString,
490 // so that the next time it gets called, possibly
491 // in a fault during a garbage collection, it will not
492 // need to allocate memory.
493 runtime_newErrorCString(0, &i);
495 // Initialize the cached gotraceback value, since
496 // gotraceback calls getenv, which mallocs on Plan 9.
497 runtime_gotraceback(nil);
499 runtime_goargs();
500 runtime_goenvs();
501 runtime_parsedebugvars();
503 runtime_sched.lastpoll = runtime_nanotime();
504 procs = 1;
505 s = runtime_getenv("GOMAXPROCS");
506 p = s.str;
507 if(p != nil && (n = runtime_atoi(p, s.len)) > 0) {
508 if(n > _MaxGomaxprocs)
509 n = _MaxGomaxprocs;
510 procs = n;
512 runtime_allp = runtime_malloc((_MaxGomaxprocs+1)*sizeof(runtime_allp[0]));
513 procresize(procs);
515 // Can not enable GC until all roots are registered.
516 // mstats()->enablegc = 1;
519 extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
520 extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");
522 // Used to determine the field alignment.
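// Because the Hchan* member of the struct below follows a single char,
// offsetof(struct field_align, p) equals the alignment requirement of
// Hchan*; the chan_bool_type_descriptor initializer below derives its
// __field_align entry from that offset.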
524 struct field_align
526 char c;
527 Hchan *p;
530 // main_init_done is a signal used by cgocallbackg that initialization
531 // has been completed. It is made before _cgo_notify_runtime_init_done,
532 // so all cgo calls can rely on it existing. When main_init is
533 // complete, it is closed, meaning cgocallbackg can reliably receive
534 // from it.
535 Hchan *runtime_main_init_done;
537 // The chan bool type, for runtime_main_init_done.
539 extern const struct __go_type_descriptor bool_type_descriptor
540 __asm__ (GOSYM_PREFIX "__go_tdn_bool");
542 static struct __go_channel_type chan_bool_type_descriptor =
544 /* __common */
546 /* __code */
547 GO_CHAN,
548 /* __align */
549 __alignof (Hchan *),
550 /* __field_align */
551 offsetof (struct field_align, p) - 1,
552 /* __size */
553 sizeof (Hchan *),
554 /* __hash */
555 0, /* This value doesn't matter. */
556 /* __hashfn */
557 NULL,
558 /* __equalfn */
559 NULL,
560 /* __gc */
561 NULL, /* This value doesn't matter */
562 /* __reflection */
563 NULL, /* This value doesn't matter */
564 /* __uncommon */
565 NULL,
566 /* __pointer_to_this */
567 NULL
569 /* __element_type */
570 &bool_type_descriptor,
571 /* __dir */
572 CHANNEL_BOTH_DIR
575 extern Hchan *makechan (ChanType *, int64)
576 __asm__ (GOSYM_PREFIX "runtime.makechan");
577 extern void closechan(Hchan *) __asm__ (GOSYM_PREFIX "runtime.closechan");
579 static void
580 initDone(void *arg __attribute__ ((unused))) {
581 runtime_unlockOSThread();
584 // The main goroutine.
585 // Note: C frames in general are not copyable during stack growth, for two reasons:
586 // 1) We don't know where in a frame to find pointers to other stack locations.
587 // 2) There's no guarantee that globals or heap values do not point into the frame.
589 // The C frame for runtime.main is copyable, because:
590 // 1) There are no pointers to other stack locations in the frame
591 // (d.fn points at a global, d.link is nil, d.argp is -1).
592 // 2) The only pointer into this frame is from the defer chain,
593 // which is explicitly handled during stack copying.
594 void
595 runtime_main(void* dummy __attribute__((unused)))
597 Defer d;
598 _Bool frame;
600 newm(sysmon, nil);
602 // Lock the main goroutine onto this, the main OS thread,
603 // during initialization. Most programs won't care, but a few
604 // do require certain calls to be made by the main thread.
605 // Those can arrange for main.main to run in the main thread
606 // by calling runtime.LockOSThread during initialization
607 // to preserve the lock.
608 runtime_lockOSThread();
610 // Defer unlock so that runtime.Goexit during init does the unlock too.
611 d.pfn = (uintptr)(void*)initDone;
612 d.next = g->_defer;
613 d.arg = (void*)-1;
614 d._panic = g->_panic;
615 d.retaddr = 0;
616 d.makefunccanrecover = 0;
617 d.frame = &frame;
618 d.special = true;
619 g->_defer = &d;
621 if(g->m != &runtime_m0)
622 runtime_throw("runtime_main not on m0");
623 __go_go(runtime_MHeap_Scavenger, nil);
625 runtime_main_init_done = makechan(&chan_bool_type_descriptor, 0);
627 _cgo_notify_runtime_init_done();
629 main_init();
631 closechan(runtime_main_init_done);
633 if(g->_defer != &d || (void*)d.pfn != initDone)
634 runtime_throw("runtime: bad defer entry after init");
635 g->_defer = d.next;
636 runtime_unlockOSThread();
638 // For gccgo we have to wait until after main is initialized
639 // to enable GC, because initializing main registers the GC
640 // roots.
641 mstats()->enablegc = 1;
643 if(runtime_isarchive) {
644 // This is not a complete program, but is instead a
645 // library built using -buildmode=c-archive or
646 // c-shared. Now that we are initialized, there is
647 // nothing further to do.
648 return;
651 main_main();
653 // Make racy client program work: if panicking on
654 // another goroutine at the same time as main returns,
655 // let the other goroutine finish printing the panic trace.
656 // Once it does, it will exit. See issue 3934.
657 if(runtime_panicking)
658 runtime_park(nil, nil, "panicwait");
660 runtime_exit(0);
661 for(;;)
662 *(int32*)0 = 0;
665 void
666 runtime_tracebackothers(G * volatile me)
668 G * volatile gp;
669 Traceback tb;
670 int32 traceback;
671 Slice slice;
672 volatile uintptr i;
674 tb.gp = me;
675 traceback = runtime_gotraceback(nil);
677 // Show the current goroutine first, if we haven't already.
678 if((gp = g->m->curg) != nil && gp != me) {
679 runtime_printf("\n");
680 runtime_goroutineheader(gp);
681 gp->traceback = &tb;
683 #ifdef USING_SPLIT_STACK
684 __splitstack_getcontext(&me->stackcontext[0]);
685 #endif
686 getcontext(ucontext_arg(&me->context[0]));
688 if(gp->traceback != nil) {
689 runtime_gogo(gp);
692 slice.__values = &tb.locbuf[0];
693 slice.__count = tb.c;
694 slice.__capacity = tb.c;
695 runtime_printtrace(slice, nil);
696 runtime_printcreatedby(gp);
699 runtime_lock(&allglock);
700 for(i = 0; i < runtime_allglen; i++) {
701 gp = runtime_allg[i];
702 if(gp == me || gp == g->m->curg || gp->atomicstatus == _Gdead)
703 continue;
704 if(gp->issystem && traceback < 2)
705 continue;
706 runtime_printf("\n");
707 runtime_goroutineheader(gp);
709 // Our only mechanism for doing a stack trace is
710 // _Unwind_Backtrace. And that only works for the
711 // current thread, not for other random goroutines.
712 // So we need to switch context to the goroutine, get
713 // the backtrace, and then switch back.
715 // This means that if g is running or in a syscall, we
716 // can't reliably print a stack trace. FIXME.
718 if(gp->atomicstatus == _Grunning) {
719 runtime_printf("\tgoroutine running on other thread; stack unavailable\n");
720 runtime_printcreatedby(gp);
721 } else if(gp->atomicstatus == _Gsyscall) {
722 runtime_printf("\tgoroutine in C code; stack unavailable\n");
723 runtime_printcreatedby(gp);
724 } else {
725 gp->traceback = &tb;
727 #ifdef USING_SPLIT_STACK
728 __splitstack_getcontext(&me->stackcontext[0]);
729 #endif
730 getcontext(ucontext_arg(&me->context[0]));
732 if(gp->traceback != nil) {
733 runtime_gogo(gp);
736 slice.__values = &tb.locbuf[0];
737 slice.__count = tb.c;
738 slice.__capacity = tb.c;
739 runtime_printtrace(slice, nil);
740 runtime_printcreatedby(gp);
743 runtime_unlock(&allglock);
746 static void
747 checkmcount(void)
749 // sched lock is held
750 if(runtime_sched.mcount > runtime_sched.maxmcount) {
751 runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched.maxmcount);
752 runtime_throw("thread exhaustion");
756 // Do a stack trace of gp, and then restore the context to
757 // the goroutine that requested the traceback (gp->traceback->gp).
759 static void
760 gtraceback(G* gp)
762 Traceback* traceback;
764 traceback = gp->traceback;
765 gp->traceback = nil;
766 if(gp->m != nil)
767 runtime_throw("gtraceback: m is not nil");
768 gp->m = traceback->gp->m;
769 traceback->c = runtime_callers(1, traceback->locbuf,
770 sizeof traceback->locbuf / sizeof traceback->locbuf[0], false);
771 gp->m = nil;
772 runtime_gogo(traceback->gp);
775 static void
776 mcommoninit(M *mp)
778 // If there is no mcache runtime_callers() will crash,
779 // and we are most likely in sysmon thread so the stack is senseless anyway.
780 if(g->m->mcache)
781 runtime_callers(1, mp->createstack, nelem(mp->createstack), false);
783 mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
785 runtime_lock(&runtime_sched);
786 mp->id = runtime_sched.mcount++;
787 checkmcount();
788 runtime_mpreinit(mp);
790 // Add to runtime_allm so garbage collector doesn't free m
791 // when it is just in a register or thread-local storage.
792 mp->alllink = runtime_allm;
793 // runtime_NumCgoCall() iterates over allm w/o schedlock,
794 // so we need to publish it safely.
795 runtime_atomicstorep(&runtime_allm, mp);
796 runtime_unlock(&runtime_sched);
799 // Mark gp ready to run.
800 void
801 runtime_ready(G *gp)
803 // Mark runnable.
804 g->m->locks++; // disable preemption because it can be holding p in a local var
805 if(gp->atomicstatus != _Gwaiting) {
806 runtime_printf("goroutine %D has status %d\n", gp->goid, gp->atomicstatus);
807 runtime_throw("bad g->atomicstatus in ready");
809 gp->atomicstatus = _Grunnable;
810 runqput((P*)g->m->p, gp);
811 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0) // TODO: fast atomic
812 wakep();
813 g->m->locks--;
816 void goready(G*, int) __asm__ (GOSYM_PREFIX "runtime.goready");
818 void
819 goready(G* gp, int traceskip __attribute__ ((unused)))
821 runtime_ready(gp);
824 int32
825 runtime_gcprocs(void)
827 int32 n;
829 // Figure out how many CPUs to use during GC.
830 // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
831 runtime_lock(&runtime_sched);
832 n = runtime_gomaxprocs;
833 if(n > runtime_ncpu)
834 n = runtime_ncpu > 0 ? runtime_ncpu : 1;
835 if(n > MaxGcproc)
836 n = MaxGcproc;
837 if(n > runtime_sched.nmidle+1) // one M is currently running
838 n = runtime_sched.nmidle+1;
839 runtime_unlock(&runtime_sched);
840 return n;
843 static bool
844 needaddgcproc(void)
846 int32 n;
848 runtime_lock(&runtime_sched);
849 n = runtime_gomaxprocs;
850 if(n > runtime_ncpu)
851 n = runtime_ncpu;
852 if(n > MaxGcproc)
853 n = MaxGcproc;
854 n -= runtime_sched.nmidle+1; // one M is currently running
855 runtime_unlock(&runtime_sched);
856 return n > 0;
859 void
860 runtime_helpgc(int32 nproc)
862 M *mp;
863 int32 n, pos;
865 runtime_lock(&runtime_sched);
866 pos = 0;
867 for(n = 1; n < nproc; n++) { // one M is currently running
868 if(runtime_allp[pos]->mcache == g->m->mcache)
869 pos++;
870 mp = mget();
871 if(mp == nil)
872 runtime_throw("runtime_gcprocs inconsistency");
873 mp->helpgc = n;
874 mp->mcache = runtime_allp[pos]->mcache;
875 pos++;
876 runtime_notewakeup(&mp->park);
878 runtime_unlock(&runtime_sched);
881 // Similar to stoptheworld but best-effort and can be called several times.
882 // There is no reverse operation; it is used during crashing.
883 // This function must not lock any mutexes.
884 void
885 runtime_freezetheworld(void)
887 int32 i;
889 if(runtime_gomaxprocs == 1)
890 return;
891 // stopwait and preemption requests can be lost
892 // due to races with concurrently executing threads,
893 // so try several times
894 for(i = 0; i < 5; i++) {
895 // this should tell the scheduler to not start any new goroutines
896 runtime_sched.stopwait = 0x7fffffff;
897 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
898 // this should stop running goroutines
899 if(!preemptall())
900 break; // no running goroutines
901 runtime_usleep(1000);
903 // to be sure
904 runtime_usleep(1000);
905 preemptall();
906 runtime_usleep(1000);
909 void
910 runtime_stopTheWorldWithSema(void)
912 int32 i;
913 uint32 s;
914 P *p;
915 bool wait;
917 runtime_lock(&runtime_sched);
918 runtime_sched.stopwait = runtime_gomaxprocs;
919 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
920 preemptall();
921 // stop current P
922 ((P*)g->m->p)->status = _Pgcstop;
923 runtime_sched.stopwait--;
924 // try to retake all P's in _Psyscall status
925 for(i = 0; i < runtime_gomaxprocs; i++) {
926 p = runtime_allp[i];
927 s = p->status;
928 if(s == _Psyscall && runtime_cas(&p->status, s, _Pgcstop))
929 runtime_sched.stopwait--;
931 // stop idle P's
932 while((p = pidleget()) != nil) {
933 p->status = _Pgcstop;
934 runtime_sched.stopwait--;
936 wait = runtime_sched.stopwait > 0;
937 runtime_unlock(&runtime_sched);
939 // wait for remaining P's to stop voluntarily
940 if(wait) {
941 runtime_notesleep(&runtime_sched.stopnote);
942 runtime_noteclear(&runtime_sched.stopnote);
944 if(runtime_sched.stopwait)
945 runtime_throw("stoptheworld: not stopped");
946 for(i = 0; i < runtime_gomaxprocs; i++) {
947 p = runtime_allp[i];
948 if(p->status != _Pgcstop)
949 runtime_throw("stoptheworld: not stopped");
953 static void
954 mhelpgc(void)
956 g->m->helpgc = -1;
959 void
960 runtime_startTheWorldWithSema(void)
962 P *p, *p1;
963 M *mp;
964 G *gp;
965 bool add;
967 g->m->locks++; // disable preemption because it can be holding p in a local var
968 gp = runtime_netpoll(false); // non-blocking
969 injectglist(gp);
970 add = needaddgcproc();
971 runtime_lock(&runtime_sched);
972 if(newprocs) {
973 procresize(newprocs);
974 newprocs = 0;
975 } else
976 procresize(runtime_gomaxprocs);
977 runtime_sched.gcwaiting = 0;
979 p1 = nil;
980 while((p = pidleget()) != nil) {
981 // procresize() puts p's with work at the beginning of the list.
982 // Once we reach a p without a run queue, the rest don't have one either.
983 if(p->runqhead == p->runqtail) {
984 pidleput(p);
985 break;
987 p->m = (uintptr)mget();
988 p->link = (uintptr)p1;
989 p1 = p;
991 if(runtime_sched.sysmonwait) {
992 runtime_sched.sysmonwait = false;
993 runtime_notewakeup(&runtime_sched.sysmonnote);
995 runtime_unlock(&runtime_sched);
997 while(p1) {
998 p = p1;
999 p1 = (P*)p1->link;
1000 if(p->m) {
1001 mp = (M*)p->m;
1002 p->m = 0;
1003 if(mp->nextp)
1004 runtime_throw("startTheWorldWithSema: inconsistent mp->nextp");
1005 mp->nextp = (uintptr)p;
1006 runtime_notewakeup(&mp->park);
1007 } else {
1008 // Start M to run P. Do not start another M below.
1009 newm(nil, p);
1010 add = false;
1014 if(add) {
1015 // If GC could have used another helper proc, start one now,
1016 // in the hope that it will be available next time.
1017 // It would have been even better to start it before the collection,
1018 // but doing so requires allocating memory, so it's tricky to
1019 // coordinate. This lazy approach works out in practice:
1020 // we don't mind if the first couple gc rounds don't have quite
1021 // the maximum number of procs.
1022 newm(mhelpgc, nil);
1024 g->m->locks--;
1027 // Called to start an M.
1028 void*
1029 runtime_mstart(void* mp)
1031 M *m;
1033 m = (M*)mp;
1034 g = m->g0;
1035 g->m = m;
1037 initcontext();
1039 g->entry = nil;
1040 g->param = nil;
1042 // Record top of stack for use by mcall.
1043 // Once we call schedule we're never coming back,
1044 // so other calls can reuse this stack space.
1045 #ifdef USING_SPLIT_STACK
1046 __splitstack_getcontext(&g->stackcontext[0]);
1047 #else
1048 g->gcinitialsp = &mp;
1049 // Setting gcstacksize to 0 is a marker meaning that gcinitialsp
1050 // is the top of the stack, not the bottom.
1051 g->gcstacksize = 0;
1052 g->gcnextsp = &mp;
1053 #endif
1054 getcontext(ucontext_arg(&g->context[0]));
1056 if(g->entry != nil) {
1057 // Got here from mcall.
1058 void (*pfn)(G*) = (void (*)(G*))g->entry;
1059 G* gp = (G*)g->param;
1060 pfn(gp);
1061 *(int*)0x21 = 0x21;
1063 runtime_minit();
1065 #ifdef USING_SPLIT_STACK
1067 int dont_block_signals = 0;
1068 __splitstack_block_signals(&dont_block_signals, nil);
1070 #endif
1072 // Install signal handlers; after minit so that minit can
1073 // prepare the thread to be able to handle the signals.
1074 if(m == &runtime_m0) {
1075 if(runtime_iscgo && !runtime_cgoHasExtraM) {
1076 runtime_cgoHasExtraM = true;
1077 runtime_newextram();
1078 runtime_needextram = 0;
1080 runtime_initsig(false);
1083 if(m->mstartfn)
1084 ((void (*)(void))m->mstartfn)();
1086 if(m->helpgc) {
1087 m->helpgc = 0;
1088 stopm();
1089 } else if(m != &runtime_m0) {
1090 acquirep((P*)m->nextp);
1091 m->nextp = 0;
1093 schedule();
1095 // TODO(brainman): This point is never reached, because scheduler
1096 // does not release os threads at the moment. But once this path
1097 // is enabled, we must remove our seh here.
1099 return nil;
1102 typedef struct CgoThreadStart CgoThreadStart;
1103 struct CgoThreadStart
1105 M *m;
1106 G *g;
1107 uintptr *tls;
1108 void (*fn)(void);
1111 // Allocate a new m unassociated with any thread.
1112 // Can use p for allocation context if needed.
1114 runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, uintptr* ret_g0_stacksize)
1116 M *mp;
1118 g->m->locks++; // disable GC because it can be called from sysmon
1119 if(g->m->p == 0)
1120 acquirep(p); // temporarily borrow p for mallocs in this function
1121 #if 0
1122 if(mtype == nil) {
1123 Eface e;
1124 runtime_gc_m_ptr(&e);
1125 mtype = ((const PtrType*)e.__type_descriptor)->__element_type;
1127 #endif
1129 mp = runtime_mal(sizeof *mp);
1130 mcommoninit(mp);
1131 mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize);
1132 mp->g0->m = mp;
1134 if(p == (P*)g->m->p)
1135 releasep();
1136 g->m->locks--;
1138 return mp;
1141 static G*
1142 allocg(void)
1144 G *gp;
1145 // static Type *gtype;
1147 // if(gtype == nil) {
1148 // Eface e;
1149 // runtime_gc_g_ptr(&e);
1150 // gtype = ((PtrType*)e.__type_descriptor)->__element_type;
1151 // }
1152 // gp = runtime_cnew(gtype);
1153 gp = runtime_malloc(sizeof(G));
1154 return gp;
1157 static M* lockextra(bool nilokay);
1158 static void unlockextra(M*);
1160 // needm is called when a cgo callback happens on a
1161 // thread without an m (a thread not created by Go).
1162 // In this case, needm is expected to find an m to use
1163 // and return with m, g initialized correctly.
1164 // Since m and g are not set now (likely nil, but see below)
1165 // needm is limited in what routines it can call. In particular
1166 // it can only call nosplit functions (textflag 7) and cannot
1167 // do any scheduling that requires an m.
1169 // In order to avoid needing heavy lifting here, we adopt
1170 // the following strategy: there is a stack of available m's
1171 // that can be stolen. Using compare-and-swap
1172 // to pop from the stack has ABA races, so we simulate
1173 // a lock by doing an exchange (via casp) to steal the stack
1174 // head and replace the top pointer with MLOCKED (1).
1175 // This serves as a simple spin lock that we can use even
1176 // without an m. The thread that locks the stack in this way
1177 // unlocks the stack by storing a valid stack head pointer.
1179 // In order to make sure that there is always an m structure
1180 // available to be stolen, we maintain the invariant that there
1181 // is always one more than needed. At the beginning of the
1182 // program (if cgo is in use) the list is seeded with a single m.
1183 // If needm finds that it has taken the last m off the list, its job
1184 // is - once it has installed its own m so that it can do things like
1185 // allocate memory - to create a spare m and put it on the list.
1187 // Each of these extra m's also has a g0 and a curg that are
1188 // pressed into service as the scheduling stack and current
1189 // goroutine for the duration of the cgo callback.
1191 // When the callback is done with the m, it calls dropm to
1192 // put the m back on the list.
1194 // Unlike the gc toolchain, we start running on curg, since we are
1195 // just going to return and let the caller continue.
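// The MLOCKED sentinel and the lockextra/unlockextra helpers that
// implement this simulated spin lock are defined later in this file.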
1196 void
1197 runtime_needm(void)
1199 M *mp;
1201 if(runtime_needextram) {
1202 // Can happen if C/C++ code calls Go from a global ctor.
1203 // Can not throw, because scheduler is not initialized yet.
1204 int rv __attribute__((unused));
1205 rv = runtime_write(2, "fatal error: cgo callback before cgo call\n",
1206 sizeof("fatal error: cgo callback before cgo call\n")-1);
1207 runtime_exit(1);
1210 // Lock extra list, take head, unlock popped list.
1211 // nilokay=false is safe here because of the invariant above,
1212 // that the extra list always contains or will soon contain
1213 // at least one m.
1214 mp = lockextra(false);
1216 // Set needextram when we've just emptied the list,
1217 // so that the eventual call into cgocallbackg will
1218 // allocate a new m for the extra list. We delay the
1219 // allocation until then so that it can be done
1220 // after exitsyscall makes sure it is okay to be
1221 // running at all (that is, there's no garbage collection
1222 // running right now).
1223 mp->needextram = mp->schedlink == 0;
1224 unlockextra((M*)mp->schedlink);
1226 // Install g (= m->curg).
1227 runtime_setg(mp->curg);
1229 // Initialize g's context as in mstart.
1230 initcontext();
1231 g->atomicstatus = _Gsyscall;
1232 g->entry = nil;
1233 g->param = nil;
1234 #ifdef USING_SPLIT_STACK
1235 __splitstack_getcontext(&g->stackcontext[0]);
1236 #else
1237 g->gcinitialsp = &mp;
1238 g->gcstack = nil;
1239 g->gcstacksize = 0;
1240 g->gcnextsp = &mp;
1241 #endif
1242 getcontext(ucontext_arg(&g->context[0]));
1244 if(g->entry != nil) {
1245 // Got here from mcall.
1246 void (*pfn)(G*) = (void (*)(G*))g->entry;
1247 G* gp = (G*)g->param;
1248 pfn(gp);
1249 *(int*)0x22 = 0x22;
1252 // Initialize this thread to use the m.
1253 runtime_minit();
1255 #ifdef USING_SPLIT_STACK
1257 int dont_block_signals = 0;
1258 __splitstack_block_signals(&dont_block_signals, nil);
1260 #endif
1263 // newextram allocates an m and puts it on the extra list.
1264 // It is called with a working local m, so that it can do things
1265 // like call schedlock and allocate.
1266 void
1267 runtime_newextram(void)
1269 M *mp, *mnext;
1270 G *gp;
1271 byte *g0_sp, *sp;
1272 uintptr g0_spsize, spsize;
1273 ucontext_t *uc;
1275 // Create extra goroutine locked to extra m.
1276 // The goroutine is the context in which the cgo callback will run.
1277 // The sched.pc will never be returned to, but setting it to
1278 // runtime.goexit makes clear to the traceback routines where
1279 // the goroutine stack ends.
1280 mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize);
1281 gp = runtime_malg(StackMin, &sp, &spsize);
1282 gp->atomicstatus = _Gdead;
1283 gp->m = mp;
1284 mp->curg = gp;
1285 mp->locked = _LockInternal;
1286 mp->lockedg = gp;
1287 gp->lockedm = mp;
1288 gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
1289 // put on allg for garbage collector
1290 allgadd(gp);
1292 // The context for gp will be set up in runtime_needm. But
1293 // here we need to set up the context for g0.
1294 uc = ucontext_arg(&mp->g0->context[0]);
1295 getcontext(uc);
1296 uc->uc_stack.ss_sp = g0_sp;
1297 uc->uc_stack.ss_size = (size_t)g0_spsize;
1298 makecontext(uc, kickoff, 0);
1300 // Add m to the extra list.
1301 mnext = lockextra(true);
1302 mp->schedlink = (uintptr)mnext;
1303 unlockextra(mp);
1306 // dropm is called when a cgo callback has called needm but is now
1307 // done with the callback and returning back into the non-Go thread.
1308 // It puts the current m back onto the extra list.
1310 // The main expense here is the call to signalstack to release the
1311 // m's signal stack, and then the call to needm on the next callback
1312 // from this thread. It is tempting to try to save the m for next time,
1313 // which would eliminate both these costs, but there might not be
1314 // a next time: the current thread (which Go does not control) might exit.
1315 // If we saved the m for that thread, there would be an m leak each time
1316 // such a thread exited. Instead, we acquire and release an m on each
1317 // call. These should typically not be scheduling operations, just a few
1318 // atomics, so the cost should be small.
1320 // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
1321 // variable using pthread_key_create. Unlike the pthread keys we already use
1322 // on OS X, this dummy key would never be read by Go code. It would exist
1323 // only so that we could register at thread-exit-time destructor.
1324 // That destructor would put the m back onto the extra list.
1325 // This is purely a performance optimization. The current version,
1326 // in which dropm happens on each cgo call, is still correct too.
1327 // We may have to keep the current version on systems with cgo
1328 // but without pthreads, like Windows.
1329 void
1330 runtime_dropm(void)
1332 M *mp, *mnext;
1334 // Undo whatever initialization minit did during needm.
1335 runtime_unminit();
1337 // Clear m and g, and return m to the extra list.
1338 // After the call to setg we can only call nosplit functions.
1339 mp = g->m;
1340 runtime_setg(nil);
1342 mp->curg->atomicstatus = _Gdead;
1343 mp->curg->gcstack = nil;
1344 mp->curg->gcnextsp = nil;
1346 mnext = lockextra(true);
1347 mp->schedlink = (uintptr)mnext;
1348 unlockextra(mp);
1351 #define MLOCKED ((M*)1)
1353 // lockextra locks the extra list and returns the list head.
1354 // The caller must unlock the list by storing a new list head
1355 // to runtime_extram. If nilokay is true, then lockextra will
1356 // return a nil list head if that's what it finds. If nilokay is false,
1357 // lockextra will keep waiting until the list head is no longer nil.
1358 static M*
1359 lockextra(bool nilokay)
1361 M *mp;
1362 void (*yield)(void);
1364 for(;;) {
1365 mp = runtime_atomicloadp(&runtime_extram);
1366 if(mp == MLOCKED) {
1367 yield = runtime_osyield;
1368 yield();
1369 continue;
1371 if(mp == nil && !nilokay) {
1372 runtime_usleep(1);
1373 continue;
1375 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1376 yield = runtime_osyield;
1377 yield();
1378 continue;
1380 break;
1382 return mp;
1385 static void
1386 unlockextra(M *mp)
1388 runtime_atomicstorep(&runtime_extram, mp);
1391 static int32
1392 countextra()
1394 M *mp, *mc;
1395 int32 c;
1397 for(;;) {
1398 mp = runtime_atomicloadp(&runtime_extram);
1399 if(mp == MLOCKED) {
1400 runtime_osyield();
1401 continue;
1403 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1404 runtime_osyield();
1405 continue;
1407 c = 0;
1408 for(mc = mp; mc != nil; mc = (M*)mc->schedlink)
1409 c++;
1410 runtime_atomicstorep(&runtime_extram, mp);
1411 return c;
1415 // Create a new m. It will start off with a call to fn, or else the scheduler.
1416 static void
1417 newm(void(*fn)(void), P *p)
1419 M *mp;
1421 mp = runtime_allocm(p, -1, nil, nil);
1422 mp->nextp = (uintptr)p;
1423 mp->mstartfn = (uintptr)(void*)fn;
1425 runtime_newosproc(mp);
1428 // Stops execution of the current m until new work is available.
1429 // Returns with acquired P.
1430 static void
1431 stopm(void)
1433 M* m;
1435 m = g->m;
1436 if(m->locks)
1437 runtime_throw("stopm holding locks");
1438 if(m->p)
1439 runtime_throw("stopm holding p");
1440 if(m->spinning) {
1441 m->spinning = false;
1442 runtime_xadd(&runtime_sched.nmspinning, -1);
1445 retry:
1446 runtime_lock(&runtime_sched);
1447 mput(m);
1448 runtime_unlock(&runtime_sched);
1449 runtime_notesleep(&m->park);
1450 m = g->m;
1451 runtime_noteclear(&m->park);
1452 if(m->helpgc) {
1453 runtime_gchelper();
1454 m->helpgc = 0;
1455 m->mcache = nil;
1456 goto retry;
1458 acquirep((P*)m->nextp);
1459 m->nextp = 0;
1462 static void
1463 mspinning(void)
1465 g->m->spinning = true;
1468 // Schedules some M to run the p (creates an M if necessary).
1469 // If p==nil, tries to get an idle P; if there are no idle P's, does nothing.
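// When spinning is true, the caller has already bumped
// runtime_sched.nmspinning to account for the M that will spin (see
// wakep and handoffp), which is why startm undoes that increment below
// if it cannot find a P to run.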
1470 static void
1471 startm(P *p, bool spinning)
1473 M *mp;
1474 void (*fn)(void);
1476 runtime_lock(&runtime_sched);
1477 if(p == nil) {
1478 p = pidleget();
1479 if(p == nil) {
1480 runtime_unlock(&runtime_sched);
1481 if(spinning)
1482 runtime_xadd(&runtime_sched.nmspinning, -1);
1483 return;
1486 mp = mget();
1487 runtime_unlock(&runtime_sched);
1488 if(mp == nil) {
1489 fn = nil;
1490 if(spinning)
1491 fn = mspinning;
1492 newm(fn, p);
1493 return;
1495 if(mp->spinning)
1496 runtime_throw("startm: m is spinning");
1497 if(mp->nextp)
1498 runtime_throw("startm: m has p");
1499 mp->spinning = spinning;
1500 mp->nextp = (uintptr)p;
1501 runtime_notewakeup(&mp->park);
1504 // Hands off P from syscall or locked M.
1505 static void
1506 handoffp(P *p)
1508 // if it has local work, start it straight away
1509 if(p->runqhead != p->runqtail || runtime_sched.runqsize) {
1510 startm(p, false);
1511 return;
1513 // no local work, check that there are no spinning/idle M's,
1514 // otherwise our help is not required
1515 if(runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) == 0 && // TODO: fast atomic
1516 runtime_cas(&runtime_sched.nmspinning, 0, 1)) {
1517 startm(p, true);
1518 return;
1520 runtime_lock(&runtime_sched);
1521 if(runtime_sched.gcwaiting) {
1522 p->status = _Pgcstop;
1523 if(--runtime_sched.stopwait == 0)
1524 runtime_notewakeup(&runtime_sched.stopnote);
1525 runtime_unlock(&runtime_sched);
1526 return;
1528 if(runtime_sched.runqsize) {
1529 runtime_unlock(&runtime_sched);
1530 startm(p, false);
1531 return;
1533 // If this is the last running P and nobody is polling network,
1534 // need to wakeup another M to poll network.
1535 if(runtime_sched.npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched.lastpoll) != 0) {
1536 runtime_unlock(&runtime_sched);
1537 startm(p, false);
1538 return;
1540 pidleput(p);
1541 runtime_unlock(&runtime_sched);
1544 // Tries to add one more P to execute G's.
1545 // Called when a G is made runnable (newproc, ready).
1546 static void
1547 wakep(void)
1549 // be conservative about spinning threads
1550 if(!runtime_cas(&runtime_sched.nmspinning, 0, 1))
1551 return;
1552 startm(nil, true);
1555 // Stops execution of the current m that is locked to a g until the g is runnable again.
1556 // Returns with acquired P.
1557 static void
1558 stoplockedm(void)
1560 M *m;
1561 P *p;
1563 m = g->m;
1564 if(m->lockedg == nil || m->lockedg->lockedm != m)
1565 runtime_throw("stoplockedm: inconsistent locking");
1566 if(m->p) {
1567 // Schedule another M to run this p.
1568 p = releasep();
1569 handoffp(p);
1571 incidlelocked(1);
1572 // Wait until another thread schedules lockedg again.
1573 runtime_notesleep(&m->park);
1574 m = g->m;
1575 runtime_noteclear(&m->park);
1576 if(m->lockedg->atomicstatus != _Grunnable)
1577 runtime_throw("stoplockedm: not runnable");
1578 acquirep((P*)m->nextp);
1579 m->nextp = 0;
1582 // Schedules the locked m to run the locked gp.
1583 static void
1584 startlockedm(G *gp)
1586 M *mp;
1587 P *p;
1589 mp = gp->lockedm;
1590 if(mp == g->m)
1591 runtime_throw("startlockedm: locked to me");
1592 if(mp->nextp)
1593 runtime_throw("startlockedm: m has p");
1594 // directly handoff current P to the locked m
1595 incidlelocked(-1);
1596 p = releasep();
1597 mp->nextp = (uintptr)p;
1598 runtime_notewakeup(&mp->park);
1599 stopm();
1602 // Stops the current m for stoptheworld.
1603 // Returns when the world is restarted.
1604 static void
1605 gcstopm(void)
1607 P *p;
1609 if(!runtime_sched.gcwaiting)
1610 runtime_throw("gcstopm: not waiting for gc");
1611 if(g->m->spinning) {
1612 g->m->spinning = false;
1613 runtime_xadd(&runtime_sched.nmspinning, -1);
1615 p = releasep();
1616 runtime_lock(&runtime_sched);
1617 p->status = _Pgcstop;
1618 if(--runtime_sched.stopwait == 0)
1619 runtime_notewakeup(&runtime_sched.stopnote);
1620 runtime_unlock(&runtime_sched);
1621 stopm();
1624 // Schedules gp to run on the current M.
1625 // Never returns.
1626 static void
1627 execute(G *gp)
1629 int32 hz;
1631 if(gp->atomicstatus != _Grunnable) {
1632 runtime_printf("execute: bad g status %d\n", gp->atomicstatus);
1633 runtime_throw("execute: bad g status");
1635 gp->atomicstatus = _Grunning;
1636 gp->waitsince = 0;
1637 ((P*)g->m->p)->schedtick++;
1638 g->m->curg = gp;
1639 gp->m = g->m;
1641 // Check whether the profiler needs to be turned on or off.
1642 hz = runtime_sched.profilehz;
1643 if(g->m->profilehz != hz)
1644 runtime_resetcpuprofiler(hz);
1646 runtime_gogo(gp);
1649 // Finds a runnable goroutine to execute.
1650 // Tries to steal from other P's, get g from global queue, poll network.
1651 static G*
1652 findrunnable(void)
1654 G *gp;
1655 P *p;
1656 int32 i;
1658 top:
1659 if(runtime_sched.gcwaiting) {
1660 gcstopm();
1661 goto top;
1663 if(runtime_fingwait && runtime_fingwake && (gp = runtime_wakefing()) != nil)
1664 runtime_ready(gp);
1665 // local runq
1666 gp = runqget((P*)g->m->p);
1667 if(gp)
1668 return gp;
1669 // global runq
1670 if(runtime_sched.runqsize) {
1671 runtime_lock(&runtime_sched);
1672 gp = globrunqget((P*)g->m->p, 0);
1673 runtime_unlock(&runtime_sched);
1674 if(gp)
1675 return gp;
1677 // poll network
1678 gp = runtime_netpoll(false); // non-blocking
1679 if(gp) {
1680 injectglist((G*)gp->schedlink);
1681 gp->atomicstatus = _Grunnable;
1682 return gp;
1684 // If number of spinning M's >= number of busy P's, block.
1685 // This is necessary to prevent excessive CPU consumption
1686 // when GOMAXPROCS>>1 but the program parallelism is low.
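// For example, with runtime_gomaxprocs==8 and 6 idle P's there are only
// 2 busy P's, so once one M is already spinning (2*1 >= 2) a second
// would-be spinner blocks here instead of burning a CPU looking for work.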
1687 if(!g->m->spinning && 2 * runtime_atomicload(&runtime_sched.nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched.npidle)) // TODO: fast atomic
1688 goto stop;
1689 if(!g->m->spinning) {
1690 g->m->spinning = true;
1691 runtime_xadd(&runtime_sched.nmspinning, 1);
1693 // random steal from other P's
1694 for(i = 0; i < 2*runtime_gomaxprocs; i++) {
1695 if(runtime_sched.gcwaiting)
1696 goto top;
1697 p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
1698 if(p == (P*)g->m->p)
1699 gp = runqget(p);
1700 else
1701 gp = runqsteal((P*)g->m->p, p);
1702 if(gp)
1703 return gp;
1705 stop:
1706 // return P and block
1707 runtime_lock(&runtime_sched);
1708 if(runtime_sched.gcwaiting) {
1709 runtime_unlock(&runtime_sched);
1710 goto top;
1712 if(runtime_sched.runqsize) {
1713 gp = globrunqget((P*)g->m->p, 0);
1714 runtime_unlock(&runtime_sched);
1715 return gp;
1717 p = releasep();
1718 pidleput(p);
1719 runtime_unlock(&runtime_sched);
1720 if(g->m->spinning) {
1721 g->m->spinning = false;
1722 runtime_xadd(&runtime_sched.nmspinning, -1);
1724 // check all runqueues once again
1725 for(i = 0; i < runtime_gomaxprocs; i++) {
1726 p = runtime_allp[i];
1727 if(p && p->runqhead != p->runqtail) {
1728 runtime_lock(&runtime_sched);
1729 p = pidleget();
1730 runtime_unlock(&runtime_sched);
1731 if(p) {
1732 acquirep(p);
1733 goto top;
1735 break;
1738 // poll network
1739 if(runtime_xchg64(&runtime_sched.lastpoll, 0) != 0) {
1740 if(g->m->p)
1741 runtime_throw("findrunnable: netpoll with p");
1742 if(g->m->spinning)
1743 runtime_throw("findrunnable: netpoll with spinning");
1744 gp = runtime_netpoll(true); // block until new work is available
1745 runtime_atomicstore64(&runtime_sched.lastpoll, runtime_nanotime());
1746 if(gp) {
1747 runtime_lock(&runtime_sched);
1748 p = pidleget();
1749 runtime_unlock(&runtime_sched);
1750 if(p) {
1751 acquirep(p);
1752 injectglist((G*)gp->schedlink);
1753 gp->atomicstatus = _Grunnable;
1754 return gp;
1756 injectglist(gp);
1759 stopm();
1760 goto top;
1763 static void
1764 resetspinning(void)
1766 int32 nmspinning;
1768 if(g->m->spinning) {
1769 g->m->spinning = false;
1770 nmspinning = runtime_xadd(&runtime_sched.nmspinning, -1);
1771 if(nmspinning < 0)
1772 runtime_throw("findrunnable: negative nmspinning");
1773 } else
1774 nmspinning = runtime_atomicload(&runtime_sched.nmspinning);
1776 // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
1777 // so see if we need to wakeup another P here.
1778 if (nmspinning == 0 && runtime_atomicload(&runtime_sched.npidle) > 0)
1779 wakep();
1782 // Injects the list of runnable G's into the scheduler.
1783 // Can run concurrently with GC.
1784 static void
1785 injectglist(G *glist)
1787 int32 n;
1788 G *gp;
1790 if(glist == nil)
1791 return;
1792 runtime_lock(&runtime_sched);
1793 for(n = 0; glist; n++) {
1794 gp = glist;
1795 glist = (G*)gp->schedlink;
1796 gp->atomicstatus = _Grunnable;
1797 globrunqput(gp);
1799 runtime_unlock(&runtime_sched);
1801 for(; n && runtime_sched.npidle; n--)
1802 startm(nil, false);
1805 // One round of scheduler: find a runnable goroutine and execute it.
1806 // Never returns.
1807 static void
1808 schedule(void)
1810 G *gp;
1811 uint32 tick;
1813 if(g->m->locks)
1814 runtime_throw("schedule: holding locks");
1816 top:
1817 if(runtime_sched.gcwaiting) {
1818 gcstopm();
1819 goto top;
1822 gp = nil;
1823 // Check the global runnable queue once in a while to ensure fairness.
1824 // Otherwise two goroutines can completely occupy the local runqueue
1825 // by constantly respawning each other.
1826 tick = ((P*)g->m->p)->schedtick;
1827 // This is a fancy way to say tick%61==0;
1828 // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
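// (0x4325c53f is 2^36/61 rounded up, so for 32-bit tick values
// (tick*0x4325c53f)>>36 computes tick/61, and the subtraction below
// leaves tick%61.)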
1829 if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched.runqsize > 0) {
1830 runtime_lock(&runtime_sched);
1831 gp = globrunqget((P*)g->m->p, 1);
1832 runtime_unlock(&runtime_sched);
1833 if(gp)
1834 resetspinning();
1836 if(gp == nil) {
1837 gp = runqget((P*)g->m->p);
1838 if(gp && g->m->spinning)
1839 runtime_throw("schedule: spinning with local work");
1841 if(gp == nil) {
1842 gp = findrunnable(); // blocks until work is available
1843 resetspinning();
1846 if(gp->lockedm) {
1847 // Hands off own p to the locked m,
1848 // then blocks waiting for a new p.
1849 startlockedm(gp);
1850 goto top;
1853 execute(gp);
1856 // Puts the current goroutine into a waiting state and calls unlockf.
1857 // If unlockf returns false, the goroutine is resumed.
1858 void
1859 runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason)
1861 if(g->atomicstatus != _Grunning)
1862 runtime_throw("bad g status");
1863 g->m->waitlock = lock;
1864 g->m->waitunlockf = unlockf;
1865 g->waitreason = runtime_gostringnocopy((const byte*)reason);
1866 runtime_mcall(park0);
1869 void gopark(FuncVal *, void *, String, byte, int)
1870 __asm__ (GOSYM_PREFIX "runtime.gopark");
1872 void
1873 gopark(FuncVal *unlockf, void *lock, String reason,
1874 byte traceEv __attribute__ ((unused)),
1875 int traceskip __attribute__ ((unused)))
1877 if(g->atomicstatus != _Grunning)
1878 runtime_throw("bad g status");
1879 g->m->waitlock = lock;
1880 g->m->waitunlockf = unlockf == nil ? nil : (void*)unlockf->fn;
1881 g->waitreason = reason;
1882 runtime_mcall(park0);
1885 static bool
1886 parkunlock(G *gp, void *lock)
1888 USED(gp);
1889 runtime_unlock(lock);
1890 return true;
1893 // Puts the current goroutine into a waiting state and unlocks the lock.
1894 // The goroutine can be made runnable again by calling runtime_ready(gp).
1895 void
1896 runtime_parkunlock(Lock *lock, const char *reason)
1898 runtime_park(parkunlock, lock, reason);
1901 void goparkunlock(Lock *, String, byte, int)
1902 __asm__ (GOSYM_PREFIX "runtime.goparkunlock");
1904 void
1905 goparkunlock(Lock *lock, String reason, byte traceEv __attribute__ ((unused)),
1906 int traceskip __attribute__ ((unused)))
1908 if(g->atomicstatus != _Grunning)
1909 runtime_throw("bad g status");
1910 g->m->waitlock = lock;
1911 g->m->waitunlockf = parkunlock;
1912 g->waitreason = reason;
1913 runtime_mcall(park0);
1916 // runtime_park continuation on g0.
1917 static void
1918 park0(G *gp)
1920 M *m;
1921 bool ok;
1923 m = g->m;
1924 gp->atomicstatus = _Gwaiting;
1925 gp->m = nil;
1926 m->curg = nil;
1927 if(m->waitunlockf) {
1928 ok = ((bool (*)(G*, void*))m->waitunlockf)(gp, m->waitlock);
1929 m->waitunlockf = nil;
1930 m->waitlock = nil;
1931 if(!ok) {
1932 gp->atomicstatus = _Grunnable;
1933 execute(gp); // Schedule it back, never returns.
1936 if(m->lockedg) {
1937 stoplockedm();
1938 execute(gp); // Never returns.
1940 schedule();
1943 // Scheduler yield.
1944 void
1945 runtime_gosched(void)
1947 if(g->atomicstatus != _Grunning)
1948 runtime_throw("bad g status");
1949 runtime_mcall(runtime_gosched0);
1952 // runtime_gosched continuation on g0.
1953 void
1954 runtime_gosched0(G *gp)
1956 M *m;
1958 m = g->m;
1959 gp->atomicstatus = _Grunnable;
1960 gp->m = nil;
1961 m->curg = nil;
1962 runtime_lock(&runtime_sched);
1963 globrunqput(gp);
1964 runtime_unlock(&runtime_sched);
1965 if(m->lockedg) {
1966 stoplockedm();
1967 execute(gp); // Never returns.
1969 schedule();
1972 // Finishes execution of the current goroutine.
1973 // Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack).
1974 // Since it does not return it does not matter. But if it is preempted
1975 // at the split stack check, GC will complain about inconsistent sp.
1976 void runtime_goexit(void) __attribute__ ((noinline));
1977 void
1978 runtime_goexit(void)
1980 if(g->atomicstatus != _Grunning)
1981 runtime_throw("bad g status");
1982 runtime_mcall(goexit0);
1985 // runtime_goexit continuation on g0.
1986 static void
1987 goexit0(G *gp)
1989 M *m;
1991 m = g->m;
1992 gp->atomicstatus = _Gdead;
1993 gp->entry = nil;
1994 gp->m = nil;
1995 gp->lockedm = nil;
1996 gp->paniconfault = 0;
1997 gp->_defer = nil; // should be true already but just in case.
1998 gp->_panic = nil; // non-nil for Goexit during panic. points at stack-allocated data.
1999 gp->writebuf.__values = nil;
2000 gp->writebuf.__count = 0;
2001 gp->writebuf.__capacity = 0;
2002 gp->waitreason = runtime_gostringnocopy(nil);
2003 gp->param = nil;
2004 m->curg = nil;
2005 m->lockedg = nil;
2006 if(m->locked & ~_LockExternal) {
2007 runtime_printf("invalid m->locked = %d\n", m->locked);
2008 runtime_throw("internal lockOSThread error");
2010 m->locked = 0;
2011 gfput((P*)m->p, gp);
2012 schedule();
2015 // The goroutine g is about to enter a system call.
2016 // Record that it's not using the cpu anymore.
2017 // This is called only from the go syscall library and cgocall,
2018 // not from the low-level system calls used by the runtime.
2020 // Entersyscall cannot split the stack: the saved context must
2021 // refer to the caller's stack segment, because
2022 // entersyscall is going to return immediately after.
2024 void runtime_entersyscall(int32) __attribute__ ((no_split_stack));
2025 static void doentersyscall(uintptr, uintptr)
2026 __attribute__ ((no_split_stack, noinline));
2028 void
2029 runtime_entersyscall(int32 dummy __attribute__ ((unused)))
2031 // Save the registers in the g structure so that any pointers
2032 // held in registers will be seen by the garbage collector.
2033 getcontext(ucontext_arg(&g->gcregs[0]));
2035 // Do the work in a separate function, so that this function
2036 // doesn't save any registers on its own stack. If this
2037 // function does save any registers, we might store the wrong
2038 // value in the call to getcontext.
2040 // FIXME: This assumes that we do not need to save any
2041 // callee-saved registers to access the TLS variable g. We
2042 // don't want to put the ucontext_t on the stack because it is
2043 // large and we can not split the stack here.
2044 doentersyscall((uintptr)runtime_getcallerpc(&dummy),
2045 (uintptr)runtime_getcallersp(&dummy));
2048 static void
2049 doentersyscall(uintptr pc, uintptr sp)
2051 // Disable preemption because during this function g is in _Gsyscall status,
2052 // but can have inconsistent g->sched, do not let GC observe it.
2053 g->m->locks++;
2055 // Leave SP around for GC and traceback.
2056 #ifdef USING_SPLIT_STACK
2058 size_t gcstacksize;
2059 g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
2060 &g->gcnextsegment, &g->gcnextsp,
2061 &g->gcinitialsp);
2062 g->gcstacksize = (uintptr)gcstacksize;
2064 #else
2066 void *v;
2068 g->gcnextsp = (byte *) &v;
2070 #endif
2072 g->syscallsp = sp;
2073 g->syscallpc = pc;
2075 g->atomicstatus = _Gsyscall;
2077 if(runtime_atomicload(&runtime_sched.sysmonwait)) { // TODO: fast atomic
2078 runtime_lock(&runtime_sched);
2079 if(runtime_atomicload(&runtime_sched.sysmonwait)) {
2080 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2081 runtime_notewakeup(&runtime_sched.sysmonnote);
2083 runtime_unlock(&runtime_sched);
2086 g->m->mcache = nil;
2087 ((P*)(g->m->p))->m = 0;
2088 runtime_atomicstore(&((P*)g->m->p)->status, _Psyscall);
2089 if(runtime_atomicload(&runtime_sched.gcwaiting)) {
2090 runtime_lock(&runtime_sched);
2091 if (runtime_sched.stopwait > 0 && runtime_cas(&((P*)g->m->p)->status, _Psyscall, _Pgcstop)) {
2092 if(--runtime_sched.stopwait == 0)
2093 runtime_notewakeup(&runtime_sched.stopnote);
2095 runtime_unlock(&runtime_sched);
2098 g->m->locks--;
2101 // The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
2102 void
2103 runtime_entersyscallblock(int32 dummy __attribute__ ((unused)))
2105 P *p;
2107 g->m->locks++; // see comment in entersyscall
2109 // Leave SP around for GC and traceback.
2110 #ifdef USING_SPLIT_STACK
2112 size_t gcstacksize;
2113 g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
2114 &g->gcnextsegment, &g->gcnextsp,
2115 &g->gcinitialsp);
2116 g->gcstacksize = (uintptr)gcstacksize;
2118 #else
2119 g->gcnextsp = (byte *) &p;
2120 #endif
2122 // Save the registers in the g structure so that any pointers
2123 // held in registers will be seen by the garbage collector.
2124 getcontext(ucontext_arg(&g->gcregs[0]));
2126 g->syscallpc = (uintptr)runtime_getcallerpc(&dummy);
2127 g->syscallsp = (uintptr)runtime_getcallersp(&dummy);
2129 g->atomicstatus = _Gsyscall;
2131 p = releasep();
2132 handoffp(p);
2133 if(g->isbackground) // do not consider blocked scavenger for deadlock detection
2134 incidlelocked(1);
2136 g->m->locks--;
2139 // The goroutine g exited its system call.
2140 // Arrange for it to run on a cpu again.
2141 // This is called only from the go syscall library, not
2142 // from the low-level system calls used by the runtime.
2143 void
2144 runtime_exitsyscall(int32 dummy __attribute__ ((unused)))
2146 G *gp;
2148 gp = g;
2149 gp->m->locks++; // see comment in entersyscall
2151 if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
2152 incidlelocked(-1);
2154 gp->waitsince = 0;
2155 if(exitsyscallfast()) {
2156 // There's a cpu for us, so we can run.
2157 ((P*)gp->m->p)->syscalltick++;
2158 gp->atomicstatus = _Grunning;
2159 // Garbage collector isn't running (since we are),
2160 // so okay to clear gcstack and gcsp.
2161 #ifdef USING_SPLIT_STACK
2162 gp->gcstack = nil;
2163 #endif
2164 gp->gcnextsp = nil;
2165 runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
2166 gp->syscallsp = 0;
2167 gp->m->locks--;
2168 return;
2171 gp->m->locks--;
2173 // Call the scheduler.
2174 runtime_mcall(exitsyscall0);
2176 // Scheduler returned, so we're allowed to run now.
2177 // Delete the gcstack information that we left for
2178 // the garbage collector during the system call.
2179 // Must wait until now because until gosched returns
2180 // we don't know for sure that the garbage collector
2181 // is not running.
2182 #ifdef USING_SPLIT_STACK
2183 gp->gcstack = nil;
2184 #endif
2185 gp->gcnextsp = nil;
2186 runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);
2188 gp->syscallsp = 0;
2190 // Note that this gp->m might be different than the earlier
2191 // gp->m after returning from runtime_mcall.
2192 ((P*)gp->m->p)->syscalltick++;
2195 static bool
2196 exitsyscallfast(void)
2198 G *gp;
2199 P *p;
2201 gp = g;
2203 // Freezetheworld sets stopwait but does not retake P's.
2204 if(runtime_sched.stopwait) {
2205 gp->m->p = 0;
2206 return false;
2209 // Try to re-acquire the last P.
2210 if(gp->m->p && ((P*)gp->m->p)->status == _Psyscall && runtime_cas(&((P*)gp->m->p)->status, _Psyscall, _Prunning)) {
2211 // There's a cpu for us, so we can run.
2212 gp->m->mcache = ((P*)gp->m->p)->mcache;
2213 ((P*)gp->m->p)->m = (uintptr)gp->m;
2214 return true;
2216 // Try to get any other idle P.
2217 gp->m->p = 0;
2218 if(runtime_sched.pidle) {
2219 runtime_lock(&runtime_sched);
2220 p = pidleget();
2221 if(p && runtime_atomicload(&runtime_sched.sysmonwait)) {
2222 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2223 runtime_notewakeup(&runtime_sched.sysmonnote);
2225 runtime_unlock(&runtime_sched);
2226 if(p) {
2227 acquirep(p);
2228 return true;
2231 return false;
2234 // runtime_exitsyscall slow path on g0.
2235 // Failed to acquire P, enqueue gp as runnable.
2236 static void
2237 exitsyscall0(G *gp)
2239 M *m;
2240 P *p;
2242 m = g->m;
2243 gp->atomicstatus = _Grunnable;
2244 gp->m = nil;
2245 m->curg = nil;
2246 runtime_lock(&runtime_sched);
2247 p = pidleget();
2248 if(p == nil)
2249 globrunqput(gp);
2250 else if(runtime_atomicload(&runtime_sched.sysmonwait)) {
2251 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2252 runtime_notewakeup(&runtime_sched.sysmonnote);
2254 runtime_unlock(&runtime_sched);
2255 if(p) {
2256 acquirep(p);
2257 execute(gp); // Never returns.
2259 if(m->lockedg) {
2260 // Wait until another thread schedules gp and so m again.
2261 stoplockedm();
2262 execute(gp); // Never returns.
2264 stopm();
2265 schedule(); // Never returns.
2268 void syscall_entersyscall(void)
2269 __asm__(GOSYM_PREFIX "syscall.Entersyscall");
2271 void syscall_entersyscall(void) __attribute__ ((no_split_stack));
2273 void
2274 syscall_entersyscall()
2276 runtime_entersyscall(0);
2279 void syscall_exitsyscall(void)
2280 __asm__(GOSYM_PREFIX "syscall.Exitsyscall");
2282 void syscall_exitsyscall(void) __attribute__ ((no_split_stack));
2284 void
2285 syscall_exitsyscall()
2287 runtime_exitsyscall(0);
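// Illustrative sketch, not part of this file: how a potentially blocking
// call is expected to be bracketed by the two entry points above.  The
// real wrappers are generated Go code in the syscall package; only the
// entersyscall/exitsyscall pairing is the point here, and
// example_blocking_read is a hypothetical name.
#if 0
static ssize_t
example_blocking_read(int fd, void *buf, size_t count)
{
	ssize_t r;

	runtime_entersyscall(0);	// record that this G has stopped using the CPU
	r = read(fd, buf, count);	// may sleep in the kernel
	runtime_exitsyscall(0);		// reacquire a P before running Go code again
	return r;
}
#endif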
2290 // Called from syscall package before fork.
2291 void syscall_runtime_BeforeFork(void)
2292 __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork");
2293 void
2294 syscall_runtime_BeforeFork(void)
2296 // Fork can hang if preempted with signals frequently enough (see issue 5517).
2297 // Ensure that we stay on the same M where we disable profiling.
2298 runtime_m()->locks++;
2299 if(runtime_m()->profilehz != 0)
2300 runtime_resetcpuprofiler(0);
2303 // Called from syscall package after fork in parent.
2304 void syscall_runtime_AfterFork(void)
2305 __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork");
2306 void
2307 syscall_runtime_AfterFork(void)
2309 int32 hz;
2311 hz = runtime_sched.profilehz;
2312 if(hz != 0)
2313 runtime_resetcpuprofiler(hz);
2314 runtime_m()->locks--;
2317 // Allocate a new g, with a stack big enough for stacksize bytes.
2319 runtime_malg(int32 stacksize, byte** ret_stack, uintptr* ret_stacksize)
2321 G *newg;
2323 newg = allocg();
2324 if(stacksize >= 0) {
2325 #if USING_SPLIT_STACK
2326 int dont_block_signals = 0;
2327 size_t ss_stacksize;
2329 *ret_stack = __splitstack_makecontext(stacksize,
2330 &newg->stackcontext[0],
2331 &ss_stacksize);
2332 *ret_stacksize = (uintptr)ss_stacksize;
2333 __splitstack_block_signals_context(&newg->stackcontext[0],
2334 &dont_block_signals, nil);
2335 #else
2336 // In 64-bit mode, the maximum Go allocation space is
2337 // 128G. Our stack size is 4M, which only permits 32K
2338 // goroutines. In order to not limit ourselves,
2339 // allocate the stacks out of separate memory. In
2340 // 32-bit mode, the Go allocation space is all of
2341 // memory anyhow.
2342 if(sizeof(void*) == 8) {
2343 void *p = runtime_SysAlloc(stacksize, &mstats()->other_sys);
2344 if(p == nil)
2345 runtime_throw("runtime: cannot allocate memory for goroutine stack");
2346 *ret_stack = (byte*)p;
2347 } else {
2348 *ret_stack = runtime_mallocgc(stacksize, 0, FlagNoProfiling|FlagNoGC);
2349 runtime_xadd(&runtime_stacks_sys, stacksize);
2351 *ret_stacksize = (uintptr)stacksize;
2352 newg->gcinitialsp = *ret_stack;
2353 newg->gcstacksize = (uintptr)stacksize;
2354 #endif
2356 return newg;
2360 __go_go(void (*fn)(void*), void* arg)
2362 byte *sp;
2363 size_t spsize;
2364 G *newg;
2365 P *p;
2367 //runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
2368 if(fn == nil) {
2369 g->m->throwing = -1; // do not dump full stacks
2370 runtime_throw("go of nil func value");
2372 g->m->locks++; // disable preemption because it can be holding p in a local var
2374 p = (P*)g->m->p;
2375 if((newg = gfget(p)) != nil) {
2376 #ifdef USING_SPLIT_STACK
2377 int dont_block_signals = 0;
2379 sp = __splitstack_resetcontext(&newg->stackcontext[0],
2380 &spsize);
2381 __splitstack_block_signals_context(&newg->stackcontext[0],
2382 &dont_block_signals, nil);
2383 #else
2384 sp = newg->gcinitialsp;
2385 spsize = newg->gcstacksize;
2386 if(spsize == 0)
2387 runtime_throw("bad spsize in __go_go");
2388 newg->gcnextsp = sp;
2389 #endif
2390 } else {
2391 uintptr malsize;
2393 newg = runtime_malg(StackMin, &sp, &malsize);
2394 spsize = (size_t)malsize;
2395 allgadd(newg);
2398 newg->entry = (byte*)fn;
2399 newg->param = arg;
2400 newg->gopc = (uintptr)__builtin_return_address(0);
2401 newg->atomicstatus = _Grunnable;
2402 if(p->goidcache == p->goidcacheend) {
2403 p->goidcache = runtime_xadd64(&runtime_sched.goidgen, GoidCacheBatch);
2404 p->goidcacheend = p->goidcache + GoidCacheBatch;
2406 newg->goid = p->goidcache++;
2409 // Avoid warnings about variables clobbered by
2410 // longjmp.
2411 byte * volatile vsp = sp;
2412 size_t volatile vspsize = spsize;
2413 G * volatile vnewg = newg;
2414 ucontext_t * volatile uc;
2416 uc = ucontext_arg(&vnewg->context[0]);
2417 getcontext(uc);
2418 uc->uc_stack.ss_sp = vsp;
2419 uc->uc_stack.ss_size = vspsize;
2420 makecontext(uc, kickoff, 0);
2422 runqput(p, vnewg);
2424 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
2425 wakep();
2426 g->m->locks--;
2427 return vnewg;
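// Illustrative sketch, not compiled: roughly the shape of what the
// compiler emits for a statement like `go worker(42)`.  Only __go_go is
// real here; worker, worker_args, worker_trampoline and spawn_worker are
// hypothetical, and the real argument/closure layout is more involved.
#if 0
struct worker_args { int n; };

static void
worker_trampoline(void *arg)
{
	struct worker_args *a = (struct worker_args *)arg;
	worker(a->n);	// the user's function body runs on the new G
}

static void
spawn_worker(void)
{
	struct worker_args *a;

	a = runtime_mallocgc(sizeof *a, 0, 0);	// argument block must outlive the caller's frame
	a->n = 42;
	__go_go(worker_trampoline, a);		// new G becomes _Grunnable on this P's runq
}
#endif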
2431 static void
2432 allgadd(G *gp)
2434 G **new;
2435 uintptr cap;
2437 runtime_lock(&allglock);
2438 if(runtime_allglen >= allgcap) {
2439 cap = 4096/sizeof(new[0]);
2440 if(cap < 2*allgcap)
2441 cap = 2*allgcap;
2442 new = runtime_malloc(cap*sizeof(new[0]));
2443 if(new == nil)
2444 runtime_throw("runtime: cannot allocate memory");
2445 if(runtime_allg != nil) {
2446 runtime_memmove(new, runtime_allg, runtime_allglen*sizeof(new[0]));
2447 runtime_free(runtime_allg);
2449 runtime_allg = new;
2450 allgcap = cap;
2452 runtime_allg[runtime_allglen++] = gp;
2453 runtime_unlock(&allglock);
2456 // Put on gfree list.
2457 // If local list is too long, transfer a batch to the global list.
2458 static void
2459 gfput(P *p, G *gp)
2461 gp->schedlink = (uintptr)p->gfree;
2462 p->gfree = gp;
2463 p->gfreecnt++;
2464 if(p->gfreecnt >= 64) {
2465 runtime_lock(&runtime_sched.gflock);
2466 while(p->gfreecnt >= 32) {
2467 p->gfreecnt--;
2468 gp = p->gfree;
2469 p->gfree = (G*)gp->schedlink;
2470 gp->schedlink = (uintptr)runtime_sched.gfree;
2471 runtime_sched.gfree = gp;
2473 runtime_unlock(&runtime_sched.gflock);
2477 // Get from gfree list.
2478 // If local list is empty, grab a batch from global list.
2479 static G*
2480 gfget(P *p)
2482 G *gp;
2484 retry:
2485 gp = p->gfree;
2486 if(gp == nil && runtime_sched.gfree) {
2487 runtime_lock(&runtime_sched.gflock);
2488 while(p->gfreecnt < 32 && runtime_sched.gfree) {
2489 p->gfreecnt++;
2490 gp = runtime_sched.gfree;
2491 runtime_sched.gfree = (G*)gp->schedlink;
2492 gp->schedlink = (uintptr)p->gfree;
2493 p->gfree = gp;
2495 runtime_unlock(&runtime_sched.gflock);
2496 goto retry;
2498 if(gp) {
2499 p->gfree = (G*)gp->schedlink;
2500 p->gfreecnt--;
2502 return gp;
2505 // Purge all cached G's from gfree list to the global list.
2506 static void
2507 gfpurge(P *p)
2509 G *gp;
2511 runtime_lock(&runtime_sched.gflock);
2512 while(p->gfreecnt) {
2513 p->gfreecnt--;
2514 gp = p->gfree;
2515 p->gfree = (G*)gp->schedlink;
2516 gp->schedlink = (uintptr)runtime_sched.gfree;
2517 runtime_sched.gfree = gp;
2519 runtime_unlock(&runtime_sched.gflock);
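// Sketch tying the free-list functions above to their callers: goexit0
// parks a dead G with gfput, and __go_go prefers gfget over allocating a
// fresh G with runtime_malg.  example_recycle is hypothetical glue that
// only shows the two halves of the lifecycle side by side.
#if 0
static G*
example_recycle(P *p, G *deadg)
{
	byte *sp;
	uintptr spsize;
	G *gp;

	gfput(p, deadg);	// dead G goes on p's local free list (as in goexit0)
	gp = gfget(p);		// reuse a cached G if one is available (as in __go_go)
	if(gp == nil)
		gp = runtime_malg(StackMin, &sp, &spsize);	// otherwise allocate a fresh one
	return gp;
}
#endif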
2522 void
2523 runtime_Breakpoint(void)
2525 runtime_breakpoint();
2528 void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched");
2530 void
2531 runtime_Gosched(void)
2533 runtime_gosched();
2536 // Implementation of runtime.GOMAXPROCS.
2537 // delete when scheduler is even stronger
2538 int32
2539 runtime_gomaxprocsfunc(int32 n)
2541 int32 ret;
2543 if(n > _MaxGomaxprocs)
2544 n = _MaxGomaxprocs;
2545 runtime_lock(&runtime_sched);
2546 ret = runtime_gomaxprocs;
2547 if(n <= 0 || n == ret) {
2548 runtime_unlock(&runtime_sched);
2549 return ret;
2551 runtime_unlock(&runtime_sched);
2553 runtime_acquireWorldsema();
2554 g->m->gcing = 1;
2555 runtime_stopTheWorldWithSema();
2556 newprocs = n;
2557 g->m->gcing = 0;
2558 runtime_releaseWorldsema();
2559 runtime_startTheWorldWithSema();
2561 return ret;
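// Illustrative sketch: runtime.GOMAXPROCS lands here.  Passing n <= 0
// only reports the current value; a positive n stops the world, records
// the request in newprocs, and the restart path is expected to apply it
// via procresize below.  example_double_procs is a hypothetical caller.
#if 0
static int32
example_double_procs(void)
{
	int32 old;

	old = runtime_gomaxprocsfunc(0);	// query without changing anything
	return runtime_gomaxprocsfunc(2*old);	// returns the previous setting
}
#endif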
2564 // lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
2565 // after they modify m->locked. Do not allow preemption during this call,
2566 // or else the m might be different in this function than in the caller.
2567 static void
2568 lockOSThread(void)
2570 g->m->lockedg = g;
2571 g->lockedm = g->m;
2574 void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
2575 void
2576 runtime_LockOSThread(void)
2578 g->m->locked |= _LockExternal;
2579 lockOSThread();
2582 void
2583 runtime_lockOSThread(void)
2585 g->m->locked += _LockInternal;
2586 lockOSThread();
2590 // unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
2591 // after they update m->locked. Do not allow preemption during this call,
2592 // or else the m might be different in this function than in the caller.
2593 static void
2594 unlockOSThread(void)
2596 if(g->m->locked != 0)
2597 return;
2598 g->m->lockedg = nil;
2599 g->lockedm = nil;
2602 void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
2604 void
2605 runtime_UnlockOSThread(void)
2607 g->m->locked &= ~_LockExternal;
2608 unlockOSThread();
2611 void
2612 runtime_unlockOSThread(void)
2614 if(g->m->locked < _LockInternal)
2615 runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
2616 g->m->locked -= _LockInternal;
2617 unlockOSThread();
2620 bool
2621 runtime_lockedOSThread(void)
2623 return g->lockedm != nil && g->m->lockedg != nil;
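// Sketch of how the two parts of m->locked combine: runtime.LockOSThread
// sets the single _LockExternal bit, while the internal variant counts
// nested _LockInternal uses; unlockOSThread only releases the thread once
// both are zero.  example_nested_internal_lock is hypothetical.
#if 0
static void
example_nested_internal_lock(void)
{
	runtime_lockOSThread();		// m->locked += _LockInternal (now 1)
	runtime_lockOSThread();		// nested use, counter goes to 2
	runtime_unlockOSThread();	// back to 1, g stays wired to this M
	runtime_unlockOSThread();	// 0: lockedg/lockedm are cleared
}
#endif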
2626 int32
2627 runtime_gcount(void)
2629 G *gp;
2630 int32 n, s;
2631 uintptr i;
2633 n = 0;
2634 runtime_lock(&allglock);
2635 // TODO(dvyukov): runtime.NumGoroutine() is O(N).
2636 // We do not want to increment/decrement centralized counter in newproc/goexit,
2637 // just to make runtime.NumGoroutine() faster.
2638 // Compromise solution is to introduce per-P counters of active goroutines.
2639 for(i = 0; i < runtime_allglen; i++) {
2640 gp = runtime_allg[i];
2641 s = gp->atomicstatus;
2642 if(s == _Grunnable || s == _Grunning || s == _Gsyscall || s == _Gwaiting)
2643 n++;
2645 runtime_unlock(&allglock);
2646 return n;
2649 int32
2650 runtime_mcount(void)
2652 return runtime_sched.mcount;
2655 static struct {
2656 uint32 lock;
2657 int32 hz;
2658 } prof;
2660 static void System(void) {}
2661 static void GC(void) {}
2663 // Called if we receive a SIGPROF signal.
2664 void
2665 runtime_sigprof()
2667 M *mp = g->m;
2668 int32 n, i;
2669 bool traceback;
2670 uintptr pcbuf[TracebackMaxFrames];
2671 Location locbuf[TracebackMaxFrames];
2672 Slice stk;
2674 if(prof.hz == 0)
2675 return;
2677 if(mp == nil)
2678 return;
2680 // Profiling runs concurrently with GC, so it must not allocate.
2681 mp->mallocing++;
2683 traceback = true;
2685 if(mp->mcache == nil)
2686 traceback = false;
2688 n = 0;
2690 if(runtime_atomicload(&runtime_in_callers) > 0) {
2691 // If SIGPROF arrived while already fetching runtime
2692 // callers we can have trouble on older systems
2693 // because the unwind library calls dl_iterate_phdr
2694 // which was not recursive in the past.
2695 traceback = false;
2698 if(traceback) {
2699 n = runtime_callers(0, locbuf, nelem(locbuf), false);
2700 for(i = 0; i < n; i++)
2701 pcbuf[i] = locbuf[i].pc;
2703 if(!traceback || n <= 0) {
2704 n = 2;
2705 pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
2706 if(mp->gcing || mp->helpgc)
2707 pcbuf[1] = (uintptr)GC;
2708 else
2709 pcbuf[1] = (uintptr)System;
2712 if (prof.hz != 0) {
2713 stk.__values = &pcbuf[0];
2714 stk.__count = n;
2715 stk.__capacity = n;
2717 // Simple cas-lock to coordinate with setcpuprofilerate.
2718 while (!runtime_cas(&prof.lock, 0, 1)) {
2719 runtime_osyield();
2721 if (prof.hz != 0) {
2722 runtime_cpuprofAdd(stk);
2724 runtime_atomicstore(&prof.lock, 0);
2727 mp->mallocing--;
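// The small cas-based lock used around prof above, in isolation: spin
// with runtime_osyield until the word flips 0 -> 1, then store 0 to
// release.  sigprof and setcpuprofilerate_m both take it this way so a
// profiling signal cannot observe prof.hz mid-update.  The helper names
// are hypothetical.
#if 0
static void
example_proflock(uint32 *l)
{
	while(!runtime_cas(l, 0, 1))
		runtime_osyield();
}

static void
example_profunlock(uint32 *l)
{
	runtime_atomicstore(l, 0);
}
#endif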
2730 // Arrange for the profiler to take a traceback hz times a second.
2731 void
2732 runtime_setcpuprofilerate_m(int32 hz)
2734 // Force sane arguments.
2735 if(hz < 0)
2736 hz = 0;
2738 // Disable preemption, otherwise we can be rescheduled to another thread
2739 // that has profiling enabled.
2740 g->m->locks++;
2742 // Stop profiler on this thread so that it is safe to lock prof.
2743 // If a profiling signal came in while we had prof locked,
2744 // it would deadlock.
2745 runtime_resetcpuprofiler(0);
2747 while (!runtime_cas(&prof.lock, 0, 1)) {
2748 runtime_osyield();
2750 prof.hz = hz;
2751 runtime_atomicstore(&prof.lock, 0);
2753 runtime_lock(&runtime_sched);
2754 runtime_sched.profilehz = hz;
2755 runtime_unlock(&runtime_sched);
2757 if(hz != 0)
2758 runtime_resetcpuprofiler(hz);
2760 g->m->locks--;
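// Minimal sketch, under the assumption that the platform profiler is an
// interval timer: arming ITIMER_PROF so the kernel delivers SIGPROF
// roughly hz times per second of CPU time, which ends up in
// runtime_sigprof above.  The real hookup lives in
// runtime_resetcpuprofiler elsewhere in the runtime; the names below are
// hypothetical.
#if 0
#include <sys/time.h>

static void
example_arm_profiler(int32 hz)
{
	struct itimerval it;

	runtime_memclr((byte*)&it, sizeof it);
	if(hz > 0) {
		it.it_interval.tv_usec = 1000000 / hz;
		it.it_value = it.it_interval;
	}
	setitimer(ITIMER_PROF, &it, nil);	// hz == 0 disarms the timer
}
#endif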
2763 // Change number of processors. The world is stopped, sched is locked.
2764 static void
2765 procresize(int32 new)
2767 int32 i, old;
2768 bool pempty;
2769 G *gp;
2770 P *p;
2772 old = runtime_gomaxprocs;
2773 if(old < 0 || old > _MaxGomaxprocs || new <= 0 || new >_MaxGomaxprocs)
2774 runtime_throw("procresize: invalid arg");
2775 // initialize new P's
2776 for(i = 0; i < new; i++) {
2777 p = runtime_allp[i];
2778 if(p == nil) {
2779 p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
2780 p->id = i;
2781 p->status = _Pgcstop;
2782 runtime_atomicstorep(&runtime_allp[i], p);
2784 if(p->mcache == nil) {
2785 if(old==0 && i==0)
2786 p->mcache = g->m->mcache; // bootstrap
2787 else
2788 p->mcache = runtime_allocmcache();
2792 // redistribute runnable G's evenly
2793 // collect all runnable goroutines in global queue preserving FIFO order
2794 // FIFO order is required to ensure fairness even during frequent GCs
2795 // see http://golang.org/issue/7126
2796 pempty = false;
2797 while(!pempty) {
2798 pempty = true;
2799 for(i = 0; i < old; i++) {
2800 p = runtime_allp[i];
2801 if(p->runqhead == p->runqtail)
2802 continue;
2803 pempty = false;
2804 // pop from tail of local queue
2805 p->runqtail--;
2806 gp = (G*)p->runq[p->runqtail%nelem(p->runq)];
2807 // push onto head of global queue
2808 gp->schedlink = (uintptr)runtime_sched.runqhead;
2809 runtime_sched.runqhead = gp;
2810 if(runtime_sched.runqtail == nil)
2811 runtime_sched.runqtail = gp;
2812 runtime_sched.runqsize++;
2815 // fill local queues with at most nelem(p->runq)/2 goroutines
2816 // start at 1 because current M already executes some G and will acquire allp[0] below,
2817 // so if we have a spare G we want to put it into allp[1].
2818 for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched.runqsize > 0; i++) {
2819 gp = runtime_sched.runqhead;
2820 runtime_sched.runqhead = (G*)gp->schedlink;
2821 if(runtime_sched.runqhead == nil)
2822 runtime_sched.runqtail = nil;
2823 runtime_sched.runqsize--;
2824 runqput(runtime_allp[i%new], gp);
2827 // free unused P's
2828 for(i = new; i < old; i++) {
2829 p = runtime_allp[i];
2830 runtime_freemcache(p->mcache);
2831 p->mcache = nil;
2832 gfpurge(p);
2833 p->status = _Pdead;
2834 // can't free P itself because it can be referenced by an M in syscall
2837 if(g->m->p)
2838 ((P*)g->m->p)->m = 0;
2839 g->m->p = 0;
2840 g->m->mcache = nil;
2841 p = runtime_allp[0];
2842 p->m = 0;
2843 p->status = _Pidle;
2844 acquirep(p);
2845 for(i = new-1; i > 0; i--) {
2846 p = runtime_allp[i];
2847 p->status = _Pidle;
2848 pidleput(p);
2850 runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
2853 // Associate p and the current m.
2854 static void
2855 acquirep(P *p)
2857 M *m;
2859 m = g->m;
2860 if(m->p || m->mcache)
2861 runtime_throw("acquirep: already in go");
2862 if(p->m || p->status != _Pidle) {
2863 runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? ((M*)p->m)->id : 0, p->status);
2864 runtime_throw("acquirep: invalid p state");
2866 m->mcache = p->mcache;
2867 m->p = (uintptr)p;
2868 p->m = (uintptr)m;
2869 p->status = _Prunning;
2872 // Disassociate p and the current m.
2873 static P*
2874 releasep(void)
2876 M *m;
2877 P *p;
2879 m = g->m;
2880 if(m->p == 0 || m->mcache == nil)
2881 runtime_throw("releasep: invalid arg");
2882 p = (P*)m->p;
2883 if((M*)p->m != m || p->mcache != m->mcache || p->status != _Prunning) {
2884 runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
2885 m, m->p, p->m, m->mcache, p->mcache, p->status);
2886 runtime_throw("releasep: invalid p state");
2888 m->p = 0;
2889 m->mcache = nil;
2890 p->m = 0;
2891 p->status = _Pidle;
2892 return p;
2895 static void
2896 incidlelocked(int32 v)
2898 runtime_lock(&runtime_sched);
2899 runtime_sched.nmidlelocked += v;
2900 if(v > 0)
2901 checkdead();
2902 runtime_unlock(&runtime_sched);
2905 // Check for deadlock situation.
2906 // The check is based on the number of running M's; if it is 0, we have a deadlock.
2907 static void
2908 checkdead(void)
2910 G *gp;
2911 int32 run, grunning, s;
2912 uintptr i;
2914 // For -buildmode=c-shared or -buildmode=c-archive it's OK if
2915 // there are no running goroutines. The calling program is
2916 // assumed to be running.
2917 if(runtime_isarchive) {
2918 return;
2921 // -1 for sysmon
2922 run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.nmidlelocked - 1 - countextra();
2923 if(run > 0)
2924 return;
2925 // If we are dying because of a signal caught on an already idle thread,
2926 // freezetheworld will cause all running threads to block.
2927 // And runtime will essentially enter into deadlock state,
2928 // except that there is a thread that will call runtime_exit soon.
2929 if(runtime_panicking > 0)
2930 return;
2931 if(run < 0) {
2932 runtime_printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
2933 runtime_sched.nmidle, runtime_sched.nmidlelocked, runtime_sched.mcount);
2934 runtime_throw("checkdead: inconsistent counts");
2936 grunning = 0;
2937 runtime_lock(&allglock);
2938 for(i = 0; i < runtime_allglen; i++) {
2939 gp = runtime_allg[i];
2940 if(gp->isbackground)
2941 continue;
2942 s = gp->atomicstatus;
2943 if(s == _Gwaiting)
2944 grunning++;
2945 else if(s == _Grunnable || s == _Grunning || s == _Gsyscall) {
2946 runtime_unlock(&allglock);
2947 runtime_printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);
2948 runtime_throw("checkdead: runnable g");
2951 runtime_unlock(&allglock);
2952 if(grunning == 0) // possible if main goroutine calls runtime_Goexit()
2953 runtime_throw("no goroutines (main called runtime.Goexit) - deadlock!");
2954 g->m->throwing = -1; // do not dump full stacks
2955 runtime_throw("all goroutines are asleep - deadlock!");
2958 static void
2959 sysmon(void)
2961 uint32 idle, delay;
2962 int64 now, lastpoll, lasttrace;
2963 G *gp;
2965 lasttrace = 0;
2966 idle = 0; // how many cycles in succession we have not woken anybody up
2967 delay = 0;
2968 for(;;) {
2969 if(idle == 0) // start with 20us sleep...
2970 delay = 20;
2971 else if(idle > 50) // start doubling the sleep after 1ms...
2972 delay *= 2;
2973 if(delay > 10*1000) // up to 10ms
2974 delay = 10*1000;
2975 runtime_usleep(delay);
2976 if(runtime_debug.schedtrace <= 0 &&
2977 (runtime_sched.gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic
2978 runtime_lock(&runtime_sched);
2979 if(runtime_atomicload(&runtime_sched.gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) {
2980 runtime_atomicstore(&runtime_sched.sysmonwait, 1);
2981 runtime_unlock(&runtime_sched);
2982 runtime_notesleep(&runtime_sched.sysmonnote);
2983 runtime_noteclear(&runtime_sched.sysmonnote);
2984 idle = 0;
2985 delay = 20;
2986 } else
2987 runtime_unlock(&runtime_sched);
2989 // poll network if not polled for more than 10ms
2990 lastpoll = runtime_atomicload64(&runtime_sched.lastpoll);
2991 now = runtime_nanotime();
2992 if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
2993 runtime_cas64(&runtime_sched.lastpoll, lastpoll, now);
2994 gp = runtime_netpoll(false); // non-blocking
2995 if(gp) {
2996 // Need to decrement number of idle locked M's
2997 // (pretending that one more is running) before injectglist.
2998 // Otherwise it can lead to the following situation:
2999 // injectglist grabs all P's but before it starts M's to run the P's,
3000 // another M returns from syscall, finishes running its G,
3001 // observes that there is no work to do and no other running M's
3002 // and reports deadlock.
3003 incidlelocked(-1);
3004 injectglist(gp);
3005 incidlelocked(1);
3008 // retake P's blocked in syscalls
3009 // and preempt long running G's
3010 if(retake(now))
3011 idle = 0;
3012 else
3013 idle++;
3015 if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) {
3016 lasttrace = now;
3017 runtime_schedtrace(runtime_debug.scheddetail);
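// The sleep schedule sysmon uses above, pulled out for clarity: 20us
// while work keeps turning up, doubling once 50 consecutive idle cycles
// have passed, capped at 10ms.  sysmon_next_delay is a hypothetical
// helper, not part of the runtime.
#if 0
static uint32
sysmon_next_delay(uint32 idle, uint32 delay)
{
	if(idle == 0)		// found work last cycle: reset to 20us
		delay = 20;
	else if(idle > 50)	// long idle streak: back off exponentially
		delay *= 2;
	if(delay > 10*1000)	// never sleep longer than 10ms
		delay = 10*1000;
	return delay;		// microseconds, fed to runtime_usleep
}
#endif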
3022 typedef struct Pdesc Pdesc;
3023 struct Pdesc
3025 uint32 schedtick;
3026 int64 schedwhen;
3027 uint32 syscalltick;
3028 int64 syscallwhen;
3030 static Pdesc pdesc[_MaxGomaxprocs];
3032 static uint32
3033 retake(int64 now)
3035 uint32 i, s, n;
3036 int64 t;
3037 P *p;
3038 Pdesc *pd;
3040 n = 0;
3041 for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
3042 p = runtime_allp[i];
3043 if(p==nil)
3044 continue;
3045 pd = &pdesc[i];
3046 s = p->status;
3047 if(s == _Psyscall) {
3048 // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
3049 t = p->syscalltick;
3050 if(pd->syscalltick != t) {
3051 pd->syscalltick = t;
3052 pd->syscallwhen = now;
3053 continue;
3055 // On the one hand we don't want to retake Ps if there is no other work to do,
3056 // but on the other hand we want to retake them eventually
3057 // because they can prevent the sysmon thread from deep sleep.
3058 if(p->runqhead == p->runqtail &&
3059 runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0 &&
3060 pd->syscallwhen + 10*1000*1000 > now)
3061 continue;
3062 // Need to decrement number of idle locked M's
3063 // (pretending that one more is running) before the CAS.
3064 // Otherwise the M from which we retake can exit the syscall,
3065 // increment nmidle and report deadlock.
3066 incidlelocked(-1);
3067 if(runtime_cas(&p->status, s, _Pidle)) {
3068 n++;
3069 handoffp(p);
3071 incidlelocked(1);
3072 } else if(s == _Prunning) {
3073 // Preempt G if it's running for more than 10ms.
3074 t = p->schedtick;
3075 if(pd->schedtick != t) {
3076 pd->schedtick = t;
3077 pd->schedwhen = now;
3078 continue;
3080 if(pd->schedwhen + 10*1000*1000 > now)
3081 continue;
3082 // preemptone(p);
3085 return n;
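// The tick/timestamp pattern retake relies on, in isolation: remember
// the last tick we observed and when we first saw it, and only act once
// the same tick has stayed visible for longer than the threshold.  The
// helper below is hypothetical.
#if 0
static bool
example_stuck_longer_than(uint32 tick, uint32 *lasttick, int64 *lastwhen,
			  int64 now, int64 threshold)
{
	if(*lasttick != tick) {		// progress was made: restart the clock
		*lasttick = tick;
		*lastwhen = now;
		return false;
	}
	return *lastwhen + threshold <= now;
}
#endif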
3088 // Tell all goroutines that they have been preempted and they should stop.
3089 // This function is purely best-effort. It can fail to inform a goroutine if a
3090 // processor just started running it.
3091 // No locks need to be held.
3092 // Returns true if preemption request was issued to at least one goroutine.
3093 static bool
3094 preemptall(void)
3096 return false;
3099 void
3100 runtime_schedtrace(bool detailed)
3102 static int64 starttime;
3103 int64 now;
3104 int64 id1, id2, id3;
3105 int32 i, t, h;
3106 uintptr gi;
3107 const char *fmt;
3108 M *mp, *lockedm;
3109 G *gp, *lockedg;
3110 P *p;
3112 now = runtime_nanotime();
3113 if(starttime == 0)
3114 starttime = now;
3116 runtime_lock(&runtime_sched);
3117 runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
3118 (now-starttime)/1000000, runtime_gomaxprocs, runtime_sched.npidle, runtime_sched.mcount,
3119 runtime_sched.nmidle, runtime_sched.runqsize);
3120 if(detailed) {
3121 runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
3122 runtime_sched.gcwaiting, runtime_sched.nmidlelocked, runtime_sched.nmspinning,
3123 runtime_sched.stopwait, runtime_sched.sysmonwait);
3125 // We must be careful while reading data from P's, M's and G's.
3126 // Even if we hold schedlock, most data can be changed concurrently.
3127 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
3128 for(i = 0; i < runtime_gomaxprocs; i++) {
3129 p = runtime_allp[i];
3130 if(p == nil)
3131 continue;
3132 mp = (M*)p->m;
3133 h = runtime_atomicload(&p->runqhead);
3134 t = runtime_atomicload(&p->runqtail);
3135 if(detailed)
3136 runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
3137 i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
3138 else {
3139 // In non-detailed mode format lengths of per-P run queues as:
3140 // [len1 len2 len3 len4]
3141 fmt = " %d";
3142 if(runtime_gomaxprocs == 1)
3143 fmt = " [%d]\n";
3144 else if(i == 0)
3145 fmt = " [%d";
3146 else if(i == runtime_gomaxprocs-1)
3147 fmt = " %d]\n";
3148 runtime_printf(fmt, t-h);
3151 if(!detailed) {
3152 runtime_unlock(&runtime_sched);
3153 return;
3155 for(mp = runtime_allm; mp; mp = mp->alllink) {
3156 p = (P*)mp->p;
3157 gp = mp->curg;
3158 lockedg = mp->lockedg;
3159 id1 = -1;
3160 if(p)
3161 id1 = p->id;
3162 id2 = -1;
3163 if(gp)
3164 id2 = gp->goid;
3165 id3 = -1;
3166 if(lockedg)
3167 id3 = lockedg->goid;
3168 runtime_printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
3169 " locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
3170 mp->id, id1, id2,
3171 mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
3172 mp->spinning, mp->blocked, id3);
3174 runtime_lock(&allglock);
3175 for(gi = 0; gi < runtime_allglen; gi++) {
3176 gp = runtime_allg[gi];
3177 mp = gp->m;
3178 lockedm = gp->lockedm;
3179 runtime_printf(" G%D: status=%d(%S) m=%d lockedm=%d\n",
3180 gp->goid, gp->atomicstatus, gp->waitreason, mp ? mp->id : -1,
3181 lockedm ? lockedm->id : -1);
3183 runtime_unlock(&allglock);
3184 runtime_unlock(&runtime_sched);
3187 // Put mp on midle list.
3188 // Sched must be locked.
3189 static void
3190 mput(M *mp)
3192 mp->schedlink = (uintptr)runtime_sched.midle;
3193 runtime_sched.midle = mp;
3194 runtime_sched.nmidle++;
3195 checkdead();
3198 // Try to get an m from midle list.
3199 // Sched must be locked.
3200 static M*
3201 mget(void)
3203 M *mp;
3205 if((mp = runtime_sched.midle) != nil){
3206 runtime_sched.midle = (M*)mp->schedlink;
3207 runtime_sched.nmidle--;
3209 return mp;
3212 // Put gp on the global runnable queue.
3213 // Sched must be locked.
3214 static void
3215 globrunqput(G *gp)
3217 gp->schedlink = 0;
3218 if(runtime_sched.runqtail)
3219 runtime_sched.runqtail->schedlink = (uintptr)gp;
3220 else
3221 runtime_sched.runqhead = gp;
3222 runtime_sched.runqtail = gp;
3223 runtime_sched.runqsize++;
3226 // Put a batch of runnable goroutines on the global runnable queue.
3227 // Sched must be locked.
3228 static void
3229 globrunqputbatch(G *ghead, G *gtail, int32 n)
3231 gtail->schedlink = 0;
3232 if(runtime_sched.runqtail)
3233 runtime_sched.runqtail->schedlink = (uintptr)ghead;
3234 else
3235 runtime_sched.runqhead = ghead;
3236 runtime_sched.runqtail = gtail;
3237 runtime_sched.runqsize += n;
3240 // Try to get a batch of G's from the global runnable queue.
3241 // Sched must be locked.
3242 static G*
3243 globrunqget(P *p, int32 max)
3245 G *gp, *gp1;
3246 int32 n;
3248 if(runtime_sched.runqsize == 0)
3249 return nil;
3250 n = runtime_sched.runqsize/runtime_gomaxprocs+1;
3251 if(n > runtime_sched.runqsize)
3252 n = runtime_sched.runqsize;
3253 if(max > 0 && n > max)
3254 n = max;
3255 if((uint32)n > nelem(p->runq)/2)
3256 n = nelem(p->runq)/2;
3257 runtime_sched.runqsize -= n;
3258 if(runtime_sched.runqsize == 0)
3259 runtime_sched.runqtail = nil;
3260 gp = runtime_sched.runqhead;
3261 runtime_sched.runqhead = (G*)gp->schedlink;
3262 n--;
3263 while(n--) {
3264 gp1 = runtime_sched.runqhead;
3265 runtime_sched.runqhead = (G*)gp1->schedlink;
3266 runqput(p, gp1);
3268 return gp;
3271 // Put p on the pidle list.
3272 // Sched must be locked.
3273 static void
3274 pidleput(P *p)
3276 p->link = (uintptr)runtime_sched.pidle;
3277 runtime_sched.pidle = p;
3278 runtime_xadd(&runtime_sched.npidle, 1); // TODO: fast atomic
3281 // Try to get a p from the pidle list.
3282 // Sched must be locked.
3283 static P*
3284 pidleget(void)
3286 P *p;
3288 p = runtime_sched.pidle;
3289 if(p) {
3290 runtime_sched.pidle = (P*)p->link;
3291 runtime_xadd(&runtime_sched.npidle, -1); // TODO: fast atomic
3293 return p;
3296 // Try to put g on local runnable queue.
3297 // If it's full, put onto global queue.
3298 // Executed only by the owner P.
3299 static void
3300 runqput(P *p, G *gp)
3302 uint32 h, t;
3304 retry:
3305 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3306 t = p->runqtail;
3307 if(t - h < nelem(p->runq)) {
3308 p->runq[t%nelem(p->runq)] = (uintptr)gp;
3309 runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
3310 return;
3312 if(runqputslow(p, gp, h, t))
3313 return;
3314 // the queue is not full, so now the put above must succeed
3315 goto retry;
3318 // Put g and a batch of work from local runnable queue on global queue.
3319 // Executed only by the owner P.
3320 static bool
3321 runqputslow(P *p, G *gp, uint32 h, uint32 t)
3323 G *batch[nelem(p->runq)/2+1];
3324 uint32 n, i;
3326 // First, grab a batch from local queue.
3327 n = t-h;
3328 n = n/2;
3329 if(n != nelem(p->runq)/2)
3330 runtime_throw("runqputslow: queue is not full");
3331 for(i=0; i<n; i++)
3332 batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
3333 if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3334 return false;
3335 batch[n] = gp;
3336 // Link the goroutines.
3337 for(i=0; i<n; i++)
3338 batch[i]->schedlink = (uintptr)batch[i+1];
3339 // Now put the batch on global queue.
3340 runtime_lock(&runtime_sched);
3341 globrunqputbatch(batch[0], batch[n], n+1);
3342 runtime_unlock(&runtime_sched);
3343 return true;
3346 // Get g from local runnable queue.
3347 // Executed only by the owner P.
3348 static G*
3349 runqget(P *p)
3351 G *gp;
3352 uint32 t, h;
3354 for(;;) {
3355 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3356 t = p->runqtail;
3357 if(t == h)
3358 return nil;
3359 gp = (G*)p->runq[h%nelem(p->runq)];
3360 if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume
3361 return gp;
3365 // Grabs a batch of goroutines from local runnable queue.
3366 // batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
3367 // Can be executed by any P.
3368 static uint32
3369 runqgrab(P *p, G **batch)
3371 uint32 t, h, n, i;
3373 for(;;) {
3374 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3375 t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer
3376 n = t-h;
3377 n = n - n/2;
3378 if(n == 0)
3379 break;
3380 if(n > nelem(p->runq)/2) // read inconsistent h and t
3381 continue;
3382 for(i=0; i<n; i++)
3383 batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
3384 if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3385 break;
3387 return n;
3390 // Steal half of elements from local runnable queue of p2
3391 // and put onto local runnable queue of p.
3392 // Returns one of the stolen elements (or nil if failed).
3393 static G*
3394 runqsteal(P *p, P *p2)
3396 G *gp;
3397 G *batch[nelem(p->runq)/2];
3398 uint32 t, h, n, i;
3400 n = runqgrab(p2, batch);
3401 if(n == 0)
3402 return nil;
3403 n--;
3404 gp = batch[n];
3405 if(n == 0)
3406 return gp;
3407 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3408 t = p->runqtail;
3409 if(t - h + n >= nelem(p->runq))
3410 runtime_throw("runqsteal: runq overflow");
3411 for(i=0; i<n; i++, t++)
3412 p->runq[t%nelem(p->runq)] = (uintptr)batch[i];
3413 runtime_atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption
3414 return gp;
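// Self-contained sketch of the runq protocol above, rewritten with C11
// atomics instead of runtime_atomicload/runtime_atomicstore/runtime_cas.
// The owner P is the only producer and bumps tail with a release store;
// any P may consume by advancing head with a CAS.  All names below are
// hypothetical; example_runq is not the runtime's actual type.
#if 0
#include <stdatomic.h>
#include <stdint.h>
#include <stddef.h>

#define EXAMPLE_RUNQ_SIZE 256

struct example_runq {
	_Atomic uint32_t head;			// advanced by any consumer
	_Atomic uint32_t tail;			// advanced only by the owner
	void *slot[EXAMPLE_RUNQ_SIZE];
};

static int
example_runq_put(struct example_runq *q, void *gp)	// owner only
{
	uint32_t h = atomic_load_explicit(&q->head, memory_order_acquire);
	uint32_t t = atomic_load_explicit(&q->tail, memory_order_relaxed);

	if(t - h >= EXAMPLE_RUNQ_SIZE)
		return 0;			// full: caller spills to the global queue
	q->slot[t % EXAMPLE_RUNQ_SIZE] = gp;
	atomic_store_explicit(&q->tail, t + 1, memory_order_release);
	return 1;
}

static void *
example_runq_get(struct example_runq *q)		// any consumer
{
	for(;;) {
		uint32_t h = atomic_load_explicit(&q->head, memory_order_acquire);
		uint32_t t = atomic_load_explicit(&q->tail, memory_order_acquire);
		if(t == h)
			return NULL;			// empty
		void *gp = q->slot[h % EXAMPLE_RUNQ_SIZE];
		// The CAS commits the consume; losing it means another
		// consumer took the slot, so reload and retry.
		if(atomic_compare_exchange_weak_explicit(&q->head, &h, h + 1,
		    memory_order_acq_rel, memory_order_acquire))
			return gp;
	}
}
#endif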
3417 void runtime_testSchedLocalQueue(void)
3418 __asm__("runtime.testSchedLocalQueue");
3420 void
3421 runtime_testSchedLocalQueue(void)
3423 P p;
3424 G gs[nelem(p.runq)];
3425 int32 i, j;
3427 runtime_memclr((byte*)&p, sizeof(p));
3429 for(i = 0; i < (int32)nelem(gs); i++) {
3430 if(runqget(&p) != nil)
3431 runtime_throw("runq is not empty initially");
3432 for(j = 0; j < i; j++)
3433 runqput(&p, &gs[i]);
3434 for(j = 0; j < i; j++) {
3435 if(runqget(&p) != &gs[i]) {
3436 runtime_printf("bad element at iter %d/%d\n", i, j);
3437 runtime_throw("bad element");
3440 if(runqget(&p) != nil)
3441 runtime_throw("runq is not empty afterwards");
3445 void runtime_testSchedLocalQueueSteal(void)
3446 __asm__("runtime.testSchedLocalQueueSteal");
3448 void
3449 runtime_testSchedLocalQueueSteal(void)
3451 P p1, p2;
3452 G gs[nelem(p1.runq)], *gp;
3453 int32 i, j, s;
3455 runtime_memclr((byte*)&p1, sizeof(p1));
3456 runtime_memclr((byte*)&p2, sizeof(p2));
3458 for(i = 0; i < (int32)nelem(gs); i++) {
3459 for(j = 0; j < i; j++) {
3460 gs[j].sig = 0;
3461 runqput(&p1, &gs[j]);
3463 gp = runqsteal(&p2, &p1);
3464 s = 0;
3465 if(gp) {
3466 s++;
3467 gp->sig++;
3469 while((gp = runqget(&p2)) != nil) {
3470 s++;
3471 gp->sig++;
3473 while((gp = runqget(&p1)) != nil)
3474 gp->sig++;
3475 for(j = 0; j < i; j++) {
3476 if(gs[j].sig != 1) {
3477 runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
3478 runtime_throw("bad element");
3481 if(s != i/2 && s != i/2+1) {
3482 runtime_printf("bad steal %d, want %d or %d, iter %d\n",
3483 s, i/2, i/2+1, i);
3484 runtime_throw("bad steal");
3489 intgo
3490 runtime_setmaxthreads(intgo in)
3492 intgo out;
3494 runtime_lock(&runtime_sched);
3495 out = (intgo)runtime_sched.maxmcount;
3496 runtime_sched.maxmcount = (int32)in;
3497 checkmcount();
3498 runtime_unlock(&runtime_sched);
3499 return out;
3502 void
3503 runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
3505 enqueue1(wbufp, (Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0});
3506 enqueue1(wbufp, (Obj){(byte*)&runtime_main_init_done, sizeof runtime_main_init_done, 0});
3509 // Return whether we are waiting for a GC. This gc toolchain uses
3510 // preemption instead.
3511 bool
3512 runtime_gcwaiting(void)
3514 return runtime_sched.gcwaiting;
3517 // os_beforeExit is called from os.Exit(0).
3518 //go:linkname os_beforeExit os.runtime_beforeExit
3520 extern void os_beforeExit() __asm__ (GOSYM_PREFIX "os.runtime_beforeExit");
3522 void
3523 os_beforeExit()
3527 // Active spinning for sync.Mutex.
3528 //go:linkname sync_runtime_canSpin sync.runtime_canSpin
3530 enum
3532 ACTIVE_SPIN = 4,
3533 ACTIVE_SPIN_CNT = 30,
3536 extern _Bool sync_runtime_canSpin(intgo i)
3537 __asm__ (GOSYM_PREFIX "sync.runtime_canSpin");
3539 _Bool
3540 sync_runtime_canSpin(intgo i)
3542 P *p;
3544 // sync.Mutex is cooperative, so we are conservative with spinning.
3545 // Spin only a few times and only if running on a multicore machine and
3546 // GOMAXPROCS>1 and there is at least one other running P and local runq is empty.
3547 // As opposed to runtime mutex we don't do passive spinning here,
3548 // because there can be work on the global runq or on other Ps.
3549 if (i >= ACTIVE_SPIN || runtime_ncpu <= 1 || runtime_gomaxprocs <= (int32)(runtime_sched.npidle+runtime_sched.nmspinning)+1) {
3550 return false;
3552 p = (P*)g->m->p;
3553 return p != nil && p->runqhead == p->runqtail;
3556 //go:linkname sync_runtime_doSpin sync.runtime_doSpin
3557 //go:nosplit
3559 extern void sync_runtime_doSpin(void)
3560 __asm__ (GOSYM_PREFIX "sync.runtime_doSpin");
3562 void
3563 sync_runtime_doSpin()
3565 runtime_procyield(ACTIVE_SPIN_CNT);
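// Sketch of the intended call pattern from sync.Mutex (normally Go
// code): spin a bounded number of times while the lock word stays
// contended, then give up and park.  Only sync_runtime_canSpin,
// sync_runtime_doSpin and runtime_cas are real; the lock word and
// example_try_spin are hypothetical.
#if 0
static bool
example_try_spin(uint32 *lockword)
{
	intgo iter;

	for(iter = 0; sync_runtime_canSpin(iter); iter++) {
		if(runtime_cas(lockword, 0, 1))		// try to grab the lock
			return true;
		sync_runtime_doSpin();			// ACTIVE_SPIN_CNT procyield iterations
	}
	return false;					// caller falls back to blocking
}
#endif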
3568 // For Go code to look at variables, until we port proc.go.
3570 extern M** runtime_go_allm(void)
3571 __asm__ (GOSYM_PREFIX "runtime.allm");
3574 runtime_go_allm()
3576 return &runtime_allm;
3579 extern Slice runtime_go_allgs(void)
3580 __asm__ (GOSYM_PREFIX "runtime.allgs");
3582 Slice
3583 runtime_go_allgs()
3585 Slice s;
3587 s.__values = runtime_allg;
3588 s.__count = runtime_allglen;
3589 s.__capacity = allgcap;
3590 return s;