1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 #include <limits.h>
6 #include <signal.h>
7 #include <stdlib.h>
8 #include <pthread.h>
9 #include <unistd.h>
11 #include "config.h"
13 #ifdef HAVE_DL_ITERATE_PHDR
14 #include <link.h>
15 #endif
17 #include "runtime.h"
18 #include "arch.h"
19 #include "defs.h"
20 #include "malloc.h"
21 #include "race.h"
22 #include "go-type.h"
23 #include "go-defer.h"
25 #ifdef USING_SPLIT_STACK
27 /* FIXME: These are not declared anywhere. */
29 extern void __splitstack_getcontext(void *context[10]);
31 extern void __splitstack_setcontext(void *context[10]);
33 extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
35 extern void * __splitstack_resetcontext(void *context[10], size_t *);
37 extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
38 void **);
40 extern void __splitstack_block_signals (int *, int *);
42 extern void __splitstack_block_signals_context (void *context[10], int *,
43 int *);
45 #endif
47 #ifndef PTHREAD_STACK_MIN
48 # define PTHREAD_STACK_MIN 8192
49 #endif
51 #if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
52 # define StackMin PTHREAD_STACK_MIN
53 #else
54 # define StackMin 2 * 1024 * 1024
55 #endif
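// With split stacks the initial stack of a goroutine can be small,
// since it grows on demand; without linker support every goroutine
// gets a single fixed stack, so StackMin is a full 2 MB.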
57 uintptr runtime_stacks_sys;
59 static void gtraceback(G*);
61 #ifdef __rtems__
62 #define __thread
63 #endif
65 static __thread G *g;
66 static __thread M *m;
68 #ifndef SETCONTEXT_CLOBBERS_TLS
70 static inline void
71 initcontext(void)
75 static inline void
76 fixcontext(ucontext_t *c __attribute__ ((unused)))
80 #else
82 # if defined(__x86_64__) && defined(__sun__)
84 // x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
85 // register to that of the thread which called getcontext. The effect
86 // is that the address of all __thread variables changes. This bug
87 // also affects pthread_self() and pthread_getspecific. We work
88 // around it by clobbering the context field directly to keep %fs the
89 // same.
91 static __thread greg_t fs;
93 static inline void
94 initcontext(void)
96 ucontext_t c;
98 getcontext(&c);
99 fs = c.uc_mcontext.gregs[REG_FSBASE];
102 static inline void
103 fixcontext(ucontext_t* c)
105 c->uc_mcontext.gregs[REG_FSBASE] = fs;
108 # elif defined(__NetBSD__)
110 // NetBSD has a bug: setcontext clobbers tlsbase, we need to save
111 // and restore it ourselves.
113 static __thread __greg_t tlsbase;
115 static inline void
116 initcontext(void)
118 ucontext_t c;
120 getcontext(&c);
121 tlsbase = c.uc_mcontext._mc_tlsbase;
124 static inline void
125 fixcontext(ucontext_t* c)
127 c->uc_mcontext._mc_tlsbase = tlsbase;
130 # else
132 # error unknown case for SETCONTEXT_CLOBBERS_TLS
134 # endif
136 #endif
138 // We can not always refer to the TLS variables directly. The
139 // compiler will call tls_get_addr to get the address of the variable,
140 // and it may hold it in a register across a call to schedule. When
141 // we get back from the call we may be running in a different thread,
142 // in which case the register now points to the TLS variable for a
143 // different thread. We use non-inlinable functions to avoid this
144 // when necessary.
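// For example, code that may reschedule must re-fetch the pointer
// afterward (gp = runtime_g();) instead of caching g across the call;
// see the reload after getcontext in runtime_mcall below.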
146 G* runtime_g(void) __attribute__ ((noinline, no_split_stack));
148 G*
149 runtime_g(void)
151 return g;
154 M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
156 M*
157 runtime_m(void)
159 return m;
162 // Set m and g.
163 void
164 runtime_setmg(M* mp, G* gp)
166 m = mp;
167 g = gp;
170 // The static TLS size. See runtime_newm.
171 static int tlssize;
173 // Start a new thread.
174 static void
175 runtime_newosproc(M *mp)
177 pthread_attr_t attr;
178 size_t stacksize;
179 sigset_t clear, old;
180 pthread_t tid;
181 int ret;
183 if(pthread_attr_init(&attr) != 0)
184 runtime_throw("pthread_attr_init");
185 if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
186 runtime_throw("pthread_attr_setdetachstate");
188 stacksize = PTHREAD_STACK_MIN;
190 // With glibc before version 2.16 the static TLS size is taken
191 // out of the stack size, and we get an error or a crash if
192 // there is not enough stack space left. Add it back in if we
193 // can, in case the program uses a lot of TLS space. FIXME:
194 // This can be disabled in glibc 2.16 and later, if the bug is
195 // indeed fixed then.
196 stacksize += tlssize;
198 if(pthread_attr_setstacksize(&attr, stacksize) != 0)
199 runtime_throw("pthread_attr_setstacksize");
201 // Block signals during pthread_create so that the new thread
202 // starts with signals disabled. It will enable them in minit.
203 sigfillset(&clear);
205 #ifdef SIGTRAP
206 // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
207 sigdelset(&clear, SIGTRAP);
208 #endif
210 sigemptyset(&old);
211 pthread_sigmask(SIG_BLOCK, &clear, &old);
212 ret = pthread_create(&tid, &attr, runtime_mstart, mp);
213 pthread_sigmask(SIG_SETMASK, &old, nil);
215 if (ret != 0)
216 runtime_throw("pthread_create");
219 // First function run by a new goroutine. This replaces gogocall.
220 static void
221 kickoff(void)
223 void (*fn)(void*);
225 if(g->traceback != nil)
226 gtraceback(g);
228 fn = (void (*)(void*))(g->entry);
229 fn(g->param);
230 runtime_goexit();
233 // Switch context to a different goroutine. This is like longjmp.
234 void runtime_gogo(G*) __attribute__ ((noinline));
235 void
236 runtime_gogo(G* newg)
238 #ifdef USING_SPLIT_STACK
239 __splitstack_setcontext(&newg->stack_context[0]);
240 #endif
241 g = newg;
242 newg->fromgogo = true;
243 fixcontext(&newg->context);
244 setcontext(&newg->context);
245 runtime_throw("gogo setcontext returned");
248 // Save context and call fn passing g as a parameter. This is like
249 // setjmp. Because getcontext always returns 0, unlike setjmp, we use
250 // g->fromgogo as a code. It will be true if we got here via
251 // setcontext. g == nil the first time this is called in a new m.
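// In effect the getcontext below returns twice: the first return falls
// through (fromgogo is false), switches to g0 and runs pfn(gp); the
// second return happens when runtime_gogo resumes this g's saved
// context with fromgogo set, and then mcall just returns to its caller.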
252 void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));
253 void
254 runtime_mcall(void (*pfn)(G*))
256 M *mp;
257 G *gp;
259 // Ensure that all registers are on the stack for the garbage
260 // collector.
261 __builtin_unwind_init();
263 mp = m;
264 gp = g;
265 if(gp == mp->g0)
266 runtime_throw("runtime: mcall called on m->g0 stack");
268 if(gp != nil) {
270 #ifdef USING_SPLIT_STACK
271 __splitstack_getcontext(&g->stack_context[0]);
272 #else
273 gp->gcnext_sp = &pfn;
274 #endif
275 gp->fromgogo = false;
276 getcontext(&gp->context);
278 // When we return from getcontext, we may be running
279 // in a new thread. That means that m and g may have
280 // changed. They are global variables so we will
281 // reload them, but the addresses of m and g may be
282 // cached in our local stack frame, and those
283 // addresses may be wrong. Call functions to reload
284 // the values for this thread.
285 mp = runtime_m();
286 gp = runtime_g();
288 if(gp->traceback != nil)
289 gtraceback(gp);
291 if (gp == nil || !gp->fromgogo) {
292 #ifdef USING_SPLIT_STACK
293 __splitstack_setcontext(&mp->g0->stack_context[0]);
294 #endif
295 mp->g0->entry = (byte*)pfn;
296 mp->g0->param = gp;
298 // It's OK to set g directly here because this case
299 // can not occur if we got here via a setcontext to
300 // the getcontext call just above.
301 g = mp->g0;
303 fixcontext(&mp->g0->context);
304 setcontext(&mp->g0->context);
305 runtime_throw("runtime: mcall function returned");
309 #ifdef HAVE_DL_ITERATE_PHDR
311 // Called via dl_iterate_phdr.
313 static int
314 addtls(struct dl_phdr_info* info, size_t size __attribute__ ((unused)), void *data)
316 size_t *total = (size_t *)data;
317 unsigned int i;
319 for(i = 0; i < info->dlpi_phnum; ++i) {
320 if(info->dlpi_phdr[i].p_type == PT_TLS)
321 *total += info->dlpi_phdr[i].p_memsz;
323 return 0;
326 // Set the total TLS size.
328 static void
329 inittlssize()
331 size_t total = 0;
333 dl_iterate_phdr(addtls, (void *)&total);
334 tlssize = total;
337 #else
339 static void
340 inittlssize()
344 #endif
346 // Goroutine scheduler
347 // The scheduler's job is to distribute ready-to-run goroutines over worker threads.
349 // The main concepts are:
350 // G - goroutine.
351 // M - worker thread, or machine.
352 // P - processor, a resource that is required to execute Go code.
353 // M must have an associated P to execute Go code; however, it can be
354 // blocked or in a syscall w/o an associated P.
356 // Design doc at http://golang.org/s/go11sched.
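// For example, when a goroutine enters a blocking syscall its M keeps
// running the syscall without a P; the P is handed to another (possibly
// new) M via handoffp so other goroutines can keep running on it.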
358 typedef struct Sched Sched;
359 struct Sched {
360 Lock;
362 uint64 goidgen;
363 M* midle; // idle m's waiting for work
364 int32 nmidle; // number of idle m's waiting for work
365 int32 nmidlelocked; // number of locked m's waiting for work
366 int32 mcount; // number of m's that have been created
367 int32 maxmcount; // maximum number of m's allowed (or die)
369 P* pidle; // idle P's
370 uint32 npidle;
371 uint32 nmspinning;
373 // Global runnable queue.
374 G* runqhead;
375 G* runqtail;
376 int32 runqsize;
378 // Global cache of dead G's.
379 Lock gflock;
380 G* gfree;
382 uint32 gcwaiting; // gc is waiting to run
383 int32 stopwait;
384 Note stopnote;
385 uint32 sysmonwait;
386 Note sysmonnote;
387 uint64 lastpoll;
389 int32 profilehz; // cpu profiling rate
392 enum
394 // The max value of GOMAXPROCS.
395 // There are no fundamental restrictions on the value.
396 MaxGomaxprocs = 1<<8,
398 // Number of goroutine ids to grab from runtime_sched.goidgen to local per-P cache at once.
399 // 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
400 GoidCacheBatch = 16,
403 Sched runtime_sched;
404 int32 runtime_gomaxprocs;
405 uint32 runtime_needextram = 1;
406 bool runtime_iscgo = true;
407 M runtime_m0;
408 G runtime_g0; // idle goroutine for m0
409 G* runtime_lastg;
410 M* runtime_allm;
411 P** runtime_allp;
412 M* runtime_extram;
413 int8* runtime_goos;
414 int32 runtime_ncpu;
415 bool runtime_precisestack;
416 static int32 newprocs;
418 static Lock allglock; // the following vars are protected by this lock or by stoptheworld
419 G** runtime_allg;
420 uintptr runtime_allglen;
421 static uintptr allgcap;
423 void* runtime_mstart(void*);
424 static void runqput(P*, G*);
425 static G* runqget(P*);
426 static bool runqputslow(P*, G*, uint32, uint32);
427 static G* runqsteal(P*, P*);
428 static void mput(M*);
429 static M* mget(void);
430 static void mcommoninit(M*);
431 static void schedule(void);
432 static void procresize(int32);
433 static void acquirep(P*);
434 static P* releasep(void);
435 static void newm(void(*)(void), P*);
436 static void stopm(void);
437 static void startm(P*, bool);
438 static void handoffp(P*);
439 static void wakep(void);
440 static void stoplockedm(void);
441 static void startlockedm(G*);
442 static void sysmon(void);
443 static uint32 retake(int64);
444 static void incidlelocked(int32);
445 static void checkdead(void);
446 static void exitsyscall0(G*);
447 static void park0(G*);
448 static void goexit0(G*);
449 static void gfput(P*, G*);
450 static G* gfget(P*);
451 static void gfpurge(P*);
452 static void globrunqput(G*);
453 static void globrunqputbatch(G*, G*, int32);
454 static G* globrunqget(P*, int32);
455 static P* pidleget(void);
456 static void pidleput(P*);
457 static void injectglist(G*);
458 static bool preemptall(void);
459 static bool exitsyscallfast(void);
460 static void allgadd(G*);
462 // The bootstrap sequence is:
464 // call osinit
465 // call schedinit
466 // make & queue new G
467 // call runtime_mstart
469 // The new G calls runtime_main.
470 void
471 runtime_schedinit(void)
473 int32 n, procs;
474 const byte *p;
475 Eface i;
477 m = &runtime_m0;
478 g = &runtime_g0;
479 m->g0 = g;
480 m->curg = g;
481 g->m = m;
483 initcontext();
484 inittlssize();
486 runtime_sched.maxmcount = 10000;
487 runtime_precisestack = 0;
489 runtime_mallocinit();
490 mcommoninit(m);
492 // Initialize the itable value for newErrorCString,
493 // so that the next time it gets called, possibly
494 // in a fault during a garbage collection, it will not
495 // need to allocate memory.
496 runtime_newErrorCString(0, &i);
498 runtime_goargs();
499 runtime_goenvs();
500 runtime_parsedebugvars();
502 runtime_sched.lastpoll = runtime_nanotime();
503 procs = 1;
504 p = runtime_getenv("GOMAXPROCS");
505 if(p != nil && (n = runtime_atoi(p)) > 0) {
506 if(n > MaxGomaxprocs)
507 n = MaxGomaxprocs;
508 procs = n;
510 runtime_allp = runtime_malloc((MaxGomaxprocs+1)*sizeof(runtime_allp[0]));
511 procresize(procs);
513 // Can not enable GC until all roots are registered.
514 // mstats.enablegc = 1;
516 // if(raceenabled)
517 // g->racectx = runtime_raceinit();
520 extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
521 extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");
523 static void
524 initDone(void *arg __attribute__ ((unused))) {
525 runtime_unlockOSThread();
528 // The main goroutine.
529 void
530 runtime_main(void* dummy __attribute__((unused)))
532 Defer d;
533 _Bool frame;
535 newm(sysmon, nil);
537 // Lock the main goroutine onto this, the main OS thread,
538 // during initialization. Most programs won't care, but a few
539 // do require certain calls to be made by the main thread.
540 // Those can arrange for main.main to run in the main thread
541 // by calling runtime.LockOSThread during initialization
542 // to preserve the lock.
543 runtime_lockOSThread();
545 // Defer unlock so that runtime.Goexit during init does the unlock too.
546 d.__pfn = initDone;
547 d.__next = g->defer;
548 d.__arg = (void*)-1;
549 d.__panic = g->panic;
550 d.__retaddr = nil;
551 d.__makefunc_can_recover = 0;
552 d.__frame = &frame;
553 d.__special = true;
554 g->defer = &d;
556 if(m != &runtime_m0)
557 runtime_throw("runtime_main not on m0");
558 __go_go(runtime_MHeap_Scavenger, nil);
559 main_init();
561 if(g->defer != &d || d.__pfn != initDone)
562 runtime_throw("runtime: bad defer entry after init");
563 g->defer = d.__next;
564 runtime_unlockOSThread();
566 // For gccgo we have to wait until after main is initialized
567 // to enable GC, because initializing main registers the GC
568 // roots.
569 mstats.enablegc = 1;
571 main_main();
572 if(raceenabled)
573 runtime_racefini();
575 // Make racy client program work: if panicking on
576 // another goroutine at the same time as main returns,
577 // let the other goroutine finish printing the panic trace.
578 // Once it does, it will exit. See issue 3934.
579 if(runtime_panicking)
580 runtime_park(nil, nil, "panicwait");
582 runtime_exit(0);
583 for(;;)
584 *(int32*)0 = 0;
587 void
588 runtime_goroutineheader(G *gp)
590 const char *status;
591 int64 waitfor;
593 switch(gp->status) {
594 case Gidle:
595 status = "idle";
596 break;
597 case Grunnable:
598 status = "runnable";
599 break;
600 case Grunning:
601 status = "running";
602 break;
603 case Gsyscall:
604 status = "syscall";
605 break;
606 case Gwaiting:
607 if(gp->waitreason)
608 status = gp->waitreason;
609 else
610 status = "waiting";
611 break;
612 default:
613 status = "???";
614 break;
617 // approx time the G is blocked, in minutes
618 waitfor = 0;
619 if((gp->status == Gwaiting || gp->status == Gsyscall) && gp->waitsince != 0)
620 waitfor = (runtime_nanotime() - gp->waitsince) / (60LL*1000*1000*1000);
622 if(waitfor < 1)
623 runtime_printf("goroutine %D [%s]:\n", gp->goid, status);
624 else
625 runtime_printf("goroutine %D [%s, %D minutes]:\n", gp->goid, status, waitfor);
628 void
629 runtime_printcreatedby(G *g)
631 if(g != nil && g->gopc != 0 && g->goid != 1) {
632 String fn;
633 String file;
634 intgo line;
636 if(__go_file_line(g->gopc - 1, &fn, &file, &line)) {
637 runtime_printf("created by %S\n", fn);
638 runtime_printf("\t%S:%D\n", file, (int64) line);
643 struct Traceback
645 G* gp;
646 Location locbuf[TracebackMaxFrames];
647 int32 c;
650 void
651 runtime_tracebackothers(G * volatile me)
653 G * volatile gp;
654 Traceback tb;
655 int32 traceback;
656 volatile uintptr i;
658 tb.gp = me;
659 traceback = runtime_gotraceback(nil);
661 // Show the current goroutine first, if we haven't already.
662 if((gp = m->curg) != nil && gp != me) {
663 runtime_printf("\n");
664 runtime_goroutineheader(gp);
665 gp->traceback = &tb;
667 #ifdef USING_SPLIT_STACK
668 __splitstack_getcontext(&me->stack_context[0]);
669 #endif
670 getcontext(&me->context);
672 if(gp->traceback != nil) {
673 runtime_gogo(gp);
676 runtime_printtrace(tb.locbuf, tb.c, false);
677 runtime_printcreatedby(gp);
680 runtime_lock(&allglock);
681 for(i = 0; i < runtime_allglen; i++) {
682 gp = runtime_allg[i];
683 if(gp == me || gp == m->curg || gp->status == Gdead)
684 continue;
685 if(gp->issystem && traceback < 2)
686 continue;
687 runtime_printf("\n");
688 runtime_goroutineheader(gp);
690 // Our only mechanism for doing a stack trace is
691 // _Unwind_Backtrace. And that only works for the
692 // current thread, not for other random goroutines.
693 // So we need to switch context to the goroutine, get
694 // the backtrace, and then switch back.
696 // This means that if g is running or in a syscall, we
697 // can't reliably print a stack trace. FIXME.
699 if(gp->status == Grunning) {
700 runtime_printf("\tgoroutine running on other thread; stack unavailable\n");
701 runtime_printcreatedby(gp);
702 } else if(gp->status == Gsyscall) {
703 runtime_printf("\tgoroutine in C code; stack unavailable\n");
704 runtime_printcreatedby(gp);
705 } else {
706 gp->traceback = &tb;
708 #ifdef USING_SPLIT_STACK
709 __splitstack_getcontext(&me->stack_context[0]);
710 #endif
711 getcontext(&me->context);
713 if(gp->traceback != nil) {
714 runtime_gogo(gp);
717 runtime_printtrace(tb.locbuf, tb.c, false);
718 runtime_printcreatedby(gp);
721 runtime_unlock(&allglock);
724 static void
725 checkmcount(void)
727 // sched lock is held
728 if(runtime_sched.mcount > runtime_sched.maxmcount) {
729 runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched.maxmcount);
730 runtime_throw("thread exhaustion");
734 // Do a stack trace of gp, and then restore the context to
735 // gp->dotraceback.
737 static void
738 gtraceback(G* gp)
740 Traceback* traceback;
742 traceback = gp->traceback;
743 gp->traceback = nil;
744 traceback->c = runtime_callers(1, traceback->locbuf,
745 sizeof traceback->locbuf / sizeof traceback->locbuf[0]);
746 runtime_gogo(traceback->gp);
749 static void
750 mcommoninit(M *mp)
752 // If there is no mcache runtime_callers() will crash,
753 // and we are most likely in sysmon thread so the stack is senseless anyway.
754 if(m->mcache)
755 runtime_callers(1, mp->createstack, nelem(mp->createstack));
757 mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();
759 runtime_lock(&runtime_sched);
760 mp->id = runtime_sched.mcount++;
761 checkmcount();
762 runtime_mpreinit(mp);
764 // Add to runtime_allm so garbage collector doesn't free m
765 // when it is just in a register or thread-local storage.
766 mp->alllink = runtime_allm;
767 // runtime_NumCgoCall() iterates over allm w/o schedlock,
768 // so we need to publish it safely.
769 runtime_atomicstorep(&runtime_allm, mp);
770 runtime_unlock(&runtime_sched);
773 // Mark gp ready to run.
774 void
775 runtime_ready(G *gp)
777 // Mark runnable.
778 m->locks++; // disable preemption because it can be holding p in a local var
779 if(gp->status != Gwaiting) {
780 runtime_printf("goroutine %D has status %d\n", gp->goid, gp->status);
781 runtime_throw("bad g->status in ready");
783 gp->status = Grunnable;
784 runqput(m->p, gp);
785 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0) // TODO: fast atomic
786 wakep();
787 m->locks--;
790 int32
791 runtime_gcprocs(void)
793 int32 n;
795 // Figure out how many CPUs to use during GC.
796 // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
797 runtime_lock(&runtime_sched);
798 n = runtime_gomaxprocs;
799 if(n > runtime_ncpu)
800 n = runtime_ncpu > 0 ? runtime_ncpu : 1;
801 if(n > MaxGcproc)
802 n = MaxGcproc;
803 if(n > runtime_sched.nmidle+1) // one M is currently running
804 n = runtime_sched.nmidle+1;
805 runtime_unlock(&runtime_sched);
806 return n;
809 static bool
810 needaddgcproc(void)
812 int32 n;
814 runtime_lock(&runtime_sched);
815 n = runtime_gomaxprocs;
816 if(n > runtime_ncpu)
817 n = runtime_ncpu;
818 if(n > MaxGcproc)
819 n = MaxGcproc;
820 n -= runtime_sched.nmidle+1; // one M is currently running
821 runtime_unlock(&runtime_sched);
822 return n > 0;
825 void
826 runtime_helpgc(int32 nproc)
828 M *mp;
829 int32 n, pos;
831 runtime_lock(&runtime_sched);
832 pos = 0;
833 for(n = 1; n < nproc; n++) { // one M is currently running
834 if(runtime_allp[pos]->mcache == m->mcache)
835 pos++;
836 mp = mget();
837 if(mp == nil)
838 runtime_throw("runtime_gcprocs inconsistency");
839 mp->helpgc = n;
840 mp->mcache = runtime_allp[pos]->mcache;
841 pos++;
842 runtime_notewakeup(&mp->park);
844 runtime_unlock(&runtime_sched);
847 // Similar to stoptheworld but best-effort and can be called several times.
848 // There is no reverse operation; it is used during crashing.
849 // This function must not lock any mutexes.
850 void
851 runtime_freezetheworld(void)
853 int32 i;
855 if(runtime_gomaxprocs == 1)
856 return;
857 // stopwait and preemption requests can be lost
858 // due to races with concurrently executing threads,
859 // so try several times
860 for(i = 0; i < 5; i++) {
861 // this should tell the scheduler to not start any new goroutines
862 runtime_sched.stopwait = 0x7fffffff;
863 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
864 // this should stop running goroutines
865 if(!preemptall())
866 break; // no running goroutines
867 runtime_usleep(1000);
869 // to be sure
870 runtime_usleep(1000);
871 preemptall();
872 runtime_usleep(1000);
875 void
876 runtime_stoptheworld(void)
878 int32 i;
879 uint32 s;
880 P *p;
881 bool wait;
883 runtime_lock(&runtime_sched);
884 runtime_sched.stopwait = runtime_gomaxprocs;
885 runtime_atomicstore((uint32*)&runtime_sched.gcwaiting, 1);
886 preemptall();
887 // stop current P
888 m->p->status = Pgcstop;
889 runtime_sched.stopwait--;
890 // try to retake all P's in Psyscall status
891 for(i = 0; i < runtime_gomaxprocs; i++) {
892 p = runtime_allp[i];
893 s = p->status;
894 if(s == Psyscall && runtime_cas(&p->status, s, Pgcstop))
895 runtime_sched.stopwait--;
897 // stop idle P's
898 while((p = pidleget()) != nil) {
899 p->status = Pgcstop;
900 runtime_sched.stopwait--;
902 wait = runtime_sched.stopwait > 0;
903 runtime_unlock(&runtime_sched);
905 // wait for remaining P's to stop voluntarily
906 if(wait) {
907 runtime_notesleep(&runtime_sched.stopnote);
908 runtime_noteclear(&runtime_sched.stopnote);
910 if(runtime_sched.stopwait)
911 runtime_throw("stoptheworld: not stopped");
912 for(i = 0; i < runtime_gomaxprocs; i++) {
913 p = runtime_allp[i];
914 if(p->status != Pgcstop)
915 runtime_throw("stoptheworld: not stopped");
919 static void
920 mhelpgc(void)
922 m->helpgc = -1;
925 void
926 runtime_starttheworld(void)
928 P *p, *p1;
929 M *mp;
930 G *gp;
931 bool add;
933 m->locks++; // disable preemption because it can be holding p in a local var
934 gp = runtime_netpoll(false); // non-blocking
935 injectglist(gp);
936 add = needaddgcproc();
937 runtime_lock(&runtime_sched);
938 if(newprocs) {
939 procresize(newprocs);
940 newprocs = 0;
941 } else
942 procresize(runtime_gomaxprocs);
943 runtime_sched.gcwaiting = 0;
945 p1 = nil;
946 while((p = pidleget()) != nil) {
947 // procresize() puts p's with work at the beginning of the list.
948 // Once we reach a p without a run queue, the rest don't have one either.
949 if(p->runqhead == p->runqtail) {
950 pidleput(p);
951 break;
953 p->m = mget();
954 p->link = p1;
955 p1 = p;
957 if(runtime_sched.sysmonwait) {
958 runtime_sched.sysmonwait = false;
959 runtime_notewakeup(&runtime_sched.sysmonnote);
961 runtime_unlock(&runtime_sched);
963 while(p1) {
964 p = p1;
965 p1 = p1->link;
966 if(p->m) {
967 mp = p->m;
968 p->m = nil;
969 if(mp->nextp)
970 runtime_throw("starttheworld: inconsistent mp->nextp");
971 mp->nextp = p;
972 runtime_notewakeup(&mp->park);
973 } else {
974 // Start M to run P. Do not start another M below.
975 newm(nil, p);
976 add = false;
980 if(add) {
981 // If GC could have used another helper proc, start one now,
982 // in the hope that it will be available next time.
983 // It would have been even better to start it before the collection,
984 // but doing so requires allocating memory, so it's tricky to
985 // coordinate. This lazy approach works out in practice:
986 // we don't mind if the first couple gc rounds don't have quite
987 // the maximum number of procs.
988 newm(mhelpgc, nil);
990 m->locks--;
993 // Called to start an M.
994 void*
995 runtime_mstart(void* mp)
997 m = (M*)mp;
998 g = m->g0;
1000 initcontext();
1002 g->entry = nil;
1003 g->param = nil;
1005 // Record top of stack for use by mcall.
1006 // Once we call schedule we're never coming back,
1007 // so other calls can reuse this stack space.
1008 #ifdef USING_SPLIT_STACK
1009 __splitstack_getcontext(&g->stack_context[0]);
1010 #else
1011 g->gcinitial_sp = &mp;
1012 // Setting gcstack_size to 0 is a marker meaning that gcinitial_sp
1013 // is the top of the stack, not the bottom.
1014 g->gcstack_size = 0;
1015 g->gcnext_sp = &mp;
1016 #endif
1017 getcontext(&g->context);
1019 if(g->entry != nil) {
1020 // Got here from mcall.
1021 void (*pfn)(G*) = (void (*)(G*))g->entry;
1022 G* gp = (G*)g->param;
1023 pfn(gp);
1024 *(int*)0x21 = 0x21;
1026 runtime_minit();
1028 #ifdef USING_SPLIT_STACK
1030 int dont_block_signals = 0;
1031 __splitstack_block_signals(&dont_block_signals, nil);
1033 #endif
1035 // Install signal handlers; after minit so that minit can
1036 // prepare the thread to be able to handle the signals.
1037 if(m == &runtime_m0)
1038 runtime_initsig();
1040 if(m->mstartfn)
1041 m->mstartfn();
1043 if(m->helpgc) {
1044 m->helpgc = 0;
1045 stopm();
1046 } else if(m != &runtime_m0) {
1047 acquirep(m->nextp);
1048 m->nextp = nil;
1050 schedule();
1052 // TODO(brainman): This point is never reached, because scheduler
1053 // does not release os threads at the moment. But once this path
1054 // is enabled, we must remove our seh here.
1056 return nil;
1059 typedef struct CgoThreadStart CgoThreadStart;
1060 struct CgoThreadStart
1062 M *m;
1063 G *g;
1064 uintptr *tls;
1065 void (*fn)(void);
1068 // Allocate a new m unassociated with any thread.
1069 // Can use p for allocation context if needed.
1070 M*
1071 runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, size_t* ret_g0_stacksize)
1073 M *mp;
1075 m->locks++; // disable GC because it can be called from sysmon
1076 if(m->p == nil)
1077 acquirep(p); // temporarily borrow p for mallocs in this function
1078 #if 0
1079 if(mtype == nil) {
1080 Eface e;
1081 runtime_gc_m_ptr(&e);
1082 mtype = ((const PtrType*)e.__type_descriptor)->__element_type;
1084 #endif
1086 mp = runtime_mal(sizeof *mp);
1087 mcommoninit(mp);
1088 mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize);
1090 if(p == m->p)
1091 releasep();
1092 m->locks--;
1094 return mp;
1097 static M* lockextra(bool nilokay);
1098 static void unlockextra(M*);
1100 // needm is called when a cgo callback happens on a
1101 // thread without an m (a thread not created by Go).
1102 // In this case, needm is expected to find an m to use
1103 // and return with m, g initialized correctly.
1104 // Since m and g are not set now (likely nil, but see below)
1105 // needm is limited in what routines it can call. In particular
1106 // it can only call nosplit functions (textflag 7) and cannot
1107 // do any scheduling that requires an m.
1109 // In order to avoid needing heavy lifting here, we adopt
1110 // the following strategy: there is a stack of available m's
1111 // that can be stolen. Using compare-and-swap
1112 // to pop from the stack has ABA races, so we simulate
1113 // a lock by doing an exchange (via casp) to steal the stack
1114 // head and replace the top pointer with MLOCKED (1).
1115 // This serves as a simple spin lock that we can use even
1116 // without an m. The thread that locks the stack in this way
1117 // unlocks the stack by storing a valid stack head pointer.
1119 // In order to make sure that there is always an m structure
1120 // available to be stolen, we maintain the invariant that there
1121 // is always one more than needed. At the beginning of the
1122 // program (if cgo is in use) the list is seeded with a single m.
1123 // If needm finds that it has taken the last m off the list, its job
1124 // is - once it has installed its own m so that it can do things like
1125 // allocate memory - to create a spare m and put it on the list.
1127 // Each of these extra m's also has a g0 and a curg that are
1128 // pressed into service as the scheduling stack and current
1129 // goroutine for the duration of the cgo callback.
1131 // When the callback is done with the m, it calls dropm to
1132 // put the m back on the list.
1134 // Unlike the gc toolchain, we start running on curg, since we are
1135 // just going to return and let the caller continue.
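// Roughly, a cgo callback on a foreign thread runs as
//	needm();	// borrow an m (g0 + curg) from the extra list
//	... Go code ...
//	dropm();	// put the m back on the extra list
// with lockextra/unlockextra below implementing the MLOCKED spin
// protocol described above.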
1136 void
1137 runtime_needm(void)
1139 M *mp;
1141 if(runtime_needextram) {
1142 // Can happen if C/C++ code calls Go from a global ctor.
1143 // Can not throw, because scheduler is not initialized yet.
1144 int rv __attribute__((unused));
1145 rv = runtime_write(2, "fatal error: cgo callback before cgo call\n",
1146 sizeof("fatal error: cgo callback before cgo call\n")-1);
1147 runtime_exit(1);
1150 // Lock extra list, take head, unlock popped list.
1151 // nilokay=false is safe here because of the invariant above,
1152 // that the extra list always contains or will soon contain
1153 // at least one m.
1154 mp = lockextra(false);
1156 // Set needextram when we've just emptied the list,
1157 // so that the eventual call into cgocallbackg will
1158 // allocate a new m for the extra list. We delay the
1159 // allocation until then so that it can be done
1160 // after exitsyscall makes sure it is okay to be
1161 // running at all (that is, there's no garbage collection
1162 // running right now).
1163 mp->needextram = mp->schedlink == nil;
1164 unlockextra(mp->schedlink);
1166 // Install m and g (= m->curg).
1167 runtime_setmg(mp, mp->curg);
1169 // Initialize g's context as in mstart.
1170 initcontext();
1171 g->status = Gsyscall;
1172 g->entry = nil;
1173 g->param = nil;
1174 #ifdef USING_SPLIT_STACK
1175 __splitstack_getcontext(&g->stack_context[0]);
1176 #else
1177 g->gcinitial_sp = &mp;
1178 g->gcstack_size = 0;
1179 g->gcnext_sp = &mp;
1180 #endif
1181 getcontext(&g->context);
1183 if(g->entry != nil) {
1184 // Got here from mcall.
1185 void (*pfn)(G*) = (void (*)(G*))g->entry;
1186 G* gp = (G*)g->param;
1187 pfn(gp);
1188 *(int*)0x22 = 0x22;
1191 // Initialize this thread to use the m.
1192 runtime_minit();
1194 #ifdef USING_SPLIT_STACK
1196 int dont_block_signals = 0;
1197 __splitstack_block_signals(&dont_block_signals, nil);
1199 #endif
1202 // newextram allocates an m and puts it on the extra list.
1203 // It is called with a working local m, so that it can do things
1204 // like call schedlock and allocate.
1205 void
1206 runtime_newextram(void)
1208 M *mp, *mnext;
1209 G *gp;
1210 byte *g0_sp, *sp;
1211 size_t g0_spsize, spsize;
1213 // Create extra goroutine locked to extra m.
1214 // The goroutine is the context in which the cgo callback will run.
1215 // The sched.pc will never be returned to, but setting it to
1216 // runtime.goexit makes clear to the traceback routines where
1217 // the goroutine stack ends.
1218 mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize);
1219 gp = runtime_malg(StackMin, &sp, &spsize);
1220 gp->status = Gdead;
1221 mp->curg = gp;
1222 mp->locked = LockInternal;
1223 mp->lockedg = gp;
1224 gp->lockedm = mp;
1225 gp->goid = runtime_xadd64(&runtime_sched.goidgen, 1);
1226 // put on allg for garbage collector
1227 allgadd(gp);
1229 // The context for gp will be set up in runtime_needm. But
1230 // here we need to set up the context for g0.
1231 getcontext(&mp->g0->context);
1232 mp->g0->context.uc_stack.ss_sp = g0_sp;
1233 mp->g0->context.uc_stack.ss_size = g0_spsize;
1234 makecontext(&mp->g0->context, kickoff, 0);
1236 // Add m to the extra list.
1237 mnext = lockextra(true);
1238 mp->schedlink = mnext;
1239 unlockextra(mp);
1242 // dropm is called when a cgo callback has called needm but is now
1243 // done with the callback and returning back into the non-Go thread.
1244 // It puts the current m back onto the extra list.
1246 // The main expense here is the call to signalstack to release the
1247 // m's signal stack, and then the call to needm on the next callback
1248 // from this thread. It is tempting to try to save the m for next time,
1249 // which would eliminate both these costs, but there might not be
1250 // a next time: the current thread (which Go does not control) might exit.
1251 // If we saved the m for that thread, there would be an m leak each time
1252 // such a thread exited. Instead, we acquire and release an m on each
1253 // call. These should typically not be scheduling operations, just a few
1254 // atomics, so the cost should be small.
1256 // TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
1257 // variable using pthread_key_create. Unlike the pthread keys we already use
1258 // on OS X, this dummy key would never be read by Go code. It would exist
1259 // only so that we could register a thread-exit-time destructor.
1260 // That destructor would put the m back onto the extra list.
1261 // This is purely a performance optimization. The current version,
1262 // in which dropm happens on each cgo call, is still correct too.
1263 // We may have to keep the current version on systems with cgo
1264 // but without pthreads, like Windows.
1265 void
1266 runtime_dropm(void)
1268 M *mp, *mnext;
1270 // Undo whatever initialization minit did during needm.
1271 runtime_unminit();
1273 // Clear m and g, and return m to the extra list.
1274 // After the call to setmg we can only call nosplit functions.
1275 mp = m;
1276 runtime_setmg(nil, nil);
1278 mp->curg->status = Gdead;
1280 mnext = lockextra(true);
1281 mp->schedlink = mnext;
1282 unlockextra(mp);
1285 #define MLOCKED ((M*)1)
1287 // lockextra locks the extra list and returns the list head.
1288 // The caller must unlock the list by storing a new list head
1289 // to runtime.extram. If nilokay is true, then lockextra will
1290 // return a nil list head if that's what it finds. If nilokay is false,
1291 // lockextra will keep waiting until the list head is no longer nil.
1292 static M*
1293 lockextra(bool nilokay)
1295 M *mp;
1296 void (*yield)(void);
1298 for(;;) {
1299 mp = runtime_atomicloadp(&runtime_extram);
1300 if(mp == MLOCKED) {
1301 yield = runtime_osyield;
1302 yield();
1303 continue;
1305 if(mp == nil && !nilokay) {
1306 runtime_usleep(1);
1307 continue;
1309 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1310 yield = runtime_osyield;
1311 yield();
1312 continue;
1314 break;
1316 return mp;
1319 static void
1320 unlockextra(M *mp)
1322 runtime_atomicstorep(&runtime_extram, mp);
1325 static int32
1326 countextra()
1328 M *mp, *mc;
1329 int32 c;
1331 for(;;) {
1332 mp = runtime_atomicloadp(&runtime_extram);
1333 if(mp == MLOCKED) {
1334 runtime_osyield();
1335 continue;
1337 if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
1338 runtime_osyield();
1339 continue;
1341 c = 0;
1342 for(mc = mp; mc != nil; mc = mc->schedlink)
1343 c++;
1344 runtime_atomicstorep(&runtime_extram, mp);
1345 return c;
1349 // Create a new m. It will start off with a call to fn, or else the scheduler.
1350 static void
1351 newm(void(*fn)(void), P *p)
1353 M *mp;
1355 mp = runtime_allocm(p, -1, nil, nil);
1356 mp->nextp = p;
1357 mp->mstartfn = fn;
1359 runtime_newosproc(mp);
1362 // Stops execution of the current m until new work is available.
1363 // Returns with acquired P.
1364 static void
1365 stopm(void)
1367 if(m->locks)
1368 runtime_throw("stopm holding locks");
1369 if(m->p)
1370 runtime_throw("stopm holding p");
1371 if(m->spinning) {
1372 m->spinning = false;
1373 runtime_xadd(&runtime_sched.nmspinning, -1);
1376 retry:
1377 runtime_lock(&runtime_sched);
1378 mput(m);
1379 runtime_unlock(&runtime_sched);
1380 runtime_notesleep(&m->park);
1381 runtime_noteclear(&m->park);
1382 if(m->helpgc) {
1383 runtime_gchelper();
1384 m->helpgc = 0;
1385 m->mcache = nil;
1386 goto retry;
1388 acquirep(m->nextp);
1389 m->nextp = nil;
1392 static void
1393 mspinning(void)
1395 m->spinning = true;
1398 // Schedules some M to run the p (creates an M if necessary).
1399 // If p==nil, tries to get an idle P, if no idle P's does nothing.
1400 static void
1401 startm(P *p, bool spinning)
1403 M *mp;
1404 void (*fn)(void);
1406 runtime_lock(&runtime_sched);
1407 if(p == nil) {
1408 p = pidleget();
1409 if(p == nil) {
1410 runtime_unlock(&runtime_sched);
1411 if(spinning)
1412 runtime_xadd(&runtime_sched.nmspinning, -1);
1413 return;
1416 mp = mget();
1417 runtime_unlock(&runtime_sched);
1418 if(mp == nil) {
1419 fn = nil;
1420 if(spinning)
1421 fn = mspinning;
1422 newm(fn, p);
1423 return;
1425 if(mp->spinning)
1426 runtime_throw("startm: m is spinning");
1427 if(mp->nextp)
1428 runtime_throw("startm: m has p");
1429 mp->spinning = spinning;
1430 mp->nextp = p;
1431 runtime_notewakeup(&mp->park);
1434 // Hands off P from syscall or locked M.
1435 static void
1436 handoffp(P *p)
1438 // if it has local work, start it straight away
1439 if(p->runqhead != p->runqtail || runtime_sched.runqsize) {
1440 startm(p, false);
1441 return;
1443 // no local work, check that there are no spinning/idle M's,
1444 // otherwise our help is not required
1445 if(runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) == 0 && // TODO: fast atomic
1446 runtime_cas(&runtime_sched.nmspinning, 0, 1)) {
1447 startm(p, true);
1448 return;
1450 runtime_lock(&runtime_sched);
1451 if(runtime_sched.gcwaiting) {
1452 p->status = Pgcstop;
1453 if(--runtime_sched.stopwait == 0)
1454 runtime_notewakeup(&runtime_sched.stopnote);
1455 runtime_unlock(&runtime_sched);
1456 return;
1458 if(runtime_sched.runqsize) {
1459 runtime_unlock(&runtime_sched);
1460 startm(p, false);
1461 return;
1463 // If this is the last running P and nobody is polling network,
1464 // need to wakeup another M to poll network.
1465 if(runtime_sched.npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched.lastpoll) != 0) {
1466 runtime_unlock(&runtime_sched);
1467 startm(p, false);
1468 return;
1470 pidleput(p);
1471 runtime_unlock(&runtime_sched);
1474 // Tries to add one more P to execute G's.
1475 // Called when a G is made runnable (newproc, ready).
1476 static void
1477 wakep(void)
1479 // be conservative about spinning threads
1480 if(!runtime_cas(&runtime_sched.nmspinning, 0, 1))
1481 return;
1482 startm(nil, true);
1485 // Stops execution of the current m that is locked to a g until the g is runnable again.
1486 // Returns with acquired P.
1487 static void
1488 stoplockedm(void)
1490 P *p;
1492 if(m->lockedg == nil || m->lockedg->lockedm != m)
1493 runtime_throw("stoplockedm: inconsistent locking");
1494 if(m->p) {
1495 // Schedule another M to run this p.
1496 p = releasep();
1497 handoffp(p);
1499 incidlelocked(1);
1500 // Wait until another thread schedules lockedg again.
1501 runtime_notesleep(&m->park);
1502 runtime_noteclear(&m->park);
1503 if(m->lockedg->status != Grunnable)
1504 runtime_throw("stoplockedm: not runnable");
1505 acquirep(m->nextp);
1506 m->nextp = nil;
1509 // Schedules the locked m to run the locked gp.
1510 static void
1511 startlockedm(G *gp)
1513 M *mp;
1514 P *p;
1516 mp = gp->lockedm;
1517 if(mp == m)
1518 runtime_throw("startlockedm: locked to me");
1519 if(mp->nextp)
1520 runtime_throw("startlockedm: m has p");
1521 // directly handoff current P to the locked m
1522 incidlelocked(-1);
1523 p = releasep();
1524 mp->nextp = p;
1525 runtime_notewakeup(&mp->park);
1526 stopm();
1529 // Stops the current m for stoptheworld.
1530 // Returns when the world is restarted.
1531 static void
1532 gcstopm(void)
1534 P *p;
1536 if(!runtime_sched.gcwaiting)
1537 runtime_throw("gcstopm: not waiting for gc");
1538 if(m->spinning) {
1539 m->spinning = false;
1540 runtime_xadd(&runtime_sched.nmspinning, -1);
1542 p = releasep();
1543 runtime_lock(&runtime_sched);
1544 p->status = Pgcstop;
1545 if(--runtime_sched.stopwait == 0)
1546 runtime_notewakeup(&runtime_sched.stopnote);
1547 runtime_unlock(&runtime_sched);
1548 stopm();
1551 // Schedules gp to run on the current M.
1552 // Never returns.
1553 static void
1554 execute(G *gp)
1556 int32 hz;
1558 if(gp->status != Grunnable) {
1559 runtime_printf("execute: bad g status %d\n", gp->status);
1560 runtime_throw("execute: bad g status");
1562 gp->status = Grunning;
1563 gp->waitsince = 0;
1564 m->p->schedtick++;
1565 m->curg = gp;
1566 gp->m = m;
1568 // Check whether the profiler needs to be turned on or off.
1569 hz = runtime_sched.profilehz;
1570 if(m->profilehz != hz)
1571 runtime_resetcpuprofiler(hz);
1573 runtime_gogo(gp);
1576 // Finds a runnable goroutine to execute.
1577 // Tries to steal from other P's, get g from global queue, poll network.
1578 static G*
1579 findrunnable(void)
1581 G *gp;
1582 P *p;
1583 int32 i;
1585 top:
1586 if(runtime_sched.gcwaiting) {
1587 gcstopm();
1588 goto top;
1590 // local runq
1591 gp = runqget(m->p);
1592 if(gp)
1593 return gp;
1594 // global runq
1595 if(runtime_sched.runqsize) {
1596 runtime_lock(&runtime_sched);
1597 gp = globrunqget(m->p, 0);
1598 runtime_unlock(&runtime_sched);
1599 if(gp)
1600 return gp;
1602 // poll network
1603 gp = runtime_netpoll(false); // non-blocking
1604 if(gp) {
1605 injectglist(gp->schedlink);
1606 gp->status = Grunnable;
1607 return gp;
1609 // If number of spinning M's >= number of busy P's, block.
1610 // This is necessary to prevent excessive CPU consumption
1611 // when GOMAXPROCS>>1 but the program parallelism is low.
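// In effect this caps spinning M's at about half the busy P's: e.g.
// with GOMAXPROCS=8 and only 2 busy P's, a second would-be spinner
// sees 2*nmspinning >= 2 and blocks instead of burning CPU.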
1612 if(!m->spinning && 2 * runtime_atomicload(&runtime_sched.nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched.npidle)) // TODO: fast atomic
1613 goto stop;
1614 if(!m->spinning) {
1615 m->spinning = true;
1616 runtime_xadd(&runtime_sched.nmspinning, 1);
1618 // random steal from other P's
1619 for(i = 0; i < 2*runtime_gomaxprocs; i++) {
1620 if(runtime_sched.gcwaiting)
1621 goto top;
1622 p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
1623 if(p == m->p)
1624 gp = runqget(p);
1625 else
1626 gp = runqsteal(m->p, p);
1627 if(gp)
1628 return gp;
1630 stop:
1631 // return P and block
1632 runtime_lock(&runtime_sched);
1633 if(runtime_sched.gcwaiting) {
1634 runtime_unlock(&runtime_sched);
1635 goto top;
1637 if(runtime_sched.runqsize) {
1638 gp = globrunqget(m->p, 0);
1639 runtime_unlock(&runtime_sched);
1640 return gp;
1642 p = releasep();
1643 pidleput(p);
1644 runtime_unlock(&runtime_sched);
1645 if(m->spinning) {
1646 m->spinning = false;
1647 runtime_xadd(&runtime_sched.nmspinning, -1);
1649 // check all runqueues once again
1650 for(i = 0; i < runtime_gomaxprocs; i++) {
1651 p = runtime_allp[i];
1652 if(p && p->runqhead != p->runqtail) {
1653 runtime_lock(&runtime_sched);
1654 p = pidleget();
1655 runtime_unlock(&runtime_sched);
1656 if(p) {
1657 acquirep(p);
1658 goto top;
1660 break;
1663 // poll network
1664 if(runtime_xchg64(&runtime_sched.lastpoll, 0) != 0) {
1665 if(m->p)
1666 runtime_throw("findrunnable: netpoll with p");
1667 if(m->spinning)
1668 runtime_throw("findrunnable: netpoll with spinning");
1669 gp = runtime_netpoll(true); // block until new work is available
1670 runtime_atomicstore64(&runtime_sched.lastpoll, runtime_nanotime());
1671 if(gp) {
1672 runtime_lock(&runtime_sched);
1673 p = pidleget();
1674 runtime_unlock(&runtime_sched);
1675 if(p) {
1676 acquirep(p);
1677 injectglist(gp->schedlink);
1678 gp->status = Grunnable;
1679 return gp;
1681 injectglist(gp);
1684 stopm();
1685 goto top;
1688 static void
1689 resetspinning(void)
1691 int32 nmspinning;
1693 if(m->spinning) {
1694 m->spinning = false;
1695 nmspinning = runtime_xadd(&runtime_sched.nmspinning, -1);
1696 if(nmspinning < 0)
1697 runtime_throw("findrunnable: negative nmspinning");
1698 } else
1699 nmspinning = runtime_atomicload(&runtime_sched.nmspinning);
1701 // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
1702 // so see if we need to wakeup another P here.
1703 if (nmspinning == 0 && runtime_atomicload(&runtime_sched.npidle) > 0)
1704 wakep();
1707 // Injects the list of runnable G's into the scheduler.
1708 // Can run concurrently with GC.
1709 static void
1710 injectglist(G *glist)
1712 int32 n;
1713 G *gp;
1715 if(glist == nil)
1716 return;
1717 runtime_lock(&runtime_sched);
1718 for(n = 0; glist; n++) {
1719 gp = glist;
1720 glist = gp->schedlink;
1721 gp->status = Grunnable;
1722 globrunqput(gp);
1724 runtime_unlock(&runtime_sched);
1726 for(; n && runtime_sched.npidle; n--)
1727 startm(nil, false);
1730 // One round of scheduler: find a runnable goroutine and execute it.
1731 // Never returns.
1732 static void
1733 schedule(void)
1735 G *gp;
1736 uint32 tick;
1738 if(m->locks)
1739 runtime_throw("schedule: holding locks");
1741 top:
1742 if(runtime_sched.gcwaiting) {
1743 gcstopm();
1744 goto top;
1747 gp = nil;
1748 // Check the global runnable queue once in a while to ensure fairness.
1749 // Otherwise two goroutines can completely occupy the local runqueue
1750 // by constantly respawning each other.
1751 tick = m->p->schedtick;
1752 // This is a fancy way to say tick%61==0,
1753 // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
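// (0x4325c53f is 2^36/61 rounded up, so ((uint64)tick*0x4325c53f)>>36
// equals tick/61 for any 32-bit tick, and the expression below is
// tick - 61*(tick/61), i.e. tick%61.)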
1754 if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched.runqsize > 0) {
1755 runtime_lock(&runtime_sched);
1756 gp = globrunqget(m->p, 1);
1757 runtime_unlock(&runtime_sched);
1758 if(gp)
1759 resetspinning();
1761 if(gp == nil) {
1762 gp = runqget(m->p);
1763 if(gp && m->spinning)
1764 runtime_throw("schedule: spinning with local work");
1766 if(gp == nil) {
1767 gp = findrunnable(); // blocks until work is available
1768 resetspinning();
1771 if(gp->lockedm) {
1772 // Hands off own p to the locked m,
1773 // then blocks waiting for a new p.
1774 startlockedm(gp);
1775 goto top;
1778 execute(gp);
1781 // Puts the current goroutine into a waiting state and calls unlockf.
1782 // If unlockf returns false, the goroutine is resumed.
1783 void
1784 runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason)
1786 m->waitlock = lock;
1787 m->waitunlockf = unlockf;
1788 g->waitreason = reason;
1789 runtime_mcall(park0);
1792 static bool
1793 parkunlock(G *gp, void *lock)
1795 USED(gp);
1796 runtime_unlock(lock);
1797 return true;
1800 // Puts the current goroutine into a waiting state and unlocks the lock.
1801 // The goroutine can be made runnable again by calling runtime_ready(gp).
1802 void
1803 runtime_parkunlock(Lock *lock, const char *reason)
1805 runtime_park(parkunlock, lock, reason);
1808 // runtime_park continuation on g0.
1809 static void
1810 park0(G *gp)
1812 bool ok;
1814 gp->status = Gwaiting;
1815 gp->m = nil;
1816 m->curg = nil;
1817 if(m->waitunlockf) {
1818 ok = m->waitunlockf(gp, m->waitlock);
1819 m->waitunlockf = nil;
1820 m->waitlock = nil;
1821 if(!ok) {
1822 gp->status = Grunnable;
1823 execute(gp); // Schedule it back, never returns.
1826 if(m->lockedg) {
1827 stoplockedm();
1828 execute(gp); // Never returns.
1830 schedule();
1833 // Scheduler yield.
1834 void
1835 runtime_gosched(void)
1837 runtime_mcall(runtime_gosched0);
1840 // runtime_gosched continuation on g0.
1841 void
1842 runtime_gosched0(G *gp)
1844 gp->status = Grunnable;
1845 gp->m = nil;
1846 m->curg = nil;
1847 runtime_lock(&runtime_sched);
1848 globrunqput(gp);
1849 runtime_unlock(&runtime_sched);
1850 if(m->lockedg) {
1851 stoplockedm();
1852 execute(gp); // Never returns.
1854 schedule();
1857 // Finishes execution of the current goroutine.
1858 // Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack).
1859 // Since it does not return it does not matter. But if it is preempted
1860 // at the split stack check, GC will complain about inconsistent sp.
1861 void
1862 runtime_goexit(void)
1864 if(raceenabled)
1865 runtime_racegoend();
1866 runtime_mcall(goexit0);
1869 // runtime_goexit continuation on g0.
1870 static void
1871 goexit0(G *gp)
1873 gp->status = Gdead;
1874 gp->entry = nil;
1875 gp->m = nil;
1876 gp->lockedm = nil;
1877 m->curg = nil;
1878 m->lockedg = nil;
1879 if(m->locked & ~LockExternal) {
1880 runtime_printf("invalid m->locked = %d\n", m->locked);
1881 runtime_throw("internal lockOSThread error");
1883 m->locked = 0;
1884 gfput(m->p, gp);
1885 schedule();
1888 // The goroutine g is about to enter a system call.
1889 // Record that it's not using the cpu anymore.
1890 // This is called only from the go syscall library and cgocall,
1891 // not from the low-level system calls used by the runtime.
1893 // Entersyscall cannot split the stack: the runtime_gosave must
1894 // make g->sched refer to the caller's stack segment, because
1895 // entersyscall is going to return immediately after.
1897 void runtime_entersyscall(void) __attribute__ ((no_split_stack));
1898 static void doentersyscall(void) __attribute__ ((no_split_stack, noinline));
1900 void
1901 runtime_entersyscall()
1903 // Save the registers in the g structure so that any pointers
1904 // held in registers will be seen by the garbage collector.
1905 getcontext(&g->gcregs);
1907 // Do the work in a separate function, so that this function
1908 // doesn't save any registers on its own stack. If this
1909 // function does save any registers, we might store the wrong
1910 // value in the call to getcontext.
1912 // FIXME: This assumes that we do not need to save any
1913 // callee-saved registers to access the TLS variable g. We
1914 // don't want to put the ucontext_t on the stack because it is
1915 // large and we can not split the stack here.
1916 doentersyscall();
1919 static void
1920 doentersyscall()
1922 // Disable preemption because during this function g is in Gsyscall status,
1923 // but can have inconsistent g->sched, do not let GC observe it.
1924 m->locks++;
1926 // Leave SP around for GC and traceback.
1927 #ifdef USING_SPLIT_STACK
1928 g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
1929 &g->gcnext_segment, &g->gcnext_sp,
1930 &g->gcinitial_sp);
1931 #else
1933 void *v;
1935 g->gcnext_sp = (byte *) &v;
1937 #endif
1939 g->status = Gsyscall;
1941 if(runtime_atomicload(&runtime_sched.sysmonwait)) { // TODO: fast atomic
1942 runtime_lock(&runtime_sched);
1943 if(runtime_atomicload(&runtime_sched.sysmonwait)) {
1944 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
1945 runtime_notewakeup(&runtime_sched.sysmonnote);
1947 runtime_unlock(&runtime_sched);
1950 m->mcache = nil;
1951 m->p->m = nil;
1952 runtime_atomicstore(&m->p->status, Psyscall);
1953 if(runtime_sched.gcwaiting) {
1954 runtime_lock(&runtime_sched);
1955 if (runtime_sched.stopwait > 0 && runtime_cas(&m->p->status, Psyscall, Pgcstop)) {
1956 if(--runtime_sched.stopwait == 0)
1957 runtime_notewakeup(&runtime_sched.stopnote);
1959 runtime_unlock(&runtime_sched);
1962 m->locks--;
1965 // The same as runtime_entersyscall(), but with a hint that the syscall is blocking.
1966 void
1967 runtime_entersyscallblock(void)
1969 P *p;
1971 m->locks++; // see comment in entersyscall
1973 // Leave SP around for GC and traceback.
1974 #ifdef USING_SPLIT_STACK
1975 g->gcstack = __splitstack_find(nil, nil, &g->gcstack_size,
1976 &g->gcnext_segment, &g->gcnext_sp,
1977 &g->gcinitial_sp);
1978 #else
1979 g->gcnext_sp = (byte *) &p;
1980 #endif
1982 // Save the registers in the g structure so that any pointers
1983 // held in registers will be seen by the garbage collector.
1984 getcontext(&g->gcregs);
1986 g->status = Gsyscall;
1988 p = releasep();
1989 handoffp(p);
1990 if(g->isbackground) // do not consider blocked scavenger for deadlock detection
1991 incidlelocked(1);
1993 m->locks--;
1996 // The goroutine g exited its system call.
1997 // Arrange for it to run on a cpu again.
1998 // This is called only from the go syscall library, not
1999 // from the low-level system calls used by the runtime.
2000 void
2001 runtime_exitsyscall(void)
2003 G *gp;
2005 m->locks++; // see comment in entersyscall
2007 gp = g;
2008 if(gp->isbackground) // do not consider blocked scavenger for deadlock detection
2009 incidlelocked(-1);
2011 g->waitsince = 0;
2012 if(exitsyscallfast()) {
2013 // There's a cpu for us, so we can run.
2014 m->p->syscalltick++;
2015 gp->status = Grunning;
2016 // Garbage collector isn't running (since we are),
2017 // so okay to clear gcstack and gcsp.
2018 #ifdef USING_SPLIT_STACK
2019 gp->gcstack = nil;
2020 #endif
2021 gp->gcnext_sp = nil;
2022 runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
2023 m->locks--;
2024 return;
2027 m->locks--;
2029 // Call the scheduler.
2030 runtime_mcall(exitsyscall0);
2032 // Scheduler returned, so we're allowed to run now.
2033 // Delete the gcstack information that we left for
2034 // the garbage collector during the system call.
2035 // Must wait until now because until gosched returns
2036 // we don't know for sure that the garbage collector
2037 // is not running.
2038 #ifdef USING_SPLIT_STACK
2039 gp->gcstack = nil;
2040 #endif
2041 gp->gcnext_sp = nil;
2042 runtime_memclr(&gp->gcregs, sizeof gp->gcregs);
2044 // Don't refer to m again, we might be running on a different
2045 // thread after returning from runtime_mcall.
2046 runtime_m()->p->syscalltick++;
2049 static bool
2050 exitsyscallfast(void)
2052 P *p;
2054 // Freezetheworld sets stopwait but does not retake P's.
2055 if(runtime_sched.stopwait) {
2056 m->p = nil;
2057 return false;
2060 // Try to re-acquire the last P.
2061 if(m->p && m->p->status == Psyscall && runtime_cas(&m->p->status, Psyscall, Prunning)) {
2062 // There's a cpu for us, so we can run.
2063 m->mcache = m->p->mcache;
2064 m->p->m = m;
2065 return true;
2067 // Try to get any other idle P.
2068 m->p = nil;
2069 if(runtime_sched.pidle) {
2070 runtime_lock(&runtime_sched);
2071 p = pidleget();
2072 if(p && runtime_atomicload(&runtime_sched.sysmonwait)) {
2073 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2074 runtime_notewakeup(&runtime_sched.sysmonnote);
2076 runtime_unlock(&runtime_sched);
2077 if(p) {
2078 acquirep(p);
2079 return true;
2082 return false;
2085 // runtime_exitsyscall slow path on g0.
2086 // Failed to acquire P, enqueue gp as runnable.
2087 static void
2088 exitsyscall0(G *gp)
2090 P *p;
2092 gp->status = Grunnable;
2093 gp->m = nil;
2094 m->curg = nil;
2095 runtime_lock(&runtime_sched);
2096 p = pidleget();
2097 if(p == nil)
2098 globrunqput(gp);
2099 else if(runtime_atomicload(&runtime_sched.sysmonwait)) {
2100 runtime_atomicstore(&runtime_sched.sysmonwait, 0);
2101 runtime_notewakeup(&runtime_sched.sysmonnote);
2103 runtime_unlock(&runtime_sched);
2104 if(p) {
2105 acquirep(p);
2106 execute(gp); // Never returns.
2108 if(m->lockedg) {
2109 // Wait until another thread schedules gp and so m again.
2110 stoplockedm();
2111 execute(gp); // Never returns.
2113 stopm();
2114 schedule(); // Never returns.
2117 // Called from syscall package before fork.
2118 void syscall_runtime_BeforeFork(void)
2119 __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork");
2120 void
2121 syscall_runtime_BeforeFork(void)
2123 // Fork can hang if preempted with signals frequently enough (see issue 5517).
2124 // Ensure that we stay on the same M where we disable profiling.
2125 m->locks++;
2126 if(m->profilehz != 0)
2127 runtime_resetcpuprofiler(0);
2130 // Called from syscall package after fork in parent.
2131 void syscall_runtime_AfterFork(void)
2132 __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork");
2133 void
2134 syscall_runtime_AfterFork(void)
2136 int32 hz;
2138 hz = runtime_sched.profilehz;
2139 if(hz != 0)
2140 runtime_resetcpuprofiler(hz);
2141 m->locks--;
2144 // Allocate a new g, with a stack big enough for stacksize bytes.
2145 G*
2146 runtime_malg(int32 stacksize, byte** ret_stack, size_t* ret_stacksize)
2148 G *newg;
2150 newg = runtime_malloc(sizeof(G));
2151 if(stacksize >= 0) {
2152 #if USING_SPLIT_STACK
2153 int dont_block_signals = 0;
2155 *ret_stack = __splitstack_makecontext(stacksize,
2156 &newg->stack_context[0],
2157 ret_stacksize);
2158 __splitstack_block_signals_context(&newg->stack_context[0],
2159 &dont_block_signals, nil);
2160 #else
2161 *ret_stack = runtime_mallocgc(stacksize, 0, FlagNoProfiling|FlagNoGC);
2162 *ret_stacksize = stacksize;
2163 newg->gcinitial_sp = *ret_stack;
2164 newg->gcstack_size = stacksize;
2165 runtime_xadd(&runtime_stacks_sys, stacksize);
2166 #endif
2168 return newg;
2171 /* For runtime package testing. */
2174 // Create a new g running fn with siz bytes of arguments.
2175 // Put it on the queue of g's waiting to run.
2176 // The compiler turns a go statement into a call to this.
2177 // Cannot split the stack because it assumes that the arguments
2178 // are available sequentially after &fn; they would not be
2179 // copied if a stack split occurred. It's OK for this to call
2180 // functions that split the stack.
2181 void runtime_testing_entersyscall(void)
2182 __asm__ (GOSYM_PREFIX "runtime.entersyscall");
2183 void
2184 runtime_testing_entersyscall()
2186 runtime_entersyscall();
2189 void runtime_testing_exitsyscall(void)
2190 __asm__ (GOSYM_PREFIX "runtime.exitsyscall");
2192 void
2193 runtime_testing_exitsyscall()
2195 runtime_exitsyscall();
2198 G*
2199 __go_go(void (*fn)(void*), void* arg)
2201 byte *sp;
2202 size_t spsize;
2203 G *newg;
2204 P *p;
2206 //runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
2207 m->locks++; // disable preemption because it can be holding p in a local var
2209 p = m->p;
2210 if((newg = gfget(p)) != nil) {
2211 #ifdef USING_SPLIT_STACK
2212 int dont_block_signals = 0;
2214 sp = __splitstack_resetcontext(&newg->stack_context[0],
2215 &spsize);
2216 __splitstack_block_signals_context(&newg->stack_context[0],
2217 &dont_block_signals, nil);
2218 #else
2219 sp = newg->gcinitial_sp;
2220 spsize = newg->gcstack_size;
2221 if(spsize == 0)
2222 runtime_throw("bad spsize in __go_go");
2223 newg->gcnext_sp = sp;
2224 #endif
2225 } else {
2226 newg = runtime_malg(StackMin, &sp, &spsize);
2227 allgadd(newg);
2230 newg->entry = (byte*)fn;
2231 newg->param = arg;
2232 newg->gopc = (uintptr)__builtin_return_address(0);
2233 newg->status = Grunnable;
2234 if(p->goidcache == p->goidcacheend) {
2235 p->goidcache = runtime_xadd64(&runtime_sched.goidgen, GoidCacheBatch);
2236 p->goidcacheend = p->goidcache + GoidCacheBatch;
2238 newg->goid = p->goidcache++;
2241 // Avoid warnings about variables clobbered by
2242 // longjmp.
2243 byte * volatile vsp = sp;
2244 size_t volatile vspsize = spsize;
2245 G * volatile vnewg = newg;
2247 getcontext(&vnewg->context);
2248 vnewg->context.uc_stack.ss_sp = vsp;
2249 #ifdef MAKECONTEXT_STACK_TOP
2250 vnewg->context.uc_stack.ss_sp += vspsize;
2251 #endif
2252 vnewg->context.uc_stack.ss_size = vspsize;
2253 makecontext(&vnewg->context, kickoff, 0);
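// kickoff (defined earlier in this file) is the entry trampoline run on
// the new stack: it picks up the entry point and argument stored in
// newg->entry and newg->param above, calls fn(arg), and exits the
// goroutine when fn returns.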
2255 runqput(p, vnewg);
2257 if(runtime_atomicload(&runtime_sched.npidle) != 0 && runtime_atomicload(&runtime_sched.nmspinning) == 0 && fn != runtime_main) // TODO: fast atomic
2258 wakep();
2259 m->locks--;
2260 return vnewg;
2264 static void
2265 allgadd(G *gp)
2267 G **new;
2268 uintptr cap;
2270 runtime_lock(&allglock);
2271 if(runtime_allglen >= allgcap) {
2272 cap = 4096/sizeof(new[0]);
2273 if(cap < 2*allgcap)
2274 cap = 2*allgcap;
2275 new = runtime_malloc(cap*sizeof(new[0]));
2276 if(new == nil)
2277 runtime_throw("runtime: cannot allocate memory");
2278 if(runtime_allg != nil) {
2279 runtime_memmove(new, runtime_allg, runtime_allglen*sizeof(new[0]));
2280 runtime_free(runtime_allg);
2282 runtime_allg = new;
2283 allgcap = cap;
2285 runtime_allg[runtime_allglen++] = gp;
2286 runtime_unlock(&allglock);
2289 // Put on gfree list.
2290 // If local list is too long, transfer a batch to the global list.
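// The 64/32 thresholds provide hysteresis: a P caches free G's locally
// and only takes the global gflock once its cache exceeds 64 entries,
// at which point it hands G's back to the global list until roughly
// half remain.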
2291 static void
2292 gfput(P *p, G *gp)
2294 gp->schedlink = p->gfree;
2295 p->gfree = gp;
2296 p->gfreecnt++;
2297 if(p->gfreecnt >= 64) {
2298 runtime_lock(&runtime_sched.gflock);
2299 while(p->gfreecnt >= 32) {
2300 p->gfreecnt--;
2301 gp = p->gfree;
2302 p->gfree = gp->schedlink;
2303 gp->schedlink = runtime_sched.gfree;
2304 runtime_sched.gfree = gp;
2306 runtime_unlock(&runtime_sched.gflock);
2310 // Get from gfree list.
2311 // If local list is empty, grab a batch from global list.
2312 static G*
2313 gfget(P *p)
2315 G *gp;
2317 retry:
2318 gp = p->gfree;
2319 if(gp == nil && runtime_sched.gfree) {
2320 runtime_lock(&runtime_sched.gflock);
2321 while(p->gfreecnt < 32 && runtime_sched.gfree) {
2322 p->gfreecnt++;
2323 gp = runtime_sched.gfree;
2324 runtime_sched.gfree = gp->schedlink;
2325 gp->schedlink = p->gfree;
2326 p->gfree = gp;
2328 runtime_unlock(&runtime_sched.gflock);
2329 goto retry;
2331 if(gp) {
2332 p->gfree = gp->schedlink;
2333 p->gfreecnt--;
2335 return gp;
2338 // Purge all cached G's from gfree list to the global list.
2339 static void
2340 gfpurge(P *p)
2342 G *gp;
2344 runtime_lock(&runtime_sched.gflock);
2345 while(p->gfreecnt) {
2346 p->gfreecnt--;
2347 gp = p->gfree;
2348 p->gfree = gp->schedlink;
2349 gp->schedlink = runtime_sched.gfree;
2350 runtime_sched.gfree = gp;
2352 runtime_unlock(&runtime_sched.gflock);
2355 void
2356 runtime_Breakpoint(void)
2358 runtime_breakpoint();
2361 void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched");
2363 void
2364 runtime_Gosched(void)
2366 runtime_gosched();
2369 // Implementation of runtime.GOMAXPROCS.
2370 // delete when scheduler is even stronger
2371 int32
2372 runtime_gomaxprocsfunc(int32 n)
2374 int32 ret;
2376 if(n > MaxGomaxprocs)
2377 n = MaxGomaxprocs;
2378 runtime_lock(&runtime_sched);
2379 ret = runtime_gomaxprocs;
2380 if(n <= 0 || n == ret) {
2381 runtime_unlock(&runtime_sched);
2382 return ret;
2384 runtime_unlock(&runtime_sched);
2386 runtime_semacquire(&runtime_worldsema, false);
2387 m->gcing = 1;
2388 runtime_stoptheworld();
2389 newprocs = n;
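// newprocs is picked up by runtime_starttheworld below, which calls
// procresize with it while restarting the world.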
2390 m->gcing = 0;
2391 runtime_semrelease(&runtime_worldsema);
2392 runtime_starttheworld();
2394 return ret;
2397 // lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
2398 // after they modify m->locked. Do not allow preemption during this call,
2399 // or else the m might be different in this function than in the caller.
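// m->locked combines two forms of locking: LockExternal is a single bit
// set by the user-visible runtime.LockOSThread, while LockInternal counts
// nested runtime-internal lockOSThread calls. unlockOSThread only
// detaches the G from the M once m->locked drops back to zero.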
2400 static void
2401 lockOSThread(void)
2403 m->lockedg = g;
2404 g->lockedm = m;
2407 void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");
2408 void
2409 runtime_LockOSThread(void)
2411 m->locked |= LockExternal;
2412 lockOSThread();
2415 void
2416 runtime_lockOSThread(void)
2418 m->locked += LockInternal;
2419 lockOSThread();
2423 // unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
2424 // after they update m->locked. Do not allow preemption during this call,
2425 // or else the m might be different in this function than in the caller.
2426 static void
2427 unlockOSThread(void)
2429 if(m->locked != 0)
2430 return;
2431 m->lockedg = nil;
2432 g->lockedm = nil;
2435 void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");
2437 void
2438 runtime_UnlockOSThread(void)
2440 m->locked &= ~LockExternal;
2441 unlockOSThread();
2444 void
2445 runtime_unlockOSThread(void)
2447 if(m->locked < LockInternal)
2448 runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
2449 m->locked -= LockInternal;
2450 unlockOSThread();
2453 bool
2454 runtime_lockedOSThread(void)
2456 return g->lockedm != nil && m->lockedg != nil;
2459 // for testing of callbacks
2461 _Bool runtime_golockedOSThread(void)
2462 __asm__ (GOSYM_PREFIX "runtime.golockedOSThread");
2464 _Bool
2465 runtime_golockedOSThread(void)
2467 return runtime_lockedOSThread();
2470 intgo runtime_NumGoroutine (void)
2471 __asm__ (GOSYM_PREFIX "runtime.NumGoroutine");
2473 intgo
2474 runtime_NumGoroutine()
2476 return runtime_gcount();
2479 int32
2480 runtime_gcount(void)
2482 G *gp;
2483 int32 n, s;
2484 uintptr i;
2486 n = 0;
2487 runtime_lock(&allglock);
2488 // TODO(dvyukov): runtime.NumGoroutine() is O(N).
2489 // We do not want to increment/decrement centralized counter in newproc/goexit,
2490 // just to make runtime.NumGoroutine() faster.
2491 // Compromise solution is to introduce per-P counters of active goroutines.
2492 for(i = 0; i < runtime_allglen; i++) {
2493 gp = runtime_allg[i];
2494 s = gp->status;
2495 if(s == Grunnable || s == Grunning || s == Gsyscall || s == Gwaiting)
2496 n++;
2498 runtime_unlock(&allglock);
2499 return n;
2502 int32
2503 runtime_mcount(void)
2505 return runtime_sched.mcount;
2508 static struct {
2509 Lock;
2510 void (*fn)(uintptr*, int32);
2511 int32 hz;
2512 uintptr pcbuf[TracebackMaxFrames];
2513 Location locbuf[TracebackMaxFrames];
2514 } prof;
2516 static void System(void) {}
2517 static void GC(void) {}
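// System and GC are never called; their addresses serve as synthetic PCs
// so that profile samples taken when no user traceback is available are
// attributed to "System" or "GC" in the resulting profile.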
2519 // Called if we receive a SIGPROF signal.
2520 void
2521 runtime_sigprof()
2523 M *mp = m;
2524 int32 n, i;
2525 bool traceback;
2527 if(prof.fn == nil || prof.hz == 0)
2528 return;
2530 if(mp == nil)
2531 return;
2533 traceback = true;
2535 if(mp->mcache == nil)
2536 traceback = false;
2538 // Profiling runs concurrently with GC, so it must not allocate.
2539 mp->mallocing++;
2541 runtime_lock(&prof);
2542 if(prof.fn == nil) {
2543 runtime_unlock(&prof);
2544 mp->mallocing--;
2545 return;
2547 n = 0;
2549 if(runtime_atomicload(&runtime_in_callers) > 0) {
2550 // If SIGPROF arrived while already fetching runtime
2551 // callers we can have trouble on older systems
2552 // because the unwind library calls dl_iterate_phdr
2553 // which was not recursive in the past.
2554 traceback = false;
2557 if(traceback) {
2558 n = runtime_callers(0, prof.locbuf, nelem(prof.locbuf));
2559 for(i = 0; i < n; i++)
2560 prof.pcbuf[i] = prof.locbuf[i].pc;
2562 if(!traceback || n <= 0) {
2563 n = 2;
2564 prof.pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
2565 if(mp->gcing || mp->helpgc)
2566 prof.pcbuf[1] = (uintptr)GC;
2567 else
2568 prof.pcbuf[1] = (uintptr)System;
2570 prof.fn(prof.pcbuf, n);
2571 runtime_unlock(&prof);
2572 mp->mallocing--;
2575 // Arrange to call fn with a traceback hz times a second.
2576 void
2577 runtime_setcpuprofilerate(void (*fn)(uintptr*, int32), int32 hz)
2579 // Force sane arguments.
2580 if(hz < 0)
2581 hz = 0;
2582 if(hz == 0)
2583 fn = nil;
2584 if(fn == nil)
2585 hz = 0;
2587 // Disable preemption, otherwise we can be rescheduled to another thread
2588 // that has profiling enabled.
2589 m->locks++;
2591 // Stop profiler on this thread so that it is safe to lock prof.
2592 // If a profiling signal came in while we had prof locked,
2593 // it would deadlock.
2594 runtime_resetcpuprofiler(0);
2596 runtime_lock(&prof);
2597 prof.fn = fn;
2598 prof.hz = hz;
2599 runtime_unlock(&prof);
2600 runtime_lock(&runtime_sched);
2601 runtime_sched.profilehz = hz;
2602 runtime_unlock(&runtime_sched);
2604 if(hz != 0)
2605 runtime_resetcpuprofiler(hz);
2607 m->locks--;
2610 // Change number of processors. The world is stopped, sched is locked.
2611 static void
2612 procresize(int32 new)
2614 int32 i, old;
2615 bool empty;
2616 G *gp;
2617 P *p;
2619 old = runtime_gomaxprocs;
2620 if(old < 0 || old > MaxGomaxprocs || new <= 0 || new > MaxGomaxprocs)
2621 runtime_throw("procresize: invalid arg");
2622 // initialize new P's
2623 for(i = 0; i < new; i++) {
2624 p = runtime_allp[i];
2625 if(p == nil) {
2626 p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
2627 p->id = i;
2628 p->status = Pgcstop;
2629 runtime_atomicstorep(&runtime_allp[i], p);
2631 if(p->mcache == nil) {
2632 if(old==0 && i==0)
2633 p->mcache = m->mcache; // bootstrap
2634 else
2635 p->mcache = runtime_allocmcache();
2639 // redistribute runnable G's evenly
2640 // collect all runnable goroutines in global queue preserving FIFO order
2641 // FIFO order is required to ensure fairness even during frequent GCs
2642 // see http://golang.org/issue/7126
2643 empty = false;
2644 while(!empty) {
2645 empty = true;
2646 for(i = 0; i < old; i++) {
2647 p = runtime_allp[i];
2648 if(p->runqhead == p->runqtail)
2649 continue;
2650 empty = false;
2651 // pop from tail of local queue
2652 p->runqtail--;
2653 gp = p->runq[p->runqtail%nelem(p->runq)];
2654 // push onto head of global queue
2655 gp->schedlink = runtime_sched.runqhead;
2656 runtime_sched.runqhead = gp;
2657 if(runtime_sched.runqtail == nil)
2658 runtime_sched.runqtail = gp;
2659 runtime_sched.runqsize++;
2662 // fill local queues with at most nelem(p->runq)/2 goroutines
2663 // start at 1 because current M already executes some G and will acquire allp[0] below,
2664 // so if we have a spare G we want to put it into allp[1].
2665 for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched.runqsize > 0; i++) {
2666 gp = runtime_sched.runqhead;
2667 runtime_sched.runqhead = gp->schedlink;
2668 if(runtime_sched.runqhead == nil)
2669 runtime_sched.runqtail = nil;
2670 runtime_sched.runqsize--;
2671 runqput(runtime_allp[i%new], gp);
2674 // free unused P's
2675 for(i = new; i < old; i++) {
2676 p = runtime_allp[i];
2677 runtime_freemcache(p->mcache);
2678 p->mcache = nil;
2679 gfpurge(p);
2680 p->status = Pdead;
2681 // can't free P itself because it can be referenced by an M in syscall
2684 if(m->p)
2685 m->p->m = nil;
2686 m->p = nil;
2687 m->mcache = nil;
2688 p = runtime_allp[0];
2689 p->m = nil;
2690 p->status = Pidle;
2691 acquirep(p);
2692 for(i = new-1; i > 0; i--) {
2693 p = runtime_allp[i];
2694 p->status = Pidle;
2695 pidleput(p);
2697 runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
2700 // Associate p and the current m.
2701 static void
2702 acquirep(P *p)
2704 if(m->p || m->mcache)
2705 runtime_throw("acquirep: already in go");
2706 if(p->m || p->status != Pidle) {
2707 runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n", p->m, p->m ? p->m->id : 0, p->status);
2708 runtime_throw("acquirep: invalid p state");
2710 m->mcache = p->mcache;
2711 m->p = p;
2712 p->m = m;
2713 p->status = Prunning;
2716 // Disassociate p and the current m.
2717 static P*
2718 releasep(void)
2720 P *p;
2722 if(m->p == nil || m->mcache == nil)
2723 runtime_throw("releasep: invalid arg");
2724 p = m->p;
2725 if(p->m != m || p->mcache != m->mcache || p->status != Prunning) {
2726 runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
2727 m, m->p, p->m, m->mcache, p->mcache, p->status);
2728 runtime_throw("releasep: invalid p state");
2730 m->p = nil;
2731 m->mcache = nil;
2732 p->m = nil;
2733 p->status = Pidle;
2734 return p;
2737 static void
2738 incidlelocked(int32 v)
2740 runtime_lock(&runtime_sched);
2741 runtime_sched.nmidlelocked += v;
2742 if(v > 0)
2743 checkdead();
2744 runtime_unlock(&runtime_sched);
2747 // Check for deadlock situation.
2748 // The check is based on the number of running M's; if that is 0, we have a deadlock.
2749 static void
2750 checkdead(void)
2752 G *gp;
2753 int32 run, grunning, s;
2754 uintptr i;
2756 // -1 for sysmon
2757 run = runtime_sched.mcount - runtime_sched.nmidle - runtime_sched.nmidlelocked - 1 - countextra();
2758 if(run > 0)
2759 return;
2760 // If we are dying because of a signal caught on an already idle thread,
2761 // freezetheworld will cause all running threads to block.
2762 // The runtime then essentially enters a deadlocked state,
2763 // except that there is a thread that will call runtime_exit soon.
2764 if(runtime_panicking > 0)
2765 return;
2766 if(run < 0) {
2767 runtime_printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
2768 runtime_sched.nmidle, runtime_sched.nmidlelocked, runtime_sched.mcount);
2769 runtime_throw("checkdead: inconsistent counts");
2771 grunning = 0;
2772 runtime_lock(&allglock);
2773 for(i = 0; i < runtime_allglen; i++) {
2774 gp = runtime_allg[i];
2775 if(gp->isbackground)
2776 continue;
2777 s = gp->status;
2778 if(s == Gwaiting)
2779 grunning++;
2780 else if(s == Grunnable || s == Grunning || s == Gsyscall) {
2781 runtime_unlock(&allglock);
2782 runtime_printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);
2783 runtime_throw("checkdead: runnable g");
2786 runtime_unlock(&allglock);
2787 if(grunning == 0) // possible if main goroutine calls runtime_Goexit()
2788 runtime_exit(0);
2789 m->throwing = -1; // do not dump full stacks
2790 runtime_throw("all goroutines are asleep - deadlock!");
2793 static void
2794 sysmon(void)
2796 uint32 idle, delay;
2797 int64 now, lastpoll, lasttrace;
2798 G *gp;
2800 lasttrace = 0;
2801 idle = 0; // how many cycles in succession we have not woken anybody up
2802 delay = 0;
2803 for(;;) {
2804 if(idle == 0) // start with 20us sleep...
2805 delay = 20;
2806 else if(idle > 50) // start doubling the sleep after 1ms...
2807 delay *= 2;
2808 if(delay > 10*1000) // up to 10ms
2809 delay = 10*1000;
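// Net effect: an idle sysmon sleeps 20us for the first ~50 iterations,
// then backs off geometrically (40us, 80us, ...) until it hits the 10ms
// cap, i.e. roughly 100 wakeups per second in steady state.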
2810 runtime_usleep(delay);
2811 if(runtime_debug.schedtrace <= 0 &&
2812 (runtime_sched.gcwaiting || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic
2813 runtime_lock(&runtime_sched);
2814 if(runtime_atomicload(&runtime_sched.gcwaiting) || runtime_atomicload(&runtime_sched.npidle) == (uint32)runtime_gomaxprocs) {
2815 runtime_atomicstore(&runtime_sched.sysmonwait, 1);
2816 runtime_unlock(&runtime_sched);
2817 runtime_notesleep(&runtime_sched.sysmonnote);
2818 runtime_noteclear(&runtime_sched.sysmonnote);
2819 idle = 0;
2820 delay = 20;
2821 } else
2822 runtime_unlock(&runtime_sched);
2824 // poll network if not polled for more than 10ms
2825 lastpoll = runtime_atomicload64(&runtime_sched.lastpoll);
2826 now = runtime_nanotime();
2827 if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
2828 runtime_cas64(&runtime_sched.lastpoll, lastpoll, now);
2829 gp = runtime_netpoll(false); // non-blocking
2830 if(gp) {
2831 // Need to decrement number of idle locked M's
2832 // (pretending that one more is running) before injectglist.
2833 // Otherwise it can lead to the following situation:
2834 // injectglist grabs all P's but before it starts M's to run the P's,
2835 // another M returns from syscall, finishes running its G,
2836 // observes that there is no work to do and no other running M's
2837 // and reports deadlock.
2838 incidlelocked(-1);
2839 injectglist(gp);
2840 incidlelocked(1);
2843 // retake P's blocked in syscalls
2844 // and preempt long running G's
2845 if(retake(now))
2846 idle = 0;
2847 else
2848 idle++;
2850 if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) {
2851 lasttrace = now;
2852 runtime_schedtrace(runtime_debug.scheddetail);
2857 typedef struct Pdesc Pdesc;
2858 struct Pdesc
2860 uint32 schedtick;
2861 int64 schedwhen;
2862 uint32 syscalltick;
2863 int64 syscallwhen;
2865 static Pdesc pdesc[MaxGomaxprocs];
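// pdesc records, per P, the schedtick/syscalltick value sysmon observed
// on its previous pass and when that value last changed; retake uses it
// to tell whether a P has been stuck in the same syscall, or running the
// same G, for a whole sysmon interval.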
2867 static uint32
2868 retake(int64 now)
2870 uint32 i, s, n;
2871 int64 t;
2872 P *p;
2873 Pdesc *pd;
2875 n = 0;
2876 for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
2877 p = runtime_allp[i];
2878 if(p==nil)
2879 continue;
2880 pd = &pdesc[i];
2881 s = p->status;
2882 if(s == Psyscall) {
2883 // Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
2884 t = p->syscalltick;
2885 if(pd->syscalltick != t) {
2886 pd->syscalltick = t;
2887 pd->syscallwhen = now;
2888 continue;
2890 // On the one hand we don't want to retake Ps if there is no other work to do,
2891 // but on the other hand we want to retake them eventually
2892 // because they can prevent the sysmon thread from deep sleep.
2893 if(p->runqhead == p->runqtail &&
2894 runtime_atomicload(&runtime_sched.nmspinning) + runtime_atomicload(&runtime_sched.npidle) > 0 &&
2895 pd->syscallwhen + 10*1000*1000 > now)
2896 continue;
2897 // Need to decrement number of idle locked M's
2898 // (pretending that one more is running) before the CAS.
2899 // Otherwise the M from which we retake can exit the syscall,
2900 // increment nmidle and report deadlock.
2901 incidlelocked(-1);
2902 if(runtime_cas(&p->status, s, Pidle)) {
2903 n++;
2904 handoffp(p);
2906 incidlelocked(1);
2907 } else if(s == Prunning) {
2908 // Preempt G if it's running for more than 10ms.
2909 t = p->schedtick;
2910 if(pd->schedtick != t) {
2911 pd->schedtick = t;
2912 pd->schedwhen = now;
2913 continue;
2915 if(pd->schedwhen + 10*1000*1000 > now)
2916 continue;
2917 // preemptone(p);
2920 return n;
2923 // Tell all goroutines that they have been preempted and they should stop.
2924 // This function is purely best-effort. It can fail to inform a goroutine if a
2925 // processor just started running it.
2926 // No locks need to be held.
2927 // Returns true if preemption request was issued to at least one goroutine.
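// gccgo currently has no way to asynchronously preempt a running
// goroutine, so this is a stub that always returns false (and the
// preemptone call in retake above is commented out).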
2928 static bool
2929 preemptall(void)
2931 return false;
2934 void
2935 runtime_schedtrace(bool detailed)
2937 static int64 starttime;
2938 int64 now;
2939 int64 id1, id2, id3;
2940 int32 i, t, h;
2941 uintptr gi;
2942 const char *fmt;
2943 M *mp, *lockedm;
2944 G *gp, *lockedg;
2945 P *p;
2947 now = runtime_nanotime();
2948 if(starttime == 0)
2949 starttime = now;
2951 runtime_lock(&runtime_sched);
2952 runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
2953 (now-starttime)/1000000, runtime_gomaxprocs, runtime_sched.npidle, runtime_sched.mcount,
2954 runtime_sched.nmidle, runtime_sched.runqsize);
2955 if(detailed) {
2956 runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
2957 runtime_sched.gcwaiting, runtime_sched.nmidlelocked, runtime_sched.nmspinning,
2958 runtime_sched.stopwait, runtime_sched.sysmonwait);
2960 // We must be careful while reading data from P's, M's and G's.
2961 // Even if we hold schedlock, most data can be changed concurrently.
2962 // E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
2963 for(i = 0; i < runtime_gomaxprocs; i++) {
2964 p = runtime_allp[i];
2965 if(p == nil)
2966 continue;
2967 mp = p->m;
2968 h = runtime_atomicload(&p->runqhead);
2969 t = runtime_atomicload(&p->runqtail);
2970 if(detailed)
2971 runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
2972 i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
2973 else {
2974 // In non-detailed mode format lengths of per-P run queues as:
2975 // [len1 len2 len3 len4]
2976 fmt = " %d";
2977 if(runtime_gomaxprocs == 1)
2978 fmt = " [%d]\n";
2979 else if(i == 0)
2980 fmt = " [%d";
2981 else if(i == runtime_gomaxprocs-1)
2982 fmt = " %d]\n";
2983 runtime_printf(fmt, t-h);
2986 if(!detailed) {
2987 runtime_unlock(&runtime_sched);
2988 return;
2990 for(mp = runtime_allm; mp; mp = mp->alllink) {
2991 p = mp->p;
2992 gp = mp->curg;
2993 lockedg = mp->lockedg;
2994 id1 = -1;
2995 if(p)
2996 id1 = p->id;
2997 id2 = -1;
2998 if(gp)
2999 id2 = gp->goid;
3000 id3 = -1;
3001 if(lockedg)
3002 id3 = lockedg->goid;
3003 runtime_printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
3004 " locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
3005 mp->id, id1, id2,
3006 mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
3007 mp->spinning, mp->blocked, id3);
3009 runtime_lock(&allglock);
3010 for(gi = 0; gi < runtime_allglen; gi++) {
3011 gp = runtime_allg[gi];
3012 mp = gp->m;
3013 lockedm = gp->lockedm;
3014 runtime_printf(" G%D: status=%d(%s) m=%d lockedm=%d\n",
3015 gp->goid, gp->status, gp->waitreason, mp ? mp->id : -1,
3016 lockedm ? lockedm->id : -1);
3018 runtime_unlock(&allglock);
3019 runtime_unlock(&runtime_sched);
3022 // Put mp on midle list.
3023 // Sched must be locked.
3024 static void
3025 mput(M *mp)
3027 mp->schedlink = runtime_sched.midle;
3028 runtime_sched.midle = mp;
3029 runtime_sched.nmidle++;
3030 checkdead();
3033 // Try to get an m from midle list.
3034 // Sched must be locked.
3035 static M*
3036 mget(void)
3038 M *mp;
3040 if((mp = runtime_sched.midle) != nil){
3041 runtime_sched.midle = mp->schedlink;
3042 runtime_sched.nmidle--;
3044 return mp;
3047 // Put gp on the global runnable queue.
3048 // Sched must be locked.
3049 static void
3050 globrunqput(G *gp)
3052 gp->schedlink = nil;
3053 if(runtime_sched.runqtail)
3054 runtime_sched.runqtail->schedlink = gp;
3055 else
3056 runtime_sched.runqhead = gp;
3057 runtime_sched.runqtail = gp;
3058 runtime_sched.runqsize++;
3061 // Put a batch of runnable goroutines on the global runnable queue.
3062 // Sched must be locked.
3063 static void
3064 globrunqputbatch(G *ghead, G *gtail, int32 n)
3066 gtail->schedlink = nil;
3067 if(runtime_sched.runqtail)
3068 runtime_sched.runqtail->schedlink = ghead;
3069 else
3070 runtime_sched.runqhead = ghead;
3071 runtime_sched.runqtail = gtail;
3072 runtime_sched.runqsize += n;
3075 // Try to get a batch of G's from the global runnable queue.
3076 // Sched must be locked.
3077 static G*
3078 globrunqget(P *p, int32 max)
3080 G *gp, *gp1;
3081 int32 n;
3083 if(runtime_sched.runqsize == 0)
3084 return nil;
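// Grab a share proportional to the number of P's (plus one) so that a
// single P cannot drain the whole global queue, and never more than half
// of the local ring so runqput still has room for new G's.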
3085 n = runtime_sched.runqsize/runtime_gomaxprocs+1;
3086 if(n > runtime_sched.runqsize)
3087 n = runtime_sched.runqsize;
3088 if(max > 0 && n > max)
3089 n = max;
3090 if((uint32)n > nelem(p->runq)/2)
3091 n = nelem(p->runq)/2;
3092 runtime_sched.runqsize -= n;
3093 if(runtime_sched.runqsize == 0)
3094 runtime_sched.runqtail = nil;
3095 gp = runtime_sched.runqhead;
3096 runtime_sched.runqhead = gp->schedlink;
3097 n--;
3098 while(n--) {
3099 gp1 = runtime_sched.runqhead;
3100 runtime_sched.runqhead = gp1->schedlink;
3101 runqput(p, gp1);
3103 return gp;
3106 // Put p on the pidle list.
3107 // Sched must be locked.
3108 static void
3109 pidleput(P *p)
3111 p->link = runtime_sched.pidle;
3112 runtime_sched.pidle = p;
3113 runtime_xadd(&runtime_sched.npidle, 1); // TODO: fast atomic
3116 // Try to get a P from the pidle list.
3117 // Sched must be locked.
3118 static P*
3119 pidleget(void)
3121 P *p;
3123 p = runtime_sched.pidle;
3124 if(p) {
3125 runtime_sched.pidle = p->link;
3126 runtime_xadd(&runtime_sched.npidle, -1); // TODO: fast atomic
3128 return p;
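// The per-P run queue below is a fixed-size ring addressed by the
// free-running counters runqhead and runqtail (slot = counter % nelem(runq)).
// The owner P is the only producer: it fills a slot and then publishes it
// with a store-release of runqtail. Other P's consume entries by
// load-acquiring runqhead and claiming them with a CAS, so the queue
// needs no lock.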
3131 // Try to put g on local runnable queue.
3132 // If it's full, put onto global queue.
3133 // Executed only by the owner P.
3134 static void
3135 runqput(P *p, G *gp)
3137 uint32 h, t;
3139 retry:
3140 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3141 t = p->runqtail;
3142 if(t - h < nelem(p->runq)) {
3143 p->runq[t%nelem(p->runq)] = gp;
3144 runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
3145 return;
3147 if(runqputslow(p, gp, h, t))
3148 return;
3149 // the queue is not full, now the put above must succeed
3150 goto retry;
3153 // Put g and a batch of work from local runnable queue on global queue.
3154 // Executed only by the owner P.
3155 static bool
3156 runqputslow(P *p, G *gp, uint32 h, uint32 t)
3158 G *batch[nelem(p->runq)/2+1];
3159 uint32 n, i;
3161 // First, grab a batch from local queue.
3162 n = t-h;
3163 n = n/2;
3164 if(n != nelem(p->runq)/2)
3165 runtime_throw("runqputslow: queue is not full");
3166 for(i=0; i<n; i++)
3167 batch[i] = p->runq[(h+i)%nelem(p->runq)];
3168 if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3169 return false;
3170 batch[n] = gp;
3171 // Link the goroutines.
3172 for(i=0; i<n; i++)
3173 batch[i]->schedlink = batch[i+1];
3174 // Now put the batch on global queue.
3175 runtime_lock(&runtime_sched);
3176 globrunqputbatch(batch[0], batch[n], n+1);
3177 runtime_unlock(&runtime_sched);
3178 return true;
3181 // Get g from local runnable queue.
3182 // Executed only by the owner P.
3183 static G*
3184 runqget(P *p)
3186 G *gp;
3187 uint32 t, h;
3189 for(;;) {
3190 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3191 t = p->runqtail;
3192 if(t == h)
3193 return nil;
3194 gp = p->runq[h%nelem(p->runq)];
3195 if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume
3196 return gp;
3200 // Grabs a batch of goroutines from local runnable queue.
3201 // batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
3202 // Can be executed by any P.
3203 static uint32
3204 runqgrab(P *p, G **batch)
3206 uint32 t, h, n, i;
3208 for(;;) {
3209 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
3210 t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer
3211 n = t-h;
3212 n = n - n/2;
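// n - n/2 rounds up: we take the larger half, so even a queue holding a
// single G yields that G to the thief.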
3213 if(n == 0)
3214 break;
3215 if(n > nelem(p->runq)/2) // read inconsistent h and t
3216 continue;
3217 for(i=0; i<n; i++)
3218 batch[i] = p->runq[(h+i)%nelem(p->runq)];
3219 if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
3220 break;
3222 return n;
3225 // Steal half of elements from local runnable queue of p2
3226 // and put onto local runnable queue of p.
3227 // Returns one of the stolen elements (or nil if failed).
3228 static G*
3229 runqsteal(P *p, P *p2)
3231 G *gp;
3232 G *batch[nelem(p->runq)/2];
3233 uint32 t, h, n, i;
3235 n = runqgrab(p2, batch);
3236 if(n == 0)
3237 return nil;
3238 n--;
3239 gp = batch[n];
3240 if(n == 0)
3241 return gp;
3242 h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
3243 t = p->runqtail;
3244 if(t - h + n >= nelem(p->runq))
3245 runtime_throw("runqsteal: runq overflow");
3246 for(i=0; i<n; i++, t++)
3247 p->runq[t%nelem(p->runq)] = batch[i];
3248 runtime_atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption
3249 return gp;
3252 void runtime_testSchedLocalQueue(void)
3253 __asm__(GOSYM_PREFIX "runtime.testSchedLocalQueue");
3255 void
3256 runtime_testSchedLocalQueue(void)
3258 P p;
3259 G gs[nelem(p.runq)];
3260 int32 i, j;
3262 runtime_memclr((byte*)&p, sizeof(p));
3264 for(i = 0; i < (int32)nelem(gs); i++) {
3265 if(runqget(&p) != nil)
3266 runtime_throw("runq is not empty initially");
3267 for(j = 0; j < i; j++)
3268 runqput(&p, &gs[i]);
3269 for(j = 0; j < i; j++) {
3270 if(runqget(&p) != &gs[i]) {
3271 runtime_printf("bad element at iter %d/%d\n", i, j);
3272 runtime_throw("bad element");
3275 if(runqget(&p) != nil)
3276 runtime_throw("runq is not empty afterwards");
3280 void runtime_testSchedLocalQueueSteal(void)
3281 __asm__(GOSYM_PREFIX "runtime.testSchedLocalQueueSteal");
3283 void
3284 runtime_testSchedLocalQueueSteal(void)
3286 P p1, p2;
3287 G gs[nelem(p1.runq)], *gp;
3288 int32 i, j, s;
3290 runtime_memclr((byte*)&p1, sizeof(p1));
3291 runtime_memclr((byte*)&p2, sizeof(p2));
3293 for(i = 0; i < (int32)nelem(gs); i++) {
3294 for(j = 0; j < i; j++) {
3295 gs[j].sig = 0;
3296 runqput(&p1, &gs[j]);
3298 gp = runqsteal(&p2, &p1);
3299 s = 0;
3300 if(gp) {
3301 s++;
3302 gp->sig++;
3304 while((gp = runqget(&p2)) != nil) {
3305 s++;
3306 gp->sig++;
3308 while((gp = runqget(&p1)) != nil)
3309 gp->sig++;
3310 for(j = 0; j < i; j++) {
3311 if(gs[j].sig != 1) {
3312 runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
3313 runtime_throw("bad element");
3316 if(s != i/2 && s != i/2+1) {
3317 runtime_printf("bad steal %d, want %d or %d, iter %d\n",
3318 s, i/2, i/2+1, i);
3319 runtime_throw("bad steal");
3324 intgo runtime_debug_setMaxThreads(intgo)
3325 __asm__(GOSYM_PREFIX "runtime_debug.setMaxThreads");
3327 intgo
3328 runtime_debug_setMaxThreads(intgo in)
3330 intgo out;
3332 runtime_lock(&runtime_sched);
3333 out = runtime_sched.maxmcount;
3334 runtime_sched.maxmcount = in;
3335 checkmcount();
3336 runtime_unlock(&runtime_sched);
3337 return out;
3340 void
3341 runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
3343 enqueue1(wbufp, (Obj){(byte*)&runtime_sched, sizeof runtime_sched, 0});
3346 // When a function calls a closure, it passes the closure value to
3347 // __go_set_closure immediately before the function call. When a
3348 // function uses a closure, it calls __go_get_closure immediately on
3349 // function entry. This is a hack, but it will work on any system.
3350 // It would be better to use the static chain register when there is
3351 // one. It is also worth considering expanding these functions
3352 // directly in the compiler.
3354 void
3355 __go_set_closure(void* v)
3357 g->closure = v;
3360 void *
3361 __go_get_closure(void)
3363 return g->closure;
3366 // Return whether we are waiting for a GC. The gc toolchain handles
3367 // this with preemption checks instead.
3368 bool
3369 runtime_gcwaiting(void)
3371 return runtime_sched.gcwaiting;
3374 // func runtime_procPin() int
3376 intgo sync_runtime_procPin(void)
3377 __asm__(GOSYM_PREFIX "sync.runtime_procPin");
3379 intgo
3380 sync_runtime_procPin()
3382 M *mp;
3384 mp = m;
3385 // Disable preemption.
3386 mp->locks++;
3387 return mp->p->id;
3390 // func runtime_procUnpin()
3392 void sync_runtime_procUnpin(void)
3393 __asm__ (GOSYM_PREFIX "sync.runtime_procUnpin");
3395 void
3396 sync_runtime_procUnpin(void)
3398 m->locks--;