// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

#ifdef HAVE_DL_ITERATE_PHDR

#ifdef USING_SPLIT_STACK

/* FIXME: These are not declared anywhere.  */
extern void __splitstack_getcontext(void *context[10]);
extern void __splitstack_setcontext(void *context[10]);
extern void *__splitstack_makecontext(size_t, void *context[10], size_t *);
extern void *__splitstack_resetcontext(void *context[10], size_t *);
extern void *__splitstack_find(void *, void *, size_t *, void **, void **,
                               void **);
extern void __splitstack_block_signals (int *, int *);
extern void __splitstack_block_signals_context (void *context[10], int *,
                                                int *);

#ifndef PTHREAD_STACK_MIN
# define PTHREAD_STACK_MIN 8192
#endif

#if defined(USING_SPLIT_STACK) && defined(LINKER_SUPPORTS_SPLIT_STACK)
# define StackMin PTHREAD_STACK_MIN
#else
# define StackMin ((sizeof(char *) < 8) ? 2 * 1024 * 1024 : 4 * 1024 * 1024)
#endif
uintptr runtime_stacks_sys;

static void gtraceback(G*);

#ifndef SETCONTEXT_CLOBBERS_TLS

fixcontext(ucontext_t *c __attribute__ ((unused)))

# if defined(__x86_64__) && defined(__sun__)

// x86_64 Solaris 10 and 11 have a bug: setcontext switches the %fs
// register to that of the thread which called getcontext.  The effect
// is that the address of all __thread variables changes.  This bug
// also affects pthread_self() and pthread_getspecific.  We work
// around it by clobbering the context field directly to keep %fs the
// same value.
static __thread greg_t fs;

    fs = c.uc_mcontext.gregs[REG_FSBASE];

fixcontext(ucontext_t* c)

    c->uc_mcontext.gregs[REG_FSBASE] = fs;

# elif defined(__NetBSD__)

// NetBSD has a bug: setcontext clobbers tlsbase, we need to save
// and restore it ourselves.

static __thread __greg_t tlsbase;

    tlsbase = c.uc_mcontext._mc_tlsbase;

fixcontext(ucontext_t* c)

    c->uc_mcontext._mc_tlsbase = tlsbase;

# elif defined(__sparc__)

fixcontext(ucontext_t *c)

         register unsigned long thread __asm__("%g7");
         c->uc_mcontext.gregs[REG_G7] = thread;
         error: variable ‘thread’ might be clobbered by \
         ‘longjmp’ or ‘vfork’ [-Werror=clobbered]
       which ought to be false, as %g7 is a fixed register.  */

    if (sizeof (c->uc_mcontext.gregs[REG_G7]) == 8)
        asm ("stx %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));
        asm ("st %%g7, %0" : "=m"(c->uc_mcontext.gregs[REG_G7]));

# error unknown case for SETCONTEXT_CLOBBERS_TLS
// ucontext_arg returns a properly aligned ucontext_t value.  On some
// systems a ucontext_t value must be aligned to a 16-byte boundary.
// The g structure that has fields of type ucontext_t is defined in
// Go, and Go has no simple way to align a field to such a boundary.
// So we make the field larger in runtime2.go and pick an appropriate
// offset within the field here.

ucontext_arg(void** go_ucontext)

    uintptr_t p = (uintptr_t)go_ucontext;
    size_t align = __alignof__(ucontext_t);

        // We only ensured space for up to a 16 byte alignment
        // in libgo/go/runtime/runtime2.go.
        runtime_throw("required alignment of ucontext_t too large");

    p = (p + align - 1) &~ (uintptr_t)(align - 1);
    return (ucontext_t*)p;
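
/* Illustrative sketch (not part of the original file): the expression above
   rounds an address up to the next multiple of a power-of-two alignment.
   The helper name is hypothetical; it just isolates the bit trick used by
   ucontext_arg. */
static inline uintptr_t
align_up_sketch(uintptr_t p, uintptr_t align)
{
    /* align must be a power of two: adding align-1 and masking off the low
       bits rounds p up without a division. */
    return (p + align - 1) & ~(align - 1);
}
/* Example: align_up_sketch(0x1001, 16) == 0x1010. */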
// We can not always refer to the TLS variables directly.  The
// compiler will call tls_get_addr to get the address of the variable,
// and it may hold it in a register across a call to schedule.  When
// we get back from the call we may be running in a different thread,
// in which case the register now points to the TLS variable for a
// different thread.  We use non-inlinable functions to avoid this
// problem.

G* runtime_g(void) __attribute__ ((noinline, no_split_stack));

M* runtime_m(void) __attribute__ ((noinline, no_split_stack));
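
/* Illustrative sketch (not part of the original file): why the accessors are
   noinline.  Without a forced call, the compiler may compute the address of a
   __thread variable once and keep it in a register across a call that can
   reschedule onto another thread, after which the cached address is wrong.
   The variable and helper below are hypothetical. */
static __thread int sketch_tls_value;

static int *sketch_tls_value_addr(void) __attribute__ ((noinline));

static int *
sketch_tls_value_addr(void)
{
    return &sketch_tls_value;  /* re-derived on every call, never cached */
}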
// Start a new thread.

runtime_newosproc(M *mp)

    if(pthread_attr_init(&attr) != 0)
        runtime_throw("pthread_attr_init");
    if(pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED) != 0)
        runtime_throw("pthread_attr_setdetachstate");

    // Block signals during pthread_create so that the new thread
    // starts with signals disabled.  It will enable them in minit.

    // Blocking SIGTRAP reportedly breaks gdb on Alpha GNU/Linux.
    sigdelset(&clear, SIGTRAP);

    pthread_sigmask(SIG_BLOCK, &clear, &old);
    ret = pthread_create(&tid, &attr, runtime_mstart, mp);
    pthread_sigmask(SIG_SETMASK, &old, nil);

        runtime_throw("pthread_create");
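
/* Illustrative sketch (not part of the original file): the signal-masking
   pattern above reduced to plain POSIX calls.  A new thread inherits the
   creator's signal mask, so blocking everything around pthread_create makes
   the thread start with signals disabled; start_fn and arg are hypothetical. */
static void
sketch_spawn_with_signals_blocked(void *(*start_fn)(void*), void *arg)
{
    sigset_t all, old;
    pthread_t tid;

    sigfillset(&all);
    pthread_sigmask(SIG_BLOCK, &all, &old);    /* new thread inherits this mask */
    if(pthread_create(&tid, NULL, start_fn, arg) != 0)
        runtime_throw("pthread_create");
    pthread_sigmask(SIG_SETMASK, &old, NULL);  /* restore the creator's mask */
}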
// First function run by a new goroutine.  This replaces gogocall.

    if(g->traceback != nil)

    fn = (void (*)(void*))(g->entry);

// Switch context to a different goroutine.  This is like longjmp.
void runtime_gogo(G*) __attribute__ ((noinline));

runtime_gogo(G* newg)

#ifdef USING_SPLIT_STACK
    __splitstack_setcontext(&newg->stackcontext[0]);

    newg->fromgogo = true;
    fixcontext(ucontext_arg(&newg->context[0]));
    setcontext(ucontext_arg(&newg->context[0]));
    runtime_throw("gogo setcontext returned");

// Save context and call fn passing g as a parameter.  This is like
// setjmp.  Because getcontext always returns 0, unlike setjmp, we use
// g->fromgogo as a code.  It will be true if we got here via
// setcontext.  g == nil the first time this is called in a new m.
void runtime_mcall(void (*)(G*)) __attribute__ ((noinline));

runtime_mcall(void (*pfn)(G*))
#ifndef USING_SPLIT_STACK
    // Ensure that all registers are on the stack for the garbage
    // collector.
    __builtin_unwind_init();

        runtime_throw("runtime: mcall called on m->g0 stack");

#ifdef USING_SPLIT_STACK
    __splitstack_getcontext(&g->stackcontext[0]);

    // We have to point to an address on the stack that is
    // below the saved registers.
    gp->gcnextsp = &afterregs;

    gp->fromgogo = false;
    getcontext(ucontext_arg(&gp->context[0]));

    // When we return from getcontext, we may be running
    // in a new thread.  That means that g may have
    // changed.  It is a global variable, so we will
    // reload it, but the address of g may be cached in
    // our local stack frame, and that address may be
    // wrong.  Call the function to reload the value for
    // this thread.

    if(gp->traceback != nil)

    if (gp == nil || !gp->fromgogo) {
#ifdef USING_SPLIT_STACK
        __splitstack_setcontext(&mp->g0->stackcontext[0]);

        mp->g0->entry = (byte*)pfn;

        // It's OK to set g directly here because this case
        // can not occur if we got here via a setcontext to
        // the getcontext call just above.

        fixcontext(ucontext_arg(&mp->g0->context[0]));
        setcontext(ucontext_arg(&mp->g0->context[0]));
        runtime_throw("runtime: mcall function returned");
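
/* Illustrative sketch (not part of the original file): getcontext always
   returns 0, so unlike setjmp it cannot tell a direct return from a return
   caused by a later setcontext.  The runtime uses g->fromgogo for that; the
   volatile flag below plays the same role in a self-contained example. */
static void
sketch_getcontext_returns_twice(void)
{
    ucontext_t uc;
    volatile int resumed = 0;

    getcontext(&uc);
    if(!resumed) {
        resumed = 1;
        setcontext(&uc);  /* jumps back to just after getcontext */
    }
    /* reached a second time, via setcontext */
}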
// Goroutine scheduler
// The scheduler's job is to distribute ready-to-run goroutines over worker threads.
//
// The main concepts are:
// M - worker thread, or machine.
// P - processor, a resource that is required to execute Go code.
//     M must have an associated P to execute Go code, however it can be
//     blocked or in a syscall w/o an associated P.
//
// Design doc at http://golang.org/s/go11sched.

// Number of goroutine ids to grab from runtime_sched->goidgen to local per-P cache at once.
// 16 seems to provide enough amortization, but other than that it's a mostly arbitrary number.
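
/* Illustrative sketch (not part of the original file): amortizing a shared
   atomic counter by reserving ids in batches.  __go_go below does the same
   thing with the per-P goidcache/goidcacheend fields; the helper here is
   hypothetical and takes the counter and cache as parameters. */
static uint64
sketch_next_id(uint64 *shared_gen, uint64 *cache, uint64 *cache_end)
{
    enum { Batch = 16 };

    if(*cache == *cache_end) {
        /* one atomic add reserves a whole batch of ids */
        *cache = runtime_xadd64(shared_gen, Batch);
        *cache_end = *cache + Batch;
    }
    return (*cache)++;
}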
extern Sched* runtime_getsched() __asm__ (GOSYM_PREFIX "runtime.getsched");

Sched* runtime_sched;
int32 runtime_gomaxprocs;
uint32 runtime_needextram = 1;
G runtime_g0; // idle goroutine for m0

bool runtime_precisestack;
static int32 newprocs;

static Lock allglock; // the following vars are protected by this lock or by stoptheworld

uintptr runtime_allglen;
static uintptr allgcap;

bool runtime_isarchive;

void* runtime_mstart(void*);
static void runqput(P*, G*);
static G* runqget(P*);
static bool runqputslow(P*, G*, uint32, uint32);
static G* runqsteal(P*, P*);
static void mput(M*);
static M* mget(void);
static void mcommoninit(M*);
static void schedule(void);
static void procresize(int32);
static void acquirep(P*);
static P* releasep(void);
static void newm(void(*)(void), P*);
static void stopm(void);
static void startm(P*, bool);
static void handoffp(P*);
static void wakep(void);
static void stoplockedm(void);
static void startlockedm(G*);
static void sysmon(void);
static uint32 retake(int64);
static void incidlelocked(int32);
static void checkdead(void);
static void exitsyscall0(G*);
static void park0(G*);
static void goexit0(G*);
static void gfput(P*, G*);
static void gfpurge(P*);
static void globrunqput(G*);
static void globrunqputbatch(G*, G*, int32);
static G* globrunqget(P*, int32);
static P* pidleget(void);
static void pidleput(P*);
static void injectglist(G*);
static bool preemptall(void);
static bool exitsyscallfast(void);
static void allgadd(G*);

bool runtime_isstarted;

// The bootstrap sequence is:
//	make & queue new G
//	call runtime_mstart
// The new G calls runtime_main.
runtime_schedinit(void)

    runtime_sched = runtime_getsched();

    runtime_sched->maxmcount = 10000;
    runtime_precisestack = 0;

    // runtime_symtabinit();
    runtime_mallocinit();

    runtime_alginit(); // maps must not be used before this call

    // Initialize the itable value for newErrorCString,
    // so that the next time it gets called, possibly
    // in a fault during a garbage collection, it will not
    // need to allocate memory.
    runtime_newErrorCString(0, &i);

    // Initialize the cached gotraceback value, since
    // gotraceback calls getenv, which mallocs on Plan 9.
    runtime_gotraceback(nil);

    runtime_parsedebugvars();

    runtime_sched->lastpoll = runtime_nanotime();

    s = runtime_getenv("GOMAXPROCS");
    if(p != nil && (n = runtime_atoi(p, s.len)) > 0) {
        if(n > _MaxGomaxprocs)

    runtime_allp = runtime_malloc((_MaxGomaxprocs+1)*sizeof(runtime_allp[0]));

    // Can not enable GC until all roots are registered.
    // mstats()->enablegc = 1;

extern void main_init(void) __asm__ (GOSYM_PREFIX "__go_init_main");
extern void main_main(void) __asm__ (GOSYM_PREFIX "main.main");

// Used to determine the field alignment.
// main_init_done is a signal used by cgocallbackg that initialization
// has been completed.  It is made before _cgo_notify_runtime_init_done,
// so all cgo calls can rely on it existing.  When main_init is
// complete, it is closed, meaning cgocallbackg can reliably receive
// from it.
Hchan *runtime_main_init_done;

// The chan bool type, for runtime_main_init_done.

extern const struct __go_type_descriptor bool_type_descriptor
    __asm__ (GOSYM_PREFIX "__go_tdn_bool");

static struct __go_channel_type chan_bool_type_descriptor =
        offsetof (struct field_align, p) - 1,
        0, /* This value doesn't matter.  */
        NULL, /* This value doesn't matter */
        NULL, /* This value doesn't matter */
        /* __pointer_to_this */
    &bool_type_descriptor,

extern Hchan *makechan (ChanType *, int64)
    __asm__ (GOSYM_PREFIX "runtime.makechan");
extern void closechan(Hchan *) __asm__ (GOSYM_PREFIX "runtime.closechan");

initDone(void *arg __attribute__ ((unused))) {
    runtime_unlockOSThread();
// The main goroutine.
// Note: C frames in general are not copyable during stack growth, for two reasons:
// 1) We don't know where in a frame to find pointers to other stack locations.
// 2) There's no guarantee that globals or heap values do not point into the frame.
//
// The C frame for runtime.main is copyable, because:
// 1) There are no pointers to other stack locations in the frame
//    (d.fn points at a global, d.link is nil, d.argp is -1).
// 2) The only pointer into this frame is from the defer chain,
//    which is explicitly handled during stack copying.

runtime_main(void* dummy __attribute__((unused)))

    // Lock the main goroutine onto this, the main OS thread,
    // during initialization.  Most programs won't care, but a few
    // do require certain calls to be made by the main thread.
    // Those can arrange for main.main to run in the main thread
    // by calling runtime.LockOSThread during initialization
    // to preserve the lock.
    runtime_lockOSThread();

    // Defer unlock so that runtime.Goexit during init does the unlock too.
    d.pfn = (uintptr)(void*)initDone;
    d._panic = g->_panic;
    d.makefunccanrecover = 0;

    if(g->m != &runtime_m0)
        runtime_throw("runtime_main not on m0");
    __go_go(runtime_MHeap_Scavenger, nil);

    runtime_main_init_done = makechan(&chan_bool_type_descriptor, 0);

    _cgo_notify_runtime_init_done();

    closechan(runtime_main_init_done);

    if(g->_defer != &d || (void*)d.pfn != initDone)
        runtime_throw("runtime: bad defer entry after init");

    runtime_unlockOSThread();

    // For gccgo we have to wait until after main is initialized
    // to enable GC, because initializing main registers the GC
    // roots.
    mstats()->enablegc = 1;
    if(runtime_isarchive) {
        // This is not a complete program, but is instead a
        // library built using -buildmode=c-archive or
        // c-shared.  Now that we are initialized, there is
        // nothing further to do.

    // Make racy client program work: if panicking on
    // another goroutine at the same time as main returns,
    // let the other goroutine finish printing the panic trace.
    // Once it does, it will exit.  See issue 3934.
    if(runtime_panicking())
        runtime_park(nil, nil, "panicwait");

runtime_tracebackothers(G* volatile me)

    traceback = runtime_gotraceback(nil);

    // Show the current goroutine first, if we haven't already.
    if((gp = g->m->curg) != nil && gp != me) {
        runtime_printf("\n");
        runtime_goroutineheader(gp);

#ifdef USING_SPLIT_STACK
        __splitstack_getcontext(&me->stackcontext[0]);

        getcontext(ucontext_arg(&me->context[0]));

        if(gp->traceback != nil) {

            slice.__values = &tb.locbuf[0];
            slice.__count = tb.c;
            slice.__capacity = tb.c;
            runtime_printtrace(slice, nil);
            runtime_printcreatedby(gp);

    runtime_lock(&allglock);
    for(i = 0; i < runtime_allglen; i++) {
        gp = runtime_allg[i];
        if(gp == me || gp == g->m->curg || gp->atomicstatus == _Gdead)
        if(gp->issystem && traceback < 2)
        runtime_printf("\n");
        runtime_goroutineheader(gp);

        // Our only mechanism for doing a stack trace is
        // _Unwind_Backtrace.  And that only works for the
        // current thread, not for other random goroutines.
        // So we need to switch context to the goroutine, get
        // the backtrace, and then switch back.

        // This means that if g is running or in a syscall, we
        // can't reliably print a stack trace.  FIXME.

        if(gp->atomicstatus == _Grunning) {
            runtime_printf("\tgoroutine running on other thread; stack unavailable\n");
            runtime_printcreatedby(gp);
        } else if(gp->atomicstatus == _Gsyscall) {
            runtime_printf("\tgoroutine in C code; stack unavailable\n");
            runtime_printcreatedby(gp);

#ifdef USING_SPLIT_STACK
        __splitstack_getcontext(&me->stackcontext[0]);

        getcontext(ucontext_arg(&me->context[0]));

        if(gp->traceback != nil) {

            slice.__values = &tb.locbuf[0];
            slice.__count = tb.c;
            slice.__capacity = tb.c;
            runtime_printtrace(slice, nil);
            runtime_printcreatedby(gp);

    runtime_unlock(&allglock);

    // sched lock is held
    if(runtime_sched->mcount > runtime_sched->maxmcount) {
        runtime_printf("runtime: program exceeds %d-thread limit\n", runtime_sched->maxmcount);
        runtime_throw("thread exhaustion");
// Do a stack trace of gp, and then restore the context to

    Traceback* traceback;

    traceback = gp->traceback;

        runtime_throw("gtraceback: m is not nil");
    gp->m = traceback->gp->m;
    traceback->c = runtime_callers(1, traceback->locbuf,
        sizeof traceback->locbuf / sizeof traceback->locbuf[0], false);

    runtime_gogo(traceback->gp);

    // If there is no mcache runtime_callers() will crash,
    // and we are most likely in sysmon thread so the stack is senseless anyway.
    runtime_callers(1, mp->createstack, nelem(mp->createstack), false);

    mp->fastrand = 0x49f6428aUL + mp->id + runtime_cputicks();

    runtime_lock(&runtime_sched->lock);
    mp->id = runtime_sched->mcount++;

    runtime_mpreinit(mp);

    // Add to runtime_allm so garbage collector doesn't free m
    // when it is just in a register or thread-local storage.
    mp->alllink = runtime_allm;
    // runtime_NumCgoCall() iterates over allm w/o schedlock,
    // so we need to publish it safely.
    runtime_atomicstorep(&runtime_allm, mp);
    runtime_unlock(&runtime_sched->lock);
// Mark gp ready to run.

    g->m->locks++;  // disable preemption because it can be holding p in a local var
    if(gp->atomicstatus != _Gwaiting) {
        runtime_printf("goroutine %D has status %d\n", gp->goid, gp->atomicstatus);
        runtime_throw("bad g->atomicstatus in ready");

    gp->atomicstatus = _Grunnable;
    runqput((P*)g->m->p, gp);
    if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0)  // TODO: fast atomic

void goready(G*, int) __asm__ (GOSYM_PREFIX "runtime.goready");

goready(G* gp, int traceskip __attribute__ ((unused)))

runtime_gcprocs(void)

    // Figure out how many CPUs to use during GC.
    // Limited by gomaxprocs, number of actual CPUs, and MaxGcproc.
    runtime_lock(&runtime_sched->lock);
    n = runtime_gomaxprocs;
        n = runtime_ncpu > 0 ? runtime_ncpu : 1;
    if(n > runtime_sched->nmidle+1)  // one M is currently running
        n = runtime_sched->nmidle+1;
    runtime_unlock(&runtime_sched->lock);

    runtime_lock(&runtime_sched->lock);
    n = runtime_gomaxprocs;
    n -= runtime_sched->nmidle+1;  // one M is currently running
    runtime_unlock(&runtime_sched->lock);

runtime_helpgc(int32 nproc)

    runtime_lock(&runtime_sched->lock);
    for(n = 1; n < nproc; n++) {  // one M is currently running
        if(runtime_allp[pos]->mcache == g->m->mcache)
            runtime_throw("runtime_gcprocs inconsistency");
        mp->mcache = runtime_allp[pos]->mcache;
        runtime_notewakeup(&mp->park);
    runtime_unlock(&runtime_sched->lock);
// Similar to stoptheworld but best-effort and can be called several times.
// There is no reverse operation; it is used during crashing.
// This function must not lock any mutexes.

runtime_freezetheworld(void)

    if(runtime_gomaxprocs == 1)

    // stopwait and preemption requests can be lost
    // due to races with concurrently executing threads,
    // so try several times
    for(i = 0; i < 5; i++) {
        // this should tell the scheduler to not start any new goroutines
        runtime_sched->stopwait = 0x7fffffff;
        runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1);
        // this should stop running goroutines
            break;  // no running goroutines
        runtime_usleep(1000);

    runtime_usleep(1000);
    runtime_usleep(1000);
runtime_stopTheWorldWithSema(void)

    runtime_lock(&runtime_sched->lock);
    runtime_sched->stopwait = runtime_gomaxprocs;
    runtime_atomicstore((uint32*)&runtime_sched->gcwaiting, 1);

    ((P*)g->m->p)->status = _Pgcstop;
    runtime_sched->stopwait--;
    // try to retake all P's in _Psyscall status
    for(i = 0; i < runtime_gomaxprocs; i++) {
        if(s == _Psyscall && runtime_cas(&p->status, s, _Pgcstop))
            runtime_sched->stopwait--;

    while((p = pidleget()) != nil) {
        p->status = _Pgcstop;
        runtime_sched->stopwait--;

    wait = runtime_sched->stopwait > 0;
    runtime_unlock(&runtime_sched->lock);

    // wait for remaining P's to stop voluntarily
        runtime_notesleep(&runtime_sched->stopnote);
        runtime_noteclear(&runtime_sched->stopnote);

    if(runtime_sched->stopwait)
        runtime_throw("stoptheworld: not stopped");
    for(i = 0; i < runtime_gomaxprocs; i++) {
        if(p->status != _Pgcstop)
            runtime_throw("stoptheworld: not stopped");
runtime_startTheWorldWithSema(void)

    g->m->locks++;  // disable preemption because it can be holding p in a local var
    gp = runtime_netpoll(false);  // non-blocking

    add = needaddgcproc();
    runtime_lock(&runtime_sched->lock);
        procresize(newprocs);
        procresize(runtime_gomaxprocs);
    runtime_sched->gcwaiting = 0;

    while((p = pidleget()) != nil) {
        // procresize() puts p's with work at the beginning of the list.
        // Once we reach a p without a run queue, the rest don't have one either.
        if(p->runqhead == p->runqtail) {
        p->m = (uintptr)mget();
        p->link = (uintptr)p1;

    if(runtime_sched->sysmonwait) {
        runtime_sched->sysmonwait = false;
        runtime_notewakeup(&runtime_sched->sysmonnote);

    runtime_unlock(&runtime_sched->lock);

        runtime_throw("startTheWorldWithSema: inconsistent mp->nextp");
        mp->nextp = (uintptr)p;
        runtime_notewakeup(&mp->park);

    // Start M to run P.  Do not start another M below.

    // If GC could have used another helper proc, start one now,
    // in the hope that it will be available next time.
    // It would have been even better to start it before the collection,
    // but doing so requires allocating memory, so it's tricky to
    // coordinate.  This lazy approach works out in practice:
    // we don't mind if the first couple gc rounds don't have quite
    // the maximum number of procs.
// Called to start an M.

runtime_mstart(void* mp)

    // Record top of stack for use by mcall.
    // Once we call schedule we're never coming back,
    // so other calls can reuse this stack space.
#ifdef USING_SPLIT_STACK
    __splitstack_getcontext(&g->stackcontext[0]);

    g->gcinitialsp = &mp;
    // Setting gcstacksize to 0 is a marker meaning that gcinitialsp
    // is the top of the stack, not the bottom.

    getcontext(ucontext_arg(&g->context[0]));

    if(g->entry != nil) {
        // Got here from mcall.
        void (*pfn)(G*) = (void (*)(G*))g->entry;
        G* gp = (G*)g->param;

#ifdef USING_SPLIT_STACK
    int dont_block_signals = 0;
    __splitstack_block_signals(&dont_block_signals, nil);

    // Install signal handlers; after minit so that minit can
    // prepare the thread to be able to handle the signals.
    if(m == &runtime_m0) {
        if(runtime_iscgo && !runtime_cgoHasExtraM) {
            runtime_cgoHasExtraM = true;
            runtime_newextram();
            runtime_needextram = 0;
        runtime_initsig(false);

        ((void (*)(void))m->mstartfn)();

    } else if(m != &runtime_m0) {
        acquirep((P*)m->nextp);

    // TODO(brainman): This point is never reached, because scheduler
    // does not release os threads at the moment.  But once this path
    // is enabled, we must remove our seh here.

typedef struct CgoThreadStart CgoThreadStart;
struct CgoThreadStart
// Allocate a new m unassociated with any thread.
// Can use p for allocation context if needed.

runtime_allocm(P *p, int32 stacksize, byte** ret_g0_stack, uintptr* ret_g0_stacksize)

    g->m->locks++;  // disable GC because it can be called from sysmon

        acquirep(p);  // temporarily borrow p for mallocs in this function

        runtime_gc_m_ptr(&e);
        mtype = ((const PtrType*)e.__type_descriptor)->__element_type;

    mp = runtime_mal(sizeof *mp);

    mp->g0 = runtime_malg(stacksize, ret_g0_stack, ret_g0_stacksize);

    if(p == (P*)g->m->p)

    // static Type *gtype;
    // if(gtype == nil) {
    //	runtime_gc_g_ptr(&e);
    //	gtype = ((PtrType*)e.__type_descriptor)->__element_type;
    // gp = runtime_cnew(gtype);
    gp = runtime_malloc(sizeof(G));

static M* lockextra(bool nilokay);
static void unlockextra(M*);
// needm is called when a cgo callback happens on a
// thread without an m (a thread not created by Go).
// In this case, needm is expected to find an m to use
// and return with m, g initialized correctly.
// Since m and g are not set now (likely nil, but see below)
// needm is limited in what routines it can call.  In particular
// it can only call nosplit functions (textflag 7) and cannot
// do any scheduling that requires an m.
// In order to avoid needing heavy lifting here, we adopt
// the following strategy: there is a stack of available m's
// that can be stolen.  Using compare-and-swap
// to pop from the stack has ABA races, so we simulate
// a lock by doing an exchange (via casp) to steal the stack
// head and replace the top pointer with MLOCKED (1).
// This serves as a simple spin lock that we can use even
// without an m.  The thread that locks the stack in this way
// unlocks the stack by storing a valid stack head pointer.
// In order to make sure that there is always an m structure
// available to be stolen, we maintain the invariant that there
// is always one more than needed.  At the beginning of the
// program (if cgo is in use) the list is seeded with a single m.
// If needm finds that it has taken the last m off the list, its job
// is - once it has installed its own m so that it can do things like
// allocate memory - to create a spare m and put it on the list.
// Each of these extra m's also has a g0 and a curg that are
// pressed into service as the scheduling stack and current
// goroutine for the duration of the cgo callback.
// When the callback is done with the m, it calls dropm to
// put the m back on the list.
// Unlike the gc toolchain, we start running on curg, since we are
// just going to return and let the caller continue.

    if(runtime_needextram) {
        // Can happen if C/C++ code calls Go from a global ctor.
        // Can not throw, because scheduler is not initialized yet.
        int rv __attribute__((unused));
        rv = runtime_write(2, "fatal error: cgo callback before cgo call\n",
            sizeof("fatal error: cgo callback before cgo call\n")-1);

    // Lock extra list, take head, unlock popped list.
    // nilokay=false is safe here because of the invariant above,
    // that the extra list always contains or will soon contain
    // an m.
    mp = lockextra(false);

    // Set needextram when we've just emptied the list,
    // so that the eventual call into cgocallbackg will
    // allocate a new m for the extra list.  We delay the
    // allocation until then so that it can be done
    // after exitsyscall makes sure it is okay to be
    // running at all (that is, there's no garbage collection
    // running right now).
    mp->needextram = mp->schedlink == 0;
    unlockextra((M*)mp->schedlink);

    // Install g (= m->curg).
    runtime_setg(mp->curg);
    // Initialize g's context as in mstart.
    g->atomicstatus = _Gsyscall;

#ifdef USING_SPLIT_STACK
    __splitstack_getcontext(&g->stackcontext[0]);

    g->gcinitialsp = &mp;

    getcontext(ucontext_arg(&g->context[0]));

    if(g->entry != nil) {
        // Got here from mcall.
        void (*pfn)(G*) = (void (*)(G*))g->entry;
        G* gp = (G*)g->param;

    // Initialize this thread to use the m.

#ifdef USING_SPLIT_STACK
    int dont_block_signals = 0;
    __splitstack_block_signals(&dont_block_signals, nil);

// newextram allocates an m and puts it on the extra list.
// It is called with a working local m, so that it can do things
// like call schedlock and allocate.

runtime_newextram(void)

    uintptr g0_spsize, spsize;

    // Create extra goroutine locked to extra m.
    // The goroutine is the context in which the cgo callback will run.
    // The sched.pc will never be returned to, but setting it to
    // runtime.goexit makes clear to the traceback routines where
    // the goroutine stack ends.
    mp = runtime_allocm(nil, StackMin, &g0_sp, &g0_spsize);
    gp = runtime_malg(StackMin, &sp, &spsize);
    gp->atomicstatus = _Gdead;
    mp->locked = _LockInternal;
    gp->goid = runtime_xadd64(&runtime_sched->goidgen, 1);
    // put on allg for garbage collector

    // The context for gp will be set up in runtime_needm.  But
    // here we need to set up the context for g0.
    uc = ucontext_arg(&mp->g0->context[0]);
    uc->uc_stack.ss_sp = g0_sp;
    uc->uc_stack.ss_size = (size_t)g0_spsize;
    makecontext(uc, kickoff, 0);

    // Add m to the extra list.
    mnext = lockextra(true);
    mp->schedlink = (uintptr)mnext;
// dropm is called when a cgo callback has called needm but is now
// done with the callback and returning back into the non-Go thread.
// It puts the current m back onto the extra list.
// The main expense here is the call to signalstack to release the
// m's signal stack, and then the call to needm on the next callback
// from this thread.  It is tempting to try to save the m for next time,
// which would eliminate both these costs, but there might not be
// a next time: the current thread (which Go does not control) might exit.
// If we saved the m for that thread, there would be an m leak each time
// such a thread exited.  Instead, we acquire and release an m on each
// call.  These should typically not be scheduling operations, just a few
// atomics, so the cost should be small.
// TODO(rsc): An alternative would be to allocate a dummy pthread per-thread
// variable using pthread_key_create.  Unlike the pthread keys we already use
// on OS X, this dummy key would never be read by Go code.  It would exist
// only so that we could register a thread-exit-time destructor.
// That destructor would put the m back onto the extra list.
// This is purely a performance optimization.  The current version,
// in which dropm happens on each cgo call, is still correct too.
// We may have to keep the current version on systems with cgo
// but without pthreads, like Windows.
    // Undo whatever initialization minit did during needm.

    // Clear m and g, and return m to the extra list.
    // After the call to setg we can only call nosplit functions.
    mp->curg->atomicstatus = _Gdead;
    mp->curg->gcstack = nil;
    mp->curg->gcnextsp = nil;

    mnext = lockextra(true);
    mp->schedlink = (uintptr)mnext;

#define MLOCKED ((M*)1)

// lockextra locks the extra list and returns the list head.
// The caller must unlock the list by storing a new list head
// to runtime.extram.  If nilokay is true, then lockextra will
// return a nil list head if that's what it finds.  If nilokay is false,
// lockextra will keep waiting until the list head is no longer nil.

lockextra(bool nilokay)

    void (*yield)(void);

        mp = runtime_atomicloadp(&runtime_extram);
            yield = runtime_osyield;
        if(mp == nil && !nilokay) {
        if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {
            yield = runtime_osyield;

    runtime_atomicstorep(&runtime_extram, mp);

        mp = runtime_atomicloadp(&runtime_extram);
        if(!runtime_casp(&runtime_extram, mp, MLOCKED)) {

    for(mc = mp; mc != nil; mc = (M*)mc->schedlink)
    runtime_atomicstorep(&runtime_extram, mp);
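
/* Illustrative sketch (not part of the original file): the extra-M list is
   guarded by swapping the sentinel MLOCKED into the list head, which acts as
   a spin lock usable even before an m is installed.  The helper below is a
   hypothetical, reduced form of the lockextra/unlockextra pair above. */
static M*
sketch_lock_list(M **head)
{
    M *mp;

    for(;;) {
        mp = runtime_atomicloadp(head);
        if(mp == MLOCKED) {             /* someone else holds the lock */
            runtime_osyield();
            continue;
        }
        if(runtime_casp(head, mp, MLOCKED))
            return mp;                  /* unlock by storing a real head */
    }
}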
// Create a new m.  It will start off with a call to fn, or else the scheduler.

newm(void(*fn)(void), P *p)

    mp = runtime_allocm(p, -1, nil, nil);
    mp->nextp = (uintptr)p;
    mp->mstartfn = (uintptr)(void*)fn;

    runtime_newosproc(mp);

// Stops execution of the current m until new work is available.
// Returns with acquired P.

        runtime_throw("stopm holding locks");
        runtime_throw("stopm holding p");
        m->spinning = false;
        runtime_xadd(&runtime_sched->nmspinning, -1);

    runtime_lock(&runtime_sched->lock);
    runtime_unlock(&runtime_sched->lock);
    runtime_notesleep(&m->park);
    runtime_noteclear(&m->park);

    acquirep((P*)m->nextp);

    g->m->spinning = true;

// Schedules some M to run the p (creates an M if necessary).
// If p==nil, tries to get an idle P, if no idle P's does nothing.

startm(P *p, bool spinning)

    runtime_lock(&runtime_sched->lock);
        runtime_unlock(&runtime_sched->lock);
            runtime_xadd(&runtime_sched->nmspinning, -1);
    runtime_unlock(&runtime_sched->lock);

        runtime_throw("startm: m is spinning");
        runtime_throw("startm: m has p");
    mp->spinning = spinning;
    mp->nextp = (uintptr)p;
    runtime_notewakeup(&mp->park);

// Hands off P from syscall or locked M.

    // if it has local work, start it straight away
    if(p->runqhead != p->runqtail || runtime_sched->runqsize) {

    // no local work, check that there are no spinning/idle M's,
    // otherwise our help is not required
    if(runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) == 0 &&  // TODO: fast atomic
       runtime_cas(&runtime_sched->nmspinning, 0, 1)) {

    runtime_lock(&runtime_sched->lock);
    if(runtime_sched->gcwaiting) {
        p->status = _Pgcstop;
        if(--runtime_sched->stopwait == 0)
            runtime_notewakeup(&runtime_sched->stopnote);
        runtime_unlock(&runtime_sched->lock);

    if(runtime_sched->runqsize) {
        runtime_unlock(&runtime_sched->lock);

    // If this is the last running P and nobody is polling network,
    // need to wakeup another M to poll network.
    if(runtime_sched->npidle == (uint32)runtime_gomaxprocs-1 && runtime_atomicload64(&runtime_sched->lastpoll) != 0) {
        runtime_unlock(&runtime_sched->lock);

    runtime_unlock(&runtime_sched->lock);
// Tries to add one more P to execute G's.
// Called when a G is made runnable (newproc, ready).

    // be conservative about spinning threads
    if(!runtime_cas(&runtime_sched->nmspinning, 0, 1))

// Stops execution of the current m that is locked to a g until the g is runnable again.
// Returns with acquired P.

    if(m->lockedg == nil || m->lockedg->lockedm != m)
        runtime_throw("stoplockedm: inconsistent locking");

    // Schedule another M to run this p.

    // Wait until another thread schedules lockedg again.
    runtime_notesleep(&m->park);
    runtime_noteclear(&m->park);
    if(m->lockedg->atomicstatus != _Grunnable)
        runtime_throw("stoplockedm: not runnable");
    acquirep((P*)m->nextp);

// Schedules the locked m to run the locked gp.

        runtime_throw("startlockedm: locked to me");
        runtime_throw("startlockedm: m has p");
    // directly handoff current P to the locked m
    mp->nextp = (uintptr)p;
    runtime_notewakeup(&mp->park);

// Stops the current m for stoptheworld.
// Returns when the world is restarted.

    if(!runtime_sched->gcwaiting)
        runtime_throw("gcstopm: not waiting for gc");
    if(g->m->spinning) {
        g->m->spinning = false;
        runtime_xadd(&runtime_sched->nmspinning, -1);

    runtime_lock(&runtime_sched->lock);
    p->status = _Pgcstop;
    if(--runtime_sched->stopwait == 0)
        runtime_notewakeup(&runtime_sched->stopnote);
    runtime_unlock(&runtime_sched->lock);

// Schedules gp to run on the current M.

    if(gp->atomicstatus != _Grunnable) {
        runtime_printf("execute: bad g status %d\n", gp->atomicstatus);
        runtime_throw("execute: bad g status");

    gp->atomicstatus = _Grunning;
    ((P*)g->m->p)->schedtick++;

    // Check whether the profiler needs to be turned on or off.
    hz = runtime_sched->profilehz;
    if(g->m->profilehz != hz)
        runtime_resetcpuprofiler(hz);
// Finds a runnable goroutine to execute.
// Tries to steal from other P's, get g from global queue, poll network.

    if(runtime_sched->gcwaiting) {

    if(runtime_fingwait && runtime_fingwake && (gp = runtime_wakefing()) != nil)

    gp = runqget((P*)g->m->p);

    if(runtime_sched->runqsize) {
        runtime_lock(&runtime_sched->lock);
        gp = globrunqget((P*)g->m->p, 0);
        runtime_unlock(&runtime_sched->lock);

    gp = runtime_netpoll(false);  // non-blocking
        injectglist((G*)gp->schedlink);
        gp->atomicstatus = _Grunnable;

    // If number of spinning M's >= number of busy P's, block.
    // This is necessary to prevent excessive CPU consumption
    // when GOMAXPROCS>>1 but the program parallelism is low.
    if(!g->m->spinning && 2 * runtime_atomicload(&runtime_sched->nmspinning) >= runtime_gomaxprocs - runtime_atomicload(&runtime_sched->npidle))  // TODO: fast atomic

    if(!g->m->spinning) {
        g->m->spinning = true;
        runtime_xadd(&runtime_sched->nmspinning, 1);
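
    // Worked example (hypothetical numbers): with GOMAXPROCS=8, 2 idle Ps and
    // 3 Ms already spinning, 2*3 >= 8-2 holds, so the earlier check makes this
    // M block rather than become a fourth spinner; spinning threads stay
    // capped at roughly half of the busy Ps.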
    // random steal from other P's
    for(i = 0; i < 2*runtime_gomaxprocs; i++) {
        if(runtime_sched->gcwaiting)
        p = runtime_allp[runtime_fastrand1()%runtime_gomaxprocs];
        if(p == (P*)g->m->p)
            gp = runqsteal((P*)g->m->p, p);

    // return P and block
    runtime_lock(&runtime_sched->lock);
    if(runtime_sched->gcwaiting) {
        runtime_unlock(&runtime_sched->lock);

    if(runtime_sched->runqsize) {
        gp = globrunqget((P*)g->m->p, 0);
        runtime_unlock(&runtime_sched->lock);

    runtime_unlock(&runtime_sched->lock);
    if(g->m->spinning) {
        g->m->spinning = false;
        runtime_xadd(&runtime_sched->nmspinning, -1);

    // check all runqueues once again
    for(i = 0; i < runtime_gomaxprocs; i++) {
        p = runtime_allp[i];
        if(p && p->runqhead != p->runqtail) {
            runtime_lock(&runtime_sched->lock);
            runtime_unlock(&runtime_sched->lock);

    if(runtime_xchg64(&runtime_sched->lastpoll, 0) != 0) {
            runtime_throw("findrunnable: netpoll with p");
            runtime_throw("findrunnable: netpoll with spinning");
        gp = runtime_netpoll(true);  // block until new work is available
        runtime_atomicstore64(&runtime_sched->lastpoll, runtime_nanotime());

        runtime_lock(&runtime_sched->lock);
        runtime_unlock(&runtime_sched->lock);

            injectglist((G*)gp->schedlink);
            gp->atomicstatus = _Grunnable;

    if(g->m->spinning) {
        g->m->spinning = false;
        nmspinning = runtime_xadd(&runtime_sched->nmspinning, -1);
            runtime_throw("findrunnable: negative nmspinning");
        nmspinning = runtime_atomicload(&runtime_sched->nmspinning);

    // M wakeup policy is deliberately somewhat conservative (see nmspinning handling),
    // so see if we need to wakeup another P here.
    if (nmspinning == 0 && runtime_atomicload(&runtime_sched->npidle) > 0)

// Injects the list of runnable G's into the scheduler.
// Can run concurrently with GC.

injectglist(G *glist)

    runtime_lock(&runtime_sched->lock);
    for(n = 0; glist; n++) {
        glist = (G*)gp->schedlink;
        gp->atomicstatus = _Grunnable;

    runtime_unlock(&runtime_sched->lock);

    for(; n && runtime_sched->npidle; n--)

// One round of scheduler: find a runnable goroutine and execute it.

        runtime_throw("schedule: holding locks");

    if(runtime_sched->gcwaiting) {

        // Check the global runnable queue once in a while to ensure fairness.
        // Otherwise two goroutines can completely occupy the local runqueue
        // by constantly respawning each other.
        tick = ((P*)g->m->p)->schedtick;
        // This is a fancy way to say tick%61==0,
        // it uses 2 MUL instructions instead of a single DIV and so is faster on modern processors.
        if(tick - (((uint64)tick*0x4325c53fu)>>36)*61 == 0 && runtime_sched->runqsize > 0) {
            runtime_lock(&runtime_sched->lock);
            gp = globrunqget((P*)g->m->p, 1);
            runtime_unlock(&runtime_sched->lock);
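            // Worked example: the constant 0x4325c53f is roughly 2^36/61, so
            // for tick=122 the expression gives ((uint64)122*0x4325c53fu)>>36 == 2
            // and 122 - 2*61 == 0, matching 122%61 == 0.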
    gp = runqget((P*)g->m->p);
    if(gp && g->m->spinning)
        runtime_throw("schedule: spinning with local work");

    gp = findrunnable();  // blocks until work is available

    // Hands off own p to the locked m,
    // then blocks waiting for a new p.

// Puts the current goroutine into a waiting state and calls unlockf.
// If unlockf returns false, the goroutine is resumed.

runtime_park(bool(*unlockf)(G*, void*), void *lock, const char *reason)

    if(g->atomicstatus != _Grunning)
        runtime_throw("bad g status");
    g->m->waitlock = lock;
    g->m->waitunlockf = unlockf;
    g->waitreason = runtime_gostringnocopy((const byte*)reason);
    runtime_mcall(park0);

void gopark(FuncVal *, void *, String, byte, int)
    __asm__ ("runtime.gopark");

gopark(FuncVal *unlockf, void *lock, String reason,
       byte traceEv __attribute__ ((unused)),
       int traceskip __attribute__ ((unused)))

    if(g->atomicstatus != _Grunning)
        runtime_throw("bad g status");
    g->m->waitlock = lock;
    g->m->waitunlockf = unlockf == nil ? nil : (void*)unlockf->fn;
    g->waitreason = reason;
    runtime_mcall(park0);

parkunlock(G *gp, void *lock)

    runtime_unlock(lock);

// Puts the current goroutine into a waiting state and unlocks the lock.
// The goroutine can be made runnable again by calling runtime_ready(gp).

runtime_parkunlock(Lock *lock, const char *reason)

    runtime_park(parkunlock, lock, reason);
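
/* Illustrative sketch (not part of the original file): how a caller typically
   uses the park/unlockf contract.  The lock and waiter slot are hypothetical;
   the goroutine publishes itself somewhere reachable from the lock before
   parking, and parkunlock releases the lock only once the g is in _Gwaiting. */
static void
sketch_wait_for_event(Lock *l, G **waiter)
{
    runtime_lock(l);
    *waiter = runtime_g();                   /* publish ourselves to the waker */
    runtime_parkunlock(l, "waiting for event");
    /* resumed here after another goroutine calls runtime_ready(*waiter) */
}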
void goparkunlock(Lock*, String, byte, int)
    __asm__ (GOSYM_PREFIX "runtime.goparkunlock");

goparkunlock(Lock *lock, String reason, byte traceEv __attribute__ ((unused)),
             int traceskip __attribute__ ((unused)))

    if(g->atomicstatus != _Grunning)
        runtime_throw("bad g status");
    g->m->waitlock = lock;
    g->m->waitunlockf = parkunlock;
    g->waitreason = reason;
    runtime_mcall(park0);

// runtime_park continuation on g0.

    gp->atomicstatus = _Gwaiting;

    if(m->waitunlockf) {
        ok = ((bool (*)(G*, void*))m->waitunlockf)(gp, m->waitlock);
        m->waitunlockf = nil;
            gp->atomicstatus = _Grunnable;
            execute(gp);  // Schedule it back, never returns.

    execute(gp);  // Never returns.

runtime_gosched(void)

    if(g->atomicstatus != _Grunning)
        runtime_throw("bad g status");
    runtime_mcall(runtime_gosched0);

// runtime_gosched continuation on g0.

runtime_gosched0(G *gp)

    gp->atomicstatus = _Grunnable;

    runtime_lock(&runtime_sched->lock);
    runtime_unlock(&runtime_sched->lock);

    execute(gp);  // Never returns.

// Finishes execution of the current goroutine.
// Need to mark it as nosplit, because it runs with sp > stackbase (as runtime_lessstack).
// Since it does not return it does not matter.  But if it is preempted
// at the split stack check, GC will complain about inconsistent sp.
void runtime_goexit1(void) __attribute__ ((noinline));

runtime_goexit1(void)

    if(g->atomicstatus != _Grunning)
        runtime_throw("bad g status");
    runtime_mcall(goexit0);

// runtime_goexit1 continuation on g0.

    gp->atomicstatus = _Gdead;

    gp->paniconfault = 0;
    gp->_defer = nil;  // should be true already but just in case.
    gp->_panic = nil;  // non-nil for Goexit during panic. points at stack-allocated data.
    gp->writebuf.__values = nil;
    gp->writebuf.__count = 0;
    gp->writebuf.__capacity = 0;
    gp->waitreason = runtime_gostringnocopy(nil);

    if(m->locked & ~_LockExternal) {
        runtime_printf("invalid m->locked = %d\n", m->locked);
        runtime_throw("internal lockOSThread error");

    gfput((P*)m->p, gp);

// The goroutine g is about to enter a system call.
// Record that it's not using the cpu anymore.
// This is called only from the go syscall library and cgocall,
// not from the low-level system calls used by the runtime.
//
// Entersyscall cannot split the stack: the runtime_gosave must
// make g->sched refer to the caller's stack segment, because
// entersyscall is going to return immediately after.
void runtime_entersyscall(int32) __attribute__ ((no_split_stack));
static void doentersyscall(uintptr, uintptr)
    __attribute__ ((no_split_stack, noinline));

runtime_entersyscall(int32 dummy __attribute__ ((unused)))

    // Save the registers in the g structure so that any pointers
    // held in registers will be seen by the garbage collector.
    getcontext(ucontext_arg(&g->gcregs[0]));

    // Do the work in a separate function, so that this function
    // doesn't save any registers on its own stack.  If this
    // function does save any registers, we might store the wrong
    // value in the call to getcontext.
    //
    // FIXME: This assumes that we do not need to save any
    // callee-saved registers to access the TLS variable g.  We
    // don't want to put the ucontext_t on the stack because it is
    // large and we can not split the stack here.
    doentersyscall((uintptr)runtime_getcallerpc(&dummy),
                   (uintptr)runtime_getcallersp(&dummy));

doentersyscall(uintptr pc, uintptr sp)

    // Disable preemption because during this function g is in _Gsyscall status,
    // but can have inconsistent g->sched, do not let GC observe it.

    // Leave SP around for GC and traceback.
#ifdef USING_SPLIT_STACK
    g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
                                   &g->gcnextsegment, &g->gcnextsp,
                                   &g->gcinitialsp);
    g->gcstacksize = (uintptr)gcstacksize;

    g->gcnextsp = (byte*) &v;
    g->atomicstatus = _Gsyscall;

    if(runtime_atomicload(&runtime_sched->sysmonwait)) {  // TODO: fast atomic
        runtime_lock(&runtime_sched->lock);
        if(runtime_atomicload(&runtime_sched->sysmonwait)) {
            runtime_atomicstore(&runtime_sched->sysmonwait, 0);
            runtime_notewakeup(&runtime_sched->sysmonnote);
        runtime_unlock(&runtime_sched->lock);

    ((P*)(g->m->p))->m = 0;
    runtime_atomicstore(&((P*)g->m->p)->status, _Psyscall);
    if(runtime_atomicload(&runtime_sched->gcwaiting)) {
        runtime_lock(&runtime_sched->lock);
        if (runtime_sched->stopwait > 0 && runtime_cas(&((P*)g->m->p)->status, _Psyscall, _Pgcstop)) {
            if(--runtime_sched->stopwait == 0)
                runtime_notewakeup(&runtime_sched->stopnote);
        runtime_unlock(&runtime_sched->lock);
// The same as runtime_entersyscall(), but with a hint that the syscall is blocking.

runtime_entersyscallblock(int32 dummy __attribute__ ((unused)))

    g->m->locks++;  // see comment in entersyscall

    // Leave SP around for GC and traceback.
#ifdef USING_SPLIT_STACK
    g->gcstack = __splitstack_find(nil, nil, &gcstacksize,
                                   &g->gcnextsegment, &g->gcnextsp,
                                   &g->gcinitialsp);
    g->gcstacksize = (uintptr)gcstacksize;

    g->gcnextsp = (byte*) &p;

    // Save the registers in the g structure so that any pointers
    // held in registers will be seen by the garbage collector.
    getcontext(ucontext_arg(&g->gcregs[0]));

    g->syscallpc = (uintptr)runtime_getcallerpc(&dummy);
    g->syscallsp = (uintptr)runtime_getcallersp(&dummy);

    g->atomicstatus = _Gsyscall;

    if(g->isbackground)  // do not consider blocked scavenger for deadlock detection
// The goroutine g exited its system call.
// Arrange for it to run on a cpu again.
// This is called only from the go syscall library, not
// from the low-level system calls used by the runtime.

runtime_exitsyscall(int32 dummy __attribute__ ((unused)))

    gp->m->locks++;  // see comment in entersyscall

    if(gp->isbackground)  // do not consider blocked scavenger for deadlock detection

    if(exitsyscallfast()) {
        // There's a cpu for us, so we can run.
        ((P*)gp->m->p)->syscalltick++;
        gp->atomicstatus = _Grunning;
        // Garbage collector isn't running (since we are),
        // so okay to clear gcstack and gcsp.
#ifdef USING_SPLIT_STACK
        runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);

    // Call the scheduler.
    runtime_mcall(exitsyscall0);

    // Scheduler returned, so we're allowed to run now.
    // Delete the gcstack information that we left for
    // the garbage collector during the system call.
    // Must wait until now because until gosched returns
    // we don't know for sure that the garbage collector
    // is not running.
#ifdef USING_SPLIT_STACK
    runtime_memclr(&gp->gcregs[0], sizeof gp->gcregs);

    // Note that this gp->m might be different than the earlier
    // gp->m after returning from runtime_mcall.
    ((P*)gp->m->p)->syscalltick++;
exitsyscallfast(void)

    // Freezetheworld sets stopwait but does not retake P's.
    if(runtime_sched->stopwait) {

    // Try to re-acquire the last P.
    if(gp->m->p && ((P*)gp->m->p)->status == _Psyscall && runtime_cas(&((P*)gp->m->p)->status, _Psyscall, _Prunning)) {
        // There's a cpu for us, so we can run.
        gp->m->mcache = ((P*)gp->m->p)->mcache;
        ((P*)gp->m->p)->m = (uintptr)gp->m;

    // Try to get any other idle P.
    if(runtime_sched->pidle) {
        runtime_lock(&runtime_sched->lock);
        if(p && runtime_atomicload(&runtime_sched->sysmonwait)) {
            runtime_atomicstore(&runtime_sched->sysmonwait, 0);
            runtime_notewakeup(&runtime_sched->sysmonnote);
        runtime_unlock(&runtime_sched->lock);

// runtime_exitsyscall slow path on g0.
// Failed to acquire P, enqueue gp as runnable.

    gp->atomicstatus = _Grunnable;

    runtime_lock(&runtime_sched->lock);
    else if(runtime_atomicload(&runtime_sched->sysmonwait)) {
        runtime_atomicstore(&runtime_sched->sysmonwait, 0);
        runtime_notewakeup(&runtime_sched->sysmonnote);
    runtime_unlock(&runtime_sched->lock);
        execute(gp);  // Never returns.

        // Wait until another thread schedules gp and so m again.
        execute(gp);  // Never returns.
    schedule();  // Never returns.
void syscall_entersyscall(void)
    __asm__(GOSYM_PREFIX "syscall.Entersyscall");

void syscall_entersyscall(void) __attribute__ ((no_split_stack));

syscall_entersyscall()

    runtime_entersyscall(0);

void syscall_exitsyscall(void)
    __asm__(GOSYM_PREFIX "syscall.Exitsyscall");

void syscall_exitsyscall(void) __attribute__ ((no_split_stack));

syscall_exitsyscall()

    runtime_exitsyscall(0);

// Called from syscall package before fork.
void syscall_runtime_BeforeFork(void)
    __asm__(GOSYM_PREFIX "syscall.runtime_BeforeFork");

syscall_runtime_BeforeFork(void)

    // Fork can hang if preempted with signals frequently enough (see issue 5517).
    // Ensure that we stay on the same M where we disable profiling.
    runtime_m()->locks++;
    if(runtime_m()->profilehz != 0)
        runtime_resetcpuprofiler(0);

// Called from syscall package after fork in parent.
void syscall_runtime_AfterFork(void)
    __asm__(GOSYM_PREFIX "syscall.runtime_AfterFork");

syscall_runtime_AfterFork(void)

    hz = runtime_sched->profilehz;
        runtime_resetcpuprofiler(hz);
    runtime_m()->locks--;
// Allocate a new g, with a stack big enough for stacksize bytes.

runtime_malg(int32 stacksize, byte** ret_stack, uintptr* ret_stacksize)

    if(stacksize >= 0) {
#if USING_SPLIT_STACK
        int dont_block_signals = 0;
        size_t ss_stacksize;

        *ret_stack = __splitstack_makecontext(stacksize,
                                              &newg->stackcontext[0],
                                              &ss_stacksize);
        *ret_stacksize = (uintptr)ss_stacksize;
        __splitstack_block_signals_context(&newg->stackcontext[0],
                                           &dont_block_signals, nil);

        // In 64-bit mode, the maximum Go allocation space is
        // 128G.  Our stack size is 4M, which only permits 32K
        // goroutines.  In order to not limit ourselves,
        // allocate the stacks out of separate memory.  In
        // 32-bit mode, the Go allocation space is all of
        // memory anyhow.
        if(sizeof(void*) == 8) {
            void *p = runtime_SysAlloc(stacksize, &mstats()->other_sys);
                runtime_throw("runtime: cannot allocate memory for goroutine stack");
            *ret_stack = (byte*)p;

            *ret_stack = runtime_mallocgc(stacksize, 0, FlagNoProfiling|FlagNoGC);
            runtime_xadd(&runtime_stacks_sys, stacksize);

        *ret_stacksize = (uintptr)stacksize;
        newg->gcinitialsp = *ret_stack;
        newg->gcstacksize = (uintptr)stacksize;
__go_go(void (*fn)(void*), void* arg)

    //runtime_printf("newproc1 %p %p narg=%d nret=%d\n", fn->fn, argp, narg, nret);
        g->m->throwing = -1;  // do not dump full stacks
        runtime_throw("go of nil func value");

    g->m->locks++;  // disable preemption because it can be holding p in a local var

    if((newg = gfget(p)) != nil) {
#ifdef USING_SPLIT_STACK
        int dont_block_signals = 0;

        sp = __splitstack_resetcontext(&newg->stackcontext[0],
                                       &spsize);
        __splitstack_block_signals_context(&newg->stackcontext[0],
                                           &dont_block_signals, nil);

        sp = newg->gcinitialsp;
        spsize = newg->gcstacksize;
            runtime_throw("bad spsize in __go_go");
        newg->gcnextsp = sp;

        newg = runtime_malg(StackMin, &sp, &malsize);
        spsize = (size_t)malsize;

    newg->entry = (byte*)fn;

    newg->gopc = (uintptr)__builtin_return_address(0);
    newg->atomicstatus = _Grunnable;
    if(p->goidcache == p->goidcacheend) {
        p->goidcache = runtime_xadd64(&runtime_sched->goidgen, GoidCacheBatch);
        p->goidcacheend = p->goidcache + GoidCacheBatch;

    newg->goid = p->goidcache++;
    // Avoid warnings about variables clobbered by
        byte * volatile vsp = sp;
        size_t volatile vspsize = spsize;
        G * volatile vnewg = newg;
        ucontext_t * volatile uc;

        uc = ucontext_arg(&vnewg->context[0]);
        uc->uc_stack.ss_sp = vsp;
        uc->uc_stack.ss_size = vspsize;
        makecontext(uc, kickoff, 0);

    if(runtime_atomicload(&runtime_sched->npidle) != 0 && runtime_atomicload(&runtime_sched->nmspinning) == 0 && fn != runtime_main)  // TODO: fast atomic

    runtime_lock(&allglock);
    if(runtime_allglen >= allgcap) {
        cap = 4096/sizeof(new[0]);
        new = runtime_malloc(cap*sizeof(new[0]));
            runtime_throw("runtime: cannot allocate memory");
        if(runtime_allg != nil) {
            runtime_memmove(new, runtime_allg, runtime_allglen*sizeof(new[0]));
            runtime_free(runtime_allg);

    runtime_allg[runtime_allglen++] = gp;
    runtime_unlock(&allglock);

// Put on gfree list.
// If local list is too long, transfer a batch to the global list.

    gp->schedlink = (uintptr)p->gfree;

    if(p->gfreecnt >= 64) {
        runtime_lock(&runtime_sched->gflock);
        while(p->gfreecnt >= 32) {
            p->gfree = (G*)gp->schedlink;
            gp->schedlink = (uintptr)runtime_sched->gfree;
            runtime_sched->gfree = gp;
        runtime_unlock(&runtime_sched->gflock);
// Get from gfree list.
// If local list is empty, grab a batch from global list.

    if(gp == nil && runtime_sched->gfree) {
        runtime_lock(&runtime_sched->gflock);
        while(p->gfreecnt < 32 && runtime_sched->gfree) {
            gp = runtime_sched->gfree;
            runtime_sched->gfree = (G*)gp->schedlink;
            gp->schedlink = (uintptr)p->gfree;
        runtime_unlock(&runtime_sched->gflock);

        p->gfree = (G*)gp->schedlink;

// Purge all cached G's from gfree list to the global list.

    runtime_lock(&runtime_sched->gflock);
    while(p->gfreecnt) {
        p->gfree = (G*)gp->schedlink;
        gp->schedlink = (uintptr)runtime_sched->gfree;
        runtime_sched->gfree = gp;
    runtime_unlock(&runtime_sched->gflock);
runtime_Breakpoint(void)

    runtime_breakpoint();

void runtime_Gosched (void) __asm__ (GOSYM_PREFIX "runtime.Gosched");

runtime_Gosched(void)

// Implementation of runtime.GOMAXPROCS.
// delete when scheduler is even stronger

intgo runtime_GOMAXPROCS(intgo)
    __asm__(GOSYM_PREFIX "runtime.GOMAXPROCS");

runtime_GOMAXPROCS(intgo n)

    if(n > _MaxGomaxprocs)

    runtime_lock(&runtime_sched->lock);
    ret = (intgo)runtime_gomaxprocs;
    if(n <= 0 || n == ret) {
        runtime_unlock(&runtime_sched->lock);

    runtime_unlock(&runtime_sched->lock);

    runtime_acquireWorldsema();
    runtime_stopTheWorldWithSema();
    newprocs = (int32)n;
    runtime_releaseWorldsema();
    runtime_startTheWorldWithSema();
// lockOSThread is called by runtime.LockOSThread and runtime.lockOSThread below
// after they modify m->locked. Do not allow preemption during this call,
// or else the m might be different in this function than in the caller.

void runtime_LockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.LockOSThread");

runtime_LockOSThread(void)
	g->m->locked |= _LockExternal;

runtime_lockOSThread(void)
	g->m->locked += _LockInternal;
// unlockOSThread is called by runtime.UnlockOSThread and runtime.unlockOSThread below
// after they update m->locked. Do not allow preemption during this call,
// or else the m might be different in this function than in the caller.

unlockOSThread(void)
	if(g->m->locked != 0)
		return;
	g->m->lockedg = nil;

void runtime_UnlockOSThread(void) __asm__ (GOSYM_PREFIX "runtime.UnlockOSThread");

runtime_UnlockOSThread(void)
	g->m->locked &= ~_LockExternal;

runtime_unlockOSThread(void)
	if(g->m->locked < _LockInternal)
		runtime_throw("runtime: internal error: misuse of lockOSThread/unlockOSThread");
	g->m->locked -= _LockInternal;
runtime_lockedOSThread(void)
{
	return g->lockedm != nil && g->m->lockedg != nil;
}
runtime_gcount(void)
	runtime_lock(&allglock);
	// TODO(dvyukov): runtime.NumGoroutine() is O(N).
	// We do not want to increment/decrement centralized counter in newproc/goexit,
	// just to make runtime.NumGoroutine() faster.
	// Compromise solution is to introduce per-P counters of active goroutines.
	for(i = 0; i < runtime_allglen; i++) {
		gp = runtime_allg[i];
		s = gp->atomicstatus;
		if(s == _Grunnable || s == _Grunning || s == _Gsyscall || s == _Gwaiting)
			n++;
	}
	runtime_unlock(&allglock);

runtime_mcount(void)
{
	return runtime_sched->mcount;
}
static void System(void) {}
static void GC(void) {}

// Called if we receive a SIGPROF signal.
	uintptr pcbuf[TracebackMaxFrames];
	Location locbuf[TracebackMaxFrames];

	// Profiling runs concurrently with GC, so it must not allocate.
	if(mp->mcache == nil)
		traceback = false;

	if(runtime_atomicload(&runtime_in_callers) > 0) {
		// If SIGPROF arrived while already fetching runtime
		// callers we can have trouble on older systems
		// because the unwind library calls dl_iterate_phdr
		// which was not recursive in the past.
		traceback = false;
	}

	n = runtime_callers(0, locbuf, nelem(locbuf), false);
	for(i = 0; i < n; i++)
		pcbuf[i] = locbuf[i].pc;
	if(!traceback || n <= 0) {
		pcbuf[0] = (uintptr)runtime_getcallerpc(&n);
		if(mp->gcing || mp->helpgc)
			pcbuf[1] = (uintptr)GC;
		else
			pcbuf[1] = (uintptr)System;
	}
	stk.__values = &pcbuf[0];

	// Simple cas-lock to coordinate with setcpuprofilerate.
	while(!runtime_cas(&prof.lock, 0, 1)) {
		runtime_osyield();
	}
	runtime_cpuprofAdd(stk);
	runtime_atomicstore(&prof.lock, 0);
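	// prof.lock is a simple CAS spin lock rather than a blocking mutex: this
	// code runs from the SIGPROF handler, where sleeping on a lock that the
	// interrupted thread may hold could deadlock.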
// Arrange to call fn with a traceback hz times a second.
runtime_setcpuprofilerate_m(int32 hz)
	// Force sane arguments.

	// Disable preemption, otherwise we can be rescheduled to another thread
	// that has profiling enabled.

	// Stop profiler on this thread so that it is safe to lock prof.
	// If a profiling signal came in while we had prof locked,
	// it would deadlock.
	runtime_resetcpuprofiler(0);

	while(!runtime_cas(&prof.lock, 0, 1)) {
		runtime_osyield();
	}
	runtime_atomicstore(&prof.lock, 0);

	runtime_lock(&runtime_sched->lock);
	runtime_sched->profilehz = hz;
	runtime_unlock(&runtime_sched->lock);

	runtime_resetcpuprofiler(hz);
// Change number of processors. The world is stopped, sched is locked.
procresize(int32 new)
	old = runtime_gomaxprocs;
	if(old < 0 || old > _MaxGomaxprocs || new <= 0 || new > _MaxGomaxprocs)
		runtime_throw("procresize: invalid arg");

	// initialize new P's
	for(i = 0; i < new; i++) {
		p = runtime_allp[i];
		if(p == nil) {
			p = (P*)runtime_mallocgc(sizeof(*p), 0, FlagNoInvokeGC);
			p->status = _Pgcstop;
			p->deferpool.__values = &p->deferpoolbuf[0];
			p->deferpool.__count = 0;
			p->deferpool.__capacity = nelem(p->deferpoolbuf);
			runtime_atomicstorep(&runtime_allp[i], p);
		}
		if(p->mcache == nil) {
			if(old == 0 && i == 0)
				p->mcache = g->m->mcache;	// bootstrap
			else
				p->mcache = runtime_allocmcache();
		}
	}
	// redistribute runnable G's evenly
	// collect all runnable goroutines in global queue preserving FIFO order
	// FIFO order is required to ensure fairness even during frequent GCs
	// see http://golang.org/issue/7126
	for(i = 0; i < old; i++) {
		p = runtime_allp[i];
		if(p->runqhead == p->runqtail)
			continue;
		// pop from tail of local queue
		gp = (G*)p->runq[p->runqtail%nelem(p->runq)];
		// push onto head of global queue
		gp->schedlink = runtime_sched->runqhead;
		runtime_sched->runqhead = (uintptr)gp;
		if(runtime_sched->runqtail == 0)
			runtime_sched->runqtail = (uintptr)gp;
		runtime_sched->runqsize++;
	}
	// fill local queues with at most nelem(p->runq)/2 goroutines
	// start at 1 because current M already executes some G and will acquire allp[0] below,
	// so if we have a spare G we want to put it into allp[1].
	for(i = 1; (uint32)i < (uint32)new * nelem(p->runq)/2 && runtime_sched->runqsize > 0; i++) {
		gp = (G*)runtime_sched->runqhead;
		runtime_sched->runqhead = gp->schedlink;
		if(runtime_sched->runqhead == 0)
			runtime_sched->runqtail = 0;
		runtime_sched->runqsize--;
		runqput(runtime_allp[i%new], gp);
	}
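	// Goroutines pulled from the global queue are handed out round-robin
	// across the new P's (allp[i%new]); the loop bound of new*nelem(p->runq)/2
	// ensures no P starts more than half full.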
	for(i = new; i < old; i++) {
		p = runtime_allp[i];
		for(j = 0; j < p->deferpool.__count; j++) {
			((struct _defer**)p->deferpool.__values)[j] = nil;
		}
		p->deferpool.__count = 0;
		runtime_freemcache(p->mcache);
		// can't free P itself because it can be referenced by an M in syscall
	}

	((P*)g->m->p)->m = 0;
	p = runtime_allp[0];

	for(i = new-1; i > 0; i--) {
		p = runtime_allp[i];
	}

	runtime_atomicstore((uint32*)&runtime_gomaxprocs, new);
// Associate p and the current m.
	if(m->p || m->mcache)
		runtime_throw("acquirep: already in go");
	if(p->m || p->status != _Pidle) {
		runtime_printf("acquirep: p->m=%p(%d) p->status=%d\n",
			p->m, p->m ? ((M*)p->m)->id : 0, p->status);
		runtime_throw("acquirep: invalid p state");
	}
	m->mcache = p->mcache;
	p->status = _Prunning;
// Disassociate p and the current m.
	if(m->p == 0 || m->mcache == nil)
		runtime_throw("releasep: invalid arg");
	if((M*)p->m != m || p->mcache != m->mcache || p->status != _Prunning) {
		runtime_printf("releasep: m=%p m->p=%p p->m=%p m->mcache=%p p->mcache=%p p->status=%d\n",
			m, m->p, p->m, m->mcache, p->mcache, p->status);
		runtime_throw("releasep: invalid p state");
	}
incidlelocked(int32 v)
	runtime_lock(&runtime_sched->lock);
	runtime_sched->nmidlelocked += v;
	runtime_unlock(&runtime_sched->lock);
// Check for deadlock situation.
// The check is based on number of running M's, if 0 -> deadlock.
	int32 run, grunning, s;

	// For -buildmode=c-shared or -buildmode=c-archive it's OK if
	// there are no running goroutines. The calling program is
	// assumed to be running.
	if(runtime_isarchive) {
		return;
	}

	run = runtime_sched->mcount - runtime_sched->nmidle - runtime_sched->nmidlelocked - 1 - countextra();

	// If we are dying because of a signal caught on an already idle thread,
	// freezetheworld will cause all running threads to block.
	// And runtime will essentially enter into deadlock state,
	// except that there is a thread that will call runtime_exit soon.
	if(runtime_panicking() > 0)
		return;
	if(run < 0) {
		runtime_printf("runtime: checkdead: nmidle=%d nmidlelocked=%d mcount=%d\n",
			runtime_sched->nmidle, runtime_sched->nmidlelocked, runtime_sched->mcount);
		runtime_throw("checkdead: inconsistent counts");
	}

	runtime_lock(&allglock);
	for(i = 0; i < runtime_allglen; i++) {
		gp = runtime_allg[i];
		if(gp->isbackground)
			continue;
		s = gp->atomicstatus;
		if(s == _Gwaiting)
			grunning++;
		else if(s == _Grunnable || s == _Grunning || s == _Gsyscall) {
			runtime_unlock(&allglock);
			runtime_printf("runtime: checkdead: find g %D in status %d\n", gp->goid, s);
			runtime_throw("checkdead: runnable g");
		}
	}
	runtime_unlock(&allglock);
	if(grunning == 0) // possible if main goroutine calls runtime_Goexit()
		runtime_throw("no goroutines (main called runtime.Goexit) - deadlock!");
	g->m->throwing = -1; // do not dump full stacks
	runtime_throw("all goroutines are asleep - deadlock!");
	int64 now, lastpoll, lasttrace;

	idle = 0; // how many cycles in succession we had not woken up somebody
	if(idle == 0) // start with 20us sleep...
		delay = 20;
	else if(idle > 50) // start doubling the sleep after 1ms...
		delay *= 2;
	if(delay > 10*1000) // up to 10ms
		delay = 10*1000;
	runtime_usleep(delay);
	if(runtime_debug.schedtrace <= 0 &&
	   (runtime_sched->gcwaiting || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs)) { // TODO: fast atomic
		runtime_lock(&runtime_sched->lock);
		if(runtime_atomicload(&runtime_sched->gcwaiting) || runtime_atomicload(&runtime_sched->npidle) == (uint32)runtime_gomaxprocs) {
			runtime_atomicstore(&runtime_sched->sysmonwait, 1);
			runtime_unlock(&runtime_sched->lock);
			runtime_notesleep(&runtime_sched->sysmonnote);
			runtime_noteclear(&runtime_sched->sysmonnote);
		} else
			runtime_unlock(&runtime_sched->lock);
	}
	// poll network if not polled for more than 10ms
	lastpoll = runtime_atomicload64(&runtime_sched->lastpoll);
	now = runtime_nanotime();
	if(lastpoll != 0 && lastpoll + 10*1000*1000 < now) {
		runtime_cas64(&runtime_sched->lastpoll, lastpoll, now);
		gp = runtime_netpoll(false); // non-blocking
		// Need to decrement number of idle locked M's
		// (pretending that one more is running) before injectglist.
		// Otherwise it can lead to the following situation:
		// injectglist grabs all P's but before it starts M's to run the P's,
		// another M returns from syscall, finishes running its G,
		// observes that there is no work to do and no other running M's
		// and reports deadlock.
	}
	// retake P's blocked in syscalls
	// and preempt long running G's
	if(runtime_debug.schedtrace > 0 && lasttrace + runtime_debug.schedtrace*1000000ll <= now) {
		runtime_schedtrace(runtime_debug.scheddetail);
	}
typedef struct Pdesc Pdesc;
static Pdesc pdesc[_MaxGomaxprocs];

	for(i = 0; i < (uint32)runtime_gomaxprocs; i++) {
		p = runtime_allp[i];
		if(s == _Psyscall) {
			// Retake P from syscall if it's there for more than 1 sysmon tick (at least 20us).
			if(pd->syscalltick != t) {
				pd->syscalltick = t;
				pd->syscallwhen = now;
				continue;
			}
			// On the one hand we don't want to retake Ps if there is no other work to do,
			// but on the other hand we want to retake them eventually
			// because they can prevent the sysmon thread from deep sleep.
			if(p->runqhead == p->runqtail &&
			   runtime_atomicload(&runtime_sched->nmspinning) + runtime_atomicload(&runtime_sched->npidle) > 0 &&
			   pd->syscallwhen + 10*1000*1000 > now)
				continue;
			// Need to decrement number of idle locked M's
			// (pretending that one more is running) before the CAS.
			// Otherwise the M from which we retake can exit the syscall,
			// increment nmidle and report deadlock.
			if(runtime_cas(&p->status, s, _Pidle)) {
			}
		} else if(s == _Prunning) {
			// Preempt G if it's running for more than 10ms.
			if(pd->schedtick != t) {
				pd->schedwhen = now;
				continue;
			}
			if(pd->schedwhen + 10*1000*1000 > now)
				continue;
		}
	}
// Tell all goroutines that they have been preempted and they should stop.
// This function is purely best-effort. It can fail to inform a goroutine if a
// processor just started running it.
// No locks need to be held.
// Returns true if preemption request was issued to at least one goroutine.
runtime_schedtrace(bool detailed)
	static int64 starttime;
	int64 id1, id2, id3;

	now = runtime_nanotime();

	runtime_lock(&runtime_sched->lock);
	runtime_printf("SCHED %Dms: gomaxprocs=%d idleprocs=%d threads=%d idlethreads=%d runqueue=%d",
		(now-starttime)/1000000, runtime_gomaxprocs, runtime_sched->npidle, runtime_sched->mcount,
		runtime_sched->nmidle, runtime_sched->runqsize);
	runtime_printf(" gcwaiting=%d nmidlelocked=%d nmspinning=%d stopwait=%d sysmonwait=%d\n",
		runtime_sched->gcwaiting, runtime_sched->nmidlelocked, runtime_sched->nmspinning,
		runtime_sched->stopwait, runtime_sched->sysmonwait);
	// We must be careful while reading data from P's, M's and G's.
	// Even if we hold schedlock, most data can be changed concurrently.
	// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
	for(i = 0; i < runtime_gomaxprocs; i++) {
		p = runtime_allp[i];
		h = runtime_atomicload(&p->runqhead);
		t = runtime_atomicload(&p->runqtail);
		runtime_printf(" P%d: status=%d schedtick=%d syscalltick=%d m=%d runqsize=%d gfreecnt=%d\n",
			i, p->status, p->schedtick, p->syscalltick, mp ? mp->id : -1, t-h, p->gfreecnt);
		// In non-detailed mode format lengths of per-P run queues as:
		// [len1 len2 len3 len4]
		if(runtime_gomaxprocs == 1)
			fmt = " [%d]\n";
		else if(i == runtime_gomaxprocs-1)
			fmt = " %d]\n";
		runtime_printf(fmt, t-h);
	}
	runtime_unlock(&runtime_sched->lock);

	for(mp = runtime_allm; mp; mp = mp->alllink) {
		lockedg = mp->lockedg;
		id3 = lockedg->goid;
		runtime_printf(" M%d: p=%D curg=%D mallocing=%d throwing=%d gcing=%d"
			" locks=%d dying=%d helpgc=%d spinning=%d blocked=%d lockedg=%D\n",
			mp->mallocing, mp->throwing, mp->gcing, mp->locks, mp->dying, mp->helpgc,
			mp->spinning, mp->blocked, id3);
	}

	runtime_lock(&allglock);
	for(gi = 0; gi < runtime_allglen; gi++) {
		gp = runtime_allg[gi];
		lockedm = gp->lockedm;
		runtime_printf(" G%D: status=%d(%S) m=%d lockedm=%d\n",
			gp->goid, gp->atomicstatus, gp->waitreason, mp ? mp->id : -1,
			lockedm ? lockedm->id : -1);
	}
	runtime_unlock(&allglock);
	runtime_unlock(&runtime_sched->lock);
// Put mp on midle list.
// Sched must be locked.
	mp->schedlink = runtime_sched->midle;
	runtime_sched->midle = (uintptr)mp;
	runtime_sched->nmidle++;
// Try to get an m from midle list.
// Sched must be locked.
	if((mp = (M*)runtime_sched->midle) != nil) {
		runtime_sched->midle = mp->schedlink;
		runtime_sched->nmidle--;
	}
// Put gp on the global runnable queue.
// Sched must be locked.
	if(runtime_sched->runqtail)
		((G*)runtime_sched->runqtail)->schedlink = (uintptr)gp;
	else
		runtime_sched->runqhead = (uintptr)gp;
	runtime_sched->runqtail = (uintptr)gp;
	runtime_sched->runqsize++;
// Put a batch of runnable goroutines on the global runnable queue.
// Sched must be locked.
globrunqputbatch(G *ghead, G *gtail, int32 n)
	gtail->schedlink = 0;
	if(runtime_sched->runqtail)
		((G*)runtime_sched->runqtail)->schedlink = (uintptr)ghead;
	else
		runtime_sched->runqhead = (uintptr)ghead;
	runtime_sched->runqtail = (uintptr)gtail;
	runtime_sched->runqsize += n;
// Try to get a batch of G's from the global runnable queue.
// Sched must be locked.
globrunqget(P *p, int32 max)
	if(runtime_sched->runqsize == 0)
		return nil;
	n = runtime_sched->runqsize/runtime_gomaxprocs+1;
	if(n > runtime_sched->runqsize)
		n = runtime_sched->runqsize;
	if(max > 0 && n > max)
		n = max;
	if((uint32)n > nelem(p->runq)/2)
		n = nelem(p->runq)/2;
	runtime_sched->runqsize -= n;
	if(runtime_sched->runqsize == 0)
		runtime_sched->runqtail = 0;
	gp = (G*)runtime_sched->runqhead;
	runtime_sched->runqhead = gp->schedlink;

	gp1 = (G*)runtime_sched->runqhead;
	runtime_sched->runqhead = gp1->schedlink;
// Put p on pidle list.
// Sched must be locked.
	p->link = runtime_sched->pidle;
	runtime_sched->pidle = (uintptr)p;
	runtime_xadd(&runtime_sched->npidle, 1); // TODO: fast atomic
// Try to get a p from pidle list.
// Sched must be locked.
	p = (P*)runtime_sched->pidle;
	runtime_sched->pidle = p->link;
	runtime_xadd(&runtime_sched->npidle, -1); // TODO: fast atomic
// Try to put g on local runnable queue.
// If it's full, put onto global queue.
// Executed only by the owner P.
runqput(P *p, G *gp)
	h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
	if(t - h < nelem(p->runq)) {
		p->runq[t%nelem(p->runq)] = (uintptr)gp;
		runtime_atomicstore(&p->runqtail, t+1); // store-release, makes the item available for consumption
		return;
	}
	if(runqputslow(p, gp, h, t))
		return;
	// the queue is not full, now the put above must succeed
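	// p->runq is a fixed-size ring indexed modulo nelem(p->runq): only the
	// owner P advances runqtail (plain store-release), while consumers and
	// stealers advance runqhead with a CAS, which is what the acquire/release
	// comments above refer to.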
// Put g and a batch of work from local runnable queue on global queue.
// Executed only by the owner P.
runqputslow(P *p, G *gp, uint32 h, uint32 t)
	G *batch[nelem(p->runq)/2+1];

	// First, grab a batch from local queue.
	if(n != nelem(p->runq)/2)
		runtime_throw("runqputslow: queue is not full");
	batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
	if(!runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
		return false;
	// Link the goroutines.
	batch[i]->schedlink = (uintptr)batch[i+1];
	// Now put the batch on global queue.
	runtime_lock(&runtime_sched->lock);
	globrunqputbatch(batch[0], batch[n], n+1);
	runtime_unlock(&runtime_sched->lock);
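	// Moving half of the local queue plus the new g in one batch keeps the
	// local queue bounded and publishes the overflow to other P's under a
	// single acquisition of sched->lock.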
// Get g from local runnable queue.
// Executed only by the owner P.
	h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
	gp = (G*)p->runq[h%nelem(p->runq)];
	if(runtime_cas(&p->runqhead, h, h+1)) // cas-release, commits consume
		return gp;
// Grabs a batch of goroutines from local runnable queue.
// batch array must be of size nelem(p->runq)/2. Returns number of grabbed goroutines.
// Can be executed by any P.
runqgrab(P *p, G **batch)
	h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with other consumers
	t = runtime_atomicload(&p->runqtail); // load-acquire, synchronize with the producer
	if(n > nelem(p->runq)/2) // read inconsistent h and t
		continue;
	batch[i] = (G*)p->runq[(h+i)%nelem(p->runq)];
	if(runtime_cas(&p->runqhead, h, h+n)) // cas-release, commits consume
		break;
// Steal half of elements from local runnable queue of p2
// and put onto local runnable queue of p.
// Returns one of the stolen elements (or nil if failed).
runqsteal(P *p, P *p2)
	G *batch[nelem(p->runq)/2];

	n = runqgrab(p2, batch);
	h = runtime_atomicload(&p->runqhead); // load-acquire, synchronize with consumers
	if(t - h + n >= nelem(p->runq))
		runtime_throw("runqsteal: runq overflow");
	for(i=0; i<n; i++, t++)
		p->runq[t%nelem(p->runq)] = (uintptr)batch[i];
	runtime_atomicstore(&p->runqtail, t); // store-release, makes the item available for consumption
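	// Stolen goroutines are appended at the thief's own tail; the single
	// store-release of runqtail publishes the whole batch at once.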
void runtime_testSchedLocalQueue(void)
	__asm__("runtime.testSchedLocalQueue");

runtime_testSchedLocalQueue(void)
	G gs[nelem(p.runq)];

	runtime_memclr((byte*)&p, sizeof(p));

	for(i = 0; i < (int32)nelem(gs); i++) {
		if(runqget(&p) != nil)
			runtime_throw("runq is not empty initially");
		for(j = 0; j < i; j++)
			runqput(&p, &gs[i]);
		for(j = 0; j < i; j++) {
			if(runqget(&p) != &gs[i]) {
				runtime_printf("bad element at iter %d/%d\n", i, j);
				runtime_throw("bad element");
			}
		}
		if(runqget(&p) != nil)
			runtime_throw("runq is not empty afterwards");
	}
void runtime_testSchedLocalQueueSteal(void)
	__asm__("runtime.testSchedLocalQueueSteal");

runtime_testSchedLocalQueueSteal(void)
	G gs[nelem(p1.runq)], *gp;

	runtime_memclr((byte*)&p1, sizeof(p1));
	runtime_memclr((byte*)&p2, sizeof(p2));

	for(i = 0; i < (int32)nelem(gs); i++) {
		for(j = 0; j < i; j++) {
			runqput(&p1, &gs[j]);
		}
		gp = runqsteal(&p2, &p1);
		while((gp = runqget(&p2)) != nil) {
			s++;
			gp->sig++;
		}
		while((gp = runqget(&p1)) != nil)
			gp->sig++;
		for(j = 0; j < i; j++) {
			if(gs[j].sig != 1) {
				runtime_printf("bad element %d(%d) at iter %d\n", j, gs[j].sig, i);
				runtime_throw("bad element");
			}
		}
		if(s != i/2 && s != i/2+1) {
			runtime_printf("bad steal %d, want %d or %d, iter %d\n",
				s, i/2, i/2+1, i);
			runtime_throw("bad steal");
		}
	}
runtime_setmaxthreads(intgo in)
	runtime_lock(&runtime_sched->lock);
	out = (intgo)runtime_sched->maxmcount;
	runtime_sched->maxmcount = (int32)in;
	runtime_unlock(&runtime_sched->lock);
	return (intgo)(((P*)mp->p)->id);

	runtime_m()->locks--;
intgo sync_runtime_procPin(void)
	__asm__ (GOSYM_PREFIX "sync.runtime_procPin");

sync_runtime_procPin()

void sync_runtime_procUnpin(void)
	__asm__ (GOSYM_PREFIX "sync.runtime_procUnpin");

sync_runtime_procUnpin()

intgo sync_atomic_runtime_procPin(void)
	__asm__ (GOSYM_PREFIX "sync_atomic.runtime_procPin");

sync_atomic_runtime_procPin()

void sync_atomic_runtime_procUnpin(void)
	__asm__ (GOSYM_PREFIX "sync_atomic.runtime_procUnpin");

sync_atomic_runtime_procUnpin()
runtime_proc_scan(struct Workbuf** wbufp, void (*enqueue1)(struct Workbuf**, Obj))
	enqueue1(wbufp, (Obj){(byte*)&runtime_main_init_done, sizeof runtime_main_init_done, 0});
// Return whether we are waiting for a GC. This gc toolchain uses
// preemption instead.
runtime_gcwaiting(void)
	return runtime_sched->gcwaiting;
// os_beforeExit is called from os.Exit(0).
//go:linkname os_beforeExit os.runtime_beforeExit

extern void os_beforeExit() __asm__ (GOSYM_PREFIX "os.runtime_beforeExit");
// Active spinning for sync.Mutex.
//go:linkname sync_runtime_canSpin sync.runtime_canSpin

	ACTIVE_SPIN_CNT = 30,

extern _Bool sync_runtime_canSpin(intgo i)
	__asm__ (GOSYM_PREFIX "sync.runtime_canSpin");

sync_runtime_canSpin(intgo i)
	// sync.Mutex is cooperative, so we are conservative with spinning.
	// Spin only a few times and only if running on a multicore machine and
	// GOMAXPROCS>1 and there is at least one other running P and local runq is empty.
	// As opposed to runtime mutex we don't do passive spinning here,
	// because there can be work on global runq or on other Ps.
	if(i >= ACTIVE_SPIN || runtime_ncpu <= 1 || runtime_gomaxprocs <= (int32)(runtime_sched->npidle+runtime_sched->nmspinning)+1) {
		return false;
	}
	p = (P*)g->m->p;
	return p != nil && p->runqhead == p->runqtail;
//go:linkname sync_runtime_doSpin sync.runtime_doSpin

extern void sync_runtime_doSpin(void)
	__asm__ (GOSYM_PREFIX "sync.runtime_doSpin");

sync_runtime_doSpin()
	runtime_procyield(ACTIVE_SPIN_CNT);
// For Go code to look at variables, until we port proc.go.

extern M** runtime_go_allm(void)
	__asm__ (GOSYM_PREFIX "runtime.allm");

	return &runtime_allm;

extern Slice runtime_go_allgs(void)
	__asm__ (GOSYM_PREFIX "runtime.allgs");

	s.__values = runtime_allg;
	s.__count = runtime_allglen;
	s.__capacity = allgcap;
intgo NumCPU(void) __asm__ (GOSYM_PREFIX "runtime.NumCPU");

	return (intgo)(runtime_ncpu);