1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Garbage collector: marking and scanning
10 "runtime/internal/atomic"
11 "runtime/internal/sys"
16 fixedRootFinalizers
= iota
20 // rootBlockBytes is the number of bytes to scan per data or
22 rootBlockBytes
= 256 << 10
24 // rootBlockSpans is the number of spans to scan per span
26 rootBlockSpans
= 8 * 1024 // 64MB worth of spans
28 // maxObletBytes is the maximum bytes of an object to scan at
29 // once. Larger objects will be split up into "oblets" of at
30 // most this size. Since we can scan 1–2 MB/ms, 128 KB bounds
31 // scan preemption at ~100 µs.
33 // This must be > _MaxSmallSize so that the object base is the
35 maxObletBytes
= 128 << 10
37 // drainCheckThreshold specifies how many units of work to do
38 // between self-preemption checks in gcDrain. Assuming a scan
39 // rate of 1 MB/ms, this is ~100 µs. Lower values have higher
40 // overhead in the scan loop (the scheduler check may perform
41 // a syscall, so its overhead is nontrivial). Higher values
42 // make the system less responsive to incoming work.
43 drainCheckThreshold
= 100000
46 // gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
47 // some miscellany) and initializes scanning-related state.
49 // The caller must have called gcCopySpans().
51 // The world must be stopped.
54 func gcMarkRootPrepare() {
55 if gcphase
== _GCmarktermination
{
56 work
.nFlushCacheRoots
= int(gomaxprocs
)
58 work
.nFlushCacheRoots
= 0
63 // Only scan globals once per cycle; preferably concurrently.
64 if !work
.markrootDone
{
72 if !work
.markrootDone
{
73 // On the first markroot, we need to scan span roots.
74 // In concurrent GC, this happens during concurrent
75 // mark and we depend on addfinalizer to ensure the
76 // above invariants for objects that get finalizers
77 // after concurrent mark. In STW GC, this will happen
78 // during mark termination.
80 // We're only interested in scanning the in-use spans,
81 // which will all be swept at this point. More spans
82 // may be added to this list during concurrent GC, but
83 // we only care about spans that were allocated before
85 work
.nSpanRoots
= mheap_
.sweepSpans
[mheap_
.sweepgen
/2%2
].numBlocks()
87 // On the first markroot, we need to scan all Gs. Gs
88 // may be created after this point, but it's okay that
89 // we ignore them because they begin life without any
90 // roots, so there's nothing to scan, and any roots
91 // they create during the concurrent phase will be
92 // scanned during mark termination. During mark
93 // termination, allglen isn't changing, so we'll scan
95 work
.nStackRoots
= int(atomic
.Loaduintptr(&allglen
))
97 // We've already scanned span roots and kept the scan
98 // up-to-date during concurrent mark.
101 // The hybrid barrier ensures that stacks can't
102 // contain pointers to unmarked objects, so on the
103 // second markroot, there's no need to scan stacks.
106 if debug
.gcrescanstacks
> 0 {
107 // Scan stacks anyway for debugging.
108 work
.nStackRoots
= int(atomic
.Loaduintptr(&allglen
))
112 work
.markrootNext
= 0
113 work
.markrootJobs
= uint32(fixedRootCount
+ work
.nFlushCacheRoots
+ work
.nDataRoots
+ work
.nSpanRoots
+ work
.nStackRoots
)
116 // gcMarkRootCheck checks that all roots have been scanned. It is
117 // purely for debugging.
118 func gcMarkRootCheck() {
119 if work
.markrootNext
< work
.markrootJobs
{
120 print(work
.markrootNext
, " of ", work
.markrootJobs
, " markroot jobs done\n")
121 throw("left over markroot jobs")
125 // Check that stacks have been scanned.
127 if gcphase
== _GCmarktermination
&& debug
.gcrescanstacks
> 0 {
128 for i
:= 0; i
< len(allgs
); i
++ {
130 if !(gp
.gcscandone
&& gp
.gcscanvalid
) && readgstatus(gp
) != _Gdead
{
135 for i
:= 0; i
< work
.nStackRoots
; i
++ {
146 println("gp", gp
, "goid", gp
.goid
,
147 "status", readgstatus(gp
),
148 "gcscandone", gp
.gcscandone
,
149 "gcscanvalid", gp
.gcscanvalid
)
150 unlock(&allglock
) // Avoid self-deadlock with traceback.
151 throw("scan missed a g")
// oneptrmask is the pointer bitmask for an allocation containing a
// single pointer: one word, and that word is a pointer. It is passed
// to scanblock when scanning roots that are known to hold exactly one
// pointer (e.g. a finalizer's fn field).
var oneptrmask = [...]uint8{1}
157 // markroot scans the i'th root.
159 // Preemption must be disabled (because this uses a gcWork).
161 // nowritebarrier is only advisory here.
164 func markroot(gcw
*gcWork
, i
uint32) {
165 // TODO(austin): This is a bit ridiculous. Compute and store
166 // the bases in gcMarkRootPrepare instead of the counts.
167 baseFlushCache
:= uint32(fixedRootCount
)
168 baseData
:= baseFlushCache
+ uint32(work
.nFlushCacheRoots
)
169 baseSpans
:= baseData
+ uint32(work
.nDataRoots
)
170 baseStacks
:= baseSpans
+ uint32(work
.nSpanRoots
)
171 end
:= baseStacks
+ uint32(work
.nStackRoots
)
173 // Note: if you add a case here, please also update heapdump.go:dumproots.
175 case baseFlushCache
<= i
&& i
< baseData
:
176 flushmcache(int(i
- baseFlushCache
))
178 case baseData
<= i
&& i
< baseSpans
:
183 markrootBlock(roots
, gcw
)
190 case i
== fixedRootFinalizers
:
191 // Only do this once per GC cycle since we don't call
192 // queuefinalizer during marking.
193 if work
.markrootDone
{
196 for fb
:= allfin
; fb
!= nil; fb
= fb
.alllink
{
197 cnt
:= uintptr(atomic
.Load(&fb
.cnt
))
198 scanblock(uintptr(unsafe
.Pointer(&fb
.fin
[0])), cnt
*unsafe
.Sizeof(fb
.fin
[0]), &finptrmask
[0], gcw
)
201 case i
== fixedRootFreeGStacks
:
202 // FIXME: We don't do this for gccgo.
204 case baseSpans
<= i
&& i
< baseStacks
:
205 // mark MSpan.specials
206 markrootSpans(gcw
, int(i
-baseSpans
))
209 // the rest is scanning goroutine stacks
211 if baseStacks
<= i
&& i
< end
{
212 gp
= allgs
[i
-baseStacks
]
214 throw("markroot: bad index")
217 // remember when we've first observed the G blocked
218 // needed only to output in traceback
219 status
:= readgstatus(gp
) // We are not in a scan state
220 if (status
== _Gwaiting || status
== _Gsyscall
) && gp
.waitsince
== 0 {
221 gp
.waitsince
= work
.tstart
224 // scang must be done on the system stack in case
225 // we're trying to scan our own stack.
227 // If this is a self-scan, put the user G in
228 // _Gwaiting to prevent self-deadlock. It may
229 // already be in _Gwaiting if this is a mark
230 // worker or we're in mark termination.
231 userG
:= getg().m
.curg
232 selfScan
:= gp
== userG
&& readgstatus(userG
) == _Grunning
234 casgstatus(userG
, _Grunning
, _Gwaiting
)
235 userG
.waitreason
= waitReasonGarbageCollectionScan
238 // TODO: scang blocks until gp's stack has
239 // been scanned, which may take a while for
240 // running goroutines. Consider doing this in
241 // two phases where the first is non-blocking:
242 // we scan the stacks we can and ask running
243 // goroutines to scan themselves; and the
248 casgstatus(userG
, _Gwaiting
, _Grunning
)
254 // markrootBlock scans one element of the list of GC roots.
257 func markrootBlock(roots
*gcRootList
, gcw
*gcWork
) {
258 for i
:= 0; i
< roots
.count
; i
++ {
260 scanblock(uintptr(r
.decl
), r
.ptrdata
, r
.gcdata
, gcw
)
264 // markrootSpans marks roots for one shard of work.spans.
267 func markrootSpans(gcw
*gcWork
, shard
int) {
268 // Objects with finalizers have two GC-related invariants:
270 // 1) Everything reachable from the object must be marked.
271 // This ensures that when we pass the object to its finalizer,
272 // everything the finalizer can reach will be retained.
274 // 2) Finalizer specials (which are not in the garbage
275 // collected heap) are roots. In practice, this means the fn
276 // field must be scanned.
278 // TODO(austin): There are several ideas for making this more
279 // efficient in issue #11485.
281 if work
.markrootDone
{
282 throw("markrootSpans during second markroot")
285 sg
:= mheap_
.sweepgen
286 spans
:= mheap_
.sweepSpans
[mheap_
.sweepgen
/2%2
].block(shard
)
287 // Note that work.spans may not include spans that were
288 // allocated between entering the scan phase and now. This is
289 // okay because any objects with finalizers in those spans
290 // must have been allocated and given finalizers after we
291 // entered the scan phase, so addfinalizer will have ensured
292 // the above invariants for them.
293 for _
, s
:= range spans
{
294 if s
.state
!= mSpanInUse
{
297 if !useCheckmark
&& s
.sweepgen
!= sg
{
298 // sweepgen was updated (+2) during non-checkmark GC pass
299 print("sweep ", s
.sweepgen
, " ", sg
, "\n")
300 throw("gc: unswept span")
303 // Speculatively check if there are any specials
304 // without acquiring the span lock. This may race with
305 // adding the first special to a span, but in that
306 // case addfinalizer will observe that the GC is
307 // active (which is globally synchronized) and ensure
308 // the above invariants. We may also ensure the
309 // invariants, but it's okay to scan an object twice.
310 if s
.specials
== nil {
314 // Lock the specials to prevent a special from being
315 // removed from the list while we're traversing it.
318 for sp
:= s
.specials
; sp
!= nil; sp
= sp
.next
{
319 if sp
.kind
!= _KindSpecialFinalizer
{
322 // don't mark finalized object, but scan it so we
323 // retain everything it points to.
324 spf
:= (*specialfinalizer
)(unsafe
.Pointer(sp
))
325 // A finalizer can be set for an inner byte of an object, find object beginning.
326 p
:= s
.base() + uintptr(spf
.special
.offset
)/s
.elemsize
*s
.elemsize
328 // Mark everything that can be reached from
329 // the object (but *not* the object itself or
330 // we'll never collect it).
333 // The special itself is a root.
334 scanblock(uintptr(unsafe
.Pointer(&spf
.fn
)), sys
.PtrSize
, &oneptrmask
[0], gcw
)
337 unlock(&s
.speciallock
)
341 // gcAssistAlloc performs GC work to make gp's assist debt positive.
342 // gp must be the calling user goroutine.
344 // This must be called with preemption enabled.
345 func gcAssistAlloc(gp
*g
) {
346 // Don't assist in non-preemptible contexts. These are
347 // generally fragile and won't allow the assist to block.
348 if getg() == gp
.m
.g0
{
351 if mp
:= getg().m
; mp
.locks
> 0 || mp
.preemptoff
!= "" {
357 // Compute the amount of scan work we need to do to make the
358 // balance positive. When the required amount of work is low,
359 // we over-assist to build up credit for future allocations
360 // and amortize the cost of assisting.
361 debtBytes
:= -gp
.gcAssistBytes
362 scanWork
:= int64(gcController
.assistWorkPerByte
* float64(debtBytes
))
363 if scanWork
< gcOverAssistWork
{
364 scanWork
= gcOverAssistWork
365 debtBytes
= int64(gcController
.assistBytesPerWork
* float64(scanWork
))
368 // Steal as much credit as we can from the background GC's
369 // scan credit. This is racy and may drop the background
370 // credit below 0 if two mutators steal at the same time. This
371 // will just cause steals to fail until credit is accumulated
372 // again, so in the long run it doesn't really matter, but we
373 // do have to handle the negative credit case.
374 bgScanCredit
:= atomic
.Loadint64(&gcController
.bgScanCredit
)
376 if bgScanCredit
> 0 {
377 if bgScanCredit
< scanWork
{
378 stolen
= bgScanCredit
379 gp
.gcAssistBytes
+= 1 + int64(gcController
.assistBytesPerWork
*float64(stolen
))
382 gp
.gcAssistBytes
+= debtBytes
384 atomic
.Xaddint64(&gcController
.bgScanCredit
, -stolen
)
389 // We were able to steal all of the credit we
392 traceGCMarkAssistDone()
398 if trace
.enabled
&& !traced
{
400 traceGCMarkAssistStart()
403 // Perform assist work
405 gcAssistAlloc1(gp
, scanWork
)
406 // The user stack may have moved, so this can't touch
407 // anything on it until it returns from systemstack.
410 completed
:= gp
.param
!= nil
416 if gp
.gcAssistBytes
< 0 {
417 // We were unable steal enough credit or perform
418 // enough work to pay off the assist debt. We need to
419 // do one of these before letting the mutator allocate
420 // more to prevent over-allocation.
422 // If this is because we were preempted, reschedule
423 // and try some more.
429 // Add this G to an assist queue and park. When the GC
430 // has more background credit, it will satisfy queued
431 // assists before flushing to the global credit pool.
433 // Note that this does *not* get woken up when more
434 // work is added to the work list. The theory is that
435 // there wasn't enough work to do anyway, so we might
436 // as well let background marking take care of the
437 // work that is available.
442 // At this point either background GC has satisfied
443 // this G's assist debt, or the GC cycle is over.
446 traceGCMarkAssistDone()
450 // gcAssistAlloc1 is the part of gcAssistAlloc that runs on the system
451 // stack. This is a separate function to make it easier to see that
452 // we're not capturing anything from the user stack, since the user
453 // stack may move while we're in this function.
455 // gcAssistAlloc1 indicates whether this assist completed the mark
456 // phase by setting gp.param to non-nil. This can't be communicated on
457 // the stack since it may move.
460 func gcAssistAlloc1(gp
*g
, scanWork
int64) {
461 // Clear the flag indicating that this assist completed the
465 if atomic
.Load(&gcBlackenEnabled
) == 0 {
466 // The gcBlackenEnabled check in malloc races with the
467 // store that clears it but an atomic check in every malloc
468 // would be a performance hit.
469 // Instead we recheck it here on the non-preemptable system
470 // stack to determine if we should perform an assist.
472 // GC is done, so ignore any remaining debt.
476 // Track time spent in this assist. Since we're on the
477 // system stack, this is non-preemptible, so we can
478 // just measure start and end time.
479 startTime
:= nanotime()
481 decnwait
:= atomic
.Xadd(&work
.nwait
, -1)
482 if decnwait
== work
.nproc
{
483 println("runtime: work.nwait =", decnwait
, "work.nproc=", work
.nproc
)
484 throw("nwait > work.nprocs")
487 // gcDrainN requires the caller to be preemptible.
488 casgstatus(gp
, _Grunning
, _Gwaiting
)
489 gp
.waitreason
= waitReasonGCAssistMarking
491 // drain own cached work first in the hopes that it
492 // will be more cache friendly.
493 gcw
:= &getg().m
.p
.ptr().gcw
494 workDone
:= gcDrainN(gcw
, scanWork
)
495 // If we are near the end of the mark phase
496 // dispose of the gcw.
497 if gcBlackenPromptly
{
501 casgstatus(gp
, _Gwaiting
, _Grunning
)
503 // Record that we did this much scan work.
505 // Back out the number of bytes of assist credit that
506 // this scan work counts for. The "1+" is a poor man's
507 // round-up, to ensure this adds credit even if
508 // assistBytesPerWork is very low.
509 gp
.gcAssistBytes
+= 1 + int64(gcController
.assistBytesPerWork
*float64(workDone
))
511 // If this is the last worker and we ran out of work,
512 // signal a completion point.
513 incnwait
:= atomic
.Xadd(&work
.nwait
, +1)
514 if incnwait
> work
.nproc
{
515 println("runtime: work.nwait=", incnwait
,
516 "work.nproc=", work
.nproc
,
517 "gcBlackenPromptly=", gcBlackenPromptly
)
518 throw("work.nwait > work.nproc")
521 if incnwait
== work
.nproc
&& !gcMarkWorkAvailable(nil) {
522 // This has reached a background completion point. Set
523 // gp.param to a non-nil value to indicate this. It
524 // doesn't matter what we set it to (it just has to be
526 gp
.param
= unsafe
.Pointer(gp
)
528 duration
:= nanotime() - startTime
530 _p_
.gcAssistTime
+= duration
531 if _p_
.gcAssistTime
> gcAssistTimeSlack
{
532 atomic
.Xaddint64(&gcController
.assistTime
, _p_
.gcAssistTime
)
537 // gcWakeAllAssists wakes all currently blocked assists. This is used
538 // at the end of a GC cycle. gcBlackenEnabled must be false to prevent
539 // new assists from going to sleep after this point.
540 func gcWakeAllAssists() {
541 lock(&work
.assistQueue
.lock
)
542 injectglist(work
.assistQueue
.head
.ptr())
543 work
.assistQueue
.head
.set(nil)
544 work
.assistQueue
.tail
.set(nil)
545 unlock(&work
.assistQueue
.lock
)
548 // gcParkAssist puts the current goroutine on the assist queue and parks.
550 // gcParkAssist returns whether the assist is now satisfied. If it
551 // returns false, the caller must retry the assist.
554 func gcParkAssist() bool {
555 lock(&work
.assistQueue
.lock
)
556 // If the GC cycle finished while we were getting the lock,
557 // exit the assist. The cycle can't finish while we hold the
559 if atomic
.Load(&gcBlackenEnabled
) == 0 {
560 unlock(&work
.assistQueue
.lock
)
565 oldHead
, oldTail
:= work
.assistQueue
.head
, work
.assistQueue
.tail
567 work
.assistQueue
.head
.set(gp
)
569 oldTail
.ptr().schedlink
.set(gp
)
571 work
.assistQueue
.tail
.set(gp
)
572 gp
.schedlink
.set(nil)
574 // Recheck for background credit now that this G is in
575 // the queue, but can still back out. This avoids a
576 // race in case background marking has flushed more
577 // credit since we checked above.
578 if atomic
.Loadint64(&gcController
.bgScanCredit
) > 0 {
579 work
.assistQueue
.head
= oldHead
580 work
.assistQueue
.tail
= oldTail
582 oldTail
.ptr().schedlink
.set(nil)
584 unlock(&work
.assistQueue
.lock
)
588 goparkunlock(&work
.assistQueue
.lock
, waitReasonGCAssistWait
, traceEvGoBlockGC
, 2)
592 // gcFlushBgCredit flushes scanWork units of background scan work
593 // credit. This first satisfies blocked assists on the
594 // work.assistQueue and then flushes any remaining credit to
595 // gcController.bgScanCredit.
597 // Write barriers are disallowed because this is used by gcDrain after
598 // it has ensured that all work is drained and this must preserve that
601 //go:nowritebarrierrec
602 func gcFlushBgCredit(scanWork
int64) {
603 if work
.assistQueue
.head
== 0 {
604 // Fast path; there are no blocked assists. There's a
605 // small window here where an assist may add itself to
606 // the blocked queue and park. If that happens, we'll
607 // just get it on the next flush.
608 atomic
.Xaddint64(&gcController
.bgScanCredit
, scanWork
)
612 scanBytes
:= int64(float64(scanWork
) * gcController
.assistBytesPerWork
)
614 lock(&work
.assistQueue
.lock
)
615 gp
:= work
.assistQueue
.head
.ptr()
616 for gp
!= nil && scanBytes
> 0 {
617 // Note that gp.gcAssistBytes is negative because gp
618 // is in debt. Think carefully about the signs below.
619 if scanBytes
+gp
.gcAssistBytes
>= 0 {
620 // Satisfy this entire assist debt.
621 scanBytes
+= gp
.gcAssistBytes
624 gp
= gp
.schedlink
.ptr()
625 // It's important that we *not* put xgp in
626 // runnext. Otherwise, it's possible for user
627 // code to exploit the GC worker's high
628 // scheduler priority to get itself always run
629 // before other goroutines and always in the
630 // fresh quantum started by GC.
633 // Partially satisfy this assist.
634 gp
.gcAssistBytes
+= scanBytes
636 // As a heuristic, we move this assist to the
637 // back of the queue so that large assists
638 // can't clog up the assist queue and
639 // substantially delay small assists.
641 gp
= gp
.schedlink
.ptr()
643 // gp is the only assist in the queue.
647 work
.assistQueue
.tail
.ptr().schedlink
.set(xgp
)
648 work
.assistQueue
.tail
.set(xgp
)
653 work
.assistQueue
.head
.set(gp
)
655 work
.assistQueue
.tail
.set(nil)
659 // Convert from scan bytes back to work.
660 scanWork
= int64(float64(scanBytes
) * gcController
.assistWorkPerByte
)
661 atomic
.Xaddint64(&gcController
.bgScanCredit
, scanWork
)
663 unlock(&work
.assistQueue
.lock
)
666 // We use a C function to find the stack.
667 func doscanstack(*g
, *gcWork
)
669 // scanstack scans gp's stack, greying all pointers found on the stack.
671 // scanstack is marked go:systemstack because it must not be preempted
672 // while using a workbuf.
676 func scanstack(gp
*g
, gcw
*gcWork
) {
681 if readgstatus(gp
)&_Gscan
== 0 {
682 print("runtime:scanstack: gp=", gp
, ", goid=", gp
.goid
, ", gp->atomicstatus=", hex(readgstatus(gp
)), "\n")
683 throw("scanstack - bad status")
686 switch readgstatus(gp
) &^ _Gscan
{
688 print("runtime: gp=", gp
, ", goid=", gp
.goid
, ", gp->atomicstatus=", readgstatus(gp
), "\n")
689 throw("mark - bad status")
693 // ok for gccgo, though not for gc.
694 case _Grunnable
, _Gsyscall
, _Gwaiting
:
699 if mp
!= nil && mp
.helpgc
!= 0 {
700 throw("can't scan gchelper stack")
706 // Conservatively scan the saved register values.
707 scanstackblock(uintptr(unsafe
.Pointer(&gp
.gcregs
)), unsafe
.Sizeof(gp
.gcregs
), gcw
)
708 scanstackblock(uintptr(unsafe
.Pointer(&gp
.context
)), unsafe
.Sizeof(gp
.context
), gcw
)
710 gp
.gcscanvalid
= true
713 type gcDrainFlags
int
716 gcDrainUntilPreempt gcDrainFlags
= 1 << iota
722 // gcDrainBlock means neither gcDrainUntilPreempt nor
723 // gcDrainNoBlock. It is the default, but callers should use
724 // the constant for documentation purposes.
725 gcDrainBlock gcDrainFlags
= 0
728 // gcDrain scans roots and objects in work buffers, blackening grey
729 // objects until all roots and work buffers have been drained.
731 // If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt
732 // is set. This implies gcDrainNoBlock.
734 // If flags&gcDrainIdle != 0, gcDrain returns when there is other work
735 // to do. This implies gcDrainNoBlock.
737 // If flags&gcDrainFractional != 0, gcDrain self-preempts when
738 // pollFractionalWorkerExit() returns true. This implies
741 // If flags&gcDrainNoBlock != 0, gcDrain returns as soon as it is
742 // unable to get more work. Otherwise, it will block until all
743 // blocking calls are blocked in gcDrain.
745 // If flags&gcDrainFlushBgCredit != 0, gcDrain flushes scan work
746 // credit to gcController.bgScanCredit every gcCreditSlack units of
750 func gcDrain(gcw
*gcWork
, flags gcDrainFlags
) {
751 if !writeBarrier
.needed
{
752 throw("gcDrain phase incorrect")
756 preemptible
:= flags
&gcDrainUntilPreempt
!= 0
757 blocking
:= flags
&(gcDrainUntilPreempt|gcDrainIdle|gcDrainFractional|gcDrainNoBlock
) == 0
758 flushBgCredit
:= flags
&gcDrainFlushBgCredit
!= 0
759 idle
:= flags
&gcDrainIdle
!= 0
761 initScanWork
:= gcw
.scanWork
763 // checkWork is the scan work before performing the next
764 // self-preempt check.
765 checkWork
:= int64(1<<63 - 1)
766 var check
func() bool
767 if flags
&(gcDrainIdle|gcDrainFractional
) != 0 {
768 checkWork
= initScanWork
+ drainCheckThreshold
771 } else if flags
&gcDrainFractional
!= 0 {
772 check
= pollFractionalWorkerExit
776 // Drain root marking jobs.
777 if work
.markrootNext
< work
.markrootJobs
{
778 for !(preemptible
&& gp
.preempt
) {
779 job
:= atomic
.Xadd(&work
.markrootNext
, +1) - 1
780 if job
>= work
.markrootJobs
{
784 if check
!= nil && check() {
790 // Drain heap marking jobs.
791 for !(preemptible
&& gp
.preempt
) {
792 // Try to keep work available on the global queue. We used to
793 // check if there were waiting workers, but it's better to
794 // just keep work available than to make workers wait. In the
795 // worst case, we'll do O(log(_WorkbufSize)) unnecessary
811 // work barrier reached or tryGet failed.
816 // Flush background scan work credit to the global
817 // account if we've accumulated enough locally so
818 // mutator assists can draw on it.
819 if gcw
.scanWork
>= gcCreditSlack
{
820 atomic
.Xaddint64(&gcController
.scanWork
, gcw
.scanWork
)
822 gcFlushBgCredit(gcw
.scanWork
- initScanWork
)
825 checkWork
-= gcw
.scanWork
829 checkWork
+= drainCheckThreshold
830 if check
!= nil && check() {
837 // In blocking mode, write barriers are not allowed after this
838 // point because we must preserve the condition that the work
839 // buffers are empty.
842 // Flush remaining scan work credit.
843 if gcw
.scanWork
> 0 {
844 atomic
.Xaddint64(&gcController
.scanWork
, gcw
.scanWork
)
846 gcFlushBgCredit(gcw
.scanWork
- initScanWork
)
852 // gcDrainN blackens grey objects until it has performed roughly
853 // scanWork units of scan work or the G is preempted. This is
854 // best-effort, so it may perform less work if it fails to get a work
855 // buffer. Otherwise, it will perform at least n units of work, but
856 // may perform more because scanning is always done in whole object
857 // increments. It returns the amount of scan work performed.
859 // The caller goroutine must be in a preemptible state (e.g.,
860 // _Gwaiting) to prevent deadlocks during stack scanning. As a
861 // consequence, this must be called on the system stack.
865 func gcDrainN(gcw
*gcWork
, scanWork
int64) int64 {
866 if !writeBarrier
.needed
{
867 throw("gcDrainN phase incorrect")
870 // There may already be scan work on the gcw, which we don't
871 // want to claim was done by this call.
872 workFlushed
:= -gcw
.scanWork
875 for !gp
.preempt
&& workFlushed
+gcw
.scanWork
< scanWork
{
876 // See gcDrain comment.
881 // This might be a good place to add prefetch code...
882 // if(wbuf.nobj > 4) {
883 // PREFETCH(wbuf->obj[wbuf.nobj - 3];
886 b
:= gcw
.tryGetFast()
892 // Try to do a root job.
894 // TODO: Assists should get credit for this
896 if work
.markrootNext
< work
.markrootJobs
{
897 job
:= atomic
.Xadd(&work
.markrootNext
, +1) - 1
898 if job
< work
.markrootJobs
{
903 // No heap or root jobs.
908 // Flush background scan work credit.
909 if gcw
.scanWork
>= gcCreditSlack
{
910 atomic
.Xaddint64(&gcController
.scanWork
, gcw
.scanWork
)
911 workFlushed
+= gcw
.scanWork
916 // Unlike gcDrain, there's no need to flush remaining work
917 // here because this never flushes to bgScanCredit and
918 // gcw.dispose will flush any remaining work to scanWork.
920 return workFlushed
+ gcw
.scanWork
923 // scanblock scans b as scanobject would, but using an explicit
924 // pointer bitmap instead of the heap bitmap.
926 // This is used to scan non-heap roots, so it does not update
927 // gcw.bytesMarked or gcw.scanWork.
930 func scanblock(b0
, n0
uintptr, ptrmask
*uint8, gcw
*gcWork
) {
931 // Use local copies of original parameters, so that a stack trace
932 // due to one of the throws below shows the original block
937 for i
:= uintptr(0); i
< n
; {
938 // Find bits for the next word.
939 bits
:= uint32(*addb(ptrmask
, i
/(sys
.PtrSize
*8)))
944 for j
:= 0; j
< 8 && i
< n
; j
++ {
946 // Same work as in scanobject; see comments there.
947 obj
:= *(*uintptr)(unsafe
.Pointer(b
+ i
))
949 if obj
, span
, objIndex
:= findObject(obj
, b
, i
, false); obj
!= 0 {
950 greyobject(obj
, b
, i
, span
, gcw
, objIndex
, false)
960 // scanobject scans the object starting at b, adding pointers to gcw.
961 // b must point to the beginning of a heap object or an oblet.
962 // scanobject consults the GC bitmap for the pointer mask and the
963 // spans for the size of the object.
966 func scanobject(b
uintptr, gcw
*gcWork
) {
967 // Find the bits for b and the size of the object at b.
969 // b is either the beginning of an object, in which case this
970 // is the size of the object to scan, or it points to an
971 // oblet, in which case we compute the size to scan below.
972 hbits
:= heapBitsForAddr(b
)
973 s
:= spanOfUnchecked(b
)
976 throw("scanobject n == 0")
979 if n
> maxObletBytes
{
980 // Large object. Break into oblets for better
981 // parallelism and lower latency.
983 // It's possible this is a noscan object (not
984 // from greyobject, but from other code
985 // paths), in which case we must *not* enqueue
986 // oblets since their bitmaps will be
988 if s
.spanclass
.noscan() {
989 // Bypass the whole scan.
990 gcw
.bytesMarked
+= uint64(n
)
994 // Enqueue the other oblets to scan later.
995 // Some oblets may be in b's scalar tail, but
996 // these will be marked as "no more pointers",
997 // so we'll drop out immediately when we go to
999 for oblet
:= b
+ maxObletBytes
; oblet
< s
.base()+s
.elemsize
; oblet
+= maxObletBytes
{
1000 if !gcw
.putFast(oblet
) {
1006 // Compute the size of the oblet. Since this object
1007 // must be a large object, s.base() is the beginning
1009 n
= s
.base() + s
.elemsize
- b
1010 if n
> maxObletBytes
{
1016 for i
= 0; i
< n
; i
+= sys
.PtrSize
{
1017 // Find bits for this word.
1019 // Avoid needless hbits.next() on last iteration.
1020 hbits
= hbits
.next()
1022 // Load bits once. See CL 22712 and issue 16973 for discussion.
1023 bits
:= hbits
.bits()
1024 // During checkmarking, 1-word objects store the checkmark
1025 // in the type bit for the one word. The only one-word objects
1026 // are pointers, or else they'd be merged with other non-pointer
1027 // data into larger allocations.
1028 if i
!= 1*sys
.PtrSize
&& bits
&bitScan
== 0 {
1029 break // no more pointers in this object
1031 if bits
&bitPointer
== 0 {
1032 continue // not a pointer
1035 // Work here is duplicated in scanblock and above.
1036 // If you make changes here, make changes there too.
1037 obj
:= *(*uintptr)(unsafe
.Pointer(b
+ i
))
1039 // At this point we have extracted the next potential pointer.
1040 // Quickly filter out nil and pointers back to the current object.
1041 if obj
!= 0 && obj
-b
>= n
{
1042 // Test if obj points into the Go heap and, if so,
1045 // Note that it's possible for findObject to
1046 // fail if obj points to a just-allocated heap
1047 // object because of a race with growing the
1048 // heap. In this case, we know the object was
1049 // just allocated and hence will be marked by
1050 // allocation itself.
1051 if obj
, span
, objIndex
:= findObject(obj
, b
, i
, false); obj
!= 0 {
1052 greyobject(obj
, b
, i
, span
, gcw
, objIndex
, false)
1056 gcw
.bytesMarked
+= uint64(n
)
1057 gcw
.scanWork
+= int64(i
)
1060 //go:linkname scanstackblock runtime.scanstackblock
1062 // scanstackblock is called by the stack scanning code in C to
1063 // actually find and mark pointers in the stack block. This is like
1064 // scanblock, but we scan the stack conservatively, so there is no
1065 // bitmask of pointers.
1066 func scanstackblock(b
, n
uintptr, gcw
*gcWork
) {
1067 for i
:= uintptr(0); i
< n
; i
+= sys
.PtrSize
{
1068 // Same work as in scanobject; see comments there.
1069 obj
:= *(*uintptr)(unsafe
.Pointer(b
+ i
))
1070 if obj
, span
, objIndex
:= findObject(obj
, b
, i
, true); obj
!= 0 {
1071 greyobject(obj
, b
, i
, span
, gcw
, objIndex
, true)
1076 // Shade the object if it isn't already.
1077 // The object is not nil and known to be in the heap.
1078 // Preemption must be disabled.
1080 func shade(b
uintptr) {
1081 if obj
, span
, objIndex
:= findObject(b
, 0, 0, true); obj
!= 0 {
1082 gcw
:= &getg().m
.p
.ptr().gcw
1083 greyobject(obj
, 0, 0, span
, gcw
, objIndex
, true)
1084 if gcphase
== _GCmarktermination || gcBlackenPromptly
{
1085 // Ps aren't allowed to cache work during mark
1092 // obj is the start of an object with mark mbits.
1093 // If it isn't already marked, mark it and enqueue into gcw.
1094 // base and off are for debugging only and could be removed.
1096 // See also wbBufFlush1, which partially duplicates this logic.
1098 //go:nowritebarrierrec
1099 func greyobject(obj
, base
, off
uintptr, span
*mspan
, gcw
*gcWork
, objIndex
uintptr, forStack
bool) {
1100 // obj should be start of allocation, and so must be at least pointer-aligned.
1101 if obj
&(sys
.PtrSize
-1) != 0 {
1102 throw("greyobject: obj not pointer-aligned")
1104 mbits
:= span
.markBitsForIndex(objIndex
)
1107 if !mbits
.isMarked() {
1108 // Stack scanning is conservative, so we can
1109 // see a reference to an object not previously
1110 // found. Assume the object was correctly not
1111 // marked and ignore the pointer.
1116 print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj
), "\n")
1117 print("runtime: found obj at *(", hex(base
), "+", hex(off
), ")\n")
1119 // Dump the source (base) object
1120 gcDumpObject("base", base
, off
)
1123 gcDumpObject("obj", obj
, ^uintptr(0))
1125 getg().m
.traceback
= 2
1126 throw("checkmark found unmarked object")
1128 hbits
:= heapBitsForAddr(obj
)
1129 if hbits
.isCheckmarked(span
.elemsize
) {
1132 hbits
.setCheckmarked(span
.elemsize
)
1133 if !hbits
.isCheckmarked(span
.elemsize
) {
1134 throw("setCheckmarked and isCheckmarked disagree")
1137 // Stack scanning is conservative, so we can see a
1138 // pointer to a free object. Assume the object was
1139 // correctly freed and we must ignore the pointer.
1140 if forStack
&& span
.isFree(objIndex
) {
1144 if debug
.gccheckmark
> 0 && span
.isFree(objIndex
) {
1145 print("runtime: marking free object ", hex(obj
), " found at *(", hex(base
), "+", hex(off
), ")\n")
1146 gcDumpObject("base", base
, off
)
1147 gcDumpObject("obj", obj
, ^uintptr(0))
1148 getg().m
.traceback
= 2
1149 throw("marking free object")
1152 // If marked we have nothing to do.
1153 if mbits
.isMarked() {
1156 // mbits.setMarked() // Avoid extra call overhead with manual inlining.
1157 atomic
.Or8(mbits
.bytep
, mbits
.mask
)
1158 // If this is a noscan object, fast-track it to black
1159 // instead of greying it.
1160 if span
.spanclass
.noscan() {
1161 gcw
.bytesMarked
+= uint64(span
.elemsize
)
1166 // Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
1167 // seems like a nice optimization that can be added back in.
1168 // There needs to be time between the PREFETCH and the use.
1169 // Previously we put the obj in an 8 element buffer that is drained at a rate
1170 // to give the PREFETCH time to do its work.
1171 // Use of PREFETCHNTA might be more appropriate than PREFETCH
1172 if !gcw
.putFast(obj
) {
1177 // gcDumpObject dumps the contents of obj for debugging and marks the
1178 // field at byte offset off in obj.
1179 func gcDumpObject(label
string, obj
, off
uintptr) {
1181 print(label
, "=", hex(obj
))
1186 print(" s.base()=", hex(s
.base()), " s.limit=", hex(s
.limit
), " s.spanclass=", s
.spanclass
, " s.elemsize=", s
.elemsize
, " s.state=")
1187 if 0 <= s
.state
&& int(s
.state
) < len(mSpanStateNames
) {
1188 print(mSpanStateNames
[s
.state
], "\n")
1190 print("unknown(", s
.state
, ")\n")
1195 if s
.state
== _MSpanManual
&& size
== 0 {
1196 // We're printing something from a stack frame. We
1197 // don't know how big it is, so just show up to an
1199 size
= off
+ sys
.PtrSize
1201 for i
:= uintptr(0); i
< size
; i
+= sys
.PtrSize
{
1202 // For big objects, just print the beginning (because
1203 // that usually hints at the object's type) and the
1204 // fields around off.
1205 if !(i
< 128*sys
.PtrSize || off
-16*sys
.PtrSize
< i
&& i
< off
+16*sys
.PtrSize
) {
1213 print(" *(", label
, "+", i
, ") = ", hex(*(*uintptr)(unsafe
.Pointer(obj
+ i
))))
1224 // gcmarknewobject marks a newly allocated object black. obj must
1225 // not contain any non-nil pointers.
1227 // This is nosplit so it can manipulate a gcWork without preemption.
1231 func gcmarknewobject(obj
, size
, scanSize
uintptr) {
1232 if useCheckmark
&& !gcBlackenPromptly
{ // The world should be stopped so this should not happen.
1233 throw("gcmarknewobject called while doing checkmark")
1235 markBitsForAddr(obj
).setMarked()
1236 gcw
:= &getg().m
.p
.ptr().gcw
1237 gcw
.bytesMarked
+= uint64(size
)
1238 gcw
.scanWork
+= int64(scanSize
)
1239 if gcBlackenPromptly
{
1240 // There shouldn't be anything in the work queue, but
1241 // we still need to flush stats.
1246 // gcMarkTinyAllocs greys all active tiny alloc blocks.
1248 // The world must be stopped.
1249 func gcMarkTinyAllocs() {
1250 for _
, p
:= range allp
{
1252 if c
== nil || c
.tiny
== 0 {
1255 _
, span
, objIndex
:= findObject(c
.tiny
, 0, 0, false)
1257 greyobject(c
.tiny
, 0, 0, span
, gcw
, objIndex
, false)
1258 if gcBlackenPromptly
{
1266 // To help debug the concurrent GC we remark with the world
1267 // stopped ensuring that any object encountered has their normal
1268 // mark bit set. To do this we use an orthogonal bit
1269 // pattern to indicate the object is marked. The following pattern
1270 // uses the upper two bits in the object's boundary nibble.
1271 // 01: scalar not marked
1272 // 10: pointer not marked
1273 // 11: pointer marked
1274 // 00: scalar marked
1275 // Xoring with 01 will flip the pattern from marked to unmarked and vice versa.
1276 // The higher bit is 1 for pointers and 0 for scalars, whether the object
1277 // is marked or not.
1278 // The first nibble no longer holds the typeDead pattern indicating that
// there are no more pointers in the object. This information is held
1280 // in the second nibble.
// If useCheckmark is true, marking of an object uses the
// checkmark bits (encoding above) instead of the standard
// mark bits.
var useCheckmark = false
1288 func initCheckmarks() {
1290 for _
, s
:= range mheap_
.allspans
{
1291 if s
.state
== _MSpanInUse
{
1292 heapBitsForAddr(s
.base()).initCheckmarkSpan(s
.layout())
1297 func clearCheckmarks() {
1298 useCheckmark
= false
1299 for _
, s
:= range mheap_
.allspans
{
1300 if s
.state
== _MSpanInUse
{
1301 heapBitsForAddr(s
.base()).clearCheckmarkSpan(s
.layout())