// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: marking and scanning

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)
const (
	fixedRootFinalizers = iota
	fixedRootFreeGStacks
	fixedRootCount

	// rootBlockBytes is the number of bytes to scan per data or
	// BSS root.
	rootBlockBytes = 256 << 10

	// rootBlockSpans is the number of spans to scan per span
	// root.
	rootBlockSpans = 8 * 1024 // 64MB worth of spans

	// maxObletBytes is the maximum bytes of an object to scan at
	// once. Larger objects will be split up into "oblets" of at
	// most this size. Since we can scan 1–2 MB/ms, 128 KB bounds
	// scan preemption at ~100 µs.
	//
	// This must be > _MaxSmallSize so that the object base is the
	// span base.
	maxObletBytes = 128 << 10

	// drainCheckThreshold specifies how many units of work to do
	// between self-preemption checks in gcDrain. Assuming a scan
	// rate of 1 MB/ms, this is ~100 µs. Lower values have higher
	// overhead in the scan loop (the scheduler check may perform
	// a syscall, so its overhead is nontrivial). Higher values
	// make the system less responsive to incoming work.
	drainCheckThreshold = 100000
)
// gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
// some miscellany) and initializes scanning-related state.
//
// The caller must have called gcCopySpans().
//
// The world must be stopped.
func gcMarkRootPrepare() {
	if gcphase == _GCmarktermination {
		work.nFlushCacheRoots = int(gomaxprocs)
	} else {
		work.nFlushCacheRoots = 0
	}

	// Only scan globals once per cycle; preferably concurrently.
	if !work.markrootDone {
		// ...
	}

	if !work.markrootDone {
		// On the first markroot, we need to scan span roots.
		// In concurrent GC, this happens during concurrent
		// mark and we depend on addfinalizer to ensure the
		// above invariants for objects that get finalizers
		// after concurrent mark. In STW GC, this will happen
		// during mark termination.
		//
		// We're only interested in scanning the in-use spans,
		// which will all be swept at this point. More spans
		// may be added to this list during concurrent GC, but
		// we only care about spans that were allocated before
		// we started marking.
		work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()

		// On the first markroot, we need to scan all Gs. Gs
		// may be created after this point, but it's okay that
		// we ignore them because they begin life without any
		// roots, so there's nothing to scan, and any roots
		// they create during the concurrent phase will be
		// scanned during mark termination. During mark
		// termination, allglen isn't changing, so we'll scan
		// all Gs.
		work.nStackRoots = int(atomic.Loaduintptr(&allglen))
	} else {
		// We've already scanned span roots and kept the scan
		// up-to-date during concurrent mark.

		// The hybrid barrier ensures that stacks can't
		// contain pointers to unmarked objects, so on the
		// second markroot, there's no need to scan stacks.

		if debug.gcrescanstacks > 0 {
			// Scan stacks anyway for debugging.
			work.nStackRoots = int(atomic.Loaduintptr(&allglen))
		}
	}

	work.markrootNext = 0
	work.markrootJobs = uint32(fixedRootCount + work.nFlushCacheRoots + work.nDataRoots + work.nSpanRoots + work.nStackRoots)
}
// gcMarkRootCheck checks that all roots have been scanned. It is
// purely for debugging.
func gcMarkRootCheck() {
	if work.markrootNext < work.markrootJobs {
		print(work.markrootNext, " of ", work.markrootJobs, " markroot jobs done\n")
		throw("left over markroot jobs")
	}

	lock(&allglock)
	// Check that stacks have been scanned.
	var gp *g
	if gcphase == _GCmarktermination && debug.gcrescanstacks > 0 {
		for i := 0; i < len(allgs); i++ {
			gp = allgs[i]
			if !(gp.gcscandone && gp.gcscanvalid) && readgstatus(gp) != _Gdead {
				goto fail
			}
		}
	} else {
		for i := 0; i < work.nStackRoots; i++ {
			gp = allgs[i]
			if !gp.gcscandone {
				goto fail
			}
		}
	}
	unlock(&allglock)
	return

fail:
	println("gp", gp, "goid", gp.goid,
		"status", readgstatus(gp),
		"gcscandone", gp.gcscandone,
		"gcscanvalid", gp.gcscanvalid)
	unlock(&allglock) // Avoid self-deadlock with traceback.
	throw("scan missed a g")
}
// ptrmask for an allocation containing a single pointer.
var oneptrmask = [...]uint8{1}
// markroot scans the i'th root.
//
// Preemption must be disabled (because this uses a gcWork).
//
// nowritebarrier is only advisory here.
//
//go:nowritebarrier
func markroot(gcw *gcWork, i uint32) {
	// TODO(austin): This is a bit ridiculous. Compute and store
	// the bases in gcMarkRootPrepare instead of the counts.
	baseFlushCache := uint32(fixedRootCount)
	baseData := baseFlushCache + uint32(work.nFlushCacheRoots)
	baseSpans := baseData + uint32(work.nDataRoots)
	baseStacks := baseSpans + uint32(work.nSpanRoots)
	end := baseStacks + uint32(work.nStackRoots)
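
	// For example, with the two fixed roots (finalizers and free G
	// stacks), 4 flush-cache roots, 3 data roots, 5 span shards, and
	// 10 stack roots (illustrative numbers only), the job index space
	// is [0,2) fixed roots, [2,6) mcache flushes, [6,9) data blocks,
	// [9,14) span shards, and [14,24) goroutine stacks, so end ==
	// work.markrootJobs == 24.
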
	// Note: if you add a case here, please also update heapdump.go:dumproots.
	switch {
	case baseFlushCache <= i && i < baseData:
		flushmcache(int(i - baseFlushCache))

	case baseData <= i && i < baseSpans:
		// ...
		markrootBlock(roots, gcw)

	case i == fixedRootFinalizers:
		// Only do this once per GC cycle since we don't call
		// queuefinalizer during marking.
		if work.markrootDone {
			break
		}
		for fb := allfin; fb != nil; fb = fb.alllink {
			cnt := uintptr(atomic.Load(&fb.cnt))
			scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw)
		}

	case i == fixedRootFreeGStacks:
		// FIXME: We don't do this for gccgo.

	case baseSpans <= i && i < baseStacks:
		// mark MSpan.specials
		markrootSpans(gcw, int(i-baseSpans))

	default:
		// the rest is scanning goroutine stacks
		var gp *g
		if baseStacks <= i && i < end {
			gp = allgs[i-baseStacks]
		} else {
			throw("markroot: bad index")
		}

		// remember when we've first observed the G blocked
		// needed only to output in traceback
		status := readgstatus(gp) // We are not in a scan state
		if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
			gp.waitsince = work.tstart
		}

		// scang must be done on the system stack in case
		// we're trying to scan our own stack.
		systemstack(func() {
			// If this is a self-scan, put the user G in
			// _Gwaiting to prevent self-deadlock. It may
			// already be in _Gwaiting if this is a mark
			// worker or we're in mark termination.
			userG := getg().m.curg
			selfScan := gp == userG && readgstatus(userG) == _Grunning
			if selfScan {
				casgstatus(userG, _Grunning, _Gwaiting)
				userG.waitreason = "garbage collection scan"
			}

			// TODO: scang blocks until gp's stack has
			// been scanned, which may take a while for
			// running goroutines. Consider doing this in
			// two phases where the first is non-blocking:
			// we scan the stacks we can and ask running
			// goroutines to scan themselves; and the
			// second blocks.
			scang(gp, gcw)

			if selfScan {
				casgstatus(userG, _Gwaiting, _Grunning)
			}
		})
	}
}
// markrootBlock scans one element of the list of GC roots.
func markrootBlock(roots *gcRootList, gcw *gcWork) {
	for i := 0; i < roots.count; i++ {
		// ...
		scanblock(uintptr(r.decl), r.ptrdata, r.gcdata, gcw)
	}
}
// markrootSpans marks roots for one shard of work.spans.
func markrootSpans(gcw *gcWork, shard int) {
	// Objects with finalizers have two GC-related invariants:
	//
	// 1) Everything reachable from the object must be marked.
	// This ensures that when we pass the object to its finalizer,
	// everything the finalizer can reach will be retained.
	//
	// 2) Finalizer specials (which are not in the garbage
	// collected heap) are roots. In practice, this means the fn
	// field must be scanned.
	//
	// TODO(austin): There are several ideas for making this more
	// efficient in issue #11485.

	if work.markrootDone {
		throw("markrootSpans during second markroot")
	}

	sg := mheap_.sweepgen
	spans := mheap_.sweepSpans[mheap_.sweepgen/2%2].block(shard)
	// Note that work.spans may not include spans that were
	// allocated between entering the scan phase and now. This is
	// okay because any objects with finalizers in those spans
	// must have been allocated and given finalizers after we
	// entered the scan phase, so addfinalizer will have ensured
	// the above invariants for them.
	for _, s := range spans {
		if s.state != mSpanInUse {
			continue
		}
		if !useCheckmark && s.sweepgen != sg {
			// sweepgen was updated (+2) during non-checkmark GC pass
			print("sweep ", s.sweepgen, " ", sg, "\n")
			throw("gc: unswept span")
		}

		// Speculatively check if there are any specials
		// without acquiring the span lock. This may race with
		// adding the first special to a span, but in that
		// case addfinalizer will observe that the GC is
		// active (which is globally synchronized) and ensure
		// the above invariants. We may also ensure the
		// invariants, but it's okay to scan an object twice.
		if s.specials == nil {
			continue
		}

		// Lock the specials to prevent a special from being
		// removed from the list while we're traversing it.
		lock(&s.speciallock)

		for sp := s.specials; sp != nil; sp = sp.next {
			if sp.kind != _KindSpecialFinalizer {
				continue
			}
			// don't mark finalized object, but scan it so we
			// retain everything it points to.
			spf := (*specialfinalizer)(unsafe.Pointer(sp))
			// A finalizer can be set for an inner byte of an object, find object beginning.
			p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize

			// Mark everything that can be reached from
			// the object (but *not* the object itself or
			// we'll never collect it).
			scanobject(p, gcw)

			// The special itself is a root.
			scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw)
		}

		unlock(&s.speciallock)
	}
}
// gcAssistAlloc performs GC work to make gp's assist debt positive.
// gp must be the calling user goroutine.
//
// This must be called with preemption enabled.
func gcAssistAlloc(gp *g) {
	// Don't assist in non-preemptible contexts. These are
	// generally fragile and won't allow the assist to block.
	if getg() == gp.m.g0 {
		return
	}
	if mp := getg().m; mp.locks > 0 || mp.preemptoff != "" {
		return
	}

	traced := false
retry:
	// Compute the amount of scan work we need to do to make the
	// balance positive. When the required amount of work is low,
	// we over-assist to build up credit for future allocations
	// and amortize the cost of assisting.
	debtBytes := -gp.gcAssistBytes
	scanWork := int64(gcController.assistWorkPerByte * float64(debtBytes))
	if scanWork < gcOverAssistWork {
		scanWork = gcOverAssistWork
		debtBytes = int64(gcController.assistBytesPerWork * float64(scanWork))
	}
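
	// Illustrative numbers only: a goroutine 64 KiB in debt with
	// assistWorkPerByte == 0.5 owes 32 KiB of scan work;
	// assistBytesPerWork is the reciprocal ratio used to convert
	// stolen or performed scan work back into allocation credit.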

	// Steal as much credit as we can from the background GC's
	// scan credit. This is racy and may drop the background
	// credit below 0 if two mutators steal at the same time. This
	// will just cause steals to fail until credit is accumulated
	// again, so in the long run it doesn't really matter, but we
	// do have to handle the negative credit case.
	bgScanCredit := atomic.Loadint64(&gcController.bgScanCredit)
	stolen := int64(0)
	if bgScanCredit > 0 {
		if bgScanCredit < scanWork {
			stolen = bgScanCredit
			gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(stolen))
		} else {
			stolen = scanWork
			gp.gcAssistBytes += debtBytes
		}
		atomic.Xaddint64(&gcController.bgScanCredit, -stolen)

		scanWork -= stolen

		if scanWork == 0 {
			// We were able to steal all of the credit we
			// needed.
			if traced {
				traceGCMarkAssistDone()
			}
			return
		}
	}

	if trace.enabled && !traced {
		traced = true
		traceGCMarkAssistStart()
	}

	// Perform assist work
	systemstack(func() {
		gcAssistAlloc1(gp, scanWork)
		// The user stack may have moved, so this can't touch
		// anything on it until it returns from systemstack.
	})

	completed := gp.param != nil
	gp.param = nil
	if completed {
		gcMarkDone()
	}

	if gp.gcAssistBytes < 0 {
		// We were unable to steal enough credit or perform
		// enough work to pay off the assist debt. We need to
		// do one of these before letting the mutator allocate
		// more to prevent over-allocation.
		//
		// If this is because we were preempted, reschedule
		// and try some more.
		if gp.preempt {
			Gosched()
			goto retry
		}

		// Add this G to an assist queue and park. When the GC
		// has more background credit, it will satisfy queued
		// assists before flushing to the global credit pool.
		//
		// Note that this does *not* get woken up when more
		// work is added to the work list. The theory is that
		// there wasn't enough work to do anyway, so we might
		// as well let background marking take care of the
		// work that is available.
		if !gcParkAssist() {
			goto retry
		}

		// At this point either background GC has satisfied
		// this G's assist debt, or the GC cycle is over.
	}
	if traced {
		traceGCMarkAssistDone()
	}
}
// gcAssistAlloc1 is the part of gcAssistAlloc that runs on the system
// stack. This is a separate function to make it easier to see that
// we're not capturing anything from the user stack, since the user
// stack may move while we're in this function.
//
// gcAssistAlloc1 indicates whether this assist completed the mark
// phase by setting gp.param to non-nil. This can't be communicated on
// the stack since it may move.
//
//go:systemstack
func gcAssistAlloc1(gp *g, scanWork int64) {
	// Clear the flag indicating that this assist completed the
	// mark phase.
	gp.param = nil

	if atomic.Load(&gcBlackenEnabled) == 0 {
		// The gcBlackenEnabled check in malloc races with the
		// store that clears it but an atomic check in every malloc
		// would be a performance hit.
		// Instead we recheck it here on the non-preemptible system
		// stack to determine if we should perform an assist.

		// GC is done, so ignore any remaining debt.
		gp.gcAssistBytes = 0
		return
	}
	// Track time spent in this assist. Since we're on the
	// system stack, this is non-preemptible, so we can
	// just measure start and end time.
	startTime := nanotime()

	decnwait := atomic.Xadd(&work.nwait, -1)
	if decnwait == work.nproc {
		println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc)
		throw("nwait > work.nprocs")
	}

	// gcDrainN requires the caller to be preemptible.
	casgstatus(gp, _Grunning, _Gwaiting)
	gp.waitreason = "GC assist marking"

	// drain own cached work first in the hopes that it
	// will be more cache friendly.
	gcw := &getg().m.p.ptr().gcw
	workDone := gcDrainN(gcw, scanWork)
	// If we are near the end of the mark phase
	// dispose of the gcw.
	if gcBlackenPromptly {
		gcw.dispose()
	}

	casgstatus(gp, _Gwaiting, _Grunning)

	// Record that we did this much scan work.
	//
	// Back out the number of bytes of assist credit that
	// this scan work counts for. The "1+" is a poor man's
	// round-up, to ensure this adds credit even if
	// assistBytesPerWork is very low.
	gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(workDone))
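	// Illustrative numbers only: workDone == 1000 units with
	// assistBytesPerWork == 2.0 pays off 2001 bytes of debt; the
	// extra byte comes from the "1+" round-up above.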

	// If this is the last worker and we ran out of work,
	// signal a completion point.
	incnwait := atomic.Xadd(&work.nwait, +1)
	if incnwait > work.nproc {
		println("runtime: work.nwait=", incnwait,
			"work.nproc=", work.nproc,
			"gcBlackenPromptly=", gcBlackenPromptly)
		throw("work.nwait > work.nproc")
	}

	if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
		// This has reached a background completion point. Set
		// gp.param to a non-nil value to indicate this. It
		// doesn't matter what we set it to (it just has to be
		// a valid pointer).
		gp.param = unsafe.Pointer(gp)
	}
	duration := nanotime() - startTime
	_p_ := gp.m.p.ptr()
	_p_.gcAssistTime += duration
	if _p_.gcAssistTime > gcAssistTimeSlack {
		atomic.Xaddint64(&gcController.assistTime, _p_.gcAssistTime)
		_p_.gcAssistTime = 0
	}
}
// gcWakeAllAssists wakes all currently blocked assists. This is used
// at the end of a GC cycle. gcBlackenEnabled must be false to prevent
// new assists from going to sleep after this point.
func gcWakeAllAssists() {
	lock(&work.assistQueue.lock)
	injectglist(work.assistQueue.head.ptr())
	work.assistQueue.head.set(nil)
	work.assistQueue.tail.set(nil)
	unlock(&work.assistQueue.lock)
}
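
// gcWakeAllAssists pairs with gcParkAssist below: parked assists are
// normally satisfied and readied one at a time by gcFlushBgCredit as
// background credit arrives, and any goroutines still queued when the
// cycle ends are injected back into the run queue here.
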
// gcParkAssist puts the current goroutine on the assist queue and parks.
//
// gcParkAssist returns whether the assist is now satisfied. If it
// returns false, the caller must retry the assist.
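//
// A typical caller retries in a loop, as gcAssistAlloc does:
//
//	if !gcParkAssist() {
//		goto retry
//	}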
func gcParkAssist() bool {
	lock(&work.assistQueue.lock)
	// If the GC cycle finished while we were getting the lock,
	// exit the assist. The cycle can't finish while we hold the
	// lock.
	if atomic.Load(&gcBlackenEnabled) == 0 {
		unlock(&work.assistQueue.lock)
		return true
	}

	gp := getg()
	oldHead, oldTail := work.assistQueue.head, work.assistQueue.tail
	if oldHead == 0 {
		work.assistQueue.head.set(gp)
	} else {
		oldTail.ptr().schedlink.set(gp)
	}
	work.assistQueue.tail.set(gp)
	gp.schedlink.set(nil)

	// Recheck for background credit now that this G is in
	// the queue, but can still back out. This avoids a
	// race in case background marking has flushed more
	// credit since we checked above.
	if atomic.Loadint64(&gcController.bgScanCredit) > 0 {
		work.assistQueue.head = oldHead
		work.assistQueue.tail = oldTail
		if oldTail != 0 {
			oldTail.ptr().schedlink.set(nil)
		}
		unlock(&work.assistQueue.lock)
		return false
	}
	goparkunlock(&work.assistQueue.lock, "GC assist wait", traceEvGoBlockGC, 2)
	return true
}
// gcFlushBgCredit flushes scanWork units of background scan work
// credit. This first satisfies blocked assists on the
// work.assistQueue and then flushes any remaining credit to
// gcController.bgScanCredit.
//
// Write barriers are disallowed because this is used by gcDrain after
// it has ensured that all work is drained and this must preserve that
// condition.
//
//go:nowritebarrierrec
func gcFlushBgCredit(scanWork int64) {
	if work.assistQueue.head == 0 {
		// Fast path; there are no blocked assists. There's a
		// small window here where an assist may add itself to
		// the blocked queue and park. If that happens, we'll
		// just get it on the next flush.
		atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
		return
	}

	scanBytes := int64(float64(scanWork) * gcController.assistBytesPerWork)

	lock(&work.assistQueue.lock)
	gp := work.assistQueue.head.ptr()
	for gp != nil && scanBytes > 0 {
		// Note that gp.gcAssistBytes is negative because gp
		// is in debt. Think carefully about the signs below.
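		//
		// For example (illustrative numbers): with scanBytes == 100
		// and gp.gcAssistBytes == -40 the sum is 60 >= 0, so this
		// assist is fully satisfied and 60 bytes of credit remain
		// for the next goroutine in the queue; with gcAssistBytes ==
		// -250 the sum is negative and the assist is only partially
		// paid down.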
		if scanBytes+gp.gcAssistBytes >= 0 {
			// Satisfy this entire assist debt.
			scanBytes += gp.gcAssistBytes
			gp.gcAssistBytes = 0
			xgp := gp
			gp = gp.schedlink.ptr()
			// It's important that we *not* put xgp in
			// runnext. Otherwise, it's possible for user
			// code to exploit the GC worker's high
			// scheduler priority to get itself always run
			// before other goroutines and always in the
			// fresh quantum started by GC.
			ready(xgp, 0, false)
		} else {
			// Partially satisfy this assist.
			gp.gcAssistBytes += scanBytes
			scanBytes = 0
			// As a heuristic, we move this assist to the
			// back of the queue so that large assists
			// can't clog up the assist queue and
			// substantially delay small assists.
			xgp := gp
			gp = gp.schedlink.ptr()
			if gp == nil {
				// gp is the only assist in the queue.
				gp = xgp
				break
			}
			xgp.schedlink.set(nil)
			work.assistQueue.tail.ptr().schedlink.set(xgp)
			work.assistQueue.tail.set(xgp)
		}
	}
	work.assistQueue.head.set(gp)
	if gp == nil {
		work.assistQueue.tail.set(nil)
	}

	if scanBytes > 0 {
		// Convert from scan bytes back to work.
		scanWork = int64(float64(scanBytes) * gcController.assistWorkPerByte)
		atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
	}
	unlock(&work.assistQueue.lock)
}
// We use a C function to find the stack.
func doscanstack(*g, *gcWork)
// scanstack scans gp's stack, greying all pointers found on the stack.
//
// scanstack is marked go:systemstack because it must not be preempted
// while using a workbuf.
//
//go:systemstack
func scanstack(gp *g, gcw *gcWork) {
	if readgstatus(gp)&_Gscan == 0 {
		print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
		throw("scanstack - bad status")
	}

	switch readgstatus(gp) &^ _Gscan {
	default:
		print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
		throw("mark - bad status")
	case _Gdead:
		return
	case _Grunning:
		// ok for gccgo, though not for gc.
	case _Grunnable, _Gsyscall, _Gwaiting:
		// ok
	}

	mp := gp.m
	if mp != nil && mp.helpgc != 0 {
		throw("can't scan gchelper stack")
	}

	// Scan the stack.
	doscanstack(gp, gcw)

	// Conservatively scan the saved register values.
	scanstackblock(uintptr(unsafe.Pointer(&gp.gcregs)), unsafe.Sizeof(gp.gcregs), gcw)
	scanstackblock(uintptr(unsafe.Pointer(&gp.context)), unsafe.Sizeof(gp.context), gcw)

	gp.gcscanvalid = true
}
type gcDrainFlags int

const (
	gcDrainUntilPreempt gcDrainFlags = 1 << iota
	gcDrainNoBlock
	gcDrainFlushBgCredit
	gcDrainIdle
	gcDrainFractional

	// gcDrainBlock means neither gcDrainUntilPreempt nor
	// gcDrainNoBlock. It is the default, but callers should use
	// the constant for documentation purposes.
	gcDrainBlock gcDrainFlags = 0
)
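
// The flags are typically combined: background mark workers pass
// gcDrainUntilPreempt|gcDrainFlushBgCredit (adding gcDrainIdle or
// gcDrainFractional in the non-dedicated modes), while STW mark
// termination drains with plain gcDrainBlock. See gcBgMarkWorker and
// gcMark for the exact combinations.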

// gcDrain scans roots and objects in work buffers, blackening grey
// objects until all roots and work buffers have been drained.
//
// If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt
// is set. This implies gcDrainNoBlock.
//
// If flags&gcDrainIdle != 0, gcDrain returns when there is other work
// to do. This implies gcDrainNoBlock.
//
// If flags&gcDrainFractional != 0, gcDrain self-preempts when
// pollFractionalWorkerExit() returns true. This implies
// gcDrainNoBlock.
//
// If flags&gcDrainNoBlock != 0, gcDrain returns as soon as it is
// unable to get more work. Otherwise, it will block until all
// blocking calls are blocked in gcDrain.
//
// If flags&gcDrainFlushBgCredit != 0, gcDrain flushes scan work
// credit to gcController.bgScanCredit every gcCreditSlack units of
// scan work.
//
//go:nowritebarrier
func gcDrain(gcw *gcWork, flags gcDrainFlags) {
	if !writeBarrier.needed {
		throw("gcDrain phase incorrect")
	}

	gp := getg().m.curg
	preemptible := flags&gcDrainUntilPreempt != 0
	blocking := flags&(gcDrainUntilPreempt|gcDrainIdle|gcDrainFractional|gcDrainNoBlock) == 0
	flushBgCredit := flags&gcDrainFlushBgCredit != 0
	idle := flags&gcDrainIdle != 0

	initScanWork := gcw.scanWork

	// checkWork is the scan work before performing the next
	// self-preempt check.
	checkWork := int64(1<<63 - 1)
	var check func() bool
	if flags&(gcDrainIdle|gcDrainFractional) != 0 {
		checkWork = initScanWork + drainCheckThreshold
		if idle {
			check = pollWork
		} else if flags&gcDrainFractional != 0 {
			check = pollFractionalWorkerExit
		}
	}

	// Drain root marking jobs.
	if work.markrootNext < work.markrootJobs {
		for !(preemptible && gp.preempt) {
			job := atomic.Xadd(&work.markrootNext, +1) - 1
			if job >= work.markrootJobs {
				break
			}
			markroot(gcw, job)
			if check != nil && check() {
				goto done
			}
		}
	}

	// Drain heap marking jobs.
	for !(preemptible && gp.preempt) {
		// Try to keep work available on the global queue. We used to
		// check if there were waiting workers, but it's better to
		// just keep work available than to make workers wait. In the
		// worst case, we'll do O(log(_WorkbufSize)) unnecessary
		// balances.
		if work.full == 0 {
			gcw.balance()
		}

		var b uintptr
		if blocking {
			b = gcw.get()
		} else {
			b = gcw.tryGetFast()
			if b == 0 {
				b = gcw.tryGet()
			}
		}
		if b == 0 {
			// work barrier reached or tryGet failed.
			break
		}
		scanobject(b, gcw)

		// Flush background scan work credit to the global
		// account if we've accumulated enough locally so
		// mutator assists can draw on it.
		if gcw.scanWork >= gcCreditSlack {
			atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
			if flushBgCredit {
				gcFlushBgCredit(gcw.scanWork - initScanWork)
				initScanWork = 0
			}
			checkWork -= gcw.scanWork
			gcw.scanWork = 0

			if checkWork <= 0 {
				checkWork += drainCheckThreshold
				if check != nil && check() {
					break
				}
			}
		}
	}

	// In blocking mode, write barriers are not allowed after this
	// point because we must preserve the condition that the work
	// buffers are empty.

done:
	// Flush remaining scan work credit.
	if gcw.scanWork > 0 {
		atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
		if flushBgCredit {
			gcFlushBgCredit(gcw.scanWork - initScanWork)
		}
		gcw.scanWork = 0
	}
}
// gcDrainN blackens grey objects until it has performed roughly
// scanWork units of scan work or the G is preempted. This is
// best-effort, so it may perform less work if it fails to get a work
// buffer. Otherwise, it will perform at least n units of work, but
// may perform more because scanning is always done in whole object
// increments. It returns the amount of scan work performed.
//
// The caller goroutine must be in a preemptible state (e.g.,
// _Gwaiting) to prevent deadlocks during stack scanning. As a
// consequence, this must be called on the system stack.
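//
// gcAssistAlloc1 above is the main caller: it runs on the system
// stack, moves the assisting G into _Gwaiting, and passes the scan
// work computed from the G's assist debt.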
//go:systemstack
func gcDrainN(gcw *gcWork, scanWork int64) int64 {
	if !writeBarrier.needed {
		throw("gcDrainN phase incorrect")
	}

	// There may already be scan work on the gcw, which we don't
	// want to claim was done by this call.
	workFlushed := -gcw.scanWork

	gp := getg().m.curg
	for !gp.preempt && workFlushed+gcw.scanWork < scanWork {
		// See gcDrain comment.
		if work.full == 0 {
			gcw.balance()
		}

		// This might be a good place to add prefetch code...
		// if(wbuf.nobj > 4) {
		//         PREFETCH(wbuf->obj[wbuf.nobj - 3];
		// }
		b := gcw.tryGetFast()
		if b == 0 {
			b = gcw.tryGet()
		}

		if b == 0 {
			// Try to do a root job.
			//
			// TODO: Assists should get credit for this
			// work.
			if work.markrootNext < work.markrootJobs {
				job := atomic.Xadd(&work.markrootNext, +1) - 1
				if job < work.markrootJobs {
					markroot(gcw, job)
					continue
				}
			}
			// No heap or root jobs.
			break
		}
		scanobject(b, gcw)

		// Flush background scan work credit.
		if gcw.scanWork >= gcCreditSlack {
			atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
			workFlushed += gcw.scanWork
			gcw.scanWork = 0
		}
	}

	// Unlike gcDrain, there's no need to flush remaining work
	// here because this never flushes to bgScanCredit and
	// gcw.dispose will flush any remaining work to scanWork.

	return workFlushed + gcw.scanWork
}
// scanblock scans b as scanobject would, but using an explicit
// pointer bitmap instead of the heap bitmap.
//
// This is used to scan non-heap roots, so it does not update
// gcw.bytesMarked or gcw.scanWork.
//go:nowritebarrier
func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
	// Use local copies of original parameters, so that a stack trace
	// due to one of the throws below shows the original block
	// base and extent.
	b := b0
	n := n0

	arena_start := mheap_.arena_start
	arena_used := mheap_.arena_used

	for i := uintptr(0); i < n; {
		// Find bits for the next word.
		bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8)))
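		// Each ptrmask byte covers 8 words of the block, one bit
		// per word, least-significant bit first. For example, a
		// mask byte of 0x05 marks words 0 and 2 of this group as
		// pointers to be checked.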
		if bits == 0 {
			i += sys.PtrSize * 8
			continue
		}
		for j := 0; j < 8 && i < n; j++ {
			if bits&1 != 0 {
				// Same work as in scanobject; see comments there.
				obj := *(*uintptr)(unsafe.Pointer(b + i))
				if obj != 0 && arena_start <= obj && obj < arena_used {
					if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, false); obj != 0 {
						greyobject(obj, b, i, hbits, span, gcw, objIndex, false)
					}
				}
			}
			bits >>= 1
			i += sys.PtrSize
		}
	}
}
// scanobject scans the object starting at b, adding pointers to gcw.
// b must point to the beginning of a heap object or an oblet.
// scanobject consults the GC bitmap for the pointer mask and the
// spans for the size of the object.
//
//go:nowritebarrier
func scanobject(b uintptr, gcw *gcWork) {
	// Note that arena_used may change concurrently during
	// scanobject and hence scanobject may encounter a pointer to
	// a newly allocated heap object that is *not* in
	// [start,used). It will not mark this object; however, we
	// know that it was just installed by a mutator, which means
	// that mutator will execute a write barrier and take care of
	// marking it. This is even more pronounced on relaxed memory
	// architectures since we access arena_used without barriers
	// or synchronization, but the same logic applies.
	arena_start := mheap_.arena_start
	arena_used := mheap_.arena_used

	// Find the bits for b and the size of the object at b.
	//
	// b is either the beginning of an object, in which case this
	// is the size of the object to scan, or it points to an
	// oblet, in which case we compute the size to scan below.
	hbits := heapBitsForAddr(b)
	s := spanOfUnchecked(b)
	n := s.elemsize
	if n == 0 {
		throw("scanobject n == 0")
	}

	if n > maxObletBytes {
		// Large object. Break into oblets for better
		// parallelism and lower latency.
		if b == s.base() {
			// It's possible this is a noscan object (not
			// from greyobject, but from other code
			// paths), in which case we must *not* enqueue
			// oblets since their bitmaps will be
			// uninitialized.
			if s.spanclass.noscan() {
				// Bypass the whole scan.
				gcw.bytesMarked += uint64(n)
				return
			}

			// Enqueue the other oblets to scan later.
			// Some oblets may be in b's scalar tail, but
			// these will be marked as "no more pointers",
			// so we'll drop out immediately when we go to
			// scan those.
			for oblet := b + maxObletBytes; oblet < s.base()+s.elemsize; oblet += maxObletBytes {
				if !gcw.putFast(oblet) {
					gcw.put(oblet)
				}
			}
		}

		// Compute the size of the oblet. Since this object
		// must be a large object, s.base() is the beginning
		// of the object.
		n = s.base() + s.elemsize - b
		if n > maxObletBytes {
			n = maxObletBytes
		}
	}
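
	// For example, a 1 MiB object is scanned as eight 128 KiB
	// oblets: the first call scans [b, b+maxObletBytes) and the
	// loop above enqueues the remaining seven oblet start
	// addresses for later scanning.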

	var i uintptr
	for i = 0; i < n; i += sys.PtrSize {
		// Find bits for this word.
		if i != 0 {
			// Avoid needless hbits.next() on last iteration.
			hbits = hbits.next()
		}
		// Load bits once. See CL 22712 and issue 16973 for discussion.
		bits := hbits.bits()
		// During checkmarking, 1-word objects store the checkmark
		// in the type bit for the one word. The only one-word objects
		// are pointers, or else they'd be merged with other non-pointer
		// data into larger allocations.
		if i != 1*sys.PtrSize && bits&bitScan == 0 {
			break // no more pointers in this object
		}
		if bits&bitPointer == 0 {
			continue // not a pointer
		}

		// Work here is duplicated in scanblock and above.
		// If you make changes here, make changes there too.
		obj := *(*uintptr)(unsafe.Pointer(b + i))

		// At this point we have extracted the next potential pointer.
		// Check if it points into heap and not back at the current object.
		if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n {
			if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, false); obj != 0 {
				greyobject(obj, b, i, hbits, span, gcw, objIndex, false)
			}
		}
	}
	gcw.bytesMarked += uint64(n)
	gcw.scanWork += int64(i)
}
//go:linkname scanstackblock runtime.scanstackblock

// scanstackblock is called by the stack scanning code in C to
// actually find and mark pointers in the stack block. This is like
// scanblock, but we scan the stack conservatively, so there is no
// bitmask of pointers.
func scanstackblock(b, n uintptr, gcw *gcWork) {
	arena_start := mheap_.arena_start
	arena_used := mheap_.arena_used

	for i := uintptr(0); i < n; i += sys.PtrSize {
		// Same work as in scanobject; see comments there.
		obj := *(*uintptr)(unsafe.Pointer(b + i))
		if obj != 0 && arena_start <= obj && obj < arena_used {
			if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, true); obj != 0 {
				greyobject(obj, b, i, hbits, span, gcw, objIndex, true)
			}
		}
	}
}
// Shade the object if it isn't already.
// The object is not nil and known to be in the heap.
// Preemption must be disabled.
//go:nowritebarrier
func shade(b uintptr) {
	// shade can be called to shade a pointer found on the stack,
	// so pass forStack as true to heapBitsForObject and greyobject.
	if obj, hbits, span, objIndex := heapBitsForObject(b, 0, 0, true); obj != 0 {
		gcw := &getg().m.p.ptr().gcw
		greyobject(obj, 0, 0, hbits, span, gcw, objIndex, true)
		if gcphase == _GCmarktermination || gcBlackenPromptly {
			// Ps aren't allowed to cache work during mark
			// termination.
			gcw.dispose()
		}
	}
}
// obj is the start of an object with mark mbits.
// If it isn't already marked, mark it and enqueue into gcw.
// base and off are for debugging only and could be removed.
//
// See also wbBufFlush1, which partially duplicates this logic.
//
//go:nowritebarrierrec
func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork, objIndex uintptr, forStack bool) {
	// obj should be start of allocation, and so must be at least pointer-aligned.
	if obj&(sys.PtrSize-1) != 0 {
		throw("greyobject: obj not pointer-aligned")
	}
	mbits := span.markBitsForIndex(objIndex)

	if useCheckmark {
		if !mbits.isMarked() {
			// Stack scanning is conservative, so we can
			// see a reference to an object not previously
			// found. Assume the object was correctly not
			// marked and ignore the pointer.
			if forStack {
				return
			}

			print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
			print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")

			// Dump the source (base) object
			gcDumpObject("base", base, off)

			// Dump the object
			gcDumpObject("obj", obj, ^uintptr(0))

			getg().m.traceback = 2
			throw("checkmark found unmarked object")
		}
		if hbits.isCheckmarked(span.elemsize) {
			return
		}
		hbits.setCheckmarked(span.elemsize)
		if !hbits.isCheckmarked(span.elemsize) {
			throw("setCheckmarked and isCheckmarked disagree")
		}
	} else {
		// Stack scanning is conservative, so we can see a
		// pointer to a free object. Assume the object was
		// correctly freed and we must ignore the pointer.
		if forStack && span.isFree(objIndex) {
			return
		}

		if debug.gccheckmark > 0 && span.isFree(objIndex) {
			print("runtime: marking free object ", hex(obj), " found at *(", hex(base), "+", hex(off), ")\n")
			gcDumpObject("base", base, off)
			gcDumpObject("obj", obj, ^uintptr(0))
			getg().m.traceback = 2
			throw("marking free object")
		}

		// If marked we have nothing to do.
		if mbits.isMarked() {
			return
		}
		// mbits.setMarked() // Avoid extra call overhead with manual inlining.
		atomic.Or8(mbits.bytep, mbits.mask)
		// If this is a noscan object, fast-track it to black
		// instead of greying it.
		if span.spanclass.noscan() {
			gcw.bytesMarked += uint64(span.elemsize)
			return
		}
	}

	// Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
	// seems like a nice optimization that can be added back in.
	// There needs to be time between the PREFETCH and the use.
	// Previously we put the obj in an 8 element buffer that is drained at a rate
	// to give the PREFETCH time to do its work.
	// Use of PREFETCHNTA might be more appropriate than PREFETCH
	if !gcw.putFast(obj) {
		gcw.put(obj)
	}
}
// gcDumpObject dumps the contents of obj for debugging and marks the
// field at byte offset off in obj.
func gcDumpObject(label string, obj, off uintptr) {
	if obj < mheap_.arena_start || obj >= mheap_.arena_used {
		print(label, "=", hex(obj), " is not in the Go heap\n")
		return
	}
	k := obj >> _PageShift
	x := k
	x -= mheap_.arena_start >> _PageShift
	s := mheap_.spans[x]
	print(label, "=", hex(obj), " k=", hex(k))
	if s == nil {
		print(" s=nil\n")
		return
	}
	print(" s.base()=", hex(s.base()), " s.limit=", hex(s.limit), " s.spanclass=", s.spanclass, " s.elemsize=", s.elemsize, " s.state=")
	if 0 <= s.state && int(s.state) < len(mSpanStateNames) {
		print(mSpanStateNames[s.state], "\n")
	} else {
		print("unknown(", s.state, ")\n")
	}

	skipped := false
	size := s.elemsize
	if s.state == _MSpanManual && size == 0 {
		// We're printing something from a stack frame. We
		// don't know how big it is, so just show up to and
		// including off.
		size = off + sys.PtrSize
	}
	for i := uintptr(0); i < size; i += sys.PtrSize {
		// For big objects, just print the beginning (because
		// that usually hints at the object's type) and the
		// fields around off.
		if !(i < 128*sys.PtrSize || off-16*sys.PtrSize < i && i < off+16*sys.PtrSize) {
			skipped = true
			continue
		}
		if skipped {
			print(" ...\n")
			skipped = false
		}
		print(" *(", label, "+", i, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + i))))
		if i == off {
			print(" <==\n")
		} else {
			print("\n")
		}
	}
	if skipped {
		print(" ...\n")
	}
}
// gcmarknewobject marks a newly allocated object black. obj must
// not contain any non-nil pointers.
//
// This is nosplit so it can manipulate a gcWork without preemption.
//
//go:nosplit
func gcmarknewobject(obj, size, scanSize uintptr) {
	if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
		throw("gcmarknewobject called while doing checkmark")
	}
	markBitsForAddr(obj).setMarked()
	gcw := &getg().m.p.ptr().gcw
	gcw.bytesMarked += uint64(size)
	gcw.scanWork += int64(scanSize)
	if gcBlackenPromptly {
		// There shouldn't be anything in the work queue, but
		// we still need to flush stats.
		gcw.dispose()
	}
}
// gcMarkTinyAllocs greys all active tiny alloc blocks.
//
// The world must be stopped.
func gcMarkTinyAllocs() {
	for _, p := range allp {
		c := p.mcache
		if c == nil || c.tiny == 0 {
			continue
		}
		_, hbits, span, objIndex := heapBitsForObject(c.tiny, 0, 0, false)
		gcw := &p.gcw
		greyobject(c.tiny, 0, 0, hbits, span, gcw, objIndex, false)
		if gcBlackenPromptly {
			gcw.dispose()
		}
	}
}
// To help debug the concurrent GC we remark with the world
// stopped ensuring that any object encountered has its normal
// mark bit set. To do this we use an orthogonal bit
// pattern to indicate the object is marked. The following pattern
// uses the upper two bits in the object's boundary nibble.
// 01: scalar not marked
// 10: pointer not marked
// 11: pointer marked
// 00: scalar marked
// Xoring with 01 will flip the pattern from marked to unmarked and vice versa.
// The higher bit is 1 for pointers and 0 for scalars, whether the object
// is marked or not.
// The first nibble no longer holds the typeDead pattern indicating that
// there are no more pointers in the object. This information is held
// in the second nibble.

// If useCheckmark is true, marking of an object uses the
// checkmark bits (encoding above) instead of the standard
// mark bits.
var useCheckmark = false

func initCheckmarks() {
	useCheckmark = true
	for _, s := range mheap_.allspans {
		if s.state == _MSpanInUse {
			heapBitsForSpan(s.base()).initCheckmarkSpan(s.layout())
		}
	}
}

func clearCheckmarks() {
	useCheckmark = false
	for _, s := range mheap_.allspans {
		if s.state == _MSpanInUse {
			heapBitsForSpan(s.base()).clearCheckmarkSpan(s.layout())
		}
	}
}