libgo/go/runtime/mgcmark.go
1 // Copyright 2009 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
5 // Garbage collector: marking and scanning
7 package runtime
9 import (
10 "runtime/internal/atomic"
11 "runtime/internal/sys"
12 "unsafe"
15 const (
16 fixedRootFinalizers = iota
17 fixedRootFreeGStacks
18 fixedRootCount
20 // rootBlockBytes is the number of bytes to scan per data or
21 // BSS root.
22 rootBlockBytes = 256 << 10
24 // rootBlockSpans is the number of spans to scan per span
25 // root.
26 rootBlockSpans = 8 * 1024 // 64MB worth of spans
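// (Each in-use span covers at least one 8 KB page, so 8*1024 spans is
// at least 64 MB.)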
28 // maxObletBytes is the maximum bytes of an object to scan at
29 // once. Larger objects will be split up into "oblets" of at
30 // most this size. Since we can scan 1–2 MB/ms, 128 KB bounds
31 // scan preemption at ~100 µs.
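// (For example, at a mid-range rate of ~1.25 MB/ms, scanning a full
// 128 KB oblet takes about 0.1 ms, i.e. roughly 100 µs.)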
33 // This must be > _MaxSmallSize so that the object base is the
34 // span base.
35 maxObletBytes = 128 << 10
37 // drainCheckThreshold specifies how many units of work to do
38 // between self-preemption checks in gcDrain. Assuming a scan
39 // rate of 1 MB/ms, this is ~100 µs. Lower values have higher
40 // overhead in the scan loop (the scheduler check may perform
41 // a syscall, so its overhead is nontrivial). Higher values
42 // make the system less responsive to incoming work.
43 drainCheckThreshold = 100000
46 // gcMarkRootPrepare queues root scanning jobs (stacks, globals, and
47 // some miscellany) and initializes scanning-related state.
49 // The caller must have called gcCopySpans().
51 // The world must be stopped.
53 //go:nowritebarrier
54 func gcMarkRootPrepare() {
55 if gcphase == _GCmarktermination {
56 work.nFlushCacheRoots = int(gomaxprocs)
57 } else {
58 work.nFlushCacheRoots = 0
61 work.nDataRoots = 0
63 // Only scan globals once per cycle; preferably concurrently.
64 if !work.markrootDone {
65 roots := gcRoots
66 for roots != nil {
67 work.nDataRoots++
68 roots = roots.next
72 if !work.markrootDone {
73 // On the first markroot, we need to scan span roots.
74 // In concurrent GC, this happens during concurrent
75 // mark and we depend on addfinalizer to ensure the
76 // above invariants for objects that get finalizers
77 // after concurrent mark. In STW GC, this will happen
78 // during mark termination.
80 // We're only interested in scanning the in-use spans,
81 // which will all be swept at this point. More spans
82 // may be added to this list during concurrent GC, but
83 // we only care about spans that were allocated before
84 // this mark phase.
85 work.nSpanRoots = mheap_.sweepSpans[mheap_.sweepgen/2%2].numBlocks()
87 // On the first markroot, we need to scan all Gs. Gs
88 // may be created after this point, but it's okay that
89 // we ignore them because they begin life without any
90 // roots, so there's nothing to scan, and any roots
91 // they create during the concurrent phase will be
92 // scanned during mark termination. During mark
93 // termination, allglen isn't changing, so we'll scan
94 // all Gs.
95 work.nStackRoots = int(atomic.Loaduintptr(&allglen))
96 } else {
97 // We've already scanned span roots and kept the scan
98 // up-to-date during concurrent mark.
99 work.nSpanRoots = 0
101 // The hybrid barrier ensures that stacks can't
102 // contain pointers to unmarked objects, so on the
103 // second markroot, there's no need to scan stacks.
104 work.nStackRoots = 0
106 if debug.gcrescanstacks > 0 {
107 // Scan stacks anyway for debugging.
108 work.nStackRoots = int(atomic.Loaduintptr(&allglen))
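// Assign root job indices in the same order markroot decodes them:
// the fixedRootCount fixed roots first, then the flush-cache, data,
// span, and stack roots.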
112 work.markrootNext = 0
113 work.markrootJobs = uint32(fixedRootCount + work.nFlushCacheRoots + work.nDataRoots + work.nSpanRoots + work.nStackRoots)
116 // gcMarkRootCheck checks that all roots have been scanned. It is
117 // purely for debugging.
118 func gcMarkRootCheck() {
119 if work.markrootNext < work.markrootJobs {
120 print(work.markrootNext, " of ", work.markrootJobs, " markroot jobs done\n")
121 throw("left over markroot jobs")
124 lock(&allglock)
125 // Check that stacks have been scanned.
126 var gp *g
127 if gcphase == _GCmarktermination && debug.gcrescanstacks > 0 {
128 for i := 0; i < len(allgs); i++ {
129 gp = allgs[i]
130 if !(gp.gcscandone && gp.gcscanvalid) && readgstatus(gp) != _Gdead {
131 goto fail
134 } else {
135 for i := 0; i < work.nStackRoots; i++ {
136 gp = allgs[i]
137 if !gp.gcscandone {
138 goto fail
142 unlock(&allglock)
143 return
145 fail:
146 println("gp", gp, "goid", gp.goid,
147 "status", readgstatus(gp),
148 "gcscandone", gp.gcscandone,
149 "gcscanvalid", gp.gcscanvalid)
150 unlock(&allglock) // Avoid self-deadlock with traceback.
151 throw("scan missed a g")
154 // ptrmask for an allocation containing a single pointer.
155 var oneptrmask = [...]uint8{1}
157 // markroot scans the i'th root.
159 // Preemption must be disabled (because this uses a gcWork).
161 // nowritebarrier is only advisory here.
163 //go:nowritebarrier
164 func markroot(gcw *gcWork, i uint32) {
165 // TODO(austin): This is a bit ridiculous. Compute and store
166 // the bases in gcMarkRootPrepare instead of the counts.
167 baseFlushCache := uint32(fixedRootCount)
168 baseData := baseFlushCache + uint32(work.nFlushCacheRoots)
169 baseSpans := baseData + uint32(work.nDataRoots)
170 baseStacks := baseSpans + uint32(work.nSpanRoots)
171 end := baseStacks + uint32(work.nStackRoots)
173 // Note: if you add a case here, please also update heapdump.go:dumproots.
174 switch {
175 case baseFlushCache <= i && i < baseData:
176 flushmcache(int(i - baseFlushCache))
178 case baseData <= i && i < baseSpans:
179 roots := gcRoots
180 c := baseData
181 for roots != nil {
182 if i == c {
183 markrootBlock(roots, gcw)
184 break
186 roots = roots.next
190 case i == fixedRootFinalizers:
191 // Only do this once per GC cycle since we don't call
192 // queuefinalizer during marking.
193 if work.markrootDone {
194 break
196 for fb := allfin; fb != nil; fb = fb.alllink {
197 cnt := uintptr(atomic.Load(&fb.cnt))
198 scanblock(uintptr(unsafe.Pointer(&fb.fin[0])), cnt*unsafe.Sizeof(fb.fin[0]), &finptrmask[0], gcw)
201 case i == fixedRootFreeGStacks:
202 // FIXME: We don't do this for gccgo.
204 case baseSpans <= i && i < baseStacks:
205 // mark MSpan.specials
206 markrootSpans(gcw, int(i-baseSpans))
208 default:
209 // the rest is scanning goroutine stacks
210 var gp *g
211 if baseStacks <= i && i < end {
212 gp = allgs[i-baseStacks]
213 } else {
214 throw("markroot: bad index")
217 // Remember when we first observed the G blocked; this is
218 // needed only for traceback output.
219 status := readgstatus(gp) // We are not in a scan state
220 if (status == _Gwaiting || status == _Gsyscall) && gp.waitsince == 0 {
221 gp.waitsince = work.tstart
224 // scang must be done on the system stack in case
225 // we're trying to scan our own stack.
226 systemstack(func() {
227 // If this is a self-scan, put the user G in
228 // _Gwaiting to prevent self-deadlock. It may
229 // already be in _Gwaiting if this is a mark
230 // worker or we're in mark termination.
231 userG := getg().m.curg
232 selfScan := gp == userG && readgstatus(userG) == _Grunning
233 if selfScan {
234 casgstatus(userG, _Grunning, _Gwaiting)
235 userG.waitreason = "garbage collection scan"
238 // TODO: scang blocks until gp's stack has
239 // been scanned, which may take a while for
240 // running goroutines. Consider doing this in
241 // two phases where the first is non-blocking:
242 // we scan the stacks we can and ask running
243 // goroutines to scan themselves; and the
244 // second blocks.
245 scang(gp, gcw)
247 if selfScan {
248 casgstatus(userG, _Gwaiting, _Grunning)
254 // markrootBlock scans one element of the list of GC roots.
256 //go:nowritebarrier
257 func markrootBlock(roots *gcRootList, gcw *gcWork) {
258 for i := 0; i < roots.count; i++ {
259 r := &roots.roots[i]
260 scanblock(uintptr(r.decl), r.ptrdata, r.gcdata, gcw)
264 // markrootSpans marks roots for one shard of work.spans.
266 //go:nowritebarrier
267 func markrootSpans(gcw *gcWork, shard int) {
268 // Objects with finalizers have two GC-related invariants:
270 // 1) Everything reachable from the object must be marked.
271 // This ensures that when we pass the object to its finalizer,
272 // everything the finalizer can reach will be retained.
274 // 2) Finalizer specials (which are not in the garbage
275 // collected heap) are roots. In practice, this means the fn
276 // field must be scanned.
278 // TODO(austin): There are several ideas for making this more
279 // efficient in issue #11485.
281 if work.markrootDone {
282 throw("markrootSpans during second markroot")
285 sg := mheap_.sweepgen
286 spans := mheap_.sweepSpans[mheap_.sweepgen/2%2].block(shard)
287 // Note that work.spans may not include spans that were
288 // allocated between entering the scan phase and now. This is
289 // okay because any objects with finalizers in those spans
290 // must have been allocated and given finalizers after we
291 // entered the scan phase, so addfinalizer will have ensured
292 // the above invariants for them.
293 for _, s := range spans {
294 if s.state != mSpanInUse {
295 continue
297 if !useCheckmark && s.sweepgen != sg {
298 // sweepgen was updated (+2) during non-checkmark GC pass
299 print("sweep ", s.sweepgen, " ", sg, "\n")
300 throw("gc: unswept span")
303 // Speculatively check if there are any specials
304 // without acquiring the span lock. This may race with
305 // adding the first special to a span, but in that
306 // case addfinalizer will observe that the GC is
307 // active (which is globally synchronized) and ensure
308 // the above invariants. We may also ensure the
309 // invariants, but it's okay to scan an object twice.
310 if s.specials == nil {
311 continue
314 // Lock the specials to prevent a special from being
315 // removed from the list while we're traversing it.
316 lock(&s.speciallock)
318 for sp := s.specials; sp != nil; sp = sp.next {
319 if sp.kind != _KindSpecialFinalizer {
320 continue
322 // don't mark finalized object, but scan it so we
323 // retain everything it points to.
324 spf := (*specialfinalizer)(unsafe.Pointer(sp))
325 // A finalizer can be set for an inner byte of an object; find the object's beginning.
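// For example, with elemsize 48 and special.offset 100, the integer
// division below rounds down to element offset 96.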
326 p := s.base() + uintptr(spf.special.offset)/s.elemsize*s.elemsize
328 // Mark everything that can be reached from
329 // the object (but *not* the object itself or
330 // we'll never collect it).
331 scanobject(p, gcw)
333 // The special itself is a root.
334 scanblock(uintptr(unsafe.Pointer(&spf.fn)), sys.PtrSize, &oneptrmask[0], gcw)
337 unlock(&s.speciallock)
341 // gcAssistAlloc performs GC work to make gp's assist debt positive.
342 // gp must be the calling user goroutine.
344 // This must be called with preemption enabled.
345 func gcAssistAlloc(gp *g) {
346 // Don't assist in non-preemptible contexts. These are
347 // generally fragile and won't allow the assist to block.
348 if getg() == gp.m.g0 {
349 return
351 if mp := getg().m; mp.locks > 0 || mp.preemptoff != "" {
352 return
355 traced := false
356 retry:
357 // Compute the amount of scan work we need to do to make the
358 // balance positive. When the required amount of work is low,
359 // we over-assist to build up credit for future allocations
360 // and amortize the cost of assisting.
361 debtBytes := -gp.gcAssistBytes
362 scanWork := int64(gcController.assistWorkPerByte * float64(debtBytes))
363 if scanWork < gcOverAssistWork {
364 scanWork = gcOverAssistWork
365 debtBytes = int64(gcController.assistBytesPerWork * float64(scanWork))
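// debtBytes is recomputed from the boosted scanWork so the byte
// credit added below stays consistent with the work we actually
// intend to do.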
368 // Steal as much credit as we can from the background GC's
369 // scan credit. This is racy and may drop the background
370 // credit below 0 if two mutators steal at the same time. This
371 // will just cause steals to fail until credit is accumulated
372 // again, so in the long run it doesn't really matter, but we
373 // do have to handle the negative credit case.
374 bgScanCredit := atomic.Loadint64(&gcController.bgScanCredit)
375 stolen := int64(0)
376 if bgScanCredit > 0 {
377 if bgScanCredit < scanWork {
378 stolen = bgScanCredit
379 gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(stolen))
380 } else {
381 stolen = scanWork
382 gp.gcAssistBytes += debtBytes
384 atomic.Xaddint64(&gcController.bgScanCredit, -stolen)
386 scanWork -= stolen
388 if scanWork == 0 {
389 // We were able to steal all of the credit we
390 // needed.
391 if traced {
392 traceGCMarkAssistDone()
394 return
398 if trace.enabled && !traced {
399 traced = true
400 traceGCMarkAssistStart()
403 // Perform assist work
404 systemstack(func() {
405 gcAssistAlloc1(gp, scanWork)
406 // The user stack may have moved, so this can't touch
407 // anything on it until it returns from systemstack.
410 completed := gp.param != nil
411 gp.param = nil
412 if completed {
413 gcMarkDone()
416 if gp.gcAssistBytes < 0 {
417 // We were unable to steal enough credit or perform
418 // enough work to pay off the assist debt. We need to
419 // do one of these before letting the mutator allocate
420 // more to prevent over-allocation.
422 // If this is because we were preempted, reschedule
423 // and try some more.
424 if gp.preempt {
425 Gosched()
426 goto retry
429 // Add this G to an assist queue and park. When the GC
430 // has more background credit, it will satisfy queued
431 // assists before flushing to the global credit pool.
433 // Note that this does *not* get woken up when more
434 // work is added to the work list. The theory is that
435 // there wasn't enough work to do anyway, so we might
436 // as well let background marking take care of the
437 // work that is available.
438 if !gcParkAssist() {
439 goto retry
442 // At this point either background GC has satisfied
443 // this G's assist debt, or the GC cycle is over.
445 if traced {
446 traceGCMarkAssistDone()
450 // gcAssistAlloc1 is the part of gcAssistAlloc that runs on the system
451 // stack. This is a separate function to make it easier to see that
452 // we're not capturing anything from the user stack, since the user
453 // stack may move while we're in this function.
455 // gcAssistAlloc1 indicates whether this assist completed the mark
456 // phase by setting gp.param to non-nil. This can't be communicated on
457 // the stack since it may move.
459 //go:systemstack
460 func gcAssistAlloc1(gp *g, scanWork int64) {
461 // Clear the flag indicating that this assist completed the
462 // mark phase.
463 gp.param = nil
465 if atomic.Load(&gcBlackenEnabled) == 0 {
466 // The gcBlackenEnabled check in malloc races with the
467 // store that clears it, but an atomic check in every malloc
468 // would be a performance hit.
469 // Instead we recheck it here on the non-preemptible system
470 // stack to determine if we should perform an assist.
472 // GC is done, so ignore any remaining debt.
473 gp.gcAssistBytes = 0
474 return
476 // Track time spent in this assist. Since we're on the
477 // system stack, this is non-preemptible, so we can
478 // just measure start and end time.
479 startTime := nanotime()
481 decnwait := atomic.Xadd(&work.nwait, -1)
482 if decnwait == work.nproc {
483 println("runtime: work.nwait =", decnwait, "work.nproc=", work.nproc)
484 throw("nwait > work.nprocs")
487 // gcDrainN requires the caller to be preemptible.
488 casgstatus(gp, _Grunning, _Gwaiting)
489 gp.waitreason = "GC assist marking"
491 // drain own cached work first in the hopes that it
492 // will be more cache friendly.
493 gcw := &getg().m.p.ptr().gcw
494 workDone := gcDrainN(gcw, scanWork)
495 // If we are near the end of the mark phase,
496 // dispose of the gcw.
497 if gcBlackenPromptly {
498 gcw.dispose()
501 casgstatus(gp, _Gwaiting, _Grunning)
503 // Record that we did this much scan work.
505 // Back out the number of bytes of assist credit that
506 // this scan work counts for. The "1+" is a poor man's
507 // round-up, to ensure this adds credit even if
508 // assistBytesPerWork is very low.
509 gp.gcAssistBytes += 1 + int64(gcController.assistBytesPerWork*float64(workDone))
511 // If this is the last worker and we ran out of work,
512 // signal a completion point.
513 incnwait := atomic.Xadd(&work.nwait, +1)
514 if incnwait > work.nproc {
515 println("runtime: work.nwait=", incnwait,
516 "work.nproc=", work.nproc,
517 "gcBlackenPromptly=", gcBlackenPromptly)
518 throw("work.nwait > work.nproc")
521 if incnwait == work.nproc && !gcMarkWorkAvailable(nil) {
522 // This has reached a background completion point. Set
523 // gp.param to a non-nil value to indicate this. It
524 // doesn't matter what we set it to (it just has to be
525 // a valid pointer).
526 gp.param = unsafe.Pointer(gp)
528 duration := nanotime() - startTime
529 _p_ := gp.m.p.ptr()
530 _p_.gcAssistTime += duration
531 if _p_.gcAssistTime > gcAssistTimeSlack {
532 atomic.Xaddint64(&gcController.assistTime, _p_.gcAssistTime)
533 _p_.gcAssistTime = 0
537 // gcWakeAllAssists wakes all currently blocked assists. This is used
538 // at the end of a GC cycle. gcBlackenEnabled must be false to prevent
539 // new assists from going to sleep after this point.
540 func gcWakeAllAssists() {
541 lock(&work.assistQueue.lock)
542 injectglist(work.assistQueue.head.ptr())
543 work.assistQueue.head.set(nil)
544 work.assistQueue.tail.set(nil)
545 unlock(&work.assistQueue.lock)
548 // gcParkAssist puts the current goroutine on the assist queue and parks.
550 // gcParkAssist returns whether the assist is now satisfied. If it
551 // returns false, the caller must retry the assist.
553 //go:nowritebarrier
554 func gcParkAssist() bool {
555 lock(&work.assistQueue.lock)
556 // If the GC cycle finished while we were getting the lock,
557 // exit the assist. The cycle can't finish while we hold the
558 // lock.
559 if atomic.Load(&gcBlackenEnabled) == 0 {
560 unlock(&work.assistQueue.lock)
561 return true
564 gp := getg()
565 oldHead, oldTail := work.assistQueue.head, work.assistQueue.tail
566 if oldHead == 0 {
567 work.assistQueue.head.set(gp)
568 } else {
569 oldTail.ptr().schedlink.set(gp)
571 work.assistQueue.tail.set(gp)
572 gp.schedlink.set(nil)
574 // Recheck for background credit now that this G is in
575 // the queue, but can still back out. This avoids a
576 // race in case background marking has flushed more
577 // credit since we checked above.
578 if atomic.Loadint64(&gcController.bgScanCredit) > 0 {
579 work.assistQueue.head = oldHead
580 work.assistQueue.tail = oldTail
581 if oldTail != 0 {
582 oldTail.ptr().schedlink.set(nil)
584 unlock(&work.assistQueue.lock)
585 return false
587 // Park.
588 goparkunlock(&work.assistQueue.lock, "GC assist wait", traceEvGoBlockGC, 2)
589 return true
592 // gcFlushBgCredit flushes scanWork units of background scan work
593 // credit. This first satisfies blocked assists on the
594 // work.assistQueue and then flushes any remaining credit to
595 // gcController.bgScanCredit.
597 // Write barriers are disallowed because this is used by gcDrain after
598 // it has ensured that all work is drained and this must preserve that
599 // condition.
601 //go:nowritebarrierrec
602 func gcFlushBgCredit(scanWork int64) {
603 if work.assistQueue.head == 0 {
604 // Fast path; there are no blocked assists. There's a
605 // small window here where an assist may add itself to
606 // the blocked queue and park. If that happens, we'll
607 // just get it on the next flush.
608 atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
609 return
612 scanBytes := int64(float64(scanWork) * gcController.assistBytesPerWork)
614 lock(&work.assistQueue.lock)
615 gp := work.assistQueue.head.ptr()
616 for gp != nil && scanBytes > 0 {
617 // Note that gp.gcAssistBytes is negative because gp
618 // is in debt. Think carefully about the signs below.
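// For example, scanBytes = 100 against gcAssistBytes = -40 gives
// 100 + (-40) = 60 >= 0: the debt is fully paid and 60 bytes of
// credit remain for the next assist in the queue.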
619 if scanBytes+gp.gcAssistBytes >= 0 {
620 // Satisfy this entire assist debt.
621 scanBytes += gp.gcAssistBytes
622 gp.gcAssistBytes = 0
623 xgp := gp
624 gp = gp.schedlink.ptr()
625 // It's important that we *not* put xgp in
626 // runnext. Otherwise, it's possible for user
627 // code to exploit the GC worker's high
628 // scheduler priority to get itself always run
629 // before other goroutines and always in the
630 // fresh quantum started by GC.
631 ready(xgp, 0, false)
632 } else {
633 // Partially satisfy this assist.
634 gp.gcAssistBytes += scanBytes
635 scanBytes = 0
636 // As a heuristic, we move this assist to the
637 // back of the queue so that large assists
638 // can't clog up the assist queue and
639 // substantially delay small assists.
640 xgp := gp
641 gp = gp.schedlink.ptr()
642 if gp == nil {
643 // gp is the only assist in the queue.
644 gp = xgp
645 } else {
646 xgp.schedlink = 0
647 work.assistQueue.tail.ptr().schedlink.set(xgp)
648 work.assistQueue.tail.set(xgp)
650 break
653 work.assistQueue.head.set(gp)
654 if gp == nil {
655 work.assistQueue.tail.set(nil)
658 if scanBytes > 0 {
659 // Convert from scan bytes back to work.
660 scanWork = int64(float64(scanBytes) * gcController.assistWorkPerByte)
661 atomic.Xaddint64(&gcController.bgScanCredit, scanWork)
663 unlock(&work.assistQueue.lock)
666 // We use a C function to find and scan the stack.
667 func doscanstack(*g, *gcWork)
669 // scanstack scans gp's stack, greying all pointers found on the stack.
671 // scanstack is marked go:systemstack because it must not be preempted
672 // while using a workbuf.
674 //go:nowritebarrier
675 //go:systemstack
676 func scanstack(gp *g, gcw *gcWork) {
677 if gp.gcscanvalid {
678 return
681 if readgstatus(gp)&_Gscan == 0 {
682 print("runtime:scanstack: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", hex(readgstatus(gp)), "\n")
683 throw("scanstack - bad status")
686 switch readgstatus(gp) &^ _Gscan {
687 default:
688 print("runtime: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n")
689 throw("mark - bad status")
690 case _Gdead:
691 return
692 case _Grunning:
693 // ok for gccgo, though not for gc.
694 case _Grunnable, _Gsyscall, _Gwaiting:
695 // ok
698 mp := gp.m
699 if mp != nil && mp.helpgc != 0 {
700 throw("can't scan gchelper stack")
703 // Scan the stack.
704 doscanstack(gp, gcw)
706 // Conservatively scan the saved register values.
707 scanstackblock(uintptr(unsafe.Pointer(&gp.gcregs)), unsafe.Sizeof(gp.gcregs), gcw)
708 scanstackblock(uintptr(unsafe.Pointer(&gp.context)), unsafe.Sizeof(gp.context), gcw)
710 gp.gcscanvalid = true
713 type gcDrainFlags int
715 const (
716 gcDrainUntilPreempt gcDrainFlags = 1 << iota
717 gcDrainNoBlock
718 gcDrainFlushBgCredit
719 gcDrainIdle
720 gcDrainFractional
722 // gcDrainBlock means neither gcDrainUntilPreempt nor
723 // gcDrainNoBlock. It is the default, but callers should use
724 // the constant for documentation purposes.
725 gcDrainBlock gcDrainFlags = 0
728 // gcDrain scans roots and objects in work buffers, blackening grey
729 // objects until all roots and work buffers have been drained.
731 // If flags&gcDrainUntilPreempt != 0, gcDrain returns when g.preempt
732 // is set. This implies gcDrainNoBlock.
734 // If flags&gcDrainIdle != 0, gcDrain returns when there is other work
735 // to do. This implies gcDrainNoBlock.
737 // If flags&gcDrainFractional != 0, gcDrain self-preempts when
738 // pollFractionalWorkerExit() returns true. This implies
739 // gcDrainNoBlock.
741 // If flags&gcDrainNoBlock != 0, gcDrain returns as soon as it is
742 // unable to get more work. Otherwise, it will block until all
743 // blocking calls are blocked in gcDrain.
745 // If flags&gcDrainFlushBgCredit != 0, gcDrain flushes scan work
746 // credit to gcController.bgScanCredit every gcCreditSlack units of
747 // scan work.
749 //go:nowritebarrier
750 func gcDrain(gcw *gcWork, flags gcDrainFlags) {
751 if !writeBarrier.needed {
752 throw("gcDrain phase incorrect")
755 gp := getg().m.curg
756 preemptible := flags&gcDrainUntilPreempt != 0
757 blocking := flags&(gcDrainUntilPreempt|gcDrainIdle|gcDrainFractional|gcDrainNoBlock) == 0
758 flushBgCredit := flags&gcDrainFlushBgCredit != 0
759 idle := flags&gcDrainIdle != 0
761 initScanWork := gcw.scanWork
763 // checkWork is the scan work before performing the next
764 // self-preempt check.
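// It starts at the largest int64 value, so the self-preempt check
// below effectively never fires unless an idle or fractional check
// function is installed.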
765 checkWork := int64(1<<63 - 1)
766 var check func() bool
767 if flags&(gcDrainIdle|gcDrainFractional) != 0 {
768 checkWork = initScanWork + drainCheckThreshold
769 if idle {
770 check = pollWork
771 } else if flags&gcDrainFractional != 0 {
772 check = pollFractionalWorkerExit
776 // Drain root marking jobs.
777 if work.markrootNext < work.markrootJobs {
778 for !(preemptible && gp.preempt) {
779 job := atomic.Xadd(&work.markrootNext, +1) - 1
780 if job >= work.markrootJobs {
781 break
783 markroot(gcw, job)
784 if check != nil && check() {
785 goto done
790 // Drain heap marking jobs.
791 for !(preemptible && gp.preempt) {
792 // Try to keep work available on the global queue. We used to
793 // check if there were waiting workers, but it's better to
794 // just keep work available than to make workers wait. In the
795 // worst case, we'll do O(log(_WorkbufSize)) unnecessary
796 // balances.
797 if work.full == 0 {
798 gcw.balance()
801 var b uintptr
802 if blocking {
803 b = gcw.get()
804 } else {
805 b = gcw.tryGetFast()
806 if b == 0 {
807 b = gcw.tryGet()
810 if b == 0 {
811 // work barrier reached or tryGet failed.
812 break
814 scanobject(b, gcw)
816 // Flush background scan work credit to the global
817 // account if we've accumulated enough locally so
818 // mutator assists can draw on it.
819 if gcw.scanWork >= gcCreditSlack {
820 atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
821 if flushBgCredit {
822 gcFlushBgCredit(gcw.scanWork - initScanWork)
823 initScanWork = 0
825 checkWork -= gcw.scanWork
826 gcw.scanWork = 0
828 if checkWork <= 0 {
829 checkWork += drainCheckThreshold
830 if check != nil && check() {
831 break
837 // In blocking mode, write barriers are not allowed after this
838 // point because we must preserve the condition that the work
839 // buffers are empty.
841 done:
842 // Flush remaining scan work credit.
843 if gcw.scanWork > 0 {
844 atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
845 if flushBgCredit {
846 gcFlushBgCredit(gcw.scanWork - initScanWork)
848 gcw.scanWork = 0
852 // gcDrainN blackens grey objects until it has performed roughly
853 // scanWork units of scan work or the G is preempted. This is
854 // best-effort, so it may perform less work if it fails to get a work
855 // buffer. Otherwise, it will perform at least scanWork units of work, but
856 // may perform more because scanning is always done in whole object
857 // increments. It returns the amount of scan work performed.
859 // The caller goroutine must be in a preemptible state (e.g.,
860 // _Gwaiting) to prevent deadlocks during stack scanning. As a
861 // consequence, this must be called on the system stack.
863 //go:nowritebarrier
864 //go:systemstack
865 func gcDrainN(gcw *gcWork, scanWork int64) int64 {
866 if !writeBarrier.needed {
867 throw("gcDrainN phase incorrect")
870 // There may already be scan work on the gcw, which we don't
871 // want to claim was done by this call.
872 workFlushed := -gcw.scanWork
874 gp := getg().m.curg
875 for !gp.preempt && workFlushed+gcw.scanWork < scanWork {
876 // See gcDrain comment.
877 if work.full == 0 {
878 gcw.balance()
881 // This might be a good place to add prefetch code...
882 // if(wbuf.nobj > 4) {
883 // PREFETCH(wbuf->obj[wbuf.nobj - 3]);
884 // }
886 b := gcw.tryGetFast()
887 if b == 0 {
888 b = gcw.tryGet()
891 if b == 0 {
892 // Try to do a root job.
894 // TODO: Assists should get credit for this
895 // work.
896 if work.markrootNext < work.markrootJobs {
897 job := atomic.Xadd(&work.markrootNext, +1) - 1
898 if job < work.markrootJobs {
899 markroot(gcw, job)
900 continue
903 // No heap or root jobs.
904 break
906 scanobject(b, gcw)
908 // Flush background scan work credit.
909 if gcw.scanWork >= gcCreditSlack {
910 atomic.Xaddint64(&gcController.scanWork, gcw.scanWork)
911 workFlushed += gcw.scanWork
912 gcw.scanWork = 0
916 // Unlike gcDrain, there's no need to flush remaining work
917 // here because this never flushes to bgScanCredit and
918 // gcw.dispose will flush any remaining work to scanWork.
920 return workFlushed + gcw.scanWork
923 // scanblock scans b as scanobject would, but using an explicit
924 // pointer bitmap instead of the heap bitmap.
926 // This is used to scan non-heap roots, so it does not update
927 // gcw.bytesMarked or gcw.scanWork.
929 //go:nowritebarrier
930 func scanblock(b0, n0 uintptr, ptrmask *uint8, gcw *gcWork) {
931 // Use local copies of original parameters, so that a stack trace
932 // due to one of the throws below shows the original block
933 // base and extent.
934 b := b0
935 n := n0
937 arena_start := mheap_.arena_start
938 arena_used := mheap_.arena_used
940 for i := uintptr(0); i < n; {
941 // Find bits for the next word.
942 bits := uint32(*addb(ptrmask, i/(sys.PtrSize*8)))
943 if bits == 0 {
944 i += sys.PtrSize * 8
945 continue
947 for j := 0; j < 8 && i < n; j++ {
948 if bits&1 != 0 {
949 // Same work as in scanobject; see comments there.
950 obj := *(*uintptr)(unsafe.Pointer(b + i))
951 if obj != 0 && arena_start <= obj && obj < arena_used {
952 if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, false); obj != 0 {
953 greyobject(obj, b, i, hbits, span, gcw, objIndex, false)
957 bits >>= 1
958 i += sys.PtrSize
963 // scanobject scans the object starting at b, adding pointers to gcw.
964 // b must point to the beginning of a heap object or an oblet.
965 // scanobject consults the GC bitmap for the pointer mask and the
966 // spans for the size of the object.
968 //go:nowritebarrier
969 func scanobject(b uintptr, gcw *gcWork) {
970 // Note that arena_used may change concurrently during
971 // scanobject and hence scanobject may encounter a pointer to
972 // a newly allocated heap object that is *not* in
973 // [start,used). It will not mark this object; however, we
974 // know that it was just installed by a mutator, which means
975 // that mutator will execute a write barrier and take care of
976 // marking it. This is even more pronounced on relaxed memory
977 // architectures since we access arena_used without barriers
978 // or synchronization, but the same logic applies.
979 arena_start := mheap_.arena_start
980 arena_used := mheap_.arena_used
982 // Find the bits for b and the size of the object at b.
984 // b is either the beginning of an object, in which case this
985 // is the size of the object to scan, or it points to an
986 // oblet, in which case we compute the size to scan below.
987 hbits := heapBitsForAddr(b)
988 s := spanOfUnchecked(b)
989 n := s.elemsize
990 if n == 0 {
991 throw("scanobject n == 0")
994 if n > maxObletBytes {
995 // Large object. Break into oblets for better
996 // parallelism and lower latency.
997 if b == s.base() {
998 // It's possible this is a noscan object (not
999 // from greyobject, but from other code
1000 // paths), in which case we must *not* enqueue
1001 // oblets since their bitmaps will be
1002 // uninitialized.
1003 if s.spanclass.noscan() {
1004 // Bypass the whole scan.
1005 gcw.bytesMarked += uint64(n)
1006 return
1009 // Enqueue the other oblets to scan later.
1010 // Some oblets may be in b's scalar tail, but
1011 // these will be marked as "no more pointers",
1012 // so we'll drop out immediately when we go to
1013 // scan those.
1014 for oblet := b + maxObletBytes; oblet < s.base()+s.elemsize; oblet += maxObletBytes {
1015 if !gcw.putFast(oblet) {
1016 gcw.put(oblet)
1021 // Compute the size of the oblet. Since this object
1022 // must be a large object, s.base() is the beginning
1023 // of the object.
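// For example, a ~300 KB object is scanned as oblets of roughly
// 128 KB, 128 KB, and 44 KB.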
1024 n = s.base() + s.elemsize - b
1025 if n > maxObletBytes {
1026 n = maxObletBytes
1030 var i uintptr
1031 for i = 0; i < n; i += sys.PtrSize {
1032 // Find bits for this word.
1033 if i != 0 {
1034 // Avoid needless hbits.next() on last iteration.
1035 hbits = hbits.next()
1037 // Load bits once. See CL 22712 and issue 16973 for discussion.
1038 bits := hbits.bits()
1039 // During checkmarking, 1-word objects store the checkmark
1040 // in the type bit for the one word. The only one-word objects
1041 // are pointers, or else they'd be merged with other non-pointer
1042 // data into larger allocations.
1043 if i != 1*sys.PtrSize && bits&bitScan == 0 {
1044 break // no more pointers in this object
1046 if bits&bitPointer == 0 {
1047 continue // not a pointer
1050 // Work here is duplicated in scanblock and above.
1051 // If you make changes here, make changes there too.
1052 obj := *(*uintptr)(unsafe.Pointer(b + i))
1054 // At this point we have extracted the next potential pointer.
1055 // Check if it points into heap and not back at the current object.
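// Because obj and b are unsigned, obj-b wraps to a huge value when
// obj < b, so the single obj-b >= n comparison below rejects exactly
// the pointers that land inside [b, b+n).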
1056 if obj != 0 && arena_start <= obj && obj < arena_used && obj-b >= n {
1057 // Mark the object.
1058 if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, false); obj != 0 {
1059 greyobject(obj, b, i, hbits, span, gcw, objIndex, false)
1063 gcw.bytesMarked += uint64(n)
1064 gcw.scanWork += int64(i)
1067 //go:linkname scanstackblock runtime.scanstackblock
1069 // scanstackblock is called by the stack scanning code in C to
1070 // actually find and mark pointers in the stack block. This is like
1071 // scanblock, but we scan the stack conservatively, so there is no
1072 // bitmask of pointers.
1073 func scanstackblock(b, n uintptr, gcw *gcWork) {
1074 arena_start := mheap_.arena_start
1075 arena_used := mheap_.arena_used
1077 for i := uintptr(0); i < n; i += sys.PtrSize {
1078 // Same work as in scanobject; see comments there.
1079 obj := *(*uintptr)(unsafe.Pointer(b + i))
1080 if obj != 0 && arena_start <= obj && obj < arena_used {
1081 if obj, hbits, span, objIndex := heapBitsForObject(obj, b, i, true); obj != 0 {
1082 greyobject(obj, b, i, hbits, span, gcw, objIndex, true)
1088 // Shade the object if it isn't already.
1089 // The object is not nil and known to be in the heap.
1090 // Preemption must be disabled.
1091 //go:nowritebarrier
1092 func shade(b uintptr) {
1093 // shade can be called to shade a pointer found on the stack,
1094 // so pass forStack as true to heapBitsForObject and greyobject.
1095 if obj, hbits, span, objIndex := heapBitsForObject(b, 0, 0, true); obj != 0 {
1096 gcw := &getg().m.p.ptr().gcw
1097 greyobject(obj, 0, 0, hbits, span, gcw, objIndex, true)
1098 if gcphase == _GCmarktermination || gcBlackenPromptly {
1099 // Ps aren't allowed to cache work during mark
1100 // termination.
1101 gcw.dispose()
1106 // obj is the start of an object with mark mbits.
1107 // If it isn't already marked, mark it and enqueue into gcw.
1108 // base and off are for debugging only and could be removed.
1110 // See also wbBufFlush1, which partially duplicates this logic.
1112 //go:nowritebarrierrec
1113 func greyobject(obj, base, off uintptr, hbits heapBits, span *mspan, gcw *gcWork, objIndex uintptr, forStack bool) {
1114 // obj should be start of allocation, and so must be at least pointer-aligned.
1115 if obj&(sys.PtrSize-1) != 0 {
1116 throw("greyobject: obj not pointer-aligned")
1118 mbits := span.markBitsForIndex(objIndex)
1120 if useCheckmark {
1121 if !mbits.isMarked() {
1122 // Stack scanning is conservative, so we can
1123 // see a reference to an object not previously
1124 // found. Assume the object was correctly not
1125 // marked and ignore the pointer.
1126 if forStack {
1127 return
1129 printlock()
1130 print("runtime:greyobject: checkmarks finds unexpected unmarked object obj=", hex(obj), "\n")
1131 print("runtime: found obj at *(", hex(base), "+", hex(off), ")\n")
1133 // Dump the source (base) object
1134 gcDumpObject("base", base, off)
1136 // Dump the object
1137 gcDumpObject("obj", obj, ^uintptr(0))
1139 getg().m.traceback = 2
1140 throw("checkmark found unmarked object")
1142 if hbits.isCheckmarked(span.elemsize) {
1143 return
1145 hbits.setCheckmarked(span.elemsize)
1146 if !hbits.isCheckmarked(span.elemsize) {
1147 throw("setCheckmarked and isCheckmarked disagree")
1149 } else {
1150 // Stack scanning is conservative, so we can see a
1151 // pointer to a free object. Assume the object was
1152 // correctly freed and we must ignore the pointer.
1153 if forStack && span.isFree(objIndex) {
1154 return
1157 if debug.gccheckmark > 0 && span.isFree(objIndex) {
1158 print("runtime: marking free object ", hex(obj), " found at *(", hex(base), "+", hex(off), ")\n")
1159 gcDumpObject("base", base, off)
1160 gcDumpObject("obj", obj, ^uintptr(0))
1161 getg().m.traceback = 2
1162 throw("marking free object")
1165 // If marked we have nothing to do.
1166 if mbits.isMarked() {
1167 return
1169 // mbits.setMarked() // Avoid extra call overhead with manual inlining.
1170 atomic.Or8(mbits.bytep, mbits.mask)
1171 // If this is a noscan object, fast-track it to black
1172 // instead of greying it.
1173 if span.spanclass.noscan() {
1174 gcw.bytesMarked += uint64(span.elemsize)
1175 return
1179 // Queue the obj for scanning. The PREFETCH(obj) logic has been removed but
1180 // seems like a nice optimization that can be added back in.
1181 // There needs to be time between the PREFETCH and the use.
1182 // Previously we put the obj in an 8-element buffer that is drained at a rate
1183 // to give the PREFETCH time to do its work.
1184 // Use of PREFETCHNTA might be more appropriate than PREFETCH.
1185 if !gcw.putFast(obj) {
1186 gcw.put(obj)
1190 // gcDumpObject dumps the contents of obj for debugging and marks the
1191 // field at byte offset off in obj.
1192 func gcDumpObject(label string, obj, off uintptr) {
1193 if obj < mheap_.arena_start || obj >= mheap_.arena_used {
1194 print(label, "=", hex(obj), " is not in the Go heap\n")
1195 return
1197 k := obj >> _PageShift
1198 x := k
1199 x -= mheap_.arena_start >> _PageShift
1200 s := mheap_.spans[x]
1201 print(label, "=", hex(obj), " k=", hex(k))
1202 if s == nil {
1203 print(" s=nil\n")
1204 return
1206 print(" s.base()=", hex(s.base()), " s.limit=", hex(s.limit), " s.spanclass=", s.spanclass, " s.elemsize=", s.elemsize, " s.state=")
1207 if 0 <= s.state && int(s.state) < len(mSpanStateNames) {
1208 print(mSpanStateNames[s.state], "\n")
1209 } else {
1210 print("unknown(", s.state, ")\n")
1213 skipped := false
1214 size := s.elemsize
1215 if s.state == _MSpanManual && size == 0 {
1216 // We're printing something from a stack frame. We
1217 // don't know how big it is, so just show up to and
1218 // including off.
1219 size = off + sys.PtrSize
1221 for i := uintptr(0); i < size; i += sys.PtrSize {
1222 // For big objects, just print the beginning (because
1223 // that usually hints at the object's type) and the
1224 // fields around off.
1225 if !(i < 128*sys.PtrSize || off-16*sys.PtrSize < i && i < off+16*sys.PtrSize) {
1226 skipped = true
1227 continue
1229 if skipped {
1230 print(" ...\n")
1231 skipped = false
1233 print(" *(", label, "+", i, ") = ", hex(*(*uintptr)(unsafe.Pointer(obj + i))))
1234 if i == off {
1235 print(" <==")
1237 print("\n")
1239 if skipped {
1240 print(" ...\n")
1244 // gcmarknewobject marks a newly allocated object black. obj must
1245 // not contain any non-nil pointers.
1247 // This is nosplit so it can manipulate a gcWork without preemption.
1249 //go:nowritebarrier
1250 //go:nosplit
1251 func gcmarknewobject(obj, size, scanSize uintptr) {
1252 if useCheckmark && !gcBlackenPromptly { // The world should be stopped so this should not happen.
1253 throw("gcmarknewobject called while doing checkmark")
1255 markBitsForAddr(obj).setMarked()
1256 gcw := &getg().m.p.ptr().gcw
1257 gcw.bytesMarked += uint64(size)
1258 gcw.scanWork += int64(scanSize)
1259 if gcBlackenPromptly {
1260 // There shouldn't be anything in the work queue, but
1261 // we still need to flush stats.
1262 gcw.dispose()
1266 // gcMarkTinyAllocs greys all active tiny alloc blocks.
1268 // The world must be stopped.
1269 func gcMarkTinyAllocs() {
1270 for _, p := range allp {
1271 c := p.mcache
1272 if c == nil || c.tiny == 0 {
1273 continue
1275 _, hbits, span, objIndex := heapBitsForObject(c.tiny, 0, 0, false)
1276 gcw := &p.gcw
1277 greyobject(c.tiny, 0, 0, hbits, span, gcw, objIndex, false)
1278 if gcBlackenPromptly {
1279 gcw.dispose()
1284 // Checkmarking
1286 // To help debug the concurrent GC we remark with the world
1287 // stopped, ensuring that any object encountered has its normal
1288 // mark bit set. To do this we use an orthogonal bit
1289 // pattern to indicate the object is marked. The following pattern
1290 // uses the upper two bits in the object's boundary nibble.
1291 // 01: scalar not marked
1292 // 10: pointer not marked
1293 // 11: pointer marked
1294 // 00: scalar marked
1295 // XORing with 01 will flip the pattern from marked to unmarked and vice versa.
1296 // The higher bit is 1 for pointers and 0 for scalars, whether the object
1297 // is marked or not.
1298 // The first nibble no longer holds the typeDead pattern indicating that
1299 // there are no more pointers in the object. This information is held
1300 // in the second nibble.
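// For example, XORing the unmarked scalar pattern 01 with 01 yields
// 00 (scalar marked), and XORing the unmarked pointer pattern 10 with
// 01 yields 11 (pointer marked).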
1302 // If useCheckmark is true, marking of an object uses the
1303 // checkmark bits (encoding above) instead of the standard
1304 // mark bits.
1305 var useCheckmark = false
1307 //go:nowritebarrier
1308 func initCheckmarks() {
1309 useCheckmark = true
1310 for _, s := range mheap_.allspans {
1311 if s.state == _MSpanInUse {
1312 heapBitsForSpan(s.base()).initCheckmarkSpan(s.layout())
1317 func clearCheckmarks() {
1318 useCheckmark = false
1319 for _, s := range mheap_.allspans {
1320 if s.state == _MSpanInUse {
1321 heapBitsForSpan(s.base()).clearCheckmarkSpan(s.layout())