// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: type and heap bitmaps.
//
// Stack, data, and bss bitmaps
//
// Stack frames and global variables in the data and bss sections are
// described by bitmaps with 1 bit per pointer-sized word. A "1" bit
// means the word is a live pointer to be visited by the GC (referred to
// as "pointer"). A "0" bit means the word should be ignored by GC
// (referred to as "scalar", though it could be a dead pointer value).
//
// Heap bitmap
//
// The heap bitmap comprises 2 bits for each pointer-sized word in the heap,
// stored in the heapArena metadata backing each heap arena.
// That is, if ha is the heapArena for the arena starting at start,
// then ha.bitmap[0] holds the 2-bit entries for the four words start
// through start+3*ptrSize, ha.bitmap[1] holds the entries for
// start+4*ptrSize through start+7*ptrSize, and so on.
//
// In each 2-bit entry, the lower bit is a pointer/scalar bit, just
// like in the stack/data bitmaps described above. The upper bit
// indicates scan/dead: a "1" value ("scan") indicates that there may
// be pointers in later words of the allocation, and a "0" value
// ("dead") indicates there are no more pointers in the allocation. If
// the upper bit is 0, the lower bit must also be 0, and this
// indicates scanning can ignore the rest of the allocation.
//
// The 2-bit entries are split when written into the byte, so that the top half
// of the byte contains 4 high (scan) bits and the bottom half contains 4 low
// (pointer) bits. This form allows a copy from the 1-bit to the 4-bit form to
// keep the pointer bits contiguous, instead of having to space them out.
//
// The code makes use of the fact that the zero value for a heap
// bitmap means scalar/dead. This property must be preserved when
// modifying the encoding.
//
// The bitmap for noscan spans is not maintained. Code must ensure
// that an object is scannable before consulting its bitmap by
// checking either the noscan bit in the span or by consulting its
// type's information.
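
// Illustrative sketch (not part of the runtime): with the layout above,
// the 2-bit entry for the heap word at addr inside heapArena ha is read as
//
//	entry := ha.bitmap[(addr/(ptrSize*4))%heapArenaBitmapBytes]
//	shift := (addr / ptrSize) & 3
//	isPointer := entry>>shift&1 != 0   // low nibble: pointer/scalar bits
//	canScan := entry>>(shift+4)&1 != 0 // high nibble: scan/dead bits
//
// which is what heapBitsForAddr, heapBits.isPointer, and
// heapBits.morePointers below compute.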

package runtime

import (
	"internal/goarch"
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

const (
	bitPointer = 1 << 0
	bitScan    = 1 << 4

	heapBitsShift      = 1     // shift offset between successive bitPointer or bitScan entries
	wordsPerBitmapByte = 8 / 2 // heap words described by one bitmap byte

	// all scan/pointer bits in a byte
	bitScanAll    = bitScan | bitScan<<heapBitsShift | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift)
	bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
)

// addb returns the byte pointer p+n.
//go:nowritebarrier
//go:nosplit
func addb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
}

// subtractb returns the byte pointer p-n.
//go:nowritebarrier
//go:nosplit
func subtractb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, -n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
}

// add1 returns the byte pointer p+1.
//go:nowritebarrier
//go:nosplit
func add1(p *byte) *byte {
	// Note: wrote out full expression instead of calling addb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
}

// subtract1 returns the byte pointer p-1.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func subtract1(p *byte) *byte {
	// Note: wrote out full expression instead of calling subtractb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}

// heapBits provides access to the bitmap bits for a single heap word.
// The methods on heapBits take value receivers so that the compiler
// can more easily inline calls to those methods and registerize the
// struct fields independently.
type heapBits struct {
	bitp  *uint8
	shift uint32
	arena uint32 // Index of heap arena containing bitp
	last  *uint8 // Last byte of arena's bitmap
}

// Make the compiler check that heapBits.arena is large enough to hold
// the maximum arena frame number.
var _ = heapBits{arena: (1<<heapAddrBits)/heapArenaBytes - 1}
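
// Illustrative note (not part of the runtime): on a typical 64-bit
// configuration (assuming heapAddrBits = 48 and heapArenaBytes = 64<<20),
// the expression above is (1<<48)/(64<<20) - 1 = 1<<22 - 1, which fits
// comfortably in uint32, so the compile-time check passes.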

// markBits provides access to the mark bit for an object in the heap.
// bytep points to the byte holding the mark bit.
// mask is a byte with a single bit set that can be &ed with *bytep
// to see if the bit has been set.
// *m.bytep&m.mask != 0 indicates the mark bit is set.
// index can be used along with span information to generate
// the address of the object in the heap.
// We maintain one set of mark bits for allocation and one for
// marking purposes.
type markBits struct {
	bytep *uint8
	mask  uint8
	index uintptr
}

//go:nosplit
func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits {
	bytep, mask := s.allocBits.bitp(allocBitIndex)
	return markBits{bytep, mask, allocBitIndex}
}

// refillAllocCache takes 8 bytes of s.allocBits starting at whichByte
// and negates them so that ctz (count trailing zeros) instructions
// can be used. It then places these 8 bytes into the cached 64 bit
// s.allocCache.
func (s *mspan) refillAllocCache(whichByte uintptr) {
	bytes := (*[8]uint8)(unsafe.Pointer(s.allocBits.bytep(whichByte)))
	aCache := uint64(0)
	aCache |= uint64(bytes[0])
	aCache |= uint64(bytes[1]) << (1 * 8)
	aCache |= uint64(bytes[2]) << (2 * 8)
	aCache |= uint64(bytes[3]) << (3 * 8)
	aCache |= uint64(bytes[4]) << (4 * 8)
	aCache |= uint64(bytes[5]) << (5 * 8)
	aCache |= uint64(bytes[6]) << (6 * 8)
	aCache |= uint64(bytes[7]) << (7 * 8)
	s.allocCache = ^aCache
}

// nextFreeIndex returns the index of the next free object in s at
// or after s.freeindex.
// There are hardware instructions that can be used to make this
// faster if profiling warrants it.
func (s *mspan) nextFreeIndex() uintptr {
	sfreeindex := s.freeindex
	snelems := s.nelems
	if sfreeindex == snelems {
		return sfreeindex
	}
	if sfreeindex > snelems {
		throw("s.freeindex > s.nelems")
	}

	aCache := s.allocCache

	bitIndex := sys.Ctz64(aCache)
	for bitIndex == 64 {
		// Move index to start of next cached bits.
		sfreeindex = (sfreeindex + 64) &^ (64 - 1)
		if sfreeindex >= snelems {
			s.freeindex = snelems
			return snelems
		}
		whichByte := sfreeindex / 8
		// Refill s.allocCache with the next 64 alloc bits.
		s.refillAllocCache(whichByte)
		aCache = s.allocCache
		bitIndex = sys.Ctz64(aCache)
		// nothing available in cached bits
		// grab the next 8 bytes and try again.
	}
	result := sfreeindex + uintptr(bitIndex)
	if result >= snelems {
		s.freeindex = snelems
		return snelems
	}

	s.allocCache >>= uint(bitIndex + 1)
	sfreeindex = result + 1

	if sfreeindex%64 == 0 && sfreeindex != snelems {
		// We just incremented s.freeindex so it isn't 0.
		// As each 1 in s.allocCache was encountered and used for allocation
		// it was shifted away. At this point s.allocCache contains all 0s.
		// Refill s.allocCache so that it corresponds
		// to the bits at s.allocBits starting at s.freeindex.
		whichByte := sfreeindex / 8
		s.refillAllocCache(whichByte)
	}
	s.freeindex = sfreeindex
	return result
}
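
// Illustrative sketch (not part of the runtime) of the allocCache
// convention used above: allocBits stores a 1 for allocated slots, and
// refillAllocCache stores the complement, so in allocCache a 1 bit means
// "free" and Ctz64 returns the first free object index directly:
//
//	aCache := ^uint64(0b0111) // slots 0-2 allocated, everything after free
//	free := sys.Ctz64(aCache) // == 3, the first free slot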

// isFree reports whether the index'th object in s is unallocated.
//
// The caller must ensure s.state is mSpanInUse, and there must have
// been no preemption points since ensuring this (which could allow a
// GC transition, which would allow the state to change).
func (s *mspan) isFree(index uintptr) bool {
	if index < s.freeindex {
		return false
	}
	bytep, mask := s.allocBits.bitp(index)
	return *bytep&mask == 0
}

// divideByElemSize returns n/s.elemsize.
// n must be within [0, s.npages*_PageSize),
// or may be exactly s.npages*_PageSize
// if s.elemsize is from sizeclasses.go.
func (s *mspan) divideByElemSize(n uintptr) uintptr {
	const doubleCheck = false

	// See explanation in mksizeclasses.go's computeDivMagic.
	q := uintptr((uint64(n) * uint64(s.divMul)) >> 32)

	if doubleCheck && q != n/s.elemsize {
		println(n, "/", s.elemsize, "should be", n/s.elemsize, "but got", q)
		throw("bad magic division")
	}
	return q
}
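
// Illustrative arithmetic (not part of the runtime): for elemsize c = 48,
// the magic multiplier is approximately ⌈2^32/c⌉ = 89478486 (see
// computeDivMagic in mksizeclasses.go for the exact construction), and
// then for n = 100:
//
//	q = (100 * 89478486) >> 32 = 2 = 100/48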

func (s *mspan) objIndex(p uintptr) uintptr {
	return s.divideByElemSize(p - s.base())
}

func markBitsForAddr(p uintptr) markBits {
	s := spanOf(p)
	objIndex := s.objIndex(p)
	return s.markBitsForIndex(objIndex)
}

func (s *mspan) markBitsForIndex(objIndex uintptr) markBits {
	bytep, mask := s.gcmarkBits.bitp(objIndex)
	return markBits{bytep, mask, objIndex}
}

func (s *mspan) markBitsForBase() markBits {
	return markBits{(*uint8)(s.gcmarkBits), uint8(1), 0}
}

// isMarked reports whether mark bit m is set.
func (m markBits) isMarked() bool {
	return *m.bytep&m.mask != 0
}

// setMarked sets the marked bit in the markbits, atomically.
func (m markBits) setMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.Or8(m.bytep, m.mask)
}

// setMarkedNonAtomic sets the marked bit in the markbits, non-atomically.
func (m markBits) setMarkedNonAtomic() {
	*m.bytep |= m.mask
}

// clearMarked clears the marked bit in the markbits, atomically.
func (m markBits) clearMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.And8(m.bytep, ^m.mask)
}

// markBitsForSpan returns the markBits for the span base address base.
func markBitsForSpan(base uintptr) (mbits markBits) {
	mbits = markBitsForAddr(base)
	if mbits.mask != 1 {
		throw("markBitsForSpan: unaligned start")
	}
	return mbits
}

// advance advances the markBits to the next object in the span.
func (m *markBits) advance() {
	if m.mask == 1<<7 {
		m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
		m.mask = 1
	} else {
		m.mask = m.mask << 1
	}
	m.index++
}

// heapBitsForAddr returns the heapBits for the address addr.
// The caller must ensure addr is in an allocated span.
// In particular, be careful not to point past the end of an object.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func heapBitsForAddr(addr uintptr) (h heapBits) {
	// 2 bits per word, 4 pairs per byte, and a mask is hard coded.
	arena := arenaIndex(addr)
	ha := mheap_.arenas[arena.l1()][arena.l2()]
	// The compiler uses a load for nil checking ha, but in this
	// case we'll almost never hit that cache line again, so it
	// makes more sense to do a value check.
	if ha == nil {
		// addr is not in the heap. Return nil heapBits, which
		// we expect to crash in the caller.
		return
	}
	h.bitp = &ha.bitmap[(addr/(goarch.PtrSize*4))%heapArenaBitmapBytes]
	h.shift = uint32((addr / goarch.PtrSize) & 3)
	h.arena = uint32(arena)
	h.last = &ha.bitmap[len(ha.bitmap)-1]
	return
}

// clobberdeadPtr is a special value that is used by the compiler to
// clobber dead stack slots, when -clobberdead flag is set.
const clobberdeadPtr = uintptr(0xdeaddead | 0xdeaddead<<((^uintptr(0)>>63)*32))

// badPointer throws bad pointer in heap panic.
func badPointer(s *mspan, p, refBase, refOff uintptr) {
	// Typically this indicates an incorrect use
	// of unsafe or cgo to store a bad pointer in
	// the Go heap. It may also indicate a runtime
	// bug.
	//
	// TODO(austin): We could be more aggressive
	// and detect pointers to unallocated objects
	// in allocated spans.
	printlock()
	print("runtime: pointer ", hex(p))
	if s != nil {
		state := s.state.get()
		if state != mSpanInUse {
			print(" to unallocated span")
		} else {
			print(" to unused region of span")
		}
		print(" span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", state)
	}
	print("\n")
	if refBase != 0 {
		print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
		gcDumpObject("object", refBase, refOff)
	}
	getg().m.traceback = 2
	throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
}

// findObject returns the base address for the heap object containing
// the address p, the object's span, and the index of the object in s.
// If p does not point into a heap object, it returns base == 0.
//
// If p is an invalid heap pointer and debug.invalidptr != 0,
// findObject panics.
//
// For gccgo, the forStack parameter is true if the value came from the stack.
// The stack is collected conservatively and may contain invalid pointers.
//
// refBase and refOff optionally give the base address of the object
// in which the pointer p was found and the byte offset at which it
// was found. These are used for error reporting.
//
// It is nosplit so it is safe for p to be a pointer to the current goroutine's stack.
// Since p is a uintptr, it would not be adjusted if the stack were to move.
//go:nosplit
func findObject(p, refBase, refOff uintptr, forStack bool) (base uintptr, s *mspan, objIndex uintptr) {
	s = spanOf(p)
	// If s is nil, the virtual address has never been part of the heap.
	// This pointer may be to some mmap'd region, so we allow it.
	if s == nil {
		if (GOARCH == "amd64" || GOARCH == "arm64") && p == clobberdeadPtr && debug.invalidptr != 0 {
			// Crash if clobberdeadPtr is seen. Only on AMD64 and ARM64 for now,
			// as they are the only platforms where the compiler's clobberdead mode is
			// implemented. On these platforms clobberdeadPtr cannot be a valid address.
			badPointer(s, p, refBase, refOff)
		}
		return
	}
	// If p is a bad pointer, it may not be in s's bounds.
	//
	// Check s.state to synchronize with span initialization
	// before checking other fields. See also spanOfHeap.
	if state := s.state.get(); state != mSpanInUse || p < s.base() || p >= s.limit {
		// Pointers into stacks are also ok, the runtime manages these explicitly.
		if state == mSpanManual || forStack {
			return
		}
		// The following ensures that we are rigorous about what data
		// structures hold valid pointers.
		if debug.invalidptr != 0 {
			badPointer(s, p, refBase, refOff)
		}
		return
	}

	// A span can be entered in mheap_.spans, and be set
	// to mSpanInUse, before it is fully initialized.
	// All we need in practice is allocBits and gcmarkBits,
	// so make sure they are set.
	if s.allocBits == nil || s.gcmarkBits == nil {
		return
	}

	objIndex = s.objIndex(p)
	base = s.base() + objIndex*s.elemsize
	return
}
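
// Illustrative sketch (not part of the runtime) of the typical caller
// pattern, as in the GC's scan loop:
//
//	if base, span, objIndex := findObject(p, 0, 0, false); base != 0 {
//		mbits := span.markBitsForIndex(objIndex)
//		if !mbits.isMarked() {
//			// grey the object
//		}
//	}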

// verifyNotInHeapPtr reports whether converting the not-in-heap pointer into an unsafe.Pointer is ok.
//go:linkname reflect_verifyNotInHeapPtr reflect.verifyNotInHeapPtr
func reflect_verifyNotInHeapPtr(p uintptr) bool {
	// Conversion to a pointer is ok as long as findObject above does not call badPointer.
	// Since we're already promised that p doesn't point into the heap, just disallow heap
	// pointers and the special clobbered pointer.
	return spanOf(p) == nil && p != clobberdeadPtr
}

// next returns the heapBits describing the next pointer-sized word in memory.
// That is, if h describes address p, h.next() describes p+ptrSize.
// Note that next does not modify h. The caller must record the result.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) next() heapBits {
	if h.shift < 3*heapBitsShift {
		h.shift += heapBitsShift
	} else if h.bitp != h.last {
		h.bitp, h.shift = add1(h.bitp), 0
	} else {
		// Move to the next arena.
		return h.nextArena()
	}
	return h
}

// nextArena advances h to the beginning of the next heap arena.
//
// This is a slow-path helper to next. gc's inliner knows that
// heapBits.next can be inlined even though it calls this. This is
// marked noinline so it doesn't get inlined into next and cause next
// to be too big to inline.
//
//go:nosplit
//go:noinline
func (h heapBits) nextArena() heapBits {
	h.arena++
	ai := arenaIdx(h.arena)
	l2 := mheap_.arenas[ai.l1()]
	if l2 == nil {
		// We just passed the end of the object, which
		// was also the end of the heap. Poison h. It
		// should never be dereferenced at this point.
		h.bitp, h.last = nil, nil
	} else if ha := l2[ai.l2()]; ha == nil {
		h.bitp, h.last = nil, nil
	} else {
		h.bitp, h.shift = &ha.bitmap[0], 0
		h.last = &ha.bitmap[len(ha.bitmap)-1]
	}
	return h
}

// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
// That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
// h.forward(1) is equivalent to h.next(), just slower.
// Note that forward does not modify h. The caller must record the result.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) forward(n uintptr) heapBits {
	n += uintptr(h.shift) / heapBitsShift
	nbitp := uintptr(unsafe.Pointer(h.bitp)) + n/4
	h.shift = uint32(n%4) * heapBitsShift
	if nbitp <= uintptr(unsafe.Pointer(h.last)) {
		h.bitp = (*uint8)(unsafe.Pointer(nbitp))
		return h
	}

	// We're in a new heap arena.
	past := nbitp - (uintptr(unsafe.Pointer(h.last)) + 1)
	h.arena += 1 + uint32(past/heapArenaBitmapBytes)
	ai := arenaIdx(h.arena)
	if l2 := mheap_.arenas[ai.l1()]; l2 != nil && l2[ai.l2()] != nil {
		a := l2[ai.l2()]
		h.bitp = &a.bitmap[past%heapArenaBitmapBytes]
		h.last = &a.bitmap[len(a.bitmap)-1]
	} else {
		h.bitp, h.last = nil, nil
	}
	return h
}

// forwardOrBoundary is like forward, but stops at boundaries between
// contiguous sections of the bitmap. It returns the number of words
// advanced over, which will be <= n.
func (h heapBits) forwardOrBoundary(n uintptr) (heapBits, uintptr) {
	maxn := 4 * ((uintptr(unsafe.Pointer(h.last)) + 1) - uintptr(unsafe.Pointer(h.bitp)))
	if n > maxn {
		n = maxn
	}
	return h.forward(n), n
}

// bits returns the heap bits for the current word.
// The caller can test morePointers and isPointer by &-ing with bitScan and bitPointer.
// The result includes in its higher bits the bits for subsequent words
// described by the same bitmap byte.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) bits() uint32 {
	// The (shift & 31) eliminates a test and conditional branch
	// from the generated code.
	return uint32(*h.bitp) >> (h.shift & 31)
}

// morePointers reports whether this word or any later word of the object
// may contain a pointer (that is, whether the scan bit is set).
// h must not describe the second word of the object.
func (h heapBits) morePointers() bool {
	return h.bits()&bitScan != 0
}

// isPointer reports whether the heap bits describe a pointer word.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) isPointer() bool {
	return h.bits()&bitPointer != 0
}
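
// Illustrative sketch (not part of the runtime): isPointer and
// morePointers combine into the word-by-word walk used by getgcmask at
// the end of this file:
//
//	h := heapBitsForAddr(base)
//	for i := uintptr(0); i < n; i += goarch.PtrSize {
//		if h.isPointer() {
//			// word base+i holds a live pointer
//		}
//		if !h.morePointers() {
//			break // rest of the object is pointer-free
//		}
//		h = h.next()
//	}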

// bulkBarrierPreWrite executes a write barrier
// for every pointer slot in the memory range [src, src+size),
// using pointer/scalar information from [dst, dst+size).
// This executes the write barriers necessary before a memmove.
// src, dst, and size must be pointer-aligned.
// The range [dst, dst+size) must lie within a single object.
// It does not perform the actual writes.
//
// As a special case, src == 0 indicates that this is being used for a
// memclr. bulkBarrierPreWrite will pass 0 for the src of each write
// barrier.
//
// Callers should call bulkBarrierPreWrite immediately before
// calling memmove(dst, src, size). This function is marked nosplit
// to avoid being preempted; the GC must not stop the goroutine
// between the memmove and the execution of the barriers.
// The caller is also responsible for cgo pointer checks if this
// may be writing Go pointers into non-Go memory.
//
// The pointer bitmap is not maintained for allocations containing
// no pointers at all; any caller of bulkBarrierPreWrite must first
// make sure the underlying allocation contains pointers, usually
// by checking typ.ptrdata.
//
// Callers must perform cgo checks if writeBarrier.cgo.
//
//go:nosplit
func bulkBarrierPreWrite(dst, src, size uintptr) {
	if (dst|src|size)&(goarch.PtrSize-1) != 0 {
		throw("bulkBarrierPreWrite: unaligned arguments")
	}
	if !writeBarrier.needed {
		return
	}
	if s := spanOf(dst); s == nil {
		// If dst is a global, use the data or BSS bitmaps to
		// execute write barriers.
		lo := 0
		hi := len(gcRootsIndex)
		for lo < hi {
			m := lo + (hi-lo)/2
			pr := gcRootsIndex[m]
			addr := uintptr(pr.decl)
			if addr <= dst && dst < addr+pr.size {
				if dst < addr+pr.ptrdata {
					bulkBarrierBitmap(dst, src, size, dst-addr, pr.gcdata)
				}
				return
			}
			if dst < addr {
				hi = m
			} else {
				lo = m + 1
			}
		}
		return
	} else if s.state.get() != mSpanInUse || dst < s.base() || s.limit <= dst {
		// dst was heap memory at some point, but isn't now.
		// It can't be a global. It must be either our stack,
		// or in the case of direct channel sends, it could be
		// another stack. Either way, no need for barriers.
		// This will also catch if dst is in a freed span,
		// though that should never happen.
		return
	}

	buf := &getg().m.p.ptr().wbBuf
	h := heapBitsForAddr(dst)
	if src == 0 {
		for i := uintptr(0); i < size; i += goarch.PtrSize {
			if h.isPointer() {
				dstx := (*uintptr)(unsafe.Pointer(dst + i))
				if !buf.putFast(*dstx, 0) {
					wbBufFlush(nil, 0)
				}
			}
			h = h.next()
		}
	} else {
		for i := uintptr(0); i < size; i += goarch.PtrSize {
			if h.isPointer() {
				dstx := (*uintptr)(unsafe.Pointer(dst + i))
				srcx := (*uintptr)(unsafe.Pointer(src + i))
				if !buf.putFast(*dstx, *srcx) {
					wbBufFlush(nil, 0)
				}
			}
			h = h.next()
		}
	}
}
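
// Illustrative sketch (not part of the runtime) of the calling pattern
// described above, as in typedmemmove-style code:
//
//	if writeBarrier.needed && typ.ptrdata != 0 {
//		bulkBarrierPreWrite(uintptr(dst), uintptr(src), typ.ptrdata)
//	}
//	memmove(dst, src, typ.size)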

// bulkBarrierPreWriteSrcOnly is like bulkBarrierPreWrite but
// does not execute write barriers for [dst, dst+size).
//
// In addition to the requirements of bulkBarrierPreWrite
// callers need to ensure [dst, dst+size) is zeroed.
//
// This is used for special cases where e.g. dst was just
// created and zeroed with malloc.
//go:nosplit
func bulkBarrierPreWriteSrcOnly(dst, src, size uintptr) {
	if (dst|src|size)&(goarch.PtrSize-1) != 0 {
		throw("bulkBarrierPreWrite: unaligned arguments")
	}
	if !writeBarrier.needed {
		return
	}
	buf := &getg().m.p.ptr().wbBuf
	h := heapBitsForAddr(dst)
	for i := uintptr(0); i < size; i += goarch.PtrSize {
		if h.isPointer() {
			srcx := (*uintptr)(unsafe.Pointer(src + i))
			if !buf.putFast(0, *srcx) {
				wbBufFlush(nil, 0)
			}
		}
		h = h.next()
	}
}

// bulkBarrierBitmap executes write barriers for copying from [src,
// src+size) to [dst, dst+size) using a 1-bit pointer bitmap. src is
// assumed to start maskOffset bytes into the data covered by the
// bitmap in bits (which may not be a multiple of 8).
//
// This is used by bulkBarrierPreWrite for writes to data and BSS.
//
//go:nosplit
func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
	word := maskOffset / goarch.PtrSize
	bits = addb(bits, word/8)
	mask := uint8(1) << (word % 8)

	buf := &getg().m.p.ptr().wbBuf
	for i := uintptr(0); i < size; i += goarch.PtrSize {
		if mask == 0 {
			bits = addb(bits, 1)
			if *bits == 0 {
				// Skip 8 words.
				i += 7 * goarch.PtrSize
				continue
			}
			mask = 1
		}
		if *bits&mask != 0 {
			dstx := (*uintptr)(unsafe.Pointer(dst + i))
			if src == 0 {
				if !buf.putFast(*dstx, 0) {
					wbBufFlush(nil, 0)
				}
			} else {
				srcx := (*uintptr)(unsafe.Pointer(src + i))
				if !buf.putFast(*dstx, *srcx) {
					wbBufFlush(nil, 0)
				}
			}
		}
		mask <<= 1
	}
}

// typeBitsBulkBarrier executes a write barrier for every
// pointer that would be copied from [src, src+size) to [dst,
// dst+size) by a memmove using the type bitmap to locate those
// pointer slots.
//
// The type typ must correspond exactly to [src, src+size) and [dst, dst+size).
// dst, src, and size must be pointer-aligned.
// The type typ must have a plain bitmap, not a GC program.
// The only use of this function is in channel sends, and the
// 64 kB channel element limit takes care of this for us.
//
// Must not be preempted because it typically runs right before memmove,
// and the GC must observe them as an atomic action.
//
// Callers must perform cgo checks if writeBarrier.cgo.
//
//go:nosplit
func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
	if typ == nil {
		throw("runtime: typeBitsBulkBarrier without type")
	}
	if typ.size != size {
		println("runtime: typeBitsBulkBarrier with type ", typ.string(), " of size ", typ.size, " but memory size", size)
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if typ.kind&kindGCProg != 0 {
		println("runtime: typeBitsBulkBarrier with type ", typ.string(), " with GC prog")
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if !writeBarrier.needed {
		return
	}
	ptrmask := typ.gcdata
	buf := &getg().m.p.ptr().wbBuf
	var bits uint32
	for i := uintptr(0); i < typ.ptrdata; i += goarch.PtrSize {
		if i&(goarch.PtrSize*8-1) == 0 {
			bits = uint32(*ptrmask)
			ptrmask = addb(ptrmask, 1)
		} else {
			bits = bits >> 1
		}
		if bits&1 != 0 {
			dstx := (*uintptr)(unsafe.Pointer(dst + i))
			srcx := (*uintptr)(unsafe.Pointer(src + i))
			if !buf.putFast(*dstx, *srcx) {
				wbBufFlush(nil, 0)
			}
		}
	}
}

// The methods operating on spans all require that h has been returned
// by heapBitsForSpan and that size, n, total are the span layout description
// returned by the mspan's layout method.
// If total > size*n, it means that there is extra leftover memory in the span,
// usually due to rounding.
//
// TODO(rsc): Perhaps introduce a different heapBitsSpan type.

// initSpan initializes the heap bitmap for a span.
// If this is a span of pointer-sized objects, it initializes all
// words to pointer/scan.
// Otherwise, it initializes all words to scalar/dead.
func (h heapBits) initSpan(s *mspan) {
	// Clear bits corresponding to objects.
	nw := (s.npages << _PageShift) / goarch.PtrSize
	if nw%wordsPerBitmapByte != 0 {
		throw("initSpan: unaligned length")
	}
	if h.shift != 0 {
		throw("initSpan: unaligned base")
	}
	isPtrs := goarch.PtrSize == 8 && s.elemsize == goarch.PtrSize
	for nw > 0 {
		hNext, anw := h.forwardOrBoundary(nw)
		nbyte := anw / wordsPerBitmapByte
		if isPtrs {
			bitp := h.bitp
			for i := uintptr(0); i < nbyte; i++ {
				*bitp = bitPointerAll | bitScanAll
				bitp = add1(bitp)
			}
		} else {
			memclrNoHeapPointers(unsafe.Pointer(h.bitp), nbyte)
		}
		h = hNext
		nw -= anw
	}
}
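
// Illustrative note (not part of the runtime): with the constants above,
// bitPointerAll|bitScanAll == 0x0f|0xf0 == 0xff, so a span of
// pointer-sized objects gets an all-ones bitmap, while the
// memclrNoHeapPointers call relies on all-zero bytes already meaning
// scalar/dead.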

// countAlloc returns the number of objects allocated in span s by
// scanning the allocation bitmap.
func (s *mspan) countAlloc() int {
	count := 0
	bytes := divRoundUp(s.nelems, 8)
	// Iterate over each 8-byte chunk and count allocations
	// with an intrinsic. Note that newMarkBits guarantees that
	// gcmarkBits will be 8-byte aligned, so we don't have to
	// worry about edge cases, irrelevant bits will simply be zero.
	for i := uintptr(0); i < bytes; i += 8 {
		// Extract 64 bits from the byte pointer and get a OnesCount.
		// Note that the unsafe cast here doesn't preserve endianness,
		// but that's OK. We only care about how many bits are 1, not
		// about the order we discover them in.
		mrkBits := *(*uint64)(unsafe.Pointer(s.gcmarkBits.bytep(i)))
		count += sys.OnesCount64(mrkBits)
	}
	return count
}

// heapBitsSetType records that the new allocation [x, x+size)
// holds in [x, x+dataSize) one or more values of type typ.
// (The number of values is given by dataSize / typ.size.)
// If dataSize < size, the fragment [x+dataSize, x+size) is
// recorded as non-pointer data.
// It is known that the type has pointers somewhere;
// malloc does not call heapBitsSetType when there are no pointers,
// because all free objects are marked as noscan during
// heapBitsSweepSpan.
//
// There can only be one allocation from a given span active at a time,
// and the bitmap for a span always falls on byte boundaries,
// so there are no write-write races for access to the heap bitmap.
// Hence, heapBitsSetType can access the bitmap without atomics.
//
// There can be read-write races between heapBitsSetType and things
// that read the heap bitmap like scanobject. However, since
// heapBitsSetType is only used for objects that have not yet been
// made reachable, readers will ignore bits being modified by this
// function. This does mean this function cannot transiently modify
// bits that belong to neighboring objects. Also, on weakly-ordered
// machines, callers must execute a store/store (publication) barrier
// between calling this function and making the object reachable.
func heapBitsSetType(x, size, dataSize uintptr, typ *_type) {
	const doubleCheck = false // slow but helpful; enable to test modifications to this code

	const (
		mask1 = bitPointer | bitScan                        // 00010001
		mask2 = bitPointer | bitScan | mask1<<heapBitsShift // 00110011
		mask3 = bitPointer | bitScan | mask2<<heapBitsShift // 01110111
	)

	// dataSize is always size rounded up to the next malloc size class,
	// except in the case of allocating a defer block, in which case
	// size is sizeof(_defer{}) (at least 6 words) and dataSize may be
	// arbitrarily larger.
	//
	// The checks for size == goarch.PtrSize and size == 2*goarch.PtrSize can therefore
	// assume that dataSize == size without checking it explicitly.

	if goarch.PtrSize == 8 && size == goarch.PtrSize {
		// It's one word and it has pointers, it must be a pointer.
		// Since all allocated one-word objects are pointers
		// (non-pointers are aggregated into tinySize allocations),
		// initSpan sets the pointer bits for us. Nothing to do here.
		if doubleCheck {
			h := heapBitsForAddr(x)
			if !h.isPointer() {
				throw("heapBitsSetType: pointer bit missing")
			}
			if !h.morePointers() {
				throw("heapBitsSetType: scan bit missing")
			}
		}
		return
	}

	h := heapBitsForAddr(x)
	ptrmask := typ.gcdata // start of 1-bit pointer mask (or GC program, handled below)

	// 2-word objects only have 4 bitmap bits and 3-word objects only have 6 bitmap bits.
	// Therefore, these objects share a heap bitmap byte with the objects next to them.
	// These are called out as a special case primarily so the code below can assume all
	// objects are at least 4 words long and that their bitmaps start either at the beginning
	// of a bitmap byte, or half-way in (h.shift of 0 and 2 respectively).

	if size == 2*goarch.PtrSize {
		if typ.size == goarch.PtrSize {
			// We're allocating a block big enough to hold two pointers.
			// On 64-bit, that means the actual object must be two pointers,
			// or else we'd have used the one-pointer-sized block.
			// On 32-bit, however, this is the 8-byte block, the smallest one.
			// So it could be that we're allocating one pointer and this was
			// just the smallest block available. Distinguish by checking dataSize.
			// (In general the number of instances of typ being allocated is
			// dataSize/typ.size.)
			if goarch.PtrSize == 4 && dataSize == goarch.PtrSize {
				// 1 pointer object. On 32-bit machines clear the bit for the
				// unused second word.
				*h.bitp &^= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift
				*h.bitp |= (bitPointer | bitScan) << h.shift
			} else {
				// 2-element array of pointer.
				*h.bitp |= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << h.shift
			}
			return
		}
		// Otherwise typ.size must be 2*goarch.PtrSize,
		// and typ.kind&kindGCProg == 0.
		if doubleCheck {
			if typ.size != 2*goarch.PtrSize || typ.kind&kindGCProg != 0 {
				print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, " gcprog=", typ.kind&kindGCProg != 0, "\n")
				throw("heapBitsSetType")
			}
		}
		b := uint32(*ptrmask)
		hb := b & 3
		hb |= bitScanAll & ((bitScan << (typ.ptrdata / goarch.PtrSize)) - 1)
		// Clear the bits for this object so we can set the
		// appropriate ones.
		*h.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << h.shift
		*h.bitp |= uint8(hb << h.shift)
		return
	} else if size == 3*goarch.PtrSize {
		b := uint8(*ptrmask)
		if doubleCheck {
			if b == 0 {
				println("runtime: invalid type ", typ.string())
				throw("heapBitsSetType: called with non-pointer type")
			}
			if goarch.PtrSize != 8 {
				throw("heapBitsSetType: unexpected 3 pointer wide size class on 32 bit")
			}
			if typ.kind&kindGCProg != 0 {
				throw("heapBitsSetType: unexpected GC prog for 3 pointer wide size class")
			}
			if typ.size == 2*goarch.PtrSize {
				print("runtime: heapBitsSetType size=", size, " but typ.size=", typ.size, "\n")
				throw("heapBitsSetType: inconsistent object sizes")
			}
		}
		if typ.size == goarch.PtrSize {
			// The type contains a pointer otherwise heapBitsSetType wouldn't have been called.
			// Since the type is only 1 pointer wide and contains a pointer, its gcdata must be exactly 1.
			if doubleCheck && *typ.gcdata != 1 {
				print("runtime: heapBitsSetType size=", size, " typ.size=", typ.size, "but *typ.gcdata", *typ.gcdata, "\n")
				throw("heapBitsSetType: unexpected gcdata for 1 pointer wide type size in 3 pointer wide size class")
			}
			// 3 element array of pointers. Unrolling ptrmask 3 times into p yields 00000111.
			b = 7
		}

		hb := b & 7
		// Set bitScan bits for all pointers.
		hb |= hb << wordsPerBitmapByte
		// First bitScan bit is always set since the type contains pointers.
		hb |= bitScan
		// Second bitScan bit needs to also be set if the third bitScan bit is set.
		hb |= hb & (bitScan << (2 * heapBitsShift)) >> 1

		// For h.shift > 1 heap bits cross a byte boundary and need to be written part
		// to h.bitp and part to the next h.bitp.
		switch h.shift {
		case 0:
			*h.bitp &^= mask3 << 0
			*h.bitp |= hb << 0
		case 1:
			*h.bitp &^= mask3 << 1
			*h.bitp |= hb << 1
		case 2:
			*h.bitp &^= mask2 << 2
			*h.bitp |= (hb & mask2) << 2
			// Two words written to the first byte.
			// Advance two words to get to the next byte.
			h = h.next().next()
			*h.bitp &^= mask1
			*h.bitp |= (hb >> 2) & mask1
		case 3:
			*h.bitp &^= mask1 << 3
			*h.bitp |= (hb & mask1) << 3
			// One word written to the first byte.
			// Advance one word to get to the next byte.
			h = h.next()
			*h.bitp &^= mask2
			*h.bitp |= (hb >> 1) & mask2
		}
		return
	}

	// Copy from 1-bit ptrmask into 2-bit bitmap.
	// The basic approach is to use a single uintptr as a bit buffer,
	// alternating between reloading the buffer and writing bitmap bytes.
	// In general, one load can supply two bitmap byte writes.
	// This is a lot of lines of code, but it compiles into relatively few
	// machine instructions.

	outOfPlace := false
	if arenaIndex(x+size-1) != arenaIdx(h.arena) || (doubleCheck && fastrandn(2) == 0) {
		// This object spans heap arenas, so the bitmap may be
		// discontiguous. Unroll it into the object instead
		// and then copy it out.
		//
		// In doubleCheck mode, we randomly do this anyway to
		// stress test the bitmap copying path.
		outOfPlace = true
		h.bitp = (*uint8)(unsafe.Pointer(x))
		h.last = nil
	}

	var (
		// Ptrmask input.
		p     *byte   // last ptrmask byte read
		b     uintptr // ptrmask bits already loaded
		nb    uintptr // number of bits in b at next read
		endp  *byte   // final ptrmask byte to read (then repeat)
		endnb uintptr // number of valid bits in *endp
		pbits uintptr // alternate source of bits

		// Heap bitmap output.
		w     uintptr // words processed
		nw    uintptr // number of words to process
		hbitp *byte   // next heap bitmap byte to write
		hb    uintptr // bits being prepared for *hbitp
	)

	hbitp = h.bitp

	// Handle GC program. Delayed until this part of the code
	// so that we can use the same double-checking mechanism
	// as the 1-bit case. Nothing above could have encountered
	// GC programs: the cases were all too small.
	if typ.kind&kindGCProg != 0 {
		heapBitsSetTypeGCProg(h, typ.ptrdata, typ.size, dataSize, size, addb(typ.gcdata, 4))
		if doubleCheck {
			// Double-check the heap bits written by GC program
			// by running the GC program to create a 1-bit pointer mask
			// and then jumping to the double-check code below.
			// This doesn't catch bugs shared between the 1-bit and 4-bit
			// GC program execution, but it does catch mistakes specific
			// to just one of those and bugs in heapBitsSetTypeGCProg's
			// implementation of arrays.
			lock(&debugPtrmask.lock)
			if debugPtrmask.data == nil {
				debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys))
			}
			ptrmask = debugPtrmask.data
			runGCProg(addb(typ.gcdata, 4), nil, ptrmask, 1)
		}
		goto Phase4
	}

	// Note about sizes:
	//
	// typ.size is the number of words in the object,
	// and typ.ptrdata is the number of words in the prefix
	// of the object that contains pointers. That is, the final
	// typ.size - typ.ptrdata words contain no pointers.
	// This allows optimization of a common pattern where
	// an object has a small header followed by a large scalar
	// buffer. If we know the pointers are over, we don't have
	// to scan the buffer's heap bitmap at all.
	// The 1-bit ptrmasks are sized to contain only bits for
	// the typ.ptrdata prefix, zero padded out to a full byte
	// of bitmap. This code sets nw (below) so that heap bitmap
	// bits are only written for the typ.ptrdata prefix; if there is
	// more room in the allocated object, the next heap bitmap
	// entry is a 00, indicating that there are no more pointers
	// to scan. So only the ptrmask for the ptrdata bytes is needed.
	//
	// Replicated copies are not as nice: if there is an array of
	// objects with scalar tails, all but the last tail does have to
	// be initialized, because there is no way to say "skip forward".
	// However, because of the possibility of a repeated type with
	// size not a multiple of 4 pointers (one heap bitmap byte),
	// the code already must handle the last ptrmask byte specially
	// by treating it as containing only the bits for endnb pointers,
	// where endnb <= 4. We represent large scalar tails that must
	// be expanded in the replication by setting endnb larger than 4.
	// This will have the effect of reading many bits out of b,
	// but once the real bits are shifted out, b will supply as many
	// zero bits as we try to read, which is exactly what we need.

	p = ptrmask
	if typ.size < dataSize {
		// Filling in bits for an array of typ.
		// Set up for repetition of ptrmask during main loop.
		// Note that ptrmask describes only a prefix of typ.size.
		const maxBits = goarch.PtrSize*8 - 7
		if typ.ptrdata/goarch.PtrSize <= maxBits {
			// Entire ptrmask fits in uintptr with room for a byte fragment.
			// Load into pbits and never read from ptrmask again.
			// This is especially important when the ptrmask has
			// fewer than 8 bits in it; otherwise the reload in the middle
			// of the Phase 2 loop would itself need to loop to gather
			// at least 8 bits.

			// Accumulate ptrmask into b.
			// ptrmask is sized to describe only typ.ptrdata, but we record
			// it as describing typ.size bytes, since all the high bits are zero.
			nb = typ.ptrdata / goarch.PtrSize
			for i := uintptr(0); i < nb; i += 8 {
				b |= uintptr(*p) << i
				p = add1(p)
			}
			nb = typ.size / goarch.PtrSize

			// Replicate ptrmask to fill entire pbits uintptr.
			// Doubling and truncating is fewer steps than
			// iterating by nb each time. (nb could be 1.)
			// Since we loaded typ.ptrdata/goarch.PtrSize bits
			// but are pretending to have typ.size/goarch.PtrSize,
			// there might be no replication necessary/possible.
			pbits = b
			endnb = nb
			if nb+nb <= maxBits {
				for endnb <= goarch.PtrSize*8 {
					pbits |= pbits << endnb
					endnb += endnb
				}
				// Truncate to a multiple of original ptrmask.
				// Because nb+nb <= maxBits, nb fits in a byte.
				// Byte division is cheaper than uintptr division.
				endnb = uintptr(maxBits/byte(nb)) * nb
				pbits &= 1<<endnb - 1
				b = pbits
				nb = endnb
			}

			// Clear p and endp as sentinel for using pbits.
			// Checked during Phase 2 loop.
			p = nil
			endp = nil
		} else {
			// Ptrmask is larger. Read it multiple times.
			n := (typ.ptrdata/goarch.PtrSize+7)/8 - 1
			endp = addb(ptrmask, n)
			endnb = typ.size/goarch.PtrSize - n*8
		}
	}
	if p != nil {
		b = uintptr(*p)
		p = add1(p)
		nb = 8
	}

	if typ.size == dataSize {
		// Single entry: can stop once we reach the non-pointer data.
		nw = typ.ptrdata / goarch.PtrSize
	} else {
		// Repeated instances of typ in an array.
		// Have to process first N-1 entries in full, but can stop
		// once we reach the non-pointer data in the final entry.
		nw = ((dataSize/typ.size-1)*typ.size + typ.ptrdata) / goarch.PtrSize
	}
	if nw == 0 {
		// No pointers! Caller was supposed to check.
		println("runtime: invalid type ", typ.string())
		throw("heapBitsSetType: called with non-pointer type")
	}

	// Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2).
	// The leading byte is special because it contains the bits for word 1,
	// which does not have the scan bit set.
	// The leading half-byte is special because it's a half a byte,
	// so we have to be careful with the bits already there.
	switch {
	default:
		throw("heapBitsSetType: unexpected shift")

	case h.shift == 0:
		// Ptrmask and heap bitmap are aligned.
		//
		// This is a fast path for small objects.
		//
		// The first byte we write out covers the first four
		// words of the object. The scan/dead bit on the first
		// word must be set to scan since there are pointers
		// somewhere in the object.
		// In all following words, we set the scan/dead
		// appropriately to indicate that the object continues
		// to the next 2-bit entry in the bitmap.
		//
		// We set four bits at a time here, but if the object
		// is fewer than four words, phase 3 will clear
		// unnecessary bits.
		hb = b & bitPointerAll
		hb |= bitScanAll
		if w += 4; w >= nw {
			goto Phase3
		}
		*hbitp = uint8(hb)
		hbitp = add1(hbitp)
		b >>= 4
		nb -= 4

	case h.shift == 2:
		// Ptrmask and heap bitmap are misaligned.
		//
		// On 32 bit architectures only the 6-word object that corresponds
		// to a 24 bytes size class can start with h.shift of 2 here since
		// all other non 16 byte aligned size classes have been handled by
		// special code paths at the beginning of heapBitsSetType on 32 bit.
		//
		// Many size classes are only 16 byte aligned. On 64 bit architectures
		// this results in a heap bitmap position starting with a h.shift of 2.
		//
		// The bits for the first two words are in a byte shared
		// with another object, so we must be careful with the bits
		// already there.
		//
		// We took care of 1-word, 2-word, and 3-word objects above,
		// so this is at least a 6-word object.
		hb = (b & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift)
		hb |= bitScan << (2 * heapBitsShift)
		if nw > 1 {
			hb |= bitScan << (3 * heapBitsShift)
		}
		b >>= 2
		nb -= 2
		*hbitp &^= uint8((bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << (2 * heapBitsShift))
		*hbitp |= uint8(hb)
		hbitp = add1(hbitp)
		if w += 2; w >= nw {
			// We know that there is more data, because we handled 2-word and 3-word objects above.
			// This must be at least a 6-word object. If we're out of pointer words,
			// mark no scan in next bitmap byte and finish.
			hb = 0
			w += 4
			goto Phase3
		}
	}

	// Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap.
	// The loop computes the bits for that last write but does not execute the write;
	// it leaves the bits in hb for processing by phase 3.
	// To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to
	// use in the first half of the loop right now, and then we only adjust nb explicitly
	// if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop.
	nb -= 4
	for {
		// Emit bitmap byte.
		// b has at least nb+4 bits, with one exception:
		// if w+4 >= nw, then b has only nw-w bits,
		// but we'll stop at the break and then truncate
		// appropriately in Phase 3.
		hb = b & bitPointerAll
		hb |= bitScanAll
		if w += 4; w >= nw {
			break
		}
		*hbitp = uint8(hb)
		hbitp = add1(hbitp)
		b >>= 4

		// Load more bits. b has nb right now.
		if p != endp {
			// Fast path: keep reading from ptrmask.
			// nb unmodified: we just loaded 8 bits,
			// and the next iteration will consume 8 bits,
			// leaving us with the same nb the next time we're here.
			if nb < 8 {
				b |= uintptr(*p) << nb
				p = add1(p)
			} else {
				// Reduce the number of bits in b.
				// This is important if we skipped
				// over a scalar tail, since nb could
				// be larger than the bit width of b.
				nb -= 8
			}
		} else if p == nil {
			// Almost as fast path: track bit count and refill from pbits.
			// For short repetitions.
			if nb < 8 {
				b |= pbits << nb
				nb += endnb
			}
			nb -= 8 // for next iteration
		} else {
			// Slow path: reached end of ptrmask.
			// Process final partial byte and rewind to start.
			b |= uintptr(*p) << nb
			nb += endnb
			if nb < 8 {
				b |= uintptr(*ptrmask) << nb
				p = add1(ptrmask)
			} else {
				nb -= 8
				p = ptrmask
			}
		}

		// Emit bitmap byte.
		hb = b & bitPointerAll
		hb |= bitScanAll
		if w += 4; w >= nw {
			break
		}
		*hbitp = uint8(hb)
		hbitp = add1(hbitp)
		b >>= 4
	}

Phase3:
	// Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries.
	if w > nw {
		// Counting the 4 entries in hb not yet written to memory,
		// there are more entries than possible pointer slots.
		// Discard the excess entries (can't be more than 3).
		mask := uintptr(1)<<(4-(w-nw)) - 1
		hb &= mask | mask<<4 // apply mask to both pointer bits and scan bits
	}

	// Change nw from counting possibly-pointer words to total words in allocation.
	nw = size / goarch.PtrSize

	// Write whole bitmap bytes.
	// The first is hb, the rest are zero.
	if w <= nw {
		*hbitp = uint8(hb)
		hbitp = add1(hbitp)
		hb = 0 // for possible final half-byte below
		for w += 4; w <= nw; w += 4 {
			*hbitp = 0
			hbitp = add1(hbitp)
		}
	}

	// Write final partial bitmap byte if any.
	// We know w > nw, or else we'd still be in the loop above.
	// It can be bigger only due to the 4 entries in hb that it counts.
	// If w == nw+4 then there's nothing left to do: we wrote all nw entries
	// and can discard the 4 sitting in hb.
	// But if w == nw+2, we need to write first two in hb.
	// The byte is shared with the next object, so be careful with
	// existing bits.
	if w == nw+2 {
		*hbitp = *hbitp&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | uint8(hb)
	}

Phase4:
	// Phase 4: Copy unrolled bitmap to per-arena bitmaps, if necessary.
	if outOfPlace {
		// TODO: We could probably make this faster by
		// handling [x+dataSize, x+size) specially.
		h := heapBitsForAddr(x)
		// cnw is the number of heap words, or bit pairs
		// remaining (like nw above).
		cnw := size / goarch.PtrSize
		src := (*uint8)(unsafe.Pointer(x))
		// We know the first and last byte of the bitmap are
		// not the same, but it's still possible for small
		// objects span arenas, so it may share bitmap bytes
		// with neighboring objects.
		//
		// Handle the first byte specially if it's shared. See
		// Phase 1 for why this is the only special case we need.
		if doubleCheck {
			if !(h.shift == 0 || h.shift == 2) {
				print("x=", x, " size=", size, " cnw=", h.shift, "\n")
				throw("bad start shift")
			}
		}
		if h.shift == 2 {
			*h.bitp = *h.bitp&^((bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift)<<(2*heapBitsShift)) | *src
			h = h.next().next()
			cnw -= 2
			src = addb(src, 1)
		}
		// We're now byte aligned. Copy out to per-arena
		// bitmaps until the last byte (which may again be
		// partial).
		for cnw >= 4 {
			// This loop processes four words at a time,
			// so round cnw down accordingly.
			hNext, words := h.forwardOrBoundary(cnw / 4 * 4)

			// n is the number of bitmap bytes to copy.
			n := words / 4
			memmove(unsafe.Pointer(h.bitp), unsafe.Pointer(src), n)
			cnw -= words
			h = hNext
			src = addb(src, n)
		}
		if doubleCheck && h.shift != 0 {
			print("cnw=", cnw, " h.shift=", h.shift, "\n")
			throw("bad shift after block copy")
		}
		// Handle the last byte if it's shared.
		if cnw == 2 {
			*h.bitp = *h.bitp&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | *src
			src = addb(src, 1)
			h = h.next().next()
		}
		if doubleCheck {
			if uintptr(unsafe.Pointer(src)) > x+size {
				throw("copy exceeded object size")
			}
			if !(cnw == 0 || cnw == 2) {
				print("x=", x, " size=", size, " cnw=", cnw, "\n")
				throw("bad number of remaining words")
			}
			// Set up hbitp so doubleCheck code below can check it.
			hbitp = h.bitp
		}
		// Zero the object where we wrote the bitmap.
		memclrNoHeapPointers(unsafe.Pointer(x), uintptr(unsafe.Pointer(src))-x)
	}

	// Double check the whole bitmap.
	if doubleCheck {
		// x+size may not point to the heap, so back up one
		// word and then advance it the way we do above.
		end := heapBitsForAddr(x + size - goarch.PtrSize)
		if outOfPlace {
			// In out-of-place copying, we just advance
			// using next.
			end = end.next()
		} else {
			// Don't use next because that may advance to
			// the next arena and the in-place logic
			// doesn't work there.
			end.shift += heapBitsShift
			if end.shift == 4*heapBitsShift {
				end.bitp, end.shift = add1(end.bitp), 0
			}
		}
		if typ.kind&kindGCProg == 0 && (hbitp != end.bitp || (w == nw+2) != (end.shift == 2)) {
			println("ended at wrong bitmap byte for", typ.string(), "x", dataSize/typ.size)
			print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
			print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
			h0 := heapBitsForAddr(x)
			print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
			print("ended at hbitp=", hbitp, " but next starts at bitp=", end.bitp, " shift=", end.shift, "\n")
			throw("bad heapBitsSetType")
		}

		// Double-check that bits to be written were written correctly.
		// Does not check that other bits were not written, unfortunately.
		h := heapBitsForAddr(x)
		nptr := typ.ptrdata / goarch.PtrSize
		ndata := typ.size / goarch.PtrSize
		count := dataSize / typ.size
		totalptr := ((count-1)*typ.size + typ.ptrdata) / goarch.PtrSize
		for i := uintptr(0); i < size/goarch.PtrSize; i++ {
			j := i % ndata
			var have, want uint8
			have = (*h.bitp >> h.shift) & (bitPointer | bitScan)
			if i >= totalptr {
				if typ.kind&kindGCProg != 0 && i < (totalptr+3)/4*4 {
					// heapBitsSetTypeGCProg always fills
					// in full nibbles of bitScan.
					want = bitScan
				}
			} else {
				if j < nptr && (*addb(ptrmask, j/8)>>(j%8))&1 != 0 {
					want |= bitPointer
				}
				want |= bitScan
			}
			if have != want {
				println("mismatch writing bits for", typ.string(), "x", dataSize/typ.size)
				print("typ.size=", typ.size, " typ.ptrdata=", typ.ptrdata, " dataSize=", dataSize, " size=", size, "\n")
				print("kindGCProg=", typ.kind&kindGCProg != 0, " outOfPlace=", outOfPlace, "\n")
				print("w=", w, " nw=", nw, " b=", hex(b), " nb=", nb, " hb=", hex(hb), "\n")
				h0 := heapBitsForAddr(x)
				print("initial bits h0.bitp=", h0.bitp, " h0.shift=", h0.shift, "\n")
				print("current bits h.bitp=", h.bitp, " h.shift=", h.shift, " *h.bitp=", hex(*h.bitp), "\n")
				print("ptrmask=", ptrmask, " p=", p, " endp=", endp, " endnb=", endnb, " pbits=", hex(pbits), " b=", hex(b), " nb=", nb, "\n")
				println("at word", i, "offset", i*goarch.PtrSize, "have", hex(have), "want", hex(want))
				if typ.kind&kindGCProg != 0 {
					println("GC program:")
					dumpGCProg(addb(typ.gcdata, 4))
				}
				throw("bad heapBitsSetType")
			}
			h = h.next()
		}
		if ptrmask == debugPtrmask.data {
			unlock(&debugPtrmask.lock)
		}
	}
}
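
// Illustrative worked example (not part of the runtime): for a 4-word
// allocation whose type has a single pointer in word 0 (ptrmask 0b0001,
// one word of ptrdata), the aligned fast path above computes
// hb = 0b0001 | bitScanAll = 0xf1, and Phase 3 then masks off the
// entries past nw = 1, leaving the bitmap byte 0x11: pointer+scan for
// word 0 and scalar/dead for words 1-3.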

var debugPtrmask struct {
	lock mutex
	data *byte
}

// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program.
// progSize is the size of the memory described by the program.
// elemSize is the size of the element that the GC program describes (a prefix of).
// dataSize is the total size of the intended data, a multiple of elemSize.
// allocSize is the total size of the allocated memory.
//
// GC programs are only used for large allocations.
// heapBitsSetType requires that allocSize is a multiple of 4 words,
// so that the relevant bitmap bytes are not shared with surrounding
// objects.
func heapBitsSetTypeGCProg(h heapBits, progSize, elemSize, dataSize, allocSize uintptr, prog *byte) {
	if goarch.PtrSize == 8 && allocSize%(4*goarch.PtrSize) != 0 {
		// Alignment will be wrong.
		throw("heapBitsSetTypeGCProg: small allocation")
	}
	var totalBits uintptr
	if elemSize == dataSize {
		totalBits = runGCProg(prog, nil, h.bitp, 2)
		if totalBits*goarch.PtrSize != progSize {
			println("runtime: heapBitsSetTypeGCProg: total bits", totalBits, "but progSize", progSize)
			throw("heapBitsSetTypeGCProg: unexpected bit count")
		}
	} else {
		count := dataSize / elemSize

		// Piece together program trailer to run after prog that does:
		//	literal(0)
		//	repeat(1, elemSize-progSize-1) // zeros to fill element size
		//	repeat(elemSize, count-1) // repeat that element for count
		// This zero-pads the data remaining in the first element and then
		// repeats that first element to fill the array.
		var trailer [40]byte // 3 varints (max 10 each) + some bytes
		i := 0
		if n := elemSize/goarch.PtrSize - progSize/goarch.PtrSize; n > 0 {
			// literal(0)
			trailer[i] = 0x01
			i++
			trailer[i] = 0
			i++
			if n > 1 {
				// repeat(1, n-1)
				trailer[i] = 0x81
				i++
				n--
				for ; n >= 0x80; n >>= 7 {
					trailer[i] = byte(n | 0x80)
					i++
				}
				trailer[i] = byte(n)
				i++
			}
		}
		// repeat(elemSize/ptrSize, count-1)
		trailer[i] = 0x80
		i++
		n := elemSize / goarch.PtrSize
		for ; n >= 0x80; n >>= 7 {
			trailer[i] = byte(n | 0x80)
			i++
		}
		trailer[i] = byte(n)
		i++
		n = count - 1
		for ; n >= 0x80; n >>= 7 {
			trailer[i] = byte(n | 0x80)
			i++
		}
		trailer[i] = byte(n)
		i++
		trailer[i] = 0
		i++

		runGCProg(prog, &trailer[0], h.bitp, 2)

		// Even though we filled in the full array just now,
		// record that we only filled in up to the ptrdata of the
		// last element. This will cause the code below to
		// memclr the dead section of the final array element,
		// so that scanobject can stop early in the final element.
		totalBits = (elemSize*(count-1) + progSize) / goarch.PtrSize
	}
	endProg := unsafe.Pointer(addb(h.bitp, (totalBits+3)/4))
	endAlloc := unsafe.Pointer(addb(h.bitp, allocSize/goarch.PtrSize/wordsPerBitmapByte))
	memclrNoHeapPointers(endProg, uintptr(endAlloc)-uintptr(endProg))
}

// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.
// size is the size of the region described by prog, in bytes.
// The resulting bitvector will have no more than size/goarch.PtrSize bits.
func progToPointerMask(prog *byte, size uintptr) bitvector {
	n := (size/goarch.PtrSize + 7) / 8
	x := (*[1 << 30]byte)(persistentalloc(n+1, 1, &memstats.buckhash_sys))[:n+1]
	x[len(x)-1] = 0xa1 // overflow check sentinel
	n = runGCProg(prog, nil, &x[0], 1)
	if x[len(x)-1] != 0xa1 {
		throw("progToPointerMask: overflow")
	}
	return bitvector{int32(n), &x[0]}
}

// Packed GC pointer bitmaps, aka GC programs.
//
// For large types containing arrays, the type information has a
// natural repetition that can be encoded to save space in the
// binary and in the memory representation of the type information.
//
// The encoding is a simple Lempel-Ziv style bytecode machine
// with the following instructions:
//
//	00000000: stop
//	0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes
//	10000000 n c: repeat the previous n bits c times; n, c are varints
//	1nnnnnnn c: repeat the previous n bits c times; c is a varint
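//
// Illustrative example (not a program emitted by any particular
// compiler): the 3-bit mask 1,0,1 repeated 100 times in a row could be
// encoded as
//
//	0x03 0x05  // emit 3 bits from the next byte: 0b101, LSB first
//	0x83 0x63  // 1nnnnnnn c: repeat the previous 3 bits 99 more times
//	0x00       // stop
//
// for a total of 3 + 3*99 = 300 bits.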

// runGCProg executes the GC program prog, and then trailer if non-nil,
// writing to dst with entries of the given size.
// If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst.
// If size == 2, dst is the 2-bit heap bitmap, also laid out moving forward
// from dst (each byte holds four pointer bits in its low nibble and four
// scan bits in its high nibble). In this case, the caller guarantees
// that only whole bytes in dst need to be written.
//
// runGCProg returns the number of 1- or 2-bit entries written to memory.
func runGCProg(prog, trailer, dst *byte, size int) uintptr {
	dstStart := dst

	// Bits waiting to be written to memory.
	var bits uintptr
	var nbits uintptr

	p := prog
Run:
	for {
		// Flush accumulated full bytes.
		// The rest of the loop assumes that nbits <= 7.
		for ; nbits >= 8; nbits -= 8 {
			if size == 1 {
				*dst = uint8(bits)
				dst = add1(dst)
				bits >>= 8
			} else {
				v := bits&bitPointerAll | bitScanAll
				*dst = uint8(v)
				dst = add1(dst)
				bits >>= 4
				v = bits&bitPointerAll | bitScanAll
				*dst = uint8(v)
				dst = add1(dst)
				bits >>= 4
			}
		}

		// Process one instruction.
		inst := uintptr(*p)
		p = add1(p)
		n := inst & 0x7F
		if inst&0x80 == 0 {
			// Literal bits; n == 0 means end of program.
			if n == 0 {
				// Program is over; continue in trailer if present.
				if trailer != nil {
					p = trailer
					trailer = nil
					continue
				}
				break Run
			}
			nbyte := n / 8
			for i := uintptr(0); i < nbyte; i++ {
				bits |= uintptr(*p) << nbits
				p = add1(p)
				if size == 1 {
					*dst = uint8(bits)
					dst = add1(dst)
					bits >>= 8
				} else {
					v := bits&0xf | bitScanAll
					*dst = uint8(v)
					dst = add1(dst)
					bits >>= 4
					v = bits&0xf | bitScanAll
					*dst = uint8(v)
					dst = add1(dst)
					bits >>= 4
				}
			}
			if n %= 8; n > 0 {
				bits |= uintptr(*p) << nbits
				p = add1(p)
				nbits += n
			}
			continue Run
		}

		// Repeat. If n == 0, it is encoded in a varint in the next bytes.
		if n == 0 {
			for off := uint(0); ; off += 7 {
				x := uintptr(*p)
				p = add1(p)
				n |= (x & 0x7F) << off
				if x&0x80 == 0 {
					break
				}
			}
		}

		// Count is encoded in a varint in the next bytes.
		c := uintptr(0)
		for off := uint(0); ; off += 7 {
			x := uintptr(*p)
			p = add1(p)
			c |= (x & 0x7F) << off
			if x&0x80 == 0 {
				break
			}
		}
		c *= n // now total number of bits to copy

		// If the number of bits being repeated is small, load them
		// into a register and use that register for the entire loop
		// instead of repeatedly reading from memory.
		// Handling fewer than 8 bits here makes the general loop simpler.
		// The cutoff is goarch.PtrSize*8 - 7 to guarantee that when we add
		// the pattern to a bit buffer holding at most 7 bits (a partial byte)
		// it will not overflow.
		src := dst
		const maxBits = goarch.PtrSize*8 - 7
		if n <= maxBits {
			// Start with bits in output buffer.
			pattern := bits
			npattern := nbits

			// If we need more bits, fetch them from memory.
			if size == 1 {
				src = subtract1(src)
				for npattern < n {
					pattern <<= 8
					pattern |= uintptr(*src)
					src = subtract1(src)
					npattern += 8
				}
			} else {
				src = subtract1(src)
				for npattern < n {
					pattern <<= 4
					pattern |= uintptr(*src) & 0xf
					src = subtract1(src)
					npattern += 4
				}
			}

			// We started with the whole bit output buffer,
			// and then we loaded bits from whole bytes.
			// Either way, we might now have too many instead of too few.
			// Discard the extra.
			if npattern > n {
				pattern >>= npattern - n
				npattern = n
			}

			// Replicate pattern to at most maxBits.
			if npattern == 1 {
				// One bit being repeated.
				// If the bit is 1, make the pattern all 1s.
				// If the bit is 0, the pattern is already all 0s,
				// but we can claim that the number of bits
				// in the word is equal to the number we need (c),
				// because right shift of bits will zero fill.
				if pattern == 1 {
					pattern = 1<<maxBits - 1
					npattern = maxBits
				} else {
					npattern = c
				}
			} else {
				b := pattern
				nb := npattern
				if nb+nb <= maxBits {
					// Double pattern until the whole uintptr is filled.
					for nb <= goarch.PtrSize*8 {
						b |= b << nb
						nb += nb
					}
					// Trim away incomplete copy of original pattern in high bits.
					// TODO(rsc): Replace with table lookup or loop on systems without divide?
					nb = maxBits / npattern * npattern
					b &= 1<<nb - 1
					pattern = b
					npattern = nb
				}
			}

			// Add pattern to bit buffer and flush bit buffer, c/npattern times.
			// Since pattern contains >8 bits, there will be full bytes to flush
			// on each iteration.
			for ; c >= npattern; c -= npattern {
				bits |= pattern << nbits
				nbits += npattern
				if size == 1 {
					for nbits >= 8 {
						*dst = uint8(bits)
						dst = add1(dst)
						bits >>= 8
						nbits -= 8
					}
				} else {
					for nbits >= 4 {
						*dst = uint8(bits&0xf | bitScanAll)
						dst = add1(dst)
						bits >>= 4
						nbits -= 4
					}
				}
			}

			// Add final fragment to bit buffer.
			if c > 0 {
				pattern &= 1<<c - 1
				bits |= pattern << nbits
				nbits += c
			}
			continue Run
		}

		// Repeat; n too large to fit in a register.
		// Since nbits <= 7, we know the first few bytes of repeated data
		// are already written to memory.
		off := n - nbits // n > nbits because n > maxBits and nbits <= 7
		if size == 1 {
			// Leading src fragment.
			src = subtractb(src, (off+7)/8)
			if frag := off & 7; frag != 0 {
				bits |= uintptr(*src) >> (8 - frag) << nbits
				src = add1(src)
				nbits += frag
				c -= frag
			}
			// Main loop: load one byte, write another.
			// The bits are rotating through the bit buffer.
			for i := c / 8; i > 0; i-- {
				bits |= uintptr(*src) << nbits
				src = add1(src)
				*dst = uint8(bits)
				dst = add1(dst)
				bits >>= 8
			}
			// Final src fragment.
			if c %= 8; c > 0 {
				bits |= (uintptr(*src) & (1<<c - 1)) << nbits
				nbits += c
			}
		} else {
			// Leading src fragment.
			src = subtractb(src, (off+3)/4)
			if frag := off & 3; frag != 0 {
				bits |= (uintptr(*src) & 0xf) >> (4 - frag) << nbits
				src = add1(src)
				nbits += frag
				c -= frag
			}
			// Main loop: load one byte, write another.
			// The bits are rotating through the bit buffer.
			for i := c / 4; i > 0; i-- {
				bits |= (uintptr(*src) & 0xf) << nbits
				src = add1(src)
				*dst = uint8(bits&0xf | bitScanAll)
				dst = add1(dst)
				bits >>= 4
			}
			// Final src fragment.
			if c %= 4; c > 0 {
				bits |= (uintptr(*src) & (1<<c - 1)) << nbits
				nbits += c
			}
		}
	}

	// Write any final bits out, using full-byte writes, even for the final byte.
	var totalBits uintptr
	if size == 1 {
		totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*8 + nbits
		nbits += -nbits & 7
		for ; nbits > 0; nbits -= 8 {
			*dst = uint8(bits)
			dst = add1(dst)
			bits >>= 8
		}
	} else {
		totalBits = (uintptr(unsafe.Pointer(dst))-uintptr(unsafe.Pointer(dstStart)))*4 + nbits
		nbits += -nbits & 3
		for ; nbits > 0; nbits -= 4 {
			v := bits&0xf | bitScanAll
			*dst = uint8(v)
			dst = add1(dst)
			bits >>= 4
		}
	}
	return totalBits
}

// materializeGCProg allocates space for the (1-bit) pointer bitmask
// for an object of size ptrdata. Then it fills that space with the
// pointer bitmask specified by the program prog.
// The bitmask starts at s.startAddr.
// The result must be deallocated with dematerializeGCProg.
func materializeGCProg(ptrdata uintptr, prog *byte) *mspan {
	// Each word of ptrdata needs one bit in the bitmap.
	bitmapBytes := divRoundUp(ptrdata, 8*goarch.PtrSize)
	// Compute the number of pages needed for bitmapBytes.
	pages := divRoundUp(bitmapBytes, pageSize)
	s := mheap_.allocManual(pages, spanAllocPtrScalarBits)
	runGCProg(addb(prog, 4), nil, (*byte)(unsafe.Pointer(s.startAddr)), 1)
	return s
}

func dematerializeGCProg(s *mspan) {
	mheap_.freeManual(s, spanAllocPtrScalarBits)
}

func dumpGCProg(p *byte) {
	nptr := 0
	for {
		x := *p
		p = add1(p)
		if x == 0 {
			print("\t", nptr, " end\n")
			break
		}
		if x&0x80 == 0 {
			print("\t", nptr, " lit ", x, ":")
			n := int(x+7) / 8
			for i := 0; i < n; i++ {
				print(" ", hex(*p))
				p = add1(p)
			}
			print("\n")
			nptr += int(x)
		} else {
			nbit := int(x &^ 0x80)
			if nbit == 0 {
				for nb := uint(0); ; nb += 7 {
					x := *p
					p = add1(p)
					nbit |= int(x&0x7f) << nb
					if x&0x80 == 0 {
						break
					}
				}
			}
			count := 0
			for nb := uint(0); ; nb += 7 {
				x := *p
				p = add1(p)
				count |= int(x&0x7f) << nb
				if x&0x80 == 0 {
					break
				}
			}
			print("\t", nptr, " repeat ", nbit, " × ", count, "\n")
			nptr += nbit * count
		}
	}
}

// gcbits returns the GC type info for x, for testing.
// The result is the bitmap entries (0 or 1), one entry per byte.
//go:linkname reflect_gcbits reflect.gcbits
func reflect_gcbits(x any) []byte {
	ret := getgcmask(x)
	typ := (*ptrtype)(unsafe.Pointer(efaceOf(&x)._type)).elem
	nptr := typ.ptrdata / goarch.PtrSize
	for uintptr(len(ret)) > nptr && ret[len(ret)-1] == 0 {
		ret = ret[:len(ret)-1]
	}
	return ret
}

// Returns GC type info for the pointer stored in ep for testing.
// If ep points to the stack, only static live information will be returned
// (i.e. not for objects which are only dynamically live stack objects).
func getgcmask(ep any) (mask []byte) {
	e := *efaceOf(&ep)
	p := e.data
	t := e._type
	// data or bss
	roots := gcRoots
	for roots != nil {
		for i := 0; i < roots.count; i++ {
			pr := roots.roots[i]
			addr := uintptr(pr.decl)
			if addr <= uintptr(p) && uintptr(p) < addr+pr.size {
				n := (*ptrtype)(unsafe.Pointer(t)).elem.size
				mask = make([]byte, n/goarch.PtrSize)
				copy(mask, (*[1 << 29]uint8)(unsafe.Pointer(pr.gcdata))[:pr.ptrdata])
				return
			}
		}
		roots = roots.next
	}

	// heap
	if base, s, _ := findObject(uintptr(p), 0, 0, false); base != 0 {
		hbits := heapBitsForAddr(base)
		n := s.elemsize
		mask = make([]byte, n/goarch.PtrSize)
		for i := uintptr(0); i < n; i += goarch.PtrSize {
			if hbits.isPointer() {
				mask[i/goarch.PtrSize] = 1
			}
			if !hbits.morePointers() {
				mask = mask[:i/goarch.PtrSize]
				break
			}
			hbits = hbits.next()
		}
		return
	}

	// otherwise, not something the GC knows about.
	// possibly read-only data, like malloc(0).
	// must not have pointers
	// For gccgo, may live on the stack, which is collected conservatively.
	return
}