libgo/go/runtime/mstats.go
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Memory statistics

package runtime

import (
	"internal/goarch"
	"runtime/internal/atomic"
	"unsafe"
)

// Statistics.
//
// For detailed descriptions see the documentation for MemStats.
// Fields that differ from MemStats are further documented here.
//
// Many of these fields are updated on the fly, while others are only
// updated when updatememstats is called.
type mstats struct {
	// General statistics.
	alloc       uint64 // bytes allocated and not yet freed
	total_alloc uint64 // bytes allocated (even if freed)
	sys         uint64 // bytes obtained from system (should be sum of xxx_sys below, no locking, approximate)
	nlookup     uint64 // number of pointer lookups (unused)
	nmalloc     uint64 // number of mallocs
	nfree       uint64 // number of frees

	// Statistics about malloc heap.
	// Updated atomically, or with the world stopped.
	//
	// Like MemStats, heap_sys and heap_inuse do not count memory
	// in manually-managed spans.
	heap_sys      sysMemStat // virtual address space obtained from system for GC'd heap
	heap_inuse    uint64     // bytes in mSpanInUse spans
	heap_released uint64     // bytes released to the OS

	// heap_objects is not used by the runtime directly and is instead
	// computed on the fly by updatememstats.
	heap_objects uint64 // total number of allocated objects

	// Statistics about stacks.
	stacks_inuse uint64     // bytes in manually-managed stack spans; computed by updatememstats
	stacks_sys   sysMemStat // only counts newosproc0 stack in mstats; differs from MemStats.StackSys

	// Statistics about allocation of low-level fixed-size structures.
	// Protected by FixAlloc locks.
	mspan_inuse  uint64 // mspan structures
	mspan_sys    sysMemStat
	mcache_inuse uint64 // mcache structures
	mcache_sys   sysMemStat
	buckhash_sys sysMemStat // profiling bucket hash table

	// Statistics about GC overhead.
	gcWorkBufInUse           uint64     // computed by updatememstats
	gcProgPtrScalarBitsInUse uint64     // computed by updatememstats
	gcMiscSys                sysMemStat // updated atomically or during STW

	// Miscellaneous statistics.
	other_sys sysMemStat // updated atomically or during STW

	// Statistics about the garbage collector.

	// Protected by mheap or stopping the world during GC.
	last_gc_unix    uint64 // last gc (in unix time)
	pause_total_ns  uint64
	pause_ns        [256]uint64 // circular buffer of recent gc pause lengths
	pause_end       [256]uint64 // circular buffer of recent gc end times (nanoseconds since 1970)
	numgc           uint32
	numforcedgc     uint32  // number of user-forced GCs
	gc_cpu_fraction float64 // fraction of CPU time used by GC
	enablegc        bool
	debuggc         bool

	// Statistics about allocation size classes.

	by_size [_NumSizeClasses]struct {
		size    uint32
		nmalloc uint64
		nfree   uint64
	}

	// Add a uint32 for an even number of size classes to align the
	// fields below to 64 bits for atomic operations on 32-bit platforms.
	_ [1 - _NumSizeClasses%2]uint32

	last_gc_nanotime uint64 // last gc (monotonic time)
	last_heap_inuse  uint64 // heap_inuse at mark termination of the previous GC

	// heapStats is a set of statistics kept consistent with one
	// another; see consistentHeapStats below.
	heapStats consistentHeapStats

	// _ uint32 // ensure gcPauseDist is aligned

	// gcPauseDist represents the distribution of all GC-related
	// application pauses in the runtime.
	//
	// Each individual pause is counted separately, unlike pause_ns.
	gcPauseDist timeHistogram
}

var memstats mstats

// A MemStats records statistics about the memory allocator.
type MemStats struct {
	// General statistics.

	// Alloc is bytes of allocated heap objects.
	//
	// This is the same as HeapAlloc (see below).
	Alloc uint64

	// TotalAlloc is cumulative bytes allocated for heap objects.
	//
	// TotalAlloc increases as heap objects are allocated, but
	// unlike Alloc and HeapAlloc, it does not decrease when
	// objects are freed.
	TotalAlloc uint64

	// Sys is the total bytes of memory obtained from the OS.
	//
	// Sys is the sum of the XSys fields below. Sys measures the
	// virtual address space reserved by the Go runtime for the
	// heap, stacks, and other internal data structures. It's
	// likely that not all of the virtual address space is backed
	// by physical memory at any given moment, though in general
	// it all was at some point.
	Sys uint64

	// Lookups is the number of pointer lookups performed by the
	// runtime.
	//
	// This is primarily useful for debugging runtime internals.
	Lookups uint64

	// Mallocs is the cumulative count of heap objects allocated.
	// The number of live objects is Mallocs - Frees.
	Mallocs uint64

	// Frees is the cumulative count of heap objects freed.
	Frees uint64

	// Heap memory statistics.
	//
	// Interpreting the heap statistics requires some knowledge of
	// how Go organizes memory. Go divides the virtual address
	// space of the heap into "spans", which are contiguous
	// regions of memory 8K or larger. A span may be in one of
	// three states:
	//
	// An "idle" span contains no objects or other data. The
	// physical memory backing an idle span can be released back
	// to the OS (but the virtual address space never is), or it
	// can be converted into an "in use" or "stack" span.
	//
	// An "in use" span contains at least one heap object and may
	// have free space available to allocate more heap objects.
	//
	// A "stack" span is used for goroutine stacks. Stack spans
	// are not considered part of the heap. A span can change
	// between heap and stack memory; it is never used for both
	// simultaneously.

	// HeapAlloc is bytes of allocated heap objects.
	//
	// "Allocated" heap objects include all reachable objects, as
	// well as unreachable objects that the garbage collector has
	// not yet freed. Specifically, HeapAlloc increases as heap
	// objects are allocated and decreases as the heap is swept
	// and unreachable objects are freed. Sweeping occurs
	// incrementally between GC cycles, so these two processes
	// occur simultaneously, and as a result HeapAlloc tends to
	// change smoothly (in contrast with the sawtooth that is
	// typical of stop-the-world garbage collectors).
	HeapAlloc uint64

	// HeapSys is bytes of heap memory obtained from the OS.
	//
	// HeapSys measures the amount of virtual address space
	// reserved for the heap. This includes virtual address space
	// that has been reserved but not yet used, which consumes no
	// physical memory, but tends to be small, as well as virtual
	// address space for which the physical memory has been
	// returned to the OS after it became unused (see HeapReleased
	// for a measure of the latter).
	//
	// HeapSys estimates the largest size the heap has had.
	HeapSys uint64

	// HeapIdle is bytes in idle (unused) spans.
	//
	// Idle spans have no objects in them. These spans could be
	// (and may already have been) returned to the OS, or they can
	// be reused for heap allocations, or they can be reused as
	// stack memory.
	//
	// HeapIdle minus HeapReleased estimates the amount of memory
	// that could be returned to the OS, but is being retained by
	// the runtime so it can grow the heap without requesting more
	// memory from the OS. If this difference is significantly
	// larger than the heap size, it indicates there was a recent
	// transient spike in live heap size.
	HeapIdle uint64

	// HeapInuse is bytes in in-use spans.
	//
	// In-use spans have at least one object in them. These spans
	// can only be used for other objects of roughly the same
	// size.
	//
	// HeapInuse minus HeapAlloc estimates the amount of memory
	// that has been dedicated to particular size classes, but is
	// not currently being used. This is an upper bound on
	// fragmentation, but in general this memory can be reused
	// efficiently.
	HeapInuse uint64

	// HeapReleased is bytes of physical memory returned to the OS.
	//
	// This counts heap memory from idle spans that was returned
	// to the OS and has not yet been reacquired for the heap.
	HeapReleased uint64

	// HeapObjects is the number of allocated heap objects.
	//
	// Like HeapAlloc, this increases as objects are allocated and
	// decreases as the heap is swept and unreachable objects are
	// freed.
	HeapObjects uint64

	// Stack memory statistics.
	//
	// Stacks are not considered part of the heap, but the runtime
	// can reuse a span of heap memory for stack memory, and
	// vice-versa.

	// StackInuse is bytes in stack spans.
	//
	// In-use stack spans have at least one stack in them. These
	// spans can only be used for other stacks of the same size.
	//
	// There is no StackIdle because unused stack spans are
	// returned to the heap (and hence counted toward HeapIdle).
	StackInuse uint64

	// StackSys is bytes of stack memory obtained from the OS.
	//
	// StackSys is StackInuse, plus any memory obtained directly
	// from the OS for OS thread stacks (which should be minimal).
	StackSys uint64

	// Off-heap memory statistics.
	//
	// The following statistics measure runtime-internal
	// structures that are not allocated from heap memory (usually
	// because they are part of implementing the heap). Unlike
	// heap or stack memory, any memory allocated to these
	// structures is dedicated to these structures.
	//
	// These are primarily useful for debugging runtime memory
	// overheads.

	// MSpanInuse is bytes of allocated mspan structures.
	MSpanInuse uint64

	// MSpanSys is bytes of memory obtained from the OS for mspan
	// structures.
	MSpanSys uint64

	// MCacheInuse is bytes of allocated mcache structures.
	MCacheInuse uint64

	// MCacheSys is bytes of memory obtained from the OS for
	// mcache structures.
	MCacheSys uint64

	// BuckHashSys is bytes of memory in profiling bucket hash tables.
	BuckHashSys uint64

	// GCSys is bytes of memory in garbage collection metadata.
	GCSys uint64

	// OtherSys is bytes of memory in miscellaneous off-heap
	// runtime allocations.
	OtherSys uint64

	// Garbage collector statistics.

	// NextGC is the target heap size of the next GC cycle.
	//
	// The garbage collector's goal is to keep HeapAlloc ≤ NextGC.
	// At the end of each GC cycle, the target for the next cycle
	// is computed based on the amount of reachable data and the
	// value of GOGC.
	NextGC uint64

	// LastGC is the time the last garbage collection finished, as
	// nanoseconds since 1970 (the UNIX epoch).
	LastGC uint64

	// PauseTotalNs is the cumulative nanoseconds in GC
	// stop-the-world pauses since the program started.
	//
	// During a stop-the-world pause, all goroutines are paused
	// and only the garbage collector can run.
	PauseTotalNs uint64

	// PauseNs is a circular buffer of recent GC stop-the-world
	// pause times in nanoseconds.
	//
	// The most recent pause is at PauseNs[(NumGC+255)%256]. In
	// general, the pause of the N-th completed GC cycle is stored
	// at PauseNs[(N-1)%256], so the buffer retains only the 256
	// most recent cycles. There may be multiple pauses per GC
	// cycle; this is the sum of all pauses during a cycle. (See
	// the indexing sketch after this struct.)
	PauseNs [256]uint64

	// PauseEnd is a circular buffer of recent GC pause end times,
	// as nanoseconds since 1970 (the UNIX epoch).
	//
	// This buffer is filled the same way as PauseNs. There may be
	// multiple pauses per GC cycle; this records the end of the
	// last pause in a cycle.
	PauseEnd [256]uint64

	// NumGC is the number of completed GC cycles.
	NumGC uint32

	// NumForcedGC is the number of GC cycles that were forced by
	// the application calling the GC function.
	NumForcedGC uint32

	// GCCPUFraction is the fraction of this program's available
	// CPU time used by the GC since the program started.
	//
	// GCCPUFraction is expressed as a number between 0 and 1,
	// where 0 means GC has consumed none of this program's CPU. A
	// program's available CPU time is defined as the integral of
	// GOMAXPROCS since the program started. That is, if
	// GOMAXPROCS is 2 and a program has been running for 10
	// seconds, its "available CPU" is 20 seconds. GCCPUFraction
	// does not include CPU time used for write barrier activity.
	//
	// This is the same as the fraction of CPU reported by
	// GODEBUG=gctrace=1.
	GCCPUFraction float64

	// EnableGC indicates that GC is enabled. It is always true,
	// even if GOGC=off.
	EnableGC bool

	// DebugGC is currently unused.
	DebugGC bool

	// BySize reports per-size class allocation statistics.
	//
	// BySize[N] gives statistics for allocations of size S where
	// BySize[N-1].Size < S ≤ BySize[N].Size.
	//
	// This does not report allocations larger than BySize[60].Size.
	// (A lookup sketch follows this struct.)
	BySize [61]struct {
		// Size is the maximum byte size of an object in this
		// size class.
		Size uint32

		// Mallocs is the cumulative count of heap objects
		// allocated in this size class. The cumulative bytes
		// of allocation is Size*Mallocs. The number of live
		// objects in this size class is Mallocs - Frees.
		Mallocs uint64

		// Frees is the cumulative count of heap objects freed
		// in this size class.
		Frees uint64
	}
}
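
// The two helpers below are illustrative sketches added for this
// edition; they are not part of the original runtime source. They
// show how the PauseNs circular buffer and the BySize invariant
// documented above can be used.

// exampleRecentPause returns the most recent GC pause duration
// recorded in m, using the circular-buffer indexing documented on
// PauseNs: the latest pause lives at index (NumGC+255)%256.
func exampleRecentPause(m *MemStats) uint64 {
	if m.NumGC == 0 {
		return 0 // no GC cycle has completed yet
	}
	return m.PauseNs[(m.NumGC+255)%256]
}

// exampleSizeClassOf returns the Size of the BySize entry covering
// an allocation of n bytes, following the documented invariant
// BySize[N-1].Size < S <= BySize[N].Size. The second result is
// false when n exceeds the largest reported size class.
func exampleSizeClassOf(m *MemStats, n uint32) (uint32, bool) {
	for i := range m.BySize {
		if n <= m.BySize[i].Size {
			return m.BySize[i].Size, true
		}
	}
	return 0, false // larger than BySize[60].Size; not reported here
}
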
func init() {
	if offset := unsafe.Offsetof(memstats.heapStats); offset%8 != 0 {
		println(offset)
		throw("memstats.heapStats not aligned to 8 bytes")
	}
	if offset := unsafe.Offsetof(memstats.gcPauseDist); offset%8 != 0 {
		println(offset)
		throw("memstats.gcPauseDist not aligned to 8 bytes")
	}
	// Ensure the size of heapStatsDelta causes adjacent fields/slots (e.g.
	// [3]heapStatsDelta) to be 8-byte aligned.
	if size := unsafe.Sizeof(heapStatsDelta{}); size%8 != 0 {
		println(size)
		throw("heapStatsDelta not a multiple of 8 bytes in size")
	}
}

// ReadMemStats populates m with memory allocator statistics.
//
// The returned memory allocator statistics are up to date as of the
// call to ReadMemStats. This is in contrast with a heap profile,
// which is a snapshot as of the most recently completed garbage
// collection cycle.
func ReadMemStats(m *MemStats) {
	stopTheWorld("read mem stats")

	systemstack(func() {
		readmemstats_m(m)
	})

	startTheWorld()
}
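
// exampleHeapAllocNow is an illustrative sketch (not part of the
// original source) showing the caller's side of ReadMemStats: declare
// a MemStats, pass its address, and read the fields of interest
// afterward. Because ReadMemStats stops the world, it should be
// called sparingly on hot paths.
func exampleHeapAllocNow() uint64 {
	var m MemStats
	ReadMemStats(&m)
	return m.HeapAlloc
}
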
func readmemstats_m(stats *MemStats) {
	updatememstats()

	stats.Alloc = memstats.alloc
	stats.TotalAlloc = memstats.total_alloc
	stats.Sys = memstats.sys
	stats.Mallocs = memstats.nmalloc
	stats.Frees = memstats.nfree
	stats.HeapAlloc = memstats.alloc
	stats.HeapSys = memstats.heap_sys.load()
	// By definition, HeapIdle is memory that was mapped
	// for the heap but is not currently used to hold heap
	// objects. It also specifically is memory that can be
	// used for other purposes, like stacks, but this memory
	// is subtracted out of HeapSys before it makes that
	// transition. Put another way:
	//
	// heap_sys = bytes allocated from the OS for the heap - bytes ultimately used for non-heap purposes
	// heap_idle = bytes allocated from the OS for the heap - bytes ultimately used for any purpose
	//
	// or
	//
	// heap_sys = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse
	// heap_idle = sys - stacks_inuse - gcWorkBufInUse - gcProgPtrScalarBitsInUse - heap_inuse
	//
	// => heap_idle = heap_sys - heap_inuse
	stats.HeapIdle = memstats.heap_sys.load() - memstats.heap_inuse
	stats.HeapInuse = memstats.heap_inuse
	stats.HeapReleased = memstats.heap_released
	stats.HeapObjects = memstats.heap_objects
	stats.StackInuse = memstats.stacks_inuse
	// memstats.stacks_sys is only memory mapped directly for OS stacks.
	// Add in heap-allocated stack memory for user consumption.
	stats.StackSys = memstats.stacks_inuse + memstats.stacks_sys.load()
	stats.MSpanInuse = memstats.mspan_inuse
	stats.MSpanSys = memstats.mspan_sys.load()
	stats.MCacheInuse = memstats.mcache_inuse
	stats.MCacheSys = memstats.mcache_sys.load()
	stats.BuckHashSys = memstats.buckhash_sys.load()
	// MemStats defines GCSys as an aggregate of all memory related
	// to the memory management system, but we track this memory
	// at a more granular level in the runtime.
	stats.GCSys = memstats.gcMiscSys.load() + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse
	stats.OtherSys = memstats.other_sys.load()
	stats.NextGC = gcController.heapGoal
	stats.LastGC = memstats.last_gc_unix
	stats.PauseTotalNs = memstats.pause_total_ns
	stats.PauseNs = memstats.pause_ns
	stats.PauseEnd = memstats.pause_end
	stats.NumGC = memstats.numgc
	stats.NumForcedGC = memstats.numforcedgc
	stats.GCCPUFraction = memstats.gc_cpu_fraction
	stats.EnableGC = true

	// Handle BySize. Copy N values, where N is
	// the minimum of the lengths of the two arrays.
	// Unfortunately copy() won't work here because
	// the arrays have different structs.
	//
	// TODO(mknyszek): Consider renaming the fields
	// of by_size's elements to align so we can use
	// the copy built-in.
	bySizeLen := len(stats.BySize)
	if l := len(memstats.by_size); l < bySizeLen {
		bySizeLen = l
	}
	for i := 0; i < bySizeLen; i++ {
		stats.BySize[i].Size = memstats.by_size[i].size
		stats.BySize[i].Mallocs = memstats.by_size[i].nmalloc
		stats.BySize[i].Frees = memstats.by_size[i].nfree
	}
}

//go:linkname readGCStats runtime_1debug.readGCStats
func readGCStats(pauses *[]uint64) {
	systemstack(func() {
		readGCStats_m(pauses)
	})
}

// readGCStats_m must be called on the system stack because it acquires the heap
// lock. See mheap for details.
//go:systemstack
func readGCStats_m(pauses *[]uint64) {
	p := *pauses
	// Calling code in runtime/debug should make the slice large enough.
	if cap(p) < len(memstats.pause_ns)+3 {
		throw("short slice passed to readGCStats")
	}

	// Pass back: pauses, pause ends, last gc (absolute time), number of gc, total pause ns.
	lock(&mheap_.lock)

	n := memstats.numgc
	if n > uint32(len(memstats.pause_ns)) {
		n = uint32(len(memstats.pause_ns))
	}

	// The pause buffer is circular. The most recent pause is at
	// pause_ns[(numgc-1)%len(pause_ns)], and then backward
	// from there to go back farther in time. We deliver the times
	// most recent first (in p[0]).
	p = p[:cap(p)]
	for i := uint32(0); i < n; i++ {
		j := (memstats.numgc - 1 - i) % uint32(len(memstats.pause_ns))
		p[i] = memstats.pause_ns[j]
		p[n+i] = memstats.pause_end[j]
	}

	p[n+n] = memstats.last_gc_unix
	p[n+n+1] = uint64(memstats.numgc)
	p[n+n+2] = memstats.pause_total_ns
	unlock(&mheap_.lock)
	*pauses = p[:n+n+3]
}
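
// exampleDecodeGCStats is an illustrative sketch (not part of the
// original source) of how a consumer can unpack the slice packed
// above: p[:n] holds pause lengths, p[n:2n] holds pause end times,
// and the final three entries are the last GC time, the GC count,
// and the total pause time. The real consumer of this layout lives
// in runtime/debug.
func exampleDecodeGCStats(p []uint64) (pauses, ends []uint64, lastGC, numGC, totalNs uint64) {
	n := (len(p) - 3) / 2
	return p[:n], p[n : 2*n], p[2*n], p[2*n+1], p[2*n+2]
}
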
// Updates the memstats structure.
//
// The world must be stopped.
//
//go:nowritebarrier
func updatememstats() {
	assertWorldStopped()

	// Flush mcaches to mcentral before doing anything else.
	//
	// Flushing to the mcentral may in general cause stats to
	// change as mcentral data structures are manipulated.
	systemstack(flushallmcaches)

	memstats.mcache_inuse = uint64(mheap_.cachealloc.inuse)
	memstats.mspan_inuse = uint64(mheap_.spanalloc.inuse)
	memstats.sys = memstats.heap_sys.load() + memstats.stacks_sys.load() + memstats.mspan_sys.load() +
		memstats.mcache_sys.load() + memstats.buckhash_sys.load() + memstats.gcMiscSys.load() +
		memstats.other_sys.load()

	// Calculate memory allocator stats.
	// During program execution we only count the number of frees and the
	// amount of freed memory. The current number of live objects in the
	// heap and the amount of live heap memory are calculated by scanning
	// all spans. The total number of mallocs is calculated as the number
	// of frees plus the number of live objects. Similarly, the total
	// amount of allocated memory is calculated as the amount of freed
	// memory plus the amount of live heap memory.
	memstats.alloc = 0
	memstats.total_alloc = 0
	memstats.nmalloc = 0
	memstats.nfree = 0
	for i := 0; i < len(memstats.by_size); i++ {
		memstats.by_size[i].nmalloc = 0
		memstats.by_size[i].nfree = 0
	}
	// Collect consistent stats, which are the source of truth in some cases.
	var consStats heapStatsDelta
	memstats.heapStats.unsafeRead(&consStats)

	// Collect large allocation stats.
	totalAlloc := uint64(consStats.largeAlloc)
	memstats.nmalloc += uint64(consStats.largeAllocCount)
	totalFree := uint64(consStats.largeFree)
	memstats.nfree += uint64(consStats.largeFreeCount)

	// Collect per-sizeclass stats.
	for i := 0; i < _NumSizeClasses; i++ {
		// Malloc stats.
		a := uint64(consStats.smallAllocCount[i])
		totalAlloc += a * uint64(class_to_size[i])
		memstats.nmalloc += a
		memstats.by_size[i].nmalloc = a

		// Free stats.
		f := uint64(consStats.smallFreeCount[i])
		totalFree += f * uint64(class_to_size[i])
		memstats.nfree += f
		memstats.by_size[i].nfree = f
	}

	// Account for tiny allocations.
	memstats.nfree += uint64(consStats.tinyAllocCount)
	memstats.nmalloc += uint64(consStats.tinyAllocCount)

	// Calculate derived stats.
	memstats.total_alloc = totalAlloc
	memstats.alloc = totalAlloc - totalFree
	memstats.heap_objects = memstats.nmalloc - memstats.nfree

	memstats.stacks_inuse = uint64(consStats.inStacks)
	memstats.gcWorkBufInUse = uint64(consStats.inWorkBufs)
	memstats.gcProgPtrScalarBitsInUse = uint64(consStats.inPtrScalarBits)

	// We also count stacks_inuse, gcWorkBufInUse, and gcProgPtrScalarBitsInUse as sys memory.
	memstats.sys += memstats.stacks_inuse + memstats.gcWorkBufInUse + memstats.gcProgPtrScalarBitsInUse

	// The world is stopped, so the consistent stats (after aggregation)
	// should be identical to some combination of memstats. In particular:
	//
	// * heap_inuse == inHeap
	// * heap_released == released
	// * heap_sys - heap_released == committed - inStacks - inWorkBufs - inPtrScalarBits
	//
	// Check if that's actually true.
	//
	// TODO(mknyszek): Maybe don't throw here. It would be bad if a
	// bug in otherwise benign accounting caused the whole application
	// to crash.
	if memstats.heap_inuse != uint64(consStats.inHeap) {
		print("runtime: heap_inuse=", memstats.heap_inuse, "\n")
		print("runtime: consistent value=", consStats.inHeap, "\n")
		throw("heap_inuse and consistent stats are not equal")
	}
	if memstats.heap_released != uint64(consStats.released) {
		print("runtime: heap_released=", memstats.heap_released, "\n")
		print("runtime: consistent value=", consStats.released, "\n")
		throw("heap_released and consistent stats are not equal")
	}
	globalRetained := memstats.heap_sys.load() - memstats.heap_released
	consRetained := uint64(consStats.committed - consStats.inStacks - consStats.inWorkBufs - consStats.inPtrScalarBits)
	if globalRetained != consRetained {
		print("runtime: global value=", globalRetained, "\n")
		print("runtime: consistent value=", consRetained, "\n")
		throw("measures of the retained heap are not equal")
	}
}

// flushmcache flushes the mcache of allp[i].
//
// The world must be stopped.
//
//go:nowritebarrier
func flushmcache(i int) {
	assertWorldStopped()

	p := allp[i]
	c := p.mcache
	if c == nil {
		return
	}
	c.releaseAll()
}

// flushallmcaches flushes the mcaches of all Ps.
//
// The world must be stopped.
//
//go:nowritebarrier
func flushallmcaches() {
	assertWorldStopped()

	for i := 0; i < int(gomaxprocs); i++ {
		flushmcache(i)
	}
}

// sysMemStat represents a global system statistic that is managed atomically.
//
// This type must structurally be a uint64 so that mstats aligns with MemStats.
type sysMemStat uint64

// load atomically reads the value of the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//go:nosplit
func (s *sysMemStat) load() uint64 {
	return atomic.Load64((*uint64)(s))
}

// add atomically adds n to the stat.
//
// Must be nosplit as it is called in runtime initialization, e.g. newosproc0.
//go:nosplit
func (s *sysMemStat) add(n int64) {
	if s == nil {
		return
	}
	val := atomic.Xadd64((*uint64)(s), n)
	if (n > 0 && int64(val) < n) || (n < 0 && int64(val)+n < n) {
		print("runtime: val=", val, " n=", n, "\n")
		throw("sysMemStat overflow")
	}
}
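
// exampleSysMemStatUsage is an illustrative sketch (not part of the
// original source) of the typical pattern for these stats: add a
// positive delta when memory is obtained from the OS, a negative
// one when it is returned, and load to observe the current value.
var exampleSysStat sysMemStat // package-level so 64-bit atomics stay aligned on 32-bit platforms

func exampleSysMemStatUsage() uint64 {
	exampleSysStat.add(4096)  // memory obtained, e.g. after a sysAlloc
	exampleSysStat.add(-4096) // memory returned, e.g. after a sysFree
	return exampleSysStat.load()
}
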
// heapStatsDelta contains deltas of various runtime memory statistics
// that need to be updated together in order for them to be kept
// consistent with one another.
type heapStatsDelta struct {
	// Memory stats.
	committed       int64 // byte delta of memory committed
	released        int64 // byte delta of released memory generated
	inHeap          int64 // byte delta of memory placed in the heap
	inStacks        int64 // byte delta of memory reserved for stacks
	inWorkBufs      int64 // byte delta of memory reserved for work bufs
	inPtrScalarBits int64 // byte delta of memory reserved for unrolled GC prog bits

	// Allocator stats.
	tinyAllocCount  uintptr                  // number of tiny allocations
	largeAlloc      uintptr                  // bytes allocated for large objects
	largeAllocCount uintptr                  // number of large object allocations
	smallAllocCount [_NumSizeClasses]uintptr // number of allocs for small objects
	largeFree       uintptr                  // bytes freed for large objects (>maxSmallSize)
	largeFreeCount  uintptr                  // number of frees for large objects (>maxSmallSize)
	smallFreeCount  [_NumSizeClasses]uintptr // number of frees for small objects (<=maxSmallSize)

	// Add a uint32 to ensure this struct is a multiple of 8 bytes in size.
	// Only necessary on 32-bit platforms.
	_ [(goarch.PtrSize / 4) % 2]uint32
}

// merge adds in the deltas from b into a.
func (a *heapStatsDelta) merge(b *heapStatsDelta) {
	a.committed += b.committed
	a.released += b.released
	a.inHeap += b.inHeap
	a.inStacks += b.inStacks
	a.inWorkBufs += b.inWorkBufs
	a.inPtrScalarBits += b.inPtrScalarBits

	a.tinyAllocCount += b.tinyAllocCount
	a.largeAlloc += b.largeAlloc
	a.largeAllocCount += b.largeAllocCount
	for i := range b.smallAllocCount {
		a.smallAllocCount[i] += b.smallAllocCount[i]
	}
	a.largeFree += b.largeFree
	a.largeFreeCount += b.largeFreeCount
	for i := range b.smallFreeCount {
		a.smallFreeCount[i] += b.smallFreeCount[i]
	}
}

// consistentHeapStats represents a set of various memory statistics
// whose updates must be viewed completely to get a consistent
// state of the world.
//
// To write updates to memory stats use the acquire and release
// methods. To obtain a consistent global snapshot of these statistics,
// use read.
type consistentHeapStats struct {
	// stats is a ring buffer of heapStatsDelta values.
	// Writers always atomically update the delta at index gen.
	//
	// Readers operate by rotating gen (0 -> 1 -> 2 -> 0 -> ...)
	// and synchronizing with writers by observing each P's
	// statsSeq field. If the reader observes a P not writing,
	// it can be sure that it will pick up the new gen value the
	// next time it writes.
	//
	// The reader then takes responsibility by clearing space
	// in the ring buffer for the next reader to rotate gen to
	// that space (i.e. it merges in values from index (gen-2) mod 3
	// to index (gen-1) mod 3, then clears the former).
	//
	// Note that this means only one reader can be reading at a time.
	// There is no way for readers to synchronize.
	//
	// This process is why we need a ring buffer of size 3 instead
	// of 2: one is for the writers, one contains the most recent
	// data, and the last one is clear so writers can begin writing
	// to it the moment gen is updated.
	stats [3]heapStatsDelta

	// gen represents the current index into which writers
	// are writing, and can take on the value of 0, 1, or 2.
	// This value is updated atomically.
	gen uint32

	// noPLock is intended to provide mutual exclusion for updating
	// stats when no P is available. It does not block other writers
	// with a P, only other writers without a P and the reader. Because
	// stats are usually updated when a P is available, contention on
	// this lock should be minimal.
	noPLock mutex
}

// acquire returns a heapStatsDelta to be updated. In effect,
// it acquires the shard for writing. release must be called
// as soon as the relevant deltas are updated.
//
// The returned heapStatsDelta must be updated atomically.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between. A P also must
// not acquire a given consistentHeapStats if it hasn't
// yet released it.
//
// nosplit because a stack growth in this function could
// lead to a stack allocation that could reenter the
// function.
//
//go:nosplit
func (m *consistentHeapStats) acquire() *heapStatsDelta {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 == 0 {
			// Should have been incremented to odd.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		lock(&m.noPLock)
	}
	gen := atomic.Load(&m.gen) % 3
	return &m.stats[gen]
}

// release indicates that the writer is done modifying
// the delta. The value returned by the corresponding
// acquire must no longer be accessed or modified after
// release is called.
//
// The caller's P must not change between acquire and
// release. This also means that the caller should not
// acquire a P or release its P in between.
//
// nosplit because a stack growth in this function could
// lead to a stack allocation that causes another acquire
// before this operation has completed.
//
//go:nosplit
func (m *consistentHeapStats) release() {
	if pp := getg().m.p.ptr(); pp != nil {
		seq := atomic.Xadd(&pp.statsSeq, 1)
		if seq%2 != 0 {
			// Should have been incremented to even.
			print("runtime: seq=", seq, "\n")
			throw("bad sequence number")
		}
	} else {
		unlock(&m.noPLock)
	}
}
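
// exampleStatsWrite is an illustrative sketch (not part of the
// original source) of the writer protocol described above: acquire
// the current shard, update its deltas atomically, then release it.
// The caller's P must not change between the two calls.
func exampleStatsWrite(nbytes int64) {
	stats := memstats.heapStats.acquire()
	atomic.Xaddint64(&stats.inHeap, nbytes) // deltas must be updated atomically
	memstats.heapStats.release()
}
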
// unsafeRead aggregates the delta for this shard into out.
//
// Unsafe because it does so without any synchronization. The
// world must be stopped.
func (m *consistentHeapStats) unsafeRead(out *heapStatsDelta) {
	assertWorldStopped()

	for i := range m.stats {
		out.merge(&m.stats[i])
	}
}

// unsafeClear clears the shard.
//
// Unsafe because the world must be stopped and values should
// be donated elsewhere before clearing.
func (m *consistentHeapStats) unsafeClear() {
	assertWorldStopped()

	for i := range m.stats {
		m.stats[i] = heapStatsDelta{}
	}
}

// read takes a globally consistent snapshot of m
// and puts the aggregated value in out. Even though out is a
// heapStatsDelta, the resulting values should be complete and
// valid statistic values.
//
// Not safe to call concurrently. The world must be stopped
// or metricsSema must be held.
func (m *consistentHeapStats) read(out *heapStatsDelta) {
	// Getting preempted after this point is not safe because
	// we read allp. We need to make sure a STW can't happen
	// so it doesn't change out from under us.
	mp := acquirem()

	// Get the current generation. We can be confident that this
	// will not change since read is serialized and is the only
	// thing that modifies gen.
	currGen := atomic.Load(&m.gen)
	prevGen := currGen - 1
	if currGen == 0 {
		prevGen = 2
	}

	// Prevent writers without a P from writing while we update gen.
	lock(&m.noPLock)

	// Rotate gen, effectively taking a snapshot of the state of
	// these statistics at the point of the exchange by moving
	// writers to the next set of deltas.
	//
	// This exchange is safe to do because we won't race
	// with anyone else trying to update this value.
	atomic.Xchg(&m.gen, (currGen+1)%3)

	// Allow P-less writers to continue. They'll be writing to the
	// next generation now.
	unlock(&m.noPLock)

	for _, p := range allp {
		// Spin until there are no more writers.
		for atomic.Load(&p.statsSeq)%2 != 0 {
		}
	}

	// At this point we've observed that each sequence
	// number is even, so any future writers will observe
	// the new gen value. That means it's safe to read from
	// the other deltas in the stats buffer.

	// Perform our responsibilities and free up
	// stats[prevGen] for the next time we want to take
	// a snapshot.
	m.stats[currGen].merge(&m.stats[prevGen])
	m.stats[prevGen] = heapStatsDelta{}

	// Finally, copy out the complete delta.
	*out = m.stats[currGen]

	releasem(mp)
}
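
// exampleReadSnapshot is an illustrative sketch (not part of the
// original source) of the reader's side: a single reader aggregates
// a consistent snapshot into a heapStatsDelta and can then treat
// its fields as complete statistic values rather than deltas.
func exampleReadSnapshot() int64 {
	var snap heapStatsDelta
	memstats.heapStats.read(&snap) // caller must hold metricsSema or have stopped the world
	return snap.inHeap             // total bytes currently in the heap
}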