// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package runtime

import (
	"runtime/internal/atomic"
	"unsafe"
)
// A profBuf is a lock-free buffer for profiling events,
// safe for concurrent use by one reader and one writer.
// The writer may be a signal handler running without a user g.
// The reader is assumed to be a user g.
//
// Each logged event corresponds to a fixed size header, a list of
// uintptrs (typically a stack), and exactly one unsafe.Pointer tag.
// The header and uintptrs are stored in the circular buffer data and the
// tag is stored in a circular buffer tags, running in parallel.
// In the circular buffer data, each event takes 2+hdrsize+len(stk)
// words: the value 2+hdrsize+len(stk), then the time of the event, then
// hdrsize words giving the fixed-size header, and then len(stk) words
// for the stack.
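//
// For illustration (the sizes here are hypothetical), with hdrsize = 1 and
// a three-frame stack, one event occupies 2+1+3 = 6 words of data:
//
//	[6, eventTime, hdr0, pc0, pc1, pc2]
//
// and exactly one entry in tags.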
//
// The current effective offsets into the tags and data circular buffers
// for reading and writing are stored in the high 30 and low 32 bits of r and w.
// The bottom bits of the high 32 are additional flag bits in w, unused in r.
// "Effective" offsets means the total number of reads or writes, mod 2^length.
// The offset in the buffer is the effective offset mod the length of the buffer.
// To make wraparound mod 2^length match wraparound mod length of the buffer,
// the length of the buffer must be a power of two.
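//
// Concretely, as encoded by profIndex and its accessor methods below,
// a packed r or w value is laid out as:
//
//	bits 34..63: tag count (30 bits)
//	bits 32..33: flag bits (profReaderSleeping, profWriteExtra), used only in w
//	bits  0..31: data count (32 bits)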
//
// If the reader catches up to the writer, a flag passed to read controls
// whether the read blocks until more data is available. A read returns a
// pointer to the buffer data itself; the caller is assumed to be done with
// that data at the next read. The read offset rNext tracks the next offset to
// be returned by read. By definition, r ≤ rNext ≤ w (before wraparound),
// and rNext is only used by the reader, so it can be accessed without atomics.
//
// If the writer gets ahead of the reader, so that the buffer fills,
// future writes are discarded and replaced in the output stream by an
// overflow entry, which has size 2+hdrsize+1, time set to the time of
// the first discarded write, a header of all zeroed words, and a "stack"
// containing one word, the number of discarded writes.
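//
// With hdrsize = 1 again, an overflow entry summarizing N discarded
// writes would therefore occupy 2+1+1 = 4 words:
//
//	[4, timeOfFirstDiscardedWrite, 0, N]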
//
// Between the time the buffer fills and the buffer becomes empty enough
// to hold more data, the overflow entry is stored as a pending overflow
// entry in the fields overflow and overflowTime. The pending overflow
// entry can be turned into a real record by either the writer or the
// reader. If the writer is called to write a new record and finds that
// the output buffer has room for both the pending overflow entry and the
// new record, the writer emits the pending overflow entry and the new
// record into the buffer. If the reader is called to read data and finds
// that the output buffer is empty but that there is a pending overflow
// entry, the reader will return a synthesized record for the pending
// overflow entry.
//
// Only the writer can create or add to a pending overflow entry, but
// either the reader or the writer can clear the pending overflow entry.
// A pending overflow entry is indicated by the low 32 bits of 'overflow'
// holding the number of discarded writes, and overflowTime holding the
// time of the first discarded write. The high 32 bits of 'overflow'
// increment each time the low 32 bits transition from zero to non-zero
// or vice versa. This sequence number avoids ABA problems in the use of
// compare-and-swap to coordinate between reader and writer.
// The overflowTime is only written when the low 32 bits of overflow are
// zero, that is, only when there is no pending overflow entry, in
// preparation for creating a new one. The reader can therefore fetch and
// clear the entry atomically using
//
//	for {
//		overflow = load(&b.overflow)
//		if uint32(overflow) == 0 {
//			// no pending entry
//			break
//		}
//		time = load(&b.overflowTime)
//		if cas(&b.overflow, overflow, ((overflow>>32)+1)<<32) {
//			// pending entry cleared
//			break
//		}
//	}
//	if uint32(overflow) > 0 {
//		emit entry for uint32(overflow), time
//	}
type profBuf struct {
	// accessed atomically
	r, w         profAtomic
	overflow     uint64
	overflowTime uint64
	eof          uint32

	// immutable (excluding slice content)
	hdrsize uintptr
	data    []uint64
	tags    []unsafe.Pointer

	// owned by reader
	rNext       profIndex
	overflowBuf []uint64 // for use by reader to return overflow record
	wait        note
}

// A profAtomic is the atomically-accessed word holding a profIndex.
type profAtomic uint64
// A profIndex is the packed tag and data counts and flags bits, described above.
type profIndex uint64

const (
	profReaderSleeping profIndex = 1 << 32 // reader is sleeping and must be woken up
	profWriteExtra     profIndex = 1 << 33 // overflow or eof waiting
)
func (x *profAtomic) load() profIndex {
	return profIndex(atomic.Load64((*uint64)(x)))
}

func (x *profAtomic) store(new profIndex) {
	atomic.Store64((*uint64)(x), uint64(new))
}

func (x *profAtomic) cas(old, new profIndex) bool {
	return atomic.Cas64((*uint64)(x), uint64(old), uint64(new))
}

func (x profIndex) dataCount() uint32 {
	return uint32(x)
}

func (x profIndex) tagCount() uint32 {
	return uint32(x >> 34)
}
// countSub subtracts two counts obtained from profIndex.dataCount or profIndex.tagCount,
// assuming that they are no more than 2^29 apart (guaranteed since they are never more than
// len(data) or len(tags) apart, respectively).
// tagCount wraps at 2^30, while dataCount wraps at 2^32.
// This function works for both.
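// For example (hypothetical values), if y = 1&lt;&lt;30 - 5 and eight more tag
// writes wrap the 30-bit count around to x = 3, then countSub(x, y) == 8.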
func countSub(x, y uint32) int {
	// x-y is 32-bit signed or 30-bit signed; sign-extend to 32 bits and convert to int.
	return int(int32(x-y) << 2 >> 2)
}
// addCountsAndClearFlags returns the packed form of "x + (data, tag) - all flags".
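// The tag delta is masked to 30 bits and added into bits 34..63, the data
// delta is added into the low 32 bits, and the flag bits 32..33 are dropped
// when the word is reassembled.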
func (x profIndex) addCountsAndClearFlags(data, tag int) profIndex {
	return profIndex((uint64(x)>>34+uint64(uint32(tag)<<2>>2))<<34 | uint64(uint32(x)+uint32(data)))
}
// hasOverflow reports whether b has any overflow records pending.
func (b *profBuf) hasOverflow() bool {
	return uint32(atomic.Load64(&b.overflow)) > 0
}

// takeOverflow consumes the pending overflow records, returning the overflow count
// and the time of the first overflow.
// When called by the reader, it is racing against incrementOverflow.
func (b *profBuf) takeOverflow() (count uint32, time uint64) {
	overflow := atomic.Load64(&b.overflow)
	time = atomic.Load64(&b.overflowTime)
	for {
		count = uint32(overflow)
		if count == 0 {
			time = 0
			break
		}
		// Increment generation, clear overflow count in low bits.
		if atomic.Cas64(&b.overflow, overflow, ((overflow>>32)+1)<<32) {
			break
		}
		overflow = atomic.Load64(&b.overflow)
		time = atomic.Load64(&b.overflowTime)
	}
	return uint32(overflow), time
}
// incrementOverflow records a single overflow at time now.
// It is racing against a possible takeOverflow in the reader.
func (b *profBuf) incrementOverflow(now int64) {
	for {
		overflow := atomic.Load64(&b.overflow)

		// Once we see b.overflow reach 0, it's stable: no one else is changing it underfoot.
		// We need to set overflowTime if we're incrementing b.overflow from 0.
		if uint32(overflow) == 0 {
			// Store overflowTime first so it's always available when overflow != 0.
			atomic.Store64(&b.overflowTime, uint64(now))
			atomic.Store64(&b.overflow, (((overflow>>32)+1)<<32)+1)
			break
		}
		// Otherwise we're racing to increment against reader
		// who wants to set b.overflow to 0.
		// Out of paranoia, leave 2³²-1 a sticky overflow value,
		// to avoid wrapping around. Extremely unlikely.
		if int32(overflow) == -1 {
			break
		}
		if atomic.Cas64(&b.overflow, overflow, overflow+1) {
			break
		}
	}
}
// newProfBuf returns a new profiling buffer with room for
// a header of hdrsize words and a buffer of at least bufwords words.
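//
// Illustrative use only (the sizes here are hypothetical, not taken from
// the runtime): a buffer with a one-word header, 1&lt;&lt;16 data words, and
// 1&lt;&lt;10 tag slots would be created with
//
//	b := newProfBuf(1, 1<<16, 1<<10)
//
// after which every write consumes 2+1+len(stk) data words and one tag slot
// until read returns that space to the ring.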
func newProfBuf(hdrsize, bufwords, tags int) *profBuf {
	if min := 2 + hdrsize + 1; bufwords < min {
		bufwords = min
	}

	// Buffer sizes must be power of two, so that we don't have to
	// worry about uint32 wraparound changing the effective position
	// within the buffers. We store 30 bits of count; limiting to 28
	// gives us some room for intermediate calculations.
	if bufwords >= 1<<28 || tags >= 1<<28 {
		throw("newProfBuf: buffer too large")
	}
	var i int
	for i = 1; i < bufwords; i <<= 1 {
	}
	bufwords = i
	for i = 1; i < tags; i <<= 1 {
	}
	tags = i

	b := new(profBuf)
	b.hdrsize = uintptr(hdrsize)
	b.data = make([]uint64, bufwords)
	b.tags = make([]unsafe.Pointer, tags)
	b.overflowBuf = make([]uint64, 2+b.hdrsize+1)
	return b
}
// canWriteRecord reports whether the buffer has room
// for a single contiguous record with a stack of length nstk.
func (b *profBuf) canWriteRecord(nstk int) bool {
	br := b.r.load()
	bw := b.w.load()

	// room for tag?
	if countSub(br.tagCount(), bw.tagCount())+len(b.tags) < 1 {
		return false
	}

	// room for data?
	nd := countSub(br.dataCount(), bw.dataCount()) + len(b.data)
	want := 2 + int(b.hdrsize) + nstk
	i := int(bw.dataCount() % uint32(len(b.data)))
	if i+want > len(b.data) {
		// Can't fit in trailing fragment of slice.
		// Skip over that and start over at beginning of slice.
		nd -= len(b.data) - i
	}
	return nd >= want
}
// canWriteTwoRecords reports whether the buffer has room
// for two records with stack lengths nstk1, nstk2, in that order.
// Each record must be contiguous on its own, but the two
// records need not be contiguous (one can be at the end of the buffer
// and the other can wrap around and start at the beginning of the buffer).
func (b *profBuf) canWriteTwoRecords(nstk1, nstk2 int) bool {
	br := b.r.load()
	bw := b.w.load()

	// room for tag?
	if countSub(br.tagCount(), bw.tagCount())+len(b.tags) < 2 {
		return false
	}

	// room for data?
	nd := countSub(br.dataCount(), bw.dataCount()) + len(b.data)

	// first record
	want := 2 + int(b.hdrsize) + nstk1
	i := int(bw.dataCount() % uint32(len(b.data)))
	if i+want > len(b.data) {
		// Can't fit in trailing fragment of slice.
		// Skip over that and start over at beginning of slice.
		nd -= len(b.data) - i
		i = 0
	}
	i += want
	nd -= want

	// second record
	want = 2 + int(b.hdrsize) + nstk2
	if i+want > len(b.data) {
		// Can't fit in trailing fragment of slice.
		// Skip over that and start over at beginning of slice.
		nd -= len(b.data) - i
		i = 0
	}
	return nd >= want
}
// write writes an entry to the profiling buffer b.
// The entry begins with a fixed hdr, which must have
// length b.hdrsize, followed by a variable-sized stack
// and a single tag pointer *tagPtr (or nil if tagPtr is nil).
// No write barriers allowed because this might be called from a signal handler.
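//
// Illustrative call only (sampleKind, labels, stk, and n are hypothetical
// names, not runtime APIs): a profiler with a one-word header could record
// an event as
//
//	var hdr [1]uint64
//	hdr[0] = sampleKind
//	b.write(&labels, nanotime(), hdr[:], stk[:n])
//
// passing nil for tagPtr when the event carries no tag.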
func (b *profBuf) write(tagPtr *unsafe.Pointer, now int64, hdr []uint64, stk []uintptr) {
	if b == nil {
		return
	}
	if len(hdr) > int(b.hdrsize) {
		throw("misuse of profBuf.write")
	}

	if hasOverflow := b.hasOverflow(); hasOverflow && b.canWriteTwoRecords(1, len(stk)) {
		// Room for both an overflow record and the one being written.
		// Write the overflow record if the reader hasn't gotten to it yet.
		// Only racing against reader, not other writers.
		count, time := b.takeOverflow()
		if count > 0 {
			var stk [1]uintptr
			stk[0] = uintptr(count)
			b.write(nil, int64(time), nil, stk[:])
		}
	} else if hasOverflow || !b.canWriteRecord(len(stk)) {
		// Pending overflow without room to write overflow and new records
		// or no overflow but also no room for new record.
		b.incrementOverflow(now)
		b.wakeupExtra()
		return
	}

	// There's room: write the record.
	br := b.r.load()
	bw := b.w.load()

	// Profiling tag
	//
	// The tag is a pointer, but we can't run a write barrier here.
	// We have interrupted the OS-level execution of gp, but the
	// runtime still sees gp as executing. In effect, we are running
	// in place of the real gp. Since gp is the only goroutine that
	// can overwrite gp.labels, the value of gp.labels is stable during
	// this signal handler: it will still be reachable from gp when
	// we finish executing. If a GC is in progress right now, it must
	// keep gp.labels alive, because gp.labels is reachable from gp.
	// If gp were to overwrite gp.labels, the deletion barrier would
	// still shade that pointer, which would preserve it for the
	// in-progress GC, so all is well. Any future GC will see the
	// value we copied when scanning b.tags (heap-allocated).
	// We arrange that the store here is always overwriting a nil,
	// so there is no need for a deletion barrier on b.tags[wt].
	wt := int(bw.tagCount() % uint32(len(b.tags)))
	if tagPtr != nil {
		*(*uintptr)(unsafe.Pointer(&b.tags[wt])) = uintptr(unsafe.Pointer(*tagPtr))
	}

	// Main record.
	// It has to fit in a contiguous section of the slice, so if it doesn't fit at the end,
	// leave a rewind marker (0) and start over at the beginning of the slice.
	wd := int(bw.dataCount() % uint32(len(b.data)))
	nd := countSub(br.dataCount(), bw.dataCount()) + len(b.data)
	skip := 0
	if wd+2+int(b.hdrsize)+len(stk) > len(b.data) {
		b.data[wd] = 0
		skip = len(b.data) - wd
		nd -= skip
		wd = 0
	}
	data := b.data[wd:]
	data[0] = uint64(2 + b.hdrsize + uintptr(len(stk))) // length
	data[1] = uint64(now)                               // time stamp
	// header, zero-padded
	i := uintptr(copy(data[2:2+b.hdrsize], hdr))
	for ; i < b.hdrsize; i++ {
		data[2+i] = 0
	}
	for i, pc := range stk {
		data[2+b.hdrsize+uintptr(i)] = uint64(pc)
	}

	for {
		// Commit write.
		// Racing with reader setting flag bits in b.w, to avoid lost wakeups.
		old := b.w.load()
		new := old.addCountsAndClearFlags(skip+2+len(stk)+int(b.hdrsize), 1)
		if !b.w.cas(old, new) {
			continue
		}
		// If there was a reader, wake it up.
		if old&profReaderSleeping != 0 {
			notewakeup(&b.wait)
		}
		break
	}
}
// close signals that there will be no more writes on the buffer.
// Once all the data has been read from the buffer, reads will return eof=true.
func (b *profBuf) close() {
	if atomic.Load(&b.eof) > 0 {
		throw("runtime: profBuf already closed")
	}
	atomic.Store(&b.eof, 1)
	b.wakeupExtra()
}
// wakeupExtra must be called after setting one of the "extra"
// atomic fields b.overflow or b.eof.
// It records the change in b.w and wakes up the reader if needed.
func (b *profBuf) wakeupExtra() {
	for {
		old := b.w.load()
		new := old | profWriteExtra
		if !b.w.cas(old, new) {
			continue
		}
		if old&profReaderSleeping != 0 {
			notewakeup(&b.wait)
		}
		break
	}
}
// profBufReadMode specifies whether to block when no data is available to read.
type profBufReadMode int

const (
	profBufBlocking profBufReadMode = iota
	profBufNonBlocking
)

var overflowTag [1]unsafe.Pointer // always nil
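// read returns pending data and tags from the buffer, blocking or returning
// immediately according to mode, and reports eof once the buffer has been
// closed and fully drained. The returned slices always contain whole records,
// with exactly one tag per record, though a single call need not return
// everything that is pending (see the wraparound handling below).
//
// Illustrative consumer loop (a sketch only, not code used by the runtime):
//
//	for {
//		data, tags, eof := b.read(profBufBlocking)
//		if eof {
//			break
//		}
//		for len(data) > 0 {
//			n := int(data[0]) // record length in words
//			// data[1] is the timestamp, data[2:2+hdrsize] the header,
//			// data[2+hdrsize:n] the stack; tags[0] is the record's tag.
//			data = data[n:]
//			tags = tags[1:]
//		}
//	}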
func (b *profBuf) read(mode profBufReadMode) (data []uint64, tags []unsafe.Pointer, eof bool) {
	if b == nil {
		return nil, nil, true
	}

	br := b.rNext

	// Commit previous read, returning that part of the ring to the writer.
	// First clear tags that have now been read, both to avoid holding
	// up the memory they point at for longer than necessary
	// and so that b.write can assume it is always overwriting
	// nil tag entries (see comment in b.write).
	rPrev := b.r.load()
	if rPrev != br {
		ntag := countSub(br.tagCount(), rPrev.tagCount())
		ti := int(rPrev.tagCount() % uint32(len(b.tags)))
		for i := 0; i < ntag; i++ {
			b.tags[ti] = nil
			if ti++; ti == len(b.tags) {
				ti = 0
			}
		}
		b.r.store(br)
	}

Read:
	bw := b.w.load()
	numData := countSub(bw.dataCount(), br.dataCount())
	if numData == 0 {
		if b.hasOverflow() {
			// No data to read, but there is overflow to report.
			// Racing with writer flushing b.overflow into a real record.
			count, time := b.takeOverflow()
			if count == 0 {
				// Lost the race, go around again.
				goto Read
			}
			// Won the race, report overflow.
			dst := b.overflowBuf
			dst[0] = uint64(2 + b.hdrsize + 1)
			dst[1] = uint64(time)
			for i := uintptr(0); i < b.hdrsize; i++ {
				dst[2+i] = 0
			}
			dst[2+b.hdrsize] = uint64(count)
			return dst[:2+b.hdrsize+1], overflowTag[:1], false
		}
		if atomic.Load(&b.eof) > 0 {
			// No data, no overflow, EOF set: done.
			return nil, nil, true
		}
		if bw&profWriteExtra != 0 {
			// Writer claims to have published extra information (overflow or eof).
			// Attempt to clear notification and then check again.
			// If we fail to clear the notification it means b.w changed,
			// so we still need to check again.
			b.w.cas(bw, bw&^profWriteExtra)
			goto Read
		}

		// Nothing to read right now.
		// Return or sleep according to mode.
		if mode == profBufNonBlocking {
			return nil, nil, false
		}
		if !b.w.cas(bw, bw|profReaderSleeping) {
			goto Read
		}
		// Committed to sleeping.
		notetsleepg(&b.wait, -1)
		noteclear(&b.wait)
		goto Read
	}
	data = b.data[br.dataCount()%uint32(len(b.data)):]
	if len(data) > numData {
		data = data[:numData]
	} else {
		numData -= len(data) // available in case of wraparound
	}
	skip := 0
	if data[0] == 0 {
		// Wraparound record. Go back to the beginning of the ring.
		skip = len(data)
		data = b.data
		if len(data) > numData {
			data = data[:numData]
		}
	}

	ntag := countSub(bw.tagCount(), br.tagCount())
	if ntag == 0 {
		throw("runtime: malformed profBuf buffer - tag and data out of sync")
	}
	tags = b.tags[br.tagCount()%uint32(len(b.tags)):]
	if len(tags) > ntag {
		tags = tags[:ntag]
	}

	// Count out whole data records until either data or tags is done.
	// They are always in sync in the buffer, but due to an end-of-slice
	// wraparound we might need to stop early and return the rest
	// in the next call.
	di := 0
	ti := 0
	for di < len(data) && data[di] != 0 && ti < len(tags) {
		if uintptr(di)+uintptr(data[di]) > uintptr(len(data)) {
			throw("runtime: malformed profBuf buffer - invalid size")
		}
		di += int(data[di])
		ti++
	}

	// Remember how much we returned, to commit read on next call.
	b.rNext = br.addCountsAndClearFlags(skip+di, ti)

	if raceenabled {
		// Match racereleasemerge in runtime_setProfLabel,
		// so that the setting of the labels in runtime_setProfLabel
		// is treated as happening before any use of the labels
		// by our caller. The synchronization on labelSync itself is a fiction
		// for the race detector. The actual synchronization is handled
		// by the fact that the signal handler only reads from the current
		// goroutine and uses atomics to write the updated queue indices,
		// and then the read-out from the signal handler buffer uses
		// atomics to read those queue indices.
		raceacquire(unsafe.Pointer(&labelSync))
	}

	return data[:di], tags[:ti], false
}