Merge mozilla-central to autoland. a=merge CLOSED TREE
[gecko.git] / js / public / ProfilingStack.h
blobb9b96c27dc1bb49693dbe8658cf95b9424ddf1b4
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*-
2 * vim: set ts=8 sts=2 et sw=2 tw=80:
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
7 #ifndef js_ProfilingStack_h
8 #define js_ProfilingStack_h
10 #include "mozilla/Atomics.h"
12 #include <stdint.h>
14 #include "jstypes.h"
16 #include "js/ProfilingCategory.h"
17 #include "js/TypeDecls.h"
19 class JS_PUBLIC_API JSTracer;
20 class JS_PUBLIC_API ProfilingStack;
// This file defines the classes ProfilingStack and ProfilingStackFrame.
// The ProfilingStack manages an array of ProfilingStackFrames.
// It keeps track of the "label stack" and the JS interpreter stack.
// The two stack types are interleaved.
//
// Usage:
//
//  ProfilingStack* profilingStack = ...;
//
//  // For label frames:
//  profilingStack->pushLabelFrame(...);
//  // Execute some code. When finished, pop the frame:
//  profilingStack->pop();
//
//  // For JS stack frames:
//  profilingStack->pushJSFrame(...);
//  // Execute some code. When finished, pop the frame:
//  profilingStack->pop();
//
//
// Concurrency considerations
//
// A thread's profiling stack (and the frames inside it) is only modified by
// that thread. However, the profiling stack can be *read* by a different
// thread, the sampler thread: Whenever the profiler wants to sample a given
// thread A, the following happens:
//  (1) Thread A is suspended.
//  (2) The sampler thread (thread S) reads the ProfilingStack of thread A,
//      including all ProfilingStackFrames that are currently in that stack
//      (profilingStack->frames[0..profilingStack->stackSize()]).
//  (3) Thread A is resumed.
//
// Thread suspension is achieved using platform-specific APIs; refer to each
// platform's Sampler::SuspendAndSampleAndResumeThread implementation in
// platform-*.cpp for details.
//
// When the thread is suspended, the values in profilingStack->stackPointer
// and in the stack frame range
// profilingStack->frames[0..profilingStack->stackPointer] need to be in a
// consistent state, so that thread S does not read partially-constructed
// stack frames. More specifically, we have two requirements:
//  (1) When adding a new frame at the top of the stack, its
//      ProfilingStackFrame data needs to be put in place *before* the
//      stackPointer is incremented, and the compiler + CPU need to know that
//      this order matters.
//  (2) When popping a frame from the stack and then preparing the
//      ProfilingStackFrame data for the next frame that is about to be
//      pushed, the decrement of the stackPointer in pop() needs to happen
//      *before* the ProfilingStackFrame for the new frame is being
//      populated, and the compiler + CPU need to know that this order
//      matters.
//
// We can express the relevance of these orderings in multiple ways.
// Option A is to make stackPointer an atomic with SequentiallyConsistent
// memory ordering. This would ensure that no writes in thread A would be
// reordered across any writes to stackPointer, which satisfies requirements
// (1) and (2) at the same time. Option A is the simplest.
// Option B is to use ReleaseAcquire memory ordering both for writes to
// stackPointer *and* for writes to ProfilingStackFrame fields. Release-stores
// ensure that all writes that happened *before this write in program order*
// are not reordered to happen after this write. ReleaseAcquire ordering
// places no requirements on the ordering of writes that happen *after* this
// write in program order.
// Using release-stores for writes to stackPointer expresses requirement (1),
// and using release-stores for writes to the ProfilingStackFrame fields
// expresses requirement (2).
//
// Option B is more complicated than option A, but has much better performance
// on x86/64: In a microbenchmark run on a Macbook Pro from 2017, switching
// from option A to option B reduced the overhead of pushing+popping a
// ProfilingStackFrame by 10 nanoseconds.
// On x86/64, release-stores require no explicit hardware barriers or lock
// instructions.
// On ARM/64, option B may be slower than option A, because the compiler will
// generate hardware barriers for every single release-store instead of just
// for the writes to stackPointer. However, the actual performance impact of
// this has not yet been measured on ARM, so we're currently using option B
// everywhere. This is something that we may want to change in the future once
// we've done measurements.
100 namespace js {
102 // A call stack can be specified to the JS engine such that all JS entry/exits
103 // to functions push/pop a stack frame to/from the specified stack.
105 // For more detailed information, see vm/GeckoProfiler.h.
107 class ProfilingStackFrame {
108 // A ProfilingStackFrame represents either a label frame or a JS frame.
110 // WARNING WARNING WARNING
112 // All the fields below are Atomic<...,ReleaseAcquire>. This is needed so
113 // that writes to these fields are release-writes, which ensures that
114 // earlier writes in this thread don't get reordered after the writes to
115 // these fields. In particular, the decrement of the stack pointer in
116 // ProfilingStack::pop() is a write that *must* happen before the values in
117 // this ProfilingStackFrame are changed. Otherwise, the sampler thread might
118 // see an inconsistent state where the stack pointer still points to a
119 // ProfilingStackFrame which has already been popped off the stack and whose
120 // fields have now been partially repopulated with new values.
121 // See the "Concurrency considerations" paragraph at the top of this file
122 // for more details.
124 // Descriptive label for this stack frame. Must be a static string! Can be
125 // an empty string, but not a null pointer.
126 mozilla::Atomic<const char*, mozilla::ReleaseAcquire> label_;
128 // An additional descriptive string of this frame which is combined with
129 // |label_| in profiler output. Need not be (and usually isn't) static. Can
130 // be null.
131 mozilla::Atomic<const char*, mozilla::ReleaseAcquire> dynamicString_;
133 // Stack pointer for non-JS stack frames, the script pointer otherwise.
134 mozilla::Atomic<void*, mozilla::ReleaseAcquire> spOrScript;
136 // ID of the JS Realm for JS stack frames.
137 // Must not be used on non-JS frames; it'll contain either the default 0,
138 // or a leftover value from a previous JS stack frame that was using this
139 // ProfilingStackFrame object.
140 mozilla::Atomic<uint64_t, mozilla::ReleaseAcquire> realmID_;
142 // The bytecode offset for JS stack frames.
143 // Must not be used on non-JS frames; it'll contain either the default 0,
144 // or a leftover value from a previous JS stack frame that was using this
145 // ProfilingStackFrame object.
146 mozilla::Atomic<int32_t, mozilla::ReleaseAcquire> pcOffsetIfJS_;
148 // Bits 0...8 hold the Flags. Bits 9...31 hold the category pair.
149 mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> flagsAndCategoryPair_;
151 static int32_t pcToOffset(JSScript* aScript, jsbytecode* aPc);
153 public:
154 ProfilingStackFrame() = default;
155 ProfilingStackFrame& operator=(const ProfilingStackFrame& other) {
156 label_ = other.label();
157 dynamicString_ = other.dynamicString();
158 void* spScript = other.spOrScript;
159 spOrScript = spScript;
160 int32_t offsetIfJS = other.pcOffsetIfJS_;
161 pcOffsetIfJS_ = offsetIfJS;
162 uint64_t realmID = other.realmID_;
163 realmID_ = realmID;
164 uint32_t flagsAndCategory = other.flagsAndCategoryPair_;
165 flagsAndCategoryPair_ = flagsAndCategory;
166 return *this;
169 // Reserve up to 16 bits for flags, and 16 for category pair.
170 enum class Flags : uint32_t {
171 // The first three flags describe the kind of the frame and are
172 // mutually exclusive. (We still give them individual bits for
173 // simplicity.)
175 // A regular label frame. These usually come from AutoProfilerLabel.
176 IS_LABEL_FRAME = 1 << 0,
178 // A special frame indicating the start of a run of JS profiling stack
179 // frames. IS_SP_MARKER_FRAME frames are ignored, except for the sp
180 // field. These frames are needed to get correct ordering between JS
181 // and LABEL frames because JS frames don't carry sp information.
182 // SP is short for "stack pointer".
183 IS_SP_MARKER_FRAME = 1 << 1,
185 // A JS frame.
186 IS_JS_FRAME = 1 << 2,
188 // An interpreter JS frame that has OSR-ed into baseline. IS_JS_FRAME
189 // frames can have this flag set and unset during their lifetime.
190 // JS_OSR frames are ignored.
191 JS_OSR = 1 << 3,
193 // The next three are mutually exclusive.
194 // By default, for profiling stack frames that have both a label and a
195 // dynamic string, the two strings are combined into one string of the
196 // form "<label> <dynamicString>" during JSON serialization. The
197 // following flags can be used to change this preset.
198 STRING_TEMPLATE_METHOD = 1 << 4, // "<label>.<dynamicString>"
199 STRING_TEMPLATE_GETTER = 1 << 5, // "get <label>.<dynamicString>"
200 STRING_TEMPLATE_SETTER = 1 << 6, // "set <label>.<dynamicString>"
202 // If set, causes this stack frame to be marked as "relevantForJS" in
203 // the profile JSON, which will make it show up in the "JS only" call
204 // tree view.
205 RELEVANT_FOR_JS = 1 << 7,
207 // If set, causes the label on this ProfilingStackFrame to be ignored
208 // and to be replaced by the subcategory's label.
209 LABEL_DETERMINED_BY_CATEGORY_PAIR = 1 << 8,
211 // Frame dynamic string does not contain user data.
212 NONSENSITIVE = 1 << 9,
214 // A JS Baseline Interpreter frame.
215 IS_BLINTERP_FRAME = 1 << 10,
217 FLAGS_BITCOUNT = 16,
218 FLAGS_MASK = (1 << FLAGS_BITCOUNT) - 1
221 static_assert(
222 uint32_t(JS::ProfilingCategoryPair::LAST) <=
223 (UINT32_MAX >> uint32_t(Flags::FLAGS_BITCOUNT)),
224 "Too many category pairs to fit into u32 with together with the "
225 "reserved bits for the flags");
227 bool isLabelFrame() const {
228 return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_LABEL_FRAME);
231 bool isNonsensitive() const {
232 return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::NONSENSITIVE);
235 bool isSpMarkerFrame() const {
236 return uint32_t(flagsAndCategoryPair_) &
237 uint32_t(Flags::IS_SP_MARKER_FRAME);
240 bool isJsFrame() const {
241 return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_JS_FRAME);
244 bool isJsBlinterpFrame() const {
245 return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::IS_BLINTERP_FRAME);
248 bool isOSRFrame() const {
249 return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::JS_OSR);
252 void setIsOSRFrame(bool isOSR) {
253 if (isOSR) {
254 flagsAndCategoryPair_ =
255 uint32_t(flagsAndCategoryPair_) | uint32_t(Flags::JS_OSR);
256 } else {
257 flagsAndCategoryPair_ =
258 uint32_t(flagsAndCategoryPair_) & ~uint32_t(Flags::JS_OSR);
262 void setLabelCategory(JS::ProfilingCategoryPair aCategoryPair) {
263 MOZ_ASSERT(isLabelFrame());
264 flagsAndCategoryPair_ =
265 (uint32_t(aCategoryPair) << uint32_t(Flags::FLAGS_BITCOUNT)) | flags();
268 const char* label() const {
269 uint32_t flagsAndCategoryPair = flagsAndCategoryPair_;
270 if (flagsAndCategoryPair &
271 uint32_t(Flags::LABEL_DETERMINED_BY_CATEGORY_PAIR)) {
272 auto categoryPair = JS::ProfilingCategoryPair(
273 flagsAndCategoryPair >> uint32_t(Flags::FLAGS_BITCOUNT));
274 return JS::GetProfilingCategoryPairInfo(categoryPair).mLabel;
276 return label_;
279 const char* dynamicString() const { return dynamicString_; }
281 void initLabelFrame(const char* aLabel, const char* aDynamicString, void* sp,
282 JS::ProfilingCategoryPair aCategoryPair,
283 uint32_t aFlags) {
284 label_ = aLabel;
285 dynamicString_ = aDynamicString;
286 spOrScript = sp;
287 // pcOffsetIfJS_ is not set and must not be used on label frames.
288 flagsAndCategoryPair_ =
289 uint32_t(Flags::IS_LABEL_FRAME) |
290 (uint32_t(aCategoryPair) << uint32_t(Flags::FLAGS_BITCOUNT)) | aFlags;
291 MOZ_ASSERT(isLabelFrame());
294 void initSpMarkerFrame(void* sp) {
295 label_ = "";
296 dynamicString_ = nullptr;
297 spOrScript = sp;
298 // pcOffsetIfJS_ is not set and must not be used on sp marker frames.
299 flagsAndCategoryPair_ = uint32_t(Flags::IS_SP_MARKER_FRAME) |
300 (uint32_t(JS::ProfilingCategoryPair::OTHER)
301 << uint32_t(Flags::FLAGS_BITCOUNT));
302 MOZ_ASSERT(isSpMarkerFrame());
305 template <JS::ProfilingCategoryPair Category, uint32_t ExtraFlags = 0>
306 void initJsFrame(const char* aLabel, const char* aDynamicString,
307 JSScript* aScript, jsbytecode* aPc, uint64_t aRealmID) {
308 label_ = aLabel;
309 dynamicString_ = aDynamicString;
310 spOrScript = aScript;
311 pcOffsetIfJS_ = pcToOffset(aScript, aPc);
312 realmID_ = aRealmID;
313 flagsAndCategoryPair_ =
314 (uint32_t(Category) << uint32_t(Flags::FLAGS_BITCOUNT)) |
315 uint32_t(Flags::IS_JS_FRAME) | ExtraFlags;
316 MOZ_ASSERT(isJsFrame());
319 uint32_t flags() const {
320 return uint32_t(flagsAndCategoryPair_) & uint32_t(Flags::FLAGS_MASK);
323 JS::ProfilingCategoryPair categoryPair() const {
324 return JS::ProfilingCategoryPair(flagsAndCategoryPair_ >>
325 uint32_t(Flags::FLAGS_BITCOUNT));
328 uint64_t realmID() const { return realmID_; }
330 void* stackAddress() const {
331 MOZ_ASSERT(!isJsFrame());
332 return spOrScript;
335 JS_PUBLIC_API JSScript* script() const;
337 JS_PUBLIC_API JSFunction* function() const;
339 // Note that the pointer returned might be invalid.
340 JSScript* rawScript() const {
341 MOZ_ASSERT(isJsFrame());
342 void* script = spOrScript;
343 return static_cast<JSScript*>(script);
346 // We can't know the layout of JSScript, so look in vm/GeckoProfiler.cpp.
347 JS_PUBLIC_API jsbytecode* pc() const;
348 void setPC(jsbytecode* pc);
350 void trace(JSTracer* trc);
352 // The offset of a pc into a script's code can actually be 0, so to
353 // signify a nullptr pc, use a -1 index. This is checked against in
354 // pc() and setPC() to set/get the right pc.
355 static const int32_t NullPCOffset = -1;
358 JS_PUBLIC_API void SetContextProfilingStack(JSContext* cx,
359 ProfilingStack* profilingStack);
361 // GetContextProfilingStack also exists, but it's defined in RootingAPI.h.
363 JS_PUBLIC_API void EnableContextProfilingStack(JSContext* cx, bool enabled);
365 JS_PUBLIC_API void RegisterContextProfilingEventMarker(JSContext* cx,
366 void (*fn)(const char*,
367 const char*));
369 } // namespace js
371 namespace JS {
373 typedef ProfilingStack* (*RegisterThreadCallback)(const char* threadName,
374 void* stackBase);
376 typedef void (*UnregisterThreadCallback)();
378 // regiserThread and unregisterThread callbacks are functions which are called
379 // by other threads without any locking mechanism.
380 JS_PUBLIC_API void SetProfilingThreadCallbacks(
381 RegisterThreadCallback registerThread,
382 UnregisterThreadCallback unregisterThread);
384 } // namespace JS
386 // Each thread has its own ProfilingStack. That thread modifies the
387 // ProfilingStack, pushing and popping elements as necessary.
389 // The ProfilingStack is also read periodically by the profiler's sampler
390 // thread. This happens only when the thread that owns the ProfilingStack is
391 // suspended. So there are no genuine parallel accesses.
393 // However, it is possible for pushing/popping to be interrupted by a periodic
394 // sample. Because of this, we need pushing/popping to be effectively atomic.
396 // - When pushing a new frame, we increment the stack pointer -- making the new
397 // frame visible to the sampler thread -- only after the new frame has been
398 // fully written. The stack pointer is Atomic<uint32_t,ReleaseAcquire>, so
399 // the increment is a release-store, which ensures that this store is not
400 // reordered before the writes of the frame.
402 // - When popping an old frame, the only operation is the decrementing of the
403 // stack pointer, which is obviously atomic.
405 class JS_PUBLIC_API ProfilingStack final {
406 public:
407 ProfilingStack() = default;
409 ~ProfilingStack();
411 void pushLabelFrame(const char* label, const char* dynamicString, void* sp,
412 JS::ProfilingCategoryPair categoryPair,
413 uint32_t flags = 0) {
414 // This thread is the only one that ever changes the value of
415 // stackPointer.
416 // Store the value of the atomic in a non-atomic local variable so that
417 // the compiler won't generate two separate loads from the atomic for
418 // the size check and the frames[] array indexing operation.
419 uint32_t stackPointerVal = stackPointer;
421 if (MOZ_UNLIKELY(stackPointerVal >= capacity)) {
422 ensureCapacitySlow();
424 frames[stackPointerVal].initLabelFrame(label, dynamicString, sp,
425 categoryPair, flags);
427 // This must happen at the end! The compiler will not reorder this
428 // update because stackPointer is Atomic<..., ReleaseAcquire>, so any
429 // the writes above will not be reordered below the stackPointer store.
430 // Do the read and the write as two separate statements, in order to
431 // make it clear that we don't need an atomic increment, which would be
432 // more expensive on x86 than the separate operations done here.
433 // However, don't use stackPointerVal here; instead, allow the compiler
434 // to turn this store into a non-atomic increment instruction which
435 // takes up less code size.
436 stackPointer = stackPointer + 1;
439 void pushSpMarkerFrame(void* sp) {
440 uint32_t oldStackPointer = stackPointer;
442 if (MOZ_UNLIKELY(oldStackPointer >= capacity)) {
443 ensureCapacitySlow();
445 frames[oldStackPointer].initSpMarkerFrame(sp);
447 // This must happen at the end, see the comment in pushLabelFrame.
448 stackPointer = oldStackPointer + 1;
451 void pushJsFrame(const char* label, const char* dynamicString,
452 JSScript* script, jsbytecode* pc, uint64_t aRealmID) {
453 // This thread is the only one that ever changes the value of
454 // stackPointer. Only load the atomic once.
455 uint32_t oldStackPointer = stackPointer;
457 if (MOZ_UNLIKELY(oldStackPointer >= capacity)) {
458 ensureCapacitySlow();
460 frames[oldStackPointer]
461 .initJsFrame<JS::ProfilingCategoryPair::JS_Interpreter>(
462 label, dynamicString, script, pc, aRealmID);
464 // This must happen at the end, see the comment in pushLabelFrame.
465 stackPointer = stackPointer + 1;
468 void pop() {
469 MOZ_ASSERT(stackPointer > 0);
470 // Do the read and the write as two separate statements, in order to
471 // make it clear that we don't need an atomic decrement, which would be
472 // more expensive on x86 than the separate operations done here.
473 // This thread is the only one that ever changes the value of
474 // stackPointer.
475 uint32_t oldStackPointer = stackPointer;
476 stackPointer = oldStackPointer - 1;
479 uint32_t stackSize() const { return stackPointer; }
480 uint32_t stackCapacity() const { return capacity; }
482 private:
483 // Out of line path for expanding the buffer, since otherwise this would get
484 // inlined in every DOM WebIDL call.
485 MOZ_COLD void ensureCapacitySlow();
487 // No copying.
488 ProfilingStack(const ProfilingStack&) = delete;
489 void operator=(const ProfilingStack&) = delete;
491 // No moving either.
492 ProfilingStack(ProfilingStack&&) = delete;
493 void operator=(ProfilingStack&&) = delete;
495 uint32_t capacity = 0;
497 public:
498 // The pointer to the stack frames, this is read from the profiler thread and
499 // written from the current thread.
501 // This is effectively a unique pointer.
502 mozilla::Atomic<js::ProfilingStackFrame*, mozilla::SequentiallyConsistent>
503 frames{nullptr};
505 // This may exceed the capacity, so instead use the stackSize() method to
506 // determine the number of valid frames in stackFrames. When this is less
507 // than stackCapacity(), it refers to the first free stackframe past the top
508 // of the in-use stack (i.e. frames[stackPointer - 1] is the top stack
509 // frame).
511 // WARNING WARNING WARNING
513 // This is an atomic variable that uses ReleaseAcquire memory ordering.
514 // See the "Concurrency considerations" paragraph at the top of this file
515 // for more details.
516 mozilla::Atomic<uint32_t, mozilla::ReleaseAcquire> stackPointer{0};
519 namespace js {
521 class AutoGeckoProfilerEntry;
522 class GeckoProfilerEntryMarker;
523 class GeckoProfilerBaselineOSRMarker;
525 class GeckoProfilerThread {
526 friend class AutoGeckoProfilerEntry;
527 friend class GeckoProfilerEntryMarker;
528 friend class GeckoProfilerBaselineOSRMarker;
530 ProfilingStack* profilingStack_;
532 // Same as profilingStack_ if the profiler is currently active, otherwise
533 // null.
534 ProfilingStack* profilingStackIfEnabled_;
536 public:
537 GeckoProfilerThread();
539 uint32_t stackPointer() {
540 MOZ_ASSERT(infraInstalled());
541 return profilingStack_->stackPointer;
543 ProfilingStackFrame* stack() { return profilingStack_->frames; }
544 ProfilingStack* getProfilingStack() { return profilingStack_; }
545 ProfilingStack* getProfilingStackIfEnabled() {
546 return profilingStackIfEnabled_;
550 * True if the profiler infrastructure is setup. Should be true in builds
551 * that include profiler support except during early startup or late
552 * shutdown. Unrelated to the presence of the Gecko Profiler addon.
554 bool infraInstalled() { return profilingStack_ != nullptr; }
556 void setProfilingStack(ProfilingStack* profilingStack, bool enabled);
557 void enable(bool enable) {
558 profilingStackIfEnabled_ = enable ? profilingStack_ : nullptr;
560 void trace(JSTracer* trc);
563 * Functions which are the actual instrumentation to track run information
565 * - enter: a function has started to execute
566 * - updatePC: updates the pc information about where a function
567 * is currently executing
568 * - exit: this function has ceased execution, and no further
569 * entries/exits will be made
571 bool enter(JSContext* cx, JSScript* script);
572 void exit(JSContext* cx, JSScript* script);
573 inline void updatePC(JSContext* cx, JSScript* script, jsbytecode* pc);
576 } // namespace js
578 #endif /* js_ProfilingStack_h */