3 *************************************************************************
5 * Copyright (C) 2009-2016, Intel Corporation
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
12 * * Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * * Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in
16 * the documentation and/or other materials provided with the
18 * * Neither the name of Intel Corporation nor the names of its
19 * contributors may be used to endorse or promote products derived
20 * from this software without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
25 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
26 * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
29 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
32 * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
35 * *********************************************************************
37 * PLEASE NOTE: This file is a downstream copy of a file maintained in
38 * a repository at cilkplus.org. Changes made to this file that are not
39 * submitted through the contribution process detailed at
40 * http://www.cilkplus.org/submit-cilk-contribution will be lost the next
41 * time that a new version is released. Changes only submitted to the
42 * GNU compiler collection or posted to the git repository at
43 * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are
46 * We welcome your contributions to this open source project. Thank you
47 * for your assistance in helping us improve Cilk Plus.
48 **************************************************************************/
53 * @brief Support for gathering and reporting statistics on Cilk applications.
55 * Note that stats are normally NOT compiled in because it increases the
56 * overhead of stealing. To compile in profiling support, define CILK_PROFILE.
59 #ifndef INCLUDED_STATS_DOT_H
60 #define INCLUDED_STATS_DOT_H
62 /* #define CILK_PROFILE 1 */
63 // @note The CILK_PROFILE flag and intervals are known to be broken
64 // in at least programs with Windows exceptions.
65 // Enable this flag at your own peril. :)
67 #include <cilk/common.h>
68 #include "rts-common.h"
69 #include "internal/abi.h"
72 #include <stdio.h> // Define FILE *
75 __CILKRTS_BEGIN_EXTERN_C
77 // Some notes on the stats we are counting for the Cilk Plus runtime.
79 // INTERVAL_IN_SCHEDULER:
81 // For a user worker, it starts counting when the worker thread is
82 // bound, and stops when the worker thread is unbound.
84 // For a system worker, it starts counting when the worker thread is
85 // created, and stops after it returns from the scheduling loop
86 // (which usually won't happen unless __cilkrts_end_cilk() is
89 // INTERVAL_WORKING + INTERVAL_IN_RUNTIME should be equal to
90 // INTERVAL_IN_SCHEDULER (modulo overheads in instrumentation).
92 // INTERVAL_IN_RUNTIME tries to count all the time spent inside
93 // runtime code. (leave_frame and reductions blur the accounting a
94 // bit). We switch from WORKING to IN_RUNTIME from:
96 // 1. __cilkrts_leave_frame, when we start doing an exception check
97 // and might jump into the runtime, either when returning from a
98 // spawn or have an exception to process.
100 // 2. __cilkrts_c_sync, when we are at a sync and the frame has
103 // We switch back to WORKING when we are returning to user code.
105 // We also switch back to WORKING when we need to do a slow merge of
106 // reducer maps. Otherwise, parallel reductions can mess up the stat
/**
 * @brief Events that we measure.
 *
 * Enumerators take the implicit values 0, 1, 2, ... in declaration order.
 * INTERVAL_N is therefore the number of real intervals and is used to size
 * the per-interval arrays in the statistics structure, so it must stay last.
 *
 * Some entries are timed intervals and some are pure event counters; the
 * trailing comment on each enumerator says which.
 */
enum interval
{
    INTERVAL_IN_SCHEDULER,                  ///< Time threads spend "bound" to Cilk
    INTERVAL_WORKING,                       ///< Time spent working
    INTERVAL_IN_RUNTIME,                    ///< Time spent executing runtime code
    INTERVAL_SCHED_LOOP,                    ///< Time spent in scheduling loop
    INTERVAL_STEALING,                      ///< Time spent stealing work
    INTERVAL_STEAL_SUCCESS,                 ///< Time to do a successful steal
    INTERVAL_STEAL_FAIL_EMPTYQ,             ///< Count of steal failures due to lack of stealable work
    INTERVAL_STEAL_FAIL_LOCK,               ///< Count of steal failures due to failure to lock worker
    INTERVAL_STEAL_FAIL_USER_WORKER,        ///< Count of steal failures by user workers which
                                            ///< attempt to steal from another team
    INTERVAL_STEAL_FAIL_DEKKER,             ///< Count of steal failures due to Dekker protocol failure
    INTERVAL_SYNC_CHECK,                    ///< Time spent processing syncs
    INTERVAL_THE_EXCEPTION_CHECK,           ///< Time spent performing THE exception checks
    INTERVAL_THE_EXCEPTION_CHECK_USELESS,   ///< Count of useless THE exception checks
    INTERVAL_RETURNING,                     ///< Time spent returning from calls
    INTERVAL_FINALIZE_CHILD,                ///< Time spent in finalize_child
    INTERVAL_PROVABLY_GOOD_STEAL,           ///< Time spent in provably_good_steal
    INTERVAL_UNCONDITIONAL_STEAL,           ///< Time spent in unconditional_steal
    INTERVAL_ALLOC_FULL_FRAME,              ///< Time spent in __cilkrts_make_full_frame
    INTERVAL_FRAME_ALLOC_LARGE,             ///< Count of calls to __cilkrts_frame_malloc for buffers
                                            ///< bigger than FRAME_MALLOC_MAX_SIZE or with a NULL worker
    INTERVAL_FRAME_ALLOC,                   ///< Time spent allocating memory from worker buckets
    INTERVAL_FRAME_ALLOC_GLOBAL,            ///< Time spent calling memory allocator when buckets are empty
    INTERVAL_FRAME_FREE_LARGE,              ///< Count of calls to __cilkrts_frame_free for buffers
                                            ///< bigger than FRAME_MALLOC_MAX_SIZE or with a NULL worker
    INTERVAL_FRAME_FREE,                    ///< Time spent freeing memory to worker buckets
    INTERVAL_FRAME_FREE_GLOBAL,             ///< Time spent calling memory deallocator when buckets are full
    INTERVAL_MUTEX_LOCK,                    ///< Count of calls to __cilkrts_mutex_lock for a worker
    INTERVAL_MUTEX_LOCK_SPINNING,           ///< Time spent spinning in __cilkrts_mutex_lock for a worker
    INTERVAL_MUTEX_LOCK_YIELDING,           ///< Time spent yielding in __cilkrts_mutex_lock for a worker
    INTERVAL_MUTEX_TRYLOCK,                 ///< Count of calls to __cilkrts_mutex_trylock
    INTERVAL_FIBER_ALLOCATE,                ///< Time spent calling cilk_fiber_allocate
    INTERVAL_FIBER_DEALLOCATE,              ///< Time spent calling cilk_fiber_deallocate (not from thread)
    INTERVAL_FIBER_ALLOCATE_FROM_THREAD,    ///< Time spent calling cilk_fiber_allocate_from_thread
    INTERVAL_FIBER_DEALLOCATE_FROM_THREAD,  ///< Time spent calling cilk_fiber_deallocate (from thread)
    INTERVAL_SUSPEND_RESUME_OTHER,          ///< Count of fiber suspend_self_and_resume_other
    INTERVAL_DEALLOCATE_RESUME_OTHER,       ///< Count of fiber deallocate_self_and_resume_other
    INTERVAL_INIT_WORKER,                   ///< Time spent on initialization of worker thread
    INTERVAL_SCHEDULE_WAIT,                 ///< Time worker spends in SCHEDULE_WAIT state
    INTERVAL_N                              ///< Number of intervals, must be last
};
152 * @brief Struct that collects all runtime statistics.
154 * There is an instance of this structure in each worker's
155 * local_state, as well as one in the @c global_state_t which will be
156 * used to accumulate the per-worker stats.
158 typedef struct statistics
160 /** Number of times each interval is entered */
161 unsigned long long count
[INTERVAL_N
];
164 * Time when the system entered each interval, in system-dependent
167 unsigned long long start
[INTERVAL_N
];
169 /** Total time spent in each interval, in system-dependent "ticks" */
170 unsigned long long accum
[INTERVAL_N
];
173 * Largest global number of stacks seen by this worker.
174 * The true maximum at end of execution is the max of the
181 * Initializes a statistics structure
183 * @param s The statistics structure to be initialized.
185 COMMON_PORTABLE
void __cilkrts_init_stats(statistics
*s
);
188 * @brief Sums statistics from worker to the global struct
190 * @param to The statistics structure that will accumulate the information.
191 * This structure is usually @c g->stats.
192 * @param from The statistics structure that will be accumulated.
193 * This structure is usually statistics kept per worker.
196 void __cilkrts_accum_stats(statistics
*to
, statistics
*from
);
199 * @brief Mark the start of an interval by saving the current tick count.
201 * @pre Start time == INVALID_START
203 * @param w The worker we're accumulating stats for.
204 * @param i The interval we're accumulating stats for.
207 void __cilkrts_start_interval(__cilkrts_worker
*w
, enum interval i
);
210 * @brief Mark the end of an interval by adding the ticks since the
211 * start to the accumulated time.
213 * @pre Start time != INVALID_START
215 * @param w The worker we're accumulating stats for.
216 * @param i The interval we're accumulating stats for.
219 void __cilkrts_stop_interval(__cilkrts_worker
*w
, enum interval i
);
222 * @brief Start and stop interval I, charging zero time against it
225 * - Start time == INVALID_START
227 * @param w The worker we're accumulating stats for.
228 * @param i The interval we're accumulating stats for.
231 void __cilkrts_note_interval(__cilkrts_worker
*w
, enum interval i
);
235 void dump_stats_to_file(FILE *stat_file
, statistics
*s
);
/*
 * Instrumentation hooks used by the rest of the runtime.
 *
 * Exactly one set of definitions is active: with CILK_PROFILE defined the
 * macros forward to the __cilkrts_*_interval functions, otherwise they
 * expand to nothing and their arguments are never evaluated.
 *
 * The trailing semicolon is part of the profiling expansions. Call sites
 * may depend on it, so do not remove it without auditing every use.
 */
#ifdef CILK_PROFILE
/** Start an interval by calling __cilkrts_start_interval(). */
# define START_INTERVAL(w, i) __cilkrts_start_interval(w, i);
/** End an interval by calling __cilkrts_stop_interval(). */
# define STOP_INTERVAL(w, i) __cilkrts_stop_interval(w, i);
/** Increment a counter by calling __cilkrts_note_interval(). */
# define NOTE_INTERVAL(w, i) __cilkrts_note_interval(w, i);
#else
/** Start an interval.  No effect unless CILK_PROFILE is defined. */
# define START_INTERVAL(w, i)
/** End an interval.  No effect unless CILK_PROFILE is defined. */
# define STOP_INTERVAL(w, i)
/** Increment a counter.  No effect unless CILK_PROFILE is defined. */
# define NOTE_INTERVAL(w, i)
#endif // CILK_PROFILE
252 __CILKRTS_END_EXTERN_C
254 #endif // ! defined(INCLUDED_STATS_DOT_H)