Merge 'remotes/trunk'
[0ad.git] / source / lib / timer.h
blob9af3352c486220900949b0655813b32ac0bf7885
1 /* Copyright (C) 2010 Wildfire Games.
3 * Permission is hereby granted, free of charge, to any person obtaining
4 * a copy of this software and associated documentation files (the
5 * "Software"), to deal in the Software without restriction, including
6 * without limitation the rights to use, copy, modify, merge, publish,
7 * distribute, sublicense, and/or sell copies of the Software, and to
8 * permit persons to whom the Software is furnished to do so, subject to
9 * the following conditions:
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * platform-independent high resolution timer
27 #ifndef INCLUDED_TIMER
28 #define INCLUDED_TIMER
30 #include "lib/config2.h" // CONFIG2_TIMER_ALLOW_RDTSC
31 #include "lib/sysdep/cpu.h" // cpu_AtomicAdd
32 #if ARCH_X86_X64 && CONFIG2_TIMER_ALLOW_RDTSC
33 # include "lib/sysdep/os_cpu.h" // os_cpu_ClockFrequency
34 # include "lib/sysdep/arch/x86_x64/x86_x64.h" // x86_x64::rdtsc
35 #endif
37 #include "lib/utf8.h"
39 /**
40 * timer_Time will subsequently return values relative to the current time.
41 **/
42 LIB_API void timer_LatchStartTime();
44 /**
45 * @return high resolution (> 1 us) timestamp [s].
46 **/
47 LIB_API double timer_Time();
49 /**
50 * @return resolution [s] of the timer.
51 **/
52 LIB_API double timer_Resolution();
55 // (allow using XADD (faster than CMPXCHG) in 64-bit builds without casting)
56 #if ARCH_AMD64
57 typedef intptr_t Cycles;
58 #else
59 typedef i64 Cycles;
60 #endif
62 /**
63 * internal helper functions for returning an easily readable
64 * string (i.e. re-scaled to appropriate units)
65 **/
66 LIB_API std::string StringForSeconds(double seconds);
67 LIB_API std::string StringForCycles(Cycles cycles);
70 //-----------------------------------------------------------------------------
71 // scope timing
73 /// used by TIMER
74 class ScopeTimer
76 NONCOPYABLE(ScopeTimer);
77 public:
78 ScopeTimer(const wchar_t* description)
79 : m_t0(timer_Time()), m_description(description)
83 ~ScopeTimer()
85 const double t1 = timer_Time();
86 const std::string elapsedTimeString = StringForSeconds(t1-m_t0);
87 debug_printf("TIMER| %s: %s\n", utf8_from_wstring(m_description).c_str(), elapsedTimeString.c_str());
90 private:
91 double m_t0;
92 const wchar_t* m_description;
95 /**
96 * Measures the time taken to execute code up until end of the current scope;
97 * displays it via debug_printf. Can safely be nested.
98 * Useful for measuring time spent in a function or basic block.
99 * <description> must remain valid over the lifetime of this object;
100 * a string literal is safest.
102 * Example usage:
103 * void func()
105 * TIMER(L"description");
106 * // code to be measured
109 #define TIMER(description) ScopeTimer UID__(description)
112 * Measures the time taken to execute code between BEGIN and END markers;
113 * displays it via debug_printf. Can safely be nested.
114 * Useful for measuring several pieces of code within the same function/block.
115 * <description> must remain valid over the lifetime of this object;
116 * a string literal is safest.
118 * Caveats:
119 * - this wraps the code to be measured in a basic block, so any
120 * variables defined there are invisible to surrounding code.
121 * - the description passed to END isn't inspected; you are responsible for
122 * ensuring correct nesting!
124 * Example usage:
125 * void func2()
127 * // uninteresting code
128 * TIMER_BEGIN(L"description2");
129 * // code to be measured
130 * TIMER_END(L"description2");
131 * // uninteresting code
134 #define TIMER_BEGIN(description) { ScopeTimer UID__(description)
135 #define TIMER_END(description) }
138 //-----------------------------------------------------------------------------
139 // cumulative timer API
141 // this supplements in-game profiling by providing low-overhead,
142 // high resolution time accounting of specific areas.
144 // since TIMER_ACCRUE et al. are called so often, we try to keep
145 // overhead to an absolute minimum. storing raw tick counts (e.g. CPU cycles
146 // returned by x86_x64::rdtsc) instead of absolute time has two benefits:
147 // - no need to convert from raw->time on every call
148 // (instead, it's only done once when displaying the totals)
149 // - possibly less overhead to querying the time itself
150 // (timer_Time may be using slower time sources with ~3us overhead)
152 // however, the cycle count is not necessarily a measure of wall-clock time
153 // (see http://www.gamedev.net/reference/programming/features/timing).
154 // therefore, on systems with SpeedStep active, measurements of I/O or other
155 // non-CPU bound activity may be skewed. this is ok because the timer is
156 // only used for profiling; just be aware of the issue.
157 // if this is a problem, disable CONFIG2_TIMER_ALLOW_RDTSC.
159 // note that overflow isn't an issue either way (63 bit cycle counts
160 // at 10 GHz cover intervals of 29 years).
162 #if ARCH_X86_X64 && CONFIG2_TIMER_ALLOW_RDTSC
164 class TimerUnit
166 public:
167 void SetToZero()
169 m_cycles = 0;
172 void SetFromTimer()
174 m_cycles = x86_x64::rdtsc();
177 void AddDifference(TimerUnit t0, TimerUnit t1)
179 m_cycles += t1.m_cycles - t0.m_cycles;
182 void AddDifferenceAtomic(TimerUnit t0, TimerUnit t1)
184 const Cycles delta = t1.m_cycles - t0.m_cycles;
185 #if ARCH_AMD64
186 cpu_AtomicAdd(&m_cycles, delta);
187 #elif ARCH_IA32
188 retry:
189 if(!cpu_CAS64(&m_cycles, m_cycles, m_cycles+delta))
190 goto retry;
191 #else
192 # error "port"
193 #endif
196 void Subtract(TimerUnit t)
198 m_cycles -= t.m_cycles;
201 std::string ToString() const
203 ENSURE(m_cycles >= 0);
204 return StringForCycles(m_cycles);
207 double ToSeconds() const
209 return (double)m_cycles / os_cpu_ClockFrequency();
212 private:
213 Cycles m_cycles;
216 #else
218 class TimerUnit
220 public:
221 void SetToZero()
223 m_seconds = 0.0;
226 void SetFromTimer()
228 m_seconds = timer_Time();
231 void AddDifference(TimerUnit t0, TimerUnit t1)
233 m_seconds += t1.m_seconds - t0.m_seconds;
236 void AddDifferenceAtomic(TimerUnit t0, TimerUnit t1)
238 retry:
239 i64 oldRepresentation;
240 memcpy(&oldRepresentation, &m_seconds, sizeof(oldRepresentation));
242 const double seconds = m_seconds + t1.m_seconds - t0.m_seconds;
243 i64 newRepresentation;
244 memcpy(&newRepresentation, &seconds, sizeof(newRepresentation));
246 if(!cpu_CAS64((volatile i64*)&m_seconds, oldRepresentation, newRepresentation))
247 goto retry;
250 void Subtract(TimerUnit t)
252 m_seconds -= t.m_seconds;
255 std::string ToString() const
257 ENSURE(m_seconds >= 0.0);
258 return StringForSeconds(m_seconds);
261 double ToSeconds() const
263 return m_seconds;
266 private:
267 double m_seconds;
270 #endif
272 // opaque - do not access its fields!
273 // note: must be defined here because clients instantiate them;
274 // fields cannot be made private due to POD requirement.
275 struct TimerClient
277 TimerUnit sum; // total bill
279 // only store a pointer for efficiency.
280 const wchar_t* description;
282 TimerClient* next;
284 // how often the timer was billed (helps measure relative
285 // performance of something that is done indeterminately often).
286 intptr_t num_calls;
290 * make the given TimerClient (usually instantiated as static data)
291 * ready for use. returns its address for TIMER_ADD_CLIENT's convenience.
292 * this client's total (which is increased by a BillingPolicy) will be
293 * displayed by timer_DisplayClientTotals.
294 * notes:
295 * - may be called at any time;
296 * - always succeeds (there's no fixed limit);
297 * - free() is not needed nor possible.
298 * - description must remain valid until exit; a string literal is safest.
300 LIB_API TimerClient* timer_AddClient(TimerClient* tc, const wchar_t* description);
303 * "allocate" a new TimerClient that will keep track of the total time
304 * billed to it, along with a description string. These are displayed when
305 * timer_DisplayClientTotals is called.
306 * Invoke this at file or function scope; a (static) TimerClient pointer of
307 * name \<id\> will be defined, which should be passed to TIMER_ACCRUE.
309 #define TIMER_ADD_CLIENT(id)\
310 static TimerClient UID__;\
311 static TimerClient* id = timer_AddClient(&UID__, WIDEN(#id))
314 * bill the difference between t0 and t1 to the client's total.
316 struct BillingPolicy_Default
318 void operator()(TimerClient* tc, TimerUnit t0, TimerUnit t1) const
320 tc->sum.AddDifference(t0, t1);
321 tc->num_calls++;
326 * thread-safe (not used by default due to its higher overhead)
327 * note: we can't just use thread-local variables to avoid
328 * synchronization overhead because we don't have control over all
329 * threads (for accumulating their separate timer copies).
331 struct BillingPolicy_Atomic
333 void operator()(TimerClient* tc, TimerUnit t0, TimerUnit t1) const
335 tc->sum.AddDifferenceAtomic(t0, t1);
336 cpu_AtomicAdd(&tc->num_calls, +1);
341 * display all clients' totals; does not reset them.
342 * typically called at exit.
344 LIB_API void timer_DisplayClientTotals();
347 /// used by TIMER_ACCRUE
348 template<class BillingPolicy = BillingPolicy_Default>
349 class ScopeTimerAccrue
351 NONCOPYABLE(ScopeTimerAccrue);
352 public:
353 ScopeTimerAccrue(TimerClient* tc)
354 : m_tc(tc)
356 m_t0.SetFromTimer();
359 ~ScopeTimerAccrue()
361 TimerUnit t1;
362 t1.SetFromTimer();
363 BillingPolicy()(m_tc, m_t0, t1);
366 private:
367 TimerUnit m_t0;
368 TimerClient* m_tc;
372 * Measure the time taken to execute code up until end of the current scope;
373 * bill it to the given TimerClient object. Can safely be nested.
374 * Useful for measuring total time spent in a function or basic block over the
375 * entire program.
376 * `client' is an identifier registered via TIMER_ADD_CLIENT.
378 * Example usage:
379 * TIMER_ADD_CLIENT(client);
381 * void func()
383 * TIMER_ACCRUE(client);
384 * // code to be measured
387 * [later or at exit]
388 * timer_DisplayClientTotals();
390 #define TIMER_ACCRUE(client) ScopeTimerAccrue<> UID__(client)
391 #define TIMER_ACCRUE_ATOMIC(client) ScopeTimerAccrue<BillingPolicy_Atomic> UID__(client)
393 #endif // #ifndef INCLUDED_TIMER