Merge remote-tracking branch 'gerrit/release-4-5-patches' into release-4-6
[gromacs.git] / include / gmx_cyclecounter.h
blob620df2c016e4efc6faf8b45c0c9a60270863c30a
/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 * This file is part of Gromacs        Copyright (c) 1991-2006
 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org
 *
 * And Hey:
 * Gnomes, ROck Monsters And Chili Sauce
 */
19 #ifndef _GMX_CYCLECOUNTER_H_
20 #define _GMX_CYCLECOUNTER_H_
/** @file gmx_cyclecounter.h
 *
 *  @brief High-resolution timestamp or CPU clock cycle counters.
 *
 *  After reading the current value with gmx_cycles_read() you can add or
 *  subtract these numbers as normal integers of type gmx_cycles_t.
 */
30 #ifdef _MSC_VER
31 #include <intrin.h>
32 #endif
34 #ifdef __cplusplus
35 extern "C"
37 #endif
38 #if 0
39 } /* fixes auto-indentation problems */
40 #endif
/* Minor implementation note:
 *
 * I like to use these counters in other programs too, so to avoid making
 * it dependent on other Gromacs definitions I use the #ifdef's to set
 * architecture-specific inline macros instead of using gmx_inline from
 * gmx_types.h /Erik 2005-12-10
 */
/* Select the integer type that holds a cycle-counter reading.
 *
 * The platform tests in this chain must match, branch for branch, the
 * chains that define gmx_cycles_have_counter() and gmx_cycles_read().
 */
#if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
    (defined(__i386__) || defined(__x86_64__)))
/* x86 or x86-64 with GCC inline assembly */
typedef unsigned long long gmx_cycles_t;

#elif defined(_MSC_VER)
#include <windows.h>
typedef __int64 gmx_cycles_t;

#elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
/* HP compiler on ia64 */
#include <machine/sys/inline.h>
typedef unsigned long gmx_cycles_t;

#elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
/* Intel compiler on ia64 */
#include <ia64intrin.h>
typedef unsigned long gmx_cycles_t;

#elif defined(__GNUC__) && defined(__ia64__)
/* ia64 with GCC inline assembly */
typedef unsigned long gmx_cycles_t;

#elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
/* HP PA-RISC, inline asm with gcc */
typedef unsigned long gmx_cycles_t;

#elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
/* HP PA-RISC, instruction when using HP compiler */
#include <machine/inline.h>
typedef unsigned long gmx_cycles_t;

#elif defined(__GNUC__) && defined(__s390__)
/* S390, taken from FFTW who got it from James Treacy */
typedef unsigned long long gmx_cycles_t;

#elif defined(__GNUC__) && defined(__alpha__)
/* gcc inline assembly on alpha CPUs */
typedef unsigned long gmx_cycles_t;

#elif defined(__GNUC__) && defined(__sparc_v9__)
/* gcc inline assembly on sparc v9 */
typedef unsigned long gmx_cycles_t;

#elif defined(__DECC) && defined(__alpha)
/* Digital GEM C compiler on alpha */
#include <c_asm.h>
typedef unsigned long gmx_cycles_t;

#elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
/* Irix compilers on SGI hardware. Get nanoseconds from struct timespec */
typedef unsigned long long gmx_cycles_t;

#elif (defined(__SVR4) && defined (__SUNPRO_CC))
/* Solaris high-resolution timers; hrtime_t comes from <sys/time.h> */
#include <sys/time.h>
typedef hrtime_t gmx_cycles_t;

#elif defined(__xlC__) && defined (_AIX)
/* AIX compilers */
#include <sys/time.h>
#include <sys/systemcfg.h>
typedef unsigned long long gmx_cycles_t;

#elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
    ( defined(__powerpc__) || defined(__ppc__) ) )
/* PowerPC using gcc inline assembly (also works on xlc>=7.0 with -qasm=gcc) */
typedef unsigned long long gmx_cycles_t;

#elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
/* Metrowerks on macintosh */
typedef unsigned long long gmx_cycles_t;

#elif defined(__sun) && defined(__sparcv9)
/* Solaris on SPARC v9 */
typedef unsigned long gmx_cycles_t;

#else
/*! \brief Integer-like datatype for cycle counter values
 *
 *  Depending on your system this will usually be something like long long,
 *  or a special cycle datatype from the system header files. It is NOT
 *  necessarily real processor cycles - many systems count in nanoseconds
 *  or a special external time register at fixed frequency (not the CPU freq.)
 *
 *  You can subtract or add gmx_cycles_t types just as normal integers, and if
 *  you run the calibration routine you can also multiply it with a factor to
 *  translate the cycle data to seconds.
 */
typedef long gmx_cycles_t;

#endif
/*! \brief Check if high-resolution cycle counters are available
 *
 *  Not all architectures provide any way to read timestamp counters
 *  in the CPU, and on some it is broken. Although we refer to it
 *  as cycle counters, it is not necessarily given in units of
 *  cycles.
 *
 *  If you notice that your system is missing, implement support for it,
 *  find out how to detect the system during preprocessing, and send us a
 *  patch.
 *
 *  \return 1 if cycle counters are available, 0 if not.
 *
 *  \note This function does not need to be in the header for performance
 *        reasons, but it is very important that we get exactly the
 *        same detection as for the gmx_cycles_read() routines. If you
 *        compile the library with one compiler, and then use a different
 *        one when later linking to the library it might happen that the
 *        library supports cyclecounters but not the headers, or vice versa.
 */
#if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
    (defined(__i386__) || defined(__x86_64__)))
static __inline__ int gmx_cycles_have_counter(void)
{
    /* x86 or x86-64 with GCC inline assembly - pentium TSC register */
    return 1;
}
#elif (defined(_MSC_VER))
static __inline int gmx_cycles_have_counter(void)
{
    return 1;
}
#elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
static inline int gmx_cycles_have_counter(void)
{
    /* HP compiler on ia64, use special instruction to read ITC */
    return 1;
}
#elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* Intel compiler on ia64, use special instruction to read ITC */
    return 1;
}
#elif defined(__GNUC__) && defined(__ia64__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* ia64 with GCC inline assembly - ITC register */
    return 1;
}
#elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
static __inline__ int gmx_cycles_have_counter(void)
{
    /* HP PA-RISC, inline asm with gcc */
    return 1;
}
#elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
static inline int gmx_cycles_have_counter(void)
{
    /* HP PA-RISC, instruction when using HP compiler */
    return 1;
}
#elif defined(__GNUC__) && defined(__s390__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* S390, taken from FFTW who got it from James Treacy */
    return 1;
}
#elif defined(__GNUC__) && defined(__alpha__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* gcc inline assembly on alpha CPUs */
    return 1;
}
#elif defined(__GNUC__) && defined(__sparc_v9__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* gcc inline assembly on sparc v9 */
    return 1;
}
#elif defined(__DECC) && defined(__alpha)
static __inline int gmx_cycles_have_counter(void)
{
    /* Digital GEM C compiler on alpha */
    return 1;
}
#elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
static __inline int gmx_cycles_have_counter(void)
{
    /* Irix compilers on SGI hardware */
    return 1;
}
#elif (defined(__SVR4) && defined (__SUNPRO_CC))
static inline int gmx_cycles_have_counter(void)
{
    /* Solaris high-resolution timers */
    return 1;
}
#elif defined(__xlC__) && defined (_AIX)
static inline int gmx_cycles_have_counter(void)
{
    /* AIX compilers */
    return 1;
}
#elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
    ( defined(__powerpc__) || defined(__ppc__) ) )
static __inline__ int gmx_cycles_have_counter(void)
{
    /* PowerPC using gcc inline assembly (and xlc>=7.0 with -qasm=gcc) */
    return 1;
}
#elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
static __inline__ int gmx_cycles_have_counter(void)
{
    /* Metrowerks on macintosh */
    return 1;
}
#elif defined(__sun) && defined(__sparcv9)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* Solaris on SPARC */
    return 1;
}
#else
static int gmx_cycles_have_counter(void)
{
    /* No cycle counter that we know of on this system */
    return 0;
}
#endif
301 /*! \brief Read CPU cycle counter
303 * This routine returns an abstract datatype containing a
304 * cycle counter timestamp.
306 * \return Opaque data corresponding to a cycle reading.
308 * Please note that on most systems it takes several cycles
309 * to read and return the cycle counters. If you are measuring
310 * small intervals, you can compensate for this time by calling
311 * the routine twice and calculating what the difference is.
312 * Subtract this from your other measurements to get an accurate result.
314 * Use gmx_cycles_difference() to get a real number corresponding to
315 * the difference between two gmx_cycles_t values returned from this
316 * routine.
318 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
319 (defined(__i386__) || defined(__x86_64__)))
320 static __inline__ gmx_cycles_t gmx_cycles_read(void)
322 /* x86 with GCC inline assembly - pentium TSC register */
323 gmx_cycles_t cycle;
324 unsigned low,high;
326 __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
328 cycle = ((unsigned long long)low) | (((unsigned long long)high)<<32);
330 return cycle;
332 #elif defined(_MSC_VER)
333 static __inline gmx_cycles_t gmx_cycles_read(void)
335 return __rdtsc();
337 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
338 static inline gmx_cycles_t gmx_cycles_read(void)
340 /* HP compiler on ia64 */
341 gmx_cycles_t ret;
342 ret = _Asm_mov_from_ar (_AREG_ITC);
343 return ret;
345 #elif (defined(__INTEL_COMPILER) && defined(__ia64__))
346 static __inline__ gmx_cycles_t gmx_cycles_read(void)
348 /* Intel compiler on ia64 */
349 return __getReg(_IA64_REG_AR_ITC);
351 #elif defined(__GNUC__) && defined(__ia64__)
352 static __inline__ gmx_cycles_t gmx_cycles_read(void)
354 /* ia64 with GCC inline assembly */
355 gmx_cycles_t ret;
356 __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret));
357 return ret;
359 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
360 static __inline__ gmx_cycles_t gmx_cycles_read(void)
362 /* HP PA-RISC, inline asm with gcc */
363 gmx_cycles_t ret;
364 __asm__ __volatile__("mfctl 16, %0": "=r" (ret));
365 /* no input, nothing else clobbered */
366 return ret;
368 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
369 static inline gmx_cycles_t gmx_cycles_read(void)
371 /* HP PA-RISC, instruction when using HP compiler */
372 gmx_cycles_t ret;
373 _MFCTL(16, ret);
374 return ret;
376 #elif defined(__GNUC__) && defined(__s390__)
377 static __inline__ gmx_cycles_t gmx_cycles_read(void)
379 /* S390, taken from FFTW who got it from James Treacy */
380 gmx_cycles_t cycle;
381 __asm__("stck 0(%0)" : : "a" (&(cycle)) : "memory", "cc");
382 return cycle;
384 #elif defined(__GNUC__) && defined(__alpha__)
385 static __inline__ gmx_cycles_t gmx_cycles_read(void)
387 /* gcc inline assembly on alpha CPUs */
388 unsigned long cycle;
389 __asm__ __volatile__ ("rpcc %0" : "=r"(cycle));
390 return (cycle & 0xFFFFFFFF);
392 #elif defined(__GNUC__) && defined(__sparc_v9__)
393 static __inline__ gmx_cycles_t gmx_cycles_read(void)
395 /* gcc inline assembly on sparc v9 */
396 unsigned long ret;
397 __asm__("rd %%tick, %0" : "=r" (ret));
398 return ret;
400 #elif defined(__DECC) && defined(__alpha)
401 static __inline gmx_cycles_t gmx_cycles_read(void)
403 /* Digital GEM C compiler on alpha */
404 unsigned long cycle;
405 cycle = asm("rpcc %v0");
406 return (cycle & 0xFFFFFFFF);
408 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
409 static __inline gmx_cycles_t gmx_cycles_read(void)
411 /* Irix compilers on SGI hardware */
412 struct timespec t;
413 clock_gettime(CLOCK_SGI_CYCLE, &t);
414 /* Return the number of nanoseconds, so we can subtract/add */
415 return ((unsigned long long)t.tv_sec)*1000000000+
416 (unsigned long long)t.tv_nsec;
418 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
419 static inline gmx_cycles_t gmx_cycles_read(void)
421 /* Solaris high-resolution timers */
422 return gethrtime();
424 #elif defined(__xlC__) && defined (_AIX)
425 static inline gmx_cycles_t gmx_cycles_read(void)
427 /* AIX compilers. Inline the calculation instead of using library functions */
428 timebasestruct_t t1;
429 read_real_time(&t1, TIMEBASE_SZ);
430 /* POWER returns real time (seconds + nanoseconds),
431 * POWER_PC returns high/low 32 bits of a counter.
433 if(t1.flag==RTC_POWER_PC)
435 return ((gmx_cycles_t)t1.tb_high)<<32 | (gmx_cycles_t)t1.tb_low;
437 else
439 return ((gmx_cycles_t)t1.tb_high)*1000000000+(gmx_cycles_t)t1.tb_low;
442 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
443 ( defined(__powerpc__) || defined(__ppc__) ) )
444 static __inline__ gmx_cycles_t gmx_cycles_read(void)
446 /* PowerPC using gcc inline assembly (and xlC>=7.0 with -qasm=gcc) */
447 unsigned long low, high1, high2;
450 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
451 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
452 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
454 while (high1 != high2);
456 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
458 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
459 static __inline__ gmx_cycles_t gmx_cycles_read(void)
461 /* Metrowerks on macintosh */
462 unsigned int long low, high1, high2;
465 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
466 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
467 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
469 while (high1 != high2);
471 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
473 #elif defined(__sun) && defined(__sparcv9)
475 static __inline__ gmx_cycles_t gmx_cycles_read(void)
477 gmx_cycles_t ret;
478 __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
479 return ret;
482 #else
483 static gmx_cycles_t gmx_cycles_read(void)
485 return 0;
487 #endif
/*! \brief Calculate number of seconds per cycle tick on host
 *
 *  This routine runs a timer loop to calibrate the number of
 *  seconds per unit of the values returned by gmx_cycles_read().
 *
 *  To calculate the time used, call gmx_cycles_read() twice, subtract
 *  the two readings, and multiply the difference by the value returned
 *  from this routine to get a double precision number of seconds.
 *
 *  \param  sampletime Minimum number of seconds to sample.
 *          One second should give you a reasonably accurate calibration.
 *  \return Number of seconds per cycle unit. If it is not possible to
 *          calculate on this system (for whatever reason) the return value
 *          will be -1, so check that it is positive before using it.
 */
double
gmx_cycles_calibrate(double sampletime);
515 #ifdef __cplusplus
517 #endif
521 #endif /* _GMX_CYCLECOUNTER_H_ */