Added .gitignore for kernel generation
[gromacs.git] / include / gmx_cyclecounter.h
blob397f323a355aa60d2127db0d02e2bcdeae47ba01
/* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
 *
 * This file is part of Gromacs        Copyright (c) 1991-2006
 * David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * To help us fund GROMACS development, we humbly ask that you cite
 * the research papers on the package. Check out http://www.gromacs.org
 *
 * And Hey:
 * Gnomes, ROck Monsters And Chili Sauce
 */
19 #ifndef _GMX_CYCLECOUNTER_H_
20 #define _GMX_CYCLECOUNTER_H_
/*
 * Define HAVE_RDTSCP to use the serializing rdtscp instruction instead of rdtsc.
 * This is only supported on newer Intel/AMD hardware, but provides better accuracy.
 */
/** @file gmx_cyclecounter.h
 *
 *  @brief High-resolution timestamp or CPU clock cycle counters.
 *
 *  After reading the current value with gmx_cycles_read() you can add or
 *  subtract these numbers as normal integers of type gmx_cycles_t.
 */
36 #ifdef _MSC_VER
37 #include <intrin.h>
38 #endif
40 #ifdef __cplusplus
41 extern "C"
43 #endif
44 #if 0
45 } /* fixes auto-indentation problems */
46 #endif
/* Minor implementation note:
 *
 * I like to use these counters in other programs too, so to avoid making
 * them dependent on other Gromacs definitions I use the #ifdef's to set
 * architecture-specific inline macros instead of using gmx_inline from
 * gmx_types.h /Erik 2005-12-10
 */
/* Select the integer type behind gmx_cycles_t for the current
 * compiler/architecture combination.
 *
 * The conditions below are meant to mirror, branch for branch, the ones
 * used by gmx_cycles_have_counter() and gmx_cycles_read(); a change to
 * the detection logic here should be repeated in those two chains.
 */
#if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
    (defined(__i386__) || defined(__x86_64__)))
/* x86 or x86-64 with GCC inline assembly */
typedef unsigned long long
    gmx_cycles_t;

#elif defined(_MSC_VER)
#include <windows.h>
typedef __int64
    gmx_cycles_t;

#elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
/* HP compiler on ia64 */
#include <machine/sys/inline.h>
typedef unsigned long
    gmx_cycles_t;

#elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
/* Intel compiler on ia64 */
#include <ia64intrin.h>
typedef unsigned long
    gmx_cycles_t;

#elif defined(__GNUC__) && defined(__ia64__)
/* ia64 with GCC inline assembly */
typedef unsigned long
    gmx_cycles_t;

#elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
/* HP PA-RISC, inline asm with gcc */
typedef unsigned long
    gmx_cycles_t;

#elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
/* HP PA-RISC, instruction when using HP compiler */
#include <machine/inline.h>
typedef unsigned long
    gmx_cycles_t;

#elif defined(__GNUC__) && defined(__s390__)
/* S390, taken from FFTW who got it from James Treacy */
typedef unsigned long long
    gmx_cycles_t;

#elif defined(__GNUC__) && defined(__alpha__)
/* gcc inline assembly on alpha CPUs */
typedef unsigned long
    gmx_cycles_t;

#elif defined(__GNUC__) && defined(__sparc_v9__)
/* gcc inline assembly on sparc v9 */
typedef unsigned long
    gmx_cycles_t;

#elif defined(__DECC) && defined(__alpha)
/* Digital GEM C compiler on alpha */
#include <c_asm.h>
typedef unsigned long
    gmx_cycles_t;

#elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
/* Irix compilers on SGI hardware. Get nanoseconds from struct timespec */
typedef unsigned long long
    gmx_cycles_t;

#elif (defined(__SVR4) && defined (__SUNPRO_CC))
/* Solaris high-resolution timers.
 * hrtime_t and gethrtime() are declared in <sys/time.h>; include it here
 * so this header does not rely on the includer having done so already.
 */
#include <sys/time.h>
typedef hrtime_t
    gmx_cycles_t;

#elif defined(__xlC__) && defined (_AIX)
/* AIX compilers */
#include <sys/time.h>
#include <sys/systemcfg.h>
typedef unsigned long long
    gmx_cycles_t;

#elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
    ( defined(__powerpc__) || defined(__ppc__) ) )
/* PowerPC using gcc inline assembly (also works on xlc>=7.0 with -qasm=gcc) */
typedef unsigned long long
    gmx_cycles_t;

#elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
/* Metrowerks on macintosh */
typedef unsigned long long
    gmx_cycles_t;

#elif defined(__sun) && defined(__sparcv9)
/* Solaris on SPARC v9 */
typedef unsigned long
    gmx_cycles_t;

#else
/*! \brief Integer-like datatype for cycle counter values
 *
 *  Depending on your system this will usually be something like long long,
 *  or a special cycle datatype from the system header files. It is NOT
 *  necessarily real processor cycles - many systems count in nanoseconds
 *  or a special external time register at fixed frequency (not the CPU freq.)
 *
 *  You can subtract or add gmx_cycles_t types just as normal integers, and if
 *  you run the calibration routine you can also multiply it with a factor to
 *  translate the cycle data to seconds.
 */
typedef long
    gmx_cycles_t;

#endif
/*! \brief Check if high-resolution cycle counters are available
 *
 *  Not all architectures provide a way to read timestamp counters
 *  in the CPU, and on some it is broken. Although we refer to it
 *  as cycle counters, it is not necessarily given in units of
 *  cycles.
 *
 *  If you notice that your system is missing, implement support for it,
 *  find out how to detect the system during preprocessing, and send us a
 *  patch.
 *
 *  \return 1 if cycle counters are available, 0 if not.
 *
 *  \note This function does not need to be in the header for performance
 *        reasons, but it is very important that we get exactly the
 *        same detection as for the gmx_cycles_read() routines. If you
 *        compile the library with one compiler, and then use a different
 *        one when later linking to the library it might happen that the
 *        library supports cyclecounters but not the headers, or vice versa.
 */
#if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
    (defined(__i386__) || defined(__x86_64__)))
static __inline__ int gmx_cycles_have_counter(void)
{
    /* x86 or x86-64 with GCC inline assembly - pentium TSC register */
    return 1;
}
#elif (defined(_MSC_VER))
static __inline int gmx_cycles_have_counter(void)
{
    return 1;
}
#elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
static inline int gmx_cycles_have_counter(void)
{
    /* HP compiler on ia64, use special instruction to read ITC */
    return 1;
}
#elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* Intel compiler on ia64, use special instruction to read ITC */
    return 1;
}
#elif defined(__GNUC__) && defined(__ia64__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* ia64 with GCC inline assembly - ITC register */
    return 1;
}
#elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
static __inline__ int gmx_cycles_have_counter(void)
{
    /* HP PA-RISC, inline asm with gcc */
    return 1;
}
#elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
static inline int gmx_cycles_have_counter(void)
{
    /* HP PA-RISC, instruction when using HP compiler */
    return 1;
}
#elif defined(__GNUC__) && defined(__s390__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* S390, taken from FFTW who got it from James Treacy */
    return 1;
}
#elif defined(__GNUC__) && defined(__alpha__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* gcc inline assembly on alpha CPUs */
    return 1;
}
#elif defined(__GNUC__) && defined(__sparc_v9__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* gcc inline assembly on sparc v9 */
    return 1;
}
#elif defined(__DECC) && defined(__alpha)
static __inline int gmx_cycles_have_counter(void)
{
    /* Digital GEM C compiler on alpha */
    return 1;
}
#elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
static __inline int gmx_cycles_have_counter(void)
{
    /* Irix compilers on SGI hardware */
    return 1;
}
#elif (defined(__SVR4) && defined (__SUNPRO_CC))
static inline int gmx_cycles_have_counter(void)
{
    /* Solaris high-resolution timers */
    return 1;
}
#elif defined(__xlC__) && defined (_AIX)
static inline int gmx_cycles_have_counter(void)
{
    /* AIX compilers */
    return 1;
}
#elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
    ( defined(__powerpc__) || defined(__ppc__) ) )
static __inline__ int gmx_cycles_have_counter(void)
{
    /* PowerPC using gcc inline assembly (and xlc>=7.0 with -qasm=gcc) */
    return 1;
}
#elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
static __inline__ int gmx_cycles_have_counter(void)
{
    /* Metrowerks on macintosh */
    return 1;
}
#elif defined(__sun) && defined(__sparcv9)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* Solaris on SPARC */
    return 1;
}
#else
static int gmx_cycles_have_counter(void)
{
    /* No cycle counter that we know of on this system */
    return 0;
}
#endif
307 /*! \brief Read CPU cycle counter
309 * This routine returns an abstract datatype containing a
310 * cycle counter timestamp.
312 * \return Opaque data corresponding to a cycle reading.
314 * Please note that on most systems it takes several cycles
315 * to read and return the cycle counters. If you are measuring
316 * small intervals, you can compensate for this time by calling
317 * the routine twice and calculating what the difference is.
318 * Subtract this from your other measurements to get an accurate result.
320 * Use gmx_cycles_difference() to get a real number corresponding to
321 * the difference between two gmx_cycles_t values returned from this
322 * routine.
324 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
325 (defined(__i386__) || defined(__x86_64__)))
326 static __inline__ gmx_cycles_t gmx_cycles_read(void)
328 /* x86 with GCC inline assembly - pentium TSC register */
329 gmx_cycles_t cycle;
330 unsigned low,high;
332 #ifdef HAVE_RDTSCP
333 __asm__ __volatile__("rdtscp" : "=a" (low), "=d" (high) :: "ecx" );
334 #else
335 __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
336 #endif
338 cycle = ((unsigned long long)low) | (((unsigned long long)high)<<32);
340 return cycle;
342 #elif defined(_MSC_VER)
343 static __inline gmx_cycles_t gmx_cycles_read(void)
345 #ifdef HAVE_RDTSCP
346 unsigned int ui;
347 return __rdtscp(&ui);
348 #else
349 return __rdtsc();
350 #endif
352 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
353 static inline gmx_cycles_t gmx_cycles_read(void)
355 /* HP compiler on ia64 */
356 gmx_cycles_t ret;
357 ret = _Asm_mov_from_ar (_AREG_ITC);
358 return ret;
360 #elif (defined(__INTEL_COMPILER) && defined(__ia64__))
361 static __inline__ gmx_cycles_t gmx_cycles_read(void)
363 /* Intel compiler on ia64 */
364 return __getReg(_IA64_REG_AR_ITC);
366 #elif defined(__GNUC__) && defined(__ia64__)
367 static __inline__ gmx_cycles_t gmx_cycles_read(void)
369 /* ia64 with GCC inline assembly */
370 gmx_cycles_t ret;
371 __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(ret));
372 return ret;
374 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
375 static __inline__ gmx_cycles_t gmx_cycles_read(void)
377 /* HP PA-RISC, inline asm with gcc */
378 gmx_cycles_t ret;
379 __asm__ __volatile__("mfctl 16, %0": "=r" (ret));
380 /* no input, nothing else clobbered */
381 return ret;
383 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
384 static inline gmx_cycles_t gmx_cycles_read(void)
386 /* HP PA-RISC, instruction when using HP compiler */
387 gmx_cycles_t ret;
388 _MFCTL(16, ret);
389 return ret;
391 #elif defined(__GNUC__) && defined(__s390__)
392 static __inline__ gmx_cycles_t gmx_cycles_read(void)
394 /* S390, taken from FFTW who got it from James Treacy */
395 gmx_cycles_t cycle;
396 __asm__("stck 0(%0)" : : "a" (&(cycle)) : "memory", "cc");
397 return cycle;
399 #elif defined(__GNUC__) && defined(__alpha__)
400 static __inline__ gmx_cycles_t gmx_cycles_read(void)
402 /* gcc inline assembly on alpha CPUs */
403 unsigned long cycle;
404 __asm__ __volatile__ ("rpcc %0" : "=r"(cycle));
405 return (cycle & 0xFFFFFFFF);
407 #elif defined(__GNUC__) && defined(__sparc_v9__)
408 static __inline__ gmx_cycles_t gmx_cycles_read(void)
410 /* gcc inline assembly on sparc v9 */
411 unsigned long ret;
412 __asm__("rd %%tick, %0" : "=r" (ret));
413 return ret;
415 #elif defined(__DECC) && defined(__alpha)
416 static __inline gmx_cycles_t gmx_cycles_read(void)
418 /* Digital GEM C compiler on alpha */
419 unsigned long cycle;
420 cycle = asm("rpcc %v0");
421 return (cycle & 0xFFFFFFFF);
423 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
424 static __inline gmx_cycles_t gmx_cycles_read(void)
426 /* Irix compilers on SGI hardware */
427 struct timespec t;
428 clock_gettime(CLOCK_SGI_CYCLE, &t);
429 /* Return the number of nanoseconds, so we can subtract/add */
430 return ((unsigned long long)t.tv_sec)*1000000000+
431 (unsigned long long)t.tv_nsec;
433 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
434 static inline gmx_cycles_t gmx_cycles_read(void)
436 /* Solaris high-resolution timers */
437 return gethrtime();
439 #elif defined(__xlC__) && defined (_AIX)
440 static inline gmx_cycles_t gmx_cycles_read(void)
442 /* AIX compilers. Inline the calculation instead of using library functions */
443 timebasestruct_t t1;
444 read_real_time(&t1, TIMEBASE_SZ);
445 /* POWER returns real time (seconds + nanoseconds),
446 * POWER_PC returns high/low 32 bits of a counter.
448 if(t1.flag==RTC_POWER_PC)
450 return ((gmx_cycles_t)t1.tb_high)<<32 | (gmx_cycles_t)t1.tb_low;
452 else
454 return ((gmx_cycles_t)t1.tb_high)*1000000000+(gmx_cycles_t)t1.tb_low;
457 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
458 ( defined(__powerpc__) || defined(__ppc__) ) )
459 static __inline__ gmx_cycles_t gmx_cycles_read(void)
461 /* PowerPC using gcc inline assembly (and xlC>=7.0 with -qasm=gcc) */
462 unsigned long low, high1, high2;
465 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
466 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
467 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
469 while (high1 != high2);
471 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
473 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
474 static __inline__ gmx_cycles_t gmx_cycles_read(void)
476 /* Metrowerks on macintosh */
477 unsigned int long low, high1, high2;
480 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
481 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
482 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
484 while (high1 != high2);
486 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
488 #elif defined(__sun) && defined(__sparcv9)
490 static __inline__ gmx_cycles_t gmx_cycles_read(void)
492 gmx_cycles_t ret;
493 __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
494 return ret;
497 #else
498 static gmx_cycles_t gmx_cycles_read(void)
500 return 0;
502 #endif
/*! \brief Calculate number of seconds per cycle tick on host
 *
 *  This routine runs a timer loop to calibrate the number of
 *  seconds per unit of the values returned from gmx_cycles_read().
 *
 *  To calculate the time used, call gmx_cycles_read() twice, subtract
 *  the two readings, and multiply the difference by the factor returned
 *  from this routine to get the elapsed time as a double precision
 *  floating-point number of seconds.
 *
 *  \param  sampletime Minimum number of seconds to sample.
 *          One second should give you a reasonably accurate calibration.
 *  \return Number of seconds per cycle unit. If it is not possible to
 *          calculate on this system (for whatever reason) the return value
 *          will be -1, so check that it is positive before using it.
 */
double
gmx_cycles_calibrate(double sampletime);
530 #ifdef __cplusplus
532 #endif
536 #endif /* _GMX_CYCLECOUNTER_H_ */