2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2006 David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
5 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \libinternal \file
38 * High-resolution timestamp or CPU clock cycle counters.
40 * After reading the current value with gmx_cycles_read() you can add or
41 * subtract these numbers as normal integers of type gmx_cycles_t.
45 #ifndef GMX_TIMING_CYCLECOUNTER_H
46 #define GMX_TIMING_CYCLECOUNTER_H
49 * define HAVE_RDTSCP to use the serializing rdtscp instruction instead of rdtsc.
50 * This is only supported on newer Intel/AMD hardware, but provides better accuracy.
63 } /* fixes auto-indentation problems */
66 /* Minor implementation note:
68 * I like to use these counters in other programs too, so to avoid making
69 * it dependent on other Gromacs definitions I use the #ifdef's to set
70 * architecture-specific inline macros instead of using gmx_inline from
71 * gmx_types.h /Erik 2005-12-10
74 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
75 (defined(__i386__) || defined(__x86_64__)))
76 /* x86 or x86-64 with GCC inline assembly */
77 typedef unsigned long long
80 #elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
81 /* 64-bit ARM cycle counters with GCC inline assembly */
82 typedef unsigned long long
85 #elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
86 /* Armv7A can provide 64-bit cycles by returning two registers */
87 typedef unsigned long long
90 #elif defined(_MSC_VER)
95 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
96 /* HP compiler on ia64 */
97 #include <machine/sys/inline.h>
101 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
102 /* Intel compiler on ia64 */
103 #include <ia64intrin.h>
104 typedef unsigned long
107 #elif defined(__GNUC__) && defined(__ia64__)
108 /* ia64 with GCC inline assembly */
109 typedef unsigned long
112 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
113 /* HP PA-RISC, inline asm with gcc */
114 typedef unsigned long
117 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
118 /* HP PA-RISC, instruction when using HP compiler */
119 #include <machine/inline.h>
120 typedef unsigned long
123 #elif defined(__GNUC__) && defined(__s390__)
124 /* S390, taken from FFTW who got it from James Treacy */
125 typedef unsigned long long
128 #elif defined(__GNUC__) && defined(__alpha__)
129 /* gcc inline assembly on alpha CPUs */
130 typedef unsigned long
133 #elif defined(__GNUC__) && defined(__sparc_v9__)
134 /* gcc inline assembly on sparc v9 */
135 typedef unsigned long
138 #elif defined(__DECC) && defined(__alpha)
139 /* Digital GEM C compiler on alpha */
141 typedef unsigned long
144 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
145 /* Irix compilers on SGI hardware. Get nanoseconds from struct timespec */
146 typedef unsigned long long
149 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
150 /* Solaris high-resolution timers */
154 #elif defined(__xlC__) && defined (_AIX)
156 #include <sys/systemcfg.h>
157 #include <sys/time.h>
158 typedef unsigned long long
161 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
162 ( defined(__powerpc__) || defined(__ppc__) ) )
163 /* PowerPC using gcc inline assembly (also works on xlc>=7.0 with -qasm=gcc) */
164 typedef unsigned long long
167 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
168 /* Metrowerks on macintosh */
169 typedef unsigned long long
172 #elif defined(__sun) && defined(__sparcv9)
174 typedef unsigned long
178 /*! \brief Integer-like datatype for cycle counter values
180 * Depending on your system this will usually be something like long long,
181 * or a special cycle datatype from the system header files. It is NOT
182 * necessarily real processor cycles - many systems count in nanoseconds
183 * or a special external time register at fixed frequency (not the CPU freq.)
185 * You can subtract or add gmx_cycles_t types just as normal integers, and if
186 * you run the calibration routine you can also multiply it with a factor to
187 * translate the cycle data to seconds.
194 /*! \brief Read CPU cycle counter
196 * This routine returns an abstract datatype containing a
197 * cycle counter timestamp.
199 * \return Opaque data corresponding to a cycle reading.
201 * Please note that on most systems it takes several cycles
202 * to read and return the cycle counters. If you are measuring
203 * small intervals, you can compensate for this time by calling
204 * the routine twice and calculating what the difference is.
205 * Subtract this from your other measurements to get an accurate result.
207 * Use gmx_cycles_difference() to get a real number corresponding to
208 * the difference between two gmx_cycles_t values returned from this
211 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
212 (defined(__i386__) || defined(__x86_64__)) && !defined(_CRAYC))
213 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
215 /* x86 with GCC inline assembly - pentium TSC register */
220 __asm__
__volatile__("rdtscp" : "=a" (low
), "=d" (high
) :: "ecx" );
222 __asm__
__volatile__("rdtsc" : "=a" (low
), "=d" (high
));
225 cycle
= ((unsigned long long)low
) | (((unsigned long long)high
)<<32);
229 #elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
230 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
232 /* 64-bit ARM cycle counters with GCC inline assembly */
234 __asm__
__volatile__("mrs %0, cntvct_el0" : "=r" (cycle
) );
238 #elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
239 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
241 unsigned int cycles_lo
, cycles_hi
;
242 asm volatile("mrrc p15, 1, %0, %1, c14" : "=r" (cycles_lo
), "=r" (cycles_hi
));
243 return ((gmx_cycles_t
)cycles_lo
) | (((gmx_cycles_t
)cycles_hi
) << 32);
245 #elif defined(_MSC_VER)
246 static __inline gmx_cycles_t
gmx_cycles_read(void)
249 /* Windows on 64-bit ARM */
250 return __rdpmccntr64();
255 return __rdtscp(&ui
);
261 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
262 static inline gmx_cycles_t
gmx_cycles_read(void)
264 /* HP compiler on ia64 */
266 ret
= _Asm_mov_from_ar (_AREG_ITC
);
269 #elif (defined(__INTEL_COMPILER) && defined(__ia64__))
270 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
272 /* Intel compiler on ia64 */
273 return __getReg(_IA64_REG_AR_ITC
);
275 #elif defined(__GNUC__) && defined(__ia64__)
276 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
278 /* ia64 with GCC inline assembly */
280 __asm__
__volatile__ ("mov %0=ar.itc" : "=r" (ret
));
283 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
284 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
286 /* HP PA-RISC, inline asm with gcc */
288 __asm__
__volatile__("mfctl 16, %0" : "=r" (ret
));
289 /* no input, nothing else clobbered */
292 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
293 static inline gmx_cycles_t
gmx_cycles_read(void)
295 /* HP PA-RISC, instruction when using HP compiler */
300 #elif defined(__GNUC__) && defined(__s390__)
301 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
303 /* S390, taken from FFTW who got it from James Treacy */
305 __asm__("stck 0(%0)" : : "a" (&(cycle
)) : "memory", "cc");
308 #elif defined(__GNUC__) && defined(__alpha__)
309 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
311 /* gcc inline assembly on alpha CPUs */
313 __asm__
__volatile__ ("rpcc %0" : "=r" (cycle
));
314 return (cycle
& 0xFFFFFFFF);
316 #elif defined(__GNUC__) && defined(__sparc_v9__)
317 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
319 /* gcc inline assembly on sparc v9 */
321 __asm__("rd %%tick, %0" : "=r" (ret
));
324 #elif defined(__DECC) && defined(__alpha)
325 static __inline gmx_cycles_t
gmx_cycles_read(void)
327 /* Digital GEM C compiler on alpha */
329 cycle
= asm ("rpcc %v0");
330 return (cycle
& 0xFFFFFFFF);
332 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
333 static __inline gmx_cycles_t
gmx_cycles_read(void)
335 /* Irix compilers on SGI hardware */
337 clock_gettime(CLOCK_SGI_CYCLE
, &t
);
338 /* Return the number of nanoseconds, so we can subtract/add */
339 return ((unsigned long long)t
.tv_sec
)*1000000000+
340 (unsigned long long)t
.tv_nsec
;
342 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
343 static inline gmx_cycles_t
gmx_cycles_read(void)
345 /* Solaris high-resolution timers */
348 #elif defined(__xlC__) && defined (_AIX)
349 static inline gmx_cycles_t
gmx_cycles_read(void)
351 /* AIX compilers. Inline the calculation instead of using library functions */
353 read_real_time(&t1
, TIMEBASE_SZ
);
354 /* POWER returns real time (seconds + nanoseconds),
355 * POWER_PC returns high/low 32 bits of a counter.
357 if (t1
.flag
== RTC_POWER_PC
)
359 return ((gmx_cycles_t
)t1
.tb_high
)<<32 | (gmx_cycles_t
)t1
.tb_low
;
363 return ((gmx_cycles_t
)t1
.tb_high
)*1000000000+(gmx_cycles_t
)t1
.tb_low
;
366 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
367 ( defined(__powerpc__) || defined(__ppc__) ) )
368 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
370 /* PowerPC using gcc inline assembly (and xlC>=7.0 with -qasm=gcc) */
371 unsigned long low
, high1
, high2
;
374 __asm__
__volatile__ ("mftbu %0" : "=r" (high1
) : );
375 __asm__
__volatile__ ("mftb %0" : "=r" (low
) : );
376 __asm__
__volatile__ ("mftbu %0" : "=r" (high2
) : );
378 while (high1
!= high2
);
380 return (((gmx_cycles_t
)high2
) << 32) | (gmx_cycles_t
)low
;
382 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
383 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
385 /* Metrowerks on macintosh */
386 unsigned int long low
, high1
, high2
;
389 __asm__
__volatile__ ("mftbu %0" : "=r" (high1
) : );
390 __asm__
__volatile__ ("mftb %0" : "=r" (low
) : );
391 __asm__
__volatile__ ("mftbu %0" : "=r" (high2
) : );
393 while (high1
!= high2
);
395 return (((gmx_cycles_t
)high2
) << 32) | (gmx_cycles_t
)low
;
397 #elif defined(__sun) && defined(__sparcv9)
399 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
402 __asm__
__volatile__("rd %%tick, %0" : "=r" (ret
));
406 #elif defined(_CRAYC)
407 #include <intrinsics.h>
409 static __inline gmx_cycles_t
gmx_cycles_read(void)
414 static gmx_cycles_t
gmx_cycles_read(void)
421 /*! \brief Check if high-resolution cycle counters are available
423 * Not all architectures provide any way to read timestamp counters
424 * in the CPU, and on some it is broken. Although we refer to it
425 * as cycle counters, it is not necessarily given in units of
428 * If you notice that your system is missing, implement support for it,
429 * find out how to detect the system during preprocessing, and send us a
432 * \return 1 if cycle counters are available, 0 if not.
434 * \note This function does not need to be in the header for performance
435 * reasons, but it is very important that we get exactly the
436 * same detection as for gmx_cycles_read() routines. If you
437 * compile the library with one compiler, and then use a different
438 * one when later linking to the library it might happen that the
439 * library supports cyclecounters but not the headers, or vice versa.
441 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__) || defined(_CRAYC)) && \
442 (defined(__i386__) || defined(__x86_64__)))
443 static __inline__
int gmx_cycles_have_counter(void)
445 /* x86 or x86-64 with GCC inline assembly - pentium TSC register */
448 #elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
449 static __inline
int gmx_cycles_have_counter(void)
451 /* 64-bit ARM cycle counters with GCC inline assembly */
454 #elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
455 static __inline
int gmx_cycles_have_counter(void)
457 /* Armv7A can provide 64-bit cycles by returning two registers. However, it will not work unless
458 * the performance registers have been made available from user space by a kernel module -
459 * otherwise it returns 0.
463 c0
= gmx_cycles_read();
464 c1
= gmx_cycles_read();
466 /* if both counters return 0, support is not present */
467 return (c0
!= 0 || c1
!= 0);
469 #elif (defined(_MSC_VER))
470 static __inline
int gmx_cycles_have_counter(void)
474 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
475 static inline int gmx_cycles_have_counter(void)
477 /* HP compiler on ia64, use special instruction to read ITC */
480 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
481 static __inline__
int gmx_cycles_have_counter(void)
483 /* Intel compiler on ia64, use special instruction to read ITC */
486 #elif defined(__GNUC__) && defined(__ia64__)
487 static __inline__
int gmx_cycles_have_counter(void)
489 /* ia64 with GCC inline assembly - ITC register */
492 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
493 static __inline__
int gmx_cycles_have_counter(void)
495 /* HP PA-RISC, inline asm with gcc */
498 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
499 static inline int gmx_cycles_have_counter(void)
501 /* HP PA-RISC, instruction when using HP compiler */
504 #elif defined(__GNUC__) && defined(__s390__)
505 static __inline__
int gmx_cycles_have_counter(void)
507 /* S390, taken from FFTW who got it from James Treacy */
510 #elif defined(__GNUC__) && defined(__alpha__)
511 static __inline__
int gmx_cycles_have_counter(void)
513 /* gcc inline assembly on alpha CPUs */
516 #elif defined(__GNUC__) && defined(__sparc_v9__)
517 static __inline__
int gmx_cycles_have_counter(void)
519 /* gcc inline assembly on sparc v9 */
522 #elif defined(__DECC) && defined(__alpha)
523 static __inline
int gmx_cycles_have_counter(void)
525 /* Digital GEM C compiler on alpha */
528 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
529 static __inline
int gmx_cycles_have_counter(void)
531 /* Irix compilers on SGI hardware */
534 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
535 static inline int gmx_cycles_have_counter(void)
537 /* Solaris high-resolution timers */
540 #elif defined(__xlC__) && defined (_AIX)
541 static inline int gmx_cycles_have_counter(void)
546 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
547 ( defined(__powerpc__) || defined(__ppc__) ) )
548 static __inline__
int gmx_cycles_have_counter(void)
550 /* PowerPC using gcc inline assembly (and xlc>=7.0 with -qasm=gcc) */
553 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
554 static __inline__
int gmx_cycles_have_counter(void)
556 /* Metrowerks on macintosh */
559 #elif defined(__sun) && defined(__sparcv9)
561 static __inline__
int gmx_cycles_have_counter(void)
563 /* Solaris on SPARC*/
567 static int gmx_cycles_have_counter(void)
569 /* No cycle counter that we know of on this system */
575 /*! \brief Calculate number of seconds per cycle tick on host
577 * This routine runs a timer loop to calibrate the number of
578 * seconds per unit returned from gmx_cycles_read().
580 * \param sampletime Minimum real sample time. It takes some trial-and-error
581 * to find the correct delay loop size, so the total runtime of
582 * this routine is about twice this time.
583 * \return Number of seconds per cycle unit. If it is not possible to
584 * calculate on this system (for whatever reason) the return value
585 * will be -1, so check that it is positive before using it.
588 gmx_cycles_calibrate(double sampletime
);