2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2006 David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
5 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
6 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 /*! \libinternal \file
39 * High-resolution timestamp or CPU clock cycle counters.
41 * After reading the current value with gmx_cycles_read() you can add or
42 * subtract these numbers as normal integers of type gmx_cycles_t.
46 #ifndef GMX_TIMING_CYCLECOUNTER_H
47 #define GMX_TIMING_CYCLECOUNTER_H
50 * Define GMX_USE_RDTSCP=1 to use the serializing rdtscp instruction instead of rdtsc.
51 * This is supported on essentially all Intel/AMD hardware still in use, and provides better accuracy.
59 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) \
60 && (defined(__i386__) || defined(__x86_64__)))
61 /* x86 or x86-64 with GCC inline assembly */
62 typedef unsigned long long gmx_cycles_t
;
64 #elif ((defined __aarch64__) \
65 && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
66 /* 64-bit ARM cycle counters with GCC inline assembly */
67 typedef unsigned long long gmx_cycles_t
;
69 #elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
70 /* Armv7A can provide 64-bit cycles by returning two registers */
71 typedef unsigned long long gmx_cycles_t
;
73 #elif defined(_MSC_VER)
75 typedef __int64 gmx_cycles_t
;
77 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
78 /* HP compiler on ia64 */
79 # include <machine/sys/inline.h>
80 typedef unsigned long gmx_cycles_t
;
82 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
83 /* Intel compiler on ia64 */
84 # include <ia64intrin.h>
85 typedef unsigned long gmx_cycles_t
;
87 #elif defined(__GNUC__) && defined(__ia64__)
88 /* ia64 with GCC inline assembly */
89 typedef unsigned long gmx_cycles_t
;
91 #elif ((defined(__hppa__) || defined(__hppa)) && defined(__GNUC__))
92 /* HP PA-RISC, inline asm with gcc */
93 typedef unsigned long gmx_cycles_t
;
95 #elif ((defined(__hppa__) || defined(__hppa)) && defined(__hpux))
96 /* HP PA-RISC, instruction when using HP compiler */
97 # include <machine/inline.h>
98 typedef unsigned long gmx_cycles_t
;
100 #elif defined(__GNUC__) && defined(__s390__)
101 /* S390, taken from FFTW who got it from James Treacy */
102 typedef unsigned long long gmx_cycles_t
;
104 #elif defined(__GNUC__) && defined(__alpha__)
105 /* gcc inline assembly on alpha CPUs */
106 typedef unsigned long gmx_cycles_t
;
108 #elif defined(__GNUC__) && defined(__sparc_v9__)
109 /* gcc inline assembly on sparc v9 */
110 typedef unsigned long gmx_cycles_t
;
112 #elif defined(__DECC) && defined(__alpha)
113 /* Digital GEM C compiler on alpha */
115 typedef unsigned long gmx_cycles_t
;
117 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
118 /* Irix compilers on SGI hardware. Get nanoseconds from struct timespec */
119 typedef unsigned long long gmx_cycles_t
;
121 #elif (defined(__SVR4) && defined(__SUNPRO_CC))
122 /* Solaris high-resolution timers */
123 typedef hrtime_t gmx_cycles_t
;
125 #elif defined(__xlC__) && defined(_AIX)
127 # include <sys/systemcfg.h>
128 # include <sys/time.h>
129 typedef unsigned long long gmx_cycles_t
;
131 #elif ((defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM)) \
132 && (defined(__powerpc__) || defined(__ppc__)))
133 /* PowerPC using gcc inline assembly (also works on xlc>=7.0 with -qasm=gcc) */
134 typedef unsigned long long gmx_cycles_t
;
136 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
137 /* Metrowerks on macintosh */
138 typedef unsigned long long gmx_cycles_t
;
140 #elif defined(__sun) && defined(__sparcv9)
142 typedef unsigned long gmx_cycles_t
;
145 /*! \brief Integer-like datatype for cycle counter values
147 * Depending on your system this will usually be something like long long,
148 * or a special cycle datatype from the system header files. It is NOT
149 * necessarily real processor cycles - many systems count in nanoseconds
150 * or a special external time register at fixed frequency (not the CPU freq.)
152 * You can subtract or add gmx_cycles_t values just as normal integers, and if
153 * you run the calibration routine you can also multiply it with a factor to
154 * translate the cycle data to seconds.
156 typedef long gmx_cycles_t
;
160 /*! \brief Read CPU cycle counter
162 * This routine returns an abstract datatype containing a
163 * cycle counter timestamp.
165 * \return Opaque data corresponding to a cycle reading.
167 * Please note that on most systems it takes several cycles
168 * to read and return the cycle counters. If you are measuring
169 * small intervals, you can compensate for this time by calling
170 * the routine twice and calculating what the difference is.
171 * Subtract this from your other measurements to get an accurate result.
173 * Use gmx_cycles_difference() to get a real number corresponding to
174 * the difference between two gmx_cycles_t values returned from this routine.
177 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) \
178 && (defined(__i386__) || defined(__x86_64__)) && !defined(_CRAYC))
179 static __inline__ gmx_cycles_t
gmx_cycles_read()
181 /* x86 with GCC inline assembly - pentium TSC register */
185 __asm__
__volatile__("rdtscp" : "=a"(low
), "=d"(high
)::"ecx");
187 __asm__
__volatile__("rdtsc" : "=a"(low
), "=d"(high
));
189 const gmx_cycles_t c_low
= low
;
190 const gmx_cycles_t c_high
= high
;
191 return c_low
| c_high
<< 32;
193 #elif ((defined __aarch64__) \
194 && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
195 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
197 /* 64-bit ARM cycle counters with GCC inline assembly */
199 __asm__
__volatile__("mrs %0, cntvct_el0" : "=r"(cycle
));
203 #elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
204 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
206 unsigned int cycles_lo
, cycles_hi
;
207 asm volatile("mrrc p15, 1, %0, %1, c14" : "=r"(cycles_lo
), "=r"(cycles_hi
));
208 return ((gmx_cycles_t
)cycles_lo
) | (((gmx_cycles_t
)cycles_hi
) << 32);
210 #elif defined(_MSC_VER)
211 static __inline gmx_cycles_t
gmx_cycles_read(void)
214 /* Windows on 64-bit ARM */
215 return __rdpmccntr64();
220 return __rdtscp(&ui
);
226 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
227 static inline gmx_cycles_t
gmx_cycles_read(void)
229 /* HP compiler on ia64 */
231 ret
= _Asm_mov_from_ar(_AREG_ITC
);
234 #elif (defined(__INTEL_COMPILER) && defined(__ia64__))
235 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
237 /* Intel compiler on ia64 */
238 return __getReg(_IA64_REG_AR_ITC
);
240 #elif defined(__GNUC__) && defined(__ia64__)
241 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
243 /* ia64 with GCC inline assembly */
245 __asm__
__volatile__("mov %0=ar.itc" : "=r"(ret
));
248 #elif ((defined(__hppa__) || defined(__hppa)) && defined(__GNUC__))
249 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
251 /* HP PA-RISC, inline asm with gcc */
253 __asm__
__volatile__("mfctl 16, %0" : "=r"(ret
));
254 /* no input, nothing else clobbered */
257 #elif ((defined(__hppa__) || defined(__hppa)) && defined(__hpux))
258 static inline gmx_cycles_t
gmx_cycles_read(void)
260 /* HP PA-RISC, instruction when using HP compiler */
265 #elif defined(__GNUC__) && defined(__s390__)
266 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
268 /* S390, taken from FFTW who got it from James Treacy */
270 __asm__("stck 0(%0)" : : "a"(&(cycle
)) : "memory", "cc");
273 #elif defined(__GNUC__) && defined(__alpha__)
274 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
276 /* gcc inline assembly on alpha CPUs */
278 __asm__
__volatile__("rpcc %0" : "=r"(cycle
));
279 return (cycle
& 0xFFFFFFFF);
281 #elif defined(__GNUC__) && defined(__sparc_v9__)
282 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
284 /* gcc inline assembly on sparc v9 */
286 __asm__("rd %%tick, %0" : "=r"(ret
));
289 #elif defined(__DECC) && defined(__alpha)
290 static __inline gmx_cycles_t
gmx_cycles_read(void)
292 /* Digital GEM C compiler on alpha */
294 cycle
= asm("rpcc %v0");
295 return (cycle
& 0xFFFFFFFF);
297 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
298 static __inline gmx_cycles_t
gmx_cycles_read(void)
300 /* Irix compilers on SGI hardware */
302 clock_gettime(CLOCK_SGI_CYCLE
, &t
);
303 /* Return the number of nanoseconds, so we can subtract/add */
304 return ((unsigned long long)t
.tv_sec
) * 1000000000 + (unsigned long long)t
.tv_nsec
;
306 #elif (defined(__SVR4) && defined(__SUNPRO_CC))
307 static inline gmx_cycles_t
gmx_cycles_read(void)
309 /* Solaris high-resolution timers */
312 #elif defined(__xlC__) && defined(_AIX)
313 static inline gmx_cycles_t
gmx_cycles_read(void)
315 /* AIX compilers. Inline the calculation instead of using library functions */
317 read_real_time(&t1
, TIMEBASE_SZ
);
318 /* POWER returns real time (seconds + nanoseconds),
319 * POWER_PC returns high/low 32 bits of a counter.
321 if (t1
.flag
== RTC_POWER_PC
)
323 return ((gmx_cycles_t
)t1
.tb_high
) << 32 | (gmx_cycles_t
)t1
.tb_low
;
327 return ((gmx_cycles_t
)t1
.tb_high
) * 1000000000 + (gmx_cycles_t
)t1
.tb_low
;
330 #elif ((defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM)) \
331 && (defined(__powerpc__) || defined(__ppc__)))
332 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
334 /* PowerPC using gcc inline assembly (and xlC>=7.0 with -qasm=gcc, and clang) */
335 unsigned long low
, high1
, high2
;
338 // clang 3.7 incorrectly warns that mftb* are
339 // deprecated. That's not correct - see
340 // https://llvm.org/bugs/show_bug.cgi?id=23680.
341 __asm__
__volatile__("mftbu %0" : "=r"(high1
) :);
342 __asm__
__volatile__("mftb %0" : "=r"(low
) :);
343 __asm__
__volatile__("mftbu %0" : "=r"(high2
) :);
344 } while (high1
!= high2
);
346 return (((gmx_cycles_t
)high2
) << 32) | (gmx_cycles_t
)low
;
348 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
349 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
351 /* Metrowerks on macintosh */
352 unsigned int long low
, high1
, high2
;
355 __asm__
__volatile__("mftbu %0" : "=r"(high1
) :);
356 __asm__
__volatile__("mftb %0" : "=r"(low
) :);
357 __asm__
__volatile__("mftbu %0" : "=r"(high2
) :);
358 } while (high1
!= high2
);
360 return (((gmx_cycles_t
)high2
) << 32) | (gmx_cycles_t
)low
;
362 #elif defined(__sun) && defined(__sparcv9)
364 static __inline__ gmx_cycles_t
gmx_cycles_read(void)
367 __asm__
__volatile__("rd %%tick, %0" : "=r"(ret
));
371 #elif defined(_CRAYC)
372 # include <intrinsics.h>
374 static __inline gmx_cycles_t
gmx_cycles_read(void)
379 static gmx_cycles_t
gmx_cycles_read(void)
386 /*! \brief Check if high-resolution cycle counters are available
388 * Not all architectures provide a way to read timestamp counters
389 * in the CPU, and on some it is broken. Although we refer to it
390 * as cycle counters, it is not necessarily given in units of cycles.
393 * If you notice that your system is missing, implement support for it,
394 * find out how to detect the system during preprocessing, and send us a patch.
397 * \return 1 if cycle counters are available, 0 if not.
399 * \note This function does not need to be in the header for performance
400 * reasons, but it is very important that we get exactly the
401 * same detection as for gmx_cycles_read() routines. If you
402 * compile the library with one compiler, and then use a different
403 * one when later linking to the library it might happen that the
404 * library supports cyclecounters but not the headers, or vice versa.
406 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) \
407 || defined(__PGIC__) || defined(_CRAYC)) \
408 && (defined(__i386__) || defined(__x86_64__)))
409 static __inline__
bool gmx_cycles_have_counter()
411 /* x86 or x86-64 with GCC inline assembly - pentium TSC register */
414 #elif ((defined __aarch64__) \
415 && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
416 static __inline
bool gmx_cycles_have_counter(void)
418 /* 64-bit ARM cycle counters with GCC inline assembly */
421 #elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
422 static __inline
bool gmx_cycles_have_counter(void)
424 /* Armv7A can provide 64-bit cycles by returning two registers. However, it will not work unless
425 * the performance registers have been made available from user space by a kernel module -
426 * otherwise it returns 0.
430 c0
= gmx_cycles_read();
431 c1
= gmx_cycles_read();
433 /* if both counters return 0, support is not present */
434 return (c0
!= 0 || c1
!= 0);
436 #elif (defined(_MSC_VER))
437 static __inline
bool gmx_cycles_have_counter(void)
441 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
442 static inline bool gmx_cycles_have_counter(void)
444 /* HP compiler on ia64, use special instruction to read ITC */
447 #elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
448 static __inline__
bool gmx_cycles_have_counter(void)
450 /* Intel compiler on ia64, use special instruction to read ITC */
453 #elif defined(__GNUC__) && defined(__ia64__)
454 static __inline__
bool gmx_cycles_have_counter(void)
456 /* ia64 with GCC inline assembly - ITC register */
459 #elif ((defined(__hppa__) || defined(__hppa)) && defined(__GNUC__))
460 static __inline__
bool gmx_cycles_have_counter(void)
462 /* HP PA-RISC, inline asm with gcc */
465 #elif ((defined(__hppa__) || defined(__hppa)) && defined(__hpux))
466 static inline bool gmx_cycles_have_counter(void)
468 /* HP PA-RISC, instruction when using HP compiler */
471 #elif defined(__GNUC__) && defined(__s390__)
472 static __inline__
bool gmx_cycles_have_counter(void)
474 /* S390, taken from FFTW who got it from James Treacy */
477 #elif defined(__GNUC__) && defined(__alpha__)
478 static __inline__
bool gmx_cycles_have_counter(void)
480 /* gcc inline assembly on alpha CPUs */
483 #elif defined(__GNUC__) && defined(__sparc_v9__)
484 static __inline__
bool gmx_cycles_have_counter(void)
486 /* gcc inline assembly on sparc v9 */
489 #elif defined(__DECC) && defined(__alpha)
490 static __inline
bool gmx_cycles_have_counter(void)
492 /* Digital GEM C compiler on alpha */
495 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
496 static __inline
bool gmx_cycles_have_counter(void)
498 /* Irix compilers on SGI hardware */
501 #elif (defined(__SVR4) && defined(__SUNPRO_CC))
502 static inline bool gmx_cycles_have_counter(void)
504 /* Solaris high-resolution timers */
507 #elif defined(__xlC__) && defined(_AIX)
508 static inline bool gmx_cycles_have_counter(void)
513 #elif ((defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM)) \
514 && (defined(__powerpc__) || defined(__ppc__)))
515 static __inline__
bool gmx_cycles_have_counter(void)
517 /* PowerPC using gcc inline assembly (and xlc>=7.0 with -qasm=gcc) */
520 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
521 static __inline__
bool gmx_cycles_have_counter(void)
523 /* Metrowerks on macintosh */
526 #elif defined(__sun) && defined(__sparcv9)
528 static __inline__
bool gmx_cycles_have_counter(void)
530 /* Solaris on SPARC*/
534 static bool gmx_cycles_have_counter(void)
536 /* No cycle counter that we know of on this system */
542 /*! \brief Calculate number of seconds per cycle tick on host
544 * This routine runs a timer loop to calibrate the number of
545 * seconds per unit returned from gmx_cycles_read().
547 * \param sampletime Minimum real sample time. It takes some trial-and-error
548 * to find the correct delay loop size, so the total runtime of
549 * this routine is about twice this time.
550 * \return Number of seconds per cycle unit. If it is not possible to
551 * calculate on this system (for whatever reason) the return value
552 * will be -1, so check that it is positive before using it.
554 double gmx_cycles_calibrate(double sampletime
);