Update instructions in containers.rst
[gromacs.git] / src / gromacs / timing / cyclecounter.h
blob956386a0762cf1bceee73da570d77c26fe4eefee
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2006 David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
5 * Copyright (c) 2013,2014,2015,2016,2017 by the GROMACS development team.
6 * Copyright (c) 2018,2019,2020, by the GROMACS development team, led by
7 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
8 * and including many others, as listed in the AUTHORS file in the
9 * top-level source directory and at http://www.gromacs.org.
11 * GROMACS is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation; either version 2.1
14 * of the License, or (at your option) any later version.
16 * GROMACS is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * Lesser General Public License for more details.
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with GROMACS; if not, see
23 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
24 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
26 * If you want to redistribute modifications to GROMACS, please
27 * consider that scientific software is very special. Version
28 * control is crucial - bugs must be traceable. We will be happy to
29 * consider code for inclusion in the official distribution, but
30 * derived work must not be called official GROMACS. Details are found
31 * in the README & COPYING files - if they are missing, get the
32 * official version at http://www.gromacs.org.
34 * To help us fund GROMACS development, we humbly ask that you cite
35 * the research papers on the package. Check out http://www.gromacs.org.
37 /*! \libinternal \file
38 * \brief
39 * High-resolution timestamp or CPU clock cycle counters.
41 * After reading the current value with gmx_cycles_read() you can add or
42 * subtract these numbers as normal integers of type gmx_cycles_t.
44 * \inlibraryapi
46 #ifndef GMX_TIMING_CYCLECOUNTER_H
47 #define GMX_TIMING_CYCLECOUNTER_H
50 * Define GMX_USE_RDTSCP=1 to use the serializing rdtscp instruction instead of rdtsc.
51 * This is supported on essentially all Intel/AMD hardware still in use, and provides better accuracy.
53 #include "config.h"
55 #ifdef _MSC_VER
56 # include <intrin.h>
57 #endif
/* Pick the integer type used to hold one cycle-counter reading.
 * There is one branch per supported compiler/architecture combination;
 * anything unrecognized falls through to the documented default at the end.
 */
#if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) \
     && (defined(__i386__) || defined(__x86_64__)))
/* x86 or x86-64 with GCC inline assembly */
typedef unsigned long long gmx_cycles_t;

#elif ((defined __aarch64__) \
       && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
/* 64-bit ARM cycle counters with GCC inline assembly */
typedef unsigned long long gmx_cycles_t;

#elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
/* Armv7A can provide 64-bit cycles by returning two registers */
typedef unsigned long long gmx_cycles_t;

#elif defined(_MSC_VER)
#    include <windows.h>
typedef __int64 gmx_cycles_t;

#elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
/* HP compiler on ia64 */
#    include <machine/sys/inline.h>
typedef unsigned long gmx_cycles_t;

#elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
/* Intel compiler on ia64 */
#    include <ia64intrin.h>
typedef unsigned long gmx_cycles_t;

#elif defined(__GNUC__) && defined(__ia64__)
/* ia64 with GCC inline assembly */
typedef unsigned long gmx_cycles_t;

#elif ((defined(__hppa__) || defined(__hppa)) && defined(__GNUC__))
/* HP PA-RISC, inline asm with gcc */
typedef unsigned long gmx_cycles_t;

#elif ((defined(__hppa__) || defined(__hppa)) && defined(__hpux))
/* HP PA-RISC, instruction when using HP compiler */
#    include <machine/inline.h>
typedef unsigned long gmx_cycles_t;

#elif defined(__GNUC__) && defined(__s390__)
/* S390, taken from FFTW who got it from James Treacy */
typedef unsigned long long gmx_cycles_t;

#elif defined(__GNUC__) && defined(__alpha__)
/* gcc inline assembly on alpha CPUs */
typedef unsigned long gmx_cycles_t;

#elif defined(__GNUC__) && defined(__sparc_v9__)
/* gcc inline assembly on sparc v9 */
typedef unsigned long gmx_cycles_t;

#elif defined(__DECC) && defined(__alpha)
/* Digital GEM C compiler on alpha */
#    include <c_asm.h>
typedef unsigned long gmx_cycles_t;

#elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
/* Irix compilers on SGI hardware. Get nanoseconds from struct timespec */
typedef unsigned long long gmx_cycles_t;

#elif (defined(__SVR4) && defined(__SUNPRO_CC))
/* Solaris high-resolution timers. Pull in the system header ourselves so
 * that hrtime_t is declared regardless of what the including file has
 * already included.
 */
#    include <sys/time.h>
typedef hrtime_t gmx_cycles_t;

#elif defined(__xlC__) && defined(_AIX)
/* AIX compilers */
#    include <sys/systemcfg.h>
#    include <sys/time.h>
typedef unsigned long long gmx_cycles_t;

#elif ((defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM)) \
       && (defined(__powerpc__) || defined(__ppc__)))
/* PowerPC using gcc inline assembly (also works on xlc>=7.0 with -qasm=gcc) */
typedef unsigned long long gmx_cycles_t;

#elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
/* Metrowerks on macintosh */
typedef unsigned long long gmx_cycles_t;

#elif defined(__sun) && defined(__sparcv9)
/* Solaris on SPARC */
typedef unsigned long gmx_cycles_t;

#else
/*! \brief Integer-like datatype for cycle counter values
 *
 *  Depending on your system this will usually be something like long long,
 *  or a special cycle datatype from the system header files. It is NOT
 *  necessarily real processor cycles - many systems count in nanoseconds
 *  or a special external time register at fixed frequency (not the CPU freq.)
 *
 *  You can subtract or add gmx_cycles_t types just as normal integers, and if
 *  you run the calibration routine you can also multiply it with a factor to
 *  translate the cycle data to seconds.
 */
typedef long gmx_cycles_t;

#endif
160 /*! \brief Read CPU cycle counter
162 * This routine returns an abstract datatype containing a
163 * cycle counter timestamp.
165 * \return Opaque data corresponding to a cycle reading.
167 * Please note that on most systems it takes several cycles
168 * to read and return the cycle counters. If you are measuring
169 * small intervals, you can compensate for this time by calling
170 * the routine twice and calculating what the difference is.
171 * Subtract this from your other measurements to get an accurate result.
173 * Use gmx_cycles_difference() to get a real number corresponding to
174 * the difference between two gmx_cycles_t values returned from this
175 * routine.
177 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) \
178 && (defined(__i386__) || defined(__x86_64__)) && !defined(_CRAYC))
179 static __inline__ gmx_cycles_t gmx_cycles_read()
181 /* x86 with GCC inline assembly - pentium TSC register */
182 unsigned low, high;
184 # if GMX_USE_RDTSCP
185 __asm__ __volatile__("rdtscp" : "=a"(low), "=d"(high)::"ecx");
186 # else
187 __asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));
188 # endif
189 const gmx_cycles_t c_low = low;
190 const gmx_cycles_t c_high = high;
191 return c_low | c_high << 32;
193 #elif ((defined __aarch64__) \
194 && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
195 static __inline__ gmx_cycles_t gmx_cycles_read(void)
197 /* 64-bit ARM cycle counters with GCC inline assembly */
198 gmx_cycles_t cycle;
199 __asm__ __volatile__("mrs %0, cntvct_el0" : "=r"(cycle));
201 return cycle;
203 #elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
204 static __inline__ gmx_cycles_t gmx_cycles_read(void)
206 unsigned int cycles_lo, cycles_hi;
207 asm volatile("mrrc p15, 1, %0, %1, c14" : "=r"(cycles_lo), "=r"(cycles_hi));
208 return ((gmx_cycles_t)cycles_lo) | (((gmx_cycles_t)cycles_hi) << 32);
210 #elif defined(_MSC_VER)
211 static __inline gmx_cycles_t gmx_cycles_read(void)
213 # ifdef _M_ARM
214 /* Windows on 64-bit ARM */
215 return __rdpmccntr64();
216 # else
217 /* x86 */
218 # if GMX_USE_RDTSCP
219 unsigned int ui;
220 return __rdtscp(&ui);
221 # else
222 return __rdtsc();
223 # endif
224 # endif
226 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
227 static inline gmx_cycles_t gmx_cycles_read(void)
229 /* HP compiler on ia64 */
230 gmx_cycles_t ret;
231 ret = _Asm_mov_from_ar(_AREG_ITC);
232 return ret;
234 #elif (defined(__INTEL_COMPILER) && defined(__ia64__))
235 static __inline__ gmx_cycles_t gmx_cycles_read(void)
237 /* Intel compiler on ia64 */
238 return __getReg(_IA64_REG_AR_ITC);
240 #elif defined(__GNUC__) && defined(__ia64__)
241 static __inline__ gmx_cycles_t gmx_cycles_read(void)
243 /* ia64 with GCC inline assembly */
244 gmx_cycles_t ret;
245 __asm__ __volatile__("mov %0=ar.itc" : "=r"(ret));
246 return ret;
248 #elif ((defined(__hppa__) || defined(__hppa)) && defined(__GNUC__))
249 static __inline__ gmx_cycles_t gmx_cycles_read(void)
251 /* HP PA-RISC, inline asm with gcc */
252 gmx_cycles_t ret;
253 __asm__ __volatile__("mfctl 16, %0" : "=r"(ret));
254 /* no input, nothing else clobbered */
255 return ret;
257 #elif ((defined(__hppa__) || defined(__hppa)) && defined(__hpux))
258 static inline gmx_cycles_t gmx_cycles_read(void)
260 /* HP PA-RISC, instruction when using HP compiler */
261 gmx_cycles_t ret;
262 _MFCTL(16, ret);
263 return ret;
265 #elif defined(__GNUC__) && defined(__s390__)
266 static __inline__ gmx_cycles_t gmx_cycles_read(void)
268 /* S390, taken from FFTW who got it from James Treacy */
269 gmx_cycles_t cycle;
270 __asm__("stck 0(%0)" : : "a"(&(cycle)) : "memory", "cc");
271 return cycle;
273 #elif defined(__GNUC__) && defined(__alpha__)
274 static __inline__ gmx_cycles_t gmx_cycles_read(void)
276 /* gcc inline assembly on alpha CPUs */
277 unsigned long cycle;
278 __asm__ __volatile__("rpcc %0" : "=r"(cycle));
279 return (cycle & 0xFFFFFFFF);
281 #elif defined(__GNUC__) && defined(__sparc_v9__)
282 static __inline__ gmx_cycles_t gmx_cycles_read(void)
284 /* gcc inline assembly on sparc v9 */
285 unsigned long ret;
286 __asm__("rd %%tick, %0" : "=r"(ret));
287 return ret;
289 #elif defined(__DECC) && defined(__alpha)
290 static __inline gmx_cycles_t gmx_cycles_read(void)
292 /* Digital GEM C compiler on alpha */
293 unsigned long cycle;
294 cycle = asm("rpcc %v0");
295 return (cycle & 0xFFFFFFFF);
297 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
298 static __inline gmx_cycles_t gmx_cycles_read(void)
300 /* Irix compilers on SGI hardware */
301 struct timespec t;
302 clock_gettime(CLOCK_SGI_CYCLE, &t);
303 /* Return the number of nanoseconds, so we can subtract/add */
304 return ((unsigned long long)t.tv_sec) * 1000000000 + (unsigned long long)t.tv_nsec;
306 #elif (defined(__SVR4) && defined(__SUNPRO_CC))
307 static inline gmx_cycles_t gmx_cycles_read(void)
309 /* Solaris high-resolution timers */
310 return gethrtime();
312 #elif defined(__xlC__) && defined(_AIX)
313 static inline gmx_cycles_t gmx_cycles_read(void)
315 /* AIX compilers. Inline the calculation instead of using library functions */
316 timebasestruct_t t1;
317 read_real_time(&t1, TIMEBASE_SZ);
318 /* POWER returns real time (seconds + nanoseconds),
319 * POWER_PC returns high/low 32 bits of a counter.
321 if (t1.flag == RTC_POWER_PC)
323 return ((gmx_cycles_t)t1.tb_high) << 32 | (gmx_cycles_t)t1.tb_low;
325 else
327 return ((gmx_cycles_t)t1.tb_high) * 1000000000 + (gmx_cycles_t)t1.tb_low;
330 #elif ((defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM)) \
331 && (defined(__powerpc__) || defined(__ppc__)))
332 static __inline__ gmx_cycles_t gmx_cycles_read(void)
334 /* PowerPC using gcc inline assembly (and xlC>=7.0 with -qasm=gcc, and clang) */
335 unsigned long low, high1, high2;
338 // clang 3.7 incorrectly warns that mftb* are
339 // deprecated. That's not correct - see
340 // https://llvm.org/bugs/show_bug.cgi?id=23680.
341 __asm__ __volatile__("mftbu %0" : "=r"(high1) :);
342 __asm__ __volatile__("mftb %0" : "=r"(low) :);
343 __asm__ __volatile__("mftbu %0" : "=r"(high2) :);
344 } while (high1 != high2);
346 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
348 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
349 static __inline__ gmx_cycles_t gmx_cycles_read(void)
351 /* Metrowerks on macintosh */
352 unsigned int long low, high1, high2;
355 __asm__ __volatile__("mftbu %0" : "=r"(high1) :);
356 __asm__ __volatile__("mftb %0" : "=r"(low) :);
357 __asm__ __volatile__("mftbu %0" : "=r"(high2) :);
358 } while (high1 != high2);
360 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
362 #elif defined(__sun) && defined(__sparcv9)
364 static __inline__ gmx_cycles_t gmx_cycles_read(void)
366 gmx_cycles_t ret;
367 __asm__ __volatile__("rd %%tick, %0" : "=r"(ret));
368 return ret;
371 #elif defined(_CRAYC)
372 # include <intrinsics.h>
374 static __inline gmx_cycles_t gmx_cycles_read(void)
376 return _rtc();
378 #else
379 static gmx_cycles_t gmx_cycles_read(void)
381 return 0;
383 #endif
/*! \brief Check if high-resolution cycle counters are available
 *
 *  Not all architectures provide any way to read timestamp counters
 *  in the CPU, and on some it is broken. Although we refer to it
 *  as cycle counters, it is not necessarily given in units of
 *  cycles.
 *
 *  If you notice that your system is missing, implement support for it,
 *  find out how to detect the system during preprocessing, and send us a
 *  patch.
 *
 *  \return true if cycle counters are available, false if not.
 *
 *  \note This function does not need to be in the header for performance
 *        reasons, but it is very important that we get exactly the
 *        same detection as for gmx_cycles_read() routines. If you
 *        compile the library with one compiler, and then use a different
 *        one when later linking to the library it might happen that the
 *        library supports cyclecounters but not the headers, or vice versa.
 */
#if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) \
      || defined(__PGIC__) || defined(_CRAYC)) \
     && (defined(__i386__) || defined(__x86_64__)))
static __inline__ bool gmx_cycles_have_counter(void)
{
    /* x86 or x86-64 with GCC inline assembly - pentium TSC register */
    return true;
}
#elif ((defined __aarch64__) \
       && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
static __inline bool gmx_cycles_have_counter(void)
{
    /* 64-bit ARM cycle counters with GCC inline assembly */
    return true;
}
#elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
static __inline bool gmx_cycles_have_counter(void)
{
    /* Armv7A can provide 64-bit cycles by returning two registers. However, it will not work unless
     * the performance registers have been made available from user space by a kernel module -
     * otherwise it returns 0.
     */
    gmx_cycles_t c0, c1;

    c0 = gmx_cycles_read();
    c1 = gmx_cycles_read();

    /* if both counters return 0, support is not present */
    return (c0 != 0 || c1 != 0);
}
#elif (defined(_MSC_VER))
static __inline bool gmx_cycles_have_counter(void)
{
    return true;
}
#elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
static inline bool gmx_cycles_have_counter(void)
{
    /* HP compiler on ia64, use special instruction to read ITC */
    return true;
}
#elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
static __inline__ bool gmx_cycles_have_counter(void)
{
    /* Intel compiler on ia64, use special instruction to read ITC */
    return true;
}
#elif defined(__GNUC__) && defined(__ia64__)
static __inline__ bool gmx_cycles_have_counter(void)
{
    /* ia64 with GCC inline assembly - ITC register */
    return true;
}
#elif ((defined(__hppa__) || defined(__hppa)) && defined(__GNUC__))
static __inline__ bool gmx_cycles_have_counter(void)
{
    /* HP PA-RISC, inline asm with gcc */
    return true;
}
#elif ((defined(__hppa__) || defined(__hppa)) && defined(__hpux))
static inline bool gmx_cycles_have_counter(void)
{
    /* HP PA-RISC, instruction when using HP compiler */
    return true;
}
#elif defined(__GNUC__) && defined(__s390__)
static __inline__ bool gmx_cycles_have_counter(void)
{
    /* S390, taken from FFTW who got it from James Treacy */
    return true;
}
#elif defined(__GNUC__) && defined(__alpha__)
static __inline__ bool gmx_cycles_have_counter(void)
{
    /* gcc inline assembly on alpha CPUs */
    return true;
}
#elif defined(__GNUC__) && defined(__sparc_v9__)
static __inline__ bool gmx_cycles_have_counter(void)
{
    /* gcc inline assembly on sparc v9 */
    return true;
}
#elif defined(__DECC) && defined(__alpha)
static __inline bool gmx_cycles_have_counter(void)
{
    /* Digital GEM C compiler on alpha */
    return true;
}
#elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
static __inline bool gmx_cycles_have_counter(void)
{
    /* Irix compilers on SGI hardware */
    return true;
}
#elif (defined(__SVR4) && defined(__SUNPRO_CC))
static inline bool gmx_cycles_have_counter(void)
{
    /* Solaris high-resolution timers */
    return true;
}
#elif defined(__xlC__) && defined(_AIX)
static inline bool gmx_cycles_have_counter(void)
{
    /* AIX compilers */
    return true;
}
#elif ((defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM)) \
       && (defined(__powerpc__) || defined(__ppc__)))
static __inline__ bool gmx_cycles_have_counter(void)
{
    /* PowerPC using gcc inline assembly (and xlc>=7.0 with -qasm=gcc) */
    return true;
}
#elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
static __inline__ bool gmx_cycles_have_counter(void)
{
    /* Metrowerks on macintosh */
    return true;
}
#elif defined(__sun) && defined(__sparcv9)

static __inline__ bool gmx_cycles_have_counter(void)
{
    /* Solaris on SPARC */
    return true;
}
#else
/* Declared inline so that translation units which include this header
 * without calling the function do not get unused-function warnings.
 */
static inline bool gmx_cycles_have_counter(void)
{
    /* No cycle counter that we know of on this system */
    return false;
}
#endif
542 /*! \brief Calculate number of seconds per cycle tick on host
544 * This routine runs a timer loop to calibrate the number of
545 * seconds per the units returned from gmx_cycles_read().
547 * \param sampletime Minimum real sample time. It takes some trial-and-error
548 * to find the correct delay loop size, so the total runtime of
549 * this routine is about twice this time.
550 * \return Number of seconds per cycle unit. If it is not possible to
551 * calculate on this system (for whatever reason) the return value
552 * will be -1, so check that it is positive before using it.
554 double gmx_cycles_calibrate(double sampletime);
556 #endif