Moved mdatom.h from legacyheader/types to mdtypes.
[gromacs.git] / src / gromacs / timing / cyclecounter.h
blob0221013b4bc00f172fbfbca26b58863fb35b50bb
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 1991-2006 David van der Spoel, Erik Lindahl, Berk Hess, University of Groningen.
5 * Copyright (c) 2013,2014,2015, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
/*! \libinternal \file
 * \brief
 * High-resolution timestamp or CPU clock cycle counters.
 *
 * After reading the current value with gmx_cycles_read() you can add or
 * subtract these numbers as normal integers of type gmx_cycles_t.
 *
 * \inlibraryapi
 */
45 #ifndef GMX_TIMING_CYCLECOUNTER_H
46 #define GMX_TIMING_CYCLECOUNTER_H
/*
 * Define HAVE_RDTSCP to use the serializing rdtscp instruction instead of rdtsc.
 * This is only supported on newer Intel/AMD hardware, but provides better accuracy.
 */
52 #include "config.h"
54 #ifdef _MSC_VER
55 #include <intrin.h>
56 #endif
58 #ifdef __cplusplus
59 extern "C"
61 #endif
62 #if 0
63 } /* fixes auto-indentation problems */
64 #endif
/* Minor implementation note:
 *
 * I like to use these counters in other programs too, so to avoid making
 * it dependent on other Gromacs definitions I use the #ifdef's to set
 * architecture-specific inline macros instead of using gmx_inline from
 * gmx_types.h /Erik 2005-12-10
 */
/* Select the integer type used to hold one cycle-counter reading.
 * Exactly one branch is taken, chosen from compiler and architecture
 * macros; the final #else branch is the generic fallback.
 */
#if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
    (defined(__i386__) || defined(__x86_64__)))
/* x86 or x86-64 with GCC inline assembly */
typedef unsigned long long gmx_cycles_t;

#elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
/* 64-bit ARM cycle counters with GCC inline assembly */
typedef unsigned long long gmx_cycles_t;

#elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
/* Armv7A can provide 64-bit cycles by returning two registers */
typedef unsigned long long gmx_cycles_t;

#elif defined(_MSC_VER)
#include <windows.h>
typedef __int64 gmx_cycles_t;

#elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
/* HP compiler on ia64 */
#include <machine/sys/inline.h>
typedef unsigned long gmx_cycles_t;

#elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
/* Intel compiler on ia64 */
#include <ia64intrin.h>
typedef unsigned long gmx_cycles_t;

#elif defined(__GNUC__) && defined(__ia64__)
/* ia64 with GCC inline assembly */
typedef unsigned long gmx_cycles_t;

#elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
/* HP PA-RISC, inline asm with gcc */
typedef unsigned long gmx_cycles_t;

#elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
/* HP PA-RISC, instruction when using HP compiler */
#include <machine/inline.h>
typedef unsigned long gmx_cycles_t;

#elif defined(__GNUC__) && defined(__s390__)
/* S390, taken from FFTW who got it from James Treacy */
typedef unsigned long long gmx_cycles_t;

#elif defined(__GNUC__) && defined(__alpha__)
/* gcc inline assembly on alpha CPUs */
typedef unsigned long gmx_cycles_t;

#elif defined(__GNUC__) && defined(__sparc_v9__)
/* gcc inline assembly on sparc v9 */
typedef unsigned long gmx_cycles_t;

#elif defined(__DECC) && defined(__alpha)
/* Digital GEM C compiler on alpha */
#include <c_asm.h>
typedef unsigned long gmx_cycles_t;

#elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
/* Irix compilers on SGI hardware. Get nanoseconds from struct timespec */
typedef unsigned long long gmx_cycles_t;

#elif (defined(__SVR4) && defined (__SUNPRO_CC))
/* Solaris high-resolution timers */
typedef hrtime_t gmx_cycles_t;

#elif defined(__xlC__) && defined (_AIX)
/* AIX compilers */
#include <sys/systemcfg.h>
#include <sys/time.h>
typedef unsigned long long gmx_cycles_t;

#elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
    ( defined(__powerpc__) || defined(__ppc__) ) )
/* PowerPC using gcc inline assembly (also works on xlc>=7.0 with -qasm=gcc) */
typedef unsigned long long gmx_cycles_t;

#elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
/* Metrowerks on macintosh */
typedef unsigned long long gmx_cycles_t;

#elif defined(__sun) && defined(__sparcv9)
/* Solaris on SPARC */
typedef unsigned long gmx_cycles_t;

#else
/*! \brief Integer-like datatype for cycle counter values
 *
 *  Depending on your system this will usually be something like long long,
 *  or a special cycle datatype from the system header files. It is NOT
 *  necessarily real processor cycles - many systems count in nanoseconds
 *  or a special external time register at fixed frequency (not the CPU freq.)
 *
 *  You can subtract or add gmx_cycles_t types just as normal integers, and if
 *  you run the calibration routine you can also multiply it with a factor to
 *  translate the cycle data to seconds.
 */
typedef long gmx_cycles_t;

#endif
194 /*! \brief Read CPU cycle counter
196 * This routine returns an abstract datatype containing a
197 * cycle counter timestamp.
199 * \return Opaque data corresponding to a cycle reading.
201 * Please note that on most systems it takes several cycles
202 * to read and return the cycle counters. If you are measuring
203 * small intervals, you can compensate for this time by calling
204 * the routine twice and calculating what the difference is.
205 * Subtract this from your other measurements to get an accurate result.
207 * Use gmx_cycles_difference() to get a real number corresponding to
208 * the difference between two gmx_cycles_t values returned from this
209 * routine.
211 #if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)) && \
212 (defined(__i386__) || defined(__x86_64__)) && !defined(_CRAYC))
213 static __inline__ gmx_cycles_t gmx_cycles_read(void)
215 /* x86 with GCC inline assembly - pentium TSC register */
216 gmx_cycles_t cycle;
217 unsigned low, high;
219 #ifdef HAVE_RDTSCP
220 __asm__ __volatile__("rdtscp" : "=a" (low), "=d" (high) :: "ecx" );
221 #else
222 __asm__ __volatile__("rdtsc" : "=a" (low), "=d" (high));
223 #endif
225 cycle = ((unsigned long long)low) | (((unsigned long long)high)<<32);
227 return cycle;
229 #elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
230 static __inline__ gmx_cycles_t gmx_cycles_read(void)
232 /* 64-bit ARM cycle counters with GCC inline assembly */
233 gmx_cycles_t cycle;
234 __asm__ __volatile__("mrs %0, cntvct_el0" : "=r" (cycle) );
236 return cycle;
238 #elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
239 static __inline__ gmx_cycles_t gmx_cycles_read(void)
241 unsigned int cycles_lo, cycles_hi;
242 asm volatile("mrrc p15, 1, %0, %1, c14" : "=r" (cycles_lo), "=r" (cycles_hi));
243 return ((gmx_cycles_t)cycles_lo) | (((gmx_cycles_t)cycles_hi) << 32);
245 #elif defined(_MSC_VER)
246 static __inline gmx_cycles_t gmx_cycles_read(void)
248 #ifdef _M_ARM
249 /* Windows on 64-bit ARM */
250 return __rdpmccntr64();
251 #else
252 /* x86 */
253 # ifdef HAVE_RDTSCP
254 unsigned int ui;
255 return __rdtscp(&ui);
256 # else
257 return __rdtsc();
258 # endif
259 #endif
261 #elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
262 static inline gmx_cycles_t gmx_cycles_read(void)
264 /* HP compiler on ia64 */
265 gmx_cycles_t ret;
266 ret = _Asm_mov_from_ar (_AREG_ITC);
267 return ret;
269 #elif (defined(__INTEL_COMPILER) && defined(__ia64__))
270 static __inline__ gmx_cycles_t gmx_cycles_read(void)
272 /* Intel compiler on ia64 */
273 return __getReg(_IA64_REG_AR_ITC);
275 #elif defined(__GNUC__) && defined(__ia64__)
276 static __inline__ gmx_cycles_t gmx_cycles_read(void)
278 /* ia64 with GCC inline assembly */
279 gmx_cycles_t ret;
280 __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (ret));
281 return ret;
283 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
284 static __inline__ gmx_cycles_t gmx_cycles_read(void)
286 /* HP PA-RISC, inline asm with gcc */
287 gmx_cycles_t ret;
288 __asm__ __volatile__("mfctl 16, %0" : "=r" (ret));
289 /* no input, nothing else clobbered */
290 return ret;
292 #elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
293 static inline gmx_cycles_t gmx_cycles_read(void)
295 /* HP PA-RISC, instruction when using HP compiler */
296 gmx_cycles_t ret;
297 _MFCTL(16, ret);
298 return ret;
300 #elif defined(__GNUC__) && defined(__s390__)
301 static __inline__ gmx_cycles_t gmx_cycles_read(void)
303 /* S390, taken from FFTW who got it from James Treacy */
304 gmx_cycles_t cycle;
305 __asm__("stck 0(%0)" : : "a" (&(cycle)) : "memory", "cc");
306 return cycle;
308 #elif defined(__GNUC__) && defined(__alpha__)
309 static __inline__ gmx_cycles_t gmx_cycles_read(void)
311 /* gcc inline assembly on alpha CPUs */
312 unsigned long cycle;
313 __asm__ __volatile__ ("rpcc %0" : "=r" (cycle));
314 return (cycle & 0xFFFFFFFF);
316 #elif defined(__GNUC__) && defined(__sparc_v9__)
317 static __inline__ gmx_cycles_t gmx_cycles_read(void)
319 /* gcc inline assembly on sparc v9 */
320 unsigned long ret;
321 __asm__("rd %%tick, %0" : "=r" (ret));
322 return ret;
324 #elif defined(__DECC) && defined(__alpha)
325 static __inline gmx_cycles_t gmx_cycles_read(void)
327 /* Digital GEM C compiler on alpha */
328 unsigned long cycle;
329 cycle = asm ("rpcc %v0");
330 return (cycle & 0xFFFFFFFF);
332 #elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
333 static __inline gmx_cycles_t gmx_cycles_read(void)
335 /* Irix compilers on SGI hardware */
336 struct timespec t;
337 clock_gettime(CLOCK_SGI_CYCLE, &t);
338 /* Return the number of nanoseconds, so we can subtract/add */
339 return ((unsigned long long)t.tv_sec)*1000000000+
340 (unsigned long long)t.tv_nsec;
342 #elif (defined(__SVR4) && defined (__SUNPRO_CC))
343 static inline gmx_cycles_t gmx_cycles_read(void)
345 /* Solaris high-resolution timers */
346 return gethrtime();
348 #elif defined(__xlC__) && defined (_AIX)
349 static inline gmx_cycles_t gmx_cycles_read(void)
351 /* AIX compilers. Inline the calculation instead of using library functions */
352 timebasestruct_t t1;
353 read_real_time(&t1, TIMEBASE_SZ);
354 /* POWER returns real time (seconds + nanoseconds),
355 * POWER_PC returns high/low 32 bits of a counter.
357 if (t1.flag == RTC_POWER_PC)
359 return ((gmx_cycles_t)t1.tb_high)<<32 | (gmx_cycles_t)t1.tb_low;
361 else
363 return ((gmx_cycles_t)t1.tb_high)*1000000000+(gmx_cycles_t)t1.tb_low;
366 #elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
367 ( defined(__powerpc__) || defined(__ppc__) ) )
368 static __inline__ gmx_cycles_t gmx_cycles_read(void)
370 /* PowerPC using gcc inline assembly (and xlC>=7.0 with -qasm=gcc) */
371 unsigned long low, high1, high2;
374 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
375 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
376 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
378 while (high1 != high2);
380 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
382 #elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
383 static __inline__ gmx_cycles_t gmx_cycles_read(void)
385 /* Metrowerks on macintosh */
386 unsigned int long low, high1, high2;
389 __asm__ __volatile__ ("mftbu %0" : "=r" (high1) : );
390 __asm__ __volatile__ ("mftb %0" : "=r" (low) : );
391 __asm__ __volatile__ ("mftbu %0" : "=r" (high2) : );
393 while (high1 != high2);
395 return (((gmx_cycles_t)high2) << 32) | (gmx_cycles_t)low;
397 #elif defined(__sun) && defined(__sparcv9)
399 static __inline__ gmx_cycles_t gmx_cycles_read(void)
401 gmx_cycles_t ret;
402 __asm__ __volatile__("rd %%tick, %0" : "=r" (ret));
403 return ret;
406 #elif defined(_CRAYC)
407 #include <intrinsics.h>
409 static __inline gmx_cycles_t gmx_cycles_read(void)
411 return _rtc();
413 #else
414 static gmx_cycles_t gmx_cycles_read(void)
416 return 0;
418 #endif
/*! \brief Check if high-resolution cycle counters are available
 *
 *  Not all architectures provide any way to read timestamp counters
 *  in the CPU, and on some it is broken. Although we refer to it
 *  as cycle counters, it is not necessarily given in units of
 *  cycles.
 *
 *  If you notice that your system is missing, implement support for it,
 *  find out how to detect the system during preprocessing, and send us a
 *  patch.
 *
 *  \return 1 if cycle counters are available, 0 if not.
 *
 *  \note This function does not need to be in the header for performance
 *        reasons, but it is very important that we get exactly the
 *        same detection as for gmx_cycles_read() routines. If you
 *        compile the library with one compiler, and then use a different
 *        one when later linking to the library it might happen that the
 *        library supports cyclecounters but not the headers, or vice versa.
 */
#if ((defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__) || defined(_CRAYC)) && \
    (defined(__i386__) || defined(__x86_64__)))
static __inline__ int gmx_cycles_have_counter(void)
{
    /* x86 or x86-64 with GCC inline assembly - pentium TSC register */
    return 1;
}

#elif ((defined __aarch64__) && (defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__PATHSCALE__) || defined(__PGIC__)))
static __inline int gmx_cycles_have_counter(void)
{
    /* 64-bit ARM cycle counters with GCC inline assembly */
    return 1;
}

#elif defined(__ARM_ARCH_7A__) && defined(__GNUC__)
static __inline int gmx_cycles_have_counter(void)
{
    /* Armv7A can provide 64-bit cycles by returning two registers. However, it will not work unless
     * the performance registers have been made available from user space by a kernel module -
     * otherwise it returns 0.
     */
    gmx_cycles_t c0, c1;

    c0 = gmx_cycles_read();
    c1 = gmx_cycles_read();

    /* if both counters return 0, support is not present */
    return (c0 != 0 || c1 != 0);
}

#elif (defined(_MSC_VER))
static __inline int gmx_cycles_have_counter(void)
{
    /* Microsoft compiler, counter is read through intrinsics */
    return 1;
}

#elif (defined(__hpux) || defined(__HP_cc)) && defined(__ia64)
static inline int gmx_cycles_have_counter(void)
{
    /* HP compiler on ia64, use special instruction to read ITC */
    return 1;
}

#elif (defined(__INTEL_COMPILER) || defined(__ECC)) && defined(__ia64__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* Intel compiler on ia64, use special instruction to read ITC */
    return 1;
}

#elif defined(__GNUC__) && defined(__ia64__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* ia64 with GCC inline assembly - ITC register */
    return 1;
}

#elif ((defined(__hppa__) || defined(__hppa)) && defined (__GNUC__))
static __inline__ int gmx_cycles_have_counter(void)
{
    /* HP PA-RISC, inline asm with gcc */
    return 1;
}

#elif ((defined(__hppa__) || defined(__hppa)) && defined (__hpux))
static inline int gmx_cycles_have_counter(void)
{
    /* HP PA-RISC, instruction when using HP compiler */
    return 1;
}

#elif defined(__GNUC__) && defined(__s390__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* S390, taken from FFTW who got it from James Treacy */
    return 1;
}

#elif defined(__GNUC__) && defined(__alpha__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* gcc inline assembly on alpha CPUs */
    return 1;
}

#elif defined(__GNUC__) && defined(__sparc_v9__)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* gcc inline assembly on sparc v9 */
    return 1;
}

#elif defined(__DECC) && defined(__alpha)
static __inline int gmx_cycles_have_counter(void)
{
    /* Digital GEM C compiler on alpha */
    return 1;
}

#elif (defined(__sgi) && defined(CLOCK_SGI_CYCLE))
static __inline int gmx_cycles_have_counter(void)
{
    /* Irix compilers on SGI hardware */
    return 1;
}

#elif (defined(__SVR4) && defined (__SUNPRO_CC))
static inline int gmx_cycles_have_counter(void)
{
    /* Solaris high-resolution timers */
    return 1;
}

#elif defined(__xlC__) && defined (_AIX)
static inline int gmx_cycles_have_counter(void)
{
    /* AIX compilers */
    return 1;
}

#elif ( ( defined(__GNUC__) || defined(__IBM_GCC_ASM) || defined(__IBM_STDCPP_ASM) ) && \
    ( defined(__powerpc__) || defined(__ppc__) ) )
static __inline__ int gmx_cycles_have_counter(void)
{
    /* PowerPC using gcc inline assembly (and xlc>=7.0 with -qasm=gcc) */
    return 1;
}

#elif (defined(__MWERKS__) && (defined(MAC) || defined(macintosh)))
static __inline__ int gmx_cycles_have_counter(void)
{
    /* Metrowerks on macintosh */
    return 1;
}

#elif defined(__sun) && defined(__sparcv9)
static __inline__ int gmx_cycles_have_counter(void)
{
    /* Solaris on SPARC*/
    return 1;
}

#elif defined(_CRAYC)
static __inline int gmx_cycles_have_counter(void)
{
    /* Cray compiler: gmx_cycles_read() has a dedicated _CRAYC branch at
     * this position in its chain, so report the counter as available to
     * keep the two detections identical.
     */
    return 1;
}

#else
static int gmx_cycles_have_counter(void)
{
    /* No cycle counter that we know of on this system */
    return 0;
}

#endif
/*! \brief Calculate number of seconds per cycle tick on host
 *
 *  This routine runs a timer loop to calibrate the number of
 *  seconds per the units returned from gmx_cycles_read().
 *
 *  \param  sampletime Minimum real sample time. It takes some trial-and-error
 *                     to find the correct delay loop size, so the total runtime of
 *                     this routine is about twice this time.
 *  \return Number of seconds per cycle unit. If it is not possible to
 *          calculate on this system (for whatever reason) the return value
 *          will be -1, so check that it is positive before using it.
 */
double
gmx_cycles_calibrate(double sampletime);
590 #ifdef __cplusplus
592 #endif
594 #endif