1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
4 * This file is part of GROMACS.
7 * Written by the Gromacs development team under coordination of
8 * David van der Spoel, Berk Hess, and Erik Lindahl.
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * To help us fund GROMACS development, we humbly ask that you cite
16 * the research papers on the package. Check out http://www.gromacs.org
19 * Gnomes, ROck Monsters And Chili Sauce
23 #include "visibility.h"
28 } /* fixes auto-indentation problems */
32 /* Currently identifiable CPU Vendors */
35 GMX_CPUID_VENDOR_CANNOTDETECT
, /* Should only be used if something fails */
36 GMX_CPUID_VENDOR_UNKNOWN
,
37 GMX_CPUID_VENDOR_INTEL
,
43 /* CPU feature/property list, to be used as indices into the feature array of the
44 * gmxcpuid_t data structure.
46 * To facilitate looking things up, we keep this list alphabetical.
47 * The list is NOT exhaustive - we have basically added stuff that might be
48 * useful in an application like Gromacs.
50 * AMD and Intel tend to share most architectural elements, and even if the
51 * flags might have to be detected in different ways (different cpuid registers),
52 * once the flag is present the functions should be identical. Unfortunately the
53 * trend right now (2012) seems to be that they are diverging. This means that
54 * we need to use specific flags to the compiler to maximize performance, and
55 * then the binaries might not be portable between Intel and AMD as they were
56 * before when we only needed to check for SSE and/or SSE2 support in Gromacs.
58 enum gmx_cpuid_feature
60 GMX_CPUID_FEATURE_CANNOTDETECT
, /* Flag set if we could not detect on this CPU */
61 GMX_CPUID_FEATURE_X86_AES
, /* x86 advanced encryption standard accel. */
62 GMX_CPUID_FEATURE_X86_APIC
, /* APIC support */
63 GMX_CPUID_FEATURE_X86_AVX
, /* Advanced vector extensions */
64 GMX_CPUID_FEATURE_X86_AVX2
, /* AVX2 including gather support (not used yet) */
65 GMX_CPUID_FEATURE_X86_CLFSH
, /* Supports CLFLUSH instruction */
66 GMX_CPUID_FEATURE_X86_CMOV
, /* Conditional move insn support */
67 GMX_CPUID_FEATURE_X86_CX8
, /* Supports CMPXCHG8B (8-byte compare-exchange) */
68 GMX_CPUID_FEATURE_X86_CX16
, /* Supports CMPXCHG16B (16-byte compare-exchg) */
69 GMX_CPUID_FEATURE_X86_F16C
, /* Supports 16-bit FP conversion instructions */
70 GMX_CPUID_FEATURE_X86_FMA
, /* Fused-multiply add support (mainly for AVX) */
71 GMX_CPUID_FEATURE_X86_FMA4
, /* 4-operand FMA, only on AMD for now */
72 GMX_CPUID_FEATURE_X86_HTT
, /* Hyper-Threading supported */
73 GMX_CPUID_FEATURE_X86_LAHF_LM
, /* LAHF/SAHF support in 64 bits */
74 GMX_CPUID_FEATURE_X86_MISALIGNSSE
, /* Support for misaligned SSE data instructions */
75 GMX_CPUID_FEATURE_X86_MMX
, /* MMX registers and instructions */
76 GMX_CPUID_FEATURE_X86_MSR
, /* Supports Intel model-specific-registers */
77 GMX_CPUID_FEATURE_X86_NONSTOP_TSC
, /* Invariant TSC (constant rate in ACPI states) */
78 GMX_CPUID_FEATURE_X86_PCID
, /* Process context identifier support */
79 GMX_CPUID_FEATURE_X86_PCLMULDQ
, /* Carry-less 64-bit multiplication supported */
80 GMX_CPUID_FEATURE_X86_PDCM
, /* Perfmon and Debug Capability */
81 GMX_CPUID_FEATURE_X86_PDPE1GB
, /* Support for 1GB pages */
82 GMX_CPUID_FEATURE_X86_POPCNT
, /* Supports the POPCNT (population count) insn */
83 GMX_CPUID_FEATURE_X86_PSE
, /* Supports 4MB-pages (page size extension) */
84 GMX_CPUID_FEATURE_X86_RDRND
, /* RDRAND high-quality hardware random numbers */
85 GMX_CPUID_FEATURE_X86_RDTSCP
, /* Serializing rdtscp instruction available */
86 GMX_CPUID_FEATURE_X86_SSE2
, /* SSE 2 */
87 GMX_CPUID_FEATURE_X86_SSE3
, /* SSE 3 */
88 GMX_CPUID_FEATURE_X86_SSE4A
, /* SSE 4A */
89 GMX_CPUID_FEATURE_X86_SSE4_1
, /* SSE 4.1 */
90 GMX_CPUID_FEATURE_X86_SSE4_2
, /* SSE 4.2 */
91 GMX_CPUID_FEATURE_X86_SSSE3
, /* Supplemental SSE3 */
92 GMX_CPUID_FEATURE_X86_TDT
, /* TSC deadline timer */
93 GMX_CPUID_FEATURE_X86_X2APIC
, /* Extended xAPIC Support */
94 GMX_CPUID_FEATURE_X86_XOP
, /* AMD extended instructions, only AMD for now */
99 /* Currently supported acceleration instruction sets, intrinsics or other similar combinations
100 * in Gromacs. There is not always a 1-to-1 correspondence with feature flags; on some AMD
101 * hardware we prefer to use 128-bit AVX instructions (although 256-bit ones could be executed),
102 * and we still haven't written the AVX2 kernels.
104 enum gmx_cpuid_acceleration
106 GMX_CPUID_ACCELERATION_CANNOTDETECT
, /* Should only be used if something fails */
107 GMX_CPUID_ACCELERATION_NONE
,
108 GMX_CPUID_ACCELERATION_X86_SSE2
,
109 GMX_CPUID_ACCELERATION_X86_SSE4_1
,
110 GMX_CPUID_ACCELERATION_X86_AVX_128_FMA
,
111 GMX_CPUID_ACCELERATION_X86_AVX_256
,
112 GMX_CPUID_NACCELERATIONS
115 /* Text strings corresponding to CPU vendors */
118 gmx_cpuid_vendor_string
[GMX_CPUID_NVENDORS
];
120 /* Text strings for CPU feature indices */
122 gmx_cpuid_feature_string
[GMX_CPUID_NFEATURES
];
124 /* Text strings for Gromacs acceleration/instruction sets */
126 gmx_cpuid_acceleration_string
[GMX_CPUID_NACCELERATIONS
];
129 /* Abstract data type with CPU detection information. Set by gmx_cpuid_init(). */
130 typedef struct gmx_cpuid
*
134 /* Fill the data structure by using CPU detection instructions.
135 * Return 0 on success, 1 if something bad happened.
138 gmx_cpuid_init (gmx_cpuid_t
* cpuid
);
141 /* Return the vendor id as enumerated type. Use gmx_cpuid_vendor_string[]
142 * to get the corresponding text string.
145 enum gmx_cpuid_vendor
146 gmx_cpuid_vendor (gmx_cpuid_t cpuid
);
149 /* Return a constant pointer to the processor brand string. */
151 gmx_cpuid_brand (gmx_cpuid_t cpuid
);
154 /* Return processor family version. For a chip of version 1.2.3, this is 1. */
157 gmx_cpuid_family (gmx_cpuid_t cpuid
);
159 /* Return processor model version. For a chip of version 1.2.3, this is 2. */
162 gmx_cpuid_model (gmx_cpuid_t cpuid
);
164 /* Return processor stepping version. For a chip of version 1.2.3, this is 3. */
166 gmx_cpuid_stepping (gmx_cpuid_t cpuid
);
169 /* Check whether a particular CPUID feature is set.
170 * Returns 0 if flag "feature" is not set, 1 if the flag is set. We cannot use
171 * gmx_bool here since it must be possible to compile this file without simple.h.
175 gmx_cpuid_feature (gmx_cpuid_t cpuid
,
176 enum gmx_cpuid_feature feature
);
179 /* Enumerated values for x86 SMT enabled-status. Note that this does not refer
180 * to Hyper-Threading support (that is the flag GMX_CPUID_FEATURE_X86_HTT), but
181 * whether Hyper-Threading is _enabled_ and _used_ in the BIOS right now.
183 enum gmx_cpuid_x86_smt
185 GMX_CPUID_X86_SMT_CANNOTDETECT
,
186 GMX_CPUID_X86_SMT_DISABLED
,
187 GMX_CPUID_X86_SMT_ENABLED
190 /* Returns the status of x86 SMT support. IMPORTANT: There are non-zero
191 * return values for this routine that still do not indicate supported and
192 * enabled smt/Hyper-Threading. You need to carefully check the return value
193 * against the enumerated type values to see what you are getting.
195 * Long-term, this functionality will move to a new hardware topology detection
196 * layer, but that will require a lot of new code and a working interface to the
197 * hwloc library. Surprisingly, there is no simple way to find out that
198 * Hyper-Threading is actually turned on without fully enumerating and checking
199 * all the cores, which we presently can only do on Linux. This means a couple
202 * 1) If you want to know whether your CPU _supports_ Hyper-Threading in the
203 * first place, check the GMX_CPUID_FEATURE_X86_HTT flag instead!
204 * 2) There are several scenarios where this routine will say that it cannot
205 * detect whether SMT is enabled and used right now.
206 * 3) If you need support on non-Linux x86, you have to write it :-)
207 * 4) Don't invest too much effort, since this will be replaced with
208 * full hardware topology detection in the future.
209 * 5) Don't worry if the detection does not work. It is not a catastrophe,
210 * but we get slightly better performance on x86 if we use Hyper-Threading
211 * cores in direct space, but not reciprocal space.
213 * Since this routine presently only supports Hyper-Threading we say X86_SMT
214 * in order not to give the impression we can detect any SMT. We haven't
215 * even tested the performance on other SMT implementations, so it is not
216 * obvious we shouldn't use SMT there.
219 enum gmx_cpuid_x86_smt
220 gmx_cpuid_x86_smt(gmx_cpuid_t cpuid
);
224 /* Formats a text string (up to n characters) from the data structure.
225 * The output will have max 80 chars between newline characters.
228 gmx_cpuid_formatstring (gmx_cpuid_t cpuid
,
233 /* Suggests a suitable gromacs acceleration based on the support in the
236 enum gmx_cpuid_acceleration
237 gmx_cpuid_acceleration_suggest (gmx_cpuid_t cpuid
);
240 /* Check if this binary was compiled with the same acceleration as we
241 * would suggest for the current hardware. Always print stats to the log file
242 * if it is non-NULL, and print a warning to stdout if we don't have a match.
245 gmx_cpuid_acceleration_check (gmx_cpuid_t cpuid
,
249 /* Release resources used by data structure. Note that the pointer to the
250 * CPU brand string will no longer be valid once this routine has been called.
253 gmx_cpuid_done (gmx_cpuid_t cpuid
);
263 #endif /* GMX_CPUID_H_ */