Fix builds on ARM & clarify (ARM) GPU support
[gromacs.git] / src / gromacs / hardware / cpuinfo.cpp
blob6f16a041c52b165ef2cddfd43e975f314a1f3fbe
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2017, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
36 /*! \internal \file
37 * \brief
38 * Implements gmx::CpuInfo.
40 * We need to be able to compile this file in stand-alone mode to use basic
41 * CPU feature detection to set the SIMD acceleration and similar things in
42 * CMake, while we still want to use more features that enable topology
43 * detection when config.h is present.
45 * We solve this by skipping the advanced stuff when the preprocessor
46 * macro GMX_CPUINFO_STANDALONE is defined. In this case you likely also need to
47 * define GMX_X86_GCC_INLINE_ASM if you are on x86; without inline assembly
48 * support it is not possible to perform the actual detection on Linux/Mac.
49 * Although these macros are specific to this file, they still use the GMX prefix.
51 * The remaining defines (GMX_NATIVE_WINDOWS,HAVE_UNISTD_H,HAVE_SCHED_H,
52 * HAVE_SYSCONF, HAVE_SCHED_AFFINITY) are only used to determine the topology on
53 * x86, and for this we rely on including config.h.
55 * \author Erik Lindahl <erik.lindahl@gmail.com>
56 * \ingroup module_hardware
59 #ifndef GMX_CPUINFO_STANDALONE
60 # include "gmxpre.h"
61 #endif
63 #include "cpuinfo.h"
65 #ifndef GMX_CPUINFO_STANDALONE
66 # include "config.h"
67 #else
68 # define GMX_NATIVE_WINDOWS 0
69 #endif
71 #if defined _MSC_VER
72 # include <intrin.h> // __cpuid()
73 #endif
75 #if GMX_NATIVE_WINDOWS
76 # include <windows.h> // sysinfo(), necessary for topology stuff
77 #endif
79 #ifdef HAVE_SCHED_H
80 # include <sched.h> // sched_getaffinity(), sched_setaffinity()
81 #endif
82 #ifdef HAVE_UNISTD_H
83 # include <unistd.h> // sysconf()
84 #endif
86 #include <cctype>
87 #include <cstdlib>
89 #include <algorithm>
90 #include <fstream>
91 #include <map>
92 #include <set>
93 #include <sstream>
94 #include <string>
96 #ifdef GMX_CPUINFO_STANDALONE
97 # define gmx_unused
98 #else
99 # include "gromacs/utility/basedefinitions.h"
100 #endif
102 #include "architecture.h"
104 namespace gmx
107 namespace
110 /*! \cond internal */
112 /******************************************************************************
114 * Utility functions to make this file independent of the GROMACS library *
116 ******************************************************************************/
/*! \brief Remove initial and trailing whitespace from string
 *
 * \param s  Pointer to string where whitespace will be removed
 *
 * Whitespace is whatever std::isspace() classifies as such. A string that
 * consists only of whitespace ends up empty.
 */
void
trimString(std::string * s)
{
    // std::isspace() is only defined for values representable as unsigned
    // char (or EOF), so convert before the call to stay well-defined for
    // characters with the high bit set.
    auto isNotSpace = [](char c) -> bool { return !std::isspace(static_cast<unsigned char>(c)); };

    // heading
    s->erase(s->begin(), std::find_if(s->begin(), s->end(), isNotSpace));
    // trailing
    s->erase(std::find_if(s->rbegin(), s->rend(), isNotSpace).base(), s->end());
}
132 /******************************************************************************
134 * x86 detection functions *
136 ******************************************************************************/
138 /*! \brief execute x86 cpuid instructions with custom level and extended level
140 * \param level The main cpuid level (input argument for eax register)
141 * \param ecxval Extended level (input argument for ecx register)
142 * \param eax Output in eax register
143 * \param ebx Output in ebx register
144 * \param ecx Output in ecx register
145 * \param edx Output in edx register
147 * \return 0 on success, or non-zero if the instruction could not execute.
150 executeX86CpuID(unsigned int gmx_unused level,
151 unsigned int gmx_unused ecxval,
152 unsigned int * eax,
153 unsigned int * ebx,
154 unsigned int * ecx,
155 unsigned int * edx)
157 if (c_architecture == Architecture::X86)
159 #if defined __GNUC__ || GMX_X86_GCC_INLINE_ASM
161 // any compiler that understands gcc inline assembly
162 *eax = level;
163 *ecx = ecxval;
164 *ebx = 0;
165 *edx = 0;
167 # if GMX_IS_X86_32 && defined(__PIC__)
168 // Avoid clobbering the global offset table in 32-bit pic code (ebx register)
169 __asm__ __volatile__ ("xchgl %%ebx, %1 \n\t"
170 "cpuid \n\t"
171 "xchgl %%ebx, %1 \n\t"
172 : "+a" (*eax), "+r" (*ebx), "+c" (*ecx), "+d" (*edx));
173 # elif GMX_IS_X86_64
174 // i386 without PIC, or x86-64. Things are easy and we can clobber any reg we want
175 __asm__ __volatile__ ("cpuid \n\t"
176 : "+a" (*eax), "+b" (*ebx), "+c" (*ecx), "+d" (*edx));
177 # else
178 // Not a normal x86, which could happen when a compiler
179 // targetting non-x86 pretends to be GCC.
180 # endif
181 return 0;
183 #elif defined _MSC_VER
185 // MSVC (and icc on windows) on ia32 or x86-64
186 int cpuInfo[4];
187 __cpuidex(cpuInfo, level, ecxval);
188 *eax = static_cast<unsigned int>(cpuInfo[0]);
189 *ebx = static_cast<unsigned int>(cpuInfo[1]);
190 *ecx = static_cast<unsigned int>(cpuInfo[2]);
191 *edx = static_cast<unsigned int>(cpuInfo[3]);
192 return 0;
194 #else
196 // We are on x86, but without compiler support for cpuid if we get here
197 *eax = 0;
198 *ebx = 0;
199 *ecx = 0;
200 *edx = 0;
201 return 1;
203 #endif // check for inline asm on x86
205 else
207 // We are not on x86
208 *eax = 0;
209 *ebx = 0;
210 *ecx = 0;
211 *edx = 0;
212 return 1;
217 /*! \brief Detect x86 vendors by using the cpuid assembly instructions
219 * If support for the cpuid instruction is present, we check for Intel
220 * or AMD vendors.
222 * \return gmx::CpuInfo::Vendor::Intel, gmx::CpuInfo::Vendor::Amd. If neither
223 * Intel nor Amd can be identified, or if the code fails to execute,
224 * gmx::CpuInfo::Vendor::Unknown is returned.
226 CpuInfo::Vendor
227 detectX86Vendor()
229 unsigned int eax, ebx, ecx, edx;
230 CpuInfo::Vendor v = CpuInfo::Vendor::Unknown;
232 if (executeX86CpuID(0x0, 0, &eax, &ebx, &ecx, &edx) == 0)
234 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
236 v = CpuInfo::Vendor::Intel; // ebx=='uneG', ecx=='letn', edx=='Ieni'
238 else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
240 v = CpuInfo::Vendor::Amd; // ebx=='htuA', ecx=='DMAc', edx=='itne'
243 return v;
246 /*! \brief Simple utility function to set/clear feature in a set
248 * \param featureSet Pointer to the feature set to update
249 * \param feature The specific feature to set/clear
250 * \param registerValue Register value (returned from cpuid)
251 * \param bit Bit to check in registerValue. The feature will be
252 * added to the featureSet if this bit is set.
254 * \note Nothing is done if the bit is not set. In particular, this will not
255 * erase anything if the feature already exists in the set.
257 void
258 setFeatureFromBit(std::set<CpuInfo::Feature> * featureSet,
259 CpuInfo::Feature feature,
260 unsigned int registerValue,
261 unsigned char bit)
263 if (registerValue & (1 << bit))
265 featureSet->insert(feature);
269 /*! \brief Process x86 cpuinfo features that are common to Intel and AMD CPUs
271 * \param[out] brand String where to write the x86 brand string
272 * \param[out] family Major version of processor
273 * \param[out] model Middle version of processor
274 * \param[out] stepping Minor version of processor
275 * \param[out] features Feature set where supported features are inserted
277 void
278 detectX86Features(std::string * brand,
279 int * family,
280 int * model,
281 int * stepping,
282 std::set<CpuInfo::Feature> * features)
284 unsigned int eax, ebx, ecx, edx;
286 // Return if we cannot execute any levels
287 if (executeX86CpuID(0x0, 0, &eax, &ebx, &ecx, &edx) != 0)
289 return;
291 unsigned int maxStdLevel = eax;
293 if (maxStdLevel >= 0x1)
295 executeX86CpuID(0x1, 0, &eax, &ebx, &ecx, &edx);
297 *family = ((eax & 0x0ff00000) >> 20) + ((eax & 0x00000f00) >> 8);
298 *model = ((eax & 0x000f0000) >> 12) + ((eax & 0x000000f0) >> 4);
299 *stepping = (eax & 0x0000000f);
301 setFeatureFromBit(features, CpuInfo::Feature::X86_Sse3, ecx, 0 );
302 setFeatureFromBit(features, CpuInfo::Feature::X86_Pclmuldq, ecx, 1 );
303 setFeatureFromBit(features, CpuInfo::Feature::X86_Ssse3, ecx, 9 );
304 setFeatureFromBit(features, CpuInfo::Feature::X86_Fma, ecx, 12 );
305 setFeatureFromBit(features, CpuInfo::Feature::X86_Cx16, ecx, 13 );
306 setFeatureFromBit(features, CpuInfo::Feature::X86_Pdcm, ecx, 15 );
307 setFeatureFromBit(features, CpuInfo::Feature::X86_Pcid, ecx, 17 );
308 setFeatureFromBit(features, CpuInfo::Feature::X86_Sse4_1, ecx, 19 );
309 setFeatureFromBit(features, CpuInfo::Feature::X86_Sse4_2, ecx, 20 );
310 setFeatureFromBit(features, CpuInfo::Feature::X86_X2Apic, ecx, 21 );
311 setFeatureFromBit(features, CpuInfo::Feature::X86_Popcnt, ecx, 23 );
312 setFeatureFromBit(features, CpuInfo::Feature::X86_Tdt, ecx, 24 );
313 setFeatureFromBit(features, CpuInfo::Feature::X86_Aes, ecx, 25 );
314 setFeatureFromBit(features, CpuInfo::Feature::X86_Avx, ecx, 28 );
315 setFeatureFromBit(features, CpuInfo::Feature::X86_F16C, ecx, 29 );
316 setFeatureFromBit(features, CpuInfo::Feature::X86_Rdrnd, ecx, 30 );
318 setFeatureFromBit(features, CpuInfo::Feature::X86_Pse, edx, 3 );
319 setFeatureFromBit(features, CpuInfo::Feature::X86_Msr, edx, 5 );
320 setFeatureFromBit(features, CpuInfo::Feature::X86_Cx8, edx, 8 );
321 setFeatureFromBit(features, CpuInfo::Feature::X86_Apic, edx, 9 );
322 setFeatureFromBit(features, CpuInfo::Feature::X86_Cmov, edx, 15 );
323 setFeatureFromBit(features, CpuInfo::Feature::X86_Clfsh, edx, 19 );
324 setFeatureFromBit(features, CpuInfo::Feature::X86_Mmx, edx, 23 );
325 setFeatureFromBit(features, CpuInfo::Feature::X86_Sse2, edx, 26 );
326 setFeatureFromBit(features, CpuInfo::Feature::X86_Htt, edx, 28 );
329 if (maxStdLevel >= 0x7)
331 executeX86CpuID(0x7, 0, &eax, &ebx, &ecx, &edx);
333 setFeatureFromBit(features, CpuInfo::Feature::X86_Hle, ebx, 4 );
334 setFeatureFromBit(features, CpuInfo::Feature::X86_Avx2, ebx, 5 );
335 setFeatureFromBit(features, CpuInfo::Feature::X86_Rtm, ebx, 11 );
336 setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512F, ebx, 16 );
337 setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512PF, ebx, 26 );
338 setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512ER, ebx, 27 );
339 setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512CD, ebx, 28 );
340 setFeatureFromBit(features, CpuInfo::Feature::X86_Sha, ebx, 29 );
341 setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512BW, ebx, 30 );
342 setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512VL, ebx, 31 );
345 // Check whether Hyper-threading is really possible to enable in the hardware,
346 // not just technically supported by this generation of processors
347 if (features->count(CpuInfo::Feature::X86_Htt) && maxStdLevel >= 0x4)
349 executeX86CpuID(0x1, 0, &eax, &ebx, &ecx, &edx);
350 unsigned int maxLogicalCores = (ebx >> 16) & 0x0ff;
351 executeX86CpuID(0x4, 0, &eax, &ebx, &ecx, &edx);
352 unsigned int maxPhysicalCores = ((eax >> 26) & 0x3f) + 1;
353 if (maxLogicalCores/maxPhysicalCores < 2)
355 features->erase(CpuInfo::Feature::X86_Htt);
359 if (executeX86CpuID(0x80000000, 0, &eax, &ebx, &ecx, &edx) != 0)
361 // No point in continuing if we don't support any extended levels
362 return;
364 unsigned int maxExtLevel = eax;
366 if (maxExtLevel >= 0x80000001)
368 executeX86CpuID(0x80000001, 0, &eax, &ebx, &ecx, &edx);
370 setFeatureFromBit(features, CpuInfo::Feature::X86_Lahf, ecx, 0 );
371 setFeatureFromBit(features, CpuInfo::Feature::X86_Sse4A, ecx, 6 );
372 setFeatureFromBit(features, CpuInfo::Feature::X86_MisalignSse, ecx, 7 );
373 setFeatureFromBit(features, CpuInfo::Feature::X86_Xop, ecx, 11 );
374 setFeatureFromBit(features, CpuInfo::Feature::X86_Fma4, ecx, 16 );
375 setFeatureFromBit(features, CpuInfo::Feature::X86_PDPE1GB, edx, 26 );
376 setFeatureFromBit(features, CpuInfo::Feature::X86_Rdtscp, edx, 27 );
379 if (maxExtLevel >= 0x80000005)
381 // Get the x86 CPU brand string (3 levels, 16 bytes in each)
382 brand->clear();
383 for (unsigned int level = 0x80000002; level < 0x80000005; level++)
385 executeX86CpuID(level, 0, &eax, &ebx, &ecx, &edx);
386 // Add eax, ebx, ecx, edx contents as 4 chars each to the brand string
387 brand->append(reinterpret_cast<const char *>(&eax), sizeof(eax));
388 brand->append(reinterpret_cast<const char *>(&ebx), sizeof(ebx));
389 brand->append(reinterpret_cast<const char *>(&ecx), sizeof(ecx));
390 brand->append(reinterpret_cast<const char *>(&edx), sizeof(edx));
392 trimString(brand);
395 if (maxExtLevel >= 0x80000007)
397 executeX86CpuID(0x80000007, 0, &eax, &ebx, &ecx, &edx);
399 setFeatureFromBit(features, CpuInfo::Feature::X86_NonstopTsc, edx, 8 );
404 /*! \brief Return a vector with x86 APIC IDs for all threads
406 * \param haveX2Apic True if the processors supports x2APIC, otherwise vanilla APIC.
408 * \returns A new std::vector of unsigned integer APIC IDs, one for each
409 * logical processor in the system.
411 const std::vector<unsigned int>
412 detectX86ApicIDs(bool gmx_unused haveX2Apic)
414 std::vector<unsigned int> apicID;
416 // We cannot just ask for all APIC IDs, but must force execution on each
417 // hardware thread and extract the APIC id there.
418 #if HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF
419 unsigned int eax, ebx, ecx, edx;
420 unsigned int nApic = sysconf(_SC_NPROCESSORS_ONLN);
421 cpu_set_t saveCpuSet;
422 cpu_set_t cpuSet;
423 sched_getaffinity(0, sizeof(cpu_set_t), &saveCpuSet);
424 CPU_ZERO(&cpuSet);
425 for (unsigned int i = 0; i < nApic; i++)
427 CPU_SET(i, &cpuSet);
428 sched_setaffinity(0, sizeof(cpu_set_t), &cpuSet);
429 if (haveX2Apic)
431 executeX86CpuID(0xb, 0, &eax, &ebx, &ecx, &edx);
432 apicID.push_back(edx);
434 else
436 executeX86CpuID(0x1, 0, &eax, &ebx, &ecx, &edx);
437 apicID.push_back(ebx >> 24);
439 CPU_CLR(i, &cpuSet);
441 sched_setaffinity(0, sizeof(cpu_set_t), &saveCpuSet);
442 #elif GMX_NATIVE_WINDOWS
443 unsigned int eax, ebx, ecx, edx;
444 SYSTEM_INFO sysinfo;
445 GetSystemInfo( &sysinfo );
446 unsigned int nApic = sysinfo.dwNumberOfProcessors;
447 unsigned int saveAffinity = SetThreadAffinityMask(GetCurrentThread(), 1);
448 for (DWORD_PTR i = 0; i < nApic; i++)
450 SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR)1)<<i));
451 Sleep(0);
452 if (haveX2Apic)
454 executeX86CpuID(0xb, 0, &eax, &ebx, &ecx, &edx);
455 apicID.push_back(edx);
457 else
459 executeX86CpuID(0x1, 0, &eax, &ebx, &ecx, &edx);
460 apicID.push_back(ebx >> 24);
463 SetThreadAffinityMask(GetCurrentThread(), saveAffinity);
464 #endif
465 return apicID;
/*! \brief Utility to renumber indices extracted from APIC IDs
 *
 * \param v  Vector with unsigned integer indices
 *
 *  This routine replaces every element by its rank among the distinct values
 *  found in the vector, so the values are renumbered starting from 0 while
 *  their relative order is kept. For example, the vector
 *  {0,1,2,8,9,10,8,9,10,0,1,2} will be rewritten to {0,1,2,3,4,5,3,4,5,0,1,2}.
 *  Nothing is returned; the number of unique elements equals one plus the
 *  largest element after renumbering. An empty vector is left unchanged.
 */
void
renumberIndex(std::vector<unsigned int> * v)
{
    // Sorted list of the distinct values present in the input
    std::vector<unsigned int> uniqueSortedV(*v);
    std::sort(uniqueSortedV.begin(), uniqueSortedV.end());
    uniqueSortedV.erase(std::unique(uniqueSortedV.begin(), uniqueSortedV.end()), uniqueSortedV.end());

    // The new index of a value is its position in that list; every element
    // is present in it, so the binary search always finds an exact match.
    for (unsigned int &value : *v)
    {
        const auto position = std::lower_bound(uniqueSortedV.begin(), uniqueSortedV.end(), value);
        value = static_cast<unsigned int>(std::distance(uniqueSortedV.begin(), position));
    }
}
496 /*! \brief Try to detect basic CPU topology information using x86 cpuid
498 * If x2APIC support is present, this is our first choice, otherwise we
499 * attempt to use old vanilla APIC.
501 * \return A new vector of entries with socket, core, hwthread information
502 * for each logical processor.
504 std::vector<CpuInfo::LogicalProcessor>
505 detectX86LogicalProcessors()
507 unsigned int eax;
508 unsigned int ebx;
509 unsigned int ecx;
510 unsigned int edx;
511 unsigned int maxStdLevel;
512 unsigned int maxExtLevel;
513 bool haveApic;
514 bool haveX2Apic;
516 std::vector<CpuInfo::LogicalProcessor> logicalProcessors;
518 // Find largest standard & extended level input values allowed
519 executeX86CpuID(0x0, 0, &eax, &ebx, &ecx, &edx);
520 maxStdLevel = eax;
521 executeX86CpuID(0x80000000, 0, &eax, &ebx, &ecx, &edx);
522 maxExtLevel = eax;
524 if (maxStdLevel >= 0x1)
526 executeX86CpuID(0x1, 0, &eax, &ebx, &ecx, &edx);
527 haveX2Apic = (ecx & (1 << 21)) && maxStdLevel >= 0xb;
528 haveApic = (edx & (1 << 9)) && maxExtLevel >= 0x80000008;
530 else
532 haveX2Apic = false,
533 haveApic = false;
536 if (haveX2Apic || haveApic)
538 unsigned int hwThreadBits;
539 unsigned int coreBits;
540 // Get bits for cores and hardware threads
541 if (haveX2Apic)
543 executeX86CpuID(0xb, 0, &eax, &ebx, &ecx, &edx);
544 hwThreadBits = eax & 0x1f;
545 executeX86CpuID(0xb, 1, &eax, &ebx, &ecx, &edx);
546 coreBits = (eax & 0x1f) - hwThreadBits;
548 else // haveApic
550 // AMD without x2APIC does not support SMT - there are no hwthread bits in apic ID
551 hwThreadBits = 0;
552 // Get number of core bits in apic ID - try modern extended method first
553 executeX86CpuID(0x80000008, 0, &eax, &ebx, &ecx, &edx);
554 coreBits = (ecx >> 12) & 0xf;
555 if (coreBits == 0)
557 // Legacy method for old single/dual core AMD CPUs
558 int i = ecx & 0xf;
559 while (i >> coreBits)
561 coreBits++;
566 std::vector<unsigned int> apicID = detectX86ApicIDs(haveX2Apic);
568 if (!apicID.empty())
570 // APIC IDs can be buggy, and it is always a mess. Typically more bits are
571 // reserved than needed, and the numbers might not increment by 1 even in
572 // a single socket or core. Extract, renumber, and check that things make sense.
573 unsigned int hwThreadMask = (1 << hwThreadBits) - 1;
574 unsigned int coreMask = (1 << coreBits) - 1;
575 std::vector<unsigned int> hwThreadRanks;
576 std::vector<unsigned int> coreRanks;
577 std::vector<unsigned int> socketRanks;
579 for (auto a : apicID)
581 hwThreadRanks.push_back( static_cast<int>( a & hwThreadMask ) );
582 coreRanks.push_back( static_cast<int>( ( a >> hwThreadBits ) & coreMask ) );
583 socketRanks.push_back( static_cast<int>( a >> ( coreBits + hwThreadBits ) ) );
586 renumberIndex(&hwThreadRanks);
587 renumberIndex(&coreRanks);
588 renumberIndex(&socketRanks);
590 unsigned int hwThreadRankSize = 1 + *std::max_element(hwThreadRanks.begin(), hwThreadRanks.end());
591 unsigned int coreRankSize = 1 + *std::max_element(coreRanks.begin(), coreRanks.end());
592 unsigned int socketRankSize = 1 + *std::max_element(socketRanks.begin(), socketRanks.end());
594 if (socketRankSize * coreRankSize * hwThreadRankSize == apicID.size() )
596 // Alright, everything looks consistent, so put it in the result
597 for (std::size_t i = 0; i < apicID.size(); i++)
599 // While the internal APIC IDs are always unsigned integers, we also cast to
600 // plain integers for the externally exposed vectors, since that will make
601 // it possible to use '-1' for invalid entries in the future.
602 logicalProcessors.push_back( { int(socketRanks[i]), int(coreRanks[i]), int(hwThreadRanks[i]) } );
607 return logicalProcessors; // Will only have contents if everything worked
611 /******************************************************************************
613 * Generic Linux detection by parsing /proc/cpuinfo *
615 ******************************************************************************/
617 /*! \brief Parse /proc/cpuinfo into a simple string map
619 * This routine will read the contents of /proc/cpuinfo, and for each
620 * line that is not empty we will assign the (trimmed) string to the right of
621 * the colon as a key, and the left-hand side as the value in the map.
622 * For multi-processor systems where lines are repeated the latter lines will
623 * overwrite the first occurrence.
625 * \return New map with the contents. If the file is not available, the returned
626 * map will be empty.
628 const std::map<std::string, std::string>
629 parseProcCpuInfo()
631 std::ifstream procCpuInfo("/proc/cpuinfo");
632 std::string line;
633 std::map<std::string, std::string> cpuInfo;
635 while (std::getline(procCpuInfo, line))
637 if (!line.empty())
639 std::stringstream iss(line);
640 std::string key;
641 std::string val;
642 std::getline(iss, key, ':'); // part before colon
643 std::getline(iss, val); // part after colon
644 trimString(&key);
645 trimString(&val);
646 // put it in the map. This will overwrite previous processors, but we don't care.
647 cpuInfo[key] = val;
650 return cpuInfo;
654 /*! \brief Try to detect vendor from /proc/cpuinfo
656 * \param cpuInfo Map returned from parseProcCpuinfo()
658 * This routine tries to match a few common labels in /proc/cpuinfo to see if
659 * they begin with the name of a standard vendor. If the file cannot be read
660 * or if no match is found, we return gmx::CpuInfo::Vendor::Unknown.
662 CpuInfo::Vendor
663 detectProcCpuInfoVendor(const std::map<std::string, std::string> &cpuInfo)
665 const std::map<std::string, CpuInfo::Vendor> testVendors =
667 { "GenuineIntel", CpuInfo::Vendor::Intel },
668 { "Intel", CpuInfo::Vendor::Intel },
669 { "AuthenticAmd", CpuInfo::Vendor::Amd },
670 { "AMD", CpuInfo::Vendor::Amd },
671 { "ARM", CpuInfo::Vendor::Arm },
672 { "AArch64", CpuInfo::Vendor::Arm },
673 { "Fujitsu", CpuInfo::Vendor::Fujitsu },
674 { "IBM", CpuInfo::Vendor::Ibm },
675 { "POWER", CpuInfo::Vendor::Ibm },
676 { "Oracle", CpuInfo::Vendor::Oracle },
679 // For each label in /proc/cpuinfo, compare the value to the name in the
680 // testNames map above, and if it's a match return the vendor.
681 for (auto &l : { "vendor_id", "vendor", "manufacture", "model", "processor", "cpu" })
683 if (cpuInfo.count(l))
685 // there was a line with this left-hand side in /proc/cpuinfo
686 const std::string &s1 = cpuInfo.at(l);
688 for (auto &t : testVendors)
690 const std::string &s2 = t.first;
692 // If the entire name we are testing (s2) matches the first part of
693 // the string after the colon in /proc/cpuinfo (s1) we found our vendor
694 if (std::equal(s2.begin(), s2.end(), s1.begin(),
695 [](const char &x, const char &y) -> bool { return tolower(x) == tolower(y); }))
697 return t.second;
702 return CpuInfo::Vendor::Unknown;
706 /*! \brief Detect IBM processor name and features from /proc/cpuinfo
708 * \param cpuInfo Map returned from parseProcCpuinfo()
709 * \param[out] brand String where to write the brand string
710 * \param[out] features Feature set where supported features are inserted
712 * This routine tries to match a few common labels in /proc/cpuinfo to see if
713 * we can find the processor name and features. It is likely fragile.
715 void
716 detectProcCpuInfoIbm(const std::map<std::string, std::string> &cpuInfo,
717 std::string * brand,
718 std::set<CpuInfo::Feature> * features)
720 // Get brand string from 'cpu' label if present, otherwise 'Processor'
721 if (cpuInfo.count("cpu"))
723 *brand = cpuInfo.at("cpu");
725 else if (cpuInfo.count("Processor"))
727 *brand = cpuInfo.at("Processor");
730 if (brand->find("A2") != std::string::npos)
732 // If the processor identification contains "A2", this is BlueGene/Q with QPX
733 features->insert(CpuInfo::Feature::Ibm_Qpx);
736 for (auto &l : { "model name", "model", "Processor", "cpu" })
738 if (cpuInfo.count(l))
740 std::string s1 = cpuInfo.at(l);
741 std::transform(s1.begin(), s1.end(), s1.begin(), ::tolower);
743 if (s1.find("altivec") != std::string::npos)
745 features->insert(CpuInfo::Feature::Ibm_Vmx);
746 // If this is a power6, we only have VMX. All later processors have VSX.
747 if (s1.find("power6") == std::string::npos)
749 features->insert(CpuInfo::Feature::Ibm_Vsx);
757 /*! \brief Detect ARM processor name and features from /proc/cpuinfo
759 * \param cpuInfo Map returned from parseProcCpuinfo()
760 * \param[out] brand String where to write the brand string
761 * \param[out] family Major version of processor
762 * \param[out] model Middle version of processor
763 * \param[out] stepping Minor version of processor
764 * \param[out] features Feature set where supported features are inserted
766 * This routine tries to match a few common labels in /proc/cpuinfo to see if
767 * we can find the processor name and features. It is likely fragile.
769 void
770 detectProcCpuInfoArm(const std::map<std::string, std::string> &cpuInfo,
771 std::string * brand,
772 int * family,
773 int * model,
774 int * stepping,
775 std::set<CpuInfo::Feature> * features)
777 if (cpuInfo.count("Processor"))
779 *brand = cpuInfo.at("Processor");
781 else if (cpuInfo.count("model name"))
783 *brand = cpuInfo.at("model name");
786 if (cpuInfo.count("CPU architecture"))
788 *family = std::strtol(cpuInfo.at("CPU architecture").c_str(), nullptr, 10);
789 // For some 64-bit CPUs it appears to say 'AArch64' instead
790 if (*family == 0 && cpuInfo.at("CPU architecture").find("AArch64") != std::string::npos)
792 *family = 8; // fragile - no idea how a future ARMv9 will be represented in this case
795 if (cpuInfo.count("CPU variant"))
797 *model = std::strtol(cpuInfo.at("CPU variant").c_str(), nullptr, 16);
799 if (cpuInfo.count("CPU revision"))
801 *stepping = std::strtol(cpuInfo.at("CPU revision").c_str(), nullptr, 10);
804 if (cpuInfo.count("Features"))
806 const std::string &s = cpuInfo.at("Features");
807 if (s.find("neon") != std::string::npos)
809 features->insert(CpuInfo::Feature::Arm_Neon);
811 if (s.find("asimd") != std::string::npos)
813 // At least Jetson TX1 runs a 32-bit environment by default, although
814 // the kernel is 64-bits, and reports asimd feature flags. We cannot
815 // use Neon-asimd in this case, so make sure we are on a 64-bit platform.
816 if (sizeof(void *) == 8)
818 features->insert(CpuInfo::Feature::Arm_NeonAsimd);
825 /*! \brief Try to detect vendor, cpu and features from /proc/cpuinfo
827 * \param[out] vendor Detected hardware vendor
828 * \param[out] brand String where to write the brand string
829 * \param[out] family Major version of processor
830 * \param[out] model Middle version of processor
831 * \param[out] stepping Minor version of processor
832 * \param[out] features Feature set where supported features are inserted
834 * This routine reads the /proc/cpuinfo file into a map and calls subroutines
835 * that attempt to parse by matching keys and values to known strings. It is
836 * much more fragile than our x86 detection, but it does not depend on
837 * specific system calls, intrinsics or assembly instructions.
839 void
840 detectProcCpuInfo(CpuInfo::Vendor * vendor,
841 std::string * brand,
842 int * family,
843 int * model,
844 int * stepping,
845 std::set<CpuInfo::Feature> * features)
847 std::map<std::string, std::string> cpuInfo = parseProcCpuInfo();
849 if (*vendor == CpuInfo::Vendor::Unknown)
851 *vendor = detectProcCpuInfoVendor(cpuInfo);
854 // Unfortunately there is no standard for contents in /proc/cpuinfo. We cannot
855 // indiscriminately look for e.g. 'cpu' since it could be either name or an index.
856 // To handle this slightly better we use one subroutine per vendor.
857 switch (*vendor)
859 case CpuInfo::Vendor::Ibm:
860 detectProcCpuInfoIbm(cpuInfo, brand, features);
861 break;
863 case CpuInfo::Vendor::Arm:
864 detectProcCpuInfoArm(cpuInfo, brand, family, model, stepping, features);
865 break;
867 default:
868 // We only have a single check for fujitsu for now
869 #ifdef __HPC_ACE__
870 features->insert(CpuInfo::Feature::Fujitsu_HpcAce);
871 #endif
872 break;
875 /*! \endcond */
876 } // namespace anonymous
879 // static
880 CpuInfo CpuInfo::detect()
882 CpuInfo result;
884 if (c_architecture == Architecture::X86)
886 result.vendor_ = detectX86Vendor();
888 if (result.vendor_ == CpuInfo::Vendor::Intel)
890 result.features_.insert(CpuInfo::Feature::X86_Intel);
892 else if (result.vendor_ == CpuInfo::Vendor::Amd)
894 result.features_.insert(CpuInfo::Feature::X86_Amd);
896 detectX86Features(&result.brandString_, &result.family_, &result.model_,
897 &result.stepping_, &result.features_);
898 result.logicalProcessors_ = detectX86LogicalProcessors();
900 else
902 // Not x86
903 if (c_architecture == Architecture::Arm)
905 result.vendor_ = CpuInfo::Vendor::Arm;
907 else if (c_architecture == Architecture::PowerPC)
909 result.vendor_ = CpuInfo::Vendor::Ibm;
912 #if defined __aarch64__ || ( defined _M_ARM && _M_ARM >= 8 )
913 result.features_.insert(Feature::Arm_Neon); // ARMv8 always has Neon
914 result.features_.insert(Feature::Arm_NeonAsimd); // ARMv8 always has Neon-asimd
915 #endif
917 #if defined sun
918 result.vendor_ = CpuInfo::Vendor::Oracle;
919 #endif
921 // On Linux we might be able to find information in /proc/cpuinfo. If vendor or brand
922 // is set to a known value this routine will not overwrite it.
923 detectProcCpuInfo(&result.vendor_, &result.brandString_, &result.family_,
924 &result.model_, &result.stepping_, &result.features_);
927 if (!result.logicalProcessors_.empty())
929 result.supportLevel_ = CpuInfo::SupportLevel::LogicalProcessorInfo;
931 else if (!result.features_.empty())
933 result.supportLevel_ = CpuInfo::SupportLevel::Features;
935 else if (result.vendor_ != CpuInfo::Vendor::Unknown
936 || result.brandString_ != "Unknown CPU brand")
938 result.supportLevel_ = CpuInfo::SupportLevel::Name;
940 else
942 result.supportLevel_ = CpuInfo::SupportLevel::None;
945 return result;
949 CpuInfo::CpuInfo()
950 : vendor_(CpuInfo::Vendor::Unknown), brandString_("Unknown CPU brand"),
951 family_(0), model_(0), stepping_(0)
956 const std::map<CpuInfo::Vendor, std::string>
957 CpuInfo::s_vendorStrings_ =
959 { CpuInfo::Vendor::Unknown, "Unknown vendor" },
960 { CpuInfo::Vendor::Intel, "Intel" },
961 { CpuInfo::Vendor::Amd, "AMD" },
962 { CpuInfo::Vendor::Fujitsu, "Fujitsu" },
963 { CpuInfo::Vendor::Ibm, "IBM" },
964 { CpuInfo::Vendor::Arm, "ARM" },
965 { CpuInfo::Vendor::Oracle, "Oracle" },
969 const std::map<CpuInfo::Feature, std::string>
970 CpuInfo::s_featureStrings_ =
972 { CpuInfo::Feature::X86_Aes, "aes" },
973 { CpuInfo::Feature::X86_Amd, "amd" },
974 { CpuInfo::Feature::X86_Apic, "apic" },
975 { CpuInfo::Feature::X86_Avx, "avx" },
976 { CpuInfo::Feature::X86_Avx2, "avx2" },
977 { CpuInfo::Feature::X86_Avx512F, "avx512f" },
978 { CpuInfo::Feature::X86_Avx512PF, "avx512pf" },
979 { CpuInfo::Feature::X86_Avx512ER, "avx512er" },
980 { CpuInfo::Feature::X86_Avx512CD, "avx512cd" },
981 { CpuInfo::Feature::X86_Avx512BW, "avx512bw" },
982 { CpuInfo::Feature::X86_Avx512VL, "avx512vl" },
983 { CpuInfo::Feature::X86_Clfsh, "clfsh" },
984 { CpuInfo::Feature::X86_Cmov, "cmov" },
985 { CpuInfo::Feature::X86_Cx8, "cx8" },
986 { CpuInfo::Feature::X86_Cx16, "cx16" },
987 { CpuInfo::Feature::X86_F16C, "f16c" },
988 { CpuInfo::Feature::X86_Fma, "fma" },
989 { CpuInfo::Feature::X86_Fma4, "fma4" },
990 { CpuInfo::Feature::X86_Hle, "hle" },
991 { CpuInfo::Feature::X86_Htt, "htt" },
992 { CpuInfo::Feature::X86_Intel, "intel" },
993 { CpuInfo::Feature::X86_Lahf, "lahf" },
994 { CpuInfo::Feature::X86_MisalignSse, "misalignsse" },
995 { CpuInfo::Feature::X86_Mmx, "mmx" },
996 { CpuInfo::Feature::X86_Msr, "msr" },
997 { CpuInfo::Feature::X86_NonstopTsc, "nonstop_tsc" },
998 { CpuInfo::Feature::X86_Pcid, "pcid" },
999 { CpuInfo::Feature::X86_Pclmuldq, "pclmuldq" },
1000 { CpuInfo::Feature::X86_Pdcm, "pdcm" },
1001 { CpuInfo::Feature::X86_PDPE1GB, "pdpe1gb" },
1002 { CpuInfo::Feature::X86_Popcnt, "popcnt" },
1003 { CpuInfo::Feature::X86_Pse, "pse" },
1004 { CpuInfo::Feature::X86_Rdrnd, "rdrnd" },
1005 { CpuInfo::Feature::X86_Rdtscp, "rdtscp" },
1006 { CpuInfo::Feature::X86_Rtm, "rtm" },
1007 { CpuInfo::Feature::X86_Sha, "sha" },
1008 { CpuInfo::Feature::X86_Sse2, "sse2" },
1009 { CpuInfo::Feature::X86_Sse3, "sse3" },
1010 { CpuInfo::Feature::X86_Sse4A, "sse4a" },
1011 { CpuInfo::Feature::X86_Sse4_1, "sse4.1" },
1012 { CpuInfo::Feature::X86_Sse4_2, "sse4.2" },
1013 { CpuInfo::Feature::X86_Ssse3, "ssse3" },
1014 { CpuInfo::Feature::X86_Tdt, "tdt" },
1015 { CpuInfo::Feature::X86_X2Apic, "x2apic" },
1016 { CpuInfo::Feature::X86_Xop, "xop" },
1017 { CpuInfo::Feature::Arm_Neon, "neon" },
1018 { CpuInfo::Feature::Arm_NeonAsimd, "neon_asimd" },
1019 { CpuInfo::Feature::Ibm_Qpx, "qpx" },
1020 { CpuInfo::Feature::Ibm_Vmx, "vmx" },
1021 { CpuInfo::Feature::Ibm_Vsx, "vsx" },
1022 { CpuInfo::Feature::Fujitsu_HpcAce, "hpc-ace" }
1026 bool
1027 cpuIsX86Nehalem(const CpuInfo &cpuInfo)
1029 return (cpuInfo.vendor() == gmx::CpuInfo::Vendor::Intel &&
1030 cpuInfo.family() == 6 &&
1031 (cpuInfo.model() == 0x2E || cpuInfo.model() == 0x1A ||
1032 cpuInfo.model() == 0x1E || cpuInfo.model() == 0x2F ||
1033 cpuInfo.model() == 0x2C || cpuInfo.model() == 0x25) );
1036 } // namespace gmx
#ifdef GMX_CPUINFO_STANDALONE
/* Stand-alone driver, only compiled when GMX_CPUINFO_STANDALONE is defined.
 * It runs the detection and prints the single item selected by the first
 * command-line flag. Without any flag the usage text is printed and 1 is
 * returned; otherwise 0 is returned, also when the flag is not recognized
 * (in which case nothing is printed).
 */
int
main(int argc, char **argv)
{
    if (argc < 2)
    {
        fprintf(stdout,
                "Usage:\n\n%s [flags]\n\n"
                "Available flags:\n"
                "-vendor Print CPU vendor.\n"
                "-brand Print CPU brand string.\n"
                "-family Print CPU family version.\n"
                "-model Print CPU model version.\n"
                "-stepping Print CPU stepping version.\n"
                "-features Print CPU feature flags.\n",
                argv[0]);
        exit(1);
    }

    std::string  arg(argv[1]);
    gmx::CpuInfo cpuInfo(gmx::CpuInfo::detect());

    if (arg == "-vendor")
    {
        printf("%s\n", cpuInfo.vendorString().c_str());
    }
    else if (arg == "-brand")
    {
        printf("%s\n", cpuInfo.brandString().c_str());
    }
    else if (arg == "-family")
    {
        printf("%d\n", cpuInfo.family());
    }
    else if (arg == "-model")
    {
        printf("%d\n", cpuInfo.model());
    }
    else if (arg == "-stepping")
    {
        printf("%d\n", cpuInfo.stepping());
    }
    else if (arg == "-features")
    {
        // Separate the feature strings with spaces. Note that in the
        // GROMACS cmake code, surrounding whitespace is first
        // stripped by the CPU detection routine, and then added back
        // in the code for making the SIMD suggestion.
        for (auto &f : cpuInfo.featureSet() )
        {
            printf("%s ", cpuInfo.featureString(f).c_str());
        }
        printf("\n");
    }
    else if (arg == "-topology")
    {
        // Undocumented debug option, usually not present in standalone version
        for (auto &t : cpuInfo.logicalProcessors() )
        {
            // The rank fields are filled from plain (signed) integers by the
            // topology detection, so print them with %d rather than %u.
            printf("%3d %3d %3d\n", t.socketRankInMachine, t.coreRankInSocket, t.hwThreadRankInCore);
        }
    }
    return 0;
}
#endif