add detection of CPX 5300 w/ 2FMAs
[gromacs.git] / src / gromacs / hardware / cpuinfo.cpp
blob51d8cc7d961c8f443900c33b54b4a95d0242dde6
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012-2018, The GROMACS development team.
5 * Copyright (c) 2019,2020, by the GROMACS development team, led by
6 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
7 * and including many others, as listed in the AUTHORS file in the
8 * top-level source directory and at http://www.gromacs.org.
10 * GROMACS is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2.1
13 * of the License, or (at your option) any later version.
15 * GROMACS is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with GROMACS; if not, see
22 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
23 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
25 * If you want to redistribute modifications to GROMACS, please
26 * consider that scientific software is very special. Version
27 * control is crucial - bugs must be traceable. We will be happy to
28 * consider code for inclusion in the official distribution, but
29 * derived work must not be called official GROMACS. Details are found
30 * in the README & COPYING files - if they are missing, get the
31 * official version at http://www.gromacs.org.
33 * To help us fund GROMACS development, we humbly ask that you cite
34 * the research papers on the package. Check out http://www.gromacs.org.
37 /*! \internal \file
38 * \brief
39 * Implements gmx::CpuInfo.
41 * We need to be able to compile this file in stand-alone mode to use basic
42 * CPU feature detection to set the SIMD acceleration and similar things in
43 * CMake, while we still want to use more features that enable topology
44 * detection when config.h is present.
46 * We solve this by skipping the advanced stuff when the preprocessor
47 * macro GMX_CPUINFO_STANDALONE is defined. In this case you likely also need to
48 * define GMX_X86_GCC_INLINE_ASM if you are on x86; without inline assembly
49 * support it is not possible to perform the actual detection on Linux/Mac.
50 * Since these macros are specific to this file, they do not use the GMX prefix.
52 * The remaining defines (GMX_NATIVE_WINDOWS,HAVE_UNISTD_H,HAVE_SCHED_H,
53 * HAVE_SYSCONF, HAVE_SCHED_AFFINITY) are only used to determine the topology on
54 * x86, and for this we rely on including config.h.
56 * \author Erik Lindahl <erik.lindahl@gmail.com>
57 * \ingroup module_hardware
60 #ifndef GMX_CPUINFO_STANDALONE
61 # include "gmxpre.h"
62 #endif
64 #include "cpuinfo.h"
66 #ifndef GMX_CPUINFO_STANDALONE
67 # include "config.h"
68 #else
69 # define GMX_NATIVE_WINDOWS 0
70 #endif
72 #if defined _MSC_VER
73 # include <intrin.h> // __cpuid()
74 #endif
76 #if GMX_NATIVE_WINDOWS
77 # include <windows.h> // sysinfo(), necessary for topology stuff
78 #endif
80 #ifdef HAVE_SCHED_H
81 # include <sched.h> // sched_getaffinity(), sched_setaffinity()
82 #endif
83 #ifdef HAVE_UNISTD_H
84 # include <unistd.h> // sysconf()
85 #endif
87 #include <cctype>
88 #include <cstdint> // uint32_t in X86 processor name code
89 #include <cstdlib>
91 #include <algorithm>
92 #include <fstream>
93 #include <map>
94 #include <set>
95 #include <sstream>
96 #include <string>
98 #ifdef GMX_CPUINFO_STANDALONE
99 # define gmx_unused
100 #else
101 # include "gromacs/utility/basedefinitions.h"
102 #endif
104 #include "architecture.h"
106 namespace gmx
109 namespace
112 /*! \cond internal */
114 /******************************************************************************
116 * Utility functions to make this file independent of the GROMACS library *
118 ******************************************************************************/
/*! \brief Remove initial and trailing whitespace from string
 *
 * \param s  Pointer to string where whitespace will be removed
 *
 * Each character is converted to unsigned char before it is classified,
 * since calling std::isspace() with a negative char value is undefined
 * behavior and the strings handled here can contain arbitrary bytes.
 */
void trimString(std::string* s)
{
    const auto isNotSpace = [](char c) -> bool {
        return std::isspace(static_cast<unsigned char>(c)) == 0;
    };

    // heading
    s->erase(s->begin(), std::find_if(s->begin(), s->end(), isNotSpace));
    // trailing
    s->erase(std::find_if(s->rbegin(), s->rend(), isNotSpace).base(), s->end());
}
137 /******************************************************************************
139 * x86 detection functions *
141 ******************************************************************************/
143 /*! \brief execute x86 cpuid instructions with custom level and extended level
145 * \param level The main cpuid level (input argument for eax register)
146 * \param ecxval Extended level (input argument for ecx register)
147 * \param eax Output in eax register
148 * \param ebx Output in ebx register
149 * \param ecx Output in ecx register
150 * \param edx Output in edx register
152 * \return 0 on success, or non-zero if the instruction could not execute.
154 int executeX86CpuID(unsigned int gmx_unused level,
155 unsigned int gmx_unused ecxval,
156 unsigned int* eax,
157 unsigned int* ebx,
158 unsigned int* ecx,
159 unsigned int* edx)
161 if (c_architecture == Architecture::X86)
163 #if defined __GNUC__ || GMX_X86_GCC_INLINE_ASM
165 // any compiler that understands gcc inline assembly
166 *eax = level;
167 *ecx = ecxval;
168 *ebx = 0;
169 *edx = 0;
171 # if GMX_IS_X86_32 && defined(__PIC__)
172 // Avoid clobbering the global offset table in 32-bit pic code (ebx register)
173 __asm__ __volatile__(
174 "xchgl %%ebx, %1 \n\t"
175 "cpuid \n\t"
176 "xchgl %%ebx, %1 \n\t"
177 : "+a"(*eax), "+r"(*ebx), "+c"(*ecx), "+d"(*edx));
178 # elif GMX_IS_X86_64
179 // i386 without PIC, or x86-64. Things are easy and we can clobber any reg we want
180 __asm__ __volatile__("cpuid \n\t"
181 : "+a"(*eax), "+b"(*ebx), "+c"(*ecx), "+d"(*edx));
182 # else
183 // Not a normal x86, which could happen when a compiler
184 // targetting non-x86 pretends to be GCC.
185 # endif
186 return 0;
188 #elif defined _MSC_VER
190 // MSVC (and icc on windows) on ia32 or x86-64
191 int cpuInfo[4];
192 __cpuidex(cpuInfo, level, ecxval);
193 *eax = static_cast<unsigned int>(cpuInfo[0]);
194 *ebx = static_cast<unsigned int>(cpuInfo[1]);
195 *ecx = static_cast<unsigned int>(cpuInfo[2]);
196 *edx = static_cast<unsigned int>(cpuInfo[3]);
197 return 0;
199 #else
201 // We are on x86, but without compiler support for cpuid if we get here
202 *eax = 0;
203 *ebx = 0;
204 *ecx = 0;
205 *edx = 0;
206 return 1;
208 #endif // check for inline asm on x86
210 else
212 // We are not on x86
213 *eax = 0;
214 *ebx = 0;
215 *ecx = 0;
216 *edx = 0;
217 return 1;
222 /*! \brief Detect x86 vendors by using the cpuid assembly instructions
224 * If support for the cpuid instruction is present, we check for Intel,
225 * AMD or Hygon vendors
227 * \return gmx::CpuInfo::Vendor::Intel, gmx::CpuInfo::Vendor::Amd,
228 * gmx::CpuInfl::Vendor::Hygon, . If neither Intel, Amd nor
229 * Hygon can be identified, or if the code fails to execute,
230 * gmx::CpuInfo::Vendor::Unknown is returned.
232 CpuInfo::Vendor detectX86Vendor()
234 unsigned int eax, ebx, ecx, edx;
235 CpuInfo::Vendor v = CpuInfo::Vendor::Unknown;
237 if (executeX86CpuID(0x0, 0, &eax, &ebx, &ecx, &edx) == 0)
239 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
241 v = CpuInfo::Vendor::Intel; // ebx=='uneG', ecx=='letn', edx=='Ieni'
243 else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
245 v = CpuInfo::Vendor::Amd; // ebx=='htuA', ecx=='DMAc', edx=='itne'
247 else if (ebx == 0x6f677948 && ecx == 0x656e6975 && edx == 0x6e65476e)
249 v = CpuInfo::Vendor::Hygon; // ebx=='ogyH', ecx=='eniu', edx=='neGn'
252 return v;
/*! \brief Detect second AVX-512 FMA from the processor name
 *
 * Should only be called for processors already determined to support AVX-512.
 *
 * The checks rely on fixed character offsets in the brand string, i.e. on
 * names of the form "Intel(R) Xeon(R) Gold 5122 CPU ...", where "Xeon" starts
 * at index 9, the product line at index 17 and the model number at index 22.
 *
 *  \param [in] brand x86 processor name
 *  \param [in] model x86 model
 *  \return True if second FMA present
 */
bool detectProcCpuInfoSecondAvx512FMA(const std::string& brand, int model)
{
    // Skylake server
    if (model == 0x55)
    {
        // detect Xeon
        if (brand.find("Xeon") == 9)
        {
            // detect Silver or Bronze or specific models
            if (brand.find("Silver") == 17 || brand.find("Bronze") == 17
                || (brand.find('W') == 17 && brand.find('0') == 21)   // detect Xeon W 210x
                || (brand.find('D') == 17 && brand.find("21") == 19)) // detect Xeon D 2xxx
            {
                return false;
            }
            // detect Gold 5xxx
            else if (brand.find("Gold") == 17 && brand.find('5') == 22)
            {
                // The search for "22" must start at index 24: an unanchored
                // search finds the first "22" of "5222" at index 23, which
                // made Gold 5222 look like a single-FMA part.
                return (brand.find("53") == 22 ||     // detect Cooper Lake
                        brand.find("22", 24) == 24);  // detect 5[12]22
            }
        }
        return true;
    }
    // Cannon Lake client
    if (model == 0x66)
    {
        return false;
    }
    // Ice Lake client
    if (model == 0x7d || model == 0x7e)
    {
        return false;
    }
    // This is the right default...
    return true;
}
301 /*! \brief Simple utility function to set/clear feature in a set
303 * \param featureSet Pointer to the feature set to update
304 * \param feature The specific feature to set/clear
305 * \param registerValue Register value (returned from cpuid)
306 * \param bit Bit to check in registerValue. The feature will be
307 * added to the featureSet if this bit is set.
309 * \note Nothing is done if the bit is not set. In particular, this will not
310 * erase anything if the feature already exists in the set.
312 void setFeatureFromBit(std::set<CpuInfo::Feature>* featureSet,
313 CpuInfo::Feature feature,
314 unsigned int registerValue,
315 unsigned char bit)
317 if (registerValue & (1 << bit))
319 featureSet->insert(feature);
323 /*! \brief Process x86 cpuinfo features that are common to Intel and AMD CPUs
325 * \param[out] brand String where to write the x86 brand string
326 * \param[out] family Major version of processor
327 * \param[out] model Middle version of processor
328 * \param[out] stepping Minor version of processor
329 * \param[out] features Feature set where supported features are inserted
331 void detectX86Features(std::string* brand, int* family, int* model, int* stepping, std::set<CpuInfo::Feature>* features)
333 unsigned int eax, ebx, ecx, edx;
335 // Return if we cannot execute any levels
336 if (executeX86CpuID(0x0, 0, &eax, &ebx, &ecx, &edx) != 0)
338 return;
340 unsigned int maxStdLevel = eax;
342 if (maxStdLevel >= 0x1)
344 executeX86CpuID(0x1, 0, &eax, &ebx, &ecx, &edx);
346 *family = ((eax & 0x0ff00000) >> 20) + ((eax & 0x00000f00) >> 8);
347 *model = ((eax & 0x000f0000) >> 12) + ((eax & 0x000000f0) >> 4);
348 *stepping = (eax & 0x0000000f);
350 setFeatureFromBit(features, CpuInfo::Feature::X86_Sse3, ecx, 0);
351 setFeatureFromBit(features, CpuInfo::Feature::X86_Pclmuldq, ecx, 1);
352 setFeatureFromBit(features, CpuInfo::Feature::X86_Ssse3, ecx, 9);
353 setFeatureFromBit(features, CpuInfo::Feature::X86_Fma, ecx, 12);
354 setFeatureFromBit(features, CpuInfo::Feature::X86_Cx16, ecx, 13);
355 setFeatureFromBit(features, CpuInfo::Feature::X86_Pdcm, ecx, 15);
356 setFeatureFromBit(features, CpuInfo::Feature::X86_Pcid, ecx, 17);
357 setFeatureFromBit(features, CpuInfo::Feature::X86_Sse4_1, ecx, 19);
358 setFeatureFromBit(features, CpuInfo::Feature::X86_Sse4_2, ecx, 20);
359 setFeatureFromBit(features, CpuInfo::Feature::X86_X2Apic, ecx, 21);
360 setFeatureFromBit(features, CpuInfo::Feature::X86_Popcnt, ecx, 23);
361 setFeatureFromBit(features, CpuInfo::Feature::X86_Tdt, ecx, 24);
362 setFeatureFromBit(features, CpuInfo::Feature::X86_Aes, ecx, 25);
363 setFeatureFromBit(features, CpuInfo::Feature::X86_Avx, ecx, 28);
364 setFeatureFromBit(features, CpuInfo::Feature::X86_F16C, ecx, 29);
365 setFeatureFromBit(features, CpuInfo::Feature::X86_Rdrnd, ecx, 30);
367 setFeatureFromBit(features, CpuInfo::Feature::X86_Pse, edx, 3);
368 setFeatureFromBit(features, CpuInfo::Feature::X86_Msr, edx, 5);
369 setFeatureFromBit(features, CpuInfo::Feature::X86_Cx8, edx, 8);
370 setFeatureFromBit(features, CpuInfo::Feature::X86_Apic, edx, 9);
371 setFeatureFromBit(features, CpuInfo::Feature::X86_Cmov, edx, 15);
372 setFeatureFromBit(features, CpuInfo::Feature::X86_Clfsh, edx, 19);
373 setFeatureFromBit(features, CpuInfo::Feature::X86_Mmx, edx, 23);
374 setFeatureFromBit(features, CpuInfo::Feature::X86_Sse2, edx, 26);
375 setFeatureFromBit(features, CpuInfo::Feature::X86_Htt, edx, 28);
378 // Check whether Hyper-threading is really possible to enable in the hardware,
379 // not just technically supported by this generation of processors
380 if ((features->count(CpuInfo::Feature::X86_Htt) != 0U) && maxStdLevel >= 0x4)
382 executeX86CpuID(0x1, 0, &eax, &ebx, &ecx, &edx);
383 unsigned int maxLogicalCores = (ebx >> 16) & 0x0ff;
384 executeX86CpuID(0x4, 0, &eax, &ebx, &ecx, &edx);
385 unsigned int maxPhysicalCores = ((eax >> 26) & 0x3f) + 1;
386 if (maxLogicalCores / maxPhysicalCores < 2)
388 features->erase(CpuInfo::Feature::X86_Htt);
392 if (executeX86CpuID(0x80000000, 0, &eax, &ebx, &ecx, &edx) != 0)
394 // No point in continuing if we don't support any extended levels
395 return;
397 unsigned int maxExtLevel = eax;
399 if (maxExtLevel >= 0x80000001)
401 executeX86CpuID(0x80000001, 0, &eax, &ebx, &ecx, &edx);
403 setFeatureFromBit(features, CpuInfo::Feature::X86_Lahf, ecx, 0);
404 setFeatureFromBit(features, CpuInfo::Feature::X86_Sse4A, ecx, 6);
405 setFeatureFromBit(features, CpuInfo::Feature::X86_MisalignSse, ecx, 7);
406 setFeatureFromBit(features, CpuInfo::Feature::X86_Xop, ecx, 11);
407 setFeatureFromBit(features, CpuInfo::Feature::X86_Fma4, ecx, 16);
408 setFeatureFromBit(features, CpuInfo::Feature::X86_PDPE1GB, edx, 26);
409 setFeatureFromBit(features, CpuInfo::Feature::X86_Rdtscp, edx, 27);
412 if (maxExtLevel >= 0x80000005)
414 // Get the x86 CPU brand string (3 levels, 16 bytes in each)
415 brand->clear();
416 for (unsigned int level = 0x80000002; level < 0x80000005; level++)
418 executeX86CpuID(level, 0, &eax, &ebx, &ecx, &edx);
419 // Add eax, ebx, ecx, edx contents as 4 chars each to the brand string
420 brand->append(reinterpret_cast<const char*>(&eax), sizeof(eax));
421 brand->append(reinterpret_cast<const char*>(&ebx), sizeof(ebx));
422 brand->append(reinterpret_cast<const char*>(&ecx), sizeof(ecx));
423 brand->append(reinterpret_cast<const char*>(&edx), sizeof(edx));
425 trimString(brand);
428 if (maxStdLevel >= 0x7)
430 executeX86CpuID(0x7, 0, &eax, &ebx, &ecx, &edx);
432 setFeatureFromBit(features, CpuInfo::Feature::X86_Hle, ebx, 4);
433 setFeatureFromBit(features, CpuInfo::Feature::X86_Avx2, ebx, 5);
434 setFeatureFromBit(features, CpuInfo::Feature::X86_Rtm, ebx, 11);
435 setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512F, ebx, 16);
436 setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512PF, ebx, 26);
437 setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512ER, ebx, 27);
438 setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512CD, ebx, 28);
439 setFeatureFromBit(features, CpuInfo::Feature::X86_Sha, ebx, 29);
440 setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512BW, ebx, 30);
441 setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512VL, ebx, 31);
443 executeX86CpuID(0x7, 0x1, &eax, &ebx, &ecx, &edx);
444 setFeatureFromBit(features, CpuInfo::Feature::X86_Avx512BF16, eax, 5);
446 if (features->count(CpuInfo::Feature::X86_Avx512F) != 0)
448 // Only checking if the CPU supports AVX-512. There is no CPUID bit for this.
449 if (detectProcCpuInfoSecondAvx512FMA(*brand, *model))
451 features->insert(CpuInfo::Feature::X86_Avx512secondFMA);
457 if (maxExtLevel >= 0x80000007)
459 executeX86CpuID(0x80000007, 0, &eax, &ebx, &ecx, &edx);
461 setFeatureFromBit(features, CpuInfo::Feature::X86_NonstopTsc, edx, 8);
466 /*! \brief Return a vector with x86 APIC IDs for all threads
468 * \param haveX2Apic True if the processors supports x2APIC, otherwise vanilla APIC.
470 * \returns A new std::vector of unsigned integer APIC IDs, one for each
471 * logical processor in the system.
473 std::vector<unsigned int> detectX86ApicIDs(bool gmx_unused haveX2Apic)
475 std::vector<unsigned int> apicID;
477 // We cannot just ask for all APIC IDs, but must force execution on each
478 // hardware thread and extract the APIC id there.
479 #if HAVE_SCHED_AFFINITY && defined HAVE_SYSCONF
480 unsigned int eax, ebx, ecx, edx;
481 unsigned int nApic = sysconf(_SC_NPROCESSORS_ONLN);
482 cpu_set_t saveCpuSet;
483 cpu_set_t cpuSet;
484 sched_getaffinity(0, sizeof(cpu_set_t), &saveCpuSet);
485 CPU_ZERO(&cpuSet);
486 for (unsigned int i = 0; i < nApic; i++)
488 CPU_SET(i, &cpuSet);
489 sched_setaffinity(0, sizeof(cpu_set_t), &cpuSet);
490 if (haveX2Apic)
492 executeX86CpuID(0xb, 0, &eax, &ebx, &ecx, &edx);
493 apicID.push_back(edx);
495 else
497 executeX86CpuID(0x1, 0, &eax, &ebx, &ecx, &edx);
498 apicID.push_back(ebx >> 24);
500 CPU_CLR(i, &cpuSet);
502 sched_setaffinity(0, sizeof(cpu_set_t), &saveCpuSet);
503 #elif GMX_NATIVE_WINDOWS
504 unsigned int eax, ebx, ecx, edx;
505 SYSTEM_INFO sysinfo;
506 GetSystemInfo(&sysinfo);
507 unsigned int nApic = sysinfo.dwNumberOfProcessors;
508 unsigned int saveAffinity = SetThreadAffinityMask(GetCurrentThread(), 1);
509 for (DWORD_PTR i = 0; i < nApic; i++)
511 SetThreadAffinityMask(GetCurrentThread(), (((DWORD_PTR)1) << i));
512 Sleep(0);
513 if (haveX2Apic)
515 executeX86CpuID(0xb, 0, &eax, &ebx, &ecx, &edx);
516 apicID.push_back(edx);
518 else
520 executeX86CpuID(0x1, 0, &eax, &ebx, &ecx, &edx);
521 apicID.push_back(ebx >> 24);
524 SetThreadAffinityMask(GetCurrentThread(), saveAffinity);
525 #endif
526 return apicID;
/*! \brief Utility to renumber indices extracted from APIC IDs
 *
 *  \param v  Vector with unsigned integer indices, modified in place
 *
 *  Every element is replaced by the rank of its value among the distinct
 *  values present in the vector, counting from 0 in ascending order. For
 *  example, the vector {0,1,2,8,9,10,8,9,10,0,1,2} will be rewritten
 *  to {0,1,2,3,4,5,3,4,5,0,1,2}.
 */
void renumberIndex(std::vector<unsigned int>* v)
{
    // Ascending list of the distinct values found in the input
    std::vector<unsigned int> distinctValues(*v);
    std::sort(distinctValues.begin(), distinctValues.end());
    auto lastDistinct = std::unique(distinctValues.begin(), distinctValues.end());
    distinctValues.resize(std::distance(distinctValues.begin(), lastDistinct));

    for (unsigned int& entry : *v)
    {
        // Each entry is present in distinctValues, so the search terminates
        std::size_t rank = 0;
        while (distinctValues[rank] != entry)
        {
            rank++;
        }
        entry = static_cast<unsigned int>(rank);
    }
}
/*! \brief The layout of the bits in the APIC ID
 *
 *  Both widths default to zero, so that a default-constructed layout never
 *  holds indeterminate values.
 */
struct ApicIdLayout
{
    unsigned int hwThreadBits = 0; //!< The number of least significant bits for hw-threads
    unsigned int coreBits     = 0; //!< The number of core bits following the hw-thread bits
};
563 /*! \brief Detect the APIC ID layout for x2APIC
565 ApicIdLayout detectX2ApicIdLayout()
567 ApicIdLayout layout;
569 unsigned int eax;
570 unsigned int ebx;
571 unsigned int ecx;
572 unsigned int edx;
573 executeX86CpuID(0xb, 0, &eax, &ebx, &ecx, &edx);
574 layout.hwThreadBits = eax & 0x1f;
575 executeX86CpuID(0xb, 1, &eax, &ebx, &ecx, &edx);
576 layout.coreBits = (eax & 0x1f) - layout.hwThreadBits;
578 return layout;
581 /*! \brief Detect the APIC ID layout for standard APIC or xAPIC on AMD
583 * \param[in] maxExtLevel The largest CPUID extended function input value supported by the processor implementation
585 ApicIdLayout detectAmdApicIdLayout(unsigned int maxExtLevel)
587 ApicIdLayout layout;
589 unsigned int eax;
590 unsigned int ebx;
591 unsigned int ecx;
592 unsigned int edx;
593 executeX86CpuID(0x1, 0, &eax, &ebx, &ecx, &edx);
594 int family = ((eax & 0x0ff00000) >> 20) + ((eax & 0x00000f00) >> 8);
595 executeX86CpuID(0x80000001, 0, &eax, &ebx, &ecx, &edx);
596 bool haveExtendedTopology = (ecx & (1 << 22)) != 0U;
598 // NOTE: Here we assume 1 thread per core, unless we have family >= 17h
599 layout.hwThreadBits = 0;
600 if (family >= 0x17 && haveExtendedTopology && maxExtLevel >= 0x8000001e)
602 executeX86CpuID(0x8000001e, 1, &eax, &ebx, &ecx, &edx);
603 int numThreadsPerCore = ((ebx >> 8) & 0xff) + 1;
604 // NOTE: The AMD documentation only specifies the layout of apicid
605 // when we have 1 or 2 threads per core.
606 while (numThreadsPerCore > (1 << layout.hwThreadBits))
608 layout.hwThreadBits++;
612 // Get number of core bits in apic ID - try modern extended method first
613 executeX86CpuID(0x80000008, 0, &eax, &ebx, &ecx, &edx);
614 layout.coreBits = (ecx >> 12) & 0xf;
615 if (layout.coreBits == 0)
617 // Legacy method for old single/dual core AMD CPUs
618 int i = ecx & 0xf;
619 while (i >> layout.coreBits)
621 layout.coreBits++;
625 return layout;
628 /*! \brief Try to detect basic CPU topology information using x86 cpuid
630 * If x2APIC support is present, this is our first choice, otherwise we
631 * attempt to use old vanilla APIC.
633 * \return A new vector of entries with socket, core, hwthread information
634 * for each logical processor.
636 std::vector<CpuInfo::LogicalProcessor> detectX86LogicalProcessors()
638 unsigned int eax;
639 unsigned int ebx;
640 unsigned int ecx;
641 unsigned int edx;
642 unsigned int maxStdLevel;
643 unsigned int maxExtLevel;
644 bool haveApic;
645 bool haveX2Apic;
647 std::vector<CpuInfo::LogicalProcessor> logicalProcessors;
649 // Find largest standard & extended level input values allowed
650 executeX86CpuID(0x0, 0, &eax, &ebx, &ecx, &edx);
651 maxStdLevel = eax;
652 executeX86CpuID(0x80000000, 0, &eax, &ebx, &ecx, &edx);
653 maxExtLevel = eax;
655 if (maxStdLevel >= 0x1)
657 executeX86CpuID(0x1, 0, &eax, &ebx, &ecx, &edx);
658 haveX2Apic = ((ecx & (1 << 21)) != 0U) && maxStdLevel >= 0xb;
659 haveApic = ((edx & (1 << 9)) != 0U) && maxExtLevel >= 0x80000008;
661 else
663 haveX2Apic = false;
664 haveApic = false;
667 if (haveX2Apic || haveApic)
669 ApicIdLayout layout;
670 // Get bits for cores and hardware threads
671 if (haveX2Apic)
673 layout = detectX2ApicIdLayout();
675 else // haveApic
677 if (detectX86Vendor() == CpuInfo::Vendor::Amd || detectX86Vendor() == CpuInfo::Vendor::Hygon)
679 layout = detectAmdApicIdLayout(maxExtLevel);
681 if (layout.hwThreadBits > 1)
683 // At the time of writing this code we do not know what
684 // to do with more than 2 threads, so return empty.
685 return logicalProcessors;
688 else
690 // We do not know the APIC ID layout, return empty.
691 return logicalProcessors;
695 std::vector<unsigned int> apicID = detectX86ApicIDs(haveX2Apic);
697 if (!apicID.empty())
699 // APIC IDs can be buggy, and it is always a mess. Typically more bits are
700 // reserved than needed, and the numbers might not increment by 1 even in
701 // a single socket or core. Extract, renumber, and check that things make sense.
702 unsigned int hwThreadMask = (1 << layout.hwThreadBits) - 1;
703 unsigned int coreMask = (1 << layout.coreBits) - 1;
704 std::vector<unsigned int> hwThreadRanks;
705 std::vector<unsigned int> coreRanks;
706 std::vector<unsigned int> socketRanks;
708 for (auto a : apicID)
710 hwThreadRanks.push_back(static_cast<int>(a & hwThreadMask));
711 coreRanks.push_back(static_cast<int>((a >> layout.hwThreadBits) & coreMask));
712 socketRanks.push_back(static_cast<int>(a >> (layout.coreBits + layout.hwThreadBits)));
715 renumberIndex(&hwThreadRanks);
716 renumberIndex(&coreRanks);
717 renumberIndex(&socketRanks);
719 unsigned int hwThreadRankSize =
720 1 + *std::max_element(hwThreadRanks.begin(), hwThreadRanks.end());
721 unsigned int coreRankSize = 1 + *std::max_element(coreRanks.begin(), coreRanks.end());
722 unsigned int socketRankSize = 1 + *std::max_element(socketRanks.begin(), socketRanks.end());
724 if (socketRankSize * coreRankSize * hwThreadRankSize == apicID.size())
726 // Alright, everything looks consistent, so put it in the result
727 for (std::size_t i = 0; i < apicID.size(); i++)
729 // While the internal APIC IDs are always unsigned integers, we also cast to
730 // plain integers for the externally exposed vectors, since that will make
731 // it possible to use '-1' for invalid entries in the future.
732 logicalProcessors.push_back(
733 { int(socketRanks[i]), int(coreRanks[i]), int(hwThreadRanks[i]) });
738 return logicalProcessors; // Will only have contents if everything worked
742 /******************************************************************************
744 * Generic Linux detection by parsing /proc/cpuinfo *
746 ******************************************************************************/
748 /*! \brief Parse /proc/cpuinfo into a simple string map
750 * This routine will read the contents of /proc/cpuinfo, and for each
751 * line that is not empty we will assign the (trimmed) string to the right of
752 * the colon as a key, and the left-hand side as the value in the map.
753 * For multi-processor systems where lines are repeated the latter lines will
754 * overwrite the first occurrence.
756 * \return New map with the contents. If the file is not available, the returned
757 * map will be empty.
759 std::map<std::string, std::string> parseProcCpuInfo()
761 std::ifstream procCpuInfo("/proc/cpuinfo");
762 std::string line;
763 std::map<std::string, std::string> cpuInfo;
765 while (std::getline(procCpuInfo, line))
767 if (!line.empty())
769 std::stringstream iss(line);
770 std::string key;
771 std::string val;
772 std::getline(iss, key, ':'); // part before colon
773 std::getline(iss, val); // part after colon
774 trimString(&key);
775 trimString(&val);
776 // put it in the map. This will overwrite previous processors, but we don't care.
777 cpuInfo[key] = val;
780 return cpuInfo;
784 /*! \brief Try to detect vendor from /proc/cpuinfo
786 * \param cpuInfo Map returned from parseProcCpuinfo()
788 * This routine tries to match a few common labels in /proc/cpuinfo to see if
789 * they begin with the name of a standard vendor. If the file cannot be read
790 * or if no match is found, we return gmx::CpuInfo::Vendor::Unknown.
792 CpuInfo::Vendor detectProcCpuInfoVendor(const std::map<std::string, std::string>& cpuInfo)
794 const std::map<std::string, CpuInfo::Vendor> testVendors = {
795 { "GenuineIntel", CpuInfo::Vendor::Intel },
796 { "Intel", CpuInfo::Vendor::Intel },
797 { "AuthenticAmd", CpuInfo::Vendor::Amd },
798 { "AMD", CpuInfo::Vendor::Amd },
799 { "ARM", CpuInfo::Vendor::Arm },
800 { "AArch64", CpuInfo::Vendor::Arm },
801 { "Fujitsu", CpuInfo::Vendor::Fujitsu },
802 { "IBM", CpuInfo::Vendor::Ibm },
803 { "POWER", CpuInfo::Vendor::Ibm },
804 { "Oracle", CpuInfo::Vendor::Oracle },
805 { "HygonGenuine", CpuInfo::Vendor::Hygon },
806 { "Hygon", CpuInfo::Vendor::Hygon },
809 // For each label in /proc/cpuinfo, compare the value to the name in the
810 // testNames map above, and if it's a match return the vendor.
811 for (auto& l : { "vendor_id", "vendor", "manufacture", "model", "processor", "cpu" })
813 if (cpuInfo.count(l) != 0U)
815 // there was a line with this left-hand side in /proc/cpuinfo
816 const std::string& s1 = cpuInfo.at(l);
818 for (auto& t : testVendors)
820 const std::string& s2 = t.first;
822 // If the entire name we are testing (s2) matches the first part of
823 // the string after the colon in /proc/cpuinfo (s1) we found our vendor
824 if (std::equal(s2.begin(), s2.end(), s1.begin(), [](const char& x, const char& y) -> bool {
825 return tolower(x) == tolower(y);
828 return t.second;
833 return CpuInfo::Vendor::Unknown;
837 /*! \brief Detect IBM processor name and features from /proc/cpuinfo
839 * \param cpuInfo Map returned from parseProcCpuinfo()
840 * \param[out] brand String where to write the brand string
841 * \param[out] features Feature set where supported features are inserted
843 * This routine tries to match a few common labels in /proc/cpuinfo to see if
844 * we can find the processor name and features. It is likely fragile.
846 void detectProcCpuInfoIbm(const std::map<std::string, std::string>& cpuInfo,
847 std::string* brand,
848 std::set<CpuInfo::Feature>* features)
850 // Get brand string from 'cpu' label if present, otherwise 'Processor'
851 if (cpuInfo.count("cpu") != 0U)
853 *brand = cpuInfo.at("cpu");
855 else if (cpuInfo.count("Processor") != 0U)
857 *brand = cpuInfo.at("Processor");
860 if (brand->find("A2") != std::string::npos)
862 // If the processor identification contains "A2", this is BlueGene/Q with QPX
863 features->insert(CpuInfo::Feature::Ibm_Qpx);
866 for (auto& l : { "model name", "model", "Processor", "cpu" })
868 if (cpuInfo.count(l) != 0U)
870 std::string s1 = cpuInfo.at(l);
871 std::transform(s1.begin(), s1.end(), s1.begin(), ::tolower);
873 if (s1.find("altivec") != std::string::npos)
875 features->insert(CpuInfo::Feature::Ibm_Vmx);
876 // If this is a power6, we only have VMX. All later processors have VSX.
877 if (s1.find("power6") == std::string::npos)
879 features->insert(CpuInfo::Feature::Ibm_Vsx);
887 /*! \brief Detect ARM processor name and features from /proc/cpuinfo
889 * \param cpuInfo Map returned from parseProcCpuinfo()
890 * \param[out] brand String where to write the brand string
891 * \param[out] family Major version of processor
892 * \param[out] model Middle version of processor
893 * \param[out] stepping Minor version of processor
894 * \param[out] features Feature set where supported features are inserted
896 * This routine tries to match a few common labels in /proc/cpuinfo to see if
897 * we can find the processor name and features. It is likely fragile.
899 void detectProcCpuInfoArm(const std::map<std::string, std::string>& cpuInfo,
900 std::string* brand,
901 int* family,
902 int* model,
903 int* stepping,
904 std::set<CpuInfo::Feature>* features)
906 if (cpuInfo.count("Processor") != 0U)
908 *brand = cpuInfo.at("Processor");
910 else if (cpuInfo.count("model name") != 0U)
912 *brand = cpuInfo.at("model name");
915 if (cpuInfo.count("CPU architecture") != 0U)
917 *family = std::strtol(cpuInfo.at("CPU architecture").c_str(), nullptr, 10);
918 // For some 64-bit CPUs it appears to say 'AArch64' instead
919 if (*family == 0 && cpuInfo.at("CPU architecture").find("AArch64") != std::string::npos)
921 *family = 8; // fragile - no idea how a future ARMv9 will be represented in this case
924 if (cpuInfo.count("CPU variant") != 0U)
926 *model = std::strtol(cpuInfo.at("CPU variant").c_str(), nullptr, 16);
928 if (cpuInfo.count("CPU revision") != 0U)
930 *stepping = std::strtol(cpuInfo.at("CPU revision").c_str(), nullptr, 10);
933 if (cpuInfo.count("Features") != 0U)
935 const std::string& s = cpuInfo.at("Features");
936 if (s.find("neon") != std::string::npos)
938 features->insert(CpuInfo::Feature::Arm_Neon);
940 if (s.find("asimd") != std::string::npos)
942 // At least Jetson TX1 runs a 32-bit environment by default, although
943 // the kernel is 64-bits, and reports asimd feature flags. We cannot
944 // use Neon-asimd in this case, so make sure we are on a 64-bit platform.
945 if (sizeof(void*) == 8)
947 features->insert(CpuInfo::Feature::Arm_NeonAsimd);
954 /*! \brief Try to detect vendor, cpu and features from /proc/cpuinfo
956 * \param[out] vendor Detected hardware vendor
957 * \param[out] brand String where to write the brand string
958 * \param[out] family Major version of processor
959 * \param[out] model Middle version of processor
960 * \param[out] stepping Minor version of processor
961 * \param[out] features Feature set where supported features are inserted
963 * This routine reads the /proc/cpuinfo file into a map and calls subroutines
964 * that attempt to parse by matching keys and values to known strings. It is
965 * much more fragile than our x86 detection, but it does not depend on
966 * specific system calls, intrinsics or assembly instructions.
968 void detectProcCpuInfo(CpuInfo::Vendor* vendor,
969 std::string* brand,
970 int* family,
971 int* model,
972 int* stepping,
973 std::set<CpuInfo::Feature>* features)
975 std::map<std::string, std::string> cpuInfo = parseProcCpuInfo();
977 if (*vendor == CpuInfo::Vendor::Unknown)
979 *vendor = detectProcCpuInfoVendor(cpuInfo);
982 // Unfortunately there is no standard for contents in /proc/cpuinfo. We cannot
983 // indiscriminately look for e.g. 'cpu' since it could be either name or an index.
984 // To handle this slightly better we use one subroutine per vendor.
985 switch (*vendor)
987 case CpuInfo::Vendor::Ibm: detectProcCpuInfoIbm(cpuInfo, brand, features); break;
989 case CpuInfo::Vendor::Arm:
990 detectProcCpuInfoArm(cpuInfo, brand, family, model, stepping, features);
991 break;
993 default:
994 // We only have a single check for fujitsu for now
995 #ifdef __HPC_ACE__
996 features->insert(CpuInfo::Feature::Fujitsu_HpcAce);
997 #endif
998 break;
1001 /*! \endcond */
1002 } // namespace
1005 // static
1006 CpuInfo CpuInfo::detect()
1008 CpuInfo result;
1010 if (c_architecture == Architecture::X86)
1012 result.vendor_ = detectX86Vendor();
1014 if (result.vendor_ == CpuInfo::Vendor::Intel)
1016 result.features_.insert(CpuInfo::Feature::X86_Intel);
1018 else if (result.vendor_ == CpuInfo::Vendor::Amd)
1020 result.features_.insert(CpuInfo::Feature::X86_Amd);
1022 else if (result.vendor_ == CpuInfo::Vendor::Hygon)
1024 result.features_.insert(CpuInfo::Feature::X86_Hygon);
1026 detectX86Features(&result.brandString_, &result.family_, &result.model_, &result.stepping_,
1027 &result.features_);
1028 result.logicalProcessors_ = detectX86LogicalProcessors();
1030 else
1032 // Not x86
1033 if (c_architecture == Architecture::Arm)
1035 result.vendor_ = CpuInfo::Vendor::Arm;
1037 else if (c_architecture == Architecture::PowerPC)
1039 result.vendor_ = CpuInfo::Vendor::Ibm;
1042 #if defined __aarch64__ || (defined _M_ARM && _M_ARM >= 8)
1043 result.features_.insert(Feature::Arm_Neon); // ARMv8 always has Neon
1044 result.features_.insert(Feature::Arm_NeonAsimd); // ARMv8 always has Neon-asimd
1045 #endif
1047 #if defined sun
1048 result.vendor_ = CpuInfo::Vendor::Oracle;
1049 #endif
1051 // On Linux we might be able to find information in /proc/cpuinfo. If vendor or brand
1052 // is set to a known value this routine will not overwrite it.
1053 detectProcCpuInfo(&result.vendor_, &result.brandString_, &result.family_, &result.model_,
1054 &result.stepping_, &result.features_);
1057 if (!result.logicalProcessors_.empty())
1059 result.supportLevel_ = CpuInfo::SupportLevel::LogicalProcessorInfo;
1061 else if (!result.features_.empty())
1063 result.supportLevel_ = CpuInfo::SupportLevel::Features;
1065 else if (result.vendor_ != CpuInfo::Vendor::Unknown
1066 || result.brandString_ != "Unknown CPU brand")
1068 result.supportLevel_ = CpuInfo::SupportLevel::Name;
1070 else
1072 result.supportLevel_ = CpuInfo::SupportLevel::None;
1075 return result;
1078 CpuInfo::CpuInfo() :
1079 vendor_(CpuInfo::Vendor::Unknown),
1080 brandString_("Unknown CPU brand"),
1081 family_(0),
1082 model_(0),
1083 stepping_(0)
1087 const std::string& CpuInfo::vendorString() const
1089 static const std::map<Vendor, std::string> vendorStrings = {
1090 { Vendor::Unknown, "Unknown vendor" }, { Vendor::Intel, "Intel" }, { Vendor::Amd, "AMD" },
1091 { Vendor::Fujitsu, "Fujitsu" }, { Vendor::Ibm, "IBM" }, { Vendor::Arm, "ARM" },
1092 { Vendor::Oracle, "Oracle" }, { Vendor::Hygon, "Hygon" },
1095 return vendorStrings.at(vendor_);
1099 const std::string& CpuInfo::featureString(Feature f)
1101 static const std::map<Feature, std::string> featureStrings = {
1102 { Feature::X86_Aes, "aes" },
1103 { Feature::X86_Amd, "amd" },
1104 { Feature::X86_Apic, "apic" },
1105 { Feature::X86_Avx, "avx" },
1106 { Feature::X86_Avx2, "avx2" },
1107 { Feature::X86_Avx512F, "avx512f" },
1108 { Feature::X86_Avx512PF, "avx512pf" },
1109 { Feature::X86_Avx512ER, "avx512er" },
1110 { Feature::X86_Avx512CD, "avx512cd" },
1111 { Feature::X86_Avx512BW, "avx512bw" },
1112 { Feature::X86_Avx512VL, "avx512vl" },
1113 { Feature::X86_Avx512BF16, "avx512bf16" },
1114 { Feature::X86_Avx512secondFMA, "avx512secondFMA" },
1115 { Feature::X86_Clfsh, "clfsh" },
1116 { Feature::X86_Cmov, "cmov" },
1117 { Feature::X86_Cx8, "cx8" },
1118 { Feature::X86_Cx16, "cx16" },
1119 { Feature::X86_F16C, "f16c" },
1120 { Feature::X86_Fma, "fma" },
1121 { Feature::X86_Fma4, "fma4" },
1122 { Feature::X86_Hle, "hle" },
1123 { Feature::X86_Htt, "htt" },
1124 { Feature::X86_Intel, "intel" },
1125 { Feature::X86_Lahf, "lahf" },
1126 { Feature::X86_MisalignSse, "misalignsse" },
1127 { Feature::X86_Mmx, "mmx" },
1128 { Feature::X86_Msr, "msr" },
1129 { Feature::X86_NonstopTsc, "nonstop_tsc" },
1130 { Feature::X86_Pcid, "pcid" },
1131 { Feature::X86_Pclmuldq, "pclmuldq" },
1132 { Feature::X86_Pdcm, "pdcm" },
1133 { Feature::X86_PDPE1GB, "pdpe1gb" },
1134 { Feature::X86_Popcnt, "popcnt" },
1135 { Feature::X86_Pse, "pse" },
1136 { Feature::X86_Rdrnd, "rdrnd" },
1137 { Feature::X86_Rdtscp, "rdtscp" },
1138 { Feature::X86_Rtm, "rtm" },
1139 { Feature::X86_Sha, "sha" },
1140 { Feature::X86_Sse2, "sse2" },
1141 { Feature::X86_Sse3, "sse3" },
1142 { Feature::X86_Sse4A, "sse4a" },
1143 { Feature::X86_Sse4_1, "sse4.1" },
1144 { Feature::X86_Sse4_2, "sse4.2" },
1145 { Feature::X86_Ssse3, "ssse3" },
1146 { Feature::X86_Tdt, "tdt" },
1147 { Feature::X86_X2Apic, "x2apic" },
1148 { Feature::X86_Xop, "xop" },
1149 { Feature::Arm_Neon, "neon" },
1150 { Feature::Arm_NeonAsimd, "neon_asimd" },
1151 { Feature::Ibm_Qpx, "qpx" },
1152 { Feature::Ibm_Vmx, "vmx" },
1153 { Feature::Ibm_Vsx, "vsx" },
1154 { Feature::Fujitsu_HpcAce, "hpc-ace" },
1155 { Feature::X86_Hygon, "hygon" }
1157 return featureStrings.at(f);
1161 bool cpuIsX86Nehalem(const CpuInfo& cpuInfo)
1163 return (cpuInfo.vendor() == gmx::CpuInfo::Vendor::Intel && cpuInfo.family() == 6
1164 && (cpuInfo.model() == 0x2E || cpuInfo.model() == 0x1A || cpuInfo.model() == 0x1E
1165 || cpuInfo.model() == 0x2F || cpuInfo.model() == 0x2C || cpuInfo.model() == 0x25));
1168 } // namespace gmx
1170 #ifdef GMX_CPUINFO_STANDALONE
1171 int main(int argc, char** argv)
1173 if (argc < 2)
1175 fprintf(stdout,
1176 "Usage:\n\n%s [flags]\n\n"
1177 "Available flags:\n"
1178 "-vendor Print CPU vendor.\n"
1179 "-brand Print CPU brand string.\n"
1180 "-family Print CPU family version.\n"
1181 "-model Print CPU model version.\n"
1182 "-stepping Print CPU stepping version.\n"
1183 "-features Print CPU feature flags.\n",
1184 argv[0]);
1185 exit(1);
1188 std::string arg(argv[1]);
1189 gmx::CpuInfo cpuInfo(gmx::CpuInfo::detect());
1191 if (arg == "-vendor")
1193 printf("%s\n", cpuInfo.vendorString().c_str());
1195 else if (arg == "-brand")
1197 printf("%s\n", cpuInfo.brandString().c_str());
1199 else if (arg == "-family")
1201 printf("%d\n", cpuInfo.family());
1203 else if (arg == "-model")
1205 printf("%d\n", cpuInfo.model());
1207 else if (arg == "-stepping")
1209 printf("%d\n", cpuInfo.stepping());
1211 else if (arg == "-features")
1213 // Separate the feature strings with spaces. Note that in the
1214 // GROMACS cmake code, surrounding whitespace is first
1215 // stripped by the CPU detection routine, and then added back
1216 // in the code for making the SIMD suggestion.
1217 for (auto& f : cpuInfo.featureSet())
1219 printf("%s ", cpuInfo.featureString(f).c_str());
1221 printf("\n");
1223 else if (arg == "-topology")
1225 // Undocumented debug option, usually not present in standalone version
1226 for (auto& t : cpuInfo.logicalProcessors())
1228 printf("%3u %3u %3u\n", t.socketRankInMachine, t.coreRankInSocket, t.hwThreadRankInCore);
1231 return 0;
1233 #endif