added Verlet scheme and NxN non-bonded functionality
[gromacs.git] / src / gmxlib / gmx_cpuid.c
blob90c73f8318ba4c4f0ce0d45cb94bd12228cda38f
1 /* -*- mode: c; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; c-file-style: "stroustrup"; -*-
3 *
4 * This file is part of GROMACS.
5 * Copyright (c) 2012-
7 * Written by the Gromacs development team under coordination of
8 * David van der Spoel, Berk Hess, and Erik Lindahl.
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * To help us fund GROMACS development, we humbly ask that you cite
16 * the research papers on the package. Check out http://www.gromacs.org
18 * And Hey:
19 * Gnomes, ROck Monsters And Chili Sauce
21 #ifdef HAVE_CONFIG_H
22 #include <config.h>
23 #endif
25 #ifdef HAVE_SCHED_H
26 #define _GNU_SOURCE
27 #include <sched.h>
28 #endif
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <ctype.h>
34 #ifdef _MSC_VER
35 /* MSVC definition for __cpuid() */
36 #include <intrin.h>
37 #endif
38 #ifdef HAVE_UNISTD_H
39 /* sysconf() definition */
40 #include <unistd.h>
41 #endif
46 #include "gmx_cpuid.h"
49 /* Global constant character strings corresponding to our enumerated types */
50 const char *
51 gmx_cpuid_vendor_string[GMX_CPUID_NVENDORS] =
53 "CannotDetect",
54 "Unknown",
55 "GenuineIntel",
56 "AuthenticAMD"
59 const char *
60 gmx_cpuid_feature_string[GMX_CPUID_NFEATURES] =
62 "CannotDetect",
63 "aes",
64 "apic",
65 "avx",
66 "avx2",
67 "clfsh",
68 "cmov",
69 "cx8",
70 "cx16",
71 "f16c",
72 "fma",
73 "fma4",
74 "htt",
75 "lahf_lm",
76 "misalignsse",
77 "mmx",
78 "msr",
79 "nonstop_tsc",
80 "pcid",
81 "pclmuldq",
82 "pdcm",
83 "pdpe1gb",
84 "popcnt",
85 "pse",
86 "rdrnd",
87 "rdtscp",
88 "sse2",
89 "sse3",
90 "sse4a",
91 "sse4.1",
92 "sse4.2",
93 "ssse3",
94 "tdt",
95 "x2apic",
96 "xop"
99 const char *
100 gmx_cpuid_acceleration_string[GMX_CPUID_NACCELERATIONS] =
102 "CannotDetect",
103 "None",
104 "SSE2",
105 "SSE4.1",
106 "AVX_128_FMA",
107 "AVX_256"
110 /* Max length of brand string */
111 #define GMX_CPUID_BRAND_MAXLEN 256
114 /* Contents of the abstract datatype */
115 struct gmx_cpuid
117 enum gmx_cpuid_vendor vendor;
118 char brand[GMX_CPUID_BRAND_MAXLEN];
119 int family;
120 int model;
121 int stepping;
122 /* Not using gmx_bool here, since this file must be possible to compile without simple.h */
123 char feature[GMX_CPUID_NFEATURES];
127 /* Simple routines to access the data structure. The initialization routine is
128 * further down since that needs to call other static routines in this file.
130 enum gmx_cpuid_vendor
131 gmx_cpuid_vendor (gmx_cpuid_t cpuid)
133 return cpuid->vendor;
137 const char *
138 gmx_cpuid_brand (gmx_cpuid_t cpuid)
140 return cpuid->brand;
144 gmx_cpuid_family (gmx_cpuid_t cpuid)
146 return cpuid->family;
150 gmx_cpuid_model (gmx_cpuid_t cpuid)
152 return cpuid->model;
156 gmx_cpuid_stepping (gmx_cpuid_t cpuid)
158 return cpuid->stepping;
162 gmx_cpuid_feature (gmx_cpuid_t cpuid,
163 enum gmx_cpuid_feature feature)
165 return (cpuid->feature[feature]!=0);
171 /* What type of acceleration was compiled in, if any?
172 * This is set from Cmake. Note that the SSE2 and SSE4_1 macros are set for
173 * AVX too, so it is important that they appear last in the list.
175 #ifdef GMX_X86_AVX_256
176 static const
177 enum gmx_cpuid_acceleration
178 compiled_acc = GMX_CPUID_ACCELERATION_X86_AVX_256;
179 #elif defined GMX_X86_AVX_128_FMA
180 static const
181 enum gmx_cpuid_acceleration
182 compiled_acc = GMX_CPUID_ACCELERATION_X86_AVX_128_FMA;
183 #elif defined GMX_X86_SSE4_1
184 static const
185 enum gmx_cpuid_acceleration
186 compiled_acc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
187 #elif defined GMX_X86_SSE2
188 static const
189 enum gmx_cpuid_acceleration
190 compiled_acc = GMX_CPUID_ACCELERATION_X86_SSE2;
191 #else
192 static const
193 enum gmx_cpuid_acceleration
194 compiled_acc = GMX_CPUID_ACCELERATION_NONE;
195 #endif
/* Execute CPUID on x86 class CPUs.
 *
 * level   selects the CPUID function (goes into eax).
 * ecxval  is the sub-function index (goes into ecx). Most levels ignore it,
 *         but some of them (e.g. level 0xB on Intel) use it.
 * eax, ebx, ecx, edx receive the register contents after the instruction.
 *         See the Intel/AMD documentation for their meaning per level.
 *
 * Returns 0 on success and -1 if CPUID could not be executed as requested.
 *
 * CPUID is only available (1) through an intrinsic on MSVC, or (2) if the
 * compiler handles GNU-style inline assembly (GMX_X86_GCC_INLINE_ASM).
 * The function is defined on every platform, because the detection routines
 * in this file call it unconditionally; where CPUID cannot be issued all four
 * outputs are set to zero and -1 is returned, which makes the callers fall
 * back to "unknown vendor".
 */
static int
execute_x86cpuid(unsigned int   level,
                 unsigned int   ecxval,
                 unsigned int * eax,
                 unsigned int * ebx,
                 unsigned int * ecx,
                 unsigned int * edx)
{
    int rc = -1;

#if defined (__i386__) || defined (__x86_64__) || defined (_M_IX86) || defined (_M_X64)

#if (defined _MSC_VER)
    int CPUInfo[4];

#if (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729)
    /* MSVC 9.0 SP1 or later */
    __cpuidex(CPUInfo, level, ecxval);
    rc = 0;
#else
    __cpuid(CPUInfo, level);
    /* Set an error code if the user wanted a non-zero ecxval, since we did not have cpuidex */
    rc = (ecxval > 0) ? -1 : 0;
#endif
    *eax = CPUInfo[0];
    *ebx = CPUInfo[1];
    *ecx = CPUInfo[2];
    *edx = CPUInfo[3];

#elif (defined GMX_X86_GCC_INLINE_ASM)
    /* for now this means GMX_X86_GCC_INLINE_ASM should be defined,
     * but there might be more options added in the future.
     */
    *eax = level;
    *ecx = ecxval;
    *ebx = 0;
    *edx = 0;
#if defined(__i386__) && defined(__PIC__)
    /* Avoid clobbering the global offset table in 32-bit pic code (ebx register) */
    __asm__ __volatile__ ("xchgl %%ebx, %1 \n\t"
                          "cpuid \n\t"
                          "xchgl %%ebx, %1 \n\t"
                          : "+a" (*eax), "+r" (*ebx), "+c" (*ecx), "+d" (*edx));
#else
    /* i386 without PIC, or x86-64. Things are easy and we can clobber any reg we want :-) */
    __asm__ __volatile__ ("cpuid \n\t"
                          : "+a" (*eax), "+b" (*ebx), "+c" (*ecx), "+d" (*edx));
#endif
    rc = 0;

#else
    /* Death and horror!
     * Apparently this is an x86 platform where we don't know how to call cpuid.
     *
     * This is REALLY bad, since we will lose all Gromacs acceleration.
     */
    (void) level;
    (void) ecxval;
    *eax = 0;
    *ebx = 0;
    *ecx = 0;
    *edx = 0;
#endif

#else /* architecture is not x86 */
    (void) level;
    (void) ecxval;
    *eax = 0;
    *ebx = 0;
    *ecx = 0;
    *edx = 0;
#endif

    return rc;
}
275 /* Identify CPU features common to Intel & AMD - mainly brand string,
276 * version and some features. Vendor has already been detected outside this.
278 static int
279 cpuid_check_common_x86(gmx_cpuid_t cpuid)
281 int fn,max_stdfn,max_extfn;
282 unsigned int eax,ebx,ecx,edx;
283 char str[GMX_CPUID_BRAND_MAXLEN];
284 char * p;
286 /* Find largest standard/extended function input value */
287 execute_x86cpuid(0x0,0,&eax,&ebx,&ecx,&edx);
288 max_stdfn = eax;
289 execute_x86cpuid(0x80000000,0,&eax,&ebx,&ecx,&edx);
290 max_extfn = eax;
292 p = str;
293 if(max_extfn>=0x80000005)
295 /* Get CPU brand string */
296 for(fn=0x80000002;fn<0x80000005;fn++)
298 execute_x86cpuid(fn,0,&eax,&ebx,&ecx,&edx);
299 memcpy(p,&eax,4);
300 memcpy(p+4,&ebx,4);
301 memcpy(p+8,&ecx,4);
302 memcpy(p+12,&edx,4);
303 p+=16;
305 *p='\0';
307 /* Remove empty initial space */
308 p = str;
309 while(isspace(*(p)))
311 p++;
313 strncpy(cpuid->brand,p,GMX_CPUID_BRAND_MAXLEN);
315 else
317 strncpy(cpuid->brand,"Unknown CPU brand",GMX_CPUID_BRAND_MAXLEN);
320 /* Find basic CPU properties */
321 if(max_stdfn>=1)
323 execute_x86cpuid(0x1,0,&eax,&ebx,&ecx,&edx);
325 cpuid->family = ((eax & 0x0FF00000) >> 20) + ((eax & 0x00000F00) >> 8);
326 /* Note that extended model should be shifted left 4, so only shift right 12 iso 16. */
327 cpuid->model = ((eax & 0x000F0000) >> 12) + ((eax & 0x000000F0) >> 4);
328 cpuid->stepping = (eax & 0x0000000F);
330 /* Feature flags common to AMD and intel */
331 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE3] = (ecx & (1 << 0)) != 0;
332 cpuid->feature[GMX_CPUID_FEATURE_X86_PCLMULDQ] = (ecx & (1 << 1)) != 0;
333 cpuid->feature[GMX_CPUID_FEATURE_X86_SSSE3] = (ecx & (1 << 9)) != 0;
334 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA] = (ecx & (1 << 12)) != 0;
335 cpuid->feature[GMX_CPUID_FEATURE_X86_CX16] = (ecx & (1 << 13)) != 0;
336 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_1] = (ecx & (1 << 19)) != 0;
337 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4_2] = (ecx & (1 << 20)) != 0;
338 cpuid->feature[GMX_CPUID_FEATURE_X86_POPCNT] = (ecx & (1 << 23)) != 0;
339 cpuid->feature[GMX_CPUID_FEATURE_X86_AES] = (ecx & (1 << 25)) != 0;
340 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX] = (ecx & (1 << 28)) != 0;
341 cpuid->feature[GMX_CPUID_FEATURE_X86_F16C] = (ecx & (1 << 29)) != 0;
342 cpuid->feature[GMX_CPUID_FEATURE_X86_RDRND] = (ecx & (1 << 30)) != 0;
344 cpuid->feature[GMX_CPUID_FEATURE_X86_PSE] = (edx & (1 << 3)) != 0;
345 cpuid->feature[GMX_CPUID_FEATURE_X86_MSR] = (edx & (1 << 5)) != 0;
346 cpuid->feature[GMX_CPUID_FEATURE_X86_CX8] = (edx & (1 << 8)) != 0;
347 cpuid->feature[GMX_CPUID_FEATURE_X86_APIC] = (edx & (1 << 9)) != 0;
348 cpuid->feature[GMX_CPUID_FEATURE_X86_CMOV] = (edx & (1 << 15)) != 0;
349 cpuid->feature[GMX_CPUID_FEATURE_X86_CLFSH] = (edx & (1 << 19)) != 0;
350 cpuid->feature[GMX_CPUID_FEATURE_X86_MMX] = (edx & (1 << 23)) != 0;
351 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE2] = (edx & (1 << 26)) != 0;
352 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = (edx & (1 << 28)) != 0;
354 else
356 cpuid->family = -1;
357 cpuid->model = -1;
358 cpuid->stepping = -1;
361 if(max_extfn>=0x80000001)
363 execute_x86cpuid(0x80000001,0,&eax,&ebx,&ecx,&edx);
364 cpuid->feature[GMX_CPUID_FEATURE_X86_LAHF_LM] = (ecx & (1 << 0)) != 0;
365 cpuid->feature[GMX_CPUID_FEATURE_X86_PDPE1GB] = (edx & (1 << 26)) != 0;
366 cpuid->feature[GMX_CPUID_FEATURE_X86_RDTSCP] = (edx & (1 << 27)) != 0;
369 if(max_extfn>=0x80000007)
371 execute_x86cpuid(0x80000007,0,&eax,&ebx,&ecx,&edx);
372 cpuid->feature[GMX_CPUID_FEATURE_X86_NONSTOP_TSC] = (edx & (1 << 8)) != 0;
375 return 0;
378 /* Detection of AMD-specific CPU features */
379 static int
380 cpuid_check_amd_x86(gmx_cpuid_t cpuid)
382 int max_stdfn,max_extfn;
383 unsigned int eax,ebx,ecx,edx;
385 cpuid_check_common_x86(cpuid);
387 execute_x86cpuid(0x0,0,&eax,&ebx,&ecx,&edx);
388 max_stdfn = eax;
390 execute_x86cpuid(0x80000000,0,&eax,&ebx,&ecx,&edx);
391 max_extfn = eax;
393 if(max_extfn>=0x80000001)
395 execute_x86cpuid(0x80000001,0,&eax,&ebx,&ecx,&edx);
397 cpuid->feature[GMX_CPUID_FEATURE_X86_SSE4A] = (ecx & (1 << 6)) != 0;
398 cpuid->feature[GMX_CPUID_FEATURE_X86_MISALIGNSSE] = (ecx & (1 << 7)) != 0;
399 cpuid->feature[GMX_CPUID_FEATURE_X86_XOP] = (ecx & (1 << 11)) != 0;
400 cpuid->feature[GMX_CPUID_FEATURE_X86_FMA4] = (ecx & (1 << 16)) != 0;
403 return 0;
406 /* Detection of Intel-specific CPU features */
407 static int
408 cpuid_check_intel_x86(gmx_cpuid_t cpuid)
410 unsigned int max_stdfn,max_extfn;
411 unsigned int eax,ebx,ecx,edx;
412 unsigned int i;
413 unsigned int max_logical_cores,max_physical_cores;
415 cpuid_check_common_x86(cpuid);
417 execute_x86cpuid(0x0,0,&eax,&ebx,&ecx,&edx);
418 max_stdfn = eax;
420 execute_x86cpuid(0x80000000,0,&eax,&ebx,&ecx,&edx);
421 max_extfn = eax;
423 if(max_stdfn>=1)
425 execute_x86cpuid(0x1,0,&eax,&ebx,&ecx,&edx);
426 cpuid->feature[GMX_CPUID_FEATURE_X86_PDCM] = (ecx & (1 << 15)) != 0;
427 cpuid->feature[GMX_CPUID_FEATURE_X86_PCID] = (ecx & (1 << 17)) != 0;
428 cpuid->feature[GMX_CPUID_FEATURE_X86_X2APIC] = (ecx & (1 << 21)) != 0;
429 cpuid->feature[GMX_CPUID_FEATURE_X86_TDT] = (ecx & (1 << 24)) != 0;
432 if(max_stdfn>=7)
434 execute_x86cpuid(0x7,0,&eax,&ebx,&ecx,&edx);
435 cpuid->feature[GMX_CPUID_FEATURE_X86_AVX2] = (ebx & (1 << 5)) != 0;
438 /* Check whether Hyper-Threading is enabled, not only supported */
439 if(cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] && max_stdfn>=4)
441 execute_x86cpuid(0x1,0,&eax,&ebx,&ecx,&edx);
442 max_logical_cores = (ebx >> 16) & 0x0FF;
443 execute_x86cpuid(0x4,0,&eax,&ebx,&ecx,&edx);
444 max_physical_cores = ((eax >> 26) & 0x3F) + 1;
446 /* Clear HTT flag if we only have 1 logical core per physical */
447 if(max_logical_cores/max_physical_cores < 2)
449 cpuid->feature[GMX_CPUID_FEATURE_X86_HTT] = 0;
452 return 0;
455 /* Try to find the vendor of the current CPU, so we know what specific
456 * detection routine to call.
458 static enum gmx_cpuid_vendor
459 cpuid_check_vendor(void)
461 enum gmx_cpuid_vendor i,vendor;
462 /* Register data used on x86 */
463 unsigned int eax,ebx,ecx,edx;
464 char vendorstring[13];
466 /* Set default first */
467 vendor = GMX_CPUID_VENDOR_UNKNOWN;
469 execute_x86cpuid(0x0,0,&eax,&ebx,&ecx,&edx);
471 memcpy(vendorstring,&ebx,4);
472 memcpy(vendorstring+4,&edx,4);
473 memcpy(vendorstring+8,&ecx,4);
475 vendorstring[12]='\0';
477 for(i=GMX_CPUID_VENDOR_UNKNOWN;i<GMX_CPUID_NVENDORS;i++)
479 if(!strncmp(vendorstring,gmx_cpuid_vendor_string[i],12))
481 vendor = i;
485 return vendor;
492 gmx_cpuid_init (gmx_cpuid_t * pcpuid)
494 gmx_cpuid_t cpuid;
495 int i;
497 cpuid = malloc(sizeof(*cpuid));
499 *pcpuid = cpuid;
501 for(i=0;i<GMX_CPUID_NFEATURES;i++)
503 cpuid->feature[i]=0;
506 cpuid->vendor = cpuid_check_vendor();
508 switch(cpuid->vendor)
510 case GMX_CPUID_VENDOR_INTEL:
511 cpuid_check_intel_x86(cpuid);
512 break;
513 case GMX_CPUID_VENDOR_AMD:
514 cpuid_check_amd_x86(cpuid);
515 break;
516 default:
517 /* Could not find vendor */
518 strncpy(cpuid->brand,"Unknown CPU brand",GMX_CPUID_BRAND_MAXLEN);
519 cpuid->family = 0;
520 cpuid->model = 0;
521 cpuid->stepping = 0;
523 for(i=0;i<GMX_CPUID_NFEATURES;i++)
525 cpuid->feature[i]=0;
527 cpuid->feature[GMX_CPUID_FEATURE_CANNOTDETECT] = 1;
528 break;
531 return 0;
536 void
537 gmx_cpuid_done (gmx_cpuid_t cpuid)
539 free(cpuid);
544 gmx_cpuid_formatstring (gmx_cpuid_t cpuid,
545 char * str,
546 int n)
548 int c;
549 int i;
550 enum gmx_cpuid_feature feature;
552 #ifdef _MSC_VER
553 _snprintf(str,n,
554 "Vendor: %s\n"
555 "Brand: %s\n"
556 "Family: %2d Model: %2d Stepping: %2d\n"
557 "Features:",
558 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
559 gmx_cpuid_brand(cpuid),
560 gmx_cpuid_family(cpuid),gmx_cpuid_model(cpuid),gmx_cpuid_stepping(cpuid));
561 #else
562 snprintf(str,n,
563 "Vendor: %s\n"
564 "Brand: %s\n"
565 "Family: %2d Model: %2d Stepping: %2d\n"
566 "Features:",
567 gmx_cpuid_vendor_string[gmx_cpuid_vendor(cpuid)],
568 gmx_cpuid_brand(cpuid),
569 gmx_cpuid_family(cpuid),gmx_cpuid_model(cpuid),gmx_cpuid_stepping(cpuid));
570 #endif
572 str[n-1] = '\0';
573 c = strlen(str);
574 n -= c;
575 str += c;
577 for(feature=GMX_CPUID_FEATURE_CANNOTDETECT;feature<GMX_CPUID_NFEATURES;feature++)
579 if(gmx_cpuid_feature(cpuid,feature)==1)
581 #ifdef _MSC_VER
582 _snprintf(str,n," %s",gmx_cpuid_feature_string[feature]);
583 #else
584 snprintf(str,n," %s",gmx_cpuid_feature_string[feature]);
585 #endif
586 str[n-1] = '\0';
587 c = strlen(str);
588 n -= c;
589 str += c;
592 #ifdef _MSC_VER
593 _snprintf(str,n,"\n");
594 #else
595 snprintf(str,n,"\n");
596 #endif
597 str[n-1] = '\0';
599 return 0;
604 enum gmx_cpuid_acceleration
605 gmx_cpuid_acceleration_suggest (gmx_cpuid_t cpuid)
607 enum gmx_cpuid_acceleration tmpacc;
609 tmpacc = GMX_CPUID_ACCELERATION_NONE;
611 if(gmx_cpuid_vendor(cpuid)==GMX_CPUID_VENDOR_INTEL)
613 if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_AVX))
615 tmpacc = GMX_CPUID_ACCELERATION_X86_AVX_256;
617 else if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_SSE4_1))
619 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
621 else if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_SSE2))
623 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE2;
626 else if(gmx_cpuid_vendor(cpuid)==GMX_CPUID_VENDOR_AMD)
628 if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_AVX))
630 tmpacc = GMX_CPUID_ACCELERATION_X86_AVX_128_FMA;
632 else if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_SSE4_1))
634 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE4_1;
636 else if(gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_SSE2))
638 tmpacc = GMX_CPUID_ACCELERATION_X86_SSE2;
642 return tmpacc;
648 gmx_cpuid_acceleration_check(gmx_cpuid_t cpuid,
649 FILE * log)
651 int rc;
652 char str[1024];
653 enum gmx_cpuid_acceleration acc;
655 acc = gmx_cpuid_acceleration_suggest(cpuid);
657 rc = (acc != compiled_acc);
659 gmx_cpuid_formatstring(cpuid,str,1023);
660 str[1023] = '\0';
662 if(log!=NULL)
664 fprintf(log,
665 "\nDetecting CPU-specific acceleration.\nPresent hardware specification:\n"
666 "%s"
667 "Acceleration most likely to fit this hardware: %s\n"
668 "Acceleration selected at GROMACS compile time: %s\n\n",
669 str,
670 gmx_cpuid_acceleration_string[acc],
671 gmx_cpuid_acceleration_string[compiled_acc]);
674 if(rc!=0)
676 if(log!=NULL)
678 fprintf(log,"WARNING! Binary not matching hardware - you are likely losing performance.\n\n");
680 printf("\nWARNING! Binary not matching hardware - you are likely losing performance.\n"
681 "Acceleration most likely to fit this hardware: %s\n"
682 "Acceleration selected at GROMACS compile time: %s\n\n",
683 gmx_cpuid_acceleration_string[acc],
684 gmx_cpuid_acceleration_string[compiled_acc]);
687 return rc;
691 enum gmx_cpuid_x86_smt
692 gmx_cpuid_x86_smt(gmx_cpuid_t cpuid)
695 #if (defined HAVE_SCHED_H && defined HAVE_SCHED_SETAFFINITY && defined HAVE_SYSCONF && defined __linux__)
696 int i;
697 int nproc;
698 cpu_set_t cpuset,save_cpuset;
699 int * apic_id;
700 unsigned int eax,ebx,ecx,edx;
701 int core_shift_bits;
702 int smt_found;
704 if( gmx_cpuid_vendor(cpuid)!=GMX_CPUID_VENDOR_INTEL ||
705 gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_HTT)==0)
707 return GMX_CPUID_X86_SMT_DISABLED;
710 /* Check cpuid max standard function */
711 execute_x86cpuid(0x0,0,&eax,&ebx,&ecx,&edx);
713 /* Early CPUs that do not support function 11 do not support SMT either */
714 if(eax<0xB)
716 return GMX_CPUID_X86_SMT_DISABLED;
719 /* If we got here, it is a modern Intel CPU that supports detection, as does our OS */
721 /* How many processors? */
722 nproc = sysconf(_SC_NPROCESSORS_ONLN);
724 apic_id = malloc(sizeof(int)*nproc);
726 sched_getaffinity(0,sizeof(cpu_set_t),&save_cpuset);
728 /* Get x2APIC ID from each hardware thread */
729 CPU_ZERO(&cpuset);
730 for(i=0;i<nproc;i++)
732 CPU_SET(i,&cpuset);
733 sched_setaffinity(0,sizeof(cpu_set_t),&cpuset);
734 execute_x86cpuid(0xB,0,&eax,&ebx,&ecx,&edx);
735 apic_id[i]=edx;
736 CPU_CLR(i,&cpuset);
738 /* Reset affinity to the value it had when calling this routine */
739 sched_setaffinity(0,sizeof(cpu_set_t),&save_cpuset);
741 core_shift_bits = eax & 0x1F;
743 /* Check if there is any other APIC id that is identical to [0], apart from
744 * the hardware thread bit.
746 smt_found = 0;
747 for(i=1;i<nproc && smt_found==0;i++)
749 smt_found = (apic_id[i]>>core_shift_bits == apic_id[0] >> core_shift_bits);
752 free(apic_id);
754 if(smt_found==1)
756 return GMX_CPUID_X86_SMT_ENABLED;
758 else
760 return GMX_CPUID_X86_SMT_DISABLED;
762 #else
763 /* Do the trivial stuff first. If Hyper-Threading isn't even supported it
764 * cannot be enabled, no matter what OS detection we use!
766 if(0==gmx_cpuid_feature(cpuid,GMX_CPUID_FEATURE_X86_HTT))
768 return GMX_CPUID_X86_SMT_DISABLED;
770 else
772 return GMX_CPUID_X86_SMT_CANNOTDETECT;
774 #endif
#ifdef GMX_CPUID_STANDALONE
/* Stand-alone program to enable queries of CPU features from Cmake.
 * Note that you need to check inline ASM capabilities before compiling and set
 * -DGMX_X86_GCC_INLINE_ASM for the cpuid instruction to work...
 *
 * Exactly one flag (argv[1]) is evaluated, and only its first three
 * characters are compared. The requested value is printed on a single line
 * on stdout. Without arguments a usage message is printed. The exit status
 * is always 0.
 */
int
main(int argc, char **argv)
{
    gmx_cpuid_t                 info;
    enum gmx_cpuid_acceleration suggested;
    const char                 *flag;
    const char                 *separator;
    int                         f;

    if (argc < 2)
    {
        fprintf(stdout,
                "Usage:\n\n%s [flags]\n\n"
                "Available flags:\n"
                "-vendor Print CPU vendor.\n"
                "-brand Print CPU brand string.\n"
                "-family Print CPU family version.\n"
                "-model Print CPU model version.\n"
                "-stepping Print CPU stepping version.\n"
                "-features Print CPU feature flags.\n"
                "-acceleration Print suggested GROMACS acceleration.\n"
                , argv[0]);
        exit(0);
    }

    flag = argv[1];

    gmx_cpuid_init(&info);

    if (strncmp(flag, "-vendor", 3) == 0)
    {
        printf("%s\n", gmx_cpuid_vendor_string[info->vendor]);
    }
    else if (strncmp(flag, "-brand", 3) == 0)
    {
        printf("%s\n", info->brand);
    }
    else if (strncmp(flag, "-family", 3) == 0)
    {
        printf("%d\n", info->family);
    }
    else if (strncmp(flag, "-model", 3) == 0)
    {
        printf("%d\n", info->model);
    }
    else if (strncmp(flag, "-stepping", 3) == 0)
    {
        printf("%d\n", info->stepping);
    }
    else if (strncmp(flag, "-features", 3) == 0)
    {
        /* Space-separated list: the separator is empty for the first entry */
        separator = "";
        for (f = 0; f < GMX_CPUID_NFEATURES; f++)
        {
            if (info->feature[f] == 1)
            {
                printf("%s%s", separator, gmx_cpuid_feature_string[f]);
                separator = " ";
            }
        }
        printf("\n");
    }
    else if (strncmp(flag, "-acceleration", 3) == 0)
    {
        suggested = gmx_cpuid_acceleration_suggest(info);
        fprintf(stdout, "%s\n", gmx_cpuid_acceleration_string[suggested]);
    }

    gmx_cpuid_done(info);

    return 0;
}

#endif