/* Subroutines for the gcc driver.
   Copyright (C) 2006-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
20 #define IN_TARGET_CODE 1
24 #include "coretypes.h"
27 const char *host_detect_local_cpu (int argc
, const char **argv
);
29 #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
31 #include "common/config/i386/cpuinfo.h"
32 #include "common/config/i386/i386-isas.h"
41 /* Returns command line parameters that describe size and
42 cache line size of the processor caches. */
45 describe_cache (struct cache_desc level1
, struct cache_desc level2
)
47 char size
[100], line
[100], size2
[100];
49 /* At the moment, gcc does not use the information
50 about the associativity of the cache. */
52 snprintf (size
, sizeof (size
),
53 "--param l1-cache-size=%u ", level1
.sizekb
);
54 snprintf (line
, sizeof (line
),
55 "--param l1-cache-line-size=%u ", level1
.line
);
57 snprintf (size2
, sizeof (size2
),
58 "--param l2-cache-size=%u ", level2
.sizekb
);
60 return concat (size
, line
, size2
, NULL
);
63 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */
66 detect_l2_cache (struct cache_desc
*level2
)
68 unsigned eax
, ebx
, ecx
, edx
;
71 __cpuid (0x80000006, eax
, ebx
, ecx
, edx
);
73 level2
->sizekb
= (ecx
>> 16) & 0xffff;
74 level2
->line
= ecx
& 0xff;
76 assoc
= (ecx
>> 12) & 0xf;
81 else if (assoc
>= 0xa && assoc
<= 0xc)
82 assoc
= 32 + (assoc
- 0xa) * 16;
83 else if (assoc
>= 0xd && assoc
<= 0xe)
84 assoc
= 96 + (assoc
- 0xd) * 32;
86 level2
->assoc
= assoc
;
89 /* Returns the description of caches for an AMD processor. */
92 detect_caches_amd (unsigned max_ext_level
)
94 unsigned eax
, ebx
, ecx
, edx
;
96 struct cache_desc level1
, level2
= {0, 0, 0};
98 if (max_ext_level
< 0x80000005)
101 __cpuid (0x80000005, eax
, ebx
, ecx
, edx
);
103 level1
.sizekb
= (ecx
>> 24) & 0xff;
104 level1
.assoc
= (ecx
>> 16) & 0xff;
105 level1
.line
= ecx
& 0xff;
107 if (max_ext_level
>= 0x80000006)
108 detect_l2_cache (&level2
);
110 return describe_cache (level1
, level2
);
113 /* Decodes the size, the associativity and the cache line size of
114 L1/L2 caches of an Intel processor. Values are based on
115 "Intel Processor Identification and the CPUID Instruction"
116 [Application Note 485], revision -032, December 2007. */
119 decode_caches_intel (unsigned reg
, bool xeon_mp
,
120 struct cache_desc
*level1
, struct cache_desc
*level2
)
124 for (i
= 24; i
>= 0; i
-= 8)
125 switch ((reg
>> i
) & 0xff)
128 level1
->sizekb
= 8; level1
->assoc
= 2; level1
->line
= 32;
131 level1
->sizekb
= 16; level1
->assoc
= 4; level1
->line
= 32;
134 level1
->sizekb
= 16; level1
->assoc
= 4; level1
->line
= 64;
137 level1
->sizekb
= 24; level1
->assoc
= 6; level1
->line
= 64;
140 level2
->sizekb
= 256; level2
->assoc
= 8; level2
->line
= 64;
143 level2
->sizekb
= 1024; level2
->assoc
= 16; level2
->line
= 64;
146 level1
->sizekb
= 32; level1
->assoc
= 8; level1
->line
= 64;
149 level2
->sizekb
= 128; level2
->assoc
= 4; level2
->line
= 64;
152 level2
->sizekb
= 192; level2
->assoc
= 6; level2
->line
= 64;
155 level2
->sizekb
= 128; level2
->assoc
= 2; level2
->line
= 64;
158 level2
->sizekb
= 256; level2
->assoc
= 4; level2
->line
= 64;
161 level2
->sizekb
= 384; level2
->assoc
= 6; level2
->line
= 64;
164 level2
->sizekb
= 512; level2
->assoc
= 4; level2
->line
= 64;
167 level2
->sizekb
= 128; level2
->assoc
= 4; level2
->line
= 32;
170 level2
->sizekb
= 256; level2
->assoc
= 4; level2
->line
= 32;
173 level2
->sizekb
= 512; level2
->assoc
= 4; level2
->line
= 32;
176 level2
->sizekb
= 1024; level2
->assoc
= 4; level2
->line
= 32;
179 level2
->sizekb
= 2048; level2
->assoc
= 4; level2
->line
= 32;
182 level2
->sizekb
= 3072; level2
->assoc
= 12; level2
->line
= 64;
187 level2
->sizekb
= 4096; level2
->assoc
= 16; level2
->line
= 64;
190 level2
->sizekb
= 6144; level2
->assoc
= 24; level2
->line
= 64;
193 level1
->sizekb
= 16; level1
->assoc
= 8; level1
->line
= 64;
196 level1
->sizekb
= 8; level1
->assoc
= 4; level1
->line
= 64;
199 level1
->sizekb
= 16; level1
->assoc
= 4; level1
->line
= 64;
202 level1
->sizekb
= 32; level1
->assoc
= 4; level1
->line
= 64;
205 level2
->sizekb
= 1024; level2
->assoc
= 4; level2
->line
= 64;
208 level2
->sizekb
= 128; level2
->assoc
= 8; level2
->line
= 64;
211 level2
->sizekb
= 256; level2
->assoc
= 8; level2
->line
= 64;
214 level2
->sizekb
= 512; level2
->assoc
= 8; level2
->line
= 64;
217 level2
->sizekb
= 1024; level2
->assoc
= 8; level2
->line
= 64;
220 level2
->sizekb
= 2048; level2
->assoc
= 8; level2
->line
= 64;
223 level2
->sizekb
= 512; level2
->assoc
= 2; level2
->line
= 64;
226 level2
->sizekb
= 512; level2
->assoc
= 8; level2
->line
= 64;
229 level2
->sizekb
= 256; level2
->assoc
= 8; level2
->line
= 32;
232 level2
->sizekb
= 512; level2
->assoc
= 8; level2
->line
= 32;
235 level2
->sizekb
= 1024; level2
->assoc
= 8; level2
->line
= 32;
238 level2
->sizekb
= 2048; level2
->assoc
= 8; level2
->line
= 32;
241 level2
->sizekb
= 512; level2
->assoc
= 4; level2
->line
= 64;
244 level2
->sizekb
= 1024; level2
->assoc
= 8; level2
->line
= 64;
251 /* Detect cache parameters using CPUID function 2. */
254 detect_caches_cpuid2 (bool xeon_mp
,
255 struct cache_desc
*level1
, struct cache_desc
*level2
)
260 __cpuid (2, regs
[0], regs
[1], regs
[2], regs
[3]);
262 nreps
= regs
[0] & 0x0f;
267 for (i
= 0; i
< 4; i
++)
268 if (regs
[i
] && !((regs
[i
] >> 31) & 1))
269 decode_caches_intel (regs
[i
], xeon_mp
, level1
, level2
);
272 __cpuid (2, regs
[0], regs
[1], regs
[2], regs
[3]);
276 /* Detect cache parameters using CPUID function 4. This
277 method doesn't require hardcoded tables. */
288 detect_caches_cpuid4 (struct cache_desc
*level1
, struct cache_desc
*level2
,
289 struct cache_desc
*level3
)
291 struct cache_desc
*cache
;
293 unsigned eax
, ebx
, ecx
, edx
;
296 for (count
= 0;; count
++)
298 __cpuid_count(4, count
, eax
, ebx
, ecx
, edx
);
306 switch ((eax
>> 5) & 0x07)
323 unsigned sets
= ecx
+ 1;
324 unsigned part
= ((ebx
>> 12) & 0x03ff) + 1;
326 cache
->assoc
= ((ebx
>> 22) & 0x03ff) + 1;
327 cache
->line
= (ebx
& 0x0fff) + 1;
329 cache
->sizekb
= (cache
->assoc
* part
330 * cache
->line
* sets
) / 1024;
339 /* Returns the description of caches for an Intel processor. */
342 detect_caches_intel (bool xeon_mp
, unsigned max_level
,
343 unsigned max_ext_level
, unsigned *l2sizekb
)
345 struct cache_desc level1
= {0, 0, 0}, level2
= {0, 0, 0}, level3
= {0, 0, 0};
348 detect_caches_cpuid4 (&level1
, &level2
, &level3
);
349 else if (max_level
>= 2)
350 detect_caches_cpuid2 (xeon_mp
, &level1
, &level2
);
354 if (level1
.sizekb
== 0)
357 /* Let the L3 replace the L2. This assumes inclusive caches
358 and single threaded program for now. */
362 /* Intel CPUs are equipped with AMD style L2 cache info. Try this
363 method if other methods fail to provide L2 cache parameters. */
364 if (level2
.sizekb
== 0 && max_ext_level
>= 0x80000006)
365 detect_l2_cache (&level2
);
367 *l2sizekb
= level2
.sizekb
;
369 return describe_cache (level1
, level2
);
/* This will be called by the spec parser in gcc.c when it sees
   a %:local_cpu_detect(args) construct.  Currently it will be called
   with either "arch" or "tune" as argument depending on if -march=native
   or -mtune=native is to be substituted.

   It returns a string containing new command line parameters to be
   put at the place of the above two options, depending on what CPU
   this is executed.  E.g. "-march=k8" on an AMD64 machine
   for -march=native.

   ARGC and ARGV are set depending on the actual arguments given
   in the spec.  */
385 const char *host_detect_local_cpu (int argc
, const char **argv
)
387 enum processor_type processor
= PROCESSOR_I386
;
388 const char *cpu
= "i386";
390 const char *cache
= "";
391 const char *options
= "";
393 unsigned int ebx
, ecx
, edx
;
395 unsigned int max_level
, ext_level
;
398 unsigned int model
, family
;
402 unsigned int l2sizekb
= 0;
407 arch
= !strcmp (argv
[0], "arch");
409 if (!arch
&& strcmp (argv
[0], "tune"))
412 struct __processor_model cpu_model
= { };
413 struct __processor_model2 cpu_model2
= { };
414 unsigned int cpu_features2
[SIZE_OF_CPU_FEATURES
] = { };
416 if (cpu_indicator_init (&cpu_model
, &cpu_model2
, cpu_features2
) != 0)
419 vendor
= cpu_model
.__cpu_vendor
;
420 family
= cpu_model2
.__cpu_family
;
421 model
= cpu_model2
.__cpu_model
;
422 max_level
= cpu_model2
.__cpu_max_level
;
423 ext_level
= cpu_model2
.__cpu_ext_level
;
427 if (vendor
== VENDOR_AMD
428 || vendor
== VENDOR_CENTAUR
429 || vendor
== VENDOR_CYRIX
430 || vendor
== VENDOR_NSC
)
431 cache
= detect_caches_amd (ext_level
);
432 else if (vendor
== VENDOR_INTEL
)
434 bool xeon_mp
= (family
== 15 && model
== 6);
435 cache
= detect_caches_intel (xeon_mp
, max_level
,
436 ext_level
, &l2sizekb
);
440 /* Extended features */
441 #define has_feature(f) \
442 has_cpu_feature (&cpu_model, cpu_features2, f)
444 if (vendor
== VENDOR_AMD
)
448 /* Detect geode processor by its processor signature. */
449 if (ext_level
>= 0x80000002)
450 __cpuid (0x80000002, name
, ebx
, ecx
, edx
);
454 if (name
== signature_NSC_ebx
)
455 processor
= PROCESSOR_GEODE
;
456 else if (has_feature (FEATURE_MOVBE
) && family
== 22)
457 processor
= PROCESSOR_BTVER2
;
458 else if (has_feature (FEATURE_CLWB
))
459 processor
= PROCESSOR_ZNVER2
;
460 else if (has_feature (FEATURE_CLZERO
))
461 processor
= PROCESSOR_ZNVER1
;
462 else if (has_feature (FEATURE_AVX2
))
463 processor
= PROCESSOR_BDVER4
;
464 else if (has_feature (FEATURE_XSAVEOPT
))
465 processor
= PROCESSOR_BDVER3
;
466 else if (has_feature (FEATURE_BMI
))
467 processor
= PROCESSOR_BDVER2
;
468 else if (has_feature (FEATURE_XOP
))
469 processor
= PROCESSOR_BDVER1
;
470 else if (has_feature (FEATURE_SSE4_A
)
471 && has_feature (FEATURE_SSSE3
))
472 processor
= PROCESSOR_BTVER1
;
473 else if (has_feature (FEATURE_SSE4_A
))
474 processor
= PROCESSOR_AMDFAM10
;
475 else if (has_feature (FEATURE_SSE2
)
476 || has_feature (FEATURE_LM
))
477 processor
= PROCESSOR_K8
;
478 else if (has_feature (FEATURE_3DNOWP
) && family
== 6)
479 processor
= PROCESSOR_ATHLON
;
480 else if (has_feature (FEATURE_MMX
))
481 processor
= PROCESSOR_K6
;
483 processor
= PROCESSOR_PENTIUM
;
485 else if (vendor
== VENDOR_CENTAUR
)
487 processor
= PROCESSOR_GENERIC
;
492 /* We have no idea. */
496 if (has_feature (FEATURE_3DNOW
)
497 || has_feature (FEATURE_MMX
))
498 processor
= PROCESSOR_I486
;
502 if (has_feature (FEATURE_LM
))
503 processor
= PROCESSOR_K8
;
505 processor
= PROCESSOR_PENTIUMPRO
;
507 processor
= PROCESSOR_I486
;
515 processor
= PROCESSOR_I486
;
518 processor
= PROCESSOR_PENTIUM
;
521 processor
= PROCESSOR_PENTIUMPRO
;
524 processor
= PROCESSOR_PENTIUM4
;
527 /* We have no idea. */
528 processor
= PROCESSOR_GENERIC
;
538 if (arch
&& vendor
== VENDOR_CENTAUR
)
542 else if (has_feature (FEATURE_3DNOW
))
545 /* Assume WinChip C6. */
551 case PROCESSOR_PENTIUM
:
552 if (arch
&& has_feature (FEATURE_MMX
))
557 case PROCESSOR_PENTIUMPRO
:
558 cpu
= get_intel_cpu (&cpu_model
, &cpu_model2
, cpu_features2
);
563 /* This is unknown family 0x6 CPU. */
564 if (has_feature (FEATURE_AVX
))
566 if (has_feature (FEATURE_AVX512VP2INTERSECT
))
568 if (has_feature (FEATURE_TSXLDTRK
))
569 /* Assume Sapphire Rapids. */
570 cpu
= "sapphirerapids";
572 /* Assume Tiger Lake */
575 /* Assume Cooper Lake */
576 else if (has_feature (FEATURE_AVX512BF16
))
578 /* Assume Ice Lake Server. */
579 else if (has_feature (FEATURE_WBNOINVD
))
580 cpu
= "icelake-server";
581 /* Assume Ice Lake. */
582 else if (has_feature (FEATURE_AVX512BITALG
))
583 cpu
= "icelake-client";
584 /* Assume Cannon Lake. */
585 else if (has_feature (FEATURE_AVX512VBMI
))
587 /* Assume Knights Mill. */
588 else if (has_feature (FEATURE_AVX5124VNNIW
))
590 /* Assume Knights Landing. */
591 else if (has_feature (FEATURE_AVX512ER
))
593 /* Assume Skylake with AVX-512. */
594 else if (has_feature (FEATURE_AVX512F
))
595 cpu
= "skylake-avx512";
596 /* Assume Alder Lake */
597 else if (has_feature (FEATURE_SERIALIZE
))
599 /* Assume Skylake. */
600 else if (has_feature (FEATURE_CLFLUSHOPT
))
602 /* Assume Broadwell. */
603 else if (has_feature (FEATURE_ADX
))
605 else if (has_feature (FEATURE_AVX2
))
606 /* Assume Haswell. */
609 /* Assume Sandy Bridge. */
612 else if (has_feature (FEATURE_SSE4_2
))
614 if (has_feature (FEATURE_GFNI
))
615 /* Assume Tremont. */
617 else if (has_feature (FEATURE_SGX
))
618 /* Assume Goldmont Plus. */
619 cpu
= "goldmont-plus";
620 else if (has_feature (FEATURE_XSAVE
))
621 /* Assume Goldmont. */
623 else if (has_feature (FEATURE_MOVBE
))
624 /* Assume Silvermont. */
627 /* Assume Nehalem. */
630 else if (has_feature (FEATURE_SSSE3
))
632 if (has_feature (FEATURE_MOVBE
))
633 /* Assume Bonnell. */
639 else if (has_feature (FEATURE_LM
))
640 /* Perhaps some emulator? Assume x86-64, otherwise gcc
641 -march=native would be unusable for 64-bit compilations,
642 as all the CPUs below are 32-bit only. */
644 else if (has_feature (FEATURE_SSE3
))
646 if (vendor
== VENDOR_CENTAUR
)
647 /* C7 / Eden "Esther" */
650 /* It is Core Duo. */
653 else if (has_feature (FEATURE_SSE2
))
654 /* It is Pentium M. */
656 else if (has_feature (FEATURE_SSE
))
658 if (vendor
== VENDOR_CENTAUR
)
661 /* Eden "Nehemiah" */
667 /* It is Pentium III. */
670 else if (has_feature (FEATURE_MMX
))
671 /* It is Pentium II. */
674 /* Default to Pentium Pro. */
678 /* For -mtune, we default to -mtune=generic. */
682 case PROCESSOR_PENTIUM4
:
683 if (has_feature (FEATURE_SSE3
))
685 if (has_feature (FEATURE_LM
))
693 case PROCESSOR_GEODE
:
697 if (arch
&& has_feature (FEATURE_3DNOW
))
702 case PROCESSOR_ATHLON
:
703 if (arch
&& has_feature (FEATURE_SSE
))
711 if (vendor
== VENDOR_CENTAUR
)
713 if (has_feature (FEATURE_SSE4_1
))
714 /* Nano 3000 | Nano dual / quad core | Eden X4 */
716 else if (has_feature (FEATURE_SSSE3
))
717 /* Nano 1000 | Nano 2000 */
719 else if (has_feature (FEATURE_SSE3
))
726 else if (has_feature (FEATURE_SSE3
))
732 /* For -mtune, we default to -mtune=k8 */
735 case PROCESSOR_AMDFAM10
:
738 case PROCESSOR_BDVER1
:
741 case PROCESSOR_BDVER2
:
744 case PROCESSOR_BDVER3
:
747 case PROCESSOR_BDVER4
:
750 case PROCESSOR_ZNVER1
:
753 case PROCESSOR_ZNVER2
:
756 case PROCESSOR_BTVER1
:
759 case PROCESSOR_BTVER2
:
764 /* Use something reasonable. */
767 if (has_feature (FEATURE_SSSE3
))
769 else if (has_feature (FEATURE_SSE3
))
771 if (has_feature (FEATURE_LM
))
776 else if (has_feature (FEATURE_LM
))
777 /* Perhaps some emulator? Assume x86-64, otherwise gcc
778 -march=native would be unusable for 64-bit compilations,
779 as all the CPUs below are 32-bit only. */
781 else if (has_feature (FEATURE_SSE2
))
783 else if (has_feature (FEATURE_CMOV
))
785 else if (has_feature (FEATURE_MMX
))
787 else if (has_feature (FEATURE_CMPXCHG8B
))
797 const char *const neg_option
= " -mno-";
798 for (i
= 0; i
< ARRAY_SIZE (isa_names_table
); i
++)
799 if (isa_names_table
[i
].option
)
801 if (has_feature (isa_names_table
[i
].feature
))
802 options
= concat (options
, " ",
803 isa_names_table
[i
].option
, NULL
);
805 options
= concat (options
, neg_option
,
806 isa_names_table
[i
].option
+ 2, NULL
);
811 return concat (cache
, "-m", argv
[0], "=", cpu
, options
, NULL
);
/* If we are compiling with GCC where %EBX register is fixed, then the
   driver will just ignore -march and -mtune "native" target and will leave
   it to the newly built compiler to generate code for its default target.  */
819 const char *host_detect_local_cpu (int, const char **)
823 #endif /* __GNUC__ */