/* Subroutines for the gcc driver.
   Copyright (C) 2006-2018 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
20 #define IN_TARGET_CODE 1
24 #include "coretypes.h"
27 const char *host_detect_local_cpu (int argc
, const char **argv
);
29 #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
39 /* Returns command line parameters that describe size and
40 cache line size of the processor caches. */
43 describe_cache (struct cache_desc level1
, struct cache_desc level2
)
45 char size
[100], line
[100], size2
[100];
47 /* At the moment, gcc does not use the information
48 about the associativity of the cache. */
50 snprintf (size
, sizeof (size
),
51 "--param l1-cache-size=%u ", level1
.sizekb
);
52 snprintf (line
, sizeof (line
),
53 "--param l1-cache-line-size=%u ", level1
.line
);
55 snprintf (size2
, sizeof (size2
),
56 "--param l2-cache-size=%u ", level2
.sizekb
);
58 return concat (size
, line
, size2
, NULL
);
61 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */
64 detect_l2_cache (struct cache_desc
*level2
)
66 unsigned eax
, ebx
, ecx
, edx
;
69 __cpuid (0x80000006, eax
, ebx
, ecx
, edx
);
71 level2
->sizekb
= (ecx
>> 16) & 0xffff;
72 level2
->line
= ecx
& 0xff;
74 assoc
= (ecx
>> 12) & 0xf;
79 else if (assoc
>= 0xa && assoc
<= 0xc)
80 assoc
= 32 + (assoc
- 0xa) * 16;
81 else if (assoc
>= 0xd && assoc
<= 0xe)
82 assoc
= 96 + (assoc
- 0xd) * 32;
84 level2
->assoc
= assoc
;
87 /* Returns the description of caches for an AMD processor. */
90 detect_caches_amd (unsigned max_ext_level
)
92 unsigned eax
, ebx
, ecx
, edx
;
94 struct cache_desc level1
, level2
= {0, 0, 0};
96 if (max_ext_level
< 0x80000005)
99 __cpuid (0x80000005, eax
, ebx
, ecx
, edx
);
101 level1
.sizekb
= (ecx
>> 24) & 0xff;
102 level1
.assoc
= (ecx
>> 16) & 0xff;
103 level1
.line
= ecx
& 0xff;
105 if (max_ext_level
>= 0x80000006)
106 detect_l2_cache (&level2
);
108 return describe_cache (level1
, level2
);
111 /* Decodes the size, the associativity and the cache line size of
112 L1/L2 caches of an Intel processor. Values are based on
113 "Intel Processor Identification and the CPUID Instruction"
114 [Application Note 485], revision -032, December 2007. */
117 decode_caches_intel (unsigned reg
, bool xeon_mp
,
118 struct cache_desc
*level1
, struct cache_desc
*level2
)
122 for (i
= 24; i
>= 0; i
-= 8)
123 switch ((reg
>> i
) & 0xff)
126 level1
->sizekb
= 8; level1
->assoc
= 2; level1
->line
= 32;
129 level1
->sizekb
= 16; level1
->assoc
= 4; level1
->line
= 32;
132 level1
->sizekb
= 16; level1
->assoc
= 4; level1
->line
= 64;
135 level1
->sizekb
= 24; level1
->assoc
= 6; level1
->line
= 64;
138 level2
->sizekb
= 256; level2
->assoc
= 8; level2
->line
= 64;
141 level2
->sizekb
= 1024; level2
->assoc
= 16; level2
->line
= 64;
144 level1
->sizekb
= 32; level1
->assoc
= 8; level1
->line
= 64;
147 level2
->sizekb
= 128; level2
->assoc
= 4; level2
->line
= 64;
150 level2
->sizekb
= 192; level2
->assoc
= 6; level2
->line
= 64;
153 level2
->sizekb
= 128; level2
->assoc
= 2; level2
->line
= 64;
156 level2
->sizekb
= 256; level2
->assoc
= 4; level2
->line
= 64;
159 level2
->sizekb
= 384; level2
->assoc
= 6; level2
->line
= 64;
162 level2
->sizekb
= 512; level2
->assoc
= 4; level2
->line
= 64;
165 level2
->sizekb
= 128; level2
->assoc
= 4; level2
->line
= 32;
168 level2
->sizekb
= 256; level2
->assoc
= 4; level2
->line
= 32;
171 level2
->sizekb
= 512; level2
->assoc
= 4; level2
->line
= 32;
174 level2
->sizekb
= 1024; level2
->assoc
= 4; level2
->line
= 32;
177 level2
->sizekb
= 2048; level2
->assoc
= 4; level2
->line
= 32;
180 level2
->sizekb
= 3072; level2
->assoc
= 12; level2
->line
= 64;
185 level2
->sizekb
= 4096; level2
->assoc
= 16; level2
->line
= 64;
188 level2
->sizekb
= 6144; level2
->assoc
= 24; level2
->line
= 64;
191 level1
->sizekb
= 16; level1
->assoc
= 8; level1
->line
= 64;
194 level1
->sizekb
= 8; level1
->assoc
= 4; level1
->line
= 64;
197 level1
->sizekb
= 16; level1
->assoc
= 4; level1
->line
= 64;
200 level1
->sizekb
= 32; level1
->assoc
= 4; level1
->line
= 64;
203 level2
->sizekb
= 1024; level2
->assoc
= 4; level2
->line
= 64;
206 level2
->sizekb
= 128; level2
->assoc
= 8; level2
->line
= 64;
209 level2
->sizekb
= 256; level2
->assoc
= 8; level2
->line
= 64;
212 level2
->sizekb
= 512; level2
->assoc
= 8; level2
->line
= 64;
215 level2
->sizekb
= 1024; level2
->assoc
= 8; level2
->line
= 64;
218 level2
->sizekb
= 2048; level2
->assoc
= 8; level2
->line
= 64;
221 level2
->sizekb
= 512; level2
->assoc
= 2; level2
->line
= 64;
224 level2
->sizekb
= 512; level2
->assoc
= 8; level2
->line
= 64;
227 level2
->sizekb
= 256; level2
->assoc
= 8; level2
->line
= 32;
230 level2
->sizekb
= 512; level2
->assoc
= 8; level2
->line
= 32;
233 level2
->sizekb
= 1024; level2
->assoc
= 8; level2
->line
= 32;
236 level2
->sizekb
= 2048; level2
->assoc
= 8; level2
->line
= 32;
239 level2
->sizekb
= 512; level2
->assoc
= 4; level2
->line
= 64;
242 level2
->sizekb
= 1024; level2
->assoc
= 8; level2
->line
= 64;
249 /* Detect cache parameters using CPUID function 2. */
252 detect_caches_cpuid2 (bool xeon_mp
,
253 struct cache_desc
*level1
, struct cache_desc
*level2
)
258 __cpuid (2, regs
[0], regs
[1], regs
[2], regs
[3]);
260 nreps
= regs
[0] & 0x0f;
265 for (i
= 0; i
< 4; i
++)
266 if (regs
[i
] && !((regs
[i
] >> 31) & 1))
267 decode_caches_intel (regs
[i
], xeon_mp
, level1
, level2
);
270 __cpuid (2, regs
[0], regs
[1], regs
[2], regs
[3]);
274 /* Detect cache parameters using CPUID function 4. This
275 method doesn't require hardcoded tables. */
286 detect_caches_cpuid4 (struct cache_desc
*level1
, struct cache_desc
*level2
,
287 struct cache_desc
*level3
)
289 struct cache_desc
*cache
;
291 unsigned eax
, ebx
, ecx
, edx
;
294 for (count
= 0;; count
++)
296 __cpuid_count(4, count
, eax
, ebx
, ecx
, edx
);
304 switch ((eax
>> 5) & 0x07)
321 unsigned sets
= ecx
+ 1;
322 unsigned part
= ((ebx
>> 12) & 0x03ff) + 1;
324 cache
->assoc
= ((ebx
>> 22) & 0x03ff) + 1;
325 cache
->line
= (ebx
& 0x0fff) + 1;
327 cache
->sizekb
= (cache
->assoc
* part
328 * cache
->line
* sets
) / 1024;
337 /* Returns the description of caches for an Intel processor. */
340 detect_caches_intel (bool xeon_mp
, unsigned max_level
,
341 unsigned max_ext_level
, unsigned *l2sizekb
)
343 struct cache_desc level1
= {0, 0, 0}, level2
= {0, 0, 0}, level3
= {0, 0, 0};
346 detect_caches_cpuid4 (&level1
, &level2
, &level3
);
347 else if (max_level
>= 2)
348 detect_caches_cpuid2 (xeon_mp
, &level1
, &level2
);
352 if (level1
.sizekb
== 0)
355 /* Let the L3 replace the L2. This assumes inclusive caches
356 and single threaded program for now. */
360 /* Intel CPUs are equipped with AMD style L2 cache info. Try this
361 method if other methods fail to provide L2 cache parameters. */
362 if (level2
.sizekb
== 0 && max_ext_level
>= 0x80000006)
363 detect_l2_cache (&level2
);
365 *l2sizekb
= level2
.sizekb
;
367 return describe_cache (level1
, level2
);
/* This will be called by the spec parser in gcc.c when it sees
   a %:local_cpu_detect(args) construct.  Currently it will be called
   with either "arch" or "tune" as argument, depending on whether
   -march=native or -mtune=native is to be substituted.

   It returns a string containing new command line parameters to be
   put in place of the above two options, depending on the CPU on
   which this is executed, e.g. "-march=k8" on an AMD64 machine.

   ARGC and ARGV are set depending on the actual arguments given.  */
383 const char *host_detect_local_cpu (int argc
, const char **argv
)
385 enum processor_type processor
= PROCESSOR_I386
;
386 const char *cpu
= "i386";
388 const char *cache
= "";
389 const char *options
= "";
391 unsigned int eax
, ebx
, ecx
, edx
;
393 unsigned int max_level
, ext_level
;
396 unsigned int model
, family
;
398 unsigned int has_sse3
, has_ssse3
, has_cmpxchg16b
;
399 unsigned int has_cmpxchg8b
, has_cmov
, has_mmx
, has_sse
, has_sse2
;
401 /* Extended features */
402 unsigned int has_lahf_lm
= 0, has_sse4a
= 0;
403 unsigned int has_longmode
= 0, has_3dnowp
= 0, has_3dnow
= 0;
404 unsigned int has_movbe
= 0, has_sse4_1
= 0, has_sse4_2
= 0;
405 unsigned int has_popcnt
= 0, has_aes
= 0, has_avx
= 0, has_avx2
= 0;
406 unsigned int has_pclmul
= 0, has_abm
= 0, has_lwp
= 0;
407 unsigned int has_fma
= 0, has_fma4
= 0, has_xop
= 0;
408 unsigned int has_bmi
= 0, has_bmi2
= 0, has_tbm
= 0, has_lzcnt
= 0;
409 unsigned int has_hle
= 0, has_rtm
= 0, has_sgx
= 0;
410 unsigned int has_pconfig
= 0, has_wbnoinvd
= 0;
411 unsigned int has_rdrnd
= 0, has_f16c
= 0, has_fsgsbase
= 0;
412 unsigned int has_rdseed
= 0, has_prfchw
= 0, has_adx
= 0;
413 unsigned int has_osxsave
= 0, has_fxsr
= 0, has_xsave
= 0, has_xsaveopt
= 0;
414 unsigned int has_avx512er
= 0, has_avx512pf
= 0, has_avx512cd
= 0;
415 unsigned int has_avx512f
= 0, has_sha
= 0, has_prefetchwt1
= 0;
416 unsigned int has_clflushopt
= 0, has_xsavec
= 0, has_xsaves
= 0;
417 unsigned int has_avx512dq
= 0, has_avx512bw
= 0, has_avx512vl
= 0;
418 unsigned int has_avx512vbmi
= 0, has_avx512ifma
= 0, has_clwb
= 0;
419 unsigned int has_mwaitx
= 0, has_clzero
= 0, has_pku
= 0, has_rdpid
= 0;
420 unsigned int has_avx5124fmaps
= 0, has_avx5124vnniw
= 0;
421 unsigned int has_gfni
= 0, has_avx512vbmi2
= 0;
422 unsigned int has_avx512bitalg
= 0;
423 unsigned int has_shstk
= 0;
424 unsigned int has_avx512vnni
= 0, has_vaes
= 0;
425 unsigned int has_vpclmulqdq
= 0;
426 unsigned int has_movdiri
= 0, has_movdir64b
= 0;
427 unsigned int has_waitpkg
= 0;
428 unsigned int has_cldemote
= 0;
432 unsigned int l2sizekb
= 0;
437 arch
= !strcmp (argv
[0], "arch");
439 if (!arch
&& strcmp (argv
[0], "tune"))
442 max_level
= __get_cpuid_max (0, &vendor
);
446 __cpuid (1, eax
, ebx
, ecx
, edx
);
448 model
= (eax
>> 4) & 0x0f;
449 family
= (eax
>> 8) & 0x0f;
450 if (vendor
== signature_INTEL_ebx
451 || vendor
== signature_AMD_ebx
)
453 unsigned int extended_model
, extended_family
;
455 extended_model
= (eax
>> 12) & 0xf0;
456 extended_family
= (eax
>> 20) & 0xff;
459 family
+= extended_family
;
460 model
+= extended_model
;
462 else if (family
== 0x06)
463 model
+= extended_model
;
466 has_sse3
= ecx
& bit_SSE3
;
467 has_ssse3
= ecx
& bit_SSSE3
;
468 has_sse4_1
= ecx
& bit_SSE4_1
;
469 has_sse4_2
= ecx
& bit_SSE4_2
;
470 has_avx
= ecx
& bit_AVX
;
471 has_osxsave
= ecx
& bit_OSXSAVE
;
472 has_cmpxchg16b
= ecx
& bit_CMPXCHG16B
;
473 has_movbe
= ecx
& bit_MOVBE
;
474 has_popcnt
= ecx
& bit_POPCNT
;
475 has_aes
= ecx
& bit_AES
;
476 has_pclmul
= ecx
& bit_PCLMUL
;
477 has_fma
= ecx
& bit_FMA
;
478 has_f16c
= ecx
& bit_F16C
;
479 has_rdrnd
= ecx
& bit_RDRND
;
480 has_xsave
= ecx
& bit_XSAVE
;
482 has_cmpxchg8b
= edx
& bit_CMPXCHG8B
;
483 has_cmov
= edx
& bit_CMOV
;
484 has_mmx
= edx
& bit_MMX
;
485 has_fxsr
= edx
& bit_FXSAVE
;
486 has_sse
= edx
& bit_SSE
;
487 has_sse2
= edx
& bit_SSE2
;
491 __cpuid_count (7, 0, eax
, ebx
, ecx
, edx
);
493 has_bmi
= ebx
& bit_BMI
;
494 has_sgx
= ebx
& bit_SGX
;
495 has_hle
= ebx
& bit_HLE
;
496 has_rtm
= ebx
& bit_RTM
;
497 has_avx2
= ebx
& bit_AVX2
;
498 has_bmi2
= ebx
& bit_BMI2
;
499 has_fsgsbase
= ebx
& bit_FSGSBASE
;
500 has_rdseed
= ebx
& bit_RDSEED
;
501 has_adx
= ebx
& bit_ADX
;
502 has_avx512f
= ebx
& bit_AVX512F
;
503 has_avx512er
= ebx
& bit_AVX512ER
;
504 has_avx512pf
= ebx
& bit_AVX512PF
;
505 has_avx512cd
= ebx
& bit_AVX512CD
;
506 has_sha
= ebx
& bit_SHA
;
507 has_clflushopt
= ebx
& bit_CLFLUSHOPT
;
508 has_clwb
= ebx
& bit_CLWB
;
509 has_avx512dq
= ebx
& bit_AVX512DQ
;
510 has_avx512bw
= ebx
& bit_AVX512BW
;
511 has_avx512vl
= ebx
& bit_AVX512VL
;
512 has_avx512ifma
= ebx
& bit_AVX512IFMA
;
514 has_prefetchwt1
= ecx
& bit_PREFETCHWT1
;
515 has_avx512vbmi
= ecx
& bit_AVX512VBMI
;
516 has_pku
= ecx
& bit_OSPKE
;
517 has_avx512vbmi2
= ecx
& bit_AVX512VBMI2
;
518 has_avx512vnni
= ecx
& bit_AVX512VNNI
;
519 has_rdpid
= ecx
& bit_RDPID
;
520 has_gfni
= ecx
& bit_GFNI
;
521 has_vaes
= ecx
& bit_VAES
;
522 has_vpclmulqdq
= ecx
& bit_VPCLMULQDQ
;
523 has_avx512bitalg
= ecx
& bit_AVX512BITALG
;
524 has_movdiri
= ecx
& bit_MOVDIRI
;
525 has_movdir64b
= ecx
& bit_MOVDIR64B
;
526 has_cldemote
= ecx
& bit_CLDEMOTE
;
528 has_avx5124vnniw
= edx
& bit_AVX5124VNNIW
;
529 has_avx5124fmaps
= edx
& bit_AVX5124FMAPS
;
531 has_shstk
= ecx
& bit_SHSTK
;
532 has_pconfig
= edx
& bit_PCONFIG
;
533 has_waitpkg
= ecx
& bit_WAITPKG
;
538 __cpuid_count (13, 1, eax
, ebx
, ecx
, edx
);
540 has_xsaveopt
= eax
& bit_XSAVEOPT
;
541 has_xsavec
= eax
& bit_XSAVEC
;
542 has_xsaves
= eax
& bit_XSAVES
;
545 /* Check cpuid level of extended features. */
546 __cpuid (0x80000000, ext_level
, ebx
, ecx
, edx
);
548 if (ext_level
>= 0x80000001)
550 __cpuid (0x80000001, eax
, ebx
, ecx
, edx
);
552 has_lahf_lm
= ecx
& bit_LAHF_LM
;
553 has_sse4a
= ecx
& bit_SSE4a
;
554 has_abm
= ecx
& bit_ABM
;
555 has_lwp
= ecx
& bit_LWP
;
556 has_fma4
= ecx
& bit_FMA4
;
557 has_xop
= ecx
& bit_XOP
;
558 has_tbm
= ecx
& bit_TBM
;
559 has_lzcnt
= ecx
& bit_LZCNT
;
560 has_prfchw
= ecx
& bit_PRFCHW
;
562 has_longmode
= edx
& bit_LM
;
563 has_3dnowp
= edx
& bit_3DNOWP
;
564 has_3dnow
= edx
& bit_3DNOW
;
565 has_mwaitx
= ecx
& bit_MWAITX
;
568 if (ext_level
>= 0x80000008)
570 __cpuid (0x80000008, eax
, ebx
, ecx
, edx
);
571 has_clzero
= ebx
& bit_CLZERO
;
572 has_wbnoinvd
= ebx
& bit_WBNOINVD
;
575 /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
576 #define XCR_XFEATURE_ENABLED_MASK 0x0
577 #define XSTATE_FP 0x1
578 #define XSTATE_SSE 0x2
579 #define XSTATE_YMM 0x4
580 #define XSTATE_OPMASK 0x20
581 #define XSTATE_ZMM 0x40
582 #define XSTATE_HI_ZMM 0x80
584 #define XCR_AVX_ENABLED_MASK \
585 (XSTATE_SSE | XSTATE_YMM)
586 #define XCR_AVX512F_ENABLED_MASK \
587 (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
590 asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
591 : "=a" (eax
), "=d" (edx
)
592 : "c" (XCR_XFEATURE_ENABLED_MASK
));
596 /* Check if AVX registers are supported. */
597 if ((eax
& XCR_AVX_ENABLED_MASK
) != XCR_AVX_ENABLED_MASK
)
611 /* Check if AVX512F registers are supported. */
612 if ((eax
& XCR_AVX512F_ENABLED_MASK
) != XCR_AVX512F_ENABLED_MASK
)
625 if (vendor
== signature_AMD_ebx
626 || vendor
== signature_CENTAUR_ebx
627 || vendor
== signature_CYRIX_ebx
628 || vendor
== signature_NSC_ebx
)
629 cache
= detect_caches_amd (ext_level
);
630 else if (vendor
== signature_INTEL_ebx
)
632 bool xeon_mp
= (family
== 15 && model
== 6);
633 cache
= detect_caches_intel (xeon_mp
, max_level
,
634 ext_level
, &l2sizekb
);
638 if (vendor
== signature_AMD_ebx
)
642 /* Detect geode processor by its processor signature. */
643 if (ext_level
>= 0x80000002)
644 __cpuid (0x80000002, name
, ebx
, ecx
, edx
);
648 if (name
== signature_NSC_ebx
)
649 processor
= PROCESSOR_GEODE
;
650 else if (has_movbe
&& family
== 22)
651 processor
= PROCESSOR_BTVER2
;
653 processor
= PROCESSOR_ZNVER1
;
655 processor
= PROCESSOR_BDVER4
;
656 else if (has_xsaveopt
)
657 processor
= PROCESSOR_BDVER3
;
659 processor
= PROCESSOR_BDVER2
;
661 processor
= PROCESSOR_BDVER1
;
662 else if (has_sse4a
&& has_ssse3
)
663 processor
= PROCESSOR_BTVER1
;
665 processor
= PROCESSOR_AMDFAM10
;
666 else if (has_sse2
|| has_longmode
)
667 processor
= PROCESSOR_K8
;
668 else if (has_3dnowp
&& family
== 6)
669 processor
= PROCESSOR_ATHLON
;
671 processor
= PROCESSOR_K6
;
673 processor
= PROCESSOR_PENTIUM
;
675 else if (vendor
== signature_CENTAUR_ebx
)
677 processor
= PROCESSOR_GENERIC
;
682 /* We have no idea. */
686 if (has_3dnow
|| has_mmx
)
687 processor
= PROCESSOR_I486
;
692 processor
= PROCESSOR_K8
;
694 processor
= PROCESSOR_PENTIUMPRO
;
696 processor
= PROCESSOR_I486
;
704 processor
= PROCESSOR_I486
;
707 processor
= PROCESSOR_PENTIUM
;
710 processor
= PROCESSOR_PENTIUMPRO
;
713 processor
= PROCESSOR_PENTIUM4
;
716 /* We have no idea. */
717 processor
= PROCESSOR_GENERIC
;
727 if (arch
&& vendor
== signature_CENTAUR_ebx
)
734 /* Assume WinChip C6. */
740 case PROCESSOR_PENTIUM
:
746 case PROCESSOR_PENTIUMPRO
:
769 cpu
= "goldmont-plus";
824 /* Skylake with AVX-512. */
825 cpu
= "skylake-avx512";
828 /* Knights Landing. */
842 /* This is unknown family 0x6 CPU. */
843 /* Assume Ice Lake Server. */
845 cpu
= "icelake-server";
846 /* Assume Ice Lake. */
848 cpu
= "icelake-client";
849 /* Assume Cannon Lake. */
850 else if (has_avx512vbmi
)
852 /* Assume Knights Mill. */
853 else if (has_avx5124vnniw
)
855 /* Assume Knights Landing. */
856 else if (has_avx512er
)
858 /* Assume Skylake with AVX-512. */
859 else if (has_avx512f
)
860 cpu
= "skylake-avx512";
861 /* Assume Skylake. */
862 else if (has_clflushopt
)
864 /* Assume Broadwell. */
868 /* Assume Haswell. */
871 /* Assume Sandy Bridge. */
876 /* Assume Tremont. */
879 /* Assume Goldmont Plus. */
880 cpu
= "goldmont-plus";
882 /* Assume Goldmont. */
885 /* Assume Silvermont. */
888 /* Assume Nehalem. */
894 /* Assume Bonnell. */
900 else if (has_longmode
)
901 /* Perhaps some emulator? Assume x86-64, otherwise gcc
902 -march=native would be unusable for 64-bit compilations,
903 as all the CPUs below are 32-bit only. */
907 if (vendor
== signature_CENTAUR_ebx
)
908 /* C7 / Eden "Esther" */
911 /* It is Core Duo. */
915 /* It is Pentium M. */
919 if (vendor
== signature_CENTAUR_ebx
)
922 /* Eden "Nehemiah" */
928 /* It is Pentium III. */
932 /* It is Pentium II. */
935 /* Default to Pentium Pro. */
939 /* For -mtune, we default to -mtune=generic. */
944 case PROCESSOR_PENTIUM4
:
955 case PROCESSOR_GEODE
:
959 if (arch
&& has_3dnow
)
964 case PROCESSOR_ATHLON
:
973 if (vendor
== signature_CENTAUR_ebx
)
976 /* Nano 3000 | Nano dual / quad core | Eden X4 */
979 /* Nano 1000 | Nano 2000 */
994 /* For -mtune, we default to -mtune=k8 */
997 case PROCESSOR_AMDFAM10
:
1000 case PROCESSOR_BDVER1
:
1003 case PROCESSOR_BDVER2
:
1006 case PROCESSOR_BDVER3
:
1009 case PROCESSOR_BDVER4
:
1012 case PROCESSOR_ZNVER1
:
1015 case PROCESSOR_BTVER1
:
1018 case PROCESSOR_BTVER2
:
1023 /* Use something reasonable. */
1035 else if (has_longmode
)
1036 /* Perhaps some emulator? Assume x86-64, otherwise gcc
1037 -march=native would be unusable for 64-bit compilations,
1038 as all the CPUs below are 32-bit only. */
1045 cpu
= "pentium-mmx";
1046 else if (has_cmpxchg8b
)
1055 const char *mmx
= has_mmx
? " -mmmx" : " -mno-mmx";
1056 const char *mmx3dnow
= has_3dnow
? " -m3dnow" : " -mno-3dnow";
1057 const char *sse
= has_sse
? " -msse" : " -mno-sse";
1058 const char *sse2
= has_sse2
? " -msse2" : " -mno-sse2";
1059 const char *sse3
= has_sse3
? " -msse3" : " -mno-sse3";
1060 const char *ssse3
= has_ssse3
? " -mssse3" : " -mno-ssse3";
1061 const char *sse4a
= has_sse4a
? " -msse4a" : " -mno-sse4a";
1062 const char *cx16
= has_cmpxchg16b
? " -mcx16" : " -mno-cx16";
1063 const char *sahf
= has_lahf_lm
? " -msahf" : " -mno-sahf";
1064 const char *movbe
= has_movbe
? " -mmovbe" : " -mno-movbe";
1065 const char *aes
= has_aes
? " -maes" : " -mno-aes";
1066 const char *sha
= has_sha
? " -msha" : " -mno-sha";
1067 const char *pclmul
= has_pclmul
? " -mpclmul" : " -mno-pclmul";
1068 const char *popcnt
= has_popcnt
? " -mpopcnt" : " -mno-popcnt";
1069 const char *abm
= has_abm
? " -mabm" : " -mno-abm";
1070 const char *lwp
= has_lwp
? " -mlwp" : " -mno-lwp";
1071 const char *fma
= has_fma
? " -mfma" : " -mno-fma";
1072 const char *fma4
= has_fma4
? " -mfma4" : " -mno-fma4";
1073 const char *xop
= has_xop
? " -mxop" : " -mno-xop";
1074 const char *bmi
= has_bmi
? " -mbmi" : " -mno-bmi";
1075 const char *pconfig
= has_pconfig
? " -mpconfig" : " -mno-pconfig";
1076 const char *wbnoinvd
= has_wbnoinvd
? " -mwbnoinvd" : " -mno-wbnoinvd";
1077 const char *sgx
= has_sgx
? " -msgx" : " -mno-sgx";
1078 const char *bmi2
= has_bmi2
? " -mbmi2" : " -mno-bmi2";
1079 const char *tbm
= has_tbm
? " -mtbm" : " -mno-tbm";
1080 const char *avx
= has_avx
? " -mavx" : " -mno-avx";
1081 const char *avx2
= has_avx2
? " -mavx2" : " -mno-avx2";
1082 const char *sse4_2
= has_sse4_2
? " -msse4.2" : " -mno-sse4.2";
1083 const char *sse4_1
= has_sse4_1
? " -msse4.1" : " -mno-sse4.1";
1084 const char *lzcnt
= has_lzcnt
? " -mlzcnt" : " -mno-lzcnt";
1085 const char *hle
= has_hle
? " -mhle" : " -mno-hle";
1086 const char *rtm
= has_rtm
? " -mrtm" : " -mno-rtm";
1087 const char *rdrnd
= has_rdrnd
? " -mrdrnd" : " -mno-rdrnd";
1088 const char *f16c
= has_f16c
? " -mf16c" : " -mno-f16c";
1089 const char *fsgsbase
= has_fsgsbase
? " -mfsgsbase" : " -mno-fsgsbase";
1090 const char *rdseed
= has_rdseed
? " -mrdseed" : " -mno-rdseed";
1091 const char *prfchw
= has_prfchw
? " -mprfchw" : " -mno-prfchw";
1092 const char *adx
= has_adx
? " -madx" : " -mno-adx";
1093 const char *fxsr
= has_fxsr
? " -mfxsr" : " -mno-fxsr";
1094 const char *xsave
= has_xsave
? " -mxsave" : " -mno-xsave";
1095 const char *xsaveopt
= has_xsaveopt
? " -mxsaveopt" : " -mno-xsaveopt";
1096 const char *avx512f
= has_avx512f
? " -mavx512f" : " -mno-avx512f";
1097 const char *avx512er
= has_avx512er
? " -mavx512er" : " -mno-avx512er";
1098 const char *avx512cd
= has_avx512cd
? " -mavx512cd" : " -mno-avx512cd";
1099 const char *avx512pf
= has_avx512pf
? " -mavx512pf" : " -mno-avx512pf";
1100 const char *prefetchwt1
= has_prefetchwt1
? " -mprefetchwt1" : " -mno-prefetchwt1";
1101 const char *clflushopt
= has_clflushopt
? " -mclflushopt" : " -mno-clflushopt";
1102 const char *xsavec
= has_xsavec
? " -mxsavec" : " -mno-xsavec";
1103 const char *xsaves
= has_xsaves
? " -mxsaves" : " -mno-xsaves";
1104 const char *avx512dq
= has_avx512dq
? " -mavx512dq" : " -mno-avx512dq";
1105 const char *avx512bw
= has_avx512bw
? " -mavx512bw" : " -mno-avx512bw";
1106 const char *avx512vl
= has_avx512vl
? " -mavx512vl" : " -mno-avx512vl";
1107 const char *avx512ifma
= has_avx512ifma
? " -mavx512ifma" : " -mno-avx512ifma";
1108 const char *avx512vbmi
= has_avx512vbmi
? " -mavx512vbmi" : " -mno-avx512vbmi";
1109 const char *avx5124vnniw
= has_avx5124vnniw
? " -mavx5124vnniw" : " -mno-avx5124vnniw";
1110 const char *avx512vbmi2
= has_avx512vbmi2
? " -mavx512vbmi2" : " -mno-avx512vbmi2";
1111 const char *avx512vnni
= has_avx512vnni
? " -mavx512vnni" : " -mno-avx512vnni";
1112 const char *avx5124fmaps
= has_avx5124fmaps
? " -mavx5124fmaps" : " -mno-avx5124fmaps";
1113 const char *clwb
= has_clwb
? " -mclwb" : " -mno-clwb";
1114 const char *mwaitx
= has_mwaitx
? " -mmwaitx" : " -mno-mwaitx";
1115 const char *clzero
= has_clzero
? " -mclzero" : " -mno-clzero";
1116 const char *pku
= has_pku
? " -mpku" : " -mno-pku";
1117 const char *rdpid
= has_rdpid
? " -mrdpid" : " -mno-rdpid";
1118 const char *gfni
= has_gfni
? " -mgfni" : " -mno-gfni";
1119 const char *shstk
= has_shstk
? " -mshstk" : " -mno-shstk";
1120 const char *vaes
= has_vaes
? " -mvaes" : " -mno-vaes";
1121 const char *vpclmulqdq
= has_vpclmulqdq
? " -mvpclmulqdq" : " -mno-vpclmulqdq";
1122 const char *avx512bitalg
= has_avx512bitalg
? " -mavx512bitalg" : " -mno-avx512bitalg";
1123 const char *movdiri
= has_movdiri
? " -mmovdiri" : " -mno-movdiri";
1124 const char *movdir64b
= has_movdir64b
? " -mmovdir64b" : " -mno-movdir64b";
1125 const char *waitpkg
= has_waitpkg
? " -mwaitpkg" : " -mno-waitpkg";
1126 const char *cldemote
= has_cldemote
? " -mcldemote" : " -mno-cldemote";
1127 options
= concat (options
, mmx
, mmx3dnow
, sse
, sse2
, sse3
, ssse3
,
1128 sse4a
, cx16
, sahf
, movbe
, aes
, sha
, pclmul
,
1129 popcnt
, abm
, lwp
, fma
, fma4
, xop
, bmi
, sgx
, bmi2
,
1131 tbm
, avx
, avx2
, sse4_2
, sse4_1
, lzcnt
, rtm
,
1132 hle
, rdrnd
, f16c
, fsgsbase
, rdseed
, prfchw
, adx
,
1133 fxsr
, xsave
, xsaveopt
, avx512f
, avx512er
,
1134 avx512cd
, avx512pf
, prefetchwt1
, clflushopt
,
1135 xsavec
, xsaves
, avx512dq
, avx512bw
, avx512vl
,
1136 avx512ifma
, avx512vbmi
, avx5124fmaps
, avx5124vnniw
,
1137 clwb
, mwaitx
, clzero
, pku
, rdpid
, gfni
, shstk
,
1138 avx512vbmi2
, avx512vnni
, vaes
, vpclmulqdq
,
1139 avx512bitalg
, movdiri
, movdir64b
, waitpkg
, cldemote
,
1144 return concat (cache
, "-m", argv
[0], "=", cpu
, options
, NULL
);
/* If we are compiling with GCC where the %EBX register is fixed, then the
   driver will just ignore the -march and -mtune "native" targets and will
   leave it to the newly built compiler to generate code for its default
   target.  */
1152 const char *host_detect_local_cpu (int, const char **)
1156 #endif /* __GNUC__ */