Enable support for bfloat16 which will be in Future Cooper Lake.
[official-gcc.git] / gcc / config / i386 / driver-i386.c
blob22ad5bcf07d7599784d13717139f14aac1fe6f90
1 /* Subroutines for the gcc driver.
2 Copyright (C) 2006-2019 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 #define IN_TARGET_CODE 1
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
27 const char *host_detect_local_cpu (int argc, const char **argv);
29 #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
30 #include "cpuid.h"
/* Parameters of a single cache level: SIZEKB is the size in KB, ASSOC
   the associativity and LINE the line size.  The members are declared
   in the order used by the positional {0, 0, 0} initializers in this
   file.  */
struct cache_desc
{
  unsigned sizekb, assoc, line;
};
39 /* Returns command line parameters that describe size and
40 cache line size of the processor caches. */
42 static char *
43 describe_cache (struct cache_desc level1, struct cache_desc level2)
45 char size[100], line[100], size2[100];
47 /* At the moment, gcc does not use the information
48 about the associativity of the cache. */
50 snprintf (size, sizeof (size),
51 "--param l1-cache-size=%u ", level1.sizekb);
52 snprintf (line, sizeof (line),
53 "--param l1-cache-line-size=%u ", level1.line);
55 snprintf (size2, sizeof (size2),
56 "--param l2-cache-size=%u ", level2.sizekb);
58 return concat (size, line, size2, NULL);
61 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */
63 static void
64 detect_l2_cache (struct cache_desc *level2)
66 unsigned eax, ebx, ecx, edx;
67 unsigned assoc;
69 __cpuid (0x80000006, eax, ebx, ecx, edx);
71 level2->sizekb = (ecx >> 16) & 0xffff;
72 level2->line = ecx & 0xff;
74 assoc = (ecx >> 12) & 0xf;
75 if (assoc == 6)
76 assoc = 8;
77 else if (assoc == 8)
78 assoc = 16;
79 else if (assoc >= 0xa && assoc <= 0xc)
80 assoc = 32 + (assoc - 0xa) * 16;
81 else if (assoc >= 0xd && assoc <= 0xe)
82 assoc = 96 + (assoc - 0xd) * 32;
84 level2->assoc = assoc;
87 /* Returns the description of caches for an AMD processor. */
89 static const char *
90 detect_caches_amd (unsigned max_ext_level)
92 unsigned eax, ebx, ecx, edx;
94 struct cache_desc level1, level2 = {0, 0, 0};
96 if (max_ext_level < 0x80000005)
97 return "";
99 __cpuid (0x80000005, eax, ebx, ecx, edx);
101 level1.sizekb = (ecx >> 24) & 0xff;
102 level1.assoc = (ecx >> 16) & 0xff;
103 level1.line = ecx & 0xff;
105 if (max_ext_level >= 0x80000006)
106 detect_l2_cache (&level2);
108 return describe_cache (level1, level2);
111 /* Decodes the size, the associativity and the cache line size of
112 L1/L2 caches of an Intel processor. Values are based on
113 "Intel Processor Identification and the CPUID Instruction"
114 [Application Note 485], revision -032, December 2007. */
116 static void
117 decode_caches_intel (unsigned reg, bool xeon_mp,
118 struct cache_desc *level1, struct cache_desc *level2)
120 int i;
122 for (i = 24; i >= 0; i -= 8)
123 switch ((reg >> i) & 0xff)
125 case 0x0a:
126 level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
127 break;
128 case 0x0c:
129 level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
130 break;
131 case 0x0d:
132 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
133 break;
134 case 0x0e:
135 level1->sizekb = 24; level1->assoc = 6; level1->line = 64;
136 break;
137 case 0x21:
138 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
139 break;
140 case 0x24:
141 level2->sizekb = 1024; level2->assoc = 16; level2->line = 64;
142 break;
143 case 0x2c:
144 level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
145 break;
146 case 0x39:
147 level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
148 break;
149 case 0x3a:
150 level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
151 break;
152 case 0x3b:
153 level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
154 break;
155 case 0x3c:
156 level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
157 break;
158 case 0x3d:
159 level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
160 break;
161 case 0x3e:
162 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
163 break;
164 case 0x41:
165 level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
166 break;
167 case 0x42:
168 level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
169 break;
170 case 0x43:
171 level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
172 break;
173 case 0x44:
174 level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
175 break;
176 case 0x45:
177 level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
178 break;
179 case 0x48:
180 level2->sizekb = 3072; level2->assoc = 12; level2->line = 64;
181 break;
182 case 0x49:
183 if (xeon_mp)
184 break;
185 level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
186 break;
187 case 0x4e:
188 level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
189 break;
190 case 0x60:
191 level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
192 break;
193 case 0x66:
194 level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
195 break;
196 case 0x67:
197 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
198 break;
199 case 0x68:
200 level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
201 break;
202 case 0x78:
203 level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
204 break;
205 case 0x79:
206 level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
207 break;
208 case 0x7a:
209 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
210 break;
211 case 0x7b:
212 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
213 break;
214 case 0x7c:
215 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
216 break;
217 case 0x7d:
218 level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
219 break;
220 case 0x7f:
221 level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
222 break;
223 case 0x80:
224 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
225 break;
226 case 0x82:
227 level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
228 break;
229 case 0x83:
230 level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
231 break;
232 case 0x84:
233 level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
234 break;
235 case 0x85:
236 level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
237 break;
238 case 0x86:
239 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
240 break;
241 case 0x87:
242 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
244 default:
245 break;
/* Detect cache parameters using CPUID function 2.  Every register
   returned by the instruction is handed to decode_caches_intel, which
   updates *LEVEL1 and *LEVEL2; XEON_MP is passed through to it.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
                      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int remaining, r;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  /* The low four bits of EAX are used as the number of times the
     instruction has to be executed; they are cleared so that they are
     not mistaken for a descriptor.  */
  remaining = regs[0] & 0x0f;
  regs[0] &= ~0x0f;

  for (remaining--; remaining >= 0; remaining--)
    {
      for (r = 0; r < 4; r++)
        {
          /* Skip registers that are zero or have bit 31 set.  */
          if (regs[r] == 0 || ((regs[r] >> 31) & 1) != 0)
            continue;

          decode_caches_intel (regs[r], xeon_mp, level1, level2);
        }

      /* Fetch the next set of registers unless this was the last
         round.  */
      if (remaining != 0)
        __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
    }
}
/* Detect cache parameters using CPUID function 4.  This
   method doesn't require hardcoded tables.  */

/* Cache type codes compared against the low five bits of EAX returned
   by CPUID function 4.  CACHE_END is 0; the following enumerators
   count up from it.  */
enum cache_type
{
  CACHE_END = 0,
  CACHE_DATA,
  CACHE_INST,
  CACHE_UNIFIED
};
285 static void
286 detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
287 struct cache_desc *level3)
289 struct cache_desc *cache;
291 unsigned eax, ebx, ecx, edx;
292 int count;
294 for (count = 0;; count++)
296 __cpuid_count(4, count, eax, ebx, ecx, edx);
297 switch (eax & 0x1f)
299 case CACHE_END:
300 return;
301 case CACHE_DATA:
302 case CACHE_UNIFIED:
304 switch ((eax >> 5) & 0x07)
306 case 1:
307 cache = level1;
308 break;
309 case 2:
310 cache = level2;
311 break;
312 case 3:
313 cache = level3;
314 break;
315 default:
316 cache = NULL;
319 if (cache)
321 unsigned sets = ecx + 1;
322 unsigned part = ((ebx >> 12) & 0x03ff) + 1;
324 cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
325 cache->line = (ebx & 0x0fff) + 1;
327 cache->sizekb = (cache->assoc * part
328 * cache->line * sets) / 1024;
331 default:
332 break;
337 /* Returns the description of caches for an Intel processor. */
339 static const char *
340 detect_caches_intel (bool xeon_mp, unsigned max_level,
341 unsigned max_ext_level, unsigned *l2sizekb)
343 struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
345 if (max_level >= 4)
346 detect_caches_cpuid4 (&level1, &level2, &level3);
347 else if (max_level >= 2)
348 detect_caches_cpuid2 (xeon_mp, &level1, &level2);
349 else
350 return "";
352 if (level1.sizekb == 0)
353 return "";
355 /* Let the L3 replace the L2. This assumes inclusive caches
356 and single threaded program for now. */
357 if (level3.sizekb)
358 level2 = level3;
360 /* Intel CPUs are equipped with AMD style L2 cache info. Try this
361 method if other methods fail to provide L2 cache parameters. */
362 if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
363 detect_l2_cache (&level2);
365 *l2sizekb = level2.sizekb;
367 return describe_cache (level1, level2);
370 /* This will be called by the spec parser in gcc.c when it sees
371 a %:local_cpu_detect(args) construct. Currently it will be called
372 with either "arch" or "tune" as argument depending on if -march=native
373 or -mtune=native is to be substituted.
375 It returns a string containing new command line parameters to be
376 put at the place of the above two options, depending on what CPU
377 this is executed. E.g. "-march=k8" on an AMD64 machine
378 for -march=native.
380 ARGC and ARGV are set depending on the actual arguments given
381 in the spec. */
383 const char *host_detect_local_cpu (int argc, const char **argv)
385 enum processor_type processor = PROCESSOR_I386;
386 const char *cpu = "i386";
388 const char *cache = "";
389 const char *options = "";
391 unsigned int eax, ebx, ecx, edx;
393 unsigned int max_level, ext_level;
395 unsigned int vendor;
396 unsigned int model, family;
398 unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
399 unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
401 /* Extended features */
402 unsigned int has_lahf_lm = 0, has_sse4a = 0;
403 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
404 unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
405 unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
406 unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
407 unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
408 unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
409 unsigned int has_hle = 0, has_rtm = 0, has_sgx = 0;
410 unsigned int has_pconfig = 0, has_wbnoinvd = 0;
411 unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
412 unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
413 unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
414 unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
415 unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
416 unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
417 unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
418 unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
419 unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0;
420 unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
421 unsigned int has_gfni = 0, has_avx512vbmi2 = 0;
422 unsigned int has_avx512bitalg = 0;
423 unsigned int has_shstk = 0;
424 unsigned int has_avx512vnni = 0, has_vaes = 0;
425 unsigned int has_vpclmulqdq = 0;
426 unsigned int has_movdiri = 0, has_movdir64b = 0;
427 unsigned int has_waitpkg = 0;
428 unsigned int has_cldemote = 0;
429 unsigned int has_avx512bf16 = 0;
431 unsigned int has_ptwrite = 0;
433 bool arch;
435 unsigned int l2sizekb = 0;
437 if (argc < 1)
438 return NULL;
440 arch = !strcmp (argv[0], "arch");
442 if (!arch && strcmp (argv[0], "tune"))
443 return NULL;
445 max_level = __get_cpuid_max (0, &vendor);
446 if (max_level < 1)
447 goto done;
449 __cpuid (1, eax, ebx, ecx, edx);
451 model = (eax >> 4) & 0x0f;
452 family = (eax >> 8) & 0x0f;
453 if (vendor == signature_INTEL_ebx
454 || vendor == signature_AMD_ebx)
456 unsigned int extended_model, extended_family;
458 extended_model = (eax >> 12) & 0xf0;
459 extended_family = (eax >> 20) & 0xff;
460 if (family == 0x0f)
462 family += extended_family;
463 model += extended_model;
465 else if (family == 0x06)
466 model += extended_model;
469 has_sse3 = ecx & bit_SSE3;
470 has_ssse3 = ecx & bit_SSSE3;
471 has_sse4_1 = ecx & bit_SSE4_1;
472 has_sse4_2 = ecx & bit_SSE4_2;
473 has_avx = ecx & bit_AVX;
474 has_osxsave = ecx & bit_OSXSAVE;
475 has_cmpxchg16b = ecx & bit_CMPXCHG16B;
476 has_movbe = ecx & bit_MOVBE;
477 has_popcnt = ecx & bit_POPCNT;
478 has_aes = ecx & bit_AES;
479 has_pclmul = ecx & bit_PCLMUL;
480 has_fma = ecx & bit_FMA;
481 has_f16c = ecx & bit_F16C;
482 has_rdrnd = ecx & bit_RDRND;
483 has_xsave = ecx & bit_XSAVE;
485 has_cmpxchg8b = edx & bit_CMPXCHG8B;
486 has_cmov = edx & bit_CMOV;
487 has_mmx = edx & bit_MMX;
488 has_fxsr = edx & bit_FXSAVE;
489 has_sse = edx & bit_SSE;
490 has_sse2 = edx & bit_SSE2;
492 if (max_level >= 7)
494 __cpuid_count (7, 0, eax, ebx, ecx, edx);
496 has_bmi = ebx & bit_BMI;
497 has_sgx = ebx & bit_SGX;
498 has_hle = ebx & bit_HLE;
499 has_rtm = ebx & bit_RTM;
500 has_avx2 = ebx & bit_AVX2;
501 has_bmi2 = ebx & bit_BMI2;
502 has_fsgsbase = ebx & bit_FSGSBASE;
503 has_rdseed = ebx & bit_RDSEED;
504 has_adx = ebx & bit_ADX;
505 has_avx512f = ebx & bit_AVX512F;
506 has_avx512er = ebx & bit_AVX512ER;
507 has_avx512pf = ebx & bit_AVX512PF;
508 has_avx512cd = ebx & bit_AVX512CD;
509 has_sha = ebx & bit_SHA;
510 has_clflushopt = ebx & bit_CLFLUSHOPT;
511 has_clwb = ebx & bit_CLWB;
512 has_avx512dq = ebx & bit_AVX512DQ;
513 has_avx512bw = ebx & bit_AVX512BW;
514 has_avx512vl = ebx & bit_AVX512VL;
515 has_avx512ifma = ebx & bit_AVX512IFMA;
517 has_prefetchwt1 = ecx & bit_PREFETCHWT1;
518 has_avx512vbmi = ecx & bit_AVX512VBMI;
519 has_pku = ecx & bit_OSPKE;
520 has_avx512vbmi2 = ecx & bit_AVX512VBMI2;
521 has_avx512vnni = ecx & bit_AVX512VNNI;
522 has_rdpid = ecx & bit_RDPID;
523 has_gfni = ecx & bit_GFNI;
524 has_vaes = ecx & bit_VAES;
525 has_vpclmulqdq = ecx & bit_VPCLMULQDQ;
526 has_avx512bitalg = ecx & bit_AVX512BITALG;
527 has_movdiri = ecx & bit_MOVDIRI;
528 has_movdir64b = ecx & bit_MOVDIR64B;
529 has_cldemote = ecx & bit_CLDEMOTE;
531 has_avx5124vnniw = edx & bit_AVX5124VNNIW;
532 has_avx5124fmaps = edx & bit_AVX5124FMAPS;
534 has_shstk = ecx & bit_SHSTK;
535 has_pconfig = edx & bit_PCONFIG;
536 has_waitpkg = ecx & bit_WAITPKG;
538 __cpuid_count (7, 1, eax, ebx, ecx, edx);
539 has_avx512bf16 = eax & bit_AVX512BF16;
542 if (max_level >= 13)
544 __cpuid_count (13, 1, eax, ebx, ecx, edx);
546 has_xsaveopt = eax & bit_XSAVEOPT;
547 has_xsavec = eax & bit_XSAVEC;
548 has_xsaves = eax & bit_XSAVES;
551 if (max_level >= 0x14)
553 __cpuid_count (0x14, 0, eax, ebx, ecx, edx);
555 has_ptwrite = ebx & bit_PTWRITE;
558 /* Check cpuid level of extended features. */
559 __cpuid (0x80000000, ext_level, ebx, ecx, edx);
561 if (ext_level >= 0x80000001)
563 __cpuid (0x80000001, eax, ebx, ecx, edx);
565 has_lahf_lm = ecx & bit_LAHF_LM;
566 has_sse4a = ecx & bit_SSE4a;
567 has_abm = ecx & bit_ABM;
568 has_lwp = ecx & bit_LWP;
569 has_fma4 = ecx & bit_FMA4;
570 has_xop = ecx & bit_XOP;
571 has_tbm = ecx & bit_TBM;
572 has_lzcnt = ecx & bit_LZCNT;
573 has_prfchw = ecx & bit_PRFCHW;
575 has_longmode = edx & bit_LM;
576 has_3dnowp = edx & bit_3DNOWP;
577 has_3dnow = edx & bit_3DNOW;
578 has_mwaitx = ecx & bit_MWAITX;
581 if (ext_level >= 0x80000008)
583 __cpuid (0x80000008, eax, ebx, ecx, edx);
584 has_clzero = ebx & bit_CLZERO;
585 has_wbnoinvd = ebx & bit_WBNOINVD;
588 /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
589 #define XCR_XFEATURE_ENABLED_MASK 0x0
590 #define XSTATE_FP 0x1
591 #define XSTATE_SSE 0x2
592 #define XSTATE_YMM 0x4
593 #define XSTATE_OPMASK 0x20
594 #define XSTATE_ZMM 0x40
595 #define XSTATE_HI_ZMM 0x80
597 #define XCR_AVX_ENABLED_MASK \
598 (XSTATE_SSE | XSTATE_YMM)
599 #define XCR_AVX512F_ENABLED_MASK \
600 (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
602 if (has_osxsave)
603 asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
604 : "=a" (eax), "=d" (edx)
605 : "c" (XCR_XFEATURE_ENABLED_MASK));
606 else
607 eax = 0;
609 /* Check if AVX registers are supported. */
610 if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK)
612 has_avx = 0;
613 has_avx2 = 0;
614 has_fma = 0;
615 has_fma4 = 0;
616 has_f16c = 0;
617 has_xop = 0;
618 has_xsave = 0;
619 has_xsaveopt = 0;
620 has_xsaves = 0;
621 has_xsavec = 0;
624 /* Check if AVX512F registers are supported. */
625 if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK)
627 has_avx512f = 0;
628 has_avx512er = 0;
629 has_avx512pf = 0;
630 has_avx512cd = 0;
631 has_avx512dq = 0;
632 has_avx512bw = 0;
633 has_avx512vl = 0;
636 if (!arch)
638 if (vendor == signature_AMD_ebx
639 || vendor == signature_CENTAUR_ebx
640 || vendor == signature_CYRIX_ebx
641 || vendor == signature_NSC_ebx)
642 cache = detect_caches_amd (ext_level);
643 else if (vendor == signature_INTEL_ebx)
645 bool xeon_mp = (family == 15 && model == 6);
646 cache = detect_caches_intel (xeon_mp, max_level,
647 ext_level, &l2sizekb);
651 if (vendor == signature_AMD_ebx)
653 unsigned int name;
655 /* Detect geode processor by its processor signature. */
656 if (ext_level >= 0x80000002)
657 __cpuid (0x80000002, name, ebx, ecx, edx);
658 else
659 name = 0;
661 if (name == signature_NSC_ebx)
662 processor = PROCESSOR_GEODE;
663 else if (has_movbe && family == 22)
664 processor = PROCESSOR_BTVER2;
665 else if (has_clwb)
666 processor = PROCESSOR_ZNVER2;
667 else if (has_clzero)
668 processor = PROCESSOR_ZNVER1;
669 else if (has_avx2)
670 processor = PROCESSOR_BDVER4;
671 else if (has_xsaveopt)
672 processor = PROCESSOR_BDVER3;
673 else if (has_bmi)
674 processor = PROCESSOR_BDVER2;
675 else if (has_xop)
676 processor = PROCESSOR_BDVER1;
677 else if (has_sse4a && has_ssse3)
678 processor = PROCESSOR_BTVER1;
679 else if (has_sse4a)
680 processor = PROCESSOR_AMDFAM10;
681 else if (has_sse2 || has_longmode)
682 processor = PROCESSOR_K8;
683 else if (has_3dnowp && family == 6)
684 processor = PROCESSOR_ATHLON;
685 else if (has_mmx)
686 processor = PROCESSOR_K6;
687 else
688 processor = PROCESSOR_PENTIUM;
690 else if (vendor == signature_CENTAUR_ebx)
692 processor = PROCESSOR_GENERIC;
694 switch (family)
696 default:
697 /* We have no idea. */
698 break;
700 case 5:
701 if (has_3dnow || has_mmx)
702 processor = PROCESSOR_I486;
703 break;
705 case 6:
706 if (has_longmode)
707 processor = PROCESSOR_K8;
708 else if (model >= 9)
709 processor = PROCESSOR_PENTIUMPRO;
710 else if (model >= 6)
711 processor = PROCESSOR_I486;
714 else
716 switch (family)
718 case 4:
719 processor = PROCESSOR_I486;
720 break;
721 case 5:
722 processor = PROCESSOR_PENTIUM;
723 break;
724 case 6:
725 processor = PROCESSOR_PENTIUMPRO;
726 break;
727 case 15:
728 processor = PROCESSOR_PENTIUM4;
729 break;
730 default:
731 /* We have no idea. */
732 processor = PROCESSOR_GENERIC;
736 switch (processor)
738 case PROCESSOR_I386:
739 /* Default. */
740 break;
741 case PROCESSOR_I486:
742 if (arch && vendor == signature_CENTAUR_ebx)
744 if (model >= 6)
745 cpu = "c3";
746 else if (has_3dnow)
747 cpu = "winchip2";
748 else
749 /* Assume WinChip C6. */
750 cpu = "winchip-c6";
752 else
753 cpu = "i486";
754 break;
755 case PROCESSOR_PENTIUM:
756 if (arch && has_mmx)
757 cpu = "pentium-mmx";
758 else
759 cpu = "pentium";
760 break;
761 case PROCESSOR_PENTIUMPRO:
762 switch (model)
764 case 0x1c:
765 case 0x26:
766 /* Bonnell. */
767 cpu = "bonnell";
768 break;
769 case 0x37:
770 case 0x4a:
771 case 0x4d:
772 case 0x5a:
773 case 0x5d:
774 /* Silvermont. */
775 cpu = "silvermont";
776 break;
777 case 0x5c:
778 case 0x5f:
779 /* Goldmont. */
780 cpu = "goldmont";
781 break;
782 case 0x7a:
783 /* Goldmont Plus. */
784 cpu = "goldmont-plus";
785 break;
786 case 0x0f:
787 /* Merom. */
788 case 0x17:
789 case 0x1d:
790 /* Penryn. */
791 cpu = "core2";
792 break;
793 case 0x1a:
794 case 0x1e:
795 case 0x1f:
796 case 0x2e:
797 /* Nehalem. */
798 cpu = "nehalem";
799 break;
800 case 0x25:
801 case 0x2c:
802 case 0x2f:
803 /* Westmere. */
804 cpu = "westmere";
805 break;
806 case 0x2a:
807 case 0x2d:
808 /* Sandy Bridge. */
809 cpu = "sandybridge";
810 break;
811 case 0x3a:
812 case 0x3e:
813 /* Ivy Bridge. */
814 cpu = "ivybridge";
815 break;
816 case 0x3c:
817 case 0x3f:
818 case 0x45:
819 case 0x46:
820 /* Haswell. */
821 cpu = "haswell";
822 break;
823 case 0x3d:
824 case 0x47:
825 case 0x4f:
826 case 0x56:
827 /* Broadwell. */
828 cpu = "broadwell";
829 break;
830 case 0x4e:
831 case 0x5e:
832 /* Skylake. */
833 case 0x8e:
834 case 0x9e:
835 /* Kaby Lake. */
836 cpu = "skylake";
837 break;
838 case 0x55:
839 if (has_avx512vnni)
840 /* Cascade Lake. */
841 cpu = "cascadelake";
842 else
843 /* Skylake with AVX-512. */
844 cpu = "skylake-avx512";
845 break;
846 case 0x57:
847 /* Knights Landing. */
848 cpu = "knl";
849 break;
850 case 0x66:
851 /* Cannon Lake. */
852 cpu = "cannonlake";
853 break;
854 case 0x85:
855 /* Knights Mill. */
856 cpu = "knm";
857 break;
858 default:
859 if (arch)
861 /* This is unknown family 0x6 CPU. */
862 /* Assume Ice Lake Server. */
863 if (has_wbnoinvd)
864 cpu = "icelake-server";
865 /* Assume Ice Lake. */
866 else if (has_gfni)
867 cpu = "icelake-client";
868 /* Assume Cannon Lake. */
869 else if (has_avx512vbmi)
870 cpu = "cannonlake";
871 /* Assume Knights Mill. */
872 else if (has_avx5124vnniw)
873 cpu = "knm";
874 /* Assume Knights Landing. */
875 else if (has_avx512er)
876 cpu = "knl";
877 /* Assume Skylake with AVX-512. */
878 else if (has_avx512f)
879 cpu = "skylake-avx512";
880 /* Assume Skylake. */
881 else if (has_clflushopt)
882 cpu = "skylake";
883 /* Assume Broadwell. */
884 else if (has_adx)
885 cpu = "broadwell";
886 else if (has_avx2)
887 /* Assume Haswell. */
888 cpu = "haswell";
889 else if (has_avx)
890 /* Assume Sandy Bridge. */
891 cpu = "sandybridge";
892 else if (has_sse4_2)
894 if (has_gfni)
895 /* Assume Tremont. */
896 cpu = "tremont";
897 else if (has_sgx)
898 /* Assume Goldmont Plus. */
899 cpu = "goldmont-plus";
900 else if (has_xsave)
901 /* Assume Goldmont. */
902 cpu = "goldmont";
903 else if (has_movbe)
904 /* Assume Silvermont. */
905 cpu = "silvermont";
906 else
907 /* Assume Nehalem. */
908 cpu = "nehalem";
910 else if (has_ssse3)
912 if (has_movbe)
913 /* Assume Bonnell. */
914 cpu = "bonnell";
915 else
916 /* Assume Core 2. */
917 cpu = "core2";
919 else if (has_longmode)
920 /* Perhaps some emulator? Assume x86-64, otherwise gcc
921 -march=native would be unusable for 64-bit compilations,
922 as all the CPUs below are 32-bit only. */
923 cpu = "x86-64";
924 else if (has_sse3)
926 if (vendor == signature_CENTAUR_ebx)
927 /* C7 / Eden "Esther" */
928 cpu = "c7";
929 else
930 /* It is Core Duo. */
931 cpu = "pentium-m";
933 else if (has_sse2)
934 /* It is Pentium M. */
935 cpu = "pentium-m";
936 else if (has_sse)
938 if (vendor == signature_CENTAUR_ebx)
940 if (model >= 9)
941 /* Eden "Nehemiah" */
942 cpu = "nehemiah";
943 else
944 cpu = "c3-2";
946 else
947 /* It is Pentium III. */
948 cpu = "pentium3";
950 else if (has_mmx)
951 /* It is Pentium II. */
952 cpu = "pentium2";
953 else
954 /* Default to Pentium Pro. */
955 cpu = "pentiumpro";
957 else
958 /* For -mtune, we default to -mtune=generic. */
959 cpu = "generic";
960 break;
962 break;
963 case PROCESSOR_PENTIUM4:
964 if (has_sse3)
966 if (has_longmode)
967 cpu = "nocona";
968 else
969 cpu = "prescott";
971 else
972 cpu = "pentium4";
973 break;
974 case PROCESSOR_GEODE:
975 cpu = "geode";
976 break;
977 case PROCESSOR_K6:
978 if (arch && has_3dnow)
979 cpu = "k6-3";
980 else
981 cpu = "k6";
982 break;
983 case PROCESSOR_ATHLON:
984 if (arch && has_sse)
985 cpu = "athlon-4";
986 else
987 cpu = "athlon";
988 break;
989 case PROCESSOR_K8:
990 if (arch)
992 if (vendor == signature_CENTAUR_ebx)
994 if (has_sse4_1)
995 /* Nano 3000 | Nano dual / quad core | Eden X4 */
996 cpu = "nano-3000";
997 else if (has_ssse3)
998 /* Nano 1000 | Nano 2000 */
999 cpu = "nano";
1000 else if (has_sse3)
1001 /* Eden X2 */
1002 cpu = "eden-x2";
1003 else
1004 /* Default to k8 */
1005 cpu = "k8";
1007 else if (has_sse3)
1008 cpu = "k8-sse3";
1009 else
1010 cpu = "k8";
1012 else
1013 /* For -mtune, we default to -mtune=k8 */
1014 cpu = "k8";
1015 break;
1016 case PROCESSOR_AMDFAM10:
1017 cpu = "amdfam10";
1018 break;
1019 case PROCESSOR_BDVER1:
1020 cpu = "bdver1";
1021 break;
1022 case PROCESSOR_BDVER2:
1023 cpu = "bdver2";
1024 break;
1025 case PROCESSOR_BDVER3:
1026 cpu = "bdver3";
1027 break;
1028 case PROCESSOR_BDVER4:
1029 cpu = "bdver4";
1030 break;
1031 case PROCESSOR_ZNVER1:
1032 cpu = "znver1";
1033 break;
1034 case PROCESSOR_ZNVER2:
1035 cpu = "znver2";
1036 break;
1037 case PROCESSOR_BTVER1:
1038 cpu = "btver1";
1039 break;
1040 case PROCESSOR_BTVER2:
1041 cpu = "btver2";
1042 break;
1044 default:
1045 /* Use something reasonable. */
1046 if (arch)
1048 if (has_ssse3)
1049 cpu = "core2";
1050 else if (has_sse3)
1052 if (has_longmode)
1053 cpu = "nocona";
1054 else
1055 cpu = "prescott";
1057 else if (has_longmode)
1058 /* Perhaps some emulator? Assume x86-64, otherwise gcc
1059 -march=native would be unusable for 64-bit compilations,
1060 as all the CPUs below are 32-bit only. */
1061 cpu = "x86-64";
1062 else if (has_sse2)
1063 cpu = "pentium4";
1064 else if (has_cmov)
1065 cpu = "pentiumpro";
1066 else if (has_mmx)
1067 cpu = "pentium-mmx";
1068 else if (has_cmpxchg8b)
1069 cpu = "pentium";
1071 else
1072 cpu = "generic";
1075 if (arch)
1077 const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx";
1078 const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow";
1079 const char *sse = has_sse ? " -msse" : " -mno-sse";
1080 const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2";
1081 const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3";
1082 const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3";
1083 const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a";
1084 const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16";
1085 const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
1086 const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
1087 const char *aes = has_aes ? " -maes" : " -mno-aes";
1088 const char *sha = has_sha ? " -msha" : " -mno-sha";
1089 const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
1090 const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
1091 const char *abm = has_abm ? " -mabm" : " -mno-abm";
1092 const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp";
1093 const char *fma = has_fma ? " -mfma" : " -mno-fma";
1094 const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
1095 const char *xop = has_xop ? " -mxop" : " -mno-xop";
1096 const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
1097 const char *pconfig = has_pconfig ? " -mpconfig" : " -mno-pconfig";
1098 const char *wbnoinvd = has_wbnoinvd ? " -mwbnoinvd" : " -mno-wbnoinvd";
1099 const char *sgx = has_sgx ? " -msgx" : " -mno-sgx";
1100 const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
1101 const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
1102 const char *avx = has_avx ? " -mavx" : " -mno-avx";
1103 const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
1104 const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
1105 const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
1106 const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
1107 const char *hle = has_hle ? " -mhle" : " -mno-hle";
1108 const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm";
1109 const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd";
1110 const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c";
1111 const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase";
1112 const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed";
1113 const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw";
1114 const char *adx = has_adx ? " -madx" : " -mno-adx";
1115 const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
1116 const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
1117 const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
1118 const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f";
1119 const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er";
1120 const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd";
1121 const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
1122 const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1";
1123 const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
1124 const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
1125 const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
1126 const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
1127 const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
1128 const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
1129 const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma";
1130 const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
1131 const char *avx5124vnniw = has_avx5124vnniw ? " -mavx5124vnniw" : " -mno-avx5124vnniw";
1132 const char *avx512vbmi2 = has_avx512vbmi2 ? " -mavx512vbmi2" : " -mno-avx512vbmi2";
1133 const char *avx512vnni = has_avx512vnni ? " -mavx512vnni" : " -mno-avx512vnni";
1134 const char *avx5124fmaps = has_avx5124fmaps ? " -mavx5124fmaps" : " -mno-avx5124fmaps";
1135 const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
1136 const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx";
1137 const char *clzero = has_clzero ? " -mclzero" : " -mno-clzero";
1138 const char *pku = has_pku ? " -mpku" : " -mno-pku";
1139 const char *rdpid = has_rdpid ? " -mrdpid" : " -mno-rdpid";
1140 const char *gfni = has_gfni ? " -mgfni" : " -mno-gfni";
1141 const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk";
1142 const char *vaes = has_vaes ? " -mvaes" : " -mno-vaes";
1143 const char *vpclmulqdq = has_vpclmulqdq ? " -mvpclmulqdq" : " -mno-vpclmulqdq";
1144 const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " -mno-avx512bitalg";
1145 const char *movdiri = has_movdiri ? " -mmovdiri" : " -mno-movdiri";
1146 const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " -mno-movdir64b";
1147 const char *waitpkg = has_waitpkg ? " -mwaitpkg" : " -mno-waitpkg";
1148 const char *cldemote = has_cldemote ? " -mcldemote" : " -mno-cldemote";
1149 const char *ptwrite = has_ptwrite ? " -mptwrite" : " -mno-ptwrite";
1150 const char *avx512bf16 = has_avx512bf16 ? " -mavx512bf16" : " -mno-avx512bf16";
1152 options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
1153 sse4a, cx16, sahf, movbe, aes, sha, pclmul,
1154 popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
1155 pconfig, wbnoinvd,
1156 tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
1157 hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
1158 fxsr, xsave, xsaveopt, avx512f, avx512er,
1159 avx512cd, avx512pf, prefetchwt1, clflushopt,
1160 xsavec, xsaves, avx512dq, avx512bw, avx512vl,
1161 avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
1162 clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
1163 avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
1164 avx512bitalg, movdiri, movdir64b, waitpkg, cldemote,
1165 ptwrite, avx512bf16,
1166 NULL);
1169 done:
1170 return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
1172 #else
/* If we are compiling with GCC where %EBX register is fixed, then the
   driver will just ignore -march and -mtune "native" target and will leave
   to the newly built compiler to generate code for its default target.

   ARGC and ARGV are accepted for interface compatibility only and are
   never examined; NULL is always returned.  The parameters are named
   so that the definition is valid for C as well as for C++ compilers.  */

const char *host_detect_local_cpu (int argc, const char **argv)
{
  (void) argc;
  (void) argv;
  return NULL;
}
1182 #endif /* __GNUC__ */