Fix PR82941 and PR82942 by adding proper vzeroupper generation on SKX.
[official-gcc.git] / gcc / config / i386 / driver-i386.c
blob80283996343bc0f62ff8a844855e8b7436bc3cab
/* Subroutines for the gcc driver.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
20 #include "config.h"
21 #include "system.h"
22 #include "coretypes.h"
23 #include "tm.h"
25 const char *host_detect_local_cpu (int argc, const char **argv);
27 #if defined(__GNUC__) && (__GNUC__ >= 5 || !defined(__PIC__))
28 #include "cpuid.h"
/* Description of one cache level as filled in by the detection
   routines in this file.  */

struct cache_desc
{
  unsigned sizekb;  /* Cache size in KB.  */
  unsigned assoc;   /* Associativity (number of ways).  */
  unsigned line;    /* Cache line size.  */
};
37 /* Returns command line parameters that describe size and
38 cache line size of the processor caches. */
40 static char *
41 describe_cache (struct cache_desc level1, struct cache_desc level2)
43 char size[100], line[100], size2[100];
45 /* At the moment, gcc does not use the information
46 about the associativity of the cache. */
48 snprintf (size, sizeof (size),
49 "--param l1-cache-size=%u ", level1.sizekb);
50 snprintf (line, sizeof (line),
51 "--param l1-cache-line-size=%u ", level1.line);
53 snprintf (size2, sizeof (size2),
54 "--param l2-cache-size=%u ", level2.sizekb);
56 return concat (size, line, size2, NULL);
59 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */
61 static void
62 detect_l2_cache (struct cache_desc *level2)
64 unsigned eax, ebx, ecx, edx;
65 unsigned assoc;
67 __cpuid (0x80000006, eax, ebx, ecx, edx);
69 level2->sizekb = (ecx >> 16) & 0xffff;
70 level2->line = ecx & 0xff;
72 assoc = (ecx >> 12) & 0xf;
73 if (assoc == 6)
74 assoc = 8;
75 else if (assoc == 8)
76 assoc = 16;
77 else if (assoc >= 0xa && assoc <= 0xc)
78 assoc = 32 + (assoc - 0xa) * 16;
79 else if (assoc >= 0xd && assoc <= 0xe)
80 assoc = 96 + (assoc - 0xd) * 32;
82 level2->assoc = assoc;
85 /* Returns the description of caches for an AMD processor. */
87 static const char *
88 detect_caches_amd (unsigned max_ext_level)
90 unsigned eax, ebx, ecx, edx;
92 struct cache_desc level1, level2 = {0, 0, 0};
94 if (max_ext_level < 0x80000005)
95 return "";
97 __cpuid (0x80000005, eax, ebx, ecx, edx);
99 level1.sizekb = (ecx >> 24) & 0xff;
100 level1.assoc = (ecx >> 16) & 0xff;
101 level1.line = ecx & 0xff;
103 if (max_ext_level >= 0x80000006)
104 detect_l2_cache (&level2);
106 return describe_cache (level1, level2);
109 /* Decodes the size, the associativity and the cache line size of
110 L1/L2 caches of an Intel processor. Values are based on
111 "Intel Processor Identification and the CPUID Instruction"
112 [Application Note 485], revision -032, December 2007. */
114 static void
115 decode_caches_intel (unsigned reg, bool xeon_mp,
116 struct cache_desc *level1, struct cache_desc *level2)
118 int i;
120 for (i = 24; i >= 0; i -= 8)
121 switch ((reg >> i) & 0xff)
123 case 0x0a:
124 level1->sizekb = 8; level1->assoc = 2; level1->line = 32;
125 break;
126 case 0x0c:
127 level1->sizekb = 16; level1->assoc = 4; level1->line = 32;
128 break;
129 case 0x0d:
130 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
131 break;
132 case 0x0e:
133 level1->sizekb = 24; level1->assoc = 6; level1->line = 64;
134 break;
135 case 0x21:
136 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
137 break;
138 case 0x24:
139 level2->sizekb = 1024; level2->assoc = 16; level2->line = 64;
140 break;
141 case 0x2c:
142 level1->sizekb = 32; level1->assoc = 8; level1->line = 64;
143 break;
144 case 0x39:
145 level2->sizekb = 128; level2->assoc = 4; level2->line = 64;
146 break;
147 case 0x3a:
148 level2->sizekb = 192; level2->assoc = 6; level2->line = 64;
149 break;
150 case 0x3b:
151 level2->sizekb = 128; level2->assoc = 2; level2->line = 64;
152 break;
153 case 0x3c:
154 level2->sizekb = 256; level2->assoc = 4; level2->line = 64;
155 break;
156 case 0x3d:
157 level2->sizekb = 384; level2->assoc = 6; level2->line = 64;
158 break;
159 case 0x3e:
160 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
161 break;
162 case 0x41:
163 level2->sizekb = 128; level2->assoc = 4; level2->line = 32;
164 break;
165 case 0x42:
166 level2->sizekb = 256; level2->assoc = 4; level2->line = 32;
167 break;
168 case 0x43:
169 level2->sizekb = 512; level2->assoc = 4; level2->line = 32;
170 break;
171 case 0x44:
172 level2->sizekb = 1024; level2->assoc = 4; level2->line = 32;
173 break;
174 case 0x45:
175 level2->sizekb = 2048; level2->assoc = 4; level2->line = 32;
176 break;
177 case 0x48:
178 level2->sizekb = 3072; level2->assoc = 12; level2->line = 64;
179 break;
180 case 0x49:
181 if (xeon_mp)
182 break;
183 level2->sizekb = 4096; level2->assoc = 16; level2->line = 64;
184 break;
185 case 0x4e:
186 level2->sizekb = 6144; level2->assoc = 24; level2->line = 64;
187 break;
188 case 0x60:
189 level1->sizekb = 16; level1->assoc = 8; level1->line = 64;
190 break;
191 case 0x66:
192 level1->sizekb = 8; level1->assoc = 4; level1->line = 64;
193 break;
194 case 0x67:
195 level1->sizekb = 16; level1->assoc = 4; level1->line = 64;
196 break;
197 case 0x68:
198 level1->sizekb = 32; level1->assoc = 4; level1->line = 64;
199 break;
200 case 0x78:
201 level2->sizekb = 1024; level2->assoc = 4; level2->line = 64;
202 break;
203 case 0x79:
204 level2->sizekb = 128; level2->assoc = 8; level2->line = 64;
205 break;
206 case 0x7a:
207 level2->sizekb = 256; level2->assoc = 8; level2->line = 64;
208 break;
209 case 0x7b:
210 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
211 break;
212 case 0x7c:
213 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
214 break;
215 case 0x7d:
216 level2->sizekb = 2048; level2->assoc = 8; level2->line = 64;
217 break;
218 case 0x7f:
219 level2->sizekb = 512; level2->assoc = 2; level2->line = 64;
220 break;
221 case 0x80:
222 level2->sizekb = 512; level2->assoc = 8; level2->line = 64;
223 break;
224 case 0x82:
225 level2->sizekb = 256; level2->assoc = 8; level2->line = 32;
226 break;
227 case 0x83:
228 level2->sizekb = 512; level2->assoc = 8; level2->line = 32;
229 break;
230 case 0x84:
231 level2->sizekb = 1024; level2->assoc = 8; level2->line = 32;
232 break;
233 case 0x85:
234 level2->sizekb = 2048; level2->assoc = 8; level2->line = 32;
235 break;
236 case 0x86:
237 level2->sizekb = 512; level2->assoc = 4; level2->line = 64;
238 break;
239 case 0x87:
240 level2->sizekb = 1024; level2->assoc = 8; level2->line = 64;
242 default:
243 break;
/* Detect cache parameters using CPUID function 2.

   The low four bits of EAX from the first call give the number of
   times the function has to be executed.  On every round each of the
   four result registers that is non-zero and has bit 31 clear is
   handed to decode_caches_intel, which updates *LEVEL1 and *LEVEL2.
   XEON_MP is passed through unchanged.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
                      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int nreps, i;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  nreps = regs[0] & 0x0f;
  /* The repeat count is not a descriptor; keep it out of the decoder.  */
  regs[0] &= ~0x0f;

  while (--nreps >= 0)
    {
      for (i = 0; i < 4; i++)
        if (regs[i] && !((regs[i] >> 31) & 1))
          decode_caches_intel (regs[i], xeon_mp, level1, level2);

      /* Fetch the next set of descriptors unless this was the last
         round.  */
      if (nreps)
        __cpuid (2, regs[0], regs[1], regs[2], regs[3]);
    }
}
/* Detect cache parameters using CPUID function 4.  This
   method doesn't require hardcoded tables.  */

/* Values of the cache type field, the low five bits of EAX returned by
   CPUID function 4.  */

enum cache_type
{
  CACHE_END = 0,        /* No more cache levels to enumerate.  */
  CACHE_DATA = 1,       /* Data cache.  */
  CACHE_INST = 2,       /* Instruction cache.  */
  CACHE_UNIFIED = 3     /* Unified cache.  */
};
283 static void
284 detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
285 struct cache_desc *level3)
287 struct cache_desc *cache;
289 unsigned eax, ebx, ecx, edx;
290 int count;
292 for (count = 0;; count++)
294 __cpuid_count(4, count, eax, ebx, ecx, edx);
295 switch (eax & 0x1f)
297 case CACHE_END:
298 return;
299 case CACHE_DATA:
300 case CACHE_UNIFIED:
302 switch ((eax >> 5) & 0x07)
304 case 1:
305 cache = level1;
306 break;
307 case 2:
308 cache = level2;
309 break;
310 case 3:
311 cache = level3;
312 break;
313 default:
314 cache = NULL;
317 if (cache)
319 unsigned sets = ecx + 1;
320 unsigned part = ((ebx >> 12) & 0x03ff) + 1;
322 cache->assoc = ((ebx >> 22) & 0x03ff) + 1;
323 cache->line = (ebx & 0x0fff) + 1;
325 cache->sizekb = (cache->assoc * part
326 * cache->line * sets) / 1024;
329 default:
330 break;
335 /* Returns the description of caches for an Intel processor. */
337 static const char *
338 detect_caches_intel (bool xeon_mp, unsigned max_level,
339 unsigned max_ext_level, unsigned *l2sizekb)
341 struct cache_desc level1 = {0, 0, 0}, level2 = {0, 0, 0}, level3 = {0, 0, 0};
343 if (max_level >= 4)
344 detect_caches_cpuid4 (&level1, &level2, &level3);
345 else if (max_level >= 2)
346 detect_caches_cpuid2 (xeon_mp, &level1, &level2);
347 else
348 return "";
350 if (level1.sizekb == 0)
351 return "";
353 /* Let the L3 replace the L2. This assumes inclusive caches
354 and single threaded program for now. */
355 if (level3.sizekb)
356 level2 = level3;
358 /* Intel CPUs are equipped with AMD style L2 cache info. Try this
359 method if other methods fail to provide L2 cache parameters. */
360 if (level2.sizekb == 0 && max_ext_level >= 0x80000006)
361 detect_l2_cache (&level2);
363 *l2sizekb = level2.sizekb;
365 return describe_cache (level1, level2);
368 /* This will be called by the spec parser in gcc.c when it sees
369 a %:local_cpu_detect(args) construct. Currently it will be called
370 with either "arch" or "tune" as argument depending on if -march=native
371 or -mtune=native is to be substituted.
373 It returns a string containing new command line parameters to be
374 put at the place of the above two options, depending on what CPU
375 this is executed. E.g. "-march=k8" on an AMD64 machine
376 for -march=native.
378 ARGC and ARGV are set depending on the actual arguments given
379 in the spec. */
381 const char *host_detect_local_cpu (int argc, const char **argv)
383 enum processor_type processor = PROCESSOR_I386;
384 const char *cpu = "i386";
386 const char *cache = "";
387 const char *options = "";
389 unsigned int eax, ebx, ecx, edx;
391 unsigned int max_level, ext_level;
393 unsigned int vendor;
394 unsigned int model, family;
396 unsigned int has_sse3, has_ssse3, has_cmpxchg16b;
397 unsigned int has_cmpxchg8b, has_cmov, has_mmx, has_sse, has_sse2;
399 /* Extended features */
400 unsigned int has_lahf_lm = 0, has_sse4a = 0;
401 unsigned int has_longmode = 0, has_3dnowp = 0, has_3dnow = 0;
402 unsigned int has_movbe = 0, has_sse4_1 = 0, has_sse4_2 = 0;
403 unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
404 unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
405 unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
406 unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
407 unsigned int has_hle = 0, has_rtm = 0, has_sgx = 0;
408 unsigned int has_rdrnd = 0, has_f16c = 0, has_fsgsbase = 0;
409 unsigned int has_rdseed = 0, has_prfchw = 0, has_adx = 0;
410 unsigned int has_osxsave = 0, has_fxsr = 0, has_xsave = 0, has_xsaveopt = 0;
411 unsigned int has_avx512er = 0, has_avx512pf = 0, has_avx512cd = 0;
412 unsigned int has_avx512f = 0, has_sha = 0, has_prefetchwt1 = 0;
413 unsigned int has_clflushopt = 0, has_xsavec = 0, has_xsaves = 0;
414 unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
415 unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
416 unsigned int has_mwaitx = 0, has_clzero = 0, has_pku = 0, has_rdpid = 0;
417 unsigned int has_avx5124fmaps = 0, has_avx5124vnniw = 0;
418 unsigned int has_gfni = 0;
419 unsigned int has_ibt = 0, has_shstk = 0;
421 bool arch;
423 unsigned int l2sizekb = 0;
425 if (argc < 1)
426 return NULL;
428 arch = !strcmp (argv[0], "arch");
430 if (!arch && strcmp (argv[0], "tune"))
431 return NULL;
433 max_level = __get_cpuid_max (0, &vendor);
434 if (max_level < 1)
435 goto done;
437 __cpuid (1, eax, ebx, ecx, edx);
439 model = (eax >> 4) & 0x0f;
440 family = (eax >> 8) & 0x0f;
441 if (vendor == signature_INTEL_ebx
442 || vendor == signature_AMD_ebx)
444 unsigned int extended_model, extended_family;
446 extended_model = (eax >> 12) & 0xf0;
447 extended_family = (eax >> 20) & 0xff;
448 if (family == 0x0f)
450 family += extended_family;
451 model += extended_model;
453 else if (family == 0x06)
454 model += extended_model;
457 has_sse3 = ecx & bit_SSE3;
458 has_ssse3 = ecx & bit_SSSE3;
459 has_sse4_1 = ecx & bit_SSE4_1;
460 has_sse4_2 = ecx & bit_SSE4_2;
461 has_avx = ecx & bit_AVX;
462 has_osxsave = ecx & bit_OSXSAVE;
463 has_cmpxchg16b = ecx & bit_CMPXCHG16B;
464 has_movbe = ecx & bit_MOVBE;
465 has_popcnt = ecx & bit_POPCNT;
466 has_aes = ecx & bit_AES;
467 has_pclmul = ecx & bit_PCLMUL;
468 has_fma = ecx & bit_FMA;
469 has_f16c = ecx & bit_F16C;
470 has_rdrnd = ecx & bit_RDRND;
471 has_xsave = ecx & bit_XSAVE;
473 has_cmpxchg8b = edx & bit_CMPXCHG8B;
474 has_cmov = edx & bit_CMOV;
475 has_mmx = edx & bit_MMX;
476 has_fxsr = edx & bit_FXSAVE;
477 has_sse = edx & bit_SSE;
478 has_sse2 = edx & bit_SSE2;
480 if (max_level >= 7)
482 __cpuid_count (7, 0, eax, ebx, ecx, edx);
484 has_bmi = ebx & bit_BMI;
485 has_sgx = ebx & bit_SGX;
486 has_hle = ebx & bit_HLE;
487 has_rtm = ebx & bit_RTM;
488 has_avx2 = ebx & bit_AVX2;
489 has_bmi2 = ebx & bit_BMI2;
490 has_fsgsbase = ebx & bit_FSGSBASE;
491 has_rdseed = ebx & bit_RDSEED;
492 has_adx = ebx & bit_ADX;
493 has_avx512f = ebx & bit_AVX512F;
494 has_avx512er = ebx & bit_AVX512ER;
495 has_avx512pf = ebx & bit_AVX512PF;
496 has_avx512cd = ebx & bit_AVX512CD;
497 has_sha = ebx & bit_SHA;
498 has_clflushopt = ebx & bit_CLFLUSHOPT;
499 has_clwb = ebx & bit_CLWB;
500 has_avx512dq = ebx & bit_AVX512DQ;
501 has_avx512bw = ebx & bit_AVX512BW;
502 has_avx512vl = ebx & bit_AVX512VL;
503 has_avx512ifma = ebx & bit_AVX512IFMA;
505 has_prefetchwt1 = ecx & bit_PREFETCHWT1;
506 has_avx512vbmi = ecx & bit_AVX512VBMI;
507 has_pku = ecx & bit_OSPKE;
508 has_rdpid = ecx & bit_RDPID;
509 has_gfni = ecx & bit_GFNI;
511 has_avx5124vnniw = edx & bit_AVX5124VNNIW;
512 has_avx5124fmaps = edx & bit_AVX5124FMAPS;
514 has_shstk = ecx & bit_SHSTK;
515 has_ibt = edx & bit_IBT;
518 if (max_level >= 13)
520 __cpuid_count (13, 1, eax, ebx, ecx, edx);
522 has_xsaveopt = eax & bit_XSAVEOPT;
523 has_xsavec = eax & bit_XSAVEC;
524 has_xsaves = eax & bit_XSAVES;
527 /* Check cpuid level of extended features. */
528 __cpuid (0x80000000, ext_level, ebx, ecx, edx);
530 if (ext_level >= 0x80000001)
532 __cpuid (0x80000001, eax, ebx, ecx, edx);
534 has_lahf_lm = ecx & bit_LAHF_LM;
535 has_sse4a = ecx & bit_SSE4a;
536 has_abm = ecx & bit_ABM;
537 has_lwp = ecx & bit_LWP;
538 has_fma4 = ecx & bit_FMA4;
539 has_xop = ecx & bit_XOP;
540 has_tbm = ecx & bit_TBM;
541 has_lzcnt = ecx & bit_LZCNT;
542 has_prfchw = ecx & bit_PRFCHW;
544 has_longmode = edx & bit_LM;
545 has_3dnowp = edx & bit_3DNOWP;
546 has_3dnow = edx & bit_3DNOW;
547 has_mwaitx = ecx & bit_MWAITX;
550 if (ext_level >= 0x80000008)
552 __cpuid (0x80000008, eax, ebx, ecx, edx);
553 has_clzero = ebx & bit_CLZERO;
556 /* Get XCR_XFEATURE_ENABLED_MASK register with xgetbv. */
557 #define XCR_XFEATURE_ENABLED_MASK 0x0
558 #define XSTATE_FP 0x1
559 #define XSTATE_SSE 0x2
560 #define XSTATE_YMM 0x4
561 #define XSTATE_OPMASK 0x20
562 #define XSTATE_ZMM 0x40
563 #define XSTATE_HI_ZMM 0x80
565 #define XCR_AVX_ENABLED_MASK \
566 (XSTATE_SSE | XSTATE_YMM)
567 #define XCR_AVX512F_ENABLED_MASK \
568 (XSTATE_SSE | XSTATE_YMM | XSTATE_OPMASK | XSTATE_ZMM | XSTATE_HI_ZMM)
570 if (has_osxsave)
571 asm (".byte 0x0f; .byte 0x01; .byte 0xd0"
572 : "=a" (eax), "=d" (edx)
573 : "c" (XCR_XFEATURE_ENABLED_MASK));
574 else
575 eax = 0;
577 /* Check if AVX registers are supported. */
578 if ((eax & XCR_AVX_ENABLED_MASK) != XCR_AVX_ENABLED_MASK)
580 has_avx = 0;
581 has_avx2 = 0;
582 has_fma = 0;
583 has_fma4 = 0;
584 has_f16c = 0;
585 has_xop = 0;
586 has_xsave = 0;
587 has_xsaveopt = 0;
588 has_xsaves = 0;
589 has_xsavec = 0;
592 /* Check if AVX512F registers are supported. */
593 if ((eax & XCR_AVX512F_ENABLED_MASK) != XCR_AVX512F_ENABLED_MASK)
595 has_avx512f = 0;
596 has_avx512er = 0;
597 has_avx512pf = 0;
598 has_avx512cd = 0;
599 has_avx512dq = 0;
600 has_avx512bw = 0;
601 has_avx512vl = 0;
604 if (!arch)
606 if (vendor == signature_AMD_ebx
607 || vendor == signature_CENTAUR_ebx
608 || vendor == signature_CYRIX_ebx
609 || vendor == signature_NSC_ebx)
610 cache = detect_caches_amd (ext_level);
611 else if (vendor == signature_INTEL_ebx)
613 bool xeon_mp = (family == 15 && model == 6);
614 cache = detect_caches_intel (xeon_mp, max_level,
615 ext_level, &l2sizekb);
619 if (vendor == signature_AMD_ebx)
621 unsigned int name;
623 /* Detect geode processor by its processor signature. */
624 if (ext_level >= 0x80000002)
625 __cpuid (0x80000002, name, ebx, ecx, edx);
626 else
627 name = 0;
629 if (name == signature_NSC_ebx)
630 processor = PROCESSOR_GEODE;
631 else if (has_movbe && family == 22)
632 processor = PROCESSOR_BTVER2;
633 else if (has_clzero)
634 processor = PROCESSOR_ZNVER1;
635 else if (has_avx2)
636 processor = PROCESSOR_BDVER4;
637 else if (has_xsaveopt)
638 processor = PROCESSOR_BDVER3;
639 else if (has_bmi)
640 processor = PROCESSOR_BDVER2;
641 else if (has_xop)
642 processor = PROCESSOR_BDVER1;
643 else if (has_sse4a && has_ssse3)
644 processor = PROCESSOR_BTVER1;
645 else if (has_sse4a)
646 processor = PROCESSOR_AMDFAM10;
647 else if (has_sse2 || has_longmode)
648 processor = PROCESSOR_K8;
649 else if (has_3dnowp && family == 6)
650 processor = PROCESSOR_ATHLON;
651 else if (has_mmx)
652 processor = PROCESSOR_K6;
653 else
654 processor = PROCESSOR_PENTIUM;
656 else if (vendor == signature_CENTAUR_ebx)
658 processor = PROCESSOR_GENERIC;
660 switch (family)
662 default:
663 /* We have no idea. */
664 break;
666 case 5:
667 if (has_3dnow || has_mmx)
668 processor = PROCESSOR_I486;
669 break;
671 case 6:
672 if (has_longmode)
673 processor = PROCESSOR_K8;
674 else if (model >= 9)
675 processor = PROCESSOR_PENTIUMPRO;
676 else if (model >= 6)
677 processor = PROCESSOR_I486;
680 else
682 switch (family)
684 case 4:
685 processor = PROCESSOR_I486;
686 break;
687 case 5:
688 processor = PROCESSOR_PENTIUM;
689 break;
690 case 6:
691 processor = PROCESSOR_PENTIUMPRO;
692 break;
693 case 15:
694 processor = PROCESSOR_PENTIUM4;
695 break;
696 default:
697 /* We have no idea. */
698 processor = PROCESSOR_GENERIC;
702 switch (processor)
704 case PROCESSOR_I386:
705 /* Default. */
706 break;
707 case PROCESSOR_I486:
708 if (arch && vendor == signature_CENTAUR_ebx)
710 if (model >= 6)
711 cpu = "c3";
712 else if (has_3dnow)
713 cpu = "winchip2";
714 else
715 /* Assume WinChip C6. */
716 cpu = "winchip-c6";
718 else
719 cpu = "i486";
720 break;
721 case PROCESSOR_PENTIUM:
722 if (arch && has_mmx)
723 cpu = "pentium-mmx";
724 else
725 cpu = "pentium";
726 break;
727 case PROCESSOR_PENTIUMPRO:
728 switch (model)
730 case 0x1c:
731 case 0x26:
732 /* Bonnell. */
733 cpu = "bonnell";
734 break;
735 case 0x37:
736 case 0x4a:
737 case 0x4d:
738 case 0x5a:
739 case 0x5d:
740 /* Silvermont. */
741 cpu = "silvermont";
742 break;
743 case 0x0f:
744 /* Merom. */
745 case 0x17:
746 case 0x1d:
747 /* Penryn. */
748 cpu = "core2";
749 break;
750 case 0x1a:
751 case 0x1e:
752 case 0x1f:
753 case 0x2e:
754 /* Nehalem. */
755 cpu = "nehalem";
756 break;
757 case 0x25:
758 case 0x2c:
759 case 0x2f:
760 /* Westmere. */
761 cpu = "westmere";
762 break;
763 case 0x2a:
764 case 0x2d:
765 /* Sandy Bridge. */
766 cpu = "sandybridge";
767 break;
768 case 0x3a:
769 case 0x3e:
770 /* Ivy Bridge. */
771 cpu = "ivybridge";
772 break;
773 case 0x3c:
774 case 0x3f:
775 case 0x45:
776 case 0x46:
777 /* Haswell. */
778 cpu = "haswell";
779 break;
780 case 0x3d:
781 case 0x47:
782 case 0x4f:
783 case 0x56:
784 /* Broadwell. */
785 cpu = "broadwell";
786 break;
787 case 0x4e:
788 case 0x5e:
789 /* Skylake. */
790 case 0x8e:
791 case 0x9e:
792 /* Kaby Lake. */
793 cpu = "skylake";
794 break;
795 case 0x57:
796 /* Knights Landing. */
797 cpu = "knl";
798 break;
799 case 0x85:
800 /* Knights Mill. */
801 cpu = "knm";
802 break;
803 default:
804 if (arch)
806 /* This is unknown family 0x6 CPU. */
807 /* Assume Knights Landing. */
808 if (has_avx512f)
809 cpu = "knl";
810 /* Assume Knights Mill */
811 else if (has_avx5124vnniw)
812 cpu = "knm";
813 /* Assume Skylake. */
814 else if (has_clflushopt)
815 cpu = "skylake";
816 /* Assume Broadwell. */
817 else if (has_adx)
818 cpu = "broadwell";
819 else if (has_avx2)
820 /* Assume Haswell. */
821 cpu = "haswell";
822 else if (has_avx)
823 /* Assume Sandy Bridge. */
824 cpu = "sandybridge";
825 else if (has_sse4_2)
827 if (has_movbe)
828 /* Assume Silvermont. */
829 cpu = "silvermont";
830 else
831 /* Assume Nehalem. */
832 cpu = "nehalem";
834 else if (has_ssse3)
836 if (has_movbe)
837 /* Assume Bonnell. */
838 cpu = "bonnell";
839 else
840 /* Assume Core 2. */
841 cpu = "core2";
843 else if (has_longmode)
844 /* Perhaps some emulator? Assume x86-64, otherwise gcc
845 -march=native would be unusable for 64-bit compilations,
846 as all the CPUs below are 32-bit only. */
847 cpu = "x86-64";
848 else if (has_sse3)
850 if (vendor == signature_CENTAUR_ebx)
851 /* C7 / Eden "Esther" */
852 cpu = "c7";
853 else
854 /* It is Core Duo. */
855 cpu = "pentium-m";
857 else if (has_sse2)
858 /* It is Pentium M. */
859 cpu = "pentium-m";
860 else if (has_sse)
862 if (vendor == signature_CENTAUR_ebx)
864 if (model >= 9)
865 /* Eden "Nehemiah" */
866 cpu = "nehemiah";
867 else
868 cpu = "c3-2";
870 else
871 /* It is Pentium III. */
872 cpu = "pentium3";
874 else if (has_mmx)
875 /* It is Pentium II. */
876 cpu = "pentium2";
877 else
878 /* Default to Pentium Pro. */
879 cpu = "pentiumpro";
881 else
882 /* For -mtune, we default to -mtune=generic. */
883 cpu = "generic";
884 break;
886 break;
887 case PROCESSOR_PENTIUM4:
888 if (has_sse3)
890 if (has_longmode)
891 cpu = "nocona";
892 else
893 cpu = "prescott";
895 else
896 cpu = "pentium4";
897 break;
898 case PROCESSOR_GEODE:
899 cpu = "geode";
900 break;
901 case PROCESSOR_K6:
902 if (arch && has_3dnow)
903 cpu = "k6-3";
904 else
905 cpu = "k6";
906 break;
907 case PROCESSOR_ATHLON:
908 if (arch && has_sse)
909 cpu = "athlon-4";
910 else
911 cpu = "athlon";
912 break;
913 case PROCESSOR_K8:
914 if (arch)
916 if (vendor == signature_CENTAUR_ebx)
918 if (has_sse4_1)
919 /* Nano 3000 | Nano dual / quad core | Eden X4 */
920 cpu = "nano-3000";
921 else if (has_ssse3)
922 /* Nano 1000 | Nano 2000 */
923 cpu = "nano";
924 else if (has_sse3)
925 /* Eden X2 */
926 cpu = "eden-x2";
927 else
928 /* Default to k8 */
929 cpu = "k8";
931 else if (has_sse3)
932 cpu = "k8-sse3";
933 else
934 cpu = "k8";
936 else
937 /* For -mtune, we default to -mtune=k8 */
938 cpu = "k8";
939 break;
940 case PROCESSOR_AMDFAM10:
941 cpu = "amdfam10";
942 break;
943 case PROCESSOR_BDVER1:
944 cpu = "bdver1";
945 break;
946 case PROCESSOR_BDVER2:
947 cpu = "bdver2";
948 break;
949 case PROCESSOR_BDVER3:
950 cpu = "bdver3";
951 break;
952 case PROCESSOR_BDVER4:
953 cpu = "bdver4";
954 break;
955 case PROCESSOR_ZNVER1:
956 cpu = "znver1";
957 break;
958 case PROCESSOR_BTVER1:
959 cpu = "btver1";
960 break;
961 case PROCESSOR_BTVER2:
962 cpu = "btver2";
963 break;
965 default:
966 /* Use something reasonable. */
967 if (arch)
969 if (has_ssse3)
970 cpu = "core2";
971 else if (has_sse3)
973 if (has_longmode)
974 cpu = "nocona";
975 else
976 cpu = "prescott";
978 else if (has_longmode)
979 /* Perhaps some emulator? Assume x86-64, otherwise gcc
980 -march=native would be unusable for 64-bit compilations,
981 as all the CPUs below are 32-bit only. */
982 cpu = "x86-64";
983 else if (has_sse2)
984 cpu = "pentium4";
985 else if (has_cmov)
986 cpu = "pentiumpro";
987 else if (has_mmx)
988 cpu = "pentium-mmx";
989 else if (has_cmpxchg8b)
990 cpu = "pentium";
992 else
993 cpu = "generic";
996 if (arch)
998 const char *mmx = has_mmx ? " -mmmx" : " -mno-mmx";
999 const char *mmx3dnow = has_3dnow ? " -m3dnow" : " -mno-3dnow";
1000 const char *sse = has_sse ? " -msse" : " -mno-sse";
1001 const char *sse2 = has_sse2 ? " -msse2" : " -mno-sse2";
1002 const char *sse3 = has_sse3 ? " -msse3" : " -mno-sse3";
1003 const char *ssse3 = has_ssse3 ? " -mssse3" : " -mno-ssse3";
1004 const char *sse4a = has_sse4a ? " -msse4a" : " -mno-sse4a";
1005 const char *cx16 = has_cmpxchg16b ? " -mcx16" : " -mno-cx16";
1006 const char *sahf = has_lahf_lm ? " -msahf" : " -mno-sahf";
1007 const char *movbe = has_movbe ? " -mmovbe" : " -mno-movbe";
1008 const char *aes = has_aes ? " -maes" : " -mno-aes";
1009 const char *sha = has_sha ? " -msha" : " -mno-sha";
1010 const char *pclmul = has_pclmul ? " -mpclmul" : " -mno-pclmul";
1011 const char *popcnt = has_popcnt ? " -mpopcnt" : " -mno-popcnt";
1012 const char *abm = has_abm ? " -mabm" : " -mno-abm";
1013 const char *lwp = has_lwp ? " -mlwp" : " -mno-lwp";
1014 const char *fma = has_fma ? " -mfma" : " -mno-fma";
1015 const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
1016 const char *xop = has_xop ? " -mxop" : " -mno-xop";
1017 const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
1018 const char *sgx = has_sgx ? " -msgx" : " -mno-sgx";
1019 const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
1020 const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
1021 const char *avx = has_avx ? " -mavx" : " -mno-avx";
1022 const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
1023 const char *sse4_2 = has_sse4_2 ? " -msse4.2" : " -mno-sse4.2";
1024 const char *sse4_1 = has_sse4_1 ? " -msse4.1" : " -mno-sse4.1";
1025 const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
1026 const char *hle = has_hle ? " -mhle" : " -mno-hle";
1027 const char *rtm = has_rtm ? " -mrtm" : " -mno-rtm";
1028 const char *rdrnd = has_rdrnd ? " -mrdrnd" : " -mno-rdrnd";
1029 const char *f16c = has_f16c ? " -mf16c" : " -mno-f16c";
1030 const char *fsgsbase = has_fsgsbase ? " -mfsgsbase" : " -mno-fsgsbase";
1031 const char *rdseed = has_rdseed ? " -mrdseed" : " -mno-rdseed";
1032 const char *prfchw = has_prfchw ? " -mprfchw" : " -mno-prfchw";
1033 const char *adx = has_adx ? " -madx" : " -mno-adx";
1034 const char *fxsr = has_fxsr ? " -mfxsr" : " -mno-fxsr";
1035 const char *xsave = has_xsave ? " -mxsave" : " -mno-xsave";
1036 const char *xsaveopt = has_xsaveopt ? " -mxsaveopt" : " -mno-xsaveopt";
1037 const char *avx512f = has_avx512f ? " -mavx512f" : " -mno-avx512f";
1038 const char *avx512er = has_avx512er ? " -mavx512er" : " -mno-avx512er";
1039 const char *avx512cd = has_avx512cd ? " -mavx512cd" : " -mno-avx512cd";
1040 const char *avx512pf = has_avx512pf ? " -mavx512pf" : " -mno-avx512pf";
1041 const char *prefetchwt1 = has_prefetchwt1 ? " -mprefetchwt1" : " -mno-prefetchwt1";
1042 const char *clflushopt = has_clflushopt ? " -mclflushopt" : " -mno-clflushopt";
1043 const char *xsavec = has_xsavec ? " -mxsavec" : " -mno-xsavec";
1044 const char *xsaves = has_xsaves ? " -mxsaves" : " -mno-xsaves";
1045 const char *avx512dq = has_avx512dq ? " -mavx512dq" : " -mno-avx512dq";
1046 const char *avx512bw = has_avx512bw ? " -mavx512bw" : " -mno-avx512bw";
1047 const char *avx512vl = has_avx512vl ? " -mavx512vl" : " -mno-avx512vl";
1048 const char *avx512ifma = has_avx512ifma ? " -mavx512ifma" : " -mno-avx512ifma";
1049 const char *avx512vbmi = has_avx512vbmi ? " -mavx512vbmi" : " -mno-avx512vbmi";
1050 const char *avx5124vnniw = has_avx5124vnniw ? " -mavx5124vnniw" : " -mno-avx5124vnniw";
1051 const char *avx5124fmaps = has_avx5124fmaps ? " -mavx5124fmaps" : " -mno-avx5124fmaps";
1052 const char *clwb = has_clwb ? " -mclwb" : " -mno-clwb";
1053 const char *mwaitx = has_mwaitx ? " -mmwaitx" : " -mno-mwaitx";
1054 const char *clzero = has_clzero ? " -mclzero" : " -mno-clzero";
1055 const char *pku = has_pku ? " -mpku" : " -mno-pku";
1056 const char *rdpid = has_rdpid ? " -mrdpid" : " -mno-rdpid";
1057 const char *gfni = has_gfni ? " -mgfni" : " -mno-gfni";
1058 const char *ibt = has_ibt ? " -mibt" : " -mno-ibt";
1059 const char *shstk = has_shstk ? " -mshstk" : " -mno-shstk";
1060 options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
1061 sse4a, cx16, sahf, movbe, aes, sha, pclmul,
1062 popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
1063 tbm, avx, avx2, sse4_2, sse4_1, lzcnt, rtm,
1064 hle, rdrnd, f16c, fsgsbase, rdseed, prfchw, adx,
1065 fxsr, xsave, xsaveopt, avx512f, avx512er,
1066 avx512cd, avx512pf, prefetchwt1, clflushopt,
1067 xsavec, xsaves, avx512dq, avx512bw, avx512vl,
1068 avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
1069 clwb, mwaitx, clzero, pku, rdpid, gfni, ibt, shstk, NULL);
1072 done:
1073 return concat (cache, "-m", argv[0], "=", cpu, options, NULL);
1075 #else
/* If we are compiling with GCC where %EBX register is fixed, then the
   driver will just ignore -march and -mtune "native" target and will leave
   to the newly built compiler to generate code for its default target.

   ARGC and ARGV are accepted only to match the prototype; the result
   is always NULL.  */

const char *host_detect_local_cpu (int argc, const char **argv)
{
  /* Nothing can be detected here, so the arguments are unused.  */
  (void) argc;
  (void) argv;

  return NULL;
}
1085 #endif /* __GNUC__ */