/* Subroutines for the gcc driver.
   Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include <stdio.h>
/* Entry point used by the spec parser; defined later in this file.  */
const char *host_detect_local_cpu (int argc, const char **argv);
38 /* Returns command line parameters that describe size and
39 cache line size of the processor caches. */
42 describe_cache (struct cache_desc level1
, struct cache_desc level2
)
44 char size
[100], line
[100], size2
[100];
46 /* At the moment, gcc does not use the information
47 about the associativity of the cache. */
49 sprintf (size
, "--param l1-cache-size=%u", level1
.sizekb
);
50 sprintf (line
, "--param l1-cache-line-size=%u", level1
.line
);
52 sprintf (size2
, "--param l2-cache-size=%u", level2
.sizekb
);
54 return concat (size
, " ", line
, " ", size2
, " ", NULL
);
57 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */
60 detect_l2_cache (struct cache_desc
*level2
)
62 unsigned eax
, ebx
, ecx
, edx
;
65 __cpuid (0x80000006, eax
, ebx
, ecx
, edx
);
67 level2
->sizekb
= (ecx
>> 16) & 0xffff;
68 level2
->line
= ecx
& 0xff;
70 assoc
= (ecx
>> 12) & 0xf;
75 else if (assoc
>= 0xa && assoc
<= 0xc)
76 assoc
= 32 + (assoc
- 0xa) * 16;
77 else if (assoc
>= 0xd && assoc
<= 0xe)
78 assoc
= 96 + (assoc
- 0xd) * 32;
80 level2
->assoc
= assoc
;
83 /* Returns the description of caches for an AMD processor. */
86 detect_caches_amd (unsigned max_ext_level
)
88 unsigned eax
, ebx
, ecx
, edx
;
90 struct cache_desc level1
, level2
= {0, 0, 0};
92 if (max_ext_level
< 0x80000005)
95 __cpuid (0x80000005, eax
, ebx
, ecx
, edx
);
97 level1
.sizekb
= (ecx
>> 24) & 0xff;
98 level1
.assoc
= (ecx
>> 16) & 0xff;
99 level1
.line
= ecx
& 0xff;
101 if (max_ext_level
>= 0x80000006)
102 detect_l2_cache (&level2
);
104 return describe_cache (level1
, level2
);
107 /* Decodes the size, the associativity and the cache line size of
108 L1/L2 caches of an Intel processor. Values are based on
109 "Intel Processor Identification and the CPUID Instruction"
110 [Application Note 485], revision -032, December 2007. */
113 decode_caches_intel (unsigned reg
, bool xeon_mp
,
114 struct cache_desc
*level1
, struct cache_desc
*level2
)
118 for (i
= 24; i
>= 0; i
-= 8)
119 switch ((reg
>> i
) & 0xff)
122 level1
->sizekb
= 8; level1
->assoc
= 2; level1
->line
= 32;
125 level1
->sizekb
= 16; level1
->assoc
= 4; level1
->line
= 32;
128 level1
->sizekb
= 32; level1
->assoc
= 8; level1
->line
= 64;
131 level2
->sizekb
= 128; level2
->assoc
= 4; level2
->line
= 64;
134 level2
->sizekb
= 192; level2
->assoc
= 6; level2
->line
= 64;
137 level2
->sizekb
= 128; level2
->assoc
= 2; level2
->line
= 64;
140 level2
->sizekb
= 256; level2
->assoc
= 4; level2
->line
= 64;
143 level2
->sizekb
= 384; level2
->assoc
= 6; level2
->line
= 64;
146 level2
->sizekb
= 512; level2
->assoc
= 4; level2
->line
= 64;
149 level2
->sizekb
= 128; level2
->assoc
= 4; level2
->line
= 32;
152 level2
->sizekb
= 256; level2
->assoc
= 4; level2
->line
= 32;
155 level2
->sizekb
= 512; level2
->assoc
= 4; level2
->line
= 32;
158 level2
->sizekb
= 1024; level2
->assoc
= 4; level2
->line
= 32;
161 level2
->sizekb
= 2048; level2
->assoc
= 4; level2
->line
= 32;
166 level2
->sizekb
= 4096; level2
->assoc
= 16; level2
->line
= 64;
169 level2
->sizekb
= 6144; level2
->assoc
= 24; level2
->line
= 64;
172 level1
->sizekb
= 16; level1
->assoc
= 8; level1
->line
= 64;
175 level1
->sizekb
= 8; level1
->assoc
= 4; level1
->line
= 64;
178 level1
->sizekb
= 16; level1
->assoc
= 4; level1
->line
= 64;
181 level1
->sizekb
= 32; level1
->assoc
= 4; level1
->line
= 64;
184 level2
->sizekb
= 1024; level2
->assoc
= 4; level2
->line
= 64;
187 level2
->sizekb
= 128; level2
->assoc
= 8; level2
->line
= 64;
190 level2
->sizekb
= 256; level2
->assoc
= 8; level2
->line
= 64;
193 level2
->sizekb
= 512; level2
->assoc
= 8; level2
->line
= 64;
196 level2
->sizekb
= 1024; level2
->assoc
= 8; level2
->line
= 64;
199 level2
->sizekb
= 2048; level2
->assoc
= 8; level2
->line
= 64;
202 level2
->sizekb
= 512; level2
->assoc
= 2; level2
->line
= 64;
205 level2
->sizekb
= 256; level2
->assoc
= 8; level2
->line
= 32;
208 level2
->sizekb
= 512; level2
->assoc
= 8; level2
->line
= 32;
211 level2
->sizekb
= 1024; level2
->assoc
= 8; level2
->line
= 32;
214 level2
->sizekb
= 2048; level2
->assoc
= 8; level2
->line
= 32;
217 level2
->sizekb
= 512; level2
->assoc
= 4; level2
->line
= 64;
220 level2
->sizekb
= 1024; level2
->assoc
= 8; level2
->line
= 64;
/* Detect cache parameters using CPUID function 2.

   Every register returned by CPUID(2) that is non-zero and has bit 31
   clear is handed to decode_caches_intel, which fills in *LEVEL1 and
   *LEVEL2.  XEON_MP is passed through unchanged.  */

static void
detect_caches_cpuid2 (bool xeon_mp,
		      struct cache_desc *level1, struct cache_desc *level2)
{
  unsigned regs[4];
  int nreps, i;

  __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

  /* The low bits of EAX hold the number of times CPUID(2) has to be
     executed; clear them so they are not decoded as a descriptor.  */
  nreps = regs[0] & 0x0f;
  regs[0] &= ~0x0f;

  while (--nreps >= 0)
    {
      for (i = 0; i < 4; i++)
	/* Bit 31 set means the register carries no valid descriptors.  */
	if (regs[i] && !((regs[i] >> 31) & 1))
	  decode_caches_intel (regs[i], xeon_mp, level1, level2);

      /* Fetch the next batch of descriptors, if any remain.  */
      if (nreps)
	__cpuid (2, regs[0], regs[1], regs[2], regs[3]);
    }
}
252 /* Detect cache parameters using CPUID function 4. This
253 method doesn't require hardcoded tables. */
264 detect_caches_cpuid4 (struct cache_desc
*level1
, struct cache_desc
*level2
)
266 struct cache_desc
*cache
;
268 unsigned eax
, ebx
, ecx
, edx
;
271 for (count
= 0;; count
++)
273 __cpuid_count(4, count
, eax
, ebx
, ecx
, edx
);
281 switch ((eax
>> 5) & 0x07)
295 unsigned sets
= ecx
+ 1;
296 unsigned part
= ((ebx
>> 12) & 0x03ff) + 1;
298 cache
->assoc
= ((ebx
>> 22) & 0x03ff) + 1;
299 cache
->line
= (ebx
& 0x0fff) + 1;
301 cache
->sizekb
= (cache
->assoc
* part
302 * cache
->line
* sets
) / 1024;
311 /* Returns the description of caches for an Intel processor. */
314 detect_caches_intel (bool xeon_mp
, unsigned max_level
, unsigned max_ext_level
)
316 struct cache_desc level1
= {0, 0, 0}, level2
= {0, 0, 0};
319 detect_caches_cpuid4 (&level1
, &level2
);
320 else if (max_level
>= 2)
321 detect_caches_cpuid2 (xeon_mp
, &level1
, &level2
);
325 if (level1
.sizekb
== 0)
328 /* Intel CPUs are equipped with AMD style L2 cache info. Try this
329 method if other methods fail to provide L2 cache parameters. */
330 if (level2
.sizekb
== 0 && max_ext_level
>= 0x80000006)
331 detect_l2_cache (&level2
);
333 return describe_cache (level1
, level2
);
/* Vendor signatures, compared against the vendor value obtained from
   __get_cpuid_max.  Each constant is the first four characters of the
   vendor string packed into a 32-bit little-endian word.  */
enum vendor_signatures
{
  SIG_INTEL =	0x756e6547 /* Genu */,
  SIG_AMD =	0x68747541 /* Auth */
};
/* Processor signatures, compared against the first word returned by
   CPUID function 0x80000002.  */
enum processor_signatures
{
  SIG_GEODE =	0x646f6547 /* Geod */
};
347 /* This will be called by the spec parser in gcc.c when it sees
348 a %:local_cpu_detect(args) construct. Currently it will be called
349 with either "arch" or "tune" as argument depending on if -march=native
350 or -mtune=native is to be substituted.
352 It returns a string containing new command line parameters to be
353 put at the place of the above two options, depending on what CPU
354 this is executed. E.g. "-march=k8" on an AMD64 machine
357 ARGC and ARGV are set depending on the actual arguments given
360 const char *host_detect_local_cpu (int argc
, const char **argv
)
362 enum processor_type processor
= PROCESSOR_I386
;
363 const char *cpu
= "i386";
365 const char *cache
= "";
366 const char *options
= "";
368 unsigned int eax
, ebx
, ecx
, edx
;
370 unsigned int max_level
, ext_level
;
373 unsigned int model
, family
;
375 unsigned int has_sse3
, has_ssse3
, has_cmpxchg16b
;
376 unsigned int has_cmpxchg8b
, has_cmov
, has_mmx
, has_sse
, has_sse2
;
378 /* Extended features */
379 unsigned int has_lahf_lm
= 0, has_sse4a
= 0;
380 unsigned int has_longmode
= 0, has_3dnowp
= 0, has_3dnow
= 0;
381 unsigned int has_sse4_1
= 0, has_sse4_2
= 0;
382 unsigned int has_popcnt
= 0, has_aes
= 0, has_avx
= 0;
383 unsigned int has_pclmul
= 0;
390 arch
= !strcmp (argv
[0], "arch");
392 if (!arch
&& strcmp (argv
[0], "tune"))
395 max_level
= __get_cpuid_max (0, &vendor
);
399 __cpuid (1, eax
, ebx
, ecx
, edx
);
401 /* We don't care for extended family. */
402 model
= (eax
>> 4) & 0x0f;
403 family
= (eax
>> 8) & 0x0f;
405 has_sse3
= ecx
& bit_SSE3
;
406 has_ssse3
= ecx
& bit_SSSE3
;
407 has_sse4_1
= ecx
& bit_SSE4_1
;
408 has_sse4_2
= ecx
& bit_SSE4_2
;
409 has_avx
= ecx
& bit_AVX
;
410 has_cmpxchg16b
= ecx
& bit_CMPXCHG16B
;
411 has_popcnt
= ecx
& bit_POPCNT
;
412 has_aes
= ecx
& bit_AES
;
413 has_pclmul
= ecx
& bit_PCLMUL
;
415 has_cmpxchg8b
= edx
& bit_CMPXCHG8B
;
416 has_cmov
= edx
& bit_CMOV
;
417 has_mmx
= edx
& bit_MMX
;
418 has_sse
= edx
& bit_SSE
;
419 has_sse2
= edx
& bit_SSE2
;
421 /* Check cpuid level of extended features. */
422 __cpuid (0x80000000, ext_level
, ebx
, ecx
, edx
);
424 if (ext_level
> 0x80000000)
426 __cpuid (0x80000001, eax
, ebx
, ecx
, edx
);
428 has_lahf_lm
= ecx
& bit_LAHF_LM
;
429 has_sse4a
= ecx
& bit_SSE4a
;
431 has_longmode
= edx
& bit_LM
;
432 has_3dnowp
= edx
& bit_3DNOWP
;
433 has_3dnow
= edx
& bit_3DNOW
;
438 if (vendor
== SIG_AMD
)
439 cache
= detect_caches_amd (ext_level
);
440 else if (vendor
== SIG_INTEL
)
442 bool xeon_mp
= (family
== 15 && model
== 6);
443 cache
= detect_caches_intel (xeon_mp
, max_level
, ext_level
);
447 if (vendor
== SIG_AMD
)
451 /* Detect geode processor by its processor signature. */
452 if (ext_level
> 0x80000001)
453 __cpuid (0x80000002, name
, ebx
, ecx
, edx
);
457 if (name
== SIG_GEODE
)
458 processor
= PROCESSOR_GEODE
;
460 processor
= PROCESSOR_AMDFAM10
;
461 else if (has_sse2
|| has_longmode
)
462 processor
= PROCESSOR_K8
;
464 processor
= PROCESSOR_ATHLON
;
466 processor
= PROCESSOR_K6
;
468 processor
= PROCESSOR_PENTIUM
;
475 processor
= PROCESSOR_I486
;
478 processor
= PROCESSOR_PENTIUM
;
481 processor
= PROCESSOR_PENTIUMPRO
;
484 processor
= PROCESSOR_PENTIUM4
;
487 /* We have no idea. */
488 processor
= PROCESSOR_GENERIC32
;
500 case PROCESSOR_PENTIUM
:
506 case PROCESSOR_PENTIUMPRO
:
508 /* It is Core 2 Duo. */
513 /* It is Core Duo. */
516 /* It is Pentium M. */
519 /* It is Pentium III. */
522 /* It is Pentium II. */
525 /* Default to Pentium Pro. */
529 /* For -mtune, we default to -mtune=generic. */
532 case PROCESSOR_PENTIUM4
:
543 case PROCESSOR_GEODE
:
547 if (arch
&& has_3dnow
)
552 case PROCESSOR_ATHLON
:
559 if (arch
&& has_sse3
)
564 case PROCESSOR_AMDFAM10
:
569 /* Use something reasonable. */
587 else if (has_cmpxchg8b
)
597 options
= concat (options
, "-mcx16 ", NULL
);
599 options
= concat (options
, "-msahf ", NULL
);
601 options
= concat (options
, "-maes ", NULL
);
603 options
= concat (options
, "-mpclmul ", NULL
);
605 options
= concat (options
, "-mpopcnt ", NULL
);
607 options
= concat (options
, "-mavx ", NULL
);
609 options
= concat (options
, "-msse4.2 ", NULL
);
611 options
= concat (options
, "-msse4.1 ", NULL
);
615 return concat (cache
, "-m", argv
[0], "=", cpu
, " ", options
, NULL
);
619 /* If we aren't compiling with GCC we just provide a minimal
622 const char *host_detect_local_cpu (int argc
, const char **argv
)
630 arch
= !strcmp (argv
[0], "arch");
632 if (!arch
&& strcmp (argv
[0], "tune"))
637 /* FIXME: i386 is wrong for 64bit compiler. How can we tell if
638 we are generating 64bit or 32bit code? */
644 return concat ("-m", argv
[0], "=", cpu
, NULL
);
646 #endif /* __GNUC__ */