1 /* Subroutines for the gcc driver.
2 Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
11 GCC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
22 #include "coretypes.h"
/* Forward declaration of host_detect_local_cpu, which is defined
   further down in this file.  */
26 const char *host_detect_local_cpu (int argc
, const char **argv
);
38 /* Returns command line parameters that describe size and
39 cache line size of the processor caches.

   LEVEL1 and LEVEL2 are the L1 and L2 cache descriptions.  The result
   is the concatenation, built with concat, of three "--param" settings,
   each followed by a single space: l1-cache-size (LEVEL1.sizekb),
   l1-cache-line-size (LEVEL1.line) and l2-cache-size (LEVEL2.sizekb).  */
42 describe_cache (struct cache_desc level1
, struct cache_desc level2
)
/* One fixed 100-byte scratch buffer per generated option.  */
44 char size
[100], line
[100], size2
[100];
46 /* At the moment, gcc does not use the information
47 about the associativity of the cache. */
/* Each snprintf below is bounded by the size of its destination buffer.  */
49 snprintf (size
, sizeof (size
),
50 "--param l1-cache-size=%u ", level1
.sizekb
);
51 snprintf (line
, sizeof (line
),
52 "--param l1-cache-line-size=%u ", level1
.line
);
54 snprintf (size2
, sizeof (size2
),
55 "--param l2-cache-size=%u ", level2
.sizekb
);
/* The three pieces are joined in the order L1 size, L1 line size, L2 size.  */
57 return concat (size
, line
, size2
, NULL
);
60 /* Detect L2 cache parameters using CPUID extended function 0x80000006. */
/* All three fields of LEVEL2 are written from the ECX value returned by
   that leaf: sizekb from bits 31..16, line from bits 7..0, and assoc from
   the 4-bit code in bits 15..12 after the translation below.  */
63 detect_l2_cache (struct cache_desc
*level2
)
65 unsigned eax
, ebx
, ecx
, edx
;
68 __cpuid (0x80000006, eax
, ebx
, ecx
, edx
);
/* ECX[31:16] is stored as the size, ECX[7:0] as the line size.  */
70 level2
->sizekb
= (ecx
>> 16) & 0xffff;
71 level2
->line
= ecx
& 0xff;
/* ECX[15:12] is an encoded associativity, decoded by the chain below.
   NOTE(review): the declaration of assoc and the first branches of the
   if/else chain (listing lines 74-77) are not visible here.  */
73 assoc
= (ecx
>> 12) & 0xf;
/* Codes 0xa, 0xb and 0xc become 32, 48 and 64.  */
78 else if (assoc
>= 0xa && assoc
<= 0xc)
79 assoc
= 32 + (assoc
- 0xa) * 16;
/* Codes 0xd and 0xe become 96 and 128.  */
80 else if (assoc
>= 0xd && assoc
<= 0xe)
81 assoc
= 96 + (assoc
- 0xd) * 32;
83 level2
->assoc
= assoc
;
86 /* Returns the description of caches for an AMD processor. */
/* MAX_EXT_LEVEL is compared against 0x80000005 and 0x80000006 to decide
   which extended CPUID leaves are queried.  The returned string is the
   one produced by describe_cache for the detected L1 and L2 values.  */
89 detect_caches_amd (unsigned max_ext_level
)
91 unsigned eax
, ebx
, ecx
, edx
;
/* Only level2 carries the zero initializer; level1 is filled from the
   CPUID result below.  */
93 struct cache_desc level1
, level2
= {0, 0, 0};
/* NOTE(review): the statement guarded by this check (listing line 96) is
   not visible here; presumably an early return -- confirm.  */
95 if (max_ext_level
< 0x80000005)
98 __cpuid (0x80000005, eax
, ebx
, ecx
, edx
);
/* L1 fields come from ECX: bits 31..24 -> sizekb, bits 23..16 -> assoc,
   bits 7..0 -> line.  */
100 level1
.sizekb
= (ecx
>> 24) & 0xff;
101 level1
.assoc
= (ecx
>> 16) & 0xff;
102 level1
.line
= ecx
& 0xff;
/* level2 keeps its all-zero value unless leaf 0x80000006 is available.  */
104 if (max_ext_level
>= 0x80000006)
105 detect_l2_cache (&level2
);
107 return describe_cache (level1
, level2
);
110 /* Decodes the size, the associativity and the cache line size of
111 L1/L2 caches of an Intel processor. Values are based on
112 "Intel Processor Identification and the CPUID Instruction"
113 [Application Note 485], revision -032, December 2007. */
/* REG is examined one byte at a time, from bits 31..24 down to bits 7..0.
   The value of each byte selects one of the assignment groups below,
   which overwrite all three fields of either *LEVEL1 or *LEVEL2.
   NOTE(review): the case labels that pair each descriptor byte with its
   assignment group, and every use of XEON_MP, are missing from this
   listing; the table cannot be checked against the application note
   from here.  */
116 decode_caches_intel (unsigned reg
, bool xeon_mp
,
117 struct cache_desc
*level1
, struct cache_desc
*level2
)
/* i is the bit offset of the byte being decoded: 24, 16, 8, then 0.  */
121 for (i
= 24; i
>= 0; i
-= 8)
122 switch ((reg
>> i
) & 0xff)
125 level1
->sizekb
= 8; level1
->assoc
= 2; level1
->line
= 32;
128 level1
->sizekb
= 16; level1
->assoc
= 4; level1
->line
= 32;
131 level1
->sizekb
= 32; level1
->assoc
= 8; level1
->line
= 64;
134 level2
->sizekb
= 128; level2
->assoc
= 4; level2
->line
= 64;
137 level2
->sizekb
= 192; level2
->assoc
= 6; level2
->line
= 64;
140 level2
->sizekb
= 128; level2
->assoc
= 2; level2
->line
= 64;
143 level2
->sizekb
= 256; level2
->assoc
= 4; level2
->line
= 64;
146 level2
->sizekb
= 384; level2
->assoc
= 6; level2
->line
= 64;
149 level2
->sizekb
= 512; level2
->assoc
= 4; level2
->line
= 64;
152 level2
->sizekb
= 128; level2
->assoc
= 4; level2
->line
= 32;
155 level2
->sizekb
= 256; level2
->assoc
= 4; level2
->line
= 32;
158 level2
->sizekb
= 512; level2
->assoc
= 4; level2
->line
= 32;
161 level2
->sizekb
= 1024; level2
->assoc
= 4; level2
->line
= 32;
164 level2
->sizekb
= 2048; level2
->assoc
= 4; level2
->line
= 32;
169 level2
->sizekb
= 4096; level2
->assoc
= 16; level2
->line
= 64;
172 level2
->sizekb
= 6144; level2
->assoc
= 24; level2
->line
= 64;
175 level1
->sizekb
= 16; level1
->assoc
= 8; level1
->line
= 64;
178 level1
->sizekb
= 8; level1
->assoc
= 4; level1
->line
= 64;
181 level1
->sizekb
= 16; level1
->assoc
= 4; level1
->line
= 64;
184 level1
->sizekb
= 32; level1
->assoc
= 4; level1
->line
= 64;
187 level2
->sizekb
= 1024; level2
->assoc
= 4; level2
->line
= 64;
190 level2
->sizekb
= 128; level2
->assoc
= 8; level2
->line
= 64;
193 level2
->sizekb
= 256; level2
->assoc
= 8; level2
->line
= 64;
196 level2
->sizekb
= 512; level2
->assoc
= 8; level2
->line
= 64;
199 level2
->sizekb
= 1024; level2
->assoc
= 8; level2
->line
= 64;
202 level2
->sizekb
= 2048; level2
->assoc
= 8; level2
->line
= 64;
205 level2
->sizekb
= 512; level2
->assoc
= 2; level2
->line
= 64;
208 level2
->sizekb
= 256; level2
->assoc
= 8; level2
->line
= 32;
211 level2
->sizekb
= 512; level2
->assoc
= 8; level2
->line
= 32;
214 level2
->sizekb
= 1024; level2
->assoc
= 8; level2
->line
= 32;
217 level2
->sizekb
= 2048; level2
->assoc
= 8; level2
->line
= 32;
220 level2
->sizekb
= 512; level2
->assoc
= 4; level2
->line
= 64;
223 level2
->sizekb
= 1024; level2
->assoc
= 8; level2
->line
= 64;
230 /* Detect cache parameters using CPUID function 2. */
/* Each of the four registers returned by leaf 2 is handed to
   decode_caches_intel, together with XEON_MP, LEVEL1 and LEVEL2, when it
   is nonzero and has bit 31 clear.
   NOTE(review): the declarations of regs, nreps and i, and the loop that
   surrounds the per-register scan, are not visible in this listing.  */
233 detect_caches_cpuid2 (bool xeon_mp
,
234 struct cache_desc
*level1
, struct cache_desc
*level2
)
239 __cpuid (2, regs
[0], regs
[1], regs
[2], regs
[3]);
/* The low four bits of EAX (regs[0]) are kept in nreps.  */
241 nreps
= regs
[0] & 0x0f;
/* Skip registers that are zero or that have bit 31 set.  */
246 for (i
= 0; i
< 4; i
++)
247 if (regs
[i
] && !((regs
[i
] >> 31) & 1))
248 decode_caches_intel (regs
[i
], xeon_mp
, level1
, level2
);
/* Leaf 2 is queried again here; presumably once per remaining nreps
   iteration -- confirm against the full source.  */
251 __cpuid (2, regs
[0], regs
[1], regs
[2], regs
[3]);
255 /* Detect cache parameters using CPUID function 4. This
256 method doesn't require hardcoded tables. */
/* Leaf 4 is queried with increasing sub-leaf index COUNT.  For a
   reported cache, assoc, line and sizekb are computed from EBX and ECX
   and stored through the local pointer cache.
   NOTE(review): the code that ends the loop and the switch body that
   sets cache are not visible in this listing; presumably cache is
   pointed at LEVEL1, LEVEL2 or LEVEL3 -- confirm.  */
267 detect_caches_cpuid4 (struct cache_desc
*level1
, struct cache_desc
*level2
,
268 struct cache_desc
*level3
)
270 struct cache_desc
*cache
;
272 unsigned eax
, ebx
, ecx
, edx
;
/* The for header has no condition; the exit is in lines not shown.  */
275 for (count
= 0;; count
++)
277 __cpuid_count(4, count
, eax
, ebx
, ecx
, edx
);
/* Dispatch on the 3-bit field EAX[7:5].  */
285 switch ((eax
>> 5) & 0x07)
/* Every register field used below has 1 added to it before use:
   sets = ECX + 1, part = EBX[21:12] + 1, assoc = EBX[31:22] + 1,
   line = EBX[11:0] + 1.  */
302 unsigned sets
= ecx
+ 1;
303 unsigned part
= ((ebx
>> 12) & 0x03ff) + 1;
305 cache
->assoc
= ((ebx
>> 22) & 0x03ff) + 1;
306 cache
->line
= (ebx
& 0x0fff) + 1;
/* The product of the four values is divided by 1024 to give sizekb.  */
308 cache
->sizekb
= (cache
->assoc
* part
309 * cache
->line
* sets
) / 1024;
318 /* Returns the description of caches for an Intel processor. */
/* XEON_MP is forwarded to detect_caches_cpuid2.  MAX_LEVEL and
   MAX_EXT_LEVEL gate which CPUID-based detection methods are tried.
   Besides returning the describe_cache string for the detected L1 and L2
   values, the function stores level2.sizekb through L2SIZEKB.  */
321 detect_caches_intel (bool xeon_mp
, unsigned max_level
,
322 unsigned max_ext_level
, unsigned *l2sizekb
)
/* All three descriptions start zeroed; a sizekb of 0 is tested below.  */
324 struct cache_desc level1
= {0, 0, 0}, level2
= {0, 0, 0}, level3
= {0, 0, 0};
/* NOTE(review): the condition selecting the CPUID function 4 method
   (listing lines 325-326) is not visible here.  */
327 detect_caches_cpuid4 (&level1
, &level2
, &level3
);
328 else if (max_level
>= 2)
329 detect_caches_cpuid2 (xeon_mp
, &level1
, &level2
);
/* NOTE(review): the statement guarded by this check is not visible.  */
333 if (level1
.sizekb
== 0)
336 /* Let the L3 replace the L2. This assumes inclusive caches
337 and single threaded program for now. */
/* NOTE(review): the code implementing the replacement described above is
   not visible in this listing.  */
341 /* Intel CPUs are equipped with AMD style L2 cache info. Try this
342 method if other methods fail to provide L2 cache parameters. */
343 if (level2
.sizekb
== 0 && max_ext_level
>= 0x80000006)
344 detect_l2_cache (&level2
);
/* Report the final L2 size to the caller as well.  */
346 *l2sizekb
= level2
.sizekb
;
348 return describe_cache (level1
, level2
);
/* Vendor identifiers compared against the vendor value in
   host_detect_local_cpu.  Each constant is the four ASCII characters
   named in its comment, packed with the first character in the least
   significant byte.  */
351 enum vendor_signatures
353 SIG_INTEL
= 0x756e6547 /* Genu */,
354 SIG_AMD
= 0x68747541 /* Auth */
/* Processor identifier compared against the value read from CPUID leaf
   0x80000002 in host_detect_local_cpu.  The constant is the four ASCII
   characters named in its comment, packed with the first character in
   the least significant byte.  */
357 enum processor_signatures
359 SIG_GEODE
= 0x646f6547 /* Geod */
362 /* This will be called by the spec parser in gcc.c when it sees
363 a %:local_cpu_detect(args) construct. Currently it will be called
364 with either "arch" or "tune" as argument, depending on whether
365 -march=native or -mtune=native is to be substituted.
367 It returns a string containing new command line parameters to be
368 put in place of the above two options, depending on the CPU on
369 which this is executed. E.g. "-march=k8" on an AMD64 machine
370 for -march=native.
372 ARGC and ARGV are set depending on the actual arguments given
373 in the spec. */
375 const char *host_detect_local_cpu (int argc
, const char **argv
)
/* Defaults used when nothing more specific is detected below.  */
377 enum processor_type processor
= PROCESSOR_I386
;
378 const char *cpu
= "i386";
/* cache and options start empty and are joined into the final result.  */
380 const char *cache
= "";
381 const char *options
= "";
383 unsigned int eax
, ebx
, ecx
, edx
;
385 unsigned int max_level
, ext_level
;
388 unsigned int model
, family
;
390 unsigned int has_sse3
, has_ssse3
, has_cmpxchg16b
;
391 unsigned int has_cmpxchg8b
, has_cmov
, has_mmx
, has_sse
, has_sse2
;
393 /* Extended features */
394 unsigned int has_lahf_lm
= 0, has_sse4a
= 0;
395 unsigned int has_longmode
= 0, has_3dnowp
= 0, has_3dnow
= 0;
396 unsigned int has_movbe
= 0, has_sse4_1
= 0, has_sse4_2
= 0;
397 unsigned int has_popcnt
= 0, has_aes
= 0, has_avx
= 0;
398 unsigned int has_pclmul
= 0, has_abm
= 0, has_lwp
= 0;
402 unsigned int l2sizekb
= 0;
/* arch is nonzero exactly when the first argument is the string "arch".
   NOTE(review): the declarations of arch, vendor and name, and the
   statements guarded by the checks below, are not visible here.  */
407 arch
= !strcmp (argv
[0], "arch");
/* True when the first argument is neither "arch" nor "tune".  */
409 if (!arch
&& strcmp (argv
[0], "tune"))
412 max_level
= __get_cpuid_max (0, &vendor
);
/* Leaf 1: model is EAX[7:4] and family is EAX[11:8].  */
416 __cpuid (1, eax
, ebx
, ecx
, edx
);
418 model
= (eax
>> 4) & 0x0f;
419 family
= (eax
>> 8) & 0x0f;
420 if (vendor
== SIG_INTEL
)
422 unsigned int extended_model
, extended_family
;
/* extended_model is EAX[19:16] already positioned in bits 7..4, so that
   adding it to model yields an 8-bit model number; extended_family is
   EAX[27:20].  */
424 extended_model
= (eax
>> 12) & 0xf0;
425 extended_family
= (eax
>> 20) & 0xff;
/* NOTE(review): the condition guarding the next two additions is not
   visible in this listing.  */
428 family
+= extended_family
;
429 model
+= extended_model
;
431 else if (family
== 0x06)
432 model
+= extended_model
;
/* Feature flags taken from leaf 1 ECX.  Each variable holds the masked
   bit itself, so it is zero or nonzero rather than 0 or 1.  */
435 has_sse3
= ecx
& bit_SSE3
;
436 has_ssse3
= ecx
& bit_SSSE3
;
437 has_sse4_1
= ecx
& bit_SSE4_1
;
438 has_sse4_2
= ecx
& bit_SSE4_2
;
439 has_avx
= ecx
& bit_AVX
;
440 has_cmpxchg16b
= ecx
& bit_CMPXCHG16B
;
441 has_movbe
= ecx
& bit_MOVBE
;
442 has_popcnt
= ecx
& bit_POPCNT
;
443 has_aes
= ecx
& bit_AES
;
444 has_pclmul
= ecx
& bit_PCLMUL
;
/* Feature flags taken from leaf 1 EDX.  */
446 has_cmpxchg8b
= edx
& bit_CMPXCHG8B
;
447 has_cmov
= edx
& bit_CMOV
;
448 has_mmx
= edx
& bit_MMX
;
449 has_sse
= edx
& bit_SSE
;
450 has_sse2
= edx
& bit_SSE2
;
452 /* Check cpuid level of extended features. */
453 __cpuid (0x80000000, ext_level
, ebx
, ecx
, edx
);
/* Extended feature flags are read from leaf 0x80000001 only when
   ext_level is above 0x80000000; otherwise they keep their 0
   initializers.  */
455 if (ext_level
> 0x80000000)
457 __cpuid (0x80000001, eax
, ebx
, ecx
, edx
);
459 has_lahf_lm
= ecx
& bit_LAHF_LM
;
460 has_sse4a
= ecx
& bit_SSE4a
;
461 has_abm
= ecx
& bit_ABM
;
462 has_lwp
= ecx
& bit_LWP
;
464 has_longmode
= edx
& bit_LM
;
465 has_3dnowp
= edx
& bit_3DNOWP
;
466 has_3dnow
= edx
& bit_3DNOW
;
/* Cache parameters are produced by the vendor-specific helper; for any
   other vendor cache keeps the value it had before this point.  */
471 if (vendor
== SIG_AMD
)
472 cache
= detect_caches_amd (ext_level
);
473 else if (vendor
== SIG_INTEL
)
/* xeon_mp is set for family 15, model 6.  */
475 bool xeon_mp
= (family
== 15 && model
== 6);
476 cache
= detect_caches_intel (xeon_mp
, max_level
,
477 ext_level
, &l2sizekb
);
/* Processor selection.  NOTE(review): most of the conditions guarding
   the assignments to processor below are not visible in this listing.  */
481 if (vendor
== SIG_AMD
)
485 /* Detect geode processor by its processor signature. */
486 if (ext_level
> 0x80000001)
487 __cpuid (0x80000002, name
, ebx
, ecx
, edx
);
491 if (name
== SIG_GEODE
)
492 processor
= PROCESSOR_GEODE
;
494 processor
= PROCESSOR_AMDFAM10
;
495 else if (has_sse2
|| has_longmode
)
496 processor
= PROCESSOR_K8
;
498 processor
= PROCESSOR_ATHLON
;
500 processor
= PROCESSOR_K6
;
502 processor
= PROCESSOR_PENTIUM
;
509 processor
= PROCESSOR_I486
;
512 processor
= PROCESSOR_PENTIUM
;
515 processor
= PROCESSOR_PENTIUMPRO
;
518 processor
= PROCESSOR_PENTIUM4
;
521 /* We have no idea. */
522 processor
= PROCESSOR_GENERIC32
;
/* Selection of the cpu name per processor type.  NOTE(review): the
   switch header, most case bodies and the assignments to cpu are not
   visible in this listing; only labels, a few conditions and the
   original comments remain.  */
534 case PROCESSOR_PENTIUM
:
540 case PROCESSOR_PENTIUMPRO
:
543 else if (model
>= 28 && l2sizekb
< 2048)
544 /* Assume it's a small core if there's less than 2MB cache */
546 else if (has_longmode
)
551 /* It is Core Duo. */
554 /* It is Pentium M. */
557 /* It is Pentium III. */
560 /* It is Pentium II. */
563 /* Default to Pentium Pro. */
567 /* For -mtune, we default to -mtune=generic. */
570 case PROCESSOR_PENTIUM4
:
581 case PROCESSOR_GEODE
:
585 if (arch
&& has_3dnow
)
590 case PROCESSOR_ATHLON
:
597 if (arch
&& has_sse3
)
602 case PROCESSOR_AMDFAM10
:
607 /* Use something reasonable. */
625 else if (has_cmpxchg8b
)
/* Each concat below appends one more option, with a leading space, to
   options.  NOTE(review): the conditions guarding each append are not
   visible in this listing.  */
635 options
= concat (options
, " -mcx16", NULL
);
637 options
= concat (options
, " -msahf", NULL
);
639 options
= concat (options
, " -mmovbe", NULL
);
641 options
= concat (options
, " -maes", NULL
);
643 options
= concat (options
, " -mpclmul", NULL
);
645 options
= concat (options
, " -mpopcnt", NULL
);
647 options
= concat (options
, " -mabm", NULL
);
649 options
= concat (options
, " -mlwp", NULL
);
652 options
= concat (options
, " -mavx", NULL
);
654 options
= concat (options
, " -msse4.2", NULL
);
656 options
= concat (options
, " -msse4.1", NULL
);
/* Result layout: the cache parameters, then "-m" followed by argv[0],
   "=" and the chosen cpu name, then the accumulated extra options.  */
660 return concat (cache
, "-m", argv
[0], "=", cpu
, options
, NULL
);
664 /* If we aren't compiling with GCC then the driver will just ignore
665 the -march and -mtune "native" targets and will leave it to the newly
666 built compiler to generate code for its default target. */
/* Both parameters are marked ATTRIBUTE_UNUSED in this variant.
   NOTE(review): the function body is not visible in this listing.  */
668 const char *host_detect_local_cpu (int argc ATTRIBUTE_UNUSED
,
669 const char **argv ATTRIBUTE_UNUSED
)
673 #endif /* __GNUC__ */