2 Copyright (C) 2003, 2004, 2006, 2007, 2009 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 # include "multiarch/init-arch.h"
29 # define is_intel __cpu_features.kind == arch_kind_intel
30 # define is_amd __cpu_features.kind == arch_kind_amd
31 # define max_cpuid __cpu_features.max_cpuid
33 /* This spells out "GenuineIntel". */
35 ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69
36 /* This spells out "AuthenticAMD". */
38 ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65
/* Table of known cache descriptor bytes, looked up by intel_check_word
   through bsearch with intel_02_known_compare.

   Each entry holds, in order:
     idx       the descriptor byte value;
     assoc     the associativity of the described cache;
     linesize  the cache line size in bytes;
     rel_name  which cache is described, as the matching _SC_*_SIZE
               constant relative to _SC_LEVEL1_ICACHE_SIZE (see M below);
     size      the total cache size in bytes.

   Because the table is searched with bsearch, the entries must stay
   sorted by IDX in strictly ascending order and every IDX must be
   unique; a duplicated key makes one of the two entries unreachable.  */
static const struct intel_02_cache_info
{
  unsigned char idx;
  unsigned char assoc;
  unsigned char linesize;
  unsigned char rel_name;
  unsigned int size;
} intel_02_known [] =
  {
/* Express an _SC_ cache constant relative to the first one so that it
   fits into the unsigned char REL_NAME member.  This macro stays defined
   after the table because later code uses it as well.  */
#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
    { 0x06,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    8192 },
    { 0x08,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   16384 },
    { 0x09,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0x25,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0x29,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x2c,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x30,  8, 64, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x39,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3a,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   196608 },
    { 0x3b,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3c,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x3d,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   393216 },
    { 0x3e,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x3f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x41,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x42,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x43,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x44,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x45,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x46,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x47,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE),  3145728 },
    { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE),  4194304 },
    { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  6291456 },
    { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 16777216 },
    { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE),  6291456 },
    { 0x60,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x66,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x67,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x68,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x78,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x79,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x7a,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x7b,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x85,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x86,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x87,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0xd0,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0xd1,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd2,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd6,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd7,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd8,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    /* 0xe2 is the 2 MiB member of the 16-way family; it must not share
       its key with the 4 MiB entry 0xe3 that follows.  */
    { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 18874368 },
    { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 25165824 },
  };

/* Number of entries in intel_02_known.  */
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
122 intel_02_known_compare (const void *p1
, const void *p2
)
124 const struct intel_02_cache_info
*i1
;
125 const struct intel_02_cache_info
*i2
;
127 i1
= (const struct intel_02_cache_info
*) p1
;
128 i2
= (const struct intel_02_cache_info
*) p2
;
130 if (i1
->idx
== i2
->idx
)
133 return i1
->idx
< i2
->idx
? -1 : 1;
138 __attribute__ ((noinline
))
139 intel_check_word (int name
, unsigned int value
, bool *has_level_2
,
140 bool *no_level_2_or_3
)
142 if ((value
& 0x80000000) != 0)
143 /* The register value is reserved. */
146 /* Fold the name. The _SC_ constants are always in the order SIZE,
148 int folded_rel_name
= (M(name
) / 3) * 3;
152 unsigned int byte
= value
& 0xff;
156 *no_level_2_or_3
= true;
158 if (folded_rel_name
== M(_SC_LEVEL3_CACHE_SIZE
))
159 /* No need to look further. */
164 if (byte
== 0x49 && folded_rel_name
== M(_SC_LEVEL3_CACHE_SIZE
))
166 /* Intel reused this value. For family 15, model 6 it
167 specifies the 3rd level cache. Otherwise the 2nd
172 family
= __cpu_features
.family
;
173 model
= __cpu_features
.model
;
179 asm volatile ("cpuid"
180 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
183 family
= ((eax
>> 20) & 0xff) + ((eax
>> 8) & 0xf);
184 model
= (((eax
>>16) & 0xf) << 4) + ((eax
>> 4) & 0xf);
187 if (family
== 15 && model
== 6)
189 /* The level 3 cache is encoded for this model like
190 the level 2 cache is for other models. Pretend
191 the caller asked for the level 2 cache. */
192 name
= (_SC_LEVEL2_CACHE_SIZE
193 + (name
- _SC_LEVEL3_CACHE_SIZE
));
194 folded_rel_name
= M(_SC_LEVEL2_CACHE_SIZE
);
198 struct intel_02_cache_info
*found
;
199 struct intel_02_cache_info search
;
202 found
= bsearch (&search
, intel_02_known
, nintel_02_known
,
203 sizeof (intel_02_known
[0]), intel_02_known_compare
);
206 if (found
->rel_name
== folded_rel_name
)
208 unsigned int offset
= M(name
) - folded_rel_name
;
216 assert (offset
== 2);
217 return found
->linesize
;
220 if (found
->rel_name
== M(_SC_LEVEL2_CACHE_SIZE
))
225 /* Next byte for the next round. */
234 static long int __attribute__ ((noinline
))
235 handle_intel (int name
, unsigned int maxidx
)
237 assert (maxidx
>= 2);
239 /* OK, we can use the CPUID instruction to get all info about the
241 unsigned int cnt
= 0;
242 unsigned int max
= 1;
244 bool no_level_2_or_3
= false;
245 bool has_level_2
= false;
253 asm volatile ("cpuid"
254 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
257 /* The low byte of EAX in the first round contains the number of
258 rounds we have to make. At least one, the one we are already
266 /* Process the individual registers' value. */
267 result
= intel_check_word (name
, eax
, &has_level_2
, &no_level_2_or_3
);
271 result
= intel_check_word (name
, ebx
, &has_level_2
, &no_level_2_or_3
);
275 result
= intel_check_word (name
, ecx
, &has_level_2
, &no_level_2_or_3
);
279 result
= intel_check_word (name
, edx
, &has_level_2
, &no_level_2_or_3
);
284 if (name
>= _SC_LEVEL2_CACHE_SIZE
&& name
<= _SC_LEVEL3_CACHE_LINESIZE
292 static long int __attribute__ ((noinline
))
293 handle_amd (int name
)
299 asm volatile ("cpuid"
300 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
303 /* No level 4 cache (yet). */
304 if (name
> _SC_LEVEL3_CACHE_LINESIZE
)
307 unsigned int fn
= 0x80000005 + (name
>= _SC_LEVEL2_CACHE_SIZE
);
311 asm volatile ("cpuid"
312 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
315 if (name
< _SC_LEVEL1_DCACHE_SIZE
)
317 name
+= _SC_LEVEL1_DCACHE_SIZE
- _SC_LEVEL1_ICACHE_SIZE
;
323 case _SC_LEVEL1_DCACHE_SIZE
:
324 return (ecx
>> 14) & 0x3fc00;
326 case _SC_LEVEL1_DCACHE_ASSOC
:
328 if ((ecx
& 0xff) == 0xff)
329 /* Fully associative. */
330 return (ecx
<< 2) & 0x3fc00;
333 case _SC_LEVEL1_DCACHE_LINESIZE
:
336 case _SC_LEVEL2_CACHE_SIZE
:
337 return (ecx
& 0xf000) == 0 ? 0 : (ecx
>> 6) & 0x3fffc00;
339 case _SC_LEVEL2_CACHE_ASSOC
:
340 switch ((ecx
>> 12) & 0xf)
346 return (ecx
>> 12) & 0xf;
362 return ((ecx
>> 6) & 0x3fffc00) / (ecx
& 0xff);
368 case _SC_LEVEL2_CACHE_LINESIZE
:
369 return (ecx
& 0xf000) == 0 ? 0 : ecx
& 0xff;
371 case _SC_LEVEL3_CACHE_SIZE
:
372 return (edx
& 0xf000) == 0 ? 0 : (edx
& 0x3ffc0000) << 1;
374 case _SC_LEVEL3_CACHE_ASSOC
:
375 switch ((edx
>> 12) & 0xf)
381 return (edx
>> 12) & 0xf;
397 return ((edx
& 0x3ffc0000) << 1) / (edx
& 0xff);
403 case _SC_LEVEL3_CACHE_LINESIZE
:
404 return (edx
& 0xf000) == 0 ? 0 : edx
& 0xff;
407 assert (! "cannot happen");
413 /* Get the value of the system variable NAME. */
416 __cache_sysconf (int name
)
419 if (__cpu_features
.kind
== arch_kind_unknown
)
420 __init_cpu_features ();
422 /* Find out what brand of processor. */
423 unsigned int max_cpuid
;
427 asm volatile ("cpuid"
428 : "=a" (max_cpuid
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
433 return handle_intel (name
, max_cpuid
);
436 return handle_amd (name
);
438 // XXX Fill in more vendors.
440 /* CPU not known, we have no information. */
445 /* Half the data cache size for use in memory and string routines, typically
447 long int __x86_64_data_cache_size_half attribute_hidden
= 32 * 1024 / 2;
448 /* Shared cache size for use in memory and string routines, typically
450 long int __x86_64_shared_cache_size_half attribute_hidden
= 1024 * 1024 / 2;
451 long int __x86_64_shared_cache_size attribute_hidden
= 1024 * 1024;
452 /* PREFETCHW support flag for use in memory and string routines. */
453 int __x86_64_prefetchw attribute_hidden
;
455 /* Instructions preferred for memory and string routines.
457 0: Regular instructions
460 3: SSSE3 instructions
463 int __x86_64_preferred_memory_instruction attribute_hidden
;
467 __attribute__((constructor
))
468 init_cacheinfo (void)
470 /* Find out what brand of processor. */
477 long int shared
= -1;
479 unsigned int threads
= 0;
482 if (__cpu_features
.kind
== arch_kind_unknown
)
483 __init_cpu_features ();
486 asm volatile ("cpuid"
487 : "=a" (max_cpuid
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
493 data
= handle_intel (_SC_LEVEL1_DCACHE_SIZE
, max_cpuid
);
497 shared
= handle_intel (_SC_LEVEL3_CACHE_SIZE
, max_cpuid
);
501 /* Try L2 otherwise. */
503 shared
= handle_intel (_SC_LEVEL2_CACHE_SIZE
, max_cpuid
);
507 eax
= __cpu_features
.cpuid
[COMMON_CPUID_INDEX_1
].eax
;
508 ebx
= __cpu_features
.cpuid
[COMMON_CPUID_INDEX_1
].ebx
;
509 ecx
= __cpu_features
.cpuid
[COMMON_CPUID_INDEX_1
].ecx
;
510 edx
= __cpu_features
.cpuid
[COMMON_CPUID_INDEX_1
].edx
;
512 asm volatile ("cpuid"
513 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
517 /* Intel prefers SSSE3 instructions for memory/string routines
518 if they are available. */
520 __x86_64_preferred_memory_instruction
= 3;
522 __x86_64_preferred_memory_instruction
= 2;
524 /* Figure out the number of logical threads that share the
525 highest cache level. */
530 /* Query until desired cache level is enumerated. */
533 asm volatile ("cpuid"
534 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
535 : "0" (4), "2" (i
++));
537 /* There seems to be a bug in at least some Pentium Ds
538 which sometimes fail to iterate all cache parameters.
539 Do not loop indefinitely here, stop in this case and
540 assume there is no such information. */
541 if ((eax
& 0x1f) == 0)
542 goto intel_bug_no_cache_info
;
544 while (((eax
>> 5) & 0x7) != level
);
546 threads
= ((eax
>> 14) & 0x3ff) + 1;
550 intel_bug_no_cache_info
:
551 /* Assume that all logical threads share the highest cache level. */
553 threads
= (ebx
>> 16) & 0xff;
556 /* Cap usage of highest cache level to the number of supported
558 if (shared
> 0 && threads
> 0)
561 /* This spells out "AuthenticAMD". */
564 data
= handle_amd (_SC_LEVEL1_DCACHE_SIZE
);
565 long int core
= handle_amd (_SC_LEVEL2_CACHE_SIZE
);
566 shared
= handle_amd (_SC_LEVEL3_CACHE_SIZE
);
568 /* Get maximum extended function. */
569 asm volatile ("cpuid"
570 : "=a" (max_cpuid_ex
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
574 /* No shared L3 cache. All we have is the L2 cache. */
578 /* Figure out the number of logical threads that share L3. */
579 if (max_cpuid_ex
>= 0x80000008)
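581 /* Get width of APIC ID.  NOTE(review): the CPUID call below writes EAX
   into max_cpuid_ex, overwriting the maximum extended function number
   that is compared with 0x80000001 further down; confirm, and consider
   a scratch variable for this output instead.  */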
582 asm volatile ("cpuid"
583 : "=a" (max_cpuid_ex
), "=b" (ebx
), "=c" (ecx
),
586 threads
= 1 << ((ecx
>> 12) & 0x0f);
591 /* If APIC ID width is not available, use logical
593 asm volatile ("cpuid"
594 : "=a" (max_cpuid_ex
), "=b" (ebx
), "=c" (ecx
),
598 if ((edx
& (1 << 28)) != 0)
599 threads
= (ebx
>> 16) & 0xff;
602 /* Cap usage of highest cache level to the number of
603 supported threads. */
607 /* Account for exclusive L2 and L3 caches. */
611 if (max_cpuid_ex
>= 0x80000001)
613 asm volatile ("cpuid"
614 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
616 /* PREFETCHW || 3DNow! */
617 if ((ecx
& 0x100) || (edx
& 0x80000000))
618 __x86_64_prefetchw
= -1;
623 __x86_64_data_cache_size_half
= data
/ 2;
627 __x86_64_shared_cache_size_half
= shared
/ 2;
628 __x86_64_shared_cache_size
= shared
;