2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
24 #include <init-arch.h>
/* Shorthands for fields of the GLRO(dl_x86_cpu_features) object:
   is_intel and is_amd compare its `kind' field against arch_kind_intel
   and arch_kind_amd, and max_cpuid names its `max_cpuid' field.
   NOTE(review): is_intel and is_amd expand to an unparenthesized `=='
   expression, so they are only safe where operator precedence cannot
   regroup them (for example as a complete `if' condition).  */
26 #define is_intel GLRO(dl_x86_cpu_features).kind == arch_kind_intel
27 #define is_amd GLRO(dl_x86_cpu_features).kind == arch_kind_amd
28 #define max_cpuid GLRO(dl_x86_cpu_features).max_cpuid
/* Table of cache descriptions that intel_check_word searches with
   bsearch, using intel_02_known_compare (which orders elements by their
   `idx' member).  Every row holds five values.  Only the `linesize' and
   `rel_name' members are visible in this copy of the struct
   declaration; judging by the initializers the first value is
   presumably the `idx' key, the third `linesize' and the fourth
   `rel_name' -- TODO confirm against the complete declaration.  The
   rows are listed in ascending order of their first value, which is
   what bsearch needs if that value is the key.  */
30 static const struct intel_02_cache_info
34 unsigned char linesize
;
35 unsigned char rel_name
;
/* M(sc) turns an _SC_LEVEL*_ constant into its offset from
   _SC_LEVEL1_ICACHE_SIZE.  */
39 #define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
40 { 0x06, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE
), 8192 },
41 { 0x08, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE
), 16384 },
42 { 0x09, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE
), 32768 },
43 { 0x0a, 2, 32, M(_SC_LEVEL1_DCACHE_SIZE
), 8192 },
44 { 0x0c, 4, 32, M(_SC_LEVEL1_DCACHE_SIZE
), 16384 },
45 { 0x0d, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE
), 16384 },
46 { 0x0e, 6, 64, M(_SC_LEVEL1_DCACHE_SIZE
), 24576 },
47 { 0x21, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 262144 },
48 { 0x22, 4, 64, M(_SC_LEVEL3_CACHE_SIZE
), 524288 },
49 { 0x23, 8, 64, M(_SC_LEVEL3_CACHE_SIZE
), 1048576 },
50 { 0x25, 8, 64, M(_SC_LEVEL3_CACHE_SIZE
), 2097152 },
51 { 0x29, 8, 64, M(_SC_LEVEL3_CACHE_SIZE
), 4194304 },
52 { 0x2c, 8, 64, M(_SC_LEVEL1_DCACHE_SIZE
), 32768 },
53 { 0x30, 8, 64, M(_SC_LEVEL1_ICACHE_SIZE
), 32768 },
54 { 0x39, 4, 64, M(_SC_LEVEL2_CACHE_SIZE
), 131072 },
55 { 0x3a, 6, 64, M(_SC_LEVEL2_CACHE_SIZE
), 196608 },
56 { 0x3b, 2, 64, M(_SC_LEVEL2_CACHE_SIZE
), 131072 },
57 { 0x3c, 4, 64, M(_SC_LEVEL2_CACHE_SIZE
), 262144 },
58 { 0x3d, 6, 64, M(_SC_LEVEL2_CACHE_SIZE
), 393216 },
59 { 0x3e, 4, 64, M(_SC_LEVEL2_CACHE_SIZE
), 524288 },
60 { 0x3f, 2, 64, M(_SC_LEVEL2_CACHE_SIZE
), 262144 },
61 { 0x41, 4, 32, M(_SC_LEVEL2_CACHE_SIZE
), 131072 },
62 { 0x42, 4, 32, M(_SC_LEVEL2_CACHE_SIZE
), 262144 },
63 { 0x43, 4, 32, M(_SC_LEVEL2_CACHE_SIZE
), 524288 },
64 { 0x44, 4, 32, M(_SC_LEVEL2_CACHE_SIZE
), 1048576 },
65 { 0x45, 4, 32, M(_SC_LEVEL2_CACHE_SIZE
), 2097152 },
66 { 0x46, 4, 64, M(_SC_LEVEL3_CACHE_SIZE
), 4194304 },
67 { 0x47, 8, 64, M(_SC_LEVEL3_CACHE_SIZE
), 8388608 },
68 { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE
), 3145728 },
69 { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE
), 4194304 },
70 { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE
), 6291456 },
71 { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE
), 8388608 },
72 { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE
), 12582912 },
73 { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE
), 16777216 },
74 { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE
), 6291456 },
75 { 0x60, 8, 64, M(_SC_LEVEL1_DCACHE_SIZE
), 16384 },
76 { 0x66, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE
), 8192 },
77 { 0x67, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE
), 16384 },
78 { 0x68, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE
), 32768 },
79 { 0x78, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 1048576 },
80 { 0x79, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 131072 },
81 { 0x7a, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 262144 },
82 { 0x7b, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 524288 },
83 { 0x7c, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 1048576 },
84 { 0x7d, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 2097152 },
85 { 0x7f, 2, 64, M(_SC_LEVEL2_CACHE_SIZE
), 524288 },
86 { 0x80, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 524288 },
87 { 0x82, 8, 32, M(_SC_LEVEL2_CACHE_SIZE
), 262144 },
88 { 0x83, 8, 32, M(_SC_LEVEL2_CACHE_SIZE
), 524288 },
89 { 0x84, 8, 32, M(_SC_LEVEL2_CACHE_SIZE
), 1048576 },
90 { 0x85, 8, 32, M(_SC_LEVEL2_CACHE_SIZE
), 2097152 },
91 { 0x86, 4, 64, M(_SC_LEVEL2_CACHE_SIZE
), 524288 },
92 { 0x87, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 1048576 },
93 { 0xd0, 4, 64, M(_SC_LEVEL3_CACHE_SIZE
), 524288 },
94 { 0xd1, 4, 64, M(_SC_LEVEL3_CACHE_SIZE
), 1048576 },
95 { 0xd2, 4, 64, M(_SC_LEVEL3_CACHE_SIZE
), 2097152 },
96 { 0xd6, 8, 64, M(_SC_LEVEL3_CACHE_SIZE
), 1048576 },
97 { 0xd7, 8, 64, M(_SC_LEVEL3_CACHE_SIZE
), 2097152 },
98 { 0xd8, 8, 64, M(_SC_LEVEL3_CACHE_SIZE
), 4194304 },
99 { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE
), 2097152 },
100 { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE
), 4194304 },
101 { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE
), 8388608 },
102 { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE
), 2097152 },
103 { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE
), 4194304 },
104 { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE
), 8388608 },
105 { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE
), 12582912 },
106 { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE
), 18874368 },
107 { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE
), 25165824 },
/* Number of elements in intel_02_known.  */
110 #define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
/* Comparison callback handed to bsearch over intel_02_known.  P1 and P2
   point to two struct intel_02_cache_info objects, which are ordered by
   their `idx' member.  The visible return yields -1 when the first idx
   is smaller and 1 otherwise.  The statement executed when both idx
   values are equal is not visible in this copy (presumably `return 0'
   -- TODO confirm).  */
113 intel_02_known_compare (const void *p1
, const void *p2
)
115 const struct intel_02_cache_info
*i1
;
116 const struct intel_02_cache_info
*i2
;
/* Recover the element type from the untyped bsearch arguments.  */
118 i1
= (const struct intel_02_cache_info
*) p1
;
119 i2
= (const struct intel_02_cache_info
*) p2
;
121 if (i1
->idx
== i2
->idx
)
124 return i1
->idx
< i2
->idx
? -1 : 1;
/* Decode one register VALUE for the cache parameter NAME (an
   _SC_LEVEL*_ constant).  handle_intel calls this once for each of the
   EAX, EBX, ECX and EDX values it obtained from CPUID leaf 2.
   HAS_LEVEL_2 and NO_LEVEL_2_OR_3 point to flags owned by the caller;
   the visible code stores true through NO_LEVEL_2_OR_3 in one branch.
   Depending on the low byte of VALUE the answer is taken either from
   CPUID leaf 4 or from the intel_02_known table.
   NOTE(review): parts of this definition (braces, several statements
   and some comment terminators) are missing from this copy; the
   comments added here describe only the code that is visible.  */
129 __attribute__ ((noinline
))
130 intel_check_word (int name
, unsigned int value
, bool *has_level_2
,
131 bool *no_level_2_or_3
)
133 if ((value
& 0x80000000) != 0)
134 /* The register value is reserved. */
137 /* Fold the name. The _SC_ constants are always in the order SIZE,
139 int folded_rel_name
= (M(name
) / 3) * 3;
143 unsigned int byte
= value
& 0xff;
147 *no_level_2_or_3
= true;
149 if (folded_rel_name
== M(_SC_LEVEL3_CACHE_SIZE
))
150 /* No need to look further. */
/* A low byte of 0xff is handled by querying CPUID leaf 4.  */
153 else if (byte
== 0xff)
155 /* CPUID leaf 0x4 contains all the information. We need to
162 unsigned int round
= 0;
165 __cpuid_count (4, round
, eax
, ebx
, ecx
, edx
);
167 enum { null
= 0, data
= 1, inst
= 2, uni
= 3 } type
= eax
& 0x1f;
169 /* That was the end. */
/* The cache level is read from bits 5-7 of EAX.  */
172 unsigned int level
= (eax
>> 5) & 0x7;
/* Accept this leaf-4 entry only when its level (and, for level 1, its
   data/instruction type) matches the cache group NAME belongs to.  */
174 if ((level
== 1 && type
== data
175 && folded_rel_name
== M(_SC_LEVEL1_DCACHE_SIZE
))
176 || (level
== 1 && type
== inst
177 && folded_rel_name
== M(_SC_LEVEL1_ICACHE_SIZE
))
178 || (level
== 2 && folded_rel_name
== M(_SC_LEVEL2_CACHE_SIZE
))
179 || (level
== 3 && folded_rel_name
== M(_SC_LEVEL3_CACHE_SIZE
))
180 || (level
== 4 && folded_rel_name
== M(_SC_LEVEL4_CACHE_SIZE
)))
/* OFFSET is the position of NAME inside its group of three constants
   (0, 1 or 2), since FOLDED_REL_NAME is M(name) rounded down to a
   multiple of 3.  */
182 unsigned int offset
= M(name
) - folded_rel_name
;
/* The three returns below use EBX fields: bits 22-31, bits 12-21 and
   bits 0-11, each incremented by one.  The first return multiplies
   them; the rest of that expression and the conditions selecting
   between the returns are not visible in this copy.  */
186 return (((ebx
>> 22) + 1)
187 * (((ebx
>> 12) & 0x3ff) + 1)
188 * ((ebx
& 0xfff) + 1)
191 return (ebx
>> 22) + 1;
193 assert (offset
== 2);
194 return (ebx
& 0xfff) + 1;
199 /* There is no other cache information anywhere else. */
204 if (byte
== 0x49 && folded_rel_name
== M(_SC_LEVEL3_CACHE_SIZE
))
206 /* Intel reused this value. For family 15, model 6 it
207 specifies the 3rd level cache. Otherwise the 2nd
209 unsigned int family
= GLRO(dl_x86_cpu_features
).family
;
210 unsigned int model
= GLRO(dl_x86_cpu_features
).model
;
212 if (family
== 15 && model
== 6)
214 /* The level 3 cache is encoded for this model like
215 the level 2 cache is for other models. Pretend
216 the caller asked for the level 2 cache. */
217 name
= (_SC_LEVEL2_CACHE_SIZE
218 + (name
- _SC_LEVEL3_CACHE_SIZE
));
219 folded_rel_name
= M(_SC_LEVEL2_CACHE_SIZE
);
/* Look the descriptor up in intel_02_known.  The statement that fills
   in SEARCH before the call is not visible in this copy (presumably it
   stores BYTE as the key -- TODO confirm), and neither is any check of
   FOUND against NULL.  */
223 struct intel_02_cache_info
*found
;
224 struct intel_02_cache_info search
;
227 found
= bsearch (&search
, intel_02_known
, nintel_02_known
,
228 sizeof (intel_02_known
[0]), intel_02_known_compare
);
/* The table entry belongs to the cache group being asked about.  */
231 if (found
->rel_name
== folded_rel_name
)
233 unsigned int offset
= M(name
) - folded_rel_name
;
/* Only the OFFSET == 2 result (the entry's linesize member) is visible
   here; the returns for the other offsets are missing from this copy.  */
241 assert (offset
== 2);
242 return found
->linesize
;
/* The body of this test is not visible; presumably it records through
   HAS_LEVEL_2 that a level 2 cache exists -- TODO confirm.  */
245 if (found
->rel_name
== M(_SC_LEVEL2_CACHE_SIZE
))
250 /* Next byte for the next round. */
/* Look up the cache parameter NAME (an _SC_LEVEL*_ constant) through
   CPUID leaf 2.  MAXIDX must be at least 2, which is asserted; callers
   in this file pass max_cpuid.  The leaf-2 registers EAX, EBX, ECX and
   EDX are each handed to intel_check_word together with two flags,
   has_level_2 and no_level_2_or_3, that persist across those calls.
   NOTE(review): the loop around the CPUID call, the handling of each
   RESULT and the final return are missing from this copy, as is the
   terminator of the first comment below; the comments added here
   describe only the code that is visible.  */
259 static long int __attribute__ ((noinline
))
260 handle_intel (int name
, unsigned int maxidx
)
262 assert (maxidx
>= 2);
264 /* OK, we can use the CPUID instruction to get all info about the
266 unsigned int cnt
= 0;
267 unsigned int max
= 1;
269 bool no_level_2_or_3
= false;
270 bool has_level_2
= false;
278 __cpuid (2, eax
, ebx
, ecx
, edx
);
280 /* The low byte of EAX in the first round contain the number of
281 rounds we have to make. At least one, the one we are already
289 /* Process the individual registers' value. */
/* The same lookup is repeated for each of the four registers; what is
   done with RESULT after each call is not visible in this copy.  */
290 result
= intel_check_word (name
, eax
, &has_level_2
, &no_level_2_or_3
);
294 result
= intel_check_word (name
, ebx
, &has_level_2
, &no_level_2_or_3
);
298 result
= intel_check_word (name
, ecx
, &has_level_2
, &no_level_2_or_3
);
302 result
= intel_check_word (name
, edx
, &has_level_2
, &no_level_2_or_3
);
/* Special case for the level 2 and level 3 names; the remainder of this
   condition and its body are cut off in this copy (presumably it
   consults no_level_2_or_3 -- TODO confirm).  */
307 if (name
>= _SC_LEVEL2_CACHE_SIZE
&& name
<= _SC_LEVEL3_CACHE_LINESIZE
/* Look up the cache parameter NAME (an _SC_LEVEL*_ constant) through the
   extended CPUID leaves.  Leaf 0x80000005 is queried for names below
   _SC_LEVEL2_CACHE_SIZE and leaf 0x80000006 for the others; the result
   is then extracted from ECX (level 1 and level 2) or EDX (level 3).
   Names above _SC_LEVEL3_CACHE_LINESIZE are tested for first; the value
   returned for them is not visible in this copy.
   NOTE(review): the switch statement itself, several case labels and
   some statements are missing from this copy; the comments added here
   describe only the code that is visible.  */
315 static long int __attribute__ ((noinline
))
316 handle_amd (int name
)
322 __cpuid (0x80000000, eax
, ebx
, ecx
, edx
);
324 /* No level 4 cache (yet). */
325 if (name
> _SC_LEVEL3_CACHE_LINESIZE
)
/* FN is 0x80000005, or 0x80000006 once NAME reaches the level 2
   constants.  */
328 unsigned int fn
= 0x80000005 + (name
>= _SC_LEVEL2_CACHE_SIZE
);
332 __cpuid (fn
, eax
, ebx
, ecx
, edx
);
/* Shift a level 1 instruction-cache name onto the matching data-cache
   constant so that the DCACHE case labels below serve both.  Any
   register substitution that accompanies this is not visible here.  */
334 if (name
< _SC_LEVEL1_DCACHE_SIZE
)
336 name
+= _SC_LEVEL1_DCACHE_SIZE
- _SC_LEVEL1_ICACHE_SIZE
;
/* Bits 24-31 of ECX multiplied by 1024.  */
342 case _SC_LEVEL1_DCACHE_SIZE
:
343 return (ecx
>> 14) & 0x3fc00;
345 case _SC_LEVEL1_DCACHE_ASSOC
:
347 if ((ecx
& 0xff) == 0xff)
348 /* Fully associative. */
349 return (ecx
<< 2) & 0x3fc00;
352 case _SC_LEVEL1_DCACHE_LINESIZE
:
/* For the level 2 queries, bits 12-15 of ECX equal to zero yield 0.
   Otherwise the size is bits 16-31 of ECX multiplied by 1024.  */
355 case _SC_LEVEL2_CACHE_SIZE
:
356 return (ecx
& 0xf000) == 0 ? 0 : (ecx
>> 6) & 0x3fffc00;
358 case _SC_LEVEL2_CACHE_ASSOC
:
359 switch ((ecx
>> 12) & 0xf)
365 return (ecx
>> 12) & 0xf;
/* The size expression divided by the line-size field (ECX bits 0-7).
   NOTE(review): no check that the divisor is non-zero is visible in
   this copy -- confirm one of the missing lines guards it.  */
381 return ((ecx
>> 6) & 0x3fffc00) / (ecx
& 0xff);
387 case _SC_LEVEL2_CACHE_LINESIZE
:
388 return (ecx
& 0xf000) == 0 ? 0 : ecx
& 0xff;
/* The level 3 queries mirror the level 2 ones but read EDX; the size is
   bits 18-29 of EDX multiplied by 524288.  */
390 case _SC_LEVEL3_CACHE_SIZE
:
391 return (edx
& 0xf000) == 0 ? 0 : (edx
& 0x3ffc0000) << 1;
393 case _SC_LEVEL3_CACHE_ASSOC
:
394 switch ((edx
>> 12) & 0xf)
400 return (edx
>> 12) & 0xf;
416 return ((edx
& 0x3ffc0000) << 1) / (edx
& 0xff);
422 case _SC_LEVEL3_CACHE_LINESIZE
:
423 return (edx
& 0xf000) == 0 ? 0 : edx
& 0xff;
/* The string literal is non-null, so this assertion always fails when
   it is reached.  */
426 assert (! "cannot happen");
432 /* Get the value of the system variable NAME. */
/* NAME is forwarded unchanged to handle_intel (together with max_cpuid)
   or to handle_amd.  The return type, the tests that choose between the
   two calls and the return for an unknown CPU are not visible in this
   copy (presumably the is_intel / is_amd macros select the branch --
   TODO confirm).  */
435 __cache_sysconf (int name
)
438 return handle_intel (name
, max_cpuid
);
441 return handle_amd (name
);
443 // XXX Fill in more vendors.
445 /* CPU not known, we have no information. */
/* Cache-size variables shared with other code through attribute_hidden
   symbols.  The initializers below are defaults: 32 * 1024 for the data
   cache and 1024 * 1024 for the shared cache, with the *_half variants
   holding half of that.  init_cacheinfo later in this file assigns new
   values to all eight size variables.  */
450 /* Data cache size for use in memory and string routines, typically
451 L1 size, rounded to multiple of 256 bytes. */
452 long int __x86_data_cache_size_half attribute_hidden
= 32 * 1024 / 2;
453 long int __x86_data_cache_size attribute_hidden
= 32 * 1024;
454 /* Similar to __x86_data_cache_size_half, but not rounded. */
455 long int __x86_raw_data_cache_size_half attribute_hidden
= 32 * 1024 / 2;
456 /* Similar to __x86_data_cache_size, but not rounded. */
457 long int __x86_raw_data_cache_size attribute_hidden
= 32 * 1024;
458 /* Shared cache size for use in memory and string routines, typically
459 L2 or L3 size, rounded to multiple of 256 bytes. */
460 long int __x86_shared_cache_size_half attribute_hidden
= 1024 * 1024 / 2;
461 long int __x86_shared_cache_size attribute_hidden
= 1024 * 1024;
462 /* Similar to __x86_shared_cache_size_half, but not rounded. */
463 long int __x86_raw_shared_cache_size_half attribute_hidden
= 1024 * 1024 / 2;
464 /* Similar to __x86_shared_cache_size, but not rounded. */
465 long int __x86_raw_shared_cache_size attribute_hidden
= 1024 * 1024;
/* The flag below has no initializer and exists only when
   DISABLE_PREFETCHW is not defined; init_cacheinfo stores -1 into it.
   NOTE(review): the #endif matching this #ifndef is not visible in this
   copy.  */
467 #ifndef DISABLE_PREFETCHW
468 /* PREFETCHW support flag for use in memory and string routines. */
469 int __x86_prefetchw attribute_hidden
;
/* Constructor (attribute constructor) that measures the caches and
   publishes the results.  It obtains a data cache size and a shared
   cache size via handle_intel or handle_amd, works out how many logical
   threads share the highest cache level, and finally stores the raw and
   rounded sizes (and their halves) into the __x86_*cache_size*
   variables.  On the AMD path it also sets __x86_prefetchw when CPUID
   leaf 0x80000001 reports ECX bit 8 or EDX bit 31.
   NOTE(review): braces, declarations, the vendor tests, several
   statements and two comment terminators are missing from this copy;
   the comments added here describe only the code that is visible.  */
474 __attribute__((constructor
))
475 init_cacheinfo (void)
477 /* Find out what brand of processor. */
484 long int shared
= -1;
486 unsigned int threads
= 0;
/* Sizes obtained through handle_intel: level 1 data cache, then level 3
   as the shared cache, with level 2 as the fallback.  */
490 data
= handle_intel (_SC_LEVEL1_DCACHE_SIZE
, max_cpuid
);
494 shared
= handle_intel (_SC_LEVEL3_CACHE_SIZE
, max_cpuid
);
498 /* Try L2 otherwise. */
500 shared
= handle_intel (_SC_LEVEL2_CACHE_SIZE
, max_cpuid
);
503 /* Figure out the number of logical threads that share the
504 highest cache level. */
507 unsigned int family
= GLRO(dl_x86_cpu_features
).family
;
508 unsigned int model
= GLRO(dl_x86_cpu_features
).model
;
512 /* Query until desired cache level is enumerated. */
515 __cpuid_count (4, i
++, eax
, ebx
, ecx
, edx
);
517 /* There seems to be a bug in at least some Pentium Ds
518 which sometimes fail to iterate all cache parameters.
519 Do not loop indefinitely here, stop in this case and
520 assume there is no such information. */
521 if ((eax
& 0x1f) == 0)
522 goto intel_bug_no_cache_info
;
524 while (((eax
>> 5) & 0x7) != level
);
/* THREADS is read from bits 14-23 of the leaf-4 EAX value.  */
526 threads
= (eax
>> 14) & 0x3ff;
528 /* If max_cpuid >= 11, THREADS is the maximum number of
529 addressable IDs for logical processors sharing the
530 cache, instead of the maximum number of threads
531 sharing the cache. */
532 if (threads
&& max_cpuid
>= 11)
534 /* Find the number of logical processors shipped in
535 one core and apply count mask. */
539 __cpuid_count (11, i
++, eax
, ebx
, ecx
, edx
);
/* SHIPPED is EBX bits 0-7 and TYPE is ECX masked with 0xff0 (bits 4-11,
   left in place, hence the comparison with 0x200 below).
   NOTE(review): the Intel SDM documents the leaf 0BH level type in ECX
   bits 8-15 -- confirm that 0xff0 is the intended mask.  */
541 int shipped
= ebx
& 0xff;
542 int type
= ecx
& 0xff0;
543 if (shipped
== 0 || type
== 0)
545 else if (type
== 0x200)
549 /* Compute count mask. */
551 : "=r" (count_mask
) : "g" (threads
));
/* Build a mask of the low (count_mask + 1) bits.
   NOTE(review): left-shifting the negative value -1 is undefined
   behavior in ISO C; this relies on the compiler's treatment of it.  */
552 count_mask
= ~(-1 << (count_mask
+ 1));
553 threads
= (shipped
- 1) & count_mask
;
559 if (threads
> 2 && level
== 2 && family
== 6)
564 /* Knights Landing has L2 cache shared by 2 cores. */
570 /* Silvermont has L2 cache shared by 2 cores. */
580 intel_bug_no_cache_info
:
581 /* Assume that all logical threads share the highest cache level. */
584 = ((GLRO(dl_x86_cpu_features
).cpuid
[COMMON_CPUID_INDEX_1
].ebx
588 /* Cap usage of highest cache level to the number of supported
590 if (shared
> 0 && threads
> 0)
593 /* This spells out "AuthenticAMD". */
/* Sizes obtained through handle_amd: level 1 data cache, level 2 (kept
   in CORE) and level 3 (the shared cache).  */
596 data
= handle_amd (_SC_LEVEL1_DCACHE_SIZE
);
597 long int core
= handle_amd (_SC_LEVEL2_CACHE_SIZE
);
598 shared
= handle_amd (_SC_LEVEL3_CACHE_SIZE
);
600 /* Get maximum extended function. */
601 __cpuid (0x80000000, max_cpuid_ex
, ebx
, ecx
, edx
);
604 /* No shared L3 cache. All we have is the L2 cache. */
608 /* Figure out the number of logical threads that share L3. */
609 if (max_cpuid_ex
>= 0x80000008)
611 /* Get width of APIC ID. */
/* Note that this call overwrites max_cpuid_ex with the EAX output of
   leaf 0x80000008.  */
612 __cpuid (0x80000008, max_cpuid_ex
, ebx
, ecx
, edx
);
/* THREADS becomes 2 raised to the value in bits 12-15 of ECX.  */
613 threads
= 1 << ((ecx
>> 12) & 0x0f);
618 /* If APIC ID width is not available, use logical
620 __cpuid (0x00000001, max_cpuid_ex
, ebx
, ecx
, edx
);
622 if ((edx
& (1 << 28)) != 0)
623 threads
= (ebx
>> 16) & 0xff;
626 /* Cap usage of highest cache level to the number of
627 supported threads. */
631 /* Account for exclusive L2 and L3 caches. */
635 #ifndef DISABLE_PREFETCHW
636 if (max_cpuid_ex
>= 0x80000001)
638 __cpuid (0x80000001, eax
, ebx
, ecx
, edx
);
639 /* PREFETCHW || 3DNow! */
640 if ((ecx
& 0x100) || (edx
& 0x80000000))
641 __x86_prefetchw
= -1;
/* Publish the data cache size: the raw variables receive DATA as
   computed, the rounded ones receive it after the rounding step (whose
   statement is not visible in this copy).  */
648 __x86_raw_data_cache_size_half
= data
/ 2;
649 __x86_raw_data_cache_size
= data
;
650 /* Round data cache size to multiple of 256 bytes. */
652 __x86_data_cache_size_half
= data
/ 2;
653 __x86_data_cache_size
= data
;
/* Publish the shared cache size the same way: raw values first, then
   the value with its low 8 bits cleared.  */
658 __x86_raw_shared_cache_size_half
= shared
/ 2;
659 __x86_raw_shared_cache_size
= shared
;
660 /* Round shared cache size to multiple of 256 bytes. */
661 shared
= shared
& ~255L;
662 __x86_shared_cache_size_half
= shared
/ 2;
663 __x86_shared_cache_size
= shared
;