2 Copyright (C) 2003, 2004, 2006, 2007, 2009 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 /* FIXME: Provide __cpuid_count if it isn't defined. Copied from gcc
29 4.4.0. Remove this if gcc 4.4 is the minimum requirement. */
30 # if defined(__i386__) && defined(__PIC__)
31 /* %ebx may be the PIC register. */
32 # define __cpuid_count(level, count, a, b, c, d) \
33 __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
35 "xchg{l}\t{%%}ebx, %1\n\t" \
36 : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
37 : "0" (level), "2" (count))
39 # define __cpuid_count(level, count, a, b, c, d) \
40 __asm__ ("cpuid\n\t" \
41 : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
42 : "0" (level), "2" (count))
47 # include "multiarch/init-arch.h"
49 # define is_intel __cpu_features.kind == arch_kind_intel
50 # define is_amd __cpu_features.kind == arch_kind_amd
51 # define max_cpuid __cpu_features.max_cpuid
53 /* This spells out "GenuineIntel". */
55 ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69
56 /* This spells out "AuthenticAMD". */
58 ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65
61 static const struct intel_02_cache_info
65 unsigned char linesize
;
66 unsigned char rel_name
;
70 #define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
71 { 0x06, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE
), 8192 },
72 { 0x08, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE
), 16384 },
73 { 0x09, 4, 32, M(_SC_LEVEL1_ICACHE_SIZE
), 32768 },
74 { 0x0a, 2, 32, M(_SC_LEVEL1_DCACHE_SIZE
), 8192 },
75 { 0x0c, 4, 32, M(_SC_LEVEL1_DCACHE_SIZE
), 16384 },
76 { 0x0d, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE
), 16384 },
77 { 0x0e, 6, 64, M(_SC_LEVEL1_DCACHE_SIZE
), 24576 },
78 { 0x21, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 262144 },
79 { 0x22, 4, 64, M(_SC_LEVEL3_CACHE_SIZE
), 524288 },
80 { 0x23, 8, 64, M(_SC_LEVEL3_CACHE_SIZE
), 1048576 },
81 { 0x25, 8, 64, M(_SC_LEVEL3_CACHE_SIZE
), 2097152 },
82 { 0x29, 8, 64, M(_SC_LEVEL3_CACHE_SIZE
), 4194304 },
83 { 0x2c, 8, 64, M(_SC_LEVEL1_DCACHE_SIZE
), 32768 },
84 { 0x30, 8, 64, M(_SC_LEVEL1_ICACHE_SIZE
), 32768 },
85 { 0x39, 4, 64, M(_SC_LEVEL2_CACHE_SIZE
), 131072 },
86 { 0x3a, 6, 64, M(_SC_LEVEL2_CACHE_SIZE
), 196608 },
87 { 0x3b, 2, 64, M(_SC_LEVEL2_CACHE_SIZE
), 131072 },
88 { 0x3c, 4, 64, M(_SC_LEVEL2_CACHE_SIZE
), 262144 },
89 { 0x3d, 6, 64, M(_SC_LEVEL2_CACHE_SIZE
), 393216 },
90 { 0x3e, 4, 64, M(_SC_LEVEL2_CACHE_SIZE
), 524288 },
91 { 0x3f, 2, 64, M(_SC_LEVEL2_CACHE_SIZE
), 262144 },
92 { 0x41, 4, 32, M(_SC_LEVEL2_CACHE_SIZE
), 131072 },
93 { 0x42, 4, 32, M(_SC_LEVEL2_CACHE_SIZE
), 262144 },
94 { 0x43, 4, 32, M(_SC_LEVEL2_CACHE_SIZE
), 524288 },
95 { 0x44, 4, 32, M(_SC_LEVEL2_CACHE_SIZE
), 1048576 },
96 { 0x45, 4, 32, M(_SC_LEVEL2_CACHE_SIZE
), 2097152 },
97 { 0x46, 4, 64, M(_SC_LEVEL3_CACHE_SIZE
), 4194304 },
98 { 0x47, 8, 64, M(_SC_LEVEL3_CACHE_SIZE
), 8388608 },
99 { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE
), 3145728 },
100 { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE
), 4194304 },
101 { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE
), 6291456 },
102 { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE
), 8388608 },
103 { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE
), 12582912 },
104 { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE
), 16777216 },
105 { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE
), 6291456 },
106 { 0x60, 8, 64, M(_SC_LEVEL1_DCACHE_SIZE
), 16384 },
107 { 0x66, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE
), 8192 },
108 { 0x67, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE
), 16384 },
109 { 0x68, 4, 64, M(_SC_LEVEL1_DCACHE_SIZE
), 32768 },
110 { 0x78, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 1048576 },
111 { 0x79, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 131072 },
112 { 0x7a, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 262144 },
113 { 0x7b, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 524288 },
114 { 0x7c, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 1048576 },
115 { 0x7d, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 2097152 },
116 { 0x7f, 2, 64, M(_SC_LEVEL2_CACHE_SIZE
), 524288 },
117 { 0x80, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 524288 },
118 { 0x82, 8, 32, M(_SC_LEVEL2_CACHE_SIZE
), 262144 },
119 { 0x83, 8, 32, M(_SC_LEVEL2_CACHE_SIZE
), 524288 },
120 { 0x84, 8, 32, M(_SC_LEVEL2_CACHE_SIZE
), 1048576 },
121 { 0x85, 8, 32, M(_SC_LEVEL2_CACHE_SIZE
), 2097152 },
122 { 0x86, 4, 64, M(_SC_LEVEL2_CACHE_SIZE
), 524288 },
123 { 0x87, 8, 64, M(_SC_LEVEL2_CACHE_SIZE
), 1048576 },
124 { 0xd0, 4, 64, M(_SC_LEVEL3_CACHE_SIZE
), 524288 },
125 { 0xd1, 4, 64, M(_SC_LEVEL3_CACHE_SIZE
), 1048576 },
126 { 0xd2, 4, 64, M(_SC_LEVEL3_CACHE_SIZE
), 2097152 },
127 { 0xd6, 8, 64, M(_SC_LEVEL3_CACHE_SIZE
), 1048576 },
128 { 0xd7, 8, 64, M(_SC_LEVEL3_CACHE_SIZE
), 2097152 },
129 { 0xd8, 8, 64, M(_SC_LEVEL3_CACHE_SIZE
), 4194304 },
130 { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE
), 2097152 },
131 { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE
), 4194304 },
132 { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE
), 8388608 },
/* Intel's CPUID leaf 2 descriptor for the 2 MiB, 16-way level 3 cache is 0xe2.  It used to be listed here as a second 0xe3, which left 0xe2 unknown and gave the bsearch over this table two entries with the same idx.  */
133 { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE
), 2097152 },
134 { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE
), 4194304 },
135 { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE
), 8388608 },
136 { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE
), 12582912 },
137 { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE
), 18874368 },
138 { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE
), 25165824 },
141 #define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
144 intel_02_known_compare (const void *p1
, const void *p2
)
146 const struct intel_02_cache_info
*i1
;
147 const struct intel_02_cache_info
*i2
;
149 i1
= (const struct intel_02_cache_info
*) p1
;
150 i2
= (const struct intel_02_cache_info
*) p2
;
152 if (i1
->idx
== i2
->idx
)
155 return i1
->idx
< i2
->idx
? -1 : 1;
160 __attribute__ ((noinline
))
161 intel_check_word (int name
, unsigned int value
, bool *has_level_2
,
162 bool *no_level_2_or_3
)
164 if ((value
& 0x80000000) != 0)
165 /* The register value is reserved. */
168 /* Fold the name. The _SC_ constants are always in the order SIZE,
170 int folded_rel_name
= (M(name
) / 3) * 3;
174 unsigned int byte
= value
& 0xff;
178 *no_level_2_or_3
= true;
180 if (folded_rel_name
== M(_SC_LEVEL3_CACHE_SIZE
))
181 /* No need to look further. */
186 if (byte
== 0x49 && folded_rel_name
== M(_SC_LEVEL3_CACHE_SIZE
))
188 /* Intel reused this value. For family 15, model 6 it
189 specifies the 3rd level cache. Otherwise the 2nd
194 family
= __cpu_features
.family
;
195 model
= __cpu_features
.model
;
201 __cpuid (1, eax
, ebx
, ecx
, edx
);
203 family
= ((eax
>> 20) & 0xff) + ((eax
>> 8) & 0xf);
204 model
= (((eax
>>16) & 0xf) << 4) + ((eax
>> 4) & 0xf);
207 if (family
== 15 && model
== 6)
209 /* The level 3 cache is encoded for this model like
210 the level 2 cache is for other models. Pretend
211 the caller asked for the level 2 cache. */
212 name
= (_SC_LEVEL2_CACHE_SIZE
213 + (name
- _SC_LEVEL3_CACHE_SIZE
));
214 folded_rel_name
= M(_SC_LEVEL2_CACHE_SIZE
);
218 struct intel_02_cache_info
*found
;
219 struct intel_02_cache_info search
;
222 found
= bsearch (&search
, intel_02_known
, nintel_02_known
,
223 sizeof (intel_02_known
[0]), intel_02_known_compare
);
226 if (found
->rel_name
== folded_rel_name
)
228 unsigned int offset
= M(name
) - folded_rel_name
;
236 assert (offset
== 2);
237 return found
->linesize
;
240 if (found
->rel_name
== M(_SC_LEVEL2_CACHE_SIZE
))
245 /* Next byte for the next round. */
254 static long int __attribute__ ((noinline
))
255 handle_intel (int name
, unsigned int maxidx
)
257 assert (maxidx
>= 2);
259 /* OK, we can use the CPUID instruction to get all info about the
261 unsigned int cnt
= 0;
262 unsigned int max
= 1;
264 bool no_level_2_or_3
= false;
265 bool has_level_2
= false;
273 __cpuid (2, eax
, ebx
, ecx
, edx
);
275 /* The low byte of EAX in the first round contains the number of
276 rounds we have to make. At least one, the one we are already
284 /* Process the individual registers' value. */
285 result
= intel_check_word (name
, eax
, &has_level_2
, &no_level_2_or_3
);
289 result
= intel_check_word (name
, ebx
, &has_level_2
, &no_level_2_or_3
);
293 result
= intel_check_word (name
, ecx
, &has_level_2
, &no_level_2_or_3
);
297 result
= intel_check_word (name
, edx
, &has_level_2
, &no_level_2_or_3
);
302 if (name
>= _SC_LEVEL2_CACHE_SIZE
&& name
<= _SC_LEVEL3_CACHE_LINESIZE
310 static long int __attribute__ ((noinline
))
311 handle_amd (int name
)
317 __cpuid (0x80000000, eax
, ebx
, ecx
, edx
);
319 /* No level 4 cache (yet). */
320 if (name
> _SC_LEVEL3_CACHE_LINESIZE
)
323 unsigned int fn
= 0x80000005 + (name
>= _SC_LEVEL2_CACHE_SIZE
);
327 __cpuid (fn
, eax
, ebx
, ecx
, edx
);
329 if (name
< _SC_LEVEL1_DCACHE_SIZE
)
331 name
+= _SC_LEVEL1_DCACHE_SIZE
- _SC_LEVEL1_ICACHE_SIZE
;
337 case _SC_LEVEL1_DCACHE_SIZE
:
338 return (ecx
>> 14) & 0x3fc00;
340 case _SC_LEVEL1_DCACHE_ASSOC
:
342 if ((ecx
& 0xff) == 0xff)
343 /* Fully associative. */
344 return (ecx
<< 2) & 0x3fc00;
347 case _SC_LEVEL1_DCACHE_LINESIZE
:
350 case _SC_LEVEL2_CACHE_SIZE
:
351 return (ecx
& 0xf000) == 0 ? 0 : (ecx
>> 6) & 0x3fffc00;
353 case _SC_LEVEL2_CACHE_ASSOC
:
354 switch ((ecx
>> 12) & 0xf)
360 return (ecx
>> 12) & 0xf;
376 return ((ecx
>> 6) & 0x3fffc00) / (ecx
& 0xff);
382 case _SC_LEVEL2_CACHE_LINESIZE
:
383 return (ecx
& 0xf000) == 0 ? 0 : ecx
& 0xff;
385 case _SC_LEVEL3_CACHE_SIZE
:
386 return (edx
& 0xf000) == 0 ? 0 : (edx
& 0x3ffc0000) << 1;
388 case _SC_LEVEL3_CACHE_ASSOC
:
389 switch ((edx
>> 12) & 0xf)
395 return (edx
>> 12) & 0xf;
411 return ((edx
& 0x3ffc0000) << 1) / (edx
& 0xff);
417 case _SC_LEVEL3_CACHE_LINESIZE
:
418 return (edx
& 0xf000) == 0 ? 0 : edx
& 0xff;
421 assert (! "cannot happen");
427 /* Get the value of the system variable NAME. */
430 __cache_sysconf (int name
)
433 if (__cpu_features
.kind
== arch_kind_unknown
)
434 __init_cpu_features ();
436 /* Find out what brand of processor. */
437 unsigned int max_cpuid
;
441 __cpuid (0, max_cpuid
, ebx
, ecx
, edx
);
445 return handle_intel (name
, max_cpuid
);
448 return handle_amd (name
);
450 // XXX Fill in more vendors.
452 /* CPU not known, we have no information. */
457 /* Data cache size for use in memory and string routines, typically
459 long int __x86_64_data_cache_size_half attribute_hidden
= 32 * 1024 / 2;
460 long int __x86_64_data_cache_size attribute_hidden
= 32 * 1024;
461 /* Shared cache size for use in memory and string routines, typically
463 long int __x86_64_shared_cache_size_half attribute_hidden
= 1024 * 1024 / 2;
464 long int __x86_64_shared_cache_size attribute_hidden
= 1024 * 1024;
466 #ifndef DISABLE_PREFETCHW
467 /* PREFETCHW support flag for use in memory and string routines. */
468 int __x86_64_prefetchw attribute_hidden
;
471 #ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
472 /* Instructions preferred for memory and string routines.
474 0: Regular instructions
477 3: SSSE3 instructions
480 int __x86_64_preferred_memory_instruction attribute_hidden
;
485 __attribute__((constructor
))
486 init_cacheinfo (void)
488 /* Find out what brand of processor. */
495 long int shared
= -1;
497 unsigned int threads
= 0;
500 if (__cpu_features
.kind
== arch_kind_unknown
)
501 __init_cpu_features ();
504 __cpuid (0, max_cpuid
, ebx
, ecx
, edx
);
509 data
= handle_intel (_SC_LEVEL1_DCACHE_SIZE
, max_cpuid
);
513 shared
= handle_intel (_SC_LEVEL3_CACHE_SIZE
, max_cpuid
);
517 /* Try L2 otherwise. */
519 shared
= handle_intel (_SC_LEVEL2_CACHE_SIZE
, max_cpuid
);
525 eax
= __cpu_features
.cpuid
[COMMON_CPUID_INDEX_1
].eax
;
526 ebx_1
= __cpu_features
.cpuid
[COMMON_CPUID_INDEX_1
].ebx
;
527 ecx
= __cpu_features
.cpuid
[COMMON_CPUID_INDEX_1
].ecx
;
528 edx
= __cpu_features
.cpuid
[COMMON_CPUID_INDEX_1
].edx
;
530 __cpuid (1, eax
, ebx_1
, ecx
, edx
);
533 #ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
534 /* Intel prefers SSSE3 instructions for memory/string routines
535 if they are available. */
537 __x86_64_preferred_memory_instruction
= 3;
539 __x86_64_preferred_memory_instruction
= 2;
542 /* Figure out the number of logical threads that share the
543 highest cache level. */
548 /* Query until desired cache level is enumerated. */
551 __cpuid_count (4, i
++, eax
, ebx
, ecx
, edx
);
553 /* There seems to be a bug in at least some Pentium Ds
554 which sometimes fail to iterate all cache parameters.
555 Do not loop indefinitely here, stop in this case and
556 assume there is no such information. */
557 if ((eax
& 0x1f) == 0)
558 goto intel_bug_no_cache_info
;
560 while (((eax
>> 5) & 0x7) != level
);
562 threads
= (eax
>> 14) & 0x3ff;
564 /* If max_cpuid >= 11, THREADS is the maximum number of
565 addressable IDs for logical processors sharing the
566 cache, instead of the maximum number of threads
567 sharing the cache. */
568 if (threads
&& max_cpuid
>= 11)
570 /* Find the number of logical processors shipped in
571 one core and apply count mask. */
575 __cpuid_count (11, i
++, eax
, ebx
, ecx
, edx
);
577 int shipped
= ebx
& 0xff;
578 int type
= ecx
& 0xff0;
579 if (shipped
== 0 || type
== 0)
581 else if (type
== 0x200)
585 /* Compute count mask. */
587 : "=r" (count_mask
) : "g" (threads
));
588 count_mask
= ~(-1 << (count_mask
+ 1));
589 threads
= (shipped
- 1) & count_mask
;
598 intel_bug_no_cache_info
:
599 /* Assume that all logical threads share the highest cache level. */
601 threads
= (ebx_1
>> 16) & 0xff;
604 /* Cap usage of highest cache level to the number of supported
606 if (shared
> 0 && threads
> 0)
609 /* This spells out "AuthenticAMD". */
612 data
= handle_amd (_SC_LEVEL1_DCACHE_SIZE
);
613 long int core
= handle_amd (_SC_LEVEL2_CACHE_SIZE
);
614 shared
= handle_amd (_SC_LEVEL3_CACHE_SIZE
);
616 /* Get maximum extended function. */
617 __cpuid (0x80000000, max_cpuid_ex
, ebx
, ecx
, edx
);
620 /* No shared L3 cache. All we have is the L2 cache. */
624 /* Figure out the number of logical threads that share L3. */
625 if (max_cpuid_ex
>= 0x80000008)
627 /* Get width of APIC ID. */
628 __cpuid (0x80000008, max_cpuid_ex
, ebx
, ecx
, edx
);
629 threads
= 1 << ((ecx
>> 12) & 0x0f);
634 /* If APIC ID width is not available, use logical
636 __cpuid (0x00000001, max_cpuid_ex
, ebx
, ecx
, edx
);
638 if ((edx
& (1 << 28)) != 0)
639 threads
= (ebx
>> 16) & 0xff;
642 /* Cap usage of highest cache level to the number of
643 supported threads. */
647 /* Account for exclusive L2 and L3 caches. */
651 #ifndef DISABLE_PREFETCHW
652 if (max_cpuid_ex
>= 0x80000001)
654 __cpuid (0x80000001, eax
, ebx
, ecx
, edx
);
655 /* PREFETCHW || 3DNow! */
656 if ((ecx
& 0x100) || (edx
& 0x80000000))
657 __x86_64_prefetchw
= -1;
664 __x86_64_data_cache_size_half
= data
/ 2;
665 __x86_64_data_cache_size
= data
;
670 __x86_64_shared_cache_size_half
= shared
/ 2;
671 __x86_64_shared_cache_size
= shared
;