/* x86-64 processor cache information obtained through CPUID.
   Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <unistd.h>

/* Cache descriptor bytes known for Intel processors.  Each entry maps
   one descriptor byte (IDX) to the cache it describes: NAME is the
   _SC_*_SIZE constant identifying the cache level/kind, followed by the
   cache size in bytes, its associativity and its line size in bytes.

   The table is searched with bsearch and therefore MUST stay sorted by
   ascending IDX.  */
static const struct intel_02_cache_info
{
  unsigned int idx;
  int name;
  long int size;
  long int assoc;
  long int linesize;
} intel_02_known [] =
  {
    { 0x06, _SC_LEVEL1_ICACHE_SIZE,     8192,  4, 32 },
    { 0x08, _SC_LEVEL1_ICACHE_SIZE,    16384,  4, 32 },
    { 0x0a, _SC_LEVEL1_DCACHE_SIZE,     8192,  2, 32 },
    { 0x0c, _SC_LEVEL1_DCACHE_SIZE,    16384,  4, 32 },
    { 0x22, _SC_LEVEL3_CACHE_SIZE,    524288,  4, 64 },
    { 0x23, _SC_LEVEL3_CACHE_SIZE,   1048576,  8, 64 },
    { 0x25, _SC_LEVEL3_CACHE_SIZE,   2097152,  8, 64 },
    { 0x29, _SC_LEVEL3_CACHE_SIZE,   4194304,  8, 64 },
    { 0x2c, _SC_LEVEL1_DCACHE_SIZE,    32768,  8, 64 },
    { 0x30, _SC_LEVEL1_ICACHE_SIZE,    32768,  8, 64 },
    { 0x39, _SC_LEVEL2_CACHE_SIZE,    131072,  4, 64 },
    { 0x3a, _SC_LEVEL2_CACHE_SIZE,    196608,  6, 64 },
    { 0x3b, _SC_LEVEL2_CACHE_SIZE,    131072,  2, 64 },
    { 0x3c, _SC_LEVEL2_CACHE_SIZE,    262144,  4, 64 },
    { 0x3d, _SC_LEVEL2_CACHE_SIZE,    393216,  6, 64 },
    { 0x3e, _SC_LEVEL2_CACHE_SIZE,    524288,  4, 64 },
    { 0x3f, _SC_LEVEL2_CACHE_SIZE,    262144,  2, 64 },
    { 0x41, _SC_LEVEL2_CACHE_SIZE,    131072,  4, 32 },
    { 0x42, _SC_LEVEL2_CACHE_SIZE,    262144,  4, 32 },
    { 0x43, _SC_LEVEL2_CACHE_SIZE,    524288,  4, 32 },
    { 0x44, _SC_LEVEL2_CACHE_SIZE,   1048576,  4, 32 },
    { 0x45, _SC_LEVEL2_CACHE_SIZE,   2097152,  4, 32 },
    { 0x46, _SC_LEVEL3_CACHE_SIZE,   4194304,  4, 64 },
    { 0x47, _SC_LEVEL3_CACHE_SIZE,   8388608,  8, 64 },
    { 0x48, _SC_LEVEL2_CACHE_SIZE,   3145728, 12, 64 },
    { 0x49, _SC_LEVEL2_CACHE_SIZE,   4194304, 16, 64 },
    { 0x4a, _SC_LEVEL3_CACHE_SIZE,   6291456, 12, 64 },
    { 0x4b, _SC_LEVEL3_CACHE_SIZE,   8388608, 16, 64 },
    { 0x4c, _SC_LEVEL3_CACHE_SIZE,  12582912, 12, 64 },
    { 0x4d, _SC_LEVEL3_CACHE_SIZE,  16777216, 16, 64 },
    { 0x4e, _SC_LEVEL2_CACHE_SIZE,   6291456, 24, 64 },
    { 0x60, _SC_LEVEL1_DCACHE_SIZE,    16384,  8, 64 },
    { 0x66, _SC_LEVEL1_DCACHE_SIZE,     8192,  4, 64 },
    { 0x67, _SC_LEVEL1_DCACHE_SIZE,    16384,  4, 64 },
    { 0x68, _SC_LEVEL1_DCACHE_SIZE,    32768,  4, 64 },
    { 0x78, _SC_LEVEL2_CACHE_SIZE,   1048576,  8, 64 },
    { 0x79, _SC_LEVEL2_CACHE_SIZE,    131072,  8, 64 },
    { 0x7a, _SC_LEVEL2_CACHE_SIZE,    262144,  8, 64 },
    { 0x7b, _SC_LEVEL2_CACHE_SIZE,    524288,  8, 64 },
    { 0x7c, _SC_LEVEL2_CACHE_SIZE,   1048576,  8, 64 },
    { 0x7d, _SC_LEVEL2_CACHE_SIZE,   2097152,  8, 64 },
    { 0x7f, _SC_LEVEL2_CACHE_SIZE,    524288,  2, 64 },
    { 0x82, _SC_LEVEL2_CACHE_SIZE,    262144,  8, 32 },
    { 0x83, _SC_LEVEL2_CACHE_SIZE,    524288,  8, 32 },
    { 0x84, _SC_LEVEL2_CACHE_SIZE,   1048576,  8, 32 },
    { 0x85, _SC_LEVEL2_CACHE_SIZE,   2097152,  8, 32 },
    { 0x86, _SC_LEVEL2_CACHE_SIZE,    524288,  4, 64 },
    { 0x87, _SC_LEVEL2_CACHE_SIZE,   1048576,  8, 64 },
  };

/* Number of entries in intel_02_known.  */
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
88 intel_02_known_compare (const void *p1
, const void *p2
)
90 const struct intel_02_cache_info
*i1
;
91 const struct intel_02_cache_info
*i2
;
93 i1
= (const struct intel_02_cache_info
*) p1
;
94 i2
= (const struct intel_02_cache_info
*) p2
;
96 if (i1
->idx
== i2
->idx
)
99 return i1
->idx
< i2
->idx
? -1 : 1;
104 __attribute__ ((noinline
))
105 intel_check_word (int name
, unsigned int value
, bool *has_level_2
,
106 bool *no_level_2_or_3
)
108 if ((value
& 0x80000000) != 0)
109 /* The register value is reserved. */
112 /* Fold the name. The _SC_ constants are always in the order SIZE,
114 int folded_name
= (_SC_LEVEL1_ICACHE_SIZE
115 + ((name
- _SC_LEVEL1_ICACHE_SIZE
) / 3) * 3);
119 unsigned int byte
= value
& 0xff;
123 *no_level_2_or_3
= true;
125 if (folded_name
== _SC_LEVEL3_CACHE_SIZE
)
126 /* No need to look further. */
131 if (byte
== 0x49 && folded_name
== _SC_LEVEL3_CACHE_SIZE
)
133 /* Intel reused this value. For family 15, model 6 it
134 specifies the 3rd level cache. Otherwise the 2nd
140 asm volatile ("cpuid"
141 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
144 unsigned int family
= ((eax
>> 20) & 0xff) + ((eax
>> 8) & 0xf);
145 unsigned int model
= ((((eax
>>16) & 0xf) << 4)
146 + ((eax
>> 4) & 0xf));
147 if (family
== 15 && model
== 6)
149 /* The level 3 cache is encoded for this model like
150 the level 2 cache is for other models. Pretend
151 the caller asked for the level 2 cache. */
152 name
= (_SC_LEVEL2_CACHE_SIZE
153 + (name
- _SC_LEVEL3_CACHE_SIZE
));
154 folded_name
= _SC_LEVEL3_CACHE_SIZE
;
158 struct intel_02_cache_info
*found
;
159 struct intel_02_cache_info search
;
162 found
= bsearch (&search
, intel_02_known
, nintel_02_known
,
163 sizeof (intel_02_known
[0]), intel_02_known_compare
);
166 if (found
->name
== folded_name
)
168 unsigned int offset
= name
- folded_name
;
176 assert (offset
== 2);
177 return found
->linesize
;
180 if (found
->name
== _SC_LEVEL2_CACHE_SIZE
)
185 /* Next byte for the next round. */
/* Answer the cache query NAME on an Intel processor by walking the
   descriptor bytes reported by CPUID leaf 2.  MAXIDX is the highest
   basic CPUID leaf the processor supports and must be at least 2.

   Returns the value found by intel_check_word, -1 if a level 2 or
   level 3 value was requested and the processor reported (through
   descriptor 0x40) that no such cache exists, and 0 if nothing is
   known.  */
static long int __attribute__ ((noinline))
handle_intel (int name, unsigned int maxidx)
{
  assert (maxidx >= 2);

  /* OK, we can use the CPUID instruction to get all info about the
     caches.  */
  unsigned int cnt = 0;
  unsigned int max = 1;
  long int result = 0;
  bool no_level_2_or_3 = false;
  bool has_level_2 = false;

  while (cnt++ < max)
    {
      unsigned int eax;
      unsigned int ebx;
      unsigned int ecx;
      unsigned int edx;
      __asm__ volatile ("cpuid"
                        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                        : "0" (2));

      /* The low byte of EAX in the first round contains the number of
         rounds we have to make.  At least one, the one we are already
         doing.  Mask it out so it is not mistaken for a descriptor.  */
      if (cnt == 1)
        {
          max = eax & 0xff;
          eax &= 0xffffff00;
        }

      /* Process the individual registers' value.  */
      result = intel_check_word (name, eax, &has_level_2, &no_level_2_or_3);
      if (result != 0)
        return result;

      result = intel_check_word (name, ebx, &has_level_2, &no_level_2_or_3);
      if (result != 0)
        return result;

      result = intel_check_word (name, ecx, &has_level_2, &no_level_2_or_3);
      if (result != 0)
        return result;

      result = intel_check_word (name, edx, &has_level_2, &no_level_2_or_3);
      if (result != 0)
        return result;
    }

  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
      && no_level_2_or_3)
    return -1;

  return 0;
}
/* Translate the 4-bit associativity code used for the level 2 and
   level 3 caches into a number of ways.  CODE is the raw field, SIZE the
   cache size in bytes and LINESIZE the line size in bytes; the latter
   two are only needed for the fully associative encoding (15).  Unknown
   codes, and a fully associative cache reporting a zero line size,
   yield 0.  */
static long int
amd_decode_assoc (unsigned int code, unsigned int size, unsigned int linesize)
{
  switch (code)
    {
    case 0:
    case 1:
    case 2:
    case 4:
      return code;
    case 6:
      return 8;
    case 8:
      return 16;
    case 10:
      return 32;
    case 11:
      return 48;
    case 12:
      return 64;
    case 13:
      return 96;
    case 14:
      return 128;
    case 15:
      /* Fully associative.  Do not divide by a zero line size.  */
      return linesize == 0 ? 0 : size / linesize;
    default:
      return 0;
    }
}

/* Answer the cache query NAME (one of the _SC_LEVEL*_CACHE_* constants)
   on an AMD processor using the extended CPUID leaves 0x80000005
   (level 1) and 0x80000006 (level 2 and 3).

   Returns the requested value, or 0 when the cache level is not
   described or the required leaf is not implemented.  */
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  __asm__ volatile ("cpuid"
                    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                    : "0" (0x80000000));

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  /* Level 1 data lives in leaf 0x80000005, everything else in the
     following leaf.  EAX holds the highest supported extended leaf.  */
  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __asm__ volatile ("cpuid"
                    : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                    : "0" (fn));

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      /* Instruction cache request: the layout is the same as for the
         data cache, only the register differs.  Map the name onto the
         data cache constant and use EDX in place of ECX.  */
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      ecx >>= 16;
      if ((ecx & 0xff) == 0xff)
        /* Fully associative.  */
        return (ecx << 2) & 0x3fc00;
      return ecx & 0xff;

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      return amd_decode_assoc ((ecx >> 12) & 0xf, (ecx >> 6) & 0x3fffc00,
                               ecx & 0xff);

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      return amd_decode_assoc ((edx >> 12) & 0xf, (edx & 0x3ffc0000) << 1,
                               edx & 0xff);

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      assert (! "cannot happen");
    }
  return -1;
}
373 /* Get the value of the system variable NAME. */
376 __cache_sysconf (int name
)
378 /* Find out what brand of processor. */
383 asm volatile ("cpuid"
384 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
387 /* This spells out "GenuineIntel". */
388 if (ebx
== 0x756e6547 && ecx
== 0x6c65746e && edx
== 0x49656e69)
389 return handle_intel (name
, eax
);
391 /* This spells out "AuthenticAMD". */
392 if (ebx
== 0x68747541 && ecx
== 0x444d4163 && edx
== 0x69746e65)
393 return handle_amd (name
);
395 // XXX Fill in more vendors.
397 /* CPU not known, we have no information. */
402 /* Half the data cache size for use in memory and string routines, typically
404 long int __x86_64_data_cache_size_half attribute_hidden
= 32 * 1024 / 2;
405 /* Shared cache size for use in memory and string routines, typically
407 long int __x86_64_shared_cache_size_half attribute_hidden
= 1024 * 1024 / 2;
408 #ifdef NOT_USED_RIGHT_NOW
409 long int __x86_64_shared_cache_size attribute_hidden
= 1024 * 1024;
411 /* PREFETCHW support flag for use in memory and string routines. */
412 int __x86_64_prefetchw attribute_hidden
;
414 #ifdef NOT_USED_RIGHT_NOW
415 /* Instructions preferred for memory and string routines.
417 0: Regular instructions
420 3: SSSE3 instructions
423 int __x86_64_preferred_memory_instruction attribute_hidden
;
428 __attribute__((constructor
))
429 init_cacheinfo (void)
431 /* Find out what brand of processor. */
439 long int shared
= -1;
441 unsigned int threads
= 0;
443 asm volatile ("cpuid"
444 : "=a" (max_cpuid
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
447 /* This spells out "GenuineIntel". */
448 if (ebx
== 0x756e6547 && ecx
== 0x6c65746e && edx
== 0x49656e69)
450 data
= handle_intel (_SC_LEVEL1_DCACHE_SIZE
, max_cpuid
);
454 shared
= handle_intel (_SC_LEVEL3_CACHE_SIZE
, max_cpuid
);
458 /* Try L2 otherwise. */
460 shared
= handle_intel (_SC_LEVEL2_CACHE_SIZE
, max_cpuid
);
463 asm volatile ("cpuid"
464 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
467 #ifdef NOT_USED_RIGHT_NOW
468 /* Intel prefers SSSE3 instructions for memory/string rountines
469 if they are avaiable. */
471 __x86_64_preferred_memory_instruction
= 3;
473 __x86_64_preferred_memory_instruction
= 2;
476 /* Figure out the number of logical threads that share the
477 highest cache level. */
482 /* Query until desired cache level is enumerated. */
485 asm volatile ("cpuid"
486 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
487 : "0" (4), "2" (i
++));
489 /* There seems to be a bug in at least some Pentium Ds
490 which sometimes fail to iterate all cache parameters.
491 Do not loop indefinitely here, stop in this case and
492 assume there is no such information. */
493 if ((eax
& 0x1f) == 0)
494 goto intel_bug_no_cache_info
;
496 while (((eax
>> 5) & 0x7) != level
);
498 threads
= ((eax
>> 14) & 0x3ff) + 1;
502 intel_bug_no_cache_info
:
503 /* Assume that all logical threads share the highest cache level. */
505 threads
= (ebx
>> 16) & 0xff;
508 /* Cap usage of highest cache level to the number of supported
510 if (shared
> 0 && threads
> 0)
513 /* This spells out "AuthenticAMD". */
514 else if (ebx
== 0x68747541 && ecx
== 0x444d4163 && edx
== 0x69746e65)
516 data
= handle_amd (_SC_LEVEL1_DCACHE_SIZE
);
517 long int core
= handle_amd (_SC_LEVEL2_CACHE_SIZE
);
518 shared
= handle_amd (_SC_LEVEL3_CACHE_SIZE
);
520 /* Get maximum extended function. */
521 asm volatile ("cpuid"
522 : "=a" (max_cpuid_ex
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
526 /* No shared L3 cache. All we have is the L2 cache. */
530 /* Figure out the number of logical threads that share L3. */
531 if (max_cpuid_ex
>= 0x80000008)
533 /* Get width of APIC ID. */
534 asm volatile ("cpuid"
535 : "=a" (max_cpuid_ex
), "=b" (ebx
), "=c" (ecx
),
538 threads
= 1 << ((ecx
>> 12) & 0x0f);
543 /* If APIC ID width is not available, use logical
545 asm volatile ("cpuid"
546 : "=a" (max_cpuid_ex
), "=b" (ebx
), "=c" (ecx
),
550 if ((edx
& (1 << 28)) != 0)
551 threads
= (ebx
>> 16) & 0xff;
554 /* Cap usage of highest cache level to the number of
555 supported threads. */
559 /* Account for exclusive L2 and L3 caches. */
563 if (max_cpuid_ex
>= 0x80000001)
565 asm volatile ("cpuid"
566 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
568 /* PREFETCHW || 3DNow! */
569 if ((ecx
& 0x100) || (edx
& 0x80000000))
570 __x86_64_prefetchw
= -1;
575 __x86_64_data_cache_size_half
= data
/ 2;
579 __x86_64_shared_cache_size_half
= shared
/ 2;
580 #ifdef NOT_USED_RIGHT_NOW
581 __x86_64_shared_cache_size
= shared
;