2 Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26 static const struct intel_02_cache_info
35 { 0x06, _SC_LEVEL1_ICACHE_SIZE
, 8192, 4, 32 },
36 { 0x08, _SC_LEVEL1_ICACHE_SIZE
, 16384, 4, 32 },
37 { 0x0a, _SC_LEVEL1_DCACHE_SIZE
, 8192, 2, 32 },
38 { 0x0c, _SC_LEVEL1_DCACHE_SIZE
, 16384, 4, 32 },
39 { 0x22, _SC_LEVEL3_CACHE_SIZE
, 524288, 4, 64 },
40 { 0x23, _SC_LEVEL3_CACHE_SIZE
, 1048576, 8, 64 },
41 { 0x25, _SC_LEVEL3_CACHE_SIZE
, 2097152, 8, 64 },
42 { 0x29, _SC_LEVEL3_CACHE_SIZE
, 4194304, 8, 64 },
43 { 0x2c, _SC_LEVEL1_DCACHE_SIZE
, 32768, 8, 64 },
44 { 0x30, _SC_LEVEL1_ICACHE_SIZE
, 32768, 8, 64 },
45 { 0x39, _SC_LEVEL2_CACHE_SIZE
, 131072, 4, 64 },
46 { 0x3a, _SC_LEVEL2_CACHE_SIZE
, 196608, 6, 64 },
47 { 0x3b, _SC_LEVEL2_CACHE_SIZE
, 131072, 2, 64 },
48 { 0x3c, _SC_LEVEL2_CACHE_SIZE
, 262144, 4, 64 },
49 { 0x3d, _SC_LEVEL2_CACHE_SIZE
, 393216, 6, 64 },
50 { 0x3e, _SC_LEVEL2_CACHE_SIZE
, 524288, 4, 64 },
51 { 0x3f, _SC_LEVEL2_CACHE_SIZE
, 262144, 2, 64 },
52 { 0x41, _SC_LEVEL2_CACHE_SIZE
, 131072, 4, 32 },
53 { 0x42, _SC_LEVEL2_CACHE_SIZE
, 262144, 4, 32 },
54 { 0x43, _SC_LEVEL2_CACHE_SIZE
, 524288, 4, 32 },
55 { 0x44, _SC_LEVEL2_CACHE_SIZE
, 1048576, 4, 32 },
56 { 0x45, _SC_LEVEL2_CACHE_SIZE
, 2097152, 4, 32 },
57 { 0x46, _SC_LEVEL3_CACHE_SIZE
, 4194304, 4, 64 },
58 { 0x47, _SC_LEVEL3_CACHE_SIZE
, 8388608, 8, 64 },
59 { 0x48, _SC_LEVEL2_CACHE_SIZE
, 3145728, 12, 64 },
60 { 0x49, _SC_LEVEL2_CACHE_SIZE
, 4194304, 16, 64 },
61 { 0x4a, _SC_LEVEL3_CACHE_SIZE
, 6291456, 12, 64 },
62 { 0x4b, _SC_LEVEL3_CACHE_SIZE
, 8388608, 16, 64 },
63 { 0x4c, _SC_LEVEL3_CACHE_SIZE
, 12582912, 12, 64 },
64 { 0x4d, _SC_LEVEL3_CACHE_SIZE
, 16777216, 16, 64 },
65 { 0x4e, _SC_LEVEL2_CACHE_SIZE
, 6291456, 24, 64 },
66 { 0x60, _SC_LEVEL1_DCACHE_SIZE
, 16384, 8, 64 },
67 { 0x66, _SC_LEVEL1_DCACHE_SIZE
, 8192, 4, 64 },
68 { 0x67, _SC_LEVEL1_DCACHE_SIZE
, 16384, 4, 64 },
69 { 0x68, _SC_LEVEL1_DCACHE_SIZE
, 32768, 4, 64 },
70 { 0x78, _SC_LEVEL2_CACHE_SIZE
, 1048576, 8, 64 },
71 { 0x79, _SC_LEVEL2_CACHE_SIZE
, 131072, 8, 64 },
72 { 0x7a, _SC_LEVEL2_CACHE_SIZE
, 262144, 8, 64 },
73 { 0x7b, _SC_LEVEL2_CACHE_SIZE
, 524288, 8, 64 },
74 { 0x7c, _SC_LEVEL2_CACHE_SIZE
, 1048576, 8, 64 },
75 { 0x7d, _SC_LEVEL2_CACHE_SIZE
, 2097152, 8, 64 },
76 { 0x7f, _SC_LEVEL2_CACHE_SIZE
, 524288, 2, 64 },
77 { 0x82, _SC_LEVEL2_CACHE_SIZE
, 262144, 8, 32 },
78 { 0x83, _SC_LEVEL2_CACHE_SIZE
, 524288, 8, 32 },
79 { 0x84, _SC_LEVEL2_CACHE_SIZE
, 1048576, 8, 32 },
80 { 0x85, _SC_LEVEL2_CACHE_SIZE
, 2097152, 8, 32 },
81 { 0x86, _SC_LEVEL2_CACHE_SIZE
, 524288, 4, 64 },
82 { 0x87, _SC_LEVEL2_CACHE_SIZE
, 1048576, 8, 64 },
85 #define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
88 intel_02_known_compare (const void *p1
, const void *p2
)
90 const struct intel_02_cache_info
*i1
;
91 const struct intel_02_cache_info
*i2
;
93 i1
= (const struct intel_02_cache_info
*) p1
;
94 i2
= (const struct intel_02_cache_info
*) p2
;
96 if (i1
->idx
== i2
->idx
)
99 return i1
->idx
< i2
->idx
? -1 : 1;
104 __attribute__ ((noinline
))
105 intel_check_word (int name
, unsigned int value
, bool *has_level_2
,
106 bool *no_level_2_or_3
)
108 if ((value
& 0x80000000) != 0)
109 /* The register value is reserved. */
112 /* Fold the name. The _SC_ constants are always in the order SIZE,
114 int folded_name
= (_SC_LEVEL1_ICACHE_SIZE
115 + ((name
- _SC_LEVEL1_ICACHE_SIZE
) / 3) * 3);
119 unsigned int byte
= value
& 0xff;
123 *no_level_2_or_3
= true;
125 if (folded_name
== _SC_LEVEL3_CACHE_SIZE
)
126 /* No need to look further. */
131 if (byte
== 0x49 && folded_name
== _SC_LEVEL3_CACHE_SIZE
)
133 /* Intel reused this value. For family 15, model 6 it
134 specifies the 3rd level cache. Otherwise the 2nd
140 asm volatile ("cpuid"
141 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
144 unsigned int family
= ((eax
>> 20) & 0xff) + ((eax
>> 8) & 0xf);
145 unsigned int model
= ((((eax
>>16) & 0xf) << 4)
146 + ((eax
>> 4) & 0xf));
147 if (family
== 15 && model
== 6)
149 /* The level 3 cache is encoded for this model like
150 the level 2 cache is for other models. Pretend
151 the caller asked for the level 2 cache. */
152 name
= (_SC_LEVEL2_CACHE_SIZE
153 + (name
- _SC_LEVEL3_CACHE_SIZE
));
154 folded_name
= _SC_LEVEL3_CACHE_SIZE
;
158 struct intel_02_cache_info
*found
;
159 struct intel_02_cache_info search
;
162 found
= bsearch (&search
, intel_02_known
, nintel_02_known
,
163 sizeof (intel_02_known
[0]), intel_02_known_compare
);
166 if (found
->name
== folded_name
)
168 unsigned int offset
= name
- folded_name
;
176 assert (offset
== 2);
177 return found
->linesize
;
180 if (found
->name
== _SC_LEVEL2_CACHE_SIZE
)
185 /* Next byte for the next round. */
194 static long int __attribute__ ((noinline
))
195 handle_intel (int name
, unsigned int maxidx
)
197 assert (maxidx
>= 2);
199 /* OK, we can use the CPUID instruction to get all info about the
201 unsigned int cnt
= 0;
202 unsigned int max
= 1;
204 bool no_level_2_or_3
= false;
205 bool has_level_2
= false;
213 asm volatile ("cpuid"
214 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
217 /* The low byte of EAX in the first round contains the number of
218 rounds we have to make. At least one, the one we are already
226 /* Process the individual registers' value. */
227 result
= intel_check_word (name
, eax
, &has_level_2
, &no_level_2_or_3
);
231 result
= intel_check_word (name
, ebx
, &has_level_2
, &no_level_2_or_3
);
235 result
= intel_check_word (name
, ecx
, &has_level_2
, &no_level_2_or_3
);
239 result
= intel_check_word (name
, edx
, &has_level_2
, &no_level_2_or_3
);
244 if (name
>= _SC_LEVEL2_CACHE_SIZE
&& name
<= _SC_LEVEL3_CACHE_LINESIZE
252 static long int __attribute__ ((noinline
))
253 handle_amd (int name
)
259 asm volatile ("cpuid"
260 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
263 /* No level 4 cache (yet). */
264 if (name
> _SC_LEVEL3_CACHE_LINESIZE
)
267 unsigned int fn
= 0x80000005 + (name
>= _SC_LEVEL2_CACHE_SIZE
);
271 asm volatile ("cpuid"
272 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
275 if (name
< _SC_LEVEL1_DCACHE_SIZE
)
277 name
+= _SC_LEVEL1_DCACHE_SIZE
- _SC_LEVEL1_ICACHE_SIZE
;
283 case _SC_LEVEL1_DCACHE_SIZE
:
284 return (ecx
>> 14) & 0x3fc00;
286 case _SC_LEVEL1_DCACHE_ASSOC
:
288 if ((ecx
& 0xff) == 0xff)
289 /* Fully associative. */
290 return (ecx
<< 2) & 0x3fc00;
293 case _SC_LEVEL1_DCACHE_LINESIZE
:
296 case _SC_LEVEL2_CACHE_SIZE
:
297 return (ecx
& 0xf000) == 0 ? 0 : (ecx
>> 6) & 0x3fffc00;
299 case _SC_LEVEL2_CACHE_ASSOC
:
300 switch ((ecx
>> 12) & 0xf)
306 return (ecx
>> 12) & 0xf;
322 return ((ecx
>> 6) & 0x3fffc00) / (ecx
& 0xff);
328 case _SC_LEVEL2_CACHE_LINESIZE
:
329 return (ecx
& 0xf000) == 0 ? 0 : ecx
& 0xff;
331 case _SC_LEVEL3_CACHE_SIZE
:
332 return (edx
& 0xf000) == 0 ? 0 : (edx
& 0x3ffc0000) << 1;
334 case _SC_LEVEL3_CACHE_ASSOC
:
335 switch ((edx
>> 12) & 0xf)
341 return (edx
>> 12) & 0xf;
357 return ((edx
& 0x3ffc0000) << 1) / (edx
& 0xff);
363 case _SC_LEVEL3_CACHE_LINESIZE
:
364 return (edx
& 0xf000) == 0 ? 0 : edx
& 0xff;
367 assert (! "cannot happen");
373 /* Get the value of the system variable NAME. */
376 __cache_sysconf (int name
)
378 /* Find out what brand of processor. */
383 asm volatile ("cpuid"
384 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
387 /* This spells out "GenuineIntel". */
388 if (ebx
== 0x756e6547 && ecx
== 0x6c65746e && edx
== 0x49656e69)
389 return handle_intel (name
, eax
);
391 /* This spells out "AuthenticAMD". */
392 if (ebx
== 0x68747541 && ecx
== 0x444d4163 && edx
== 0x69746e65)
393 return handle_amd (name
);
395 // XXX Fill in more vendors.
397 /* CPU not known, we have no information. */
402 /* Half the data cache size for use in memory and string routines, typically
404 long int __x86_64_data_cache_size_half attribute_hidden
= 32 * 1024 / 2;
405 /* Shared cache size for use in memory and string routines, typically
407 long int __x86_64_shared_cache_size_half attribute_hidden
= 1024 * 1024 / 2;
408 long int __x86_64_shared_cache_size attribute_hidden
= 1024 * 1024;
409 /* PREFETCHW support flag for use in memory and string routines. */
410 int __x86_64_prefetchw attribute_hidden
;
412 /* Instructions preferred for memory and string routines.
414 0: Regular instructions
417 3: SSSE3 instructions
420 int __x86_64_preferred_memory_instruction attribute_hidden
;
424 __attribute__((constructor
))
425 init_cacheinfo (void)
427 /* Find out what brand of processor. */
435 long int shared
= -1;
437 unsigned int threads
= 0;
439 asm volatile ("cpuid"
440 : "=a" (max_cpuid
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
443 /* This spells out "GenuineIntel". */
444 if (ebx
== 0x756e6547 && ecx
== 0x6c65746e && edx
== 0x49656e69)
446 data
= handle_intel (_SC_LEVEL1_DCACHE_SIZE
, max_cpuid
);
450 shared
= handle_intel (_SC_LEVEL3_CACHE_SIZE
, max_cpuid
);
454 /* Try L2 otherwise. */
456 shared
= handle_intel (_SC_LEVEL2_CACHE_SIZE
, max_cpuid
);
459 asm volatile ("cpuid"
460 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
463 /* Intel prefers SSSE3 instructions for memory/string routines
464 if they are available. */
466 __x86_64_preferred_memory_instruction
= 3;
468 __x86_64_preferred_memory_instruction
= 2;
470 /* Figure out the number of logical threads that share the
471 highest cache level. */
476 /* Query until desired cache level is enumerated. */
479 asm volatile ("cpuid"
480 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
481 : "0" (4), "2" (i
++));
483 /* There seems to be a bug in at least some Pentium Ds
484 which sometimes fail to iterate all cache parameters.
485 Do not loop indefinitely here, stop in this case and
486 assume there is no such information. */
487 if ((eax
& 0x1f) == 0)
488 goto intel_bug_no_cache_info
;
490 while (((eax
>> 5) & 0x7) != level
);
492 threads
= ((eax
>> 14) & 0x3ff) + 1;
496 intel_bug_no_cache_info
:
497 /* Assume that all logical threads share the highest cache level. */
499 threads
= (ebx
>> 16) & 0xff;
502 /* Cap usage of highest cache level to the number of supported
504 if (shared
> 0 && threads
> 0)
507 /* This spells out "AuthenticAMD". */
508 else if (ebx
== 0x68747541 && ecx
== 0x444d4163 && edx
== 0x69746e65)
510 data
= handle_amd (_SC_LEVEL1_DCACHE_SIZE
);
511 long int core
= handle_amd (_SC_LEVEL2_CACHE_SIZE
);
512 shared
= handle_amd (_SC_LEVEL3_CACHE_SIZE
);
514 /* Get maximum extended function. */
515 asm volatile ("cpuid"
516 : "=a" (max_cpuid_ex
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
520 /* No shared L3 cache. All we have is the L2 cache. */
524 /* Figure out the number of logical threads that share L3. */
525 if (max_cpuid_ex
>= 0x80000008)
527 /* Get width of APIC ID. */
528 asm volatile ("cpuid"
529 : "=a" (max_cpuid_ex
), "=b" (ebx
), "=c" (ecx
),
532 threads
= 1 << ((ecx
>> 12) & 0x0f);
537 /* If APIC ID width is not available, use logical
539 asm volatile ("cpuid"
540 : "=a" (max_cpuid_ex
), "=b" (ebx
), "=c" (ecx
),
544 if ((edx
& (1 << 28)) != 0)
545 threads
= (ebx
>> 16) & 0xff;
548 /* Cap usage of highest cache level to the number of
549 supported threads. */
553 /* Account for exclusive L2 and L3 caches. */
557 if (max_cpuid_ex
>= 0x80000001)
559 asm volatile ("cpuid"
560 : "=a" (eax
), "=b" (ebx
), "=c" (ecx
), "=d" (edx
)
562 /* PREFETCHW || 3DNow! */
563 if ((ecx
& 0x100) || (edx
& 0x80000000))
564 __x86_64_prefetchw
= -1;
569 __x86_64_data_cache_size_half
= data
/ 2;
573 __x86_64_shared_cache_size_half
= shared
/ 2;
574 __x86_64_shared_cache_size
= shared
;