Update copyright notices with scripts/update-copyrights
[glibc.git] / sysdeps / x86_64 / cacheinfo.c
blob163af2acbc666057413262a3673eb5a26be6f2ea
1 /* x86_64 cache info.
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <http://www.gnu.org/licenses/>. */
19 #include <assert.h>
20 #include <stdbool.h>
21 #include <stdlib.h>
22 #include <unistd.h>
23 #include <cpuid.h>
#ifndef __cpuid_count
/* Fallback for compilers whose <cpuid.h> does not supply
   __cpuid_count.  FIXME: copied from gcc 4.4.0; drop it once gcc 4.4
   is the minimum requirement.

   Executes CPUID with LEVEL in %eax and COUNT in %ecx and stores the
   four result registers in A, B, C and D.  */
# if defined __i386__ && defined __PIC__
/* %ebx may be the PIC register, so it is swapped with a scratch
   register around the instruction instead of being named as an
   output.  */
#  define __cpuid_count(level, count, a, b, c, d) \
  __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
           "cpuid\n\t" \
           "xchg{l}\t{%%}ebx, %1\n\t" \
           : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
           : "0" (level), "2" (count))
# else
#  define __cpuid_count(level, count, a, b, c, d) \
  __asm__ ("cpuid\n\t" \
           : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
           : "0" (level), "2" (count))
# endif
#endif
#ifdef USE_MULTIARCH
# include "multiarch/init-arch.h"

/* Vendor predicates and the highest basic CPUID leaf, taken from the
   __cpu_features object.  The predicates are parenthesized so that
   they stay a single operand when combined with !, || or ?:.  */
# define is_intel (__cpu_features.kind == arch_kind_intel)
# define is_amd (__cpu_features.kind == arch_kind_amd)
# define max_cpuid __cpu_features.max_cpuid
#else
/* Without multiarch support the predicates compare the vendor string
   registers directly.  They expect local variables named ebx, ecx
   and edx holding the output of CPUID leaf 0 to be in scope wherever
   they are expanded.  */

/* This spells out "GenuineIntel".  */
# define is_intel \
  (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
/* This spells out "AuthenticAMD".  */
# define is_amd \
  (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
#endif
/* Express a sysconf cache name as an offset from
   _SC_LEVEL1_ICACHE_SIZE so that it fits into an unsigned char.  */
#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)

/* Cache descriptor bytes reported by CPUID leaf 2 on Intel CPUs.
   The rows must stay sorted by ascending IDX because the table is
   searched with bsearch.  */
static const struct intel_02_cache_info
{
  unsigned char idx;            /* Descriptor byte.  */
  unsigned char assoc;          /* Value reported for the ..._ASSOC name.  */
  unsigned char linesize;       /* Value reported for the ..._LINESIZE name.  */
  unsigned char rel_name;       /* M() of the cache's ..._SIZE name.  */
  unsigned int size;            /* Value reported for the ..._SIZE name.  */
} intel_02_known [] =
  {
    /* idx  assoc  line  cache                          size  */
    { 0x06,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),     8192 },
    { 0x08,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    16384 },
    { 0x09,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    32768 },
    { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),     8192 },
    { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x0e,  6, 64, M(_SC_LEVEL1_DCACHE_SIZE),    24576 },
    { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),    524288 },
    { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0x25,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0x29,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0x2c,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),    32768 },
    { 0x30,  8, 64, M(_SC_LEVEL1_ICACHE_SIZE),    32768 },
    { 0x39,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x3a,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),    196608 },
    { 0x3b,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x3c,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x3d,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),    393216 },
    { 0x3e,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x3f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x41,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x42,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x43,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x44,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x45,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x46,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0x47,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE),   3145728 },
    { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE),   4194304 },
    { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   6291456 },
    { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  12582912 },
    { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  16777216 },
    { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE),   6291456 },
    { 0x60,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x66,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),     8192 },
    { 0x67,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x68,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    32768 },
    { 0x78,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x79,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x7a,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x7b,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x80,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x85,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x86,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x87,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0xd0,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),    524288 },
    { 0xd1,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0xd2,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xd6,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0xd7,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xd8,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  12582912 },
    { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  18874368 },
    { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  25165824 },
  };

/* Number of rows in intel_02_known.  */
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
141 static int
142 intel_02_known_compare (const void *p1, const void *p2)
144 const struct intel_02_cache_info *i1;
145 const struct intel_02_cache_info *i2;
147 i1 = (const struct intel_02_cache_info *) p1;
148 i2 = (const struct intel_02_cache_info *) p2;
150 if (i1->idx == i2->idx)
151 return 0;
153 return i1->idx < i2->idx ? -1 : 1;
157 static long int
158 __attribute__ ((noinline))
159 intel_check_word (int name, unsigned int value, bool *has_level_2,
160 bool *no_level_2_or_3)
162 if ((value & 0x80000000) != 0)
163 /* The register value is reserved. */
164 return 0;
166 /* Fold the name. The _SC_ constants are always in the order SIZE,
167 ASSOC, LINESIZE. */
168 int folded_rel_name = (M(name) / 3) * 3;
170 while (value != 0)
172 unsigned int byte = value & 0xff;
174 if (byte == 0x40)
176 *no_level_2_or_3 = true;
178 if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
179 /* No need to look further. */
180 break;
182 else if (byte == 0xff)
184 /* CPUID leaf 0x4 contains all the information. We need to
185 iterate over it. */
186 unsigned int eax;
187 unsigned int ebx;
188 unsigned int ecx;
189 unsigned int edx;
191 unsigned int round = 0;
192 while (1)
194 asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
195 : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
196 : "0" (4), "2" (round));
198 enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
199 if (type == null)
200 /* That was the end. */
201 break;
203 unsigned int level = (eax >> 5) & 0x7;
205 if ((level == 1 && type == data
206 && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
207 || (level == 1 && type == inst
208 && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
209 || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
210 || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
211 || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
213 unsigned int offset = M(name) - folded_rel_name;
215 if (offset == 0)
216 /* Cache size. */
217 return (((ebx >> 22) + 1)
218 * (((ebx >> 12) & 0x3ff) + 1)
219 * ((ebx & 0xfff) + 1)
220 * (ecx + 1));
221 if (offset == 1)
222 return (ebx >> 22) + 1;
224 assert (offset == 2);
225 return (ebx & 0xfff) + 1;
228 ++round;
230 /* There is no other cache information anywhere else. */
231 break;
233 else
235 if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
237 /* Intel reused this value. For family 15, model 6 it
238 specifies the 3rd level cache. Otherwise the 2nd
239 level cache. */
240 unsigned int family;
241 unsigned int model;
242 #ifdef USE_MULTIARCH
243 family = __cpu_features.family;
244 model = __cpu_features.model;
245 #else
246 unsigned int eax;
247 unsigned int ebx;
248 unsigned int ecx;
249 unsigned int edx;
250 __cpuid (1, eax, ebx, ecx, edx);
252 family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
253 model = (((eax >>16) & 0xf) << 4) + ((eax >> 4) & 0xf);
254 #endif
256 if (family == 15 && model == 6)
258 /* The level 3 cache is encoded for this model like
259 the level 2 cache is for other models. Pretend
260 the caller asked for the level 2 cache. */
261 name = (_SC_LEVEL2_CACHE_SIZE
262 + (name - _SC_LEVEL3_CACHE_SIZE));
263 folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
267 struct intel_02_cache_info *found;
268 struct intel_02_cache_info search;
270 search.idx = byte;
271 found = bsearch (&search, intel_02_known, nintel_02_known,
272 sizeof (intel_02_known[0]), intel_02_known_compare);
273 if (found != NULL)
275 if (found->rel_name == folded_rel_name)
277 unsigned int offset = M(name) - folded_rel_name;
279 if (offset == 0)
280 /* Cache size. */
281 return found->size;
282 if (offset == 1)
283 return found->assoc;
285 assert (offset == 2);
286 return found->linesize;
289 if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
290 *has_level_2 = true;
294 /* Next byte for the next round. */
295 value >>= 8;
298 /* Nothing found. */
299 return 0;
/* Answer a cache query on an Intel CPU using CPUID leaf 2.

   NAME is the _SC_LEVEL*_CACHE_* value being queried and MAXIDX the
   highest basic CPUID leaf the processor supports.  Returns the value
   found by intel_check_word, -1 if a level 2 or level 3 parameter was
   requested and descriptor 0x40 was reported, and 0 when no
   information is available.  */
static long int __attribute__ ((noinline))
handle_intel (int name, unsigned int maxidx)
{
  /* Leaf 2 is not available.  Report "no information" instead of
     aborting: this function is reached from a constructor, so an
     assertion failure here would terminate the process.  */
  if (maxidx < 2)
    return 0;

  /* OK, we can use the CPUID instruction to get all info about the
     caches.  */
  unsigned int rounds = 1;
  bool no_level_2_or_3 = false;
  bool has_level_2 = false;

  for (unsigned int cnt = 1; cnt <= rounds; ++cnt)
    {
      unsigned int eax;
      unsigned int ebx;
      unsigned int ecx;
      unsigned int edx;
      __cpuid (2, eax, ebx, ecx, edx);

      /* The low byte of EAX in the first round contains the number of
         rounds we have to make.  At least one, the one we are already
         doing.  That byte is not a descriptor, so clear it.  */
      if (cnt == 1)
        {
          rounds = eax & 0xff;
          eax &= 0xffffff00;
        }

      /* Process the individual registers' values in the order EAX,
         EBX, ECX, EDX; the first non-zero answer wins.  */
      const unsigned int regs[4] = { eax, ebx, ecx, edx };
      for (unsigned int i = 0; i < 4; ++i)
        {
          long int result = intel_check_word (name, regs[i], &has_level_2,
                                              &no_level_2_or_3);
          if (result != 0)
            return result;
        }
    }

  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
      && no_level_2_or_3)
    return -1;

  return 0;
}
/* Decode the 4-bit associativity field CODE used by the level 2 and
   level 3 cache descriptors.  SIZE and LINESIZE are the size and line
   size decoded from the same register; they are only used for code
   15, where the result is SIZE / LINESIZE (AMD documents 0xF as
   fully associative).  A zero LINESIZE yields 0 rather than a
   division by zero.  Codes without an entry yield 0.  */
static long int
amd_decode_assoc (unsigned int code, unsigned int size,
                  unsigned int linesize)
{
  switch (code)
    {
    case 0:
    case 1:
    case 2:
    case 4:
      return code;
    case 6:
      return 8;
    case 8:
      return 16;
    case 10:
      return 32;
    case 11:
      return 48;
    case 12:
      return 64;
    case 13:
      return 96;
    case 14:
      return 128;
    case 15:
      return linesize != 0 ? size / linesize : 0;
    default:
      return 0;
    }
}

/* Answer a cache query on an AMD CPU.

   NAME is the _SC_LEVEL*_CACHE_* value being queried.  Level 1 data
   comes from CPUID function 0x80000005, level 2 and 3 data from
   function 0x80000006.  Returns 0 for level 4 names, when the
   required function is not supported, or when the level 2/3
   descriptor has a zero associativity field.  */
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);

  /* EAX now holds the highest supported extended function.  */
  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __cpuid (fn, eax, ebx, ecx, edx);

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      /* The level 1 instruction cache descriptor is in EDX and has
         the same layout as the data cache descriptor in ECX, so
         handle it as the corresponding data cache query.  */
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      ecx >>= 16;
      if ((ecx & 0xff) == 0xff)
        /* Fully associative.  */
        return (ecx << 2) & 0x3fc00;
      return ecx & 0xff;

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      return amd_decode_assoc ((ecx >> 12) & 0xf,
                               (ecx >> 6) & 0x3fffc00, ecx & 0xff);

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      return amd_decode_assoc ((edx >> 12) & 0xf,
                               (edx & 0x3ffc0000) << 1, edx & 0xff);

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      assert (! "cannot happen");
    }
  return -1;
}
476 /* Get the value of the system variable NAME. */
477 long int
478 attribute_hidden
479 __cache_sysconf (int name)
481 #ifdef USE_MULTIARCH
482 if (__cpu_features.kind == arch_kind_unknown)
483 __init_cpu_features ();
484 #else
485 /* Find out what brand of processor. */
486 unsigned int max_cpuid;
487 unsigned int ebx;
488 unsigned int ecx;
489 unsigned int edx;
490 __cpuid (0, max_cpuid, ebx, ecx, edx);
491 #endif
493 if (is_intel)
494 return handle_intel (name, max_cpuid);
496 if (is_amd)
497 return handle_amd (name);
499 // XXX Fill in more vendors.
501 /* CPU not known, we have no information. */
502 return 0;
506 /* Data cache size for use in memory and string routines, typically
507 L1 size, rounded to multiple of 256 bytes. */
508 long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
509 long int __x86_data_cache_size attribute_hidden = 32 * 1024;
510 /* Similar to __x86_data_cache_size_half, but not rounded. */
511 long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
512 /* Similar to __x86_data_cache_size, but not rounded. */
513 long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024;
514 /* Shared cache size for use in memory and string routines, typically
515 L2 or L3 size, rounded to multiple of 256 bytes. */
516 long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
517 long int __x86_shared_cache_size attribute_hidden = 1024 * 1024;
518 /* Similar to __x86_shared_cache_size_half, but not rounded. */
519 long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
520 /* Similar to __x86_shared_cache_size, but not rounded. */
521 long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;
523 #ifndef DISABLE_PREFETCHW
524 /* PREFETCHW support flag for use in memory and string routines. */
525 int __x86_prefetchw attribute_hidden;
526 #endif
528 #ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
529 /* Instructions preferred for memory and string routines.
531 0: Regular instructions
532 1: MMX instructions
533 2: SSE2 instructions
534 3: SSSE3 instructions
537 int __x86_preferred_memory_instruction attribute_hidden;
538 #endif
541 static void
542 __attribute__((constructor))
543 init_cacheinfo (void)
545 /* Find out what brand of processor. */
546 unsigned int eax;
547 unsigned int ebx;
548 unsigned int ecx;
549 unsigned int edx;
550 int max_cpuid_ex;
551 long int data = -1;
552 long int shared = -1;
553 unsigned int level;
554 unsigned int threads = 0;
556 #ifdef USE_MULTIARCH
557 if (__cpu_features.kind == arch_kind_unknown)
558 __init_cpu_features ();
559 #else
560 int max_cpuid;
561 __cpuid (0, max_cpuid, ebx, ecx, edx);
562 #endif
564 if (is_intel)
566 data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
568 /* Try L3 first. */
569 level = 3;
570 shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
572 if (shared <= 0)
574 /* Try L2 otherwise. */
575 level = 2;
576 shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
579 unsigned int ebx_1;
581 #ifdef USE_MULTIARCH
582 eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
583 ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
584 ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
585 edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
586 #else
587 __cpuid (1, eax, ebx_1, ecx, edx);
588 #endif
590 #ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
591 /* Intel prefers SSSE3 instructions for memory/string routines
592 if they are available. */
593 if ((ecx & 0x200))
594 __x86_preferred_memory_instruction = 3;
595 else
596 __x86_preferred_memory_instruction = 2;
597 #endif
599 /* Figure out the number of logical threads that share the
600 highest cache level. */
601 if (max_cpuid >= 4)
603 int i = 0;
605 /* Query until desired cache level is enumerated. */
608 __cpuid_count (4, i++, eax, ebx, ecx, edx);
610 /* There seems to be a bug in at least some Pentium Ds
611 which sometimes fail to iterate all cache parameters.
612 Do not loop indefinitely here, stop in this case and
613 assume there is no such information. */
614 if ((eax & 0x1f) == 0)
615 goto intel_bug_no_cache_info;
617 while (((eax >> 5) & 0x7) != level);
619 threads = (eax >> 14) & 0x3ff;
621 /* If max_cpuid >= 11, THREADS is the maximum number of
622 addressable IDs for logical processors sharing the
623 cache, instead of the maximum number of threads
624 sharing the cache. */
625 if (threads && max_cpuid >= 11)
627 /* Find the number of logical processors shipped in
628 one core and apply count mask. */
629 i = 0;
630 while (1)
632 __cpuid_count (11, i++, eax, ebx, ecx, edx);
634 int shipped = ebx & 0xff;
635 int type = ecx & 0xff0;
636 if (shipped == 0 || type == 0)
637 break;
638 else if (type == 0x200)
640 int count_mask;
642 /* Compute count mask. */
643 asm ("bsr %1, %0"
644 : "=r" (count_mask) : "g" (threads));
645 count_mask = ~(-1 << (count_mask + 1));
646 threads = (shipped - 1) & count_mask;
647 break;
651 threads += 1;
653 else
655 intel_bug_no_cache_info:
656 /* Assume that all logical threads share the highest cache level. */
658 threads = (ebx_1 >> 16) & 0xff;
661 /* Cap usage of highest cache level to the number of supported
662 threads. */
663 if (shared > 0 && threads > 0)
664 shared /= threads;
666 /* This spells out "AuthenticAMD". */
667 else if (is_amd)
669 data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
670 long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
671 shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
673 #ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
674 # ifdef USE_MULTIARCH
675 eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
676 ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
677 ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
678 edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
679 # else
680 __cpuid (1, eax, ebx, ecx, edx);
681 # endif
683 /* AMD prefers SSSE3 instructions for memory/string routines
684 if they are avaiable, otherwise it prefers integer
685 instructions. */
686 if ((ecx & 0x200))
687 __x86_preferred_memory_instruction = 3;
688 else
689 __x86_preferred_memory_instruction = 0;
690 #endif
692 /* Get maximum extended function. */
693 __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
695 if (shared <= 0)
696 /* No shared L3 cache. All we have is the L2 cache. */
697 shared = core;
698 else
700 /* Figure out the number of logical threads that share L3. */
701 if (max_cpuid_ex >= 0x80000008)
703 /* Get width of APIC ID. */
704 __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
705 threads = 1 << ((ecx >> 12) & 0x0f);
708 if (threads == 0)
710 /* If APIC ID width is not available, use logical
711 processor count. */
712 __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);
714 if ((edx & (1 << 28)) != 0)
715 threads = (ebx >> 16) & 0xff;
718 /* Cap usage of highest cache level to the number of
719 supported threads. */
720 if (threads > 0)
721 shared /= threads;
723 /* Account for exclusive L2 and L3 caches. */
724 shared += core;
727 #ifndef DISABLE_PREFETCHW
728 if (max_cpuid_ex >= 0x80000001)
730 __cpuid (0x80000001, eax, ebx, ecx, edx);
731 /* PREFETCHW || 3DNow! */
732 if ((ecx & 0x100) || (edx & 0x80000000))
733 __x86_prefetchw = -1;
735 #endif
738 if (data > 0)
740 __x86_raw_data_cache_size_half = data / 2;
741 __x86_raw_data_cache_size = data;
742 /* Round data cache size to multiple of 256 bytes. */
743 data = data & ~255L;
744 __x86_data_cache_size_half = data / 2;
745 __x86_data_cache_size = data;
748 if (shared > 0)
750 __x86_raw_shared_cache_size_half = shared / 2;
751 __x86_raw_shared_cache_size = shared;
752 /* Round shared cache size to multiple of 256 bytes. */
753 shared = shared & ~255L;
754 __x86_shared_cache_size_half = shared / 2;
755 __x86_shared_cache_size = shared;