[glibc.git] / sysdeps/x86_64/cacheinfo.c
/* x86_64 cache info.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <unistd.h>
#include <cpuid.h>

#ifndef __cpuid_count
/* FIXME: Provide __cpuid_count if it isn't defined.  Copied from gcc
   4.4.0.  Remove this if gcc 4.4 is the minimum requirement.  */
# if defined(__i386__) && defined(__PIC__)
/* %ebx may be the PIC register.  */
#  define __cpuid_count(level, count, a, b, c, d)	\
  __asm__ ("xchg{l}\t{%%}ebx, %1\n\t"			\
	   "cpuid\n\t"					\
	   "xchg{l}\t{%%}ebx, %1\n\t"			\
	   : "=a" (a), "=r" (b), "=c" (c), "=d" (d)	\
	   : "0" (level), "2" (count))
# else
#  define __cpuid_count(level, count, a, b, c, d)	\
  __asm__ ("cpuid\n\t"					\
	   : "=a" (a), "=b" (b), "=c" (c), "=d" (d)	\
	   : "0" (level), "2" (count))
# endif
#endif

#ifdef USE_MULTIARCH
# include "multiarch/init-arch.h"

# define is_intel  __cpu_features.kind == arch_kind_intel
# define is_amd    __cpu_features.kind == arch_kind_amd
# define max_cpuid __cpu_features.max_cpuid
#else
/* This spells out "GenuineIntel".  */
# define is_intel \
  ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69
/* This spells out "AuthenticAMD".  */
# define is_amd \
  ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65
#endif
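
/* Known CPUID leaf 2 cache descriptors.  IDX is the descriptor byte
   reported by the hardware; the remaining fields give the
   associativity, line size, cache level (REL_NAME, stored as an
   offset from _SC_LEVEL1_ICACHE_SIZE) and total size in bytes of the
   cache the descriptor names.  The entries are sorted by IDX because
   they are looked up with bsearch below.  */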
static const struct intel_02_cache_info
{
  unsigned char idx;
  unsigned char assoc;
  unsigned char linesize;
  unsigned char rel_name;
  unsigned int size;
} intel_02_known [] =
  {
#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
    { 0x06,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),     8192 },
    { 0x08,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    16384 },
    { 0x09,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    32768 },
    { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),     8192 },
    { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x0e,  6, 64, M(_SC_LEVEL1_DCACHE_SIZE),    24576 },
    { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),    524288 },
    { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0x25,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0x29,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0x2c,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),    32768 },
    { 0x30,  8, 64, M(_SC_LEVEL1_ICACHE_SIZE),    32768 },
    { 0x39,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x3a,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),    196608 },
    { 0x3b,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x3c,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x3d,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),    393216 },
    { 0x3e,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x3f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x41,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x42,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x43,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x44,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x45,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x46,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0x47,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE),   3145728 },
    { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE),   4194304 },
    { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   6291456 },
    { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  12582912 },
    { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  16777216 },
    { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE),   6291456 },
    { 0x60,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x66,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),     8192 },
    { 0x67,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x68,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    32768 },
    { 0x78,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x79,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x7a,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x7b,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x80,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x85,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x86,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x87,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0xd0,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),    524288 },
    { 0xd1,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0xd2,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xd6,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0xd7,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xd8,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  12582912 },
    { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  18874368 },
    { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  25165824 },
  };

#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))

static int
intel_02_known_compare (const void *p1, const void *p2)
{
  const struct intel_02_cache_info *i1;
  const struct intel_02_cache_info *i2;

  i1 = (const struct intel_02_cache_info *) p1;
  i2 = (const struct intel_02_cache_info *) p2;

  if (i1->idx == i2->idx)
    return 0;

  return i1->idx < i2->idx ? -1 : 1;
}
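
/* Decode one CPUID leaf 2 register: VALUE carries four descriptor
   bytes (a set bit 31 marks the register as reserved).  Return the
   cache parameter requested by NAME, or 0 if these descriptors do
   not supply it.  */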
static long int
__attribute__ ((noinline))
intel_check_word (int name, unsigned int value, bool *has_level_2,
                  bool *no_level_2_or_3)
{
  if ((value & 0x80000000) != 0)
    /* The register value is reserved.  */
    return 0;

  /* Fold the name.  The _SC_ constants are always in the order SIZE,
     ASSOC, LINESIZE.  */
  int folded_rel_name = (M(name) / 3) * 3;

  while (value != 0)
    {
      unsigned int byte = value & 0xff;

      if (byte == 0x40)
        {
          *no_level_2_or_3 = true;

          if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
            /* No need to look further.  */
            break;
        }
      else if (byte == 0xff)
        {
          /* CPUID leaf 0x4 contains all the information.  We need to
             iterate over it.  */
          unsigned int eax;
          unsigned int ebx;
          unsigned int ecx;
          unsigned int edx;

          unsigned int round = 0;
          while (1)
            {
              __cpuid_count (4, round, eax, ebx, ecx, edx);
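
              /* Leaf 4 encoding: EAX[4:0] is the cache type and
                 EAX[7:5] the level; EBX[31:22] holds ways-1,
                 EBX[21:12] partitions-1, EBX[11:0] line size-1 and
                 ECX sets-1, so the size computed below is the
                 product of those four fields, each plus one.  */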
              enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
              if (type == null)
                /* That was the end.  */
                break;

              unsigned int level = (eax >> 5) & 0x7;

              if ((level == 1 && type == data
                   && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
                  || (level == 1 && type == inst
                      && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
                  || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
                  || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
                  || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
                {
                  unsigned int offset = M(name) - folded_rel_name;

                  if (offset == 0)
                    /* Cache size.  */
                    return (((ebx >> 22) + 1)
                            * (((ebx >> 12) & 0x3ff) + 1)
                            * ((ebx & 0xfff) + 1)
                            * (ecx + 1));
                  if (offset == 1)
                    return (ebx >> 22) + 1;

                  assert (offset == 2);
                  return (ebx & 0xfff) + 1;
                }

              ++round;
            }
          /* There is no other cache information anywhere else.  */
          break;
        }
      else
        {
          if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
            {
              /* Intel reused this value.  For family 15, model 6 it
                 specifies the 3rd level cache.  Otherwise the 2nd
                 level cache.  */
              unsigned int family;
              unsigned int model;
#ifdef USE_MULTIARCH
              family = __cpu_features.family;
              model = __cpu_features.model;
#else
              unsigned int eax;
              unsigned int ebx;
              unsigned int ecx;
              unsigned int edx;
              __cpuid (1, eax, ebx, ecx, edx);

              family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
              model = (((eax >> 16) & 0xf) << 4) + ((eax >> 4) & 0xf);
#endif

              if (family == 15 && model == 6)
                {
                  /* The level 3 cache is encoded for this model like
                     the level 2 cache is for other models.  Pretend
                     the caller asked for the level 2 cache.  */
                  name = (_SC_LEVEL2_CACHE_SIZE
                          + (name - _SC_LEVEL3_CACHE_SIZE));
                  folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
                }
            }

          struct intel_02_cache_info *found;
          struct intel_02_cache_info search;

          search.idx = byte;
          found = bsearch (&search, intel_02_known, nintel_02_known,
                           sizeof (intel_02_known[0]), intel_02_known_compare);
          if (found != NULL)
            {
              if (found->rel_name == folded_rel_name)
                {
                  unsigned int offset = M(name) - folded_rel_name;

                  if (offset == 0)
                    /* Cache size.  */
                    return found->size;
                  if (offset == 1)
                    return found->assoc;

                  assert (offset == 2);
                  return found->linesize;
                }

              if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
                *has_level_2 = true;
            }
        }

      /* Next byte for the next round.  */
      value >>= 8;
    }

  /* Nothing found.  */
  return 0;
}
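
/* Walk all of CPUID leaf 2 for an Intel CPU and return the cache
   parameter NAME.  Returns -1 if an L2/L3 parameter was requested
   but the descriptors say there is no such cache, and 0 if nothing
   was found.  MAXIDX is the highest supported CPUID leaf; leaf 2
   requires it to be at least 2.  */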
static long int __attribute__ ((noinline))
handle_intel (int name, unsigned int maxidx)
{
  assert (maxidx >= 2);

  /* OK, we can use the CPUID instruction to get all info about the
     caches.  */
  unsigned int cnt = 0;
  unsigned int max = 1;
  long int result = 0;
  bool no_level_2_or_3 = false;
  bool has_level_2 = false;

  while (cnt++ < max)
    {
      unsigned int eax;
      unsigned int ebx;
      unsigned int ecx;
      unsigned int edx;
      __cpuid (2, eax, ebx, ecx, edx);

      /* The low byte of EAX in the first round contains the number of
         rounds we have to make.  At least one, the one we are already
         doing.  */
      if (cnt == 1)
        {
          max = eax & 0xff;
          eax &= 0xffffff00;
        }

      /* Process the individual registers' value.  */
      result = intel_check_word (name, eax, &has_level_2, &no_level_2_or_3);
      if (result != 0)
        return result;

      result = intel_check_word (name, ebx, &has_level_2, &no_level_2_or_3);
      if (result != 0)
        return result;

      result = intel_check_word (name, ecx, &has_level_2, &no_level_2_or_3);
      if (result != 0)
        return result;

      result = intel_check_word (name, edx, &has_level_2, &no_level_2_or_3);
      if (result != 0)
        return result;
    }

  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
      && no_level_2_or_3)
    return -1;

  return 0;
}
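
/* Return the cache parameter NAME for an AMD CPU.  AMD reports cache
   geometry through the extended leaves 0x80000005 (L1) and
   0x80000006 (L2 and L3) rather than through leaf 2.  */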
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __cpuid (fn, eax, ebx, ecx, edx);
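
  /* In leaf 0x80000005, ECX describes the L1 data cache and EDX the
     L1 instruction cache: size in KB in the top byte, associativity
     and line size in the lower bytes.  In leaf 0x80000006, ECX
     describes the L2 cache (size in KB in the upper 16 bits) and EDX
     the L3 cache (size in 512 KB units in bits 31:18), each with a
     4-bit associativity code in bits 15:12.  */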
  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      ecx >>= 16;
      if ((ecx & 0xff) == 0xff)
        /* Fully associative.  */
        return (ecx << 2) & 0x3fc00;
      return ecx & 0xff;

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      switch ((ecx >> 12) & 0xf)
        {
        case 0:
        case 1:
        case 2:
        case 4:
          return (ecx >> 12) & 0xf;
        case 6:
          return 8;
        case 8:
          return 16;
        case 10:
          return 32;
        case 11:
          return 48;
        case 12:
          return 64;
        case 13:
          return 96;
        case 14:
          return 128;
        case 15:
          return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
        default:
          return 0;
        }
      /* NOTREACHED */

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      switch ((edx >> 12) & 0xf)
        {
        case 0:
        case 1:
        case 2:
        case 4:
          return (edx >> 12) & 0xf;
        case 6:
          return 8;
        case 8:
          return 16;
        case 10:
          return 32;
        case 11:
          return 48;
        case 12:
          return 64;
        case 13:
          return 96;
        case 14:
          return 128;
        case 15:
          return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
        default:
          return 0;
        }
      /* NOTREACHED */

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      assert (! "cannot happen");
    }
  return -1;
}

/* Get the value of the system variable NAME.  */
long int
attribute_hidden
__cache_sysconf (int name)
{
#ifdef USE_MULTIARCH
  if (__cpu_features.kind == arch_kind_unknown)
    __init_cpu_features ();
#else
  /* Find out what brand of processor.  */
  unsigned int max_cpuid;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  __cpuid (0, max_cpuid, ebx, ecx, edx);
#endif

  if (is_intel)
    return handle_intel (name, max_cpuid);

  if (is_amd)
    return handle_amd (name);

  // XXX Fill in more vendors.

  /* CPU not known, we have no information.  */
  return 0;
}

/* Data cache size for use in memory and string routines, typically
   L1 size, rounded to multiple of 256 bytes.  */
long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
long int __x86_data_cache_size attribute_hidden = 32 * 1024;
/* Similar to __x86_data_cache_size_half, but not rounded.  */
long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
/* Similar to __x86_data_cache_size, but not rounded.  */
long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024;
/* Shared cache size for use in memory and string routines, typically
   L2 or L3 size, rounded to multiple of 256 bytes.  */
long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
long int __x86_shared_cache_size attribute_hidden = 1024 * 1024;
/* Similar to __x86_shared_cache_size_half, but not rounded.  */
long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
/* Similar to __x86_shared_cache_size, but not rounded.  */
long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;

#ifndef DISABLE_PREFETCHW
/* PREFETCHW support flag for use in memory and string routines.  */
int __x86_prefetchw attribute_hidden;
#endif

#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
/* Instructions preferred for memory and string routines.

     0: Regular instructions
     1: MMX instructions
     2: SSE2 instructions
     3: SSSE3 instructions
   */
int __x86_preferred_memory_instruction attribute_hidden;
#endif
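
/* ELF constructor: runs once at startup, probes the CPU with CPUID
   and fills in the cache-size and feature variables above.  */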
static void
__attribute__((constructor))
init_cacheinfo (void)
{
  /* Find out what brand of processor.  */
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  int max_cpuid_ex;
  long int data = -1;
  long int shared = -1;
  unsigned int level;
  unsigned int threads = 0;

#ifdef USE_MULTIARCH
  if (__cpu_features.kind == arch_kind_unknown)
    __init_cpu_features ();
#else
  int max_cpuid;
  __cpuid (0, max_cpuid, ebx, ecx, edx);
#endif

  if (is_intel)
    {
      data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);

      /* Try L3 first.  */
      level = 3;
      shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);

      if (shared <= 0)
        {
          /* Try L2 otherwise.  */
          level = 2;
          shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
        }

      unsigned int ebx_1;

#ifdef USE_MULTIARCH
      eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
      ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
      ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
      edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
#else
      __cpuid (1, eax, ebx_1, ecx, edx);
#endif

#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
      /* Intel prefers SSSE3 instructions for memory/string routines
         if they are available.  */
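      /* CPUID.1:ECX bit 9 is the SSSE3 feature flag.  */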
      if ((ecx & 0x200))
        __x86_preferred_memory_instruction = 3;
      else
        __x86_preferred_memory_instruction = 2;
#endif

      /* Figure out the number of logical threads that share the
         highest cache level.  */
      if (max_cpuid >= 4)
        {
          int i = 0;

          /* Query until desired cache level is enumerated.  */
          do
            {
              __cpuid_count (4, i++, eax, ebx, ecx, edx);

              /* There seems to be a bug in at least some Pentium Ds
                 which sometimes fail to iterate all cache parameters.
                 Do not loop indefinitely here, stop in this case and
                 assume there is no such information.  */
              if ((eax & 0x1f) == 0)
                goto intel_bug_no_cache_info;
            }
          while (((eax >> 5) & 0x7) != level);
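
          /* EAX[25:14] of leaf 4 holds the maximum number of logical
             processors sharing this cache, minus 1.  */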
          threads = (eax >> 14) & 0x3ff;

          /* If max_cpuid >= 11, THREADS is the maximum number of
             addressable IDs for logical processors sharing the
             cache, instead of the maximum number of threads
             sharing the cache.  */
          if (threads && max_cpuid >= 11)
            {
              /* Find the number of logical processors shipped in
                 one core and apply count mask.  */
              i = 0;
              while (1)
                {
                  __cpuid_count (11, i++, eax, ebx, ecx, edx);
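
                  /* Each leaf 11 sub-leaf describes one processor
                     topology level: EBX holds the number of logical
                     processors at that level and ECX[15:8] its type
                     (1 = SMT, 2 = core).  */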
                  int shipped = ebx & 0xff;
                  int type = ecx & 0xff00;
                  if (shipped == 0 || type == 0)
                    break;
                  else if (type == 0x200)
                    {
                      int count_mask;

                      /* Compute count mask.  */
                      asm ("bsr %1, %0"
                           : "=r" (count_mask) : "g" (threads));
                      count_mask = ~(-1 << (count_mask + 1));
                      threads = (shipped - 1) & count_mask;
                      break;
                    }
                }
            }
          threads += 1;
        }
      else
        {
        intel_bug_no_cache_info:
          /* Assume that all logical threads share the highest cache
             level.  */
          threads = (ebx_1 >> 16) & 0xff;
        }

      /* Cap usage of highest cache level to the number of supported
         threads.  */
      if (shared > 0 && threads > 0)
        shared /= threads;
    }
  /* This spells out "AuthenticAMD".  */
  else if (is_amd)
    {
      data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
      long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
      shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);

#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
# ifdef USE_MULTIARCH
      eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
      ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
      ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
      edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
# else
      __cpuid (1, eax, ebx, ecx, edx);
# endif

      /* AMD prefers SSSE3 instructions for memory/string routines
         if they are available, otherwise it prefers integer
         instructions.  */
      if ((ecx & 0x200))
        __x86_preferred_memory_instruction = 3;
      else
        __x86_preferred_memory_instruction = 0;
#endif

      /* Get maximum extended function.  */
      __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);

      if (shared <= 0)
        /* No shared L3 cache.  All we have is the L2 cache.  */
        shared = core;
      else
        {
          /* Figure out the number of logical threads that share L3.  */
          if (max_cpuid_ex >= 0x80000008)
            {
              /* Get width of APIC ID.  */
              __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
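              /* ECX[15:12] gives the width of the core-ID field in
                 the APIC ID, so 1 << width bounds the number of
                 cores in the package.  */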
              threads = 1 << ((ecx >> 12) & 0x0f);
            }

          if (threads == 0)
            {
              /* If APIC ID width is not available, use logical
                 processor count.  */
              __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);
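
              /* CPUID.1:EDX bit 28 is the HTT flag; when it is set,
                 EBX[23:16] holds the logical processor count.  */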
              if ((edx & (1 << 28)) != 0)
                threads = (ebx >> 16) & 0xff;
            }

          /* Cap usage of highest cache level to the number of
             supported threads.  */
          if (threads > 0)
            shared /= threads;

          /* Account for exclusive L2 and L3 caches.  */
          shared += core;
        }

#ifndef DISABLE_PREFETCHW
      if (max_cpuid_ex >= 0x80000001)
        {
          __cpuid (0x80000001, eax, ebx, ecx, edx);
          /*  PREFETCHW     || 3DNow!  */
          if ((ecx & 0x100) || (edx & 0x80000000))
            __x86_prefetchw = -1;
        }
#endif
    }

  if (data > 0)
    {
      __x86_raw_data_cache_size_half = data / 2;
      __x86_raw_data_cache_size = data;
      /* Round data cache size to multiple of 256 bytes.  */
      data = data & ~255L;
      __x86_data_cache_size_half = data / 2;
      __x86_data_cache_size = data;
    }

  if (shared > 0)
    {
      __x86_raw_shared_cache_size_half = shared / 2;
      __x86_raw_shared_cache_size = shared;
      /* Round shared cache size to multiple of 256 bytes.  */
      shared = shared & ~255L;
      __x86_shared_cache_size_half = shared / 2;
      __x86_shared_cache_size = shared;
    }
}