Last change caused infinite loops because of missing loop increment.
[glibc.git] / sysdeps / x86_64 / cacheinfo.c
blob bd4be3d2df52f5f184c5ab1663d6e10e8fc9a10a
1 /* x86_64 cache info.
2 Copyright (C) 2003,2004,2006,2007,2009,2011 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA.
21 #include <assert.h>
22 #include <stdbool.h>
23 #include <stdlib.h>
24 #include <unistd.h>
25 #include <cpuid.h>
#if !defined __cpuid_count
/* FIXME: <cpuid.h> from compilers older than gcc 4.4 lacks
   __cpuid_count, so supply it here.  The definitions are copied from
   gcc 4.4.0; drop this once gcc 4.4 is the minimum requirement.  */
# if !(defined(__i386__) && defined(__PIC__))
/* %ebx is free, let CPUID write into it directly.  */
#  define __cpuid_count(level, count, a, b, c, d)		\
  __asm__ ("cpuid\n\t"						\
	   : "=a" (a), "=b" (b), "=c" (c), "=d" (d)		\
	   : "0" (level), "2" (count))
# else
/* %ebx may be the PIC register, so swap it with a scratch register
   around the CPUID instruction.  */
#  define __cpuid_count(level, count, a, b, c, d)		\
  __asm__ ("xchg{l}\t{%%}ebx, %1\n\t"				\
	   "cpuid\n\t"						\
	   "xchg{l}\t{%%}ebx, %1\n\t"				\
	   : "=a" (a), "=r" (b), "=c" (c), "=d" (d)		\
	   : "0" (level), "2" (count))
# endif
#endif
/* Vendor predicates and the highest basic CPUID leaf.

   With USE_MULTIARCH the information comes from the __cpu_features
   structure.  Otherwise the predicates compare the local variables
   ebx, ecx and edx, which must hold the result of CPUID leaf 0 at the
   point of use, and max_cpuid is an ordinary variable declared by the
   user of these macros.

   Every expansion is fully parenthesized so that the predicates can be
   negated or combined with other operators without changing meaning.  */
#ifdef USE_MULTIARCH
# include "multiarch/init-arch.h"

# define is_intel (__cpu_features.kind == arch_kind_intel)
# define is_amd (__cpu_features.kind == arch_kind_amd)
# define max_cpuid __cpu_features.max_cpuid
#else
/* This spells out "GenuineIntel".  */
# define is_intel \
  (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
/* This spells out "AuthenticAMD".  */
# define is_amd \
  (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
#endif
/* Cache descriptors reported one byte at a time by CPUID leaf 2 on
   Intel processors.

   idx       descriptor byte
   assoc     number of ways
   linesize  line size in bytes
   rel_name  cache the entry describes, stored as the offset of its
	     _SC_*_SIZE constant from _SC_LEVEL1_ICACHE_SIZE
   size      total size in bytes

   The table is searched with bsearch, so the entries MUST stay sorted
   by strictly increasing idx and no descriptor may appear twice.  */
static const struct intel_02_cache_info
{
  unsigned char idx;
  unsigned char assoc;
  unsigned char linesize;
  unsigned char rel_name;
  unsigned int size;
} intel_02_known [] =
  {
#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
    { 0x06,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    8192 },
    { 0x08,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   16384 },
    { 0x09,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x0e,  6, 64, M(_SC_LEVEL1_DCACHE_SIZE),   24576 },
    { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0x25,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0x29,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x2c,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x30,  8, 64, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x39,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3a,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   196608 },
    { 0x3b,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3c,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x3d,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   393216 },
    { 0x3e,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x3f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x41,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x42,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x43,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x44,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x45,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x46,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x47,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE),  3145728 },
    { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE),  4194304 },
    { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  6291456 },
    { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 16777216 },
    { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE),  6291456 },
    { 0x60,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x66,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x67,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x68,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x78,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x79,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x7a,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x7b,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x80,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x85,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x86,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x87,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0xd0,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0xd1,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd2,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd6,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd7,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd8,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    /* 0xe2 was previously listed as a second 0xe3 entry, which made
       the 2 MiB descriptor unreachable and the 0xe3 lookup ambiguous.  */
    { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 18874368 },
    { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 25165824 },
  };

/* Number of entries in intel_02_known.  */
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
143 static int
144 intel_02_known_compare (const void *p1, const void *p2)
146 const struct intel_02_cache_info *i1;
147 const struct intel_02_cache_info *i2;
149 i1 = (const struct intel_02_cache_info *) p1;
150 i2 = (const struct intel_02_cache_info *) p2;
152 if (i1->idx == i2->idx)
153 return 0;
155 return i1->idx < i2->idx ? -1 : 1;
159 static long int
160 __attribute__ ((noinline))
161 intel_check_word (int name, unsigned int value, bool *has_level_2,
162 bool *no_level_2_or_3)
164 if ((value & 0x80000000) != 0)
165 /* The register value is reserved. */
166 return 0;
168 /* Fold the name. The _SC_ constants are always in the order SIZE,
169 ASSOC, LINESIZE. */
170 int folded_rel_name = (M(name) / 3) * 3;
172 while (value != 0)
174 unsigned int byte = value & 0xff;
176 if (byte == 0x40)
178 *no_level_2_or_3 = true;
180 if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
181 /* No need to look further. */
182 break;
184 else if (byte == 0xff)
186 /* CPUID leaf 0x4 contains all the information. We need to
187 iterate over it. */
188 unsigned int eax;
189 unsigned int ebx;
190 unsigned int ecx;
191 unsigned int edx;
193 unsigned int round = 0;
194 while (1)
196 asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
197 : "=a" (eax), "=r" (ebx), "=c" (ecx), "=d" (edx)
198 : "0" (4), "2" (round));
200 enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
201 if (type == null)
202 /* That was the end. */
203 break;
205 unsigned int level = (eax >> 5) & 0x7;
207 if ((level == 1 && type == data
208 && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
209 || (level == 1 && type == inst
210 && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
211 || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
212 || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
213 || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
215 unsigned int offset = M(name) - folded_rel_name;
217 if (offset == 0)
218 /* Cache size. */
219 return (((ebx >> 22) + 1)
220 * (((ebx >> 12) & 0x3ff) + 1)
221 * ((ebx & 0xfff) + 1)
222 * (ecx + 1));
223 if (offset == 1)
224 return (ebx >> 22) + 1;
226 assert (offset == 2);
227 return (ebx & 0xfff) + 1;
230 ++round;
232 /* There is no other cache information anywhere else. */
233 break;
235 else
237 if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
239 /* Intel reused this value. For family 15, model 6 it
240 specifies the 3rd level cache. Otherwise the 2nd
241 level cache. */
242 unsigned int family;
243 unsigned int model;
244 #ifdef USE_MULTIARCH
245 family = __cpu_features.family;
246 model = __cpu_features.model;
247 #else
248 unsigned int eax;
249 unsigned int ebx;
250 unsigned int ecx;
251 unsigned int edx;
252 __cpuid (1, eax, ebx, ecx, edx);
254 family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
255 model = (((eax >>16) & 0xf) << 4) + ((eax >> 4) & 0xf);
256 #endif
258 if (family == 15 && model == 6)
260 /* The level 3 cache is encoded for this model like
261 the level 2 cache is for other models. Pretend
262 the caller asked for the level 2 cache. */
263 name = (_SC_LEVEL2_CACHE_SIZE
264 + (name - _SC_LEVEL3_CACHE_SIZE));
265 folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
269 struct intel_02_cache_info *found;
270 struct intel_02_cache_info search;
272 search.idx = byte;
273 found = bsearch (&search, intel_02_known, nintel_02_known,
274 sizeof (intel_02_known[0]), intel_02_known_compare);
275 if (found != NULL)
277 if (found->rel_name == folded_rel_name)
279 unsigned int offset = M(name) - folded_rel_name;
281 if (offset == 0)
282 /* Cache size. */
283 return found->size;
284 if (offset == 1)
285 return found->assoc;
287 assert (offset == 2);
288 return found->linesize;
291 if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
292 *has_level_2 = true;
296 /* Next byte for the next round. */
297 value >>= 8;
300 /* Nothing found. */
301 return 0;
/* Answer the cache query NAME (one of the _SC_LEVEL*_CACHE_*
   constants) on an Intel processor by walking CPUID leaf 2.  MAXIDX
   is the highest basic CPUID leaf the processor supports.

   Returns the requested value, 0 if no information is available, or
   -1 if a level 2 or level 3 value was requested and the processor
   reported descriptor 0x40.  */
static long int __attribute__ ((noinline))
handle_intel (int name, unsigned int maxidx)
{
  /* Without CPUID leaf 2 there is nothing to decode.  Report "no
     information" instead of aborting; this function is also reached
     from the library constructor.  */
  if (maxidx < 2)
    return 0;

  /* OK, we can use the CPUID instruction to get all info about the
     caches.  */
  unsigned int max = 1;
  bool no_level_2_or_3 = false;
  bool has_level_2 = false;

  for (unsigned int cnt = 1; cnt <= max; ++cnt)
    {
      unsigned int eax;
      unsigned int ebx;
      unsigned int ecx;
      unsigned int edx;
      __cpuid (2, eax, ebx, ecx, edx);

      /* The low byte of EAX in the first round contain the number of
	 rounds we have to make.  At least one, the one we are already
	 doing.  */
      if (cnt == 1)
	{
	  max = eax & 0xff;
	  eax &= 0xffffff00;
	}

      /* Process the individual registers' value, in the order EAX,
	 EBX, ECX, EDX, and stop at the first one that answers the
	 query.  */
      const unsigned int regs[4] = { eax, ebx, ecx, edx };
      for (unsigned int i = 0; i < 4; ++i)
	{
	  long int result = intel_check_word (name, regs[i], &has_level_2,
					      &no_level_2_or_3);
	  if (result != 0)
	    return result;
	}
    }

  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
      && no_level_2_or_3)
    return -1;

  return 0;
}
/* Map the 4-bit associativity code used for the level 2 and level 3
   caches in CPUID leaf 0x80000006 to a number of ways.  Codes without
   an entry yield 0.  Code 15 (fully associative) is not handled here
   because the result depends on the cache and line size.  */
static long int
amd_assoc_ways (unsigned int code)
{
  static const unsigned char ways[15] =
    {
      [0] = 0, [1] = 1, [2] = 2, [4] = 4, [6] = 8, [8] = 16,
      [10] = 32, [11] = 48, [12] = 64, [13] = 96, [14] = 128
    };

  return code < 15 ? ways[code] : 0;
}

/* Answer the cache query NAME (one of the _SC_LEVEL*_CACHE_*
   constants) on an AMD processor using the extended CPUID leaves
   0x80000005 (level 1) and 0x80000006 (level 2 and 3).

   Returns the requested value, or 0 if the leaf is not implemented,
   the cache is reported as absent, or NAME refers to a level 4
   cache.  */
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  /* EAX now holds the highest supported extended leaf.  */
  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __cpuid (fn, eax, ebx, ecx, edx);

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      /* The instruction cache is described by EDX in the same layout
	 the data cache uses in ECX, so treat it as a data cache
	 query on EDX.  */
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      ecx >>= 16;
      if ((ecx & 0xff) == 0xff)
	/* Fully associative.  */
	return (ecx << 2) & 0x3fc00;
      return ecx & 0xff;

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      {
	unsigned int code = (ecx >> 12) & 0xf;
	if (code != 15)
	  return amd_assoc_ways (code);

	/* Fully associative: every line is a way.  Guard against a
	   zero line size so that bogus CPUID data cannot trigger a
	   division by zero.  */
	unsigned int linesize = ecx & 0xff;
	if (linesize == 0)
	  return 0;
	return ((ecx >> 6) & 0x3fffc00) / linesize;
      }

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      {
	unsigned int code = (edx >> 12) & 0xf;
	if (code != 15)
	  return amd_assoc_ways (code);

	/* Fully associative, see the level 2 case.  */
	unsigned int linesize = edx & 0xff;
	if (linesize == 0)
	  return 0;
	return ((edx & 0x3ffc0000) << 1) / linesize;
      }

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      assert (! "cannot happen");
    }
  return -1;
}
478 /* Get the value of the system variable NAME. */
479 long int
480 attribute_hidden
481 __cache_sysconf (int name)
483 #ifdef USE_MULTIARCH
484 if (__cpu_features.kind == arch_kind_unknown)
485 __init_cpu_features ();
486 #else
487 /* Find out what brand of processor. */
488 unsigned int max_cpuid;
489 unsigned int ebx;
490 unsigned int ecx;
491 unsigned int edx;
492 __cpuid (0, max_cpuid, ebx, ecx, edx);
493 #endif
495 if (is_intel)
496 return handle_intel (name, max_cpuid);
498 if (is_amd)
499 return handle_amd (name);
501 // XXX Fill in more vendors.
503 /* CPU not known, we have no information. */
504 return 0;
508 /* Data cache size for use in memory and string routines, typically
509 L1 size, rounded to multiple of 256 bytes. */
510 long int __x86_64_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
511 long int __x86_64_data_cache_size attribute_hidden = 32 * 1024;
512 /* Similar to __x86_64_data_cache_size_half, but not rounded. */
513 long int __x86_64_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
514 /* Similar to __x86_64_data_cache_size, but not rounded. */
515 long int __x86_64_raw_data_cache_size attribute_hidden = 32 * 1024;
516 /* Shared cache size for use in memory and string routines, typically
517 L2 or L3 size, rounded to multiple of 256 bytes. */
518 long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
519 long int __x86_64_shared_cache_size attribute_hidden = 1024 * 1024;
520 /* Similar to __x86_64_shared_cache_size_half, but not rounded. */
521 long int __x86_64_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
522 /* Similar to __x86_64_shared_cache_size, but not rounded. */
523 long int __x86_64_raw_shared_cache_size attribute_hidden = 1024 * 1024;
525 #ifndef DISABLE_PREFETCHW
526 /* PREFETCHW support flag for use in memory and string routines. */
527 int __x86_64_prefetchw attribute_hidden;
528 #endif
530 #ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
531 /* Instructions preferred for memory and string routines.
533 0: Regular instructions
534 1: MMX instructions
535 2: SSE2 instructions
536 3: SSSE3 instructions
539 int __x86_64_preferred_memory_instruction attribute_hidden;
540 #endif
543 static void
544 __attribute__((constructor))
545 init_cacheinfo (void)
547 /* Find out what brand of processor. */
548 unsigned int eax;
549 unsigned int ebx;
550 unsigned int ecx;
551 unsigned int edx;
552 int max_cpuid_ex;
553 long int data = -1;
554 long int shared = -1;
555 unsigned int level;
556 unsigned int threads = 0;
558 #ifdef USE_MULTIARCH
559 if (__cpu_features.kind == arch_kind_unknown)
560 __init_cpu_features ();
561 #else
562 int max_cpuid;
563 __cpuid (0, max_cpuid, ebx, ecx, edx);
564 #endif
566 if (is_intel)
568 data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
570 /* Try L3 first. */
571 level = 3;
572 shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
574 if (shared <= 0)
576 /* Try L2 otherwise. */
577 level = 2;
578 shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
581 unsigned int ebx_1;
583 #ifdef USE_MULTIARCH
584 eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
585 ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
586 ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
587 edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
588 #else
589 __cpuid (1, eax, ebx_1, ecx, edx);
590 #endif
592 #ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
593 /* Intel prefers SSSE3 instructions for memory/string routines
594 if they are available. */
595 if ((ecx & 0x200))
596 __x86_64_preferred_memory_instruction = 3;
597 else
598 __x86_64_preferred_memory_instruction = 2;
599 #endif
601 /* Figure out the number of logical threads that share the
602 highest cache level. */
603 if (max_cpuid >= 4)
605 int i = 0;
607 /* Query until desired cache level is enumerated. */
610 __cpuid_count (4, i++, eax, ebx, ecx, edx);
612 /* There seems to be a bug in at least some Pentium Ds
613 which sometimes fail to iterate all cache parameters.
614 Do not loop indefinitely here, stop in this case and
615 assume there is no such information. */
616 if ((eax & 0x1f) == 0)
617 goto intel_bug_no_cache_info;
619 while (((eax >> 5) & 0x7) != level);
621 threads = (eax >> 14) & 0x3ff;
623 /* If max_cpuid >= 11, THREADS is the maximum number of
624 addressable IDs for logical processors sharing the
625 cache, instead of the maximum number of threads
626 sharing the cache. */
627 if (threads && max_cpuid >= 11)
629 /* Find the number of logical processors shipped in
630 one core and apply count mask. */
631 i = 0;
632 while (1)
634 __cpuid_count (11, i++, eax, ebx, ecx, edx);
636 int shipped = ebx & 0xff;
637 int type = ecx & 0xff0;
638 if (shipped == 0 || type == 0)
639 break;
640 else if (type == 0x200)
642 int count_mask;
644 /* Compute count mask. */
645 asm ("bsr %1, %0"
646 : "=r" (count_mask) : "g" (threads));
647 count_mask = ~(-1 << (count_mask + 1));
648 threads = (shipped - 1) & count_mask;
649 break;
653 threads += 1;
655 else
657 intel_bug_no_cache_info:
658 /* Assume that all logical threads share the highest cache level. */
660 threads = (ebx_1 >> 16) & 0xff;
663 /* Cap usage of highest cache level to the number of supported
664 threads. */
665 if (shared > 0 && threads > 0)
666 shared /= threads;
668 /* This spells out "AuthenticAMD". */
669 else if (is_amd)
671 data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
672 long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
673 shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
675 #ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
676 # ifdef USE_MULTIARCH
677 eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
678 ebx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
679 ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
680 edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
681 # else
682 __cpuid (1, eax, ebx, ecx, edx);
683 # endif
685 /* AMD prefers SSSE3 instructions for memory/string routines
686 if they are avaiable, otherwise it prefers integer
687 instructions. */
688 if ((ecx & 0x200))
689 __x86_64_preferred_memory_instruction = 3;
690 else
691 __x86_64_preferred_memory_instruction = 0;
692 #endif
694 /* Get maximum extended function. */
695 __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
697 if (shared <= 0)
698 /* No shared L3 cache. All we have is the L2 cache. */
699 shared = core;
700 else
702 /* Figure out the number of logical threads that share L3. */
703 if (max_cpuid_ex >= 0x80000008)
705 /* Get width of APIC ID. */
706 __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
707 threads = 1 << ((ecx >> 12) & 0x0f);
710 if (threads == 0)
712 /* If APIC ID width is not available, use logical
713 processor count. */
714 __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);
716 if ((edx & (1 << 28)) != 0)
717 threads = (ebx >> 16) & 0xff;
720 /* Cap usage of highest cache level to the number of
721 supported threads. */
722 if (threads > 0)
723 shared /= threads;
725 /* Account for exclusive L2 and L3 caches. */
726 shared += core;
729 #ifndef DISABLE_PREFETCHW
730 if (max_cpuid_ex >= 0x80000001)
732 __cpuid (0x80000001, eax, ebx, ecx, edx);
733 /* PREFETCHW || 3DNow! */
734 if ((ecx & 0x100) || (edx & 0x80000000))
735 __x86_64_prefetchw = -1;
737 #endif
740 if (data > 0)
742 __x86_64_raw_data_cache_size_half = data / 2;
743 __x86_64_raw_data_cache_size = data;
744 /* Round data cache size to multiple of 256 bytes. */
745 data = data & ~255L;
746 __x86_64_data_cache_size_half = data / 2;
747 __x86_64_data_cache_size = data;
750 if (shared > 0)
752 __x86_64_raw_shared_cache_size_half = shared / 2;
753 __x86_64_raw_shared_cache_size = shared;
754 /* Round shared cache size to multiple of 256 bytes. */
755 shared = shared & ~255L;
756 __x86_64_shared_cache_size_half = shared / 2;
757 __x86_64_shared_cache_size = shared;