/* x86_64 cache info.
   Copyright (C) 2003-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <unistd.h>
#include <cpuid.h>
#include <init-arch.h>

#define is_intel GLRO(dl_x86_cpu_features).kind == arch_kind_intel
#define is_amd GLRO(dl_x86_cpu_features).kind == arch_kind_amd
#define max_cpuid GLRO(dl_x86_cpu_features).max_cpuid
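
/* Note: the GLRO(dl_x86_cpu_features) block read by these helpers is
   filled in early during startup by the loader's CPU detection (see
   init-arch.h), so it is safe to consult it here.  */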

static const struct intel_02_cache_info
{
  unsigned char idx;
  unsigned char assoc;
  unsigned char linesize;
  unsigned char rel_name;
  unsigned int size;
} intel_02_known [] =
  {
#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
    { 0x06,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    8192 },
    { 0x08,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   16384 },
    { 0x09,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x0e,  6, 64, M(_SC_LEVEL1_DCACHE_SIZE),   24576 },
    { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0x25,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0x29,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x2c,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x30,  8, 64, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x39,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3a,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   196608 },
    { 0x3b,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3c,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x3d,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   393216 },
    { 0x3e,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x3f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x41,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x42,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x43,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x44,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x45,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x46,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x47,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE),  3145728 },
    { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE),  4194304 },
    { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  6291456 },
    { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 16777216 },
    { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE),  6291456 },
    { 0x60,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x66,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x67,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x68,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x78,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x79,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x7a,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x7b,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x80,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x85,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x86,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x87,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0xd0,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0xd1,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd2,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd6,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd7,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd8,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 18874368 },
    { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 25165824 },
  };

#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
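
/* For reference, each entry above decodes one CPUID leaf 2 descriptor
   byte; e.g. 0x2c means a 32768-byte (32 KiB) L1 data cache, 8-way
   associative with 64-byte lines, hence 32768 / (8 * 64) = 64 sets.  */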

static int
intel_02_known_compare (const void *p1, const void *p2)
{
  const struct intel_02_cache_info *i1;
  const struct intel_02_cache_info *i2;

  i1 = (const struct intel_02_cache_info *) p1;
  i2 = (const struct intel_02_cache_info *) p2;

  if (i1->idx == i2->idx)
    return 0;

  return i1->idx < i2->idx ? -1 : 1;
}
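
/* bsearch in intel_check_word relies on intel_02_known being sorted by
   ascending idx, which the initializer above maintains.  */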

static long int
__attribute__ ((noinline))
intel_check_word (int name, unsigned int value, bool *has_level_2,
                  bool *no_level_2_or_3)
{
  if ((value & 0x80000000) != 0)
    /* The register value is reserved.  */
    return 0;

  /* Fold the name.  The _SC_ constants are always in the order SIZE,
     ASSOC, LINESIZE.  */
  int folded_rel_name = (M(name) / 3) * 3;

  while (value != 0)
    {
      unsigned int byte = value & 0xff;
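
      /* Descriptor 0x40 is Intel's marker for "no L2 cache, or if there
         is an L2 cache, no L3 cache".  */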
      if (byte == 0x40)
        {
          *no_level_2_or_3 = true;

          if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
            /* No need to look further.  */
            break;
        }
      else if (byte == 0xff)
        {
          /* CPUID leaf 0x4 contains all the information.  We need to
             iterate over it.  */
          unsigned int eax;
          unsigned int ebx;
          unsigned int ecx;
          unsigned int edx;
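
          /* Each leaf 0x4 subleaf describes one cache: EAX[4:0] is the
             type, EAX[7:5] the level, EBX[31:22] ways-1, EBX[21:12]
             partitions-1, EBX[11:0] line size-1, and ECX sets-1; the
             total size is the product of the four fields (each plus
             one), as computed below.  */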
          unsigned int round = 0;
          while (1)
            {
              __cpuid_count (4, round, eax, ebx, ecx, edx);

              enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
              if (type == null)
                /* That was the end.  */
                break;

              unsigned int level = (eax >> 5) & 0x7;

              if ((level == 1 && type == data
                   && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
                  || (level == 1 && type == inst
                      && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
                  || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
                  || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
                  || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
                {
                  unsigned int offset = M(name) - folded_rel_name;

                  if (offset == 0)
                    /* Cache size.  */
                    return (((ebx >> 22) + 1)
                            * (((ebx >> 12) & 0x3ff) + 1)
                            * ((ebx & 0xfff) + 1)
                            * (ecx + 1));
                  if (offset == 1)
                    return (ebx >> 22) + 1;

                  assert (offset == 2);
                  return (ebx & 0xfff) + 1;
                }

              ++round;
            }
          /* There is no other cache information anywhere else.  */
          break;
        }
      else
        {
          if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
            {
              /* Intel reused this value.  For family 15, model 6 it
                 specifies the 3rd level cache.  Otherwise the 2nd
                 level cache.  */
              unsigned int family = GLRO(dl_x86_cpu_features).family;
              unsigned int model = GLRO(dl_x86_cpu_features).model;

              if (family == 15 && model == 6)
                {
                  /* The level 3 cache is encoded for this model like
                     the level 2 cache is for other models.  Pretend
                     the caller asked for the level 2 cache.  */
                  name = (_SC_LEVEL2_CACHE_SIZE
                          + (name - _SC_LEVEL3_CACHE_SIZE));
                  folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
                }
            }

          struct intel_02_cache_info *found;
          struct intel_02_cache_info search;

          search.idx = byte;
          found = bsearch (&search, intel_02_known, nintel_02_known,
                           sizeof (intel_02_known[0]), intel_02_known_compare);
          if (found != NULL)
            {
              if (found->rel_name == folded_rel_name)
                {
                  unsigned int offset = M(name) - folded_rel_name;

                  if (offset == 0)
                    /* Cache size.  */
                    return found->size;
                  if (offset == 1)
                    return found->assoc;

                  assert (offset == 2);
                  return found->linesize;
                }

              if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
                *has_level_2 = true;
            }
        }

      /* Next byte for the next round.  */
      value >>= 8;
    }

  /* Nothing found.  */
  return 0;
}

static long int __attribute__ ((noinline))
handle_intel (int name, unsigned int maxidx)
{
  /* Return -1 for older CPUs.  */
  if (maxidx < 2)
    return -1;

  /* OK, we can use the CPUID instruction to get all info about the
     caches.  */
  unsigned int cnt = 0;
  unsigned int max = 1;
  long int result = 0;
  bool no_level_2_or_3 = false;
  bool has_level_2 = false;

  while (cnt++ < max)
    {
      unsigned int eax;
      unsigned int ebx;
      unsigned int ecx;
      unsigned int edx;
      __cpuid (2, eax, ebx, ecx, edx);

      /* The low byte of EAX in the first round contains the number of
         rounds we have to make.  At least one, the one we are already
         doing.  */
      if (cnt == 1)
        {
          max = eax & 0xff;
          eax &= 0xffffff00;
        }

      /* Process the individual registers' value.  */
      result = intel_check_word (name, eax, &has_level_2, &no_level_2_or_3);
      if (result != 0)
        return result;

      result = intel_check_word (name, ebx, &has_level_2, &no_level_2_or_3);
      if (result != 0)
        return result;

      result = intel_check_word (name, ecx, &has_level_2, &no_level_2_or_3);
      if (result != 0)
        return result;

      result = intel_check_word (name, edx, &has_level_2, &no_level_2_or_3);
      if (result != 0)
        return result;
    }

  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
      && no_level_2_or_3)
    return -1;

  return 0;
}

static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __cpuid (fn, eax, ebx, ecx, edx);

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }
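
  /* Per AMD's CPUID documentation: leaf 0x80000005 reports the L1 data
     cache in ECX (size in KiB in bits 31:24, associativity in 23:16,
     line size in 7:0) and the L1 instruction cache in EDX with the same
     layout; leaf 0x80000006 reports L2 in ECX (size in KiB in bits
     31:16, encoded associativity in 15:12, line size in 7:0) and L3 in
     EDX (size in 512 KiB units in bits 31:18).  */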
  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      /* ECX[31:24] is the size in KiB; scale it to bytes
         ((ecx >> 24) << 10).  */
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      ecx >>= 16;
      if ((ecx & 0xff) == 0xff)
        /* Fully associative.  */
        return (ecx << 2) & 0x3fc00;
      return ecx & 0xff;

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      /* An associativity field of 0 means no L2 cache; otherwise
         ECX[31:16] is the size in KiB.  */
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      switch ((ecx >> 12) & 0xf)
        {
        case 0:
        case 1:
        case 2:
        case 4:
          return (ecx >> 12) & 0xf;
        case 6:
          return 8;
        case 8:
          return 16;
        case 10:
          return 32;
        case 11:
          return 48;
        case 12:
          return 64;
        case 13:
          return 96;
        case 14:
          return 128;
        case 15:
          /* Fully associative: report the number of lines
             (size / line size).  */
          return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
        default:
          return 0;
        }
      /* NOTREACHED */

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
      /* EDX[31:18] is the size in 512 KiB units.  */
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      switch ((edx >> 12) & 0xf)
        {
        case 0:
        case 1:
        case 2:
        case 4:
          return (edx >> 12) & 0xf;
        case 6:
          return 8;
        case 8:
          return 16;
        case 10:
          return 32;
        case 11:
          return 48;
        case 12:
          return 64;
        case 13:
          return 96;
        case 14:
          return 128;
        case 15:
          return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
        default:
          return 0;
        }
      /* NOTREACHED */

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      assert (! "cannot happen");
    }
  return -1;
}

/* Get the value of the system variable NAME.  */
long int
attribute_hidden
__cache_sysconf (int name)
{
  if (is_intel)
    return handle_intel (name, max_cpuid);

  if (is_amd)
    return handle_amd (name);

  // XXX Fill in more vendors.

  /* CPU not known, we have no information.  */
  return 0;
}
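
/* For reference: on x86 the _SC_LEVEL*_CACHE_* sysconf names are expected
   to be answered from here, so e.g.

     long int linesize = sysconf (_SC_LEVEL1_DCACHE_LINESIZE);

   reports what handle_intel/handle_amd derive above (0 when the vendor is
   unknown, -1 from handle_intel for CPUs too old for CPUID leaf 2).  */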

/* Data cache size for use in memory and string routines, typically
   L1 size, rounded to multiple of 256 bytes.  */
long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
long int __x86_data_cache_size attribute_hidden = 32 * 1024;
/* Similar to __x86_data_cache_size_half, but not rounded.  */
long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
/* Similar to __x86_data_cache_size, but not rounded.  */
long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024;
/* Shared cache size for use in memory and string routines, typically
   L2 or L3 size, rounded to multiple of 256 bytes.  */
long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
long int __x86_shared_cache_size attribute_hidden = 1024 * 1024;
/* Similar to __x86_shared_cache_size_half, but not rounded.  */
long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
/* Similar to __x86_shared_cache_size, but not rounded.  */
long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;

/* Threshold to use non-temporal stores.  */
long int __x86_shared_non_temporal_threshold attribute_hidden;

#ifndef DISABLE_PREFETCHW
/* PREFETCHW support flag for use in memory and string routines.  */
int __x86_prefetchw attribute_hidden;
#endif
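
/* The static initializers above (32 KiB L1 data, 1 MiB shared) are the
   fallback values: init_cacheinfo below only overwrites them when it can
   compute positive sizes for the running CPU.  */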

static void
__attribute__((constructor))
init_cacheinfo (void)
{
  /* Find out what brand of processor.  */
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  int max_cpuid_ex;
  long int data = -1;
  long int shared = -1;
  unsigned int level;
  unsigned int threads = 0;

  if (is_intel)
    {
      data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);

      long int core = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
      bool inclusive_cache = true;

      /* Try L3 first.  */
      level = 3;
      shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);

      /* Number of logical processors sharing L2 cache.  */
      int threads_l2;

      /* Number of logical processors sharing L3 cache.  */
      int threads_l3;

      if (shared <= 0)
        {
          /* Try L2 otherwise.  */
          level = 2;
          shared = core;
          threads_l2 = 0;
          threads_l3 = -1;
        }
      else
        {
          threads_l2 = 0;
          threads_l3 = 0;
        }

      /* A value of 0 for the HTT bit indicates there is only a single
         logical processor.  */
      if (HAS_CPU_FEATURE (HTT))
        {
          /* Figure out the number of logical threads that share the
             highest cache level.  */
          if (max_cpuid >= 4)
            {
              unsigned int family = GLRO(dl_x86_cpu_features).family;
              unsigned int model = GLRO(dl_x86_cpu_features).model;

              int i = 0;

              /* Query until cache level 2 and 3 are enumerated.  */
              int check = 0x1 | (threads_l3 == 0) << 1;
              do
                {
                  __cpuid_count (4, i++, eax, ebx, ecx, edx);

                  /* There seems to be a bug in at least some Pentium Ds
                     which sometimes fail to iterate all cache parameters.
                     Do not loop indefinitely here, stop in this case and
                     assume there is no such information.  */
                  if ((eax & 0x1f) == 0)
                    goto intel_bug_no_cache_info;

                  switch ((eax >> 5) & 0x7)
                    {
                    default:
                      break;
                    case 2:
                      if ((check & 0x1))
                        {
                          /* Get maximum number of logical processors
                             sharing L2 cache.  */
                          threads_l2 = (eax >> 14) & 0x3ff;
                          check &= ~0x1;
                        }
                      break;
                    case 3:
                      if ((check & (0x1 << 1)))
                        {
                          /* Get maximum number of logical processors
                             sharing L3 cache.  */
                          threads_l3 = (eax >> 14) & 0x3ff;

                          /* Check if L2 and L3 caches are inclusive.  */
                          inclusive_cache = (edx & 0x2) != 0;
                          check &= ~(0x1 << 1);
                        }
                      break;
                    }
                }
              while (check);

              /* If max_cpuid >= 11, THREADS_L2/THREADS_L3 are the maximum
                 numbers of addressable IDs for logical processors sharing
                 the cache, instead of the maximum number of threads
                 sharing the cache.  */
              if (max_cpuid >= 11)
                {
                  /* Find the number of logical processors shipped in
                     one core and apply count mask.  */
                  i = 0;

                  /* Count SMT only if there is L3 cache.  Always count
                     core if there is no L3 cache.  */
                  int count = ((threads_l2 > 0 && level == 3)
                               | ((threads_l3 > 0
                                   || (threads_l2 > 0 && level == 2)) << 1));
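
                  /* Walk CPUID leaf 11 (x2APIC topology enumeration):
                     for each subleaf, EBX[15:0] is the number of logical
                     processors at that topology level and ECX[15:8] the
                     level type (1 = SMT, 2 = core).  The BSR below turns
                     the addressable-ID count from leaf 4 into a bit mask
                     wide enough to cover that field.  */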
                  while (count)
                    {
                      __cpuid_count (11, i++, eax, ebx, ecx, edx);

                      int shipped = ebx & 0xff;
                      int type = ecx & 0xff00;
                      if (shipped == 0 || type == 0)
                        break;
                      else if (type == 0x100)
                        {
                          /* Count SMT.  */
                          if ((count & 0x1))
                            {
                              int count_mask;

                              /* Compute count mask.  */
                              asm ("bsr %1, %0"
                                   : "=r" (count_mask) : "g" (threads_l2));
                              count_mask = ~(-1 << (count_mask + 1));
                              threads_l2 = (shipped - 1) & count_mask;
                              count &= ~0x1;
                            }
                        }
                      else if (type == 0x200)
                        {
                          /* Count core.  */
                          if ((count & (0x1 << 1)))
                            {
                              int count_mask;
                              int threads_core
                                = (level == 2 ? threads_l2 : threads_l3);

                              /* Compute count mask.  */
                              asm ("bsr %1, %0"
                                   : "=r" (count_mask) : "g" (threads_core));
                              count_mask = ~(-1 << (count_mask + 1));
                              threads_core = (shipped - 1) & count_mask;
                              if (level == 2)
                                threads_l2 = threads_core;
                              else
                                threads_l3 = threads_core;
                              count &= ~(0x1 << 1);
                            }
                        }
                    }
                }

              if (threads_l2 > 0)
                threads_l2 += 1;
              if (threads_l3 > 0)
                threads_l3 += 1;
              if (level == 2)
                {
                  if (threads_l2)
                    {
                      threads = threads_l2;
                      if (threads > 2 && family == 6)
                        switch (model)
                          {
                          case 0x37:
                          case 0x4a:
                          case 0x4d:
                          case 0x5a:
                          case 0x5d:
                            /* Silvermont has L2 cache shared by 2 cores.  */
                            threads = 2;
                            break;
                          default:
                            break;
                          }
                    }
                }
              else if (threads_l3)
                threads = threads_l3;
            }
          else
            {
            intel_bug_no_cache_info:
              /* Assume that all logical threads share the highest cache
                 level.  */
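
              /* CPUID.1:EBX[23:16] is the maximum number of addressable
                 logical processors in the package.  */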
              threads
                = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
                    >> 16) & 0xff);
            }

          /* Cap usage of highest cache level to the number of supported
             threads.  */
          if (shared > 0 && threads > 0)
            shared /= threads;
        }

      /* Account for non-inclusive L2 and L3 caches.  */
      if (!inclusive_cache)
        {
          if (threads_l2 > 0)
            core /= threads_l2;
          shared += core;
        }
    }
  /* This spells out "AuthenticAMD".  */
  else if (is_amd)
    {
      data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
      long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
      shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);

      /* Get maximum extended function.  */
      __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);

      if (shared <= 0)
        /* No shared L3 cache.  All we have is the L2 cache.  */
        shared = core;
      else
        {
          /* Figure out the number of logical threads that share L3.  */
          if (max_cpuid_ex >= 0x80000008)
            {
              /* Get width of APIC ID.  */
              __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
              threads = 1 << ((ecx >> 12) & 0x0f);
            }

          if (threads == 0)
            {
              /* If APIC ID width is not available, use logical
                 processor count.  */
              __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);

              if ((edx & (1 << 28)) != 0)
                threads = (ebx >> 16) & 0xff;
            }

          /* Cap usage of highest cache level to the number of
             supported threads.  */
          if (threads > 0)
            shared /= threads;

          /* Account for exclusive L2 and L3 caches.  */
          shared += core;
        }

#ifndef DISABLE_PREFETCHW
      if (max_cpuid_ex >= 0x80000001)
        {
          __cpuid (0x80000001, eax, ebx, ecx, edx);
          /* PREFETCHW || 3DNow!  */
          if ((ecx & 0x100) || (edx & 0x80000000))
            __x86_prefetchw = -1;
        }
#endif
    }

  if (data > 0)
    {
      __x86_raw_data_cache_size_half = data / 2;
      __x86_raw_data_cache_size = data;
      /* Round data cache size to multiple of 256 bytes.  */
      data = data & ~255L;
      __x86_data_cache_size_half = data / 2;
      __x86_data_cache_size = data;
    }

  if (shared > 0)
    {
      __x86_raw_shared_cache_size_half = shared / 2;
      __x86_raw_shared_cache_size = shared;
      /* Round shared cache size to multiple of 256 bytes.  */
      shared = shared & ~255L;
      __x86_shared_cache_size_half = shared / 2;
      __x86_shared_cache_size = shared;
    }

  /* The large memcpy micro benchmark in glibc shows that six times the
     shared cache size is the approximate value above which non-temporal
     stores become faster.  */
  __x86_shared_non_temporal_threshold = __x86_shared_cache_size * 6;
}
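
/* Example: if an 8 MiB shared cache was detected above, the non-temporal
   threshold comes out at 48 MiB; string routines are expected to switch
   to cache-bypassing stores for copies larger than that.  */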