[glibc.git] / sysdeps/x86/cacheinfo.c
/* x86_64 cache info.
   Copyright (C) 2003-2018 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
#if IS_IN (libc)

#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <unistd.h>
#include <cpuid.h>
#include <init-arch.h>

static const struct intel_02_cache_info
{
  unsigned char idx;
  unsigned char assoc;
  unsigned char linesize;
  unsigned char rel_name;
  unsigned int size;
} intel_02_known [] =
  {
#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
    { 0x06,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),     8192 },
    { 0x08,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    16384 },
    { 0x09,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    32768 },
    { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),     8192 },
    { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x0e,  6, 64, M(_SC_LEVEL1_DCACHE_SIZE),    24576 },
    { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),    524288 },
    { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0x25,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0x29,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0x2c,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),    32768 },
    { 0x30,  8, 64, M(_SC_LEVEL1_ICACHE_SIZE),    32768 },
    { 0x39,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x3a,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),    196608 },
    { 0x3b,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x3c,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x3d,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),    393216 },
    { 0x3e,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x3f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x41,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x42,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x43,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x44,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x45,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x46,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0x47,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE),   3145728 },
    { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE),   4194304 },
    { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   6291456 },
    { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  12582912 },
    { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  16777216 },
    { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE),   6291456 },
    { 0x60,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x66,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),     8192 },
    { 0x67,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x68,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    32768 },
    { 0x78,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x79,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x7a,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x7b,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x80,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x85,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x86,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x87,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0xd0,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),    524288 },
    { 0xd1,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0xd2,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xd6,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0xd7,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xd8,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  12582912 },
    { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  18874368 },
    { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  25165824 },
  };

#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
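
/* The table above must remain sorted by ascending IDX:
   intel_check_word looks descriptors up with bsearch using the
   comparison function below.  */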

static int
intel_02_known_compare (const void *p1, const void *p2)
{
  const struct intel_02_cache_info *i1;
  const struct intel_02_cache_info *i2;

  i1 = (const struct intel_02_cache_info *) p1;
  i2 = (const struct intel_02_cache_info *) p2;

  if (i1->idx == i2->idx)
    return 0;

  return i1->idx < i2->idx ? -1 : 1;
}


static long int
__attribute__ ((noinline))
intel_check_word (int name, unsigned int value, bool *has_level_2,
                  bool *no_level_2_or_3,
                  const struct cpu_features *cpu_features)
{
  if ((value & 0x80000000) != 0)
    /* The register value is reserved.  */
    return 0;

  /* Fold the name.  The _SC_ constants are always in the order SIZE,
     ASSOC, LINESIZE.  */
  int folded_rel_name = (M(name) / 3) * 3;
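  /* For example, M(_SC_LEVEL1_DCACHE_ASSOC) is 4, so the expression
     above folds it to M(_SC_LEVEL1_DCACHE_SIZE), which is 3.  */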

  while (value != 0)
    {
      unsigned int byte = value & 0xff;

      if (byte == 0x40)
        {
          *no_level_2_or_3 = true;

          if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
            /* No need to look further.  */
            break;
        }
      else if (byte == 0xff)
        {
          /* CPUID leaf 0x4 contains all the information.  We need to
             iterate over it.  */
          unsigned int eax;
          unsigned int ebx;
          unsigned int ecx;
          unsigned int edx;

          unsigned int round = 0;
          while (1)
            {
              __cpuid_count (4, round, eax, ebx, ecx, edx);

              enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
              if (type == null)
                /* That was the end.  */
                break;

              unsigned int level = (eax >> 5) & 0x7;

              if ((level == 1 && type == data
                   && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
                  || (level == 1 && type == inst
                      && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
                  || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
                  || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
                  || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
                {
                  unsigned int offset = M(name) - folded_rel_name;
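                  /* Leaf 4 encodes each field biased by one:
                     EBX[31:22] is ways-1, EBX[21:12] is line
                     partitions-1, EBX[11:0] is line size-1 and ECX is
                     sets-1, hence the +1 corrections below.  */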
                  if (offset == 0)
                    /* Cache size.  */
                    return (((ebx >> 22) + 1)
                            * (((ebx >> 12) & 0x3ff) + 1)
                            * ((ebx & 0xfff) + 1)
                            * (ecx + 1));
                  if (offset == 1)
                    return (ebx >> 22) + 1;

                  assert (offset == 2);
                  return (ebx & 0xfff) + 1;
                }

              ++round;
            }
          /* There is no other cache information anywhere else.  */
          break;
        }
      else
        {
          if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
            {
              /* Intel reused this value.  For family 15, model 6 it
                 specifies the 3rd level cache.  Otherwise the 2nd
                 level cache.  */
              unsigned int family = cpu_features->family;
              unsigned int model = cpu_features->model;

              if (family == 15 && model == 6)
                {
                  /* The level 3 cache is encoded for this model like
                     the level 2 cache is for other models.  Pretend
                     the caller asked for the level 2 cache.  */
                  name = (_SC_LEVEL2_CACHE_SIZE
                          + (name - _SC_LEVEL3_CACHE_SIZE));
                  folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
                }
            }

          struct intel_02_cache_info *found;
          struct intel_02_cache_info search;

          search.idx = byte;
          found = bsearch (&search, intel_02_known, nintel_02_known,
                           sizeof (intel_02_known[0]), intel_02_known_compare);
          if (found != NULL)
            {
              if (found->rel_name == folded_rel_name)
                {
                  unsigned int offset = M(name) - folded_rel_name;

                  if (offset == 0)
                    /* Cache size.  */
                    return found->size;
                  if (offset == 1)
                    return found->assoc;

                  assert (offset == 2);
                  return found->linesize;
                }

              if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
                *has_level_2 = true;
            }
        }

      /* Next byte for the next round.  */
      value >>= 8;
    }

  /* Nothing found.  */
  return 0;
}


static long int __attribute__ ((noinline))
handle_intel (int name, const struct cpu_features *cpu_features)
{
  unsigned int maxidx = cpu_features->max_cpuid;

  /* Return -1 for older CPUs.  */
  if (maxidx < 2)
    return -1;

  /* OK, we can use the CPUID instruction to get all info about the
     caches.  */
  unsigned int cnt = 0;
  unsigned int max = 1;
  long int result = 0;
  bool no_level_2_or_3 = false;
  bool has_level_2 = false;

  while (cnt++ < max)
    {
      unsigned int eax;
      unsigned int ebx;
      unsigned int ecx;
      unsigned int edx;
      __cpuid (2, eax, ebx, ecx, edx);

      /* The low byte of EAX in the first round contains the number of
         rounds we have to make.  At least one, the one we are already
         doing.  */
      if (cnt == 1)
        {
          max = eax & 0xff;
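          /* Clear the low byte so the round count is not mistaken for
             a cache descriptor below.  */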
          eax &= 0xffffff00;
        }

      /* Process the individual registers' value.  */
      result = intel_check_word (name, eax, &has_level_2,
                                 &no_level_2_or_3, cpu_features);
      if (result != 0)
        return result;

      result = intel_check_word (name, ebx, &has_level_2,
                                 &no_level_2_or_3, cpu_features);
      if (result != 0)
        return result;

      result = intel_check_word (name, ecx, &has_level_2,
                                 &no_level_2_or_3, cpu_features);
      if (result != 0)
        return result;

      result = intel_check_word (name, edx, &has_level_2,
                                 &no_level_2_or_3, cpu_features);
      if (result != 0)
        return result;
    }

  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
      && no_level_2_or_3)
    return -1;

  return 0;
}


static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;
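  /* Leaf 0x80000005 describes the L1 caches, leaf 0x80000006 the L2
     and L3 caches.  */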
  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __cpuid (fn, eax, ebx, ecx, edx);
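  /* Leaf 0x80000005 reports the L1 instruction cache in EDX and the
     L1 data cache in ECX, so fold ICACHE requests onto the DCACHE
     cases below and read from EDX instead.  */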
  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
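      /* ECX[31:24] is the L1 data cache size in KB; the expression
         below equals (ecx >> 24) << 10, i.e. the size in bytes.  */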
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      ecx >>= 16;
      if ((ecx & 0xff) == 0xff)
        /* Fully associative.  */
        return (ecx << 2) & 0x3fc00;
      return ecx & 0xff;

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
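      /* ECX[15:12] is the L2 associativity; zero means no L2 cache.
         ECX[31:16] is the L2 size in KB, so the expression below is
         (ecx >> 16) << 10 bytes.  */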
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      switch ((ecx >> 12) & 0xf)
        {
        case 0:
        case 1:
        case 2:
        case 4:
          return (ecx >> 12) & 0xf;
        case 6:
          return 8;
        case 8:
          return 16;
        case 10:
          return 32;
        case 11:
          return 48;
        case 12:
          return 64;
        case 13:
          return 96;
        case 14:
          return 128;
        case 15:
          return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
        default:
          return 0;
        }
      /* NOTREACHED */

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
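      /* EDX[15:12] is the L3 associativity; zero means no L3 cache.
         EDX[31:18] is the L3 size in 512 KB units, so the expression
         below is (edx >> 18) << 19 bytes.  */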
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      switch ((edx >> 12) & 0xf)
        {
        case 0:
        case 1:
        case 2:
        case 4:
          return (edx >> 12) & 0xf;
        case 6:
          return 8;
        case 8:
          return 16;
        case 10:
          return 32;
        case 11:
          return 48;
        case 12:
          return 64;
        case 13:
          return 96;
        case 14:
          return 128;
        case 15:
          return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
        default:
          return 0;
        }
      /* NOTREACHED */

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      assert (! "cannot happen");
    }
  return -1;
}


/* Get the value of the system variable NAME.  */
long int
attribute_hidden
__cache_sysconf (int name)
{
  const struct cpu_features *cpu_features = __get_cpu_features ();

  if (cpu_features->kind == arch_kind_intel)
    return handle_intel (name, cpu_features);

  if (cpu_features->kind == arch_kind_amd)
    return handle_amd (name);

  // XXX Fill in more vendors.

  /* CPU not known, we have no information.  */
  return 0;
}
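
/* Usage sketch (illustrative only, not part of the original file):
   the sysconf machinery forwards cache queries here, e.g.

     long int l1d_size = __cache_sysconf (_SC_LEVEL1_DCACHE_SIZE);

   0 means the vendor is unknown; handle_intel also returns -1 when
   the CPU cannot report the requested level.  */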


/* Data cache size for use in memory and string routines, typically
   L1 size, rounded to a multiple of 256 bytes.  */
long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
long int __x86_data_cache_size attribute_hidden = 32 * 1024;
/* Similar to __x86_data_cache_size_half, but not rounded.  */
long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
/* Similar to __x86_data_cache_size, but not rounded.  */
long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024;
/* Shared cache size for use in memory and string routines, typically
   L2 or L3 size, rounded to a multiple of 256 bytes.  */
long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
long int __x86_shared_cache_size attribute_hidden = 1024 * 1024;
/* Similar to __x86_shared_cache_size_half, but not rounded.  */
long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
/* Similar to __x86_shared_cache_size, but not rounded.  */
long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;

/* Threshold above which to use non-temporal stores.  */
long int __x86_shared_non_temporal_threshold attribute_hidden;

#ifndef DISABLE_PREFETCHW
/* PREFETCHW support flag for use in memory and string routines.  */
int __x86_prefetchw attribute_hidden;
#endif


static void
__attribute__((constructor))
init_cacheinfo (void)
{
  /* Find out the brand of processor.  */
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  int max_cpuid_ex;
  long int data = -1;
  long int shared = -1;
  unsigned int level;
  unsigned int threads = 0;
  const struct cpu_features *cpu_features = __get_cpu_features ();
  int max_cpuid = cpu_features->max_cpuid;

  if (cpu_features->kind == arch_kind_intel)
    {
      data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, cpu_features);

      long int core = handle_intel (_SC_LEVEL2_CACHE_SIZE, cpu_features);
      bool inclusive_cache = true;

      /* Try L3 first.  */
      level = 3;
      shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, cpu_features);

      /* Number of logical processors sharing L2 cache.  */
      int threads_l2;

      /* Number of logical processors sharing L3 cache.  */
      int threads_l3;

      if (shared <= 0)
        {
          /* Try L2 otherwise.  */
          level = 2;
          shared = core;
          threads_l2 = 0;
          threads_l3 = -1;
        }
      else
        {
          threads_l2 = 0;
          threads_l3 = 0;
        }

      /* A value of 0 for the HTT bit indicates there is only a single
         logical processor.  */
      if (HAS_CPU_FEATURE (HTT))
        {
          /* Figure out the number of logical threads that share the
             highest cache level.  */
          if (max_cpuid >= 4)
            {
              unsigned int family = cpu_features->family;
              unsigned int model = cpu_features->model;

              int i = 0;

              /* Query until cache levels 2 and 3 are enumerated.  */
              int check = 0x1 | (threads_l3 == 0) << 1;
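              /* Bit 0 of CHECK means the L2 share count is still
                 needed; bit 1 means the L3 share count is (it is only
                 set when an L3 cache was found above).  */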
              do
                {
                  __cpuid_count (4, i++, eax, ebx, ecx, edx);

                  /* There seems to be a bug in at least some Pentium Ds
                     which sometimes fail to iterate all cache parameters.
                     Do not loop indefinitely here, stop in this case and
                     assume there is no such information.  */
                  if ((eax & 0x1f) == 0)
                    goto intel_bug_no_cache_info;

                  switch ((eax >> 5) & 0x7)
                    {
                    default:
                      break;
                    case 2:
                      if ((check & 0x1))
                        {
                          /* Get maximum number of logical processors
                             sharing L2 cache.  */
                          threads_l2 = (eax >> 14) & 0x3ff;
                          check &= ~0x1;
                        }
                      break;
                    case 3:
                      if ((check & (0x1 << 1)))
                        {
                          /* Get maximum number of logical processors
                             sharing L3 cache.  */
                          threads_l3 = (eax >> 14) & 0x3ff;

                          /* Check if L2 and L3 caches are inclusive.  */
                          inclusive_cache = (edx & 0x2) != 0;
                          check &= ~(0x1 << 1);
                        }
                      break;
                    }
                }
              while (check);

              /* If max_cpuid >= 11, THREADS_L2/THREADS_L3 are the maximum
                 numbers of addressable IDs for logical processors sharing
                 the cache, instead of the maximum number of threads
                 sharing the cache.  */
              if (max_cpuid >= 11)
                {
                  /* Find the number of logical processors shipped in
                     one core and apply count mask.  */
                  i = 0;

                  /* Count SMT only if there is L3 cache.  Always count
                     core if there is no L3 cache.  */
                  int count = ((threads_l2 > 0 && level == 3)
                               | ((threads_l3 > 0
                                   || (threads_l2 > 0 && level == 2)) << 1));
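                  /* Bit 0 of COUNT requests the SMT-level walk of
                     CPUID leaf 11 below, bit 1 the core-level walk.  */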

                  while (count)
                    {
                      __cpuid_count (11, i++, eax, ebx, ecx, edx);

                      int shipped = ebx & 0xff;
                      int type = ecx & 0xff00;
                      if (shipped == 0 || type == 0)
                        break;
                      else if (type == 0x100)
                        {
                          /* Count SMT.  */
                          if ((count & 0x1))
                            {
                              int count_mask;

                              /* Compute count mask.  */
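                              /* bsr yields the index of the highest
                                 set bit, so the next two statements
                                 build the smallest all-ones mask that
                                 covers THREADS_L2.  */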
                              asm ("bsr %1, %0"
                                   : "=r" (count_mask) : "g" (threads_l2));
                              count_mask = ~(-1 << (count_mask + 1));
                              threads_l2 = (shipped - 1) & count_mask;
                              count &= ~0x1;
                            }
                        }
                      else if (type == 0x200)
                        {
                          /* Count core.  */
                          if ((count & (0x1 << 1)))
                            {
                              int count_mask;
                              int threads_core
                                = (level == 2 ? threads_l2 : threads_l3);

                              /* Compute count mask.  */
                              asm ("bsr %1, %0"
                                   : "=r" (count_mask) : "g" (threads_core));
                              count_mask = ~(-1 << (count_mask + 1));
                              threads_core = (shipped - 1) & count_mask;
                              if (level == 2)
                                threads_l2 = threads_core;
                              else
                                threads_l3 = threads_core;
                              count &= ~(0x1 << 1);
                            }
                        }
                    }
                }

              if (threads_l2 > 0)
                threads_l2 += 1;
              if (threads_l3 > 0)
                threads_l3 += 1;
              if (level == 2)
                {
                  if (threads_l2)
                    {
                      threads = threads_l2;
                      if (threads > 2 && family == 6)
                        switch (model)
                          {
                          case 0x37:
                          case 0x4a:
                          case 0x4d:
                          case 0x5a:
                          case 0x5d:
                            /* Silvermont has L2 cache shared by 2 cores.  */
                            threads = 2;
                            break;
                          default:
                            break;
                          }
                    }
                }
              else if (threads_l3)
                threads = threads_l3;
            }
          else
            {
            intel_bug_no_cache_info:
              /* Assume that all logical threads share the highest cache
                 level.  */

              threads
                = ((cpu_features->cpuid[COMMON_CPUID_INDEX_1].ebx
                    >> 16) & 0xff);
            }

          /* Cap usage of highest cache level to the number of supported
             threads.  */
          if (shared > 0 && threads > 0)
            shared /= threads;
        }

      /* Account for non-inclusive L2 and L3 caches.  */
      if (!inclusive_cache)
        {
          if (threads_l2 > 0)
            core /= threads_l2;
          shared += core;
        }
    }
  else if (cpu_features->kind == arch_kind_amd)
    {
      data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
      long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
      shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);

      /* Get maximum extended function.  */
      __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);

      if (shared <= 0)
        /* No shared L3 cache.  All we have is the L2 cache.  */
        shared = core;
      else
        {
          /* Figure out the number of logical threads that share L3.  */
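          /* CPUID 0x80000008 ECX[15:12] gives the width of the core
             part of the APIC ID; 1 << width bounds the number of
             cores, used here as the share count.  */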
          if (max_cpuid_ex >= 0x80000008)
            {
              /* Get width of APIC ID.  */
              __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
              threads = 1 << ((ecx >> 12) & 0x0f);
            }

          if (threads == 0)
            {
              /* If APIC ID width is not available, use logical
                 processor count.  */
              __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);

              if ((edx & (1 << 28)) != 0)
                threads = (ebx >> 16) & 0xff;
            }

          /* Cap usage of highest cache level to the number of
             supported threads.  */
          if (threads > 0)
            shared /= threads;

          /* Account for exclusive L2 and L3 caches.  */
          shared += core;
        }

#ifndef DISABLE_PREFETCHW
      if (max_cpuid_ex >= 0x80000001)
        {
          __cpuid (0x80000001, eax, ebx, ecx, edx);
          /* PREFETCHW || 3DNow!  */
          if ((ecx & 0x100) || (edx & 0x80000000))
            __x86_prefetchw = -1;
        }
#endif
    }

  if (cpu_features->data_cache_size != 0)
    data = cpu_features->data_cache_size;

  if (data > 0)
    {
      __x86_raw_data_cache_size_half = data / 2;
      __x86_raw_data_cache_size = data;
      /* Round the data cache size to a multiple of 256 bytes.  */
      data = data & ~255L;
      __x86_data_cache_size_half = data / 2;
      __x86_data_cache_size = data;
    }

  if (cpu_features->shared_cache_size != 0)
    shared = cpu_features->shared_cache_size;

  if (shared > 0)
    {
      __x86_raw_shared_cache_size_half = shared / 2;
      __x86_raw_shared_cache_size = shared;
      /* Round the shared cache size to a multiple of 256 bytes.  */
      shared = shared & ~255L;
      __x86_shared_cache_size_half = shared / 2;
      __x86_shared_cache_size = shared;
    }

  /* The large-memcpy micro benchmark in glibc shows that six times the
     shared cache size is the approximate value above which non-temporal
     stores become faster on an 8-core processor.  This is 3/4 of the
     total shared cache size.  */
  __x86_shared_non_temporal_threshold
    = (cpu_features->non_temporal_threshold != 0
       ? cpu_features->non_temporal_threshold
       : __x86_shared_cache_size * threads * 3 / 4);
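  /* Illustrative numbers only: with a 16 MiB L3 shared by 8 threads,
     __x86_shared_cache_size is 2 MiB after the division above, so the
     default threshold is 2 MiB * 8 * 3 / 4 = 12 MiB, i.e. 3/4 of the
     16 MiB total.  */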
}

#endif