/* Provenance: glibc.git, sysdeps/x86/cacheinfo.c,
   blob 182426b2d0c7e9a59aac7d7c331c87c58c0d04c3, as of the commit
   "Remove special L2 cache case for Knights Landing".  */
/* x86_64 cache info.
   Copyright (C) 2003-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
19 #include <assert.h>
20 #include <stdbool.h>
21 #include <stdlib.h>
22 #include <unistd.h>
23 #include <cpuid.h>
24 #include <init-arch.h>
26 #define is_intel GLRO(dl_x86_cpu_features).kind == arch_kind_intel
27 #define is_amd GLRO(dl_x86_cpu_features).kind == arch_kind_amd
28 #define max_cpuid GLRO(dl_x86_cpu_features).max_cpuid
/* Table of the cache descriptor bytes reported by CPUID leaf 2 that this
   file knows how to decode.  For each descriptor byte IDX it records the
   associativity, the line size in bytes, the cache size in bytes, and
   REL_NAME: the _SC_*_CACHE_SIZE constant of the described cache, stored
   relative to _SC_LEVEL1_ICACHE_SIZE (see M below) so it fits in a byte.

   The entries MUST stay sorted by ascending IDX: intel_check_word looks
   them up with bsearch.  */
static const struct intel_02_cache_info
{
  unsigned char idx;
  unsigned char assoc;
  unsigned char linesize;
  unsigned char rel_name;
  unsigned int size;
} intel_02_known [] =
  {
/* Express an _SC_ cache constant relative to _SC_LEVEL1_ICACHE_SIZE.  */
#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
    { 0x06,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),     8192 },
    { 0x08,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    16384 },
    { 0x09,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    32768 },
    { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),     8192 },
    { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x0e,  6, 64, M(_SC_LEVEL1_DCACHE_SIZE),    24576 },
    { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),    524288 },
    { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0x25,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0x29,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0x2c,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),    32768 },
    { 0x30,  8, 64, M(_SC_LEVEL1_ICACHE_SIZE),    32768 },
    { 0x39,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x3a,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),    196608 },
    { 0x3b,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x3c,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x3d,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),    393216 },
    { 0x3e,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x3f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x41,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x42,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x43,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x44,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x45,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x46,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0x47,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE),   3145728 },
    { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE),   4194304 },
    { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   6291456 },
    { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  12582912 },
    { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  16777216 },
    { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE),   6291456 },
    { 0x60,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x66,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),     8192 },
    { 0x67,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    16384 },
    { 0x68,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    32768 },
    { 0x78,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x79,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    131072 },
    { 0x7a,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x7b,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x80,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),    262144 },
    { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0x85,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   2097152 },
    { 0x86,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),    524288 },
    { 0x87,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   1048576 },
    { 0xd0,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),    524288 },
    { 0xd1,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0xd2,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xd6,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   1048576 },
    { 0xd7,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xd8,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   2097152 },
    { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   4194304 },
    { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),   8388608 },
    { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  12582912 },
    { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  18874368 },
    { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE),  25165824 },
  };

/* Number of entries in intel_02_known.  */
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
112 static int
113 intel_02_known_compare (const void *p1, const void *p2)
115 const struct intel_02_cache_info *i1;
116 const struct intel_02_cache_info *i2;
118 i1 = (const struct intel_02_cache_info *) p1;
119 i2 = (const struct intel_02_cache_info *) p2;
121 if (i1->idx == i2->idx)
122 return 0;
124 return i1->idx < i2->idx ? -1 : 1;
128 static long int
129 __attribute__ ((noinline))
130 intel_check_word (int name, unsigned int value, bool *has_level_2,
131 bool *no_level_2_or_3)
133 if ((value & 0x80000000) != 0)
134 /* The register value is reserved. */
135 return 0;
137 /* Fold the name. The _SC_ constants are always in the order SIZE,
138 ASSOC, LINESIZE. */
139 int folded_rel_name = (M(name) / 3) * 3;
141 while (value != 0)
143 unsigned int byte = value & 0xff;
145 if (byte == 0x40)
147 *no_level_2_or_3 = true;
149 if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
150 /* No need to look further. */
151 break;
153 else if (byte == 0xff)
155 /* CPUID leaf 0x4 contains all the information. We need to
156 iterate over it. */
157 unsigned int eax;
158 unsigned int ebx;
159 unsigned int ecx;
160 unsigned int edx;
162 unsigned int round = 0;
163 while (1)
165 __cpuid_count (4, round, eax, ebx, ecx, edx);
167 enum { null = 0, data = 1, inst = 2, uni = 3 } type = eax & 0x1f;
168 if (type == null)
169 /* That was the end. */
170 break;
172 unsigned int level = (eax >> 5) & 0x7;
174 if ((level == 1 && type == data
175 && folded_rel_name == M(_SC_LEVEL1_DCACHE_SIZE))
176 || (level == 1 && type == inst
177 && folded_rel_name == M(_SC_LEVEL1_ICACHE_SIZE))
178 || (level == 2 && folded_rel_name == M(_SC_LEVEL2_CACHE_SIZE))
179 || (level == 3 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
180 || (level == 4 && folded_rel_name == M(_SC_LEVEL4_CACHE_SIZE)))
182 unsigned int offset = M(name) - folded_rel_name;
184 if (offset == 0)
185 /* Cache size. */
186 return (((ebx >> 22) + 1)
187 * (((ebx >> 12) & 0x3ff) + 1)
188 * ((ebx & 0xfff) + 1)
189 * (ecx + 1));
190 if (offset == 1)
191 return (ebx >> 22) + 1;
193 assert (offset == 2);
194 return (ebx & 0xfff) + 1;
197 ++round;
199 /* There is no other cache information anywhere else. */
200 break;
202 else
204 if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
206 /* Intel reused this value. For family 15, model 6 it
207 specifies the 3rd level cache. Otherwise the 2nd
208 level cache. */
209 unsigned int family = GLRO(dl_x86_cpu_features).family;
210 unsigned int model = GLRO(dl_x86_cpu_features).model;
212 if (family == 15 && model == 6)
214 /* The level 3 cache is encoded for this model like
215 the level 2 cache is for other models. Pretend
216 the caller asked for the level 2 cache. */
217 name = (_SC_LEVEL2_CACHE_SIZE
218 + (name - _SC_LEVEL3_CACHE_SIZE));
219 folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
223 struct intel_02_cache_info *found;
224 struct intel_02_cache_info search;
226 search.idx = byte;
227 found = bsearch (&search, intel_02_known, nintel_02_known,
228 sizeof (intel_02_known[0]), intel_02_known_compare);
229 if (found != NULL)
231 if (found->rel_name == folded_rel_name)
233 unsigned int offset = M(name) - folded_rel_name;
235 if (offset == 0)
236 /* Cache size. */
237 return found->size;
238 if (offset == 1)
239 return found->assoc;
241 assert (offset == 2);
242 return found->linesize;
245 if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
246 *has_level_2 = true;
250 /* Next byte for the next round. */
251 value >>= 8;
254 /* Nothing found. */
255 return 0;
/* Query CPUID leaf 2 for the cache property NAME (an
   _SC_LEVEL*_CACHE_* constant).  MAXIDX is the highest supported basic
   CPUID leaf and must be at least 2.

   Returns the value found by intel_check_word; -1 if nothing was found,
   NAME refers to the level 2 or level 3 cache, and the "no level 2 or 3
   cache" descriptor was seen; 0 otherwise.  */
static long int __attribute__ ((noinline))
handle_intel (int name, unsigned int maxidx)
{
  assert (maxidx >= 2);

  /* OK, we can use the CPUID instruction to get all info about the
     caches.  */
  unsigned int rounds = 1;
  bool no_level_2_or_3 = false;
  bool has_level_2 = false;

  for (unsigned int cnt = 1; cnt <= rounds; ++cnt)
    {
      /* EAX, EBX, ECX, EDX in that order.  */
      unsigned int regs[4];
      __cpuid (2, regs[0], regs[1], regs[2], regs[3]);

      /* The low byte of EAX in the first round contain the number of
         rounds we have to make.  At least one, the one we are already
         doing.  */
      if (cnt == 1)
        {
          rounds = regs[0] & 0xff;
          regs[0] &= 0xffffff00;
        }

      /* Process the individual registers' value.  */
      for (unsigned int r = 0; r < 4; ++r)
        {
          long int result = intel_check_word (name, regs[r], &has_level_2,
                                              &no_level_2_or_3);
          if (result != 0)
            return result;
        }
    }

  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
      && no_level_2_or_3)
    return -1;

  return 0;
}
/* Translate the 4-bit associativity CODE found in CPUID leaf 0x80000006
   (L2 in ECX, L3 in EDX) into a number of ways.  SIZE and LINESIZE are the
   already-decoded cache size and line size of the same cache; they are
   only used for code 15, where the result is SIZE / LINESIZE (presumably
   the fully associative encoding -- confirm against the AMD CPUID
   specification).  A zero LINESIZE yields 0 instead of dividing by zero.
   Unknown codes yield 0.  */
static long int
amd_decode_assoc (unsigned int code, unsigned int size, unsigned int linesize)
{
  switch (code)
    {
    case 0:
    case 1:
    case 2:
    case 4:
      /* These codes are their own way count.  */
      return code;
    case 6:
      return 8;
    case 8:
      return 16;
    case 10:
      return 32;
    case 11:
      return 48;
    case 12:
      return 64;
    case 13:
      return 96;
    case 14:
      return 128;
    case 15:
      return linesize == 0 ? 0 : size / linesize;
    default:
      return 0;
    }
}

/* Return the cache property NAME (an _SC_LEVEL*_CACHE_* constant) as
   reported by the extended CPUID leaves 0x80000005 (level 1) and
   0x80000006 (levels 2 and 3).  Returns 0 when NAME refers to a level 4
   cache, when the needed leaf is not supported, or when the cache is
   reported as absent.  */
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  /* Level 1 data lives in leaf 0x80000005, levels 2 and 3 in the next
     leaf.  EAX of leaf 0x80000000 is the highest extended leaf.  */
  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __cpuid (fn, eax, ebx, ecx, edx);

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      /* Instruction cache request: the L1I data is in EDX with the same
         layout as the L1D data in ECX, so reuse the data cache cases.  */
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      ecx >>= 16;
      if ((ecx & 0xff) == 0xff)
        /* Fully associative.  */
        return (ecx << 2) & 0x3fc00;
      return ecx & 0xff;

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      return amd_decode_assoc ((ecx >> 12) & 0xf,
                               (ecx >> 6) & 0x3fffc00,
                               ecx & 0xff);

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      return amd_decode_assoc ((edx >> 12) & 0xf,
                               (edx & 0x3ffc0000) << 1,
                               edx & 0xff);

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      assert (! "cannot happen");
    }
  return -1;
}
432 /* Get the value of the system variable NAME. */
433 long int
434 attribute_hidden
435 __cache_sysconf (int name)
437 if (is_intel)
438 return handle_intel (name, max_cpuid);
440 if (is_amd)
441 return handle_amd (name);
443 // XXX Fill in more vendors.
445 /* CPU not known, we have no information. */
446 return 0;
450 /* Data cache size for use in memory and string routines, typically
451 L1 size, rounded to multiple of 256 bytes. */
452 long int __x86_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
453 long int __x86_data_cache_size attribute_hidden = 32 * 1024;
454 /* Similar to __x86_data_cache_size_half, but not rounded. */
455 long int __x86_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
456 /* Similar to __x86_data_cache_size, but not rounded. */
457 long int __x86_raw_data_cache_size attribute_hidden = 32 * 1024;
458 /* Shared cache size for use in memory and string routines, typically
459 L2 or L3 size, rounded to multiple of 256 bytes. */
460 long int __x86_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
461 long int __x86_shared_cache_size attribute_hidden = 1024 * 1024;
462 /* Similar to __x86_shared_cache_size_half, but not rounded. */
463 long int __x86_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
464 /* Similar to __x86_shared_cache_size, but not rounded. */
465 long int __x86_raw_shared_cache_size attribute_hidden = 1024 * 1024;
467 /* Threshold to use non temporal store. */
468 long int __x86_shared_non_temporal_threshold attribute_hidden;
470 #ifndef DISABLE_PREFETCHW
471 /* PREFETCHW support flag for use in memory and string routines. */
472 int __x86_prefetchw attribute_hidden;
473 #endif
476 static void
477 __attribute__((constructor))
478 init_cacheinfo (void)
480 /* Find out what brand of processor. */
481 unsigned int eax;
482 unsigned int ebx;
483 unsigned int ecx;
484 unsigned int edx;
485 int max_cpuid_ex;
486 long int data = -1;
487 long int shared = -1;
488 unsigned int level;
489 unsigned int threads = 0;
491 if (is_intel)
493 data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
495 long int core = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
496 bool inclusive_cache = true;
498 /* Try L3 first. */
499 level = 3;
500 shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
502 if (shared <= 0)
504 /* Try L2 otherwise. */
505 level = 2;
506 shared = core;
509 /* A value of 0 for the HTT bit indicates there is only a single
510 logical processor. */
511 if (HAS_CPU_FEATURE (HTT))
513 /* Figure out the number of logical threads that share the
514 highest cache level. */
515 if (max_cpuid >= 4)
517 unsigned int family = GLRO(dl_x86_cpu_features).family;
518 unsigned int model = GLRO(dl_x86_cpu_features).model;
520 int i = 0;
522 /* Query until desired cache level is enumerated. */
525 __cpuid_count (4, i++, eax, ebx, ecx, edx);
527 /* There seems to be a bug in at least some Pentium Ds
528 which sometimes fail to iterate all cache parameters.
529 Do not loop indefinitely here, stop in this case and
530 assume there is no such information. */
531 if ((eax & 0x1f) == 0)
532 goto intel_bug_no_cache_info;
534 while (((eax >> 5) & 0x7) != level);
536 /* Check if cache is inclusive of lower cache levels. */
537 inclusive_cache = (edx & 0x2) != 0;
539 threads = (eax >> 14) & 0x3ff;
541 /* If max_cpuid >= 11, THREADS is the maximum number of
542 addressable IDs for logical processors sharing the
543 cache, instead of the maximum number of threads
544 sharing the cache. */
545 if (threads && max_cpuid >= 11)
547 /* Find the number of logical processors shipped in
548 one core and apply count mask. */
549 i = 0;
550 while (1)
552 __cpuid_count (11, i++, eax, ebx, ecx, edx);
554 int shipped = ebx & 0xff;
555 int type = ecx & 0xff00;
556 if (shipped == 0 || type == 0)
557 break;
558 else if (type == 0x200)
560 int count_mask;
562 /* Compute count mask. */
563 asm ("bsr %1, %0"
564 : "=r" (count_mask) : "g" (threads));
565 count_mask = ~(-1 << (count_mask + 1));
566 threads = (shipped - 1) & count_mask;
567 break;
571 threads += 1;
572 if (threads > 2 && level == 2 && family == 6)
574 switch (model)
576 case 0x37:
577 case 0x4a:
578 case 0x4d:
579 case 0x5a:
580 case 0x5d:
581 /* Silvermont has L2 cache shared by 2 cores. */
582 threads = 2;
583 break;
584 default:
585 break;
589 else
591 intel_bug_no_cache_info:
592 /* Assume that all logical threads share the highest cache
593 level. */
595 threads
596 = ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
597 >> 16) & 0xff);
600 /* Cap usage of highest cache level to the number of supported
601 threads. */
602 if (shared > 0 && threads > 0)
603 shared /= threads;
606 /* Account for non-inclusive L2 and L3 caches. */
607 if (level == 3 && !inclusive_cache)
608 shared += core;
610 /* This spells out "AuthenticAMD". */
611 else if (is_amd)
613 data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
614 long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
615 shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
617 /* Get maximum extended function. */
618 __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);
620 if (shared <= 0)
621 /* No shared L3 cache. All we have is the L2 cache. */
622 shared = core;
623 else
625 /* Figure out the number of logical threads that share L3. */
626 if (max_cpuid_ex >= 0x80000008)
628 /* Get width of APIC ID. */
629 __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
630 threads = 1 << ((ecx >> 12) & 0x0f);
633 if (threads == 0)
635 /* If APIC ID width is not available, use logical
636 processor count. */
637 __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);
639 if ((edx & (1 << 28)) != 0)
640 threads = (ebx >> 16) & 0xff;
643 /* Cap usage of highest cache level to the number of
644 supported threads. */
645 if (threads > 0)
646 shared /= threads;
648 /* Account for exclusive L2 and L3 caches. */
649 shared += core;
652 #ifndef DISABLE_PREFETCHW
653 if (max_cpuid_ex >= 0x80000001)
655 __cpuid (0x80000001, eax, ebx, ecx, edx);
656 /* PREFETCHW || 3DNow! */
657 if ((ecx & 0x100) || (edx & 0x80000000))
658 __x86_prefetchw = -1;
660 #endif
663 if (data > 0)
665 __x86_raw_data_cache_size_half = data / 2;
666 __x86_raw_data_cache_size = data;
667 /* Round data cache size to multiple of 256 bytes. */
668 data = data & ~255L;
669 __x86_data_cache_size_half = data / 2;
670 __x86_data_cache_size = data;
673 if (shared > 0)
675 __x86_raw_shared_cache_size_half = shared / 2;
676 __x86_raw_shared_cache_size = shared;
677 /* Round shared cache size to multiple of 256 bytes. */
678 shared = shared & ~255L;
679 __x86_shared_cache_size_half = shared / 2;
680 __x86_shared_cache_size = shared;
683 /* The large memcpy micro benchmark in glibc shows that 6 times of
684 shared cache size is the approximate value above which non-temporal
685 store becomes faster. */
686 __x86_shared_non_temporal_threshold = __x86_shared_cache_size * 6;