Fix missing dependencies and ensure correct CPPFLAGS.
[glibc.git] / sysdeps / x86_64 / cacheinfo.c
blob eae54e725a658b8bfed6408a4127132c60615cc3
1 /* x86_64 cache info.
2 Copyright (C) 2003, 2004, 2006, 2007, 2009 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA.
21 #include <assert.h>
22 #include <stdbool.h>
23 #include <stdlib.h>
24 #include <unistd.h>
25 #include <cpuid.h>
#ifndef __cpuid_count
/* FIXME: Provide __cpuid_count if it isn't defined.  Copied from gcc
   4.4.0.  Remove this if gcc 4.4 is the minimum requirement.
   __cpuid_count executes CPUID with EAX=LEVEL and ECX=COUNT (the
   sub-leaf index) and stores the four result registers in A, B, C, D.  */
# if defined(__i386__) && defined(__PIC__)
/* %ebx may be the PIC register.  Save and restore it around CPUID by
   exchanging it with a scratch register; B therefore gets the "=r"
   constraint instead of "=b".  */
#  define __cpuid_count(level, count, a, b, c, d) \
  __asm__ ("xchg{l}\t{%%}ebx, %1\n\t" \
	   "cpuid\n\t" \
	   "xchg{l}\t{%%}ebx, %1\n\t" \
	   : "=a" (a), "=r" (b), "=c" (c), "=d" (d) \
	   : "0" (level), "2" (count))
# else
/* On x86-64 (and non-PIC i386) %ebx can be clobbered directly.  */
#  define __cpuid_count(level, count, a, b, c, d) \
  __asm__ ("cpuid\n\t" \
	   : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
	   : "0" (level), "2" (count))
# endif
#endif
#ifdef USE_MULTIARCH
# include "multiarch/init-arch.h"

/* With multiarch support the vendor/family information has already
   been collected in __cpu_features; use it directly.  */
# define is_intel __cpu_features.kind == arch_kind_intel
# define is_amd __cpu_features.kind == arch_kind_amd
# define max_cpuid __cpu_features.max_cpuid
#else
/* Without multiarch the macros test the CPUID(0) vendor string, which
   the caller must have loaded into local variables ebx/ecx/edx.
   NOTE: the expansions are unparenthesized &&-expressions — use them
   only as a complete controlling expression, as this file does.  */
/* This spells out "GenuineIntel".  */
# define is_intel \
  ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69
/* This spells out "AuthenticAMD".  */
# define is_amd \
  ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65
#endif
/* Table of cache descriptors returned by Intel's CPUID leaf 2, mapped
   to the _SC_LEVEL*_{SIZE,ASSOC,LINESIZE} sysconf values.  The table
   MUST be sorted by ascending IDX with no duplicates: it is searched
   with bsearch/intel_02_known_compare, which compares IDX only.

   Fix: the first of the two 0xe3 entries was wrong — descriptor 0xe2
   is the 2 MB 16-way L3 cache; the duplicated 0xe3 both shadowed the
   4 MB entry and made 0xe2 unfindable.  */
static const struct intel_02_cache_info
{
  unsigned char idx;		/* CPUID leaf 2 descriptor byte.  */
  unsigned char assoc;		/* Ways of associativity.  */
  unsigned char linesize;	/* Cache line size in bytes.  */
  unsigned char rel_name;	/* _SC_* name, relative (see M below).  */
  unsigned int size;		/* Total cache size in bytes.  */
} intel_02_known [] =
  {
    /* Store _SC_* names relative to _SC_LEVEL1_ICACHE_SIZE so they fit
       in an unsigned char.  */
#define M(sc) ((sc) - _SC_LEVEL1_ICACHE_SIZE)
    { 0x06,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),    8192 },
    { 0x08,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   16384 },
    { 0x09,  4, 32, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x0a,  2, 32, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x0c,  4, 32, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x0d,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x0e,  6, 64, M(_SC_LEVEL1_DCACHE_SIZE),   24576 },
    { 0x21,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x22,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0x23,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0x25,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0x29,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x2c,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x30,  8, 64, M(_SC_LEVEL1_ICACHE_SIZE),   32768 },
    { 0x39,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3a,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   196608 },
    { 0x3b,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x3c,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x3d,  6, 64, M(_SC_LEVEL2_CACHE_SIZE),   393216 },
    { 0x3e,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x3f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x41,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x42,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x43,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x44,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x45,  4, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x46,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0x47,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x48, 12, 64, M(_SC_LEVEL2_CACHE_SIZE),  3145728 },
    { 0x49, 16, 64, M(_SC_LEVEL2_CACHE_SIZE),  4194304 },
    { 0x4a, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  6291456 },
    { 0x4b, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0x4c, 12, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0x4d, 16, 64, M(_SC_LEVEL3_CACHE_SIZE), 16777216 },
    { 0x4e, 24, 64, M(_SC_LEVEL2_CACHE_SIZE),  6291456 },
    { 0x60,  8, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x66,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),    8192 },
    { 0x67,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   16384 },
    { 0x68,  4, 64, M(_SC_LEVEL1_DCACHE_SIZE),   32768 },
    { 0x78,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x79,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   131072 },
    { 0x7a,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x7b,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x7c,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x7d,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x7f,  2, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x80,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x82,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   262144 },
    { 0x83,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x84,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0x85,  8, 32, M(_SC_LEVEL2_CACHE_SIZE),  2097152 },
    { 0x86,  4, 64, M(_SC_LEVEL2_CACHE_SIZE),   524288 },
    { 0x87,  8, 64, M(_SC_LEVEL2_CACHE_SIZE),  1048576 },
    { 0xd0,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),   524288 },
    { 0xd1,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd2,  4, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd6,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  1048576 },
    { 0xd7,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xd8,  8, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xdc, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xdd, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xde, 12, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0xe2, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  2097152 },
    { 0xe3, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  4194304 },
    { 0xe4, 16, 64, M(_SC_LEVEL3_CACHE_SIZE),  8388608 },
    { 0xea, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 12582912 },
    { 0xeb, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 18874368 },
    { 0xec, 24, 64, M(_SC_LEVEL3_CACHE_SIZE), 25165824 },
  };

#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
143 static int
144 intel_02_known_compare (const void *p1, const void *p2)
146 const struct intel_02_cache_info *i1;
147 const struct intel_02_cache_info *i2;
149 i1 = (const struct intel_02_cache_info *) p1;
150 i2 = (const struct intel_02_cache_info *) p2;
152 if (i1->idx == i2->idx)
153 return 0;
155 return i1->idx < i2->idx ? -1 : 1;
/* Examine one 32-bit register VALUE returned by CPUID leaf 2: walk its
   four descriptor bytes and return the cache parameter requested by
   NAME (an _SC_LEVEL*_* value) if one of the descriptors describes it.
   Returns 0 when VALUE contains no answer for NAME.  Sets
   *HAS_LEVEL_2 when an L2 descriptor is seen and *NO_LEVEL_2_OR_3 when
   descriptor 0x40 ("no L2, or no L3 if L2 present") is seen.  */
static long int
__attribute__ ((noinline))
intel_check_word (int name, unsigned int value, bool *has_level_2,
		  bool *no_level_2_or_3)
{
  if ((value & 0x80000000) != 0)
    /* The register value is reserved.  */
    return 0;

  /* Fold the name.  The _SC_ constants are always in the order SIZE,
     ASSOC, LINESIZE.  */
  int folded_rel_name = (M(name) / 3) * 3;

  while (value != 0)
    {
      unsigned int byte = value & 0xff;

      if (byte == 0x40)
	{
	  *no_level_2_or_3 = true;

	  if (folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
	    /* No need to look further.  */
	    break;
	}
      else
	{
	  if (byte == 0x49 && folded_rel_name == M(_SC_LEVEL3_CACHE_SIZE))
	    {
	      /* Intel reused this value.  For family 15, model 6 it
		 specifies the 3rd level cache.  Otherwise the 2nd
		 level cache.  */
	      unsigned int family;
	      unsigned int model;
#ifdef USE_MULTIARCH
	      family = __cpu_features.family;
	      model = __cpu_features.model;
#else
	      unsigned int eax;
	      unsigned int ebx;
	      unsigned int ecx;
	      unsigned int edx;
	      __cpuid (1, eax, ebx, ecx, edx);

	      /* Combine extended and base family/model fields per the
		 CPUID(1) EAX layout.  */
	      family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
	      model = (((eax >>16) & 0xf) << 4) + ((eax >> 4) & 0xf);
#endif

	      if (family == 15 && model == 6)
		{
		  /* The level 3 cache is encoded for this model like
		     the level 2 cache is for other models.  Pretend
		     the caller asked for the level 2 cache.  */
		  name = (_SC_LEVEL2_CACHE_SIZE
			  + (name - _SC_LEVEL3_CACHE_SIZE));
		  folded_rel_name = M(_SC_LEVEL2_CACHE_SIZE);
		}
	    }

	  /* Look the descriptor byte up in the sorted table.  */
	  struct intel_02_cache_info *found;
	  struct intel_02_cache_info search;

	  search.idx = byte;
	  found = bsearch (&search, intel_02_known, nintel_02_known,
			   sizeof (intel_02_known[0]), intel_02_known_compare);
	  if (found != NULL)
	    {
	      if (found->rel_name == folded_rel_name)
		{
		  /* OFFSET selects among SIZE (0), ASSOC (1),
		     LINESIZE (2) within the folded triple.  */
		  unsigned int offset = M(name) - folded_rel_name;

		  if (offset == 0)
		    /* Cache size.  */
		    return found->size;
		  if (offset == 1)
		    return found->assoc;

		  assert (offset == 2);
		  return found->linesize;
		}

	      if (found->rel_name == M(_SC_LEVEL2_CACHE_SIZE))
		*has_level_2 = true;
	    }
	}

      /* Next byte for the next round.  */
      value >>= 8;
    }

  /* Nothing found.  */
  return 0;
}
/* Return the cache parameter NAME (an _SC_LEVEL*_* value) for an Intel
   CPU by iterating CPUID leaf 2.  MAXIDX is the highest supported
   basic CPUID leaf; the caller guarantees it is at least 2.  Returns
   the value, 0 if unknown, or -1 if the CPU reported that the
   requested L2/L3 cache does not exist.  */
static long int __attribute__ ((noinline))
handle_intel (int name, unsigned int maxidx)
{
  assert (maxidx >= 2);

  /* OK, we can use the CPUID instruction to get all info about the
     caches.  */
  unsigned int cnt = 0;
  unsigned int max = 1;
  long int result = 0;
  bool no_level_2_or_3 = false;
  bool has_level_2 = false;

  while (cnt++ < max)
    {
      unsigned int eax;
      unsigned int ebx;
      unsigned int ecx;
      unsigned int edx;
      __cpuid (2, eax, ebx, ecx, edx);

      /* The low byte of EAX in the first round contain the number of
	 rounds we have to make.  At least one, the one we are already
	 doing.  */
      if (cnt == 1)
	{
	  max = eax & 0xff;
	  /* Clear the count byte so it is not mistaken for a
	     descriptor below.  */
	  eax &= 0xffffff00;
	}

      /* Process the individual registers' value.  */
      result = intel_check_word (name, eax, &has_level_2, &no_level_2_or_3);
      if (result != 0)
	return result;

      result = intel_check_word (name, ebx, &has_level_2, &no_level_2_or_3);
      if (result != 0)
	return result;

      result = intel_check_word (name, ecx, &has_level_2, &no_level_2_or_3);
      if (result != 0)
	return result;

      result = intel_check_word (name, edx, &has_level_2, &no_level_2_or_3);
      if (result != 0)
	return result;
    }

  /* Descriptor 0x40 was seen: the queried L2/L3 cache is absent.  */
  if (name >= _SC_LEVEL2_CACHE_SIZE && name <= _SC_LEVEL3_CACHE_LINESIZE
      && no_level_2_or_3)
    return -1;

  return 0;
}
/* Return the cache parameter NAME (an _SC_LEVEL*_* value) for an AMD
   CPU, decoding the extended CPUID leaves 0x80000005 (L1) and
   0x80000006 (L2/L3).  Returns 0 when the information is not
   available.  */
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  __cpuid (0x80000000, eax, ebx, ecx, edx);

  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  /* L1 data lives in leaf 0x80000005, L2/L3 in 0x80000006; bail out
     if the CPU does not implement the needed leaf.  */
  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __cpuid (fn, eax, ebx, ecx, edx);

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      /* Instruction cache: leaf 0x80000005 reports it in EDX with the
	 same layout as the data cache in ECX, so remap the query and
	 reuse the data-cache decoding below.  */
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      /* Size in KB is in bits 24-31; convert to bytes.  */
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      ecx >>= 16;
      if ((ecx & 0xff) == 0xff)
	/* Fully associative.  */
	return (ecx << 2) & 0x3fc00;
      return ecx & 0xff;

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      /* Associativity field 0 means the L2 cache is disabled.  */
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      /* The 4-bit field is an encoding, not a direct way count.  */
      switch ((ecx >> 12) & 0xf)
	{
	case 0:
	case 1:
	case 2:
	case 4:
	  return (ecx >> 12) & 0xf;
	case 6:
	  return 8;
	case 8:
	  return 16;
	case 10:
	  return 32;
	case 11:
	  return 48;
	case 12:
	  return 64;
	case 13:
	  return 96;
	case 14:
	  return 128;
	case 15:
	  /* Fully associative: ways == size / linesize.  */
	  return ((ecx >> 6) & 0x3fffc00) / (ecx & 0xff);
	default:
	  return 0;
	}
      /* NOTREACHED */

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
      /* L3 size is reported in 512 KB units in bits 18-31.  */
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      /* Same associativity encoding as L2 above.  */
      switch ((edx >> 12) & 0xf)
	{
	case 0:
	case 1:
	case 2:
	case 4:
	  return (edx >> 12) & 0xf;
	case 6:
	  return 8;
	case 8:
	  return 16;
	case 10:
	  return 32;
	case 11:
	  return 48;
	case 12:
	  return 64;
	case 13:
	  return 96;
	case 14:
	  return 128;
	case 15:
	  /* Fully associative: ways == size / linesize.  */
	  return ((edx & 0x3ffc0000) << 1) / (edx & 0xff);
	default:
	  return 0;
	}
      /* NOTREACHED */

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      assert (! "cannot happen");
    }
  return -1;
}
/* Get the value of the system variable NAME.  Backend for sysconf's
   _SC_LEVEL*_?CACHE_* queries; dispatches on the CPU vendor.  Returns
   0 for unknown vendors or unavailable information.  */
long int
attribute_hidden
__cache_sysconf (int name)
{
#ifdef USE_MULTIARCH
  if (__cpu_features.kind == arch_kind_unknown)
    __init_cpu_features ();
#else
  /* Find out what brand of processor.  The ebx/ecx/edx locals feed
     the is_intel/is_amd vendor-string macros.  */
  unsigned int max_cpuid;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  __cpuid (0, max_cpuid, ebx, ecx, edx);
#endif

  if (is_intel)
    return handle_intel (name, max_cpuid);

  if (is_amd)
    return handle_amd (name);

  // XXX Fill in more vendors.

  /* CPU not known, we have no information.  */
  return 0;
}
/* Cache-geometry variables consumed by the assembler memory and string
   routines (memcpy, memset, ...).  The defaults below are conservative
   fallbacks; init_cacheinfo overwrites them at startup with the real
   values when they can be determined.  */

/* Data cache size for use in memory and string routines, typically
   L1 size, rounded to multiple of 256 bytes.  */
long int __x86_64_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
long int __x86_64_data_cache_size attribute_hidden = 32 * 1024;
/* Similar to __x86_64_data_cache_size_half, but not rounded.  */
long int __x86_64_raw_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
/* Similar to __x86_64_data_cache_size, but not rounded.  */
long int __x86_64_raw_data_cache_size attribute_hidden = 32 * 1024;
/* Shared cache size for use in memory and string routines, typically
   L2 or L3 size, rounded to multiple of 256 bytes.  */
long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
long int __x86_64_shared_cache_size attribute_hidden = 1024 * 1024;
/* Similar to __x86_64_shared_cache_size_half, but not rounded.  */
long int __x86_64_raw_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
/* Similar to __x86_64_shared_cache_size, but not rounded.  */
long int __x86_64_raw_shared_cache_size attribute_hidden = 1024 * 1024;

#ifndef DISABLE_PREFETCHW
/* PREFETCHW support flag for use in memory and string routines.
   Set to -1 by init_cacheinfo when the CPU supports PREFETCHW.  */
int __x86_64_prefetchw attribute_hidden;
#endif

#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
/* Instructions preferred for memory and string routines.

     0: Regular instructions
     1: MMX instructions
     2: SSE2 instructions
     3: SSSE3 instructions
   */
int __x86_64_preferred_memory_instruction attribute_hidden;
#endif
/* ELF constructor: probe the CPU's cache hierarchy once at startup and
   fill in the __x86_64_*_cache_size* tunables above.  The per-thread
   share of the highest-level cache is estimated by dividing its size
   by the number of hardware threads that share it.  */
static void
__attribute__((constructor))
init_cacheinfo (void)
{
  /* Find out what brand of processor.  */
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;
  int max_cpuid_ex;
  long int data = -1;
  long int shared = -1;
  unsigned int level;
  unsigned int threads = 0;

#ifdef USE_MULTIARCH
  if (__cpu_features.kind == arch_kind_unknown)
    __init_cpu_features ();
#else
  int max_cpuid;
  __cpuid (0, max_cpuid, ebx, ecx, edx);
#endif

  if (is_intel)
    {
      data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);

      /* Try L3 first.  */
      level  = 3;
      shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);

      if (shared <= 0)
	{
	  /* Try L2 otherwise.  */
	  level  = 2;
	  shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
	}

      /* CPUID(1) EBX is kept in its own variable because the generic
	 eax/ebx/... locals are clobbered by later CPUID calls but the
	 logical-processor count in ebx_1 is still needed at
	 intel_bug_no_cache_info below.  */
      unsigned int ebx_1;

#ifdef USE_MULTIARCH
      eax = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].eax;
      ebx_1 = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ebx;
      ecx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].ecx;
      edx = __cpu_features.cpuid[COMMON_CPUID_INDEX_1].edx;
#else
      __cpuid (1, eax, ebx_1, ecx, edx);
#endif

#ifndef DISABLE_PREFERRED_MEMORY_INSTRUCTION
      /* Intel prefers SSSE3 instructions for memory/string routines
	 if they are available.  ECX bit 9 of CPUID(1) is SSSE3.  */
      if ((ecx & 0x200))
	__x86_64_preferred_memory_instruction = 3;
      else
	__x86_64_preferred_memory_instruction = 2;
#endif

      /* Figure out the number of logical threads that share the
	 highest cache level.  */
      if (max_cpuid >= 4)
	{
	  int i = 0;

	  /* Query until desired cache level is enumerated.  */
	  do
	    {
	      __cpuid_count (4, i++, eax, ebx, ecx, edx);

	      /* There seems to be a bug in at least some Pentium Ds
		 which sometimes fail to iterate all cache parameters.
		 Do not loop indefinitely here, stop in this case and
		 assume there is no such information.  */
	      if ((eax & 0x1f) == 0)
		goto intel_bug_no_cache_info;
	    }
	  while (((eax >> 5) & 0x7) != level);

	  threads = (eax >> 14) & 0x3ff;

	  /* If max_cpuid >= 11, THREADS is the maximum number of
	     addressable IDs for logical processors sharing the
	     cache, instead of the maximum number of threads
	     sharing the cache.  */
	  if (threads && max_cpuid >= 11)
	    {
	      /* Find the number of logical processors shipped in
		 one core and apply count mask.  */
	      i = 0;
	      while (1)
		{
		  __cpuid_count (11, i++, eax, ebx, ecx, edx);

		  int shipped = ebx & 0xff;
		  int type = ecx & 0xff0;

		  if (shipped == 0 || type == 0)
		    break;
		  else if (type == 0x200)
		    {
		      /* Core level of the topology enumeration.  */
		      int count_mask;

		      /* Compute count mask: smallest all-ones mask
			 covering THREADS, via the highest set bit.  */
		      asm ("bsr %1, %0"
			   : "=r" (count_mask) : "g" (threads));
		      count_mask = ~(-1 << (count_mask + 1));
		      threads = (shipped - 1) & count_mask;
		      break;
		    }
		}
	    }
	  threads += 1;
	}
      else
	{
	intel_bug_no_cache_info:
	  /* Assume that all logical threads share the highest cache level.  */

	  threads = (ebx_1 >> 16) & 0xff;
	}

      /* Cap usage of highest cache level to the number of supported
	 threads.  */
      if (shared > 0 && threads > 0)
	shared /= threads;
    }
  /* This spells out "AuthenticAMD".  */
  else if (is_amd)
    {
      data   = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
      long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
      shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);

      /* Get maximum extended function. */
      __cpuid (0x80000000, max_cpuid_ex, ebx, ecx, edx);

      if (shared <= 0)
	/* No shared L3 cache.  All we have is the L2 cache.  */
	shared = core;
      else
	{
	  /* Figure out the number of logical threads that share L3.  */
	  if (max_cpuid_ex >= 0x80000008)
	    {
	      /* Get width of APIC ID.  */
	      __cpuid (0x80000008, max_cpuid_ex, ebx, ecx, edx);
	      threads = 1 << ((ecx >> 12) & 0x0f);
	    }

	  if (threads == 0)
	    {
	      /* If APIC ID width is not available, use logical
		 processor count.  */
	      __cpuid (0x00000001, max_cpuid_ex, ebx, ecx, edx);

	      /* EDX bit 28 (HTT) indicates the count in EBX is valid.  */
	      if ((edx & (1 << 28)) != 0)
		threads = (ebx >> 16) & 0xff;
	    }

	  /* Cap usage of highest cache level to the number of
	     supported threads.  */
	  if (threads > 0)
	    shared /= threads;

	  /* Account for exclusive L2 and L3 caches.  */
	  shared += core;
	}

#ifndef DISABLE_PREFETCHW
      if (max_cpuid_ex >= 0x80000001)
	{
	  __cpuid (0x80000001, eax, ebx, ecx, edx);
	  /*  PREFETCHW     || 3DNow!  */
	  if ((ecx & 0x100) || (edx & 0x80000000))
	    __x86_64_prefetchw = -1;
	}
#endif
    }

  if (data > 0)
    {
      __x86_64_raw_data_cache_size_half = data / 2;
      __x86_64_raw_data_cache_size = data;
      /* Round data cache size to multiple of 256 bytes.  */
      data = data & ~255L;
      __x86_64_data_cache_size_half = data / 2;
      __x86_64_data_cache_size = data;
    }

  if (shared > 0)
    {
      __x86_64_raw_shared_cache_size_half = shared / 2;
      __x86_64_raw_shared_cache_size = shared;
      /* Round shared cache size to multiple of 256 bytes.  */
      shared = shared & ~255L;
      __x86_64_shared_cache_size_half = shared / 2;
      __x86_64_shared_cache_size = shared;
    }
}