Updated to fedora-glibc-20071010T2047
[glibc.git] / sysdeps / x86_64 / cacheinfo.c
blob12102fea81c0798613cb56d38f98c7b122f1c1a5
1 /* x86_64 cache info.
2 Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA.
21 #include <assert.h>
22 #include <stdbool.h>
23 #include <stdlib.h>
24 #include <unistd.h>
/* Description of one cache descriptor byte as reported by CPUID leaf 2
   on Intel processors.

   idx       the descriptor byte itself (the lookup key),
   name      the _SC_*_SIZE constant of the cache the descriptor refers to,
   size      cache size in bytes,
   assoc     number of ways,
   linesize  cache line size in bytes.

   The table is searched with bsearch (), so the entries MUST remain
   sorted by ascending idx.  */
#define INTEL_02_KIB(n) ((n) * 1024L)
#define INTEL_02_MIB(n) ((n) * 1024L * 1024L)

static const struct intel_02_cache_info
{
  unsigned int idx;
  int name;
  long int size;
  long int assoc;
  long int linesize;
} intel_02_known [] =
  {
    { 0x06, _SC_LEVEL1_ICACHE_SIZE, INTEL_02_KIB (8),    4, 32 },
    { 0x08, _SC_LEVEL1_ICACHE_SIZE, INTEL_02_KIB (16),   4, 32 },
    { 0x0a, _SC_LEVEL1_DCACHE_SIZE, INTEL_02_KIB (8),    2, 32 },
    { 0x0c, _SC_LEVEL1_DCACHE_SIZE, INTEL_02_KIB (16),   4, 32 },
    { 0x22, _SC_LEVEL3_CACHE_SIZE,  INTEL_02_KIB (512),  4, 64 },
    { 0x23, _SC_LEVEL3_CACHE_SIZE,  INTEL_02_MIB (1),    8, 64 },
    { 0x25, _SC_LEVEL3_CACHE_SIZE,  INTEL_02_MIB (2),    8, 64 },
    { 0x29, _SC_LEVEL3_CACHE_SIZE,  INTEL_02_MIB (4),    8, 64 },
    { 0x2c, _SC_LEVEL1_DCACHE_SIZE, INTEL_02_KIB (32),   8, 64 },
    { 0x30, _SC_LEVEL1_ICACHE_SIZE, INTEL_02_KIB (32),   8, 64 },
    { 0x39, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_KIB (128),  4, 64 },
    { 0x3a, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_KIB (192),  6, 64 },
    { 0x3b, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_KIB (128),  2, 64 },
    { 0x3c, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_KIB (256),  4, 64 },
    { 0x3d, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_KIB (384),  6, 64 },
    { 0x3e, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_KIB (512),  4, 64 },
    { 0x41, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_KIB (128),  4, 32 },
    { 0x42, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_KIB (256),  4, 32 },
    { 0x43, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_KIB (512),  4, 32 },
    { 0x44, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_MIB (1),    4, 32 },
    { 0x45, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_MIB (2),    4, 32 },
    { 0x46, _SC_LEVEL3_CACHE_SIZE,  INTEL_02_MIB (4),    4, 64 },
    { 0x47, _SC_LEVEL3_CACHE_SIZE,  INTEL_02_MIB (8),    8, 64 },
    { 0x48, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_MIB (3),   12, 64 },
    { 0x49, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_MIB (4),   16, 64 },
    { 0x4a, _SC_LEVEL3_CACHE_SIZE,  INTEL_02_MIB (6),   12, 64 },
    { 0x4b, _SC_LEVEL3_CACHE_SIZE,  INTEL_02_MIB (8),   16, 64 },
    { 0x4c, _SC_LEVEL3_CACHE_SIZE,  INTEL_02_MIB (12),  12, 64 },
    { 0x4d, _SC_LEVEL3_CACHE_SIZE,  INTEL_02_MIB (16),  16, 64 },
    { 0x4e, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_MIB (6),   24, 64 },
    { 0x60, _SC_LEVEL1_DCACHE_SIZE, INTEL_02_KIB (16),   8, 64 },
    { 0x66, _SC_LEVEL1_DCACHE_SIZE, INTEL_02_KIB (8),    4, 64 },
    { 0x67, _SC_LEVEL1_DCACHE_SIZE, INTEL_02_KIB (16),   4, 64 },
    { 0x68, _SC_LEVEL1_DCACHE_SIZE, INTEL_02_KIB (32),   4, 64 },
    { 0x78, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_MIB (1),    8, 64 },
    { 0x79, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_KIB (128),  8, 64 },
    { 0x7a, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_KIB (256),  8, 64 },
    { 0x7b, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_KIB (512),  8, 64 },
    { 0x7c, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_MIB (1),    8, 64 },
    { 0x7d, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_MIB (2),    8, 64 },
    { 0x7f, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_KIB (512),  2, 64 },
    { 0x82, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_KIB (256),  8, 32 },
    { 0x83, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_KIB (512),  8, 32 },
    { 0x84, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_MIB (1),    8, 32 },
    { 0x85, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_MIB (2),    8, 32 },
    { 0x86, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_KIB (512),  4, 64 },
    { 0x87, _SC_LEVEL2_CACHE_SIZE,  INTEL_02_MIB (1),    8, 64 },
  };

#undef INTEL_02_MIB
#undef INTEL_02_KIB

/* Number of entries in intel_02_known.  */
#define nintel_02_known (sizeof intel_02_known / sizeof intel_02_known[0])
86 static int
87 intel_02_known_compare (const void *p1, const void *p2)
89 const struct intel_02_cache_info *i1;
90 const struct intel_02_cache_info *i2;
92 i1 = (const struct intel_02_cache_info *) p1;
93 i2 = (const struct intel_02_cache_info *) p2;
95 if (i1->idx == i2->idx)
96 return 0;
98 return i1->idx < i2->idx ? -1 : 1;
102 static long int
103 __attribute__ ((noinline))
104 intel_check_word (int name, unsigned int value, bool *has_level_2,
105 bool *no_level_2_or_3)
107 if ((value & 0x80000000) != 0)
108 /* The register value is reserved. */
109 return 0;
111 /* Fold the name. The _SC_ constants are always in the order SIZE,
112 ASSOC, LINESIZE. */
113 int folded_name = (_SC_LEVEL1_ICACHE_SIZE
114 + ((name - _SC_LEVEL1_ICACHE_SIZE) / 3) * 3);
116 while (value != 0)
118 unsigned int byte = value & 0xff;
120 if (byte == 0x40)
122 *no_level_2_or_3 = true;
124 if (folded_name == _SC_LEVEL3_CACHE_SIZE)
125 /* No need to look further. */
126 break;
128 else
130 if (byte == 0x49 && folded_name == _SC_LEVEL3_CACHE_SIZE)
132 /* Intel reused this value. For family 15, model 6 it
133 specifies the 3rd level cache. Otherwise the 2nd
134 level cache. */
135 unsigned int eax;
136 unsigned int ebx;
137 unsigned int ecx;
138 unsigned int edx;
139 asm volatile ("cpuid"
140 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
141 : "0" (1));
143 unsigned int family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
144 unsigned int model = ((((eax >>16) & 0xf) << 4)
145 + ((eax >> 4) & 0xf));
146 if (family == 15 && model == 6)
148 /* The level 3 cache is encoded for this model like
149 the level 2 cache is for other models. Pretend
150 the caller asked for the level 2 cache. */
151 name = (_SC_LEVEL2_CACHE_SIZE
152 + (name - _SC_LEVEL3_CACHE_SIZE));
153 folded_name = _SC_LEVEL3_CACHE_SIZE;
157 struct intel_02_cache_info *found;
158 struct intel_02_cache_info search;
160 search.idx = byte;
161 found = bsearch (&search, intel_02_known, nintel_02_known,
162 sizeof (intel_02_known[0]), intel_02_known_compare);
163 if (found != NULL)
165 if (found->name == folded_name)
167 unsigned int offset = name - folded_name;
169 if (offset == 0)
170 /* Cache size. */
171 return found->size;
172 if (offset == 1)
173 return found->assoc;
175 assert (offset == 2);
176 return found->linesize;
179 if (found->name == _SC_LEVEL2_CACHE_SIZE)
180 *has_level_2 = true;
184 /* Next byte for the next round. */
185 value >>= 8;
188 /* Nothing found. */
189 return 0;
/* Answer the cache query NAME (an _SC_LEVEL*_CACHE_* constant) on an
   Intel processor by walking the descriptor bytes of CPUID leaf 2.
   MAXIDX is the highest supported basic CPUID leaf and must be at
   least 2.

   Returns the value found by intel_check_word, -1 if NAME asks about
   the level 2 or level 3 cache and the "no level 2 or 3 cache" marker
   was seen, and 0 if nothing is known.  */
static long int __attribute__ ((noinline))
handle_intel (int name, unsigned int maxidx)
{
  assert (maxidx >= 2);

  /* OK, we can use the CPUID instruction to get all info about the
     caches.  */
  bool saw_no_l2_or_l3 = false;
  bool saw_l2 = false;
  unsigned int rounds = 1;

  for (unsigned int round = 1; round <= rounds; ++round)
    {
      /* regs[] holds EAX, EBX, ECX, EDX in that order.  */
      unsigned int regs[4];
      __asm__ __volatile__ ("cpuid"
                            : "=a" (regs[0]), "=b" (regs[1]),
                              "=c" (regs[2]), "=d" (regs[3])
                            : "0" (2));

      /* In the first round the low byte of EAX is not a descriptor
         but the number of rounds to perform (at least the one we are
         in right now).  Remember it and mask it out.  */
      if (round == 1)
        {
          rounds = regs[0] & 0xff;
          regs[0] &= 0xffffff00;
        }

      /* Examine each register in turn; the first hit wins.  */
      for (unsigned int r = 0; r < 4; ++r)
        {
          long int hit = intel_check_word (name, regs[r], &saw_l2,
                                           &saw_no_l2_or_l3);
          if (hit != 0)
            return hit;
        }
    }

  bool asks_l2_or_l3 = (name >= _SC_LEVEL2_CACHE_SIZE
                        && name <= _SC_LEVEL3_CACHE_LINESIZE);
  if (asks_l2_or_l3 && saw_no_l2_or_l3)
    return -1;

  return 0;
}
/* Translate the 4-bit associativity CODE found in bits 15:12 of the
   L2 (ECX) and L3 (EDX) descriptors of CPUID 0x80000006 into a number
   of ways.  SIZE is the cache size in bytes and LINESIZE the line size
   in bytes; they are only used for the fully associative encoding
   (15), where the result is the number of lines.  Returns 0 for
   reserved encodings and when LINESIZE is 0 (avoids dividing by
   zero on bogus CPUID data).  */
static long int
amd_cache_assoc (unsigned int code, unsigned int size,
                 unsigned int linesize)
{
  switch (code)
    {
    case 0:
    case 1:
    case 2:
    case 4:
      /* These codes are their own value.  */
      return code;
    case 6:
      return 8;
    case 8:
      return 16;
    case 10:
      return 32;
    case 11:
      return 48;
    case 12:
      return 64;
    case 13:
      return 96;
    case 14:
      return 128;
    case 15:
      /* Fully associative.  */
      return linesize != 0 ? size / linesize : 0;
    default:
      return 0;
    }
}

/* Answer the cache query NAME (an _SC_LEVEL*_CACHE_* constant) on an
   AMD processor using the extended CPUID leaves 0x80000005 (level 1)
   and 0x80000006 (level 2 and 3).

   Returns the requested value, or 0 if the information is not
   available (leaf not supported, cache reported as absent, or NAME
   refers to a level 4 cache).  */
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  /* No level 4 cache (yet).  */
  if (name > _SC_LEVEL3_CACHE_LINESIZE)
    return 0;

  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;

  /* EAX of leaf 0x80000000 is the highest supported extended leaf.  */
  __asm__ __volatile__ ("cpuid"
                        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                        : "0" (0x80000000));

  /* Level 1 data lives in leaf 0x80000005, level 2/3 in 0x80000006.  */
  unsigned int fn = 0x80000005 + (name >= _SC_LEVEL2_CACHE_SIZE);
  if (eax < fn)
    return 0;

  __asm__ __volatile__ ("cpuid"
                        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                        : "0" (fn));

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      /* Level 1 instruction cache: the descriptor has the same layout
         as the data cache one but is delivered in EDX.  Map the
         request onto the data cache cases below.  */
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      ecx = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      /* Bits 31:24 hold the size in KiB; convert to bytes.  */
      return (ecx >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      ecx >>= 16;
      if ((ecx & 0xff) == 0xff)
        /* Fully associative.
           NOTE(review): this yields the cache size in bytes, whereas
           the level 2/3 code returns size / line size for the fully
           associative case -- confirm which one is intended.  */
        return (ecx << 2) & 0x3fc00;
      return ecx & 0xff;

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return ecx & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      /* An associativity field of 0 means there is no such cache.
         Bits 31:16 hold the size in KiB.  */
      return (ecx & 0xf000) == 0 ? 0 : (ecx >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      return amd_cache_assoc ((ecx >> 12) & 0xf,
                              (ecx >> 6) & 0x3fffc00,
                              ecx & 0xff);

    case _SC_LEVEL2_CACHE_LINESIZE:
      return (ecx & 0xf000) == 0 ? 0 : ecx & 0xff;

    case _SC_LEVEL3_CACHE_SIZE:
      /* The size field starts at bit 18 and counts 512 KiB units.  */
      return (edx & 0xf000) == 0 ? 0 : (edx & 0x3ffc0000) << 1;

    case _SC_LEVEL3_CACHE_ASSOC:
      return amd_cache_assoc ((edx >> 12) & 0xf,
                              (edx & 0x3ffc0000) << 1,
                              edx & 0xff);

    case _SC_LEVEL3_CACHE_LINESIZE:
      return (edx & 0xf000) == 0 ? 0 : edx & 0xff;

    default:
      assert (! "cannot happen");
    }

  return -1;
}
372 /* Get the value of the system variable NAME. */
373 long int
374 attribute_hidden
375 __cache_sysconf (int name)
377 /* Find out what brand of processor. */
378 unsigned int eax;
379 unsigned int ebx;
380 unsigned int ecx;
381 unsigned int edx;
382 asm volatile ("cpuid"
383 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
384 : "0" (0));
386 /* This spells out "GenuineIntel". */
387 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
388 return handle_intel (name, eax);
390 /* This spells out "AuthenticAMD". */
391 if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
392 return handle_amd (name);
394 // XXX Fill in more vendors.
396 /* CPU not known, we have no information. */
397 return 0;
401 /* Half the data cache size for use in memory and string routines, typically
402 L1 size. */
403 long int __x86_64_data_cache_size_half attribute_hidden = 32 * 1024 / 2;
404 /* Shared cache size for use in memory and string routines, typically
405 L2 or L3 size. */
406 long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
407 /* PREFETCHW support flag for use in memory and string routines. */
408 int __x86_64_prefetchw attribute_hidden;
411 static void
412 __attribute__((constructor))
413 init_cacheinfo (void)
415 /* Find out what brand of processor. */
416 unsigned int eax;
417 unsigned int ebx;
418 unsigned int ecx;
419 unsigned int edx;
420 int max_cpuid;
421 int max_cpuid_ex;
422 long int data = -1;
423 long int shared = -1;
424 unsigned int level;
425 unsigned int threads = 0;
427 asm volatile ("cpuid"
428 : "=a" (max_cpuid), "=b" (ebx), "=c" (ecx), "=d" (edx)
429 : "0" (0));
431 /* This spells out "GenuineIntel". */
432 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
434 data = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
436 /* Try L3 first. */
437 level = 3;
438 shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
440 if (shared <= 0)
442 /* Try L2 otherwise. */
443 level = 2;
444 shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
447 /* Figure out the number of logical threads that share the
448 highest cache level. */
449 if (max_cpuid >= 4)
451 int i = 0;
453 /* Query until desired cache level is enumerated. */
456 asm volatile ("cpuid"
457 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
458 : "0" (4), "2" (i++));
460 /* There seems to be a bug in at least some Pentium Ds
461 which sometimes fail to iterate all cache parameters.
462 Do not loop indefinitely here, stop in this case and
463 assume there is no such information. */
464 if ((eax & 0x1f) == 0)
465 goto intel_bug_no_cache_info;
467 while (((eax >> 5) & 0x7) != level);
469 threads = ((eax >> 14) & 0x3ff) + 1;
471 else
473 intel_bug_no_cache_info:
474 /* Assume that all logical threads share the highest cache level. */
475 asm volatile ("cpuid"
476 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
477 : "0" (1));
479 threads = (ebx >> 16) & 0xff;
482 /* Cap usage of highest cache level to the number of supported
483 threads. */
484 if (shared > 0 && threads > 0)
485 shared /= threads;
487 /* This spells out "AuthenticAMD". */
488 else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
490 data = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
491 long int core = handle_amd (_SC_LEVEL2_CACHE_SIZE);
492 shared = handle_amd (_SC_LEVEL3_CACHE_SIZE);
494 /* Get maximum extended function. */
495 asm volatile ("cpuid"
496 : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx), "=d" (edx)
497 : "0" (0x80000000));
499 if (shared <= 0)
500 /* No shared L3 cache. All we have is the L2 cache. */
501 shared = core;
502 else
504 /* Figure out the number of logical threads that share L3. */
505 if (max_cpuid_ex >= 0x80000008)
507 /* Get width of APIC ID. */
508 asm volatile ("cpuid"
509 : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx),
510 "=d" (edx)
511 : "0" (0x80000008));
512 threads = 1 << ((ecx >> 12) & 0x0f);
515 if (threads == 0)
517 /* If APIC ID width is not available, use logical
518 processor count. */
519 asm volatile ("cpuid"
520 : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx),
521 "=d" (edx)
522 : "0" (0x00000001));
524 if ((edx & (1 << 28)) != 0)
525 threads = (ebx >> 16) & 0xff;
528 /* Cap usage of highest cache level to the number of
529 supported threads. */
530 if (threads > 0)
531 shared /= threads;
533 /* Account for exclusive L2 and L3 caches. */
534 shared += core;
537 if (max_cpuid_ex >= 0x80000001)
539 asm volatile ("cpuid"
540 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
541 : "0" (0x80000001));
542 /* PREFETCHW || 3DNow! */
543 if ((ecx & 0x100) || (edx & 0x80000000))
544 __x86_64_prefetchw = -1;
548 if (data > 0)
549 __x86_64_data_cache_size_half = data / 2;
551 if (shared > 0)
552 __x86_64_shared_cache_size_half = shared / 2;