* sysdeps/x86_64/cacheinfo.c (intel_02_known): Add new entries.
[glibc.git] / sysdeps / x86_64 / cacheinfo.c
blob8855b6d45f906a0d3c6948dd5d673a633ecf295e
1 /* x86_64 cache info.
2 Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA.
21 #include <assert.h>
22 #include <stdbool.h>
23 #include <stdlib.h>
24 #include <unistd.h>
/* Table of the Intel cache descriptor bytes known to this file.

   IDX is the descriptor byte; intel_check_word looks entries up with
   bsearch, so the table MUST stay sorted by ascending IDX.  NAME is the
   _SC_*_SIZE constant of the cache the descriptor describes, and SIZE,
   ASSOC and LINESIZE are the values handed back for the corresponding
   _SC_*_SIZE, _SC_*_ASSOC and _SC_*_LINESIZE queries.  */
static const struct intel_02_cache_info
{
  unsigned int idx;
  int name;
  long int size;
  long int assoc;
  long int linesize;
} intel_02_known [] =
  {
    { 0x06, _SC_LEVEL1_ICACHE_SIZE,     8 * 1024,  4, 32 },
    { 0x08, _SC_LEVEL1_ICACHE_SIZE,    16 * 1024,  4, 32 },
    { 0x0a, _SC_LEVEL1_DCACHE_SIZE,     8 * 1024,  2, 32 },
    { 0x0c, _SC_LEVEL1_DCACHE_SIZE,    16 * 1024,  4, 32 },
    { 0x22, _SC_LEVEL3_CACHE_SIZE,    512 * 1024,  4, 64 },
    { 0x23, _SC_LEVEL3_CACHE_SIZE,   1024 * 1024,  8, 64 },
    { 0x25, _SC_LEVEL3_CACHE_SIZE,   2048 * 1024,  8, 64 },
    { 0x29, _SC_LEVEL3_CACHE_SIZE,   4096 * 1024,  8, 64 },
    { 0x2c, _SC_LEVEL1_DCACHE_SIZE,    32 * 1024,  8, 64 },
    { 0x30, _SC_LEVEL1_ICACHE_SIZE,    32 * 1024,  8, 64 },
    { 0x39, _SC_LEVEL2_CACHE_SIZE,    128 * 1024,  4, 64 },
    { 0x3a, _SC_LEVEL2_CACHE_SIZE,    192 * 1024,  6, 64 },
    { 0x3b, _SC_LEVEL2_CACHE_SIZE,    128 * 1024,  2, 64 },
    { 0x3c, _SC_LEVEL2_CACHE_SIZE,    256 * 1024,  4, 64 },
    { 0x3d, _SC_LEVEL2_CACHE_SIZE,    384 * 1024,  6, 64 },
    { 0x3e, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  4, 64 },
    { 0x41, _SC_LEVEL2_CACHE_SIZE,    128 * 1024,  4, 32 },
    { 0x42, _SC_LEVEL2_CACHE_SIZE,    256 * 1024,  4, 32 },
    { 0x43, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  4, 32 },
    { 0x44, _SC_LEVEL2_CACHE_SIZE,   1024 * 1024,  4, 32 },
    { 0x45, _SC_LEVEL2_CACHE_SIZE,   2048 * 1024,  4, 32 },
    { 0x46, _SC_LEVEL3_CACHE_SIZE,   4096 * 1024,  4, 64 },
    { 0x47, _SC_LEVEL3_CACHE_SIZE,   8192 * 1024,  8, 64 },
    { 0x48, _SC_LEVEL2_CACHE_SIZE,   3072 * 1024, 12, 64 },
    { 0x49, _SC_LEVEL2_CACHE_SIZE,   4096 * 1024, 16, 64 },
    { 0x4a, _SC_LEVEL3_CACHE_SIZE,   6144 * 1024, 12, 64 },
    { 0x4b, _SC_LEVEL3_CACHE_SIZE,   8192 * 1024, 16, 64 },
    { 0x4c, _SC_LEVEL3_CACHE_SIZE,  12288 * 1024, 12, 64 },
    { 0x4d, _SC_LEVEL3_CACHE_SIZE,  16384 * 1024, 16, 64 },
    { 0x4e, _SC_LEVEL2_CACHE_SIZE,   6144 * 1024, 24, 64 },
    { 0x60, _SC_LEVEL1_DCACHE_SIZE,    16 * 1024,  8, 64 },
    { 0x66, _SC_LEVEL1_DCACHE_SIZE,     8 * 1024,  4, 64 },
    { 0x67, _SC_LEVEL1_DCACHE_SIZE,    16 * 1024,  4, 64 },
    { 0x68, _SC_LEVEL1_DCACHE_SIZE,    32 * 1024,  4, 64 },
    { 0x78, _SC_LEVEL2_CACHE_SIZE,   1024 * 1024,  8, 64 },
    { 0x79, _SC_LEVEL2_CACHE_SIZE,    128 * 1024,  8, 64 },
    { 0x7a, _SC_LEVEL2_CACHE_SIZE,    256 * 1024,  8, 64 },
    { 0x7b, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  8, 64 },
    { 0x7c, _SC_LEVEL2_CACHE_SIZE,   1024 * 1024,  8, 64 },
    { 0x7d, _SC_LEVEL2_CACHE_SIZE,   2048 * 1024,  8, 64 },
    { 0x7f, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  2, 64 },
    { 0x82, _SC_LEVEL2_CACHE_SIZE,    256 * 1024,  8, 32 },
    { 0x83, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  8, 32 },
    { 0x84, _SC_LEVEL2_CACHE_SIZE,   1024 * 1024,  8, 32 },
    { 0x85, _SC_LEVEL2_CACHE_SIZE,   2048 * 1024,  8, 32 },
    { 0x86, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  4, 64 },
    { 0x87, _SC_LEVEL2_CACHE_SIZE,   1024 * 1024,  8, 64 },
  };

/* Number of entries in intel_02_known.  */
#define nintel_02_known (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
86 static int
87 intel_02_known_compare (const void *p1, const void *p2)
89 const struct intel_02_cache_info *i1;
90 const struct intel_02_cache_info *i2;
92 i1 = (const struct intel_02_cache_info *) p1;
93 i2 = (const struct intel_02_cache_info *) p2;
95 if (i1->idx == i2->idx)
96 return 0;
98 return i1->idx < i2->idx ? -1 : 1;
102 static long int
103 __attribute__ ((noinline))
104 intel_check_word (int name, unsigned int value, bool *has_level_2,
105 bool *no_level_2_or_3)
107 if ((value & 0x80000000) != 0)
108 /* The register value is reserved. */
109 return 0;
111 /* Fold the name. The _SC_ constants are always in the order SIZE,
112 ASSOC, LINESIZE. */
113 int folded_name = (_SC_LEVEL1_ICACHE_SIZE
114 + ((name - _SC_LEVEL1_ICACHE_SIZE) / 3) * 3);
116 while (value != 0)
118 unsigned int byte = value & 0xff;
120 if (byte == 0x40)
122 *no_level_2_or_3 = true;
124 if (folded_name == _SC_LEVEL3_CACHE_SIZE)
125 /* No need to look further. */
126 break;
128 else
130 if (byte == 0x49 && folded_name == _SC_LEVEL3_CACHE_SIZE)
132 /* Intel reused this value. For family 15, model 6 it
133 specifies the 3rd level cache. Otherwise the 2nd
134 level cache. */
135 unsigned int eax;
136 unsigned int ebx;
137 unsigned int ecx;
138 unsigned int edx;
139 asm volatile ("cpuid"
140 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
141 : "0" (1));
143 unsigned int family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
144 unsigned int model = ((((eax >>16) & 0xf) << 4)
145 + ((eax >> 4) & 0xf));
146 if (family == 15 && model == 6)
148 /* The level 3 cache is encoded for this model like
149 the level 2 cache is for other models. Pretend
150 the caller asked for the level 2 cache. */
151 name = (_SC_LEVEL2_CACHE_SIZE
152 + (name - _SC_LEVEL3_CACHE_SIZE));
153 folded_name = _SC_LEVEL3_CACHE_SIZE;
157 struct intel_02_cache_info *found;
158 struct intel_02_cache_info search;
160 search.idx = byte;
161 found = bsearch (&search, intel_02_known, nintel_02_known,
162 sizeof (intel_02_known[0]), intel_02_known_compare);
163 if (found != NULL)
165 if (found->name == folded_name)
167 unsigned int offset = name - folded_name;
169 if (offset == 0)
170 /* Cache size. */
171 return found->size;
172 if (offset == 1)
173 return found->assoc;
175 assert (offset == 2);
176 return found->linesize;
179 if (found->name == _SC_LEVEL2_CACHE_SIZE)
180 *has_level_2 = true;
184 /* Next byte for the next round. */
185 value >>= 8;
188 /* Nothing found. */
189 return 0;
/* Answer the cache query NAME on an Intel processor by walking the
   descriptor bytes returned by CPUID leaf 2.  MAXIDX is the highest
   supported basic CPUID leaf and must be at least 2.

   Returns the value found by intel_check_word, -1 when a level 2 or
   level 3 property was requested and the 0x40 descriptor was seen
   without any matching entry, and 0 when nothing is known.  */
static long int __attribute__ ((noinline))
handle_intel (int name, unsigned int maxidx)
{
  assert (maxidx >= 2);

  bool has_level_2 = false;
  bool no_level_2_or_3 = false;

  /* At least one round is needed; the real count is learned from the
     first CPUID result.  */
  unsigned int rounds = 1;

  for (unsigned int round = 1; round <= rounds; ++round)
    {
      unsigned int eax;
      unsigned int ebx;
      unsigned int ecx;
      unsigned int edx;
      __asm__ __volatile__ ("cpuid"
                            : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                            : "0" (2));

      if (round == 1)
        {
          /* The low byte of EAX holds the number of rounds, not a
             descriptor: remember it and strip it from the word.  */
          rounds = eax & 0xff;
          eax &= 0xffffff00;
        }

      /* Check each register in turn and stop at the first hit.  */
      const unsigned int words[] = { eax, ebx, ecx, edx };
      for (unsigned int w = 0; w < sizeof (words) / sizeof (words[0]); ++w)
        {
          long int hit = intel_check_word (name, words[w], &has_level_2,
                                           &no_level_2_or_3);
          if (hit != 0)
            return hit;
        }
    }

  const bool asks_level_2_or_3 = (name >= _SC_LEVEL2_CACHE_SIZE
                                  && name <= _SC_LEVEL3_CACHE_LINESIZE);
  if (asks_level_2_or_3 && no_level_2_or_3)
    return -1;

  return 0;
}
/* Answer the cache query NAME on an AMD processor using the extended
   CPUID leaves 0x80000005 (level 1) and 0x80000006 (level 2).

   Returns 0 for level 3 and higher queries and when the needed leaf is
   not supported; otherwise the value decoded from the CPUID register.
   An unexpected NAME trips the assertion (and yields -1 when assertions
   are compiled out).  */
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int ra;
  unsigned int rb;
  unsigned int info;
  unsigned int rd;

  /* Highest supported extended leaf ends up in RA.  */
  __asm__ __volatile__ ("cpuid"
                        : "=a" (ra), "=b" (rb), "=c" (info), "=d" (rd)
                        : "0" (0x80000000));

  if (name >= _SC_LEVEL3_CACHE_SIZE)
    return 0;

  const unsigned int leaf = (name >= _SC_LEVEL2_CACHE_SIZE
                             ? 0x80000006 : 0x80000005);
  if (ra < leaf)
    return 0;

  __asm__ __volatile__ ("cpuid"
                        : "=a" (ra), "=b" (rb), "=c" (info), "=d" (rd)
                        : "0" (leaf));

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      /* Instruction cache query: the data sits in EDX with the same
         layout, so decode it through the data cache cases.  */
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      info = rd;
    }

  if (name == _SC_LEVEL1_DCACHE_SIZE)
    /* Bits 31:24 scaled by 1024.  */
    return (info >> 14) & 0x3fc00;

  if (name == _SC_LEVEL1_DCACHE_ASSOC)
    {
      const unsigned int upper = info >> 16;
      if ((upper & 0xff) == 0xff)
        /* Fully associative.  */
        return (upper << 2) & 0x3fc00;
      return upper & 0xff;
    }

  if (name == _SC_LEVEL1_DCACHE_LINESIZE)
    return info & 0xff;

  if (name == _SC_LEVEL2_CACHE_SIZE)
    /* A zero associativity field is reported as size 0; otherwise
       bits 31:16 scaled by 1024.  */
    return (info & 0xf000) == 0 ? 0 : (info >> 6) & 0x3fffc00;

  if (name == _SC_LEVEL2_CACHE_ASSOC)
    {
      const unsigned int shifted = info >> 12;
      switch (shifted & 0xf)
        {
        case 0:
        case 1:
        case 2:
        case 4:
          /* These encodings are returned unchanged.  */
          return shifted & 0xf;
        case 6:
          return 8;
        case 8:
          return 16;
        case 0xf:
          /* Fully associative.  */
          return (shifted << 6) & 0x3fffc00;
        default:
          return 0;
        }
    }

  if (name == _SC_LEVEL2_CACHE_LINESIZE)
    return (info & 0xf000) == 0 ? 0 : info & 0xff;

  assert (! "cannot happen");
  return -1;
}
320 /* Get the value of the system variable NAME. */
321 long int
322 attribute_hidden
323 __cache_sysconf (int name)
325 /* Find out what brand of processor. */
326 unsigned int eax;
327 unsigned int ebx;
328 unsigned int ecx;
329 unsigned int edx;
330 asm volatile ("cpuid"
331 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
332 : "0" (0));
334 /* This spells out "GenuineIntel". */
335 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
336 return handle_intel (name, eax);
338 /* This spells out "AuthenticAMD". */
339 if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
340 return handle_amd (name);
342 // XXX Fill in more vendors.
344 /* CPU not known, we have no information. */
345 return 0;
349 /* Half the core cache size for use in memory and string routines, typically
350 L1 size. */
351 long int __x86_64_core_cache_size_half attribute_hidden = 32 * 1024 / 2;
352 /* Shared cache size for use in memory and string routines, typically
353 L2 or L3 size. */
354 long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
355 /* PREFETCHW support flag for use in memory and string routines. */
356 int __x86_64_prefetchw attribute_hidden;
359 static void
360 __attribute__((constructor))
361 init_cacheinfo (void)
363 /* Find out what brand of processor. */
364 unsigned int eax;
365 unsigned int ebx;
366 unsigned int ecx;
367 unsigned int edx;
368 int max_cpuid;
369 int max_cpuid_ex;
370 long int core = -1;
371 long int shared = -1;
372 unsigned int level;
373 unsigned int threads = 0;
375 asm volatile ("cpuid"
376 : "=a" (max_cpuid), "=b" (ebx), "=c" (ecx), "=d" (edx)
377 : "0" (0));
379 /* This spells out "GenuineIntel". */
380 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
382 core = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
384 /* Try L3 first. */
385 level = 3;
386 shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
388 if (shared <= 0)
390 /* Try L2 otherwise. */
391 level = 2;
392 shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
395 /* Figure out the number of logical threads that share the
396 highest cache level. */
397 if (max_cpuid >= 4)
399 int i = 0;
401 /* Query until desired cache level is enumerated. */
404 asm volatile ("cpuid"
405 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
406 : "0" (4), "2" (i++));
408 while (((eax >> 5) & 0x7) != level);
410 threads = ((eax >> 14) & 0x3ff) + 1;
412 else
414 /* Assume that all logical threads share the highest cache level. */
415 asm volatile ("cpuid"
416 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
417 : "0" (1));
419 threads = (ebx >> 16) & 0xff;
422 /* Cap usage of highest cache level to the number of supported
423 threads. */
424 if (shared > 0 && threads > 0)
425 shared /= threads;
427 /* This spells out "AuthenticAMD". */
428 else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
430 core = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
431 shared = handle_amd (_SC_LEVEL2_CACHE_SIZE);
433 asm volatile ("cpuid"
434 : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx), "=d" (edx)
435 : "0" (0x80000000));
437 if (max_cpuid_ex >= 0x80000001)
439 asm volatile ("cpuid"
440 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
441 : "0" (0x80000001));
442 /* PREFETCHW || 3DNow! */
443 if ((ecx & 0x100) || (edx & 0x80000000))
444 __x86_64_prefetchw = -1;
448 if (core > 0)
449 __x86_64_core_cache_size_half = core / 2;
451 if (shared > 0)
452 __x86_64_shared_cache_size_half = shared / 2;