* sysdeps/x86_64/cacheinfo.c (init_cacheinfo): Pass correct value
[glibc.git] / sysdeps / x86_64 / cacheinfo.c
blobf8217a1757d9beec3bac9a01ba8204519ba774a6
/* x86_64 cache info.
   Copyright (C) 2003, 2004, 2006, 2007 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
21 #include <assert.h>
22 #include <stdbool.h>
23 #include <stdlib.h>
24 #include <unistd.h>
/* One record of the table used to decode the descriptor bytes that
   CPUID leaf 2 returns on Intel processors.  */
struct intel_02_cache_info
{
  /* Descriptor byte value; this is the lookup key.  */
  unsigned int idx;
  /* The _SC_LEVEL*_*CACHE_SIZE constant naming the described cache.  */
  int name;
  /* Value reported for the ..._SIZE name (cache size in bytes).  */
  long int size;
  /* Value reported for the ..._ASSOC name.  */
  long int assoc;
  /* Value reported for the ..._LINESIZE name.  */
  long int linesize;
};

/* Known descriptor bytes.  The records are kept in ascending IDX order
   because they are searched with bsearch.  */
static const struct intel_02_cache_info intel_02_known [] =
  {
    { 0x06, _SC_LEVEL1_ICACHE_SIZE,     8 * 1024,  4, 32 },
    { 0x08, _SC_LEVEL1_ICACHE_SIZE,    16 * 1024,  4, 32 },
    { 0x0a, _SC_LEVEL1_DCACHE_SIZE,     8 * 1024,  2, 32 },
    { 0x0c, _SC_LEVEL1_DCACHE_SIZE,    16 * 1024,  4, 32 },
    { 0x22, _SC_LEVEL3_CACHE_SIZE,    512 * 1024,  4, 64 },
    { 0x23, _SC_LEVEL3_CACHE_SIZE,   1024 * 1024,  8, 64 },
    { 0x25, _SC_LEVEL3_CACHE_SIZE,   2048 * 1024,  8, 64 },
    { 0x29, _SC_LEVEL3_CACHE_SIZE,   4096 * 1024,  8, 64 },
    { 0x2c, _SC_LEVEL1_DCACHE_SIZE,    32 * 1024,  8, 64 },
    { 0x30, _SC_LEVEL1_ICACHE_SIZE,    32 * 1024,  8, 64 },
    { 0x39, _SC_LEVEL2_CACHE_SIZE,    128 * 1024,  4, 64 },
    { 0x3a, _SC_LEVEL2_CACHE_SIZE,    192 * 1024,  6, 64 },
    { 0x3b, _SC_LEVEL2_CACHE_SIZE,    128 * 1024,  2, 64 },
    { 0x3c, _SC_LEVEL2_CACHE_SIZE,    256 * 1024,  4, 64 },
    { 0x3d, _SC_LEVEL2_CACHE_SIZE,    384 * 1024,  6, 64 },
    { 0x3e, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  4, 64 },
    { 0x41, _SC_LEVEL2_CACHE_SIZE,    128 * 1024,  4, 32 },
    { 0x42, _SC_LEVEL2_CACHE_SIZE,    256 * 1024,  4, 32 },
    { 0x43, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  4, 32 },
    { 0x44, _SC_LEVEL2_CACHE_SIZE,   1024 * 1024,  4, 32 },
    { 0x45, _SC_LEVEL2_CACHE_SIZE,   2048 * 1024,  4, 32 },
    { 0x46, _SC_LEVEL3_CACHE_SIZE,   4096 * 1024,  4, 64 },
    { 0x47, _SC_LEVEL3_CACHE_SIZE,   8192 * 1024,  8, 64 },
    { 0x49, _SC_LEVEL2_CACHE_SIZE,   4096 * 1024, 16, 64 },
    { 0x4a, _SC_LEVEL3_CACHE_SIZE,   6144 * 1024, 12, 64 },
    { 0x4b, _SC_LEVEL3_CACHE_SIZE,   8192 * 1024, 16, 64 },
    { 0x4c, _SC_LEVEL3_CACHE_SIZE,  12288 * 1024, 12, 64 },
    { 0x4d, _SC_LEVEL3_CACHE_SIZE,  16384 * 1024, 16, 64 },
    { 0x60, _SC_LEVEL1_DCACHE_SIZE,    16 * 1024,  8, 64 },
    { 0x66, _SC_LEVEL1_DCACHE_SIZE,     8 * 1024,  4, 64 },
    { 0x67, _SC_LEVEL1_DCACHE_SIZE,    16 * 1024,  4, 64 },
    { 0x68, _SC_LEVEL1_DCACHE_SIZE,    32 * 1024,  4, 64 },
    { 0x78, _SC_LEVEL2_CACHE_SIZE,   1024 * 1024,  8, 64 },
    { 0x79, _SC_LEVEL2_CACHE_SIZE,    128 * 1024,  8, 64 },
    { 0x7a, _SC_LEVEL2_CACHE_SIZE,    256 * 1024,  8, 64 },
    { 0x7b, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  8, 64 },
    { 0x7c, _SC_LEVEL2_CACHE_SIZE,   1024 * 1024,  8, 64 },
    { 0x7d, _SC_LEVEL2_CACHE_SIZE,   2048 * 1024,  8, 64 },
    { 0x7f, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  2, 64 },
    { 0x82, _SC_LEVEL2_CACHE_SIZE,    256 * 1024,  8, 32 },
    { 0x83, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  8, 32 },
    { 0x84, _SC_LEVEL2_CACHE_SIZE,   1024 * 1024,  8, 32 },
    { 0x85, _SC_LEVEL2_CACHE_SIZE,   2048 * 1024,  8, 32 },
    { 0x86, _SC_LEVEL2_CACHE_SIZE,    512 * 1024,  4, 64 },
    { 0x87, _SC_LEVEL2_CACHE_SIZE,   1024 * 1024,  8, 64 },
  };

/* Number of records in intel_02_known.  */
#define nintel_02_known \
  (sizeof (intel_02_known) / sizeof (intel_02_known [0]))
84 static int
85 intel_02_known_compare (const void *p1, const void *p2)
87 const struct intel_02_cache_info *i1;
88 const struct intel_02_cache_info *i2;
90 i1 = (const struct intel_02_cache_info *) p1;
91 i2 = (const struct intel_02_cache_info *) p2;
93 if (i1->idx == i2->idx)
94 return 0;
96 return i1->idx < i2->idx ? -1 : 1;
100 static long int
101 __attribute__ ((noinline))
102 intel_check_word (int name, unsigned int value, bool *has_level_2,
103 bool *no_level_2_or_3)
105 if ((value & 0x80000000) != 0)
106 /* The register value is reserved. */
107 return 0;
109 /* Fold the name. The _SC_ constants are always in the order SIZE,
110 ASSOC, LINESIZE. */
111 int folded_name = (_SC_LEVEL1_ICACHE_SIZE
112 + ((name - _SC_LEVEL1_ICACHE_SIZE) / 3) * 3);
114 while (value != 0)
116 unsigned int byte = value & 0xff;
118 if (byte == 0x40)
120 *no_level_2_or_3 = true;
122 if (folded_name == _SC_LEVEL3_CACHE_SIZE)
123 /* No need to look further. */
124 break;
126 else
128 if (byte == 0x49 && folded_name == _SC_LEVEL3_CACHE_SIZE)
130 /* Intel reused this value. For family 15, model 6 it
131 specifies the 3rd level cache. Otherwise the 2nd
132 level cache. */
133 unsigned int eax;
134 unsigned int ebx;
135 unsigned int ecx;
136 unsigned int edx;
137 asm volatile ("cpuid"
138 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
139 : "0" (1));
141 unsigned int family = ((eax >> 20) & 0xff) + ((eax >> 8) & 0xf);
142 unsigned int model = ((((eax >>16) & 0xf) << 4)
143 + ((eax >> 4) & 0xf));
144 if (family == 15 && model == 6)
146 /* The level 3 cache is encoded for this model like
147 the level 2 cache is for other models. Pretend
148 the caller asked for the level 2 cache. */
149 name = (_SC_LEVEL2_CACHE_SIZE
150 + (name - _SC_LEVEL3_CACHE_SIZE));
151 folded_name = _SC_LEVEL3_CACHE_SIZE;
155 struct intel_02_cache_info *found;
156 struct intel_02_cache_info search;
158 search.idx = byte;
159 found = bsearch (&search, intel_02_known, nintel_02_known,
160 sizeof (intel_02_known[0]), intel_02_known_compare);
161 if (found != NULL)
163 if (found->name == folded_name)
165 unsigned int offset = name - folded_name;
167 if (offset == 0)
168 /* Cache size. */
169 return found->size;
170 if (offset == 1)
171 return found->assoc;
173 assert (offset == 2);
174 return found->linesize;
177 if (found->name == _SC_LEVEL2_CACHE_SIZE)
178 *has_level_2 = true;
182 /* Next byte for the next round. */
183 value >>= 8;
186 /* Nothing found. */
187 return 0;
/* Determine the cache parameter NAME on an Intel processor by decoding
   the descriptors returned by CPUID leaf 2.  MAXIDX is the highest
   supported basic CPUID leaf and must be at least 2.

   Returns the value found, -1 when a level 2 or level 3 parameter was
   requested and the processor reported the "no level 2 or 3 cache"
   descriptor, and 0 when nothing is known.  */
static long int __attribute__ ((noinline))
handle_intel (int name, unsigned int maxidx)
{
  assert (maxidx >= 2);

  bool saw_level_2 = false;
  bool saw_no_level_2_or_3 = false;

  /* At least one round, the one that tells us how many there are.  */
  unsigned int rounds = 1;

  for (unsigned int round = 1; round <= rounds; ++round)
    {
      /* EAX, EBX, ECX, EDX in this order.  */
      unsigned int regs[4];
      __asm__ __volatile__ ("cpuid"
                            : "=a" (regs[0]), "=b" (regs[1]),
                              "=c" (regs[2]), "=d" (regs[3])
                            : "0" (2));

      if (round == 1)
        {
          /* The low byte of EAX in the first round is the number of
             rounds to make; it is not a descriptor, so mask it out.  */
          rounds = regs[0] & 0xff;
          regs[0] &= 0xffffff00;
        }

      /* Examine each register in turn; the first hit wins.  */
      for (unsigned int i = 0; i < 4; ++i)
        {
          long int hit = intel_check_word (name, regs[i], &saw_level_2,
                                           &saw_no_level_2_or_3);
          if (hit != 0)
            return hit;
        }
    }

  const bool wants_level_2_or_3 = (name >= _SC_LEVEL2_CACHE_SIZE
                                   && name <= _SC_LEVEL3_CACHE_LINESIZE);

  return (wants_level_2_or_3 && saw_no_level_2_or_3) ? -1 : 0;
}
/* Determine the cache parameter NAME on an AMD processor using the
   extended CPUID leaves 0x80000005 (level 1) and 0x80000006 (level 2).

   Returns 0 for level 3 and higher names and when the needed leaf is
   not supported.  Otherwise returns the value decoded from the leaf.  */
static long int __attribute__ ((noinline))
handle_amd (int name)
{
  unsigned int eax;
  unsigned int ebx;
  unsigned int ecx;
  unsigned int edx;

  /* EAX receives the highest supported extended leaf.  */
  __asm__ __volatile__ ("cpuid"
                        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                        : "0" (0x80000000));

  /* Nothing is reported here for level 3 and beyond.  */
  if (name >= _SC_LEVEL3_CACHE_SIZE)
    return 0;

  const unsigned int leaf = (name >= _SC_LEVEL2_CACHE_SIZE
                             ? 0x80000006 : 0x80000005);
  if (eax < leaf)
    return 0;

  __asm__ __volatile__ ("cpuid"
                        : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
                        : "0" (leaf));

  /* The register decoded below: ECX by default.  */
  unsigned int info = ecx;

  if (name < _SC_LEVEL1_DCACHE_SIZE)
    {
      /* Instruction cache request: decode EDX with the same cases
         that handle the data cache in ECX.  */
      name += _SC_LEVEL1_DCACHE_SIZE - _SC_LEVEL1_ICACHE_SIZE;
      info = edx;
    }

  switch (name)
    {
    case _SC_LEVEL1_DCACHE_SIZE:
      return (info >> 14) & 0x3fc00;

    case _SC_LEVEL1_DCACHE_ASSOC:
      {
        const unsigned int upper = info >> 16;

        if ((upper & 0xff) == 0xff)
          /* Fully associative.  */
          return (upper << 2) & 0x3fc00;
        return upper & 0xff;
      }

    case _SC_LEVEL1_DCACHE_LINESIZE:
      return info & 0xff;

    case _SC_LEVEL2_CACHE_SIZE:
      /* A zero associativity field yields 0.  */
      if ((info & 0xf000) == 0)
        return 0;
      return (info >> 6) & 0x3fffc00;

    case _SC_LEVEL2_CACHE_ASSOC:
      {
        const unsigned int shifted = info >> 12;
        const unsigned int code = shifted & 0xf;

        switch (code)
          {
          case 0:
          case 1:
          case 2:
          case 4:
            /* These codes are returned unchanged.  */
            return code;
          case 6:
            return 8;
          case 8:
            return 16;
          case 0xf:
            return (shifted << 6) & 0x3fffc00;
          default:
            return 0;
          }
      }

    case _SC_LEVEL2_CACHE_LINESIZE:
      if ((info & 0xf000) == 0)
        return 0;
      return info & 0xff;

    default:
      assert (! "cannot happen");
    }

  return -1;
}
318 /* Get the value of the system variable NAME. */
319 long int
320 attribute_hidden
321 __cache_sysconf (int name)
323 /* Find out what brand of processor. */
324 unsigned int eax;
325 unsigned int ebx;
326 unsigned int ecx;
327 unsigned int edx;
328 asm volatile ("cpuid"
329 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
330 : "0" (0));
332 /* This spells out "GenuineIntel". */
333 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
334 return handle_intel (name, eax);
336 /* This spells out "AuthenticAMD". */
337 if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
338 return handle_amd (name);
340 // XXX Fill in more vendors.
342 /* CPU not known, we have no information. */
343 return 0;
347 /* Half the core cache size for use in memory and string routines, typically
348 L1 size. */
349 long int __x86_64_core_cache_size_half attribute_hidden = 32 * 1024 / 2;
350 /* Shared cache size for use in memory and string routines, typically
351 L2 or L3 size. */
352 long int __x86_64_shared_cache_size_half attribute_hidden = 1024 * 1024 / 2;
353 /* PREFETCHW support flag for use in memory and string routines. */
354 int __x86_64_prefetchw attribute_hidden;
357 static void
358 __attribute__((constructor))
359 init_cacheinfo (void)
361 /* Find out what brand of processor. */
362 unsigned int eax;
363 unsigned int ebx;
364 unsigned int ecx;
365 unsigned int edx;
366 int max_cpuid;
367 int max_cpuid_ex;
368 long int core = -1;
369 long int shared = -1;
370 unsigned int level;
371 unsigned int threads = 0;
373 asm volatile ("cpuid"
374 : "=a" (max_cpuid), "=b" (ebx), "=c" (ecx), "=d" (edx)
375 : "0" (0));
377 /* This spells out "GenuineIntel". */
378 if (ebx == 0x756e6547 && ecx == 0x6c65746e && edx == 0x49656e69)
380 core = handle_intel (_SC_LEVEL1_DCACHE_SIZE, max_cpuid);
382 /* Try L3 first. */
383 level = 3;
384 shared = handle_intel (_SC_LEVEL3_CACHE_SIZE, max_cpuid);
386 if (shared <= 0)
388 /* Try L2 otherwise. */
389 level = 2;
390 shared = handle_intel (_SC_LEVEL2_CACHE_SIZE, max_cpuid);
393 /* Figure out the number of logical threads that share the
394 highest cache level. */
395 if (max_cpuid >= 4)
397 int i = 0;
399 /* Query until desired cache level is enumerated. */
402 asm volatile ("cpuid"
403 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
404 : "0" (4), "2" (i++));
406 while (((eax >> 5) & 0x7) != level);
408 threads = ((eax >> 14) & 0x3ff) + 1;
410 else
412 /* Assume that all logical threads share the highest cache level. */
413 asm volatile ("cpuid"
414 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
415 : "0" (1));
417 threads = (ebx >> 16) & 0xff;
420 /* Cap usage of highest cache level to the number of supported
421 threads. */
422 if (shared > 0 && threads > 0)
423 shared /= threads;
425 /* This spells out "AuthenticAMD". */
426 else if (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65)
428 core = handle_amd (_SC_LEVEL1_DCACHE_SIZE);
429 shared = handle_amd (_SC_LEVEL2_CACHE_SIZE);
431 asm volatile ("cpuid"
432 : "=a" (max_cpuid_ex), "=b" (ebx), "=c" (ecx), "=d" (edx)
433 : "0" (0x80000000));
435 if (max_cpuid_ex >= 0x80000001)
437 asm volatile ("cpuid"
438 : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
439 : "0" (0x80000001));
440 /* PREFETCHW || 3DNow! */
441 if ((ecx & 0x100) || (edx & 0x80000000))
442 __x86_64_prefetchw = -1;
446 if (core > 0)
447 __x86_64_core_cache_size_half = core / 2;
449 if (shared > 0)
450 __x86_64_shared_cache_size_half = shared / 2;