arch/x86/kernel/cpu/intel_cacheinfo.c
1 /*
2 * Routines to identify caches on Intel CPU.
4 * Changes:
5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
7 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
8 */
10 #include <linux/init.h>
11 #include <linux/slab.h>
12 #include <linux/device.h>
13 #include <linux/compiler.h>
14 #include <linux/cpu.h>
15 #include <linux/sched.h>
16 #include <linux/pci.h>
18 #include <asm/processor.h>
19 #include <linux/smp.h>
20 #include <asm/k8.h>
21 #include <asm/smp.h>
23 #define LVL_1_INST 1
24 #define LVL_1_DATA 2
25 #define LVL_2 3
26 #define LVL_3 4
27 #define LVL_TRACE 5
29 struct _cache_table {
30 unsigned char descriptor;
31 char cache_type;
32 short size;
35 #define MB(x) ((x) * 1024)
37 /* All the cache descriptor types we care about (no TLB or
38 trace cache entries) */
40 static const struct _cache_table __cpuinitconst cache_table[] =
42 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
43 { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */
44 { 0x09, LVL_1_INST, 32 }, /* 4-way set assoc, 64 byte line size */
45 { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */
46 { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */
47 { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */
48 { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */
49 { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
50 { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
51 { 0x25, LVL_3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */
52 { 0x29, LVL_3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */
53 { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */
54 { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */
55 { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */
56 { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */
57 { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */
58 { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */
59 { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */
60 { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */
61 { 0x3f, LVL_2, 256 }, /* 2-way set assoc, 64 byte line size */
62 { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */
63 { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */
64 { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */
65 { 0x44, LVL_2, MB(1) }, /* 4-way set assoc, 32 byte line size */
66 { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */
67 { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */
68 { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */
69 { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
70 { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */
71 { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
72 { 0x4c, LVL_3, MB(12) }, /* 12-way set assoc, 64 byte line size */
73 { 0x4d, LVL_3, MB(16) }, /* 16-way set assoc, 64 byte line size */
74 { 0x4e, LVL_2, MB(6) }, /* 24-way set assoc, 64 byte line size */
75 { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */
76 { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */
77 { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */
78 { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */
79 { 0x70, LVL_TRACE, 12 }, /* 8-way set assoc */
80 { 0x71, LVL_TRACE, 16 }, /* 8-way set assoc */
81 { 0x72, LVL_TRACE, 32 }, /* 8-way set assoc */
82 { 0x73, LVL_TRACE, 64 }, /* 8-way set assoc */
83 { 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */
84 { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */
85 { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */
86 { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */
87 { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */
88 { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */
89 { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */
90 { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */
91 { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */
92 { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */
93 { 0x85, LVL_2, MB(2) }, /* 8-way set assoc, 32 byte line size */
94 { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */
95 { 0x87, LVL_2, MB(1) }, /* 8-way set assoc, 64 byte line size */
96 { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */
97 { 0xd1, LVL_3, MB(1) }, /* 4-way set assoc, 64 byte line size */
98 { 0xd2, LVL_3, MB(2) }, /* 4-way set assoc, 64 byte line size */
99 { 0xd6, LVL_3, MB(1) }, /* 8-way set assoc, 64 byte line size */
100 { 0xd7, LVL_3, MB(2) }, /* 8-way set assoc, 64 byte line size */
101 { 0xd8, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
102 { 0xdc, LVL_3, MB(2) }, /* 12-way set assoc, 64 byte line size */
103 { 0xdd, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */
104 { 0xde, LVL_3, MB(8) }, /* 12-way set assoc, 64 byte line size */
105 { 0xe2, LVL_3, MB(2) }, /* 16-way set assoc, 64 byte line size */
106 { 0xe3, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */
107 { 0xe4, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */
108 { 0xea, LVL_3, MB(12) }, /* 24-way set assoc, 64 byte line size */
109 { 0xeb, LVL_3, MB(18) }, /* 24-way set assoc, 64 byte line size */
110 { 0xec, LVL_3, MB(24) }, /* 24-way set assoc, 64 byte line size */
111 { 0x00, 0, 0}
115 enum _cache_type {
116 CACHE_TYPE_NULL = 0,
117 CACHE_TYPE_DATA = 1,
118 CACHE_TYPE_INST = 2,
119 CACHE_TYPE_UNIFIED = 3
122 union _cpuid4_leaf_eax {
123 struct {
124 enum _cache_type type:5;
125 unsigned int level:3;
126 unsigned int is_self_initializing:1;
127 unsigned int is_fully_associative:1;
128 unsigned int reserved:4;
129 unsigned int num_threads_sharing:12;
130 unsigned int num_cores_on_die:6;
131 } split;
132 u32 full;
135 union _cpuid4_leaf_ebx {
136 struct {
137 unsigned int coherency_line_size:12;
138 unsigned int physical_line_partition:10;
139 unsigned int ways_of_associativity:10;
140 } split;
141 u32 full;
144 union _cpuid4_leaf_ecx {
145 struct {
146 unsigned int number_of_sets:32;
147 } split;
148 u32 full;
151 struct amd_l3_cache {
152 struct pci_dev *dev;
153 bool can_disable;
154 unsigned indices;
155 u8 subcaches[4];
158 struct _cpuid4_info {
159 union _cpuid4_leaf_eax eax;
160 union _cpuid4_leaf_ebx ebx;
161 union _cpuid4_leaf_ecx ecx;
162 unsigned long size;
163 struct amd_l3_cache *l3;
164 DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
167 /* subset of above _cpuid4_info w/o shared_cpu_map */
168 struct _cpuid4_info_regs {
169 union _cpuid4_leaf_eax eax;
170 union _cpuid4_leaf_ebx ebx;
171 union _cpuid4_leaf_ecx ecx;
172 unsigned long size;
173 struct amd_l3_cache *l3;
176 unsigned short num_cache_leaves;
178 /* AMD doesn't have CPUID4. Emulate it here to report the same
179 information to the user. This makes some assumptions about the machine:
180 L2 not shared, no SMT etc., which is currently true on AMD CPUs.
182 In theory the TLBs could be reported as fake type (they are in "dummy").
183 Maybe later */
184 union l1_cache {
185 struct {
186 unsigned line_size:8;
187 unsigned lines_per_tag:8;
188 unsigned assoc:8;
189 unsigned size_in_kb:8;
191 unsigned val;
194 union l2_cache {
195 struct {
196 unsigned line_size:8;
197 unsigned lines_per_tag:4;
198 unsigned assoc:4;
199 unsigned size_in_kb:16;
201 unsigned val;
204 union l3_cache {
205 struct {
206 unsigned line_size:8;
207 unsigned lines_per_tag:4;
208 unsigned assoc:4;
209 unsigned res:2;
210 unsigned size_encoded:14;
212 unsigned val;
215 static const unsigned short __cpuinitconst assocs[] = {
216 [1] = 1,
217 [2] = 2,
218 [4] = 4,
219 [6] = 8,
220 [8] = 16,
221 [0xa] = 32,
222 [0xb] = 48,
223 [0xc] = 64,
224 [0xd] = 96,
225 [0xe] = 128,
226 [0xf] = 0xffff /* fully associative - no way to show this currently */
229 static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
230 static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };
232 static void __cpuinit
233 amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
234 union _cpuid4_leaf_ebx *ebx,
235 union _cpuid4_leaf_ecx *ecx)
237 unsigned dummy;
238 unsigned line_size, lines_per_tag, assoc, size_in_kb;
239 union l1_cache l1i, l1d;
240 union l2_cache l2;
241 union l3_cache l3;
242 union l1_cache *l1 = &l1d;
244 eax->full = 0;
245 ebx->full = 0;
246 ecx->full = 0;
248 cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
249 cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
251 switch (leaf) {
252 case 1:
253 l1 = &l1i;
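/* fall through: leaf 1 (L1i) reuses the L1 decode below */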
254 case 0:
255 if (!l1->val)
256 return;
257 assoc = assocs[l1->assoc];
258 line_size = l1->line_size;
259 lines_per_tag = l1->lines_per_tag;
260 size_in_kb = l1->size_in_kb;
261 break;
262 case 2:
263 if (!l2.val)
264 return;
265 assoc = assocs[l2.assoc];
266 line_size = l2.line_size;
267 lines_per_tag = l2.lines_per_tag;
268 /* cpu_data has errata corrections for K7 applied */
269 size_in_kb = current_cpu_data.x86_cache_size;
270 break;
271 case 3:
272 if (!l3.val)
273 return;
274 assoc = assocs[l3.assoc];
275 line_size = l3.line_size;
276 lines_per_tag = l3.lines_per_tag;
277 size_in_kb = l3.size_encoded * 512;
278 if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
279 size_in_kb = size_in_kb >> 1;
280 assoc = assoc >> 1;
282 break;
283 default:
284 return;
287 eax->split.is_self_initializing = 1;
288 eax->split.type = types[leaf];
289 eax->split.level = levels[leaf];
290 eax->split.num_threads_sharing = 0;
291 eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
294 if (assoc == 0xffff)
295 eax->split.is_fully_associative = 1;
296 ebx->split.coherency_line_size = line_size - 1;
297 ebx->split.ways_of_associativity = assoc - 1;
298 ebx->split.physical_line_partition = lines_per_tag - 1;
299 ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
300 (ebx->split.ways_of_associativity + 1) - 1;
303 struct _cache_attr {
304 struct attribute attr;
305 ssize_t (*show)(struct _cpuid4_info *, char *);
306 ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
309 #ifdef CONFIG_CPU_SUP_AMD
312 * L3 cache descriptors
314 static struct amd_l3_cache **__cpuinitdata l3_caches;
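/*
 * Work out how many L3 cache indices can be disabled: read the
 * per-subcache bits from northbridge register 0x1c4 and size
 * l3->indices from the largest populated subcache.
 */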
316 static void __cpuinit amd_calc_l3_indices(struct amd_l3_cache *l3)
318 unsigned int sc0, sc1, sc2, sc3;
319 u32 val = 0;
321 pci_read_config_dword(l3->dev, 0x1C4, &val);
323 /* calculate subcache sizes */
324 l3->subcaches[0] = sc0 = !(val & BIT(0));
325 l3->subcaches[1] = sc1 = !(val & BIT(4));
326 l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9));
327 l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13));
329 l3->indices = (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
332 static struct amd_l3_cache * __cpuinit amd_init_l3_cache(int node)
334 struct amd_l3_cache *l3;
335 struct pci_dev *dev = node_to_k8_nb_misc(node);
337 l3 = kzalloc(sizeof(struct amd_l3_cache), GFP_ATOMIC);
338 if (!l3) {
339 printk(KERN_WARNING "Error allocating L3 struct\n");
340 return NULL;
343 l3->dev = dev;
345 amd_calc_l3_indices(l3);
347 return l3;
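/*
 * Check whether L3 index disable is usable for this cache leaf
 * (family 0x10, L3 leaf, errata-free model/stepping, a real
 * northbridge present) and, if so, attach the node's amd_l3_cache
 * descriptor to this_leaf, allocating it on first use.
 */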
350 static void __cpuinit
351 amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
353 int node;
355 if (boot_cpu_data.x86 != 0x10)
356 return;
358 if (index < 3)
359 return;
361 /* see errata #382 and #388 */
362 if (boot_cpu_data.x86_model < 0x8)
363 return;
365 if ((boot_cpu_data.x86_model == 0x8 ||
366 boot_cpu_data.x86_model == 0x9)
367 &&
368 boot_cpu_data.x86_mask < 0x1)
369 return;
371 /* not in virtualized environments */
372 if (num_k8_northbridges == 0)
373 return;
376 * Strictly speaking, the amount in @size below is leaked since it is
377 * never freed but this is done only on shutdown so it doesn't matter.
379 if (!l3_caches) {
380 int size = num_k8_northbridges * sizeof(struct amd_l3_cache *);
382 l3_caches = kzalloc(size, GFP_ATOMIC);
383 if (!l3_caches)
384 return;
387 node = amd_get_nb_id(smp_processor_id());
389 if (!l3_caches[node]) {
390 l3_caches[node] = amd_init_l3_cache(node);
391 if (l3_caches[node]) l3_caches[node]->can_disable = true;
394 WARN_ON(!l3_caches[node]);
396 this_leaf->l3 = l3_caches[node];
399 static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
400 unsigned int slot)
402 struct pci_dev *dev = this_leaf->l3->dev;
403 unsigned int reg = 0;
405 if (!this_leaf->l3 || !this_leaf->l3->can_disable)
406 return -EINVAL;
408 if (!dev)
409 return -EINVAL;
411 pci_read_config_dword(dev, 0x1BC + slot * 4, &reg);
412 return sprintf(buf, "0x%08x\n", reg);
415 #define SHOW_CACHE_DISABLE(slot) \
416 static ssize_t \
417 show_cache_disable_##slot(struct _cpuid4_info *this_leaf, char *buf) \
419 return show_cache_disable(this_leaf, buf, slot); \
421 SHOW_CACHE_DISABLE(0)
422 SHOW_CACHE_DISABLE(1)
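/*
 * Disable index @idx in slot @slot of the L3: program the slot's
 * index-disable register for every populated subcache, flush via
 * WBINVD on a core of the owning node, then set bit 31 to commit.
 */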
424 static void amd_l3_disable_index(struct amd_l3_cache *l3, int cpu,
425 unsigned slot, unsigned long idx)
427 int i;
429 idx |= BIT(30);
432 * disable index in all 4 subcaches
434 for (i = 0; i < 4; i++) {
435 u32 reg = idx | (i << 20);
437 if (!l3->subcaches[i])
438 continue;
440 pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);
443 * We need to WBINVD on a core on the node containing the L3
444 * cache whose indices we disable; therefore a simple wbinvd()
445 * is not sufficient.
447 wbinvd_on_cpu(cpu);
449 reg |= BIT(31);
450 pci_write_config_dword(l3->dev, 0x1BC + slot * 4, reg);
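/*
 * sysfs write handler for cache_disable_[01]: parse a decimal value,
 * reject anything outside the allowed subcache/index bits or beyond
 * l3->indices, then disable that index in the given slot.
 */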
455 static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
456 const char *buf, size_t count,
457 unsigned int slot)
459 struct pci_dev *dev = this_leaf->l3->dev;
460 int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
461 unsigned long val = 0;
463 #define SUBCACHE_MASK (3UL << 20)
464 #define SUBCACHE_INDEX 0xfff
466 if (!this_leaf->l3 || !this_leaf->l3->can_disable)
467 return -EINVAL;
469 if (!capable(CAP_SYS_ADMIN))
470 return -EPERM;
472 if (!dev)
473 return -EINVAL;
475 if (strict_strtoul(buf, 10, &val) < 0)
476 return -EINVAL;
478 /* do not allow writes outside of allowed bits */
479 if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
480 ((val & SUBCACHE_INDEX) > this_leaf->l3->indices))
481 return -EINVAL;
483 amd_l3_disable_index(this_leaf->l3, cpu, slot, val);
485 return count;
488 #define STORE_CACHE_DISABLE(slot) \
489 static ssize_t \
490 store_cache_disable_##slot(struct _cpuid4_info *this_leaf, \
491 const char *buf, size_t count) \
493 return store_cache_disable(this_leaf, buf, count, slot); \
495 STORE_CACHE_DISABLE(0)
496 STORE_CACHE_DISABLE(1)
498 static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
499 show_cache_disable_0, store_cache_disable_0);
500 static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
501 show_cache_disable_1, store_cache_disable_1);
503 #else /* CONFIG_CPU_SUP_AMD */
504 static void __cpuinit
505 amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
508 #endif /* CONFIG_CPU_SUP_AMD */
510 static int
511 __cpuinit cpuid4_cache_lookup_regs(int index,
512 struct _cpuid4_info_regs *this_leaf)
514 union _cpuid4_leaf_eax eax;
515 union _cpuid4_leaf_ebx ebx;
516 union _cpuid4_leaf_ecx ecx;
517 unsigned edx;
519 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
520 amd_cpuid4(index, &eax, &ebx, &ecx);
521 amd_check_l3_disable(index, this_leaf);
522 } else {
523 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
526 if (eax.split.type == CACHE_TYPE_NULL)
527 return -EIO; /* better error ? */
529 this_leaf->eax = eax;
530 this_leaf->ebx = ebx;
531 this_leaf->ecx = ecx;
532 this_leaf->size = (ecx.split.number_of_sets + 1) *
533 (ebx.split.coherency_line_size + 1) *
534 (ebx.split.physical_line_partition + 1) *
535 (ebx.split.ways_of_associativity + 1);
536 return 0;
539 static int __cpuinit find_num_cache_leaves(void)
541 unsigned int eax, ebx, ecx, edx;
542 union _cpuid4_leaf_eax cache_eax;
543 int i = -1;
545 do {
546 ++i;
547 /* Do cpuid(4) loop to find out num_cache_leaves */
548 cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
549 cache_eax.full = eax;
550 } while (cache_eax.split.type != CACHE_TYPE_NULL);
551 return i;
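/*
 * Detect cache sizes, preferring the deterministic cpuid(4) leaves and
 * falling back to the cpuid(2) descriptor table.  Fills in
 * c->x86_cache_size and, on HT kernels, the per-cpu LLC id; returns
 * the L2 size in KB.
 */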
554 unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
556 /* Cache sizes */
557 unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
558 unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
559 unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
560 unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
561 #ifdef CONFIG_X86_HT
562 unsigned int cpu = c->cpu_index;
563 #endif
565 if (c->cpuid_level > 3) {
566 static int is_initialized;
568 if (is_initialized == 0) {
569 /* Init num_cache_leaves from boot CPU */
570 num_cache_leaves = find_num_cache_leaves();
571 is_initialized++;
575 * Whenever possible use cpuid(4), the deterministic cache
576 * parameters leaf, to find the cache details
578 for (i = 0; i < num_cache_leaves; i++) {
579 struct _cpuid4_info_regs this_leaf;
580 int retval;
582 retval = cpuid4_cache_lookup_regs(i, &this_leaf);
583 if (retval >= 0) {
584 switch (this_leaf.eax.split.level) {
585 case 1:
586 if (this_leaf.eax.split.type ==
587 CACHE_TYPE_DATA)
588 new_l1d = this_leaf.size/1024;
589 else if (this_leaf.eax.split.type ==
590 CACHE_TYPE_INST)
591 new_l1i = this_leaf.size/1024;
592 break;
593 case 2:
594 new_l2 = this_leaf.size/1024;
595 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
596 index_msb = get_count_order(num_threads_sharing);
597 l2_id = c->apicid >> index_msb;
598 break;
599 case 3:
600 new_l3 = this_leaf.size/1024;
601 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
602 index_msb = get_count_order(
603 num_threads_sharing);
604 l3_id = c->apicid >> index_msb;
605 break;
606 default:
607 break;
613 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
614 * the trace cache
616 if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
617 /* supports eax=2 call */
618 int j, n;
619 unsigned int regs[4];
620 unsigned char *dp = (unsigned char *)regs;
621 int only_trace = 0;
623 if (num_cache_leaves != 0 && c->x86 == 15)
624 only_trace = 1;
626 /* Number of times to iterate */
627 n = cpuid_eax(2) & 0xFF;
629 for (i = 0 ; i < n ; i++) {
630 cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
632 /* If bit 31 is set, this is an unknown format */
633 for (j = 0 ; j < 3 ; j++)
634 if (regs[j] & (1 << 31))
635 regs[j] = 0;
637 /* Byte 0 is level count, not a descriptor */
638 for (j = 1 ; j < 16 ; j++) {
639 unsigned char des = dp[j];
640 unsigned char k = 0;
642 /* look up this descriptor in the table */
643 while (cache_table[k].descriptor != 0) {
644 if (cache_table[k].descriptor == des) {
645 if (only_trace && cache_table[k].cache_type != LVL_TRACE)
646 break;
647 switch (cache_table[k].cache_type) {
648 case LVL_1_INST:
649 l1i += cache_table[k].size;
650 break;
651 case LVL_1_DATA:
652 l1d += cache_table[k].size;
653 break;
654 case LVL_2:
655 l2 += cache_table[k].size;
656 break;
657 case LVL_3:
658 l3 += cache_table[k].size;
659 break;
660 case LVL_TRACE:
661 trace += cache_table[k].size;
662 break;
665 break;
668 k++;
674 if (new_l1d)
675 l1d = new_l1d;
677 if (new_l1i)
678 l1i = new_l1i;
680 if (new_l2) {
681 l2 = new_l2;
682 #ifdef CONFIG_X86_HT
683 per_cpu(cpu_llc_id, cpu) = l2_id;
684 #endif
687 if (new_l3) {
688 l3 = new_l3;
689 #ifdef CONFIG_X86_HT
690 per_cpu(cpu_llc_id, cpu) = l3_id;
691 #endif
694 c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
696 return l2;
699 #ifdef CONFIG_SYSFS
701 /* pointer to _cpuid4_info array (for each cache leaf) */
702 static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
703 #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y]))
705 #ifdef CONFIG_SMP
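/*
 * Build shared_cpu_map for cache @index on @cpu: CPUs whose APIC ids
 * match above the threads-sharing bits share this cache.  The AMD L3
 * case (index 3) uses the northbridge's llc_shared_map instead.
 */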
706 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
708 struct _cpuid4_info *this_leaf, *sibling_leaf;
709 unsigned long num_threads_sharing;
710 int index_msb, i, sibling;
711 struct cpuinfo_x86 *c = &cpu_data(cpu);
713 if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
714 for_each_cpu(i, c->llc_shared_map) {
715 if (!per_cpu(ici_cpuid4_info, i))
716 continue;
717 this_leaf = CPUID4_INFO_IDX(i, index);
718 for_each_cpu(sibling, c->llc_shared_map) {
719 if (!cpu_online(sibling))
720 continue;
721 set_bit(sibling, this_leaf->shared_cpu_map);
724 return;
726 this_leaf = CPUID4_INFO_IDX(cpu, index);
727 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
729 if (num_threads_sharing == 1)
730 cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
731 else {
732 index_msb = get_count_order(num_threads_sharing);
734 for_each_online_cpu(i) {
735 if (cpu_data(i).apicid >> index_msb ==
736 c->apicid >> index_msb) {
737 cpumask_set_cpu(i,
738 to_cpumask(this_leaf->shared_cpu_map));
739 if (i != cpu && per_cpu(ici_cpuid4_info, i)) {
740 sibling_leaf =
741 CPUID4_INFO_IDX(i, index);
742 cpumask_set_cpu(cpu, to_cpumask(
743 sibling_leaf->shared_cpu_map));
749 static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
751 struct _cpuid4_info *this_leaf, *sibling_leaf;
752 int sibling;
754 this_leaf = CPUID4_INFO_IDX(cpu, index);
755 for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
756 sibling_leaf = CPUID4_INFO_IDX(sibling, index);
757 cpumask_clear_cpu(cpu,
758 to_cpumask(sibling_leaf->shared_cpu_map));
761 #else
762 static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
766 static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
769 #endif
771 static void __cpuinit free_cache_attributes(unsigned int cpu)
773 int i;
775 for (i = 0; i < num_cache_leaves; i++)
776 cache_remove_shared_cpu_map(cpu, i);
778 kfree(per_cpu(ici_cpuid4_info, cpu)->l3);
779 kfree(per_cpu(ici_cpuid4_info, cpu));
780 per_cpu(ici_cpuid4_info, cpu) = NULL;
783 static int
784 __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
786 struct _cpuid4_info_regs *leaf_regs =
787 (struct _cpuid4_info_regs *)this_leaf;
789 return cpuid4_cache_lookup_regs(index, leaf_regs);
792 static void __cpuinit get_cpu_leaves(void *_retval)
794 int j, *retval = _retval, cpu = smp_processor_id();
796 /* Do cpuid and store the results */
797 for (j = 0; j < num_cache_leaves; j++) {
798 struct _cpuid4_info *this_leaf;
799 this_leaf = CPUID4_INFO_IDX(cpu, j);
800 *retval = cpuid4_cache_lookup(j, this_leaf);
801 if (unlikely(*retval < 0)) {
802 int i;
804 for (i = 0; i < j; i++)
805 cache_remove_shared_cpu_map(cpu, i);
806 break;
808 cache_shared_cpu_map_setup(cpu, j);
812 static int __cpuinit detect_cache_attributes(unsigned int cpu)
814 int retval;
816 if (num_cache_leaves == 0)
817 return -ENOENT;
819 per_cpu(ici_cpuid4_info, cpu) = kzalloc(
820 sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
821 if (per_cpu(ici_cpuid4_info, cpu) == NULL)
822 return -ENOMEM;
824 smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
825 if (retval) {
826 kfree(per_cpu(ici_cpuid4_info, cpu));
827 per_cpu(ici_cpuid4_info, cpu) = NULL;
830 return retval;
833 #include <linux/kobject.h>
834 #include <linux/sysfs.h>
836 extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */
838 /* pointer to kobject for cpuX/cache */
839 static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);
841 struct _index_kobject {
842 struct kobject kobj;
843 unsigned int cpu;
844 unsigned short index;
847 /* pointer to array of kobjects for cpuX/cache/indexY */
848 static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
849 #define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(ici_index_kobject, x))[y]))
851 #define show_one_plus(file_name, object, val) \
852 static ssize_t show_##file_name \
853 (struct _cpuid4_info *this_leaf, char *buf) \
855 return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
858 show_one_plus(level, eax.split.level, 0);
859 show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
860 show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
861 show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
862 show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
864 static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
866 return sprintf(buf, "%luK\n", this_leaf->size / 1024);
869 static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
870 int type, char *buf)
872 ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
873 int n = 0;
875 if (len > 1) {
876 const struct cpumask *mask;
878 mask = to_cpumask(this_leaf->shared_cpu_map);
879 n = type ?
880 cpulist_scnprintf(buf, len-2, mask) :
881 cpumask_scnprintf(buf, len-2, mask);
882 buf[n++] = '\n';
883 buf[n] = '\0';
885 return n;
888 static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf)
890 return show_shared_cpu_map_func(leaf, 0, buf);
893 static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
895 return show_shared_cpu_map_func(leaf, 1, buf);
898 static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
900 switch (this_leaf->eax.split.type) {
901 case CACHE_TYPE_DATA:
902 return sprintf(buf, "Data\n");
903 case CACHE_TYPE_INST:
904 return sprintf(buf, "Instruction\n");
905 case CACHE_TYPE_UNIFIED:
906 return sprintf(buf, "Unified\n");
907 default:
908 return sprintf(buf, "Unknown\n");
912 #define to_object(k) container_of(k, struct _index_kobject, kobj)
913 #define to_attr(a) container_of(a, struct _cache_attr, attr)
915 #define define_one_ro(_name) \
916 static struct _cache_attr _name = \
917 __ATTR(_name, 0444, show_##_name, NULL)
919 define_one_ro(level);
920 define_one_ro(type);
921 define_one_ro(coherency_line_size);
922 define_one_ro(physical_line_partition);
923 define_one_ro(ways_of_associativity);
924 define_one_ro(number_of_sets);
925 define_one_ro(size);
926 define_one_ro(shared_cpu_map);
927 define_one_ro(shared_cpu_list);
929 #define DEFAULT_SYSFS_CACHE_ATTRS \
930 &type.attr, \
931 &level.attr, \
932 &coherency_line_size.attr, \
933 &physical_line_partition.attr, \
934 &ways_of_associativity.attr, \
935 &number_of_sets.attr, \
936 &size.attr, \
937 &shared_cpu_map.attr, \
938 &shared_cpu_list.attr
940 static struct attribute *default_attrs[] = {
941 DEFAULT_SYSFS_CACHE_ATTRS,
942 NULL
945 static struct attribute *default_l3_attrs[] = {
946 DEFAULT_SYSFS_CACHE_ATTRS,
947 #ifdef CONFIG_CPU_SUP_AMD
948 &cache_disable_0.attr,
949 &cache_disable_1.attr,
950 #endif
951 NULL
954 static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
956 struct _cache_attr *fattr = to_attr(attr);
957 struct _index_kobject *this_leaf = to_object(kobj);
958 ssize_t ret;
960 ret = fattr->show ?
961 fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
962 buf) :
964 return ret;
967 static ssize_t store(struct kobject *kobj, struct attribute *attr,
968 const char *buf, size_t count)
970 struct _cache_attr *fattr = to_attr(attr);
971 struct _index_kobject *this_leaf = to_object(kobj);
972 ssize_t ret;
974 ret = fattr->store ?
975 fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
976 buf, count) :
978 return ret;
981 static const struct sysfs_ops sysfs_ops = {
982 .show = show,
983 .store = store,
986 static struct kobj_type ktype_cache = {
987 .sysfs_ops = &sysfs_ops,
988 .default_attrs = default_attrs,
991 static struct kobj_type ktype_percpu_entry = {
992 .sysfs_ops = &sysfs_ops,
995 static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
997 kfree(per_cpu(ici_cache_kobject, cpu));
998 kfree(per_cpu(ici_index_kobject, cpu));
999 per_cpu(ici_cache_kobject, cpu) = NULL;
1000 per_cpu(ici_index_kobject, cpu) = NULL;
1001 free_cache_attributes(cpu);
1004 static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
1006 int err;
1008 if (num_cache_leaves == 0)
1009 return -ENOENT;
1011 err = detect_cache_attributes(cpu);
1012 if (err)
1013 return err;
1015 /* Allocate all required memory */
1016 per_cpu(ici_cache_kobject, cpu) =
1017 kzalloc(sizeof(struct kobject), GFP_KERNEL);
1018 if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
1019 goto err_out;
1021 per_cpu(ici_index_kobject, cpu) = kzalloc(
1022 sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
1023 if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
1024 goto err_out;
1026 return 0;
1028 err_out:
1029 cpuid4_cache_sysfs_exit(cpu);
1030 return -ENOMEM;
1033 static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
1035 /* Add/Remove cache interface for CPU device */
1036 static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
1038 unsigned int cpu = sys_dev->id;
1039 unsigned long i, j;
1040 struct _index_kobject *this_object;
1041 struct _cpuid4_info *this_leaf;
1042 int retval;
1044 retval = cpuid4_cache_sysfs_init(cpu);
1045 if (unlikely(retval < 0))
1046 return retval;
1048 retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
1049 &ktype_percpu_entry,
1050 &sys_dev->kobj, "%s", "cache");
1051 if (retval < 0) {
1052 cpuid4_cache_sysfs_exit(cpu);
1053 return retval;
1056 for (i = 0; i < num_cache_leaves; i++) {
1057 this_object = INDEX_KOBJECT_PTR(cpu, i);
1058 this_object->cpu = cpu;
1059 this_object->index = i;
1061 this_leaf = CPUID4_INFO_IDX(cpu, i);
1063 if (this_leaf->l3 && this_leaf->l3->can_disable)
1064 ktype_cache.default_attrs = default_l3_attrs;
1065 else
1066 ktype_cache.default_attrs = default_attrs;
1068 retval = kobject_init_and_add(&(this_object->kobj),
1069 &ktype_cache,
1070 per_cpu(ici_cache_kobject, cpu),
1071 "index%1lu", i);
1072 if (unlikely(retval)) {
1073 for (j = 0; j < i; j++)
1074 kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
1075 kobject_put(per_cpu(ici_cache_kobject, cpu));
1076 cpuid4_cache_sysfs_exit(cpu);
1077 return retval;
1079 kobject_uevent(&(this_object->kobj), KOBJ_ADD);
1081 cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
1083 kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
1084 return 0;
1087 static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
1089 unsigned int cpu = sys_dev->id;
1090 unsigned long i;
1092 if (per_cpu(ici_cpuid4_info, cpu) == NULL)
1093 return;
1094 if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
1095 return;
1096 cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
1098 for (i = 0; i < num_cache_leaves; i++)
1099 kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
1100 kobject_put(per_cpu(ici_cache_kobject, cpu));
1101 cpuid4_cache_sysfs_exit(cpu);
1104 static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
1105 unsigned long action, void *hcpu)
1107 unsigned int cpu = (unsigned long)hcpu;
1108 struct sys_device *sys_dev;
1110 sys_dev = get_cpu_sysdev(cpu);
1111 switch (action) {
1112 case CPU_ONLINE:
1113 case CPU_ONLINE_FROZEN:
1114 cache_add_dev(sys_dev);
1115 break;
1116 case CPU_DEAD:
1117 case CPU_DEAD_FROZEN:
1118 cache_remove_dev(sys_dev);
1119 break;
1121 return NOTIFY_OK;
1124 static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
1125 .notifier_call = cacheinfo_cpu_callback,
1128 static int __cpuinit cache_sysfs_init(void)
1130 int i;
1132 if (num_cache_leaves == 0)
1133 return 0;
1135 for_each_online_cpu(i) {
1136 int err;
1137 struct sys_device *sys_dev = get_cpu_sysdev(i);
1139 err = cache_add_dev(sys_dev);
1140 if (err)
1141 return err;
1143 register_hotcpu_notifier(&cacheinfo_cpu_notifier);
1144 return 0;
1147 device_initcall(cache_sysfs_init);
1149 #endif