x86, cacheinfo: Remove NUMA dependency, fix for AMD Fam10h rev D1
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] arch/x86/kernel/cpu/intel_cacheinfo.c
blob be5f5c28ddfbfb52aaab081f426055ead7b5cba6
/*
 *	Routines to identify caches on Intel CPU.
 *
 *	Changes:
 *	Venkatesh Pallipadi	: Adding cache identification through cpuid(4)
 *	Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
 *	Andi Kleen / Andreas Herrmann	: CPUID4 emulation on AMD.
 */

#include <linux/init.h>
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/compiler.h>
#include <linux/cpu.h>
#include <linux/sched.h>
#include <linux/pci.h>

#include <asm/processor.h>
#include <linux/smp.h>
#include <asm/k8.h>
#include <asm/smp.h>

#define LVL_1_INST	1
#define LVL_1_DATA	2
#define LVL_2		3
#define LVL_3		4
#define LVL_TRACE	5

struct _cache_table {
	unsigned char descriptor;
	char cache_type;
	short size;
};

/* All the cache descriptor types we care about (no TLB or
   trace cache entries) */

static const struct _cache_table __cpuinitconst cache_table[] =
{
	{ 0x06, LVL_1_INST, 8 },	/* 4-way set assoc, 32 byte line size */
	{ 0x08, LVL_1_INST, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x09, LVL_1_INST, 32 },	/* 4-way set assoc, 64 byte line size */
	{ 0x0a, LVL_1_DATA, 8 },	/* 2 way set assoc, 32 byte line size */
	{ 0x0c, LVL_1_DATA, 16 },	/* 4-way set assoc, 32 byte line size */
	{ 0x0d, LVL_1_DATA, 16 },	/* 4-way set assoc, 64 byte line size */
	{ 0x21, LVL_2, 256 },	/* 8-way set assoc, 64 byte line size */
	{ 0x22, LVL_3, 512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x23, LVL_3, 1024 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x25, LVL_3, 2048 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x29, LVL_3, 4096 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x2c, LVL_1_DATA, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x30, LVL_1_INST, 32 },	/* 8-way set assoc, 64 byte line size */
	{ 0x39, LVL_2, 128 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3a, LVL_2, 192 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3b, LVL_2, 128 },	/* 2-way set assoc, sectored cache, 64 byte line size */
	{ 0x3c, LVL_2, 256 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3d, LVL_2, 384 },	/* 6-way set assoc, sectored cache, 64 byte line size */
	{ 0x3e, LVL_2, 512 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x3f, LVL_2, 256 },	/* 2-way set assoc, 64 byte line size */
	{ 0x41, LVL_2, 128 },	/* 4-way set assoc, 32 byte line size */
	{ 0x42, LVL_2, 256 },	/* 4-way set assoc, 32 byte line size */
	{ 0x43, LVL_2, 512 },	/* 4-way set assoc, 32 byte line size */
	{ 0x44, LVL_2, 1024 },	/* 4-way set assoc, 32 byte line size */
	{ 0x45, LVL_2, 2048 },	/* 4-way set assoc, 32 byte line size */
	{ 0x46, LVL_3, 4096 },	/* 4-way set assoc, 64 byte line size */
	{ 0x47, LVL_3, 8192 },	/* 8-way set assoc, 64 byte line size */
	{ 0x49, LVL_3, 4096 },	/* 16-way set assoc, 64 byte line size */
	{ 0x4a, LVL_3, 6144 },	/* 12-way set assoc, 64 byte line size */
	{ 0x4b, LVL_3, 8192 },	/* 16-way set assoc, 64 byte line size */
	{ 0x4c, LVL_3, 12288 },	/* 12-way set assoc, 64 byte line size */
	{ 0x4d, LVL_3, 16384 },	/* 16-way set assoc, 64 byte line size */
	{ 0x4e, LVL_2, 6144 },	/* 24-way set assoc, 64 byte line size */
	{ 0x60, LVL_1_DATA, 16 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x66, LVL_1_DATA, 8 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x67, LVL_1_DATA, 16 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x68, LVL_1_DATA, 32 },	/* 4-way set assoc, sectored cache, 64 byte line size */
	{ 0x70, LVL_TRACE, 12 },	/* 8-way set assoc */
	{ 0x71, LVL_TRACE, 16 },	/* 8-way set assoc */
	{ 0x72, LVL_TRACE, 32 },	/* 8-way set assoc */
	{ 0x73, LVL_TRACE, 64 },	/* 8-way set assoc */
	{ 0x78, LVL_2, 1024 },	/* 4-way set assoc, 64 byte line size */
	{ 0x79, LVL_2, 128 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7a, LVL_2, 256 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7b, LVL_2, 512 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7c, LVL_2, 1024 },	/* 8-way set assoc, sectored cache, 64 byte line size */
	{ 0x7d, LVL_2, 2048 },	/* 8-way set assoc, 64 byte line size */
	{ 0x7f, LVL_2, 512 },	/* 2-way set assoc, 64 byte line size */
	{ 0x82, LVL_2, 256 },	/* 8-way set assoc, 32 byte line size */
	{ 0x83, LVL_2, 512 },	/* 8-way set assoc, 32 byte line size */
	{ 0x84, LVL_2, 1024 },	/* 8-way set assoc, 32 byte line size */
	{ 0x85, LVL_2, 2048 },	/* 8-way set assoc, 32 byte line size */
	{ 0x86, LVL_2, 512 },	/* 4-way set assoc, 64 byte line size */
	{ 0x87, LVL_2, 1024 },	/* 8-way set assoc, 64 byte line size */
	{ 0xd0, LVL_3, 512 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd1, LVL_3, 1024 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd2, LVL_3, 2048 },	/* 4-way set assoc, 64 byte line size */
	{ 0xd6, LVL_3, 1024 },	/* 8-way set assoc, 64 byte line size */
	{ 0xd7, LVL_3, 2048 },	/* 8-way set assoc, 64 byte line size */
	{ 0xd8, LVL_3, 4096 },	/* 12-way set assoc, 64 byte line size */
	{ 0xdc, LVL_3, 2048 },	/* 12-way set assoc, 64 byte line size */
	{ 0xdd, LVL_3, 4096 },	/* 12-way set assoc, 64 byte line size */
	{ 0xde, LVL_3, 8192 },	/* 12-way set assoc, 64 byte line size */
	{ 0xe2, LVL_3, 2048 },	/* 16-way set assoc, 64 byte line size */
	{ 0xe3, LVL_3, 4096 },	/* 16-way set assoc, 64 byte line size */
	{ 0xe4, LVL_3, 8192 },	/* 16-way set assoc, 64 byte line size */
	{ 0xea, LVL_3, 12288 },	/* 24-way set assoc, 64 byte line size */
	{ 0xeb, LVL_3, 18432 },	/* 24-way set assoc, 64 byte line size */
	{ 0xec, LVL_3, 24576 },	/* 24-way set assoc, 64 byte line size */
	{ 0x00, 0, 0}
};

enum _cache_type {
	CACHE_TYPE_NULL = 0,
	CACHE_TYPE_DATA = 1,
	CACHE_TYPE_INST = 2,
	CACHE_TYPE_UNIFIED = 3
};

union _cpuid4_leaf_eax {
	struct {
		enum _cache_type type:5;
		unsigned int level:3;
		unsigned int is_self_initializing:1;
		unsigned int is_fully_associative:1;
		unsigned int reserved:4;
		unsigned int num_threads_sharing:12;
		unsigned int num_cores_on_die:6;
	} split;
	u32 full;
};

union _cpuid4_leaf_ebx {
	struct {
		unsigned int coherency_line_size:12;
		unsigned int physical_line_partition:10;
		unsigned int ways_of_associativity:10;
	} split;
	u32 full;
};

union _cpuid4_leaf_ecx {
	struct {
		unsigned int number_of_sets:32;
	} split;
	u32 full;
};

struct _cpuid4_info {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned long size;
	bool can_disable;
	unsigned int l3_indices;
	DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
};

/* subset of above _cpuid4_info w/o shared_cpu_map */
struct _cpuid4_info_regs {
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned long size;
	bool can_disable;
	unsigned int l3_indices;
};

unsigned short num_cache_leaves;

/* AMD doesn't have CPUID4. Emulate it here to report the same
   information to the user. This makes some assumptions about the machine:
   L2 not shared, no SMT etc. that is currently true on AMD CPUs.

   In theory the TLBs could be reported as fake type (they are in "dummy").
   Maybe later */
union l1_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:8;
		unsigned assoc:8;
		unsigned size_in_kb:8;
	};
	unsigned val;
};

union l2_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned size_in_kb:16;
	};
	unsigned val;
};

union l3_cache {
	struct {
		unsigned line_size:8;
		unsigned lines_per_tag:4;
		unsigned assoc:4;
		unsigned res:2;
		unsigned size_encoded:14;
	};
	unsigned val;
};

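/*
 * Translation table for the associativity encoding reported by the
 * extended CPUID leaves above: the raw "assoc" field is used as the
 * index, the value is the number of ways (0xffff = fully associative).
 */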
static const unsigned short __cpuinitconst assocs[] = {
	[1] = 1,
	[2] = 2,
	[4] = 4,
	[6] = 8,
	[8] = 16,
	[0xa] = 32,
	[0xb] = 48,
	[0xc] = 64,
	[0xd] = 96,
	[0xe] = 128,
	[0xf] = 0xffff /* fully associative - no way to show this currently */
};

static const unsigned char __cpuinitconst levels[] = { 1, 1, 2, 3 };
static const unsigned char __cpuinitconst types[] = { 1, 2, 3, 3 };

static void __cpuinit
amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
		     union _cpuid4_leaf_ebx *ebx,
		     union _cpuid4_leaf_ecx *ecx)
{
	unsigned dummy;
	unsigned line_size, lines_per_tag, assoc, size_in_kb;
	union l1_cache l1i, l1d;
	union l2_cache l2;
	union l3_cache l3;
	union l1_cache *l1 = &l1d;

	eax->full = 0;
	ebx->full = 0;
	ecx->full = 0;

	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);

	switch (leaf) {
	case 1:
		l1 = &l1i;
	case 0:
		if (!l1->val)
			return;
		assoc = assocs[l1->assoc];
		line_size = l1->line_size;
		lines_per_tag = l1->lines_per_tag;
		size_in_kb = l1->size_in_kb;
		break;
	case 2:
		if (!l2.val)
			return;
		assoc = assocs[l2.assoc];
		line_size = l2.line_size;
		lines_per_tag = l2.lines_per_tag;
		/* cpu_data has errata corrections for K7 applied */
		size_in_kb = current_cpu_data.x86_cache_size;
		break;
	case 3:
		if (!l3.val)
			return;
		assoc = assocs[l3.assoc];
		line_size = l3.line_size;
		lines_per_tag = l3.lines_per_tag;
		size_in_kb = l3.size_encoded * 512;
		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
			size_in_kb = size_in_kb >> 1;
			assoc = assoc >> 1;
		}
		break;
	default:
		return;
	}

	eax->split.is_self_initializing = 1;
	eax->split.type = types[leaf];
	eax->split.level = levels[leaf];
	eax->split.num_threads_sharing = 0;
	eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;

	if (assoc == 0xffff)
		eax->split.is_fully_associative = 1;
	ebx->split.coherency_line_size = line_size - 1;
	ebx->split.ways_of_associativity = assoc - 1;
	ebx->split.physical_line_partition = lines_per_tag - 1;
	ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
		(ebx->split.ways_of_associativity + 1) - 1;
}

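/*
 * Determine the number of usable L3 cache indices by reading the
 * subcache size bits from the northbridge (PCI config register 0x1C4)
 * of the node this CPU sits on.
 */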
static unsigned int __cpuinit amd_calc_l3_indices(void)
{
	/*
	 * We're called over smp_call_function_single() and therefore
	 * are on the correct cpu.
	 */
	int cpu = smp_processor_id();
	int node = cpu_to_node(cpu);
	struct pci_dev *dev = node_to_k8_nb_misc(node);
	unsigned int sc0, sc1, sc2, sc3;
	u32 val;

	pci_read_config_dword(dev, 0x1C4, &val);

	/* calculate subcache sizes */
	sc0 = !(val & BIT(0));
	sc1 = !(val & BIT(4));
	sc2 = !(val & BIT(8)) + !(val & BIT(9));
	sc3 = !(val & BIT(12)) + !(val & BIT(13));

	return (max(max(max(sc0, sc1), sc2), sc3) << 10) - 1;
}

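/*
 * L3 cache index disable is only supported on the L3 leaf (index 3),
 * and not on family 0x11 or on early family 0x10 revisions affected
 * by the errata checked below.
 */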
static void __cpuinit
amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
{
	if (index < 3)
		return;

	if (boot_cpu_data.x86 == 0x11)
		return;

	/* see errata #382 and #388 */
	if ((boot_cpu_data.x86 == 0x10) &&
	    ((boot_cpu_data.x86_model < 0x8) ||
	     (boot_cpu_data.x86_mask < 0x1)))
		return;

	this_leaf->can_disable = true;
	this_leaf->l3_indices = amd_calc_l3_indices();
}

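/*
 * Fill one _cpuid4_info_regs leaf, either from the native cpuid(4)
 * leaf or, on AMD, from the emulation above.
 */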
static int
__cpuinit cpuid4_cache_lookup_regs(int index,
				   struct _cpuid4_info_regs *this_leaf)
{
	union _cpuid4_leaf_eax eax;
	union _cpuid4_leaf_ebx ebx;
	union _cpuid4_leaf_ecx ecx;
	unsigned edx;

	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
		amd_cpuid4(index, &eax, &ebx, &ecx);
		if (boot_cpu_data.x86 >= 0x10)
			amd_check_l3_disable(index, this_leaf);
	} else {
		cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
	}

	if (eax.split.type == CACHE_TYPE_NULL)
		return -EIO; /* better error ? */

	this_leaf->eax = eax;
	this_leaf->ebx = ebx;
	this_leaf->ecx = ecx;
	this_leaf->size = (ecx.split.number_of_sets + 1) *
			  (ebx.split.coherency_line_size + 1) *
			  (ebx.split.physical_line_partition + 1) *
			  (ebx.split.ways_of_associativity + 1);
	return 0;
}

static int __cpuinit find_num_cache_leaves(void)
{
	unsigned int eax, ebx, ecx, edx;
	union _cpuid4_leaf_eax cache_eax;
	int i = -1;

	do {
		++i;
		/* Do cpuid(4) loop to find out num_cache_leaves */
		cpuid_count(4, i, &eax, &ebx, &ecx, &edx);
		cache_eax.full = eax;
	} while (cache_eax.split.type != CACHE_TYPE_NULL);
	return i;
}

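/*
 * Collect the cache sizes for this CPU: prefer the deterministic
 * cpuid(4) leaves when available and fall back to the legacy cpuid(2)
 * descriptor table (still needed for the P4 trace cache).
 */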
unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
	/* Cache sizes */
	unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
	unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
	unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
	unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
#ifdef CONFIG_X86_HT
	unsigned int cpu = c->cpu_index;
#endif

	if (c->cpuid_level > 3) {
		static int is_initialized;

		if (is_initialized == 0) {
			/* Init num_cache_leaves from boot CPU */
			num_cache_leaves = find_num_cache_leaves();
			is_initialized++;
		}

		/*
		 * Whenever possible use cpuid(4), deterministic cache
		 * parameters cpuid leaf to find the cache details
		 */
		for (i = 0; i < num_cache_leaves; i++) {
			struct _cpuid4_info_regs this_leaf;
			int retval;

			retval = cpuid4_cache_lookup_regs(i, &this_leaf);
			if (retval >= 0) {
				switch (this_leaf.eax.split.level) {
				case 1:
					if (this_leaf.eax.split.type ==
							CACHE_TYPE_DATA)
						new_l1d = this_leaf.size/1024;
					else if (this_leaf.eax.split.type ==
							CACHE_TYPE_INST)
						new_l1i = this_leaf.size/1024;
					break;
				case 2:
					new_l2 = this_leaf.size/1024;
					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
					index_msb = get_count_order(num_threads_sharing);
					l2_id = c->apicid >> index_msb;
					break;
				case 3:
					new_l3 = this_leaf.size/1024;
					num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
					index_msb = get_count_order(
							num_threads_sharing);
					l3_id = c->apicid >> index_msb;
					break;
				default:
					break;
				}
			}
		}
	}
	/*
	 * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
	 * trace cache
	 */
	if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
		/* supports eax=2  call */
		int j, n;
		unsigned int regs[4];
		unsigned char *dp = (unsigned char *)regs;
		int only_trace = 0;

		if (num_cache_leaves != 0 && c->x86 == 15)
			only_trace = 1;

		/* Number of times to iterate */
		n = cpuid_eax(2) & 0xFF;

		for (i = 0 ; i < n ; i++) {
			cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);

			/* If bit 31 is set, this is an unknown format */
			for (j = 0 ; j < 3 ; j++)
				if (regs[j] & (1 << 31))
					regs[j] = 0;

			/* Byte 0 is level count, not a descriptor */
			for (j = 1 ; j < 16 ; j++) {
				unsigned char des = dp[j];
				unsigned char k = 0;

				/* look up this descriptor in the table */
				while (cache_table[k].descriptor != 0) {
					if (cache_table[k].descriptor == des) {
						if (only_trace && cache_table[k].cache_type != LVL_TRACE)
							break;
						switch (cache_table[k].cache_type) {
						case LVL_1_INST:
							l1i += cache_table[k].size;
							break;
						case LVL_1_DATA:
							l1d += cache_table[k].size;
							break;
						case LVL_2:
							l2 += cache_table[k].size;
							break;
						case LVL_3:
							l3 += cache_table[k].size;
							break;
						case LVL_TRACE:
							trace += cache_table[k].size;
							break;
						}

						break;
					}

					k++;
				}
			}
		}
	}

	if (new_l1d)
		l1d = new_l1d;

	if (new_l1i)
		l1i = new_l1i;

	if (new_l2) {
		l2 = new_l2;
#ifdef CONFIG_X86_HT
		per_cpu(cpu_llc_id, cpu) = l2_id;
#endif
	}

	if (new_l3) {
		l3 = new_l3;
#ifdef CONFIG_X86_HT
		per_cpu(cpu_llc_id, cpu) = l3_id;
#endif
	}

	c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));

	return l2;
}

#ifdef CONFIG_SYSFS

/* pointer to _cpuid4_info array (for each cache leaf) */
static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info);
#define CPUID4_INFO_IDX(x, y)	(&((per_cpu(ici_cpuid4_info, x))[y]))

#ifdef CONFIG_SMP
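/*
 * Record which CPUs share this cache leaf.  For the AMD L3 the
 * topology-provided llc_shared_map is used, otherwise the sharing
 * mask is derived from num_threads_sharing and the APIC IDs.
 */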
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
	struct _cpuid4_info *this_leaf, *sibling_leaf;
	unsigned long num_threads_sharing;
	int index_msb, i, sibling;
	struct cpuinfo_x86 *c = &cpu_data(cpu);

	if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
		for_each_cpu(i, c->llc_shared_map) {
			if (!per_cpu(ici_cpuid4_info, i))
				continue;
			this_leaf = CPUID4_INFO_IDX(i, index);
			for_each_cpu(sibling, c->llc_shared_map) {
				if (!cpu_online(sibling))
					continue;
				set_bit(sibling, this_leaf->shared_cpu_map);
			}
		}
		return;
	}
	this_leaf = CPUID4_INFO_IDX(cpu, index);
	num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;

	if (num_threads_sharing == 1)
		cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
	else {
		index_msb = get_count_order(num_threads_sharing);

		for_each_online_cpu(i) {
			if (cpu_data(i).apicid >> index_msb ==
			    c->apicid >> index_msb) {
				cpumask_set_cpu(i,
					to_cpumask(this_leaf->shared_cpu_map));
				if (i != cpu && per_cpu(ici_cpuid4_info, i)) {
					sibling_leaf =
						CPUID4_INFO_IDX(i, index);
					cpumask_set_cpu(cpu, to_cpumask(
						sibling_leaf->shared_cpu_map));
				}
			}
		}
	}
}

static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
	struct _cpuid4_info *this_leaf, *sibling_leaf;
	int sibling;

	this_leaf = CPUID4_INFO_IDX(cpu, index);
	for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
		sibling_leaf = CPUID4_INFO_IDX(sibling, index);
		cpumask_clear_cpu(cpu,
				  to_cpumask(sibling_leaf->shared_cpu_map));
	}
}
#else
static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
{
}

static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
{
}
#endif

static void __cpuinit free_cache_attributes(unsigned int cpu)
{
	int i;

	for (i = 0; i < num_cache_leaves; i++)
		cache_remove_shared_cpu_map(cpu, i);

	kfree(per_cpu(ici_cpuid4_info, cpu));
	per_cpu(ici_cpuid4_info, cpu) = NULL;
}

static int
__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
{
	struct _cpuid4_info_regs *leaf_regs =
		(struct _cpuid4_info_regs *)this_leaf;

	return cpuid4_cache_lookup_regs(index, leaf_regs);
}

static void __cpuinit get_cpu_leaves(void *_retval)
{
	int j, *retval = _retval, cpu = smp_processor_id();

	/* Do cpuid and store the results */
	for (j = 0; j < num_cache_leaves; j++) {
		struct _cpuid4_info *this_leaf;
		this_leaf = CPUID4_INFO_IDX(cpu, j);
		*retval = cpuid4_cache_lookup(j, this_leaf);
		if (unlikely(*retval < 0)) {
			int i;

			for (i = 0; i < j; i++)
				cache_remove_shared_cpu_map(cpu, i);
			break;
		}
		cache_shared_cpu_map_setup(cpu, j);
	}
}

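/*
 * Allocate the per-cpu _cpuid4_info array for this CPU and fill it on
 * the target CPU via smp_call_function_single().
 */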
static int __cpuinit detect_cache_attributes(unsigned int cpu)
{
	int retval;

	if (num_cache_leaves == 0)
		return -ENOENT;

	per_cpu(ici_cpuid4_info, cpu) = kzalloc(
	    sizeof(struct _cpuid4_info) * num_cache_leaves, GFP_KERNEL);
	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
		return -ENOMEM;

	smp_call_function_single(cpu, get_cpu_leaves, &retval, true);
	if (retval) {
		kfree(per_cpu(ici_cpuid4_info, cpu));
		per_cpu(ici_cpuid4_info, cpu) = NULL;
	}

	return retval;
}

#include <linux/kobject.h>
#include <linux/sysfs.h>

extern struct sysdev_class cpu_sysdev_class; /* from drivers/base/cpu.c */

/* pointer to kobject for cpuX/cache */
static DEFINE_PER_CPU(struct kobject *, ici_cache_kobject);

struct _index_kobject {
	struct kobject kobj;
	unsigned int cpu;
	unsigned short index;
};

/* pointer to array of kobjects for cpuX/cache/indexY */
static DEFINE_PER_CPU(struct _index_kobject *, ici_index_kobject);
#define INDEX_KOBJECT_PTR(x, y)	(&((per_cpu(ici_index_kobject, x))[y]))

#define show_one_plus(file_name, object, val)				\
static ssize_t show_##file_name						\
			(struct _cpuid4_info *this_leaf, char *buf)	\
{									\
	return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
}

show_one_plus(level, eax.split.level, 0);
show_one_plus(coherency_line_size, ebx.split.coherency_line_size, 1);
show_one_plus(physical_line_partition, ebx.split.physical_line_partition, 1);
show_one_plus(ways_of_associativity, ebx.split.ways_of_associativity, 1);
show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);

static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
{
	return sprintf(buf, "%luK\n", this_leaf->size / 1024);
}

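/*
 * Print the shared_cpu_map either as a mask (type == 0) or as a CPU
 * list (type == 1), leaving room for the trailing newline and NUL.
 */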
static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
					int type, char *buf)
{
	ptrdiff_t len = PTR_ALIGN(buf + PAGE_SIZE - 1, PAGE_SIZE) - buf;
	int n = 0;

	if (len > 1) {
		const struct cpumask *mask;

		mask = to_cpumask(this_leaf->shared_cpu_map);
		n = type ?
			cpulist_scnprintf(buf, len-2, mask) :
			cpumask_scnprintf(buf, len-2, mask);
		buf[n++] = '\n';
		buf[n] = '\0';
	}
	return n;
}

static inline ssize_t show_shared_cpu_map(struct _cpuid4_info *leaf, char *buf)
{
	return show_shared_cpu_map_func(leaf, 0, buf);
}

static inline ssize_t show_shared_cpu_list(struct _cpuid4_info *leaf, char *buf)
{
	return show_shared_cpu_map_func(leaf, 1, buf);
}

static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf)
{
	switch (this_leaf->eax.split.type) {
	case CACHE_TYPE_DATA:
		return sprintf(buf, "Data\n");
	case CACHE_TYPE_INST:
		return sprintf(buf, "Instruction\n");
	case CACHE_TYPE_UNIFIED:
		return sprintf(buf, "Unified\n");
	default:
		return sprintf(buf, "Unknown\n");
	}
}

#define to_object(k)	container_of(k, struct _index_kobject, kobj)
#define to_attr(a)	container_of(a, struct _cache_attr, attr)
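/*
 * The cache_disable_[01] attributes below expose the two L3 cache
 * index disable slots of the northbridge (PCI config registers 0x1BC
 * and 0x1C0) on CPUs where can_disable is set.
 */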
static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf,
				  unsigned int index)
{
	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
	int node = amd_get_nb_id(cpu);
	struct pci_dev *dev = node_to_k8_nb_misc(node);
	unsigned int reg = 0;

	if (!this_leaf->can_disable)
		return -EINVAL;

	if (!dev)
		return -EINVAL;

	pci_read_config_dword(dev, 0x1BC + index * 4, &reg);
	return sprintf(buf, "0x%08x\n", reg);
}

#define SHOW_CACHE_DISABLE(index)					\
static ssize_t								\
show_cache_disable_##index(struct _cpuid4_info *this_leaf, char *buf)	\
{									\
	return show_cache_disable(this_leaf, buf, index);		\
}
SHOW_CACHE_DISABLE(0)
SHOW_CACHE_DISABLE(1)

static ssize_t store_cache_disable(struct _cpuid4_info *this_leaf,
	const char *buf, size_t count, unsigned int index)
{
	int cpu = cpumask_first(to_cpumask(this_leaf->shared_cpu_map));
	int node = amd_get_nb_id(cpu);
	struct pci_dev *dev = node_to_k8_nb_misc(node);
	unsigned long val = 0;

#define SUBCACHE_MASK	(3UL << 20)
#define SUBCACHE_INDEX	0xfff

	if (!this_leaf->can_disable)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!dev)
		return -EINVAL;

	if (strict_strtoul(buf, 10, &val) < 0)
		return -EINVAL;

	/* do not allow writes outside of allowed bits */
	if ((val & ~(SUBCACHE_MASK | SUBCACHE_INDEX)) ||
	    ((val & SUBCACHE_INDEX) > this_leaf->l3_indices))
		return -EINVAL;

	val |= BIT(30);
	pci_write_config_dword(dev, 0x1BC + index * 4, val);
	/*
	 * We need to WBINVD on a core on the node containing the L3 cache which
	 * indices we disable therefore a simple wbinvd() is not sufficient.
	 */
	wbinvd_on_cpu(cpu);
	pci_write_config_dword(dev, 0x1BC + index * 4, val | BIT(31));
	return count;
}

#define STORE_CACHE_DISABLE(index)					\
static ssize_t								\
store_cache_disable_##index(struct _cpuid4_info *this_leaf,		\
			    const char *buf, size_t count)		\
{									\
	return store_cache_disable(this_leaf, buf, count, index);	\
}
STORE_CACHE_DISABLE(0)
STORE_CACHE_DISABLE(1)

struct _cache_attr {
	struct attribute attr;
	ssize_t (*show)(struct _cpuid4_info *, char *);
	ssize_t (*store)(struct _cpuid4_info *, const char *, size_t count);
};

#define define_one_ro(_name) \
static struct _cache_attr _name = \
	__ATTR(_name, 0444, show_##_name, NULL)

define_one_ro(level);
define_one_ro(type);
define_one_ro(coherency_line_size);
define_one_ro(physical_line_partition);
define_one_ro(ways_of_associativity);
define_one_ro(number_of_sets);
define_one_ro(size);
define_one_ro(shared_cpu_map);
define_one_ro(shared_cpu_list);

static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
		show_cache_disable_0, store_cache_disable_0);
static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
		show_cache_disable_1, store_cache_disable_1);

#define DEFAULT_SYSFS_CACHE_ATTRS	\
	&type.attr,			\
	&level.attr,			\
	&coherency_line_size.attr,	\
	&physical_line_partition.attr,	\
	&ways_of_associativity.attr,	\
	&number_of_sets.attr,		\
	&size.attr,			\
	&shared_cpu_map.attr,		\
	&shared_cpu_list.attr

static struct attribute *default_attrs[] = {
	DEFAULT_SYSFS_CACHE_ATTRS,
	NULL
};

static struct attribute *default_l3_attrs[] = {
	DEFAULT_SYSFS_CACHE_ATTRS,
	&cache_disable_0.attr,
	&cache_disable_1.attr,
	NULL
};

static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
{
	struct _cache_attr *fattr = to_attr(attr);
	struct _index_kobject *this_leaf = to_object(kobj);
	ssize_t ret;

	ret = fattr->show ?
		fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
			buf) :
		0;
	return ret;
}

static ssize_t store(struct kobject *kobj, struct attribute *attr,
		     const char *buf, size_t count)
{
	struct _cache_attr *fattr = to_attr(attr);
	struct _index_kobject *this_leaf = to_object(kobj);
	ssize_t ret;

	ret = fattr->store ?
		fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
			buf, count) :
		0;
	return ret;
}

static struct sysfs_ops sysfs_ops = {
	.show   = show,
	.store  = store,
};

static struct kobj_type ktype_cache = {
	.sysfs_ops	= &sysfs_ops,
	.default_attrs	= default_attrs,
};

static struct kobj_type ktype_percpu_entry = {
	.sysfs_ops	= &sysfs_ops,
};

static void __cpuinit cpuid4_cache_sysfs_exit(unsigned int cpu)
{
	kfree(per_cpu(ici_cache_kobject, cpu));
	kfree(per_cpu(ici_index_kobject, cpu));
	per_cpu(ici_cache_kobject, cpu) = NULL;
	per_cpu(ici_index_kobject, cpu) = NULL;
	free_cache_attributes(cpu);
}

static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
{
	int err;

	if (num_cache_leaves == 0)
		return -ENOENT;

	err = detect_cache_attributes(cpu);
	if (err)
		return err;

	/* Allocate all required memory */
	per_cpu(ici_cache_kobject, cpu) =
		kzalloc(sizeof(struct kobject), GFP_KERNEL);
	if (unlikely(per_cpu(ici_cache_kobject, cpu) == NULL))
		goto err_out;

	per_cpu(ici_index_kobject, cpu) = kzalloc(
	    sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
	if (unlikely(per_cpu(ici_index_kobject, cpu) == NULL))
		goto err_out;

	return 0;

err_out:
	cpuid4_cache_sysfs_exit(cpu);
	return -ENOMEM;
}

static DECLARE_BITMAP(cache_dev_map, NR_CPUS);

/* Add/Remove cache interface for CPU device */
static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long i, j;
	struct _index_kobject *this_object;
	struct _cpuid4_info *this_leaf;
	int retval;

	retval = cpuid4_cache_sysfs_init(cpu);
	if (unlikely(retval < 0))
		return retval;

	retval = kobject_init_and_add(per_cpu(ici_cache_kobject, cpu),
				      &ktype_percpu_entry,
				      &sys_dev->kobj, "%s", "cache");
	if (retval < 0) {
		cpuid4_cache_sysfs_exit(cpu);
		return retval;
	}

	for (i = 0; i < num_cache_leaves; i++) {
		this_object = INDEX_KOBJECT_PTR(cpu, i);
		this_object->cpu = cpu;
		this_object->index = i;

		this_leaf = CPUID4_INFO_IDX(cpu, i);

		if (this_leaf->can_disable)
			ktype_cache.default_attrs = default_l3_attrs;
		else
			ktype_cache.default_attrs = default_attrs;

		retval = kobject_init_and_add(&(this_object->kobj),
					      &ktype_cache,
					      per_cpu(ici_cache_kobject, cpu),
					      "index%1lu", i);
		if (unlikely(retval)) {
			for (j = 0; j < i; j++)
				kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
			kobject_put(per_cpu(ici_cache_kobject, cpu));
			cpuid4_cache_sysfs_exit(cpu);
			return retval;
		}
		kobject_uevent(&(this_object->kobj), KOBJ_ADD);
	}
	cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));

	kobject_uevent(per_cpu(ici_cache_kobject, cpu), KOBJ_ADD);
	return 0;
}

static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
{
	unsigned int cpu = sys_dev->id;
	unsigned long i;

	if (per_cpu(ici_cpuid4_info, cpu) == NULL)
		return;
	if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
		return;
	cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));

	for (i = 0; i < num_cache_leaves; i++)
		kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
	kobject_put(per_cpu(ici_cache_kobject, cpu));
	cpuid4_cache_sysfs_exit(cpu);
}

static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
					    unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;
	struct sys_device *sys_dev;

	sys_dev = get_cpu_sysdev(cpu);
	switch (action) {
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		cache_add_dev(sys_dev);
		break;
	case CPU_DEAD:
	case CPU_DEAD_FROZEN:
		cache_remove_dev(sys_dev);
		break;
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
	.notifier_call = cacheinfo_cpu_callback,
};

static int __cpuinit cache_sysfs_init(void)
{
	int i;

	if (num_cache_leaves == 0)
		return 0;

	for_each_online_cpu(i) {
		int err;
		struct sys_device *sys_dev = get_cpu_sysdev(i);

		err = cache_add_dev(sys_dev);
		if (err)
			return err;
	}
	register_hotcpu_notifier(&cacheinfo_cpu_notifier);
	return 0;
}

device_initcall(cache_sysfs_init);

#endif