2 * (c) 2005 Advanced Micro Devices, Inc.
3 * Your use of this code is subject to the terms and conditions of the
4 * GNU general public license version 2. See "COPYING" or
5 * http://www.gnu.org/licenses/gpl.html
7 * Written by Jacob Shin - AMD, Inc.
9 * Support : jacob.shin@amd.com
11 * MC4_MISC0 DRAM ECC Error Threshold available under AMD K8 Rev F.
12 * MC4_MISC0 exists per physical processor.
16 #include <linux/cpu.h>
17 #include <linux/errno.h>
18 #include <linux/init.h>
19 #include <linux/interrupt.h>
20 #include <linux/kobject.h>
21 #include <linux/notifier.h>
22 #include <linux/sched.h>
23 #include <linux/smp.h>
24 #include <linux/sysdev.h>
25 #include <linux/sysfs.h>
29 #include <asm/percpu.h>
/*
 * Log prefix, driver version string, and bit-field constants.
 * The *_HI masks are applied to the upper 32-bit half of a bank's MISC MSR
 * (the mci_misc_hi value filled in by rdmsr()); MASK_OVERFLOW is tested
 * against the full 64-bit value read with rdmsrl().
 */
32 #define PFX "mce_threshold: "
33 #define VERSION "version 1.00.9"
/* upper bound enforced on threshold_limit; also used to mask the count field */
35 #define THRESHOLD_MAX 0xFFF
/* value OR-ed into the MASK_INT_TYPE_HI field by threshold_restart_bank() */
36 #define INT_TYPE_APIC 0x00020000
/* bit 31 of the high half; bits 30 and 29 are tested as MASK_VALID_HI >> 1 / >> 2 */
37 #define MASK_VALID_HI 0x80000000
/* shifted right by 20 to obtain the argument of setup_threshold_lvt() */
38 #define MASK_LVTOFF_HI 0x00F00000
/* always set before the MSR is written back */
39 #define MASK_COUNT_EN_HI 0x00080000
40 #define MASK_INT_TYPE_HI 0x00060000
41 #define MASK_OVERFLOW_HI 0x00010000
42 #define MASK_ERR_COUNT_HI 0x00000FFF
/* same bit position as MASK_OVERFLOW_HI, expressed against the 64-bit value */
43 #define MASK_OVERFLOW 0x0001000000000000L
/*
 * Software state for one threshold bank.
 * NOTE(review): the member list is not visible in this excerpt; the rest of
 * the file references the members cpu, bank, interrupt_enable,
 * threshold_limit and kobj.
 */
45 struct threshold_bank
{
/*
 * Template passed to threshold_restart_bank() by mce_amd_feature_init():
 * interrupt_enable is 0 and threshold_limit is THRESHOLD_MAX.
 */
53 static struct threshold_bank threshold_defaults
= {
54 .interrupt_enable
= 0,
55 .threshold_limit
= THRESHOLD_MAX
,
/*
 * Indexed by bank number. A non-zero entry is combined with a non-zero
 * cpu_core_id[] in threshold_create_bank() to take the "symlink" path.
 * NOTE(review): the initializer values are not visible in this excerpt.
 */
59 static unsigned char shared_bank
[NR_BANKS
] = {
/* per-CPU bitmask; bit N is set by mce_amd_feature_init() for bank N */
64 static DEFINE_PER_CPU(unsigned char, bank_map
); /* see which banks are on */
/*
 * Reprogram the threshold counter MSR of one bank from the state in *b.
 *
 * b:         bank descriptor; b->bank selects the MSR
 *            (MSR_IA32_MC0_MISC + bank * 4) and b->threshold_limit
 *            determines the value loaded into the count field.
 * reset:     non-zero clears the count and overflow fields and reloads the
 *            count with THRESHOLD_MAX - threshold_limit. Forced to 1 when
 *            threshold_limit is below the current count field.
 * old_limit: previous limit; when non-zero and no reset is requested, the
 *            current count is moved by (old_limit - threshold_limit) and
 *            masked to 12 bits.
 *
 * The low 32 bits are written back unchanged and MASK_COUNT_EN_HI is always
 * set in the high half.
 */
70 /* must be called with correct cpu affinity */
71 static void threshold_restart_bank(struct threshold_bank
*b
,
72 int reset
, u16 old_limit
)
74 u32 mci_misc_hi
, mci_misc_lo
;
76 rdmsr(MSR_IA32_MC0_MISC
+ b
->bank
* 4, mci_misc_lo
, mci_misc_hi
);
78 if (b
->threshold_limit
< (mci_misc_hi
& THRESHOLD_MAX
))
79 reset
= 1; /* limit cannot be lower than err count */
81 if (reset
) { /* reset err count and overflow bit */
/* NOTE(review): the assignment target of this expression is not visible in this excerpt */
83 (mci_misc_hi
& ~(MASK_ERR_COUNT_HI
| MASK_OVERFLOW_HI
)) |
84 (THRESHOLD_MAX
- b
->threshold_limit
);
85 } else if (old_limit
) { /* change limit w/o reset */
86 int new_count
= (mci_misc_hi
& THRESHOLD_MAX
) +
87 (old_limit
- b
->threshold_limit
);
88 mci_misc_hi
= (mci_misc_hi
& ~MASK_ERR_COUNT_HI
) |
89 (new_count
& THRESHOLD_MAX
);
/*
 * Two arms of a conditional expression: the first sets the interrupt type
 * field to INT_TYPE_APIC, the second clears it.
 * NOTE(review): the selecting condition is not visible in this excerpt;
 * presumably b->interrupt_enable -- confirm.
 */
93 (mci_misc_hi
= (mci_misc_hi
& ~MASK_INT_TYPE_HI
) | INT_TYPE_APIC
) :
94 (mci_misc_hi
&= ~MASK_INT_TYPE_HI
);
96 mci_misc_hi
|= MASK_COUNT_EN_HI
;
97 wrmsr(MSR_IA32_MC0_MISC
+ b
->bank
* 4, mci_misc_lo
, mci_misc_hi
);
/*
 * Scan the MISC MSR of every bank (0 .. NR_BANKS-1) on the executing CPU,
 * mark banks in this CPU's bank_map, set up the threshold LVT entry and
 * program the bank with threshold_defaults.
 *
 * c: CPU description; not referenced by the lines visible in this excerpt.
 */
100 void __cpuinit
mce_amd_feature_init(struct cpuinfo_x86
*c
)
103 u32 mci_misc_lo
, mci_misc_hi
;
104 unsigned int cpu
= smp_processor_id();
106 for (bank
= 0; bank
< NR_BANKS
; ++bank
) {
107 rdmsr(MSR_IA32_MC0_MISC
+ bank
* 4, mci_misc_lo
, mci_misc_hi
);
/*
 * Tests bit 31 clear, bit 30 clear, or bit 29 set in the high half
 * (">>" binds tighter than "&").
 * NOTE(review): the statement executed when the test is true is not
 * visible in this excerpt; presumably the bank is skipped.
 */
109 /* !valid, !counter present, bios locked */
110 if (!(mci_misc_hi
& MASK_VALID_HI
) ||
111 !(mci_misc_hi
& MASK_VALID_HI
>> 1) ||
112 (mci_misc_hi
& MASK_VALID_HI
>> 2))
/* record the bank as present for this CPU */
115 per_cpu(bank_map
, cpu
) |= (1 << bank
);
/* NOTE(review): the body of this test is not visible in this excerpt */
118 if (shared_bank
[bank
] && cpu_core_id
[cpu
])
/* pass the 4-bit LVT offset field (bits 20-23 of the high half) */
122 setup_threshold_lvt((mci_misc_hi
& MASK_LVTOFF_HI
) >> 20);
/* reuse the shared defaults template for this cpu/bank pair */
123 threshold_defaults
.cpu
= cpu
;
124 threshold_defaults
.bank
= bank
;
125 threshold_restart_bank(&threshold_defaults
, 0, 0);
/*
 * APIC Interrupt Handler
 *
 * The threshold interrupt handler services THRESHOLD_APIC_VECTOR.
 * The interrupt fires when error_count reaches threshold_limit.
 * The handler simply logs the event to mcelog with a software-defined
 * bank number.
 */
/*
 * Threshold interrupt entry point.
 * Zeroes the record m, stamps it with the current CPU, then walks the banks
 * in ascending order: for each one it stores MCE_THRESHOLD_BASE + bank in
 * m.bank, reads the 64-bit MISC MSR into m.misc and tests MASK_OVERFLOW.
 * NOTE(review): the declaration of m and the action taken when the overflow
 * bit is set are not visible in this excerpt.
 */
138 asmlinkage
void mce_threshold_interrupt(void)
147 memset(&m
, 0, sizeof(m
));
149 m
.cpu
= smp_processor_id();
151 /* assume first bank caused it */
152 for (bank
= 0; bank
< NR_BANKS
; ++bank
) {
153 m
.bank
= MCE_THRESHOLD_BASE
+ bank
;
154 rdmsrl(MSR_IA32_MC0_MISC
+ bank
* 4, m
.misc
);
156 if (m
.misc
& MASK_OVERFLOW
) {
/* sysdev class named "threshold"; registered in threshold_init_device() */
169 static struct sysdev_class threshold_sysclass
= {
170 set_kset_name("threshold"),
/* one sys_device per CPU; registered in threshold_create_device() */
173 static DEFINE_PER_CPU(struct sys_device
, device_threshold
);
/*
 * A sysfs attribute bound to a threshold_bank: show() formats into a char
 * buffer, store() receives the written buffer and its length.
 */
175 struct threshold_attr
{
176 struct attribute attr
;
177 ssize_t(*show
) (struct threshold_bank
*, char *);
178 ssize_t(*store
) (struct threshold_bank
*, const char *, size_t count
);
/* per-CPU table of bank descriptors, indexed by bank number */
181 static DEFINE_PER_CPU(struct threshold_bank
*, threshold_banks
[NR_BANKS
]);
/*
 * Restrict the current task to a single CPU.
 * Captures current->cpus_allowed, builds a mask containing only `cpu` and
 * applies it with set_cpus_allowed().
 * NOTE(review): the return statement is not visible in this excerpt;
 * callers hand the result to affinity_restore(), so presumably the
 * captured mask is returned.
 */
183 static cpumask_t
affinity_set(unsigned int cpu
)
185 cpumask_t oldmask
= current
->cpus_allowed
;
186 cpumask_t newmask
= CPU_MASK_NONE
;
187 cpu_set(cpu
, newmask
);
188 set_cpus_allowed(current
, newmask
);
/* Re-apply a previously captured CPU mask to the current task. */
192 static void affinity_restore(cpumask_t oldmask
)
194 set_cpus_allowed(current
, oldmask
);
/*
 * SHOW_FIELDS(name) generates show_<name>(), which prints b-><name> as
 * hexadecimal followed by a newline and returns sprintf()'s result.
 * Instantiated below for interrupt_enable and threshold_limit.
 */
197 #define SHOW_FIELDS(name) \
198 static ssize_t show_ ## name(struct threshold_bank * b, char *buf) \
200 return sprintf(buf, "%lx\n", (unsigned long) b->name); \
202 SHOW_FIELDS(interrupt_enable
)
203 SHOW_FIELDS(threshold_limit
)
/*
 * sysfs store handler for interrupt_enable.
 * Parses buf with simple_strtoul() (base 0, so the prefix selects the
 * radix), stores the value normalised to 0 or 1, then reprograms the bank
 * on its owning CPU without resetting the counter.
 * NOTE(review): input validation and the return value are not visible in
 * this excerpt.
 */
205 static ssize_t
store_interrupt_enable(struct threshold_bank
*b
,
206 const char *buf
, size_t count
)
210 unsigned long new = simple_strtoul(buf
, &end
, 0);
213 b
->interrupt_enable
= !!new;
/* MSR access must happen on the CPU that owns the bank */
215 oldmask
= affinity_set(b
->cpu
);
216 threshold_restart_bank(b
, 0, 0);
217 affinity_restore(oldmask
);
/*
 * sysfs store handler for threshold_limit.
 * Parses buf with simple_strtoul() (base 0), remembers the previous limit,
 * installs the new one and reprograms the bank on its owning CPU, passing
 * the previous limit so the count can be adjusted rather than reset.
 * NOTE(review): the handling of values above THRESHOLD_MAX and the return
 * value are not visible in this excerpt.
 */
222 static ssize_t
store_threshold_limit(struct threshold_bank
*b
,
223 const char *buf
, size_t count
)
228 unsigned long new = simple_strtoul(buf
, &end
, 0);
231 if (new > THRESHOLD_MAX
)
235 old
= b
->threshold_limit
;
236 b
->threshold_limit
= new;
/* MSR access must happen on the CPU that owns the bank */
238 oldmask
= affinity_set(b
->cpu
);
239 threshold_restart_bank(b
, 0, old
);
240 affinity_restore(oldmask
);
/*
 * sysfs show handler for error_count.
 * Reads the bank's MISC MSR on its owning CPU and prints, in hexadecimal,
 * the 12-bit count field minus the preload value
 * (THRESHOLD_MAX - threshold_limit) used by threshold_restart_bank().
 * Returns sprintf()'s result.
 */
245 static ssize_t
show_error_count(struct threshold_bank
*b
, char *buf
)
249 oldmask
= affinity_set(b
->cpu
);
250 rdmsr(MSR_IA32_MC0_MISC
+ b
->bank
* 4, low
, high
); /* ignore low 32 */
251 affinity_restore(oldmask
);
252 return sprintf(buf
, "%x\n",
253 (high
& 0xFFF) - (THRESHOLD_MAX
- b
->threshold_limit
));
/*
 * sysfs store handler for error_count.
 * Requests a counter reset (reset = 1) for the bank on its owning CPU.
 * The visible lines do not parse buf.
 * NOTE(review): the return value is not visible in this excerpt.
 */
256 static ssize_t
store_error_count(struct threshold_bank
*b
,
257 const char *buf
, size_t count
)
260 oldmask
= affinity_set(b
->cpu
);
261 threshold_restart_bank(b
, 1, 0);
262 affinity_restore(oldmask
);
/*
 * THRESHOLD_ATTR builds a threshold_attr initializer from an attribute
 * name and file mode.
 * NOTE(review): the lines wiring _show/_store are not visible in this
 * excerpt.
 */
266 #define THRESHOLD_ATTR(_name,_mode,_show,_store) { \
267 .attr = {.name = __stringify(_name), .mode = _mode }, \
/*
 * ATTR_FIELDS(name) defines a threshold_attr object called `name` with mode
 * 0644, using show_<name> and store_<name> as callbacks.
 */
272 #define ATTR_FIELDS(name) \
273 static struct threshold_attr name = \
274 THRESHOLD_ATTR(name, 0644, show_## name, store_## name)
276 ATTR_FIELDS(interrupt_enable
);
277 ATTR_FIELDS(threshold_limit
);
278 ATTR_FIELDS(error_count
);
/* attribute list referenced by threshold_ktype.default_attrs */
280 static struct attribute
*default_attrs
[] = {
281 &interrupt_enable
.attr
,
282 &threshold_limit
.attr
,
/* recover the enclosing threshold_bank / threshold_attr from an embedded member */
287 #define to_bank(k) container_of(k,struct threshold_bank,kobj)
288 #define to_attr(a) container_of(a,struct threshold_attr,attr)
/*
 * Generic sysfs show dispatcher: maps kobj/attr back to their containing
 * threshold_bank/threshold_attr and calls the attribute's show callback,
 * or yields -EIO when the attribute has none.
 */
290 static ssize_t
show(struct kobject
*kobj
, struct attribute
*attr
, char *buf
)
292 struct threshold_bank
*b
= to_bank(kobj
);
293 struct threshold_attr
*a
= to_attr(attr
);
295 ret
= a
->show
? a
->show(b
, buf
) : -EIO
;
/*
 * Generic sysfs store dispatcher: maps kobj/attr back to their containing
 * threshold_bank/threshold_attr and calls the attribute's store callback
 * with the written buffer and length, or yields -EIO when the attribute
 * has none.
 */
299 static ssize_t
store(struct kobject
*kobj
, struct attribute
*attr
,
300 const char *buf
, size_t count
)
302 struct threshold_bank
*b
= to_bank(kobj
);
303 struct threshold_attr
*a
= to_attr(attr
);
305 ret
= a
->store
? a
->store(b
, buf
, count
) : -EIO
;
/*
 * sysfs operations table for bank kobjects.
 * NOTE(review): its members are not visible in this excerpt.
 */
309 static struct sysfs_ops threshold_ops
= {
/* kobject type assigned to every bank kobject in threshold_create_bank() */
314 static struct kobj_type threshold_ktype
= {
315 .sysfs_ops
= &threshold_ops
,
316 .default_attrs
= default_attrs
,
/*
 * Create the sysfs representation of one bank for one CPU.
 *
 * cpu:  CPU whose device_threshold entry becomes the parent.
 * bank: bank number; also used to build the name "bank<N>".
 *
 * For a shared bank on a CPU with a non-zero core id, the descriptor owned
 * by the first CPU in cpu_core_map[cpu] is reused and a symlink is created
 * under this CPU's device. Otherwise a new descriptor is allocated, zeroed,
 * given the default settings and registered as a kobject. In both paths the
 * descriptor pointer is stored in this CPU's threshold_banks[bank].
 * NOTE(review): error checks and the return value are not visible in this
 * excerpt.
 */
319 /* symlinks sibling shared banks to first core. first core owns dir/files. */
320 static __cpuinit
int threshold_create_bank(unsigned int cpu
, int bank
)
323 struct threshold_bank
*b
= NULL
;
326 if (cpu_core_id
[cpu
] && shared_bank
[bank
]) { /* symlink */
328 unsigned lcpu
= first_cpu(cpu_core_map
[cpu
]);
329 if (cpu_core_id
[lcpu
])
330 goto out
; /* first core not up yet */
/* borrow the descriptor already created for the first core */
332 b
= per_cpu(threshold_banks
, lcpu
)[bank
];
335 sprintf(name
, "bank%i", bank
);
336 err
= sysfs_create_link(&per_cpu(device_threshold
, cpu
).kobj
,
340 per_cpu(threshold_banks
, cpu
)[bank
] = b
;
/* owning path: allocate and initialise a fresh descriptor */
345 b
= kmalloc(sizeof(struct threshold_bank
), GFP_KERNEL
);
350 memset(b
, 0, sizeof(struct threshold_bank
));
354 b
->interrupt_enable
= 0;
355 b
->threshold_limit
= THRESHOLD_MAX
;
356 kobject_set_name(&b
->kobj
, "bank%i", bank
);
357 b
->kobj
.parent
= &per_cpu(device_threshold
, cpu
).kobj
;
358 b
->kobj
.ktype
= &threshold_ktype
;
360 err
= kobject_register(&b
->kobj
);
365 per_cpu(threshold_banks
, cpu
)[bank
] = b
;
/*
 * Register this CPU's sys_device under threshold_sysclass (id = cpu), then
 * call threshold_create_bank() for banks selected through this CPU's
 * bank_map.
 * NOTE(review): the statement guarded by the bank_map test, the error
 * handling and the return value are not visible in this excerpt.
 */
370 /* create dir/files for all valid threshold banks */
371 static __cpuinit
int threshold_create_device(unsigned int cpu
)
376 per_cpu(device_threshold
, cpu
).id
= cpu
;
377 per_cpu(device_threshold
, cpu
).cls
= &threshold_sysclass
;
378 err
= sysdev_register(&per_cpu(device_threshold
, cpu
));
382 for (bank
= 0; bank
< NR_BANKS
; ++bank
) {
/* bit for this bank not set in bank_map */
383 if (!(per_cpu(bank_map
, cpu
) & 1 << bank
))
385 err
= threshold_create_bank(cpu
, bank
);
393 #ifdef CONFIG_HOTPLUG_CPU
/*
 * Let's be hotplug friendly.
 * On multi-core processors, the first core always takes ownership of the
 * shared sysfs dir/files, and the rest of the cores are symlinked to it.
 */
/*
 * Tear down the sysfs representation of one bank for one CPU.
 *
 * For a shared bank whose kobject reference count exceeds 2, only this
 * CPU's "bank<N>" symlink is removed and its threshold_banks[bank] slot is
 * cleared. The remaining visible lines unregister the kobject and free the
 * descriptor stored in the slot.
 * NOTE(review): the branch structure between the two paths and any NULL
 * check on b are not visible in this excerpt. The slot is not visibly
 * cleared after kfree() -- confirm it cannot be reused afterwards.
 */
400 /* cpu hotplug call removes all symlinks before first core dies */
401 static __cpuinit
void threshold_remove_bank(unsigned int cpu
, int bank
)
403 struct threshold_bank
*b
;
406 b
= per_cpu(threshold_banks
, cpu
)[bank
];
409 if (shared_bank
[bank
] && atomic_read(&b
->kobj
.kref
.refcount
) > 2) {
410 sprintf(name
, "bank%i", bank
);
411 sysfs_remove_link(&per_cpu(device_threshold
, cpu
).kobj
, name
);
412 per_cpu(threshold_banks
, cpu
)[bank
] = NULL
;
414 kobject_unregister(&b
->kobj
);
415 kfree(per_cpu(threshold_banks
, cpu
)[bank
]);
/*
 * Call threshold_remove_bank() for banks selected through this CPU's
 * bank_map, then unregister the CPU's sys_device.
 * NOTE(review): the statement guarded by the bank_map test is not visible
 * in this excerpt.
 */
419 static __cpuinit
void threshold_remove_device(unsigned int cpu
)
423 for (bank
= 0; bank
< NR_BANKS
; ++bank
) {
/* bit for this bank not set in bank_map */
424 if (!(per_cpu(bank_map
, cpu
) & 1 << bank
))
426 threshold_remove_bank(cpu
, bank
);
428 sysdev_unregister(&per_cpu(device_threshold
, cpu
));
/*
 * Iterate over the CPUs in cpu_core_map[cpu] and, per bank, call
 * threshold_create_bank() for the sibling. The bank loop tests both this
 * CPU's bank_map bit and shared_bank[bank].
 * NOTE(review): the statements guarded by the cpu_core_id, bank_map and
 * shared_bank tests, and the return value, are not visible in this excerpt.
 */
431 /* link all existing siblings when first core comes up */
432 static __cpuinit
int threshold_create_symlinks(unsigned int cpu
)
435 unsigned int lcpu
= 0;
437 if (cpu_core_id
[cpu
])
439 for_each_cpu_mask(lcpu
, cpu_core_map
[cpu
]) {
442 for (bank
= 0; bank
< NR_BANKS
; ++bank
) {
443 if (!(per_cpu(bank_map
, cpu
) & 1 << bank
))
445 if (!shared_bank
[bank
])
447 err
= threshold_create_bank(lcpu
, bank
);
/*
 * Iterate over the CPUs in cpu_core_map[cpu] and, per bank, call
 * threshold_remove_bank() for the sibling. The bank loop tests both this
 * CPU's bank_map bit and shared_bank[bank].
 * NOTE(review): the statements guarded by the cpu_core_id, bank_map and
 * shared_bank tests are not visible in this excerpt.
 */
453 /* remove all symlinks before first core dies. */
454 static __cpuinit
void threshold_remove_symlinks(unsigned int cpu
)
457 unsigned int lcpu
= 0;
458 if (cpu_core_id
[cpu
])
460 for_each_cpu_mask(lcpu
, cpu_core_map
[cpu
]) {
463 for (bank
= 0; bank
< NR_BANKS
; ++bank
) {
464 if (!(per_cpu(bank_map
, cpu
) & 1 << bank
))
466 if (!shared_bank
[bank
])
468 threshold_remove_bank(lcpu
, bank
);
/*
 * Variants of the hotplug helpers used when CONFIG_HOTPLUG_CPU is not
 * defined.
 * NOTE(review): their bodies are not visible in this excerpt; presumably
 * empty stubs -- confirm.
 */
472 #else /* !CONFIG_HOTPLUG_CPU */
473 static __cpuinit
void threshold_create_symlinks(unsigned int cpu
)
476 static __cpuinit
void threshold_remove_symlinks(unsigned int cpu
)
479 static void threshold_remove_device(unsigned int cpu
)
/*
 * CPU notifier callback.
 *
 * nfb:    notifier block (not referenced by the visible lines).
 * action: hotplug event; CPU_DOWN_PREPARE and CPU_DOWN_FAILED are the case
 *         labels visible here.
 * hcpu:   CPU number carried in a pointer-sized value.
 *
 * Visible handling: one event creates the device and then the symlinks;
 * CPU_DOWN_PREPARE removes the symlinks; CPU_DOWN_FAILED recreates them;
 * one event removes the device.
 * NOTE(review): the remaining case labels, break statements and the return
 * value are not visible in this excerpt.
 */
484 /* get notified when a cpu comes on/off */
485 static int threshold_cpu_callback(struct notifier_block
*nfb
,
486 unsigned long action
, void *hcpu
)
488 /* cpu was unsigned int to begin with */
489 unsigned int cpu
= (unsigned long)hcpu
;
496 threshold_create_device(cpu
);
497 threshold_create_symlinks(cpu
);
499 case CPU_DOWN_PREPARE
:
500 threshold_remove_symlinks(cpu
);
502 case CPU_DOWN_FAILED
:
503 threshold_create_symlinks(cpu
);
506 threshold_remove_device(cpu
);
/* notifier block registered in threshold_init_device(); routes events to threshold_cpu_callback() */
515 static struct notifier_block threshold_cpu_notifier
= {
516 .notifier_call
= threshold_cpu_callback
,
/*
 * Device initcall: registers threshold_sysclass, creates the threshold
 * device for every CPU that is already online, then registers the CPU
 * notifier so later hotplug events are handled by threshold_cpu_callback().
 * NOTE(review): error handling and the return value are not visible in
 * this excerpt.
 */
519 static __init
int threshold_init_device(void)
524 err
= sysdev_class_register(&threshold_sysclass
);
528 /* to hit CPUs online before the notifier is up */
529 for_each_online_cpu(lcpu
) {
530 err
= threshold_create_device(lcpu
);
534 register_cpu_notifier(&threshold_cpu_notifier
);
540 device_initcall(threshold_init_device
);