#include <linux/module.h>
#include <linux/slab.h>

#include "mce_amd.h"

static struct amd_decoder_ops *fam_ops;

static u8 nb_err_cpumask = 0xf;

static bool report_gart_errors;
static void (*nb_bus_decoder)(int node_id, struct mce *m, u32 nbcfg);

void amd_report_gart_errors(bool v)
{
	report_gart_errors = v;
}
EXPORT_SYMBOL_GPL(amd_report_gart_errors);

void amd_register_ecc_decoder(void (*f)(int, struct mce *, u32))
{
	nb_bus_decoder = f;
}
EXPORT_SYMBOL_GPL(amd_register_ecc_decoder);

void amd_unregister_ecc_decoder(void (*f)(int, struct mce *, u32))
{
	if (nb_bus_decoder) {
		WARN_ON(nb_bus_decoder != f);

		nb_bus_decoder = NULL;
	}
}
EXPORT_SYMBOL_GPL(amd_unregister_ecc_decoder);

/*
 * string representation for the different MCA reported error types, see F3x48
 * or MSR0000_0411.
 */

/* transaction type */
const char *tt_msgs[] = { "INSN", "DATA", "GEN", "RESV" };
EXPORT_SYMBOL_GPL(tt_msgs);

/* cache level */
const char *ll_msgs[] = { "RESV", "L1", "L2", "L3/GEN" };
EXPORT_SYMBOL_GPL(ll_msgs);

/* memory transaction type */
const char *rrrr_msgs[] = {
	"GEN", "RD", "WR", "DRD", "DWR", "IRD", "PRF", "EV", "SNP"
};
EXPORT_SYMBOL_GPL(rrrr_msgs);

/* participating processor */
const char *pp_msgs[] = { "SRC", "RES", "OBS", "GEN" };
EXPORT_SYMBOL_GPL(pp_msgs);

/* request timeout */
const char *to_msgs[] = { "no timeout", "timed out" };
EXPORT_SYMBOL_GPL(to_msgs);

/* memory or i/o */
const char *ii_msgs[] = { "MEM", "RESV", "IO", "GEN" };
EXPORT_SYMBOL_GPL(ii_msgs);

static const char *f10h_nb_mce_desc[] = {
	"HT link data error",
	"Protocol error (link, L3, probe filter, etc.)",
	"Parity error in NB-internal arrays",
	"Link Retry due to IO link transmission error",
	"L3 ECC data cache error",
	"ECC error in L3 cache tag",
	"L3 LRU parity bits error",
	"ECC Error in the Probe Filter directory"
};
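
/*
 * Family-specific DC (data cache) MCE decoders: each returns true if the
 * error signature was recognized and decoded, false otherwise.
 */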
static bool f12h_dc_mce(u16 ec)
{
	bool ret = false;

	if (MEM_ERROR(ec)) {
		u8 ll = ec & 0x3;
		ret = true;

		if (ll == LL_L2)
			pr_cont("during L1 linefill from L2.\n");
		else if (ll == LL_L1)
			pr_cont("Data/Tag %s error.\n", RRRR_MSG(ec));
		else
			ret = false;
	}
	return ret;
}

static bool f10h_dc_mce(u16 ec)
{
	u8 r4 = (ec >> 4) & 0xf;
	u8 ll = ec & 0x3;

	if (r4 == R4_GEN && ll == LL_L1) {
		pr_cont("during data scrub.\n");
		return true;
	}
	return f12h_dc_mce(ec);
}

static bool k8_dc_mce(u16 ec)
{
	if (BUS_ERROR(ec)) {
		pr_cont("during system linefill.\n");
		return true;
	}

	return f10h_dc_mce(ec);
}

static bool f14h_dc_mce(u16 ec)
{
	u8 r4 = (ec >> 4) & 0xf;
	u8 ll = ec & 0x3;
	u8 tt = (ec >> 2) & 0x3;
	u8 ii = tt;
	bool ret = true;

	if (MEM_ERROR(ec)) {

		if (tt != TT_DATA || ll != LL_L1)
			return false;

		switch (r4) {
		case R4_DRD:
		case R4_DWR:
			pr_cont("Data/Tag parity error due to %s.\n",
				(r4 == R4_DRD ? "load/hw prf" : "store"));
			break;
		case R4_EVICT:
			pr_cont("Copyback parity error on a tag miss.\n");
			break;
		case R4_SNOOP:
			pr_cont("Tag parity error during snoop.\n");
			break;
		default:
			ret = false;
		}
	} else if (BUS_ERROR(ec)) {

		if ((ii != II_MEM && ii != II_IO) || ll != LL_LG)
			return false;

		pr_cont("System read data error on a ");

		switch (r4) {
		case R4_RD:
			pr_cont("TLB reload.\n");
			break;
		case R4_DWR:
			pr_cont("store.\n");
			break;
		case R4_DRD:
			pr_cont("load.\n");
			break;
		default:
			ret = false;
		}
	} else {
		ret = false;
	}

	return ret;
}
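
/*
 * Decode an MC0 (data cache) error. TLB error signatures are handled here
 * since they are the same across families; everything else is deferred to
 * the family-specific dc_mce() callback.
 */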
static void amd_decode_dc_mce(struct mce *m)
{
	u16 ec = m->status & 0xffff;
	u8 xec = (m->status >> 16) & 0xf;

	pr_emerg(HW_ERR "Data Cache Error: ");

	/* TLB error signatures are the same across families */
	if (TLB_ERROR(ec)) {
		u8 tt = (ec >> 2) & 0x3;

		if (tt == TT_DATA) {
			pr_cont("%s TLB %s.\n", LL_MSG(ec),
				(xec ? "multimatch" : "parity error"));
			return;
		} else
			goto wrong_dc_mce;
	}

	if (!fam_ops->dc_mce(ec))
		goto wrong_dc_mce;

	return;

wrong_dc_mce:
	pr_emerg(HW_ERR "Corrupted DC MCE info?\n");
}
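
/*
 * Family-specific IC (instruction cache) MCE decoders, same convention as
 * the DC ones above: return true if the signature was recognized.
 */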
static bool k8_ic_mce(u16 ec)
{
	u8 ll = ec & 0x3;
	u8 r4 = (ec >> 4) & 0xf;
	bool ret = true;

	if (!MEM_ERROR(ec))
		return false;

	if (ll == 0x2)
		pr_cont("during a linefill from L2.\n");
	else if (ll == 0x1) {
		switch (r4) {
		case R4_IRD:
			pr_cont("Parity error during data load.\n");
			break;

		case R4_EVICT:
			pr_cont("Copyback Parity/Victim error.\n");
			break;

		case R4_SNOOP:
			pr_cont("Tag Snoop error.\n");
			break;

		default:
			ret = false;
			break;
		}
	} else
		ret = false;

	return ret;
}

static bool f14h_ic_mce(u16 ec)
{
	u8 ll = ec & 0x3;
	u8 tt = (ec >> 2) & 0x3;
	u8 r4 = (ec >> 4) & 0xf;
	bool ret = true;

	if (MEM_ERROR(ec)) {
		if (tt != 0 || ll != 1)
			ret = false;

		if (r4 == R4_IRD)
			pr_cont("Data/tag array parity error for a tag hit.\n");
		else if (r4 == R4_SNOOP)
			pr_cont("Tag error during snoop/victimization.\n");
		else
			ret = false;
	}
	return ret;
}
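
/* Decode an MC1 (instruction cache) error. */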
static void amd_decode_ic_mce(struct mce *m)
{
	u16 ec = m->status & 0xffff;
	u8 xec = (m->status >> 16) & 0xf;

	pr_emerg(HW_ERR "Instruction Cache Error: ");

	if (TLB_ERROR(ec))
		pr_cont("%s TLB %s.\n", LL_MSG(ec),
			(xec ? "multimatch" : "parity error"));
	else if (BUS_ERROR(ec)) {
		bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));

		pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
	} else if (fam_ops->ic_mce(ec))
		;	/* the family-specific handler printed the message */
	else
		pr_emerg(HW_ERR "Corrupted IC MCE info?\n");
}
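
/* Decode an MC2 (bus unit) error. */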
static void amd_decode_bu_mce(struct mce *m)
{
	u32 ec = m->status & 0xffff;
	u32 xec = (m->status >> 16) & 0xf;

	pr_emerg(HW_ERR "Bus Unit Error");

	if (xec == 0x1)
		pr_cont(" in the write data buffers.\n");
	else if (xec == 0x3)
		pr_cont(" in the victim data buffers.\n");
	else if (xec == 0x2 && MEM_ERROR(ec))
		pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec));
	else if (xec == 0x0) {
		if (TLB_ERROR(ec))
			pr_cont(": %s error in a Page Descriptor Cache or "
				"Guest TLB.\n", TT_MSG(ec));
		else if (BUS_ERROR(ec))
			pr_cont(": %s/ECC error in data read from NB: %s.\n",
				RRRR_MSG(ec), PP_MSG(ec));
		else if (MEM_ERROR(ec)) {
			u8 rrrr = (ec >> 4) & 0xf;

			if (rrrr >= 0x7)
				pr_cont(": %s error during data copyback.\n",
					RRRR_MSG(ec));
			else if (rrrr <= 0x1)
				pr_cont(": %s parity/ECC error during data "
					"access from L2.\n", RRRR_MSG(ec));
			else
				goto wrong_bu_mce;
		} else
			goto wrong_bu_mce;
	} else
		goto wrong_bu_mce;

	return;

wrong_bu_mce:
	pr_emerg(HW_ERR "Corrupted BU MCE info?\n");
}
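
/* Decode an MC3 (load/store) error; family 0x14 is not expected to report these. */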
static void amd_decode_ls_mce(struct mce *m)
{
	u16 ec = m->status & 0xffff;
	u8 xec = (m->status >> 16) & 0xf;

	if (boot_cpu_data.x86 == 0x14) {
		pr_emerg("You shouldn't be seeing an LS MCE on this cpu family,"
			 " please report on LKML.\n");
		return;
	}

	pr_emerg(HW_ERR "Load Store Error");

	if (xec == 0x0) {
		u8 r4 = (ec >> 4) & 0xf;

		if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
			goto wrong_ls_mce;

		pr_cont(" during %s.\n", RRRR_MSG(ec));
	} else
		goto wrong_ls_mce;

	return;

wrong_ls_mce:
	pr_emerg(HW_ERR "Corrupted LS MCE info?\n");
}
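
/*
 * Family-specific NB (northbridge) MCE decoders, keyed on the extended error
 * code in MCi_STATUS[20:16]; return true if the signature was recognized.
 */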
static bool k8_nb_mce(u16 ec, u8 xec)
{
	bool ret = true;

	switch (xec) {
	case 0x1:
		pr_cont("CRC error detected on HT link.\n");
		break;

	case 0x5:
		pr_cont("Invalid GART PTE entry during GART table walk.\n");
		break;

	case 0x6:
		pr_cont("Unsupported atomic RMW received from an IO link.\n");
		break;

	case 0x0:
	case 0x8:
		if (boot_cpu_data.x86 == 0x11)
			return false;

		pr_cont("DRAM ECC error detected on the NB.\n");
		break;

	case 0xd:
		pr_cont("Parity error on the DRAM addr/ctl signals.\n");
		break;

	default:
		ret = false;
		break;
	}

	return ret;
}

static bool f10h_nb_mce(u16 ec, u8 xec)
{
	bool ret = true;
	u8 offset = 0;

	if (k8_nb_mce(ec, xec))
		return true;

	switch (xec) {
	case 0xa ... 0xc:
		offset = 10;
		break;

	case 0xe:
		offset = 11;
		break;

	case 0xf:
		if (TLB_ERROR(ec))
			pr_cont("GART Table Walk data error.\n");
		else if (BUS_ERROR(ec))
			pr_cont("DMA Exclusion Vector Table Walk error.\n");
		else
			ret = false;

		goto out;
		break;

	case 0x1c ... 0x1f:
		offset = 24;
		break;

	default:
		ret = false;

		goto out;
		break;
	}

	pr_cont("%s.\n", f10h_nb_mce_desc[xec - offset]);

out:
	return ret;
}

static bool nb_noop_mce(u16 ec, u8 xec)
{
	return false;
}
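
/*
 * Decode an MC4 (northbridge) error. Exported so that NB MCEs can be decoded
 * directly by other code; DRAM ECC errors (xec 0x0/0x8) on K8 and F10h are
 * also handed to the decoder registered via amd_register_ecc_decoder().
 */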
void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg)
{
	u8 xec = (m->status >> 16) & 0x1f;
	u16 ec = m->status & 0xffff;
	u32 nbsh = (u32)(m->status >> 32);

	pr_emerg(HW_ERR "Northbridge Error, node %d: ", node_id);

	/*
	 * F10h, revD can disable ErrCpu[3:0], so check that first; the value
	 * encoding has also changed, so interpret it differently.
	 */
	if ((boot_cpu_data.x86 == 0x10) &&
	    (boot_cpu_data.x86_model > 7)) {
		if (nbsh & K8_NBSH_ERR_CPU_VAL)
			pr_cont(", core: %u", (u8)(nbsh & nb_err_cpumask));
	} else {
		u8 assoc_cpus = nbsh & nb_err_cpumask;

		if (assoc_cpus > 0)
			pr_cont(", core: %d", fls(assoc_cpus) - 1);
	}

	switch (xec) {
	case 0x2:
		pr_cont("Sync error (sync packets on HT link detected).\n");
		return;

	case 0x3:
		pr_cont("HT Master abort.\n");
		return;

	case 0x4:
		pr_cont("HT Target abort.\n");
		return;

	case 0x7:
		pr_cont("NB Watchdog timeout.\n");
		return;

	case 0x9:
		pr_cont("SVM DMA Exclusion Vector error.\n");
		return;

	default:
		break;
	}

	if (!fam_ops->nb_mce(ec, xec))
		goto wrong_nb_mce;

	if (boot_cpu_data.x86 == 0xf || boot_cpu_data.x86 == 0x10)
		if ((xec == 0x8 || xec == 0x0) && nb_bus_decoder)
			nb_bus_decoder(node_id, m, nbcfg);

	return;

wrong_nb_mce:
	pr_emerg(HW_ERR "Corrupted NB MCE info?\n");
}
EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
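
/* Decode an MC5 (FR) error; the only known signature is a CPU watchdog timeout. */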
static void amd_decode_fr_mce(struct mce *m)
{
	if (boot_cpu_data.x86 == 0xf ||
	    boot_cpu_data.x86 == 0x11)
		goto wrong_fr_mce;

	/* we have only one error signature so match all fields at once. */
	if ((m->status & 0xffff) == 0x0f0f) {
		pr_emerg(HW_ERR "FR Error: CPU Watchdog timer expire.\n");
		return;
	}

wrong_fr_mce:
	pr_emerg(HW_ERR "Corrupted FR MCE info?\n");
}
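
/* Pretty-print the fields of the primary error code (low 16 bits of MCi_STATUS). */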
static inline void amd_decode_err_code(u16 ec)
{
	if (TLB_ERROR(ec)) {
		pr_emerg(HW_ERR "Transaction: %s, Cache Level: %s\n",
			 TT_MSG(ec), LL_MSG(ec));
	} else if (MEM_ERROR(ec)) {
		pr_emerg(HW_ERR "Transaction: %s, Type: %s, Cache Level: %s\n",
			 RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec));
	} else if (BUS_ERROR(ec)) {
		pr_emerg(HW_ERR "Transaction: %s (%s), %s, Cache Level: %s, "
			 "Participating Processor: %s\n",
			 RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec),
			 PP_MSG(ec));
	} else
		pr_emerg(HW_ERR "Huh? Unknown MCE error 0x%x\n", ec);
}

/*
 * Filter out unwanted MCE signatures here.
 */
static bool amd_filter_mce(struct mce *m)
{
	u8 xec = (m->status >> 16) & 0x1f;

	/*
	 * NB GART TLB error reporting is disabled by default.
	 */
	if (m->bank == 4 && xec == 0x5 && !report_gart_errors)
		return true;

	return false;
}
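
/*
 * Notifier callback hooked into the x86_mce_decoder_chain: print a summary
 * of MCi_STATUS and dispatch to the per-bank decoder.
 */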
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
{
	struct mce *m = (struct mce *)data;
	int node, ecc;

	if (amd_filter_mce(m))
		return NOTIFY_STOP;

	pr_emerg(HW_ERR "MC%d_STATUS: ", m->bank);

	pr_cont("%sorrected error, other errors lost: %s, "
		"CPU context corrupt: %s",
		((m->status & MCI_STATUS_UC) ? "Unc" : "C"),
		((m->status & MCI_STATUS_OVER) ? "yes" : "no"),
		((m->status & MCI_STATUS_PCC) ? "yes" : "no"));

	/* do the two bits[14:13] together */
	ecc = (m->status >> 45) & 0x3;
	if (ecc)
		pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U"));

	pr_cont("\n");

	switch (m->bank) {
	case 0:
		amd_decode_dc_mce(m);
		break;

	case 1:
		amd_decode_ic_mce(m);
		break;

	case 2:
		amd_decode_bu_mce(m);
		break;

	case 3:
		amd_decode_ls_mce(m);
		break;

	case 4:
		node = amd_get_nb_id(m->extcpu);
		amd_decode_nb_mce(node, m, 0);
		break;

	case 5:
		amd_decode_fr_mce(m);
		break;

	default:
		break;
	}

	amd_decode_err_code(m->status & 0xffff);

	return NOTIFY_STOP;
}
EXPORT_SYMBOL_GPL(amd_decode_mce);

static struct notifier_block amd_mce_dec_nb = {
	.notifier_call	= amd_decode_mce,
};
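
/*
 * Enable decoding only on AMD families 0xf-0x12 and early models of family
 * 0x14, and select the family-specific decoder ops.
 */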
static int __init mce_amd_init(void)
{
	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
		return 0;

	if ((boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x12) &&
	    (boot_cpu_data.x86 != 0x14 || boot_cpu_data.x86_model > 0xf))
		return 0;

	fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
	if (!fam_ops)
		return -ENOMEM;

	switch (boot_cpu_data.x86) {
	case 0xf:
		fam_ops->dc_mce = k8_dc_mce;
		fam_ops->ic_mce = k8_ic_mce;
		fam_ops->nb_mce = k8_nb_mce;
		break;

	case 0x10:
		fam_ops->dc_mce = f10h_dc_mce;
		fam_ops->ic_mce = k8_ic_mce;
		fam_ops->nb_mce = f10h_nb_mce;
		break;

	case 0x11:
		fam_ops->dc_mce = k8_dc_mce;
		fam_ops->ic_mce = k8_ic_mce;
		fam_ops->nb_mce = f10h_nb_mce;
		break;

	case 0x12:
		fam_ops->dc_mce = f12h_dc_mce;
		fam_ops->ic_mce = k8_ic_mce;
		fam_ops->nb_mce = nb_noop_mce;
		break;

	case 0x14:
		nb_err_cpumask = 0x3;
		fam_ops->dc_mce = f14h_dc_mce;
		fam_ops->ic_mce = f14h_ic_mce;
		fam_ops->nb_mce = nb_noop_mce;
		break;

	default:
		printk(KERN_WARNING "Huh? What family is that: %d?!\n",
		       boot_cpu_data.x86);
		kfree(fam_ops);
		return -EINVAL;
	}

	pr_info("MCE: In-kernel MCE decoding enabled.\n");

	atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);

	return 0;
}
early_initcall(mce_amd_init);

#ifdef MODULE
static void __exit mce_amd_exit(void)
{
	atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
	kfree(fam_ops);
}

MODULE_DESCRIPTION("AMD MCE decoder");
MODULE_ALIAS("edac-mce-amd");
MODULE_LICENSE("GPL");
module_exit(mce_amd_exit);
#endif