i7core_edac: some fixes at error injection code
[linux-2.6/linux-acpi-2.6/ibm-acpi-2.6.git] / drivers / edac / i7core_edac.c
blob72859e87aeb293d2b5d487e5b4e93d694a257800
1 /* Intel 7 core Memory Controller kernel module (Nehalem)
3 * This file may be distributed under the terms of the
4 * GNU General Public License version 2 only.
6 * Copyright (c) 2009 by:
7 * Mauro Carvalho Chehab <mchehab@redhat.com>
9 * Red Hat Inc. http://www.redhat.com
11 * Forked and adapted from the i5400_edac driver
13 * Based on the following public Intel datasheets:
14 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
15 * Datasheet, Volume 2:
16 * http://download.intel.com/design/processor/datashts/320835.pdf
17 * Intel Xeon Processor 5500 Series Datasheet Volume 2
18 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
19 * also available at:
20 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
23 #include <linux/module.h>
24 #include <linux/init.h>
25 #include <linux/pci.h>
26 #include <linux/pci_ids.h>
27 #include <linux/slab.h>
28 #include <linux/edac.h>
29 #include <linux/mmzone.h>
30 #include <linux/edac_mce.h>
31 #include <linux/spinlock.h>
33 #include "edac_core.h"
/*
 * Alter this version for the module when modifications are made
 */
#define I7CORE_REVISION    " Ver: 1.0.0 " __DATE__
#define EDAC_MOD_STR      "i7core_edac"

/* HACK: temporary, just to enable all logs, for now */
#undef debugf0
#define debugf0(fmt, arg...)  edac_printk(KERN_INFO, "i7core", fmt, ##arg)

/*
 * Debug macros
 *
 * i7core_printk()    - driver-level messages, tagged "i7core"
 * i7core_mc_printk() - per-MC messages, routed through the EDAC core
 */
#define i7core_printk(level, fmt, arg...)			\
	edac_printk(level, "i7core", fmt, ##arg)

#define i7core_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
/*
 * i7core Memory Controller Registers
 *
 * Offsets are into the PCI config space of the corresponding
 * device/function on the uncore bus.
 */

	/* OFFSETS for Device 0 Function 0 */

#define MC_CFG_CONTROL	0x90

	/* OFFSETS for Device 3 Function 0 */

#define MC_CONTROL	0x48
#define MC_STATUS	0x4c
#define MC_MAX_DOD	0x64

/*
 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */

#define MC_TEST_ERR_RCV1	0x60
  #define DIMM2_COR_ERR(r)		((r) & 0x7fff)

#define MC_TEST_ERR_RCV0	0x64
  #define DIMM1_COR_ERR(r)		(((r) >> 16) & 0x7fff)
  #define DIMM0_COR_ERR(r)		((r) & 0x7fff)

	/* OFFSETS for Devices 4,5 and 6 Function 0 */

#define MC_CHANNEL_DIMM_INIT_PARAMS	0x58
  #define THREE_DIMMS_PRESENT		(1 << 24)
  #define SINGLE_QUAD_RANK_PRESENT	(1 << 23)
  #define QUAD_RANK_PRESENT		(1 << 22)
  #define REGISTERED_DIMM		(1 << 15)

#define MC_CHANNEL_MAPPER	0x60
  /*
   * Read/write logical channel for a physical channel; the hardware
   * encodes "unmapped" as 0, hence the -1 adjustment.
   * Note: (ch) is parenthesized so expression arguments expand safely.
   */
  #define RDLCH(r, ch)		((((r) >> (3 + ((ch) * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)		((((r) >> ((ch) * 6)) & 0x07) - 1)

#define MC_CHANNEL_RANK_PRESENT	0x7c
  #define RANK_PRESENT_MASK	0xffff

#define MC_CHANNEL_ADDR_MATCH	0xf0
#define MC_CHANNEL_ERROR_MASK	0xf8
#define MC_CHANNEL_ERROR_INJECT	0xfc
  #define INJECT_ADDR_PARITY	0x10
  #define INJECT_ECC		0x08
  #define MASK_CACHELINE	0x06
  #define MASK_FULL_CACHELINE	0x06
  #define MASK_MSB32_CACHELINE	0x04
  #define MASK_LSB32_CACHELINE	0x02
  #define NO_MASK_CACHELINE	0x00
  #define REPEAT_EN		0x01

	/* OFFSETS for Devices 4,5 and 6 Function 1 */

#define MC_DOD_CH_DIMM0		0x48
#define MC_DOD_CH_DIMM1		0x4c
#define MC_DOD_CH_DIMM2		0x50
  #define RANKOFFSET_MASK	((1 << 12) | (1 << 11) | (1 << 10))
  #define RANKOFFSET(x)		(((x) & RANKOFFSET_MASK) >> 10)
  #define DIMM_PRESENT_MASK	(1 << 9)
  #define DIMM_PRESENT(x)	(((x) & DIMM_PRESENT_MASK) >> 9)
  #define MC_DOD_NUMBANK_MASK	((1 << 8) | (1 << 7))
  #define MC_DOD_NUMBANK(x)	(((x) & MC_DOD_NUMBANK_MASK) >> 7)
  #define MC_DOD_NUMRANK_MASK	((1 << 6) | (1 << 5))
  #define MC_DOD_NUMRANK(x)	(((x) & MC_DOD_NUMRANK_MASK) >> 5)
  #define MC_DOD_NUMROW_MASK	((1 << 4) | (1 << 3) | (1 << 2))
  #define MC_DOD_NUMROW(x)	(((x) & MC_DOD_NUMROW_MASK) >> 2)
  #define MC_DOD_NUMCOL_MASK	3
  #define MC_DOD_NUMCOL(x)	((x) & MC_DOD_NUMCOL_MASK)

#define MC_RANK_PRESENT		0x7c

/* System Address Generation per-channel remap registers */
#define MC_SAG_CH_0	0x80
#define MC_SAG_CH_1	0x84
#define MC_SAG_CH_2	0x88
#define MC_SAG_CH_3	0x8c
#define MC_SAG_CH_4	0x90
#define MC_SAG_CH_5	0x94
#define MC_SAG_CH_6	0x98
#define MC_SAG_CH_7	0x9c

/* Rank Interleave Range limit registers */
#define MC_RIR_LIMIT_CH_0	0x40
#define MC_RIR_LIMIT_CH_1	0x44
#define MC_RIR_LIMIT_CH_2	0x48
#define MC_RIR_LIMIT_CH_3	0x4C
#define MC_RIR_LIMIT_CH_4	0x50
#define MC_RIR_LIMIT_CH_5	0x54
#define MC_RIR_LIMIT_CH_6	0x58
#define MC_RIR_LIMIT_CH_7	0x5C
#define MC_RIR_LIMIT_MASK	((1 << 10) - 1)

#define MC_RIR_WAY_CH		0x80
  #define MC_RIR_WAY_OFFSET_MASK	(((1 << 14) - 1) & ~0x7)
  #define MC_RIR_WAY_RANK_MASK		0x7

/*
 * i7core structs
 */

#define NUM_CHANS	3
#define MAX_DIMMS	3	/* Max DIMMS per channel */
#define NUM_SOCKETS	2	/* Max number of MC sockets */
#define MAX_MCR_FUNC	4
#define MAX_CHAN_FUNC	3
/* Cached copies of the global MC registers (Device 3 Function 0) */
struct i7core_info {
	u32	mc_control;	/* MC_CONTROL register */
	u32	mc_status;	/* MC_STATUS register */
	u32	max_dod;	/* MC_MAX_DOD register */
	u32	ch_map;		/* MC_CHANNEL_MAPPER register */
};

/* User-selected error injection parameters, set via sysfs */
struct i7core_inject {
	int	enable;		/* non-zero while injection is armed */

	u8	socket;		/* target QPI socket */
	u32	section;	/* half-cacheline selection bits */
	u32	type;		/* repeat/ECC/parity selection bits */
	u32	eccmask;	/* ECC bits to flip on an address match */

	/* Error address mask; -1 in a field means "match any" */
	int channel, dimm, rank, bank, page, col;
};

struct i7core_channel {
	u32	ranks;		/* ranks present on this channel */
	u32	dimms;		/* DIMMs detected on this channel */
};

/* One PCI device/function the driver must reserve, per socket */
struct pci_id_descr {
	int		dev;
	int		func;
	int		dev_id;
	struct pci_dev	*pdev[NUM_SOCKETS];
};

/* Per-memory-controller private driver state */
struct i7core_pvt {
	struct pci_dev	*pci_noncore[NUM_SOCKETS];
	struct pci_dev	*pci_mcr[NUM_SOCKETS][MAX_MCR_FUNC + 1];
	struct pci_dev	*pci_ch[NUM_SOCKETS][NUM_CHANS][MAX_CHAN_FUNC + 1];

	struct i7core_info	info;
	struct i7core_inject	inject;
	struct i7core_channel	channel[NUM_SOCKETS][NUM_CHANS];

	int		sockets;  /* Number of sockets */
	int		channels; /* Number of active channels */

	/* non-zero once the CE counters for a socket have been read */
	int		ce_count_available[NUM_SOCKETS];
	/* ECC corrected errors counts per dimm */
	unsigned long	ce_count[NUM_SOCKETS][MAX_DIMMS];
	int		last_ce_count[NUM_SOCKETS][MAX_DIMMS];

	/* mcelog glue */
	struct edac_mce	edac_mce;
	struct mce	mce_entry[MCE_LOG_LEN];
	unsigned	mce_count;	/* entries queued in mce_entry[] */
	spinlock_t	mce_lock;	/* protects mce_entry/mce_count */
};

/* Device name and register DID (Device ID) */
struct i7core_dev_info {
	const char *ctl_name;	/* name for this device */
	u16 fsb_mapping_errors;	/* DID for the branchmap,control */
};
/* Convenience initializer for one pci_id_descr entry */
#define PCI_DESCR(device, function, device_id)	\
	.dev = (device),			\
	.func = (function),			\
	.dev_id = (device_id)

/* All the device/functions this driver reserves on each socket */
struct pci_id_descr pci_devs[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS)  }, /* if RDIMM is supported */
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },

		/* Generic Non-core registers */
	/*
	 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
	 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
	 * the probing code needs to test for the other address in case of
	 * failure of this one
	 */
	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NOCORE)  },
};

#define N_DEVS ARRAY_SIZE(pci_devs)

/*
 * pci_device_id table for which devices we are looking for
 * This should match the first device at pci_devs table
 */
static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
	{0,}			/* 0 terminated list. */
};

/* Table of devices attributes supported by this driver */
static const struct i7core_dev_info i7core_devs[] = {
	{
		.ctl_name = "i7 Core",
		.fsb_mapping_errors = PCI_DEVICE_ID_INTEL_I7_MCR,
	},
};

/* EDAC PCI control structure, registered at probe time */
static struct edac_pci_ctl_info *i7core_pci;
283 /****************************************************************************
284 Anciliary status routines
285 ****************************************************************************/
287 /* MC_CONTROL bits */
288 #define CH_ACTIVE(pvt, ch) ((pvt)->info.mc_control & (1 << (8 + ch)))
289 #define ECCx8(pvt) ((pvt)->info.mc_control & (1 << 1))
291 /* MC_STATUS bits */
292 #define ECC_ENABLED(pvt) ((pvt)->info.mc_status & (1 << 3))
293 #define CH_DISABLED(pvt, ch) ((pvt)->info.mc_status & (1 << ch))
295 /* MC_MAX_DOD read functions */
296 static inline int numdimms(u32 dimms)
298 return (dimms & 0x3) + 1;
301 static inline int numrank(u32 rank)
303 static int ranks[4] = { 1, 2, 4, -EINVAL };
305 return ranks[rank & 0x3];
308 static inline int numbank(u32 bank)
310 static int banks[4] = { 4, 8, 16, -EINVAL };
312 return banks[bank & 0x3];
315 static inline int numrow(u32 row)
317 static int rows[8] = {
318 1 << 12, 1 << 13, 1 << 14, 1 << 15,
319 1 << 16, -EINVAL, -EINVAL, -EINVAL,
322 return rows[row & 0x7];
325 static inline int numcol(u32 col)
327 static int cols[8] = {
328 1 << 10, 1 << 11, 1 << 12, -EINVAL,
330 return cols[col & 0x3];
333 /****************************************************************************
334 Memory check routines
335 ****************************************************************************/
336 static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
337 unsigned func)
339 int i;
341 for (i = 0; i < N_DEVS; i++) {
342 if (!pci_devs[i].pdev[socket])
343 continue;
345 if (PCI_SLOT(pci_devs[i].pdev[socket]->devfn) == slot &&
346 PCI_FUNC(pci_devs[i].pdev[socket]->devfn) == func) {
347 return pci_devs[i].pdev[socket];
351 return NULL;
355 * i7core_get_active_channels() - gets the number of channels and csrows
356 * @socket: Quick Path Interconnect socket
357 * @channels: Number of channels that will be returned
358 * @csrows: Number of csrows found
360 * Since EDAC core needs to know in advance the number of available channels
361 * and csrows, in order to allocate memory for csrows/channels, it is needed
362 * to run two similar steps. At the first step, implemented on this function,
363 * it checks the number of csrows/channels present at one socket.
364 * this is used in order to properly allocate the size of mci components.
366 * It should be noticed that none of the current available datasheets explain
367 * or even mention how csrows are seen by the memory controller. So, we need
368 * to add a fake description for csrows.
369 * So, this driver is attributing one DIMM memory for one csrow.
371 static int i7core_get_active_channels(u8 socket, unsigned *channels,
372 unsigned *csrows)
374 struct pci_dev *pdev = NULL;
375 int i, j;
376 u32 status, control;
378 *channels = 0;
379 *csrows = 0;
381 pdev = get_pdev_slot_func(socket, 3, 0);
382 if (!pdev) {
383 i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
384 socket);
385 return -ENODEV;
388 /* Device 3 function 0 reads */
389 pci_read_config_dword(pdev, MC_STATUS, &status);
390 pci_read_config_dword(pdev, MC_CONTROL, &control);
392 for (i = 0; i < NUM_CHANS; i++) {
393 u32 dimm_dod[3];
394 /* Check if the channel is active */
395 if (!(control & (1 << (8 + i))))
396 continue;
398 /* Check if the channel is disabled */
399 if (status & (1 << i))
400 continue;
402 pdev = get_pdev_slot_func(socket, i + 4, 1);
403 if (!pdev) {
404 i7core_printk(KERN_ERR, "Couldn't find socket %d "
405 "fn %d.%d!!!\n",
406 socket, i + 4, 1);
407 return -ENODEV;
409 /* Devices 4-6 function 1 */
410 pci_read_config_dword(pdev,
411 MC_DOD_CH_DIMM0, &dimm_dod[0]);
412 pci_read_config_dword(pdev,
413 MC_DOD_CH_DIMM1, &dimm_dod[1]);
414 pci_read_config_dword(pdev,
415 MC_DOD_CH_DIMM2, &dimm_dod[2]);
417 (*channels)++;
419 for (j = 0; j < 3; j++) {
420 if (!DIMM_PRESENT(dimm_dod[j]))
421 continue;
422 (*csrows)++;
426 debugf0("Number of active channels on socket %d: %d\n",
427 socket, *channels);
429 return 0;
432 static int get_dimm_config(struct mem_ctl_info *mci, int *csrow, u8 socket)
434 struct i7core_pvt *pvt = mci->pvt_info;
435 struct csrow_info *csr;
436 struct pci_dev *pdev;
437 int i, j;
438 unsigned long last_page = 0;
439 enum edac_type mode;
440 enum mem_type mtype;
442 /* Get data from the MC register, function 0 */
443 pdev = pvt->pci_mcr[socket][0];
444 if (!pdev)
445 return -ENODEV;
447 /* Device 3 function 0 reads */
448 pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
449 pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
450 pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
451 pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);
453 debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
454 socket, pvt->info.mc_control, pvt->info.mc_status,
455 pvt->info.max_dod, pvt->info.ch_map);
457 if (ECC_ENABLED(pvt)) {
458 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
459 if (ECCx8(pvt))
460 mode = EDAC_S8ECD8ED;
461 else
462 mode = EDAC_S4ECD4ED;
463 } else {
464 debugf0("ECC disabled\n");
465 mode = EDAC_NONE;
468 /* FIXME: need to handle the error codes */
469 debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
470 "x%x x 0x%x\n",
471 numdimms(pvt->info.max_dod),
472 numrank(pvt->info.max_dod >> 2),
473 numbank(pvt->info.max_dod >> 4),
474 numrow(pvt->info.max_dod >> 6),
475 numcol(pvt->info.max_dod >> 9));
477 for (i = 0; i < NUM_CHANS; i++) {
478 u32 data, dimm_dod[3], value[8];
480 if (!CH_ACTIVE(pvt, i)) {
481 debugf0("Channel %i is not active\n", i);
482 continue;
484 if (CH_DISABLED(pvt, i)) {
485 debugf0("Channel %i is disabled\n", i);
486 continue;
489 /* Devices 4-6 function 0 */
490 pci_read_config_dword(pvt->pci_ch[socket][i][0],
491 MC_CHANNEL_DIMM_INIT_PARAMS, &data);
493 pvt->channel[socket][i].ranks = (data & QUAD_RANK_PRESENT) ?
494 4 : 2;
496 if (data & REGISTERED_DIMM)
497 mtype = MEM_RDDR3;
498 else
499 mtype = MEM_DDR3;
500 #if 0
501 if (data & THREE_DIMMS_PRESENT)
502 pvt->channel[i].dimms = 3;
503 else if (data & SINGLE_QUAD_RANK_PRESENT)
504 pvt->channel[i].dimms = 1;
505 else
506 pvt->channel[i].dimms = 2;
507 #endif
509 /* Devices 4-6 function 1 */
510 pci_read_config_dword(pvt->pci_ch[socket][i][1],
511 MC_DOD_CH_DIMM0, &dimm_dod[0]);
512 pci_read_config_dword(pvt->pci_ch[socket][i][1],
513 MC_DOD_CH_DIMM1, &dimm_dod[1]);
514 pci_read_config_dword(pvt->pci_ch[socket][i][1],
515 MC_DOD_CH_DIMM2, &dimm_dod[2]);
517 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
518 "%d ranks, %cDIMMs\n",
520 RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
521 data,
522 pvt->channel[socket][i].ranks,
523 (data & REGISTERED_DIMM) ? 'R' : 'U');
525 for (j = 0; j < 3; j++) {
526 u32 banks, ranks, rows, cols;
527 u32 size, npages;
529 if (!DIMM_PRESENT(dimm_dod[j]))
530 continue;
532 banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
533 ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
534 rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
535 cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));
537 /* DDR3 has 8 I/O banks */
538 size = (rows * cols * banks * ranks) >> (20 - 3);
540 pvt->channel[socket][i].dimms++;
542 debugf0("\tdimm %d %d Mb offset: %x, "
543 "bank: %d, rank: %d, row: %#x, col: %#x\n",
544 j, size,
545 RANKOFFSET(dimm_dod[j]),
546 banks, ranks, rows, cols);
548 #if PAGE_SHIFT > 20
549 npages = size >> (PAGE_SHIFT - 20);
550 #else
551 npages = size << (20 - PAGE_SHIFT);
552 #endif
554 csr = &mci->csrows[*csrow];
555 csr->first_page = last_page + 1;
556 last_page += npages;
557 csr->last_page = last_page;
558 csr->nr_pages = npages;
560 csr->page_mask = 0;
561 csr->grain = 8;
562 csr->csrow_idx = *csrow;
563 csr->nr_channels = 1;
565 csr->channels[0].chan_idx = i;
566 csr->channels[0].ce_count = 0;
568 switch (banks) {
569 case 4:
570 csr->dtype = DEV_X4;
571 break;
572 case 8:
573 csr->dtype = DEV_X8;
574 break;
575 case 16:
576 csr->dtype = DEV_X16;
577 break;
578 default:
579 csr->dtype = DEV_UNKNOWN;
582 csr->edac_mode = mode;
583 csr->mtype = mtype;
585 (*csrow)++;
588 pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
589 pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
590 pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
591 pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
592 pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
593 pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
594 pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
595 pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
596 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
597 for (j = 0; j < 8; j++)
598 debugf1("\t\t%#x\t%#x\t%#x\n",
599 (value[j] >> 27) & 0x1,
600 (value[j] >> 24) & 0x7,
601 (value[j] && ((1 << 24) - 1)));
604 return 0;
607 /****************************************************************************
608 Error insertion routines
609 ****************************************************************************/
611 /* The i7core has independent error injection features per channel.
612 However, to have a simpler code, we don't allow enabling error injection
613 on more than one channel.
614 Also, since a change at an inject parameter will be applied only at enable,
615 we're disabling error injection on all write calls to the sysfs nodes that
616 controls the error code injection.
618 static int disable_inject(struct mem_ctl_info *mci)
620 struct i7core_pvt *pvt = mci->pvt_info;
622 pvt->inject.enable = 0;
624 if (!pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0])
625 return -ENODEV;
627 pci_write_config_dword(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
628 MC_CHANNEL_ERROR_MASK, 0);
630 return 0;
634 * i7core inject inject.socket
636 * accept and store error injection inject.socket value
638 static ssize_t i7core_inject_socket_store(struct mem_ctl_info *mci,
639 const char *data, size_t count)
641 struct i7core_pvt *pvt = mci->pvt_info;
642 unsigned long value;
643 int rc;
645 rc = strict_strtoul(data, 10, &value);
646 if ((rc < 0) || (value >= pvt->sockets))
647 return 0;
649 pvt->inject.section = (u32) value;
650 return count;
653 static ssize_t i7core_inject_socket_show(struct mem_ctl_info *mci,
654 char *data)
656 struct i7core_pvt *pvt = mci->pvt_info;
657 return sprintf(data, "%d\n", pvt->inject.socket);
661 * i7core inject inject.section
663 * accept and store error injection inject.section value
664 * bit 0 - refers to the lower 32-byte half cacheline
665 * bit 1 - refers to the upper 32-byte half cacheline
667 static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
668 const char *data, size_t count)
670 struct i7core_pvt *pvt = mci->pvt_info;
671 unsigned long value;
672 int rc;
674 if (pvt->inject.enable)
675 disable_inject(mci);
677 rc = strict_strtoul(data, 10, &value);
678 if ((rc < 0) || (value > 3))
679 return 0;
681 pvt->inject.section = (u32) value;
682 return count;
685 static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
686 char *data)
688 struct i7core_pvt *pvt = mci->pvt_info;
689 return sprintf(data, "0x%08x\n", pvt->inject.section);
693 * i7core inject.type
695 * accept and store error injection inject.section value
696 * bit 0 - repeat enable - Enable error repetition
697 * bit 1 - inject ECC error
698 * bit 2 - inject parity error
700 static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
701 const char *data, size_t count)
703 struct i7core_pvt *pvt = mci->pvt_info;
704 unsigned long value;
705 int rc;
707 if (pvt->inject.enable)
708 disable_inject(mci);
710 rc = strict_strtoul(data, 10, &value);
711 if ((rc < 0) || (value > 7))
712 return 0;
714 pvt->inject.type = (u32) value;
715 return count;
718 static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
719 char *data)
721 struct i7core_pvt *pvt = mci->pvt_info;
722 return sprintf(data, "0x%08x\n", pvt->inject.type);
726 * i7core_inject_inject.eccmask_store
728 * The type of error (UE/CE) will depend on the inject.eccmask value:
729 * Any bits set to a 1 will flip the corresponding ECC bit
730 * Correctable errors can be injected by flipping 1 bit or the bits within
731 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
732 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
733 * uncorrectable error to be injected.
735 static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
736 const char *data, size_t count)
738 struct i7core_pvt *pvt = mci->pvt_info;
739 unsigned long value;
740 int rc;
742 if (pvt->inject.enable)
743 disable_inject(mci);
745 rc = strict_strtoul(data, 10, &value);
746 if (rc < 0)
747 return 0;
749 pvt->inject.eccmask = (u32) value;
750 return count;
753 static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
754 char *data)
756 struct i7core_pvt *pvt = mci->pvt_info;
757 return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
/*
 * i7core_addrmatch
 *
 * Accepts the address-match criteria for error injection as a list of
 * "<field>:<value>" pairs separated by whitespace, where <field> is one
 * of channel, dimm, rank, bank, page, col/column and <value> is either
 * a decimal number or "any" (match anything, stored internally as -1).
 * Example: "channel:0 dimm:any page:0x10" (values are decimal only;
 * parsing uses base 10).
 */
static ssize_t i7core_inject_addrmatch_store(struct mem_ctl_info *mci,
					     const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	char *cmd, *val;
	long value;
	int rc;

	/* Any parameter change disarms a pending injection first */
	if (pvt->inject.enable)
		disable_inject(mci);

	do {
		/* Split "<field>:<value>" at the colon */
		cmd = strsep((char **) &data, ":");
		if (!cmd)
			break;
		val = strsep((char **) &data, " \n\t");
		if (!val)
			/* Malformed pair: report consumed length as error */
			return cmd - data;

		if (!strcasecmp(val, "any"))
			value = -1;
		else {
			rc = strict_strtol(val, 10, &value);
			if ((rc < 0) || (value < 0))
				return cmd - data;
		}

		/* Range-check each field against the hardware limits */
		if (!strcasecmp(cmd, "channel")) {
			if (value < 3)
				pvt->inject.channel = value;
			else
				return cmd - data;
		} else if (!strcasecmp(cmd, "dimm")) {
			if (value < 3)
				pvt->inject.dimm = value;
			else
				return cmd - data;
		} else if (!strcasecmp(cmd, "rank")) {
			if (value < 4)
				pvt->inject.rank = value;
			else
				return cmd - data;
		} else if (!strcasecmp(cmd, "bank")) {
			if (value < 32)
				pvt->inject.bank = value;
			else
				return cmd - data;
		} else if (!strcasecmp(cmd, "page")) {
			if (value <= 0xffff)
				pvt->inject.page = value;
			else
				return cmd - data;
		} else if (!strcasecmp(cmd, "col") ||
			   !strcasecmp(cmd, "column")) {
			if (value <= 0x3fff)
				pvt->inject.col = value;
			else
				return cmd - data;
		}
	} while (1);

	return count;
}
834 static ssize_t i7core_inject_addrmatch_show(struct mem_ctl_info *mci,
835 char *data)
837 struct i7core_pvt *pvt = mci->pvt_info;
838 char channel[4], dimm[4], bank[4], rank[4], page[7], col[7];
840 if (pvt->inject.channel < 0)
841 sprintf(channel, "any");
842 else
843 sprintf(channel, "%d", pvt->inject.channel);
844 if (pvt->inject.dimm < 0)
845 sprintf(dimm, "any");
846 else
847 sprintf(dimm, "%d", pvt->inject.dimm);
848 if (pvt->inject.bank < 0)
849 sprintf(bank, "any");
850 else
851 sprintf(bank, "%d", pvt->inject.bank);
852 if (pvt->inject.rank < 0)
853 sprintf(rank, "any");
854 else
855 sprintf(rank, "%d", pvt->inject.rank);
856 if (pvt->inject.page < 0)
857 sprintf(page, "any");
858 else
859 sprintf(page, "0x%04x", pvt->inject.page);
860 if (pvt->inject.col < 0)
861 sprintf(col, "any");
862 else
863 sprintf(col, "0x%04x", pvt->inject.col);
865 return sprintf(data, "channel: %s\ndimm: %s\nbank: %s\n"
866 "rank: %s\npage: %s\ncolumn: %s\n",
867 channel, dimm, bank, rank, page, col);
870 static int write_and_test(struct pci_dev *dev, int where, u32 val)
872 u32 read;
873 int count;
875 for (count = 0; count < 10; count++) {
876 if (count)
877 msleep (100);
878 pci_write_config_dword(dev, where, val);
879 pci_read_config_dword(dev, where, &read);
881 if (read == val)
882 return 0;
885 debugf0("Error Injection Register 0x%02x: Tried to write 0x%08x, "
886 "but read: 0x%08x\n", where, val, read);
888 return -EINVAL;
893 * This routine prepares the Memory Controller for error injection.
894 * The error will be injected when some process tries to write to the
895 * memory that matches the given criteria.
896 * The criteria can be set in terms of a mask where dimm, rank, bank, page
897 * and col can be specified.
898 * A -1 value for any of the mask items will make the MCU to ignore
899 * that matching criteria for error injection.
901 * It should be noticed that the error will only happen after a write operation
902 * on a memory that matches the condition. if REPEAT_EN is not enabled at
903 * inject mask, then it will produce just one error. Otherwise, it will repeat
904 * until the injectmask would be cleaned.
906 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
907 * is reliable enough to check if the MC is using the
908 * three channels. However, this is not clear at the datasheet.
910 static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
911 const char *data, size_t count)
913 struct i7core_pvt *pvt = mci->pvt_info;
914 u32 injectmask;
915 u64 mask = 0;
916 int rc;
917 long enable;
919 if (!pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0])
920 return 0;
922 rc = strict_strtoul(data, 10, &enable);
923 if ((rc < 0))
924 return 0;
926 if (enable) {
927 pvt->inject.enable = 1;
928 } else {
929 disable_inject(mci);
930 return count;
933 /* Sets pvt->inject.dimm mask */
934 if (pvt->inject.dimm < 0)
935 mask |= 1L << 41;
936 else {
937 if (pvt->channel[pvt->inject.socket][pvt->inject.channel].dimms > 2)
938 mask |= (pvt->inject.dimm & 0x3L) << 35;
939 else
940 mask |= (pvt->inject.dimm & 0x1L) << 36;
943 /* Sets pvt->inject.rank mask */
944 if (pvt->inject.rank < 0)
945 mask |= 1L << 40;
946 else {
947 if (pvt->channel[pvt->inject.socket][pvt->inject.channel].dimms > 2)
948 mask |= (pvt->inject.rank & 0x1L) << 34;
949 else
950 mask |= (pvt->inject.rank & 0x3L) << 34;
953 /* Sets pvt->inject.bank mask */
954 if (pvt->inject.bank < 0)
955 mask |= 1L << 39;
956 else
957 mask |= (pvt->inject.bank & 0x15L) << 30;
959 /* Sets pvt->inject.page mask */
960 if (pvt->inject.page < 0)
961 mask |= 1L << 38;
962 else
963 mask |= (pvt->inject.page & 0xffffL) << 14;
965 /* Sets pvt->inject.column mask */
966 if (pvt->inject.col < 0)
967 mask |= 1L << 37;
968 else
969 mask |= (pvt->inject.col & 0x3fffL);
972 * bit 0: REPEAT_EN
973 * bits 1-2: MASK_HALF_CACHELINE
974 * bit 3: INJECT_ECC
975 * bit 4: INJECT_ADDR_PARITY
978 injectmask = (pvt->inject.type & 1) |
979 (pvt->inject.section & 0x3) << 1 |
980 (pvt->inject.type & 0x6) << (3 - 1);
982 /* Unlock writes to registers - this register is write only */
983 pci_write_config_dword(pvt->pci_noncore[pvt->inject.socket],
984 MC_CFG_CONTROL, 0x2);
986 #if 0
987 /* Zeroes error count registers */
988 pci_write_config_dword(pvt->pci_mcr[pvt->inject.socket][4],
989 MC_TEST_ERR_RCV1, 0);
990 pci_write_config_dword(pvt->pci_mcr[pvt->inject.socket][4],
991 MC_TEST_ERR_RCV0, 0);
992 pvt->ce_count_available[pvt->inject.socket] = 0;
993 #endif
995 write_and_test(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
996 MC_CHANNEL_ADDR_MATCH, mask);
997 write_and_test(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
998 MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);
1000 write_and_test(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
1001 MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);
1003 write_and_test(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
1004 MC_CHANNEL_ERROR_MASK, injectmask);
1007 * This is something undocumented, based on my tests
1008 * Without writing 8 to this register, errors aren't injected. Not sure
1009 * why.
1011 pci_write_config_dword(pvt->pci_noncore[pvt->inject.socket],
1012 MC_CFG_CONTROL, 8);
1014 debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1015 " inject 0x%08x\n",
1016 mask, pvt->inject.eccmask, injectmask);
1019 return count;
1022 static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
1023 char *data)
1025 struct i7core_pvt *pvt = mci->pvt_info;
1026 u32 injectmask;
1028 pci_read_config_dword(pvt->pci_ch[pvt->inject.socket][pvt->inject.channel][0],
1029 MC_CHANNEL_ERROR_MASK, &injectmask);
1031 debugf0("Inject error read: 0x%018x\n", injectmask);
1033 if (injectmask & 0x0c)
1034 pvt->inject.enable = 1;
1036 return sprintf(data, "%d\n", pvt->inject.enable);
1039 static ssize_t i7core_ce_regs_show(struct mem_ctl_info *mci, char *data)
1041 unsigned i, count, total = 0;
1042 struct i7core_pvt *pvt = mci->pvt_info;
1044 for (i = 0; i < pvt->sockets; i++) {
1045 if (!pvt->ce_count_available[i])
1046 count = sprintf(data, "socket 0 data unavailable\n");
1047 else
1048 count = sprintf(data, "socket %d, dimm0: %lu\n"
1049 "dimm1: %lu\ndimm2: %lu\n",
1051 pvt->ce_count[i][0],
1052 pvt->ce_count[i][1],
1053 pvt->ce_count[i][2]);
1054 data += count;
1055 total += count;
1058 return total;
1062 * Sysfs struct
/* Sysfs attributes exposed under the MC device for error injection */
static struct mcidev_sysfs_attribute i7core_inj_attrs[] = {
	{
		.attr = {
			.name = "inject_socket",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_socket_show,
		.store = i7core_inject_socket_store,
	}, {
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.attr = {
			.name = "inject_addrmatch",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_addrmatch_show,
		.store = i7core_inject_addrmatch_store,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	}, {
		.attr = {
			.name = "corrected_error_counts",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_ce_regs_show,
		.store = NULL,	/* read-only attribute */
	},
};
1117 /****************************************************************************
1118 Device initialization routines: put/get, init/exit
1119 ****************************************************************************/
1122 * i7core_put_devices 'put' all the devices that we have
1123 * reserved via 'get'
1125 static void i7core_put_devices(void)
1127 int i, j;
1129 for (i = 0; i < NUM_SOCKETS; i++)
1130 for (j = 0; j < N_DEVS; j++)
1131 pci_dev_put(pci_devs[j].pdev[i]);
1135 * i7core_get_devices Find and perform 'get' operation on the MCH's
1136 * device/functions we want to reference for this driver
1138 * Need to 'get' device 16 func 1 and func 2
1140 int i7core_get_onedevice(struct pci_dev **prev, int devno)
1142 struct pci_dev *pdev = NULL;
1143 u8 bus = 0;
1144 u8 socket = 0;
1146 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1147 pci_devs[devno].dev_id, *prev);
1150 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core pci buses
1151 * aren't announced by acpi. So, we need to use a legacy scan probing
1152 * to detect them
1154 if (unlikely(!pdev && !devno && !prev)) {
1155 pcibios_scan_specific_bus(254);
1156 pcibios_scan_specific_bus(255);
1158 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1159 pci_devs[devno].dev_id, *prev);
1163 * On Xeon 55xx, the Intel Quckpath Arch Generic Non-core regs
1164 * is at addr 8086:2c40, instead of 8086:2c41. So, we need
1165 * to probe for the alternate address in case of failure
1167 if (pci_devs[devno].dev_id == PCI_DEVICE_ID_INTEL_I7_NOCORE && !pdev)
1168 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1169 PCI_DEVICE_ID_INTEL_I7_NOCORE_ALT, *prev);
1171 if (!pdev) {
1172 if (*prev) {
1173 *prev = pdev;
1174 return 0;
1178 * Dev 3 function 2 only exists on chips with RDIMMs
1179 * so, it is ok to not found it
1181 if ((pci_devs[devno].dev == 3) && (pci_devs[devno].func == 2)) {
1182 *prev = pdev;
1183 return 0;
1186 i7core_printk(KERN_ERR,
1187 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1188 pci_devs[devno].dev, pci_devs[devno].func,
1189 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id);
1191 /* End of list, leave */
1192 return -ENODEV;
1194 bus = pdev->bus->number;
1196 if (bus == 0x3f)
1197 socket = 0;
1198 else
1199 socket = 255 - bus;
1201 if (socket >= NUM_SOCKETS) {
1202 i7core_printk(KERN_ERR,
1203 "Unexpected socket for "
1204 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1205 bus, pci_devs[devno].dev, pci_devs[devno].func,
1206 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id);
1207 pci_dev_put(pdev);
1208 return -ENODEV;
1211 if (pci_devs[devno].pdev[socket]) {
1212 i7core_printk(KERN_ERR,
1213 "Duplicated device for "
1214 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1215 bus, pci_devs[devno].dev, pci_devs[devno].func,
1216 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id);
1217 pci_dev_put(pdev);
1218 return -ENODEV;
1221 pci_devs[devno].pdev[socket] = pdev;
1223 /* Sanity check */
1224 if (unlikely(PCI_SLOT(pdev->devfn) != pci_devs[devno].dev ||
1225 PCI_FUNC(pdev->devfn) != pci_devs[devno].func)) {
1226 i7core_printk(KERN_ERR,
1227 "Device PCI ID %04x:%04x "
1228 "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1229 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id,
1230 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1231 bus, pci_devs[devno].dev, pci_devs[devno].func);
1232 return -ENODEV;
1235 /* Be sure that the device is enabled */
1236 if (unlikely(pci_enable_device(pdev) < 0)) {
1237 i7core_printk(KERN_ERR,
1238 "Couldn't enable "
1239 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1240 bus, pci_devs[devno].dev, pci_devs[devno].func,
1241 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id);
1242 return -ENODEV;
1245 i7core_printk(KERN_INFO,
1246 "Registered socket %d "
1247 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1248 socket, bus, pci_devs[devno].dev, pci_devs[devno].func,
1249 PCI_VENDOR_ID_INTEL, pci_devs[devno].dev_id);
1251 *prev = pdev;
1253 return 0;
1256 static int i7core_get_devices(void)
1258 int i;
1259 struct pci_dev *pdev = NULL;
1261 for (i = 0; i < N_DEVS; i++) {
1262 pdev = NULL;
1263 do {
1264 if (i7core_get_onedevice(&pdev, i) < 0) {
1265 i7core_put_devices();
1266 return -ENODEV;
1268 } while (pdev);
1270 return 0;
1273 static int mci_bind_devs(struct mem_ctl_info *mci)
1275 struct i7core_pvt *pvt = mci->pvt_info;
1276 struct pci_dev *pdev;
1277 int i, j, func, slot;
1279 for (i = 0; i < pvt->sockets; i++) {
1280 for (j = 0; j < N_DEVS; j++) {
1281 pdev = pci_devs[j].pdev[i];
1282 if (!pdev)
1283 continue;
1285 func = PCI_FUNC(pdev->devfn);
1286 slot = PCI_SLOT(pdev->devfn);
1287 if (slot == 3) {
1288 if (unlikely(func > MAX_MCR_FUNC))
1289 goto error;
1290 pvt->pci_mcr[i][func] = pdev;
1291 } else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
1292 if (unlikely(func > MAX_CHAN_FUNC))
1293 goto error;
1294 pvt->pci_ch[i][slot - 4][func] = pdev;
1295 } else if (!slot && !func)
1296 pvt->pci_noncore[i] = pdev;
1297 else
1298 goto error;
1300 debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1301 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1302 pdev, i);
1306 return 0;
1308 error:
1309 i7core_printk(KERN_ERR, "Device %d, function %d "
1310 "is out of the expected range\n",
1311 slot, func);
1312 return -EINVAL;
1315 /****************************************************************************
1316 Error check routines
1317 ****************************************************************************/
1319 /* This function is based on the device 3 function 4 registers as described on:
1320 * Intel Xeon Processor 5500 Series Datasheet Volume 2
1321 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1322 * also available at:
1323 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1325 static void check_mc_test_err(struct mem_ctl_info *mci, u8 socket)
1327 struct i7core_pvt *pvt = mci->pvt_info;
1328 u32 rcv1, rcv0;
1329 int new0, new1, new2;
1331 if (!pvt->pci_mcr[socket][4]) {
1332 debugf0("%s MCR registers not found\n",__func__);
1333 return;
1336 /* Corrected error reads */
1337 pci_read_config_dword(pvt->pci_mcr[socket][4], MC_TEST_ERR_RCV1, &rcv1);
1338 pci_read_config_dword(pvt->pci_mcr[socket][4], MC_TEST_ERR_RCV0, &rcv0);
1340 /* Store the new values */
1341 new2 = DIMM2_COR_ERR(rcv1);
1342 new1 = DIMM1_COR_ERR(rcv0);
1343 new0 = DIMM0_COR_ERR(rcv0);
1345 #if 0
1346 debugf2("%s CE rcv1=0x%08x rcv0=0x%08x, %d %d %d\n",
1347 (pvt->ce_count_available ? "UPDATE" : "READ"),
1348 rcv1, rcv0, new0, new1, new2);
1349 #endif
1351 /* Updates CE counters if it is not the first time here */
1352 if (pvt->ce_count_available[socket]) {
1353 /* Updates CE counters */
1354 int add0, add1, add2;
1356 add2 = new2 - pvt->last_ce_count[socket][2];
1357 add1 = new1 - pvt->last_ce_count[socket][1];
1358 add0 = new0 - pvt->last_ce_count[socket][0];
1360 if (add2 < 0)
1361 add2 += 0x7fff;
1362 pvt->ce_count[socket][2] += add2;
1364 if (add1 < 0)
1365 add1 += 0x7fff;
1366 pvt->ce_count[socket][1] += add1;
1368 if (add0 < 0)
1369 add0 += 0x7fff;
1370 pvt->ce_count[socket][0] += add0;
1371 } else
1372 pvt->ce_count_available[socket] = 1;
1374 /* Store the new values */
1375 pvt->last_ce_count[socket][2] = new2;
1376 pvt->last_ce_count[socket][1] = new1;
1377 pvt->last_ce_count[socket][0] = new0;
1381 * According with tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1382 * Architectures Software Developer’s Manual Volume 3B.
1383 * Nehalem are defined as family 0x06, model 0x1a
1385 * The MCA registers used here are the following ones:
1386 * struct mce field MCA Register
1387 * m->status MSR_IA32_MC8_STATUS
1388 * m->addr MSR_IA32_MC8_ADDR
1389 * m->misc MSR_IA32_MC8_MISC
1390 * In the case of Nehalem, the error information is masked at .status and .misc
1391 * fields
1393 static void i7core_mce_output_error(struct mem_ctl_info *mci,
1394 struct mce *m)
1396 char *type, *optype, *err, *msg;
1397 unsigned long error = m->status & 0x1ff0000l;
1398 u32 optypenum = (m->status >> 4) & 0x07;
1399 u32 core_err_cnt = (m->status >> 38) && 0x7fff;
1400 u32 dimm = (m->misc >> 16) & 0x3;
1401 u32 channel = (m->misc >> 18) & 0x3;
1402 u32 syndrome = m->misc >> 32;
1403 u32 errnum = find_first_bit(&error, 32);
1405 if (m->mcgstatus & 1)
1406 type = "FATAL";
1407 else
1408 type = "NON_FATAL";
1410 switch (optypenum) {
1411 case 0:
1412 optype = "generic undef request";
1413 break;
1414 case 1:
1415 optype = "read error";
1416 break;
1417 case 2:
1418 optype = "write error";
1419 break;
1420 case 3:
1421 optype = "addr/cmd error";
1422 break;
1423 case 4:
1424 optype = "scrubbing error";
1425 break;
1426 default:
1427 optype = "reserved";
1428 break;
1431 switch (errnum) {
1432 case 16:
1433 err = "read ECC error";
1434 break;
1435 case 17:
1436 err = "RAS ECC error";
1437 break;
1438 case 18:
1439 err = "write parity error";
1440 break;
1441 case 19:
1442 err = "redundacy loss";
1443 break;
1444 case 20:
1445 err = "reserved";
1446 break;
1447 case 21:
1448 err = "memory range error";
1449 break;
1450 case 22:
1451 err = "RTID out of range";
1452 break;
1453 case 23:
1454 err = "address parity error";
1455 break;
1456 case 24:
1457 err = "byte enable parity error";
1458 break;
1459 default:
1460 err = "unknown";
1463 /* FIXME: should convert addr into bank and rank information */
1464 msg = kasprintf(GFP_ATOMIC,
1465 "%s (addr = 0x%08llx, socket=%d, Dimm=%d, Channel=%d, "
1466 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1467 type, (long long) m->addr, m->cpu, dimm, channel,
1468 syndrome, core_err_cnt, (long long)m->status,
1469 (long long)m->misc, optype, err);
1471 debugf0("%s", msg);
1473 /* Call the helper to output message */
1474 edac_mc_handle_fbd_ue(mci, 0 /* FIXME: should be rank here */,
1475 0, 0 /* FIXME: should be channel here */, msg);
1477 kfree(msg);
1481 * i7core_check_error Retrieve and process errors reported by the
1482 * hardware. Called by the Core module.
1484 static void i7core_check_error(struct mem_ctl_info *mci)
1486 struct i7core_pvt *pvt = mci->pvt_info;
1487 int i;
1488 unsigned count = 0;
1489 struct mce *m = NULL;
1490 unsigned long flags;
1492 /* Copy all mce errors into a temporary buffer */
1493 spin_lock_irqsave(&pvt->mce_lock, flags);
1494 if (pvt->mce_count) {
1495 m = kmalloc(sizeof(*m) * pvt->mce_count, GFP_ATOMIC);
1496 if (m) {
1497 count = pvt->mce_count;
1498 memcpy(m, &pvt->mce_entry, sizeof(*m) * count);
1500 pvt->mce_count = 0;
1502 spin_unlock_irqrestore(&pvt->mce_lock, flags);
1504 /* proccess mcelog errors */
1505 for (i = 0; i < count; i++)
1506 i7core_mce_output_error(mci, &m[i]);
1508 kfree(m);
1510 /* check memory count errors */
1511 for (i = 0; i < pvt->sockets; i++)
1512 check_mc_test_err(mci, i);
1516 * i7core_mce_check_error Replicates mcelog routine to get errors
1517 * This routine simply queues mcelog errors, and
1518 * return. The error itself should be handled later
1519 * by i7core_check_error.
1521 static int i7core_mce_check_error(void *priv, struct mce *mce)
1523 struct mem_ctl_info *mci = priv;
1524 struct i7core_pvt *pvt = mci->pvt_info;
1525 unsigned long flags;
1528 * Just let mcelog handle it if the error is
1529 * outside the memory controller
1531 if (((mce->status & 0xffff) >> 7) != 1)
1532 return 0;
1534 /* Bank 8 registers are the only ones that we know how to handle */
1535 if (mce->bank != 8)
1536 return 0;
1538 spin_lock_irqsave(&pvt->mce_lock, flags);
1539 if (pvt->mce_count < MCE_LOG_LEN) {
1540 memcpy(&pvt->mce_entry[pvt->mce_count], mce, sizeof(*mce));
1541 pvt->mce_count++;
1543 spin_unlock_irqrestore(&pvt->mce_lock, flags);
1545 /* Handle fatal errors immediately */
1546 if (mce->mcgstatus & 1)
1547 i7core_check_error(mci);
1549 /* Advice mcelog that the error were handled */
1550 return 1;
1554 * i7core_probe Probe for ONE instance of device to see if it is
1555 * present.
1556 * return:
1557 * 0 for FOUND a device
1558 * < 0 for error code
1560 static int __devinit i7core_probe(struct pci_dev *pdev,
1561 const struct pci_device_id *id)
1563 struct mem_ctl_info *mci;
1564 struct i7core_pvt *pvt;
1565 int num_channels = 0;
1566 int num_csrows = 0;
1567 int csrow = 0;
1568 int dev_idx = id->driver_data;
1569 int rc, i;
1570 u8 sockets;
1572 if (unlikely(dev_idx >= ARRAY_SIZE(i7core_devs)))
1573 return -EINVAL;
1575 /* get the pci devices we want to reserve for our use */
1576 rc = i7core_get_devices();
1577 if (unlikely(rc < 0))
1578 return rc;
1580 sockets = 1;
1581 for (i = NUM_SOCKETS - 1; i > 0; i--)
1582 if (pci_devs[0].pdev[i]) {
1583 sockets = i + 1;
1584 break;
1587 for (i = 0; i < sockets; i++) {
1588 int channels;
1589 int csrows;
1591 /* Check the number of active and not disabled channels */
1592 rc = i7core_get_active_channels(i, &channels, &csrows);
1593 if (unlikely(rc < 0))
1594 goto fail0;
1596 num_channels += channels;
1597 num_csrows += csrows;
1600 /* allocate a new MC control structure */
1601 mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels, 0);
1602 if (unlikely(!mci)) {
1603 rc = -ENOMEM;
1604 goto fail0;
1607 debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);
1609 mci->dev = &pdev->dev; /* record ptr to the generic device */
1610 pvt = mci->pvt_info;
1611 memset(pvt, 0, sizeof(*pvt));
1612 pvt->sockets = sockets;
1613 mci->mc_idx = 0;
1616 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1617 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1618 * memory channels
1620 mci->mtype_cap = MEM_FLAG_DDR3;
1621 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1622 mci->edac_cap = EDAC_FLAG_NONE;
1623 mci->mod_name = "i7core_edac.c";
1624 mci->mod_ver = I7CORE_REVISION;
1625 mci->ctl_name = i7core_devs[dev_idx].ctl_name;
1626 mci->dev_name = pci_name(pdev);
1627 mci->ctl_page_to_phys = NULL;
1628 mci->mc_driver_sysfs_attributes = i7core_inj_attrs;
1629 /* Set the function pointer to an actual operation function */
1630 mci->edac_check = i7core_check_error;
1632 /* Store pci devices at mci for faster access */
1633 rc = mci_bind_devs(mci);
1634 if (unlikely(rc < 0))
1635 goto fail1;
1637 /* Get dimm basic config */
1638 for (i = 0; i < sockets; i++)
1639 get_dimm_config(mci, &csrow, i);
1641 /* add this new MC control structure to EDAC's list of MCs */
1642 if (unlikely(edac_mc_add_mc(mci))) {
1643 debugf0("MC: " __FILE__
1644 ": %s(): failed edac_mc_add_mc()\n", __func__);
1645 /* FIXME: perhaps some code should go here that disables error
1646 * reporting if we just enabled it
1649 rc = -EINVAL;
1650 goto fail1;
1653 /* allocating generic PCI control info */
1654 i7core_pci = edac_pci_create_generic_ctl(&pdev->dev, EDAC_MOD_STR);
1655 if (unlikely(!i7core_pci)) {
1656 printk(KERN_WARNING
1657 "%s(): Unable to create PCI control\n",
1658 __func__);
1659 printk(KERN_WARNING
1660 "%s(): PCI error report via EDAC not setup\n",
1661 __func__);
1664 /* Default error mask is any memory */
1665 pvt->inject.channel = 0;
1666 pvt->inject.dimm = -1;
1667 pvt->inject.rank = -1;
1668 pvt->inject.bank = -1;
1669 pvt->inject.page = -1;
1670 pvt->inject.col = -1;
1672 /* Registers on edac_mce in order to receive memory errors */
1673 pvt->edac_mce.priv = mci;
1674 pvt->edac_mce.check_error = i7core_mce_check_error;
1675 spin_lock_init(&pvt->mce_lock);
1677 rc = edac_mce_register(&pvt->edac_mce);
1678 if (unlikely (rc < 0)) {
1679 debugf0("MC: " __FILE__
1680 ": %s(): failed edac_mce_register()\n", __func__);
1681 goto fail1;
1684 i7core_printk(KERN_INFO, "Driver loaded.\n");
1686 return 0;
1688 fail1:
1689 edac_mc_free(mci);
1691 fail0:
1692 i7core_put_devices();
1693 return rc;
1697 * i7core_remove destructor for one instance of device
1700 static void __devexit i7core_remove(struct pci_dev *pdev)
1702 struct mem_ctl_info *mci;
1703 struct i7core_pvt *pvt;
1705 debugf0(__FILE__ ": %s()\n", __func__);
1707 if (i7core_pci)
1708 edac_pci_release_generic_ctl(i7core_pci);
1711 mci = edac_mc_del_mc(&pdev->dev);
1712 if (!mci)
1713 return;
1715 /* Unregisters on edac_mce in order to receive memory errors */
1716 pvt = mci->pvt_info;
1717 edac_mce_unregister(&pvt->edac_mce);
1719 /* retrieve references to resources, and free those resources */
1720 i7core_put_devices();
1722 edac_mc_free(mci);
1725 MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);
1728 * i7core_driver pci_driver structure for this module
1731 static struct pci_driver i7core_driver = {
1732 .name = "i7core_edac",
1733 .probe = i7core_probe,
1734 .remove = __devexit_p(i7core_remove),
1735 .id_table = i7core_pci_tbl,
1739 * i7core_init Module entry function
1740 * Try to initialize this module for its devices
1742 static int __init i7core_init(void)
1744 int pci_rc;
1746 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1748 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1749 opstate_init();
1751 pci_rc = pci_register_driver(&i7core_driver);
1753 return (pci_rc < 0) ? pci_rc : 0;
1757 * i7core_exit() Module exit function
1758 * Unregister the driver
1760 static void __exit i7core_exit(void)
1762 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1763 pci_unregister_driver(&i7core_driver);
1766 module_init(i7core_init);
1767 module_exit(i7core_exit);
1769 MODULE_LICENSE("GPL");
1770 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
1771 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
1772 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
1773 I7CORE_REVISION);
1775 module_param(edac_op_state, int, 0444);
1776 MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");