/* Intel 7 core Memory Controller kernel module (Nehalem)
 *
 * This file may be distributed under the terms of the
 * GNU General Public License version 2 only.
 *
 * Copyright (c) 2009 by:
 *	 Mauro Carvalho Chehab <mchehab@redhat.com>
 *
 * Red Hat Inc. http://www.redhat.com
 *
 * Forked and adapted from the i5400_edac driver
 *
 * Based on the following public Intel datasheets:
 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
 * Datasheet, Volume 2:
 *	http://download.intel.com/design/processor/datashts/320835.pdf
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_ids.h>
#include <linux/slab.h>
#include <linux/delay.h>	/* for msleep(), used by write_and_test() */
#include <linux/edac.h>
#include <linux/mmzone.h>
#include <linux/edac_mce.h>
#include <linux/smp.h>
#include <asm/processor.h>

#include "edac_core.h"

/*
 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
 * registers start at bus 255, and are not reported by BIOS.
 * Currently, devices with up to 2 sockets are supported. To support more
 * sockets over QPI (Quick Path Interconnect), just increase this number.
 */
#define MAX_SOCKET_BUSES	2

/*
 * Alter this version for the module when modifications are made
 */
#define I7CORE_REVISION    " Ver: 1.0.0 " __DATE__
#define EDAC_MOD_STR      "i7core_edac"

/*
 * Debug macros
 */
#define i7core_printk(level, fmt, arg...)			\
	edac_printk(level, "i7core", fmt, ##arg)

#define i7core_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)

/*
 * i7core Memory Controller Registers
 */

	/* OFFSETS for Device 0 Function 0 */

#define MC_CFG_CONTROL	0x90

	/* OFFSETS for Device 3 Function 0 */

#define MC_CONTROL	0x48
#define MC_STATUS	0x4c
#define MC_MAX_DOD	0x64

/*
 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */

#define MC_TEST_ERR_RCV1	0x60
  #define DIMM2_COR_ERR(r)			((r) & 0x7fff)

#define MC_TEST_ERR_RCV0	0x64
  #define DIMM1_COR_ERR(r)			(((r) >> 16) & 0x7fff)
  #define DIMM0_COR_ERR(r)			((r) & 0x7fff)

/* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
#define MC_COR_ECC_CNT_0	0x80
#define MC_COR_ECC_CNT_1	0x84
#define MC_COR_ECC_CNT_2	0x88
#define MC_COR_ECC_CNT_3	0x8c
#define MC_COR_ECC_CNT_4	0x90
#define MC_COR_ECC_CNT_5	0x94

#define DIMM_TOP_COR_ERR(r)			(((r) >> 16) & 0x7fff)
#define DIMM_BOT_COR_ERR(r)			((r) & 0x7fff)
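
/*
 * Note: these registers pack 15-bit correctable error counters, two per
 * 32-bit register (one in each 16-bit half); this is why the accumulation
 * code below masks with 0x7fff and adds 0x7fff back whenever a counter
 * wraps around between two reads.
 */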

/* OFFSETS for Devices 4,5 and 6 Function 0 */

#define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
  #define THREE_DIMMS_PRESENT		(1 << 24)
  #define SINGLE_QUAD_RANK_PRESENT	(1 << 23)
  #define QUAD_RANK_PRESENT		(1 << 22)
  #define REGISTERED_DIMM		(1 << 15)

#define MC_CHANNEL_MAPPER	0x60
  #define RDLCH(r, ch)		((((r) >> (3 + ((ch) * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)		((((r) >> ((ch) * 6)) & 0x07) - 1)

#define MC_CHANNEL_RANK_PRESENT 0x7c
  #define RANK_PRESENT_MASK		0xffff

#define MC_CHANNEL_ADDR_MATCH	0xf0
#define MC_CHANNEL_ERROR_MASK	0xf8
#define MC_CHANNEL_ERROR_INJECT	0xfc
  #define INJECT_ADDR_PARITY	0x10
  #define INJECT_ECC		0x08
  #define MASK_CACHELINE	0x06
  #define MASK_FULL_CACHELINE	0x06
  #define MASK_MSB32_CACHELINE	0x04
  #define MASK_LSB32_CACHELINE	0x02
  #define NO_MASK_CACHELINE	0x00
  #define REPEAT_EN		0x01

/* OFFSETS for Devices 4,5 and 6 Function 1 */

#define MC_DOD_CH_DIMM0		0x48
#define MC_DOD_CH_DIMM1		0x4c
#define MC_DOD_CH_DIMM2		0x50
  #define RANKOFFSET_MASK	((1 << 12) | (1 << 11) | (1 << 10))
  #define RANKOFFSET(x)		(((x) & RANKOFFSET_MASK) >> 10)
  #define DIMM_PRESENT_MASK	(1 << 9)
  #define DIMM_PRESENT(x)	(((x) & DIMM_PRESENT_MASK) >> 9)
  #define MC_DOD_NUMBANK_MASK		((1 << 8) | (1 << 7))
  #define MC_DOD_NUMBANK(x)		(((x) & MC_DOD_NUMBANK_MASK) >> 7)
  #define MC_DOD_NUMRANK_MASK		((1 << 6) | (1 << 5))
  #define MC_DOD_NUMRANK(x)		(((x) & MC_DOD_NUMRANK_MASK) >> 5)
  #define MC_DOD_NUMROW_MASK		((1 << 4) | (1 << 3) | (1 << 2))
  #define MC_DOD_NUMROW(x)		(((x) & MC_DOD_NUMROW_MASK) >> 2)
  #define MC_DOD_NUMCOL_MASK		3
  #define MC_DOD_NUMCOL(x)		((x) & MC_DOD_NUMCOL_MASK)

#define MC_RANK_PRESENT		0x7c

#define MC_SAG_CH_0	0x80
#define MC_SAG_CH_1	0x84
#define MC_SAG_CH_2	0x88
#define MC_SAG_CH_3	0x8c
#define MC_SAG_CH_4	0x90
#define MC_SAG_CH_5	0x94
#define MC_SAG_CH_6	0x98
#define MC_SAG_CH_7	0x9c

#define MC_RIR_LIMIT_CH_0	0x40
#define MC_RIR_LIMIT_CH_1	0x44
#define MC_RIR_LIMIT_CH_2	0x48
#define MC_RIR_LIMIT_CH_3	0x4C
#define MC_RIR_LIMIT_CH_4	0x50
#define MC_RIR_LIMIT_CH_5	0x54
#define MC_RIR_LIMIT_CH_6	0x58
#define MC_RIR_LIMIT_CH_7	0x5C
  #define MC_RIR_LIMIT_MASK	((1 << 10) - 1)

#define MC_RIR_WAY_CH		0x80
  #define MC_RIR_WAY_OFFSET_MASK	(((1 << 14) - 1) & ~0x7)
  #define MC_RIR_WAY_RANK_MASK		0x7

/*
 * i7core structs
 */

#define NUM_CHANS 3
#define MAX_DIMMS 3		/* Max DIMMS per channel */
#define MAX_MCR_FUNC  4
#define MAX_CHAN_FUNC 3

struct i7core_info {
	u32	mc_control;
	u32	mc_status;
	u32	max_dod;
	u32	ch_map;
};


struct i7core_inject {
	int	enable;

	u32	section;
	u32	type;
	u32	eccmask;

	/* Error address mask */
	int channel, dimm, rank, bank, page, col;
};

struct i7core_channel {
	u32	ranks;
	u32	dimms;
};

struct pci_id_descr {
	int		dev;
	int		func;
	int		dev_id;
	int		optional;
};

struct i7core_dev {
	struct list_head	list;
	u8			socket;
	struct pci_dev		**pdev;
	int			n_devs;
	struct mem_ctl_info	*mci;
};

struct i7core_pvt {
	struct pci_dev	*pci_noncore;
	struct pci_dev	*pci_mcr[MAX_MCR_FUNC + 1];
	struct pci_dev	*pci_ch[NUM_CHANS][MAX_CHAN_FUNC + 1];

	struct i7core_dev *i7core_dev;

	struct i7core_info	info;
	struct i7core_inject	inject;
	struct i7core_channel	channel[NUM_CHANS];

	int		channels; /* Number of active channels */

	int		ce_count_available;
	int		csrow_map[NUM_CHANS][MAX_DIMMS];

	/* ECC corrected errors counts per udimm */
	unsigned long	udimm_ce_count[MAX_DIMMS];
	int		udimm_last_ce_count[MAX_DIMMS];
	/* ECC corrected errors counts per rdimm */
	unsigned long	rdimm_ce_count[NUM_CHANS][MAX_DIMMS];
	int		rdimm_last_ce_count[NUM_CHANS][MAX_DIMMS];

	unsigned int	is_registered;

	/* mcelog glue */
	struct edac_mce		edac_mce;

	/* Fifo double buffers */
	struct mce		mce_entry[MCE_LOG_LEN];
	struct mce		mce_outentry[MCE_LOG_LEN];

	/* Fifo in/out counters */
	unsigned	mce_in, mce_out;

	/* Count of errors that were lost because the fifo overran */
	unsigned	mce_overrun;
};

/* Static vars */
static LIST_HEAD(i7core_edac_list);
static DEFINE_MUTEX(i7core_edac_lock);

#define PCI_DESCR(device, function, device_id)	\
	.dev = (device),			\
	.func = (function),			\
	.dev_id = (device_id)

static struct pci_id_descr pci_dev_descr_i7core[] = {
		/* Memory controller */
	{ PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR)     },
	{ PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD)  },
			/* Exists only for RDIMM */
	{ PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS), .optional = 1  },
	{ PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST) },

		/* Channel 0 */
	{ PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL) },
	{ PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR) },
	{ PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK) },
	{ PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC)   },

		/* Channel 1 */
	{ PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL) },
	{ PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR) },
	{ PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK) },
	{ PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC)   },

		/* Channel 2 */
	{ PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL) },
	{ PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR) },
	{ PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK) },
	{ PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC)   },

		/* Generic Non-core registers */
	/*
	 * This is the PCI device on i7core and on Xeon 35xx (8086:2c41)
	 * On Xeon 55xx, however, it has a different id (8086:2c40). So,
	 * the probing code needs to test for the other address in case of
	 * failure of this one
	 */
	{ PCI_DESCR(0, 0, PCI_DEVICE_ID_INTEL_I7_NONCORE) },

};

/*
 * pci_device_id table for which devices we are looking for
 */
static const struct pci_device_id i7core_pci_tbl[] __devinitdata = {
	{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT)},
	{0,}			/* 0 terminated list. */
};

static struct edac_pci_ctl_info *i7core_pci;

/****************************************************************************
			Ancillary status routines
 ****************************************************************************/

	/* MC_CONTROL bits */
#define CH_ACTIVE(pvt, ch)	((pvt)->info.mc_control & (1 << (8 + (ch))))
#define ECCx8(pvt)		((pvt)->info.mc_control & (1 << 1))

	/* MC_STATUS bits */
#define ECC_ENABLED(pvt)	((pvt)->info.mc_status & (1 << 4))
#define CH_DISABLED(pvt, ch)	((pvt)->info.mc_status & (1 << (ch)))

	/* MC_MAX_DOD read functions */
static inline int numdimms(u32 dimms)
{
	return (dimms & 0x3) + 1;
}

static inline int numrank(u32 rank)
{
	static int ranks[4] = { 1, 2, 4, -EINVAL };

	return ranks[rank & 0x3];
}

static inline int numbank(u32 bank)
{
	static int banks[4] = { 4, 8, 16, -EINVAL };

	return banks[bank & 0x3];
}

static inline int numrow(u32 row)
{
	static int rows[8] = {
		1 << 12, 1 << 13, 1 << 14, 1 << 15,
		1 << 16, -EINVAL, -EINVAL, -EINVAL,
	};

	return rows[row & 0x7];
}

static inline int numcol(u32 col)
{
	static int cols[8] = {
		1 << 10, 1 << 11, 1 << 12, -EINVAL,
	};
	return cols[col & 0x3];
}

static struct i7core_dev *get_i7core_dev(u8 socket)
{
	struct i7core_dev *i7core_dev;

	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		if (i7core_dev->socket == socket)
			return i7core_dev;
	}

	return NULL;
}

/****************************************************************************
			Memory check routines
 ****************************************************************************/
static struct pci_dev *get_pdev_slot_func(u8 socket, unsigned slot,
					  unsigned func)
{
	struct i7core_dev *i7core_dev = get_i7core_dev(socket);
	int i;

	if (!i7core_dev)
		return NULL;

	for (i = 0; i < i7core_dev->n_devs; i++) {
		if (!i7core_dev->pdev[i])
			continue;

		if (PCI_SLOT(i7core_dev->pdev[i]->devfn) == slot &&
		    PCI_FUNC(i7core_dev->pdev[i]->devfn) == func) {
			return i7core_dev->pdev[i];
		}
	}

	return NULL;
}

/**
 * i7core_get_active_channels() - gets the number of channels and csrows
 * @socket:	Quick Path Interconnect socket
 * @channels:	Number of channels that will be returned
 * @csrows:	Number of csrows found
 *
 * Since the EDAC core needs to know in advance the number of available
 * channels and csrows, in order to allocate memory for them, two similar
 * steps are needed. The first step, implemented here, counts the csrows
 * and channels present on one socket, so that the mci components can be
 * sized properly.
 *
 * It should be noted that none of the currently available datasheets
 * explain or even mention how csrows are seen by the memory controller,
 * so a fake csrow description is needed: this driver maps each DIMM to
 * one csrow.
 */
static int i7core_get_active_channels(u8 socket, unsigned *channels,
				      unsigned *csrows)
{
	struct pci_dev *pdev = NULL;
	int i, j;
	u32 status, control;

	*channels = 0;
	*csrows = 0;

	pdev = get_pdev_slot_func(socket, 3, 0);
	if (!pdev) {
		i7core_printk(KERN_ERR, "Couldn't find socket %d fn 3.0!!!\n",
			      socket);
		return -ENODEV;
	}

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_STATUS, &status);
	pci_read_config_dword(pdev, MC_CONTROL, &control);

	for (i = 0; i < NUM_CHANS; i++) {
		u32 dimm_dod[3];
		/* Check if the channel is active */
		if (!(control & (1 << (8 + i))))
			continue;

		/* Check if the channel is disabled */
		if (status & (1 << i))
			continue;

		pdev = get_pdev_slot_func(socket, i + 4, 1);
		if (!pdev) {
			i7core_printk(KERN_ERR, "Couldn't find socket %d "
						"fn %d.%d!!!\n",
						socket, i + 4, 1);
			return -ENODEV;
		}
		/* Devices 4-6 function 1 */
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pdev,
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		(*channels)++;

		for (j = 0; j < 3; j++) {
			if (!DIMM_PRESENT(dimm_dod[j]))
				continue;
			(*csrows)++;
		}
	}

	debugf0("Number of active channels on socket %d: %d\n",
		socket, *channels);

	return 0;
}

static int get_dimm_config(struct mem_ctl_info *mci, int *csrow)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct csrow_info *csr;
	struct pci_dev *pdev;
	int i, j;
	unsigned long last_page = 0;
	enum edac_type mode;
	enum mem_type mtype;

	/* Get data from the MC register, function 0 */
	pdev = pvt->pci_mcr[0];
	if (!pdev)
		return -ENODEV;

	/* Device 3 function 0 reads */
	pci_read_config_dword(pdev, MC_CONTROL, &pvt->info.mc_control);
	pci_read_config_dword(pdev, MC_STATUS, &pvt->info.mc_status);
	pci_read_config_dword(pdev, MC_MAX_DOD, &pvt->info.max_dod);
	pci_read_config_dword(pdev, MC_CHANNEL_MAPPER, &pvt->info.ch_map);

	debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
		pvt->i7core_dev->socket, pvt->info.mc_control,
		pvt->info.mc_status, pvt->info.max_dod, pvt->info.ch_map);

	if (ECC_ENABLED(pvt)) {
		debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt) ? 8 : 4);
		if (ECCx8(pvt))
			mode = EDAC_S8ECD8ED;
		else
			mode = EDAC_S4ECD4ED;
	} else {
		debugf0("ECC disabled\n");
		mode = EDAC_NONE;
	}

	/* FIXME: need to handle the error codes */
	debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
		"x%x x 0x%x\n",
		numdimms(pvt->info.max_dod),
		numrank(pvt->info.max_dod >> 2),
		numbank(pvt->info.max_dod >> 4),
		numrow(pvt->info.max_dod >> 6),
		numcol(pvt->info.max_dod >> 9));

	for (i = 0; i < NUM_CHANS; i++) {
		u32 data, dimm_dod[3], value[8];

		if (!CH_ACTIVE(pvt, i)) {
			debugf0("Channel %i is not active\n", i);
			continue;
		}
		if (CH_DISABLED(pvt, i)) {
			debugf0("Channel %i is disabled\n", i);
			continue;
		}

		/* Devices 4-6 function 0 */
		pci_read_config_dword(pvt->pci_ch[i][0],
				MC_CHANNEL_DIMM_INIT_PARAMS, &data);

		pvt->channel[i].ranks = (data & QUAD_RANK_PRESENT) ?
						4 : 2;

		if (data & REGISTERED_DIMM)
			mtype = MEM_RDDR3;
		else
			mtype = MEM_DDR3;
#if 0
		if (data & THREE_DIMMS_PRESENT)
			pvt->channel[i].dimms = 3;
		else if (data & SINGLE_QUAD_RANK_PRESENT)
			pvt->channel[i].dimms = 1;
		else
			pvt->channel[i].dimms = 2;
#endif

		/* Devices 4-6 function 1 */
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM0, &dimm_dod[0]);
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM1, &dimm_dod[1]);
		pci_read_config_dword(pvt->pci_ch[i][1],
				MC_DOD_CH_DIMM2, &dimm_dod[2]);

		debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
			"%d ranks, %cDIMMs\n",
			i,
			RDLCH(pvt->info.ch_map, i), WRLCH(pvt->info.ch_map, i),
			data,
			pvt->channel[i].ranks,
			(data & REGISTERED_DIMM) ? 'R' : 'U');

		for (j = 0; j < 3; j++) {
			u32 banks, ranks, rows, cols;
			u32 size, npages;

			if (!DIMM_PRESENT(dimm_dod[j]))
				continue;

			banks = numbank(MC_DOD_NUMBANK(dimm_dod[j]));
			ranks = numrank(MC_DOD_NUMRANK(dimm_dod[j]));
			rows = numrow(MC_DOD_NUMROW(dimm_dod[j]));
			cols = numcol(MC_DOD_NUMCOL(dimm_dod[j]));

			/* Each (bank, row, col) address stores 8 bytes */
			size = (rows * cols * banks * ranks) >> (20 - 3);
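
			/*
			 * Worked example: 2^14 rows x 2^11 cols x 8 banks
			 * x 2 ranks = 2^29 addresses; at 8 bytes each,
			 * size = 2^29 >> (20 - 3) = 4096 MB.
			 */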

			pvt->channel[i].dimms++;

			debugf0("\tdimm %d %d MB offset: %x, "
				"bank: %d, rank: %d, row: %#x, col: %#x\n",
				j, size,
				RANKOFFSET(dimm_dod[j]),
				banks, ranks, rows, cols);

#if PAGE_SHIFT > 20
			npages = size >> (PAGE_SHIFT - 20);
#else
			npages = size << (20 - PAGE_SHIFT);
#endif

			csr = &mci->csrows[*csrow];
			csr->first_page = last_page + 1;
			last_page += npages;
			csr->last_page = last_page;
			csr->nr_pages = npages;

			csr->page_mask = 0;
			csr->grain = 8;
			csr->csrow_idx = *csrow;
			csr->nr_channels = 1;

			csr->channels[0].chan_idx = i;
			csr->channels[0].ce_count = 0;

			pvt->csrow_map[i][j] = *csrow;

			switch (banks) {
			case 4:
				csr->dtype = DEV_X4;
				break;
			case 8:
				csr->dtype = DEV_X8;
				break;
			case 16:
				csr->dtype = DEV_X16;
				break;
			default:
				csr->dtype = DEV_UNKNOWN;
			}

			csr->edac_mode = mode;
			csr->mtype = mtype;

			(*csrow)++;
		}

		pci_read_config_dword(pdev, MC_SAG_CH_0, &value[0]);
		pci_read_config_dword(pdev, MC_SAG_CH_1, &value[1]);
		pci_read_config_dword(pdev, MC_SAG_CH_2, &value[2]);
		pci_read_config_dword(pdev, MC_SAG_CH_3, &value[3]);
		pci_read_config_dword(pdev, MC_SAG_CH_4, &value[4]);
		pci_read_config_dword(pdev, MC_SAG_CH_5, &value[5]);
		pci_read_config_dword(pdev, MC_SAG_CH_6, &value[6]);
		pci_read_config_dword(pdev, MC_SAG_CH_7, &value[7]);
		debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i);
		for (j = 0; j < 8; j++)
			debugf1("\t\t%#x\t%#x\t%#x\n",
				(value[j] >> 27) & 0x1,
				(value[j] >> 24) & 0x7,
				(value[j] & ((1 << 24) - 1)));
	}

	return 0;
}

/****************************************************************************
			Error insertion routines
 ****************************************************************************/

/* The i7core has independent error injection features per channel.
   However, to keep the code simpler, we don't allow enabling error
   injection on more than one channel.
   Also, since a change to an inject parameter is applied only at enable
   time, we disable error injection on all write calls to the sysfs nodes
   that control the error code injection.
 */
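
/*
 * For illustration, a correctable-error injection session driven from
 * userspace could look like this (the mc0 path assumes the standard EDAC
 * sysfs layout and that mc0 is the instance for the socket under test):
 *
 *	echo 1 > /sys/devices/system/edac/mc/mc0/inject_addrmatch/channel
 *	echo 2 > /sys/devices/system/edac/mc/mc0/inject_eccmask
 *	echo 2 > /sys/devices/system/edac/mc/mc0/inject_type
 *	echo 1 > /sys/devices/system/edac/mc/mc0/inject_enable
 */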

static int disable_inject(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;

	pvt->inject.enable = 0;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return -ENODEV;

	pci_write_config_dword(pvt->pci_ch[pvt->inject.channel][0],
				MC_CHANNEL_ERROR_INJECT, 0);

	return 0;
}

/*
 * i7core inject inject.section
 *
 *	accept and store error injection inject.section value
 *	bit 0 - refers to the lower 32-byte half cacheline
 *	bit 1 - refers to the upper 32-byte half cacheline
 */
static ssize_t i7core_inject_section_store(struct mem_ctl_info *mci,
					   const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if ((rc < 0) || (value > 3))
		return -EIO;

	pvt->inject.section = (u32) value;
	return count;
}

static ssize_t i7core_inject_section_show(struct mem_ctl_info *mci,
					  char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.section);
}

/*
 * i7core inject.type
 *
 *	accept and store error injection inject.type value
 *	bit 0 - repeat enable - Enable error repetition
 *	bit 1 - inject ECC error
 *	bit 2 - inject parity error
 */
static ssize_t i7core_inject_type_store(struct mem_ctl_info *mci,
					const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if ((rc < 0) || (value > 7))
		return -EIO;

	pvt->inject.type = (u32) value;
	return count;
}

static ssize_t i7core_inject_type_show(struct mem_ctl_info *mci,
				       char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.type);
}

/*
 * i7core_inject_inject.eccmask_store
 *
 * The type of error (UE/CE) will depend on the inject.eccmask value:
 *   Any bits set to a 1 will flip the corresponding ECC bit
 *   Correctable errors can be injected by flipping 1 bit or the bits within
 *   a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
 *   23:16 and 31:24). Flipping bits in two symbol pairs will cause an
 *   uncorrectable error to be injected.
 */
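
/*
 * Example, assuming the symbol-pair rule above: an eccmask of 0x00000003
 * flips two bits inside the 7:0/15:8 symbol pair, so a correctable error
 * is injected; an eccmask of 0x00010001 flips one bit in each of two
 * different symbol pairs, which injects an uncorrectable error.
 */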
static ssize_t i7core_inject_eccmask_store(struct mem_ctl_info *mci,
					   const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	unsigned long value;
	int rc;

	if (pvt->inject.enable)
		disable_inject(mci);

	rc = strict_strtoul(data, 10, &value);
	if (rc < 0)
		return -EIO;

	pvt->inject.eccmask = (u32) value;
	return count;
}

static ssize_t i7core_inject_eccmask_show(struct mem_ctl_info *mci,
					  char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	return sprintf(data, "0x%08x\n", pvt->inject.eccmask);
}

/*
 * i7core_addrmatch
 *
 * Accepts and stores the error injection address match criteria. Each of
 * the channel, dimm, rank, bank, page and col attributes below selects the
 * value that a write must match for the error to be injected; writing
 * "any" (stored internally as -1) tells the MCU to ignore that field when
 * matching.
 */

#define DECLARE_ADDR_MATCH(param, limit)			\
static ssize_t i7core_inject_store_##param(			\
		struct mem_ctl_info *mci,			\
		const char *data, size_t count)			\
{								\
	struct i7core_pvt *pvt;					\
	long value;						\
	int rc;							\
								\
	debugf1("%s()\n", __func__);				\
	pvt = mci->pvt_info;					\
								\
	if (pvt->inject.enable)					\
		disable_inject(mci);				\
								\
	if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
		value = -1;					\
	else {							\
		rc = strict_strtol(data, 10, &value);		\
		if ((rc < 0) || (value >= limit))		\
			return -EIO;				\
	}							\
								\
	pvt->inject.param = value;				\
								\
	return count;						\
}								\
								\
static ssize_t i7core_inject_show_##param(			\
		struct mem_ctl_info *mci,			\
		char *data)					\
{								\
	struct i7core_pvt *pvt;					\
								\
	pvt = mci->pvt_info;					\
	debugf1("%s() pvt=%p\n", __func__, pvt);		\
	if (pvt->inject.param < 0)				\
		return sprintf(data, "any\n");			\
	else							\
		return sprintf(data, "%d\n", pvt->inject.param);\
}

#define ATTR_ADDR_MATCH(param)				\
	{						\
		.attr = {				\
			.name = #param,			\
			.mode = (S_IRUGO | S_IWUSR)	\
		},					\
		.show  = i7core_inject_show_##param,	\
		.store = i7core_inject_store_##param,	\
	}

DECLARE_ADDR_MATCH(channel, 3);
DECLARE_ADDR_MATCH(dimm, 3);
DECLARE_ADDR_MATCH(rank, 4);
DECLARE_ADDR_MATCH(bank, 32);
DECLARE_ADDR_MATCH(page, 0x10000);
DECLARE_ADDR_MATCH(col, 0x4000);

static int write_and_test(struct pci_dev *dev, int where, u32 val)
{
	u32 read;
	int count;

	debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
		where, val);

	for (count = 0; count < 10; count++) {
		if (count)
			msleep(100);
		pci_write_config_dword(dev, where, val);
		pci_read_config_dword(dev, where, &read);

		if (read == val)
			return 0;
	}

	i7core_printk(KERN_ERR, "Error during set pci %02x:%02x.%x reg=%02x "
		"write=%08x. Read=%08x\n",
		dev->bus->number, PCI_SLOT(dev->devfn), PCI_FUNC(dev->devfn),
		where, val, read);

	return -EINVAL;
}

/*
 * This routine prepares the Memory Controller for error injection.
 * The error will be injected when some process tries to write to the
 * memory that matches the given criteria.
 * The criteria can be set in terms of a mask where dimm, rank, bank, page
 * and col can be specified.
 * A -1 value for any of the mask items will make the MCU ignore
 * that matching criterion for error injection.
 *
 * It should be noted that the error will only happen after a write
 * operation on a memory address that matches the condition. If REPEAT_EN
 * is not enabled in the inject mask, then it will produce just one error.
 * Otherwise, it will repeat until the inject mask is cleared.
 *
 * FIXME: This routine assumes that the MAXNUMDIMMS value of MC_MAX_DOD
 * is reliable enough to check whether the MC is using all
 * three channels. However, this is not clear in the datasheet.
 */
static ssize_t i7core_inject_enable_store(struct mem_ctl_info *mci,
					  const char *data, size_t count)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;
	u64 mask = 0;
	int  rc;
	long enable;

	if (!pvt->pci_ch[pvt->inject.channel][0])
		return 0;

	rc = strict_strtol(data, 10, &enable);
	if (rc < 0)
		return 0;

	if (enable) {
		pvt->inject.enable = 1;
	} else {
		disable_inject(mci);
		return count;
	}

	/* Sets pvt->inject.dimm mask */
	if (pvt->inject.dimm < 0)
		mask |= 1L << 41;
	else {
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.dimm & 0x3L) << 35;
		else
			mask |= (pvt->inject.dimm & 0x1L) << 36;
	}

	/* Sets pvt->inject.rank mask */
	if (pvt->inject.rank < 0)
		mask |= 1L << 40;
	else {
		if (pvt->channel[pvt->inject.channel].dimms > 2)
			mask |= (pvt->inject.rank & 0x1L) << 34;
		else
			mask |= (pvt->inject.rank & 0x3L) << 34;
	}

	/* Sets pvt->inject.bank mask (bank numbers take 5 bits) */
	if (pvt->inject.bank < 0)
		mask |= 1L << 39;
	else
		mask |= (pvt->inject.bank & 0x1fL) << 30;

	/* Sets pvt->inject.page mask */
	if (pvt->inject.page < 0)
		mask |= 1L << 38;
	else
		mask |= (pvt->inject.page & 0xffffL) << 14;

	/* Sets pvt->inject.column mask */
	if (pvt->inject.col < 0)
		mask |= 1L << 37;
	else
		mask |= (pvt->inject.col & 0x3fffL);

	/*
	 * bit    0: REPEAT_EN
	 * bits 1-2: MASK_HALF_CACHELINE
	 * bit    3: INJECT_ECC
	 * bit    4: INJECT_ADDR_PARITY
	 */

	injectmask = (pvt->inject.type & 1) |
		     (pvt->inject.section & 0x3) << 1 |
		     (pvt->inject.type & 0x6) << (3 - 1);
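
	/*
	 * For instance, inject.type = 3 (repeat + ECC) with
	 * inject.section = 0 yields injectmask = 1 | (0 << 1) | (2 << 2)
	 * = 0x09, i.e. REPEAT_EN plus INJECT_ECC in the layout above.
	 */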

	/* Unlock writes to registers - this register is write only */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 0x2);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ADDR_MATCH, mask);
	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ADDR_MATCH + 4, mask >> 32L);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_MASK, pvt->inject.eccmask);

	write_and_test(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_INJECT, injectmask);

	/*
	 * This is something undocumented, based on my tests
	 * Without writing 8 to this register, errors aren't injected. Not sure
	 * why.
	 */
	pci_write_config_dword(pvt->pci_noncore,
			       MC_CFG_CONTROL, 8);

	debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
		" inject 0x%08x\n",
		mask, pvt->inject.eccmask, injectmask);

	return count;
}

static ssize_t i7core_inject_enable_show(struct mem_ctl_info *mci,
					 char *data)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 injectmask;

	pci_read_config_dword(pvt->pci_ch[pvt->inject.channel][0],
			       MC_CHANNEL_ERROR_INJECT, &injectmask);

	debugf0("Inject error read: 0x%08x\n", injectmask);

	if (injectmask & 0x0c)
		pvt->inject.enable = 1;

	return sprintf(data, "%d\n", pvt->inject.enable);
}

#define DECLARE_COUNTER(param)					\
static ssize_t i7core_show_counter_##param(			\
		struct mem_ctl_info *mci,			\
		char *data)					\
{								\
	struct i7core_pvt *pvt = mci->pvt_info;			\
								\
	debugf1("%s()\n", __func__);				\
	if (!pvt->ce_count_available || (pvt->is_registered))	\
		return sprintf(data, "data unavailable\n");	\
	return sprintf(data, "%lu\n",				\
			pvt->udimm_ce_count[param]);		\
}

#define ATTR_COUNTER(param)				\
	{						\
		.attr = {				\
			.name = __stringify(udimm##param),	\
			.mode = (S_IRUGO | S_IWUSR)	\
		},					\
		.show  = i7core_show_counter_##param	\
	}

DECLARE_COUNTER(0);
DECLARE_COUNTER(1);
DECLARE_COUNTER(2);

/*
 * Sysfs struct
 */

static struct mcidev_sysfs_attribute i7core_addrmatch_attrs[] = {
	ATTR_ADDR_MATCH(channel),
	ATTR_ADDR_MATCH(dimm),
	ATTR_ADDR_MATCH(rank),
	ATTR_ADDR_MATCH(bank),
	ATTR_ADDR_MATCH(page),
	ATTR_ADDR_MATCH(col),
	{ .attr = { .name = NULL } }
};

static struct mcidev_sysfs_group i7core_inject_addrmatch = {
	.name  = "inject_addrmatch",
	.mcidev_attr = i7core_addrmatch_attrs,
};

static struct mcidev_sysfs_attribute i7core_udimm_counters_attrs[] = {
	ATTR_COUNTER(0),
	ATTR_COUNTER(1),
	ATTR_COUNTER(2),
	{ .attr = { .name = NULL } }
};

static struct mcidev_sysfs_group i7core_udimm_counters = {
	.name = "all_channel_counts",
	.mcidev_attr = i7core_udimm_counters_attrs,
};

static struct mcidev_sysfs_attribute i7core_sysfs_attrs[] = {
	{
		.attr = {
			.name = "inject_section",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_section_show,
		.store = i7core_inject_section_store,
	}, {
		.attr = {
			.name = "inject_type",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_type_show,
		.store = i7core_inject_type_store,
	}, {
		.attr = {
			.name = "inject_eccmask",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_eccmask_show,
		.store = i7core_inject_eccmask_store,
	}, {
		.grp = &i7core_inject_addrmatch,
	}, {
		.attr = {
			.name = "inject_enable",
			.mode = (S_IRUGO | S_IWUSR)
		},
		.show  = i7core_inject_enable_show,
		.store = i7core_inject_enable_store,
	},
	{ .attr = { .name = NULL } },	/* Reserved for udimm counters */
	{ .attr = { .name = NULL } }
};

/****************************************************************************
	Device initialization routines: put/get, init/exit
 ****************************************************************************/

/*
 *	i7core_put_devices	'put' all the devices that we have
 *				reserved via 'get'
 */
static void i7core_put_devices(struct i7core_dev *i7core_dev)
{
	int i;

	debugf0(__FILE__ ": %s()\n", __func__);
	for (i = 0; i < i7core_dev->n_devs; i++) {
		struct pci_dev *pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;
		debugf0("Removing dev %02x:%02x.%d\n",
			pdev->bus->number,
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
		pci_dev_put(pdev);
	}
	kfree(i7core_dev->pdev);
	list_del(&i7core_dev->list);
	kfree(i7core_dev);
}

static void i7core_put_all_devices(void)
{
	struct i7core_dev *i7core_dev, *tmp;

	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list)
		i7core_put_devices(i7core_dev);
}

static void i7core_xeon_pci_fixup(int dev_id)
{
	struct pci_dev *pdev = NULL;
	int i;

	/*
	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core pci buses
	 * aren't announced by ACPI. So, we need to use a legacy scan probing
	 * to detect them
	 */
	pdev = pci_get_device(PCI_VENDOR_ID_INTEL, dev_id, NULL);
	if (unlikely(!pdev)) {
		for (i = 0; i < MAX_SOCKET_BUSES; i++)
			pcibios_scan_specific_bus(255 - i);
	}
	/* Drop the reference taken by pci_get_device() */
	pci_dev_put(pdev);
}

/*
 *	i7core_get_onedevice	Find and perform 'get' operation on one of
 *				the MCH's device/functions we want to
 *				reference for this driver, as listed in
 *				pci_dev_descr_i7core[]
 */
static int i7core_get_onedevice(struct pci_dev **prev, int devno,
				struct pci_id_descr *dev_descr,
				unsigned n_devs)
{
	struct i7core_dev *i7core_dev;

	struct pci_dev *pdev = NULL;
	u8 bus = 0;
	u8 socket = 0;

	pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
			      dev_descr->dev_id, *prev);

	/*
	 * On Xeon 55xx, the Intel QuickPath Arch Generic Non-core regs
	 * are at id 8086:2c40, instead of 8086:2c41. So, we need
	 * to probe for the alternate address in case of failure
	 */
	if (dev_descr->dev_id == PCI_DEVICE_ID_INTEL_I7_NONCORE && !pdev)
		pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
				      PCI_DEVICE_ID_INTEL_I7_NONCORE_ALT,
				      *prev);

	if (!pdev) {
		if (*prev) {
			*prev = pdev;
			return 0;
		}

		if (dev_descr->optional)
			return 0;

		i7core_printk(KERN_ERR,
			"Device not found: dev %02x.%d PCI ID %04x:%04x\n",
			dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

		/* End of list, leave */
		return -ENODEV;
	}
	bus = pdev->bus->number;

	if (bus == 0x3f)
		socket = 0;
	else
		socket = 255 - bus;
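
	/*
	 * Non-core devices normally sit on hidden buses counting down from
	 * 255 (bus 0xff -> socket 0, 0xfe -> socket 1, ...); the 0x3f case
	 * covers parts that expose them on a regular, visible bus instead.
	 */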

	i7core_dev = get_i7core_dev(socket);
	if (!i7core_dev) {
		i7core_dev = kzalloc(sizeof(*i7core_dev), GFP_KERNEL);
		if (!i7core_dev)
			return -ENOMEM;
		i7core_dev->pdev = kzalloc(sizeof(*i7core_dev->pdev) * n_devs,
					   GFP_KERNEL);
		if (!i7core_dev->pdev) {
			kfree(i7core_dev);
			return -ENOMEM;
		}
		i7core_dev->socket = socket;
		i7core_dev->n_devs = n_devs;
		list_add_tail(&i7core_dev->list, &i7core_edac_list);
	}

	if (i7core_dev->pdev[devno]) {
		i7core_printk(KERN_ERR,
			"Duplicated device for "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		pci_dev_put(pdev);
		return -ENODEV;
	}

	i7core_dev->pdev[devno] = pdev;

	/* Sanity check */
	if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
			PCI_FUNC(pdev->devfn) != dev_descr->func)) {
		i7core_printk(KERN_ERR,
			"Device PCI ID %04x:%04x "
			"has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
			bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			bus, dev_descr->dev, dev_descr->func);
		return -ENODEV;
	}

	/* Be sure that the device is enabled */
	if (unlikely(pci_enable_device(pdev) < 0)) {
		i7core_printk(KERN_ERR,
			"Couldn't enable "
			"dev %02x:%02x.%d PCI ID %04x:%04x\n",
			bus, dev_descr->dev, dev_descr->func,
			PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
		return -ENODEV;
	}

	debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
		socket, bus, dev_descr->dev,
		dev_descr->func,
		PCI_VENDOR_ID_INTEL, dev_descr->dev_id);

	*prev = pdev;

	return 0;
}

static int i7core_get_devices(struct pci_id_descr dev_descr[], unsigned n_devs)
{
	int i, rc;
	struct pci_dev *pdev = NULL;

	for (i = 0; i < n_devs; i++) {
		pdev = NULL;
		do {
			rc = i7core_get_onedevice(&pdev, i, &dev_descr[i],
						  n_devs);
			if (rc < 0) {
				i7core_put_all_devices();
				return -ENODEV;
			}
		} while (pdev);
	}

	return 0;
}

static int mci_bind_devs(struct mem_ctl_info *mci,
			 struct i7core_dev *i7core_dev)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	struct pci_dev *pdev;
	int i, func, slot;

	/* Associates i7core_dev and mci for future usage */
	pvt->i7core_dev = i7core_dev;
	i7core_dev->mci = mci;

	pvt->is_registered = 0;
	for (i = 0; i < i7core_dev->n_devs; i++) {
		pdev = i7core_dev->pdev[i];
		if (!pdev)
			continue;

		func = PCI_FUNC(pdev->devfn);
		slot = PCI_SLOT(pdev->devfn);
		if (slot == 3) {
			if (unlikely(func > MAX_MCR_FUNC))
				goto error;
			pvt->pci_mcr[func] = pdev;
		} else if (likely(slot >= 4 && slot < 4 + NUM_CHANS)) {
			if (unlikely(func > MAX_CHAN_FUNC))
				goto error;
			pvt->pci_ch[slot - 4][func] = pdev;
		} else if (!slot && !func)
			pvt->pci_noncore = pdev;
		else
			goto error;

		debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
			PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
			pdev, i7core_dev->socket);

		if (PCI_SLOT(pdev->devfn) == 3 &&
			PCI_FUNC(pdev->devfn) == 2)
			pvt->is_registered = 1;
	}

	/*
	 * Add extra nodes to count errors on udimm
	 * For registered memory, this is not needed, since the counters
	 * are already displayed at the standard locations
	 */
	if (!pvt->is_registered)
		i7core_sysfs_attrs[ARRAY_SIZE(i7core_sysfs_attrs)-2].grp =
			&i7core_udimm_counters;

	return 0;

error:
	i7core_printk(KERN_ERR, "Device %d, function %d "
		      "is out of the expected range\n",
		      slot, func);
	return -EINVAL;
}

/****************************************************************************
			Error check routines
 ****************************************************************************/
static void i7core_rdimm_update_csrow(struct mem_ctl_info *mci,
				      int chan, int dimm, int add)
{
	char *msg;
	struct i7core_pvt *pvt = mci->pvt_info;
	int row = pvt->csrow_map[chan][dimm], i;

	for (i = 0; i < add; i++) {
		msg = kasprintf(GFP_KERNEL, "Corrected error "
				"(Socket=%d channel=%d dimm=%d)",
				pvt->i7core_dev->socket, chan, dimm);

		edac_mc_handle_fbd_ce(mci, row, 0, msg);
		kfree(msg);
	}
}

static void i7core_rdimm_update_ce_count(struct mem_ctl_info *mci,
					 int chan, int new0, int new1, int new2)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int add0 = 0, add1 = 0, add2 = 0;
	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */

		add2 = new2 - pvt->rdimm_last_ce_count[chan][2];
		add1 = new1 - pvt->rdimm_last_ce_count[chan][1];
		add0 = new0 - pvt->rdimm_last_ce_count[chan][0];

		if (add2 < 0)
			add2 += 0x7fff;
		pvt->rdimm_ce_count[chan][2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->rdimm_ce_count[chan][1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->rdimm_ce_count[chan][0] += add0;
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->rdimm_last_ce_count[chan][2] = new2;
	pvt->rdimm_last_ce_count[chan][1] = new1;
	pvt->rdimm_last_ce_count[chan][0] = new0;

	/* Update the EDAC core */
	if (add0 != 0)
		i7core_rdimm_update_csrow(mci, chan, 0, add0);
	if (add1 != 0)
		i7core_rdimm_update_csrow(mci, chan, 1, add1);
	if (add2 != 0)
		i7core_rdimm_update_csrow(mci, chan, 2, add2);

}

static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv[3][2];
	int i, new0, new1, new2;

	/* Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly */
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_0,
								&rcv[0][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_1,
								&rcv[0][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_2,
								&rcv[1][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_3,
								&rcv[1][1]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_4,
								&rcv[2][0]);
	pci_read_config_dword(pvt->pci_mcr[2], MC_COR_ECC_CNT_5,
								&rcv[2][1]);
	for (i = 0 ; i < 3; i++) {
		debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
			(i * 2), rcv[i][0], (i * 2) + 1, rcv[i][1]);
		/* If the channel has 3 dimms */
		if (pvt->channel[i].dimms > 2) {
			new0 = DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][0]);
			new2 = DIMM_BOT_COR_ERR(rcv[i][1]);
		} else {
			new0 = DIMM_TOP_COR_ERR(rcv[i][0]) +
				DIMM_BOT_COR_ERR(rcv[i][0]);
			new1 = DIMM_TOP_COR_ERR(rcv[i][1]) +
				DIMM_BOT_COR_ERR(rcv[i][1]);
			new2 = 0;
		}

		i7core_rdimm_update_ce_count(mci, i, new0, new1, new2);
	}
}

/* This function is based on the device 3 function 4 registers as described on:
 * Intel Xeon Processor 5500 Series Datasheet Volume 2
 *	http://www.intel.com/Assets/PDF/datasheet/321322.pdf
 * also available at:
 *	http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
 */
static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	u32 rcv1, rcv0;
	int new0, new1, new2;

	if (!pvt->pci_mcr[4]) {
		debugf0("%s MCR registers not found\n", __func__);
		return;
	}

	/* Corrected test errors */
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV1, &rcv1);
	pci_read_config_dword(pvt->pci_mcr[4], MC_TEST_ERR_RCV0, &rcv0);

	/* Store the new values */
	new2 = DIMM2_COR_ERR(rcv1);
	new1 = DIMM1_COR_ERR(rcv0);
	new0 = DIMM0_COR_ERR(rcv0);

	/* Updates CE counters if it is not the first time here */
	if (pvt->ce_count_available) {
		/* Updates CE counters */
		int add0, add1, add2;

		add2 = new2 - pvt->udimm_last_ce_count[2];
		add1 = new1 - pvt->udimm_last_ce_count[1];
		add0 = new0 - pvt->udimm_last_ce_count[0];

		if (add2 < 0)
			add2 += 0x7fff;
		pvt->udimm_ce_count[2] += add2;

		if (add1 < 0)
			add1 += 0x7fff;
		pvt->udimm_ce_count[1] += add1;

		if (add0 < 0)
			add0 += 0x7fff;
		pvt->udimm_ce_count[0] += add0;

		if (add0 | add1 | add2)
			i7core_printk(KERN_ERR, "New Corrected error(s): "
				      "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
				      add0, add1, add2);
	} else
		pvt->ce_count_available = 1;

	/* Store the new values */
	pvt->udimm_last_ce_count[2] = new2;
	pvt->udimm_last_ce_count[1] = new1;
	pvt->udimm_last_ce_count[0] = new0;
}

/*
 * According to tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
 * Architectures Software Developer's Manual Volume 3B,
 * Nehalem is defined as family 0x06, model 0x1a.
 *
 * The MCA registers used here are the following ones:
 *     struct mce field	MCA Register
 *     m->status	MSR_IA32_MC8_STATUS
 *     m->addr		MSR_IA32_MC8_ADDR
 *     m->misc		MSR_IA32_MC8_MISC
 * In the case of Nehalem, the error information is encoded in the .status
 * and .misc fields
 */
static void i7core_mce_output_error(struct mem_ctl_info *mci,
				    struct mce *m)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	char *type, *optype, *err, *msg;
	unsigned long error = m->status & 0x1ff0000l;
	u32 optypenum = (m->status >> 4) & 0x07;
	u32 core_err_cnt = (m->status >> 38) & 0x7fff;
	u32 dimm = (m->misc >> 16) & 0x3;
	u32 channel = (m->misc >> 18) & 0x3;
	u32 syndrome = m->misc >> 32;
	u32 errnum = find_first_bit(&error, 32);
	int csrow;

	if (m->mcgstatus & 1)
		type = "FATAL";
	else
		type = "NON_FATAL";

	switch (optypenum) {
	case 0:
		optype = "generic undef request";
		break;
	case 1:
		optype = "read error";
		break;
	case 2:
		optype = "write error";
		break;
	case 3:
		optype = "addr/cmd error";
		break;
	case 4:
		optype = "scrubbing error";
		break;
	default:
		optype = "reserved";
		break;
	}

	switch (errnum) {
	case 16:
		err = "read ECC error";
		break;
	case 17:
		err = "RAS ECC error";
		break;
	case 18:
		err = "write parity error";
		break;
	case 19:
		err = "redundancy loss";
		break;
	case 20:
		err = "reserved";
		break;
	case 21:
		err = "memory range error";
		break;
	case 22:
		err = "RTID out of range";
		break;
	case 23:
		err = "address parity error";
		break;
	case 24:
		err = "byte enable parity error";
		break;
	default:
		err = "unknown";
	}

	/* FIXME: should convert addr into bank and rank information */
	msg = kasprintf(GFP_ATOMIC,
		"%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
		"syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
		type, (long long) m->addr, m->cpu, dimm, channel,
		syndrome, core_err_cnt, (long long)m->status,
		(long long)m->misc, optype, err);

	debugf0("%s", msg);

	csrow = pvt->csrow_map[channel][dimm];

	/* Call the helper to output message */
	if (m->mcgstatus & 1)
		edac_mc_handle_fbd_ue(mci, csrow, 0,
				0 /* FIXME: should be channel here */, msg);
	else if (!pvt->is_registered)
		edac_mc_handle_fbd_ce(mci, csrow,
				0 /* FIXME: should be channel here */, msg);

	kfree(msg);
}

/*
 *	i7core_check_error	Retrieve and process errors reported by the
 *				hardware. Called by the Core module.
 */
static void i7core_check_error(struct mem_ctl_info *mci)
{
	struct i7core_pvt *pvt = mci->pvt_info;
	int i;
	unsigned count = 0;
	struct mce *m;

	/*
	 * MCE first step: Copy all mce errors into a temporary buffer.
	 * We use double buffering here, to reduce the risk of
	 * losing an error.
	 */
	smp_rmb();
	count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
		% MCE_LOG_LEN;
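
	/*
	 * Example: with MCE_LOG_LEN = 32, mce_in = 30 and mce_out = 2,
	 * there are (2 + 32 - 30) % 32 = 4 entries pending, wrapping
	 * around the end of the ring buffer (handled below).
	 */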
	if (!count)
		return;

	m = pvt->mce_outentry;
	if (pvt->mce_in + count > MCE_LOG_LEN) {
		unsigned l = MCE_LOG_LEN - pvt->mce_in;

		memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
		smp_wmb();
		pvt->mce_in = 0;
		count -= l;
		m += l;
	}
	memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
	smp_wmb();
	pvt->mce_in += count;

	smp_rmb();
	if (pvt->mce_overrun) {
		i7core_printk(KERN_ERR, "Lost %d memory errors\n",
			      pvt->mce_overrun);
		smp_wmb();
		pvt->mce_overrun = 0;
	}

	/*
	 * MCE second step: parse errors and display
	 */
	for (i = 0; i < count; i++)
		i7core_mce_output_error(mci, &pvt->mce_outentry[i]);

	/*
	 * Now, let's increment CE error counts
	 */
	if (!pvt->is_registered)
		i7core_udimm_check_mc_ecc_err(mci);
	else
		i7core_rdimm_check_mc_ecc_err(mci);
}

/*
 * i7core_mce_check_error	Replicates mcelog routine to get errors
 *				This routine simply queues mcelog errors, and
 *				returns. The error itself should be handled
 *				later by i7core_check_error.
 * WARNING: As this routine should be called at NMI time, extra care should
 * be taken to avoid deadlocks, and to be as fast as possible.
 */
static int i7core_mce_check_error(void *priv, struct mce *mce)
{
	struct mem_ctl_info *mci = priv;
	struct i7core_pvt *pvt = mci->pvt_info;

	/*
	 * Just let mcelog handle it if the error is
	 * outside the memory controller
	 */
	if (((mce->status & 0xffff) >> 7) != 1)
		return 0;

	/* Bank 8 registers are the only ones that we know how to handle */
	if (mce->bank != 8)
		return 0;

	/* Only handle if it is the right mc controller */
	if (cpu_data(mce->cpu).phys_proc_id != pvt->i7core_dev->socket)
		return 0;

	smp_rmb();
	if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
		smp_wmb();
		pvt->mce_overrun++;
		return 0;
	}

	/* Copy memory error at the ringbuffer */
	memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
	smp_wmb();
	pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;

	/* Handle fatal errors immediately */
	if (mce->mcgstatus & 1)
		i7core_check_error(mci);

	/* Advise mcelog that the error was handled */
	return 1;
}

static int i7core_register_mci(struct i7core_dev *i7core_dev,
			       int num_channels, int num_csrows)
{
	struct mem_ctl_info *mci;
	struct i7core_pvt *pvt;
	int csrow = 0;
	int rc;

	/* allocate a new MC control structure */
	mci = edac_mc_alloc(sizeof(*pvt), num_csrows, num_channels,
			    i7core_dev->socket);
	if (unlikely(!mci))
		return -ENOMEM;

	debugf0("MC: " __FILE__ ": %s(): mci = %p\n", __func__, mci);

	/* record ptr to the generic device */
	mci->dev = &i7core_dev->pdev[0]->dev;

	pvt = mci->pvt_info;
	memset(pvt, 0, sizeof(*pvt));

	/*
	 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
	 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
	 * memory channels
	 */
	mci->mtype_cap = MEM_FLAG_DDR3;
	mci->edac_ctl_cap = EDAC_FLAG_NONE;
	mci->edac_cap = EDAC_FLAG_NONE;
	mci->mod_name = "i7core_edac.c";
	mci->mod_ver = I7CORE_REVISION;
	mci->ctl_name = kasprintf(GFP_KERNEL, "i7 core #%d",
				  i7core_dev->socket);
	mci->dev_name = pci_name(i7core_dev->pdev[0]);
	mci->ctl_page_to_phys = NULL;
	mci->mc_driver_sysfs_attributes = i7core_sysfs_attrs;
	/* Set the function pointer to an actual operation function */
	mci->edac_check = i7core_check_error;

	/* Store pci devices at mci for faster access */
	rc = mci_bind_devs(mci, i7core_dev);
	if (unlikely(rc < 0))
		goto fail;

	/* Get dimm basic config */
	get_dimm_config(mci, &csrow);

	/* add this new MC control structure to EDAC's list of MCs */
	if (unlikely(edac_mc_add_mc(mci))) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mc_add_mc()\n", __func__);
		/* FIXME: perhaps some code should go here that disables error
		 * reporting if we just enabled it
		 */

		rc = -EINVAL;
		goto fail;
	}

	/* allocating generic PCI control info */
	i7core_pci = edac_pci_create_generic_ctl(&i7core_dev->pdev[0]->dev,
						 EDAC_MOD_STR);
	if (unlikely(!i7core_pci)) {
		printk(KERN_WARNING
			"%s(): Unable to create PCI control\n",
			__func__);
		printk(KERN_WARNING
			"%s(): PCI error report via EDAC not setup\n",
			__func__);
	}

	/* Default error mask is any memory */
	pvt->inject.channel = 0;
	pvt->inject.dimm = -1;
	pvt->inject.rank = -1;
	pvt->inject.bank = -1;
	pvt->inject.page = -1;
	pvt->inject.col = -1;

	/* Registers on edac_mce in order to receive memory errors */
	pvt->edac_mce.priv = mci;
	pvt->edac_mce.check_error = i7core_mce_check_error;

	rc = edac_mce_register(&pvt->edac_mce);
	if (unlikely(rc < 0)) {
		debugf0("MC: " __FILE__
			": %s(): failed edac_mce_register()\n", __func__);
	}

	return 0;

fail:
	edac_mc_free(mci);
	return rc;
}

/*
 *	i7core_probe	Probe for ONE instance of device to see if it is
 *			present.
 *	return:
 *		0 for FOUND a device
 *		< 0 for error code
 */
static int __devinit i7core_probe(struct pci_dev *pdev,
				  const struct pci_device_id *id)
{
	int dev_idx = id->driver_data;
	int rc;
	struct i7core_dev *i7core_dev;

	/*
	 * All memory controllers are allocated at the first pass.
	 */
	if (unlikely(dev_idx >= 1))
		return -EINVAL;

	/* get the pci devices we want to reserve for our use */
	mutex_lock(&i7core_edac_lock);

	rc = i7core_get_devices(pci_dev_descr_i7core,
				ARRAY_SIZE(pci_dev_descr_i7core));
	if (unlikely(rc < 0))
		goto fail0;

	list_for_each_entry(i7core_dev, &i7core_edac_list, list) {
		unsigned int channels;
		unsigned int csrows;

		/* Check the number of active and not disabled channels */
		rc = i7core_get_active_channels(i7core_dev->socket,
						&channels, &csrows);
		if (unlikely(rc < 0))
			goto fail1;

		rc = i7core_register_mci(i7core_dev, channels, csrows);
		if (unlikely(rc < 0))
			goto fail1;
	}

	i7core_printk(KERN_INFO, "Driver loaded.\n");

	mutex_unlock(&i7core_edac_lock);
	return 0;

fail1:
	i7core_put_all_devices();
fail0:
	mutex_unlock(&i7core_edac_lock);
	return rc;
}

/*
 *	i7core_remove	destructor for one instance of device
 *
 */
static void __devexit i7core_remove(struct pci_dev *pdev)
{
	struct mem_ctl_info *mci;
	struct i7core_dev *i7core_dev, *tmp;

	debugf0(__FILE__ ": %s()\n", __func__);

	if (i7core_pci)
		edac_pci_release_generic_ctl(i7core_pci);

	/*
	 * There is a problem here: the pdev value for removal will be wrong,
	 * since it will point to the X58 register used to detect that the
	 * machine is a Nehalem or upper design. However, due to the way
	 * several PCI devices are grouped together to provide MC
	 * functionality, we need to use a different method for releasing
	 * the devices
	 */

	mutex_lock(&i7core_edac_lock);
	list_for_each_entry_safe(i7core_dev, tmp, &i7core_edac_list, list) {
		mci = edac_mc_del_mc(&i7core_dev->pdev[0]->dev);
		if (mci) {
			struct i7core_pvt *pvt = mci->pvt_info;

			i7core_dev = pvt->i7core_dev;
			edac_mce_unregister(&pvt->edac_mce);
			kfree(mci->ctl_name);
			edac_mc_free(mci);
			i7core_put_devices(i7core_dev);
		} else {
			i7core_printk(KERN_ERR,
				      "Couldn't find mci for socket %d\n",
				      i7core_dev->socket);
		}
	}
	mutex_unlock(&i7core_edac_lock);
}

MODULE_DEVICE_TABLE(pci, i7core_pci_tbl);

/*
 *	i7core_driver	pci_driver structure for this module
 *
 */
static struct pci_driver i7core_driver = {
	.name     = "i7core_edac",
	.probe    = i7core_probe,
	.remove   = __devexit_p(i7core_remove),
	.id_table = i7core_pci_tbl,
};

/*
 *	i7core_init		Module entry function
 *			Try to initialize this module for its devices
 */
static int __init i7core_init(void)
{
	int pci_rc;

	debugf2("MC: " __FILE__ ": %s()\n", __func__);

	/* Ensure that the OPSTATE is set correctly for POLL or NMI */
	opstate_init();

	i7core_xeon_pci_fixup(pci_dev_descr_i7core[0].dev_id);

	pci_rc = pci_register_driver(&i7core_driver);

	if (pci_rc >= 0)
		return 0;

	i7core_printk(KERN_ERR, "Failed to register device with error %d.\n",
		      pci_rc);

	return pci_rc;
}

/*
 *	i7core_exit()	Module exit function
 *			Unregister the driver
 */
static void __exit i7core_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	pci_unregister_driver(&i7core_driver);
}

module_init(i7core_init);
module_exit(i7core_exit);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
		   I7CORE_REVISION);

module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");