1 /* Intel i7 core/Nehalem Memory Controller kernel module
3 * This driver supports the memory controllers found on the Intel
4 * processor families i7core, i7core 7xx/8xx, i5core, Xeon 35xx,
5 * Xeon 55xx and Xeon 56xx also known as Nehalem, Nehalem-EP, Lynnfield
8 * This file may be distributed under the terms of the
9 * GNU General Public License version 2 only.
11 * Copyright (c) 2009-2010 by:
12 * Mauro Carvalho Chehab <mchehab@redhat.com>
14 * Red Hat Inc. http://www.redhat.com
16 * Forked and adapted from the i5400_edac driver
18 * Based on the following public Intel datasheets:
19 * Intel Core i7 Processor Extreme Edition and Intel Core i7 Processor
20 * Datasheet, Volume 2:
21 * http://download.intel.com/design/processor/datashts/320835.pdf
22 * Intel Xeon Processor 5500 Series Datasheet Volume 2
23 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
25 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
28 #include <linux/module.h>
29 #include <linux/init.h>
30 #include <linux/pci.h>
31 #include <linux/pci_ids.h>
32 #include <linux/slab.h>
33 #include <linux/delay.h>
34 #include <linux/edac.h>
35 #include <linux/mmzone.h>
36 #include <linux/edac_mce.h>
37 #include <linux/smp.h>
38 #include <asm/processor.h>
40 #include "edac_core.h"
43 static LIST_HEAD(i7core_edac_list
);
44 static DEFINE_MUTEX(i7core_edac_lock
);
47 static int use_pci_fixup
;
48 module_param(use_pci_fixup
, int, 0444);
49 MODULE_PARM_DESC(use_pci_fixup
, "Enable PCI fixup to seek for hidden devices");
51 * This is used for Nehalem-EP and Nehalem-EX devices, where the non-core
52 * registers start at bus 255, and are not reported by BIOS.
53 * We currently find devices with only 2 sockets. In order to support more QPI
54 * Quick Path Interconnect, just increment this number.
56 #define MAX_SOCKET_BUSES 2
60 * Alter this version for the module when modifications are made
62 #define I7CORE_REVISION " Ver: 1.0.0"
63 #define EDAC_MOD_STR "i7core_edac"
68 #define i7core_printk(level, fmt, arg...) \
69 edac_printk(level, "i7core", fmt, ##arg)
71 #define i7core_mc_printk(mci, level, fmt, arg...) \
72 edac_mc_chipset_printk(mci, level, "i7core", fmt, ##arg)
75 * i7core Memory Controller Registers
78 /* OFFSETS for Device 0 Function 0 */
80 #define MC_CFG_CONTROL 0x90
82 /* OFFSETS for Device 3 Function 0 */
84 #define MC_CONTROL 0x48
85 #define MC_STATUS 0x4c
86 #define MC_MAX_DOD 0x64
89 * OFFSETS for Device 3 Function 4, as indicated on Xeon 5500 datasheet:
90 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
93 #define MC_TEST_ERR_RCV1 0x60
94 #define DIMM2_COR_ERR(r) ((r) & 0x7fff)
96 #define MC_TEST_ERR_RCV0 0x64
97 #define DIMM1_COR_ERR(r) (((r) >> 16) & 0x7fff)
98 #define DIMM0_COR_ERR(r) ((r) & 0x7fff)
100 /* OFFSETS for Device 3 Function 2, as indicated on Xeon 5500 datasheet */
101 #define MC_COR_ECC_CNT_0 0x80
102 #define MC_COR_ECC_CNT_1 0x84
103 #define MC_COR_ECC_CNT_2 0x88
104 #define MC_COR_ECC_CNT_3 0x8c
105 #define MC_COR_ECC_CNT_4 0x90
106 #define MC_COR_ECC_CNT_5 0x94
108 #define DIMM_TOP_COR_ERR(r) (((r) >> 16) & 0x7fff)
109 #define DIMM_BOT_COR_ERR(r) ((r) & 0x7fff)
112 /* OFFSETS for Devices 4,5 and 6 Function 0 */
114 #define MC_CHANNEL_DIMM_INIT_PARAMS 0x58
115 #define THREE_DIMMS_PRESENT (1 << 24)
116 #define SINGLE_QUAD_RANK_PRESENT (1 << 23)
117 #define QUAD_RANK_PRESENT (1 << 22)
118 #define REGISTERED_DIMM (1 << 15)
/*
 * MC_CHANNEL_MAPPER: config-space offset of the channel mapper register.
 *
 * As decoded by the two helpers below, every channel owns a 6-bit group
 * inside the register value: the low 3 bits are the "write" field and the
 * next 3 bits the "read" field. Both helpers return the stored field
 * minus one.
 * NOTE(review): presumably RDLCH/WRLCH stand for read/write logical
 * channel - confirm against the datasheet.
 *
 * @r:  raw register value
 * @ch: channel index; parenthesized so that expression arguments such as
 *      "i + 1" are multiplied as a whole.
 */
#define MC_CHANNEL_MAPPER	0x60
  #define RDLCH(r, ch)		((((r) >> (3 + ((ch) * 6))) & 0x07) - 1)
  #define WRLCH(r, ch)		((((r) >> ((ch) * 6)) & 0x07) - 1)
124 #define MC_CHANNEL_RANK_PRESENT 0x7c
125 #define RANK_PRESENT_MASK 0xffff
127 #define MC_CHANNEL_ADDR_MATCH 0xf0
128 #define MC_CHANNEL_ERROR_MASK 0xf8
129 #define MC_CHANNEL_ERROR_INJECT 0xfc
130 #define INJECT_ADDR_PARITY 0x10
131 #define INJECT_ECC 0x08
132 #define MASK_CACHELINE 0x06
133 #define MASK_FULL_CACHELINE 0x06
134 #define MASK_MSB32_CACHELINE 0x04
135 #define MASK_LSB32_CACHELINE 0x02
136 #define NO_MASK_CACHELINE 0x00
137 #define REPEAT_EN 0x01
139 /* OFFSETS for Devices 4,5 and 6 Function 1 */
141 #define MC_DOD_CH_DIMM0 0x48
142 #define MC_DOD_CH_DIMM1 0x4c
143 #define MC_DOD_CH_DIMM2 0x50
/*
 * RANKOFFSET(x) extracts bits 12:10 of a DIMM DOD register value.
 * The argument is parenthesized so that callers may pass an expression
 * built with low-precedence operators (|, ^, ?:) without the mask being
 * applied to only part of it.
 */
  #define RANKOFFSET_MASK	((1 << 12) | (1 << 11) | (1 << 10))
  #define RANKOFFSET(x)		(((x) & RANKOFFSET_MASK) >> 10)
146 #define DIMM_PRESENT_MASK (1 << 9)
147 #define DIMM_PRESENT(x) (((x) & DIMM_PRESENT_MASK) >> 9)
148 #define MC_DOD_NUMBANK_MASK ((1 << 8) | (1 << 7))
149 #define MC_DOD_NUMBANK(x) (((x) & MC_DOD_NUMBANK_MASK) >> 7)
150 #define MC_DOD_NUMRANK_MASK ((1 << 6) | (1 << 5))
151 #define MC_DOD_NUMRANK(x) (((x) & MC_DOD_NUMRANK_MASK) >> 5)
152 #define MC_DOD_NUMROW_MASK ((1 << 4) | (1 << 3) | (1 << 2))
153 #define MC_DOD_NUMROW(x) (((x) & MC_DOD_NUMROW_MASK) >> 2)
154 #define MC_DOD_NUMCOL_MASK 3
155 #define MC_DOD_NUMCOL(x) ((x) & MC_DOD_NUMCOL_MASK)
157 #define MC_RANK_PRESENT 0x7c
159 #define MC_SAG_CH_0 0x80
160 #define MC_SAG_CH_1 0x84
161 #define MC_SAG_CH_2 0x88
162 #define MC_SAG_CH_3 0x8c
163 #define MC_SAG_CH_4 0x90
164 #define MC_SAG_CH_5 0x94
165 #define MC_SAG_CH_6 0x98
166 #define MC_SAG_CH_7 0x9c
168 #define MC_RIR_LIMIT_CH_0 0x40
169 #define MC_RIR_LIMIT_CH_1 0x44
170 #define MC_RIR_LIMIT_CH_2 0x48
171 #define MC_RIR_LIMIT_CH_3 0x4C
172 #define MC_RIR_LIMIT_CH_4 0x50
173 #define MC_RIR_LIMIT_CH_5 0x54
174 #define MC_RIR_LIMIT_CH_6 0x58
175 #define MC_RIR_LIMIT_CH_7 0x5C
176 #define MC_RIR_LIMIT_MASK ((1 << 10) - 1)
178 #define MC_RIR_WAY_CH 0x80
179 #define MC_RIR_WAY_OFFSET_MASK (((1 << 14) - 1) & ~0x7)
180 #define MC_RIR_WAY_RANK_MASK 0x7
187 #define MAX_DIMMS 3 /* Max DIMMS per channel */
188 #define MAX_MCR_FUNC 4
189 #define MAX_CHAN_FUNC 3
199 struct i7core_inject
{
206 /* Error address mask */
207 int channel
, dimm
, rank
, bank
, page
, col
;
210 struct i7core_channel
{
215 struct pci_id_descr
{
222 struct pci_id_table
{
223 const struct pci_id_descr
*descr
;
228 struct list_head list
;
230 struct pci_dev
**pdev
;
232 struct mem_ctl_info
*mci
;
236 struct pci_dev
*pci_noncore
;
237 struct pci_dev
*pci_mcr
[MAX_MCR_FUNC
+ 1];
238 struct pci_dev
*pci_ch
[NUM_CHANS
][MAX_CHAN_FUNC
+ 1];
240 struct i7core_dev
*i7core_dev
;
242 struct i7core_info info
;
243 struct i7core_inject inject
;
244 struct i7core_channel channel
[NUM_CHANS
];
246 int ce_count_available
;
247 int csrow_map
[NUM_CHANS
][MAX_DIMMS
];
249 /* ECC corrected errors counts per udimm */
250 unsigned long udimm_ce_count
[MAX_DIMMS
];
251 int udimm_last_ce_count
[MAX_DIMMS
];
252 /* ECC corrected errors counts per rdimm */
253 unsigned long rdimm_ce_count
[NUM_CHANS
][MAX_DIMMS
];
254 int rdimm_last_ce_count
[NUM_CHANS
][MAX_DIMMS
];
256 unsigned int is_registered
;
259 struct edac_mce edac_mce
;
261 /* Fifo double buffers */
262 struct mce mce_entry
[MCE_LOG_LEN
];
263 struct mce mce_outentry
[MCE_LOG_LEN
];
265 /* Fifo in/out counters */
266 unsigned mce_in
, mce_out
;
268 /* Count indicator to show errors not got */
269 unsigned mce_overrun
;
271 /* Struct to control EDAC polling */
272 struct edac_pci_ctl_info
*i7core_pci
;
275 #define PCI_DESCR(device, function, device_id) \
277 .func = (function), \
278 .dev_id = (device_id)
280 static const struct pci_id_descr pci_dev_descr_i7core_nehalem
[] = {
281 /* Memory controller */
282 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_I7_MCR
) },
283 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_I7_MC_TAD
) },
285 /* Exists only for RDIMM */
286 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_I7_MC_RAS
), .optional
= 1 },
287 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_I7_MC_TEST
) },
290 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH0_CTRL
) },
291 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH0_ADDR
) },
292 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH0_RANK
) },
293 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH0_TC
) },
296 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH1_CTRL
) },
297 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH1_ADDR
) },
298 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH1_RANK
) },
299 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH1_TC
) },
302 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_I7_MC_CH2_CTRL
) },
303 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_I7_MC_CH2_ADDR
) },
304 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_I7_MC_CH2_RANK
) },
305 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_I7_MC_CH2_TC
) },
308 static const struct pci_id_descr pci_dev_descr_lynnfield
[] = {
309 { PCI_DESCR( 3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR
) },
310 { PCI_DESCR( 3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD
) },
311 { PCI_DESCR( 3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST
) },
313 { PCI_DESCR( 4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL
) },
314 { PCI_DESCR( 4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR
) },
315 { PCI_DESCR( 4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK
) },
316 { PCI_DESCR( 4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC
) },
318 { PCI_DESCR( 5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL
) },
319 { PCI_DESCR( 5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR
) },
320 { PCI_DESCR( 5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK
) },
321 { PCI_DESCR( 5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC
) },
324 static const struct pci_id_descr pci_dev_descr_i7core_westmere
[] = {
325 /* Memory controller */
326 { PCI_DESCR(3, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MCR_REV2
) },
327 { PCI_DESCR(3, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TAD_REV2
) },
328 /* Exists only for RDIMM */
329 { PCI_DESCR(3, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_RAS_REV2
), .optional
= 1 },
330 { PCI_DESCR(3, 4, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_TEST_REV2
) },
333 { PCI_DESCR(4, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_CTRL_REV2
) },
334 { PCI_DESCR(4, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_ADDR_REV2
) },
335 { PCI_DESCR(4, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_RANK_REV2
) },
336 { PCI_DESCR(4, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH0_TC_REV2
) },
339 { PCI_DESCR(5, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_CTRL_REV2
) },
340 { PCI_DESCR(5, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_ADDR_REV2
) },
341 { PCI_DESCR(5, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_RANK_REV2
) },
342 { PCI_DESCR(5, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH1_TC_REV2
) },
345 { PCI_DESCR(6, 0, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_CTRL_REV2
) },
346 { PCI_DESCR(6, 1, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_ADDR_REV2
) },
347 { PCI_DESCR(6, 2, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_RANK_REV2
) },
348 { PCI_DESCR(6, 3, PCI_DEVICE_ID_INTEL_LYNNFIELD_MC_CH2_TC_REV2
) },
351 #define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
352 static const struct pci_id_table pci_dev_table
[] = {
353 PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_nehalem
),
354 PCI_ID_TABLE_ENTRY(pci_dev_descr_lynnfield
),
355 PCI_ID_TABLE_ENTRY(pci_dev_descr_i7core_westmere
),
356 {0,} /* 0 terminated list. */
360 * pci_device_id table for which devices we are looking for
362 static const struct pci_device_id i7core_pci_tbl
[] __devinitdata
= {
363 {PCI_DEVICE(PCI_VENDOR_ID_INTEL
, PCI_DEVICE_ID_INTEL_X58_HUB_MGMT
)},
364 {PCI_DEVICE(PCI_VENDOR_ID_INTEL
, PCI_DEVICE_ID_INTEL_LYNNFIELD_QPI_LINK0
)},
365 {0,} /* 0 terminated list. */
368 /****************************************************************************
369 Ancillary status routines
370 ****************************************************************************/
/*
 * Helpers that test single bits of the register copies cached in
 * pvt->info. Each evaluates to a non-zero value when the bit is set and
 * to 0 otherwise (the result is the masked bit, not a normalized 0/1).
 * The channel argument is parenthesized so expression arguments are
 * evaluated as a whole before being used as a shift count.
 */

/* MC_CONTROL bits: bit (8 + ch) is tested for "channel active", bit 1 for x8 ECC */
#define CH_ACTIVE(pvt, ch)	((pvt)->info.mc_control & (1 << (8 + (ch))))
#define ECCx8(pvt)		((pvt)->info.mc_control & (1 << 1))

/* MC_STATUS bits: bit 4 is tested for "ECC enabled", bit ch for "channel disabled" */
#define ECC_ENABLED(pvt)	((pvt)->info.mc_status & (1 << 4))
#define CH_DISABLED(pvt, ch)	((pvt)->info.mc_status & (1 << (ch)))
380 /* MC_MAX_DOD read functions */
/*
 * numdimms() - decode a 2-bit DIMM-count field.
 * @dimms: raw field value; only the two low bits are used.
 *
 * Returns the encoded value plus one, i.e. a number in the range 1..4.
 */
static inline int numdimms(u32 dimms)
{
	return (dimms & 0x3) + 1;
}
/*
 * numrank() - decode a 2-bit rank-count field.
 * @rank: raw field value; only the two low bits are used.
 *
 * Returns 1, 2 or 4 for encodings 0..2, or -EINVAL for encoding 3.
 * The lookup table is never written, so it is declared const.
 */
static inline int numrank(u32 rank)
{
	static const int ranks[4] = { 1, 2, 4, -EINVAL };

	return ranks[rank & 0x3];
}
/*
 * numbank() - decode a 2-bit bank-count field.
 * @bank: raw field value; only the two low bits are used.
 *
 * Returns 4, 8 or 16 for encodings 0..2, or -EINVAL for encoding 3.
 * The lookup table is never written, so it is declared const.
 */
static inline int numbank(u32 bank)
{
	static const int banks[4] = { 4, 8, 16, -EINVAL };

	return banks[bank & 0x3];
}
/*
 * numrow() - decode a 3-bit row-count field.
 * @row: raw field value; only the three low bits are used.
 *
 * Returns 2^12 .. 2^16 for encodings 0..4, or -EINVAL for encodings 5..7.
 * The lookup table is never written, so it is declared const.
 */
static inline int numrow(u32 row)
{
	static const int rows[8] = {
		1 << 12, 1 << 13, 1 << 14, 1 << 15,
		1 << 16, -EINVAL, -EINVAL, -EINVAL,
	};

	return rows[row & 0x7];
}
/*
 * numcol() - decode a 2-bit column-count field.
 * @col: raw field value; only the two low bits are used.
 *
 * Returns 2^10, 2^11 or 2^12 for encodings 0..2, or -EINVAL for
 * encoding 3.
 *
 * The table is sized to the four values the 2-bit index can select (it
 * was previously declared with eight slots, four of them unreachable and
 * implicitly zero) and is const because it is never written.
 */
static inline int numcol(u32 col)
{
	static const int cols[4] = {
		1 << 10, 1 << 11, 1 << 12, -EINVAL,
	};

	return cols[col & 0x3];
}
418 static struct i7core_dev
*get_i7core_dev(u8 socket
)
420 struct i7core_dev
*i7core_dev
;
422 list_for_each_entry(i7core_dev
, &i7core_edac_list
, list
) {
423 if (i7core_dev
->socket
== socket
)
430 static struct i7core_dev
*alloc_i7core_dev(u8 socket
,
431 const struct pci_id_table
*table
)
433 struct i7core_dev
*i7core_dev
;
435 i7core_dev
= kzalloc(sizeof(*i7core_dev
), GFP_KERNEL
);
439 i7core_dev
->pdev
= kzalloc(sizeof(*i7core_dev
->pdev
) * table
->n_devs
,
441 if (!i7core_dev
->pdev
) {
446 i7core_dev
->socket
= socket
;
447 i7core_dev
->n_devs
= table
->n_devs
;
448 list_add_tail(&i7core_dev
->list
, &i7core_edac_list
);
453 static void free_i7core_dev(struct i7core_dev
*i7core_dev
)
455 list_del(&i7core_dev
->list
);
456 kfree(i7core_dev
->pdev
);
460 /****************************************************************************
461 Memory check routines
462 ****************************************************************************/
463 static struct pci_dev
*get_pdev_slot_func(u8 socket
, unsigned slot
,
466 struct i7core_dev
*i7core_dev
= get_i7core_dev(socket
);
472 for (i
= 0; i
< i7core_dev
->n_devs
; i
++) {
473 if (!i7core_dev
->pdev
[i
])
476 if (PCI_SLOT(i7core_dev
->pdev
[i
]->devfn
) == slot
&&
477 PCI_FUNC(i7core_dev
->pdev
[i
]->devfn
) == func
) {
478 return i7core_dev
->pdev
[i
];
486 * i7core_get_active_channels() - gets the number of channels and csrows
487 * @socket: Quick Path Interconnect socket
488 * @channels: Number of channels that will be returned
489 * @csrows: Number of csrows found
491 * Since EDAC core needs to know in advance the number of available channels
492 * and csrows, in order to allocate memory for csrows/channels, it is needed
493 * to run two similar steps. At the first step, implemented on this function,
494 * it checks the number of csrows/channels present at one socket.
495 * this is used in order to properly allocate the size of mci components.
497 * It should be noticed that none of the current available datasheets explain
498 * or even mention how csrows are seen by the memory controller. So, we need
499 * to add a fake description for csrows.
500 * So, this driver is attributing one DIMM memory for one csrow.
502 static int i7core_get_active_channels(const u8 socket
, unsigned *channels
,
505 struct pci_dev
*pdev
= NULL
;
512 pdev
= get_pdev_slot_func(socket
, 3, 0);
514 i7core_printk(KERN_ERR
, "Couldn't find socket %d fn 3.0!!!\n",
519 /* Device 3 function 0 reads */
520 pci_read_config_dword(pdev
, MC_STATUS
, &status
);
521 pci_read_config_dword(pdev
, MC_CONTROL
, &control
);
523 for (i
= 0; i
< NUM_CHANS
; i
++) {
525 /* Check if the channel is active */
526 if (!(control
& (1 << (8 + i
))))
529 /* Check if the channel is disabled */
530 if (status
& (1 << i
))
533 pdev
= get_pdev_slot_func(socket
, i
+ 4, 1);
535 i7core_printk(KERN_ERR
, "Couldn't find socket %d "
540 /* Devices 4-6 function 1 */
541 pci_read_config_dword(pdev
,
542 MC_DOD_CH_DIMM0
, &dimm_dod
[0]);
543 pci_read_config_dword(pdev
,
544 MC_DOD_CH_DIMM1
, &dimm_dod
[1]);
545 pci_read_config_dword(pdev
,
546 MC_DOD_CH_DIMM2
, &dimm_dod
[2]);
550 for (j
= 0; j
< 3; j
++) {
551 if (!DIMM_PRESENT(dimm_dod
[j
]))
557 debugf0("Number of active channels on socket %d: %d\n",
563 static int get_dimm_config(const struct mem_ctl_info
*mci
)
565 struct i7core_pvt
*pvt
= mci
->pvt_info
;
566 struct csrow_info
*csr
;
567 struct pci_dev
*pdev
;
570 unsigned long last_page
= 0;
574 /* Get data from the MC register, function 0 */
575 pdev
= pvt
->pci_mcr
[0];
579 /* Device 3 function 0 reads */
580 pci_read_config_dword(pdev
, MC_CONTROL
, &pvt
->info
.mc_control
);
581 pci_read_config_dword(pdev
, MC_STATUS
, &pvt
->info
.mc_status
);
582 pci_read_config_dword(pdev
, MC_MAX_DOD
, &pvt
->info
.max_dod
);
583 pci_read_config_dword(pdev
, MC_CHANNEL_MAPPER
, &pvt
->info
.ch_map
);
585 debugf0("QPI %d control=0x%08x status=0x%08x dod=0x%08x map=0x%08x\n",
586 pvt
->i7core_dev
->socket
, pvt
->info
.mc_control
, pvt
->info
.mc_status
,
587 pvt
->info
.max_dod
, pvt
->info
.ch_map
);
589 if (ECC_ENABLED(pvt
)) {
590 debugf0("ECC enabled with x%d SDCC\n", ECCx8(pvt
) ? 8 : 4);
592 mode
= EDAC_S8ECD8ED
;
594 mode
= EDAC_S4ECD4ED
;
596 debugf0("ECC disabled\n");
600 /* FIXME: need to handle the error codes */
601 debugf0("DOD Max limits: DIMMS: %d, %d-ranked, %d-banked "
603 numdimms(pvt
->info
.max_dod
),
604 numrank(pvt
->info
.max_dod
>> 2),
605 numbank(pvt
->info
.max_dod
>> 4),
606 numrow(pvt
->info
.max_dod
>> 6),
607 numcol(pvt
->info
.max_dod
>> 9));
609 for (i
= 0; i
< NUM_CHANS
; i
++) {
610 u32 data
, dimm_dod
[3], value
[8];
612 if (!pvt
->pci_ch
[i
][0])
615 if (!CH_ACTIVE(pvt
, i
)) {
616 debugf0("Channel %i is not active\n", i
);
619 if (CH_DISABLED(pvt
, i
)) {
620 debugf0("Channel %i is disabled\n", i
);
624 /* Devices 4-6 function 0 */
625 pci_read_config_dword(pvt
->pci_ch
[i
][0],
626 MC_CHANNEL_DIMM_INIT_PARAMS
, &data
);
628 pvt
->channel
[i
].ranks
= (data
& QUAD_RANK_PRESENT
) ?
631 if (data
& REGISTERED_DIMM
)
636 if (data
& THREE_DIMMS_PRESENT
)
637 pvt
->channel
[i
].dimms
= 3;
638 else if (data
& SINGLE_QUAD_RANK_PRESENT
)
639 pvt
->channel
[i
].dimms
= 1;
641 pvt
->channel
[i
].dimms
= 2;
644 /* Devices 4-6 function 1 */
645 pci_read_config_dword(pvt
->pci_ch
[i
][1],
646 MC_DOD_CH_DIMM0
, &dimm_dod
[0]);
647 pci_read_config_dword(pvt
->pci_ch
[i
][1],
648 MC_DOD_CH_DIMM1
, &dimm_dod
[1]);
649 pci_read_config_dword(pvt
->pci_ch
[i
][1],
650 MC_DOD_CH_DIMM2
, &dimm_dod
[2]);
652 debugf0("Ch%d phy rd%d, wr%d (0x%08x): "
653 "%d ranks, %cDIMMs\n",
655 RDLCH(pvt
->info
.ch_map
, i
), WRLCH(pvt
->info
.ch_map
, i
),
657 pvt
->channel
[i
].ranks
,
658 (data
& REGISTERED_DIMM
) ? 'R' : 'U');
660 for (j
= 0; j
< 3; j
++) {
661 u32 banks
, ranks
, rows
, cols
;
664 if (!DIMM_PRESENT(dimm_dod
[j
]))
667 banks
= numbank(MC_DOD_NUMBANK(dimm_dod
[j
]));
668 ranks
= numrank(MC_DOD_NUMRANK(dimm_dod
[j
]));
669 rows
= numrow(MC_DOD_NUMROW(dimm_dod
[j
]));
670 cols
= numcol(MC_DOD_NUMCOL(dimm_dod
[j
]));
672 /* DDR3 has 8 I/O banks */
673 size
= (rows
* cols
* banks
* ranks
) >> (20 - 3);
675 pvt
->channel
[i
].dimms
++;
677 debugf0("\tdimm %d %d Mb offset: %x, "
678 "bank: %d, rank: %d, row: %#x, col: %#x\n",
680 RANKOFFSET(dimm_dod
[j
]),
681 banks
, ranks
, rows
, cols
);
683 npages
= MiB_TO_PAGES(size
);
685 csr
= &mci
->csrows
[csrow
];
686 csr
->first_page
= last_page
+ 1;
688 csr
->last_page
= last_page
;
689 csr
->nr_pages
= npages
;
693 csr
->csrow_idx
= csrow
;
694 csr
->nr_channels
= 1;
696 csr
->channels
[0].chan_idx
= i
;
697 csr
->channels
[0].ce_count
= 0;
699 pvt
->csrow_map
[i
][j
] = csrow
;
709 csr
->dtype
= DEV_X16
;
712 csr
->dtype
= DEV_UNKNOWN
;
715 csr
->edac_mode
= mode
;
721 pci_read_config_dword(pdev
, MC_SAG_CH_0
, &value
[0]);
722 pci_read_config_dword(pdev
, MC_SAG_CH_1
, &value
[1]);
723 pci_read_config_dword(pdev
, MC_SAG_CH_2
, &value
[2]);
724 pci_read_config_dword(pdev
, MC_SAG_CH_3
, &value
[3]);
725 pci_read_config_dword(pdev
, MC_SAG_CH_4
, &value
[4]);
726 pci_read_config_dword(pdev
, MC_SAG_CH_5
, &value
[5]);
727 pci_read_config_dword(pdev
, MC_SAG_CH_6
, &value
[6]);
728 pci_read_config_dword(pdev
, MC_SAG_CH_7
, &value
[7]);
729 debugf1("\t[%i] DIVBY3\tREMOVED\tOFFSET\n", i
);
730 for (j
= 0; j
< 8; j
++)
731 debugf1("\t\t%#x\t%#x\t%#x\n",
732 (value
[j
] >> 27) & 0x1,
733 (value
[j
] >> 24) & 0x7,
734 (value
[j
] && ((1 << 24) - 1)));
740 /****************************************************************************
741 Error insertion routines
742 ****************************************************************************/
744 /* The i7core has independent error injection features per channel.
745 However, to have a simpler code, we don't allow enabling error injection
746 on more than one channel.
747 Also, since a change at an inject parameter will be applied only at enable,
748 we're disabling error injection on all write calls to the sysfs nodes that
749 controls the error code injection.
751 static int disable_inject(const struct mem_ctl_info
*mci
)
753 struct i7core_pvt
*pvt
= mci
->pvt_info
;
755 pvt
->inject
.enable
= 0;
757 if (!pvt
->pci_ch
[pvt
->inject
.channel
][0])
760 pci_write_config_dword(pvt
->pci_ch
[pvt
->inject
.channel
][0],
761 MC_CHANNEL_ERROR_INJECT
, 0);
767 * i7core inject inject.section
769 * accept and store error injection inject.section value
770 * bit 0 - refers to the lower 32-byte half cacheline
771 * bit 1 - refers to the upper 32-byte half cacheline
773 static ssize_t
i7core_inject_section_store(struct mem_ctl_info
*mci
,
774 const char *data
, size_t count
)
776 struct i7core_pvt
*pvt
= mci
->pvt_info
;
780 if (pvt
->inject
.enable
)
783 rc
= strict_strtoul(data
, 10, &value
);
784 if ((rc
< 0) || (value
> 3))
787 pvt
->inject
.section
= (u32
) value
;
791 static ssize_t
i7core_inject_section_show(struct mem_ctl_info
*mci
,
794 struct i7core_pvt
*pvt
= mci
->pvt_info
;
795 return sprintf(data
, "0x%08x\n", pvt
->inject
.section
);
801 * accept and store error injection inject.type value
802 * bit 0 - repeat enable - Enable error repetition
803 * bit 1 - inject ECC error
804 * bit 2 - inject parity error
806 static ssize_t
i7core_inject_type_store(struct mem_ctl_info
*mci
,
807 const char *data
, size_t count
)
809 struct i7core_pvt
*pvt
= mci
->pvt_info
;
813 if (pvt
->inject
.enable
)
816 rc
= strict_strtoul(data
, 10, &value
);
817 if ((rc
< 0) || (value
> 7))
820 pvt
->inject
.type
= (u32
) value
;
824 static ssize_t
i7core_inject_type_show(struct mem_ctl_info
*mci
,
827 struct i7core_pvt
*pvt
= mci
->pvt_info
;
828 return sprintf(data
, "0x%08x\n", pvt
->inject
.type
);
832 * i7core_inject_inject.eccmask_store
834 * The type of error (UE/CE) will depend on the inject.eccmask value:
835 * Any bits set to a 1 will flip the corresponding ECC bit
836 * Correctable errors can be injected by flipping 1 bit or the bits within
837 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
838 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
839 * uncorrectable error to be injected.
841 static ssize_t
i7core_inject_eccmask_store(struct mem_ctl_info
*mci
,
842 const char *data
, size_t count
)
844 struct i7core_pvt
*pvt
= mci
->pvt_info
;
848 if (pvt
->inject
.enable
)
851 rc
= strict_strtoul(data
, 10, &value
);
855 pvt
->inject
.eccmask
= (u32
) value
;
859 static ssize_t
i7core_inject_eccmask_show(struct mem_ctl_info
*mci
,
862 struct i7core_pvt
*pvt
= mci
->pvt_info
;
863 return sprintf(data
, "0x%08x\n", pvt
->inject
.eccmask
);
869 * The type of error (UE/CE) will depend on the inject.eccmask value:
870 * Any bits set to a 1 will flip the corresponding ECC bit
871 * Correctable errors can be injected by flipping 1 bit or the bits within
872 * a symbol pair (2 consecutive aligned 8-bit pairs - i.e. 7:0 and 15:8 or
873 * 23:16 and 31:24). Flipping bits in two symbol pairs will cause an
874 * uncorrectable error to be injected.
877 #define DECLARE_ADDR_MATCH(param, limit) \
878 static ssize_t i7core_inject_store_##param( \
879 struct mem_ctl_info *mci, \
880 const char *data, size_t count) \
882 struct i7core_pvt *pvt; \
886 debugf1("%s()\n", __func__); \
887 pvt = mci->pvt_info; \
889 if (pvt->inject.enable) \
890 disable_inject(mci); \
892 if (!strcasecmp(data, "any") || !strcasecmp(data, "any\n"))\
895 rc = strict_strtoul(data, 10, &value); \
896 if ((rc < 0) || (value >= limit)) \
900 pvt->inject.param = value; \
905 static ssize_t i7core_inject_show_##param( \
906 struct mem_ctl_info *mci, \
909 struct i7core_pvt *pvt; \
911 pvt = mci->pvt_info; \
912 debugf1("%s() pvt=%p\n", __func__, pvt); \
913 if (pvt->inject.param < 0) \
914 return sprintf(data, "any\n"); \
916 return sprintf(data, "%d\n", pvt->inject.param);\
919 #define ATTR_ADDR_MATCH(param) \
923 .mode = (S_IRUGO | S_IWUSR) \
925 .show = i7core_inject_show_##param, \
926 .store = i7core_inject_store_##param, \
929 DECLARE_ADDR_MATCH(channel
, 3);
930 DECLARE_ADDR_MATCH(dimm
, 3);
931 DECLARE_ADDR_MATCH(rank
, 4);
932 DECLARE_ADDR_MATCH(bank
, 32);
933 DECLARE_ADDR_MATCH(page
, 0x10000);
934 DECLARE_ADDR_MATCH(col
, 0x4000);
936 static int write_and_test(struct pci_dev
*dev
, const int where
, const u32 val
)
941 debugf0("setting pci %02x:%02x.%x reg=%02x value=%08x\n",
942 dev
->bus
->number
, PCI_SLOT(dev
->devfn
), PCI_FUNC(dev
->devfn
),
945 for (count
= 0; count
< 10; count
++) {
948 pci_write_config_dword(dev
, where
, val
);
949 pci_read_config_dword(dev
, where
, &read
);
955 i7core_printk(KERN_ERR
, "Error during set pci %02x:%02x.%x reg=%02x "
956 "write=%08x. Read=%08x\n",
957 dev
->bus
->number
, PCI_SLOT(dev
->devfn
), PCI_FUNC(dev
->devfn
),
964 * This routine prepares the Memory Controller for error injection.
965 * The error will be injected when some process tries to write to the
966 * memory that matches the given criteria.
967 * The criteria can be set in terms of a mask where dimm, rank, bank, page
968 * and col can be specified.
969 * A -1 value for any of the mask items will make the MCU to ignore
970 * that matching criteria for error injection.
972 * It should be noticed that the error will only happen after a write operation
973 * on a memory that matches the condition. if REPEAT_EN is not enabled at
974 * inject mask, then it will produce just one error. Otherwise, it will repeat
975 * until the injectmask would be cleaned.
977 * FIXME: This routine assumes that MAXNUMDIMMS value of MC_MAX_DOD
978 * is reliable enough to check if the MC is using the
979 * three channels. However, this is not clear at the datasheet.
981 static ssize_t
i7core_inject_enable_store(struct mem_ctl_info
*mci
,
982 const char *data
, size_t count
)
984 struct i7core_pvt
*pvt
= mci
->pvt_info
;
990 if (!pvt
->pci_ch
[pvt
->inject
.channel
][0])
993 rc
= strict_strtoul(data
, 10, &enable
);
998 pvt
->inject
.enable
= 1;
1000 disable_inject(mci
);
1004 /* Sets pvt->inject.dimm mask */
1005 if (pvt
->inject
.dimm
< 0)
1008 if (pvt
->channel
[pvt
->inject
.channel
].dimms
> 2)
1009 mask
|= (pvt
->inject
.dimm
& 0x3LL
) << 35;
1011 mask
|= (pvt
->inject
.dimm
& 0x1LL
) << 36;
1014 /* Sets pvt->inject.rank mask */
1015 if (pvt
->inject
.rank
< 0)
1018 if (pvt
->channel
[pvt
->inject
.channel
].dimms
> 2)
1019 mask
|= (pvt
->inject
.rank
& 0x1LL
) << 34;
1021 mask
|= (pvt
->inject
.rank
& 0x3LL
) << 34;
1024 /* Sets pvt->inject.bank mask */
1025 if (pvt
->inject
.bank
< 0)
1028 mask
|= (pvt
->inject
.bank
& 0x15LL
) << 30;
1030 /* Sets pvt->inject.page mask */
1031 if (pvt
->inject
.page
< 0)
1034 mask
|= (pvt
->inject
.page
& 0xffff) << 14;
1036 /* Sets pvt->inject.column mask */
1037 if (pvt
->inject
.col
< 0)
1040 mask
|= (pvt
->inject
.col
& 0x3fff);
1044 * bits 1-2: MASK_HALF_CACHELINE
1046 * bit 4: INJECT_ADDR_PARITY
1049 injectmask
= (pvt
->inject
.type
& 1) |
1050 (pvt
->inject
.section
& 0x3) << 1 |
1051 (pvt
->inject
.type
& 0x6) << (3 - 1);
1053 /* Unlock writes to registers - this register is write only */
1054 pci_write_config_dword(pvt
->pci_noncore
,
1055 MC_CFG_CONTROL
, 0x2);
1057 write_and_test(pvt
->pci_ch
[pvt
->inject
.channel
][0],
1058 MC_CHANNEL_ADDR_MATCH
, mask
);
1059 write_and_test(pvt
->pci_ch
[pvt
->inject
.channel
][0],
1060 MC_CHANNEL_ADDR_MATCH
+ 4, mask
>> 32L);
1062 write_and_test(pvt
->pci_ch
[pvt
->inject
.channel
][0],
1063 MC_CHANNEL_ERROR_MASK
, pvt
->inject
.eccmask
);
1065 write_and_test(pvt
->pci_ch
[pvt
->inject
.channel
][0],
1066 MC_CHANNEL_ERROR_INJECT
, injectmask
);
1069 * This is something undocumented, based on my tests
1070 * Without writing 8 to this register, errors aren't injected. Not sure
1073 pci_write_config_dword(pvt
->pci_noncore
,
1076 debugf0("Error inject addr match 0x%016llx, ecc 0x%08x,"
1078 mask
, pvt
->inject
.eccmask
, injectmask
);
1084 static ssize_t
i7core_inject_enable_show(struct mem_ctl_info
*mci
,
1087 struct i7core_pvt
*pvt
= mci
->pvt_info
;
1090 if (!pvt
->pci_ch
[pvt
->inject
.channel
][0])
1093 pci_read_config_dword(pvt
->pci_ch
[pvt
->inject
.channel
][0],
1094 MC_CHANNEL_ERROR_INJECT
, &injectmask
);
1096 debugf0("Inject error read: 0x%018x\n", injectmask
);
1098 if (injectmask
& 0x0c)
1099 pvt
->inject
.enable
= 1;
1101 return sprintf(data
, "%d\n", pvt
->inject
.enable
);
1104 #define DECLARE_COUNTER(param) \
1105 static ssize_t i7core_show_counter_##param( \
1106 struct mem_ctl_info *mci, \
1109 struct i7core_pvt *pvt = mci->pvt_info; \
1111 debugf1("%s() \n", __func__); \
1112 if (!pvt->ce_count_available || (pvt->is_registered)) \
1113 return sprintf(data, "data unavailable\n"); \
1114 return sprintf(data, "%lu\n", \
1115 pvt->udimm_ce_count[param]); \
1118 #define ATTR_COUNTER(param) \
1121 .name = __stringify(udimm##param), \
1122 .mode = (S_IRUGO | S_IWUSR) \
1124 .show = i7core_show_counter_##param \
1135 static const struct mcidev_sysfs_attribute i7core_addrmatch_attrs
[] = {
1136 ATTR_ADDR_MATCH(channel
),
1137 ATTR_ADDR_MATCH(dimm
),
1138 ATTR_ADDR_MATCH(rank
),
1139 ATTR_ADDR_MATCH(bank
),
1140 ATTR_ADDR_MATCH(page
),
1141 ATTR_ADDR_MATCH(col
),
1142 { } /* End of list */
1145 static const struct mcidev_sysfs_group i7core_inject_addrmatch
= {
1146 .name
= "inject_addrmatch",
1147 .mcidev_attr
= i7core_addrmatch_attrs
,
1150 static const struct mcidev_sysfs_attribute i7core_udimm_counters_attrs
[] = {
1154 { .attr
= { .name
= NULL
} }
1157 static const struct mcidev_sysfs_group i7core_udimm_counters
= {
1158 .name
= "all_channel_counts",
1159 .mcidev_attr
= i7core_udimm_counters_attrs
,
1162 static const struct mcidev_sysfs_attribute i7core_sysfs_rdimm_attrs
[] = {
1165 .name
= "inject_section",
1166 .mode
= (S_IRUGO
| S_IWUSR
)
1168 .show
= i7core_inject_section_show
,
1169 .store
= i7core_inject_section_store
,
1172 .name
= "inject_type",
1173 .mode
= (S_IRUGO
| S_IWUSR
)
1175 .show
= i7core_inject_type_show
,
1176 .store
= i7core_inject_type_store
,
1179 .name
= "inject_eccmask",
1180 .mode
= (S_IRUGO
| S_IWUSR
)
1182 .show
= i7core_inject_eccmask_show
,
1183 .store
= i7core_inject_eccmask_store
,
1185 .grp
= &i7core_inject_addrmatch
,
1188 .name
= "inject_enable",
1189 .mode
= (S_IRUGO
| S_IWUSR
)
1191 .show
= i7core_inject_enable_show
,
1192 .store
= i7core_inject_enable_store
,
1194 { } /* End of list */
1197 static const struct mcidev_sysfs_attribute i7core_sysfs_udimm_attrs
[] = {
1200 .name
= "inject_section",
1201 .mode
= (S_IRUGO
| S_IWUSR
)
1203 .show
= i7core_inject_section_show
,
1204 .store
= i7core_inject_section_store
,
1207 .name
= "inject_type",
1208 .mode
= (S_IRUGO
| S_IWUSR
)
1210 .show
= i7core_inject_type_show
,
1211 .store
= i7core_inject_type_store
,
1214 .name
= "inject_eccmask",
1215 .mode
= (S_IRUGO
| S_IWUSR
)
1217 .show
= i7core_inject_eccmask_show
,
1218 .store
= i7core_inject_eccmask_store
,
1220 .grp
= &i7core_inject_addrmatch
,
1223 .name
= "inject_enable",
1224 .mode
= (S_IRUGO
| S_IWUSR
)
1226 .show
= i7core_inject_enable_show
,
1227 .store
= i7core_inject_enable_store
,
1229 .grp
= &i7core_udimm_counters
,
1231 { } /* End of list */
1234 /****************************************************************************
1235 Device initialization routines: put/get, init/exit
1236 ****************************************************************************/
1239 * i7core_put_all_devices 'put' all the devices that we have
1240 * reserved via 'get'
1242 static void i7core_put_devices(struct i7core_dev
*i7core_dev
)
1246 debugf0(__FILE__
": %s()\n", __func__
);
1247 for (i
= 0; i
< i7core_dev
->n_devs
; i
++) {
1248 struct pci_dev
*pdev
= i7core_dev
->pdev
[i
];
1251 debugf0("Removing dev %02x:%02x.%d\n",
1253 PCI_SLOT(pdev
->devfn
), PCI_FUNC(pdev
->devfn
));
1258 static void i7core_put_all_devices(void)
1260 struct i7core_dev
*i7core_dev
, *tmp
;
1262 list_for_each_entry_safe(i7core_dev
, tmp
, &i7core_edac_list
, list
) {
1263 i7core_put_devices(i7core_dev
);
1264 free_i7core_dev(i7core_dev
);
1268 static void __init
i7core_xeon_pci_fixup(const struct pci_id_table
*table
)
1270 struct pci_dev
*pdev
= NULL
;
1274 * On Xeon 55xx, the Intel Quick Path Arch Generic Non-core pci buses
1275 * aren't announced by acpi. So, we need to use a legacy scan probing
1278 while (table
&& table
->descr
) {
1279 pdev
= pci_get_device(PCI_VENDOR_ID_INTEL
, table
->descr
[0].dev_id
, NULL
);
1280 if (unlikely(!pdev
)) {
1281 for (i
= 0; i
< MAX_SOCKET_BUSES
; i
++)
1282 pcibios_scan_specific_bus(255-i
);
1289 static unsigned i7core_pci_lastbus(void)
1291 int last_bus
= 0, bus
;
1292 struct pci_bus
*b
= NULL
;
1294 while ((b
= pci_find_next_bus(b
)) != NULL
) {
1296 debugf0("Found bus %d\n", bus
);
1301 debugf0("Last bus %d\n", last_bus
);
1307 * i7core_get_all_devices Find and perform 'get' operation on the MCH's
1308 * device/functions we want to reference for this driver
1310 * Need to 'get' device 16 func 1 and func 2
1312 static int i7core_get_onedevice(struct pci_dev
**prev
,
1313 const struct pci_id_table
*table
,
1314 const unsigned devno
,
1315 const unsigned last_bus
)
1317 struct i7core_dev
*i7core_dev
;
1318 const struct pci_id_descr
*dev_descr
= &table
->descr
[devno
];
1320 struct pci_dev
*pdev
= NULL
;
1324 pdev
= pci_get_device(PCI_VENDOR_ID_INTEL
,
1325 dev_descr
->dev_id
, *prev
);
1333 if (dev_descr
->optional
)
1339 i7core_printk(KERN_INFO
,
1340 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1341 dev_descr
->dev
, dev_descr
->func
,
1342 PCI_VENDOR_ID_INTEL
, dev_descr
->dev_id
);
1344 /* End of list, leave */
1347 bus
= pdev
->bus
->number
;
1349 socket
= last_bus
- bus
;
1351 i7core_dev
= get_i7core_dev(socket
);
1353 i7core_dev
= alloc_i7core_dev(socket
, table
);
1360 if (i7core_dev
->pdev
[devno
]) {
1361 i7core_printk(KERN_ERR
,
1362 "Duplicated device for "
1363 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1364 bus
, dev_descr
->dev
, dev_descr
->func
,
1365 PCI_VENDOR_ID_INTEL
, dev_descr
->dev_id
);
1370 i7core_dev
->pdev
[devno
] = pdev
;
1373 if (unlikely(PCI_SLOT(pdev
->devfn
) != dev_descr
->dev
||
1374 PCI_FUNC(pdev
->devfn
) != dev_descr
->func
)) {
1375 i7core_printk(KERN_ERR
,
1376 "Device PCI ID %04x:%04x "
1377 "has dev %02x:%02x.%d instead of dev %02x:%02x.%d\n",
1378 PCI_VENDOR_ID_INTEL
, dev_descr
->dev_id
,
1379 bus
, PCI_SLOT(pdev
->devfn
), PCI_FUNC(pdev
->devfn
),
1380 bus
, dev_descr
->dev
, dev_descr
->func
);
1384 /* Be sure that the device is enabled */
1385 if (unlikely(pci_enable_device(pdev
) < 0)) {
1386 i7core_printk(KERN_ERR
,
1388 "dev %02x:%02x.%d PCI ID %04x:%04x\n",
1389 bus
, dev_descr
->dev
, dev_descr
->func
,
1390 PCI_VENDOR_ID_INTEL
, dev_descr
->dev_id
);
1394 debugf0("Detected socket %d dev %02x:%02x.%d PCI ID %04x:%04x\n",
1395 socket
, bus
, dev_descr
->dev
,
1397 PCI_VENDOR_ID_INTEL
, dev_descr
->dev_id
);
1400 * As stated on drivers/pci/search.c, the reference count for
1401 * @from is always decremented if it is not %NULL. So, as we need
1402 * to get all devices up to null, we need to do a get for the device
1411 static int i7core_get_all_devices(void)
1413 int i
, rc
, last_bus
;
1414 struct pci_dev
*pdev
= NULL
;
1415 const struct pci_id_table
*table
= pci_dev_table
;
1417 last_bus
= i7core_pci_lastbus();
1419 while (table
&& table
->descr
) {
1420 for (i
= 0; i
< table
->n_devs
; i
++) {
1423 rc
= i7core_get_onedevice(&pdev
, table
, i
,
1430 i7core_put_all_devices();
1441 static int mci_bind_devs(struct mem_ctl_info
*mci
,
1442 struct i7core_dev
*i7core_dev
)
1444 struct i7core_pvt
*pvt
= mci
->pvt_info
;
1445 struct pci_dev
*pdev
;
1448 pvt
->is_registered
= 0;
1449 for (i
= 0; i
< i7core_dev
->n_devs
; i
++) {
1450 pdev
= i7core_dev
->pdev
[i
];
1454 func
= PCI_FUNC(pdev
->devfn
);
1455 slot
= PCI_SLOT(pdev
->devfn
);
1457 if (unlikely(func
> MAX_MCR_FUNC
))
1459 pvt
->pci_mcr
[func
] = pdev
;
1460 } else if (likely(slot
>= 4 && slot
< 4 + NUM_CHANS
)) {
1461 if (unlikely(func
> MAX_CHAN_FUNC
))
1463 pvt
->pci_ch
[slot
- 4][func
] = pdev
;
1464 } else if (!slot
&& !func
)
1465 pvt
->pci_noncore
= pdev
;
1469 debugf0("Associated fn %d.%d, dev = %p, socket %d\n",
1470 PCI_SLOT(pdev
->devfn
), PCI_FUNC(pdev
->devfn
),
1471 pdev
, i7core_dev
->socket
);
1473 if (PCI_SLOT(pdev
->devfn
) == 3 &&
1474 PCI_FUNC(pdev
->devfn
) == 2)
1475 pvt
->is_registered
= 1;
1481 i7core_printk(KERN_ERR
, "Device %d, function %d "
1482 "is out of the expected range\n",
1487 /****************************************************************************
1488 Error check routines
1489 ****************************************************************************/
1490 static void i7core_rdimm_update_csrow(struct mem_ctl_info
*mci
,
1496 struct i7core_pvt
*pvt
= mci
->pvt_info
;
1497 int row
= pvt
->csrow_map
[chan
][dimm
], i
;
1499 for (i
= 0; i
< add
; i
++) {
1500 msg
= kasprintf(GFP_KERNEL
, "Corrected error "
1501 "(Socket=%d channel=%d dimm=%d)",
1502 pvt
->i7core_dev
->socket
, chan
, dimm
);
1504 edac_mc_handle_fbd_ce(mci
, row
, 0, msg
);
1509 static void i7core_rdimm_update_ce_count(struct mem_ctl_info
*mci
,
1515 struct i7core_pvt
*pvt
= mci
->pvt_info
;
1516 int add0
= 0, add1
= 0, add2
= 0;
1517 /* Updates CE counters if it is not the first time here */
1518 if (pvt
->ce_count_available
) {
1519 /* Updates CE counters */
1521 add2
= new2
- pvt
->rdimm_last_ce_count
[chan
][2];
1522 add1
= new1
- pvt
->rdimm_last_ce_count
[chan
][1];
1523 add0
= new0
- pvt
->rdimm_last_ce_count
[chan
][0];
1527 pvt
->rdimm_ce_count
[chan
][2] += add2
;
1531 pvt
->rdimm_ce_count
[chan
][1] += add1
;
1535 pvt
->rdimm_ce_count
[chan
][0] += add0
;
1537 pvt
->ce_count_available
= 1;
1539 /* Store the new values */
1540 pvt
->rdimm_last_ce_count
[chan
][2] = new2
;
1541 pvt
->rdimm_last_ce_count
[chan
][1] = new1
;
1542 pvt
->rdimm_last_ce_count
[chan
][0] = new0
;
1544 /*updated the edac core */
1546 i7core_rdimm_update_csrow(mci
, chan
, 0, add0
);
1548 i7core_rdimm_update_csrow(mci
, chan
, 1, add1
);
1550 i7core_rdimm_update_csrow(mci
, chan
, 2, add2
);
1554 static void i7core_rdimm_check_mc_ecc_err(struct mem_ctl_info
*mci
)
1556 struct i7core_pvt
*pvt
= mci
->pvt_info
;
1558 int i
, new0
, new1
, new2
;
1560 /*Read DEV 3: FUN 2: MC_COR_ECC_CNT regs directly*/
1561 pci_read_config_dword(pvt
->pci_mcr
[2], MC_COR_ECC_CNT_0
,
1563 pci_read_config_dword(pvt
->pci_mcr
[2], MC_COR_ECC_CNT_1
,
1565 pci_read_config_dword(pvt
->pci_mcr
[2], MC_COR_ECC_CNT_2
,
1567 pci_read_config_dword(pvt
->pci_mcr
[2], MC_COR_ECC_CNT_3
,
1569 pci_read_config_dword(pvt
->pci_mcr
[2], MC_COR_ECC_CNT_4
,
1571 pci_read_config_dword(pvt
->pci_mcr
[2], MC_COR_ECC_CNT_5
,
1573 for (i
= 0 ; i
< 3; i
++) {
1574 debugf3("MC_COR_ECC_CNT%d = 0x%x; MC_COR_ECC_CNT%d = 0x%x\n",
1575 (i
* 2), rcv
[i
][0], (i
* 2) + 1, rcv
[i
][1]);
1576 /*if the channel has 3 dimms*/
1577 if (pvt
->channel
[i
].dimms
> 2) {
1578 new0
= DIMM_BOT_COR_ERR(rcv
[i
][0]);
1579 new1
= DIMM_TOP_COR_ERR(rcv
[i
][0]);
1580 new2
= DIMM_BOT_COR_ERR(rcv
[i
][1]);
1582 new0
= DIMM_TOP_COR_ERR(rcv
[i
][0]) +
1583 DIMM_BOT_COR_ERR(rcv
[i
][0]);
1584 new1
= DIMM_TOP_COR_ERR(rcv
[i
][1]) +
1585 DIMM_BOT_COR_ERR(rcv
[i
][1]);
1589 i7core_rdimm_update_ce_count(mci
, i
, new0
, new1
, new2
);
1593 /* This function is based on the device 3 function 4 registers as described on:
1594 * Intel Xeon Processor 5500 Series Datasheet Volume 2
1595 * http://www.intel.com/Assets/PDF/datasheet/321322.pdf
1596 * also available at:
1597 * http://www.arrownac.com/manufacturers/intel/s/nehalem/5500-datasheet-v2.pdf
1599 static void i7core_udimm_check_mc_ecc_err(struct mem_ctl_info
*mci
)
1601 struct i7core_pvt
*pvt
= mci
->pvt_info
;
1603 int new0
, new1
, new2
;
1605 if (!pvt
->pci_mcr
[4]) {
1606 debugf0("%s MCR registers not found\n", __func__
);
1610 /* Corrected test errors */
1611 pci_read_config_dword(pvt
->pci_mcr
[4], MC_TEST_ERR_RCV1
, &rcv1
);
1612 pci_read_config_dword(pvt
->pci_mcr
[4], MC_TEST_ERR_RCV0
, &rcv0
);
1614 /* Store the new values */
1615 new2
= DIMM2_COR_ERR(rcv1
);
1616 new1
= DIMM1_COR_ERR(rcv0
);
1617 new0
= DIMM0_COR_ERR(rcv0
);
1619 /* Updates CE counters if it is not the first time here */
1620 if (pvt
->ce_count_available
) {
1621 /* Updates CE counters */
1622 int add0
, add1
, add2
;
1624 add2
= new2
- pvt
->udimm_last_ce_count
[2];
1625 add1
= new1
- pvt
->udimm_last_ce_count
[1];
1626 add0
= new0
- pvt
->udimm_last_ce_count
[0];
1630 pvt
->udimm_ce_count
[2] += add2
;
1634 pvt
->udimm_ce_count
[1] += add1
;
1638 pvt
->udimm_ce_count
[0] += add0
;
1640 if (add0
| add1
| add2
)
1641 i7core_printk(KERN_ERR
, "New Corrected error(s): "
1642 "dimm0: +%d, dimm1: +%d, dimm2 +%d\n",
1645 pvt
->ce_count_available
= 1;
1647 /* Store the new values */
1648 pvt
->udimm_last_ce_count
[2] = new2
;
1649 pvt
->udimm_last_ce_count
[1] = new1
;
1650 pvt
->udimm_last_ce_count
[0] = new0
;
1654 * According to tables E-11 and E-12 of chapter E.3.3 of Intel 64 and IA-32
1655 * Architectures Software Developer’s Manual Volume 3B.
1656 * Nehalem are defined as family 0x06, model 0x1a
1658 * The MCA registers used here are the following ones:
1659 * struct mce field MCA Register
1660 * m->status MSR_IA32_MC8_STATUS
1661 * m->addr MSR_IA32_MC8_ADDR
1662 * m->misc MSR_IA32_MC8_MISC
1663 * In the case of Nehalem, the error information is masked at .status and .misc
1666 static void i7core_mce_output_error(struct mem_ctl_info
*mci
,
1667 const struct mce
*m
)
1669 struct i7core_pvt
*pvt
= mci
->pvt_info
;
1670 char *type
, *optype
, *err
, *msg
;
1671 unsigned long error
= m
->status
& 0x1ff0000l
;
1672 u32 optypenum
= (m
->status
>> 4) & 0x07;
1673 u32 core_err_cnt
= (m
->status
>> 38) && 0x7fff;
1674 u32 dimm
= (m
->misc
>> 16) & 0x3;
1675 u32 channel
= (m
->misc
>> 18) & 0x3;
1676 u32 syndrome
= m
->misc
>> 32;
1677 u32 errnum
= find_first_bit(&error
, 32);
1680 if (m
->mcgstatus
& 1)
1685 switch (optypenum
) {
1687 optype
= "generic undef request";
1690 optype
= "read error";
1693 optype
= "write error";
1696 optype
= "addr/cmd error";
1699 optype
= "scrubbing error";
1702 optype
= "reserved";
1708 err
= "read ECC error";
1711 err
= "RAS ECC error";
1714 err
= "write parity error";
1717 err
= "redundacy loss";
1723 err
= "memory range error";
1726 err
= "RTID out of range";
1729 err
= "address parity error";
1732 err
= "byte enable parity error";
1738 /* FIXME: should convert addr into bank and rank information */
1739 msg
= kasprintf(GFP_ATOMIC
,
1740 "%s (addr = 0x%08llx, cpu=%d, Dimm=%d, Channel=%d, "
1741 "syndrome=0x%08x, count=%d, Err=%08llx:%08llx (%s: %s))\n",
1742 type
, (long long) m
->addr
, m
->cpu
, dimm
, channel
,
1743 syndrome
, core_err_cnt
, (long long)m
->status
,
1744 (long long)m
->misc
, optype
, err
);
1748 csrow
= pvt
->csrow_map
[channel
][dimm
];
1750 /* Call the helper to output message */
1751 if (m
->mcgstatus
& 1)
1752 edac_mc_handle_fbd_ue(mci
, csrow
, 0,
1753 0 /* FIXME: should be channel here */, msg
);
1754 else if (!pvt
->is_registered
)
1755 edac_mc_handle_fbd_ce(mci
, csrow
,
1756 0 /* FIXME: should be channel here */, msg
);
1762 * i7core_check_error Retrieve and process errors reported by the
1763 * hardware. Called by the Core module.
1765 static void i7core_check_error(struct mem_ctl_info
*mci
)
1767 struct i7core_pvt
*pvt
= mci
->pvt_info
;
1773 * MCE first step: Copy all mce errors into a temporary buffer
1774 * We use a double buffering here, to reduce the risk of
1778 count
= (pvt
->mce_out
+ MCE_LOG_LEN
- pvt
->mce_in
)
1781 goto check_ce_error
;
1783 m
= pvt
->mce_outentry
;
1784 if (pvt
->mce_in
+ count
> MCE_LOG_LEN
) {
1785 unsigned l
= MCE_LOG_LEN
- pvt
->mce_in
;
1787 memcpy(m
, &pvt
->mce_entry
[pvt
->mce_in
], sizeof(*m
) * l
);
1793 memcpy(m
, &pvt
->mce_entry
[pvt
->mce_in
], sizeof(*m
) * count
);
1795 pvt
->mce_in
+= count
;
1798 if (pvt
->mce_overrun
) {
1799 i7core_printk(KERN_ERR
, "Lost %d memory errors\n",
1802 pvt
->mce_overrun
= 0;
1806 * MCE second step: parse errors and display
1808 for (i
= 0; i
< count
; i
++)
1809 i7core_mce_output_error(mci
, &pvt
->mce_outentry
[i
]);
1812 * Now, let's increment CE error counts
1815 if (!pvt
->is_registered
)
1816 i7core_udimm_check_mc_ecc_err(mci
);
1818 i7core_rdimm_check_mc_ecc_err(mci
);
1822 * i7core_mce_check_error Replicates mcelog routine to get errors
1823 * This routine simply queues mcelog errors, and
1824 * return. The error itself should be handled later
1825 * by i7core_check_error.
1826 * WARNING: As this routine should be called at NMI time, extra care should
1827 * be taken to avoid deadlocks, and to be as fast as possible.
1829 static int i7core_mce_check_error(void *priv
, struct mce
*mce
)
1831 struct mem_ctl_info
*mci
= priv
;
1832 struct i7core_pvt
*pvt
= mci
->pvt_info
;
1835 * Just let mcelog handle it if the error is
1836 * outside the memory controller
1838 if (((mce
->status
& 0xffff) >> 7) != 1)
1841 /* Bank 8 registers are the only ones that we know how to handle */
1846 /* Only handle if it is the right mc controller */
1847 if (cpu_data(mce
->cpu
).phys_proc_id
!= pvt
->i7core_dev
->socket
)
1852 if ((pvt
->mce_out
+ 1) % MCE_LOG_LEN
== pvt
->mce_in
) {
1858 /* Copy memory error at the ringbuffer */
1859 memcpy(&pvt
->mce_entry
[pvt
->mce_out
], mce
, sizeof(*mce
));
1861 pvt
->mce_out
= (pvt
->mce_out
+ 1) % MCE_LOG_LEN
;
1863 /* Handle fatal errors immediately */
1864 if (mce
->mcgstatus
& 1)
1865 i7core_check_error(mci
);
1867 /* Advise mcelog that the errors were handled */
1871 static void i7core_pci_ctl_create(struct i7core_pvt
*pvt
)
1873 pvt
->i7core_pci
= edac_pci_create_generic_ctl(
1874 &pvt
->i7core_dev
->pdev
[0]->dev
,
1876 if (unlikely(!pvt
->i7core_pci
))
1877 pr_warn("Unable to setup PCI error report via EDAC\n");
1880 static void i7core_pci_ctl_release(struct i7core_pvt
*pvt
)
1882 if (likely(pvt
->i7core_pci
))
1883 edac_pci_release_generic_ctl(pvt
->i7core_pci
);
1885 i7core_printk(KERN_ERR
,
1886 "Couldn't find mem_ctl_info for socket %d\n",
1887 pvt
->i7core_dev
->socket
);
1888 pvt
->i7core_pci
= NULL
;
1891 static void i7core_unregister_mci(struct i7core_dev
*i7core_dev
)
1893 struct mem_ctl_info
*mci
= i7core_dev
->mci
;
1894 struct i7core_pvt
*pvt
;
1896 if (unlikely(!mci
|| !mci
->pvt_info
)) {
1897 debugf0("MC: " __FILE__
": %s(): dev = %p\n",
1898 __func__
, &i7core_dev
->pdev
[0]->dev
);
1900 i7core_printk(KERN_ERR
, "Couldn't find mci handler\n");
1904 pvt
= mci
->pvt_info
;
1906 debugf0("MC: " __FILE__
": %s(): mci = %p, dev = %p\n",
1907 __func__
, mci
, &i7core_dev
->pdev
[0]->dev
);
1909 /* Disable MCE NMI handler */
1910 edac_mce_unregister(&pvt
->edac_mce
);
1912 /* Disable EDAC polling */
1913 i7core_pci_ctl_release(pvt
);
1915 /* Remove MC sysfs nodes */
1916 edac_mc_del_mc(mci
->dev
);
1918 debugf1("%s: free mci struct\n", mci
->ctl_name
);
1919 kfree(mci
->ctl_name
);
1921 i7core_dev
->mci
= NULL
;
1924 static int i7core_register_mci(struct i7core_dev
*i7core_dev
)
1926 struct mem_ctl_info
*mci
;
1927 struct i7core_pvt
*pvt
;
1928 int rc
, channels
, csrows
;
1930 /* Check the number of active and not disabled channels */
1931 rc
= i7core_get_active_channels(i7core_dev
->socket
, &channels
, &csrows
);
1932 if (unlikely(rc
< 0))
1935 /* allocate a new MC control structure */
1936 mci
= edac_mc_alloc(sizeof(*pvt
), csrows
, channels
, i7core_dev
->socket
);
1940 debugf0("MC: " __FILE__
": %s(): mci = %p, dev = %p\n",
1941 __func__
, mci
, &i7core_dev
->pdev
[0]->dev
);
1943 pvt
= mci
->pvt_info
;
1944 memset(pvt
, 0, sizeof(*pvt
));
1946 /* Associates i7core_dev and mci for future usage */
1947 pvt
->i7core_dev
= i7core_dev
;
1948 i7core_dev
->mci
= mci
;
1951 * FIXME: how to handle RDDR3 at MCI level? It is possible to have
1952 * Mixed RDDR3/UDDR3 with Nehalem, provided that they are on different
1955 mci
->mtype_cap
= MEM_FLAG_DDR3
;
1956 mci
->edac_ctl_cap
= EDAC_FLAG_NONE
;
1957 mci
->edac_cap
= EDAC_FLAG_NONE
;
1958 mci
->mod_name
= "i7core_edac.c";
1959 mci
->mod_ver
= I7CORE_REVISION
;
1960 mci
->ctl_name
= kasprintf(GFP_KERNEL
, "i7 core #%d",
1961 i7core_dev
->socket
);
1962 mci
->dev_name
= pci_name(i7core_dev
->pdev
[0]);
1963 mci
->ctl_page_to_phys
= NULL
;
1965 /* Store pci devices at mci for faster access */
1966 rc
= mci_bind_devs(mci
, i7core_dev
);
1967 if (unlikely(rc
< 0))
1970 if (pvt
->is_registered
)
1971 mci
->mc_driver_sysfs_attributes
= i7core_sysfs_rdimm_attrs
;
1973 mci
->mc_driver_sysfs_attributes
= i7core_sysfs_udimm_attrs
;
1975 /* Get dimm basic config */
1976 get_dimm_config(mci
);
1977 /* record ptr to the generic device */
1978 mci
->dev
= &i7core_dev
->pdev
[0]->dev
;
1979 /* Set the function pointer to an actual operation function */
1980 mci
->edac_check
= i7core_check_error
;
1982 /* add this new MC control structure to EDAC's list of MCs */
1983 if (unlikely(edac_mc_add_mc(mci
))) {
1984 debugf0("MC: " __FILE__
1985 ": %s(): failed edac_mc_add_mc()\n", __func__
);
1986 /* FIXME: perhaps some code should go here that disables error
1987 * reporting if we just enabled it
1994 /* Default error mask is any memory */
1995 pvt
->inject
.channel
= 0;
1996 pvt
->inject
.dimm
= -1;
1997 pvt
->inject
.rank
= -1;
1998 pvt
->inject
.bank
= -1;
1999 pvt
->inject
.page
= -1;
2000 pvt
->inject
.col
= -1;
2002 /* allocating generic PCI control info */
2003 i7core_pci_ctl_create(pvt
);
2005 /* Registers on edac_mce in order to receive memory errors */
2006 pvt
->edac_mce
.priv
= mci
;
2007 pvt
->edac_mce
.check_error
= i7core_mce_check_error
;
2008 rc
= edac_mce_register(&pvt
->edac_mce
);
2009 if (unlikely(rc
< 0)) {
2010 debugf0("MC: " __FILE__
2011 ": %s(): failed edac_mce_register()\n", __func__
);
2018 i7core_pci_ctl_release(pvt
);
2019 edac_mc_del_mc(mci
->dev
);
2021 kfree(mci
->ctl_name
);
2023 i7core_dev
->mci
= NULL
;
2028 * i7core_probe Probe for ONE instance of device to see if it is
2031 * 0 for FOUND a device
2032 * < 0 for error code
2035 static int __devinit
i7core_probe(struct pci_dev
*pdev
,
2036 const struct pci_device_id
*id
)
2039 struct i7core_dev
*i7core_dev
;
2041 /* get the pci devices we want to reserve for our use */
2042 mutex_lock(&i7core_edac_lock
);
2045 * All memory controllers are allocated at the first pass.
2047 if (unlikely(probed
>= 1)) {
2048 mutex_unlock(&i7core_edac_lock
);
2053 rc
= i7core_get_all_devices();
2054 if (unlikely(rc
< 0))
2057 list_for_each_entry(i7core_dev
, &i7core_edac_list
, list
) {
2058 rc
= i7core_register_mci(i7core_dev
);
2059 if (unlikely(rc
< 0))
2063 i7core_printk(KERN_INFO
, "Driver loaded.\n");
2065 mutex_unlock(&i7core_edac_lock
);
2069 list_for_each_entry(i7core_dev
, &i7core_edac_list
, list
)
2070 i7core_unregister_mci(i7core_dev
);
2072 i7core_put_all_devices();
2074 mutex_unlock(&i7core_edac_lock
);
2079 * i7core_remove destructor for one instance of device
2082 static void __devexit
i7core_remove(struct pci_dev
*pdev
)
2084 struct i7core_dev
*i7core_dev
;
2086 debugf0(__FILE__
": %s()\n", __func__
);
2089 * we have a trouble here: pdev value for removal will be wrong, since
2090 * it will point to the X58 register used to detect that the machine
2091 * is a Nehalem or upper design. However, due to the way several PCI
2092 * devices are grouped together to provide MC functionality, we need
2093 * to use a different method for releasing the devices
2096 mutex_lock(&i7core_edac_lock
);
2098 if (unlikely(!probed
)) {
2099 mutex_unlock(&i7core_edac_lock
);
2103 list_for_each_entry(i7core_dev
, &i7core_edac_list
, list
)
2104 i7core_unregister_mci(i7core_dev
);
2106 /* Release PCI resources */
2107 i7core_put_all_devices();
2111 mutex_unlock(&i7core_edac_lock
);
2114 MODULE_DEVICE_TABLE(pci
, i7core_pci_tbl
);
2117 * i7core_driver pci_driver structure for this module
2120 static struct pci_driver i7core_driver
= {
2121 .name
= "i7core_edac",
2122 .probe
= i7core_probe
,
2123 .remove
= __devexit_p(i7core_remove
),
2124 .id_table
= i7core_pci_tbl
,
2128 * i7core_init Module entry function
2129 * Try to initialize this module for its devices
2131 static int __init
i7core_init(void)
2135 debugf2("MC: " __FILE__
": %s()\n", __func__
);
2137 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
2141 i7core_xeon_pci_fixup(pci_dev_table
);
2143 pci_rc
= pci_register_driver(&i7core_driver
);
2148 i7core_printk(KERN_ERR
, "Failed to register device with error %d.\n",
2155 * i7core_exit() Module exit function
2156 * Unregister the driver
2158 static void __exit
i7core_exit(void)
2160 debugf2("MC: " __FILE__
": %s()\n", __func__
);
2161 pci_unregister_driver(&i7core_driver
);
2164 module_init(i7core_init
);
2165 module_exit(i7core_exit
);
2167 MODULE_LICENSE("GPL");
2168 MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
2169 MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
2170 MODULE_DESCRIPTION("MC Driver for Intel i7 Core memory controllers - "
2173 module_param(edac_op_state
, int, 0444);
2174 MODULE_PARM_DESC(edac_op_state
, "EDAC Error Reporting state: 0=Poll,1=NMI");