1 // SPDX-License-Identifier: GPL-2.0
3 * Driver for Intel(R) 10nm server memory controller.
4 * Copyright (c) 2019, Intel Corporation.
8 #include <linux/kernel.h>
10 #include <asm/cpu_device_id.h>
11 #include <asm/intel-family.h>
13 #include "edac_module.h"
14 #include "skx_common.h"
/* Driver revision string; i10nm_init() prints it at KERN_INFO level. */
16 #define I10NM_REVISION "v0.0.6"
/*
 * Module name. i10nm_init() compares it against edac_get_owner() and
 * passes it to skx_register_mci().
 */
17 #define EDAC_MOD_STR "i10nm_edac"
/*
 * Logging helper: forwards to edac_printk() with the fixed "i10nm" string
 * as its second argument, followed by the caller's format and arguments.
 */
20 #define i10nm_printk(level, fmt, arg...) \
21 edac_printk(level, "i10nm", fmt, ##arg)
/*
 * PCI config-space readers. Each macro expands to pci_read_config_dword()
 * on a device pointer held in 'd' and stores the dword into 'reg'. The
 * macro's value is pci_read_config_dword()'s return value; callers in this
 * file treat a non-zero value as failure. Several macros read the
 * file-scope 'res_cfg' pointer to pick a GNR-specific offset.
 */
/* Read offset 0xd0 of d->uracu (the "socket bar" per the callers' messages). */
23 #define I10NM_GET_SCK_BAR(d, reg) \
24 pci_read_config_dword((d)->uracu, 0xd0, &(reg))
/*
 * Read the i-th 4-byte entry of d->uracu starting at 0xd4 when
 * res_cfg->type == GNR, otherwise starting at 0xd8.
 */
25 #define I10NM_GET_IMC_BAR(d, i, reg) \
26 pci_read_config_dword((d)->uracu, \
27 (res_cfg->type == GNR ? 0xd4 : 0xd8) + (i) * 4, &(reg))
/*
 * Read the i-th SAD entry of d->sad_all starting at 'offset'; entries are
 * spaced 12 bytes apart when res_cfg->type == GNR, otherwise 8 bytes.
 */
28 #define I10NM_GET_SAD(d, offset, i, reg)\
29 pci_read_config_dword((d)->sad_all, (offset) + (i) * \
30 (res_cfg->type == GNR ? 12 : 8), &(reg))
/* Read offset 0xd4 of d->uracu; used by i10nm_get_hbm_munits(). */
31 #define I10NM_GET_HBM_IMC_BAR(d, reg) \
32 pci_read_config_dword((d)->uracu, 0xd4, &(reg))
/* Read d->pcu_cr3 at 0x290 when res_cfg->type == GNR, otherwise at 0x90. */
33 #define I10NM_GET_CAPID3_CFG(d, reg) \
34 pci_read_config_dword((d)->pcu_cr3, \
35 res_cfg->type == GNR ? 0x290 : 0x90, &(reg))
/* Read d->pcu_cr3 at 0x298 when res_cfg->type == GNR, otherwise at 0x98. */
36 #define I10NM_GET_CAPID5_CFG(d, reg) \
37 pci_read_config_dword((d)->pcu_cr3, \
38 res_cfg->type == GNR ? 0x298 : 0x98, &(reg))
/*
 * MMIO register accessors. 'm' supplies mbase, hbm_mc and chan_mmio_sz
 * (callers in this file pass a struct skx_imc pointer) and 'i' is the
 * channel index, so every access lands at
 *   m->mbase + <register offset> + i * m->chan_mmio_sz.
 * Where several register offsets are listed, the first applies when
 * m->hbm_mc is set; otherwise the GNR value applies when
 * res_cfg->type == GNR, and the last value applies in all other cases.
 */
/* 32-bit read of the DIMMMTR register; (j) * 4 steps to the j-th DIMM slot. */
39 #define I10NM_GET_DIMMMTR(m, i, j) \
40 readl((m)->mbase + ((m)->hbm_mc ? 0x80c : \
41 (res_cfg->type == GNR ? 0xc0c : 0x2080c)) + \
42 (i) * (m)->chan_mmio_sz + (j) * 4)
/*
 * 32-bit read of the MCDDRTCFG register. There is no GNR offset here;
 * i10nm_get_dimm_config() only reads it when res_cfg->type != GNR.
 */
43 #define I10NM_GET_MCDDRTCFG(m, i) \
44 readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \
45 (i) * (m)->chan_mmio_sz)
/* 32-bit read of the MCMTR register of channel i. */
46 #define I10NM_GET_MCMTR(m, i) \
47 readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : \
48 (res_cfg->type == GNR ? 0xaf8 : 0x20ef8)) + \
49 (i) * (m)->chan_mmio_sz)
/* 32-bit read of the AMAP register of channel i. */
50 #define I10NM_GET_AMAP(m, i) \
51 readl((m)->mbase + ((m)->hbm_mc ? 0x814 : \
52 (res_cfg->type == GNR ? 0xc14 : 0x20814)) + \
53 (i) * (m)->chan_mmio_sz)
/* Generic 32-bit read at a caller-supplied offset within channel i. */
54 #define I10NM_GET_REG32(m, i, offset) \
55 readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
/* Generic 64-bit read (readq) at a caller-supplied offset within channel i. */
56 #define I10NM_GET_REG64(m, i, offset) \
57 readq((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
/* Generic 32-bit write of 'v' at a caller-supplied offset within channel i. */
58 #define I10NM_SET_REG32(m, i, offset, v) \
59 writel(v, (m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
/*
 * Field decoders for the values read by the PCI/MMIO accessors.
 * NOTE(review): GET_BITFIELD() is not defined in this file (presumably it
 * comes from skx_common.h and extracts the inclusive bit range lo..hi) -
 * confirm before relying on the exact field widths.
 */
/* Socket MMIO base: field 0..28 of the socket BAR value, shifted left by 23. */
61 #define I10NM_GET_SCK_MMIO_BASE(reg) (GET_BITFIELD(reg, 0, 28) << 23)
/* Memory-controller MMIO offset: field 0..10 of the IMC BAR value, shifted left by 12. */
62 #define I10NM_GET_IMC_MMIO_OFFSET(reg) (GET_BITFIELD(reg, 0, 10) << 12)
/* Memory-controller MMIO size: (field 13..23 - field 0..10 + 1), shifted left by 12. */
63 #define I10NM_GET_IMC_MMIO_SIZE(reg) ((GET_BITFIELD(reg, 13, 23) - \
64 GET_BITFIELD(reg, 0, 10) + 1) << 12)
/* HBM MMIO offset: field 0..10 shifted left by 12, plus a fixed 0x140000. */
65 #define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg) \
66 ((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000)
/*
 * GNR layout used by get_ddr_munit(): the controller's offset is the
 * decoded IMC offset + I10NM_GNR_IMC_MMIO_OFFSET +
 * physical_idx * I10NM_GNR_IMC_MMIO_SIZE, and the mapped size is
 * I10NM_GNR_IMC_MMIO_SIZE.
 */
68 #define I10NM_GNR_IMC_MMIO_OFFSET 0x24c000
69 #define I10NM_GNR_IMC_MMIO_SIZE 0x4000
/* Per-HBM-controller stride and ioremap() size in i10nm_get_hbm_munits(). */
70 #define I10NM_HBM_IMC_MMIO_SIZE 0x9000
/* Applied to the CAPID5 value in i10nm_get_imc_num() (logged as the DDR CH number). */
71 #define I10NM_DDR_IMC_CH_CNT(reg) GET_BITFIELD(reg, 21, 24)
/* Applied to the CAPID3 value; i10nm_check_hbm_imc() tests it for non-zero. */
72 #define I10NM_IS_HBM_PRESENT(reg) GET_BITFIELD(reg, 27, 30)
/* Applied to channel 0's MCMTR value in i10nm_get_hbm_munits(). */
73 #define I10NM_IS_HBM_IMC(reg) GET_BITFIELD(reg, 29, 29)
/* Number of SAD entries i10nm_check_2lm() scans per socket. */
75 #define I10NM_MAX_SAD 16
/*
 * SAD entry flags. i10nm_check_2lm() reports a "2-level memory
 * configuration" when both are set in the same entry.
 */
76 #define I10NM_SAD_ENABLE(reg) GET_BITFIELD(reg, 0, 0)
77 #define I10NM_SAD_NM_CACHEABLE(reg) GET_BITFIELD(reg, 5, 5)
/*
 * Bits of the retry_rd_err_log register at index [0] of the offsets_*
 * tables. __enable_retry_rd_err_log() sets, clears and restores UC,
 * NOOVER and EN there.
 */
79 #define RETRY_RD_ERR_LOG_UC BIT(1)
80 #define RETRY_RD_ERR_LOG_NOOVER BIT(14)
81 #define RETRY_RD_ERR_LOG_EN BIT(15)
/* Combined mask: RETRY_RD_ERR_LOG_NOOVER | RETRY_RD_ERR_LOG_UC. */
82 #define RETRY_RD_ERR_LOG_NOOVER_UC (BIT(14) | BIT(1))
/*
 * Low three bits (2..0). show_retry_rd_err_log() clears them and writes the
 * register back when retry_rd_err_log == 2 and any of them is set.
 */
83 #define RETRY_RD_ERR_LOG_OVER_UC_V (BIT(2) | BIT(1) | BIT(0))
/*
 * List of per-socket EDAC devices. i10nm_init() passes the address of this
 * pointer to skx_get_all_bus_mappings(); the rest of the file walks the
 * list with list_for_each_entry().
 */
85 static struct list_head
*i10nm_edac_list
;
/*
 * Platform resource configuration consulted throughout this file
 * (res_cfg->type, the *_bdf fields, the DDR/HBM controller and channel
 * counts, and the offsets_* tables). The register macros above read it too.
 */
87 static struct res_config
*res_cfg
;
/*
 * Module parameter (int, mode 0444). Its MODULE_PARM_DESC lists 0=off,
 * 1=bios and 2=linux; the value 2 is what triggers
 * enable_retry_rd_err_log() and the status-bit clearing in
 * show_retry_rd_err_log().
 */
88 static int retry_rd_err_log
;
/*
 * Module parameter (mode 0644) registered through module_param_cb() with
 * set_decoding_via_mca() as its setter. i10nm_mc_decode_available() tests
 * it together with mem_cfg_2lm.
 */
89 static int decoding_via_mca
;
/*
 * Result of i10nm_check_2lm(), stored by i10nm_init() and forwarded to
 * skx_set_mem_cfg().
 */
90 static bool mem_cfg_2lm
;
/*
 * Register-offset tables for the retry_rd_err_log registers, six entries
 * each. The res_config initialisers later in this file point their
 * .offsets_scrub, .offsets_demand, .offsets_demand2, .offsets_*_hbm0 and
 * .offsets_*_hbm1 members at these arrays: the *_icx tables are used by
 * i10nm_cfg0/i10nm_cfg1 and the *_spr tables by spr_cfg.
 *
 * Each offset is combined with mbase + channel * chan_mmio_sz by the
 * I10NM_GET_REG32/I10NM_GET_REG64/I10NM_SET_REG32 accessors. How the
 * entries are consumed by __enable_retry_rd_err_log() and
 * show_retry_rd_err_log():
 *   [0]          32-bit register carrying the RETRY_RD_ERR_LOG_* bits;
 *                it is both read and written
 *   [1] [3] [4]  read as 32-bit values
 *   [2]          read with I10NM_GET_REG64 when res_cfg->type == SPR;
 *                a 32-bit read of the same entry also exists
 *   [5]          read as a 64-bit value
 * show_retry_rd_err_log() picks a scrub or a demand table according to its
 * scrub_err argument.
 */
92 static u32 offsets_scrub_icx
[] = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8};
93 static u32 offsets_scrub_spr
[] = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8};
94 static u32 offsets_scrub_spr_hbm0
[] = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8};
95 static u32 offsets_scrub_spr_hbm1
[] = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8};
96 static u32 offsets_demand_icx
[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0};
97 static u32 offsets_demand_spr
[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0};
98 static u32 offsets_demand2_spr
[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10};
99 static u32 offsets_demand_spr_hbm0
[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0};
100 static u32 offsets_demand_spr_hbm1
[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0};
102 static void __enable_retry_rd_err_log(struct skx_imc
*imc
, int chan
, bool enable
,
103 u32
*offsets_scrub
, u32
*offsets_demand
,
104 u32
*offsets_demand2
)
108 s
= I10NM_GET_REG32(imc
, chan
, offsets_scrub
[0]);
109 d
= I10NM_GET_REG32(imc
, chan
, offsets_demand
[0]);
111 d2
= I10NM_GET_REG32(imc
, chan
, offsets_demand2
[0]);
114 /* Save default configurations */
115 imc
->chan
[chan
].retry_rd_err_log_s
= s
;
116 imc
->chan
[chan
].retry_rd_err_log_d
= d
;
118 imc
->chan
[chan
].retry_rd_err_log_d2
= d2
;
120 s
&= ~RETRY_RD_ERR_LOG_NOOVER_UC
;
121 s
|= RETRY_RD_ERR_LOG_EN
;
122 d
&= ~RETRY_RD_ERR_LOG_NOOVER_UC
;
123 d
|= RETRY_RD_ERR_LOG_EN
;
125 if (offsets_demand2
) {
126 d2
&= ~RETRY_RD_ERR_LOG_UC
;
127 d2
|= RETRY_RD_ERR_LOG_NOOVER
;
128 d2
|= RETRY_RD_ERR_LOG_EN
;
131 /* Restore default configurations */
132 if (imc
->chan
[chan
].retry_rd_err_log_s
& RETRY_RD_ERR_LOG_UC
)
133 s
|= RETRY_RD_ERR_LOG_UC
;
134 if (imc
->chan
[chan
].retry_rd_err_log_s
& RETRY_RD_ERR_LOG_NOOVER
)
135 s
|= RETRY_RD_ERR_LOG_NOOVER
;
136 if (!(imc
->chan
[chan
].retry_rd_err_log_s
& RETRY_RD_ERR_LOG_EN
))
137 s
&= ~RETRY_RD_ERR_LOG_EN
;
138 if (imc
->chan
[chan
].retry_rd_err_log_d
& RETRY_RD_ERR_LOG_UC
)
139 d
|= RETRY_RD_ERR_LOG_UC
;
140 if (imc
->chan
[chan
].retry_rd_err_log_d
& RETRY_RD_ERR_LOG_NOOVER
)
141 d
|= RETRY_RD_ERR_LOG_NOOVER
;
142 if (!(imc
->chan
[chan
].retry_rd_err_log_d
& RETRY_RD_ERR_LOG_EN
))
143 d
&= ~RETRY_RD_ERR_LOG_EN
;
145 if (offsets_demand2
) {
146 if (imc
->chan
[chan
].retry_rd_err_log_d2
& RETRY_RD_ERR_LOG_UC
)
147 d2
|= RETRY_RD_ERR_LOG_UC
;
148 if (!(imc
->chan
[chan
].retry_rd_err_log_d2
& RETRY_RD_ERR_LOG_NOOVER
))
149 d2
&= ~RETRY_RD_ERR_LOG_NOOVER
;
150 if (!(imc
->chan
[chan
].retry_rd_err_log_d2
& RETRY_RD_ERR_LOG_EN
))
151 d2
&= ~RETRY_RD_ERR_LOG_EN
;
155 I10NM_SET_REG32(imc
, chan
, offsets_scrub
[0], s
);
156 I10NM_SET_REG32(imc
, chan
, offsets_demand
[0], d
);
158 I10NM_SET_REG32(imc
, chan
, offsets_demand2
[0], d2
);
161 static void enable_retry_rd_err_log(bool enable
)
163 int i
, j
, imc_num
, chan_num
;
169 list_for_each_entry(d
, i10nm_edac_list
, list
) {
170 imc_num
= res_cfg
->ddr_imc_num
;
171 chan_num
= res_cfg
->ddr_chan_num
;
173 for (i
= 0; i
< imc_num
; i
++) {
178 for (j
= 0; j
< chan_num
; j
++)
179 __enable_retry_rd_err_log(imc
, j
, enable
,
180 res_cfg
->offsets_scrub
,
181 res_cfg
->offsets_demand
,
182 res_cfg
->offsets_demand2
);
185 imc_num
+= res_cfg
->hbm_imc_num
;
186 chan_num
= res_cfg
->hbm_chan_num
;
188 for (; i
< imc_num
; i
++) {
190 if (!imc
->mbase
|| !imc
->hbm_mc
)
193 for (j
= 0; j
< chan_num
; j
++) {
194 __enable_retry_rd_err_log(imc
, j
, enable
,
195 res_cfg
->offsets_scrub_hbm0
,
196 res_cfg
->offsets_demand_hbm0
,
198 __enable_retry_rd_err_log(imc
, j
, enable
,
199 res_cfg
->offsets_scrub_hbm1
,
200 res_cfg
->offsets_demand_hbm1
,
207 static void show_retry_rd_err_log(struct decoded_addr
*res
, char *msg
,
208 int len
, bool scrub_err
)
210 struct skx_imc
*imc
= &res
->dev
->imc
[res
->imc
];
211 u32 log0
, log1
, log2
, log3
, log4
;
212 u32 corr0
, corr1
, corr2
, corr3
;
213 u32 lxg0
, lxg1
, lxg3
, lxg4
;
227 offsets
= scrub_err
? res_cfg
->offsets_scrub_hbm1
:
228 res_cfg
->offsets_demand_hbm1
;
230 offsets
= scrub_err
? res_cfg
->offsets_scrub_hbm0
:
231 res_cfg
->offsets_demand_hbm0
;
234 offsets
= res_cfg
->offsets_scrub
;
236 offsets
= res_cfg
->offsets_demand
;
237 xffsets
= res_cfg
->offsets_demand2
;
241 log0
= I10NM_GET_REG32(imc
, res
->channel
, offsets
[0]);
242 log1
= I10NM_GET_REG32(imc
, res
->channel
, offsets
[1]);
243 log3
= I10NM_GET_REG32(imc
, res
->channel
, offsets
[3]);
244 log4
= I10NM_GET_REG32(imc
, res
->channel
, offsets
[4]);
245 log5
= I10NM_GET_REG64(imc
, res
->channel
, offsets
[5]);
248 lxg0
= I10NM_GET_REG32(imc
, res
->channel
, xffsets
[0]);
249 lxg1
= I10NM_GET_REG32(imc
, res
->channel
, xffsets
[1]);
250 lxg3
= I10NM_GET_REG32(imc
, res
->channel
, xffsets
[3]);
251 lxg4
= I10NM_GET_REG32(imc
, res
->channel
, xffsets
[4]);
252 lxg5
= I10NM_GET_REG64(imc
, res
->channel
, xffsets
[5]);
255 if (res_cfg
->type
== SPR
) {
256 log2a
= I10NM_GET_REG64(imc
, res
->channel
, offsets
[2]);
257 n
= snprintf(msg
, len
, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx",
258 log0
, log1
, log2a
, log3
, log4
, log5
);
262 lxg2a
= I10NM_GET_REG64(imc
, res
->channel
, xffsets
[2]);
263 n
+= snprintf(msg
+ n
, len
- n
, " %.8x %.8x %.16llx %.8x %.8x %.16llx]",
264 lxg0
, lxg1
, lxg2a
, lxg3
, lxg4
, lxg5
);
266 n
+= snprintf(msg
+ n
, len
- n
, "]");
270 log2
= I10NM_GET_REG32(imc
, res
->channel
, offsets
[2]);
271 n
= snprintf(msg
, len
, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]",
272 log0
, log1
, log2
, log3
, log4
, log5
);
277 corr0
= I10NM_GET_REG32(imc
, res
->channel
, 0x2c18);
278 corr1
= I10NM_GET_REG32(imc
, res
->channel
, 0x2c1c);
279 corr2
= I10NM_GET_REG32(imc
, res
->channel
, 0x2c20);
280 corr3
= I10NM_GET_REG32(imc
, res
->channel
, 0x2c24);
282 corr0
= I10NM_GET_REG32(imc
, res
->channel
, 0x2818);
283 corr1
= I10NM_GET_REG32(imc
, res
->channel
, 0x281c);
284 corr2
= I10NM_GET_REG32(imc
, res
->channel
, 0x2820);
285 corr3
= I10NM_GET_REG32(imc
, res
->channel
, 0x2824);
288 corr0
= I10NM_GET_REG32(imc
, res
->channel
, 0x22c18);
289 corr1
= I10NM_GET_REG32(imc
, res
->channel
, 0x22c1c);
290 corr2
= I10NM_GET_REG32(imc
, res
->channel
, 0x22c20);
291 corr3
= I10NM_GET_REG32(imc
, res
->channel
, 0x22c24);
295 snprintf(msg
+ n
, len
- n
,
296 " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]",
297 corr0
& 0xffff, corr0
>> 16,
298 corr1
& 0xffff, corr1
>> 16,
299 corr2
& 0xffff, corr2
>> 16,
300 corr3
& 0xffff, corr3
>> 16);
302 /* Clear status bits */
303 if (retry_rd_err_log
== 2) {
304 if (log0
& RETRY_RD_ERR_LOG_OVER_UC_V
) {
305 log0
&= ~RETRY_RD_ERR_LOG_OVER_UC_V
;
306 I10NM_SET_REG32(imc
, res
->channel
, offsets
[0], log0
);
309 if (xffsets
&& (lxg0
& RETRY_RD_ERR_LOG_OVER_UC_V
)) {
310 lxg0
&= ~RETRY_RD_ERR_LOG_OVER_UC_V
;
311 I10NM_SET_REG32(imc
, res
->channel
, xffsets
[0], lxg0
);
316 static struct pci_dev
*pci_get_dev_wrapper(int dom
, unsigned int bus
,
317 unsigned int dev
, unsigned int fun
)
319 struct pci_dev
*pdev
;
321 pdev
= pci_get_domain_bus_and_slot(dom
, bus
, PCI_DEVFN(dev
, fun
));
323 edac_dbg(2, "No device %02x:%02x.%x\n",
328 if (unlikely(pci_enable_device(pdev
) < 0)) {
329 edac_dbg(2, "Failed to enable device %02x:%02x.%x\n",
339 * i10nm_get_imc_num() - Get the number of present DDR memory controllers.
341 * @cfg : The pointer to the structure of EDAC resource configurations.
343 * For Granite Rapids CPUs, the number of present DDR memory controllers read
344 * at runtime overwrites the value statically configured in @cfg->ddr_imc_num.
345 * For other CPUs, the number of present DDR memory controllers is statically
346 * configured in @cfg->ddr_imc_num.
348 * RETURNS : 0 on success, < 0 on failure.
350 static int i10nm_get_imc_num(struct res_config
*cfg
)
352 int n
, imc_num
, chan_num
= 0;
356 list_for_each_entry(d
, i10nm_edac_list
, list
) {
357 d
->pcu_cr3
= pci_get_dev_wrapper(d
->seg
, d
->bus
[res_cfg
->pcu_cr3_bdf
.bus
],
358 res_cfg
->pcu_cr3_bdf
.dev
,
359 res_cfg
->pcu_cr3_bdf
.fun
);
363 if (I10NM_GET_CAPID5_CFG(d
, reg
))
366 n
= I10NM_DDR_IMC_CH_CNT(reg
);
370 edac_dbg(2, "Get DDR CH number: %d\n", chan_num
);
371 } else if (chan_num
!= n
) {
372 i10nm_printk(KERN_NOTICE
, "Get DDR CH numbers: %d, %d\n", chan_num
, n
);
379 * One channel per DDR memory controller for Granite Rapids CPUs.
384 i10nm_printk(KERN_ERR
, "Invalid DDR MC number\n");
388 if (imc_num
> I10NM_NUM_DDR_IMC
) {
389 i10nm_printk(KERN_ERR
, "Need to make I10NM_NUM_DDR_IMC >= %d\n", imc_num
);
393 if (cfg
->ddr_imc_num
!= imc_num
) {
395 * Store the number of present DDR memory controllers.
397 cfg
->ddr_imc_num
= imc_num
;
398 edac_dbg(2, "Set DDR MC number: %d", imc_num
);
404 * For other CPUs, the number of present DDR memory controllers
405 * is statically pre-configured in cfg->ddr_imc_num.
411 static bool i10nm_check_2lm(struct res_config
*cfg
)
417 list_for_each_entry(d
, i10nm_edac_list
, list
) {
418 d
->sad_all
= pci_get_dev_wrapper(d
->seg
, d
->bus
[res_cfg
->sad_all_bdf
.bus
],
419 res_cfg
->sad_all_bdf
.dev
,
420 res_cfg
->sad_all_bdf
.fun
);
424 for (i
= 0; i
< I10NM_MAX_SAD
; i
++) {
425 I10NM_GET_SAD(d
, cfg
->sad_all_offset
, i
, reg
);
426 if (I10NM_SAD_ENABLE(reg
) && I10NM_SAD_NM_CACHEABLE(reg
)) {
427 edac_dbg(2, "2-level memory configuration.\n");
437 * Check whether the error comes from DDRT by ICX/Tremont/SPR model specific error code.
438 * Refer to SDM vol3B 17.11.3/17.13.2 Intel IMC MC error codes for IA32_MCi_STATUS.
440 static bool i10nm_mscod_is_ddrt(u32 mscod
)
442 switch (res_cfg
->type
) {
445 case 0x0106: case 0x0107:
446 case 0x0800: case 0x0804:
447 case 0x0806 ... 0x0808:
448 case 0x080a ... 0x080e:
449 case 0x0810: case 0x0811:
450 case 0x0816: case 0x081e:
458 case 0x0800: case 0x0804:
459 case 0x0806 ... 0x0808:
460 case 0x080a ... 0x080e:
461 case 0x0810: case 0x0811:
462 case 0x0816: case 0x081e:
475 static bool i10nm_mc_decode_available(struct mce
*mce
)
477 #define ICX_IMCx_CHy 0x06666000
480 if (!decoding_via_mca
|| mem_cfg_2lm
)
483 if ((mce
->status
& (MCI_STATUS_MISCV
| MCI_STATUS_ADDRV
))
484 != (MCI_STATUS_MISCV
| MCI_STATUS_ADDRV
))
489 switch (res_cfg
->type
) {
491 /* Check whether the bank is one of {13,14,17,18,21,22,25,26} */
492 if (!(ICX_IMCx_CHy
& (1 << bank
)))
496 if (bank
< 13 || bank
> 20)
503 /* DDRT errors can't be decoded from MCA bank registers */
504 if (MCI_MISC_ECC_MODE(mce
->misc
) == MCI_MISC_ECC_DDRT
)
507 if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce
->status
)))
513 static bool i10nm_mc_decode(struct decoded_addr
*res
)
515 struct mce
*m
= res
->mce
;
519 if (!i10nm_mc_decode_available(m
))
522 list_for_each_entry(d
, i10nm_edac_list
, list
) {
523 if (d
->imc
[0].src_id
== m
->socketid
) {
524 res
->socket
= m
->socketid
;
530 switch (res_cfg
->type
) {
534 res
->channel
= bank
% 2;
535 res
->column
= GET_BITFIELD(m
->misc
, 9, 18) << 2;
536 res
->row
= GET_BITFIELD(m
->misc
, 19, 39);
537 res
->bank_group
= GET_BITFIELD(m
->misc
, 40, 41);
538 res
->bank_address
= GET_BITFIELD(m
->misc
, 42, 43);
539 res
->bank_group
|= GET_BITFIELD(m
->misc
, 44, 44) << 2;
540 res
->rank
= GET_BITFIELD(m
->misc
, 56, 58);
541 res
->dimm
= res
->rank
>> 2;
542 res
->rank
= res
->rank
% 4;
547 res
->channel
= bank
% 2;
548 res
->column
= GET_BITFIELD(m
->misc
, 9, 18) << 2;
549 res
->row
= GET_BITFIELD(m
->misc
, 19, 36);
550 res
->bank_group
= GET_BITFIELD(m
->misc
, 37, 38);
551 res
->bank_address
= GET_BITFIELD(m
->misc
, 39, 40);
552 res
->bank_group
|= GET_BITFIELD(m
->misc
, 41, 41) << 2;
553 res
->rank
= GET_BITFIELD(m
->misc
, 57, 57);
554 res
->dimm
= GET_BITFIELD(m
->misc
, 58, 58);
561 skx_printk(KERN_ERR
, "No device for src_id %d imc %d\n",
562 m
->socketid
, res
->imc
);
570 * get_gnr_mdev() - Get the PCI device of the @logical_idx-th DDR memory controller.
572 * @d : The pointer to the structure of CPU socket EDAC device.
573 * @logical_idx : The logical index of the present memory controller (0 ~ max present MC# - 1).
574 * @physical_idx : To store the corresponding physical index of @logical_idx.
576 * RETURNS : The PCI device of the @logical_idx-th DDR memory controller, NULL on failure.
578 static struct pci_dev
*get_gnr_mdev(struct skx_dev
*d
, int logical_idx
, int *physical_idx
)
580 #define GNR_MAX_IMC_PCI_CNT 28
582 struct pci_dev
*mdev
;
586 * Detect present memory controllers from { PCI device: 8-5, function 7-1 }
588 for (i
= 0; i
< GNR_MAX_IMC_PCI_CNT
; i
++) {
589 mdev
= pci_get_dev_wrapper(d
->seg
,
590 d
->bus
[res_cfg
->ddr_mdev_bdf
.bus
],
591 res_cfg
->ddr_mdev_bdf
.dev
+ i
/ 7,
592 res_cfg
->ddr_mdev_bdf
.fun
+ i
% 7);
595 if (logical
== logical_idx
) {
609 * get_ddr_munit() - Get the resource of the i-th DDR memory controller.
611 * @d : The pointer to the structure of CPU socket EDAC device.
612 * @i : The index of the CPU socket relative DDR memory controller.
613 * @offset : To store the MMIO offset of the i-th DDR memory controller.
614 * @size : To store the MMIO size of the i-th DDR memory controller.
616 * RETURNS : The PCI device of the i-th DDR memory controller, NULL on failure.
618 static struct pci_dev
*get_ddr_munit(struct skx_dev
*d
, int i
, u32
*offset
, unsigned long *size
)
620 struct pci_dev
*mdev
;
624 switch (res_cfg
->type
) {
626 if (I10NM_GET_IMC_BAR(d
, 0, reg
)) {
627 i10nm_printk(KERN_ERR
, "Failed to get mc0 bar\n");
631 mdev
= get_gnr_mdev(d
, i
, &physical_idx
);
635 *offset
= I10NM_GET_IMC_MMIO_OFFSET(reg
) +
636 I10NM_GNR_IMC_MMIO_OFFSET
+
637 physical_idx
* I10NM_GNR_IMC_MMIO_SIZE
;
638 *size
= I10NM_GNR_IMC_MMIO_SIZE
;
642 if (I10NM_GET_IMC_BAR(d
, i
, reg
)) {
643 i10nm_printk(KERN_ERR
, "Failed to get mc%d bar\n", i
);
647 mdev
= pci_get_dev_wrapper(d
->seg
,
648 d
->bus
[res_cfg
->ddr_mdev_bdf
.bus
],
649 res_cfg
->ddr_mdev_bdf
.dev
+ i
,
650 res_cfg
->ddr_mdev_bdf
.fun
);
654 *offset
= I10NM_GET_IMC_MMIO_OFFSET(reg
);
655 *size
= I10NM_GET_IMC_MMIO_SIZE(reg
);
662 * i10nm_imc_absent() - Check whether the memory controller @imc is absent
664 * @imc : The pointer to the structure of memory controller EDAC device.
666 * RETURNS : true if the memory controller EDAC device is absent, false otherwise.
668 static bool i10nm_imc_absent(struct skx_imc
*imc
)
673 switch (res_cfg
->type
) {
675 for (i
= 0; i
< res_cfg
->ddr_chan_num
; i
++) {
676 mcmtr
= I10NM_GET_MCMTR(imc
, i
);
677 edac_dbg(1, "ch%d mcmtr reg %x\n", i
, mcmtr
);
683 * Some workstations' absent memory controllers still
684 * appear as PCIe devices, misleading the EDAC driver.
685 * By observing that the MMIO registers of these absent
686 * memory controllers consistently hold the value of ~0.
688 * We identify a memory controller as absent by checking
689 * if its MMIO register "mcmtr" == ~0 in all its channels.
697 static int i10nm_get_ddr_munits(void)
699 struct pci_dev
*mdev
;
707 list_for_each_entry(d
, i10nm_edac_list
, list
) {
708 d
->util_all
= pci_get_dev_wrapper(d
->seg
, d
->bus
[res_cfg
->util_all_bdf
.bus
],
709 res_cfg
->util_all_bdf
.dev
,
710 res_cfg
->util_all_bdf
.fun
);
714 d
->uracu
= pci_get_dev_wrapper(d
->seg
, d
->bus
[res_cfg
->uracu_bdf
.bus
],
715 res_cfg
->uracu_bdf
.dev
,
716 res_cfg
->uracu_bdf
.fun
);
720 if (I10NM_GET_SCK_BAR(d
, reg
)) {
721 i10nm_printk(KERN_ERR
, "Failed to socket bar\n");
725 base
= I10NM_GET_SCK_MMIO_BASE(reg
);
726 edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
729 for (lmc
= 0, i
= 0; i
< res_cfg
->ddr_imc_num
; i
++) {
730 mdev
= get_ddr_munit(d
, i
, &off
, &size
);
732 if (i
== 0 && !mdev
) {
733 i10nm_printk(KERN_ERR
, "No IMC found\n");
739 edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n",
740 i
, base
+ off
, size
, reg
);
742 mbase
= ioremap(base
+ off
, size
);
744 i10nm_printk(KERN_ERR
, "Failed to ioremap 0x%llx\n",
749 d
->imc
[lmc
].mbase
= mbase
;
750 if (i10nm_imc_absent(&d
->imc
[lmc
])) {
753 d
->imc
[lmc
].mbase
= NULL
;
754 edac_dbg(2, "Skip absent mc%d\n", i
);
757 d
->imc
[lmc
].mdev
= mdev
;
766 static bool i10nm_check_hbm_imc(struct skx_dev
*d
)
770 if (I10NM_GET_CAPID3_CFG(d
, reg
)) {
771 i10nm_printk(KERN_ERR
, "Failed to get capid3_cfg\n");
775 return I10NM_IS_HBM_PRESENT(reg
) != 0;
778 static int i10nm_get_hbm_munits(void)
780 struct pci_dev
*mdev
;
787 list_for_each_entry(d
, i10nm_edac_list
, list
) {
791 if (!i10nm_check_hbm_imc(d
)) {
792 i10nm_printk(KERN_DEBUG
, "No hbm memory\n");
796 if (I10NM_GET_SCK_BAR(d
, reg
)) {
797 i10nm_printk(KERN_ERR
, "Failed to get socket bar\n");
800 base
= I10NM_GET_SCK_MMIO_BASE(reg
);
802 if (I10NM_GET_HBM_IMC_BAR(d
, reg
)) {
803 i10nm_printk(KERN_ERR
, "Failed to get hbm mc bar\n");
806 base
+= I10NM_GET_HBM_IMC_MMIO_OFFSET(reg
);
808 lmc
= res_cfg
->ddr_imc_num
;
810 for (i
= 0; i
< res_cfg
->hbm_imc_num
; i
++) {
811 mdev
= pci_get_dev_wrapper(d
->seg
, d
->bus
[res_cfg
->hbm_mdev_bdf
.bus
],
812 res_cfg
->hbm_mdev_bdf
.dev
+ i
/ 4,
813 res_cfg
->hbm_mdev_bdf
.fun
+ i
% 4);
815 if (i
== 0 && !mdev
) {
816 i10nm_printk(KERN_ERR
, "No hbm mc found\n");
822 d
->imc
[lmc
].mdev
= mdev
;
823 off
= i
* I10NM_HBM_IMC_MMIO_SIZE
;
825 edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n",
826 lmc
, base
+ off
, I10NM_HBM_IMC_MMIO_SIZE
);
828 mbase
= ioremap(base
+ off
, I10NM_HBM_IMC_MMIO_SIZE
);
830 pci_dev_put(d
->imc
[lmc
].mdev
);
831 d
->imc
[lmc
].mdev
= NULL
;
833 i10nm_printk(KERN_ERR
, "Failed to ioremap for hbm mc 0x%llx\n",
838 d
->imc
[lmc
].mbase
= mbase
;
839 d
->imc
[lmc
].hbm_mc
= true;
841 mcmtr
= I10NM_GET_MCMTR(&d
->imc
[lmc
], 0);
842 if (!I10NM_IS_HBM_IMC(mcmtr
)) {
843 iounmap(d
->imc
[lmc
].mbase
);
844 d
->imc
[lmc
].mbase
= NULL
;
845 d
->imc
[lmc
].hbm_mc
= false;
846 pci_dev_put(d
->imc
[lmc
].mdev
);
847 d
->imc
[lmc
].mdev
= NULL
;
849 i10nm_printk(KERN_ERR
, "This isn't an hbm mc!\n");
860 static struct res_config i10nm_cfg0
= {
863 .busno_cfg_offset
= 0xcc,
867 .ddr_chan_mmio_sz
= 0x4000,
868 .sad_all_bdf
= {1, 29, 0},
869 .pcu_cr3_bdf
= {1, 30, 3},
870 .util_all_bdf
= {1, 29, 1},
871 .uracu_bdf
= {0, 0, 1},
872 .ddr_mdev_bdf
= {0, 12, 0},
873 .hbm_mdev_bdf
= {0, 12, 1},
874 .sad_all_offset
= 0x108,
875 .offsets_scrub
= offsets_scrub_icx
,
876 .offsets_demand
= offsets_demand_icx
,
879 static struct res_config i10nm_cfg1
= {
882 .busno_cfg_offset
= 0xd0,
886 .ddr_chan_mmio_sz
= 0x4000,
887 .sad_all_bdf
= {1, 29, 0},
888 .pcu_cr3_bdf
= {1, 30, 3},
889 .util_all_bdf
= {1, 29, 1},
890 .uracu_bdf
= {0, 0, 1},
891 .ddr_mdev_bdf
= {0, 12, 0},
892 .hbm_mdev_bdf
= {0, 12, 1},
893 .sad_all_offset
= 0x108,
894 .offsets_scrub
= offsets_scrub_icx
,
895 .offsets_demand
= offsets_demand_icx
,
898 static struct res_config spr_cfg
= {
901 .busno_cfg_offset
= 0xd0,
908 .ddr_chan_mmio_sz
= 0x8000,
909 .hbm_chan_mmio_sz
= 0x4000,
910 .support_ddr5
= true,
911 .sad_all_bdf
= {1, 10, 0},
912 .pcu_cr3_bdf
= {1, 30, 3},
913 .util_all_bdf
= {1, 29, 1},
914 .uracu_bdf
= {0, 0, 1},
915 .ddr_mdev_bdf
= {0, 12, 0},
916 .hbm_mdev_bdf
= {0, 12, 1},
917 .sad_all_offset
= 0x300,
918 .offsets_scrub
= offsets_scrub_spr
,
919 .offsets_scrub_hbm0
= offsets_scrub_spr_hbm0
,
920 .offsets_scrub_hbm1
= offsets_scrub_spr_hbm1
,
921 .offsets_demand
= offsets_demand_spr
,
922 .offsets_demand2
= offsets_demand2_spr
,
923 .offsets_demand_hbm0
= offsets_demand_spr_hbm0
,
924 .offsets_demand_hbm1
= offsets_demand_spr_hbm1
,
927 static struct res_config gnr_cfg
= {
930 .busno_cfg_offset
= 0xd0,
934 .ddr_chan_mmio_sz
= 0x4000,
935 .support_ddr5
= true,
936 .sad_all_bdf
= {0, 13, 0},
937 .pcu_cr3_bdf
= {0, 5, 0},
938 .util_all_bdf
= {0, 13, 1},
939 .uracu_bdf
= {0, 0, 1},
940 .ddr_mdev_bdf
= {0, 5, 1},
941 .sad_all_offset
= 0x300,
944 static const struct x86_cpu_id i10nm_cpuids
[] = {
945 X86_MATCH_VFM_STEPPINGS(INTEL_ATOM_TREMONT_D
, X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0
),
946 X86_MATCH_VFM_STEPPINGS(INTEL_ATOM_TREMONT_D
, X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1
),
947 X86_MATCH_VFM_STEPPINGS(INTEL_ICELAKE_X
, X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0
),
948 X86_MATCH_VFM_STEPPINGS(INTEL_ICELAKE_X
, X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1
),
949 X86_MATCH_VFM_STEPPINGS(INTEL_ICELAKE_D
, X86_STEPPINGS(0x0, 0xf), &i10nm_cfg1
),
950 X86_MATCH_VFM_STEPPINGS(INTEL_SAPPHIRERAPIDS_X
, X86_STEPPINGS(0x0, 0xf), &spr_cfg
),
951 X86_MATCH_VFM_STEPPINGS(INTEL_EMERALDRAPIDS_X
, X86_STEPPINGS(0x0, 0xf), &spr_cfg
),
952 X86_MATCH_VFM_STEPPINGS(INTEL_GRANITERAPIDS_X
, X86_STEPPINGS(0x0, 0xf), &gnr_cfg
),
953 X86_MATCH_VFM_STEPPINGS(INTEL_ATOM_CRESTMONT_X
, X86_STEPPINGS(0x0, 0xf), &gnr_cfg
),
954 X86_MATCH_VFM_STEPPINGS(INTEL_ATOM_CRESTMONT
, X86_STEPPINGS(0x0, 0xf), &gnr_cfg
),
957 MODULE_DEVICE_TABLE(x86cpu
, i10nm_cpuids
);
959 static bool i10nm_check_ecc(struct skx_imc
*imc
, int chan
)
963 mcmtr
= I10NM_GET_MCMTR(imc
, chan
);
964 edac_dbg(1, "ch%d mcmtr reg %x\n", chan
, mcmtr
);
966 return !!GET_BITFIELD(mcmtr
, 2, 2);
969 static int i10nm_get_dimm_config(struct mem_ctl_info
*mci
,
970 struct res_config
*cfg
)
972 struct skx_pvt
*pvt
= mci
->pvt_info
;
973 struct skx_imc
*imc
= pvt
->imc
;
974 u32 mtr
, amap
, mcddrtcfg
= 0;
975 struct dimm_info
*dimm
;
978 for (i
= 0; i
< imc
->num_channels
; i
++) {
983 amap
= I10NM_GET_AMAP(imc
, i
);
985 if (res_cfg
->type
!= GNR
)
986 mcddrtcfg
= I10NM_GET_MCDDRTCFG(imc
, i
);
988 for (j
= 0; j
< imc
->num_dimms
; j
++) {
989 dimm
= edac_get_dimm(mci
, i
, j
, 0);
990 mtr
= I10NM_GET_DIMMMTR(imc
, i
, j
);
991 edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n",
992 mtr
, mcddrtcfg
, imc
->mc
, i
, j
);
994 if (IS_DIMM_PRESENT(mtr
))
995 ndimms
+= skx_get_dimm_info(mtr
, 0, amap
, dimm
,
997 else if (IS_NVDIMM_PRESENT(mcddrtcfg
, j
))
998 ndimms
+= skx_get_nvdimm_info(dimm
, imc
, i
, j
,
1001 if (ndimms
&& !i10nm_check_ecc(imc
, i
)) {
1002 i10nm_printk(KERN_ERR
, "ECC is disabled on imc %d channel %d\n",
1011 static struct notifier_block i10nm_mce_dec
= {
1012 .notifier_call
= skx_mce_check_error
,
1013 .priority
= MCE_PRIO_EDAC
,
1016 #ifdef CONFIG_EDAC_DEBUG
1019 * Exercise the address decode logic by writing an address to
1020 * /sys/kernel/debug/edac/i10nm_test/addr.
1022 static struct dentry
*i10nm_test
;
1024 static int debugfs_u64_set(void *data
, u64 val
)
1028 pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val
);
1030 memset(&m
, 0, sizeof(m
));
1031 /* ADDRV + MemRd + Unknown channel */
1032 m
.status
= MCI_STATUS_ADDRV
+ 0x90;
1033 /* One corrected error */
1034 m
.status
|= BIT_ULL(MCI_STATUS_CEC_SHIFT
);
1036 skx_mce_check_error(NULL
, 0, &m
);
1040 DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo
, NULL
, debugfs_u64_set
, "%llu\n");
1042 static void setup_i10nm_debug(void)
1044 i10nm_test
= edac_debugfs_create_dir("i10nm_test");
1048 if (!edac_debugfs_create_file("addr", 0200, i10nm_test
,
1049 NULL
, &fops_u64_wo
)) {
1050 debugfs_remove(i10nm_test
);
1055 static void teardown_i10nm_debug(void)
1057 debugfs_remove_recursive(i10nm_test
);
/*
 * Empty inline stand-ins for setup_i10nm_debug()/teardown_i10nm_debug(),
 * so i10nm_init() and i10nm_exit() can call them unconditionally.
 * NOTE(review): presumably these are the variants built when
 * CONFIG_EDAC_DEBUG is not set - confirm against the surrounding #ifdef.
 */
1060 static inline void setup_i10nm_debug(void) {}
1061 static inline void teardown_i10nm_debug(void) {}
1062 #endif /*CONFIG_EDAC_DEBUG*/
1064 static int __init
i10nm_init(void)
1066 u8 mc
= 0, src_id
= 0, node_id
= 0;
1067 const struct x86_cpu_id
*id
;
1068 struct res_config
*cfg
;
1071 int rc
, i
, off
[3] = {0xd0, 0xc8, 0xcc};
1077 if (ghes_get_devices())
1080 owner
= edac_get_owner();
1081 if (owner
&& strncmp(owner
, EDAC_MOD_STR
, sizeof(EDAC_MOD_STR
)))
1084 if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR
))
1087 id
= x86_match_cpu(i10nm_cpuids
);
1091 cfg
= (struct res_config
*)id
->driver_data
;
1094 rc
= skx_get_hi_lo(0x09a2, off
, &tolm
, &tohm
);
1098 rc
= skx_get_all_bus_mappings(cfg
, &i10nm_edac_list
);
1102 i10nm_printk(KERN_ERR
, "No memory controllers found\n");
1106 rc
= i10nm_get_imc_num(cfg
);
1110 mem_cfg_2lm
= i10nm_check_2lm(cfg
);
1111 skx_set_mem_cfg(mem_cfg_2lm
);
1113 rc
= i10nm_get_ddr_munits();
1115 if (i10nm_get_hbm_munits() && rc
)
1118 imc_num
= res_cfg
->ddr_imc_num
+ res_cfg
->hbm_imc_num
;
1120 list_for_each_entry(d
, i10nm_edac_list
, list
) {
1121 rc
= skx_get_src_id(d
, 0xf8, &src_id
);
1125 rc
= skx_get_node_id(d
, &node_id
);
1129 edac_dbg(2, "src_id = %d node_id = %d\n", src_id
, node_id
);
1130 for (i
= 0; i
< imc_num
; i
++) {
1131 if (!d
->imc
[i
].mdev
)
1134 d
->imc
[i
].mc
= mc
++;
1136 d
->imc
[i
].src_id
= src_id
;
1137 d
->imc
[i
].node_id
= node_id
;
1138 if (d
->imc
[i
].hbm_mc
) {
1139 d
->imc
[i
].chan_mmio_sz
= cfg
->hbm_chan_mmio_sz
;
1140 d
->imc
[i
].num_channels
= cfg
->hbm_chan_num
;
1141 d
->imc
[i
].num_dimms
= cfg
->hbm_dimm_num
;
1143 d
->imc
[i
].chan_mmio_sz
= cfg
->ddr_chan_mmio_sz
;
1144 d
->imc
[i
].num_channels
= cfg
->ddr_chan_num
;
1145 d
->imc
[i
].num_dimms
= cfg
->ddr_dimm_num
;
1148 rc
= skx_register_mci(&d
->imc
[i
], d
->imc
[i
].mdev
,
1149 "Intel_10nm Socket", EDAC_MOD_STR
,
1150 i10nm_get_dimm_config
, cfg
);
1156 rc
= skx_adxl_get();
1161 mce_register_decode_chain(&i10nm_mce_dec
);
1162 setup_i10nm_debug();
1164 if (retry_rd_err_log
&& res_cfg
->offsets_scrub
&& res_cfg
->offsets_demand
) {
1165 skx_set_decode(i10nm_mc_decode
, show_retry_rd_err_log
);
1166 if (retry_rd_err_log
== 2)
1167 enable_retry_rd_err_log(true);
1169 skx_set_decode(i10nm_mc_decode
, NULL
);
1172 i10nm_printk(KERN_INFO
, "%s\n", I10NM_REVISION
);
1180 static void __exit
i10nm_exit(void)
1184 if (retry_rd_err_log
&& res_cfg
->offsets_scrub
&& res_cfg
->offsets_demand
) {
1185 skx_set_decode(NULL
, NULL
);
1186 if (retry_rd_err_log
== 2)
1187 enable_retry_rd_err_log(false);
1190 teardown_i10nm_debug();
1191 mce_unregister_decode_chain(&i10nm_mce_dec
);
1196 module_init(i10nm_init
);
1197 module_exit(i10nm_exit
);
1199 static int set_decoding_via_mca(const char *buf
, const struct kernel_param
*kp
)
1204 ret
= kstrtoul(buf
, 0, &val
);
1209 if (val
&& mem_cfg_2lm
) {
1210 i10nm_printk(KERN_NOTICE
, "Decoding errors via MCA banks for 2LM isn't supported yet\n");
1214 ret
= param_set_int(buf
, kp
);
1219 static const struct kernel_param_ops decoding_via_mca_param_ops
= {
1220 .set
= set_decoding_via_mca
,
1221 .get
= param_get_int
,
/*
 * decoding_via_mca: registered with custom ops (setter
 * set_decoding_via_mca(), getter param_get_int()) and mode 0644.
 */
1224 module_param_cb(decoding_via_mca
, &decoding_via_mca_param_ops
, &decoding_via_mca
, 0644);
1225 MODULE_PARM_DESC(decoding_via_mca
, "decoding_via_mca: 0=off(default), 1=enable");
/* retry_rd_err_log: plain int parameter with mode 0444. */
1227 module_param(retry_rd_err_log
, int, 0444);
1228 MODULE_PARM_DESC(retry_rd_err_log
, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)");
/* Module metadata. */
1230 MODULE_LICENSE("GPL v2");
1231 MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors");