1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Driver for Intel(R) 10nm server memory controller.
4 * Copyright (c) 2019, Intel Corporation.
5 *
6 */
7
8 #include <linux/kernel.h>
9 #include <linux/io.h>
10 #include <asm/cpu_device_id.h>
11 #include <asm/intel-family.h>
12 #include <asm/mce.h>
13 #include "edac_module.h"
14 #include "skx_common.h"
15
/* Driver revision and module name strings. */
#define I10NM_REVISION	"v0.0.6"
#define EDAC_MOD_STR	"i10nm_edac"

/* Debug macros */
#define i10nm_printk(level, fmt, arg...)	\
	edac_printk(level, "i10nm", fmt, ##arg)

/*
 * PCI config-space accessors.
 *
 * Each macro expands to a pci_read_config_dword() call, so it evaluates to
 * that call's return value and stores the dword that was read in @reg.
 * Where the register offset (or stride) differs on GNR, it is selected at
 * run time from res_cfg->type.
 */
#define I10NM_GET_SCK_BAR(d, reg)	\
	pci_read_config_dword((d)->uracu, 0xd0, &(reg))
#define I10NM_GET_IMC_BAR(d, i, reg)		\
	pci_read_config_dword((d)->uracu,	\
	(res_cfg->type == GNR ? 0xd4 : 0xd8) + (i) * 4, &(reg))
#define I10NM_GET_SAD(d, offset, i, reg)\
	pci_read_config_dword((d)->sad_all, (offset) + (i) * \
	(res_cfg->type == GNR ? 12 : 8), &(reg))
#define I10NM_GET_HBM_IMC_BAR(d, reg)	\
	pci_read_config_dword((d)->uracu, 0xd4, &(reg))
#define I10NM_GET_CAPID3_CFG(d, reg)	\
	pci_read_config_dword((d)->pcu_cr3,	\
	res_cfg->type == GNR ? 0x290 : 0x90, &(reg))
#define I10NM_GET_CAPID5_CFG(d, reg)	\
	pci_read_config_dword((d)->pcu_cr3,	\
	res_cfg->type == GNR ? 0x298 : 0x98, &(reg))

/*
 * MMIO accessors.
 *
 * @m is a memory controller whose registers are mapped at (m)->mbase.
 * The registers of channel @i start (i) * (m)->chan_mmio_sz bytes into that
 * mapping. The named-register macros pick a different offset for HBM
 * controllers ((m)->hbm_mc) and, where applicable, for GNR.
 */
#define I10NM_GET_DIMMMTR(m, i, j)	\
	readl((m)->mbase + ((m)->hbm_mc ? 0x80c : \
	(res_cfg->type == GNR ? 0xc0c : 0x2080c)) + \
	(i) * (m)->chan_mmio_sz + (j) * 4)
#define I10NM_GET_MCDDRTCFG(m, i)	\
	readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \
	(i) * (m)->chan_mmio_sz)
#define I10NM_GET_MCMTR(m, i)		\
	readl((m)->mbase + ((m)->hbm_mc ? 0xef8 : \
	(res_cfg->type == GNR ? 0xaf8 : 0x20ef8)) + \
	(i) * (m)->chan_mmio_sz)
#define I10NM_GET_AMAP(m, i)		\
	readl((m)->mbase + ((m)->hbm_mc ? 0x814 : \
	(res_cfg->type == GNR ? 0xc14 : 0x20814)) + \
	(i) * (m)->chan_mmio_sz)
/* Generic 32/64-bit channel register access at an explicit @offset. */
#define I10NM_GET_REG32(m, i, offset)	\
	readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
#define I10NM_GET_REG64(m, i, offset)	\
	readq((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
#define I10NM_SET_REG32(m, i, offset, v)	\
	writel(v, (m)->mbase + (i) * (m)->chan_mmio_sz + (offset))

/*
 * Decoders for the BAR registers read above: the socket MMIO base is
 * bits 0-28 shifted left by 23; a controller's window starts at
 * bits 0-10 shifted left by 12, and its size is derived from the
 * difference between bits 13-23 and bits 0-10 (inclusive), also in
 * units of 1 << 12.
 */
#define I10NM_GET_SCK_MMIO_BASE(reg)	(GET_BITFIELD(reg, 0, 28) << 23)
#define I10NM_GET_IMC_MMIO_OFFSET(reg)	(GET_BITFIELD(reg, 0, 10) << 12)
#define I10NM_GET_IMC_MMIO_SIZE(reg)	((GET_BITFIELD(reg, 13, 23) - \
					 GET_BITFIELD(reg, 0, 10) + 1) << 12)
#define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg)	\
	((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000)

/* Fixed MMIO layout constants and capability/MCMTR bitfield extractors. */
#define I10NM_GNR_IMC_MMIO_OFFSET	0x24c000
#define I10NM_GNR_IMC_MMIO_SIZE		0x4000
#define I10NM_HBM_IMC_MMIO_SIZE		0x9000
#define I10NM_DDR_IMC_CH_CNT(reg)	GET_BITFIELD(reg, 21, 24)
#define I10NM_IS_HBM_PRESENT(reg)	GET_BITFIELD(reg, 27, 30)
#define I10NM_IS_HBM_IMC(reg)		GET_BITFIELD(reg, 29, 29)

/* Number of SAD entries scanned by i10nm_check_2lm() and their flag bits. */
#define I10NM_MAX_SAD			16
#define I10NM_SAD_ENABLE(reg)		GET_BITFIELD(reg, 0, 0)
#define I10NM_SAD_NM_CACHEABLE(reg)	GET_BITFIELD(reg, 5, 5)

/*
 * Bits of the first (index 0) retry_rd_err_log register.
 * UC/NOOVER/EN are the control bits programmed by
 * __enable_retry_rd_err_log(); OVER_UC_V is the set of bits that
 * show_retry_rd_err_log() clears when retry_rd_err_log == 2.
 */
#define RETRY_RD_ERR_LOG_UC		BIT(1)
#define RETRY_RD_ERR_LOG_NOOVER		BIT(14)
#define RETRY_RD_ERR_LOG_EN		BIT(15)
#define RETRY_RD_ERR_LOG_NOOVER_UC	(BIT(14) | BIT(1))
#define RETRY_RD_ERR_LOG_OVER_UC_V	(BIT(2) | BIT(1) | BIT(0))
84
/* List of skx_dev entries (one per socket); obtained from skx_get_all_bus_mappings(). */
static struct list_head *i10nm_edac_list;

/* Resource configuration of the running CPU; set in i10nm_init() from the matched i10nm_cpuids entry. */
static struct res_config *res_cfg;
/*
 * A value of 2 makes show_retry_rd_err_log() clear the log status bits after
 * dumping them. NOTE(review): the variable is assigned outside the part of
 * the file visible here (presumably a module parameter) - confirm.
 */
static int retry_rd_err_log;
/* Non-zero allows i10nm_mc_decode() to decode errors from the MCA bank registers. */
static int decoding_via_mca;
/* Result of i10nm_check_2lm(); when true, MCA-based decoding is refused. */
static bool mem_cfg_2lm;

/*
 * Offsets, relative to a channel's MMIO window, of the retry_rd_err_log
 * register sets. Index 0 is the register programmed by
 * __enable_retry_rd_err_log(); show_retry_rd_err_log() dumps indices 0-5,
 * reading index 5 (and, on SPR, index 2) as a 64-bit register.
 * The tables are referenced from the res_config structures below.
 */
static u32 offsets_scrub_icx[]  = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8};
static u32 offsets_scrub_spr[]  = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8};
static u32 offsets_scrub_spr_hbm0[]  = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8};
static u32 offsets_scrub_spr_hbm1[]  = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8};
static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0};
static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0};
static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10};
static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0};
static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0};
101
/*
 * Switch the retry_rd_err_log control registers of one channel between the
 * driver's logging setup and the setup that was found when logging was
 * enabled.
 *
 * @imc:             memory controller that owns the channel
 * @chan:            channel index within @imc
 * @enable:          true: remember the current register values in
 *                   imc->chan[chan] and turn logging on;
 *                   false: put the remembered UC/NOOVER/EN bits back
 * @offsets_scrub:   scrub register offsets, only entry [0] is accessed
 * @offsets_demand:  demand register offsets, only entry [0] is accessed
 * @offsets_demand2: second demand register offsets (entry [0] is accessed),
 *                   or NULL when the platform has no such register set
 */
static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable,
				      u32 *offsets_scrub, u32 *offsets_demand,
				      u32 *offsets_demand2)
{
	const bool has_demand2 = offsets_demand2 != NULL;
	u32 scrub, demand, demand2 = 0;
	u32 saved;

	scrub = I10NM_GET_REG32(imc, chan, offsets_scrub[0]);
	demand = I10NM_GET_REG32(imc, chan, offsets_demand[0]);
	if (has_demand2)
		demand2 = I10NM_GET_REG32(imc, chan, offsets_demand2[0]);

	if (!enable) {
		/*
		 * Restore: bits that were set at save time are set again,
		 * bits that were clear at save time are cleared again.
		 */
		saved = imc->chan[chan].retry_rd_err_log_s;
		scrub |= saved & RETRY_RD_ERR_LOG_NOOVER_UC;
		if (!(saved & RETRY_RD_ERR_LOG_EN))
			scrub &= ~RETRY_RD_ERR_LOG_EN;

		saved = imc->chan[chan].retry_rd_err_log_d;
		demand |= saved & RETRY_RD_ERR_LOG_NOOVER_UC;
		if (!(saved & RETRY_RD_ERR_LOG_EN))
			demand &= ~RETRY_RD_ERR_LOG_EN;

		if (has_demand2) {
			/* NOOVER was forced on for this register, so it may need clearing. */
			saved = imc->chan[chan].retry_rd_err_log_d2;
			demand2 |= saved & RETRY_RD_ERR_LOG_UC;
			if (!(saved & RETRY_RD_ERR_LOG_NOOVER))
				demand2 &= ~RETRY_RD_ERR_LOG_NOOVER;
			if (!(saved & RETRY_RD_ERR_LOG_EN))
				demand2 &= ~RETRY_RD_ERR_LOG_EN;
		}
	} else {
		/* Remember what was there before touching anything. */
		imc->chan[chan].retry_rd_err_log_s = scrub;
		imc->chan[chan].retry_rd_err_log_d = demand;
		if (has_demand2)
			imc->chan[chan].retry_rd_err_log_d2 = demand2;

		/* Scrub and demand: clear NOOVER and UC, set EN. */
		scrub = (scrub & ~RETRY_RD_ERR_LOG_NOOVER_UC) | RETRY_RD_ERR_LOG_EN;
		demand = (demand & ~RETRY_RD_ERR_LOG_NOOVER_UC) | RETRY_RD_ERR_LOG_EN;

		/* Second demand register: clear UC, set NOOVER and EN. */
		if (has_demand2) {
			demand2 &= ~RETRY_RD_ERR_LOG_UC;
			demand2 |= RETRY_RD_ERR_LOG_NOOVER | RETRY_RD_ERR_LOG_EN;
		}
	}

	I10NM_SET_REG32(imc, chan, offsets_scrub[0], scrub);
	I10NM_SET_REG32(imc, chan, offsets_demand[0], demand);
	if (has_demand2)
		I10NM_SET_REG32(imc, chan, offsets_demand2[0], demand2);
}
160
/*
 * Enable (or restore) retry_rd_err_log on every mapped channel of every
 * socket in i10nm_edac_list.
 *
 * The first res_cfg->ddr_imc_num entries of d->imc[] are handled as DDR
 * controllers; the following res_cfg->hbm_imc_num entries are handled as HBM
 * controllers, each HBM channel having two register sets (hbm0 and hbm1).
 */
static void enable_retry_rd_err_log(bool enable)
{
	int mc_idx, ch, ddr_mcs, all_mcs, nr_chan;
	struct skx_imc *mc;
	struct skx_dev *sock;

	edac_dbg(2, "\n");

	list_for_each_entry(sock, i10nm_edac_list, list) {
		ddr_mcs = res_cfg->ddr_imc_num;
		nr_chan = res_cfg->ddr_chan_num;

		for (mc_idx = 0; mc_idx < ddr_mcs; mc_idx++) {
			mc = &sock->imc[mc_idx];

			/* Unmapped controllers have no registers to program. */
			if (!mc->mbase)
				continue;

			for (ch = 0; ch < nr_chan; ch++)
				__enable_retry_rd_err_log(mc, ch, enable,
							  res_cfg->offsets_scrub,
							  res_cfg->offsets_demand,
							  res_cfg->offsets_demand2);
		}

		all_mcs = ddr_mcs + res_cfg->hbm_imc_num;
		nr_chan = res_cfg->hbm_chan_num;

		/* mc_idx continues where the DDR loop stopped. */
		while (mc_idx < all_mcs) {
			mc = &sock->imc[mc_idx++];

			if (!mc->mbase || !mc->hbm_mc)
				continue;

			for (ch = 0; ch < nr_chan; ch++) {
				__enable_retry_rd_err_log(mc, ch, enable,
							  res_cfg->offsets_scrub_hbm0,
							  res_cfg->offsets_demand_hbm0,
							  NULL);
				__enable_retry_rd_err_log(mc, ch, enable,
							  res_cfg->offsets_scrub_hbm1,
							  res_cfg->offsets_demand_hbm1,
							  NULL);
			}
		}
	}
}
206
/*
 * Format the retry_rd_err_log registers and the corrected error counters of
 * the channel identified by @res into @msg.
 *
 * @res:       decoded error; selects the device, memory controller, channel
 *             and (for HBM, via bit 0 of res->cs) the register set
 * @msg:       output buffer
 * @len:       size of @msg in bytes
 * @scrub_err: true to dump the scrub log, false to dump the demand log(s)
 *
 * Nothing is written when the memory controller is not mapped. When
 * retry_rd_err_log == 2 the OVER/UC/V status bits of the dumped log(s) are
 * cleared afterwards.
 */
static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
				  int len, bool scrub_err)
{
	struct skx_imc *imc = &res->dev->imc[res->imc];
	u32 *regs, *regs2 = NULL;
	u32 l0, l1, l2, l3, l4;
	u32 x0 = 0, x1 = 0, x3 = 0, x4 = 0;
	u32 c0, c1, c2, c3, cnt_base;
	u64 l2_wide, l5;
	u64 x2_wide, x5 = 0;
	int used, pseudo_ch = 0;

	if (!imc->mbase)
		return;

	/* Pick the register set(s) to dump. */
	if (!imc->hbm_mc) {
		if (scrub_err) {
			regs = res_cfg->offsets_scrub;
		} else {
			regs = res_cfg->offsets_demand;
			/* May be NULL: only some platforms have a second demand log. */
			regs2 = res_cfg->offsets_demand2;
		}
	} else {
		pseudo_ch = res->cs & 1;

		if (scrub_err)
			regs = pseudo_ch ? res_cfg->offsets_scrub_hbm1 :
					   res_cfg->offsets_scrub_hbm0;
		else
			regs = pseudo_ch ? res_cfg->offsets_demand_hbm1 :
					   res_cfg->offsets_demand_hbm0;
	}

	/* Entry [2] is read later because its width depends on the CPU type. */
	l0 = I10NM_GET_REG32(imc, res->channel, regs[0]);
	l1 = I10NM_GET_REG32(imc, res->channel, regs[1]);
	l3 = I10NM_GET_REG32(imc, res->channel, regs[3]);
	l4 = I10NM_GET_REG32(imc, res->channel, regs[4]);
	l5 = I10NM_GET_REG64(imc, res->channel, regs[5]);

	if (regs2) {
		x0 = I10NM_GET_REG32(imc, res->channel, regs2[0]);
		x1 = I10NM_GET_REG32(imc, res->channel, regs2[1]);
		x3 = I10NM_GET_REG32(imc, res->channel, regs2[3]);
		x4 = I10NM_GET_REG32(imc, res->channel, regs2[4]);
		x5 = I10NM_GET_REG64(imc, res->channel, regs2[5]);
	}

	if (res_cfg->type != SPR) {
		l2 = I10NM_GET_REG32(imc, res->channel, regs[2]);
		used = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]",
				l0, l1, l2, l3, l4, l5);
	} else {
		l2_wide = I10NM_GET_REG64(imc, res->channel, regs[2]);
		used = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx",
				l0, l1, l2_wide, l3, l4, l5);

		/* snprintf() reports the untruncated length; append only if room is left. */
		if (len - used > 0) {
			if (!regs2) {
				used += snprintf(msg + used, len - used, "]");
			} else {
				x2_wide = I10NM_GET_REG64(imc, res->channel, regs2[2]);
				used += snprintf(msg + used, len - used, " %.8x %.8x %.16llx %.8x %.8x %.16llx]",
						 x0, x1, x2_wide, x3, x4, x5);
			}
		}
	}

	/* Four consecutive 32-bit corrected error count registers. */
	if (!imc->hbm_mc)
		cnt_base = 0x22c18;
	else if (pseudo_ch)
		cnt_base = 0x2c18;
	else
		cnt_base = 0x2818;

	c0 = I10NM_GET_REG32(imc, res->channel, cnt_base);
	c1 = I10NM_GET_REG32(imc, res->channel, cnt_base + 0x4);
	c2 = I10NM_GET_REG32(imc, res->channel, cnt_base + 0x8);
	c3 = I10NM_GET_REG32(imc, res->channel, cnt_base + 0xc);

	/* Each register is printed as two 16-bit halves, low half first. */
	if (len - used > 0)
		snprintf(msg + used, len - used,
			 " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]",
			 c0 & 0xffff, c0 >> 16,
			 c1 & 0xffff, c1 >> 16,
			 c2 & 0xffff, c2 >> 16,
			 c3 & 0xffff, c3 >> 16);

	if (retry_rd_err_log != 2)
		return;

	/* Clear status bits */
	if (l0 & RETRY_RD_ERR_LOG_OVER_UC_V) {
		l0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
		I10NM_SET_REG32(imc, res->channel, regs[0], l0);
	}

	if (regs2 && (x0 & RETRY_RD_ERR_LOG_OVER_UC_V)) {
		x0 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
		I10NM_SET_REG32(imc, res->channel, regs2[0], x0);
	}
}
315
pci_get_dev_wrapper(int dom,unsigned int bus,unsigned int dev,unsigned int fun)316 static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
317 unsigned int dev, unsigned int fun)
318 {
319 struct pci_dev *pdev;
320
321 pdev = pci_get_domain_bus_and_slot(dom, bus, PCI_DEVFN(dev, fun));
322 if (!pdev) {
323 edac_dbg(2, "No device %02x:%02x.%x\n",
324 bus, dev, fun);
325 return NULL;
326 }
327
328 if (unlikely(pci_enable_device(pdev) < 0)) {
329 edac_dbg(2, "Failed to enable device %02x:%02x.%x\n",
330 bus, dev, fun);
331 pci_dev_put(pdev);
332 return NULL;
333 }
334
335 return pdev;
336 }
337
338 /**
339 * i10nm_get_imc_num() - Get the number of present DDR memory controllers.
340 *
341 * @cfg : The pointer to the structure of EDAC resource configurations.
342 *
343 * For Granite Rapids CPUs, the number of present DDR memory controllers read
344 * at runtime overwrites the value statically configured in @cfg->ddr_imc_num.
345 * For other CPUs, the number of present DDR memory controllers is statically
346 * configured in @cfg->ddr_imc_num.
347 *
348 * RETURNS : 0 on success, < 0 on failure.
349 */
i10nm_get_imc_num(struct res_config * cfg)350 static int i10nm_get_imc_num(struct res_config *cfg)
351 {
352 int n, imc_num, chan_num = 0;
353 struct skx_dev *d;
354 u32 reg;
355
356 list_for_each_entry(d, i10nm_edac_list, list) {
357 d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->pcu_cr3_bdf.bus],
358 res_cfg->pcu_cr3_bdf.dev,
359 res_cfg->pcu_cr3_bdf.fun);
360 if (!d->pcu_cr3)
361 continue;
362
363 if (I10NM_GET_CAPID5_CFG(d, reg))
364 continue;
365
366 n = I10NM_DDR_IMC_CH_CNT(reg);
367
368 if (!chan_num) {
369 chan_num = n;
370 edac_dbg(2, "Get DDR CH number: %d\n", chan_num);
371 } else if (chan_num != n) {
372 i10nm_printk(KERN_NOTICE, "Get DDR CH numbers: %d, %d\n", chan_num, n);
373 }
374 }
375
376 switch (cfg->type) {
377 case GNR:
378 /*
379 * One channel per DDR memory controller for Granite Rapids CPUs.
380 */
381 imc_num = chan_num;
382
383 if (!imc_num) {
384 i10nm_printk(KERN_ERR, "Invalid DDR MC number\n");
385 return -ENODEV;
386 }
387
388 if (imc_num > I10NM_NUM_DDR_IMC) {
389 i10nm_printk(KERN_ERR, "Need to make I10NM_NUM_DDR_IMC >= %d\n", imc_num);
390 return -EINVAL;
391 }
392
393 if (cfg->ddr_imc_num != imc_num) {
394 /*
395 * Store the number of present DDR memory controllers.
396 */
397 cfg->ddr_imc_num = imc_num;
398 edac_dbg(2, "Set DDR MC number: %d", imc_num);
399 }
400
401 return 0;
402 default:
403 /*
404 * For other CPUs, the number of present DDR memory controllers
405 * is statically pre-configured in cfg->ddr_imc_num.
406 */
407 return 0;
408 }
409 }
410
i10nm_check_2lm(struct res_config * cfg)411 static bool i10nm_check_2lm(struct res_config *cfg)
412 {
413 struct skx_dev *d;
414 u32 reg;
415 int i;
416
417 list_for_each_entry(d, i10nm_edac_list, list) {
418 d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->sad_all_bdf.bus],
419 res_cfg->sad_all_bdf.dev,
420 res_cfg->sad_all_bdf.fun);
421 if (!d->sad_all)
422 continue;
423
424 for (i = 0; i < I10NM_MAX_SAD; i++) {
425 I10NM_GET_SAD(d, cfg->sad_all_offset, i, reg);
426 if (I10NM_SAD_ENABLE(reg) && I10NM_SAD_NM_CACHEABLE(reg)) {
427 edac_dbg(2, "2-level memory configuration.\n");
428 return true;
429 }
430 }
431 }
432
433 return false;
434 }
435
436 /*
437 * Check whether the error comes from DDRT by ICX/Tremont/SPR model specific error code.
438 * Refer to SDM vol3B 17.11.3/17.13.2 Intel IMC MC error codes for IA32_MCi_STATUS.
439 */
i10nm_mscod_is_ddrt(u32 mscod)440 static bool i10nm_mscod_is_ddrt(u32 mscod)
441 {
442 switch (res_cfg->type) {
443 case I10NM:
444 switch (mscod) {
445 case 0x0106: case 0x0107:
446 case 0x0800: case 0x0804:
447 case 0x0806 ... 0x0808:
448 case 0x080a ... 0x080e:
449 case 0x0810: case 0x0811:
450 case 0x0816: case 0x081e:
451 case 0x081f:
452 return true;
453 }
454
455 break;
456 case SPR:
457 switch (mscod) {
458 case 0x0800: case 0x0804:
459 case 0x0806 ... 0x0808:
460 case 0x080a ... 0x080e:
461 case 0x0810: case 0x0811:
462 case 0x0816: case 0x081e:
463 case 0x081f:
464 return true;
465 }
466
467 break;
468 default:
469 return false;
470 }
471
472 return false;
473 }
474
i10nm_mc_decode_available(struct mce * mce)475 static bool i10nm_mc_decode_available(struct mce *mce)
476 {
477 #define ICX_IMCx_CHy 0x06666000
478 u8 bank;
479
480 if (!decoding_via_mca || mem_cfg_2lm)
481 return false;
482
483 if ((mce->status & (MCI_STATUS_MISCV | MCI_STATUS_ADDRV))
484 != (MCI_STATUS_MISCV | MCI_STATUS_ADDRV))
485 return false;
486
487 bank = mce->bank;
488
489 switch (res_cfg->type) {
490 case I10NM:
491 /* Check whether the bank is one of {13,14,17,18,21,22,25,26} */
492 if (!(ICX_IMCx_CHy & (1 << bank)))
493 return false;
494 break;
495 case SPR:
496 if (bank < 13 || bank > 20)
497 return false;
498 break;
499 default:
500 return false;
501 }
502
503 /* DDRT errors can't be decoded from MCA bank registers */
504 if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT)
505 return false;
506
507 if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status)))
508 return false;
509
510 return true;
511 }
512
i10nm_mc_decode(struct decoded_addr * res)513 static bool i10nm_mc_decode(struct decoded_addr *res)
514 {
515 struct mce *m = res->mce;
516 struct skx_dev *d;
517 u8 bank;
518
519 if (!i10nm_mc_decode_available(m))
520 return false;
521
522 list_for_each_entry(d, i10nm_edac_list, list) {
523 if (d->imc[0].src_id == m->socketid) {
524 res->socket = m->socketid;
525 res->dev = d;
526 break;
527 }
528 }
529
530 switch (res_cfg->type) {
531 case I10NM:
532 bank = m->bank - 13;
533 res->imc = bank / 4;
534 res->channel = bank % 2;
535 res->column = GET_BITFIELD(m->misc, 9, 18) << 2;
536 res->row = GET_BITFIELD(m->misc, 19, 39);
537 res->bank_group = GET_BITFIELD(m->misc, 40, 41);
538 res->bank_address = GET_BITFIELD(m->misc, 42, 43);
539 res->bank_group |= GET_BITFIELD(m->misc, 44, 44) << 2;
540 res->rank = GET_BITFIELD(m->misc, 56, 58);
541 res->dimm = res->rank >> 2;
542 res->rank = res->rank % 4;
543 break;
544 case SPR:
545 bank = m->bank - 13;
546 res->imc = bank / 2;
547 res->channel = bank % 2;
548 res->column = GET_BITFIELD(m->misc, 9, 18) << 2;
549 res->row = GET_BITFIELD(m->misc, 19, 36);
550 res->bank_group = GET_BITFIELD(m->misc, 37, 38);
551 res->bank_address = GET_BITFIELD(m->misc, 39, 40);
552 res->bank_group |= GET_BITFIELD(m->misc, 41, 41) << 2;
553 res->rank = GET_BITFIELD(m->misc, 57, 57);
554 res->dimm = GET_BITFIELD(m->misc, 58, 58);
555 break;
556 default:
557 return false;
558 }
559
560 if (!res->dev) {
561 skx_printk(KERN_ERR, "No device for src_id %d imc %d\n",
562 m->socketid, res->imc);
563 return false;
564 }
565
566 return true;
567 }
568
569 /**
570 * get_gnr_mdev() - Get the PCI device of the @logical_idx-th DDR memory controller.
571 *
572 * @d : The pointer to the structure of CPU socket EDAC device.
573 * @logical_idx : The logical index of the present memory controller (0 ~ max present MC# - 1).
574 * @physical_idx : To store the corresponding physical index of @logical_idx.
575 *
576 * RETURNS : The PCI device of the @logical_idx-th DDR memory controller, NULL on failure.
577 */
get_gnr_mdev(struct skx_dev * d,int logical_idx,int * physical_idx)578 static struct pci_dev *get_gnr_mdev(struct skx_dev *d, int logical_idx, int *physical_idx)
579 {
580 #define GNR_MAX_IMC_PCI_CNT 28
581
582 struct pci_dev *mdev;
583 int i, logical = 0;
584
585 /*
586 * Detect present memory controllers from { PCI device: 8-5, function 7-1 }
587 */
588 for (i = 0; i < GNR_MAX_IMC_PCI_CNT; i++) {
589 mdev = pci_get_dev_wrapper(d->seg,
590 d->bus[res_cfg->ddr_mdev_bdf.bus],
591 res_cfg->ddr_mdev_bdf.dev + i / 7,
592 res_cfg->ddr_mdev_bdf.fun + i % 7);
593
594 if (mdev) {
595 if (logical == logical_idx) {
596 *physical_idx = i;
597 return mdev;
598 }
599
600 pci_dev_put(mdev);
601 logical++;
602 }
603 }
604
605 return NULL;
606 }
607
608 /**
609 * get_ddr_munit() - Get the resource of the i-th DDR memory controller.
610 *
611 * @d : The pointer to the structure of CPU socket EDAC device.
612 * @i : The index of the CPU socket relative DDR memory controller.
613 * @offset : To store the MMIO offset of the i-th DDR memory controller.
614 * @size : To store the MMIO size of the i-th DDR memory controller.
615 *
616 * RETURNS : The PCI device of the i-th DDR memory controller, NULL on failure.
617 */
get_ddr_munit(struct skx_dev * d,int i,u32 * offset,unsigned long * size)618 static struct pci_dev *get_ddr_munit(struct skx_dev *d, int i, u32 *offset, unsigned long *size)
619 {
620 struct pci_dev *mdev;
621 int physical_idx;
622 u32 reg;
623
624 switch (res_cfg->type) {
625 case GNR:
626 if (I10NM_GET_IMC_BAR(d, 0, reg)) {
627 i10nm_printk(KERN_ERR, "Failed to get mc0 bar\n");
628 return NULL;
629 }
630
631 mdev = get_gnr_mdev(d, i, &physical_idx);
632 if (!mdev)
633 return NULL;
634
635 *offset = I10NM_GET_IMC_MMIO_OFFSET(reg) +
636 I10NM_GNR_IMC_MMIO_OFFSET +
637 physical_idx * I10NM_GNR_IMC_MMIO_SIZE;
638 *size = I10NM_GNR_IMC_MMIO_SIZE;
639
640 break;
641 default:
642 if (I10NM_GET_IMC_BAR(d, i, reg)) {
643 i10nm_printk(KERN_ERR, "Failed to get mc%d bar\n", i);
644 return NULL;
645 }
646
647 mdev = pci_get_dev_wrapper(d->seg,
648 d->bus[res_cfg->ddr_mdev_bdf.bus],
649 res_cfg->ddr_mdev_bdf.dev + i,
650 res_cfg->ddr_mdev_bdf.fun);
651 if (!mdev)
652 return NULL;
653
654 *offset = I10NM_GET_IMC_MMIO_OFFSET(reg);
655 *size = I10NM_GET_IMC_MMIO_SIZE(reg);
656 }
657
658 return mdev;
659 }
660
661 /**
662 * i10nm_imc_absent() - Check whether the memory controller @imc is absent
663 *
664 * @imc : The pointer to the structure of memory controller EDAC device.
665 *
666 * RETURNS : true if the memory controller EDAC device is absent, false otherwise.
667 */
i10nm_imc_absent(struct skx_imc * imc)668 static bool i10nm_imc_absent(struct skx_imc *imc)
669 {
670 u32 mcmtr;
671 int i;
672
673 switch (res_cfg->type) {
674 case SPR:
675 for (i = 0; i < res_cfg->ddr_chan_num; i++) {
676 mcmtr = I10NM_GET_MCMTR(imc, i);
677 edac_dbg(1, "ch%d mcmtr reg %x\n", i, mcmtr);
678 if (mcmtr != ~0)
679 return false;
680 }
681
682 /*
683 * Some workstations' absent memory controllers still
684 * appear as PCIe devices, misleading the EDAC driver.
685 * By observing that the MMIO registers of these absent
686 * memory controllers consistently hold the value of ~0.
687 *
688 * We identify a memory controller as absent by checking
689 * if its MMIO register "mcmtr" == ~0 in all its channels.
690 */
691 return true;
692 default:
693 return false;
694 }
695 }
696
i10nm_get_ddr_munits(void)697 static int i10nm_get_ddr_munits(void)
698 {
699 struct pci_dev *mdev;
700 void __iomem *mbase;
701 unsigned long size;
702 struct skx_dev *d;
703 int i, lmc, j = 0;
704 u32 reg, off;
705 u64 base;
706
707 list_for_each_entry(d, i10nm_edac_list, list) {
708 d->util_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->util_all_bdf.bus],
709 res_cfg->util_all_bdf.dev,
710 res_cfg->util_all_bdf.fun);
711 if (!d->util_all)
712 return -ENODEV;
713
714 d->uracu = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->uracu_bdf.bus],
715 res_cfg->uracu_bdf.dev,
716 res_cfg->uracu_bdf.fun);
717 if (!d->uracu)
718 return -ENODEV;
719
720 if (I10NM_GET_SCK_BAR(d, reg)) {
721 i10nm_printk(KERN_ERR, "Failed to socket bar\n");
722 return -ENODEV;
723 }
724
725 base = I10NM_GET_SCK_MMIO_BASE(reg);
726 edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
727 j++, base, reg);
728
729 for (lmc = 0, i = 0; i < res_cfg->ddr_imc_num; i++) {
730 mdev = get_ddr_munit(d, i, &off, &size);
731
732 if (i == 0 && !mdev) {
733 i10nm_printk(KERN_ERR, "No IMC found\n");
734 return -ENODEV;
735 }
736 if (!mdev)
737 continue;
738
739 edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n",
740 i, base + off, size, reg);
741
742 mbase = ioremap(base + off, size);
743 if (!mbase) {
744 i10nm_printk(KERN_ERR, "Failed to ioremap 0x%llx\n",
745 base + off);
746 return -ENODEV;
747 }
748
749 d->imc[lmc].mbase = mbase;
750 if (i10nm_imc_absent(&d->imc[lmc])) {
751 pci_dev_put(mdev);
752 iounmap(mbase);
753 d->imc[lmc].mbase = NULL;
754 edac_dbg(2, "Skip absent mc%d\n", i);
755 continue;
756 } else {
757 d->imc[lmc].mdev = mdev;
758 lmc++;
759 }
760 }
761 }
762
763 return 0;
764 }
765
i10nm_check_hbm_imc(struct skx_dev * d)766 static bool i10nm_check_hbm_imc(struct skx_dev *d)
767 {
768 u32 reg;
769
770 if (I10NM_GET_CAPID3_CFG(d, reg)) {
771 i10nm_printk(KERN_ERR, "Failed to get capid3_cfg\n");
772 return false;
773 }
774
775 return I10NM_IS_HBM_PRESENT(reg) != 0;
776 }
777
i10nm_get_hbm_munits(void)778 static int i10nm_get_hbm_munits(void)
779 {
780 struct pci_dev *mdev;
781 void __iomem *mbase;
782 u32 reg, off, mcmtr;
783 struct skx_dev *d;
784 int i, lmc;
785 u64 base;
786
787 list_for_each_entry(d, i10nm_edac_list, list) {
788 if (!d->pcu_cr3)
789 return -ENODEV;
790
791 if (!i10nm_check_hbm_imc(d)) {
792 i10nm_printk(KERN_DEBUG, "No hbm memory\n");
793 return -ENODEV;
794 }
795
796 if (I10NM_GET_SCK_BAR(d, reg)) {
797 i10nm_printk(KERN_ERR, "Failed to get socket bar\n");
798 return -ENODEV;
799 }
800 base = I10NM_GET_SCK_MMIO_BASE(reg);
801
802 if (I10NM_GET_HBM_IMC_BAR(d, reg)) {
803 i10nm_printk(KERN_ERR, "Failed to get hbm mc bar\n");
804 return -ENODEV;
805 }
806 base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg);
807
808 lmc = res_cfg->ddr_imc_num;
809
810 for (i = 0; i < res_cfg->hbm_imc_num; i++) {
811 mdev = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->hbm_mdev_bdf.bus],
812 res_cfg->hbm_mdev_bdf.dev + i / 4,
813 res_cfg->hbm_mdev_bdf.fun + i % 4);
814
815 if (i == 0 && !mdev) {
816 i10nm_printk(KERN_ERR, "No hbm mc found\n");
817 return -ENODEV;
818 }
819 if (!mdev)
820 continue;
821
822 d->imc[lmc].mdev = mdev;
823 off = i * I10NM_HBM_IMC_MMIO_SIZE;
824
825 edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n",
826 lmc, base + off, I10NM_HBM_IMC_MMIO_SIZE);
827
828 mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE);
829 if (!mbase) {
830 pci_dev_put(d->imc[lmc].mdev);
831 d->imc[lmc].mdev = NULL;
832
833 i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n",
834 base + off);
835 return -ENOMEM;
836 }
837
838 d->imc[lmc].mbase = mbase;
839 d->imc[lmc].hbm_mc = true;
840
841 mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0);
842 if (!I10NM_IS_HBM_IMC(mcmtr)) {
843 iounmap(d->imc[lmc].mbase);
844 d->imc[lmc].mbase = NULL;
845 d->imc[lmc].hbm_mc = false;
846 pci_dev_put(d->imc[lmc].mdev);
847 d->imc[lmc].mdev = NULL;
848
849 i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n");
850 return -ENODEV;
851 }
852
853 lmc++;
854 }
855 }
856
857 return 0;
858 }
859
/*
 * Resource configuration selected by i10nm_cpuids for steppings 0x0-0x3 of
 * ATOM_TREMONT_D and ICELAKE_X. It differs from i10nm_cfg1 only in
 * .busno_cfg_offset (0xcc here).
 *
 * The *_bdf triples are consumed through their .bus/.dev/.fun members, with
 * .bus used as an index into skx_dev::bus[]. NOTE(review): the positional
 * initializers presumably follow that {bus, dev, fun} order - the struct
 * layout is defined in skx_common.h.
 */
static struct res_config i10nm_cfg0 = {
	.type			= I10NM,
	.decs_did		= 0x3452,
	.busno_cfg_offset	= 0xcc,
	.ddr_imc_num		= 4,
	.ddr_chan_num		= 2,
	.ddr_dimm_num		= 2,
	.ddr_chan_mmio_sz	= 0x4000,
	.sad_all_bdf		= {1, 29, 0},
	.pcu_cr3_bdf		= {1, 30, 3},
	.util_all_bdf		= {1, 29, 1},
	.uracu_bdf		= {0, 0, 1},
	.ddr_mdev_bdf		= {0, 12, 0},
	.hbm_mdev_bdf		= {0, 12, 1},
	.sad_all_offset		= 0x108,
	.offsets_scrub		= offsets_scrub_icx,
	.offsets_demand		= offsets_demand_icx,
};
878
/*
 * Resource configuration selected by i10nm_cpuids for steppings 0x4-0xf of
 * ATOM_TREMONT_D and ICELAKE_X and for all steppings of ICELAKE_D. It
 * differs from i10nm_cfg0 only in .busno_cfg_offset (0xd0 here).
 */
static struct res_config i10nm_cfg1 = {
	.type			= I10NM,
	.decs_did		= 0x3452,
	.busno_cfg_offset	= 0xd0,
	.ddr_imc_num		= 4,
	.ddr_chan_num		= 2,
	.ddr_dimm_num		= 2,
	.ddr_chan_mmio_sz	= 0x4000,
	.sad_all_bdf		= {1, 29, 0},
	.pcu_cr3_bdf		= {1, 30, 3},
	.util_all_bdf		= {1, 29, 1},
	.uracu_bdf		= {0, 0, 1},
	.ddr_mdev_bdf		= {0, 12, 0},
	.hbm_mdev_bdf		= {0, 12, 1},
	.sad_all_offset		= 0x108,
	.offsets_scrub		= offsets_scrub_icx,
	.offsets_demand		= offsets_demand_icx,
};
897
/*
 * Resource configuration selected by i10nm_cpuids for SAPPHIRERAPIDS_X and
 * EMERALDRAPIDS_X. It is the only configuration here that describes HBM
 * memory controllers (hbm_* fields, *_hbm0/*_hbm1 offset tables) and a
 * second demand retry_rd_err_log register set (.offsets_demand2).
 */
static struct res_config spr_cfg = {
	.type			= SPR,
	.decs_did		= 0x3252,
	.busno_cfg_offset	= 0xd0,
	.ddr_imc_num		= 4,
	.ddr_chan_num		= 2,
	.ddr_dimm_num		= 2,
	.hbm_imc_num		= 16,
	.hbm_chan_num		= 2,
	.hbm_dimm_num		= 1,
	.ddr_chan_mmio_sz	= 0x8000,
	.hbm_chan_mmio_sz	= 0x4000,
	.support_ddr5		= true,
	.sad_all_bdf		= {1, 10, 0},
	.pcu_cr3_bdf		= {1, 30, 3},
	.util_all_bdf		= {1, 29, 1},
	.uracu_bdf		= {0, 0, 1},
	.ddr_mdev_bdf		= {0, 12, 0},
	.hbm_mdev_bdf		= {0, 12, 1},
	.sad_all_offset		= 0x300,
	.offsets_scrub		= offsets_scrub_spr,
	.offsets_scrub_hbm0	= offsets_scrub_spr_hbm0,
	.offsets_scrub_hbm1	= offsets_scrub_spr_hbm1,
	.offsets_demand		= offsets_demand_spr,
	.offsets_demand2	= offsets_demand2_spr,
	.offsets_demand_hbm0	= offsets_demand_spr_hbm0,
	.offsets_demand_hbm1	= offsets_demand_spr_hbm1,
};
926
/*
 * Resource configuration selected by i10nm_cpuids for GRANITERAPIDS_X and
 * ATOM_CRESTMONT_X. .ddr_imc_num is only the static default: for the GNR
 * type i10nm_get_imc_num() overwrites it with the number read at runtime.
 * No retry_rd_err_log offset tables and no HBM fields are set.
 */
static struct res_config gnr_cfg = {
	.type			= GNR,
	.decs_did		= 0x3252,
	.busno_cfg_offset	= 0xd0,
	.ddr_imc_num		= 12,
	.ddr_chan_num		= 1,
	.ddr_dimm_num		= 2,
	.ddr_chan_mmio_sz	= 0x4000,
	.support_ddr5		= true,
	.sad_all_bdf		= {0, 13, 0},
	.pcu_cr3_bdf		= {0, 5, 0},
	.util_all_bdf		= {0, 13, 1},
	.uracu_bdf		= {0, 0, 1},
	.ddr_mdev_bdf		= {0, 5, 1},
	.sad_all_offset		= 0x300,
};
943
/*
 * Supported CPU models. The driver data of each entry points to the
 * res_config that i10nm_init() installs as res_cfg; for ATOM_TREMONT_D and
 * ICELAKE_X the stepping range decides between i10nm_cfg0 and i10nm_cfg1.
 */
static const struct x86_cpu_id i10nm_cpuids[] = {
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D,	X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D,	X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X,		X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X,		X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_D,		X86_STEPPINGS(0x0, 0xf), &i10nm_cfg1),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X,	X86_STEPPINGS(0x0, 0xf), &spr_cfg),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X,	X86_STEPPINGS(0x0, 0xf), &spr_cfg),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X,	X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT_X,	X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
957
i10nm_check_ecc(struct skx_imc * imc,int chan)958 static bool i10nm_check_ecc(struct skx_imc *imc, int chan)
959 {
960 u32 mcmtr;
961
962 mcmtr = I10NM_GET_MCMTR(imc, chan);
963 edac_dbg(1, "ch%d mcmtr reg %x\n", chan, mcmtr);
964
965 return !!GET_BITFIELD(mcmtr, 2, 2);
966 }
967
i10nm_get_dimm_config(struct mem_ctl_info * mci,struct res_config * cfg)968 static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
969 struct res_config *cfg)
970 {
971 struct skx_pvt *pvt = mci->pvt_info;
972 struct skx_imc *imc = pvt->imc;
973 u32 mtr, amap, mcddrtcfg = 0;
974 struct dimm_info *dimm;
975 int i, j, ndimms;
976
977 for (i = 0; i < imc->num_channels; i++) {
978 if (!imc->mbase)
979 continue;
980
981 ndimms = 0;
982 amap = I10NM_GET_AMAP(imc, i);
983
984 if (res_cfg->type != GNR)
985 mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i);
986
987 for (j = 0; j < imc->num_dimms; j++) {
988 dimm = edac_get_dimm(mci, i, j, 0);
989 mtr = I10NM_GET_DIMMMTR(imc, i, j);
990 edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n",
991 mtr, mcddrtcfg, imc->mc, i, j);
992
993 if (IS_DIMM_PRESENT(mtr))
994 ndimms += skx_get_dimm_info(mtr, 0, amap, dimm,
995 imc, i, j, cfg);
996 else if (IS_NVDIMM_PRESENT(mcddrtcfg, j))
997 ndimms += skx_get_nvdimm_info(dimm, imc, i, j,
998 EDAC_MOD_STR);
999 }
1000 if (ndimms && !i10nm_check_ecc(imc, i)) {
1001 i10nm_printk(KERN_ERR, "ECC is disabled on imc %d channel %d\n",
1002 imc->mc, i);
1003 return -ENODEV;
1004 }
1005 }
1006
1007 return 0;
1008 }
1009
/*
 * Notifier block that routes machine check events to skx_mce_check_error()
 * at priority MCE_PRIO_EDAC. NOTE(review): its registration is outside the
 * part of the file visible here.
 */
static struct notifier_block i10nm_mce_dec = {
	.notifier_call	= skx_mce_check_error,
	.priority	= MCE_PRIO_EDAC,
};
1014
1015 #ifdef CONFIG_EDAC_DEBUG
/*
 * Debug feature.
 * Exercise the address decode logic by writing an address to
 * /sys/kernel/debug/edac/i10nm_test/addr.
 */
/* The "i10nm_test" debugfs directory; NULL if it could not be set up. */
static struct dentry *i10nm_test;
1022
debugfs_u64_set(void * data,u64 val)1023 static int debugfs_u64_set(void *data, u64 val)
1024 {
1025 struct mce m;
1026
1027 pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
1028
1029 memset(&m, 0, sizeof(m));
1030 /* ADDRV + MemRd + Unknown channel */
1031 m.status = MCI_STATUS_ADDRV + 0x90;
1032 /* One corrected error */
1033 m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT);
1034 m.addr = val;
1035 skx_mce_check_error(NULL, 0, &m);
1036
1037 return 0;
1038 }
1039 DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
1040
setup_i10nm_debug(void)1041 static void setup_i10nm_debug(void)
1042 {
1043 i10nm_test = edac_debugfs_create_dir("i10nm_test");
1044 if (!i10nm_test)
1045 return;
1046
1047 if (!edac_debugfs_create_file("addr", 0200, i10nm_test,
1048 NULL, &fops_u64_wo)) {
1049 debugfs_remove(i10nm_test);
1050 i10nm_test = NULL;
1051 }
1052 }
1053
/* Remove the "i10nm_test" debugfs directory and everything below it. */
static void teardown_i10nm_debug(void)
{
	debugfs_remove_recursive(i10nm_test);
}
1058 #else
/* Empty stand-ins used when CONFIG_EDAC_DEBUG is not defined. */
static inline void setup_i10nm_debug(void)
{
}

static inline void teardown_i10nm_debug(void)
{
}
1061 #endif /*CONFIG_EDAC_DEBUG*/
1062
i10nm_init(void)1063 static int __init i10nm_init(void)
1064 {
1065 u8 mc = 0, src_id = 0, node_id = 0;
1066 const struct x86_cpu_id *id;
1067 struct res_config *cfg;
1068 const char *owner;
1069 struct skx_dev *d;
1070 int rc, i, off[3] = {0xd0, 0xc8, 0xcc};
1071 u64 tolm, tohm;
1072 int imc_num;
1073
1074 edac_dbg(2, "\n");
1075
1076 if (ghes_get_devices())
1077 return -EBUSY;
1078
1079 owner = edac_get_owner();
1080 if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
1081 return -EBUSY;
1082
1083 if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
1084 return -ENODEV;
1085
1086 id = x86_match_cpu(i10nm_cpuids);
1087 if (!id)
1088 return -ENODEV;
1089
1090 cfg = (struct res_config *)id->driver_data;
1091 res_cfg = cfg;
1092
1093 rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm);
1094 if (rc)
1095 return rc;
1096
1097 rc = skx_get_all_bus_mappings(cfg, &i10nm_edac_list);
1098 if (rc < 0)
1099 goto fail;
1100 if (rc == 0) {
1101 i10nm_printk(KERN_ERR, "No memory controllers found\n");
1102 return -ENODEV;
1103 }
1104
1105 rc = i10nm_get_imc_num(cfg);
1106 if (rc < 0)
1107 goto fail;
1108
1109 mem_cfg_2lm = i10nm_check_2lm(cfg);
1110 skx_set_mem_cfg(mem_cfg_2lm);
1111
1112 rc = i10nm_get_ddr_munits();
1113
1114 if (i10nm_get_hbm_munits() && rc)
1115 goto fail;
1116
1117 imc_num = res_cfg->ddr_imc_num + res_cfg->hbm_imc_num;
1118
1119 list_for_each_entry(d, i10nm_edac_list, list) {
1120 rc = skx_get_src_id(d, 0xf8, &src_id);
1121 if (rc < 0)
1122 goto fail;
1123
1124 rc = skx_get_node_id(d, &node_id);
1125 if (rc < 0)
1126 goto fail;
1127
1128 edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id);
1129 for (i = 0; i < imc_num; i++) {
1130 if (!d->imc[i].mdev)
1131 continue;
1132
1133 d->imc[i].mc = mc++;
1134 d->imc[i].lmc = i;
1135 d->imc[i].src_id = src_id;
1136 d->imc[i].node_id = node_id;
1137 if (d->imc[i].hbm_mc) {
1138 d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz;
1139 d->imc[i].num_channels = cfg->hbm_chan_num;
1140 d->imc[i].num_dimms = cfg->hbm_dimm_num;
1141 } else {
1142 d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz;
1143 d->imc[i].num_channels = cfg->ddr_chan_num;
1144 d->imc[i].num_dimms = cfg->ddr_dimm_num;
1145 }
1146
1147 rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,
1148 "Intel_10nm Socket", EDAC_MOD_STR,
1149 i10nm_get_dimm_config, cfg);
1150 if (rc < 0)
1151 goto fail;
1152 }
1153 }
1154
1155 rc = skx_adxl_get();
1156 if (rc)
1157 goto fail;
1158
1159 opstate_init();
1160 mce_register_decode_chain(&i10nm_mce_dec);
1161 setup_i10nm_debug();
1162
1163 if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
1164 skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log);
1165 if (retry_rd_err_log == 2)
1166 enable_retry_rd_err_log(true);
1167 } else {
1168 skx_set_decode(i10nm_mc_decode, NULL);
1169 }
1170
1171 i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION);
1172
1173 return 0;
1174 fail:
1175 skx_remove();
1176 return rc;
1177 }
1178
i10nm_exit(void)1179 static void __exit i10nm_exit(void)
1180 {
1181 edac_dbg(2, "\n");
1182
1183 if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
1184 skx_set_decode(NULL, NULL);
1185 if (retry_rd_err_log == 2)
1186 enable_retry_rd_err_log(false);
1187 }
1188
1189 teardown_i10nm_debug();
1190 mce_unregister_decode_chain(&i10nm_mce_dec);
1191 skx_adxl_put();
1192 skx_remove();
1193 }
1194
/* Register the module's load and unload entry points. */
module_init(i10nm_init);
module_exit(i10nm_exit);
1197
set_decoding_via_mca(const char * buf,const struct kernel_param * kp)1198 static int set_decoding_via_mca(const char *buf, const struct kernel_param *kp)
1199 {
1200 unsigned long val;
1201 int ret;
1202
1203 ret = kstrtoul(buf, 0, &val);
1204
1205 if (ret || val > 1)
1206 return -EINVAL;
1207
1208 if (val && mem_cfg_2lm) {
1209 i10nm_printk(KERN_NOTICE, "Decoding errors via MCA banks for 2LM isn't supported yet\n");
1210 return -EIO;
1211 }
1212
1213 ret = param_set_int(buf, kp);
1214
1215 return ret;
1216 }
1217
1218 static const struct kernel_param_ops decoding_via_mca_param_ops = {
1219 .set = set_decoding_via_mca,
1220 .get = param_get_int,
1221 };
1222
1223 module_param_cb(decoding_via_mca, &decoding_via_mca_param_ops, &decoding_via_mca, 0644);
1224 MODULE_PARM_DESC(decoding_via_mca, "decoding_via_mca: 0=off(default), 1=enable");
1225
1226 module_param(retry_rd_err_log, int, 0444);
1227 MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)");
1228
1229 MODULE_LICENSE("GPL v2");
1230 MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors");
1231