xref: /openbmc/linux/drivers/edac/i10nm_base.c (revision 44ad3baf1cca483e418b6aadf2d3994f69e0f16a)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Driver for Intel(R) 10nm server memory controller.
4  * Copyright (c) 2019, Intel Corporation.
5  *
6  */
7 
8 #include <linux/kernel.h>
9 #include <linux/io.h>
10 #include <asm/cpu_device_id.h>
11 #include <asm/intel-family.h>
12 #include <asm/mce.h>
13 #include "edac_module.h"
14 #include "skx_common.h"
15 
/* Revision and module-name strings of this driver. */
16 #define I10NM_REVISION	"v0.0.6"
17 #define EDAC_MOD_STR	"i10nm_edac"
18 
19 /* Debug macros */
/* Forwards a message to edac_printk() under the "i10nm" tag. */
20 #define i10nm_printk(level, fmt, arg...)	\
21 	edac_printk(level, "i10nm", fmt, ##arg)
22 
/*
 * PCI config-space readers.
 *
 * Each macro expands to one pci_read_config_dword() call that stores the
 * value in @reg and evaluates to that call's return code. Several of them
 * pick a GNR-specific offset or stride by looking at the file-scope res_cfg
 * pointer, so res_cfg must be set before they are used.
 */
/* Dword at offset 0xd0 of the socket's uracu device. */
23 #define I10NM_GET_SCK_BAR(d, reg)	\
24 	pci_read_config_dword((d)->uracu, 0xd0, &(reg))
/* Dword @i of the uracu register array that starts at 0xd4 (GNR) or 0xd8. */
25 #define I10NM_GET_IMC_BAR(d, i, reg)		\
26 	pci_read_config_dword((d)->uracu,	\
27 	(res_cfg->type == GNR ? 0xd4 : 0xd8) + (i) * 4, &(reg))
/* Entry @i of the sad_all register array at @offset; stride 12 (GNR) or 8. */
28 #define I10NM_GET_SAD(d, offset, i, reg)\
29 	pci_read_config_dword((d)->sad_all, (offset) + (i) * \
30 	(res_cfg->type == GNR ? 12 : 8), &(reg))
/* Dword at offset 0xd4 of the uracu device, used for the HBM controllers. */
31 #define I10NM_GET_HBM_IMC_BAR(d, reg)	\
32 	pci_read_config_dword((d)->uracu, 0xd4, &(reg))
/* pcu_cr3 dword at 0x290 (GNR) or 0x90. */
33 #define I10NM_GET_CAPID3_CFG(d, reg)	\
34 	pci_read_config_dword((d)->pcu_cr3,	\
35 	res_cfg->type == GNR ? 0x290 : 0x90, &(reg))
/* pcu_cr3 dword at 0x298 (GNR) or 0x98. */
36 #define I10NM_GET_CAPID5_CFG(d, reg)	\
37 	pci_read_config_dword((d)->pcu_cr3,	\
38 	res_cfg->type == GNR ? 0x298 : 0x98, &(reg))
/*
 * MMIO readers.
 *
 * Each macro evaluates to a readl() from (m)->mbase. The register base
 * offset depends on whether the controller is flagged hbm_mc and, for some
 * registers, on whether res_cfg->type is GNR; channel @i is selected by
 * adding i * (m)->chan_mmio_sz.
 */
/* @j selects one of the consecutive 32-bit DIMMMTR registers of channel @i. */
39 #define I10NM_GET_DIMMMTR(m, i, j)	\
40 	readl((m)->mbase + ((m)->hbm_mc ? 0x80c :	\
41 	(res_cfg->type == GNR ? 0xc0c : 0x2080c)) +	\
42 	(i) * (m)->chan_mmio_sz + (j) * 4)
/* No GNR-specific offset here; i10nm_get_dimm_config() skips this read on GNR. */
43 #define I10NM_GET_MCDDRTCFG(m, i)	\
44 	readl((m)->mbase + ((m)->hbm_mc ? 0x970 : 0x20970) + \
45 	(i) * (m)->chan_mmio_sz)
46 #define I10NM_GET_MCMTR(m, i)		\
47 	readl((m)->mbase + ((m)->hbm_mc ? 0xef8 :	\
48 	(res_cfg->type == GNR ? 0xaf8 : 0x20ef8)) +	\
49 	(i) * (m)->chan_mmio_sz)
50 #define I10NM_GET_AMAP(m, i)		\
51 	readl((m)->mbase + ((m)->hbm_mc ? 0x814 :	\
52 	(res_cfg->type == GNR ? 0xc14 : 0x20814)) +	\
53 	(i) * (m)->chan_mmio_sz)
/* Generic 32/64-bit read and 32-bit write at @offset inside channel @i. */
54 #define I10NM_GET_REG32(m, i, offset)	\
55 	readl((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
56 #define I10NM_GET_REG64(m, i, offset)	\
57 	readq((m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
58 #define I10NM_SET_REG32(m, i, offset, v)	\
59 	writel(v, (m)->mbase + (i) * (m)->chan_mmio_sz + (offset))
60 
/*
 * Decoders for the BAR values read with the I10NM_GET_*_BAR() macros.
 * GET_BITFIELD() is defined outside this file; it is presumably
 * GET_BITFIELD(value, low bit, high bit) -- TODO confirm.
 */
61 #define I10NM_GET_SCK_MMIO_BASE(reg)	(GET_BITFIELD(reg, 0, 28) << 23)
62 #define I10NM_GET_IMC_MMIO_OFFSET(reg)	(GET_BITFIELD(reg, 0, 10) << 12)
63 #define I10NM_GET_IMC_MMIO_SIZE(reg)	((GET_BITFIELD(reg, 13, 23) - \
64 					 GET_BITFIELD(reg, 0, 10) + 1) << 12)
65 #define I10NM_GET_HBM_IMC_MMIO_OFFSET(reg)	\
66 	((GET_BITFIELD(reg, 0, 10) << 12) + 0x140000)
67 
/* Fixed MMIO layout constants for the GNR DDR and the HBM memory controllers. */
68 #define I10NM_GNR_IMC_MMIO_OFFSET	0x24c000
69 #define I10NM_GNR_IMC_MMIO_SIZE		0x4000
70 #define I10NM_HBM_IMC_MMIO_SIZE		0x9000
/* Field of the CAPID5 value, see i10nm_get_imc_num(). */
71 #define I10NM_DDR_IMC_CH_CNT(reg)	GET_BITFIELD(reg, 21, 24)
/* Field of the CAPID3 value, see i10nm_check_hbm_imc(). */
72 #define I10NM_IS_HBM_PRESENT(reg)	GET_BITFIELD(reg, 27, 30)
/* Bit of the MCMTR value, see i10nm_get_hbm_munits(). */
73 #define I10NM_IS_HBM_IMC(reg)		GET_BITFIELD(reg, 29, 29)
74 
/* Number of SAD entries scanned by i10nm_check_2lm() and the two bits it tests. */
75 #define I10NM_MAX_SAD			16
76 #define I10NM_SAD_ENABLE(reg)		GET_BITFIELD(reg, 0, 0)
77 #define I10NM_SAD_NM_CACHEABLE(reg)	GET_BITFIELD(reg, 5, 5)
78 
/*
 * Bits of the retry_rd_err_log control register, i.e. the register found at
 * entry [0] of the offsets_* tables. The last two are the combined masks used
 * by __enable_retry_rd_err_log() and show_retry_rd_err_log() respectively.
 */
79 #define RETRY_RD_ERR_LOG_UC		BIT(1)
80 #define RETRY_RD_ERR_LOG_NOOVER		BIT(14)
81 #define RETRY_RD_ERR_LOG_EN		BIT(15)
82 #define RETRY_RD_ERR_LOG_NOOVER_UC	(BIT(14) | BIT(1))
83 #define RETRY_RD_ERR_LOG_OVER_UC_V	(BIT(2) | BIT(1) | BIT(0))
84 
/* List of per-socket struct skx_dev entries that the functions below walk. */
85 static struct list_head *i10nm_edac_list;
86 
/* Resource configuration of the running CPU; also read implicitly by the I10NM_GET_* macros. */
87 static struct res_config *res_cfg;
/* Retry-read error log setting; show_retry_rd_err_log() clears status bits when it is 2. */
88 static int retry_rd_err_log;
/* Non-zero allows i10nm_mc_decode_available() to accept decoding from MCA bank registers. */
89 static int decoding_via_mca;
/* When true, i10nm_mc_decode_available() refuses MCA-based decoding. */
90 static bool mem_cfg_2lm;
91 
/*
 * Register offset tables for the retry_rd_err_log register sets, six entries
 * each. As used by show_retry_rd_err_log(): [0] is the control/status
 * register (the only entry __enable_retry_rd_err_log() touches), [1], [3] and
 * [4] are read as 32-bit values, [2] is read as 64-bit on SPR and 32-bit
 * otherwise, and [5] is read as 64-bit.
 */
92 static u32 offsets_scrub_icx[]  = {0x22c60, 0x22c54, 0x22c5c, 0x22c58, 0x22c28, 0x20ed8};
93 static u32 offsets_scrub_spr[]  = {0x22c60, 0x22c54, 0x22f08, 0x22c58, 0x22c28, 0x20ed8};
94 static u32 offsets_scrub_spr_hbm0[]  = {0x2860, 0x2854, 0x2b08, 0x2858, 0x2828, 0x0ed8};
95 static u32 offsets_scrub_spr_hbm1[]  = {0x2c60, 0x2c54, 0x2f08, 0x2c58, 0x2c28, 0x0fa8};
96 static u32 offsets_demand_icx[] = {0x22e54, 0x22e60, 0x22e64, 0x22e58, 0x22e5c, 0x20ee0};
97 static u32 offsets_demand_spr[] = {0x22e54, 0x22e60, 0x22f10, 0x22e58, 0x22e5c, 0x20ee0};
98 static u32 offsets_demand2_spr[] = {0x22c70, 0x22d80, 0x22f18, 0x22d58, 0x22c64, 0x20f10};
99 static u32 offsets_demand_spr_hbm0[] = {0x2a54, 0x2a60, 0x2b10, 0x2a58, 0x2a5c, 0x0ee0};
100 static u32 offsets_demand_spr_hbm1[] = {0x2e54, 0x2e60, 0x2f10, 0x2e58, 0x2e5c, 0x0fb0};
101 
/*
 * Enable or disable retry_rd_err_log on one channel of a memory controller.
 *
 * @imc, @chan      : Memory controller and channel whose registers are accessed.
 * @enable          : true  - remember the current control values in @rrl_ctl and
 *                            switch logging on;
 *                    false - put the remembered settings back.
 * @rrl_ctl         : Saved control values: [0] scrub, [1] demand, [2] second
 *                    demand set (only touched when @offsets_demand2 is given).
 * @offsets_scrub   : Offsets of the scrub register set, only entry [0] is used.
 * @offsets_demand  : Offsets of the demand register set, only entry [0] is used.
 * @offsets_demand2 : Offsets of the optional second demand set, may be NULL.
 */
static void __enable_retry_rd_err_log(struct skx_imc *imc, int chan, bool enable, u32 *rrl_ctl,
				      u32 *offsets_scrub, u32 *offsets_demand,
				      u32 *offsets_demand2)
{
	const bool has_demand2 = offsets_demand2 != NULL;
	u32 scrub, demand, demand2 = 0;

	scrub  = I10NM_GET_REG32(imc, chan, offsets_scrub[0]);
	demand = I10NM_GET_REG32(imc, chan, offsets_demand[0]);
	if (has_demand2)
		demand2 = I10NM_GET_REG32(imc, chan, offsets_demand2[0]);

	if (!enable) {
		/*
		 * Restore: enabling cleared UC/NOOVER and set EN on the scrub
		 * and demand registers, so set the first two again where they
		 * were set before and drop EN where it was clear before.
		 */
		scrub  |= rrl_ctl[0] & RETRY_RD_ERR_LOG_NOOVER_UC;
		demand |= rrl_ctl[1] & RETRY_RD_ERR_LOG_NOOVER_UC;

		if (!(rrl_ctl[0] & RETRY_RD_ERR_LOG_EN))
			scrub &= ~RETRY_RD_ERR_LOG_EN;
		if (!(rrl_ctl[1] & RETRY_RD_ERR_LOG_EN))
			demand &= ~RETRY_RD_ERR_LOG_EN;

		if (has_demand2) {
			/* The second demand set had NOOVER forced on, not off. */
			demand2 |= rrl_ctl[2] & RETRY_RD_ERR_LOG_UC;
			if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_NOOVER))
				demand2 &= ~RETRY_RD_ERR_LOG_NOOVER;
			if (!(rrl_ctl[2] & RETRY_RD_ERR_LOG_EN))
				demand2 &= ~RETRY_RD_ERR_LOG_EN;
		}
	} else {
		/* Remember what was programmed before logging gets switched on. */
		rrl_ctl[0] = scrub;
		rrl_ctl[1] = demand;
		if (has_demand2)
			rrl_ctl[2] = demand2;

		scrub   = (scrub & ~RETRY_RD_ERR_LOG_NOOVER_UC) | RETRY_RD_ERR_LOG_EN;
		demand  = (demand & ~RETRY_RD_ERR_LOG_NOOVER_UC) | RETRY_RD_ERR_LOG_EN;

		if (has_demand2) {
			demand2 &= ~RETRY_RD_ERR_LOG_UC;
			demand2 |= RETRY_RD_ERR_LOG_NOOVER | RETRY_RD_ERR_LOG_EN;
		}
	}

	I10NM_SET_REG32(imc, chan, offsets_scrub[0], scrub);
	I10NM_SET_REG32(imc, chan, offsets_demand[0], demand);
	if (has_demand2)
		I10NM_SET_REG32(imc, chan, offsets_demand2[0], demand2);
}
160 
/*
 * Enable or disable retry_rd_err_log on every mapped memory controller of
 * every socket on i10nm_edac_list.
 *
 * The first res_cfg->ddr_imc_num entries of d->imc[] are handled as DDR
 * controllers, the following res_cfg->hbm_imc_num entries as HBM controllers.
 * Each HBM channel has two register sets (hbm0/hbm1) with their own saved
 * control values in rrl_ctl[0] and rrl_ctl[1].
 */
static void enable_retry_rd_err_log(bool enable)
{
	struct skx_dev *d;

	edac_dbg(2, "\n");

	list_for_each_entry(d, i10nm_edac_list, list) {
		int ddr_end = res_cfg->ddr_imc_num;
		int hbm_end = ddr_end + res_cfg->hbm_imc_num;
		int ddr_chans = res_cfg->ddr_chan_num;
		int hbm_chans = res_cfg->hbm_chan_num;
		struct skx_channel *chans;
		struct skx_imc *mc;
		int idx, ch;

		/* DDR memory controllers */
		for (idx = 0; idx < ddr_end; idx++) {
			mc = &d->imc[idx];
			if (!mc->mbase)
				continue;

			chans = mc->chan;
			for (ch = 0; ch < ddr_chans; ch++)
				__enable_retry_rd_err_log(mc, ch, enable, chans[ch].rrl_ctl[0],
							  res_cfg->offsets_scrub,
							  res_cfg->offsets_demand,
							  res_cfg->offsets_demand2);
		}

		/* HBM memory controllers follow the DDR ones in d->imc[] */
		for (; idx < hbm_end; idx++) {
			mc = &d->imc[idx];
			if (!mc->mbase || !mc->hbm_mc)
				continue;

			chans = mc->chan;
			for (ch = 0; ch < hbm_chans; ch++) {
				__enable_retry_rd_err_log(mc, ch, enable, chans[ch].rrl_ctl[0],
							  res_cfg->offsets_scrub_hbm0,
							  res_cfg->offsets_demand_hbm0,
							  NULL);
				__enable_retry_rd_err_log(mc, ch, enable, chans[ch].rrl_ctl[1],
							  res_cfg->offsets_scrub_hbm1,
							  res_cfg->offsets_demand_hbm1,
							  NULL);
			}
		}
	}
}
209 
/*
 * Append the retry_rd_err_log registers and the correctable error counters
 * of the channel that reported an error to @msg.
 *
 * @res       : Decoded error location; selects the memory controller and channel.
 * @msg       : Output buffer.
 * @len       : Size of @msg in bytes.
 * @scrub_err : true to dump the scrub register set, false to dump the demand
 *              set (plus the second demand set if the platform has one).
 *
 * When the file-scope retry_rd_err_log setting is 2, the status bits
 * RETRY_RD_ERR_LOG_OVER_UC_V of the control register(s) are cleared after
 * they have been reported.
 */
static void show_retry_rd_err_log(struct decoded_addr *res, char *msg,
				  int len, bool scrub_err)
{
	struct skx_imc *imc = &res->dev->imc[res->imc];
	u32 *regs, *regs2 = NULL;
	u32 ctl, log1, log2, log3, log4;
	u32 ctl2, sec1, sec3, sec4;
	u64 log2_wide, log5;
	u64 sec2_wide, sec5;
	u32 corr_base, corr[4];
	int n, k;

	if (!imc->mbase)
		return;

	/* Pick the register set(s) and the base of the four correrrcnt registers. */
	if (!imc->hbm_mc) {
		corr_base = 0x22c18;
		if (scrub_err) {
			regs = res_cfg->offsets_scrub;
		} else {
			regs  = res_cfg->offsets_demand;
			regs2 = res_cfg->offsets_demand2;
		}
	} else if (res->cs & 1) {
		/* Odd chip select: use the hbm1 register set */
		corr_base = 0x2c18;
		regs = scrub_err ? res_cfg->offsets_scrub_hbm1 :
				   res_cfg->offsets_demand_hbm1;
	} else {
		/* Even chip select: use the hbm0 register set */
		corr_base = 0x2818;
		regs = scrub_err ? res_cfg->offsets_scrub_hbm0 :
				   res_cfg->offsets_demand_hbm0;
	}

	ctl  = I10NM_GET_REG32(imc, res->channel, regs[0]);
	log1 = I10NM_GET_REG32(imc, res->channel, regs[1]);
	log3 = I10NM_GET_REG32(imc, res->channel, regs[3]);
	log4 = I10NM_GET_REG32(imc, res->channel, regs[4]);
	log5 = I10NM_GET_REG64(imc, res->channel, regs[5]);

	if (regs2) {
		ctl2 = I10NM_GET_REG32(imc, res->channel, regs2[0]);
		sec1 = I10NM_GET_REG32(imc, res->channel, regs2[1]);
		sec3 = I10NM_GET_REG32(imc, res->channel, regs2[3]);
		sec4 = I10NM_GET_REG32(imc, res->channel, regs2[4]);
		sec5 = I10NM_GET_REG64(imc, res->channel, regs2[5]);
	}

	if (res_cfg->type != SPR) {
		/* Entry [2] is a 32-bit register on the non-SPR platforms. */
		log2 = I10NM_GET_REG32(imc, res->channel, regs[2]);
		n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.8x %.8x %.8x %.16llx]",
			     ctl, log1, log2, log3, log4, log5);
	} else {
		/* On SPR entry [2] is read as a 64-bit register. */
		log2_wide = I10NM_GET_REG64(imc, res->channel, regs[2]);
		n = snprintf(msg, len, " retry_rd_err_log[%.8x %.8x %.16llx %.8x %.8x %.16llx",
			     ctl, log1, log2_wide, log3, log4, log5);

		/* Only continue while there is room left in the buffer. */
		if (n < len && regs2) {
			sec2_wide = I10NM_GET_REG64(imc, res->channel, regs2[2]);
			n += snprintf(msg + n, len - n, " %.8x %.8x %.16llx %.8x %.8x %.16llx]",
				      ctl2, sec1, sec2_wide, sec3, sec4, sec5);
		} else if (n < len) {
			n += snprintf(msg + n, len - n, "]");
		}
	}

	/* Four consecutive 32-bit registers, each holding two 16-bit counters. */
	for (k = 0; k < 4; k++)
		corr[k] = I10NM_GET_REG32(imc, res->channel, corr_base + k * 4);

	if (n < len)
		snprintf(msg + n, len - n,
			 " correrrcnt[%.4x %.4x %.4x %.4x %.4x %.4x %.4x %.4x]",
			 corr[0] & 0xffff, corr[0] >> 16,
			 corr[1] & 0xffff, corr[1] >> 16,
			 corr[2] & 0xffff, corr[2] >> 16,
			 corr[3] & 0xffff, corr[3] >> 16);

	/* Clear status bits */
	if (retry_rd_err_log != 2)
		return;

	if (ctl & RETRY_RD_ERR_LOG_OVER_UC_V) {
		ctl &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
		I10NM_SET_REG32(imc, res->channel, regs[0], ctl);
	}

	if (regs2 && (ctl2 & RETRY_RD_ERR_LOG_OVER_UC_V)) {
		ctl2 &= ~RETRY_RD_ERR_LOG_OVER_UC_V;
		I10NM_SET_REG32(imc, res->channel, regs2[0], ctl2);
	}
}
318 
pci_get_dev_wrapper(int dom,unsigned int bus,unsigned int dev,unsigned int fun)319 static struct pci_dev *pci_get_dev_wrapper(int dom, unsigned int bus,
320 					   unsigned int dev, unsigned int fun)
321 {
322 	struct pci_dev *pdev;
323 
324 	pdev = pci_get_domain_bus_and_slot(dom, bus, PCI_DEVFN(dev, fun));
325 	if (!pdev) {
326 		edac_dbg(2, "No device %02x:%02x.%x\n",
327 			 bus, dev, fun);
328 		return NULL;
329 	}
330 
331 	if (unlikely(pci_enable_device(pdev) < 0)) {
332 		edac_dbg(2, "Failed to enable device %02x:%02x.%x\n",
333 			 bus, dev, fun);
334 		pci_dev_put(pdev);
335 		return NULL;
336 	}
337 
338 	return pdev;
339 }
340 
341 /**
342  * i10nm_get_imc_num() - Get the number of present DDR memory controllers.
343  *
344  * @cfg : The pointer to the structure of EDAC resource configurations.
345  *
346  * For Granite Rapids CPUs, the number of present DDR memory controllers read
347  * at runtime overwrites the value statically configured in @cfg->ddr_imc_num.
348  * For other CPUs, the number of present DDR memory controllers is statically
349  * configured in @cfg->ddr_imc_num.
350  *
351  * RETURNS : 0 on success, < 0 on failure.
352  */
i10nm_get_imc_num(struct res_config * cfg)353 static int i10nm_get_imc_num(struct res_config *cfg)
354 {
355 	int n, imc_num, chan_num = 0;
356 	struct skx_dev *d;
357 	u32 reg;
358 
359 	list_for_each_entry(d, i10nm_edac_list, list) {
360 		d->pcu_cr3 = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->pcu_cr3_bdf.bus],
361 						 res_cfg->pcu_cr3_bdf.dev,
362 						 res_cfg->pcu_cr3_bdf.fun);
363 		if (!d->pcu_cr3)
364 			continue;
365 
366 		if (I10NM_GET_CAPID5_CFG(d, reg))
367 			continue;
368 
369 		n = I10NM_DDR_IMC_CH_CNT(reg);
370 
371 		if (!chan_num) {
372 			chan_num = n;
373 			edac_dbg(2, "Get DDR CH number: %d\n", chan_num);
374 		} else if (chan_num != n) {
375 			i10nm_printk(KERN_NOTICE, "Get DDR CH numbers: %d, %d\n", chan_num, n);
376 		}
377 	}
378 
379 	switch (cfg->type) {
380 	case GNR:
381 		/*
382 		 * One channel per DDR memory controller for Granite Rapids CPUs.
383 		 */
384 		imc_num = chan_num;
385 
386 		if (!imc_num) {
387 			i10nm_printk(KERN_ERR, "Invalid DDR MC number\n");
388 			return -ENODEV;
389 		}
390 
391 		if (imc_num > I10NM_NUM_DDR_IMC) {
392 			i10nm_printk(KERN_ERR, "Need to make I10NM_NUM_DDR_IMC >= %d\n", imc_num);
393 			return -EINVAL;
394 		}
395 
396 		if (cfg->ddr_imc_num != imc_num) {
397 			/*
398 			 * Store the number of present DDR memory controllers.
399 			 */
400 			cfg->ddr_imc_num = imc_num;
401 			edac_dbg(2, "Set DDR MC number: %d", imc_num);
402 		}
403 
404 		return 0;
405 	default:
406 		/*
407 		 * For other CPUs, the number of present DDR memory controllers
408 		 * is statically pre-configured in cfg->ddr_imc_num.
409 		 */
410 		return 0;
411 	}
412 }
413 
i10nm_check_2lm(struct res_config * cfg)414 static bool i10nm_check_2lm(struct res_config *cfg)
415 {
416 	struct skx_dev *d;
417 	u32 reg;
418 	int i;
419 
420 	list_for_each_entry(d, i10nm_edac_list, list) {
421 		d->sad_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->sad_all_bdf.bus],
422 						 res_cfg->sad_all_bdf.dev,
423 						 res_cfg->sad_all_bdf.fun);
424 		if (!d->sad_all)
425 			continue;
426 
427 		for (i = 0; i < I10NM_MAX_SAD; i++) {
428 			I10NM_GET_SAD(d, cfg->sad_all_offset, i, reg);
429 			if (I10NM_SAD_ENABLE(reg) && I10NM_SAD_NM_CACHEABLE(reg)) {
430 				edac_dbg(2, "2-level memory configuration.\n");
431 				return true;
432 			}
433 		}
434 	}
435 
436 	return false;
437 }
438 
439 /*
440  * Check whether the error comes from DDRT by ICX/Tremont/SPR model specific error code.
441  * Refer to SDM vol3B 17.11.3/17.13.2 Intel IMC MC error codes for IA32_MCi_STATUS.
442  */
i10nm_mscod_is_ddrt(u32 mscod)443 static bool i10nm_mscod_is_ddrt(u32 mscod)
444 {
445 	switch (res_cfg->type) {
446 	case I10NM:
447 		switch (mscod) {
448 		case 0x0106: case 0x0107:
449 		case 0x0800: case 0x0804:
450 		case 0x0806 ... 0x0808:
451 		case 0x080a ... 0x080e:
452 		case 0x0810: case 0x0811:
453 		case 0x0816: case 0x081e:
454 		case 0x081f:
455 			return true;
456 		}
457 
458 		break;
459 	case SPR:
460 		switch (mscod) {
461 		case 0x0800: case 0x0804:
462 		case 0x0806 ... 0x0808:
463 		case 0x080a ... 0x080e:
464 		case 0x0810: case 0x0811:
465 		case 0x0816: case 0x081e:
466 		case 0x081f:
467 			return true;
468 		}
469 
470 		break;
471 	default:
472 		return false;
473 	}
474 
475 	return false;
476 }
477 
i10nm_mc_decode_available(struct mce * mce)478 static bool i10nm_mc_decode_available(struct mce *mce)
479 {
480 #define ICX_IMCx_CHy		0x06666000
481 	u8 bank;
482 
483 	if (!decoding_via_mca || mem_cfg_2lm)
484 		return false;
485 
486 	if ((mce->status & (MCI_STATUS_MISCV | MCI_STATUS_ADDRV))
487 			!= (MCI_STATUS_MISCV | MCI_STATUS_ADDRV))
488 		return false;
489 
490 	bank = mce->bank;
491 
492 	switch (res_cfg->type) {
493 	case I10NM:
494 		/* Check whether the bank is one of {13,14,17,18,21,22,25,26} */
495 		if (!(ICX_IMCx_CHy & (1 << bank)))
496 			return false;
497 		break;
498 	case SPR:
499 		if (bank < 13 || bank > 20)
500 			return false;
501 		break;
502 	default:
503 		return false;
504 	}
505 
506 	/* DDRT errors can't be decoded from MCA bank registers */
507 	if (MCI_MISC_ECC_MODE(mce->misc) == MCI_MISC_ECC_DDRT)
508 		return false;
509 
510 	if (i10nm_mscod_is_ddrt(MCI_STATUS_MSCOD(mce->status)))
511 		return false;
512 
513 	return true;
514 }
515 
i10nm_mc_decode(struct decoded_addr * res)516 static bool i10nm_mc_decode(struct decoded_addr *res)
517 {
518 	struct mce *m = res->mce;
519 	struct skx_dev *d;
520 	u8 bank;
521 
522 	if (!i10nm_mc_decode_available(m))
523 		return false;
524 
525 	list_for_each_entry(d, i10nm_edac_list, list) {
526 		if (d->imc[0].src_id == m->socketid) {
527 			res->socket = m->socketid;
528 			res->dev = d;
529 			break;
530 		}
531 	}
532 
533 	switch (res_cfg->type) {
534 	case I10NM:
535 		bank              = m->bank - 13;
536 		res->imc          = bank / 4;
537 		res->channel      = bank % 2;
538 		res->column       = GET_BITFIELD(m->misc, 9, 18) << 2;
539 		res->row          = GET_BITFIELD(m->misc, 19, 39);
540 		res->bank_group   = GET_BITFIELD(m->misc, 40, 41);
541 		res->bank_address = GET_BITFIELD(m->misc, 42, 43);
542 		res->bank_group  |= GET_BITFIELD(m->misc, 44, 44) << 2;
543 		res->rank         = GET_BITFIELD(m->misc, 56, 58);
544 		res->dimm         = res->rank >> 2;
545 		res->rank         = res->rank % 4;
546 		break;
547 	case SPR:
548 		bank              = m->bank - 13;
549 		res->imc          = bank / 2;
550 		res->channel      = bank % 2;
551 		res->column       = GET_BITFIELD(m->misc, 9, 18) << 2;
552 		res->row          = GET_BITFIELD(m->misc, 19, 36);
553 		res->bank_group   = GET_BITFIELD(m->misc, 37, 38);
554 		res->bank_address = GET_BITFIELD(m->misc, 39, 40);
555 		res->bank_group  |= GET_BITFIELD(m->misc, 41, 41) << 2;
556 		res->rank         = GET_BITFIELD(m->misc, 57, 57);
557 		res->dimm         = GET_BITFIELD(m->misc, 58, 58);
558 		break;
559 	default:
560 		return false;
561 	}
562 
563 	if (!res->dev) {
564 		skx_printk(KERN_ERR, "No device for src_id %d imc %d\n",
565 			   m->socketid, res->imc);
566 		return false;
567 	}
568 
569 	return true;
570 }
571 
572 /**
573  * get_gnr_mdev() - Get the PCI device of the @logical_idx-th DDR memory controller.
574  *
575  * @d            : The pointer to the structure of CPU socket EDAC device.
576  * @logical_idx  : The logical index of the present memory controller (0 ~ max present MC# - 1).
577  * @physical_idx : To store the corresponding physical index of @logical_idx.
578  *
579  * RETURNS       : The PCI device of the @logical_idx-th DDR memory controller, NULL on failure.
580  */
get_gnr_mdev(struct skx_dev * d,int logical_idx,int * physical_idx)581 static struct pci_dev *get_gnr_mdev(struct skx_dev *d, int logical_idx, int *physical_idx)
582 {
583 #define GNR_MAX_IMC_PCI_CNT	28
584 
585 	struct pci_dev *mdev;
586 	int i, logical = 0;
587 
588 	/*
589 	 * Detect present memory controllers from { PCI device: 8-5, function 7-1 }
590 	 */
591 	for (i = 0; i < GNR_MAX_IMC_PCI_CNT; i++) {
592 		mdev = pci_get_dev_wrapper(d->seg,
593 					   d->bus[res_cfg->ddr_mdev_bdf.bus],
594 					   res_cfg->ddr_mdev_bdf.dev + i / 7,
595 					   res_cfg->ddr_mdev_bdf.fun + i % 7);
596 
597 		if (mdev) {
598 			if (logical == logical_idx) {
599 				*physical_idx = i;
600 				return mdev;
601 			}
602 
603 			pci_dev_put(mdev);
604 			logical++;
605 		}
606 	}
607 
608 	return NULL;
609 }
610 
611 /**
612  * get_ddr_munit() - Get the resource of the i-th DDR memory controller.
613  *
614  * @d      : The pointer to the structure of CPU socket EDAC device.
615  * @i      : The index of the CPU socket relative DDR memory controller.
616  * @offset : To store the MMIO offset of the i-th DDR memory controller.
617  * @size   : To store the MMIO size of the i-th DDR memory controller.
618  *
619  * RETURNS : The PCI device of the i-th DDR memory controller, NULL on failure.
620  */
get_ddr_munit(struct skx_dev * d,int i,u32 * offset,unsigned long * size)621 static struct pci_dev *get_ddr_munit(struct skx_dev *d, int i, u32 *offset, unsigned long *size)
622 {
623 	struct pci_dev *mdev;
624 	int physical_idx;
625 	u32 reg;
626 
627 	switch (res_cfg->type) {
628 	case GNR:
629 		if (I10NM_GET_IMC_BAR(d, 0, reg)) {
630 			i10nm_printk(KERN_ERR, "Failed to get mc0 bar\n");
631 			return NULL;
632 		}
633 
634 		mdev = get_gnr_mdev(d, i, &physical_idx);
635 		if (!mdev)
636 			return NULL;
637 
638 		*offset = I10NM_GET_IMC_MMIO_OFFSET(reg) +
639 			  I10NM_GNR_IMC_MMIO_OFFSET +
640 			  physical_idx * I10NM_GNR_IMC_MMIO_SIZE;
641 		*size   = I10NM_GNR_IMC_MMIO_SIZE;
642 
643 		break;
644 	default:
645 		if (I10NM_GET_IMC_BAR(d, i, reg)) {
646 			i10nm_printk(KERN_ERR, "Failed to get mc%d bar\n", i);
647 			return NULL;
648 		}
649 
650 		mdev = pci_get_dev_wrapper(d->seg,
651 					   d->bus[res_cfg->ddr_mdev_bdf.bus],
652 					   res_cfg->ddr_mdev_bdf.dev + i,
653 					   res_cfg->ddr_mdev_bdf.fun);
654 		if (!mdev)
655 			return NULL;
656 
657 		*offset  = I10NM_GET_IMC_MMIO_OFFSET(reg);
658 		*size    = I10NM_GET_IMC_MMIO_SIZE(reg);
659 	}
660 
661 	return mdev;
662 }
663 
664 /**
665  * i10nm_imc_absent() - Check whether the memory controller @imc is absent
666  *
667  * @imc    : The pointer to the structure of memory controller EDAC device.
668  *
669  * RETURNS : true if the memory controller EDAC device is absent, false otherwise.
670  */
i10nm_imc_absent(struct skx_imc * imc)671 static bool i10nm_imc_absent(struct skx_imc *imc)
672 {
673 	u32 mcmtr;
674 	int i;
675 
676 	switch (res_cfg->type) {
677 	case SPR:
678 		for (i = 0; i < res_cfg->ddr_chan_num; i++) {
679 			mcmtr = I10NM_GET_MCMTR(imc, i);
680 			edac_dbg(1, "ch%d mcmtr reg %x\n", i, mcmtr);
681 			if (mcmtr != ~0)
682 				return false;
683 		}
684 
685 		/*
686 		 * Some workstations' absent memory controllers still
687 		 * appear as PCIe devices, misleading the EDAC driver.
688 		 * By observing that the MMIO registers of these absent
689 		 * memory controllers consistently hold the value of ~0.
690 		 *
691 		 * We identify a memory controller as absent by checking
692 		 * if its MMIO register "mcmtr" == ~0 in all its channels.
693 		 */
694 		return true;
695 	default:
696 		return false;
697 	}
698 }
699 
i10nm_get_ddr_munits(void)700 static int i10nm_get_ddr_munits(void)
701 {
702 	struct pci_dev *mdev;
703 	void __iomem *mbase;
704 	unsigned long size;
705 	struct skx_dev *d;
706 	int i, lmc, j = 0;
707 	u32 reg, off;
708 	u64 base;
709 
710 	list_for_each_entry(d, i10nm_edac_list, list) {
711 		d->util_all = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->util_all_bdf.bus],
712 						  res_cfg->util_all_bdf.dev,
713 						  res_cfg->util_all_bdf.fun);
714 		if (!d->util_all)
715 			return -ENODEV;
716 
717 		d->uracu = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->uracu_bdf.bus],
718 					       res_cfg->uracu_bdf.dev,
719 					       res_cfg->uracu_bdf.fun);
720 		if (!d->uracu)
721 			return -ENODEV;
722 
723 		if (I10NM_GET_SCK_BAR(d, reg)) {
724 			i10nm_printk(KERN_ERR, "Failed to socket bar\n");
725 			return -ENODEV;
726 		}
727 
728 		base = I10NM_GET_SCK_MMIO_BASE(reg);
729 		edac_dbg(2, "socket%d mmio base 0x%llx (reg 0x%x)\n",
730 			 j++, base, reg);
731 
732 		for (lmc = 0, i = 0; i < res_cfg->ddr_imc_num; i++) {
733 			mdev = get_ddr_munit(d, i, &off, &size);
734 
735 			if (i == 0 && !mdev) {
736 				i10nm_printk(KERN_ERR, "No IMC found\n");
737 				return -ENODEV;
738 			}
739 			if (!mdev)
740 				continue;
741 
742 			edac_dbg(2, "mc%d mmio base 0x%llx size 0x%lx (reg 0x%x)\n",
743 				 i, base + off, size, reg);
744 
745 			mbase = ioremap(base + off, size);
746 			if (!mbase) {
747 				i10nm_printk(KERN_ERR, "Failed to ioremap 0x%llx\n",
748 					     base + off);
749 				return -ENODEV;
750 			}
751 
752 			d->imc[lmc].mbase = mbase;
753 			if (i10nm_imc_absent(&d->imc[lmc])) {
754 				pci_dev_put(mdev);
755 				iounmap(mbase);
756 				d->imc[lmc].mbase = NULL;
757 				edac_dbg(2, "Skip absent mc%d\n", i);
758 				continue;
759 			} else {
760 				d->imc[lmc].mdev = mdev;
761 				if (res_cfg->type == SPR)
762 					skx_set_mc_mapping(d, i, lmc);
763 				lmc++;
764 			}
765 		}
766 	}
767 
768 	return 0;
769 }
770 
i10nm_check_hbm_imc(struct skx_dev * d)771 static bool i10nm_check_hbm_imc(struct skx_dev *d)
772 {
773 	u32 reg;
774 
775 	if (I10NM_GET_CAPID3_CFG(d, reg)) {
776 		i10nm_printk(KERN_ERR, "Failed to get capid3_cfg\n");
777 		return false;
778 	}
779 
780 	return I10NM_IS_HBM_PRESENT(reg) != 0;
781 }
782 
i10nm_get_hbm_munits(void)783 static int i10nm_get_hbm_munits(void)
784 {
785 	struct pci_dev *mdev;
786 	void __iomem *mbase;
787 	u32 reg, off, mcmtr;
788 	struct skx_dev *d;
789 	int i, lmc;
790 	u64 base;
791 
792 	list_for_each_entry(d, i10nm_edac_list, list) {
793 		if (!d->pcu_cr3)
794 			return -ENODEV;
795 
796 		if (!i10nm_check_hbm_imc(d)) {
797 			i10nm_printk(KERN_DEBUG, "No hbm memory\n");
798 			return -ENODEV;
799 		}
800 
801 		if (I10NM_GET_SCK_BAR(d, reg)) {
802 			i10nm_printk(KERN_ERR, "Failed to get socket bar\n");
803 			return -ENODEV;
804 		}
805 		base = I10NM_GET_SCK_MMIO_BASE(reg);
806 
807 		if (I10NM_GET_HBM_IMC_BAR(d, reg)) {
808 			i10nm_printk(KERN_ERR, "Failed to get hbm mc bar\n");
809 			return -ENODEV;
810 		}
811 		base += I10NM_GET_HBM_IMC_MMIO_OFFSET(reg);
812 
813 		lmc = res_cfg->ddr_imc_num;
814 
815 		for (i = 0; i < res_cfg->hbm_imc_num; i++) {
816 			mdev = pci_get_dev_wrapper(d->seg, d->bus[res_cfg->hbm_mdev_bdf.bus],
817 						   res_cfg->hbm_mdev_bdf.dev + i / 4,
818 						   res_cfg->hbm_mdev_bdf.fun + i % 4);
819 
820 			if (i == 0 && !mdev) {
821 				i10nm_printk(KERN_ERR, "No hbm mc found\n");
822 				return -ENODEV;
823 			}
824 			if (!mdev)
825 				continue;
826 
827 			d->imc[lmc].mdev = mdev;
828 			off = i * I10NM_HBM_IMC_MMIO_SIZE;
829 
830 			edac_dbg(2, "hbm mc%d mmio base 0x%llx size 0x%x\n",
831 				 lmc, base + off, I10NM_HBM_IMC_MMIO_SIZE);
832 
833 			mbase = ioremap(base + off, I10NM_HBM_IMC_MMIO_SIZE);
834 			if (!mbase) {
835 				pci_dev_put(d->imc[lmc].mdev);
836 				d->imc[lmc].mdev = NULL;
837 
838 				i10nm_printk(KERN_ERR, "Failed to ioremap for hbm mc 0x%llx\n",
839 					     base + off);
840 				return -ENOMEM;
841 			}
842 
843 			d->imc[lmc].mbase = mbase;
844 			d->imc[lmc].hbm_mc = true;
845 
846 			mcmtr = I10NM_GET_MCMTR(&d->imc[lmc], 0);
847 			if (!I10NM_IS_HBM_IMC(mcmtr)) {
848 				iounmap(d->imc[lmc].mbase);
849 				d->imc[lmc].mbase = NULL;
850 				d->imc[lmc].hbm_mc = false;
851 				pci_dev_put(d->imc[lmc].mdev);
852 				d->imc[lmc].mdev = NULL;
853 
854 				i10nm_printk(KERN_ERR, "This isn't an hbm mc!\n");
855 				return -ENODEV;
856 			}
857 
858 			lmc++;
859 		}
860 	}
861 
862 	return 0;
863 }
864 
/*
 * Resource configuration selected by i10nm_cpuids[] for steppings 0x0-0x3 of
 * ATOM_TREMONT_D and ICELAKE_X. It differs from i10nm_cfg1 only in
 * .busno_cfg_offset (0xcc here, 0xd0 there).
 *
 * The *_bdf triples are presumably { index into d->bus[], PCI device,
 * PCI function } -- the field order is declared outside this file, TODO
 * confirm. No HBM fields and no second demand offset table are set.
 */
865 static struct res_config i10nm_cfg0 = {
866 	.type			= I10NM,
867 	.decs_did		= 0x3452,
868 	.busno_cfg_offset	= 0xcc,
869 	.ddr_imc_num		= 4,
870 	.ddr_chan_num		= 2,
871 	.ddr_dimm_num		= 2,
872 	.ddr_chan_mmio_sz	= 0x4000,
873 	.sad_all_bdf		= {1, 29, 0},
874 	.pcu_cr3_bdf		= {1, 30, 3},
875 	.util_all_bdf		= {1, 29, 1},
876 	.uracu_bdf		= {0, 0, 1},
877 	.ddr_mdev_bdf		= {0, 12, 0},
878 	.hbm_mdev_bdf		= {0, 12, 1},
879 	.sad_all_offset		= 0x108,
880 	.offsets_scrub		= offsets_scrub_icx,
881 	.offsets_demand		= offsets_demand_icx,
882 };
883 
/*
 * Resource configuration selected by i10nm_cpuids[] for steppings 0x4-0xf of
 * ATOM_TREMONT_D and ICELAKE_X and for all steppings of ICELAKE_D. It is
 * identical to i10nm_cfg0 except for .busno_cfg_offset (0xd0 here, 0xcc
 * there).
 */
884 static struct res_config i10nm_cfg1 = {
885 	.type			= I10NM,
886 	.decs_did		= 0x3452,
887 	.busno_cfg_offset	= 0xd0,
888 	.ddr_imc_num		= 4,
889 	.ddr_chan_num		= 2,
890 	.ddr_dimm_num		= 2,
891 	.ddr_chan_mmio_sz	= 0x4000,
892 	.sad_all_bdf		= {1, 29, 0},
893 	.pcu_cr3_bdf		= {1, 30, 3},
894 	.util_all_bdf		= {1, 29, 1},
895 	.uracu_bdf		= {0, 0, 1},
896 	.ddr_mdev_bdf		= {0, 12, 0},
897 	.hbm_mdev_bdf		= {0, 12, 1},
898 	.sad_all_offset		= 0x108,
899 	.offsets_scrub		= offsets_scrub_icx,
900 	.offsets_demand		= offsets_demand_icx,
901 };
902 
/*
 * Resource configuration selected by i10nm_cpuids[] for SAPPHIRERAPIDS_X and
 * EMERALDRAPIDS_X. It is the only configuration in this file that describes
 * HBM memory controllers (hbm_* counts, hbm0/hbm1 offset tables) and a
 * second demand register set (.offsets_demand2).
 */
903 static struct res_config spr_cfg = {
904 	.type			= SPR,
905 	.decs_did		= 0x3252,
906 	.busno_cfg_offset	= 0xd0,
907 	.ddr_imc_num		= 4,
908 	.ddr_chan_num		= 2,
909 	.ddr_dimm_num		= 2,
910 	.hbm_imc_num		= 16,
911 	.hbm_chan_num		= 2,
912 	.hbm_dimm_num		= 1,
913 	.ddr_chan_mmio_sz	= 0x8000,
914 	.hbm_chan_mmio_sz	= 0x4000,
915 	.support_ddr5		= true,
916 	.sad_all_bdf		= {1, 10, 0},
917 	.pcu_cr3_bdf		= {1, 30, 3},
918 	.util_all_bdf		= {1, 29, 1},
919 	.uracu_bdf		= {0, 0, 1},
920 	.ddr_mdev_bdf		= {0, 12, 0},
921 	.hbm_mdev_bdf		= {0, 12, 1},
922 	.sad_all_offset		= 0x300,
923 	.offsets_scrub		= offsets_scrub_spr,
924 	.offsets_scrub_hbm0	= offsets_scrub_spr_hbm0,
925 	.offsets_scrub_hbm1	= offsets_scrub_spr_hbm1,
926 	.offsets_demand		= offsets_demand_spr,
927 	.offsets_demand2	= offsets_demand2_spr,
928 	.offsets_demand_hbm0	= offsets_demand_spr_hbm0,
929 	.offsets_demand_hbm1	= offsets_demand_spr_hbm1,
930 };
931 
/*
 * Resource configuration selected by i10nm_cpuids[] for GRANITERAPIDS_X and
 * ATOM_CRESTMONT_X.
 *
 * .ddr_imc_num is only the initial value: i10nm_get_imc_num() replaces it
 * with the number read from the hardware for the GNR type. No
 * retry_rd_err_log offset tables and no HBM fields are set here.
 */
932 static struct res_config gnr_cfg = {
933 	.type			= GNR,
934 	.decs_did		= 0x3252,
935 	.busno_cfg_offset	= 0xd0,
936 	.ddr_imc_num		= 12,
937 	.ddr_chan_num		= 1,
938 	.ddr_dimm_num		= 2,
939 	.ddr_chan_mmio_sz	= 0x4000,
940 	.support_ddr5		= true,
941 	.sad_all_bdf		= {0, 13, 0},
942 	.pcu_cr3_bdf		= {0, 5, 0},
943 	.util_all_bdf		= {0, 13, 1},
944 	.uracu_bdf		= {0, 0, 1},
945 	.ddr_mdev_bdf		= {0, 5, 1},
946 	.sad_all_offset		= 0x300,
947 };
948 
/*
 * CPU match table: maps CPU model and stepping range to the resource
 * configuration to use. Models with two entries (ATOM_TREMONT_D, ICELAKE_X)
 * switch from i10nm_cfg0 to i10nm_cfg1 at stepping 0x4. The table ends with
 * an empty entry and is exported for module autoloading through
 * MODULE_DEVICE_TABLE().
 */
949 static const struct x86_cpu_id i10nm_cpuids[] = {
950 	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D,	X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0),
951 	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_TREMONT_D,	X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1),
952 	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X,		X86_STEPPINGS(0x0, 0x3), &i10nm_cfg0),
953 	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_X,		X86_STEPPINGS(0x4, 0xf), &i10nm_cfg1),
954 	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ICELAKE_D,		X86_STEPPINGS(0x0, 0xf), &i10nm_cfg1),
955 	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SAPPHIRERAPIDS_X,	X86_STEPPINGS(0x0, 0xf), &spr_cfg),
956 	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(EMERALDRAPIDS_X,	X86_STEPPINGS(0x0, 0xf), &spr_cfg),
957 	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(GRANITERAPIDS_X,	X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
958 	X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(ATOM_CRESTMONT_X,	X86_STEPPINGS(0x0, 0xf), &gnr_cfg),
959 	{}
960 };
961 MODULE_DEVICE_TABLE(x86cpu, i10nm_cpuids);
962 
i10nm_check_ecc(struct skx_imc * imc,int chan)963 static bool i10nm_check_ecc(struct skx_imc *imc, int chan)
964 {
965 	u32 mcmtr;
966 
967 	mcmtr = I10NM_GET_MCMTR(imc, chan);
968 	edac_dbg(1, "ch%d mcmtr reg %x\n", chan, mcmtr);
969 
970 	return !!GET_BITFIELD(mcmtr, 2, 2);
971 }
972 
i10nm_get_dimm_config(struct mem_ctl_info * mci,struct res_config * cfg)973 static int i10nm_get_dimm_config(struct mem_ctl_info *mci,
974 				 struct res_config *cfg)
975 {
976 	struct skx_pvt *pvt = mci->pvt_info;
977 	struct skx_imc *imc = pvt->imc;
978 	u32 mtr, amap, mcddrtcfg = 0;
979 	struct dimm_info *dimm;
980 	int i, j, ndimms;
981 
982 	for (i = 0; i < imc->num_channels; i++) {
983 		if (!imc->mbase)
984 			continue;
985 
986 		ndimms = 0;
987 		amap = I10NM_GET_AMAP(imc, i);
988 
989 		if (res_cfg->type != GNR)
990 			mcddrtcfg = I10NM_GET_MCDDRTCFG(imc, i);
991 
992 		for (j = 0; j < imc->num_dimms; j++) {
993 			dimm = edac_get_dimm(mci, i, j, 0);
994 			mtr = I10NM_GET_DIMMMTR(imc, i, j);
995 			edac_dbg(1, "dimmmtr 0x%x mcddrtcfg 0x%x (mc%d ch%d dimm%d)\n",
996 				 mtr, mcddrtcfg, imc->mc, i, j);
997 
998 			if (IS_DIMM_PRESENT(mtr))
999 				ndimms += skx_get_dimm_info(mtr, 0, amap, dimm,
1000 							    imc, i, j, cfg);
1001 			else if (IS_NVDIMM_PRESENT(mcddrtcfg, j))
1002 				ndimms += skx_get_nvdimm_info(dimm, imc, i, j,
1003 							      EDAC_MOD_STR);
1004 		}
1005 		if (ndimms && !i10nm_check_ecc(imc, i)) {
1006 			i10nm_printk(KERN_ERR, "ECC is disabled on imc %d channel %d\n",
1007 				     imc->mc, i);
1008 			return -ENODEV;
1009 		}
1010 	}
1011 
1012 	return 0;
1013 }
1014 
/*
 * MCE decode-chain notifier.  i10nm_init() registers it and i10nm_exit()
 * unregisters it; events are handed to skx_mce_check_error() with the
 * MCE_PRIO_EDAC priority.
 */
1015 static struct notifier_block i10nm_mce_dec = {
1016 	.notifier_call	= skx_mce_check_error,
1017 	.priority	= MCE_PRIO_EDAC,
1018 };
1019 
1020 #ifdef CONFIG_EDAC_DEBUG
1021 /*
1022  * Debug feature.
1023  * Exercise the address decode logic by writing an address to
1024  * /sys/kernel/debug/edac/i10nm_test/addr.
1025  */
/*
 * debugfs directory created by setup_i10nm_debug(); reset to NULL there when
 * the "addr" file cannot be created.
 */
1026 static struct dentry *i10nm_test;
1027 
debugfs_u64_set(void * data,u64 val)1028 static int debugfs_u64_set(void *data, u64 val)
1029 {
1030 	struct mce m;
1031 
1032 	pr_warn_once("Fake error to 0x%llx injected via debugfs\n", val);
1033 
1034 	memset(&m, 0, sizeof(m));
1035 	/* ADDRV + MemRd + Unknown channel */
1036 	m.status = MCI_STATUS_ADDRV + 0x90;
1037 	/* One corrected error */
1038 	m.status |= BIT_ULL(MCI_STATUS_CEC_SHIFT);
1039 	m.addr = val;
1040 	skx_mce_check_error(NULL, 0, &m);
1041 
1042 	return 0;
1043 }
1044 DEFINE_SIMPLE_ATTRIBUTE(fops_u64_wo, NULL, debugfs_u64_set, "%llu\n");
1045 
setup_i10nm_debug(void)1046 static void setup_i10nm_debug(void)
1047 {
1048 	i10nm_test = edac_debugfs_create_dir("i10nm_test");
1049 	if (!i10nm_test)
1050 		return;
1051 
1052 	if (!edac_debugfs_create_file("addr", 0200, i10nm_test,
1053 				      NULL, &fops_u64_wo)) {
1054 		debugfs_remove(i10nm_test);
1055 		i10nm_test = NULL;
1056 	}
1057 }
1058 
teardown_i10nm_debug(void)1059 static void teardown_i10nm_debug(void)
1060 {
1061 	debugfs_remove_recursive(i10nm_test);
1062 }
1063 #else
/* No-op stand-ins used when CONFIG_EDAC_DEBUG is not set. */
static inline void setup_i10nm_debug(void)
{
}

static inline void teardown_i10nm_debug(void)
{
}
1066 #endif /*CONFIG_EDAC_DEBUG*/
1067 
i10nm_init(void)1068 static int __init i10nm_init(void)
1069 {
1070 	u8 mc = 0, src_id = 0, node_id = 0;
1071 	const struct x86_cpu_id *id;
1072 	struct res_config *cfg;
1073 	const char *owner;
1074 	struct skx_dev *d;
1075 	int rc, i, off[3] = {0xd0, 0xc8, 0xcc};
1076 	u64 tolm, tohm;
1077 	int imc_num;
1078 
1079 	edac_dbg(2, "\n");
1080 
1081 	if (ghes_get_devices())
1082 		return -EBUSY;
1083 
1084 	owner = edac_get_owner();
1085 	if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
1086 		return -EBUSY;
1087 
1088 	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
1089 		return -ENODEV;
1090 
1091 	id = x86_match_cpu(i10nm_cpuids);
1092 	if (!id)
1093 		return -ENODEV;
1094 
1095 	cfg = (struct res_config *)id->driver_data;
1096 	skx_set_res_cfg(cfg);
1097 	res_cfg = cfg;
1098 
1099 	rc = skx_get_hi_lo(0x09a2, off, &tolm, &tohm);
1100 	if (rc)
1101 		return rc;
1102 
1103 	rc = skx_get_all_bus_mappings(cfg, &i10nm_edac_list);
1104 	if (rc < 0)
1105 		goto fail;
1106 	if (rc == 0) {
1107 		i10nm_printk(KERN_ERR, "No memory controllers found\n");
1108 		return -ENODEV;
1109 	}
1110 
1111 	rc = i10nm_get_imc_num(cfg);
1112 	if (rc < 0)
1113 		goto fail;
1114 
1115 	mem_cfg_2lm = i10nm_check_2lm(cfg);
1116 	skx_set_mem_cfg(mem_cfg_2lm);
1117 
1118 	rc = i10nm_get_ddr_munits();
1119 
1120 	if (i10nm_get_hbm_munits() && rc)
1121 		goto fail;
1122 
1123 	imc_num = res_cfg->ddr_imc_num + res_cfg->hbm_imc_num;
1124 
1125 	list_for_each_entry(d, i10nm_edac_list, list) {
1126 		rc = skx_get_src_id(d, 0xf8, &src_id);
1127 		if (rc < 0)
1128 			goto fail;
1129 
1130 		rc = skx_get_node_id(d, &node_id);
1131 		if (rc < 0)
1132 			goto fail;
1133 
1134 		edac_dbg(2, "src_id = %d node_id = %d\n", src_id, node_id);
1135 		for (i = 0; i < imc_num; i++) {
1136 			if (!d->imc[i].mdev)
1137 				continue;
1138 
1139 			d->imc[i].mc  = mc++;
1140 			d->imc[i].lmc = i;
1141 			d->imc[i].src_id  = src_id;
1142 			d->imc[i].node_id = node_id;
1143 			if (d->imc[i].hbm_mc) {
1144 				d->imc[i].chan_mmio_sz = cfg->hbm_chan_mmio_sz;
1145 				d->imc[i].num_channels = cfg->hbm_chan_num;
1146 				d->imc[i].num_dimms    = cfg->hbm_dimm_num;
1147 			} else {
1148 				d->imc[i].chan_mmio_sz = cfg->ddr_chan_mmio_sz;
1149 				d->imc[i].num_channels = cfg->ddr_chan_num;
1150 				d->imc[i].num_dimms    = cfg->ddr_dimm_num;
1151 			}
1152 
1153 			rc = skx_register_mci(&d->imc[i], d->imc[i].mdev,
1154 					      "Intel_10nm Socket", EDAC_MOD_STR,
1155 					      i10nm_get_dimm_config, cfg);
1156 			if (rc < 0)
1157 				goto fail;
1158 		}
1159 	}
1160 
1161 	rc = skx_adxl_get();
1162 	if (rc)
1163 		goto fail;
1164 
1165 	opstate_init();
1166 	mce_register_decode_chain(&i10nm_mce_dec);
1167 	setup_i10nm_debug();
1168 
1169 	if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
1170 		skx_set_decode(i10nm_mc_decode, show_retry_rd_err_log);
1171 		if (retry_rd_err_log == 2)
1172 			enable_retry_rd_err_log(true);
1173 	} else {
1174 		skx_set_decode(i10nm_mc_decode, NULL);
1175 	}
1176 
1177 	i10nm_printk(KERN_INFO, "%s\n", I10NM_REVISION);
1178 
1179 	return 0;
1180 fail:
1181 	skx_remove();
1182 	return rc;
1183 }
1184 
i10nm_exit(void)1185 static void __exit i10nm_exit(void)
1186 {
1187 	edac_dbg(2, "\n");
1188 
1189 	if (retry_rd_err_log && res_cfg->offsets_scrub && res_cfg->offsets_demand) {
1190 		skx_set_decode(NULL, NULL);
1191 		if (retry_rd_err_log == 2)
1192 			enable_retry_rd_err_log(false);
1193 	}
1194 
1195 	teardown_i10nm_debug();
1196 	mce_unregister_decode_chain(&i10nm_mce_dec);
1197 	skx_adxl_put();
1198 	skx_remove();
1199 }
1200 
/* Register i10nm_init()/i10nm_exit() as the module's init and exit routines. */
1201 module_init(i10nm_init);
1202 module_exit(i10nm_exit);
1203 
set_decoding_via_mca(const char * buf,const struct kernel_param * kp)1204 static int set_decoding_via_mca(const char *buf, const struct kernel_param *kp)
1205 {
1206 	unsigned long val;
1207 	int ret;
1208 
1209 	ret = kstrtoul(buf, 0, &val);
1210 
1211 	if (ret || val > 1)
1212 		return -EINVAL;
1213 
1214 	if (val && mem_cfg_2lm) {
1215 		i10nm_printk(KERN_NOTICE, "Decoding errors via MCA banks for 2LM isn't supported yet\n");
1216 		return -EIO;
1217 	}
1218 
1219 	ret = param_set_int(buf, kp);
1220 
1221 	return ret;
1222 }
1223 
/*
 * Parameter ops for decoding_via_mca: writes are validated by
 * set_decoding_via_mca(), reads use the generic int getter.
 */
1224 static const struct kernel_param_ops decoding_via_mca_param_ops = {
1225 	.set = set_decoding_via_mca,
1226 	.get = param_get_int,
1227 };
1228 
/* decoding_via_mca: mode 0644, set through decoding_via_mca_param_ops. */
1229 module_param_cb(decoding_via_mca, &decoding_via_mca_param_ops, &decoding_via_mca, 0644);
1230 MODULE_PARM_DESC(decoding_via_mca, "decoding_via_mca: 0=off(default), 1=enable");
1231 
/* retry_rd_err_log: plain int parameter, mode 0444 (read-only in sysfs). */
1232 module_param(retry_rd_err_log, int, 0444);
1233 MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)");
1234 
1235 MODULE_LICENSE("GPL v2");
1236 MODULE_DESCRIPTION("MC Driver for Intel 10nm server processors");
1237