xref: /openbmc/linux/drivers/edac/skx_common.h (revision ba987eaa)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Common code for both the skx_edac driver and the Intel 10nm server
 * EDAC driver. Originally split out from the skx_edac driver.
 *
 * Copyright (c) 2018, Intel Corporation.
 */
888a242c9SQiuxu Zhuo 
988a242c9SQiuxu Zhuo #ifndef _SKX_COMM_EDAC_H
1088a242c9SQiuxu Zhuo #define _SKX_COMM_EDAC_H
1188a242c9SQiuxu Zhuo 
122f4348e5SQiuxu Zhuo #include <linux/bits.h>
132738c69aSYouquan Song #include <asm/mce.h>
142f4348e5SQiuxu Zhuo 
/*
 * NOTE(review): presumably the size, in bytes, of the buffers used to
 * build error messages - confirm at the users of MSG_SIZE.
 */
#define MSG_SIZE		1024

/*
 * Debug macros
 *
 * Thin wrappers that forward their arguments to edac_printk() and
 * edac_mc_chipset_printk() with the fixed "skx" prefix string.
 */
#define skx_printk(level, fmt, arg...)			\
	edac_printk(level, "skx", fmt, ##arg)

#define skx_mc_printk(mci, level, fmt, arg...)		\
	edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg)
2588a242c9SQiuxu Zhuo 
/*
 * Get a bit field at register value <v>, from bit <lo> to bit <hi>.
 *
 * Both bounds are inclusive. The selected bits are shifted down so that
 * bit <lo> of <v> ends up at bit 0 of the result.
 *
 * <lo> is expanded twice, so it must not have side effects.
 */
#define GET_BITFIELD(v, lo, hi) \
	(((v) & GENMASK_ULL((hi), (lo))) >> (lo))
3188a242c9SQiuxu Zhuo 
/*
 * Compile-time maximum of two constant expressions. Defined before its
 * first use below. Both arguments are expanded twice, so only pass
 * side-effect-free expressions.
 */
#define MAX(a, b)	((a) > (b) ? (a) : (b))

#define SKX_NUM_IMC		2	/* Memory controllers per socket */
#define SKX_NUM_CHANNELS	3	/* Channels per memory controller */
#define SKX_NUM_DIMMS		2	/* Max DIMMS per channel */

#define I10NM_NUM_DDR_IMC	12
#define I10NM_NUM_DDR_CHANNELS	2
#define I10NM_NUM_DDR_DIMMS	2

#define I10NM_NUM_HBM_IMC	16
#define I10NM_NUM_HBM_CHANNELS	2
#define I10NM_NUM_HBM_DIMMS	1

/* i10nm totals: DDR plus HBM controllers; larger of DDR/HBM otherwise */
#define I10NM_NUM_IMC		(I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC)
#define I10NM_NUM_CHANNELS	MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
#define I10NM_NUM_DIMMS		MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)

/* Larger of the SKX and i10nm values for each dimension */
#define NUM_IMC		MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
#define NUM_CHANNELS	MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS)
#define NUM_DIMMS	MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS)
5288a242c9SQiuxu Zhuo 
/* Bit 15 of register value <r> */
#define IS_DIMM_PRESENT(r)		GET_BITFIELD(r, 15, 15)
/* Bit <i> of register value <r> */
#define IS_NVDIMM_PRESENT(r, i)		GET_BITFIELD(r, i, i)

/* Extract the 4-bit field at bits 62:59 of <m> */
#define MCI_MISC_ECC_MODE(m)	(((m) >> 59) & 15)
#define MCI_MISC_ECC_DDRT	8	/* read from DDRT */
582738c69aSYouquan Song 
/*
 * According to Intel Architecture spec vol 3B,
 * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
 * memory errors should fit one of these masks:
 *	000f 0000 1mmm cccc (binary)
 *	000f 0010 1mmm cccc (binary)	[RAM used as cache]
 * where:
 *	f = Correction Report Filtering Bit. If 1, subsequent errors
 *	    won't be shown
 *	mmm = error type
 *	cccc = channel
 *
 * The mask below clears the f, mmm and cccc bits and keeps every other
 * bit of the 16-bit code.
 */
#define MCACOD_MEM_ERR_MASK	0xef80
/*
 * Errors from either the memory of the 1-level memory system or the
 * 2nd level memory (the slow "far" memory) of the 2-level memory system.
 */
#define MCACOD_MEM_CTL_ERR	0x80
/*
 * Errors from the 1st level memory (the fast "near" memory as cache)
 * of the 2-level memory system.
 */
#define MCACOD_EXT_MEM_ERR	0x280
826e8746cbSQiuxu Zhuo 
836e8746cbSQiuxu Zhuo /*
8488a242c9SQiuxu Zhuo  * Each cpu socket contains some pci devices that provide global
8588a242c9SQiuxu Zhuo  * information, and also some that are local to each of the two
8688a242c9SQiuxu Zhuo  * memory controllers on the die.
8788a242c9SQiuxu Zhuo  */
8888a242c9SQiuxu Zhuo struct skx_dev {
8988a242c9SQiuxu Zhuo 	struct list_head list;
9088a242c9SQiuxu Zhuo 	u8 bus[4];
9188a242c9SQiuxu Zhuo 	int seg;
9288a242c9SQiuxu Zhuo 	struct pci_dev *sad_all;
9388a242c9SQiuxu Zhuo 	struct pci_dev *util_all;
9488a242c9SQiuxu Zhuo 	struct pci_dev *uracu; /* for i10nm CPU */
95c9450883SQiuxu Zhuo 	struct pci_dev *pcu_cr3; /* for HBM memory detection */
9688a242c9SQiuxu Zhuo 	u32 mcroute;
9788a242c9SQiuxu Zhuo 	struct skx_imc {
9888a242c9SQiuxu Zhuo 		struct mem_ctl_info *mci;
9988a242c9SQiuxu Zhuo 		struct pci_dev *mdev; /* for i10nm CPU */
10088a242c9SQiuxu Zhuo 		void __iomem *mbase;  /* for i10nm CPU */
101479f58ddSQiuxu Zhuo 		int chan_mmio_sz;     /* for i10nm CPU */
102c9450883SQiuxu Zhuo 		int num_channels; /* channels per memory controller */
103c9450883SQiuxu Zhuo 		int num_dimms; /* dimms per channel */
104c9450883SQiuxu Zhuo 		bool hbm_mc;
10588a242c9SQiuxu Zhuo 		u8 mc;	/* system wide mc# */
10688a242c9SQiuxu Zhuo 		u8 lmc;	/* socket relative mc# */
10788a242c9SQiuxu Zhuo 		u8 src_id, node_id;
10888a242c9SQiuxu Zhuo 		struct skx_channel {
10988a242c9SQiuxu Zhuo 			struct pci_dev	*cdev;
110e80634a7STony Luck 			struct pci_dev	*edev;
111cf4e6d52SYouquan Song 			u32 retry_rd_err_log_s;
112cf4e6d52SYouquan Song 			u32 retry_rd_err_log_d;
113d5f5e499SQiuxu Zhuo 			u32 retry_rd_err_log_d2;
11488a242c9SQiuxu Zhuo 			struct skx_dimm {
11588a242c9SQiuxu Zhuo 				u8 close_pg;
11688a242c9SQiuxu Zhuo 				u8 bank_xor_enable;
11788a242c9SQiuxu Zhuo 				u8 fine_grain_bank;
11888a242c9SQiuxu Zhuo 				u8 rowbits;
11988a242c9SQiuxu Zhuo 				u8 colbits;
12088a242c9SQiuxu Zhuo 			} dimms[NUM_DIMMS];
12188a242c9SQiuxu Zhuo 		} chan[NUM_CHANNELS];
12288a242c9SQiuxu Zhuo 	} imc[NUM_IMC];
12388a242c9SQiuxu Zhuo };
12488a242c9SQiuxu Zhuo 
/*
 * Wrapper holding a pointer to one memory controller descriptor.
 * NOTE(review): presumably used as the private data of a mem_ctl_info -
 * confirm at the allocation site.
 */
struct skx_pvt {
	struct skx_imc	*imc;
};
12888a242c9SQiuxu Zhuo 
/* Platform type identifier, stored in res_config.type */
enum type {
	SKX,
	I10NM,
	SPR,
	GNR
};
13588a242c9SQiuxu Zhuo 
/*
 * Indices of the decoded address components. The INDEX_NM_* entries
 * start at INDEX_NM_FIRST, which aliases INDEX_NM_MEMCTRL. INDEX_MAX is
 * the number of distinct indices.
 */
enum {
	INDEX_SOCKET,
	INDEX_MEMCTRL,
	INDEX_CHANNEL,
	INDEX_DIMM,
	INDEX_CS,
	INDEX_NM_FIRST,
	INDEX_NM_MEMCTRL = INDEX_NM_FIRST,
	INDEX_NM_CHANNEL,
	INDEX_NM_DIMM,
	INDEX_NM_CS,
	INDEX_MAX
};

/* Single-bit masks, one per INDEX_NM_* entry */
#define BIT_NM_MEMCTRL	BIT_ULL(INDEX_NM_MEMCTRL)
#define BIT_NM_CHANNEL	BIT_ULL(INDEX_NM_CHANNEL)
#define BIT_NM_DIMM	BIT_ULL(INDEX_NM_DIMM)
#define BIT_NM_CS	BIT_ULL(INDEX_NM_CS)
1542f4348e5SQiuxu Zhuo 
15588a242c9SQiuxu Zhuo struct decoded_addr {
1562738c69aSYouquan Song 	struct mce *mce;
15788a242c9SQiuxu Zhuo 	struct skx_dev *dev;
15888a242c9SQiuxu Zhuo 	u64	addr;
15988a242c9SQiuxu Zhuo 	int	socket;
16088a242c9SQiuxu Zhuo 	int	imc;
16188a242c9SQiuxu Zhuo 	int	channel;
16288a242c9SQiuxu Zhuo 	u64	chan_addr;
16388a242c9SQiuxu Zhuo 	int	sktways;
16488a242c9SQiuxu Zhuo 	int	chanways;
16588a242c9SQiuxu Zhuo 	int	dimm;
16614646de4SQiuxu Zhuo 	int	cs;
16788a242c9SQiuxu Zhuo 	int	rank;
16888a242c9SQiuxu Zhuo 	int	channel_rank;
16988a242c9SQiuxu Zhuo 	u64	rank_address;
17088a242c9SQiuxu Zhuo 	int	row;
17188a242c9SQiuxu Zhuo 	int	column;
17288a242c9SQiuxu Zhuo 	int	bank_address;
17388a242c9SQiuxu Zhuo 	int	bank_group;
174fe32f366SQiuxu Zhuo 	bool	decoded_by_adxl;
17588a242c9SQiuxu Zhuo };
17688a242c9SQiuxu Zhuo 
177dd7814b7SQiuxu Zhuo struct pci_bdf {
178dd7814b7SQiuxu Zhuo 	u32 bus : 8;
179dd7814b7SQiuxu Zhuo 	u32 dev : 5;
180dd7814b7SQiuxu Zhuo 	u32 fun : 3;
181dd7814b7SQiuxu Zhuo };
182dd7814b7SQiuxu Zhuo 
183ee5340abSQiuxu Zhuo struct res_config {
184ee5340abSQiuxu Zhuo 	enum type type;
185ee5340abSQiuxu Zhuo 	/* Configuration agent device ID */
186ee5340abSQiuxu Zhuo 	unsigned int decs_did;
187ee5340abSQiuxu Zhuo 	/* Default bus number configuration register offset */
188ee5340abSQiuxu Zhuo 	int busno_cfg_offset;
189dd7814b7SQiuxu Zhuo 	/* DDR memory controllers per socket */
190dd7814b7SQiuxu Zhuo 	int ddr_imc_num;
191dd7814b7SQiuxu Zhuo 	/* DDR channels per DDR memory controller */
192dd7814b7SQiuxu Zhuo 	int ddr_chan_num;
193dd7814b7SQiuxu Zhuo 	/* DDR DIMMs per DDR memory channel */
194dd7814b7SQiuxu Zhuo 	int ddr_dimm_num;
195479f58ddSQiuxu Zhuo 	/* Per DDR channel memory-mapped I/O size */
196479f58ddSQiuxu Zhuo 	int ddr_chan_mmio_sz;
197dd7814b7SQiuxu Zhuo 	/* HBM memory controllers per socket */
198dd7814b7SQiuxu Zhuo 	int hbm_imc_num;
199dd7814b7SQiuxu Zhuo 	/* HBM channels per HBM memory controller */
200dd7814b7SQiuxu Zhuo 	int hbm_chan_num;
201dd7814b7SQiuxu Zhuo 	/* HBM DIMMs per HBM memory channel */
202dd7814b7SQiuxu Zhuo 	int hbm_dimm_num;
203c9450883SQiuxu Zhuo 	/* Per HBM channel memory-mapped I/O size */
204c9450883SQiuxu Zhuo 	int hbm_chan_mmio_sz;
205479f58ddSQiuxu Zhuo 	bool support_ddr5;
206dd7814b7SQiuxu Zhuo 	/* SAD device BDF */
207dd7814b7SQiuxu Zhuo 	struct pci_bdf sad_all_bdf;
208dd7814b7SQiuxu Zhuo 	/* PCU device BDF */
209dd7814b7SQiuxu Zhuo 	struct pci_bdf pcu_cr3_bdf;
210dd7814b7SQiuxu Zhuo 	/* UTIL device BDF */
211dd7814b7SQiuxu Zhuo 	struct pci_bdf util_all_bdf;
212dd7814b7SQiuxu Zhuo 	/* URACU device BDF */
213dd7814b7SQiuxu Zhuo 	struct pci_bdf uracu_bdf;
214dd7814b7SQiuxu Zhuo 	/* DDR mdev device BDF */
215dd7814b7SQiuxu Zhuo 	struct pci_bdf ddr_mdev_bdf;
216dd7814b7SQiuxu Zhuo 	/* HBM mdev device BDF */
217dd7814b7SQiuxu Zhuo 	struct pci_bdf hbm_mdev_bdf;
2184bd4d32eSQiuxu Zhuo 	int sad_all_offset;
219cf4e6d52SYouquan Song 	/* Offsets of retry_rd_err_log registers */
220cf4e6d52SYouquan Song 	u32 *offsets_scrub;
221acd4cf68SQiuxu Zhuo 	u32 *offsets_scrub_hbm0;
222acd4cf68SQiuxu Zhuo 	u32 *offsets_scrub_hbm1;
223cf4e6d52SYouquan Song 	u32 *offsets_demand;
224d5f5e499SQiuxu Zhuo 	u32 *offsets_demand2;
225acd4cf68SQiuxu Zhuo 	u32 *offsets_demand_hbm0;
226acd4cf68SQiuxu Zhuo 	u32 *offsets_demand_hbm1;
227ee5340abSQiuxu Zhuo };
228ee5340abSQiuxu Zhuo 
229479f58ddSQiuxu Zhuo typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
230479f58ddSQiuxu Zhuo 				 struct res_config *cfg);
23188a242c9SQiuxu Zhuo typedef bool (*skx_decode_f)(struct decoded_addr *res);
232cf4e6d52SYouquan Song typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err);
23388a242c9SQiuxu Zhuo 
23488a242c9SQiuxu Zhuo int __init skx_adxl_get(void);
23588a242c9SQiuxu Zhuo void __exit skx_adxl_put(void);
236e80634a7STony Luck void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
2372f4348e5SQiuxu Zhuo void skx_set_mem_cfg(bool mem_cfg_2lm);
23888a242c9SQiuxu Zhuo 
2391dc78f1fSQiuxu Zhuo int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
24088a242c9SQiuxu Zhuo int skx_get_node_id(struct skx_dev *d, u8 *id);
24188a242c9SQiuxu Zhuo 
242ee5340abSQiuxu Zhuo int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list);
24388a242c9SQiuxu Zhuo 
24488a242c9SQiuxu Zhuo int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm);
24588a242c9SQiuxu Zhuo 
24610320950SQiuxu Zhuo int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
247479f58ddSQiuxu Zhuo 		      struct skx_imc *imc, int chan, int dimmno,
248479f58ddSQiuxu Zhuo 		      struct res_config *cfg);
24988a242c9SQiuxu Zhuo 
25088a242c9SQiuxu Zhuo int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
25188a242c9SQiuxu Zhuo 			int chan, int dimmno, const char *mod_str);
25288a242c9SQiuxu Zhuo 
25388a242c9SQiuxu Zhuo int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
25488a242c9SQiuxu Zhuo 		     const char *ctl_name, const char *mod_str,
255479f58ddSQiuxu Zhuo 		     get_dimm_config_f get_dimm_config,
256479f58ddSQiuxu Zhuo 		     struct res_config *cfg);
25788a242c9SQiuxu Zhuo 
25888a242c9SQiuxu Zhuo int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
25988a242c9SQiuxu Zhuo 			void *data);
26088a242c9SQiuxu Zhuo 
26188a242c9SQiuxu Zhuo void skx_remove(void);
26288a242c9SQiuxu Zhuo 
26388a242c9SQiuxu Zhuo #endif /* _SKX_COMM_EDAC_H */
264