xref: /openbmc/linux/drivers/edac/skx_common.h (revision b694e3c604e999343258c49e574abd7be012e726)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
 * Common code shared by the skx_edac driver and the Intel 10nm server EDAC driver.
4  * Originally split out from the skx_edac driver.
5  *
6  * Copyright (c) 2018, Intel Corporation.
7  */
8 
9 #ifndef _SKX_COMM_EDAC_H
10 #define _SKX_COMM_EDAC_H
11 
12 #include <linux/bits.h>
13 #include <asm/mce.h>
14 
/* NOTE(review): presumably the byte size of error-message buffers — confirm at the users. */
#define MSG_SIZE		1024
16 
/*
 * Logging helpers: thin wrappers around the EDAC print macros that
 * always pass "skx" as the prefix argument.
 */
#define skx_printk(level, fmt, ...)				\
	edac_printk(level, "skx", fmt, ##__VA_ARGS__)

#define skx_mc_printk(mci, level, fmt, ...)			\
	edac_mc_chipset_printk(mci, level, "skx", fmt, ##__VA_ARGS__)
25 
/*
 * Get a bit field at register value <v>, from bit <lo> to bit <hi>
 * (both inclusive), shifted down so that the field starts at bit 0.
 *
 * <lo> appears twice in the expansion, so it must be free of side effects.
 */
#define GET_BITFIELD(v, lo, hi) \
	(((v) & GENMASK_ULL((hi), (lo))) >> (lo))
31 
/*
 * Plain conditional-expression maximum. Both arguments are evaluated
 * twice; it is only applied to the integer constants below.
 */
#define MAX(a, b)	((a) > (b) ? (a) : (b))

/* skx limits */
#define SKX_NUM_IMC		2	/* Memory controllers per socket */
#define SKX_NUM_CHANNELS	3	/* Channels per memory controller */
#define SKX_NUM_DIMMS		2	/* Max DIMMS per channel */

/* i10nm limits, DDR memory controllers */
#define I10NM_NUM_DDR_IMC	12
#define I10NM_NUM_DDR_CHANNELS	2
#define I10NM_NUM_DDR_DIMMS	2

/* i10nm limits, HBM memory controllers */
#define I10NM_NUM_HBM_IMC	16
#define I10NM_NUM_HBM_CHANNELS	2
#define I10NM_NUM_HBM_DIMMS	1

/* i10nm combined: controllers are summed, per-controller limits take the larger */
#define I10NM_NUM_IMC		(I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC)
#define I10NM_NUM_CHANNELS	MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS)
#define I10NM_NUM_DIMMS		MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS)

/* Largest value of each limit across the skx and i10nm sets */
#define NUM_IMC		MAX(SKX_NUM_IMC, I10NM_NUM_IMC)
#define NUM_CHANNELS	MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS)
#define NUM_DIMMS	MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS)
52 
/* Bit 15 of register value <r>, as a 0/1 value. */
#define IS_DIMM_PRESENT(r)		GET_BITFIELD(r, 15, 15)
/* Bit <i> of register value <r>, as a 0/1 value. */
#define IS_NVDIMM_PRESENT(r, i)		GET_BITFIELD(r, i, i)

/* The 4-bit field at bits 62:59 of <m>. */
#define MCI_MISC_ECC_MODE(m)	(((m) >> 59) & 0xf)
#define MCI_MISC_ECC_DDRT	8	/* read from DDRT */
58 
/*
 * According to Intel Architecture spec vol 3B,
 * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding"
 * memory errors should fit one of these masks:
 *	000f 0000 1mmm cccc (binary)
 *	000f 0010 1mmm cccc (binary)	[RAM used as cache]
 * where:
 *	f = Correction Report Filtering Bit. If 1, subsequent errors
 *	    won't be shown
 *	mmm = error type
 *	cccc = channel
 *
 * The mask below clears the f bit (bit 12) and the mmm/cccc fields
 * (bits 6:0). A masked code therefore equals MCACOD_MEM_CTL_ERR for the
 * first pattern and MCACOD_EXT_MEM_ERR for the second.
 */
#define MCACOD_MEM_ERR_MASK	0xef80
/*
 * Errors from either the memory of the 1-level memory system or the
 * 2nd level memory (the slow "far" memory) of the 2-level memory system.
 */
#define MCACOD_MEM_CTL_ERR	0x80
/*
 * Errors from the 1st level memory (the fast "near" memory as cache)
 * of the 2-level memory system.
 */
#define MCACOD_EXT_MEM_ERR	0x280
82 
83 /*
84  * Each cpu socket contains some pci devices that provide global
85  * information, and also some that are local to each of the two
86  * memory controllers on the die.
87  */
88 struct skx_dev {
89 	struct list_head list;
90 	u8 bus[4];
91 	int seg;
92 	struct pci_dev *sad_all;
93 	struct pci_dev *util_all;
94 	struct pci_dev *uracu; /* for i10nm CPU */
95 	struct pci_dev *pcu_cr3; /* for HBM memory detection */
96 	u32 mcroute;
97 	/*
98 	 * Some server BIOS may hide certain memory controllers, and the
99 	 * EDAC driver skips those hidden memory controllers. However, the
100 	 * ADXL still decodes memory error address using physical memory
101 	 * controller indices. The mapping table is used to convert the
102 	 * physical indices (reported by ADXL) to the logical indices
103 	 * (used the EDAC driver) of present memory controllers during the
104 	 * error handling process.
105 	 */
106 	u8 mc_mapping[NUM_IMC];
107 	struct skx_imc {
108 		struct mem_ctl_info *mci;
109 		struct pci_dev *mdev; /* for i10nm CPU */
110 		void __iomem *mbase;  /* for i10nm CPU */
111 		int chan_mmio_sz;     /* for i10nm CPU */
112 		int num_channels; /* channels per memory controller */
113 		int num_dimms; /* dimms per channel */
114 		bool hbm_mc;
115 		u8 mc;	/* system wide mc# */
116 		u8 lmc;	/* socket relative mc# */
117 		u8 src_id, node_id;
118 		struct skx_channel {
119 			struct pci_dev	*cdev;
120 			struct pci_dev	*edev;
121 			u32 retry_rd_err_log_s;
122 			u32 retry_rd_err_log_d;
123 			u32 retry_rd_err_log_d2;
124 			struct skx_dimm {
125 				u8 close_pg;
126 				u8 bank_xor_enable;
127 				u8 fine_grain_bank;
128 				u8 rowbits;
129 				u8 colbits;
130 			} dimms[NUM_DIMMS];
131 		} chan[NUM_CHANNELS];
132 	} imc[NUM_IMC];
133 };
134 
/*
 * Holder for a pointer to one struct skx_imc.
 * NOTE(review): presumably used as the private data of a mem_ctl_info so
 * handlers can get back to the owning controller — confirm at the users.
 */
struct skx_pvt {
	struct skx_imc	*imc;
};
138 
/* Platform selector stored in struct res_config. */
enum type {
	SKX	= 0,
	I10NM	= 1,
	SPR	= 2,
	GNR	= 3
};
145 
/*
 * Component indices.
 * NOTE(review): presumably positions of address components in a decode
 * result table — confirm at the users.
 *
 * The INDEX_NM_* entries start at INDEX_NM_FIRST (INDEX_NM_MEMCTRL is an
 * alias for it); INDEX_MAX is one past the last index.
 */
enum {
	INDEX_SOCKET		= 0,
	INDEX_MEMCTRL		= 1,
	INDEX_CHANNEL		= 2,
	INDEX_DIMM		= 3,
	INDEX_CS		= 4,
	INDEX_NM_FIRST		= 5,
	INDEX_NM_MEMCTRL	= INDEX_NM_FIRST,
	INDEX_NM_CHANNEL	= 6,
	INDEX_NM_DIMM		= 7,
	INDEX_NM_CS		= 8,
	INDEX_MAX
};
159 
/*
 * Where a reported error came from.
 * NOTE(review): 1LM / 2LM_NM / 2LM_FM presumably stand for 1-level memory,
 * 2-level-memory near memory and 2-level-memory far memory — confirm.
 */
enum error_source {
	ERR_SRC_1LM		= 0,
	ERR_SRC_2LM_NM		= 1,
	ERR_SRC_2LM_FM		= 2,
	ERR_SRC_NOT_MEMORY	= 3,
};
166 
/*
 * 64-bit single-bit masks; the set bit position is the value of the
 * matching INDEX_NM_* enumerator.
 */
#define BIT_NM_MEMCTRL	BIT_ULL(INDEX_NM_MEMCTRL)
#define BIT_NM_CHANNEL	BIT_ULL(INDEX_NM_CHANNEL)
#define BIT_NM_DIMM	BIT_ULL(INDEX_NM_DIMM)
#define BIT_NM_CS	BIT_ULL(INDEX_NM_CS)
171 
/*
 * Record describing one memory error address and the components it
 * decodes to. A pointer to it is the argument of the skx_decode_f and
 * skx_show_retry_log_f callbacks.
 */
struct decoded_addr {
	struct mce *mce;
	struct skx_dev *dev;
	u64	addr;
	int	socket;
	int	imc;
	int	channel;
	u64	chan_addr;
	int	sktways;
	int	chanways;
	int	dimm;
	int	cs;
	int	rank;
	int	channel_rank;
	u64	rank_address;
	int	row;
	int	column;
	int	bank_address;
	int	bank_group;
	/* NOTE(review): presumably set when ADXL rather than the driver decoder filled this in — confirm */
	bool	decoded_by_adxl;
};
193 
/*
 * PCI bus/device/function triple packed into bit-fields:
 * 8 bits of bus, 5 bits of device, 3 bits of function.
 */
struct pci_bdf {
	u32 bus : 8;
	u32 dev : 5;
	u32 fun : 3;
};
199 
/*
 * Per-platform resource description: device ID, topology counts, PCI
 * device locations and register offset tables. A pointer to it is taken
 * by skx_set_res_cfg(), skx_get_all_bus_mappings(), skx_get_dimm_info(),
 * skx_register_mci() and the get_dimm_config_f callback.
 */
struct res_config {
	/* Platform selector (one of enum type) */
	enum type type;
	/* Configuration agent device ID */
	unsigned int decs_did;
	/* Default bus number configuration register offset */
	int busno_cfg_offset;
	/* DDR memory controllers per socket */
	int ddr_imc_num;
	/* DDR channels per DDR memory controller */
	int ddr_chan_num;
	/* DDR DIMMs per DDR memory channel */
	int ddr_dimm_num;
	/* Per DDR channel memory-mapped I/O size */
	int ddr_chan_mmio_sz;
	/* HBM memory controllers per socket */
	int hbm_imc_num;
	/* HBM channels per HBM memory controller */
	int hbm_chan_num;
	/* HBM DIMMs per HBM memory channel */
	int hbm_dimm_num;
	/* Per HBM channel memory-mapped I/O size */
	int hbm_chan_mmio_sz;
	/* NOTE(review): presumably true when the platform supports DDR5 — confirm */
	bool support_ddr5;
	/* SAD device BDF */
	struct pci_bdf sad_all_bdf;
	/* PCU device BDF */
	struct pci_bdf pcu_cr3_bdf;
	/* UTIL device BDF */
	struct pci_bdf util_all_bdf;
	/* URACU device BDF */
	struct pci_bdf uracu_bdf;
	/* DDR mdev device BDF */
	struct pci_bdf ddr_mdev_bdf;
	/* HBM mdev device BDF */
	struct pci_bdf hbm_mdev_bdf;
	/* NOTE(review): presumably a register offset used with the SAD device — confirm */
	int sad_all_offset;
	/* Offsets of retry_rd_err_log registers */
	u32 *offsets_scrub;
	u32 *offsets_scrub_hbm0;
	u32 *offsets_scrub_hbm1;
	u32 *offsets_demand;
	u32 *offsets_demand2;
	u32 *offsets_demand_hbm0;
	u32 *offsets_demand_hbm1;
};
245 
/*
 * Callback types.
 *
 * get_dimm_config_f:    taken by skx_register_mci(); receives a mem_ctl_info
 *                       and a res_config and returns an int.
 * skx_decode_f:         taken by skx_set_decode(); receives a decoded_addr
 *                       and returns a bool.
 *                       NOTE(review): presumably true on a successful
 *                       decode — confirm.
 * skx_show_retry_log_f: taken by skx_set_decode(); receives a decoded_addr,
 *                       a character buffer <msg> with an int <len>, and a
 *                       scrub_err flag.
 */
typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci,
				 struct res_config *cfg);
typedef bool (*skx_decode_f)(struct decoded_addr *res);
typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err);
250 
/*
 * Functions declared for use by the drivers that include this header.
 * NOTE(review): the definitions are not visible here; the grouping notes
 * below are inferred from names and signatures only — confirm against the
 * implementation.
 */

/* ADXL get/put pair; skx_adxl_get() returns an int status. */
int skx_adxl_get(void);
void skx_adxl_put(void);

/* Setters for the decode/retry-log callbacks, the 2LM flag and the res_config. */
void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log);
void skx_set_mem_cfg(bool mem_cfg_2lm);
void skx_set_res_cfg(struct res_config *cfg);
/* Takes two memory controller numbers <pmc> and <lmc> for device <d>; see skx_dev.mc_mapping. */
void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc);

/* Write an id for device <d> through <id>; the int return is a status. */
int skx_get_src_id(struct skx_dev *d, int off, u8 *id);
int skx_get_node_id(struct skx_dev *d, u8 *id);

/* Hands back a list head through <list>. */
int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list);

/* Writes two 64-bit values through <tolm> and <tohm>. */
int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm);

/* Fill in <dimm> for DIMM <dimmno> on channel <chan> of <imc>. */
int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm,
		      struct skx_imc *imc, int chan, int dimmno,
		      struct res_config *cfg);

int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
			int chan, int dimmno, const char *mod_str);

/* Takes the memory controller <imc>, its PCI device and the get_dimm_config callback. */
int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev,
		     const char *ctl_name, const char *mod_str,
		     get_dimm_config_f get_dimm_config,
		     struct res_config *cfg);

/* Has the notifier-call signature (notifier_block, value, data). */
int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
			void *data);

void skx_remove(void);
281 
282 #endif /* _SKX_COMM_EDAC_H */
283