1 /* SPDX-License-Identifier: GPL-2.0 */ 2 /* 3 * Common codes for both the skx_edac driver and Intel 10nm server EDAC driver. 4 * Originally split out from the skx_edac driver. 5 * 6 * Copyright (c) 2018, Intel Corporation. 7 */ 8 9 #ifndef _SKX_COMM_EDAC_H 10 #define _SKX_COMM_EDAC_H 11 12 #include <linux/bits.h> 13 #include <asm/mce.h> 14 15 #define MSG_SIZE 1024 16 17 /* 18 * Debug macros 19 */ 20 #define skx_printk(level, fmt, arg...) \ 21 edac_printk(level, "skx", fmt, ##arg) 22 23 #define skx_mc_printk(mci, level, fmt, arg...) \ 24 edac_mc_chipset_printk(mci, level, "skx", fmt, ##arg) 25 26 /* 27 * Get a bit field at register value <v>, from bit <lo> to bit <hi> 28 */ 29 #define GET_BITFIELD(v, lo, hi) \ 30 (((v) & GENMASK_ULL((hi), (lo))) >> (lo)) 31 32 #define SKX_NUM_IMC 2 /* Memory controllers per socket */ 33 #define SKX_NUM_CHANNELS 3 /* Channels per memory controller */ 34 #define SKX_NUM_DIMMS 2 /* Max DIMMS per channel */ 35 36 #define I10NM_NUM_DDR_IMC 12 37 #define I10NM_NUM_DDR_CHANNELS 2 38 #define I10NM_NUM_DDR_DIMMS 2 39 40 #define I10NM_NUM_HBM_IMC 16 41 #define I10NM_NUM_HBM_CHANNELS 2 42 #define I10NM_NUM_HBM_DIMMS 1 43 44 #define I10NM_NUM_IMC (I10NM_NUM_DDR_IMC + I10NM_NUM_HBM_IMC) 45 #define I10NM_NUM_CHANNELS MAX(I10NM_NUM_DDR_CHANNELS, I10NM_NUM_HBM_CHANNELS) 46 #define I10NM_NUM_DIMMS MAX(I10NM_NUM_DDR_DIMMS, I10NM_NUM_HBM_DIMMS) 47 48 #define MAX(a, b) ((a) > (b) ? (a) : (b)) 49 #define NUM_IMC MAX(SKX_NUM_IMC, I10NM_NUM_IMC) 50 #define NUM_CHANNELS MAX(SKX_NUM_CHANNELS, I10NM_NUM_CHANNELS) 51 #define NUM_DIMMS MAX(SKX_NUM_DIMMS, I10NM_NUM_DIMMS) 52 53 #define IS_DIMM_PRESENT(r) GET_BITFIELD(r, 15, 15) 54 #define IS_NVDIMM_PRESENT(r, i) GET_BITFIELD(r, i, i) 55 56 #define MCI_MISC_ECC_MODE(m) (((m) >> 59) & 15) 57 #define MCI_MISC_ECC_DDRT 8 /* read from DDRT */ 58 59 /* 60 * According to Intel Architecture spec vol 3B, 61 * Table 15-10 "IA32_MCi_Status [15:0] Compound Error Code Encoding" 62 * memory errors should fit one of these masks: 63 * 000f 0000 1mmm cccc (binary) 64 * 000f 0010 1mmm cccc (binary) [RAM used as cache] 65 * where: 66 * f = Correction Report Filtering Bit. If 1, subsequent errors 67 * won't be shown 68 * mmm = error type 69 * cccc = channel 70 */ 71 #define MCACOD_MEM_ERR_MASK 0xef80 72 /* 73 * Errors from either the memory of the 1-level memory system or the 74 * 2nd level memory (the slow "far" memory) of the 2-level memory system. 75 */ 76 #define MCACOD_MEM_CTL_ERR 0x80 77 /* 78 * Errors from the 1st level memory (the fast "near" memory as cache) 79 * of the 2-level memory system. 80 */ 81 #define MCACOD_EXT_MEM_ERR 0x280 82 83 /* Max RRL register sets per {,sub-,pseudo-}channel. */ 84 #define NUM_RRL_SET 3 85 86 /* 87 * Each cpu socket contains some pci devices that provide global 88 * information, and also some that are local to each of the two 89 * memory controllers on the die. 90 */ 91 struct skx_dev { 92 struct list_head list; 93 u8 bus[4]; 94 int seg; 95 struct pci_dev *sad_all; 96 struct pci_dev *util_all; 97 struct pci_dev *uracu; /* for i10nm CPU */ 98 struct pci_dev *pcu_cr3; /* for HBM memory detection */ 99 u32 mcroute; 100 /* 101 * Some server BIOS may hide certain memory controllers, and the 102 * EDAC driver skips those hidden memory controllers. However, the 103 * ADXL still decodes memory error address using physical memory 104 * controller indices. The mapping table is used to convert the 105 * physical indices (reported by ADXL) to the logical indices 106 * (used the EDAC driver) of present memory controllers during the 107 * error handling process. 108 */ 109 u8 mc_mapping[NUM_IMC]; 110 struct skx_imc { 111 struct mem_ctl_info *mci; 112 struct pci_dev *mdev; /* for i10nm CPU */ 113 void __iomem *mbase; /* for i10nm CPU */ 114 int chan_mmio_sz; /* for i10nm CPU */ 115 int num_channels; /* channels per memory controller */ 116 int num_dimms; /* dimms per channel */ 117 bool hbm_mc; 118 u8 mc; /* system wide mc# */ 119 u8 lmc; /* socket relative mc# */ 120 u8 src_id, node_id; 121 struct skx_channel { 122 struct pci_dev *cdev; 123 struct pci_dev *edev; 124 /* 125 * Two groups of RRL control registers per channel to save default RRL 126 * settings of two {sub-,pseudo-}channels in Linux RRL control mode. 127 */ 128 u32 rrl_ctl[2][NUM_RRL_SET]; 129 struct skx_dimm { 130 u8 close_pg; 131 u8 bank_xor_enable; 132 u8 fine_grain_bank; 133 u8 rowbits; 134 u8 colbits; 135 } dimms[NUM_DIMMS]; 136 } chan[NUM_CHANNELS]; 137 } imc[NUM_IMC]; 138 }; 139 140 struct skx_pvt { 141 struct skx_imc *imc; 142 }; 143 144 enum type { 145 SKX, 146 I10NM, 147 SPR, 148 GNR 149 }; 150 151 enum { 152 INDEX_SOCKET, 153 INDEX_MEMCTRL, 154 INDEX_CHANNEL, 155 INDEX_DIMM, 156 INDEX_CS, 157 INDEX_NM_FIRST, 158 INDEX_NM_MEMCTRL = INDEX_NM_FIRST, 159 INDEX_NM_CHANNEL, 160 INDEX_NM_DIMM, 161 INDEX_NM_CS, 162 INDEX_MAX 163 }; 164 165 enum error_source { 166 ERR_SRC_1LM, 167 ERR_SRC_2LM_NM, 168 ERR_SRC_2LM_FM, 169 ERR_SRC_NOT_MEMORY, 170 }; 171 172 #define BIT_NM_MEMCTRL BIT_ULL(INDEX_NM_MEMCTRL) 173 #define BIT_NM_CHANNEL BIT_ULL(INDEX_NM_CHANNEL) 174 #define BIT_NM_DIMM BIT_ULL(INDEX_NM_DIMM) 175 #define BIT_NM_CS BIT_ULL(INDEX_NM_CS) 176 177 struct decoded_addr { 178 struct mce *mce; 179 struct skx_dev *dev; 180 u64 addr; 181 int socket; 182 int imc; 183 int channel; 184 u64 chan_addr; 185 int sktways; 186 int chanways; 187 int dimm; 188 int cs; 189 int rank; 190 int channel_rank; 191 u64 rank_address; 192 int row; 193 int column; 194 int bank_address; 195 int bank_group; 196 bool decoded_by_adxl; 197 }; 198 199 struct pci_bdf { 200 u32 bus : 8; 201 u32 dev : 5; 202 u32 fun : 3; 203 }; 204 205 struct res_config { 206 enum type type; 207 /* Configuration agent device ID */ 208 unsigned int decs_did; 209 /* Default bus number configuration register offset */ 210 int busno_cfg_offset; 211 /* DDR memory controllers per socket */ 212 int ddr_imc_num; 213 /* DDR channels per DDR memory controller */ 214 int ddr_chan_num; 215 /* DDR DIMMs per DDR memory channel */ 216 int ddr_dimm_num; 217 /* Per DDR channel memory-mapped I/O size */ 218 int ddr_chan_mmio_sz; 219 /* HBM memory controllers per socket */ 220 int hbm_imc_num; 221 /* HBM channels per HBM memory controller */ 222 int hbm_chan_num; 223 /* HBM DIMMs per HBM memory channel */ 224 int hbm_dimm_num; 225 /* Per HBM channel memory-mapped I/O size */ 226 int hbm_chan_mmio_sz; 227 bool support_ddr5; 228 /* SAD device BDF */ 229 struct pci_bdf sad_all_bdf; 230 /* PCU device BDF */ 231 struct pci_bdf pcu_cr3_bdf; 232 /* UTIL device BDF */ 233 struct pci_bdf util_all_bdf; 234 /* URACU device BDF */ 235 struct pci_bdf uracu_bdf; 236 /* DDR mdev device BDF */ 237 struct pci_bdf ddr_mdev_bdf; 238 /* HBM mdev device BDF */ 239 struct pci_bdf hbm_mdev_bdf; 240 int sad_all_offset; 241 /* Offsets of retry_rd_err_log registers */ 242 u32 *offsets_scrub; 243 u32 *offsets_scrub_hbm0; 244 u32 *offsets_scrub_hbm1; 245 u32 *offsets_demand; 246 u32 *offsets_demand2; 247 u32 *offsets_demand_hbm0; 248 u32 *offsets_demand_hbm1; 249 }; 250 251 typedef int (*get_dimm_config_f)(struct mem_ctl_info *mci, 252 struct res_config *cfg); 253 typedef bool (*skx_decode_f)(struct decoded_addr *res); 254 typedef void (*skx_show_retry_log_f)(struct decoded_addr *res, char *msg, int len, bool scrub_err); 255 256 int skx_adxl_get(void); 257 void skx_adxl_put(void); 258 void skx_set_decode(skx_decode_f decode, skx_show_retry_log_f show_retry_log); 259 void skx_set_mem_cfg(bool mem_cfg_2lm); 260 void skx_set_res_cfg(struct res_config *cfg); 261 void skx_set_mc_mapping(struct skx_dev *d, u8 pmc, u8 lmc); 262 263 int skx_get_src_id(struct skx_dev *d, int off, u8 *id); 264 int skx_get_node_id(struct skx_dev *d, u8 *id); 265 266 int skx_get_all_bus_mappings(struct res_config *cfg, struct list_head **list); 267 268 int skx_get_hi_lo(unsigned int did, int off[], u64 *tolm, u64 *tohm); 269 270 int skx_get_dimm_info(u32 mtr, u32 mcmtr, u32 amap, struct dimm_info *dimm, 271 struct skx_imc *imc, int chan, int dimmno, 272 struct res_config *cfg); 273 274 int skx_get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc, 275 int chan, int dimmno, const char *mod_str); 276 277 int skx_register_mci(struct skx_imc *imc, struct pci_dev *pdev, 278 const char *ctl_name, const char *mod_str, 279 get_dimm_config_f get_dimm_config, 280 struct res_config *cfg); 281 282 int skx_mce_check_error(struct notifier_block *nb, unsigned long val, 283 void *data); 284 285 void skx_remove(void); 286 287 #endif /* _SKX_COMM_EDAC_H */ 288