1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 */ 5 #include <linux/bitfield.h> 6 #include <linux/bitops.h> 7 #include <linux/edac.h> 8 #include <linux/of_irq.h> 9 #include <linux/platform_device.h> 10 #include <linux/spinlock.h> 11 #include "edac_module.h" 12 13 /* Registers Offset */ 14 #define AL_MC_ECC_CFG 0x70 15 #define AL_MC_ECC_CLEAR 0x7c 16 #define AL_MC_ECC_ERR_COUNT 0x80 17 #define AL_MC_ECC_CE_ADDR0 0x84 18 #define AL_MC_ECC_CE_ADDR1 0x88 19 #define AL_MC_ECC_UE_ADDR0 0xa4 20 #define AL_MC_ECC_UE_ADDR1 0xa8 21 #define AL_MC_ECC_CE_SYND0 0x8c 22 #define AL_MC_ECC_CE_SYND1 0x90 23 #define AL_MC_ECC_CE_SYND2 0x94 24 #define AL_MC_ECC_UE_SYND0 0xac 25 #define AL_MC_ECC_UE_SYND1 0xb0 26 #define AL_MC_ECC_UE_SYND2 0xb4 27 28 /* Registers Fields */ 29 #define AL_MC_ECC_CFG_SCRUB_DISABLED BIT(4) 30 31 #define AL_MC_ECC_CLEAR_UE_COUNT BIT(3) 32 #define AL_MC_ECC_CLEAR_CE_COUNT BIT(2) 33 #define AL_MC_ECC_CLEAR_UE_ERR BIT(1) 34 #define AL_MC_ECC_CLEAR_CE_ERR BIT(0) 35 36 #define AL_MC_ECC_ERR_COUNT_UE GENMASK(31, 16) 37 #define AL_MC_ECC_ERR_COUNT_CE GENMASK(15, 0) 38 39 #define AL_MC_ECC_CE_ADDR0_RANK GENMASK(25, 24) 40 #define AL_MC_ECC_CE_ADDR0_ROW GENMASK(17, 0) 41 42 #define AL_MC_ECC_CE_ADDR1_BG GENMASK(25, 24) 43 #define AL_MC_ECC_CE_ADDR1_BANK GENMASK(18, 16) 44 #define AL_MC_ECC_CE_ADDR1_COLUMN GENMASK(11, 0) 45 46 #define AL_MC_ECC_UE_ADDR0_RANK GENMASK(25, 24) 47 #define AL_MC_ECC_UE_ADDR0_ROW GENMASK(17, 0) 48 49 #define AL_MC_ECC_UE_ADDR1_BG GENMASK(25, 24) 50 #define AL_MC_ECC_UE_ADDR1_BANK GENMASK(18, 16) 51 #define AL_MC_ECC_UE_ADDR1_COLUMN GENMASK(11, 0) 52 53 #define DRV_NAME "al_mc_edac" 54 #define AL_MC_EDAC_MSG_MAX 256 55 56 struct al_mc_edac { 57 void __iomem *mmio_base; 58 spinlock_t lock; 59 int irq_ce; 60 int irq_ue; 61 }; 62 63 static void prepare_msg(char *message, size_t buffer_size, 64 enum hw_event_mc_err_type type, 65 u8 rank, u32 row, u8 bg, u8 bank, u16 column, 66 u32 syn0, u32 syn1, u32 syn2) 67 { 68 snprintf(message, buffer_size, 69 "%s rank=0x%x row=0x%x bg=0x%x bank=0x%x col=0x%x syn0: 0x%x syn1: 0x%x syn2: 0x%x", 70 type == HW_EVENT_ERR_UNCORRECTED ? "UE" : "CE", 71 rank, row, bg, bank, column, syn0, syn1, syn2); 72 } 73 74 static int handle_ce(struct mem_ctl_info *mci) 75 { 76 u32 eccerrcnt, ecccaddr0, ecccaddr1, ecccsyn0, ecccsyn1, ecccsyn2, row; 77 struct al_mc_edac *al_mc = mci->pvt_info; 78 char msg[AL_MC_EDAC_MSG_MAX]; 79 u16 ce_count, column; 80 unsigned long flags; 81 u8 rank, bg, bank; 82 83 eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT); 84 ce_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_CE, eccerrcnt); 85 if (!ce_count) 86 return 0; 87 88 ecccaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR0); 89 ecccaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_ADDR1); 90 ecccsyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND0); 91 ecccsyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND1); 92 ecccsyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_CE_SYND2); 93 94 writel_relaxed(AL_MC_ECC_CLEAR_CE_COUNT | AL_MC_ECC_CLEAR_CE_ERR, 95 al_mc->mmio_base + AL_MC_ECC_CLEAR); 96 97 dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n", 98 ecccaddr0, ecccaddr1); 99 100 rank = FIELD_GET(AL_MC_ECC_CE_ADDR0_RANK, ecccaddr0); 101 row = FIELD_GET(AL_MC_ECC_CE_ADDR0_ROW, ecccaddr0); 102 103 bg = FIELD_GET(AL_MC_ECC_CE_ADDR1_BG, ecccaddr1); 104 bank = FIELD_GET(AL_MC_ECC_CE_ADDR1_BANK, ecccaddr1); 105 column = FIELD_GET(AL_MC_ECC_CE_ADDR1_COLUMN, ecccaddr1); 106 107 prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_CORRECTED, 108 rank, row, bg, bank, column, 109 ecccsyn0, ecccsyn1, ecccsyn2); 110 111 spin_lock_irqsave(&al_mc->lock, flags); 112 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 113 ce_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg); 114 spin_unlock_irqrestore(&al_mc->lock, flags); 115 116 return ce_count; 117 } 118 119 static int handle_ue(struct mem_ctl_info *mci) 120 { 121 u32 eccerrcnt, eccuaddr0, eccuaddr1, eccusyn0, eccusyn1, eccusyn2, row; 122 struct al_mc_edac *al_mc = mci->pvt_info; 123 char msg[AL_MC_EDAC_MSG_MAX]; 124 u16 ue_count, column; 125 unsigned long flags; 126 u8 rank, bg, bank; 127 128 eccerrcnt = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_ERR_COUNT); 129 ue_count = FIELD_GET(AL_MC_ECC_ERR_COUNT_UE, eccerrcnt); 130 if (!ue_count) 131 return 0; 132 133 eccuaddr0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR0); 134 eccuaddr1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_ADDR1); 135 eccusyn0 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND0); 136 eccusyn1 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND1); 137 eccusyn2 = readl_relaxed(al_mc->mmio_base + AL_MC_ECC_UE_SYND2); 138 139 writel_relaxed(AL_MC_ECC_CLEAR_UE_COUNT | AL_MC_ECC_CLEAR_UE_ERR, 140 al_mc->mmio_base + AL_MC_ECC_CLEAR); 141 142 dev_dbg(mci->pdev, "eccuaddr0=0x%08x eccuaddr1=0x%08x\n", 143 eccuaddr0, eccuaddr1); 144 145 rank = FIELD_GET(AL_MC_ECC_UE_ADDR0_RANK, eccuaddr0); 146 row = FIELD_GET(AL_MC_ECC_UE_ADDR0_ROW, eccuaddr0); 147 148 bg = FIELD_GET(AL_MC_ECC_UE_ADDR1_BG, eccuaddr1); 149 bank = FIELD_GET(AL_MC_ECC_UE_ADDR1_BANK, eccuaddr1); 150 column = FIELD_GET(AL_MC_ECC_UE_ADDR1_COLUMN, eccuaddr1); 151 152 prepare_msg(msg, sizeof(msg), HW_EVENT_ERR_UNCORRECTED, 153 rank, row, bg, bank, column, 154 eccusyn0, eccusyn1, eccusyn2); 155 156 spin_lock_irqsave(&al_mc->lock, flags); 157 edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 158 ue_count, 0, 0, 0, 0, 0, -1, mci->ctl_name, msg); 159 spin_unlock_irqrestore(&al_mc->lock, flags); 160 161 return ue_count; 162 } 163 164 static void al_mc_edac_check(struct mem_ctl_info *mci) 165 { 166 struct al_mc_edac *al_mc = mci->pvt_info; 167 168 if (al_mc->irq_ue <= 0) 169 handle_ue(mci); 170 171 if (al_mc->irq_ce <= 0) 172 handle_ce(mci); 173 } 174 175 static irqreturn_t al_mc_edac_irq_handler_ue(int irq, void *info) 176 { 177 struct platform_device *pdev = info; 178 struct mem_ctl_info *mci = platform_get_drvdata(pdev); 179 180 if (handle_ue(mci)) 181 return IRQ_HANDLED; 182 return IRQ_NONE; 183 } 184 185 static irqreturn_t al_mc_edac_irq_handler_ce(int irq, void *info) 186 { 187 struct platform_device *pdev = info; 188 struct mem_ctl_info *mci = platform_get_drvdata(pdev); 189 190 if (handle_ce(mci)) 191 return IRQ_HANDLED; 192 return IRQ_NONE; 193 } 194 195 static enum scrub_type get_scrub_mode(void __iomem *mmio_base) 196 { 197 u32 ecccfg0; 198 199 ecccfg0 = readl(mmio_base + AL_MC_ECC_CFG); 200 201 if (FIELD_GET(AL_MC_ECC_CFG_SCRUB_DISABLED, ecccfg0)) 202 return SCRUB_NONE; 203 else 204 return SCRUB_HW_SRC; 205 } 206 207 static void devm_al_mc_edac_free(void *data) 208 { 209 edac_mc_free(data); 210 } 211 212 static void devm_al_mc_edac_del(void *data) 213 { 214 edac_mc_del_mc(data); 215 } 216 217 static int al_mc_edac_probe(struct platform_device *pdev) 218 { 219 struct edac_mc_layer layers[1]; 220 struct mem_ctl_info *mci; 221 struct al_mc_edac *al_mc; 222 void __iomem *mmio_base; 223 struct dimm_info *dimm; 224 int ret; 225 226 mmio_base = devm_platform_ioremap_resource(pdev, 0); 227 if (IS_ERR(mmio_base)) { 228 dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n", 229 PTR_ERR(mmio_base)); 230 return PTR_ERR(mmio_base); 231 } 232 233 layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; 234 layers[0].size = 1; 235 layers[0].is_virt_csrow = false; 236 mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers, 237 sizeof(struct al_mc_edac)); 238 if (!mci) 239 return -ENOMEM; 240 241 ret = devm_add_action(&pdev->dev, devm_al_mc_edac_free, mci); 242 if (ret) { 243 edac_mc_free(mci); 244 return ret; 245 } 246 247 platform_set_drvdata(pdev, mci); 248 al_mc = mci->pvt_info; 249 250 al_mc->mmio_base = mmio_base; 251 252 al_mc->irq_ue = of_irq_get_byname(pdev->dev.of_node, "ue"); 253 if (al_mc->irq_ue <= 0) 254 dev_dbg(&pdev->dev, 255 "no IRQ defined for UE - falling back to polling\n"); 256 257 al_mc->irq_ce = of_irq_get_byname(pdev->dev.of_node, "ce"); 258 if (al_mc->irq_ce <= 0) 259 dev_dbg(&pdev->dev, 260 "no IRQ defined for CE - falling back to polling\n"); 261 262 /* 263 * In case both interrupts (ue/ce) are to be found, use interrupt mode. 264 * In case none of the interrupt are foud, use polling mode. 265 * In case only one interrupt is found, use interrupt mode for it but 266 * keep polling mode enable for the other. 267 */ 268 if (al_mc->irq_ue <= 0 || al_mc->irq_ce <= 0) { 269 edac_op_state = EDAC_OPSTATE_POLL; 270 mci->edac_check = al_mc_edac_check; 271 } else { 272 edac_op_state = EDAC_OPSTATE_INT; 273 } 274 275 spin_lock_init(&al_mc->lock); 276 277 mci->mtype_cap = MEM_FLAG_DDR3 | MEM_FLAG_DDR4; 278 mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; 279 mci->edac_cap = EDAC_FLAG_SECDED; 280 mci->mod_name = DRV_NAME; 281 mci->ctl_name = "al_mc"; 282 mci->pdev = &pdev->dev; 283 mci->scrub_mode = get_scrub_mode(mmio_base); 284 285 dimm = *mci->dimms; 286 dimm->grain = 1; 287 288 ret = edac_mc_add_mc(mci); 289 if (ret < 0) { 290 dev_err(&pdev->dev, 291 "fail to add memory controller device (%d)\n", 292 ret); 293 return ret; 294 } 295 296 ret = devm_add_action(&pdev->dev, devm_al_mc_edac_del, &pdev->dev); 297 if (ret) { 298 edac_mc_del_mc(&pdev->dev); 299 return ret; 300 } 301 302 if (al_mc->irq_ue > 0) { 303 ret = devm_request_irq(&pdev->dev, 304 al_mc->irq_ue, 305 al_mc_edac_irq_handler_ue, 306 IRQF_SHARED, 307 pdev->name, 308 pdev); 309 if (ret != 0) { 310 dev_err(&pdev->dev, 311 "failed to request UE IRQ %d (%d)\n", 312 al_mc->irq_ue, ret); 313 return ret; 314 } 315 } 316 317 if (al_mc->irq_ce > 0) { 318 ret = devm_request_irq(&pdev->dev, 319 al_mc->irq_ce, 320 al_mc_edac_irq_handler_ce, 321 IRQF_SHARED, 322 pdev->name, 323 pdev); 324 if (ret != 0) { 325 dev_err(&pdev->dev, 326 "failed to request CE IRQ %d (%d)\n", 327 al_mc->irq_ce, ret); 328 return ret; 329 } 330 } 331 332 return 0; 333 } 334 335 static const struct of_device_id al_mc_edac_of_match[] = { 336 { .compatible = "amazon,al-mc-edac", }, 337 {}, 338 }; 339 340 MODULE_DEVICE_TABLE(of, al_mc_edac_of_match); 341 342 static struct platform_driver al_mc_edac_driver = { 343 .probe = al_mc_edac_probe, 344 .driver = { 345 .name = DRV_NAME, 346 .of_match_table = al_mc_edac_of_match, 347 }, 348 }; 349 350 module_platform_driver(al_mc_edac_driver); 351 352 MODULE_LICENSE("GPL v2"); 353 MODULE_AUTHOR("Talel Shenhar"); 354 MODULE_DESCRIPTION("Amazon's Annapurna Lab's Memory Controller EDAC Driver"); 355