1 /* 2 * APM X-Gene SoC EDAC (error detection and correction) 3 * 4 * Copyright (c) 2015, Applied Micro Circuits Corporation 5 * Author: Feng Kan <fkan@apm.com> 6 * Loc Ho <lho@apm.com> 7 * 8 * This program is free software; you can redistribute it and/or modify it 9 * under the terms of the GNU General Public License as published by the 10 * Free Software Foundation; either version 2 of the License, or (at your 11 * option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with this program. If not, see <http://www.gnu.org/licenses/>. 20 */ 21 22 #include <linux/ctype.h> 23 #include <linux/edac.h> 24 #include <linux/interrupt.h> 25 #include <linux/mfd/syscon.h> 26 #include <linux/module.h> 27 #include <linux/of.h> 28 #include <linux/of_address.h> 29 #include <linux/regmap.h> 30 31 #include "edac_core.h" 32 33 #define EDAC_MOD_STR "xgene_edac" 34 35 /* Global error configuration status registers (CSR) */ 36 #define PCPHPERRINTSTS 0x0000 37 #define PCPHPERRINTMSK 0x0004 38 #define MCU_CTL_ERR_MASK BIT(12) 39 #define IOB_PA_ERR_MASK BIT(11) 40 #define IOB_BA_ERR_MASK BIT(10) 41 #define IOB_XGIC_ERR_MASK BIT(9) 42 #define IOB_RB_ERR_MASK BIT(8) 43 #define L3C_UNCORR_ERR_MASK BIT(5) 44 #define MCU_UNCORR_ERR_MASK BIT(4) 45 #define PMD3_MERR_MASK BIT(3) 46 #define PMD2_MERR_MASK BIT(2) 47 #define PMD1_MERR_MASK BIT(1) 48 #define PMD0_MERR_MASK BIT(0) 49 #define PCPLPERRINTSTS 0x0008 50 #define PCPLPERRINTMSK 0x000C 51 #define CSW_SWITCH_TRACE_ERR_MASK BIT(2) 52 #define L3C_CORR_ERR_MASK BIT(1) 53 #define MCU_CORR_ERR_MASK BIT(0) 54 #define MEMERRINTSTS 0x0010 55 #define MEMERRINTMSK 0x0014 56 57 struct xgene_edac { 58 struct device *dev; 59 struct regmap *csw_map; 60 struct regmap *mcba_map; 61 struct regmap *mcbb_map; 62 struct regmap *efuse_map; 63 void __iomem *pcp_csr; 64 spinlock_t lock; 65 struct dentry *dfs; 66 67 struct list_head mcus; 68 struct list_head pmds; 69 70 struct mutex mc_lock; 71 int mc_active_mask; 72 int mc_registered_mask; 73 }; 74 75 static void xgene_edac_pcp_rd(struct xgene_edac *edac, u32 reg, u32 *val) 76 { 77 *val = readl(edac->pcp_csr + reg); 78 } 79 80 static void xgene_edac_pcp_clrbits(struct xgene_edac *edac, u32 reg, 81 u32 bits_mask) 82 { 83 u32 val; 84 85 spin_lock(&edac->lock); 86 val = readl(edac->pcp_csr + reg); 87 val &= ~bits_mask; 88 writel(val, edac->pcp_csr + reg); 89 spin_unlock(&edac->lock); 90 } 91 92 static void xgene_edac_pcp_setbits(struct xgene_edac *edac, u32 reg, 93 u32 bits_mask) 94 { 95 u32 val; 96 97 spin_lock(&edac->lock); 98 val = readl(edac->pcp_csr + reg); 99 val |= bits_mask; 100 writel(val, edac->pcp_csr + reg); 101 spin_unlock(&edac->lock); 102 } 103 104 /* Memory controller error CSR */ 105 #define MCU_MAX_RANK 8 106 #define MCU_RANK_STRIDE 0x40 107 108 #define MCUGECR 0x0110 109 #define MCU_GECR_DEMANDUCINTREN_MASK BIT(0) 110 #define MCU_GECR_BACKUCINTREN_MASK BIT(1) 111 #define MCU_GECR_CINTREN_MASK BIT(2) 112 #define MUC_GECR_MCUADDRERREN_MASK BIT(9) 113 #define MCUGESR 0x0114 114 #define MCU_GESR_ADDRNOMATCH_ERR_MASK BIT(7) 115 #define MCU_GESR_ADDRMULTIMATCH_ERR_MASK BIT(6) 116 #define MCU_GESR_PHYP_ERR_MASK BIT(3) 117 #define MCUESRR0 0x0314 118 #define MCU_ESRR_MULTUCERR_MASK BIT(3) 119 #define MCU_ESRR_BACKUCERR_MASK BIT(2) 120 #define MCU_ESRR_DEMANDUCERR_MASK BIT(1) 121 #define MCU_ESRR_CERR_MASK BIT(0) 122 #define MCUESRRA0 0x0318 123 #define MCUEBLRR0 0x031c 124 #define MCU_EBLRR_ERRBANK_RD(src) (((src) & 0x00000007) >> 0) 125 #define MCUERCRR0 0x0320 126 #define MCU_ERCRR_ERRROW_RD(src) (((src) & 0xFFFF0000) >> 16) 127 #define MCU_ERCRR_ERRCOL_RD(src) ((src) & 0x00000FFF) 128 #define MCUSBECNT0 0x0324 129 #define MCU_SBECNT_COUNT(src) ((src) & 0xFFFF) 130 131 #define CSW_CSWCR 0x0000 132 #define CSW_CSWCR_DUALMCB_MASK BIT(0) 133 134 #define MCBADDRMR 0x0000 135 #define MCBADDRMR_MCU_INTLV_MODE_MASK BIT(3) 136 #define MCBADDRMR_DUALMCU_MODE_MASK BIT(2) 137 #define MCBADDRMR_MCB_INTLV_MODE_MASK BIT(1) 138 #define MCBADDRMR_ADDRESS_MODE_MASK BIT(0) 139 140 struct xgene_edac_mc_ctx { 141 struct list_head next; 142 char *name; 143 struct mem_ctl_info *mci; 144 struct xgene_edac *edac; 145 void __iomem *mcu_csr; 146 u32 mcu_id; 147 }; 148 149 static ssize_t xgene_edac_mc_err_inject_write(struct file *file, 150 const char __user *data, 151 size_t count, loff_t *ppos) 152 { 153 struct mem_ctl_info *mci = file->private_data; 154 struct xgene_edac_mc_ctx *ctx = mci->pvt_info; 155 int i; 156 157 for (i = 0; i < MCU_MAX_RANK; i++) { 158 writel(MCU_ESRR_MULTUCERR_MASK | MCU_ESRR_BACKUCERR_MASK | 159 MCU_ESRR_DEMANDUCERR_MASK | MCU_ESRR_CERR_MASK, 160 ctx->mcu_csr + MCUESRRA0 + i * MCU_RANK_STRIDE); 161 } 162 return count; 163 } 164 165 static const struct file_operations xgene_edac_mc_debug_inject_fops = { 166 .open = simple_open, 167 .write = xgene_edac_mc_err_inject_write, 168 .llseek = generic_file_llseek, 169 }; 170 171 static void xgene_edac_mc_create_debugfs_node(struct mem_ctl_info *mci) 172 { 173 if (!IS_ENABLED(CONFIG_EDAC_DEBUG)) 174 return; 175 #ifdef CONFIG_EDAC_DEBUG 176 if (!mci->debugfs) 177 return; 178 debugfs_create_file("inject_ctrl", S_IWUSR, mci->debugfs, mci, 179 &xgene_edac_mc_debug_inject_fops); 180 #endif 181 } 182 183 static void xgene_edac_mc_check(struct mem_ctl_info *mci) 184 { 185 struct xgene_edac_mc_ctx *ctx = mci->pvt_info; 186 unsigned int pcp_hp_stat; 187 unsigned int pcp_lp_stat; 188 u32 reg; 189 u32 rank; 190 u32 bank; 191 u32 count; 192 u32 col_row; 193 194 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat); 195 xgene_edac_pcp_rd(ctx->edac, PCPLPERRINTSTS, &pcp_lp_stat); 196 if (!((MCU_UNCORR_ERR_MASK & pcp_hp_stat) || 197 (MCU_CTL_ERR_MASK & pcp_hp_stat) || 198 (MCU_CORR_ERR_MASK & pcp_lp_stat))) 199 return; 200 201 for (rank = 0; rank < MCU_MAX_RANK; rank++) { 202 reg = readl(ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE); 203 204 /* Detect uncorrectable memory error */ 205 if (reg & (MCU_ESRR_DEMANDUCERR_MASK | 206 MCU_ESRR_BACKUCERR_MASK)) { 207 /* Detected uncorrectable memory error */ 208 edac_mc_chipset_printk(mci, KERN_ERR, "X-Gene", 209 "MCU uncorrectable error at rank %d\n", rank); 210 211 edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 212 1, 0, 0, 0, 0, 0, -1, mci->ctl_name, ""); 213 } 214 215 /* Detect correctable memory error */ 216 if (reg & MCU_ESRR_CERR_MASK) { 217 bank = readl(ctx->mcu_csr + MCUEBLRR0 + 218 rank * MCU_RANK_STRIDE); 219 col_row = readl(ctx->mcu_csr + MCUERCRR0 + 220 rank * MCU_RANK_STRIDE); 221 count = readl(ctx->mcu_csr + MCUSBECNT0 + 222 rank * MCU_RANK_STRIDE); 223 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene", 224 "MCU correctable error at rank %d bank %d column %d row %d count %d\n", 225 rank, MCU_EBLRR_ERRBANK_RD(bank), 226 MCU_ERCRR_ERRCOL_RD(col_row), 227 MCU_ERCRR_ERRROW_RD(col_row), 228 MCU_SBECNT_COUNT(count)); 229 230 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 231 1, 0, 0, 0, 0, 0, -1, mci->ctl_name, ""); 232 } 233 234 /* Clear all error registers */ 235 writel(0x0, ctx->mcu_csr + MCUEBLRR0 + rank * MCU_RANK_STRIDE); 236 writel(0x0, ctx->mcu_csr + MCUERCRR0 + rank * MCU_RANK_STRIDE); 237 writel(0x0, ctx->mcu_csr + MCUSBECNT0 + 238 rank * MCU_RANK_STRIDE); 239 writel(reg, ctx->mcu_csr + MCUESRR0 + rank * MCU_RANK_STRIDE); 240 } 241 242 /* Detect memory controller error */ 243 reg = readl(ctx->mcu_csr + MCUGESR); 244 if (reg) { 245 if (reg & MCU_GESR_ADDRNOMATCH_ERR_MASK) 246 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene", 247 "MCU address miss-match error\n"); 248 if (reg & MCU_GESR_ADDRMULTIMATCH_ERR_MASK) 249 edac_mc_chipset_printk(mci, KERN_WARNING, "X-Gene", 250 "MCU address multi-match error\n"); 251 252 writel(reg, ctx->mcu_csr + MCUGESR); 253 } 254 } 255 256 static void xgene_edac_mc_irq_ctl(struct mem_ctl_info *mci, bool enable) 257 { 258 struct xgene_edac_mc_ctx *ctx = mci->pvt_info; 259 unsigned int val; 260 261 if (edac_op_state != EDAC_OPSTATE_INT) 262 return; 263 264 mutex_lock(&ctx->edac->mc_lock); 265 266 /* 267 * As there is only single bit for enable error and interrupt mask, 268 * we must only enable top level interrupt after all MCUs are 269 * registered. Otherwise, if there is an error and the corresponding 270 * MCU has not registered, the interrupt will never get cleared. To 271 * determine all MCU have registered, we will keep track of active 272 * MCUs and registered MCUs. 273 */ 274 if (enable) { 275 /* Set registered MCU bit */ 276 ctx->edac->mc_registered_mask |= 1 << ctx->mcu_id; 277 278 /* Enable interrupt after all active MCU registered */ 279 if (ctx->edac->mc_registered_mask == 280 ctx->edac->mc_active_mask) { 281 /* Enable memory controller top level interrupt */ 282 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK, 283 MCU_UNCORR_ERR_MASK | 284 MCU_CTL_ERR_MASK); 285 xgene_edac_pcp_clrbits(ctx->edac, PCPLPERRINTMSK, 286 MCU_CORR_ERR_MASK); 287 } 288 289 /* Enable MCU interrupt and error reporting */ 290 val = readl(ctx->mcu_csr + MCUGECR); 291 val |= MCU_GECR_DEMANDUCINTREN_MASK | 292 MCU_GECR_BACKUCINTREN_MASK | 293 MCU_GECR_CINTREN_MASK | 294 MUC_GECR_MCUADDRERREN_MASK; 295 writel(val, ctx->mcu_csr + MCUGECR); 296 } else { 297 /* Disable MCU interrupt */ 298 val = readl(ctx->mcu_csr + MCUGECR); 299 val &= ~(MCU_GECR_DEMANDUCINTREN_MASK | 300 MCU_GECR_BACKUCINTREN_MASK | 301 MCU_GECR_CINTREN_MASK | 302 MUC_GECR_MCUADDRERREN_MASK); 303 writel(val, ctx->mcu_csr + MCUGECR); 304 305 /* Disable memory controller top level interrupt */ 306 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK, 307 MCU_UNCORR_ERR_MASK | MCU_CTL_ERR_MASK); 308 xgene_edac_pcp_setbits(ctx->edac, PCPLPERRINTMSK, 309 MCU_CORR_ERR_MASK); 310 311 /* Clear registered MCU bit */ 312 ctx->edac->mc_registered_mask &= ~(1 << ctx->mcu_id); 313 } 314 315 mutex_unlock(&ctx->edac->mc_lock); 316 } 317 318 static int xgene_edac_mc_is_active(struct xgene_edac_mc_ctx *ctx, int mc_idx) 319 { 320 unsigned int reg; 321 u32 mcu_mask; 322 323 if (regmap_read(ctx->edac->csw_map, CSW_CSWCR, ®)) 324 return 0; 325 326 if (reg & CSW_CSWCR_DUALMCB_MASK) { 327 /* 328 * Dual MCB active - Determine if all 4 active or just MCU0 329 * and MCU2 active 330 */ 331 if (regmap_read(ctx->edac->mcbb_map, MCBADDRMR, ®)) 332 return 0; 333 mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0xF : 0x5; 334 } else { 335 /* 336 * Single MCB active - Determine if MCU0/MCU1 or just MCU0 337 * active 338 */ 339 if (regmap_read(ctx->edac->mcba_map, MCBADDRMR, ®)) 340 return 0; 341 mcu_mask = (reg & MCBADDRMR_DUALMCU_MODE_MASK) ? 0x3 : 0x1; 342 } 343 344 /* Save active MC mask if hasn't set already */ 345 if (!ctx->edac->mc_active_mask) 346 ctx->edac->mc_active_mask = mcu_mask; 347 348 return (mcu_mask & (1 << mc_idx)) ? 1 : 0; 349 } 350 351 static int xgene_edac_mc_add(struct xgene_edac *edac, struct device_node *np) 352 { 353 struct mem_ctl_info *mci; 354 struct edac_mc_layer layers[2]; 355 struct xgene_edac_mc_ctx tmp_ctx; 356 struct xgene_edac_mc_ctx *ctx; 357 struct resource res; 358 int rc; 359 360 memset(&tmp_ctx, 0, sizeof(tmp_ctx)); 361 tmp_ctx.edac = edac; 362 363 if (!devres_open_group(edac->dev, xgene_edac_mc_add, GFP_KERNEL)) 364 return -ENOMEM; 365 366 rc = of_address_to_resource(np, 0, &res); 367 if (rc < 0) { 368 dev_err(edac->dev, "no MCU resource address\n"); 369 goto err_group; 370 } 371 tmp_ctx.mcu_csr = devm_ioremap_resource(edac->dev, &res); 372 if (IS_ERR(tmp_ctx.mcu_csr)) { 373 dev_err(edac->dev, "unable to map MCU resource\n"); 374 rc = PTR_ERR(tmp_ctx.mcu_csr); 375 goto err_group; 376 } 377 378 /* Ignore non-active MCU */ 379 if (of_property_read_u32(np, "memory-controller", &tmp_ctx.mcu_id)) { 380 dev_err(edac->dev, "no memory-controller property\n"); 381 rc = -ENODEV; 382 goto err_group; 383 } 384 if (!xgene_edac_mc_is_active(&tmp_ctx, tmp_ctx.mcu_id)) { 385 rc = -ENODEV; 386 goto err_group; 387 } 388 389 layers[0].type = EDAC_MC_LAYER_CHIP_SELECT; 390 layers[0].size = 4; 391 layers[0].is_virt_csrow = true; 392 layers[1].type = EDAC_MC_LAYER_CHANNEL; 393 layers[1].size = 2; 394 layers[1].is_virt_csrow = false; 395 mci = edac_mc_alloc(tmp_ctx.mcu_id, ARRAY_SIZE(layers), layers, 396 sizeof(*ctx)); 397 if (!mci) { 398 rc = -ENOMEM; 399 goto err_group; 400 } 401 402 ctx = mci->pvt_info; 403 *ctx = tmp_ctx; /* Copy over resource value */ 404 ctx->name = "xgene_edac_mc_err"; 405 ctx->mci = mci; 406 mci->pdev = &mci->dev; 407 mci->ctl_name = ctx->name; 408 mci->dev_name = ctx->name; 409 410 mci->mtype_cap = MEM_FLAG_RDDR | MEM_FLAG_RDDR2 | MEM_FLAG_RDDR3 | 411 MEM_FLAG_DDR | MEM_FLAG_DDR2 | MEM_FLAG_DDR3; 412 mci->edac_ctl_cap = EDAC_FLAG_SECDED; 413 mci->edac_cap = EDAC_FLAG_SECDED; 414 mci->mod_name = EDAC_MOD_STR; 415 mci->mod_ver = "0.1"; 416 mci->ctl_page_to_phys = NULL; 417 mci->scrub_cap = SCRUB_FLAG_HW_SRC; 418 mci->scrub_mode = SCRUB_HW_SRC; 419 420 if (edac_op_state == EDAC_OPSTATE_POLL) 421 mci->edac_check = xgene_edac_mc_check; 422 423 if (edac_mc_add_mc(mci)) { 424 dev_err(edac->dev, "edac_mc_add_mc failed\n"); 425 rc = -EINVAL; 426 goto err_free; 427 } 428 429 xgene_edac_mc_create_debugfs_node(mci); 430 431 list_add(&ctx->next, &edac->mcus); 432 433 xgene_edac_mc_irq_ctl(mci, true); 434 435 devres_remove_group(edac->dev, xgene_edac_mc_add); 436 437 dev_info(edac->dev, "X-Gene EDAC MC registered\n"); 438 return 0; 439 440 err_free: 441 edac_mc_free(mci); 442 err_group: 443 devres_release_group(edac->dev, xgene_edac_mc_add); 444 return rc; 445 } 446 447 static int xgene_edac_mc_remove(struct xgene_edac_mc_ctx *mcu) 448 { 449 xgene_edac_mc_irq_ctl(mcu->mci, false); 450 edac_mc_del_mc(&mcu->mci->dev); 451 edac_mc_free(mcu->mci); 452 return 0; 453 } 454 455 /* CPU L1/L2 error CSR */ 456 #define MAX_CPU_PER_PMD 2 457 #define CPU_CSR_STRIDE 0x00100000 458 #define CPU_L2C_PAGE 0x000D0000 459 #define CPU_MEMERR_L2C_PAGE 0x000E0000 460 #define CPU_MEMERR_CPU_PAGE 0x000F0000 461 462 #define MEMERR_CPU_ICFECR_PAGE_OFFSET 0x0000 463 #define MEMERR_CPU_ICFESR_PAGE_OFFSET 0x0004 464 #define MEMERR_CPU_ICFESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24) 465 #define MEMERR_CPU_ICFESR_ERRINDEX_RD(src) (((src) & 0x003F0000) >> 16) 466 #define MEMERR_CPU_ICFESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8) 467 #define MEMERR_CPU_ICFESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4) 468 #define MEMERR_CPU_ICFESR_MULTCERR_MASK BIT(2) 469 #define MEMERR_CPU_ICFESR_CERR_MASK BIT(0) 470 #define MEMERR_CPU_LSUESR_PAGE_OFFSET 0x000c 471 #define MEMERR_CPU_LSUESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24) 472 #define MEMERR_CPU_LSUESR_ERRINDEX_RD(src) (((src) & 0x003F0000) >> 16) 473 #define MEMERR_CPU_LSUESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8) 474 #define MEMERR_CPU_LSUESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4) 475 #define MEMERR_CPU_LSUESR_MULTCERR_MASK BIT(2) 476 #define MEMERR_CPU_LSUESR_CERR_MASK BIT(0) 477 #define MEMERR_CPU_LSUECR_PAGE_OFFSET 0x0008 478 #define MEMERR_CPU_MMUECR_PAGE_OFFSET 0x0010 479 #define MEMERR_CPU_MMUESR_PAGE_OFFSET 0x0014 480 #define MEMERR_CPU_MMUESR_ERRWAY_RD(src) (((src) & 0xFF000000) >> 24) 481 #define MEMERR_CPU_MMUESR_ERRINDEX_RD(src) (((src) & 0x007F0000) >> 16) 482 #define MEMERR_CPU_MMUESR_ERRINFO_RD(src) (((src) & 0x0000FF00) >> 8) 483 #define MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK BIT(7) 484 #define MEMERR_CPU_MMUESR_ERRTYPE_RD(src) (((src) & 0x00000070) >> 4) 485 #define MEMERR_CPU_MMUESR_MULTCERR_MASK BIT(2) 486 #define MEMERR_CPU_MMUESR_CERR_MASK BIT(0) 487 #define MEMERR_CPU_ICFESRA_PAGE_OFFSET 0x0804 488 #define MEMERR_CPU_LSUESRA_PAGE_OFFSET 0x080c 489 #define MEMERR_CPU_MMUESRA_PAGE_OFFSET 0x0814 490 491 #define MEMERR_L2C_L2ECR_PAGE_OFFSET 0x0000 492 #define MEMERR_L2C_L2ESR_PAGE_OFFSET 0x0004 493 #define MEMERR_L2C_L2ESR_ERRSYN_RD(src) (((src) & 0xFF000000) >> 24) 494 #define MEMERR_L2C_L2ESR_ERRWAY_RD(src) (((src) & 0x00FC0000) >> 18) 495 #define MEMERR_L2C_L2ESR_ERRCPU_RD(src) (((src) & 0x00020000) >> 17) 496 #define MEMERR_L2C_L2ESR_ERRGROUP_RD(src) (((src) & 0x0000E000) >> 13) 497 #define MEMERR_L2C_L2ESR_ERRACTION_RD(src) (((src) & 0x00001C00) >> 10) 498 #define MEMERR_L2C_L2ESR_ERRTYPE_RD(src) (((src) & 0x00000300) >> 8) 499 #define MEMERR_L2C_L2ESR_MULTUCERR_MASK BIT(3) 500 #define MEMERR_L2C_L2ESR_MULTICERR_MASK BIT(2) 501 #define MEMERR_L2C_L2ESR_UCERR_MASK BIT(1) 502 #define MEMERR_L2C_L2ESR_ERR_MASK BIT(0) 503 #define MEMERR_L2C_L2EALR_PAGE_OFFSET 0x0008 504 #define CPUX_L2C_L2RTOCR_PAGE_OFFSET 0x0010 505 #define MEMERR_L2C_L2EAHR_PAGE_OFFSET 0x000c 506 #define CPUX_L2C_L2RTOSR_PAGE_OFFSET 0x0014 507 #define MEMERR_L2C_L2RTOSR_MULTERR_MASK BIT(1) 508 #define MEMERR_L2C_L2RTOSR_ERR_MASK BIT(0) 509 #define CPUX_L2C_L2RTOALR_PAGE_OFFSET 0x0018 510 #define CPUX_L2C_L2RTOAHR_PAGE_OFFSET 0x001c 511 #define MEMERR_L2C_L2ESRA_PAGE_OFFSET 0x0804 512 513 /* 514 * Processor Module Domain (PMD) context - Context for a pair of processsors. 515 * Each PMD consists of 2 CPUs and a shared L2 cache. Each CPU consists of 516 * its own L1 cache. 517 */ 518 struct xgene_edac_pmd_ctx { 519 struct list_head next; 520 struct device ddev; 521 char *name; 522 struct xgene_edac *edac; 523 struct edac_device_ctl_info *edac_dev; 524 void __iomem *pmd_csr; 525 u32 pmd; 526 int version; 527 }; 528 529 static void xgene_edac_pmd_l1_check(struct edac_device_ctl_info *edac_dev, 530 int cpu_idx) 531 { 532 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; 533 void __iomem *pg_f; 534 u32 val; 535 536 pg_f = ctx->pmd_csr + cpu_idx * CPU_CSR_STRIDE + CPU_MEMERR_CPU_PAGE; 537 538 val = readl(pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET); 539 if (val) { 540 dev_err(edac_dev->dev, 541 "CPU%d L1 memory error ICF 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n", 542 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val, 543 MEMERR_CPU_ICFESR_ERRWAY_RD(val), 544 MEMERR_CPU_ICFESR_ERRINDEX_RD(val), 545 MEMERR_CPU_ICFESR_ERRINFO_RD(val)); 546 if (val & MEMERR_CPU_ICFESR_CERR_MASK) 547 dev_err(edac_dev->dev, 548 "One or more correctable error\n"); 549 if (val & MEMERR_CPU_ICFESR_MULTCERR_MASK) 550 dev_err(edac_dev->dev, "Multiple correctable error\n"); 551 switch (MEMERR_CPU_ICFESR_ERRTYPE_RD(val)) { 552 case 1: 553 dev_err(edac_dev->dev, "L1 TLB multiple hit\n"); 554 break; 555 case 2: 556 dev_err(edac_dev->dev, "Way select multiple hit\n"); 557 break; 558 case 3: 559 dev_err(edac_dev->dev, "Physical tag parity error\n"); 560 break; 561 case 4: 562 case 5: 563 dev_err(edac_dev->dev, "L1 data parity error\n"); 564 break; 565 case 6: 566 dev_err(edac_dev->dev, "L1 pre-decode parity error\n"); 567 break; 568 } 569 570 /* Clear any HW errors */ 571 writel(val, pg_f + MEMERR_CPU_ICFESR_PAGE_OFFSET); 572 573 if (val & (MEMERR_CPU_ICFESR_CERR_MASK | 574 MEMERR_CPU_ICFESR_MULTCERR_MASK)) 575 edac_device_handle_ce(edac_dev, 0, 0, 576 edac_dev->ctl_name); 577 } 578 579 val = readl(pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET); 580 if (val) { 581 dev_err(edac_dev->dev, 582 "CPU%d memory error LSU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X\n", 583 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val, 584 MEMERR_CPU_LSUESR_ERRWAY_RD(val), 585 MEMERR_CPU_LSUESR_ERRINDEX_RD(val), 586 MEMERR_CPU_LSUESR_ERRINFO_RD(val)); 587 if (val & MEMERR_CPU_LSUESR_CERR_MASK) 588 dev_err(edac_dev->dev, 589 "One or more correctable error\n"); 590 if (val & MEMERR_CPU_LSUESR_MULTCERR_MASK) 591 dev_err(edac_dev->dev, "Multiple correctable error\n"); 592 switch (MEMERR_CPU_LSUESR_ERRTYPE_RD(val)) { 593 case 0: 594 dev_err(edac_dev->dev, "Load tag error\n"); 595 break; 596 case 1: 597 dev_err(edac_dev->dev, "Load data error\n"); 598 break; 599 case 2: 600 dev_err(edac_dev->dev, "WSL multihit error\n"); 601 break; 602 case 3: 603 dev_err(edac_dev->dev, "Store tag error\n"); 604 break; 605 case 4: 606 dev_err(edac_dev->dev, 607 "DTB multihit from load pipeline error\n"); 608 break; 609 case 5: 610 dev_err(edac_dev->dev, 611 "DTB multihit from store pipeline error\n"); 612 break; 613 } 614 615 /* Clear any HW errors */ 616 writel(val, pg_f + MEMERR_CPU_LSUESR_PAGE_OFFSET); 617 618 if (val & (MEMERR_CPU_LSUESR_CERR_MASK | 619 MEMERR_CPU_LSUESR_MULTCERR_MASK)) 620 edac_device_handle_ce(edac_dev, 0, 0, 621 edac_dev->ctl_name); 622 } 623 624 val = readl(pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET); 625 if (val) { 626 dev_err(edac_dev->dev, 627 "CPU%d memory error MMU 0x%08X Way 0x%02X Index 0x%02X Info 0x%02X %s\n", 628 ctx->pmd * MAX_CPU_PER_PMD + cpu_idx, val, 629 MEMERR_CPU_MMUESR_ERRWAY_RD(val), 630 MEMERR_CPU_MMUESR_ERRINDEX_RD(val), 631 MEMERR_CPU_MMUESR_ERRINFO_RD(val), 632 val & MEMERR_CPU_MMUESR_ERRREQSTR_LSU_MASK ? "LSU" : 633 "ICF"); 634 if (val & MEMERR_CPU_MMUESR_CERR_MASK) 635 dev_err(edac_dev->dev, 636 "One or more correctable error\n"); 637 if (val & MEMERR_CPU_MMUESR_MULTCERR_MASK) 638 dev_err(edac_dev->dev, "Multiple correctable error\n"); 639 switch (MEMERR_CPU_MMUESR_ERRTYPE_RD(val)) { 640 case 0: 641 dev_err(edac_dev->dev, "Stage 1 UTB hit error\n"); 642 break; 643 case 1: 644 dev_err(edac_dev->dev, "Stage 1 UTB miss error\n"); 645 break; 646 case 2: 647 dev_err(edac_dev->dev, "Stage 1 UTB allocate error\n"); 648 break; 649 case 3: 650 dev_err(edac_dev->dev, 651 "TMO operation single bank error\n"); 652 break; 653 case 4: 654 dev_err(edac_dev->dev, "Stage 2 UTB error\n"); 655 break; 656 case 5: 657 dev_err(edac_dev->dev, "Stage 2 UTB miss error\n"); 658 break; 659 case 6: 660 dev_err(edac_dev->dev, "Stage 2 UTB allocate error\n"); 661 break; 662 case 7: 663 dev_err(edac_dev->dev, 664 "TMO operation multiple bank error\n"); 665 break; 666 } 667 668 /* Clear any HW errors */ 669 writel(val, pg_f + MEMERR_CPU_MMUESR_PAGE_OFFSET); 670 671 edac_device_handle_ce(edac_dev, 0, 0, edac_dev->ctl_name); 672 } 673 } 674 675 static void xgene_edac_pmd_l2_check(struct edac_device_ctl_info *edac_dev) 676 { 677 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; 678 void __iomem *pg_d; 679 void __iomem *pg_e; 680 u32 val_hi; 681 u32 val_lo; 682 u32 val; 683 684 /* Check L2 */ 685 pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE; 686 val = readl(pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET); 687 if (val) { 688 val_lo = readl(pg_e + MEMERR_L2C_L2EALR_PAGE_OFFSET); 689 val_hi = readl(pg_e + MEMERR_L2C_L2EAHR_PAGE_OFFSET); 690 dev_err(edac_dev->dev, 691 "PMD%d memory error L2C L2ESR 0x%08X @ 0x%08X.%08X\n", 692 ctx->pmd, val, val_hi, val_lo); 693 dev_err(edac_dev->dev, 694 "ErrSyndrome 0x%02X ErrWay 0x%02X ErrCpu %d ErrGroup 0x%02X ErrAction 0x%02X\n", 695 MEMERR_L2C_L2ESR_ERRSYN_RD(val), 696 MEMERR_L2C_L2ESR_ERRWAY_RD(val), 697 MEMERR_L2C_L2ESR_ERRCPU_RD(val), 698 MEMERR_L2C_L2ESR_ERRGROUP_RD(val), 699 MEMERR_L2C_L2ESR_ERRACTION_RD(val)); 700 701 if (val & MEMERR_L2C_L2ESR_ERR_MASK) 702 dev_err(edac_dev->dev, 703 "One or more correctable error\n"); 704 if (val & MEMERR_L2C_L2ESR_MULTICERR_MASK) 705 dev_err(edac_dev->dev, "Multiple correctable error\n"); 706 if (val & MEMERR_L2C_L2ESR_UCERR_MASK) 707 dev_err(edac_dev->dev, 708 "One or more uncorrectable error\n"); 709 if (val & MEMERR_L2C_L2ESR_MULTUCERR_MASK) 710 dev_err(edac_dev->dev, 711 "Multiple uncorrectable error\n"); 712 713 switch (MEMERR_L2C_L2ESR_ERRTYPE_RD(val)) { 714 case 0: 715 dev_err(edac_dev->dev, "Outbound SDB parity error\n"); 716 break; 717 case 1: 718 dev_err(edac_dev->dev, "Inbound SDB parity error\n"); 719 break; 720 case 2: 721 dev_err(edac_dev->dev, "Tag ECC error\n"); 722 break; 723 case 3: 724 dev_err(edac_dev->dev, "Data ECC error\n"); 725 break; 726 } 727 728 /* Clear any HW errors */ 729 writel(val, pg_e + MEMERR_L2C_L2ESR_PAGE_OFFSET); 730 731 if (val & (MEMERR_L2C_L2ESR_ERR_MASK | 732 MEMERR_L2C_L2ESR_MULTICERR_MASK)) 733 edac_device_handle_ce(edac_dev, 0, 0, 734 edac_dev->ctl_name); 735 if (val & (MEMERR_L2C_L2ESR_UCERR_MASK | 736 MEMERR_L2C_L2ESR_MULTUCERR_MASK)) 737 edac_device_handle_ue(edac_dev, 0, 0, 738 edac_dev->ctl_name); 739 } 740 741 /* Check if any memory request timed out on L2 cache */ 742 pg_d = ctx->pmd_csr + CPU_L2C_PAGE; 743 val = readl(pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET); 744 if (val) { 745 val_lo = readl(pg_d + CPUX_L2C_L2RTOALR_PAGE_OFFSET); 746 val_hi = readl(pg_d + CPUX_L2C_L2RTOAHR_PAGE_OFFSET); 747 dev_err(edac_dev->dev, 748 "PMD%d L2C error L2C RTOSR 0x%08X @ 0x%08X.%08X\n", 749 ctx->pmd, val, val_hi, val_lo); 750 writel(val, pg_d + CPUX_L2C_L2RTOSR_PAGE_OFFSET); 751 } 752 } 753 754 static void xgene_edac_pmd_check(struct edac_device_ctl_info *edac_dev) 755 { 756 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; 757 unsigned int pcp_hp_stat; 758 int i; 759 760 xgene_edac_pcp_rd(ctx->edac, PCPHPERRINTSTS, &pcp_hp_stat); 761 if (!((PMD0_MERR_MASK << ctx->pmd) & pcp_hp_stat)) 762 return; 763 764 /* Check CPU L1 error */ 765 for (i = 0; i < MAX_CPU_PER_PMD; i++) 766 xgene_edac_pmd_l1_check(edac_dev, i); 767 768 /* Check CPU L2 error */ 769 xgene_edac_pmd_l2_check(edac_dev); 770 } 771 772 static void xgene_edac_pmd_cpu_hw_cfg(struct edac_device_ctl_info *edac_dev, 773 int cpu) 774 { 775 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; 776 void __iomem *pg_f = ctx->pmd_csr + cpu * CPU_CSR_STRIDE + 777 CPU_MEMERR_CPU_PAGE; 778 779 /* 780 * Enable CPU memory error: 781 * MEMERR_CPU_ICFESRA, MEMERR_CPU_LSUESRA, and MEMERR_CPU_MMUESRA 782 */ 783 writel(0x00000301, pg_f + MEMERR_CPU_ICFECR_PAGE_OFFSET); 784 writel(0x00000301, pg_f + MEMERR_CPU_LSUECR_PAGE_OFFSET); 785 writel(0x00000101, pg_f + MEMERR_CPU_MMUECR_PAGE_OFFSET); 786 } 787 788 static void xgene_edac_pmd_hw_cfg(struct edac_device_ctl_info *edac_dev) 789 { 790 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; 791 void __iomem *pg_d = ctx->pmd_csr + CPU_L2C_PAGE; 792 void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE; 793 794 /* Enable PMD memory error - MEMERR_L2C_L2ECR and L2C_L2RTOCR */ 795 writel(0x00000703, pg_e + MEMERR_L2C_L2ECR_PAGE_OFFSET); 796 /* Configure L2C HW request time out feature if supported */ 797 if (ctx->version > 1) 798 writel(0x00000119, pg_d + CPUX_L2C_L2RTOCR_PAGE_OFFSET); 799 } 800 801 static void xgene_edac_pmd_hw_ctl(struct edac_device_ctl_info *edac_dev, 802 bool enable) 803 { 804 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; 805 int i; 806 807 /* Enable PMD error interrupt */ 808 if (edac_dev->op_state == OP_RUNNING_INTERRUPT) { 809 if (enable) 810 xgene_edac_pcp_clrbits(ctx->edac, PCPHPERRINTMSK, 811 PMD0_MERR_MASK << ctx->pmd); 812 else 813 xgene_edac_pcp_setbits(ctx->edac, PCPHPERRINTMSK, 814 PMD0_MERR_MASK << ctx->pmd); 815 } 816 817 if (enable) { 818 xgene_edac_pmd_hw_cfg(edac_dev); 819 820 /* Two CPUs per a PMD */ 821 for (i = 0; i < MAX_CPU_PER_PMD; i++) 822 xgene_edac_pmd_cpu_hw_cfg(edac_dev, i); 823 } 824 } 825 826 static ssize_t xgene_edac_pmd_l1_inject_ctrl_write(struct file *file, 827 const char __user *data, 828 size_t count, loff_t *ppos) 829 { 830 struct edac_device_ctl_info *edac_dev = file->private_data; 831 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; 832 void __iomem *cpux_pg_f; 833 int i; 834 835 for (i = 0; i < MAX_CPU_PER_PMD; i++) { 836 cpux_pg_f = ctx->pmd_csr + i * CPU_CSR_STRIDE + 837 CPU_MEMERR_CPU_PAGE; 838 839 writel(MEMERR_CPU_ICFESR_MULTCERR_MASK | 840 MEMERR_CPU_ICFESR_CERR_MASK, 841 cpux_pg_f + MEMERR_CPU_ICFESRA_PAGE_OFFSET); 842 writel(MEMERR_CPU_LSUESR_MULTCERR_MASK | 843 MEMERR_CPU_LSUESR_CERR_MASK, 844 cpux_pg_f + MEMERR_CPU_LSUESRA_PAGE_OFFSET); 845 writel(MEMERR_CPU_MMUESR_MULTCERR_MASK | 846 MEMERR_CPU_MMUESR_CERR_MASK, 847 cpux_pg_f + MEMERR_CPU_MMUESRA_PAGE_OFFSET); 848 } 849 return count; 850 } 851 852 static ssize_t xgene_edac_pmd_l2_inject_ctrl_write(struct file *file, 853 const char __user *data, 854 size_t count, loff_t *ppos) 855 { 856 struct edac_device_ctl_info *edac_dev = file->private_data; 857 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; 858 void __iomem *pg_e = ctx->pmd_csr + CPU_MEMERR_L2C_PAGE; 859 860 writel(MEMERR_L2C_L2ESR_MULTUCERR_MASK | 861 MEMERR_L2C_L2ESR_MULTICERR_MASK | 862 MEMERR_L2C_L2ESR_UCERR_MASK | 863 MEMERR_L2C_L2ESR_ERR_MASK, 864 pg_e + MEMERR_L2C_L2ESRA_PAGE_OFFSET); 865 return count; 866 } 867 868 static const struct file_operations xgene_edac_pmd_debug_inject_fops[] = { 869 { 870 .open = simple_open, 871 .write = xgene_edac_pmd_l1_inject_ctrl_write, 872 .llseek = generic_file_llseek, }, 873 { 874 .open = simple_open, 875 .write = xgene_edac_pmd_l2_inject_ctrl_write, 876 .llseek = generic_file_llseek, }, 877 { } 878 }; 879 880 static void xgene_edac_pmd_create_debugfs_nodes( 881 struct edac_device_ctl_info *edac_dev) 882 { 883 struct xgene_edac_pmd_ctx *ctx = edac_dev->pvt_info; 884 struct dentry *edac_debugfs; 885 char name[30]; 886 887 if (!IS_ENABLED(CONFIG_EDAC_DEBUG)) 888 return; 889 890 /* 891 * Todo: Switch to common EDAC debug file system for edac device 892 * when available. 893 */ 894 if (!ctx->edac->dfs) { 895 ctx->edac->dfs = debugfs_create_dir(edac_dev->dev->kobj.name, 896 NULL); 897 if (!ctx->edac->dfs) 898 return; 899 } 900 sprintf(name, "PMD%d", ctx->pmd); 901 edac_debugfs = debugfs_create_dir(name, ctx->edac->dfs); 902 if (!edac_debugfs) 903 return; 904 905 debugfs_create_file("l1_inject_ctrl", S_IWUSR, edac_debugfs, edac_dev, 906 &xgene_edac_pmd_debug_inject_fops[0]); 907 debugfs_create_file("l2_inject_ctrl", S_IWUSR, edac_debugfs, edac_dev, 908 &xgene_edac_pmd_debug_inject_fops[1]); 909 } 910 911 static int xgene_edac_pmd_available(u32 efuse, int pmd) 912 { 913 return (efuse & (1 << pmd)) ? 0 : 1; 914 } 915 916 static int xgene_edac_pmd_add(struct xgene_edac *edac, struct device_node *np, 917 int version) 918 { 919 struct edac_device_ctl_info *edac_dev; 920 struct xgene_edac_pmd_ctx *ctx; 921 struct resource res; 922 char edac_name[10]; 923 u32 pmd; 924 int rc; 925 u32 val; 926 927 if (!devres_open_group(edac->dev, xgene_edac_pmd_add, GFP_KERNEL)) 928 return -ENOMEM; 929 930 /* Determine if this PMD is disabled */ 931 if (of_property_read_u32(np, "pmd-controller", &pmd)) { 932 dev_err(edac->dev, "no pmd-controller property\n"); 933 rc = -ENODEV; 934 goto err_group; 935 } 936 rc = regmap_read(edac->efuse_map, 0, &val); 937 if (rc) 938 goto err_group; 939 if (!xgene_edac_pmd_available(val, pmd)) { 940 rc = -ENODEV; 941 goto err_group; 942 } 943 944 sprintf(edac_name, "l2c%d", pmd); 945 edac_dev = edac_device_alloc_ctl_info(sizeof(*ctx), 946 edac_name, 1, "l2c", 1, 2, NULL, 947 0, edac_device_alloc_index()); 948 if (!edac_dev) { 949 rc = -ENOMEM; 950 goto err_group; 951 } 952 953 ctx = edac_dev->pvt_info; 954 ctx->name = "xgene_pmd_err"; 955 ctx->pmd = pmd; 956 ctx->edac = edac; 957 ctx->edac_dev = edac_dev; 958 ctx->ddev = *edac->dev; 959 ctx->version = version; 960 edac_dev->dev = &ctx->ddev; 961 edac_dev->ctl_name = ctx->name; 962 edac_dev->dev_name = ctx->name; 963 edac_dev->mod_name = EDAC_MOD_STR; 964 965 rc = of_address_to_resource(np, 0, &res); 966 if (rc < 0) { 967 dev_err(edac->dev, "no PMD resource address\n"); 968 goto err_free; 969 } 970 ctx->pmd_csr = devm_ioremap_resource(edac->dev, &res); 971 if (IS_ERR(ctx->pmd_csr)) { 972 dev_err(edac->dev, 973 "devm_ioremap_resource failed for PMD resource address\n"); 974 rc = PTR_ERR(ctx->pmd_csr); 975 goto err_free; 976 } 977 978 if (edac_op_state == EDAC_OPSTATE_POLL) 979 edac_dev->edac_check = xgene_edac_pmd_check; 980 981 xgene_edac_pmd_create_debugfs_nodes(edac_dev); 982 983 rc = edac_device_add_device(edac_dev); 984 if (rc > 0) { 985 dev_err(edac->dev, "edac_device_add_device failed\n"); 986 rc = -ENOMEM; 987 goto err_free; 988 } 989 990 if (edac_op_state == EDAC_OPSTATE_INT) 991 edac_dev->op_state = OP_RUNNING_INTERRUPT; 992 993 list_add(&ctx->next, &edac->pmds); 994 995 xgene_edac_pmd_hw_ctl(edac_dev, 1); 996 997 devres_remove_group(edac->dev, xgene_edac_pmd_add); 998 999 dev_info(edac->dev, "X-Gene EDAC PMD%d registered\n", ctx->pmd); 1000 return 0; 1001 1002 err_free: 1003 edac_device_free_ctl_info(edac_dev); 1004 err_group: 1005 devres_release_group(edac->dev, xgene_edac_pmd_add); 1006 return rc; 1007 } 1008 1009 static int xgene_edac_pmd_remove(struct xgene_edac_pmd_ctx *pmd) 1010 { 1011 struct edac_device_ctl_info *edac_dev = pmd->edac_dev; 1012 1013 xgene_edac_pmd_hw_ctl(edac_dev, 0); 1014 edac_device_del_device(edac_dev->dev); 1015 edac_device_free_ctl_info(edac_dev); 1016 return 0; 1017 } 1018 1019 static irqreturn_t xgene_edac_isr(int irq, void *dev_id) 1020 { 1021 struct xgene_edac *ctx = dev_id; 1022 struct xgene_edac_pmd_ctx *pmd; 1023 unsigned int pcp_hp_stat; 1024 unsigned int pcp_lp_stat; 1025 1026 xgene_edac_pcp_rd(ctx, PCPHPERRINTSTS, &pcp_hp_stat); 1027 xgene_edac_pcp_rd(ctx, PCPLPERRINTSTS, &pcp_lp_stat); 1028 if ((MCU_UNCORR_ERR_MASK & pcp_hp_stat) || 1029 (MCU_CTL_ERR_MASK & pcp_hp_stat) || 1030 (MCU_CORR_ERR_MASK & pcp_lp_stat)) { 1031 struct xgene_edac_mc_ctx *mcu; 1032 1033 list_for_each_entry(mcu, &ctx->mcus, next) { 1034 xgene_edac_mc_check(mcu->mci); 1035 } 1036 } 1037 1038 list_for_each_entry(pmd, &ctx->pmds, next) { 1039 if ((PMD0_MERR_MASK << pmd->pmd) & pcp_hp_stat) 1040 xgene_edac_pmd_check(pmd->edac_dev); 1041 } 1042 1043 return IRQ_HANDLED; 1044 } 1045 1046 static int xgene_edac_probe(struct platform_device *pdev) 1047 { 1048 struct xgene_edac *edac; 1049 struct device_node *child; 1050 struct resource *res; 1051 int rc; 1052 1053 edac = devm_kzalloc(&pdev->dev, sizeof(*edac), GFP_KERNEL); 1054 if (!edac) 1055 return -ENOMEM; 1056 1057 edac->dev = &pdev->dev; 1058 platform_set_drvdata(pdev, edac); 1059 INIT_LIST_HEAD(&edac->mcus); 1060 INIT_LIST_HEAD(&edac->pmds); 1061 spin_lock_init(&edac->lock); 1062 mutex_init(&edac->mc_lock); 1063 1064 edac->csw_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, 1065 "regmap-csw"); 1066 if (IS_ERR(edac->csw_map)) { 1067 dev_err(edac->dev, "unable to get syscon regmap csw\n"); 1068 rc = PTR_ERR(edac->csw_map); 1069 goto out_err; 1070 } 1071 1072 edac->mcba_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, 1073 "regmap-mcba"); 1074 if (IS_ERR(edac->mcba_map)) { 1075 dev_err(edac->dev, "unable to get syscon regmap mcba\n"); 1076 rc = PTR_ERR(edac->mcba_map); 1077 goto out_err; 1078 } 1079 1080 edac->mcbb_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, 1081 "regmap-mcbb"); 1082 if (IS_ERR(edac->mcbb_map)) { 1083 dev_err(edac->dev, "unable to get syscon regmap mcbb\n"); 1084 rc = PTR_ERR(edac->mcbb_map); 1085 goto out_err; 1086 } 1087 edac->efuse_map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, 1088 "regmap-efuse"); 1089 if (IS_ERR(edac->efuse_map)) { 1090 dev_err(edac->dev, "unable to get syscon regmap efuse\n"); 1091 rc = PTR_ERR(edac->efuse_map); 1092 goto out_err; 1093 } 1094 1095 res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 1096 edac->pcp_csr = devm_ioremap_resource(&pdev->dev, res); 1097 if (IS_ERR(edac->pcp_csr)) { 1098 dev_err(&pdev->dev, "no PCP resource address\n"); 1099 rc = PTR_ERR(edac->pcp_csr); 1100 goto out_err; 1101 } 1102 1103 if (edac_op_state == EDAC_OPSTATE_INT) { 1104 int irq; 1105 int i; 1106 1107 for (i = 0; i < 3; i++) { 1108 irq = platform_get_irq(pdev, i); 1109 if (irq < 0) { 1110 dev_err(&pdev->dev, "No IRQ resource\n"); 1111 rc = -EINVAL; 1112 goto out_err; 1113 } 1114 rc = devm_request_irq(&pdev->dev, irq, 1115 xgene_edac_isr, IRQF_SHARED, 1116 dev_name(&pdev->dev), edac); 1117 if (rc) { 1118 dev_err(&pdev->dev, 1119 "Could not request IRQ %d\n", irq); 1120 goto out_err; 1121 } 1122 } 1123 } 1124 1125 for_each_child_of_node(pdev->dev.of_node, child) { 1126 if (!of_device_is_available(child)) 1127 continue; 1128 if (of_device_is_compatible(child, "apm,xgene-edac-mc")) 1129 xgene_edac_mc_add(edac, child); 1130 if (of_device_is_compatible(child, "apm,xgene-edac-pmd")) 1131 xgene_edac_pmd_add(edac, child, 1); 1132 if (of_device_is_compatible(child, "apm,xgene-edac-pmd-v2")) 1133 xgene_edac_pmd_add(edac, child, 2); 1134 } 1135 1136 return 0; 1137 1138 out_err: 1139 return rc; 1140 } 1141 1142 static int xgene_edac_remove(struct platform_device *pdev) 1143 { 1144 struct xgene_edac *edac = dev_get_drvdata(&pdev->dev); 1145 struct xgene_edac_mc_ctx *mcu; 1146 struct xgene_edac_mc_ctx *temp_mcu; 1147 struct xgene_edac_pmd_ctx *pmd; 1148 struct xgene_edac_pmd_ctx *temp_pmd; 1149 1150 list_for_each_entry_safe(mcu, temp_mcu, &edac->mcus, next) { 1151 xgene_edac_mc_remove(mcu); 1152 } 1153 1154 list_for_each_entry_safe(pmd, temp_pmd, &edac->pmds, next) { 1155 xgene_edac_pmd_remove(pmd); 1156 } 1157 return 0; 1158 } 1159 1160 static const struct of_device_id xgene_edac_of_match[] = { 1161 { .compatible = "apm,xgene-edac" }, 1162 {}, 1163 }; 1164 MODULE_DEVICE_TABLE(of, xgene_edac_of_match); 1165 1166 static struct platform_driver xgene_edac_driver = { 1167 .probe = xgene_edac_probe, 1168 .remove = xgene_edac_remove, 1169 .driver = { 1170 .name = "xgene-edac", 1171 .of_match_table = xgene_edac_of_match, 1172 }, 1173 }; 1174 1175 static int __init xgene_edac_init(void) 1176 { 1177 int rc; 1178 1179 /* Make sure error reporting method is sane */ 1180 switch (edac_op_state) { 1181 case EDAC_OPSTATE_POLL: 1182 case EDAC_OPSTATE_INT: 1183 break; 1184 default: 1185 edac_op_state = EDAC_OPSTATE_INT; 1186 break; 1187 } 1188 1189 rc = platform_driver_register(&xgene_edac_driver); 1190 if (rc) { 1191 edac_printk(KERN_ERR, EDAC_MOD_STR, 1192 "EDAC fails to register\n"); 1193 goto reg_failed; 1194 } 1195 1196 return 0; 1197 1198 reg_failed: 1199 return rc; 1200 } 1201 module_init(xgene_edac_init); 1202 1203 static void __exit xgene_edac_exit(void) 1204 { 1205 platform_driver_unregister(&xgene_edac_driver); 1206 } 1207 module_exit(xgene_edac_exit); 1208 1209 MODULE_LICENSE("GPL"); 1210 MODULE_AUTHOR("Feng Kan <fkan@apm.com>"); 1211 MODULE_DESCRIPTION("APM X-Gene EDAC driver"); 1212 module_param(edac_op_state, int, 0444); 1213 MODULE_PARM_DESC(edac_op_state, 1214 "EDAC error reporting state: 0=Poll, 2=Interrupt"); 1215